hpcg: add first granularity/scalability exps for tampi+isend+oss+task

- oss.nix runs valid hpcg layouts whereas slices.nix does not
This commit is contained in:
Raúl Peñacoba 2021-02-23 18:24:21 +01:00 committed by Rodrigo Arias Mallo
parent 12ff1fd506
commit 1a6075a2b1
14 changed files with 665 additions and 148 deletions

View File

@ -1,13 +1,13 @@
{
stdenv
, cc
, nanos6 ? null
, mcxx ? null
, mpi ? null
, nanos6
, mcxx
, mpi
, tampi
, gitBranch
}:
with stdenv.lib;
stdenv.mkDerivation rec {
name = "hpcg";
@ -16,16 +16,13 @@ stdenv.mkDerivation rec {
ref = "${gitBranch}";
};
prePatch = ''
#export NIX_DEBUG=6
'';
# prePatch = ''
# #export NIX_DEBUG=6
# '';
buildInputs = [
cc
]
++ optional (mcxx != null) mcxx
++ optional (nanos6 != null) nanos6
++ optional (mpi != null) mpi;
cc nanos6 mcxx mpi tampi
];
makeFlags = [
"CC=${cc.CC}"

View File

@ -28,7 +28,7 @@
};
hpcg = callPackage ./hpcg/default.nix {
gitBranch = "garlic/oss";
gitBranch = "garlic/tampi+isend+oss+task";
};
bigsort = {

View File

@ -5,7 +5,6 @@
, targetMachine
, stages
, garlicTools
, resultFromTrebuchet
}:
with stdenv.lib;
@ -14,28 +13,23 @@ with garlicTools;
let
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
genConf = c: targetMachine.config // rec {
expName = "${c.expName}.gen";
unitName = "${expName}.n${toString n.x}";
inherit (targetMachine.config) hw;
# hpcg options
cc = bsc.icc;
mcxx = bsc.mcxx;
nanos6 = bsc.nanos6;
mpi = null; # TODO: Remove this for oss
# Only the n and gitBranch options are inherited
inherit (c) n gitBranch;
inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
# Repeat the execution of each unit 30 times
loops = 1;
# Resources
qos = "debug";
ntasksPerNode = 1;
nodes = 1;
time = "02:00:00";
# ntasksPerNode = hw.socketsPerNode;
# nodes = 2;
time = "00:30:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
jobName = unitName;
@ -43,25 +37,24 @@ let
exec = {nextStage, conf, ...}: with conf; stages.exec {
inherit nextStage;
env = "NANOS6_DEPENDENCIES=discrete";
argv = [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
# The nblocks is ignored
#"--nblocks=${toString nblocks}"
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
# nblocks and ncomms are ignored
"--nblocks=1"
"--ncomms=1"
# Store the results in the same directory
"--store=."
];
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
};
program = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.hpcg.override {
inherit cc nanos6 mcxx gitBranch;
};
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ exec program ];
@ -78,7 +71,7 @@ let
inputExp = getExperimentStage inputTre;
# Then load the result. This is only used to ensure that we have the
# results, so it has been executed.
inputRes = resultFromTrebuchet inputTre;
inputRes = inputTre.result;
# We also need the unit, to compute the path.
inputUnit = stages.unit {
conf = genConf conf;
@ -95,7 +88,9 @@ let
# ${inputRes}
# Then we simply link the input result directory in "input"
ln -s ${relPath} input
# We use || true because all ranks will execute this and
# the execution will fail
ln -sf ${relPath} input || true
'';
};

View File

@ -21,7 +21,7 @@ let
n = c.n;
cc = bsc.icc;
mpi = bsc.impi;
gitBranch = "garlic/mpi+omp";
gitBranch = "garlic/mpi+send+omp+fork";
# Repeat the execution of each unit 30 times
loops = 30;

View File

@ -22,7 +22,7 @@ let
nblocks = c.nblocks;
cc = bsc.icc;
mpi = null; # TODO: Remove this for omp
gitBranch = "garlic/omp";
gitBranch = "garlic/omp+fork";
# Repeat the execution of each unit 30 times
loops = 30;

View File

@ -0,0 +1,89 @@
# Granularity experiment for HPCG on the garlic/tampi+isend+oss+task branch:
# sweeps the number of blocks (nblocks) for a fixed problem size and a fixed
# process grid, so units differ only in task granularity.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Initial variable configuration: one experimental unit is generated for
# every combination of the values listed here.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
# { x = 2; y = 2; z = 1; }
# { x = 2; y = 2; z = 2; }
# { x = 4; y = 2; z = 2; }
# { x = 4; y = 4; z = 2; }
];
nblocks = [ 12 24 48 96 192 384 768 1536 ];
# nblocks = [ 384 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = c: targetMachine.config // rec {
# NOTE(review): expName is also "hpcg.oss" in the other hpcg experiments of
# this commit — confirm the experiments do not share result paths.
expName = "hpcg.oss";
# The unit name only encodes nblocks, the single varying axis here (only
# one nprocs entry is active in varConf).
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 3 times
loops = 3;
disableAspectRatio = false;
# Resources
qos = "debug";
# One rank (task) per socket
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
# Nodes needed so the whole process grid fits at ntasksPerNode ranks/node
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# Stage that links the precomputed HPCG input for each configuration
input = genInput configs;
# Execution stage: geometry and granularity are passed on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = [
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
# Build the hpcg app from the branch selected in the unit configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -11,39 +11,46 @@ with stdenv.lib;
let
# Initial variable configuration
varConf = with bsc; {
# FIXME: Temporally reduce the input size until we can load a precomputed
# input in each run, otherwise the execution time is very large.
#n = [ { x = 104; y = 104; z = 104; } ];
n = [ { x = 256; y = 288; z = 288; } ];
nblocks = [ 12 24 48 96 192 384 ];
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 2; y = 2; z = 1; }
{ x = 2; y = 2; z = 2; }
{ x = 4; y = 2; z = 2; }
{ x = 4; y = 4; z = 2; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 768 1536 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
n = c.n;
nblocks = c.nblocks;
cc = bsc.icc;
mcxx = bsc.mcxx;
nanos6 = bsc.nanos6;
mpi = null; # TODO: Remove this for oss
gitBranch = "garlic/oss";
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 30 times
loops = 30;
loops = 3;
disableAspectRatio = false;
# Resources
qos = "debug";
ntasksPerNode = 1;
nodes = 1;
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
@ -54,31 +61,29 @@ let
input = genInput configs;
exec = {nextStage, conf, ...}: with conf; stages.exec {
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
env = "NANOS6_DEPENDENCIES=discrete";
argv = [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
"--nblocks=${toString nblocks}"
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
program = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.hpcg.override {
inherit cc nanos6 mcxx gitBranch;
};
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
#{ inherit configs pipeline; }
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -0,0 +1,89 @@
# Combined experiment for HPCG on the garlic/tampi+isend+oss+task branch:
# sweeps both the process grid (nprocs) and the task granularity (nblocks)
# for a fixed problem size.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Initial variable configuration: one experimental unit is generated for
# every combination of the values listed here.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 2; y = 2; z = 1; }
{ x = 2; y = 2; z = 2; }
{ x = 4; y = 2; z = 2; }
{ x = 4; y = 4; z = 2; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 768 1536 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
# NOTE(review): unitName only encodes nblocks, but nprocs also varies in
# this experiment, so units with different process grids share the same
# name — consider adding the grid to unitName.
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 10 times
loops = 10;
disableAspectRatio = false;
# Resources
qos = "debug";
# One rank (task) per socket
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
# Nodes needed so the whole process grid fits at ntasksPerNode ranks/node
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# Stage that links the precomputed HPCG input for each configuration
input = genInput configs;
# Execution stage: geometry and granularity are passed on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = [
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
# Build the hpcg app from the branch selected in the unit configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -0,0 +1,91 @@
# HPCG scalability experiment for the garlic/tampi+isend+oss+task branch.
# The problem size and granularity stay fixed while the process grid grows
# along the X axis, scaling the run from 2 up to 32 ranks.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Values to sweep: one experimental unit per combination.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
# The grid grows along X only
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 4; y = 1; z = 1; }
{ x = 8; y = 1; z = 1; }
{ x = 16; y = 1; z = 1; }
{ x = 32; y = 1; z = 1; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Build the full unit configuration from one combination `c`.
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
# NOTE(review): only nblocks appears in unitName, but nblocks is fixed at
# 384 here while nprocs varies, so every unit gets the same name — confirm
# this does not collide.
unitName = "${expName}.nb${toString nblocks}";
hw = targetMachine.config.hw;
# hpcg options
n = c.n;
nprocs = c.nprocs;
nblocks = c.nblocks;
ncommblocks = c.ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Each unit is executed 3 times
loops = 3;
# Lift the aspect-ratio restriction so the X-only grids are accepted
disableAspectRatio = true;
# Resources: one rank per socket, each rank owning a full socket
qos = "debug";
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
cpusPerTask = hw.cpusPerSocket;
# Enough nodes to hold the complete process grid
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# All unit configurations: the product of varConf expanded through genConf
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# The input stage links the precomputed HPCG input for each unit
input = genInput configs;
# Execution stage: geometry and granularity go on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = let inherit (conf) n nprocs nblocks ncommblocks; in [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
# Distribute all processes in X axis
"--npx=${toString nprocs.x}"
"--npy=${toString nprocs.y}"
"--npz=${toString nprocs.z}"
"--nblocks=${toString nblocks}"
"--ncomms=${toString ncommblocks}"
# The input symlink is created by the input stage above
"--load=input"
# Disable the HPCG aspect-ratio check to run any mpi layout
] ++ optionals conf.disableAspectRatio [ "--no-ar=1" ];
};
# Build hpcg from the branch selected in the configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
gitBranch = conf.gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -55,12 +55,23 @@
# inherit (bsc.garlic.pp) resultFromTrebuchet;
#};
genInput = callPackage ./hpcg/gen.nix {
inherit (bsc.garlic.pp) resultFromTrebuchet;
};
oss = callPackage ./hpcg/oss.nix {
inherit genInput;
};
ossGranularity = callPackage ./hpcg/oss.granularity.192.nix {
inherit genInput;
};
ossScalability = callPackage ./hpcg/oss.scalability.192.nix {
inherit genInput;
};
# slices = callPackage ./hpcg/slices.nix {
# inherit genInput;
# };
};
heat = rec {

View File

@ -1,102 +1,112 @@
# This R program takes as argument the dataset that contains the results of the
# execution of the heat example experiment and produces some plots. All the
# knowledge to understand how this script works is covered by this nice R book:
#
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020). 2nd edition
#
# Which can be freely read it online here: https://r-graphics.org/
#
# Please, search in this book before copying some random (and probably oudated)
# reply on stack overflow.
# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for ploting data,
library(ggplot2)
library(dplyr)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
args=commandArgs(trailingOnly=TRUE)
# Here we simply load the arguments to find the input dataset. If nothing is
# specified we use the file named `input` in the current directory.
# We can run this script directly using:
# Rscript <path-to-this-script> <input-dataset>
# Read the timetable from args[1]
input_file = "input.json"
if (length(args)>0) { input_file = args[1] }
# Load the arguments (argv)
args = commandArgs(trailingOnly=TRUE)
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten()
# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
particles = unique(dataset$config.particles)
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# We only need the nblocks and time
df = select(dataset, config.nblocks, config.hw.cpusPerSocket, time) %>%
rename(nblocks=config.nblocks,
cpusPerSocket=config.hw.cpusPerSocket)
# Then we flatten it, as it may contain dictionaries inside the columns
jsonlite::flatten() %>%
df = df %>% mutate(blocksPerCpu = nblocks / cpusPerSocket)
df$nblocks = as.factor(df$nblocks)
df$blocksPerCpuFactor = as.factor(df$blocksPerCpu)
# Now the dataframe contains all the configuration of the units inside the
# columns named `config.*`, for example `config.cbs`. We first select only
# the columns that we need:
select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, unit, time) %>%
# Normalize the time by the median
D=group_by(df, nblocks) %>%
mutate(tnorm = time / median(time) - 1)
# And then we rename those columns to something shorter:
rename(nblocks=config.nblocks,
ncommblocks=config.ncommblocks,
cpusPerSocket=config.hw.cpusPerSocket) %>%
bs_unique = unique(df$nblocks)
nbs=length(bs_unique)
mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
print(D)
mutate(nblocks = as.factor(nblocks)) %>%
mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
mutate(unit = as.factor(unit)) %>%
ppi=300
group_by(unit) %>%
# And compute some metrics which are applied to each group. For example we
# compute the median time within the runs of a unit:
mutate(median.time = median(time)) %>%
mutate(normalized.time = time / median.time - 1) %>%
mutate(log.median.time = log(median.time)) %>%
# Then, we remove the grouping. This step is very important, otherwise the
# plotting functions get confused:
ungroup()
dpi=300
h=5
w=5
png("box.png", width=w*ppi, height=h*ppi, res=ppi)
#
#
#
# Create the plot with the normalized time vs nblocks
p = ggplot(data=D, aes(x=blocksPerCpuFactor, y=tnorm)) +
p = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
# Labels
labs(x="Num blocks", y="Normalized time",
title="HPCG normalized time",
subtitle=input_file) +
# The boxplots are useful to identify outliers and problems with the
# distribution of time
geom_boxplot() +
# Center the title
#theme(plot.title = element_text(hjust = 0.5)) +
# We add a line to mark the 1% limit above and below the median
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
# Black and white mode (useful for printing)
#theme_bw() +
# The bw theme is recommended for publications
theme_bw() +
# Add the maximum allowed error lines
geom_hline(yintercept=c(-0.01, 0.01),
linetype="dashed", color="red") +
# Here we add the title and the labels of the axes
labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
subtitle=input_file) +
# Draw boxplots
geom_boxplot() +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
#scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
# Then, we save the plot both in png and pdf
ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
theme_bw() +
# We plot the time of each run as we vary the block size
p = ggplot(df, aes(x=blocksPerCpu, y=time)) +
theme(plot.subtitle=element_text(size=8)) +
# We add a points (scatter plot) using circles (shape=21) a bit larger
# than the default (size=3)
geom_point(shape=21, size=3) +
theme(legend.position = c(0.85, 0.85)) #+
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
# Render the plot
print(p)
## Save the png image
dev.off()
#
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
#
## Create the plot with the normalized time vs nblocks
p = ggplot(D, aes(x=blocksPerCpuFactor, y=time)) +
labs(x="Blocks/CPU", y="Time (s)",
title="HPCG granularity",
subtitle=input_file) +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88)) +
geom_point(shape=21, size=3) +
#scale_x_continuous(trans=log2_trans()) +
scale_y_continuous(trans=log2_trans())
# Render the plot
print(p)
# Save the png image
dev.off()

View File

@ -0,0 +1,112 @@
# Plots for the HPCG granularity experiment (tampi+isend+oss+task). The
# script reads an NDJSON dataset with one record per run and produces a
# normalized-time boxplot and a raw-time scatter plot, each saved as both
# PNG and PDF.
#
# The plotting techniques used here are covered by:
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020), 2nd edition — freely readable at https://r-graphics.org/
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
# The input dataset may be given as the first argument; otherwise the file
# named "input" in the current directory is used:
#   Rscript <path-to-this-script> <input-dataset>
argv = commandArgs(trailingOnly=TRUE)
input_file = if (length(argv) > 0) argv[1] else "input"
# Load the NDJSON records and flatten nested configuration dictionaries into
# "config.*" columns.
raw = jsonlite::flatten(jsonlite::stream_in(file(input_file), verbose=FALSE))
# Keep only the columns we plot, under shorter names, and derive the metrics.
df = raw %>%
  select(nblocks = config.nblocks,
         ncommblocks = config.ncommblocks,
         cpusPerSocket = config.hw.cpusPerSocket,
         unit, time) %>%
  # Blocks per CPU is the granularity metric; the discrete axes then become
  # factors so ggplot treats them categorically.
  mutate(blocksPerCpu = nblocks / cpusPerSocket,
         nblocks = as.factor(nblocks),
         blocksPerCpu = as.factor(blocksPerCpu),
         unit = as.factor(unit)) %>%
  # Per-unit metrics: normalize every run against its unit's median time.
  group_by(unit) %>%
  mutate(median.time = median(time),
         normalized.time = time / median.time - 1,
         log.median.time = log(median.time)) %>%
  # Drop the grouping so the plotting functions see a plain data frame.
  ungroup()
# Common output settings (inches and dots per inch)
dpi = 300
h = 5
w = 5
# Save a plot under the given basename in both png and pdf.
save_plot = function(base, plot) {
  ggsave(paste0(base, ".png"), plot=plot, width=w, height=h, dpi=dpi)
  ggsave(paste0(base, ".pdf"), plot=plot, width=w, height=h, dpi=dpi)
}
# Boxplot of the normalized time: useful to identify outliers and problems
# with the distribution of time.
plt = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
  geom_boxplot() +
  # Dashed red lines mark the 1% band above and below the median
  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
  # The bw theme is recommended for publications
  theme_bw() +
  labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
       subtitle=input_file) +
  # Shrink the subtitle so the input path fits nicely
  theme(plot.subtitle=element_text(size=8))
save_plot("normalized.time", plt)
# Scatter plot of the raw time of each run as the block size varies.
plt = ggplot(df, aes(x=blocksPerCpu, y=time)) +
  # Circles (shape=21) a bit larger than the default
  geom_point(shape=21, size=3) +
  theme_bw() +
  labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
       subtitle=input_file) +
  theme(plot.subtitle=element_text(size=8))
save_plot("time", plt)

View File

@ -0,0 +1,116 @@
# This R program takes as argument the dataset that contains the results of the
# execution of the HPCG weak scalability experiment and produces some plots.
# All the knowledge to understand how this script works is covered by this
# nice R book:
#
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020). 2nd edition
#
# Which can be freely read online here: https://r-graphics.org/
#
# Please, search in this book before copying some random (and probably outdated)
# reply on stack overflow.
# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for plotting data,
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
# Here we simply load the arguments to find the input dataset. If nothing is
# specified we use the file named `input` in the current directory.
# We can run this script directly using:
# Rscript <path-to-this-script> <input-dataset>
# Load the arguments (argv)
args = commandArgs(trailingOnly=TRUE)
# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
# Read the NDJSON dataset: one record per run
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# Then we flatten it, as it may contain dictionaries inside the columns
jsonlite::flatten() %>%
# Now the dataframe contains all the configuration of the units inside the
# columns named `config.*`, for example `config.cbs`. We first select only
# the columns that we need:
select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, config.nodes, unit, time) %>%
# And then we rename those columns to something shorter:
rename(nblocks=config.nblocks,
ncommblocks=config.ncommblocks,
cpusPerSocket=config.hw.cpusPerSocket,
nodes=config.nodes) %>%
# Blocks per CPU is the granularity metric
mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
# Turn the discrete axes into factors so ggplot treats them categorically
mutate(nblocks = as.factor(nblocks)) %>%
mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
mutate(nodes = as.factor(nodes)) %>%
mutate(unit = as.factor(unit)) %>%
group_by(unit) %>%
# And compute some metrics which are applied to each group. For example we
# compute the median time within the runs of a unit:
mutate(median.time = median(time)) %>%
mutate(normalized.time = time / median.time - 1) %>%
mutate(log.median.time = log(median.time)) %>%
# Then, we remove the grouping. This step is very important, otherwise the
# plotting functions get confused:
ungroup()
# Plot dimensions (inches) and resolution
dpi=300
h=5
w=5
# Normalized time vs nodes, one box per unit, colored by granularity
p = ggplot(df, aes(x=nodes, y=normalized.time, color=blocksPerCpu)) +
# The boxplots are useful to identify outliers and problems with the
# distribution of time
geom_boxplot() +
# We add a line to mark the 1% limit above and below the median
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Nodes", y="Normalized time", title="HPCG weak scalability: normalized time",
color="Blocks per CPU",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
# We plot the time of each run as we vary the number of nodes
p = ggplot(df, aes(x=nodes, y=time, color=blocksPerCpu)) +
# We add a points (scatter plot) using circles (shape=21) a bit larger
# than the default (size=3)
geom_point(shape=21, size=3) +
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Nodes", y="Time (s)", title="HPCG weak scalability: time",
color="Blocks per CPU",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)

View File

@ -38,7 +38,9 @@ in
};
hpcg = with exp.hpcg; {
oss = stdPlot ./hpcg/oss.R [ oss ];
ossGranularity = stdPlot ./hpcg/oss.granularity.R [ ossGranularity ];
ossScalability = stdPlot ./hpcg/oss.scalability.R [ ossScalability ];
# slices = stdPlot ./hpcg/oss.R [ slices ];
};
saiph = with exp.saiph; {