hpcg: add first granularity/scalability exps for tampi+isend+oss+task

- oss.nix runs valid hpcg layouts whereas slices.nix does not
This commit is contained in:
Raúl Peñacoba 2021-02-23 18:24:21 +01:00 committed by Rodrigo Arias Mallo
parent 12ff1fd506
commit 1a6075a2b1
14 changed files with 665 additions and 148 deletions

View File

@ -1,13 +1,13 @@
{
stdenv
, cc
, nanos6 ? null
, mcxx ? null
, mpi ? null
, nanos6
, mcxx
, mpi
, tampi
, gitBranch
}:
with stdenv.lib;
stdenv.mkDerivation rec {
name = "hpcg";
@ -16,16 +16,13 @@ stdenv.mkDerivation rec {
ref = "${gitBranch}";
};
prePatch = ''
#export NIX_DEBUG=6
'';
# prePatch = ''
# #export NIX_DEBUG=6
# '';
buildInputs = [
cc
]
++ optional (mcxx != null) mcxx
++ optional (nanos6 != null) nanos6
++ optional (mpi != null) mpi;
cc nanos6 mcxx mpi tampi
];
makeFlags = [
"CC=${cc.CC}"

View File

@ -28,7 +28,7 @@
};
hpcg = callPackage ./hpcg/default.nix {
gitBranch = "garlic/oss";
gitBranch = "garlic/tampi+isend+oss+task";
};
bigsort = {

View File

@ -5,7 +5,6 @@
, targetMachine
, stages
, garlicTools
, resultFromTrebuchet
}:
with stdenv.lib;
@ -14,28 +13,23 @@ with garlicTools;
let
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
genConf = c: targetMachine.config // rec {
expName = "${c.expName}.gen";
unitName = "${expName}.n${toString n.x}";
inherit (targetMachine.config) hw;
# hpcg options
cc = bsc.icc;
mcxx = bsc.mcxx;
nanos6 = bsc.nanos6;
mpi = null; # TODO: Remove this for oss
# Only the n and gitBranch options are inherited
inherit (c) n gitBranch;
inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
# Repeat the execution of each unit 30 times
loops = 1;
# Resources
qos = "debug";
ntasksPerNode = 1;
nodes = 1;
time = "02:00:00";
# ntasksPerNode = hw.socketsPerNode;
# nodes = 2;
time = "00:30:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
jobName = unitName;
@ -43,25 +37,24 @@ let
exec = {nextStage, conf, ...}: with conf; stages.exec {
inherit nextStage;
env = "NANOS6_DEPENDENCIES=discrete";
argv = [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
# The nblocks is ignored
#"--nblocks=${toString nblocks}"
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
# nblocks and ncomms are ignored
"--nblocks=1"
"--ncomms=1"
# Store the results in the same directory
"--store=."
];
] ++ optional (conf.disableAspectRatio) "--no-ar=1";
};
program = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.hpcg.override {
inherit cc nanos6 mcxx gitBranch;
};
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ exec program ];
@ -78,7 +71,7 @@ let
inputExp = getExperimentStage inputTre;
# Then load the result. This is only used to ensure that we have the
# results, so it has been executed.
inputRes = resultFromTrebuchet inputTre;
inputRes = inputTre.result;
# We also need the unit, to compute the path.
inputUnit = stages.unit {
conf = genConf conf;
@ -95,7 +88,9 @@ let
# ${inputRes}
# Then we simply link the input result directory in "input"
ln -s ${relPath} input
# We use || true because all ranks will execute this and
# the execution will fail
ln -sf ${relPath} input || true
'';
};

View File

@ -21,7 +21,7 @@ let
n = c.n;
cc = bsc.icc;
mpi = bsc.impi;
gitBranch = "garlic/mpi+omp";
gitBranch = "garlic/mpi+send+omp+fork";
# Repeat the execution of each unit 30 times
loops = 30;

View File

@ -22,7 +22,7 @@ let
nblocks = c.nblocks;
cc = bsc.icc;
mpi = null; # TODO: Remove this for omp
gitBranch = "garlic/omp";
gitBranch = "garlic/omp+fork";
# Repeat the execution of each unit 30 times
loops = 30;

View File

@ -0,0 +1,89 @@
# Granularity experiment for HPCG on the garlic/tampi+isend+oss+task branch:
# sweeps the number of blocks (nblocks) for a fixed problem size and a fixed
# process grid, so units differ only in task granularity.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Initial variable configuration: one experimental unit is generated for
# every combination of the values listed here.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
# { x = 2; y = 2; z = 1; }
# { x = 2; y = 2; z = 2; }
# { x = 4; y = 2; z = 2; }
# { x = 4; y = 4; z = 2; }
];
nblocks = [ 12 24 48 96 192 384 768 1536 ];
# nblocks = [ 384 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = c: targetMachine.config // rec {
# NOTE(review): expName is also "hpcg.oss" in the other hpcg experiments of
# this commit — confirm the experiments do not share result paths.
expName = "hpcg.oss";
# The unit name only encodes nblocks, the single varying axis here (only
# one nprocs entry is active in varConf).
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 3 times
loops = 3;
disableAspectRatio = false;
# Resources
qos = "debug";
# One rank (task) per socket
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
# Nodes needed so the whole process grid fits at ntasksPerNode ranks/node
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# Stage that links the precomputed HPCG input for each configuration
input = genInput configs;
# Execution stage: geometry and granularity are passed on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = [
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
# Build the hpcg app from the branch selected in the unit configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -11,39 +11,46 @@ with stdenv.lib;
let
# Initial variable configuration
varConf = with bsc; {
# FIXME: Temporally reduce the input size until we can load a precomputed
# input in each run, otherwise the execution time is very large.
#n = [ { x = 104; y = 104; z = 104; } ];
n = [ { x = 256; y = 288; z = 288; } ];
nblocks = [ 12 24 48 96 192 384 ];
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 2; y = 2; z = 1; }
{ x = 2; y = 2; z = 2; }
{ x = 4; y = 2; z = 2; }
{ x = 4; y = 4; z = 2; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 768 1536 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = with bsc; c: targetMachine.config // rec {
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
n = c.n;
nblocks = c.nblocks;
cc = bsc.icc;
mcxx = bsc.mcxx;
nanos6 = bsc.nanos6;
mpi = null; # TODO: Remove this for oss
gitBranch = "garlic/oss";
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 30 times
loops = 30;
loops = 3;
disableAspectRatio = false;
# Resources
qos = "debug";
ntasksPerNode = 1;
nodes = 1;
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
@ -54,31 +61,29 @@ let
input = genInput configs;
exec = {nextStage, conf, ...}: with conf; stages.exec {
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
env = "NANOS6_DEPENDENCIES=discrete";
argv = [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
"--nblocks=${toString nblocks}"
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
program = {nextStage, conf, ...}: with conf;
let
customPkgs = stdexp.replaceMpi conf.mpi;
in
customPkgs.apps.hpcg.override {
inherit cc nanos6 mcxx gitBranch;
};
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
#{ inherit configs pipeline; }
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -0,0 +1,89 @@
# Combined experiment for HPCG on the garlic/tampi+isend+oss+task branch:
# sweeps both the process grid (nprocs) and the task granularity (nblocks)
# for a fixed problem size.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Initial variable configuration: one experimental unit is generated for
# every combination of the values listed here.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 2; y = 2; z = 1; }
{ x = 2; y = 2; z = 2; }
{ x = 4; y = 2; z = 2; }
{ x = 4; y = 4; z = 2; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 768 1536 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Generate the complete configuration for each unit
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
# NOTE(review): unitName only encodes nblocks, but nprocs also varies in
# this experiment, so units with different process grids share the same
# name — consider adding the grid to unitName.
unitName = "${expName}.nb${toString nblocks}";
inherit (targetMachine.config) hw;
# hpcg options
inherit (c) n nprocs nblocks ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Repeat the execution of each unit 10 times
loops = 10;
disableAspectRatio = false;
# Resources
qos = "debug";
# One rank (task) per socket
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
# task in one socket
cpusPerTask = hw.cpusPerSocket;
# Nodes needed so the whole process grid fits at ntasksPerNode ranks/node
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# Compute the array of configurations
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# Stage that links the precomputed HPCG input for each configuration
input = genInput configs;
# Execution stage: geometry and granularity are passed on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = [
"--nx=${toString conf.n.x}"
"--ny=${toString conf.n.y}"
"--nz=${toString conf.n.z}"
"--npx=${toString conf.nprocs.x}"
"--npy=${toString conf.nprocs.y}"
"--npz=${toString conf.nprocs.z}"
"--nblocks=${toString conf.nblocks}"
"--ncomms=${toString conf.ncommblocks}"
# The input symlink is generated by the input stage, which is generated by
# the genInput function.
"--load=input"
];
};
# Build the hpcg app from the branch selected in the unit configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
inherit (conf) gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -0,0 +1,91 @@
# HPCG scalability experiment for the garlic/tampi+isend+oss+task branch.
# The problem size and granularity stay fixed while the process grid grows
# along the X axis, scaling the run from 2 up to 32 ranks.
{
stdenv
, stdexp
, bsc
, targetMachine
, stages
, genInput
}:
with stdenv.lib;
let
# Values to sweep: one experimental unit per combination.
varConf = {
n = [ { x = 192; y = 192; z = 192; } ];
# The grid grows along X only
nprocs = [
{ x = 2; y = 1; z = 1; }
{ x = 4; y = 1; z = 1; }
{ x = 8; y = 1; z = 1; }
{ x = 16; y = 1; z = 1; }
{ x = 32; y = 1; z = 1; }
];
# nblocks = [ 12 24 48 96 192 384 768 1536 ];
nblocks = [ 384 ];
ncommblocks = [ 1 ];
# nodes = [ 1 ];
# nodes = [ 1 2 4 8 16 ];
};
# Build the full unit configuration from one combination `c`.
genConf = c: targetMachine.config // rec {
expName = "hpcg.oss";
# NOTE(review): only nblocks appears in unitName, but nblocks is fixed at
# 384 here while nprocs varies, so every unit gets the same name — confirm
# this does not collide.
unitName = "${expName}.nb${toString nblocks}";
hw = targetMachine.config.hw;
# hpcg options
n = c.n;
nprocs = c.nprocs;
nblocks = c.nblocks;
ncommblocks = c.ncommblocks;
gitBranch = "garlic/tampi+isend+oss+task";
# Each unit is executed 3 times
loops = 3;
# Lift the aspect-ratio restriction so the X-only grids are accepted
disableAspectRatio = true;
# Resources: one rank per socket, each rank owning a full socket
qos = "debug";
ntasksPerNode = hw.socketsPerNode;
time = "02:00:00";
cpusPerTask = hw.cpusPerSocket;
# Enough nodes to hold the complete process grid
nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
};
# All unit configurations: the product of varConf expanded through genConf
configs = stdexp.buildConfigs {
inherit varConf genConf;
};
# The input stage links the precomputed HPCG input for each unit
input = genInput configs;
# Execution stage: geometry and granularity go on the command line
exec = {nextStage, conf, ...}: stages.exec {
inherit nextStage;
argv = let inherit (conf) n nprocs nblocks ncommblocks; in [
"--nx=${toString n.x}"
"--ny=${toString n.y}"
"--nz=${toString n.z}"
# Distribute all processes in X axis
"--npx=${toString nprocs.x}"
"--npy=${toString nprocs.y}"
"--npz=${toString nprocs.z}"
"--nblocks=${toString nblocks}"
"--ncomms=${toString ncommblocks}"
# The input symlink is created by the input stage above
"--load=input"
# Disable the HPCG aspect-ratio check to run any mpi layout
] ++ optionals conf.disableAspectRatio [ "--no-ar=1" ];
};
# Build hpcg from the branch selected in the configuration
program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
gitBranch = conf.gitBranch;
};
pipeline = stdexp.stdPipeline ++ [ input exec program ];
in
stdexp.genExperiment { inherit configs pipeline; }

View File

@ -55,12 +55,23 @@
# inherit (bsc.garlic.pp) resultFromTrebuchet;
#};
genInput = callPackage ./hpcg/gen.nix {
inherit (bsc.garlic.pp) resultFromTrebuchet;
};
oss = callPackage ./hpcg/oss.nix {
inherit genInput;
};
ossGranularity = callPackage ./hpcg/oss.granularity.192.nix {
inherit genInput;
};
ossScalability = callPackage ./hpcg/oss.scalability.192.nix {
inherit genInput;
};
# slices = callPackage ./hpcg/slices.nix {
# inherit genInput;
# };
};
heat = rec {

View File

@ -1,102 +1,112 @@
# This R program takes as argument the dataset that contains the results of the
# execution of the heat example experiment and produces some plots. All the
# knowledge to understand how this script works is covered by this nice R book:
#
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020). 2nd edition
#
# Which can be freely read it online here: https://r-graphics.org/
#
# Please, search in this book before copying some random (and probably oudated)
# reply on stack overflow.
# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for ploting data,
library(ggplot2)
library(dplyr)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
args=commandArgs(trailingOnly=TRUE)
# Here we simply load the arguments to find the input dataset. If nothing is
# specified we use the file named `input` in the current directory.
# We can run this script directly using:
# Rscript <path-to-this-script> <input-dataset>
# Read the timetable from args[1]
input_file = "input.json"
if (length(args)>0) { input_file = args[1] }
# Load the arguments (argv)
args = commandArgs(trailingOnly=TRUE)
# Load the dataset in NDJSON format
dataset = jsonlite::stream_in(file(input_file)) %>%
jsonlite::flatten()
# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
particles = unique(dataset$config.particles)
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# We only need the nblocks and time
df = select(dataset, config.nblocks, config.hw.cpusPerSocket, time) %>%
rename(nblocks=config.nblocks,
cpusPerSocket=config.hw.cpusPerSocket)
# Then we flatten it, as it may contain dictionaries inside the columns
jsonlite::flatten() %>%
df = df %>% mutate(blocksPerCpu = nblocks / cpusPerSocket)
df$nblocks = as.factor(df$nblocks)
df$blocksPerCpuFactor = as.factor(df$blocksPerCpu)
# Now the dataframe contains all the configuration of the units inside the
# columns named `config.*`, for example `config.cbs`. We first select only
# the columns that we need:
select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, unit, time) %>%
# Normalize the time by the median
D=group_by(df, nblocks) %>%
mutate(tnorm = time / median(time) - 1)
# And then we rename those columns to something shorter:
rename(nblocks=config.nblocks,
ncommblocks=config.ncommblocks,
cpusPerSocket=config.hw.cpusPerSocket) %>%
bs_unique = unique(df$nblocks)
nbs=length(bs_unique)
mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
print(D)
mutate(nblocks = as.factor(nblocks)) %>%
mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
mutate(unit = as.factor(unit)) %>%
ppi=300
group_by(unit) %>%
# And compute some metrics which are applied to each group. For example we
# compute the median time within the runs of a unit:
mutate(median.time = median(time)) %>%
mutate(normalized.time = time / median.time - 1) %>%
mutate(log.median.time = log(median.time)) %>%
# Then, we remove the grouping. This step is very important, otherwise the
# plotting functions get confused:
ungroup()
dpi=300
h=5
w=5
png("box.png", width=w*ppi, height=h*ppi, res=ppi)
#
#
#
# Create the plot with the normalized time vs nblocks
p = ggplot(data=D, aes(x=blocksPerCpuFactor, y=tnorm)) +
p = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
# Labels
labs(x="Num blocks", y="Normalized time",
title="HPCG normalized time",
subtitle=input_file) +
# The boxplots are useful to identify outliers and problems with the
# distribution of time
geom_boxplot() +
# Center the title
#theme(plot.title = element_text(hjust = 0.5)) +
# We add a line to mark the 1% limit above and below the median
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
# Black and white mode (useful for printing)
#theme_bw() +
# The bw theme is recommended for publications
theme_bw() +
# Add the maximum allowed error lines
geom_hline(yintercept=c(-0.01, 0.01),
linetype="dashed", color="red") +
# Here we add the title and the labels of the axes
labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
subtitle=input_file) +
# Draw boxplots
geom_boxplot() +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
#scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
# Then, we save the plot both in png and pdf
ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
theme_bw() +
# We plot the time of each run as we vary the block size
p = ggplot(df, aes(x=blocksPerCpu, y=time)) +
theme(plot.subtitle=element_text(size=8)) +
# We add a points (scatter plot) using circles (shape=21) a bit larger
# than the default (size=3)
geom_point(shape=21, size=3) +
theme(legend.position = c(0.85, 0.85)) #+
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
# Render the plot
print(p)
## Save the png image
dev.off()
#
png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
#
## Create the plot with the normalized time vs nblocks
p = ggplot(D, aes(x=blocksPerCpuFactor, y=time)) +
labs(x="Blocks/CPU", y="Time (s)",
title="HPCG granularity",
subtitle=input_file) +
theme_bw() +
theme(plot.subtitle=element_text(size=8)) +
theme(legend.position = c(0.5, 0.88)) +
geom_point(shape=21, size=3) +
#scale_x_continuous(trans=log2_trans()) +
scale_y_continuous(trans=log2_trans())
# Render the plot
print(p)
# Save the png image
dev.off()

View File

@ -0,0 +1,112 @@
# Plots for the HPCG granularity experiment (tampi+isend+oss+task). The
# script reads an NDJSON dataset with one record per run and produces a
# normalized-time boxplot and a raw-time scatter plot, each saved as both
# PNG and PDF.
#
# The plotting techniques used here are covered by:
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020), 2nd edition — freely readable at https://r-graphics.org/
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
# The input dataset may be given as the first argument; otherwise the file
# named "input" in the current directory is used:
#   Rscript <path-to-this-script> <input-dataset>
argv = commandArgs(trailingOnly=TRUE)
input_file = if (length(argv) > 0) argv[1] else "input"
# Load the NDJSON records and flatten nested configuration dictionaries into
# "config.*" columns.
raw = jsonlite::flatten(jsonlite::stream_in(file(input_file), verbose=FALSE))
# Keep only the columns we plot, under shorter names, and derive the metrics.
df = raw %>%
  select(nblocks = config.nblocks,
         ncommblocks = config.ncommblocks,
         cpusPerSocket = config.hw.cpusPerSocket,
         unit, time) %>%
  # Blocks per CPU is the granularity metric; the discrete axes then become
  # factors so ggplot treats them categorically.
  mutate(blocksPerCpu = nblocks / cpusPerSocket,
         nblocks = as.factor(nblocks),
         blocksPerCpu = as.factor(blocksPerCpu),
         unit = as.factor(unit)) %>%
  # Per-unit metrics: normalize every run against its unit's median time.
  group_by(unit) %>%
  mutate(median.time = median(time),
         normalized.time = time / median.time - 1,
         log.median.time = log(median.time)) %>%
  # Drop the grouping so the plotting functions see a plain data frame.
  ungroup()
# Common output settings (inches and dots per inch)
dpi = 300
h = 5
w = 5
# Save a plot under the given basename in both png and pdf.
save_plot = function(base, plot) {
  ggsave(paste0(base, ".png"), plot=plot, width=w, height=h, dpi=dpi)
  ggsave(paste0(base, ".pdf"), plot=plot, width=w, height=h, dpi=dpi)
}
# Boxplot of the normalized time: useful to identify outliers and problems
# with the distribution of time.
plt = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
  geom_boxplot() +
  # Dashed red lines mark the 1% band above and below the median
  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
  # The bw theme is recommended for publications
  theme_bw() +
  labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
       subtitle=input_file) +
  # Shrink the subtitle so the input path fits nicely
  theme(plot.subtitle=element_text(size=8))
save_plot("normalized.time", plt)
# Scatter plot of the raw time of each run as the block size varies.
plt = ggplot(df, aes(x=blocksPerCpu, y=time)) +
  # Circles (shape=21) a bit larger than the default
  geom_point(shape=21, size=3) +
  theme_bw() +
  labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
       subtitle=input_file) +
  theme(plot.subtitle=element_text(size=8))
save_plot("time", plt)

View File

@ -0,0 +1,116 @@
# This R program takes as argument the dataset that contains the results of the
# execution of the HPCG weak scalability experiment and produces some plots.
# All the knowledge to understand how this script works is covered by this
# nice R book:
#
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# OReilly Media (2020). 2nd edition
#
# Which can be freely read online here: https://r-graphics.org/
#
# Please, search in this book before copying some random (and probably outdated)
# reply on stack overflow.
# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for plotting data,
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
# Here we simply load the arguments to find the input dataset. If nothing is
# specified we use the file named `input` in the current directory.
# We can run this script directly using:
# Rscript <path-to-this-script> <input-dataset>
# Load the arguments (argv)
args = commandArgs(trailingOnly=TRUE)
# Set the input dataset if given in argv[1], or use "input" as default
if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
# Read the NDJSON dataset: one record per run
df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
# Then we flatten it, as it may contain dictionaries inside the columns
jsonlite::flatten() %>%
# Now the dataframe contains all the configuration of the units inside the
# columns named `config.*`, for example `config.cbs`. We first select only
# the columns that we need:
select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, config.nodes, unit, time) %>%
# And then we rename those columns to something shorter:
rename(nblocks=config.nblocks,
ncommblocks=config.ncommblocks,
cpusPerSocket=config.hw.cpusPerSocket,
nodes=config.nodes) %>%
# Blocks per CPU is the granularity metric
mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
# Turn the discrete axes into factors so ggplot treats them categorically
mutate(nblocks = as.factor(nblocks)) %>%
mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
mutate(nodes = as.factor(nodes)) %>%
mutate(unit = as.factor(unit)) %>%
group_by(unit) %>%
# And compute some metrics which are applied to each group. For example we
# compute the median time within the runs of a unit:
mutate(median.time = median(time)) %>%
mutate(normalized.time = time / median.time - 1) %>%
mutate(log.median.time = log(median.time)) %>%
# Then, we remove the grouping. This step is very important, otherwise the
# plotting functions get confused:
ungroup()
# Plot dimensions (inches) and resolution
dpi=300
h=5
w=5
# Normalized time vs nodes, one box per unit, colored by granularity
p = ggplot(df, aes(x=nodes, y=normalized.time, color=blocksPerCpu)) +
# The boxplots are useful to identify outliers and problems with the
# distribution of time
geom_boxplot() +
# We add a line to mark the 1% limit above and below the median
geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Nodes", y="Normalized time", title="HPCG weak scalability: normalized time",
color="Blocks per CPU",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
# We plot the time of each run as we vary the number of nodes
p = ggplot(df, aes(x=nodes, y=time, color=blocksPerCpu)) +
# We add a points (scatter plot) using circles (shape=21) a bit larger
# than the default (size=3)
geom_point(shape=21, size=3) +
# The bw theme is recommended for publications
theme_bw() +
# Here we add the title and the labels of the axes
labs(x="Nodes", y="Time (s)", title="HPCG weak scalability: time",
color="Blocks per CPU",
subtitle=input_file) +
# And set the subtitle font size a bit smaller, so it fits nicely
theme(plot.subtitle=element_text(size=8))
# Then, we save the plot both in png and pdf
ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)

View File

@ -38,7 +38,9 @@ in
};
hpcg = with exp.hpcg; {
oss = stdPlot ./hpcg/oss.R [ oss ];
ossGranularity = stdPlot ./hpcg/oss.granularity.R [ ossGranularity ];
ossScalability = stdPlot ./hpcg/oss.scalability.R [ ossScalability ];
# slices = stdPlot ./hpcg/oss.R [ slices ];
};
saiph = with exp.saiph; {