From 1a6075a2b180b5b1d9fe1c21dde3dc9c5ed781f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Pe=C3=B1acoba?= <rpenacob@bsc.es>
Date: Tue, 23 Feb 2021 18:24:21 +0100
Subject: [PATCH] hpcg: add first granularity/scalability exps for
 tampi+isend+oss+task

- oss.nix runs valid hpcg layouts whereas slices.nix does not
---
 garlic/apps/hpcg/default.nix            |  21 ++--
 garlic/apps/index.nix                   |   2 +-
 garlic/exp/hpcg/gen.nix                 |  49 ++++----
 garlic/exp/hpcg/mpi+omp.nix             |   2 +-
 garlic/exp/hpcg/omp.nix                 |   2 +-
 garlic/exp/hpcg/oss.granularity.192.nix |  89 ++++++++++++++
 garlic/exp/hpcg/oss.nix                 |  67 +++++-----
 garlic/exp/hpcg/oss.scalability.192.nix |  89 ++++++++++++++
 garlic/exp/hpcg/slices.nix              |  91 ++++++++++++++
 garlic/exp/index.nix                    |  13 +-
 garlic/fig/hpcg/oss.R                   | 156 +++++++++++++-----------
 garlic/fig/hpcg/oss.granularity.R       | 112 +++++++++++++++++
 garlic/fig/hpcg/oss.scalability.R       | 116 ++++++++++++++++++
 garlic/fig/index.nix                    |   4 +-
 14 files changed, 665 insertions(+), 148 deletions(-)
 create mode 100644 garlic/exp/hpcg/oss.granularity.192.nix
 create mode 100644 garlic/exp/hpcg/oss.scalability.192.nix
 create mode 100644 garlic/exp/hpcg/slices.nix
 create mode 100644 garlic/fig/hpcg/oss.granularity.R
 create mode 100644 garlic/fig/hpcg/oss.scalability.R

diff --git a/garlic/apps/hpcg/default.nix b/garlic/apps/hpcg/default.nix
index c864bb2..7a69a72 100644
--- a/garlic/apps/hpcg/default.nix
+++ b/garlic/apps/hpcg/default.nix
@@ -1,13 +1,13 @@
 {
   stdenv
 , cc
-, nanos6 ? null
-, mcxx ? null
-, mpi ? null
+, nanos6
+, mcxx
+, mpi
+, tampi
 , gitBranch
 }:
 
-with stdenv.lib;
 stdenv.mkDerivation rec {
   name = "hpcg";
 
@@ -16,16 +16,13 @@ stdenv.mkDerivation rec {
     ref = "${gitBranch}";
   };
 
-  prePatch = ''
-    #export NIX_DEBUG=6
-  '';
+  # prePatch = ''
+  #   #export NIX_DEBUG=6
+  # '';
 
   buildInputs = [
-    cc
-  ]
-  ++ optional (mcxx != null) mcxx
-  ++ optional (nanos6 != null) nanos6
-  ++ optional (mpi != null) mpi;
+    cc nanos6 mcxx mpi tampi
+  ];
 
   makeFlags = [
     "CC=${cc.CC}"
diff --git a/garlic/apps/index.nix b/garlic/apps/index.nix
index c6ef494..c9e32b8 100644
--- a/garlic/apps/index.nix
+++ b/garlic/apps/index.nix
@@ -28,7 +28,7 @@
   };
 
   hpcg = callPackage ./hpcg/default.nix {
-    gitBranch = "garlic/oss";
+    gitBranch = "garlic/tampi+isend+oss+task";
   };
 
   bigsort = {
diff --git a/garlic/exp/hpcg/gen.nix b/garlic/exp/hpcg/gen.nix
index cb16cf6..a9e7316 100644
--- a/garlic/exp/hpcg/gen.nix
+++ b/garlic/exp/hpcg/gen.nix
@@ -5,7 +5,6 @@
 , targetMachine
 , stages
 , garlicTools
-, resultFromTrebuchet
 }:
 
 with stdenv.lib;
@@ -14,28 +13,23 @@ with garlicTools;
 
 let
   # Generate the complete configuration for each unit
-  genConf = with bsc; c: targetMachine.config // rec {
+  genConf = c: targetMachine.config // rec {
     expName = "${c.expName}.gen";
     unitName = "${expName}.n${toString n.x}";
 
     inherit (targetMachine.config) hw;
-    # hpcg options
-    cc = bsc.icc;
-    mcxx = bsc.mcxx;
-    nanos6 = bsc.nanos6;
-    mpi = null; # TODO: Remove this for oss
 
     # Only the n and gitBranch options are inherited
-    inherit (c) n gitBranch;
+    inherit (c) n nprocs disableAspectRatio nodes ntasksPerNode gitBranch;
 
     # Repeat the execution of each unit 30 times
     loops = 1;
 
     # Resources
     qos = "debug";
-    ntasksPerNode = 1;
-    nodes = 1;
-    time = "02:00:00";
+    # ntasksPerNode = hw.socketsPerNode;
+    # nodes = 2;
+    time = "00:30:00";
     # task in one socket
     cpusPerTask = hw.cpusPerSocket;
     jobName = unitName;
@@ -43,25 +37,24 @@ let
 
   exec = {nextStage, conf, ...}: with conf; stages.exec {
     inherit nextStage;
-    env = "NANOS6_DEPENDENCIES=discrete";
     argv = [
-      "--nx=${toString n.x}"
-      "--ny=${toString n.y}"
-      "--nz=${toString n.z}"
-      # The nblocks is ignored
-      #"--nblocks=${toString nblocks}"
+      "--nx=${toString conf.n.x}"
+      "--ny=${toString conf.n.y}"
+      "--nz=${toString conf.n.z}"
+      "--npx=${toString conf.nprocs.x}"
+      "--npy=${toString conf.nprocs.y}"
+      "--npz=${toString conf.nprocs.z}"
+      # nblocks and ncomms are ignored
+      "--nblocks=1"
+      "--ncomms=1"
       # Store the results in the same directory
       "--store=."
-    ];
+    ] ++ optional (conf.disableAspectRatio) "--no-ar=1";
   };
 
-  program = {nextStage, conf, ...}: with conf;
-  let
-    customPkgs = stdexp.replaceMpi conf.mpi;
-  in
-    customPkgs.apps.hpcg.override {
-      inherit cc nanos6 mcxx gitBranch;
-    };
+  program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
+    inherit (conf) gitBranch;
+  };
 
   pipeline = stdexp.stdPipeline ++ [ exec program ];
 
@@ -78,7 +71,7 @@ let
     inputExp = getExperimentStage inputTre;
     # Then load the result. This is only used to ensure that we have the
     # results, so it has been executed.
-    inputRes = resultFromTrebuchet inputTre;
+    inputRes = inputTre.result;
     # We also need the unit, to compute the path.
     inputUnit = stages.unit {
       conf = genConf conf;
@@ -95,7 +88,9 @@ let
       # ${inputRes}
 
       # Then we simply link the input result directory in "input"
-      ln -s ${relPath} input
+      # Every rank executes this command, so we use -f and || true to keep
+      # a failing ln (the link may already exist) from aborting the run
+      ln -sf ${relPath} input || true
     '';
   };
 
diff --git a/garlic/exp/hpcg/mpi+omp.nix b/garlic/exp/hpcg/mpi+omp.nix
index dfe4696..6ea245c 100644
--- a/garlic/exp/hpcg/mpi+omp.nix
+++ b/garlic/exp/hpcg/mpi+omp.nix
@@ -21,7 +21,7 @@ let
     n = c.n;
     cc = bsc.icc;
     mpi = bsc.impi;
-    gitBranch = "garlic/mpi+omp";
+    gitBranch = "garlic/mpi+send+omp+fork";
 
     # Repeat the execution of each unit 30 times
     loops = 30;
diff --git a/garlic/exp/hpcg/omp.nix b/garlic/exp/hpcg/omp.nix
index 9960302..c4dcc8c 100644
--- a/garlic/exp/hpcg/omp.nix
+++ b/garlic/exp/hpcg/omp.nix
@@ -22,7 +22,7 @@ let
     nblocks = c.nblocks;
     cc = bsc.icc;
     mpi = null; # TODO: Remove this for omp
-    gitBranch = "garlic/omp";
+    gitBranch = "garlic/omp+fork";
 
     # Repeat the execution of each unit 30 times
     loops = 30;
diff --git a/garlic/exp/hpcg/oss.granularity.192.nix b/garlic/exp/hpcg/oss.granularity.192.nix
new file mode 100644
index 0000000..04a484a
--- /dev/null
+++ b/garlic/exp/hpcg/oss.granularity.192.nix
@@ -0,0 +1,89 @@
+{
+  stdenv
+, stdexp
+, bsc
+, targetMachine
+, stages
+, genInput
+}:
+
+with stdenv.lib;
+
+let
+  # Initial variable configuration
+  varConf = {
+    n = [ { x = 192; y = 192; z = 192; } ];
+    nprocs = [
+        { x = 2; y = 1; z = 1; }
+        # { x = 2; y = 2; z = 1; }
+        # { x = 2; y = 2; z = 2; }
+        # { x = 4; y = 2; z = 2; }
+        # { x = 4; y = 4; z = 2; }
+    ];
+    nblocks = [ 12 24 48 96 192 384 768 1536 ];
+    # nblocks = [ 384 ];
+    ncommblocks = [ 1 ];
+    # nodes = [ 1 ];
+    # nodes = [ 1 2 4 8 16 ];
+  };
+
+  # Generate the complete configuration for each unit
+  genConf = c: targetMachine.config // rec {
+    expName = "hpcg.oss";
+    unitName = "${expName}.nb${toString nblocks}";
+
+    inherit (targetMachine.config) hw;
+
+    # hpcg options
+    inherit (c) n nprocs nblocks ncommblocks;
+
+    gitBranch = "garlic/tampi+isend+oss+task";
+
+    # Repeat the execution of each unit 3 times
+    loops = 3;
+
+    disableAspectRatio = false;
+
+    # Resources
+    qos = "debug";
+    ntasksPerNode = hw.socketsPerNode;
+    time = "02:00:00";
+    # task in one socket
+    cpusPerTask = hw.cpusPerSocket;
+    nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
+    jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
+  };
+
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  input = genInput configs;
+
+  exec = {nextStage, conf, ...}: stages.exec {
+    inherit nextStage;
+    argv = [
+      "--nx=${toString conf.n.x}"
+      "--ny=${toString conf.n.y}"
+      "--nz=${toString conf.n.z}"
+      "--npx=${toString conf.nprocs.x}"
+      "--npy=${toString conf.nprocs.y}"
+      "--npz=${toString conf.nprocs.z}"
+      "--nblocks=${toString conf.nblocks}"
+      "--ncomms=${toString conf.ncommblocks}"
+      # The input symlink is generated by the input stage, which is generated by
+      # the genInput function.
+      "--load=input"
+    ];
+  };
+
+  program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
+    inherit (conf) gitBranch;
+  };
+
+  pipeline = stdexp.stdPipeline ++ [ input exec program ];
+
+in
+
+  stdexp.genExperiment { inherit configs pipeline; }
diff --git a/garlic/exp/hpcg/oss.nix b/garlic/exp/hpcg/oss.nix
index 48b4ef2..b04215b 100644
--- a/garlic/exp/hpcg/oss.nix
+++ b/garlic/exp/hpcg/oss.nix
@@ -11,39 +11,46 @@ with stdenv.lib;
 
 let
   # Initial variable configuration
-  varConf = with bsc; {
-    # FIXME: Temporally reduce the input size until we can load a precomputed
-    # input in each run, otherwise the execution time is very large.
-    #n = [ { x = 104; y = 104; z = 104; } ];
-    n = [ { x = 256; y = 288; z = 288; } ];
-    nblocks = [ 12 24 48 96 192 384 ];
+  varConf = {
+    n = [ { x = 192; y = 192; z = 192; } ];
+    nprocs = [
+        { x = 2; y = 1; z = 1; }
+        { x = 2; y = 2; z = 1; }
+        { x = 2; y = 2; z = 2; }
+        { x = 4; y = 2; z = 2; }
+        { x = 4; y = 4; z = 2; }
+    ];
+    # nblocks = [ 12 24 48 96 192 384 768 1536 ];
+    nblocks = [ 384 768 1536 ];
+    ncommblocks = [ 1 ];
+    # nodes = [ 1 ];
+    # nodes = [ 1 2 4 8 16 ];
   };
 
   # Generate the complete configuration for each unit
-  genConf = with bsc; c: targetMachine.config // rec {
+  genConf = c: targetMachine.config // rec {
     expName = "hpcg.oss";
     unitName = "${expName}.nb${toString nblocks}";
 
     inherit (targetMachine.config) hw;
+
     # hpcg options
-    n = c.n;
-    nblocks = c.nblocks;
-    cc = bsc.icc;
-    mcxx = bsc.mcxx;
-    nanos6 = bsc.nanos6;
-    mpi = null; # TODO: Remove this for oss
-    gitBranch = "garlic/oss";
+    inherit (c) n nprocs nblocks ncommblocks;
+
+    gitBranch = "garlic/tampi+isend+oss+task";
 
     # Repeat the execution of each unit 30 times
-    loops = 30;
+    loops = 3;
+
+    disableAspectRatio = false;
 
     # Resources
     qos = "debug";
-    ntasksPerNode = 1;
-    nodes = 1;
+    ntasksPerNode = hw.socketsPerNode;
     time = "02:00:00";
     # task in one socket
     cpusPerTask = hw.cpusPerSocket;
+    nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
     jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
   };
 
@@ -54,31 +61,29 @@ let
 
   input = genInput configs;
 
-  exec = {nextStage, conf, ...}: with conf; stages.exec {
+  exec = {nextStage, conf, ...}: stages.exec {
     inherit nextStage;
-    env = "NANOS6_DEPENDENCIES=discrete";
     argv = [
-      "--nx=${toString n.x}"
-      "--ny=${toString n.y}"
-      "--nz=${toString n.z}"
-      "--nblocks=${toString nblocks}"
+      "--nx=${toString conf.n.x}"
+      "--ny=${toString conf.n.y}"
+      "--nz=${toString conf.n.z}"
+      "--npx=${toString conf.nprocs.x}"
+      "--npy=${toString conf.nprocs.y}"
+      "--npz=${toString conf.nprocs.z}"
+      "--nblocks=${toString conf.nblocks}"
+      "--ncomms=${toString conf.ncommblocks}"
       # The input symlink is generated by the input stage, which is generated by
       # the genInput function.
       "--load=input"
     ];
   };
 
-  program = {nextStage, conf, ...}: with conf;
-  let
-    customPkgs = stdexp.replaceMpi conf.mpi;
-  in
-    customPkgs.apps.hpcg.override {
-      inherit cc nanos6 mcxx gitBranch;
-    };
+  program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
+    inherit (conf) gitBranch;
+  };
 
   pipeline = stdexp.stdPipeline ++ [ input exec program ];
 
 in
 
-  #{ inherit configs pipeline; }
   stdexp.genExperiment { inherit configs pipeline; }
diff --git a/garlic/exp/hpcg/oss.scalability.192.nix b/garlic/exp/hpcg/oss.scalability.192.nix
new file mode 100644
index 0000000..534e3cd
--- /dev/null
+++ b/garlic/exp/hpcg/oss.scalability.192.nix
@@ -0,0 +1,89 @@
+{
+  stdenv
+, stdexp
+, bsc
+, targetMachine
+, stages
+, genInput
+}:
+
+with stdenv.lib;
+
+let
+  # Initial variable configuration
+  varConf = {
+    n = [ { x = 192; y = 192; z = 192; } ];
+    nprocs = [
+        { x = 2; y = 1; z = 1; }
+        { x = 2; y = 2; z = 1; }
+        { x = 2; y = 2; z = 2; }
+        { x = 4; y = 2; z = 2; }
+        { x = 4; y = 4; z = 2; }
+    ];
+    # nblocks = [ 12 24 48 96 192 384 768 1536 ];
+    nblocks = [ 384 768 1536 ];
+    ncommblocks = [ 1 ];
+    # nodes = [ 1 ];
+    # nodes = [ 1 2 4 8 16 ];
+  };
+
+  # Generate the complete configuration for each unit
+  genConf = c: targetMachine.config // rec {
+    expName = "hpcg.oss";
+    unitName = "${expName}.nb${toString nblocks}";
+
+    inherit (targetMachine.config) hw;
+
+    # hpcg options
+    inherit (c) n nprocs nblocks ncommblocks;
+
+    gitBranch = "garlic/tampi+isend+oss+task";
+
+    # Repeat the execution of each unit 10 times
+    loops = 10;
+
+    disableAspectRatio = false;
+
+    # Resources
+    qos = "debug";
+    ntasksPerNode = hw.socketsPerNode;
+    time = "02:00:00";
+    # task in one socket
+    cpusPerTask = hw.cpusPerSocket;
+    nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
+    jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
+  };
+
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  input = genInput configs;
+
+  exec = {nextStage, conf, ...}: stages.exec {
+    inherit nextStage;
+    argv = [
+      "--nx=${toString conf.n.x}"
+      "--ny=${toString conf.n.y}"
+      "--nz=${toString conf.n.z}"
+      "--npx=${toString conf.nprocs.x}"
+      "--npy=${toString conf.nprocs.y}"
+      "--npz=${toString conf.nprocs.z}"
+      "--nblocks=${toString conf.nblocks}"
+      "--ncomms=${toString conf.ncommblocks}"
+      # The input symlink is generated by the input stage, which is generated by
+      # the genInput function.
+      "--load=input"
+    ];
+  };
+
+  program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
+    inherit (conf) gitBranch;
+  };
+
+  pipeline = stdexp.stdPipeline ++ [ input exec program ];
+
+in
+
+  stdexp.genExperiment { inherit configs pipeline; }
diff --git a/garlic/exp/hpcg/slices.nix b/garlic/exp/hpcg/slices.nix
new file mode 100644
index 0000000..cff1f75
--- /dev/null
+++ b/garlic/exp/hpcg/slices.nix
@@ -0,0 +1,91 @@
+{
+  stdenv
+, stdexp
+, bsc
+, targetMachine
+, stages
+, genInput
+}:
+
+with stdenv.lib;
+
+let
+  # Initial variable configuration
+  varConf = {
+    n = [ { x = 192; y = 192; z = 192; } ];
+    nprocs = [
+        { x = 2; y = 1; z = 1; }
+        { x = 4; y = 1; z = 1; }
+        { x = 8; y = 1; z = 1; }
+        { x = 16; y = 1; z = 1; }
+        { x = 32; y = 1; z = 1; }
+    ];
+    # nblocks = [ 12 24 48 96 192 384 768 1536 ];
+    nblocks = [ 384 ];
+    ncommblocks = [ 1 ];
+    # nodes = [ 1 ];
+    # nodes = [ 1 2 4 8 16 ];
+  };
+
+  # Generate the complete configuration for each unit
+  genConf = c: targetMachine.config // rec {
+    expName = "hpcg.oss";
+    unitName = "${expName}.nb${toString nblocks}";
+
+    inherit (targetMachine.config) hw;
+
+    # hpcg options
+    inherit (c) n nprocs nblocks ncommblocks;
+
+    gitBranch = "garlic/tampi+isend+oss+task";
+
+    # Repeat the execution of each unit 3 times
+    loops = 3;
+
+    disableAspectRatio = true;
+
+    # Resources
+    qos = "debug";
+    ntasksPerNode = hw.socketsPerNode;
+    time = "02:00:00";
+    # task in one socket
+    cpusPerTask = hw.cpusPerSocket;
+    nodes = (nprocs.x * nprocs.y * nprocs.z) / ntasksPerNode;
+    jobName = "hpcg-${toString n.x}-${toString n.y}-${toString n.z}-${gitBranch}";
+  };
+
+  # Compute the array of configurations
+  configs = stdexp.buildConfigs {
+    inherit varConf genConf;
+  };
+
+  input = genInput configs;
+
+  exec = {nextStage, conf, ...}: stages.exec {
+    inherit nextStage;
+    argv = [
+      "--nx=${toString conf.n.x}"
+      "--ny=${toString conf.n.y}"
+      "--nz=${toString conf.n.z}"
+      # Distribute all processes in X axis
+      "--npx=${toString conf.nprocs.x}"
+      "--npy=${toString conf.nprocs.y}"
+      "--npz=${toString conf.nprocs.z}"
+      "--nblocks=${toString conf.nblocks}"
+      "--ncomms=${toString conf.ncommblocks}"
+      # The input symlink is generated by the input stage, which is generated by
+      # the genInput function.
+      "--load=input"
+      # Disable HPCG Aspect Ratio to run any mpi layout
+    ] ++ optional (conf.disableAspectRatio) "--no-ar=1";
+  };
+
+  program = {nextStage, conf, ...}: bsc.apps.hpcg.override {
+    inherit (conf) gitBranch;
+  };
+
+  pipeline = stdexp.stdPipeline ++ [ input exec program ];
+
+in
+
+  stdexp.genExperiment { inherit configs pipeline; }
diff --git a/garlic/exp/index.nix b/garlic/exp/index.nix
index 6a80b40..910cffe 100644
--- a/garlic/exp/index.nix
+++ b/garlic/exp/index.nix
@@ -55,12 +55,23 @@
     #  inherit (bsc.garlic.pp) resultFromTrebuchet;
     #};
     genInput = callPackage ./hpcg/gen.nix {
-      inherit (bsc.garlic.pp) resultFromTrebuchet;
     };
 
     oss = callPackage ./hpcg/oss.nix {
       inherit genInput;
     };
+
+    ossGranularity = callPackage ./hpcg/oss.granularity.192.nix {
+      inherit genInput;
+    };
+
+    ossScalability = callPackage ./hpcg/oss.scalability.192.nix {
+      inherit genInput;
+    };
+
+    # slices = callPackage ./hpcg/slices.nix {
+    #   inherit genInput;
+    # };
   };
 
   heat = rec {
diff --git a/garlic/fig/hpcg/oss.R b/garlic/fig/hpcg/oss.R
index 73d1659..3b68e1d 100644
--- a/garlic/fig/hpcg/oss.R
+++ b/garlic/fig/hpcg/oss.R
@@ -1,102 +1,112 @@
+# This R program takes as argument the dataset that contains the results of the
+# execution of the HPCG experiment and produces some plots. All the
+# knowledge to understand how this script works is covered by this nice R book:
+#
+# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
+# O’Reilly Media (2020). 2nd edition
+#
+# Which can be freely read online here: https://r-graphics.org/
+#
+# Please, search in this book before copying some random (and probably outdated)
+# reply on stack overflow.
+
+# We load some R packages to import the required functions. We mainly use the
+# tidyverse packages, which are very good for plotting data.
 library(ggplot2)
-library(dplyr)
+library(dplyr, warn.conflicts = FALSE)
 library(scales)
 library(jsonlite)
+library(viridis, warn.conflicts = FALSE)
 
-args=commandArgs(trailingOnly=TRUE)
+# Here we simply load the arguments to find the input dataset. If nothing is
+# specified we use the file named `input` in the current directory.
+# We can run this script directly using:
+# Rscript <path-to-this-script> <input-dataset>
 
-# Read the timetable from args[1]
-input_file = "input.json"
-if (length(args)>0) { input_file = args[1] }
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
 
-# Load the dataset in NDJSON format
-dataset = jsonlite::stream_in(file(input_file)) %>%
-	jsonlite::flatten()
+# Set the input dataset if given in argv[1], or use "input" as default
+if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
 
-particles = unique(dataset$config.particles)
+df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
 
-# We only need the nblocks and time
-df = select(dataset, config.nblocks, config.hw.cpusPerSocket, time) %>%
-	rename(nblocks=config.nblocks,
-		cpusPerSocket=config.hw.cpusPerSocket)
+  # Then we flatten it, as it may contain dictionaries inside the columns
+  jsonlite::flatten() %>%
 
-df = df %>% mutate(blocksPerCpu = nblocks / cpusPerSocket)
-df$nblocks = as.factor(df$nblocks)
-df$blocksPerCpuFactor = as.factor(df$blocksPerCpu)
+  # Now the dataframe contains all the configuration of the units inside the
+  # columns named `config.*`, for example `config.nblocks`. We first select only
+  # the columns that we need:
+  select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, unit, time) %>%
 
-# Normalize the time by the median
-D=group_by(df, nblocks) %>%
-	mutate(tnorm = time / median(time) - 1)
+  # And then we rename those columns to something shorter:
+  rename(nblocks=config.nblocks,
+         ncommblocks=config.ncommblocks,
+         cpusPerSocket=config.hw.cpusPerSocket) %>%
 
-bs_unique = unique(df$nblocks)
-nbs=length(bs_unique)
+  mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
 
-print(D)
+  mutate(nblocks = as.factor(nblocks)) %>%
+  mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
+  mutate(unit = as.factor(unit)) %>%
 
-ppi=300
+  group_by(unit) %>%
+
+  # And compute some metrics which are applied to each group. For example we
+  # compute the median time within the runs of a unit:
+  mutate(median.time = median(time)) %>%
+  mutate(normalized.time = time / median.time - 1) %>%
+  mutate(log.median.time = log(median.time)) %>%
+
+  # Then, we remove the grouping. This step is very important, otherwise the
+  # plotting functions get confused:
+  ungroup()
+
+dpi=300
 h=5
 w=5
 
-png("box.png", width=w*ppi, height=h*ppi, res=ppi)
-#
-#
-#
-# Create the plot with the normalized time vs nblocks
-p = ggplot(data=D, aes(x=blocksPerCpuFactor, y=tnorm)) +
+p = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
 
-	# Labels
-	labs(x="Num blocks", y="Normalized time",
-              title="HPCG normalized time", 
-              subtitle=input_file) +
+  # The boxplots are useful to identify outliers and problems with the
+  # distribution of time
+  geom_boxplot() +
 
-	# Center the title
-	#theme(plot.title = element_text(hjust = 0.5)) +
+  # We add a line to mark the 1% limit above and below the median
+  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
 
-	# Black and white mode (useful for printing)
-	#theme_bw() +
+  # The bw theme is recommended for publications
+  theme_bw() +
 
-	# Add the maximum allowed error lines
-	geom_hline(yintercept=c(-0.01, 0.01),
-		linetype="dashed", color="red") +
+  # Here we add the title and the labels of the axes
+  labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
+    subtitle=input_file) +
 
-	# Draw boxplots
-	geom_boxplot() +
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
 
-	#scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
+# Then, we save the plot both in png and pdf
+ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
 
-	theme_bw() +
+# We plot the time of each run as we vary the number of blocks per CPU
+p = ggplot(df, aes(x=blocksPerCpu, y=time)) +
 
-	theme(plot.subtitle=element_text(size=8)) +
+  # We add points (scatter plot) using circles (shape=21) a bit larger
+  # than the default (size=3)
+  geom_point(shape=21, size=3) +
 
-	theme(legend.position = c(0.85, 0.85)) #+
+  # The bw theme is recommended for publications
+  theme_bw() +
 
+  # Here we add the title and the labels of the axes
+  labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
+    subtitle=input_file) +
 
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
 
+# Then, we save the plot both in png and pdf
+ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
 
-# Render the plot
-print(p)
-
-## Save the png image
-dev.off()
-#
-png("scatter.png", width=w*ppi, height=h*ppi, res=ppi)
-#
-## Create the plot with the normalized time vs nblocks
-p = ggplot(D, aes(x=blocksPerCpuFactor, y=time)) +
-
-	labs(x="Blocks/CPU", y="Time (s)",
-              title="HPCG granularity", 
-              subtitle=input_file) +
-	theme_bw() +
-	theme(plot.subtitle=element_text(size=8)) +
-	theme(legend.position = c(0.5, 0.88)) +
-
-	geom_point(shape=21, size=3) +
-	#scale_x_continuous(trans=log2_trans()) +
-	scale_y_continuous(trans=log2_trans())
-
-# Render the plot
-print(p)
-
-# Save the png image
-dev.off()
diff --git a/garlic/fig/hpcg/oss.granularity.R b/garlic/fig/hpcg/oss.granularity.R
new file mode 100644
index 0000000..3b68e1d
--- /dev/null
+++ b/garlic/fig/hpcg/oss.granularity.R
@@ -0,0 +1,112 @@
+# This R program takes as argument the dataset that contains the results of the
+# execution of the HPCG experiment and produces some plots. All the
+# knowledge to understand how this script works is covered by this nice R book:
+#
+# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
+# O’Reilly Media (2020). 2nd edition
+#
+# Which can be freely read online here: https://r-graphics.org/
+#
+# Please, search in this book before copying some random (and probably outdated)
+# reply on stack overflow.
+
+# We load some R packages to import the required functions. We mainly use the
+# tidyverse packages, which are very good for plotting data.
+library(ggplot2)
+library(dplyr, warn.conflicts = FALSE)
+library(scales)
+library(jsonlite)
+library(viridis, warn.conflicts = FALSE)
+
+# Here we simply load the arguments to find the input dataset. If nothing is
+# specified we use the file named `input` in the current directory.
+# We can run this script directly using:
+# Rscript <path-to-this-script> <input-dataset>
+
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
+
+# Set the input dataset if given in argv[1], or use "input" as default
+if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
+
+df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
+
+  # Then we flatten it, as it may contain dictionaries inside the columns
+  jsonlite::flatten() %>%
+
+  # Now the dataframe contains all the configuration of the units inside the
+  # columns named `config.*`, for example `config.nblocks`. We first select only
+  # the columns that we need:
+  select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, unit, time) %>%
+
+  # And then we rename those columns to something shorter:
+  rename(nblocks=config.nblocks,
+         ncommblocks=config.ncommblocks,
+         cpusPerSocket=config.hw.cpusPerSocket) %>%
+
+  mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
+
+  mutate(nblocks = as.factor(nblocks)) %>%
+  mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
+  mutate(unit = as.factor(unit)) %>%
+
+  group_by(unit) %>%
+
+  # And compute some metrics which are applied to each group. For example we
+  # compute the median time within the runs of a unit:
+  mutate(median.time = median(time)) %>%
+  mutate(normalized.time = time / median.time - 1) %>%
+  mutate(log.median.time = log(median.time)) %>%
+
+  # Then, we remove the grouping. This step is very important, otherwise the
+  # plotting functions get confused:
+  ungroup()
+
+dpi=300
+h=5
+w=5
+
+p = ggplot(df, aes(x=blocksPerCpu, y=normalized.time)) +
+
+  # The boxplots are useful to identify outliers and problems with the
+  # distribution of time
+  geom_boxplot() +
+
+  # We add a line to mark the 1% limit above and below the median
+  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
+
+  # The bw theme is recommended for publications
+  theme_bw() +
+
+  # Here we add the title and the labels of the axes
+  labs(x="Blocks per CPU", y="Normalized time", title="HPCG granularity: normalized time",
+    subtitle=input_file) +
+
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
+
+# Then, we save the plot both in png and pdf
+ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
+
+# We plot the time of each run as we vary the number of blocks per CPU
+p = ggplot(df, aes(x=blocksPerCpu, y=time)) +
+
+  # We add points (scatter plot) using circles (shape=21) a bit larger
+  # than the default (size=3)
+  geom_point(shape=21, size=3) +
+
+  # The bw theme is recommended for publications
+  theme_bw() +
+
+  # Here we add the title and the labels of the axes
+  labs(x="Blocks Per CPU", y="Time (s)", title="HPCG granularity: time",
+    subtitle=input_file) +
+
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
+
+# Then, we save the plot both in png and pdf
+ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
+
diff --git a/garlic/fig/hpcg/oss.scalability.R b/garlic/fig/hpcg/oss.scalability.R
new file mode 100644
index 0000000..3e2a20b
--- /dev/null
+++ b/garlic/fig/hpcg/oss.scalability.R
@@ -0,0 +1,116 @@
+# This R program takes as argument the dataset that contains the results of the
+# execution of the HPCG experiment and produces some plots. All the
+# knowledge to understand how this script works is covered by this nice R book:
+#
+# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
+# O’Reilly Media (2020). 2nd edition
+#
+# Which can be freely read online here: https://r-graphics.org/
+#
+# Please, search in this book before copying some random (and probably outdated)
+# reply on stack overflow.
+
+# We load some R packages to import the required functions. We mainly use the
+# tidyverse packages, which are very good for plotting data.
+library(ggplot2)
+library(dplyr, warn.conflicts = FALSE)
+library(scales)
+library(jsonlite)
+library(viridis, warn.conflicts = FALSE)
+
+# Here we simply load the arguments to find the input dataset. If nothing is
+# specified we use the file named `input` in the current directory.
+# We can run this script directly using:
+# Rscript <path-to-this-script> <input-dataset>
+
+# Load the arguments (argv)
+args = commandArgs(trailingOnly=TRUE)
+
+# Set the input dataset if given in argv[1], or use "input" as default
+if (length(args)>0) { input_file = args[1] } else { input_file = "input" }
+
+df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>%
+
+  # Then we flatten it, as it may contain dictionaries inside the columns
+  jsonlite::flatten() %>%
+
+  # Now the dataframe contains all the configuration of the units inside the
+  # columns named `config.*`, for example `config.nblocks`. We first select only
+  # the columns that we need:
+  select(config.nblocks, config.ncommblocks, config.hw.cpusPerSocket, config.nodes, unit, time) %>%
+
+  # And then we rename those columns to something shorter:
+  rename(nblocks=config.nblocks,
+         ncommblocks=config.ncommblocks,
+         cpusPerSocket=config.hw.cpusPerSocket,
+         nodes=config.nodes) %>%
+
+  mutate(blocksPerCpu = nblocks / cpusPerSocket) %>%
+
+  mutate(nblocks = as.factor(nblocks)) %>%
+  mutate(blocksPerCpu = as.factor(blocksPerCpu)) %>%
+  mutate(nodes = as.factor(nodes)) %>%
+  mutate(unit = as.factor(unit)) %>%
+
+  group_by(unit) %>%
+
+  # And compute some metrics which are applied to each group. For example we
+  # compute the median time within the runs of a unit:
+  mutate(median.time = median(time)) %>%
+  mutate(normalized.time = time / median.time - 1) %>%
+  mutate(log.median.time = log(median.time)) %>%
+
+  # Then, we remove the grouping. This step is very important, otherwise the
+  # plotting functions get confused:
+  ungroup()
+
+dpi=300
+h=5
+w=5
+
+p = ggplot(df, aes(x=nodes, y=normalized.time, color=blocksPerCpu)) +
+
+  # The boxplots are useful to identify outliers and problems with the
+  # distribution of time
+  geom_boxplot() +
+
+  # We add a line to mark the 1% limit above and below the median
+  geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") +
+
+  # The bw theme is recommended for publications
+  theme_bw() +
+
+  # Here we add the title and the labels of the axes
+  labs(x="Nodes", y="Normalized time", title="HPCG weak scalability: normalized time",
+    color="Blocks per CPU",
+    subtitle=input_file) +
+
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
+
+# Then, we save the plot both in png and pdf
+ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi)
+
+# We plot the time of each run as we vary the number of nodes
+p = ggplot(df, aes(x=nodes, y=time, color=blocksPerCpu)) +
+
+  # We add points (scatter plot) using circles (shape=21) a bit larger
+  # than the default (size=3)
+  geom_point(shape=21, size=3) +
+
+  # The bw theme is recommended for publications
+  theme_bw() +
+
+  # Here we add the title and the labels of the axes
+  labs(x="Nodes", y="Time (s)", title="HPCG weak scalability: time",
+    color="Blocks per CPU",
+    subtitle=input_file) +
+
+  # And set the subtitle font size a bit smaller, so it fits nicely
+  theme(plot.subtitle=element_text(size=8))
+
+# Then, we save the plot both in png and pdf
+ggsave("time.png", plot=p, width=w, height=h, dpi=dpi)
+ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi)
+
diff --git a/garlic/fig/index.nix b/garlic/fig/index.nix
index b268498..b4ade9a 100644
--- a/garlic/fig/index.nix
+++ b/garlic/fig/index.nix
@@ -38,7 +38,9 @@ in
   };
 
   hpcg = with exp.hpcg; {
-    oss = stdPlot ./hpcg/oss.R [ oss ];
+    ossGranularity = stdPlot ./hpcg/oss.granularity.R [ ossGranularity ];
+    ossScalability = stdPlot ./hpcg/oss.scalability.R [ ossScalability ];
+    # slices = stdPlot ./hpcg/oss.R [ slices ];
   };
 
   saiph = with exp.saiph; {