bscpkgs/garlic/fig/hpcg/oss.slices.strongscaling.R
Raúl Peñacoba cb6577b439 hpcg: add strongscaling
HPCG rounds problem size axis when its value is < 16
2021-04-16 09:32:28 +02:00

110 lines
3.6 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# This R program takes as argument the dataset that contains the results of the
# execution of the HPCG strong scaling experiment and produces some plots. All
# the knowledge needed to understand how this script works is covered by this
# nice R book:
#
# Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
# O'Reilly Media (2020). 2nd edition
#
# It can be read online for free here: https://r-graphics.org/
#
# Please search in this book before copying some random (and probably outdated)
# reply from Stack Overflow.
# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for plotting data.
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)
# Locate the input dataset from the command line. When no argument is given,
# fall back to a file named `input` in the current directory.
#
# Usage:
#   Rscript <path-to-this-script> <input-dataset>

# Only the user-supplied (trailing) arguments are of interest
args <- commandArgs(trailingOnly = TRUE)

# argv[1] is the dataset path when present; "input" is the default
input_file <- if (length(args) > 0) args[1] else "input"
# Read the dataset with jsonlite::stream_in() and shape it for plotting
df <- jsonlite::stream_in(file(input_file), verbose = FALSE) %>%

  # Flatten the result, as it may contain dictionaries inside the columns
  jsonlite::flatten() %>%

  # The configuration of each unit lives in the `config.*` columns (for example
  # `config.cbs`). Keep only the columns that we need and give the
  # configuration ones a shorter name in the same step:
  select(
    nblocks = config.nblocks,
    ncommblocks = config.ncommblocks,
    cpusPerSocket = config.hw.cpusPerSocket,
    nodes = config.nodes,
    npx = config.nprocs.x,
    npy = config.nprocs.y,
    npz = config.nprocs.z,
    unit,
    time
  ) %>%

  # Derived columns. The expressions are evaluated in order, so blocksPerCpu is
  # computed from the numeric nblocks before nblocks is turned into a factor.
  mutate(
    # First axis (X, then Y) whose process count differs from 1, otherwise Z
    axisColor = as.factor(ifelse(npx != 1, "X", ifelse(npy != 1, "Y", "Z"))),
    blocksPerCpu = as.factor(nblocks / cpusPerSocket),
    nblocks = as.factor(nblocks),
    nodes = as.factor(nodes),
    unit = as.factor(unit),
    # Time multiplied by the number of processes along Z only
    # NOTE(review): presumably npx and npy are 1 in this experiment -- confirm
    timePerNprocs = time * npz
  ) %>%

  # Per-unit metrics: every row of a unit receives the median time of the runs
  # of that unit, the relative deviation of its own time from that median, and
  # the logarithm of the median
  group_by(unit) %>%
  mutate(
    median.time = median(time),
    normalized.time = time / median.time - 1,
    log.median.time = log(median.time)
  ) %>%

  # Drop the grouping again. This step is very important, otherwise the
  # plotting functions get confused:
  ungroup()
# Geometry of the saved figures: width and height as handed to ggsave(), and
# the resolution in dots per inch
w <- 5
h <- 5
dpi <- 300
# Scatter plot of the time of every run, multiplied by the number of processes
# (timePerNprocs), against the number of nodes, with one color per
# blocks-per-CPU value
p <- ggplot(
  data = df,
  mapping = aes(x = nodes, y = timePerNprocs, color = blocksPerCpu)
) +

  # One circle (shape 21) per run, drawn at size 3 so it is easier to see
  geom_point(size = 3, shape = 21) +

  # The bw theme is recommended for publications
  theme_bw() +

  # Title, axis labels and legend title; the subtitle records the dataset used
  labs(
    title = "HPCG strong scalability: Z axis",
    subtitle = input_file,
    x = "Nodes",
    y = "Time * Num Procs",
    color = "Blocks Per CPU"
  ) +

  # Shrink the subtitle font a bit, so it fits nicely
  theme(plot.subtitle = element_text(size = 8))
# Write the plot to disk in both png and pdf, using the same size and
# resolution for the two files
for (out_file in c("time.png", "time.pdf")) {
  ggsave(out_file, plot = p, width = w, height = h, dpi = dpi)
}