# This R program takes as argument the dataset that contains the results of the
# execution of the heat example experiment and produces some plots. All the
# knowledge to understand how this script works is covered by this nice R book:
#
#   Winston Chang, R Graphics Cookbook: Practical Recipes for Visualizing Data,
#   O’Reilly Media (2020). 2nd edition
#
# which can be read freely online here: https://r-graphics.org/
#
# Please search in this book before copying some random (and probably outdated)
# reply from Stack Overflow.

# We load some R packages to import the required functions. We mainly use the
# tidyverse packages, which are very good for plotting data.
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(scales)
library(jsonlite)
library(viridis, warn.conflicts = FALSE)

# Here we simply load the arguments to find the input dataset. If nothing is
# specified we use the file named `input` in the current directory.
# We can run this script directly using:
#
#   Rscript <this-script> [input_file]

# Load the arguments (argv)
args <- commandArgs(trailingOnly = TRUE)

# Set the input dataset if given in argv[1], or use "input" as default
if (length(args) > 0) {
  input_file <- args[1]
} else {
  input_file <- "input"
}

# Read the dataset from the input file with jsonlite's streaming reader.
df <- jsonlite::stream_in(file(input_file), verbose = FALSE) %>%

  # Then we flatten it, as it may contain dictionaries inside the columns
  jsonlite::flatten() %>%

  # Now the dataframe contains all the configuration of the units inside the
  # columns named `config.*`, for example `config.nblocks`. We first select
  # only the columns that we need:
  select(
    config.nblocks,
    config.ncommblocks,
    config.hw.cpusPerSocket,
    config.nodes,
    config.nprocs.x,
    config.nprocs.y,
    config.nprocs.z,
    unit,
    time
  ) %>%

  # And then we rename those columns to something shorter:
  rename(
    nblocks = config.nblocks,
    ncommblocks = config.ncommblocks,
    cpusPerSocket = config.hw.cpusPerSocket,
    nodes = config.nodes,
    npx = config.nprocs.x,
    npy = config.nprocs.y,
    npz = config.nprocs.z
  ) %>%

  # Derived columns. mutate() evaluates its arguments in order, so
  # blocksPerCpu must be computed while nblocks is still numeric, i.e. before
  # nblocks is converted to a factor.
  mutate(
    # "X" when npx != 1, otherwise "Y" when npy != 1, otherwise "Z"
    axisColor = as.factor(ifelse(npx != 1, "X", ifelse(npy != 1, "Y", "Z"))),
    blocksPerCpu = as.factor(nblocks / cpusPerSocket),
    # Discrete variables are stored as factors so ggplot treats them as
    # categories rather than as continuous values
    nblocks = as.factor(nblocks),
    nodes = as.factor(nodes),
    unit = as.factor(unit),
    # Time scaled by npz (config.nprocs.z)
    timePerNprocs = time * npz
  ) %>%

  # We group the runs that belong to the same unit
  group_by(unit) %>%

  # And compute some metrics which are applied to each group. For example we
  # compute the median time within the runs of a unit:
  mutate(
    median.time = median(time),
    # Relative deviation of each run from the median of its unit
    normalized.time = time / median.time - 1,
    log.median.time = log(median.time)
  ) %>%

  # Then, we remove the grouping. This step is very important, otherwise the
  # plotting functions get confused:
  ungroup()

# Output settings shared by all saved figures: resolution (used by raster
# outputs) and the height and width passed to ggsave
dpi <- 300
h <- 5
w <- 5

# We plot time * npz of each run against the number of nodes, coloured by the
# number of blocks per CPU
p <- ggplot(df, aes(x = nodes, y = timePerNprocs, color = blocksPerCpu)) +

  # We add the points (scatter plot) using circles (shape=21) a bit larger
  # than the default (size=3)
  geom_point(shape = 21, size = 3) +

  # The bw theme is recommended for publications
  theme_bw() +

  # Here we add the title and the labels of the axes
  labs(
    x = "Nodes",
    y = "Time * Num Procs",
    title = "HPCG strong scalability: Z axis",
    color = "Blocks Per CPU",
    subtitle = input_file
  ) +

  # And set the subtitle font size a bit smaller, so it fits nicely
  theme(plot.subtitle = element_text(size = 8))

# Then, we save the plot both in png and pdf
ggsave("time.png", plot = p, width = w, height = h, dpi = dpi)
ggsave("time.pdf", plot = p, width = w, height = h, dpi = dpi)