bscpkgs/garlic/fig/nbody/baseline.R

212 lines
5.6 KiB
R
Raw Normal View History

2020-10-16 21:55:52 +08:00
library(ggplot2)
library(dplyr)
library(scales)
2020-10-23 16:53:39 +08:00
library(jsonlite)
2021-02-03 20:03:28 +08:00
library(egg)
2020-10-16 21:55:52 +08:00
2020-10-23 16:53:39 +08:00
# Command line arguments that follow the script name
args <- commandArgs(trailingOnly = TRUE)

# The timetable is read from args[1]; when no argument is given the script
# falls back to "input.json"
input_file <- if (length(args) > 0) args[1] else "input.json"

# Load the dataset in NDJSON format and flatten the nested records so that
# every field becomes a plain column (e.g. config.particles)
dataset <- jsonlite::flatten(jsonlite::stream_in(file(input_file)))
2020-10-23 16:53:39 +08:00
# Distinct particle counts found in the dataset (shown in the plot titles)
particles <- unique(dataset$config.particles)

# Keep only the columns used by the plots and drop the "config." prefix
# while selecting them
df <- dataset %>%
  select(
    nblocks = config.nblocks,
    cpusPerSocket = config.hw.cpusPerSocket,
    nodes = config.nodes,
    blocksize = config.blocksize,
    particles = config.particles,
    gitBranch = config.gitBranch,
    time
  )
2020-10-23 16:53:39 +08:00
# Derive the number of blocks per CPU and add factor versions of the
# variables that the plots use as discrete axes, colors or facets.
# mutate() evaluates its arguments in order, so blocksPerCpu is computed from
# the numeric nblocks before nblocks itself is turned into a factor.
df <- df %>%
  mutate(
    blocksPerCpu = nblocks / cpusPerSocket,
    nblocks = as.factor(nblocks),
    nodesFactor = as.factor(nodes),
    blocksPerCpuFactor = as.factor(blocksPerCpu),
    blocksizeFactor = as.factor(blocksize),
    particlesFactor = as.factor(particles),
    gitBranch = as.factor(gitBranch)
  )
2020-10-16 21:55:52 +08:00
# Normalize the time by the median and compute the per-group statistics:
#   tmedian      median time of each (nblocks, nodes, gitBranch) group
#   tn           median time multiplied by the number of nodes
#   tnorm        relative deviation of each run from its group median
#   bad          1 when any run of the group deviates by 1% or more, else 0
#   tmedian_min  smallest tmedian of each (nodes, gitBranch) group
#   tmin_max     largest tmedian_min of each gitBranch
#   tideal       tmin_max divided by the number of nodes
D <- df %>%
  group_by(nblocks, nodesFactor, gitBranch) %>%
  mutate(
    tmedian = median(time),
    tn = tmedian * nodes,
    tnorm = time / tmedian - 1,
    bad = max(as.numeric(abs(tnorm) >= 0.01))
  ) %>%
  ungroup() %>%
  group_by(nodesFactor, gitBranch) %>%
  mutate(tmedian_min = min(tmedian)) %>%
  ungroup() %>%
  group_by(gitBranch) %>%
  mutate(
    tmin_max = max(tmedian_min),
    tideal = tmin_max / nodes
  ) %>%
  ungroup() %>%
  # The flag is used as a discrete color, so store it as a factor
  mutate(bad = as.factor(bad))

# Disabled alternative: three levels computed per run instead of per group
#D$bad = as.factor(ifelse(abs(D$tnorm) >= 0.01, 2,
# ifelse(abs(D$tnorm) >= 0.005, 1, 0)))
2020-10-23 16:53:39 +08:00
# Distinct nblocks values and how many of them there are
bs_unique <- unique(df$nblocks)
nbs <- length(bs_unique)

# Print the processed table
print(D)

# Figure geometry shared by every png() call below: the pixel size is
# w * ppi by h * ppi at a resolution of ppi
ppi <- 300
h <- 7.5
w <- 7.5
2020-10-23 16:53:39 +08:00
# Box plot of the normalized time (tnorm) against the number of blocks per
# CPU, written to box.png. There is one panel per git branch and the boxes
# are filled by node count. The outline color follows the `bad` flag.

# Join all particle counts into one string so the title stays a single label
# even when the dataset holds more than one particle count
particles_label <- paste(
  format(particles, scientific = FALSE, trim = TRUE),
  collapse = ", "
)

# Create the plot with the normalized time vs blocks per CPU
p <- ggplot(data = D, aes(x = blocksPerCpuFactor, y = tnorm, color = bad)) +
  # Labels
  labs(
    x = "Blocks/CPU", y = "Normalized time",
    title = sprintf("Nbody normalized time. Particles=%s", particles_label),
    subtitle = input_file
  ) +
  # Add the maximum allowed error lines (same 1% threshold as the bad flag)
  geom_hline(
    yintercept = c(-0.01, 0.01),
    linetype = "dashed", color = "gray"
  ) +
  # Draw boxplots
  geom_boxplot(aes(fill = nodesFactor)) +
  # Named values tie each level of `bad` to its color. With positional
  # values a dataset where every group is bad (only level "1" present)
  # would get the first color, black, for the bad boxes.
  scale_color_manual(values = c("0" = "black", "1" = "brown")) +
  facet_grid(gitBranch ~ .) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8))

# Render the plot. The device is opened only once the plot object exists, so
# a failure while building it does not leave a graphics device open.
png("box.png", width = w * ppi, height = h * ppi, res = ppi)
print(p)
dev.off()
2021-02-03 20:48:50 +08:00
# Time against block size, written to time-blocksize.png. Points are the
# individual runs and the lines join the group medians (tmedian), one line
# per (git branch, node count) pair. One panel per git branch, log2 time axis.

# Join all particle counts into one string so the title stays a single label
# even when the dataset holds more than one particle count
particles_label <- paste(
  format(particles, scientific = FALSE, trim = TRUE),
  collapse = ", "
)

p1 <- ggplot(D, aes(x = blocksizeFactor, y = time)) +
  labs(
    x = "Blocksize", y = "Time (s)",
    title = sprintf("Nbody granularity. Particles=%s", particles_label),
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8)) +
  geom_line(aes(
    y = tmedian,
    group = interaction(gitBranch, nodesFactor),
    color = nodesFactor
  )) +
  # The shape is a constant, so no shape scale is needed
  geom_point(aes(color = nodesFactor), size = 3, shape = 21) +
  facet_grid(gitBranch ~ .) +
  scale_y_continuous(trans = log2_trans())

png("time-blocksize.png", width = w * ppi, height = h * ppi, res = ppi)
print(p1)
dev.off()
2021-02-03 20:03:28 +08:00
2021-02-03 20:48:50 +08:00
# Time against blocks per CPU, written to time-blocks-per-cpu.png. Points are
# the individual runs and the lines join the group medians (tmedian), one
# line per (git branch, node count) pair. One panel per git branch, log2 time
# axis.

# Join all particle counts into one string so the title stays a single label
# even when the dataset holds more than one particle count
particles_label <- paste(
  format(particles, scientific = FALSE, trim = TRUE),
  collapse = ", "
)

p2 <- ggplot(D, aes(x = blocksPerCpuFactor, y = time)) +
  labs(
    x = "Blocks/CPU", y = "Time (s)",
    title = sprintf("Nbody granularity. Particles=%s", particles_label),
    subtitle = input_file
  ) +
  theme_bw() +
  theme(plot.subtitle = element_text(size = 8)) +
  geom_line(aes(
    y = tmedian,
    group = interaction(gitBranch, nodesFactor),
    color = nodesFactor
  )) +
  # The shape is a constant, so no shape scale is needed
  geom_point(aes(color = nodesFactor), size = 3, shape = 21) +
  facet_grid(gitBranch ~ .) +
  scale_y_continuous(trans = log2_trans())

png("time-blocks-per-cpu.png", width = w * ppi, height = h * ppi, res = ppi)
print(p2)
dev.off()
#p = ggarrange(p1, p2, ncol=2)
#png("time-gra.png", width=2*w*ppi, height=h*ppi, res=ppi)
#print(p)
#dev.off()
# Experiment space: particle counts against node counts, one panel per git
# branch, written to exp-space.png. Points sharing a particle count are
# joined by a line.
png("exp-space.png", width = w * ppi, height = h * ppi, res = ppi)
p <- ggplot(data = df, aes(x = nodesFactor, y = particlesFactor)) +
  geom_line(aes(group = particles)) +
  geom_point(aes(color = nodesFactor), size = 3) +
  facet_grid(gitBranch ~ .) +
  labs(x = "Nodes", y = "Particles", title = "Nbody: Experiment space") +
  theme_bw()
print(p)
dev.off()
2020-10-16 21:55:52 +08:00
2021-02-03 20:03:28 +08:00
# Granularity space: blocks per CPU against node counts, one panel per git
# branch, written to gra-space.png. Points sharing a node count are joined
# by a line.
png("gra-space.png", width = w * ppi, height = h * ppi, res = ppi)
p <- ggplot(data = D, aes(x = nodesFactor, y = blocksPerCpuFactor)) +
  geom_line(aes(group = nodesFactor)) +
  geom_point(aes(color = nodesFactor), size = 3) +
  facet_grid(gitBranch ~ .) +
  labs(x = "Nodes", y = "Blocks/CPU", title = "Nbody: Granularity space") +
  theme_bw()
print(p)
dev.off()
2021-02-03 20:48:50 +08:00
# Strong scaling: median time against node count on a log2 axis, one panel
# per git branch, written to performance.png.
png("performance.png", width = w * ppi, height = h * ppi, res = ppi)
p <- ggplot(D, aes(x = nodesFactor)) +
  # Median time: one line per (git branch, blocks per CPU) pair, with the
  # line type telling the blocks per CPU apart
  geom_line(aes(
    y = tmedian,
    linetype = blocksPerCpuFactor,
    group = interaction(gitBranch, blocksPerCpuFactor)
  )) +
  # tideal of each branch, drawn in red
  geom_line(aes(y = tideal, group = gitBranch), color = "red") +
  geom_point(aes(y = tmedian, color = nodesFactor), size = 3) +
  facet_grid(gitBranch ~ .) +
  scale_shape_manual(values = c(21, 22)) +
  scale_y_continuous(trans = log2_trans()) +
  labs(x = "Nodes", y = "Time (s)", title = "Nbody strong scaling") +
  theme_bw()
print(p)
dev.off()
2021-03-04 01:31:43 +08:00
# Median time multiplied by the node count (tn) against node count on a log2
# axis, one line and one panel per git branch, written to time-nodes.png.
png("time-nodes.png", width = w * ppi, height = h * ppi, res = ppi)
p <- ggplot(D, aes(x = nodesFactor)) +
  geom_line(aes(y = tn, group = gitBranch)) +
  facet_grid(gitBranch ~ .) +
  scale_y_continuous(trans = log2_trans()) +
  labs(
    x = "Nodes", y = "Time * nodes (s)",
    title = "Nbody strong scaling"
  ) +
  theme_bw()
print(p)
dev.off()