bscpkgs/garlic/exp/nbody/tampi.nix

219 lines
5.1 KiB
Nix
Raw Normal View History

2020-08-10 21:27:46 +08:00
{
2020-09-16 18:22:55 +08:00
stdenv
, nixpkgs
, pkgs
2020-08-10 21:27:46 +08:00
, genApp
, genConfigs
2020-09-02 23:07:09 +08:00
, runWrappers
2020-08-10 21:27:46 +08:00
}:
2020-09-02 23:07:09 +08:00
with stdenv.lib;
2020-08-10 21:27:46 +08:00
let
2020-09-21 20:34:08 +08:00
# Bring the bsc attribute set of pkgs into the local scope.
inherit (pkgs) bsc;
2020-09-02 23:07:09 +08:00
# Variable part of the experiment: every attribute holds the list of values
# to try for that parameter. The set is handed to genConfigs when `configs`
# is built.
varConfig = {
  blocksize = [ 1024 2048 ];
  cc = [ bsc.icc ];
  mpi = [ bsc.impi ];
  #mpi = [ bsc.mpichDebug ];
};
2020-09-02 23:07:09 +08:00
# Settings shared by every configuration. They are merged on top of each
# generated configuration when `configs` is built.
common = {
  # Compile time nbody config
  gitBranch = "garlic/mpi+send";
  #gitBranch = "garlic/tampi+send+oss+task";

  # nbody runtime options (turned into -t / -p by the argv stage)
  timesteps = 10;
  particles = 4 * 1024;

  # Resources (forwarded to the sbatch stage)
  nodes = "1";
  ntasksPerNode = "2";

  # Stage configuration.
  # NOTE(review): within the visible file only enableExtrae, enablePerf,
  # enableCtf and enableStrace are read (by debugStages); the other three
  # flags are not consulted here -- confirm whether that is intended.
  enableTrebuchet = true;
  enableSbatch = true;
  enableControl = true;
  enableExtrae = false;
  enablePerf = false;
  enableCtf = false;
  enableStrace = true;

  # MN4 path
  nixPrefix = "/gpfs/projects/bsc15/nix";
};
# Compute the cartesian product of all configurations and complete each one
# with the common settings. On a name clash the value from `common` wins,
# because it is the right-hand operand of `//`.
configs =
  let
    withCommon = conf: conf // common;
  in
    map withCommon (genConfigs varConfig);
2020-08-12 20:00:04 +08:00
2020-09-02 23:07:09 +08:00
# Return the string form of the program provided by a stage: the stage itself
# interpolated into a string, followed by its programPath attribute when the
# stage defines one.
stageProgram = stage:
  let
    suffix = if stage ? programPath then stage.programPath else "";
  in
    "${stage}${suffix}";
# Short alias for the wrapper set received as the runWrappers argument.
w = runWrappers;
2020-09-30 15:32:25 +08:00
# sbatch stage: wraps the program of the given stage with w.sbatch, using the
# resources (nodes, ntasksPerNode) and nixPrefix taken from conf.
sbatch = {stage, conf, ...}: with conf;
  let
    # Allow a user to define a custom reservation for the job in
    # MareNostrum4, by setting the garlic.sbatch.reservation attribute in the
    # ~/.config/nixpkgs/config.nix file. If the attribute is not set, no
    # reservation is used.
    userReservation =
      optionalAttrs (pkgs.config ? garlic.sbatch.reservation) {
        inherit (pkgs.config.garlic.sbatch) reservation;
      };

    # Options fixed by the experiment. They are the right-hand operand of
    # `//` below, so they take precedence over the user attributes if the
    # same name is set on both sides.
    jobOptions = {
      inherit nixPrefix nodes ntasksPerNode;
      jobName = "nbody-tampi";
      qos = "debug";
      time = "02:00:00";
      exclusive = true;
      program = stageProgram stage;
    };
  in
    w.sbatch (userReservation // jobOptions);
2020-08-12 20:00:04 +08:00
2020-09-02 23:07:09 +08:00
# control stage: wraps the program of the given stage with w.control.
control = {stage, conf, ...}: with conf;
  let
    program = stageProgram stage;
  in
    w.control { inherit program; };
# srun stage: wraps the program of the given stage with w.srun, passing a
# fixed set of srun options and the nixPrefix taken from conf.
srun = {stage, conf, ...}: with conf;
  let
    program = stageProgram stage;
    srunOptions = "--cpu-bind=verbose,socket";
  in
    w.srun { inherit program srunOptions nixPrefix; };
2020-09-02 23:07:09 +08:00
# statspy stage: wraps the program of the given stage with w.statspy.
# It is not part of stdStages or debugStages in this file.
statspy = {stage, conf, ...}: with conf;
  let
    program = stageProgram stage;
  in
    w.statspy { inherit program; };
# perf stage: wraps the program of the given stage with w.perf, using
# "sched record -a" as the perf arguments.
perf = {stage, conf, ...}: with conf;
  let
    program = stageProgram stage;
    perfArgs = "sched record -a";
  in
    w.perf { inherit program perfArgs; };
2020-10-07 15:49:42 +08:00
# nixsetup stage: wraps the program of the given stage with w.nixsetup,
# pointing it at the nix-setup executable under the nixPrefix from conf.
# It is not part of stdStages or debugStages in this file.
nixsetup = {stage, conf, ...}: with conf;
  let
    setupScript = "${nixPrefix}/bin/nix-setup";
  in
    w.nixsetup {
      nixsetup = setupScript;
      program = stageProgram stage;
    };
2020-10-05 22:40:22 +08:00
# isolate stage: wraps the program of the given stage with w.isolate for the
# "mn4" cluster. Both the stage itself and the nixPrefix from conf are
# forwarded to the wrapper.
isolate = {stage, conf, ...}: with conf;
  let
    program = stageProgram stage;
    clusterName = "mn4";
  in
    w.isolate { inherit stage nixPrefix program clusterName; };
# extrae stage: wraps the program of the given stage with w.extrae, using the
# extrae.xml file that sits next to this expression as configuration.
extrae = {stage, conf, ...}:
  let
    program = stageProgram stage;
  in
    w.extrae {
      inherit program;
      configFile = ./extrae.xml;
      traceLib = "mpi"; # mpi -> libtracempi.so
    };
2020-09-21 20:34:08 +08:00
# ctf stage: wraps the program of the given stage with w.argv, only to export
# the NANOS6 environment variables below before it runs.
ctf = {stage, conf, ...}:
  let
    nanos6Env = ''
      export NANOS6=ctf
      export NANOS6_CTF2PRV=0
    '';
  in
    w.argv {
      program = stageProgram stage;
      env = nanos6Env;
    };
2020-10-07 15:49:42 +08:00
# strace stage: wraps the program of the given stage with w.strace.
strace = {stage, conf, ...}:
  let
    program = stageProgram stage;
  in
    w.strace { inherit program; };
2020-09-02 23:07:09 +08:00
# argv stage: wraps the program of the given stage with w.argv and passes the
# number of timesteps (-t) and particles (-p) taken from conf.
argv = {stage, conf, ...}:
  let
    # The argument list is split over two lines by an explicit newline.
    nbodyArgs = "( -t ${toString conf.timesteps}\n-p ${toString conf.particles} )";
  in
    w.argv {
      program = stageProgram stage;
      # Disabled debugging environment, kept for reference.
      # NOTE(review): MPICH_DBG_OUTPUT is assigned twice below; the first
      # assignment may have been meant for another variable -- confirm before
      # re-enabling.
      #env = ''
      #  #export I_MPI_PMI_LIBRARY=${bsc.slurm17-libpmi2}/lib/libpmi2.so
      #  export I_MPI_DEBUG=+1000
      #  #export I_MPI_FABRICS=shm
      #
      #  export MPICH_DBG_OUTPUT=VERBOSE
      #  export MPICH_DBG_CLASS=ALL
      #  export MPICH_DBG_OUTPUT=stdout
      #
      #  export FI_LOG_LEVEL=Info
      #'';
      argv = nbodyArgs;
    };
2020-09-21 20:34:08 +08:00
# Overlay imported from the overlay.nix file three directories above this
# one. genPkgs places it in front of the experiment-specific overlay.
bscOverlay = import ../../../overlay.nix;
2020-09-16 18:22:55 +08:00
# Build a package set by calling nixpkgs with two overlays: the bsc overlay
# first, followed by the overlay given as argument.
genPkgs = newOverlay:
  let
    overlays = [ bscOverlay newOverlay ];
  in
    nixpkgs { inherit overlays; };
2020-09-16 18:22:55 +08:00
# We may be able to use overlays by invoking the fix function directly, but we
# have to get the definition of the bsc packages and the garlic ones as
# overlays.
# Last stage: the nbody program itself, overridden with the compiler,
# blocksize, MPI implementation and git branch of the given configuration.
nbodyFn = {stage, conf, ...}: with conf;
  let
    # We set the mpi implementation to the one specified in the conf, so all
    # packages in bsc will use that one.
    mpiOverlay = self: super: {
      bsc = super.bsc // { mpi = conf.mpi; };
    };
    customPkgs = genPkgs mpiOverlay;
    nbody = customPkgs.bsc.garlic.nbody;
  in
    nbody.override { inherit cc blocksize mpi gitBranch; };
2020-10-09 01:00:55 +08:00
# The experiment wrapper, overridden to use the nixPrefix of the common
# configuration.
experimentFn = w.experiment.override { inherit (common) nixPrefix; };
2020-10-08 01:01:34 +08:00
# Stages present in every unit, in the order in which they are chained.
# isolate is listed twice on purpose in this list: once right after sbatch
# and once more right after srun.
stdStages = [ sbatch isolate control srun isolate ];
2020-09-02 23:07:09 +08:00
2020-10-08 01:01:34 +08:00
# Optional stages, each one included only when the matching enable* flag of
# the common configuration is true.
debugStages = with common; builtins.concatLists [
  # Instrumentation with extrae
  (optional enableExtrae extrae)
  # Optionally profile the next stages with perf
  (optional enablePerf perf)
  # Optionally profile nanos6 with the new ctf
  (optional enableCtf ctf)
  # Optionally run the program with strace
  (optional enableStrace strace)
];
2020-10-07 15:49:42 +08:00
2020-10-08 01:01:34 +08:00
# Complete stage list of a unit: the standard stages, then the enabled debug
# stages, and finally the argv stage followed by the nbody program.
stages = builtins.concatLists [
  stdStages
  debugStages
  [ argv nbodyFn ]
];
2020-09-02 23:07:09 +08:00
# List of actual programs to be executed: one unit per configuration, all of
# them built from the same stage list.
units =
  let
    mkUnit = conf: w.unit { inherit conf stages; };
  in
    map mkUnit configs;
2020-08-12 20:00:04 +08:00
2020-10-09 01:00:55 +08:00
# The experiment: the overridden experiment wrapper applied to all the units.
experiment = experimentFn units;
2020-10-05 22:40:22 +08:00
2020-10-07 15:49:42 +08:00
# Wrap the program of the given stage with w.trebuchet, using the nixPrefix
# of the common configuration. Unlike the per-unit stages above, this
# function takes the stage directly rather than a {stage, conf} set.
trebuchet = stage:
  let
    program = stageProgram stage;
  in
    w.trebuchet {
      inherit program stage;
      inherit (common) nixPrefix;
      #experiment = experiment;
    };
# Apply the isolate stage to the given stage, with the common configuration
# standing in for the per-unit conf.
isolatedRun = stage:
  let
    conf = common;
  in
    isolate { inherit stage conf; };
2020-10-09 01:00:55 +08:00
# Result of this file: the experiment wrapped by isolatedRun and then by
# trebuchet.
final = trebuchet (isolatedRun experiment);
2020-10-05 22:40:22 +08:00
2020-08-10 21:27:46 +08:00
in
2020-09-02 23:07:09 +08:00
# We simply run each program one after another
2020-10-05 22:40:22 +08:00
#launch jobs
final
2020-10-09 01:00:55 +08:00
#jobs