71c06d02da
This workaround stage prevents srun from returning 0 to the upper stages when a signal arrives after MPI_Finalize. It writes the return code to a file named .srun.rc.$rank, which is later checked to exist and to contain a 0. When the program is killed, it exits with a non-zero code and the error is propagated to the baywatch stage, which aborts immediately without creating the rc file.
27 lines
362 B
Nix
27 lines
362 B
Nix
# baywatch stage: wraps the next stage in a small /bin/sh script that records
# a successful exit in a per-rank file, `.srun.rc.$SLURM_PROCID`, created in
# the working directory of the running script.
#
# First argument set (build inputs):
#   stdenv       - provides mkDerivation.
#   garlicTools  - brought into scope with `with`; supplies `stageProgram`.
# Second argument set (stage parameters):
#   nextStage    - the stage this wrapper runs.
{
  stdenv
, garlicTools
}:

{
  nextStage
}:

with garlicTools;

# No attribute below refers to a sibling attribute, so a plain (non-`rec`)
# attribute set is sufficient.
stdenv.mkDerivation {
  name = "baywatch";

  # Only installPhase runs; there is nothing to unpack or compile.
  phases = [ "installPhase" ];
  preferLocalBuild = true;

  # Leave the `#!/bin/sh -e` line of the generated script untouched.
  dontPatchShebangs = true;

  # The derivation output ($out) is the wrapper script itself, not a directory.
  #
  # The here-document delimiter is quoted ('EOF'), so the build shell copies
  # the body verbatim: `$?` and `$SLURM_PROCID` are expanded only when the
  # wrapper runs. The Nix antiquotation is substituted at evaluation time
  # (presumably with the path of nextStage's executable; confirm against
  # garlicTools.stageProgram).
  #
  # Because the wrapper runs under `sh -e`, a failing next stage aborts the
  # script before the `echo`, so no rc line is written in that case; the line
  # is appended only after the next stage has returned successfully.
  #
  # Nix strips the common indentation of the indented string, so the `EOF`
  # terminator ends up at column 0, as the here-document requires.
  installPhase = ''
    cat > $out <<'EOF'
    #!/bin/sh -e

    ${stageProgram nextStage}
    echo $? >> .srun.rc.$SLURM_PROCID

    EOF
    chmod +x $out
  '';
}