diff --git a/garlic/exp/fwi/common.nix b/garlic/exp/fwi/common.nix
index cbc49a4..e9a6d41 100644
--- a/garlic/exp/fwi/common.nix
+++ b/garlic/exp/fwi/common.nix
@@ -40,67 +40,71 @@ rec {
     ntasksPerNode = hw.cpusPerNode;
   };
 
-  srun = {nextStage, conf, ...}:
+  exec = {nextStage, conf, ...}:
   let
     fwiParams = bsc.apps.fwi.params.override {
       inherit (conf) nx ny nz;
     };
-  in
-    stdexp.stdStages.srun {
-      inherit nextStage conf;
-      # Now we add some commands to execute before calling srun. These will
-      # only run in one rank (the first in the list of allocated nodes)
-      preSrun = ''
-        export GARLIC_FWI_SRUNDIR=$(pwd)
-        export GARLIC_FWI_EXECDIR="${conf.tempDir}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
-        mkdir -p "$GARLIC_FWI_EXECDIR"
-
-        export GARLIC_FWI_PARAMS="${fwiParams}/fwi_params.txt"
-        export GARLIC_FWI_FREQ="${fwiParams}/fwi_frequencies.txt"
-
-        # We cannot change the working directory of srun, so we use a
-        # subshell to ignore the cd
-        (
-          # Generate the input dataset
-          >&2 echo "generating the input dataset"
-          cd "$GARLIC_FWI_EXECDIR"
-          ${fwiParams}/bin/ModelGenerator \
-            -m "$GARLIC_FWI_PARAMS" "$GARLIC_FWI_FREQ"
-        )
-      '';
-
-      postSrun = optionalString (conf.enableCTF) ''
-        # Save the traces
-        mv "$GARLIC_FWI_EXECDIR"/trace_* .
-      '' + ''
-        # Remove everything else
-        rm -rf "$GARLIC_FWI_EXECDIR"
-      '';
-    };
-
-  exec = {nextStage, conf, ...}: stages.exec {
+  in stages.exec {
     inherit nextStage;
 
     # FIXME: FWI should allow the I/O directory to be specified as a
     # parameter
     pre = ''
-      # Run fwi at the in a directory with fast local storage
-      cd "$GARLIC_FWI_EXECDIR"
+      FWI_SRUNDIR=$(pwd)
+      FWI_EXECDIR="${conf.tempDir}/out/$GARLIC_USER/$GARLIC_UNIT/$GARLIC_RUN"
+      FWI_PARAMS="${fwiParams}/fwi_params.txt"
+      FWI_FREQ="${fwiParams}/fwi_frequencies.txt"
+
+      # Run fwi in a directory with fast local storage
+      mkdir -p "$FWI_EXECDIR"
+      cd "$FWI_EXECDIR"
+
+      # Only generate the input if we have the CPU 0 (once per node).
+      # Reset the flag first so a value inherited from the environment
+      # cannot turn every rank into a captain.
+      FWI_CAPTAIN=
+      if grep -q 'Cpus_allowed_list:[[:space:]]0' /proc/self/status; then
+        FWI_CAPTAIN=1
+      fi
+
+      if [ -n "$FWI_CAPTAIN" ]; then
+        >&2 echo "generating the input dataset"
+        ${fwiParams}/bin/ModelGenerator -m "$FWI_PARAMS" "$FWI_FREQ"
+      fi
 
       echo >&2 "Current dir: $(pwd)"
-      echo >&2 "Using PARAMS=$GARLIC_FWI_PARAMS and FREQ=$GARLIC_FWI_FREQ"
+      echo >&2 "Using PARAMS=$FWI_PARAMS and FREQ=$FWI_FREQ"
     '' + optionalString (conf.enableCTF) ''
       export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf"
     '';
 
     argv = [
-      ''"$GARLIC_FWI_PARAMS"''
-      ''"$GARLIC_FWI_FREQ"''
+      ''"$FWI_PARAMS"''
+      ''"$FWI_FREQ"''
     ] ++ optional (needsBlocksize conf) conf.blocksize ++ [
       "-1" # Fordward steps
       "-1" # Backward steps
       conf.ioFreq # Write/read frequency
     ];
+
+    post = ''
+      # Go back to the garlic out directory
+      cd "$FWI_SRUNDIR"
+
+      if [ -n "$FWI_CAPTAIN" ]; then
+    '' + optionalString (conf.enableCTF) ''
+        # FIXME: We should specify the path in the nanos6 config, so we
+        # can avoid the race condition while they are generating the
+        # traces
+        sleep 3
+
+        # Save the traces
+        mv "$FWI_EXECDIR"/trace_* .
+    '' + ''
+        rm -rf "$FWI_EXECDIR"
+      fi
+    '';
   };
 
   apps = bsc.garlic.apps;
@@ -117,8 +121,5 @@ rec {
     inherit fwiParams;
   };
 
-  pipeline = stdexp.stdPipelineOverride {
-    # Replace the stdandard srun stage with our own
-    overrides = { inherit srun; };
-  } ++ [ exec program ];
+  pipeline = stdexp.stdPipeline ++ [ exec program ];
 }