packages.vasp.nvidia: use mpi with slurm support

2025-01-12 14:22:53 +08:00
parent 29c0ff1b9a
commit aca4dea8d2
6 changed files with 76 additions and 13 deletions

View File

@@ -150,8 +150,7 @@ inputs:
);
extraCgroupConfig =
''
-# cause job random crash
-# ConstrainCores=yes
+ConstrainCores=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
AllowedSwapSpace=20

View File

@@ -39,7 +39,7 @@ inputs: rec
};
nvidia = inputs.pkgs.callPackage ./vasp/nvidia
{
-inherit (nvhpcPackages) stdenv hdf5;
+inherit (nvhpcPackages) stdenv hdf5 mpi;
inherit src;
vtst = inputs.topInputs.self.src.vtst.patch;
wannier90 = inputs.pkgs.wannier90.overrideAttrs { buildFlags = [ "dynlib" ]; };
@@ -96,13 +96,13 @@ inputs: rec
spectroscopy = inputs.pkgs.callPackage ./spectroscopy.nix { src = inputs.topInputs.spectroscopy; };
mirism = inputs.pkgs.callPackage ./mirism { inherit biu; stdenv = inputs.pkgs.clang18Stdenv; };
vaspberry = inputs.pkgs.callPackage ./vaspberry.nix { src = inputs.topInputs.vaspberry; };
-nvhpcStdenv = inputs.pkgs.callPackage ./nvhpcStdenv.nix { src = inputs.topInputs.self.src.nvhpc; };
nvhpcPackages = inputs.pkgs.lib.makeScope inputs.pkgs.newScope (final:
{
-stdenv = nvhpcStdenv;
+stdenv = inputs.pkgs.callPackage ./nvhpc/stdenv.nix { src = inputs.topInputs.self.src.nvhpc; };
fmt = (inputs.pkgs.fmt.override { inherit (final) stdenv; }).overrideAttrs { doCheck = false; };
hdf5 = inputs.pkgs.hdf5.override
{ inherit (final) stdenv; cppSupport = false; fortranSupport = true; enableShared = false; enableStatic = true; };
+mpi = inputs.pkgs.callPackage ./nvhpc/mpi.nix { inherit (final) stdenv; };
});
gccFull = inputs.pkgs.symlinkJoin
{

packages/nvhpc/mpi.nix Normal file (66 additions)
View File

@@ -0,0 +1,66 @@
# picked from nixpkgs 24e16d8b21f698cbe372be67b645a1919bfd0d20
{
requireFile, stdenv, lib,
perl, libnl, rdma-core, zlib, numactl, libevent, hwloc, libpsm2, libfabric, pmix, ucx, ucc, prrte
}: stdenv.mkDerivation
{
name = "openmpi";
src = requireFile
{
name = "openmpi-gitclone.tar.gz";
# nix-prefetch-url file://$(pwd)/openmpi-gitclone.tar.gz
sha256 = "05r5x6mgw2f2kcq9vhdkfj42panchzlbpns8qy57y4jsbmabwabi";
message = "Source file not found.";
};
postPatch = ''
patchShebangs ./
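# force the result of configure's SSE/AVX capability checks to match the platform
# declared in Nix, rather than whatever the build machine happens to support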
${lib.pipe {
sse3 = true;
sse41 = true;
avx = true;
avx2 = stdenv.hostPlatform.avx2Support;
avx512 = stdenv.hostPlatform.avx512Support;
} [
(lib.mapAttrsToList (
option: val: ''
substituteInPlace configure \
--replace-fail \
ompi_cv_op_avx_check_${option}=yes \
ompi_cv_op_avx_check_${option}=${if val then "yes" else "no"}
''
))
(lib.concatStringsSep "\n")
]}
'';
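# openmpi records the build user, host and date in its build-info output;
# pin them so the result does not depend on the build environment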
env = {
USER = "nixbld";
HOSTNAME = "localhost";
SOURCE_DATE_EPOCH = "0";
};
buildInputs = [ zlib libevent hwloc libnl numactl pmix ucx ucc prrte rdma-core libpsm2 libfabric ];
nativeBuildInputs = [ perl ];
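# --with-slurm plus the external pmix is what allows srun to launch the MPI ranks
# directly (no mpirun needed), matching the srun-based submit commands in this commit;
# --with-cuda points at the CUDA toolkit bundled inside the NVHPC compiler tree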
configureFlags = [
"--enable-mca-dso"
"--enable-mpi-fortran"
"--with-libnl=${lib.getDev libnl}"
"--with-pmix=${lib.getDev pmix}"
"--with-pmix-libdir=${lib.getLib pmix}/lib"
"--with-prrte=${lib.getBin prrte}"
"--enable-sge"
"--enable-mpirun-prefix-by-default"
"--with-cuda=${stdenv.cc.cc}/Linux_x86_64/${stdenv.cc.cc.version}/cuda/${stdenv.cc.cc.passthru.src.cudaVersion}"
"--enable-dlopen"
"--with-psm2=${lib.getDev libpsm2}"
"--with-ofi=${lib.getDev libfabric}"
"--with-ofi-libdir=${lib.getLib libfabric}/lib"
"--with-slurm"
"--with-libevent=${lib.getDev libevent}"
"--with-hwloc=${lib.getDev hwloc}"
];
enableParallelBuilding = true;
doCheck = true;
}
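
Not part of the diff, but a quick way to confirm that the resulting Open MPI actually carries Slurm support is to ask ompi_info for its MCA components. The ./result path below assumes the package was just built with nix-build and is only illustrative.

./result/bin/ompi_info | grep -i slurm
# with Slurm support compiled in, slurm components should appear for the
# process-launch (plm) and resource-allocation (ras) frameworks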

View File

@@ -43,7 +43,6 @@ let
passthru = { inherit src cudaCapability buildEnv runEnv; };
};
compilerDir = "${nvhpc}/Linux_x86_64/${src.version}/compilers";
-mpiDir = "${nvhpc}/Linux_x86_64/${src.version}/comm_libs/mpi";
cudaCapability = builtins.concatStringsSep ","
(
(builtins.map (cap: "cc${builtins.replaceStrings ["."] [""] cap}") config.cudaCapabilities)
@@ -53,7 +52,6 @@ let
''
addNvhpcEnv() {
addToSearchPath PATH ${compilerDir}/bin
-addToSearchPath PATH ${mpiDir}/bin
addToSearchPath PATH ${gcc.cc}/bin
}
addEnvHooks "$hostOffset" addNvhpcEnv
@@ -62,7 +60,7 @@ let
''
#!${bash}/bin/bash
# make mpirun and nvaccelinfo accessible
-export PATH=${compilerDir}/bin:${mpiDir}/bin''${PATH:+:$PATH}
+export PATH=${compilerDir}/bin''${PATH:+:$PATH}
# NVPL need this to load libgomp.so (actually libnvomp.so) from nvhpc instead of from gcc
# https://docs.nvidia.com/nvpl/
export LD_LIBRARY_PATH=${compilerDir}/lib''${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
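
Since the setup hook and the launcher script no longer put comm_libs/mpi on PATH, any mpirun a job sees is expected to come from the new openmpi package (wired in through the vasp-nvidia wrapper later in this commit) rather than from the NVHPC bundle. A rough interactive check, assuming a shell in which that wrapper has already extended PATH:

type -a mpirun        # should resolve inside the nix store path of the openmpi build, not .../comm_libs/mpi/bin
type -a nvaccelinfo   # still found via the compilers/bin directory added above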

View File

@@ -223,12 +223,12 @@ int main()
if (state.gpu_scheme_entries[state.gpu_scheme_selected] == "any single GPU")
state.submit_command =
"sbatch --partition={}\n--ntasks=1 --cpus-per-gpu=1 --gpus=1\n--job-name='{}' --output='{}'\n"
"--wrap=\"vasp-nvidia mpirun --bind-to none vasp-{}\""_f
"--wrap=\"vasp-nvidia srun vasp-{}\""_f
(device.GpuPartition, state.job_name, state.output_file, state.vasp_entries[state.vasp_selected]);
else
state.submit_command =
"sbatch --partition={}\n--ntasks=1 --cpus-per-gpu=1 --gpus={}:1\n--job-name='{}' --output='{}'\n"
"--wrap=\"vasp-nvidia mpirun --bind-to none vasp-{}\""_f
"--wrap=\"vasp-nvidia srun vasp-{}\""_f
(
device.GpuPartition, state.gpu_entries[state.gpu_selected],
state.job_name, state.output_file, state.vasp_entries[state.vasp_selected]
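
With hypothetical values filled in (partition gpu, job name relax, output relax.out, VASP variant std), the first branch now produces a submission along these lines, shown with shell line continuations in place of the embedded newlines; inside the allocation, srun starts the MPI-enabled binary through the vasp-nvidia wrapper:

sbatch --partition=gpu \
  --ntasks=1 --cpus-per-gpu=1 --gpus=1 \
  --job-name='relax' --output='relax.out' \
  --wrap="vasp-nvidia srun vasp-std"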

View File

@@ -1,6 +1,6 @@
{
stdenv, src, writeShellScriptBin,
-rsync, which, wannier90, hdf5, vtst, mkl
+rsync, which, wannier90, hdf5, vtst, mkl, mpi
}:
let vasp = stdenv.mkDerivation
{
@@ -16,7 +16,7 @@ let vasp = stdenv.mkDerivation
chmod -R +w src
'';
buildInputs = [ hdf5 wannier90 mkl ];
-nativeBuildInputs = [ rsync which ];
+nativeBuildInputs = [ rsync which mpi ];
installPhase =
''
mkdir -p $out/bin
@@ -38,7 +38,7 @@ let vasp = stdenv.mkDerivation
};
in writeShellScriptBin "vasp-nvidia"
''
-export PATH=${vasp}/bin''${PATH:+:$PATH}
+export PATH=${vasp}/bin:${mpi}/bin''${PATH:+:$PATH}
# set OMP_NUM_THREADS if SLURM_CPUS_PER_TASK is set
if [ -z "$OMP_NUM_THREADS" ] && [ -n "$SLURM_CPUS_PER_TASK" ]; then