From 796eb0ae508de8900fe9d6ea42e46a095ecc7f4c Mon Sep 17 00:00:00 2001 From: chn Date: Fri, 31 Jan 2025 19:25:07 +0800 Subject: [PATCH] package.info: finish --- modules/services/slurm.nix | 5 +++ packages/default.nix | 2 +- packages/info/CMakeLists.txt | 5 ++- packages/info/src/main.cpp | 83 +++++++++++++++++++++++++++++++++++- 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/modules/services/slurm.nix b/modules/services/slurm.nix index 3ed77842..73580560 100644 --- a/modules/services/slurm.nix +++ b/modules/services/slurm.nix @@ -160,6 +160,11 @@ inputs: # this make job hang, not sure why # ConstrainDevices=yes ''; + extraPlugstackConfig = + let info = inputs.pkgs.localPackages.info.override { slurm = inputs.config.services.slurm.package; }; in + '' + required ${info}/lib/libinfo.so + ''; }; munge = { enable = true; password = inputs.config.sops.secrets."munge.key".path; }; }; diff --git a/packages/default.nix b/packages/default.nix index 5dbf6524..1d82771a 100644 --- a/packages/default.nix +++ b/packages/default.nix @@ -125,7 +125,7 @@ inputs: rec nglview = inputs.pkgs.python3Packages.callPackage ./nglview.nix { src = inputs.topInputs.self.src.nglview; }; octodns-cloudflare = inputs.pkgs.python3Packages.callPackage ./octodns-cloudflare.nix { src = inputs.topInputs.octodns-cloudflare; }; - info = inputs.pkgs.callPackage ./info.nix { inherit biu; stdenv = inputs.pkgs.clang18Stdenv; }; + info = inputs.pkgs.callPackage ./info { inherit biu; stdenv = inputs.pkgs.clang18Stdenv; }; fromYaml = content: builtins.fromJSON (builtins.readFile (inputs.pkgs.runCommand "toJSON" {} diff --git a/packages/info/CMakeLists.txt b/packages/info/CMakeLists.txt index 7a2665cc..5e1642fb 100644 --- a/packages/info/CMakeLists.txt +++ b/packages/info/CMakeLists.txt @@ -12,10 +12,11 @@ endif() find_package(biu REQUIRED) find_path(SLURM_INCLUDE_PATH slurm/spank.h REQUIRED) -add_library(info src/main.cpp) -target_include_directories(info PRIVATE $ ${SLURM_INCLUDE_PATH}) 
+add_library(info SHARED src/main.cpp)
+target_include_directories(info PRIVATE ${SLURM_INCLUDE_PATH})
 target_link_libraries(info PRIVATE biu::biu)
 target_compile_features(info PUBLIC cxx_std_23)
+set_target_properties(info PROPERTIES CXX_VISIBILITY_PRESET hidden)
 install(TARGETS info LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
 
 get_property(ImportedTargets DIRECTORY "${CMAKE_SOURCE_DIR}" PROPERTY IMPORTED_TARGETS)
diff --git a/packages/info/src/main.cpp b/packages/info/src/main.cpp
index 1bddb47b..ca151f46 100644
--- a/packages/info/src/main.cpp
+++ b/packages/info/src/main.cpp
@@ -1,5 +1,91 @@
-# include
+# include <biu.hpp>
 
-int main()
+extern "C"
 {
+# include <slurm/spank.h>
+# include <slurm/slurm.h>
+// ac argv: configuration count and flags in plugstack.conf
+[[gnu::visibility("default")]] int slurm_spank_exit(spank_t spank, int ac, char** argv);
 }
+
+// Builds a text report about the current job and its steps and emits it through slurm_spank_log.
+// Work is done only when spank_context() is S_CTX_REMOTE; lookup failures are written into the report.
+// Always returns 0.
+int slurm_spank_exit(spank_t spank, int ac, char** argv)
+{
+  using namespace biu::literals;
+  if (spank_context() == S_CTX_REMOTE)
+  {
+    std::stringstream ss;
+    ss << "------------------------------------------------------------\n";
+    std::uint32_t jid;
+    auto result = spank_get_item(spank, S_JOB_ID, &jid);
+    if (result != ESPANK_SUCCESS) ss << "error getting job id: {}\n"_f(int(result));
+    else
+    {
+      ss << "info for job {}:\n"_f(jid);
+      // start from nullptr: the release below is reached on every path, including a failed load
+      job_info_msg_t* job_info = nullptr;
+      auto result = slurm_load_job(&job_info, jid, 0);
+      if (result != SLURM_SUCCESS) ss << "error loading job info: {}\n"_f(slurm_strerror(result));
+      else if (job_info->record_count != 1) ss << "record_count {} != 1\n"_f(job_info->record_count);
+      else
+      {
+        auto null_to_empty = [](const char* str) { return str ? str : ""; };
+        auto timepoint = [](time_t time)
+          { return "{:%Y-%m-%d %H:%M:%S}"_f(*std::localtime(&time)); };
+        auto timespan = [](time_t time)
+          { return "{:%H:%M:%S}"_f(std::chrono::seconds(time)); };
+        YAML::Node info;
+        info["Job Id"] = job_info->job_array->job_id;
+        info["Job Name"] = null_to_empty(job_info->job_array->name);
+        info["User Id"] = job_info->job_array->user_id;
+        info["Work Directory"] = null_to_empty(job_info->job_array->work_dir);
+        info["Partition"] = null_to_empty(job_info->job_array->partition);
+        info["Submit Time"] = timepoint(job_info->job_array->submit_time);
+        info["Start Time"] = timepoint(job_info->job_array->start_time);
+        info["End Time"] = timepoint(job_info->job_array->end_time);
+        info["Nodes"] = null_to_empty(job_info->job_array->nodes);
+        info["TREs Allocated"] = null_to_empty(job_info->job_array->tres_alloc_str);
+        info["GREs Allocated"] = null_to_empty(job_info->job_array->gres_total);
+        info["Status"] = job_info->job_array->job_state;
+        job_step_info_response_msg_t* step_info = nullptr;
+        auto result = slurm_get_job_steps
+          (0, jid, NO_VAL, &step_info, 0);
+        if (result != SLURM_SUCCESS) ss << "error getting job steps: {}\n"_f(slurm_strerror(result));
+        else
+        {
+          std::vector<YAML::Node> steps;
+          for (std::uint32_t i = 0; i < step_info->job_step_count; i++)
+          {
+            YAML::Node step;
+            step["Step Id"] = step_info->job_steps[i].step_id.step_id;
+            step["Step Name"] = null_to_empty(step_info->job_steps[i].name);
+            step["Start Time"] = timepoint(step_info->job_steps[i].start_time);
+            step["Run Time"] = timespan(step_info->job_steps[i].run_time);
+            step["Command Line"] = null_to_empty(step_info->job_steps[i].submit_line);
+            step["Status"] = step_info->job_steps[i].state;
+            steps.push_back(step);
+          }
+          info["Steps"] = steps;
+          // every field has been copied into YAML nodes, so the step response can be released
+          slurm_free_job_step_info_response_msg(step_info);
+        }
+        ss << "------------------------------------------------------------\n" << info;
+      }
+      if (job_info != nullptr) slurm_free_job_info_msg(job_info);
+    }
+    slurm_spank_log("%s", ss.str().c_str());
+  }
+  return 0;
+}
+
+// plugin identification symbols; name and type are arrays, matching SPANK_PLUGIN in slurm/spank.h
+[[gnu::visibility("default")]] extern const char plugin_name[];
+[[gnu::visibility("default")]] extern const char plugin_type[];
+[[gnu::visibility("default")]] extern const unsigned int plugin_version;
+[[gnu::visibility("default")]] extern const unsigned int spank_plugin_version;
+const char plugin_name[] = "info";
+const char plugin_type[] = "spank";
+const unsigned int plugin_version = SLURM_VERSION_NUMBER;
+const unsigned int spank_plugin_version = 0;