From b06bb55bdd20d8346144c148572804a10f9e3c6f Mon Sep 17 00:00:00 2001 From: chn Date: Tue, 25 Feb 2025 10:21:49 +0800 Subject: [PATCH] packages.info: push notice --- devices/pc/secrets/default.yaml | 9 +- devices/srv1/node0/secrets/default.yaml | 9 +- devices/srv2/node0/secrets/default.yaml | 5 +- modules/services/slurm.nix | 36 ++- packages/biu/include/biu/format.hpp | 3 + packages/biu/include/biu/logger.hpp | 2 + packages/biu/include/biu/logger.tpp | 10 +- packages/info/CMakeLists.txt | 12 +- packages/info/default.nix | 3 +- packages/info/src/main.cpp | 302 ++++++++++++++++-------- 10 files changed, 274 insertions(+), 117 deletions(-) diff --git a/devices/pc/secrets/default.yaml b/devices/pc/secrets/default.yaml index 9649a810..2f17731a 100644 --- a/devices/pc/secrets/default.yaml +++ b/devices/pc/secrets/default.yaml @@ -35,6 +35,9 @@ user: zzn: ENC[AES256_GCM,data:xBSve41JclBYQULPN7yV/1Eyo3u+CHAewVetKHwjvl6Te0kk/+aLx6gs8EpOJGmVaiSAdt6F2ayHXUD8RXXpJIOnnEHk88kqbw==,iv:XPxMLvlVtaZvpWnau5Jwlj/5ty5Zyw4F44ix5G64Z84=,tag:uJfWb0PCebdMtxXMfueULQ==,type:str] wechat2tg: token: ENC[AES256_GCM,data:PrZWR8WiZ7grkpTLqMxwbnkwZttl7n0e1lc1mdHJiFUWq/PqG2wNBC27C58jMg==,iv:02XHhfpN8YPix0REbJDnsBbvCwifbdwBwfuJ2glbvjo=,tag:6aWNqBfwulsjMbl+D6L9vw==,type:str] +telegram: + token: ENC[AES256_GCM,data:fqOn2FiLDWZeTUV3hrLIclHTVE0LBDKUW7BK1bRCe0ni5D+hsbM2NdUPWaT4dQ==,iv:j7zQdnz7x7xqVAA882gyCQdjukOLOEvpJ+h5QdS6IP0=,tag:ypeg5xmiqtQ3n+WoF1mNqQ==,type:str] + chn: ENC[AES256_GCM,data:aNuyajxDw+yD,iv:RIoxl+f8rjXG/CpVROkci3mikE21v2DEbzHl/zHH3yw=,tag:5y9K/Wif1ThhDjld1qjtmA==,type:str] sops: kms: [] gcp_kms: [] @@ -59,8 +62,8 @@ sops: OUlxNjdQaXdXMkZ6bnV1ek4yZ2dpbkEKpKGOAxo5Eef2jtGrg4iSzmGCeg+vTgvu +K8b+O19MIkGMDBm6UbYUPtc/7eqoEZRiTUzNMTmfkLVS4ul5zou9A== -----END AGE ENCRYPTED FILE----- - lastmodified: "2024-12-14T01:12:53Z" - mac: ENC[AES256_GCM,data:Ak+LR+PkQG1g9wwlfLtDN2Dm8GdGfbb0qA9Spb3X0LkdCSFLBWqW0Jf88gHB0j/4HszYVaCAUFs+OlTvTjOtboOCTM7tH6z3dd0sU+EMHeK9cPz9kmDlF1LFFhD8dyqytEwq8/xN2MlTmbVoYQvVoGsrD8tP0B9NBPaQiLMPcrQ=,iv:9DthG+HGB3lCxb85YpfitNw2PWYwpdqWTo660gTOUew=,tag:yAH6o3LkGfvKF1UOdgWyyQ==,type:str] + lastmodified: "2025-02-24T08:24:27Z" + mac: ENC[AES256_GCM,data:BjvmH3V9/Ulmoz07PcS+JTwgUSV5YKnPSmAHDpAjnCURmfadCj5wBtN87W1UbnwqyA1FLQyW3IMWuzgYvOgHtwS8RaqCv04sTkt0X+UWpp5rLh04LLLJwCw4uGYSVNduD79tYinoFYk5Kidg7hTvFg0eZ1SegDTrGhXs8cWCvDY=,iv:EDmo3CxoJwoudPlMo2bn4Pqj1rOwM7nPQnBrM7bLa8Q=,tag:1+uR8yEUumOv8rkHQliu0g==,type:str] pgp: [] unencrypted_suffix: _unencrypted - version: 3.9.1 + version: 3.9.2 diff --git a/devices/srv1/node0/secrets/default.yaml b/devices/srv1/node0/secrets/default.yaml index 241fe697..ffc15e03 100644 --- a/devices/srv1/node0/secrets/default.yaml +++ b/devices/srv1/node0/secrets/default.yaml @@ -25,6 +25,9 @@ users: GROUPIII-2: ENC[AES256_GCM,data:ifWnLx1YEewdviqHK8fdesM3c1m1T4g6twnz1cGv1yc4jit68pQWLrRMivdsM4tUcyU9GKwCaElVlvh+dgyy8EZQPKCbvJX6GA==,iv:T5FWReeZ0QOkGJiNfrVrUBhAhbXxlFQJKqQV2tzw9AQ=,tag:XClXGZDWGuoGxzPW7ne2Pg==,type:str] #ENC[AES256_GCM,data:t8QUVYG4v7fE,iv:N8hDAV7wulPHcfnYTXuZRhb9dQPZqKpfMKK1+ITaZTA=,tag:eKMJDOmqoWWQbv/mm3LaAw==,type:comment] GROUPIII-3: ENC[AES256_GCM,data:VlAA+g7SRZyhPSl0Gd1KS7dCwNgRA/o+d8anN88A7E8bSE1ckeTSp+J4YrbbUlLasLhliOZ/nDC0rti+hckGCrjMwweMorSIWg==,iv:7u1yNrN7uxHCF1MsJ2qt1jyQ0ZYYCYKUHwRff50P9oI=,tag:3raCWjdButfmcdy8mH25Jw==,type:str] +telegram: + token: ENC[AES256_GCM,data:OVbdcyczH4O7TUsTL0fX3fhx9mL+8QQF3b9SIShmH/gwcJ1jy9WtWtx9wHRvFA==,iv:SX/fLPMkqmslHcRlqQQhqwodC0FHhWrpp6GR2eSF/vQ=,tag:0odoc4CpoI6yA08OWxmYRw==,type:str] + chn: ENC[AES256_GCM,data:+jN6IGuQSyWg,iv:VIKdyzhp6N9jvVniIDBlMsgVth7+X/paOc/q6fM38xo=,tag:I0I44OwTWkqt3firl0/vEw==,type:str] sops: kms: [] gcp_kms: [] @@ -49,8 +52,8 @@ sops: OThDMWRsWnVTbzRGTTZqSDBkNWZJMlEKdQ/ipO7O5OvaGa81c2P7fi1ncufueSzX 2njlHHz1gJCtjpktYaVvS6KSYtJoI9oNrF0YN5D/3kKW8TicsSGKaA== -----END AGE ENCRYPTED FILE----- - lastmodified: "2024-09-29T06:38:23Z" - mac: ENC[AES256_GCM,data:n7MVBKCUW4xpIiVO4ysBqlG89LjzpDBx9GJWQTrSenLWV/YrIGUxA6QDlRg7yhqV9ldF9Q7hDve1KHw7OxKRx5ot5OZiD3Bq3TwJfS2DarJ2vi9oc1J+CXXach8gp3m4C4RkPJ/y1i3jB2nRfSw5Z/TtdPMbvGXlHh+hhriAqxM=,iv:tyBcXMZzgeUOgYJtU1XkptPOlNoFwH+4z6xTD89aKOw=,tag:apXU989ZL+D8WhWKFTdXTg==,type:str] + lastmodified: "2025-02-25T02:28:19Z" + mac: ENC[AES256_GCM,data:zmRl695i9qZ2PNvHYMFi9SZdETfkpXFaKM20ZRAswLyYS7kbhDHklpDhdoQydCLP07P44aCAGtH/YVugb5RxvRvP+25oDQSOkbD0BFygMFRMTUAVDFZfdT5Fw5wHzAQgCTirR28RExlvFoclc64KLxRInVpELwGJ6p0rk/lWcT0=,iv:C+JyZtbsdXQPygwdsoFBT+WKaiQOziOK1c9YtTcYkh0=,tag:BaPnvA+Gp4Vdh1sNXJvAaA==,type:str] pgp: [] unencrypted_suffix: _unencrypted - version: 3.9.0 + version: 3.9.2 diff --git a/devices/srv2/node0/secrets/default.yaml b/devices/srv2/node0/secrets/default.yaml index 3a189a18..5399a9d5 100644 --- a/devices/srv2/node0/secrets/default.yaml +++ b/devices/srv2/node0/secrets/default.yaml @@ -28,6 +28,7 @@ hpcstat: telegram: token: ENC[AES256_GCM,data:dCDqQhNiuIGJAdbun2uwCBV1smrpvKvwi5AGOs+QWK0ANNVBoSHuUNPeNH2Ivg==,iv:Vcp/OPW8IRPHlqumPxYAfVLtZbdG3rB8VeXM34xBYSk=,tag:vKMihlMdwrPY0XKErtgwIA==,type:str] chat: ENC[AES256_GCM,data:zw2me9Jc7XKl,iv:b699uod4AtF37Ih/9qdQUZN/uhdN+UUeR0ojKogpuTI=,tag:BsDWzbk8175SX6b9ajsPlw==,type:str] + chn: ENC[AES256_GCM,data:mex+blmf0864,iv:H9g7gM4MfSO94XTixAzEItsm06gc7t/4oPh1IPmQuFw=,tag:+7hlOSCmVcwATvM0altAQw==,type:str] wireless: #ENC[AES256_GCM,data:xrg3Wxj/ghbWgg==,iv:6stu7voI5no2Y3YmnMrvTS8hev3eqjoWAyD5zTgyehc=,tag:cxkS7y7S1oM+/SJmlT10fw==,type:comment] 457的5G: ENC[AES256_GCM,data:QjHlyGU4JIYymyh41T+c33T3EOpbqDOoD3U+v6/BzjlWLLeZQXU2hwPCVh4fi2bwn7yNkp4ygAYmFPVPZWoT1A==,iv:Tc6Guzsn5hkjWH6UWSb1KlfWCBXIi2OWdn/wttmCXnQ=,tag:FhyH6JmjSTuqSeFy+GyQhg==,type:str] @@ -55,8 +56,8 @@ sops: M0xoL1dQR0kvMWpzN0RMNWVCTFQxNFUKj9LPjBo5NGOrGYNvu8qZ13PLYjLEWllU LARzEn4XgkeHckouwvxZYMCx7WxmAruRWaOvnxTIczzSNP7wIrqnkA== -----END AGE ENCRYPTED FILE----- - lastmodified: "2025-01-11T11:25:26Z" - mac: ENC[AES256_GCM,data:RFoPSvTM1+sxQNxHVWYw/PYOmIYFiYg81/ICZMsDtQdwRYUzCAoOmJFeWAKNRWRJgRW9cNYvaowcjuLGXGcCoWlepJ9T48G16Id7sL99Y5BHeul9UHsZTF5dWGvL7JoKbJr4lVJBU/oGNtNJib7qe9TO2ts5JYU511acJUBMKx0=,iv:ZZKLZ3wXRR6pi9zZuuizYXm5EvJY90zD9V7Eymz9XOU=,tag:edIQTpwNjGxm1zPQ9pvhuw==,type:str] + lastmodified: "2025-02-25T02:27:56Z" + mac: ENC[AES256_GCM,data:u3BiYAhHporS5+9cMA8LzXAMJoK5UITWljHwcHls8w2saClWz3hlePGEX0jERIRajv0LIUChDmXqGlAT4lCE/ZYxigvAW9Bg5jaJhFd2hvggh76SarGbjmhCoS4f9wAxZszSQJKey4xdI6kpX/yp/qyD6HXsWb2GEuH7JkaQop4=,iv:/tLqD0VSECoTXOXeDvWIjxAeI9pzwJlBDvXhH7uirtk=,tag:m4ZycaTl3IaRq687bopE+Q==,type:str] pgp: [] unencrypted_suffix: _unencrypted version: 3.9.2 diff --git a/modules/services/slurm.nix b/modules/services/slurm.nix index ad6ed87e..aef3d2b4 100644 --- a/modules/services/slurm.nix +++ b/modules/services/slurm.nix @@ -160,11 +160,6 @@ inputs: # this make job hang, not sure why # ConstrainDevices=yes ''; - extraPlugstackConfig = - let info = inputs.pkgs.localPackages.info.override { slurm = inputs.config.services.slurm.package; }; in - '' - required ${info}/lib/libinfo.so - ''; }; munge = { enable = true; password = inputs.config.sops.secrets."munge.key".path; }; }; @@ -206,9 +201,38 @@ inputs: StorageLoc=slurm ''; }; + extraConfig = + let info = inputs.pkgs.localPackages.info.override + { + slurm = inputs.config.services.slurm.package; + configFile = inputs.config.sops.templates."info.yaml".path; + }; + in + '' + PrologSlurmctld=${info}/bin/info + EpilogSlurmctld=${info}/bin/info + ''; }; systemd.tmpfiles.rules = [ "d /var/log/slurmctld 700 slurm slurm" ]; - sops.secrets."slurm/db" = { owner = "slurm"; key = "mariadb/slurm"; }; + sops = + { + secrets = + { + "slurm/db" = { owner = "slurm"; key = "mariadb/slurm"; }; + "telegram/token" = {}; + "telegram/chn" = {}; + }; + templates."info.yaml" = + { + owner = "slurm"; + content = let inherit (inputs.config.sops) placeholder; in builtins.toJSON + { + token = placeholder."telegram/token"; + user.chn = placeholder."telegram/chn"; + slurmConf = "${inputs.config.services.slurm.etcSlurm}/slurm.conf"; + }; + }; + }; nixos = { packages.packages._packages = [ inputs.pkgs.localPackages.sbatch-tui ]; diff --git a/packages/biu/include/biu/format.hpp b/packages/biu/include/biu/format.hpp index 6a47bec6..af89c0ef 100644 --- a/packages/biu/include/biu/format.hpp +++ b/packages/biu/include/biu/format.hpp @@ -6,6 +6,7 @@ # include # include # include +# include namespace biu { @@ -74,4 +75,6 @@ namespace fmt template struct formatter, Char> : basic_ostream_formatter {}; + + template struct formatter : basic_ostream_formatter {}; } diff --git a/packages/biu/include/biu/logger.hpp b/packages/biu/include/biu/logger.hpp index 300634fe..48355f3e 100644 --- a/packages/biu/include/biu/logger.hpp +++ b/packages/biu/include/biu/logger.hpp @@ -61,6 +61,8 @@ namespace biu public: const char* what() const noexcept final {return Message_.c_str();} }; + public: template static void try_exec(Function&& function); + // Monitor the start and end of a function, as well as corresponding thread. // This object should be construct at the beginning of the function, and should never be passed to another // function or thread. diff --git a/packages/biu/include/biu/logger.tpp b/packages/biu/include/biu/logger.tpp index e3bc47f8..0b9fad2e 100644 --- a/packages/biu/include/biu/logger.tpp +++ b/packages/biu/include/biu/logger.tpp @@ -5,6 +5,7 @@ # include # include # include +# include namespace biu { @@ -66,7 +67,14 @@ namespace biu template Logger::Exception::Exception(const std::string& message) { Logger::Guard log(message); - log.print_exception(nameof::nameof_full_type(), message, Stacktrace_, {}); + log.print_exception(nameof::nameof_full_type(), message, Stacktrace_, {}); + } + + template inline void Logger::try_exec(Function&& function) + { + Logger::Guard log; + try { function(); } + catch (...) { log.error(boost::current_exception_diagnostic_information()); } } inline thread_local unsigned Logger::Guard::Indent_ = 0; diff --git a/packages/info/CMakeLists.txt b/packages/info/CMakeLists.txt index f5f73b97..e67e5d2d 100644 --- a/packages/info/CMakeLists.txt +++ b/packages/info/CMakeLists.txt @@ -11,15 +11,17 @@ endif() find_package(biu REQUIRED) find_path(SLURM_INCLUDE_PATH slurm/spank.h REQUIRED) +find_library(SLURM_LIBRARY slurm REQUIRED) find_package(Boost REQUIRED COMPONENTS process) -add_library(info SHARED src/main.cpp) +add_executable(info src/main.cpp) target_include_directories(info PRIVATE ${SLURM_INCLUDE_PATH}) -target_link_libraries(info PUBLIC biu::biu Boost::process) -target_compile_features(info PUBLIC cxx_std_23) -set_target_properties(info PROPERTIES CXX_VISIBILITY_PRESET hidden) -install(TARGETS info LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) +target_link_libraries(info PRIVATE biu::biu ${SLURM_LIBRARY} Boost::process) +target_compile_features(info PRIVATE cxx_std_23) +target_compile_definitions(info PRIVATE INFO_CONFIG_FILE="${INFO_CONFIG_FILE}") +install(TARGETS info RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) get_property(ImportedTargets DIRECTORY "${CMAKE_SOURCE_DIR}" PROPERTY IMPORTED_TARGETS) message("Imported targets: ${ImportedTargets}") message("List of compile features: ${CMAKE_CXX_COMPILE_FEATURES}") +message("CMake build type: ${CMAKE_BUILD_TYPE}") diff --git a/packages/info/default.nix b/packages/info/default.nix index 69423b26..944719cf 100644 --- a/packages/info/default.nix +++ b/packages/info/default.nix @@ -1,7 +1,8 @@ -{ stdenv, cmake, pkg-config, slurm, biu }: stdenv.mkDerivation +{ lib, stdenv, cmake, pkg-config, configFile ? null, slurm, biu }: stdenv.mkDerivation { name = "info"; src = ./.; buildInputs = [ slurm biu ]; nativeBuildInputs = [ cmake pkg-config ]; + cmakeFlags = lib.optional (configFile != null) [ "-DINFO_CONFIG_FILE=${configFile}" ]; } diff --git a/packages/info/src/main.cpp b/packages/info/src/main.cpp index 6647d9c0..587527e5 100644 --- a/packages/info/src/main.cpp +++ b/packages/info/src/main.cpp @@ -1,13 +1,132 @@ # include +# include +# include +# include # include # include +# ifndef INFO_CONFIG_FILE +# define INFO_CONFIG_FILE "/etc/info.yaml" +# endif -extern "C" +int main() { -# include -# include -// ac argv: configuration count and flags in plugstack.conf -[[gnu::visibility("default")]] int slurm_spank_job_epilog(spank_t spank, int ac, char** argv); + using namespace biu::literals; + biu::Logger::init(std::make_shared("/var/log/slurmctld/info.log", std::ios::app), + biu::Logger::Level::Info); + biu::Logger::Guard log; + biu::Logger::try_exec([] + { + // 读取配置 + std::string token; + std::map user_map; + std::string slurm_conf; + std::map context_map + { + { "prolog_slurmctld", "RUN" }, + { "epilog_slurmctld", "END" } + }; + { + auto config = YAML::LoadFile(INFO_CONFIG_FILE); + token = config["token"].as(); + user_map = config["user"].as>(); + slurm_conf = config["slurmConf"].as(); + } + + // 读取用户名、任务 id、阶段 + std::string user; + std::uint32_t jid; + std::string context; + { + auto user_cstr = std::getenv("SLURM_JOB_USER"); + if (!user_cstr) throw std::runtime_error("SLURM_JOB_USER not found"); + user = user_cstr; + if (!user_map.contains(user)) return; + auto jid_cstr = std::getenv("SLURM_JOB_ID"); + if (!jid_cstr) throw std::runtime_error("SLURM_JOB_ID not found"); + jid = std::stoul(jid_cstr); + auto context_cstr = std::getenv("SLURM_SCRIPT_CONTEXT"); + if (!context_cstr) throw std::runtime_error("SLURM_SCRIPT_CONTEXT not found"); + if (!context_map.contains(context_cstr)) throw std::runtime_error("unknown SLURM_SCRIPT_CONTEXT"); + context = context_cstr; + } + + // 从 slurm 处查询信息 + YAML::Node info; + { + job_info_msg_t* job_info; + slurm_init(slurm_conf.c_str()); + auto slurm_result = slurm_load_job(&job_info, jid, 0); + if (slurm_result != SLURM_SUCCESS) throw std::runtime_error("slurm_load_job failed: {}"_f(slurm_strerror(slurm_result))); + else if (job_info->record_count != 1) throw std::runtime_error("job_info->record_count != 1"); + else + { + auto null_to_empty = [](const char* str) { return str ? str : ""; }; + auto timepoint = [](time_t time) + { return "{:%Y-%m-%d %H:%M:%S}"_f(*std::localtime(&time)); }; + auto get_status = [](int code) + { return std::vector{ "{}"_f(job_states(code & 0xff)), "{:#x}"_f(code) }; }; + info["Job Id"] = job_info->job_array->job_id; + info["Job Name"] = null_to_empty(job_info->job_array->name); + info["Working Directory"] = null_to_empty(job_info->job_array->work_dir); + info["Output File"] = null_to_empty(job_info->job_array->std_out); + info["Partition"] = null_to_empty(job_info->job_array->partition); + info["Submit Time"] = timepoint(job_info->job_array->submit_time); + info["Start Time"] = timepoint(job_info->job_array->start_time); + if (context == "epilog_slurmctld") info["End Time"] = timepoint(job_info->job_array->end_time); + info["Nodes"] = null_to_empty(job_info->job_array->nodes); + info["TREs Allocated"] = null_to_empty(job_info->job_array->tres_alloc_str); + info["GREs Allocated"] = null_to_empty(job_info->job_array->gres_total); + if (context == "epilog_slurmctld") info["Exit Code"] = job_info->job_array->exit_code; + info["Status"] = get_status(job_info->job_array->job_state); + info["Status"].SetStyle(YAML::EmitterStyle::Flow); + } + slurm_free_job_info_msg(job_info); + slurm_fini(); + } + + // 从 slurmdbd 处查询信息 + // if (context == "epilog_slurmctld") + // { + // slurm_init(slurm_conf.c_str()); + // uint16_t conn_flags = 0; + // auto conn = slurmdb_connection_get(&conn_flags); + // if (!conn || errno != SLURM_SUCCESS) throw std::runtime_error("slurmdb_connection_get failed."); + + // 构造查询 + // slurmdb_job_cond_t* query = new slurmdb_job_cond_t; + // query->step_list = slurm_list_create(slurm_destroy_selected_step); + // slurm_selected_step_t* step = new slurm_selected_step_t; + // step->step_id.step_het_comp = NO_VAL; + // step->step_id.step_id = NO_VAL; + // step->step_id.job_id = jid; + // step->array_task_id = NO_VAL; + // step->het_job_offset = NO_VAL; + // step->array_bitmap = nullptr; + // slurm_list_append(query->step_list, step); + // // 查询 + // auto result = slurmdb_jobs_get(conn, query); + // if (slurm_list_count(result) != 1) throw std::runtime_error("slurmdb_jobs_get failed."); + // auto data = reinterpret_cast(slurm_list_pop(result)); + // // 读取需要的信息并清理 + // slurm_list_destroy(result); + // slurmdb_destroy_job_cond(query); + // info["aaaa"] = data->uid; + // slurmdb_destroy_job_rec(data); + + // auto close_result = slurmdb_connection_close(&conn); + // if (close_result != SLURM_SUCCESS) throw std::runtime_error("slurmdb_connection_close failed."); + // } + + // 发送消息 + { + TgBot::Bot bot(token); + std::stringstream ss; + ss << "{} {} {}\n"_f(context, info["Job Id"], info["Job Name"]); + ss << "
{}
"_f(info); + bot.getApi().sendMessage + (user_map[user], ss.str(), nullptr, nullptr, nullptr, "HTML"); + } + }); } struct switch_user @@ -23,94 +142,85 @@ struct switch_user } }; -int slurm_spank_job_epilog(spank_t spank, int ac, char** argv) -{ - using namespace biu::literals; - auto [info, outfile, uid, gid] = [&] - { - std::stringstream ss; - std::optional outfile; - ss << "------------------------------------------------------------\n"; - std::uint32_t jid, uid = -1, gid = -1; - auto result = spank_get_item(spank, S_JOB_ID, &jid); - if (result != ESPANK_SUCCESS) ss << "error getting job id: {}\n"_f(int(result)); - else - { - ss << "info for job {}:\n"_f(jid); - YAML::Node info; - - // gather info from slurmctld - job_info_msg_t* job_info; - slurm_init(nullptr); - auto result = slurm_load_job(&job_info, jid, 0); - if (result != SLURM_SUCCESS) ss << "error loading job info: {}\n"_f(slurm_strerror(result)); - else if (job_info->record_count != 1) ss << "record_count {} != 1\n"_f(job_info->record_count); - else - { - auto null_to_empty = [](const char* str) { return str ? str : ""; }; - auto timepoint = [](time_t time) - { return "{:%Y-%m-%d %H:%M:%S}"_f(*std::localtime(&time)); }; - auto timespan = [](time_t time) - { return "{:%H:%M:%S}"_f(std::chrono::seconds(time)); }; - auto get_status = [](int code) - { return std::vector{ "{}"_f(job_states(code & 0xff)), "{:#x}"_f(code) }; }; - info["Job Id"] = job_info->job_array->job_id; - info["Job Name"] = null_to_empty(job_info->job_array->name); - info["User Id"] = job_info->job_array->user_id; - info["Work Directory"] = null_to_empty(job_info->job_array->work_dir); - info["Output File"] = null_to_empty(job_info->job_array->std_out); - info["Partition"] = null_to_empty(job_info->job_array->partition); - info["Submit Time"] = timepoint(job_info->job_array->submit_time); - info["Start Time"] = timepoint(job_info->job_array->start_time); - info["End Time"] = timepoint(job_info->job_array->end_time); - info["Nodes"] = null_to_empty(job_info->job_array->nodes); - info["TREs Allocated"] = null_to_empty(job_info->job_array->tres_alloc_str); - info["GREs Allocated"] = null_to_empty(job_info->job_array->gres_total); - info["Exit Code"] = job_info->job_array->exit_code; - info["Status"] = get_status(job_info->job_array->job_state); - info["Status"].SetStyle(YAML::EmitterStyle::Flow); - info["Context"] = "{}"_f(spank_context()); - info["Remote"] = spank_remote(spank); - if (job_info->job_array->std_out != nullptr) outfile = job_info->job_array->std_out; - uid = job_info->job_array->user_id; - gid = job_info->job_array->group_id; - } - slurm_free_job_info_msg(job_info); - slurm_fini(); - - ss << "------------------------------------------------------------\n" << info << '\n'; - } - return std::tuple(ss.str(), outfile, uid, gid); - }(); - slurm_spank_log("%s", info.c_str()); - if (outfile) - { - try - { - boost::asio::io_context context; - boost::system::error_code ec; - boost::asio::writable_pipe wp{context}; - boost::process::v2::process proc - ( - context, "/run/current-system/sw/bin/tee", { "-a", outfile->c_str() }, - boost::process::v2::process_stdio{wp, nullptr, nullptr}, switch_user(uid, gid) - ); - boost::asio::write(wp, boost::asio::buffer(info)); - wp.close(); - proc.wait(); - } - catch (boost::system::system_error& e) { slurm_spank_log("boost error writing to output file: %s", e.what()); } - catch (std::exception& e) { slurm_spank_log("error writing to output file: %s", e.what()); } - catch (...) { slurm_spank_log("error writing to output file"); } - } - return 0; -} - -[[gnu::visibility("default")]] extern const char* plugin_name; -[[gnu::visibility("default")]] extern const char* plugin_type; -[[gnu::visibility("default")]] extern const unsigned int plugin_version; -[[gnu::visibility("default")]] extern const unsigned int spank_plugin_version; -const char* plugin_name = "info"; -const char* plugin_type = "spank"; -const unsigned int plugin_version = SLURM_VERSION_NUMBER; -const unsigned int spank_plugin_version = 0; +// int slurm_spank_job_epilog(spank_t spank, int ac, char** argv) +// { +// using namespace biu::literals; +// auto [info, outfile, uid, gid] = [&] +// { +// std::stringstream ss; +// std::optional outfile; +// ss << "------------------------------------------------------------\n"; +// std::uint32_t jid, uid = -1, gid = -1; +// auto result = spank_get_item(spank, S_JOB_ID, &jid); +// if (result != ESPANK_SUCCESS) ss << "error getting job id: {}\n"_f(int(result)); +// else +// { +// ss << "info for job {}:\n"_f(jid); +// YAML::Node info; +// +// // gather info from slurmctld +// job_info_msg_t* job_info; +// slurm_init(nullptr); +// auto result = slurm_load_job(&job_info, jid, 0); +// if (result != SLURM_SUCCESS) ss << "error loading job info: {}\n"_f(slurm_strerror(result)); +// else if (job_info->record_count != 1) ss << "record_count {} != 1\n"_f(job_info->record_count); +// else +// { +// auto null_to_empty = [](const char* str) { return str ? str : ""; }; +// auto timepoint = [](time_t time) +// { return "{:%Y-%m-%d %H:%M:%S}"_f(*std::localtime(&time)); }; +// auto timespan = [](time_t time) +// { return "{:%H:%M:%S}"_f(std::chrono::seconds(time)); }; +// auto get_status = [](int code) +// { return std::vector{ "{}"_f(job_states(code & 0xff)), "{:#x}"_f(code) }; }; +// info["Job Id"] = job_info->job_array->job_id; +// info["Job Name"] = null_to_empty(job_info->job_array->name); +// info["User Id"] = job_info->job_array->user_id; +// info["Work Directory"] = null_to_empty(job_info->job_array->work_dir); +// info["Output File"] = null_to_empty(job_info->job_array->std_out); +// info["Partition"] = null_to_empty(job_info->job_array->partition); +// info["Submit Time"] = timepoint(job_info->job_array->submit_time); +// info["Start Time"] = timepoint(job_info->job_array->start_time); +// info["End Time"] = timepoint(job_info->job_array->end_time); +// info["Nodes"] = null_to_empty(job_info->job_array->nodes); +// info["TREs Allocated"] = null_to_empty(job_info->job_array->tres_alloc_str); +// info["GREs Allocated"] = null_to_empty(job_info->job_array->gres_total); +// info["Exit Code"] = job_info->job_array->exit_code; +// info["Status"] = get_status(job_info->job_array->job_state); +// info["Status"].SetStyle(YAML::EmitterStyle::Flow); +// info["Context"] = "{}"_f(spank_context()); +// info["Remote"] = spank_remote(spank); +// if (job_info->job_array->std_out != nullptr) outfile = job_info->job_array->std_out; +// uid = job_info->job_array->user_id; +// gid = job_info->job_array->group_id; +// } +// slurm_free_job_info_msg(job_info); +// slurm_fini(); +// +// ss << "------------------------------------------------------------\n" << info << '\n'; +// } +// return std::tuple(ss.str(), outfile, uid, gid); +// }(); +// slurm_spank_log("%s", info.c_str()); +// if (outfile) +// { +// try +// { +// boost::asio::io_context context; +// boost::system::error_code ec; +// boost::asio::writable_pipe wp{context}; +// boost::process::v2::process proc +// ( +// context, "/run/current-system/sw/bin/tee", { "-a", outfile->c_str() }, +// boost::process::v2::process_stdio{wp, nullptr, nullptr}, switch_user(uid, gid) +// ); +// boost::asio::write(wp, boost::asio::buffer(info)); +// wp.close(); +// proc.wait(); +// } +// catch (boost::system::system_error& e) { slurm_spank_log("boost error writing to output file: %s", e.what()); } +// catch (std::exception& e) { slurm_spank_log("error writing to output file: %s", e.what()); } +// catch (...) { slurm_spank_log("error writing to output file"); } +// } +// return 0; +// }