localPackages.hpcstat: init export

This commit is contained in:
2024-05-05 19:07:16 +08:00
parent 422c1f7615
commit 7533ef13cc
10 changed files with 174 additions and 19 deletions

View File

@@ -1,13 +1,7 @@
{ stdenv, src }: stdenv.mkDerivation
{ stdenv, src, cmake, pkg-config }: stdenv.mkDerivation
{
name = "date";
inherit src;
phases = [ "installPhase" ];
installPhase =
''
runHook preInstall
mkdir -p $out
cp -r $src/{include,src} $out
runHook postInstall
'';
nativeBuildInputs = [ cmake pkg-config ];
cmakeFlags = [ "-DBUILD_TZ_LIB=ON" "-DUSE_SYSTEM_TZ_DB=ON" ];
}

View File

@@ -85,7 +85,7 @@ inputs: rec
{ src = inputs.topInputs.kylin-virtual-keyboard; };
biu = inputs.pkgs.callPackage ./biu { inherit nameof; };
zxorm = inputs.pkgs.callPackage ./zxorm { src = inputs.topInputs.zxorm; };
hpcstat = inputs.pkgs.callPackage ./hpcstat { inherit nameof sqlite-orm zpp-bits; };
hpcstat = inputs.pkgs.callPackage ./hpcstat { inherit nameof sqlite-orm zpp-bits date openxlsx; };
openxlsx = inputs.pkgs.callPackage ./openxlsx { src = inputs.topInputs.openxlsx; };
sqlite-orm = inputs.pkgs.callPackage ./sqlite-orm { src = inputs.topInputs.sqlite-orm; };

View File

@@ -16,12 +16,14 @@ find_package(nlohmann_json REQUIRED)
find_path(ZPP_BITS_INCLUDE_DIR zpp_bits.h REQUIRED)
find_package(range-v3 REQUIRED)
find_path(NAMEOF_INCLUDE_DIR nameof.hpp REQUIRED)
find_package(date REQUIRED)
find_package(OpenXLSX REQUIRED)
add_executable(hpcstat src/main.cpp src/env.cpp src/keys.cpp src/ssh.cpp src/sql.cpp src/lfs.cpp src/common.cpp)
target_compile_features(hpcstat PUBLIC cxx_std_23)
target_include_directories(hpcstat PRIVATE ${PROJECT_SOURCE_DIR}/include ${ZPP_BITS_INCLUDE_DIR})
target_link_libraries(hpcstat PRIVATE fmt::fmt Boost::headers Boost::filesystem sqlite_orm::sqlite_orm
nlohmann_json::nlohmann_json range-v3::range-v3)
nlohmann_json::nlohmann_json range-v3::range-v3 date::date date::date-tz OpenXLSX::OpenXLSX)
install(TARGETS hpcstat RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
install(DIRECTORY share/ DESTINATION ${CMAKE_INSTALL_DATADIR}/hpcstat)

View File

@@ -1,11 +1,11 @@
{
stdenv, cmake, pkg-config, standalone ? false, makeWrapper,
boost, fmt, sqlite-orm, nlohmann_json, zpp-bits, range-v3, nameof, openssh, sqlite
boost, fmt, sqlite-orm, nlohmann_json, zpp-bits, range-v3, nameof, openssh, sqlite, date, openxlsx
}: stdenv.mkDerivation
{
name = "hpcstat";
src = ./.;
buildInputs = [ boost fmt sqlite-orm nlohmann_json zpp-bits range-v3 nameof sqlite ];
buildInputs = [ boost fmt sqlite-orm nlohmann_json zpp-bits range-v3 nameof sqlite date openxlsx ];
nativeBuildInputs = [ cmake pkg-config makeWrapper ];
postInstall =
if standalone then "cp ${openssh}/bin/{ssh-add,ssh-keygen} $out/bin"

View File

@@ -9,13 +9,19 @@
# include <iostream>
# include <regex>
# include <thread>
# include <chrono>
# include <fmt/format.h>
# include <date/date.h>
# include <date/tz.h>
namespace hpcstat
{
// Run a program, wait until it exits, and return its stdout if it exits with status 0; otherwise return nullopt.
std::optional<std::string> exec
(std::filesystem::path program, std::vector<std::string> args, std::optional<std::string> stdin = std::nullopt);
(
std::filesystem::path program, std::vector<std::string> args, std::optional<std::string> stdin = std::nullopt,
std::map<std::string, std::string> extra_env = {}
);
// get current time
long now();

View File

@@ -52,4 +52,6 @@ namespace hpcstat::sql
// The three strings are, in order: the serialized data, the signature, and the fingerprint.
std::optional<std::vector<std::tuple<std::string, std::string, std::string>>>
verify(std::string old_db, std::string new_db);
// Export the data of a given month to a file.
bool export_data(long start_time, long end_time, std::string filename);
}

View File

@@ -4,21 +4,26 @@
namespace hpcstat
{
std::optional<std::string> exec
(std::filesystem::path program, std::vector<std::string> args, std::optional<std::string> stdin)
(
std::filesystem::path program, std::vector<std::string> args, std::optional<std::string> stdin,
std::map<std::string, std::string> extra_env
)
{
namespace bp = boost::process;
bp::ipstream output;
bp::opstream input;
std::unique_ptr<bp::child> process;
bp::environment env = boost::this_process::environment();
for (const auto& [key, value] : extra_env) env[key] = value;
if (stdin)
{
process = std::make_unique<bp::child>
(program.string(), bp::args(args), bp::std_out > output, bp::std_err > stderr, bp::std_in < input);
(program.string(), bp::args(args), bp::std_out > output, bp::std_err > stderr, bp::std_in < input, env);
input << *stdin;
input.pipe().close();
}
else process = std::make_unique<bp::child>
(program.string(), bp::args(args), bp::std_out > output, bp::std_err > stderr, bp::std_in < bp::null);
(program.string(), bp::args(args), bp::std_out > output, bp::std_err > stderr, bp::std_in < bp::null, env);
process->wait();
if (process->exit_code() != 0) return std::nullopt;
std::stringstream ss;

View File

@@ -51,7 +51,8 @@ namespace hpcstat::lfs
auto result = exec
(
boost::process::search_path("bjobs").string(),
{ "-a", "-o", "jobid submit_time stat cpu_used", "-json" }
{ "-a", "-o", "jobid submit_time stat cpu_used", "-json" },
std::nullopt, { { "LSB_DISPLAY_YEAR", "Y" } }
);
!result
)

View File

@@ -14,7 +14,8 @@ int main(int argc, const char** argv)
using namespace std::literals;
std::vector<std::string> args(argv, argv + argc);
if (args.size() == 1) { std::cout << "Usage: hpcstat initdb|login|logout|submitjob|finishjob|verify\n"; return 1; }
if (args.size() == 1)
{ std::cout << "Usage: hpcstat initdb|login|logout|submitjob|finishjob|verify|export\n"; return 1; }
else if (args[1] == "initdb") { if (!sql::initdb()) { std::cerr << "Failed to initialize database\n"; return 1; } }
else if (args[1] == "login")
{
@@ -116,6 +117,21 @@ int main(int argc, const char** argv)
if (!std::apply(ssh::verify, data))
{ std::cerr << fmt::format("Failed to verify data: {}\n", std::get<0>(data)); return 1; }
}
else if (args[1] == "export")
{
if (args.size() < 4) { std::cerr << "Usage: hpcstat export <year> <month>\n"; return 1; }
auto year_n = std::stoi(args[2]), month_n = std::stoi(args[3]);
using namespace std::chrono;
auto begin = sys_seconds(sys_days(month(month_n) / 1 / year_n)).time_since_epoch().count();
auto end = sys_seconds(sys_days(month(month_n) / 1 / year_n + months(1)))
.time_since_epoch().count();
if
(
!sql::export_data
(begin, end, fmt::format("hpcstat-{}-{}.xlsx", year_n, month_n))
)
return 1;
}
else { std::cerr << "Unknown command.\n"; return 1; }
}
catch (...) { std::cerr << boost::current_exception_diagnostic_information() << std::endl; return 1; }

View File

@@ -1,10 +1,12 @@
# include <hpcstat/sql.hpp>
# include <hpcstat/env.hpp>
# include <hpcstat/keys.hpp>
# include <range/v3/range.hpp>
# include <range/v3/view.hpp>
# include <nameof.hpp>
# define SQLITE_ORM_OPTIONAL_SUPPORTED
# include <sqlite_orm/sqlite_orm.h>
# include <OpenXLSX.hpp>
namespace hpcstat::sql
{
@@ -155,4 +157,131 @@ namespace hpcstat::sql
return check_many.operator()<LoginData, LogoutData, SubmitJobData, FinishJobData>(check_many);
}
}
// Export usage statistics for the records whose Time lies between start_time and end_time
// into an .xlsx workbook written to `filename`.
//
// start_time, end_time: bounds handed to sqlite_orm::between on the Time column of each table.
//   NOTE(review): SQL BETWEEN is inclusive at both ends, so a record stamped exactly at
//   end_time is counted here and also in an export that starts at that instant - confirm
//   whether a half-open range is wanted.
// filename: path of the workbook to create.
//
// Returns false if the database connection could not be opened, true once the workbook has
// been saved. Exceptions thrown by sqlite_orm or OpenXLSX are not caught here.
//
// The workbook has two sheets:
//   "Statistics"               - one row per key, summing all activity of that key
//                                (with and without a subaccount);
//   "StatisticsWithSubAccount" - one row per (key, subaccount) pair.
bool export_data(long start_time, long end_time, std::string filename)
{
  if (auto conn = connect(); !conn) return false;
  else
  {
    struct StatResult
    {
      double CpuTime = 0;
      unsigned LoginInteractive = 0, LoginNonInteractive = 0, SubmitJob = 0, FinishJobSuccess = 0,
        FinishJobFailed = 0;
      StatResult& operator+=(const StatResult& rhs)
      {
        CpuTime += rhs.CpuTime;
        LoginInteractive += rhs.LoginInteractive;
        LoginNonInteractive += rhs.LoginNonInteractive;
        SubmitJob += rhs.SubmitJob;
        FinishJobSuccess += rhs.FinishJobSuccess;
        FinishJobFailed += rhs.FinishJobFailed;
        return *this;
      }
    };
    // (Key, SubAccount) -> StatResult; an empty optional means "no subaccount"
    std::map<std::pair<std::string, std::optional<std::string>>, StatResult> stat;

    // CpuTime & FinishJobSuccess & FinishJobFailed
    for
    (
      auto& it : conn->get_all<FinishJobData>(sqlite_orm::where
        (sqlite_orm::between(&FinishJobData::Time, start_time, end_time)))
    )
    {
      // Find the submit record that belongs to this finished job. Records are matched by JobId,
      // and additionally the time stored in the submit record must be close to the submit time
      // stored with the finished job.
      auto job_in_submit = [&conn](FinishJobData& job) -> std::optional<SubmitJobData>
      {
        std::optional<SubmitJobData> result;
        // NOTE(review): SubmitTime is parsed as a system_clock time point, i.e. without any
        // time-zone conversion. If the string is local time while SubmitJobData::Time is UTC
        // based, the difference below will include the zone offset - confirm.
        // If parsing fails the time point presumably keeps its default (epoch) value, so no
        // submit record will match and the job is accounted to the empty key.
        long submit_date = [&]
        {
          std::chrono::system_clock::time_point submit_date;
          std::stringstream(job.SubmitTime) >> date::parse("%b %d %H:%M:%S %Y", submit_date);
          return std::chrono::duration_cast<std::chrono::seconds>(submit_date.time_since_epoch()).count();
        }();
        auto submit_jobs = conn->get_all<SubmitJobData>
          (sqlite_orm::where(sqlite_orm::is_equal(&SubmitJobData::JobId, job.JobId)));
        for (auto& job_submit : submit_jobs)
          // Compare against the candidate's own timestamp; comparing job.Time here would make
          // the test independent of the candidate and therefore useless for telling apart
          // several submit records that share a JobId.
          if (auto diff = job_submit.Time - submit_date; std::abs(diff) < 3600)
          {
            result = job_submit;
            if (std::abs(diff) > 60)
              std::cerr << fmt::format("large difference found: {} {}\n", job.JobId, diff);
            break;
          }
        return result;
      }(it);
      // Jobs without a matching submit record are collected under the empty key.
      std::pair<std::string, std::optional<std::string>> key;
      if (!job_in_submit) key = { "", {} };
      else key = std::make_pair(job_in_submit->Key, job_in_submit->Subaccount);
      auto& entry = stat[key];
      entry.CpuTime += it.CpuTime / 3600;
      if (it.JobResult == "Done") entry.FinishJobSuccess++;
      else entry.FinishJobFailed++;
    }

    // LoginInteractive & LoginNonInteractive
    for
    (
      auto& it : conn->get_all<LoginData>(sqlite_orm::where
        (sqlite_orm::between(&LoginData::Time, start_time, end_time)))
    )
    {
      auto& entry = stat[std::make_pair(it.Key, it.Subaccount)];
      if (it.Interactive) entry.LoginInteractive++; else entry.LoginNonInteractive++;
    }

    // SubmitJob
    for
    (
      auto& it : conn->get_all<SubmitJobData>(sqlite_orm::where
        (sqlite_orm::between(&SubmitJobData::Time, start_time, end_time)))
    )
      stat[{ it.Key, it.Subaccount }].SubmitJob++;

    // Per-key totals: every entry contributes, whether or not it carries a subaccount.
    // (Summing only the entries with a subaccount would silently drop all activity that was
    // recorded without one, because those entries are removed from `stat` below.)
    std::map<std::string, StatResult> stat_without_subaccount;
    for (const auto& [key, value] : stat) stat_without_subaccount[key.first] += value;

    // Keep only the entries that have a subaccount for the second sheet.
    // map::erase returns the iterator following the erased element, so the iterator is never
    // advanced past end().
    for (auto it = stat.begin(); it != stat.end();)
      if (!it->first.second) it = stat.erase(it);
      else ++it;

    // write to excel
    OpenXLSX::XLDocument doc;
    doc.create(filename);
    // Add the replacement sheet before removing the default one, so the workbook never has to
    // go through a state with zero worksheets.
    doc.workbook().addWorksheet("Statistics");
    doc.workbook().deleteSheet("Sheet1");
    auto wks1 = doc.workbook().worksheet("Statistics");
    wks1.row(1).values() = std::vector<std::string>
    {
      "Username", "FingerPrint", "CpuTime", "LoginInteractive", "LoginNonInteractive",
      "SubmitJob", "FinishJobSuccess", "FinishJobFailed"
    };
    for
    (
      auto [row, it] = std::tuple(2, stat_without_subaccount.begin());
      it != stat_without_subaccount.end();
      ++it, ++row
    )
      wks1.row(row).values() = std::vector<std::string>
      {
        Keys.contains(it->first) ? Keys[it->first].Username : "(unknown)",
        it->first, fmt::format("{:.2f}", it->second.CpuTime),
        std::to_string(it->second.LoginInteractive), std::to_string(it->second.LoginNonInteractive),
        std::to_string(it->second.SubmitJob), std::to_string(it->second.FinishJobSuccess),
        std::to_string(it->second.FinishJobFailed)
      };

    doc.workbook().addWorksheet("StatisticsWithSubAccount");
    auto wks2 = doc.workbook().worksheet("StatisticsWithSubAccount");
    wks2.row(1).values() = std::vector<std::string>
    {
      "Username::SubAccount", "CpuTime", "LoginInteractive", "LoginNonInteractive",
      "SubmitJob", "FinishJobSuccess", "FinishJobFailed"
    };
    // Every remaining entry has a subaccount (see the erase loop above), so dereferencing the
    // optional is safe.
    for (auto [row, it] = std::tuple(2, stat.begin()); it != stat.end(); ++it, ++row)
      wks2.row(row).values() = std::vector<std::string>
      {
        (Keys.contains(it->first.first) ? Keys[it->first.first].Username : "(unknown)")
          + "::" + *it->first.second,
        fmt::format("{:.2f}", it->second.CpuTime),
        std::to_string(it->second.LoginInteractive), std::to_string(it->second.LoginNonInteractive),
        std::to_string(it->second.SubmitJob), std::to_string(it->second.FinishJobSuccess),
        std::to_string(it->second.FinishJobFailed)
      };
    doc.save();
    return true;
  }
}
}