localPackages.hpcstat: use duc to collect disk usage statistic

This commit is contained in:
陈浩南 2024-06-08 15:24:17 +08:00
parent 97b826a53b
commit 1ae27fe593
5 changed files with 109 additions and 46 deletions

View File

@ -22,6 +22,7 @@ if [ -z "${BASHRC_SOURCED-}" ]; then
export HPCSTAT_DATADIR=$HOME/linwei/chn/software/hpcstat/var/lib/hpcstat
export HPCSTAT_SHAREDIR=$HOME/linwei/chn/software/hpcstat/share/hpcstat
export HPCSTAT_SSH_BINDIR=$HOME/linwei/chn/software/hpcstat/bin
export HPCSTAT_DUC_BINDIR=$HOME/linwei/chn/software/hpcstat/bin
export HPCSTAT_BSUB=/opt/ibm/lsfsuite/lsf/10.1/linux2.6-glibc2.3-x86_64/bin/bsub
${HPCSTAT_SSH_BINDIR}/hpcstat login
if [ "$?" -ne 0 ]; then

View File

@ -11,6 +11,7 @@
# include <thread>
# include <chrono>
# include <fstream>
# include <future>
# include <fmt/format.h>
# include <date/date.h>
# include <date/tz.h>

View File

@ -8,11 +8,10 @@ namespace hpcstat::disk
double Total;
std::vector<std::pair<std::string, double>> Teacher; // 已排序
std::vector<std::pair<std::string, double>> Student; // 已排序
long Time;
using serialize = zpp::bits::members<4>;
std::string Time;
};
// Collect the current disk usage and store it in the database.
bool stat(boost::interprocess::file_lock& lock);
bool stat();
// Read the disk usage back from the database.
std::optional<Usage> get();
}

View File

@ -2,71 +2,129 @@
# include <hpcstat/env.hpp>
# include <hpcstat/sql.hpp>
// Directories to report on, and whether their subdirectories are reported too.
// Keys are directory names that get() resolves relative to $HOME; a `true`
// value makes get() also measure every immediate subdirectory.
// NOTE(review): this listing appears to show two versions of the table
// interleaved with unrelated lines — confirm against the real source file.
std::map<std::string, bool> Directories =
namespace hpcstat::disk
{
{ "caiduanjun", true },
{ "Gaona", true },
{ "huangkai", true },
{ "huangshengli", false },
{ "kangjunyong", true },
{ "lijing", true },
{ "linwei", true },
{ "Lixu", true },
{ "wanghao", false },
{ "wuyaping", true },
{ "wuzhiming", true },
{ "zhanhuahan", false }
};
bool hpcstat::disk::stat(boost::interprocess::file_lock &lock)
{
if (auto homedir = env::env("HOME"); !homedir)
{ std::cerr << "HOME not set\n"; return false; }
else
// Directories to report on, and whether their subdirectories are reported too.
std::map<std::string, bool> Directories =
{
auto get_size = [](std::string path) -> std::optional<double>
{ "caiduanjun", true },
{ "Gaona", true },
{ "huangkai", true },
{ "huangshengli", false },
{ "kangjunyong", true },
{ "lijing", true },
{ "linwei", true },
{ "Lixu", true },
{ "wanghao", false },
{ "wuyaping", true },
{ "wuzhiming", true },
{ "zhanhuahan", false }
};
// Runs `duc index` over $HOME, writing the index to $HPCSTAT_DATADIR/duc.db
// with the duc binary located in $HPCSTAT_DUC_BINDIR.
// Returns false, after printing a message to stderr, when one of the three
// environment variables is unset or the command does not yield a result;
// returns true otherwise.
bool stat()
{
  const auto homedir = env::env("HOME");
  if (!homedir)
  {
    std::cerr << "HOME not set\n";
    return false;
  }

  const auto ducbindir = env::env("HPCSTAT_DUC_BINDIR");
  if (!ducbindir)
  {
    std::cerr << "HPCSTAT_DUC_BINDIR not set\n";
    return false;
  }

  const auto datadir = env::env("HPCSTAT_DATADIR");
  if (!datadir)
  {
    std::cerr << "HPCSTAT_DATADIR not set\n";
    return false;
  }

  // duc index -d ./duc.db -p ~
  const auto result = exec
  (
    fmt::format("{}/duc", *ducbindir),
    { "index", "-d", fmt::format("{}/duc.db", *datadir), "-p", *homedir }
  );
  if (!result)
  {
    std::cerr << "failed to index\n";
    return false;
  }
  return true;
}
// Builds a Usage record by querying the duc binary in $HPCSTAT_DUC_BINDIR
// against the index file $HPCSTAT_DATADIR/duc.db.
// Returns an empty optional (after printing to stderr) when a required
// environment variable is unset or one of the duc queries fails.
// NOTE(review): this listing appears to interleave lines of an earlier,
// du-based implementation (e.g. the "/usr/bin/du" call, `return false`,
// `lock.lock()`, `sql::writedb`) with the current one — confirm against the
// real source file before relying on statement order.
std::optional<Usage> get()
{
// Declared up front so the lambdas below can capture them by reference.
std::optional<std::string> homedir, ducbindir, datadir;
if (homedir = env::env("HOME"); !homedir)
{ std::cerr << "HOME not set\n"; return {}; }
else if (ducbindir = env::env("HPCSTAT_DUC_BINDIR"); !ducbindir)
{ std::cerr << "HPCSTAT_DUC_BINDIR not set\n"; return {}; }
else if (datadir = env::env("HPCSTAT_DATADIR"); !datadir)
{ std::cerr << "HPCSTAT_DATADIR not set\n"; return {}; }
// Size of $HOME/<path>, or of $HOME itself when `path` is empty.
// The first run of digits in the command output is taken as the size.
auto get_size = [&](std::optional<std::string> path) -> std::optional<double>
{
if (auto result = exec("/usr/bin/du", { "-s", path }); !result)
{ std::cerr << fmt::format("failed to stat {}\n", path); return std::nullopt; }
if
(
auto result = exec
(
// duc ls -d ./duc.db -b -D /data/gpfs01/jykang/linwei/xxx
fmt::format("{}/duc", *ducbindir),
{
"ls", "-d", fmt::format("{}/duc.db", *datadir), "-b", "-D",
fmt::format("{}{}{}", *homedir, path ? "/" : "", path.value_or(""))
}
);
!result
)
{ std::cerr << fmt::format("failed to ls {}\n", path.value_or("home")); return {}; }
else
{
std::smatch match;
if (!std::regex_search(*result, match, std::regex(R"((\d+))")))
{ std::cerr << fmt::format("failed to parse {}\n", *result); return std::nullopt; }
return std::stod(match[1]) / 1024 / 1024;
// Three divisions by 1024: presumably bytes -> GiB, given the -b flag — TODO confirm.
return std::stod(match[1]) / 1024 / 1024 / 1024;
}
};
// Names (not full paths) of the directories directly inside $HOME/<path>.
auto get_subdir = [](std::string path) -> std::vector<std::string>
auto get_subdir = [&](std::string path) -> std::vector<std::string>
{
std::filesystem::directory_iterator it(path);
std::filesystem::directory_iterator it(*homedir + "/" + path);
std::vector<std::string> result;
for (const auto& entry : it)
if (entry.is_directory()) result.push_back(entry.path().filename().string());
return result;
};
// First "YYYY-MM-DD hh:mm:ss" timestamp found in the output of `duc info`.
// Presumably this is when the index was built — TODO confirm against duc docs.
auto get_date = [&]() -> std::optional<std::string>
{
if
(
auto result = exec
(
// duc info -d ./duc.db
fmt::format("{}/duc", *ducbindir),
{ "info", "-d", fmt::format("{}/duc.db", *datadir) });
!result
)
{ std::cerr << fmt::format("failed to get duc info\n"); return {}; }
else
{
std::smatch match;
// search string like 2024-06-08 13:45:19
if (!std::regex_search(*result, match, std::regex(R"((\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}))")))
{ std::cerr << fmt::format("failed to parse {}\n", *result); return {}; }
return match[1];
}
};
Usage usage;
usage.Time = now();
if (auto size = get_size(*homedir); size) usage.Total = *size; else return false;
// Total is the size of $HOME itself; Time is the timestamp reported by duc.
if (auto size = get_size({})) usage.Total = *size; else return {};
if (auto date = get_date()) usage.Time = *date; else return {};
// Each listed directory goes into Teacher; when `recursive` is set, each of
// its immediate subdirectories goes into Student as "<dir>/<subdir>".
for (const auto& [dir, recursive] : Directories)
{
if (auto size = get_size(*homedir + "/" + dir); size)
usage.Teacher.push_back({ dir, *size });
else return false;
if (recursive) for (const auto& subdir : get_subdir(*homedir + "/" + dir))
// A listed directory that is missing on disk is reported and skipped.
if (!std::filesystem::exists(*homedir + "/" + dir))
{ std::cerr << fmt::format("{} does not exist\n", *homedir + "/" + dir); continue; }
if (auto size = get_size(dir)) usage.Teacher.push_back({ dir, *size });
else return {};
if (recursive) for (const auto& subdir : get_subdir(dir))
{
if (auto size = get_size(*homedir + "/" + dir + "/" + subdir); size)
if (auto size = get_size(dir + "/" + subdir); size)
usage.Student.push_back({ dir + "/" + subdir, *size });
else return false;
else return {};
}
}
// Largest entries first in both lists.
std::sort(usage.Teacher.begin(), usage.Teacher.end(),
[](const auto& a, const auto& b) { return a.second > b.second; });
std::sort(usage.Student.begin(), usage.Student.end(),
[](const auto& a, const auto& b) { return a.second > b.second; });
lock.lock();
if (!sql::writedb(sql::DiskStatData{.Stat = serialize(usage),}))
{ std::cerr << "Failed to write to database\n"; return false; }
return true;
return usage;
}
}

View File

@ -59,7 +59,7 @@ int main(int argc, const char** argv)
"\33[2K\rLogged in as {} (Fingerprint: SHA256:{}{}).\n", Keys[*fp].Username, *fp,
sub_account ? fmt::format(" Subaccount {}", *sub_account) : ""
);
if (auto disk_stat = sql::get_disk_stat(); !disk_stat)
if (auto disk_stat = disk::get(); !disk_stat)
std::cerr << "Failed to get disk usage statistic.\n";
else
{
@ -68,13 +68,11 @@ int main(int argc, const char** argv)
percent > 80 ? termcolor::yellow<char> : termcolor::green<char>;
auto bgcolor = percent > 95 ? termcolor::on_red<char> :
percent > 80 ? termcolor::on_yellow<char> : termcolor::on_green<char>;
auto time = std::format("{:%F:%R}", std::chrono::zoned_time(std::chrono::current_zone(),
std::chrono::sys_seconds(std::chrono::seconds(disk_stat->Time))));
std::cout
<< color << "disk usage: " << termcolor::reset
<< bgcolor << termcolor::white
<< fmt::format("{:.1f}% ({:.1f}GB / ~800GB)", percent, disk_stat->Total) << termcolor::reset
<< color << fmt::format(" (estimated, counted at {})\n", time) << termcolor::reset;
<< color << fmt::format(" (estimated, counted at {})\n", disk_stat->Time) << termcolor::reset;
if (percent > 80)
{
std::cout << color << "Top 3 directories owned by teacher:\n";
@ -193,7 +191,13 @@ int main(int argc, const char** argv)
}
else if (args[1] == "version") { std::cout << HPCSTAT_VERSION << std::endl; }
else if (args[1] == "diskstat")
{ if (!disk::stat(lock)) { std::cerr << "Failed to get disk stat\n"; return 1; } }
{
auto stat_thread = std::async(std::launch::async, []{ return disk::stat(); });
std::cout << "Waiting for disk usage statistic to be collected... 0s" << std::flush;
for (unsigned i = 1; stat_thread.wait_for(1s) != std::future_status::ready; i++)
std::cout << fmt::format("\rWaiting for disk usage statistic to be collected... {}s", i) << std::flush;
if (!stat_thread.get()) { std::cerr << "Failed to collect disk usage statistic.\n"; return 1; }
}
else { std::cerr << "Unknown command.\n"; return 1; }
}
catch (...) { std::cerr << boost::current_exception_diagnostic_information() << std::endl; return 1; }