#include "PerformanceMonitor.h"
#include "DBMgr.h"

#define DEFAULT_PFM_CHECK_TIME 5000 // 5s
#define NVML_LIB_NAME "libnvidia-ml.so"
#define LOCAL_NET_NAME "lo"

NAMESPACE_MAS_BEGIN

PerformanceMonitor * PerformanceMonitor::_ins = nullptr;
tzc::Mutex PerformanceMonitor::_insLock = FALSE;

/**
 * Returns the process-wide PerformanceMonitor, creating it on first use.
 *
 * NOTE(review): the first `!_ins` test is performed without holding
 * _insLock and _ins is a plain pointer, so that read is not synchronized
 * with the write below. Left unchanged here because fixing it properly
 * requires a header change (e.g. an atomic pointer) — please confirm.
 */
PerformanceMonitor * PerformanceMonitor::GetInstance()
{
    if (!_ins)
    {
        _insLock.Lock();
        if (!_ins)
        {
            _ins = new PerformanceMonitor();
        }
        _insLock.Unlock();
    }
    return _ins;
}

/** Destroys the singleton instance (if any) while holding _insLock. */
void PerformanceMonitor::DestoryInstance()
{
    _insLock.Lock();
    TZ_delete(_ins);
    _insLock.Unlock();
}

/** Sets every member to its "not initialized" state; no resources are acquired. */
PerformanceMonitor::PerformanceMonitor():
    m_inited(FALSE),
    m_timeCnt(0),
    m_intervalTime(DEFAULT_PFM_CHECK_TIME),
    m_cpuUse(0),
    m_fNvmlInit(nullptr),
    m_fNvmlShutdown(nullptr),
    m_fNvmlDeviceGetCount(nullptr),
    m_fNvmlDeviceGetHandleByIndex(nullptr),
    m_fNvmlDeviceGetMemoryInfo(nullptr),
    m_fNvmlDeviceGetUtilizationRate(nullptr),
    m_fNvmlDeviceGetTemperature(nullptr),
    m_hdLib(nullptr)
{
}

/** Releases everything acquired by Initialize() via Dispose(). */
PerformanceMonitor::~PerformanceMonitor()
{
    this->Dispose();
}

/**
 * Takes the baseline samples, loads the NVML shared library, resolves the
 * NVML entry points, calls nvmlInit and starts the collection thread.
 *
 * @return MEC_OK on success or when already initialized; MEC_FAILED when
 *         the library cannot be loaded, a symbol is missing, or nvmlInit
 *         fails. On every failure path after the library was loaded, the
 *         library is unloaded again so nothing is leaked and a later
 *         Initialize() call starts from a clean state.
 */
TZ_INT PerformanceMonitor::Initialize()
{
    if (m_inited)
    {
        TZLogWarn("PerformanceMonitor is initialized~~~");
        return MEC_OK;
    }

    this->initData();

    m_hdLib = tzc::SysUtils::LoadLibrary(NVML_LIB_NAME);
    if (m_hdLib == nullptr)
    {
        TZLogError("load dll %s failed!!", NVML_LIB_NAME);
        return MEC_FAILED;
    }

    // Undo a partially completed initialization: the resolved function
    // pointers become dangling once the library is unloaded, so reset them.
    auto unloadNvml = [this]()
    {
        m_fNvmlInit = nullptr;
        m_fNvmlShutdown = nullptr;
        m_fNvmlDeviceGetCount = nullptr;
        m_fNvmlDeviceGetHandleByIndex = nullptr;
        m_fNvmlDeviceGetMemoryInfo = nullptr;
        m_fNvmlDeviceGetUtilizationRate = nullptr;
        m_fNvmlDeviceGetTemperature = nullptr;
        tzc::SysUtils::FreeLibrary(m_hdLib);
        m_hdLib = nullptr;
    };

    // loadNvmlFunc returns 0 on success; stop at the first missing symbol.
    const bool symbolsLoaded =
        this->loadNvmlFunc(m_fNvmlInit, "nvmlInit") == 0 &&
        this->loadNvmlFunc(m_fNvmlShutdown, "nvmlShutdown") == 0 &&
        this->loadNvmlFunc(m_fNvmlDeviceGetCount, "nvmlDeviceGetCount") == 0 &&
        this->loadNvmlFunc(m_fNvmlDeviceGetHandleByIndex, "nvmlDeviceGetHandleByIndex") == 0 &&
        this->loadNvmlFunc(m_fNvmlDeviceGetMemoryInfo, "nvmlDeviceGetMemoryInfo") == 0 &&
        this->loadNvmlFunc(m_fNvmlDeviceGetUtilizationRate, "nvmlDeviceGetUtilizationRates") == 0 &&
        this->loadNvmlFunc(m_fNvmlDeviceGetTemperature, "nvmlDeviceGetTemperature") == 0;

    if (!symbolsLoaded)
    {
        TZLogInfo("Init NVML Lib Failed!!!!!");
        unloadNvml();
        return MEC_FAILED;
    }

    if (m_fNvmlInit() != nvmlReturn_t::NVML_SUCCESS)
    {
        TZLogInfo("call m_fNvmlInit failed!!!");
        unloadNvml();
        return MEC_FAILED;
    }

    TZLogInfo("Init NVML Lib success!!!!");
    this->Start();
    m_inited = TRUE;
    TZLogInfo("PerformanceMonitor Initialize success~~~");
    return MEC_OK;
}

/**
 * Stops the collection thread, shuts NVML down and unloads the library.
 *
 * @return MEC_OK always; a no-op when the monitor is not initialized.
 */
TZ_INT PerformanceMonitor::Dispose()
{
    if (!m_inited)
        return MEC_OK;

    // The worker must be stopped before NVML is torn down, because
    // Entry() calls through the NVML function pointers.
    this->StopAndWait();

    if (m_hdLib != nullptr)
    {
        if (m_fNvmlShutdown != nullptr)
        {
            m_fNvmlShutdown();
        }
        tzc::SysUtils::FreeLibrary(m_hdLib);
        m_hdLib = nullptr;

        // The library is gone; do not keep pointers into it.
        m_fNvmlInit = nullptr;
        m_fNvmlShutdown = nullptr;
        m_fNvmlDeviceGetCount = nullptr;
        m_fNvmlDeviceGetHandleByIndex = nullptr;
        m_fNvmlDeviceGetMemoryInfo = nullptr;
        m_fNvmlDeviceGetUtilizationRate = nullptr;
        m_fNvmlDeviceGetTemperature = nullptr;
    }

    m_inited = FALSE;
    TZLogInfo("PerformanceMonitor Dispose success~~~");
    return MEC_OK;
}

/**
 * Sets the interval between two collection rounds.
 *
 * @param time new interval value, stored as-is in m_intervalTime.
 *
 * NOTE(review): the original comment in this file described `time` as a
 * collection frequency in seconds and the log message prints "s", but
 * Entry() compares m_intervalTime against a counter advanced by 100 per
 * 100 ms delay and the default is 5000 ("5s"), which suggests
 * milliseconds. Confirm the intended unit with the callers.
 */
void PerformanceMonitor::SetIntervalTime(TZ_INT time)
{
    m_intervalTime = time;
    TZLogInfo("PerformanceMonitor IntervalTime set to %ds~~~", time);
}

/**
 * Takes the initial snapshot of every metric so that later rounds have a
 * baseline to compute deltas against.
 */
void PerformanceMonitor::initData()
{
    /* Current CPU counters */
    FileHelper::GetSysStatData(m_cpuDataInfo);

    /* Memory counters */
    FileHelper::GetSysMemData(m_memDataInfo);

    /* Disk read/write counters */
    FileHelper::GetSysDiskIOData(m_diskDataList);

    /* Current network interface counters */
    FileHelper::GetSysNetBandData(m_netDataList);

    /* Register every interface found above in m_interFaces.
     * NOTE(review): entries are appended without clearing, so calling
     * initData() more than once duplicates them — confirm whether
     * m_interFaces is populated anywhere else before adding a clear(). */
    for (auto & iter : m_netDataList)
    {
        InterfaceInfo info;
        info.Name = iter.NicName;
        m_interFaces.push_back(info);
    }

    /* CPU name and total core count */
    FileHelper::GetCPUInfo(m_cpuInfo);

    /* Network connection information */
    FileHelper::GetNetPortInfo(m_netInfos);

    /* Disk usage */
    FileHelper::GetDiskInfo(m_diskInfos);

    /* GPU information */
    FileHelper::GetGPUInfo(m_GPUName);

    TZLogInfo("after initData");
    return;
}

/**
 * Worker loop: waits in 100-unit steps until m_intervalTime has elapsed,
 * then runs one collection round and writes the record through DBMGR.
 * The short delay steps keep the loop responsive to IsStop().
 */
void PerformanceMonitor::Entry()
{
    while (!this->IsStop())
    {
        if (m_timeCnt < m_intervalTime)
        {
            tzc::SysUtils::DelayMseconds(100);
            m_timeCnt += 100;
            continue;
        }
        m_timeCnt = 0;

        this->collectCPUData();
        this->collectMemData();
        // this->collectDiskIOData();
        this->collectBandWidthRate();
        this->collectGPUData();
        this->collectCPUInfo();
        this->collectNetworkInfo();
        this->collectDiskInfo();
        this->collectGPUInfo();

        m_tbl.Tb_RecordTime = TIME_STAMP_NOW;
        m_tbl.Tb_Id = INVALID_PRIMARY_KEY;
        DBMGR->AddOrUpdateTblPerformanceRecord(m_tbl);
    }
}

/**
 * Computes CPU usage (percent) as the `user` delta over the total CPU
 * time delta since the previous sample, then stores the new baseline.
 * When no CPU time elapsed between the samples the previous m_cpuUse is
 * kept instead of dividing by zero.
 */
void PerformanceMonitor::collectCPUData()
{
    SysStatInfo data;
    FileHelper::GetSysStatData(data);

    const TZ_DOUBLE totalDelta =
        TZ_DOUBLE(data.GetCPUTime() - m_cpuDataInfo.GetCPUTime());
    if (totalDelta > 0.0)
    {
        m_cpuUse = TZ_DOUBLE(data.user - m_cpuDataInfo.user) / totalDelta * 100;
    }
    m_cpuDataInfo = data;

    TZLogDebug(2, "DEBUG:System CPU use (%lf)~~~", m_cpuUse);
    m_tbl.Tb_Cpu = m_cpuUse;
}

/** Refreshes the memory counters and records the memory rate in m_tbl. */
void PerformanceMonitor::collectMemData()
{
    FileHelper::GetSysMemData(m_memDataInfo);
    TZLogDebug(2, "DEBUG:System MEM rate (%lf)~~~", m_memDataInfo.GetMemRate());
    m_tbl.Tb_Memory = m_memDataInfo.GetMemRate();
}

/**
 * Computes aggregate disk read/write speed over all devices from the
 * sector-count deltas since the previous sample.
 *
 * Note: this is the total across every disk, not a per-disk figure.
 * Sector counts are multiplied by 512 and divided by 1024, then divided
 * by the collectTime delta (unit of collectTime is not visible here).
 */
void PerformanceMonitor::collectDiskIOData()
{
    // Same container type as the stored baseline so swap() below is valid.
    decltype(m_diskDataList) datas;
    FileHelper::GetSysDiskIOData(datas);

    TZ_LONG readDiskNum = 0;
    TZ_LONG writeDiskNum = 0;
    TZ_LONG collectTime = 0;
    for (auto & data : datas)
    {
        readDiskNum += data.ReadSectionCount;
        writeDiskNum += data.WriteSectionCount;
        collectTime = data.collectTime;
    }

    TZ_LONG readDiskNum_d = 0;
    TZ_LONG writeDiskNum_d = 0;
    TZ_LONG collectTime_d = 0;
    for (auto & data : m_diskDataList)
    {
        readDiskNum_d += data.ReadSectionCount;
        writeDiskNum_d += data.WriteSectionCount;
        collectTime_d = data.collectTime;
    }

    const TZ_LONG elapsed = collectTime - collectTime_d;
    if (elapsed == 0)
        return;

    // Both speeds use double precision (the write path used TZ_FLOAT before).
    m_readSpeed = (TZ_DOUBLE(readDiskNum - readDiskNum_d) * 512 / 1024) / elapsed;
    m_writeSpeed = (TZ_DOUBLE(writeDiskNum - writeDiskNum_d) * 512 / 1024) / elapsed;

    TZLogDebug(2, "DEBUG:System DISK read speed (%lf)~~~", m_readSpeed);
    TZLogDebug(2, "DEBUG:System DISK write speed (%lf)~~~", m_writeSpeed);

    m_diskDataList.clear();
    m_diskDataList.swap(datas);
    // TODO: disk I/O does not need to be recorded for now
}

/**
 * Computes per-interface and total network utilisation (percent of link
 * bandwidth) from the byte-counter deltas since the previous sample.
 *
 * The loopback interface is excluded. Interfaces with no elapsed time
 * between samples or with a non-positive reported bandwidth are skipped,
 * because their rate cannot be computed without dividing by zero.
 */
void PerformanceMonitor::collectBandWidthRate()
{
    // Same container type as the stored baseline so swap() below is valid.
    decltype(m_netDataList) datas;
    FileHelper::GetSysNetBandData(datas);
    m_bandWidthInfoMap.clear();

    TZ_DOUBLE totalUse = 0.0;
    TZ_DOUBLE totalBandWidth = 0.0;

    for (auto & iter : datas)
    {
        // Exact-name comparison: sizeof(LOCAL_NET_NAME) counts the
        // terminating NUL and therefore never equals the name's length.
        if (iter.NicName == LOCAL_NET_NAME)
        {
            continue;
        }

        for (auto & info : m_netDataList)
        {
            if (iter.NicName != info.NicName)
            {
                continue;
            }

            const auto elapsed = iter.collectTime - info.collectTime;
            const TZ_DOUBLE bandWidth = TZ_DOUBLE(iter.BandWidth);
            if (elapsed == 0 || bandWidth <= 0.0)
            {
                continue;
            }

            /* Received + transmitted bits, scaled by 1e6 per collectTime
             * unit (the original comment labelled this "bps";
             * NOTE(review): the 1000000 divisor suggests Mbps — confirm). */
            TZ_DOUBLE sendRate =
                TZ_DOUBLE((iter.Receive.bytes - info.Receive.bytes +
                           iter.Transmit.bytes - info.Transmit.bytes) * 8) /
                TZ_DOUBLE(1000000 * elapsed);
            TZ_DOUBLE BandWidthRate = sendRate / bandWidth * 100;

            totalUse += sendRate;
            totalBandWidth += bandWidth;

            TZLogDebug(2, "DEBUG:System BUF [%s](%lf)~~~",
                       iter.NicName.c_str(), BandWidthRate);
            m_bandWidthInfoMap.emplace(iter.NicName, BandWidthRate);
        }
    }

    // The fresh sample becomes the baseline for the next round.
    m_netDataList.clear();
    m_netDataList.swap(datas);

    m_tbl.Tb_Nic = (totalBandWidth > 0.0) ? (totalUse / totalBandWidth * 100) : 0.0;
    // NOTE(review): "%d" is used for Tb_Nic; verify it matches the field's type.
    TZLogDebug(2, "DEBUG: Total Net Useagr (%d)~~~", m_tbl.Tb_Nic);
}

/** Refreshes the cached CPU description. */
void PerformanceMonitor::collectCPUInfo()
{
    FileHelper::GetCPUInfo(m_cpuInfo);
}

/** Refreshes the cached network connection information. */
void PerformanceMonitor::collectNetworkInfo()
{
    FileHelper::GetNetPortInfo(m_netInfos);
}

/**
 * Refreshes disk usage and records the use rate of the filesystem
 * mounted on "/" in m_tbl.
 */
void PerformanceMonitor::collectDiskInfo()
{
    FileHelper::GetDiskInfo(m_diskInfos);

    // TODO: switch to the disk that mounts /var/mas2.0
    for (auto & i : m_diskInfos)
    {
        if (i.MountedOn == "/")
        {
            m_tbl.Tb_Disk = atoi(i.UseRate.c_str());
        }
    }
}

/** Refreshes the cached GPU name. */
void PerformanceMonitor::collectGPUInfo()
{
    FileHelper::GetGPUInfo(m_GPUName);
}

/**
 * Queries every NVML device for memory and utilisation figures, publishes
 * the per-device list under m_lockGPUPerf, and records the aggregate GPU
 * memory usage and average GPU utilisation in m_tbl.
 *
 * Devices whose queries fail are skipped. When no device could be
 * sampled, the aggregate values are set to 0 rather than dividing by
 * zero.
 */
void PerformanceMonitor::collectGPUData()
{
    TZ_INT iRet(nvmlReturn_t::NVML_SUCCESS);
    TZ_Uint32 uDevCnt(0);

    iRet = m_fNvmlDeviceGetCount(&uDevCnt);
    if (iRet != nvmlReturn_t::NVML_SUCCESS)
    {
        TZLogWarn("Call m_fNvmlDeviceGetCount failed!!!:%d", iRet);
        this->clearGPUperf();
        return;
    }

    TZ_Uint64 totalUse = 0;
    TZ_Uint64 totalMemUse = 0;
    TZ_Uint64 totalMem = 0;

    // Same container type as the published list so swap() below is valid.
    decltype(m_vGPUPerf) vecTmpGPUPerf;

    for (TZ_Uint32 uIndex = 0; uIndex < uDevCnt; ++uIndex)
    {
        nvmlDevice_t nvmlDevice;
        iRet = m_fNvmlDeviceGetHandleByIndex(uIndex, &nvmlDevice);
        if (iRet != nvmlReturn_t::NVML_SUCCESS)
        {
            TZLogWarn("Call m_fNvmlDeviceGetHandleByIndex failed!!!:%d", iRet);
            continue;
        }

        nvmlMemory_t nvmlMemory;
        iRet = m_fNvmlDeviceGetMemoryInfo(nvmlDevice, &nvmlMemory);
        if (iRet != nvmlReturn_t::NVML_SUCCESS)
        {
            TZLogWarn("Call m_fNvmlDeviceGetMemoryInfo failed!!!:%d", iRet);
            continue;
        }

        nvmlUtilization_t nvmlUtilization;
        iRet = m_fNvmlDeviceGetUtilizationRate(nvmlDevice, &nvmlUtilization);
        if (iRet != nvmlReturn_t::NVML_SUCCESS)
        {
            TZLogWarn("Call m_fNvmlDeviceGetUtilizationRate failed!!!:%d", iRet);
            continue;
        }

        SysGPUPerf gpuPerf;
        gpuPerf.gpuIndex = uIndex;
        gpuPerf.gpuUseRate = nvmlUtilization.gpu;
        // nvmlUtilization.memory; // TODO: this could be used to query memory utilisation
        gpuPerf.memTotal = nvmlMemory.total;
        gpuPerf.memFree = nvmlMemory.free;
        gpuPerf.memUsed = nvmlMemory.used;

        totalUse += nvmlUtilization.gpu;
        totalMem += nvmlMemory.total;
        totalMemUse += nvmlMemory.used;

        vecTmpGPUPerf.push_back(gpuPerf);
    }

    // Capture the count before publishing so the shared list is never
    // read outside the lock.
    const TZ_Uint64 sampledCnt = vecTmpGPUPerf.size();

    m_lockGPUPerf.Lock();
    m_vGPUPerf.swap(vecTmpGPUPerf);
    m_lockGPUPerf.Unlock();

    m_tbl.Tb_GpuMemory = (totalMem > 0) ? (100.0 * totalMemUse / totalMem) : 0.0;
    // TODO: no better way to evaluate overall utilisation yet; use the mean
    m_tbl.Tb_GpuUtil = (sampledCnt > 0) ? (totalUse / sampledCnt) : 0;
}

/** Empties the published per-GPU list under m_lockGPUPerf. */
void PerformanceMonitor::clearGPUperf()
{
    m_lockGPUPerf.Lock();
    m_vGPUPerf.clear();
    m_lockGPUPerf.Unlock();
}

NAMESPACE_MAS_END