nvml.h 218 KB


  1. /*
  2. * Copyright 1993-2016 NVIDIA Corporation. All rights reserved.
  3. *
  4. * NOTICE TO USER:
  5. *
  6. * This source code is subject to NVIDIA ownership rights under U.S. and
  7. * international Copyright laws. Users and possessors of this source code
  8. * are hereby granted a nonexclusive, royalty-free license to use this code
  9. * in individual and commercial software.
  10. *
  11. * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
  12. * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
  13. * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
  14. * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
  15. * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
  16. * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
  17. * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
  18. * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  19. * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
  20. * OR PERFORMANCE OF THIS SOURCE CODE.
  21. *
  22. * U.S. Government End Users. This source code is a "commercial item" as
  23. * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
  24. * "commercial computer software" and "commercial computer software
  25. * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
  26. * and is provided to the U.S. Government only as a commercial end item.
  27. * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
  28. * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
  29. * source code with only those rights set forth herein.
  30. *
  31. * Any use of this source code in individual and commercial software must
  32. * include, in the user documentation and internal comments to the code,
  33. * the above Disclaimer and U.S. Government End Users Notice.
  34. */
  35. /*
  36. NVML API Reference
  37. The NVIDIA Management Library (NVML) is a C-based programmatic interface for monitoring and
  38. managing various states within NVIDIA Tesla &tm; GPUs. It is intended to be a platform for building
  39. 3rd party applications, and is also the underlying library for the NVIDIA-supported nvidia-smi
  40. tool. NVML is thread-safe so it is safe to make simultaneous NVML calls from multiple threads.
  41. API Documentation
  42. Supported platforms:
  43. - Windows: Windows Server 2008 R2 64bit, Windows Server 2012 R2 64bit, Windows 7 64bit, Windows 8 64bit, Windows 10 64bit
  44. - Linux: 32-bit and 64-bit
  45. - Hypervisors: Windows Server 2008R2/2012 Hyper-V 64bit, Citrix XenServer 6.2 SP1+, VMware ESX 5.1/5.5
  46. Supported products:
  47. - Full Support
  48. - All Tesla products, starting with the Fermi architecture
  49. - All Quadro products, starting with the Fermi architecture
  50. - All GRID products, starting with the Kepler architecture
  51. - Selected GeForce Titan products
  52. - Limited Support
  53. - All Geforce products, starting with the Fermi architecture
  54. The NVML library can be found at \%ProgramW6432\%\\"NVIDIA Corporation"\\NVSMI\\ on Windows. It is
  55. not added to the system path by default. To dynamically link to NVML, add this path to the PATH
  56. environment variable. To dynamically load NVML, call LoadLibrary with this path.
  57. On Linux the NVML library will be found on the standard library path. For 64 bit Linux, both the 32 bit
  58. and 64 bit NVML libraries will be installed.
  59. Online documentation for this library is available at http://docs.nvidia.com/deploy/nvml-api/index.html
  60. */
  61. #ifndef __nvml_nvml_h__
  62. #define __nvml_nvml_h__
  63. #ifdef __cplusplus
  64. extern "C" {
  65. #endif
  /*
   * Symbol import/export decoration for the NVML entry points.
   *
   * DECLDIR expands to:
   *   - __declspec(dllexport) on Windows (_WINDOWS defined) when NVML_LIB_EXPORT is defined,
   *   - __declspec(dllimport) on Windows otherwise,
   *   - nothing when NVML_STATIC_IMPORT is defined (define it when using the
   *     nvml_loader library) or when _WINDOWS is not defined.
   */
  #if defined(_WINDOWS) && !defined(NVML_STATIC_IMPORT)
  #  if defined(NVML_LIB_EXPORT)
  #    define DECLDIR __declspec(dllexport)
  #  else
  #    define DECLDIR __declspec(dllimport)
  #  endif
  #else
  #  define DECLDIR
  #endif
  /**
   * NVML API versioning support
   *
   * NVML_API_VERSION and NVML_API_VERSION_STR carry the same version number,
   * as an integer and as a string literal respectively.
   *
   * The remaining macros redirect the unversioned entry-point names to their
   * _v2 symbols, so code written against the plain names picks up the _v2
   * variants at compile time.
   */
  #define NVML_API_VERSION                8
  #define NVML_API_VERSION_STR            "8"

  #define nvmlInit                        nvmlInit_v2
  #define nvmlDeviceGetPciInfo            nvmlDeviceGetPciInfo_v2
  #define nvmlDeviceGetCount              nvmlDeviceGetCount_v2
  #define nvmlDeviceGetHandleByIndex      nvmlDeviceGetHandleByIndex_v2
  #define nvmlDeviceGetHandleByPciBusId   nvmlDeviceGetHandleByPciBusId_v2
  93. /***************************************************************************************************/
  94. /** @defgroup nvmlDeviceStructs Device Structs
  95. * @{
  96. */
  97. /***************************************************************************************************/
  /**
   * Special constant that some fields take when they are not available.
   * Used when only part of the struct is not available.
   *
   * Each structure explicitly states when to check for this value.
   *
   * \note The constant is a signed -1. When it is stored in or compared against
   *       an unsigned field it converts to the all-ones value of that field's type.
   */
  #define NVML_VALUE_NOT_AVAILABLE (-1)
  /**
   * Handle to a device, expressed as a pointer to struct nvmlDevice_st.
   */
  typedef struct nvmlDevice_st* nvmlDevice_t;
  /**
   * Buffer size guaranteed to be large enough for pci bus id
   */
  #define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 16

  /**
   * PCI information about a GPU device.
   */
  typedef struct nvmlPciInfo_st
  {
      char busId[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE]; //!< The tuple domain:bus:device.function PCI identifier (&amp; NULL terminator)
      unsigned int domain;                            //!< The PCI domain on which the device's bus resides, 0 to 0xffff
      unsigned int bus;                               //!< The bus on which the device resides, 0 to 0xff
      unsigned int device;                            //!< The device's id on the bus, 0 to 31
      unsigned int pciDeviceId;                       //!< The combined 16-bit device id and 16-bit vendor id

      // Added in NVML 2.285 API
      unsigned int pciSubSystemId;                    //!< The 32-bit Sub System Device ID

      // NVIDIA reserved for internal use only
      unsigned int reserved0;
      unsigned int reserved1;
      unsigned int reserved2;
      unsigned int reserved3;
  } nvmlPciInfo_t;
  /**
   * Detailed ECC error counts for a device.
   *
   * @deprecated Different GPU families can have different memory error counters
   *             See \ref nvmlDeviceGetMemoryErrorCounter
   */
  typedef struct nvmlEccErrorCounts_st
  {
      unsigned long long l1Cache;      //!< L1 cache errors
      unsigned long long l2Cache;      //!< L2 cache errors
      unsigned long long deviceMemory; //!< Device memory errors
      unsigned long long registerFile; //!< Register file errors
  } nvmlEccErrorCounts_t;
  /**
   * Utilization information for a device.
   * Each sample period may be between 1 second and 1/6 second, depending on the product being queried.
   */
  typedef struct nvmlUtilization_st
  {
      unsigned int gpu;    //!< Percent of time over the past sample period during which one or more kernels was executing on the GPU
      unsigned int memory; //!< Percent of time over the past sample period during which global (device) memory was being read or written
  } nvmlUtilization_t;
  /**
   * Memory allocation information for a device.
   */
  typedef struct nvmlMemory_st
  {
      unsigned long long total; //!< Total installed FB memory (in bytes)
      unsigned long long free;  //!< Unallocated FB memory (in bytes)
      unsigned long long used;  //!< Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping
  } nvmlMemory_t;
  /**
   * BAR1 memory allocation information for a device
   */
  typedef struct nvmlBAR1Memory_st
  {
      unsigned long long bar1Total; //!< Total BAR1 Memory (in bytes)
      unsigned long long bar1Free;  //!< Unallocated BAR1 Memory (in bytes)
      unsigned long long bar1Used;  //!< Allocated BAR1 Memory (in bytes)
  } nvmlBAR1Memory_t;
  /**
   * Information about running compute processes on the GPU
   */
  typedef struct nvmlProcessInfo_st
  {
      unsigned int pid;                 //!< Process ID
      unsigned long long usedGpuMemory; //!< Amount of used GPU memory in bytes.
                                        //! Under WDDM, \ref NVML_VALUE_NOT_AVAILABLE is always reported
                                        //! because Windows KMD manages all the memory and not the NVIDIA driver
  } nvmlProcessInfo_t;
  /**
   * Enum to represent type of bridge chip
   */
  typedef enum nvmlBridgeChipType_enum
  {
      NVML_BRIDGE_CHIP_PLX  = 0, //!< PLX bridge chip
      NVML_BRIDGE_CHIP_BRO4 = 1  //!< BRO4 bridge chip
  } nvmlBridgeChipType_t;
  /**
   * Maximum number of NvLink links supported
   */
  #define NVML_NVLINK_MAX_LINKS 4
  /**
   * Enum to represent the NvLink utilization counter packet units
   */
  typedef enum nvmlNvLinkUtilizationCountUnits_enum
  {
      NVML_NVLINK_COUNTER_UNIT_CYCLES  = 0, //!< count by cycles
      NVML_NVLINK_COUNTER_UNIT_PACKETS = 1, //!< count by packets
      NVML_NVLINK_COUNTER_UNIT_BYTES   = 2, //!< count by bytes

      // this must be last
      NVML_NVLINK_COUNTER_UNIT_COUNT        //!< Count of counter units
  } nvmlNvLinkUtilizationCountUnits_t;
  /**
   * Enum to represent the NvLink utilization counter packet types to count
   *  ** this is ONLY applicable with the units as packets or bytes
   *  ** as specified in \a nvmlNvLinkUtilizationCountUnits_t
   *  ** all packet filter descriptions are target GPU centric
   *  ** these can be "OR'd" together
   *
   * Each filter occupies its own bit; NVML_NVLINK_COUNTER_PKTFILTER_ALL is the
   * union of all eight filter bits.
   */
  typedef enum nvmlNvLinkUtilizationCountPktTypes_enum
  {
      NVML_NVLINK_COUNTER_PKTFILTER_NOP        = 0x1,  //!< no operation packets
      NVML_NVLINK_COUNTER_PKTFILTER_READ       = 0x2,  //!< read packets
      NVML_NVLINK_COUNTER_PKTFILTER_WRITE      = 0x4,  //!< write packets
      NVML_NVLINK_COUNTER_PKTFILTER_RATOM      = 0x8,  //!< reduction atomic requests
      NVML_NVLINK_COUNTER_PKTFILTER_NRATOM     = 0x10, //!< non-reduction atomic requests
      NVML_NVLINK_COUNTER_PKTFILTER_FLUSH      = 0x20, //!< flush requests
      NVML_NVLINK_COUNTER_PKTFILTER_RESPDATA   = 0x40, //!< responses with data
      NVML_NVLINK_COUNTER_PKTFILTER_RESPNODATA = 0x80, //!< responses without data
      NVML_NVLINK_COUNTER_PKTFILTER_ALL        = 0xFF  //!< all packets
  } nvmlNvLinkUtilizationCountPktTypes_t;
  220. /**
  221. * Struct to define the NVLINK counter controls
  222. */
  223. typedef struct nvmlNvLinkUtilizationControl_st
  224. {
  225. nvmlNvLinkUtilizationCountUnits_t units;
  226. nvmlNvLinkUtilizationCountPktTypes_t pktfilter;
  227. } nvmlNvLinkUtilizationControl_t;
  /**
   * Enum to represent NvLink queryable capabilities
   */
  typedef enum nvmlNvLinkCapability_enum
  {
      NVML_NVLINK_CAP_P2P_SUPPORTED  = 0, //!< P2P over NVLink is supported
      NVML_NVLINK_CAP_SYSMEM_ACCESS  = 1, //!< Access to system memory is supported
      NVML_NVLINK_CAP_P2P_ATOMICS    = 2, //!< P2P atomics are supported
      NVML_NVLINK_CAP_SYSMEM_ATOMICS = 3, //!< System memory atomics are supported
      NVML_NVLINK_CAP_SLI_BRIDGE     = 4, //!< SLI is supported over this link
      NVML_NVLINK_CAP_VALID          = 5, //!< Link is supported on this device

      // should be last
      NVML_NVLINK_CAP_COUNT               //!< Count of capabilities
  } nvmlNvLinkCapability_t;
  /**
   * Enum to represent NvLink queryable error counters
   */
  typedef enum nvmlNvLinkErrorCounter_enum
  {
      NVML_NVLINK_ERROR_DL_REPLAY   = 0, //!< Data link transmit replay error counter
      NVML_NVLINK_ERROR_DL_RECOVERY = 1, //!< Data link transmit recovery error counter
      NVML_NVLINK_ERROR_DL_CRC_FLIT = 2, //!< Data link receive flow control digit CRC error counter
      NVML_NVLINK_ERROR_DL_CRC_DATA = 3, //!< Data link receive data CRC error counter

      // this must be last
      NVML_NVLINK_ERROR_COUNT            //!< Count of error counters
  } nvmlNvLinkErrorCounter_t;
  /**
   * Represents level relationships within a system between two GPUs
   * The enums are spaced to allow for future relationships
   */
  typedef enum nvmlGpuLevel_enum
  {
      NVML_TOPOLOGY_INTERNAL   = 0,  //!< e.g. Tesla K80
      NVML_TOPOLOGY_SINGLE     = 10, //!< all devices that only need traverse a single PCIe switch
      NVML_TOPOLOGY_MULTIPLE   = 20, //!< all devices that need not traverse a host bridge
      NVML_TOPOLOGY_HOSTBRIDGE = 30, //!< all devices that are connected to the same host bridge
      NVML_TOPOLOGY_CPU        = 40, //!< all devices that are connected to the same CPU but possibly multiple host bridges
      NVML_TOPOLOGY_SYSTEM     = 50  //!< all devices in the system

      // there is purposefully no COUNT here because of the need for spacing above.
      // No trailing comma after the last enumerator: strict C89 / C++98 compilers reject it.
  } nvmlGpuTopologyLevel_t;
  /**
   * P2P Capability Index Status
   */
  typedef enum nvmlGpuP2PStatus_enum
  {
      NVML_P2P_STATUS_OK = 0,
      NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED, //!< Misspelled name kept for source compatibility; see the alias below
      NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
      NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
      NVML_P2P_STATUS_DISABLED_BY_REGKEY,
      NVML_P2P_STATUS_NOT_SUPPORTED,
      NVML_P2P_STATUS_UNKNOWN
  } nvmlGpuP2PStatus_t;

  /**
   * Correctly spelled alias for the misspelled enumerator NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED.
   * Both names evaluate to the same value.
   */
  #define NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED
  /**
   * P2P Capability Index
   */
  typedef enum nvmlGpuP2PCapsIndex_enum
  {
      NVML_P2P_CAPS_INDEX_READ = 0,
      NVML_P2P_CAPS_INDEX_WRITE,
      NVML_P2P_CAPS_INDEX_NVLINK,
      NVML_P2P_CAPS_INDEX_ATOMICS,
      NVML_P2P_CAPS_INDEX_PROP,
      NVML_P2P_CAPS_INDEX_UNKNOWN
  } nvmlGpuP2PCapsIndex_t;
  /**
   * Maximum limit on Physical Bridges per Board
   */
  #define NVML_MAX_PHYSICAL_BRIDGE (128)
  293. /**
  294. * Information about the Bridge Chip Firmware
  295. */
  296. typedef struct nvmlBridgeChipInfo_st
  297. {
  298. nvmlBridgeChipType_t type; //!< Type of Bridge Chip
  299. unsigned int fwVersion; //!< Firmware Version. 0=Version is unavailable
  300. }nvmlBridgeChipInfo_t;
  301. /**
  302. * This structure stores the complete Hierarchy of the Bridge Chip within the board. The immediate
  303. * bridge is stored at index 0 of bridgeInfoList, parent to immediate bridge is at index 1 and so forth.
  304. */
  305. typedef struct nvmlBridgeChipHierarchy_st
  306. {
  307. unsigned char bridgeCount; //!< Number of Bridge Chips on the Board
  308. nvmlBridgeChipInfo_t bridgeChipInfo[NVML_MAX_PHYSICAL_BRIDGE]; //!< Hierarchy of Bridge Chips on the board
  309. }nvmlBridgeChipHierarchy_t;
  /**
   * Represents Type of Sampling Event
   */
  typedef enum nvmlSamplingType_enum
  {
      NVML_TOTAL_POWER_SAMPLES        = 0, //!< To represent total power drawn by GPU
      NVML_GPU_UTILIZATION_SAMPLES    = 1, //!< To represent percent of time during which one or more kernels was executing on the GPU
      NVML_MEMORY_UTILIZATION_SAMPLES = 2, //!< To represent percent of time during which global (device) memory was being read or written
      NVML_ENC_UTILIZATION_SAMPLES    = 3, //!< To represent percent of time during which NVENC remains busy
      NVML_DEC_UTILIZATION_SAMPLES    = 4, //!< To represent percent of time during which NVDEC remains busy
      NVML_PROCESSOR_CLK_SAMPLES      = 5, //!< To represent processor clock samples
      NVML_MEMORY_CLK_SAMPLES         = 6, //!< To represent memory clock samples

      // Keep this last
      NVML_SAMPLINGTYPE_COUNT              //!< Count of sampling types
  } nvmlSamplingType_t;
  /**
   * Represents the queryable PCIe utilization counters
   */
  typedef enum nvmlPcieUtilCounter_enum
  {
      NVML_PCIE_UTIL_TX_BYTES = 0, //!< 1KB granularity
      NVML_PCIE_UTIL_RX_BYTES = 1, //!< 1KB granularity

      // Keep this last
      NVML_PCIE_UTIL_COUNT         //!< Count of PCIe utilization counters
  } nvmlPcieUtilCounter_t;
  /**
   * Represents the type for sample value returned
   */
  typedef enum nvmlValueType_enum
  {
      NVML_VALUE_TYPE_DOUBLE             = 0,
      NVML_VALUE_TYPE_UNSIGNED_INT       = 1,
      NVML_VALUE_TYPE_UNSIGNED_LONG      = 2,
      NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3,

      // Keep this last
      NVML_VALUE_TYPE_COUNT                  //!< Count of value types
  } nvmlValueType_t;
  /**
   * Union to represent different types of Value
   *
   * All members share the same storage; only the member matching the value's
   * type is meaningful.
   */
  typedef union nvmlValue_st
  {
      double dVal;               //!< If the value is double
      unsigned int uiVal;        //!< If the value is unsigned int
      unsigned long ulVal;       //!< If the value is unsigned long
      unsigned long long ullVal; //!< If the value is unsigned long long
  } nvmlValue_t;
  357. /**
  358. * Information for Sample
  359. */
  360. typedef struct nvmlSample_st
  361. {
  362. unsigned long long timeStamp; //!< CPU Timestamp in microseconds
  363. nvmlValue_t sampleValue; //!< Sample Value
  364. }nvmlSample_t;
  /**
   * Represents type of perf policy for which violation times can be queried
   */
  typedef enum nvmlPerfPolicyType_enum
  {
      NVML_PERF_POLICY_POWER      = 0,
      NVML_PERF_POLICY_THERMAL    = 1,
      NVML_PERF_POLICY_SYNC_BOOST = 2,

      // Keep this last
      NVML_PERF_POLICY_COUNT          //!< Count of perf policy types
  } nvmlPerfPolicyType_t;
  /**
   * Struct to hold perf policy violation status data
   */
  typedef struct nvmlViolationTime_st
  {
      unsigned long long referenceTime; //!< referenceTime represents CPU timestamp in microseconds
      unsigned long long violationTime; //!< violationTime in Nanoseconds
  } nvmlViolationTime_t;
  384. /** @} */
  385. /***************************************************************************************************/
  386. /** @defgroup nvmlDeviceEnumvs Device Enums
  387. * @{
  388. */
  389. /***************************************************************************************************/
  /**
   * Generic enable/disable enum.
   */
  typedef enum nvmlEnableState_enum
  {
      NVML_FEATURE_DISABLED = 0, //!< Feature disabled
      NVML_FEATURE_ENABLED  = 1  //!< Feature enabled
  } nvmlEnableState_t;
  //! Generic flag used to specify the default behavior of some functions. See description of particular functions for details.
  #define nvmlFlagDefault 0x00

  //! Generic flag used to force some behavior. See description of particular functions for details.
  #define nvmlFlagForce   0x01
  /**
   * The Brand of the GPU
   */
  typedef enum nvmlBrandType_enum
  {
      NVML_BRAND_UNKNOWN = 0,
      NVML_BRAND_QUADRO  = 1,
      NVML_BRAND_TESLA   = 2,
      NVML_BRAND_NVS     = 3,
      NVML_BRAND_GRID    = 4,
      NVML_BRAND_GEFORCE = 5,

      // Keep this last
      NVML_BRAND_COUNT       //!< Count of brand types
  } nvmlBrandType_t;
  /**
   * Temperature thresholds.
   */
  typedef enum nvmlTemperatureThresholds_enum
  {
      NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0, //!< Temperature at which the GPU will shut down
                                               //!< for HW protection
      NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1, //!< Temperature at which the GPU will begin slowdown

      // Keep this last
      NVML_TEMPERATURE_THRESHOLD_COUNT         //!< Count of temperature thresholds
  } nvmlTemperatureThresholds_t;
  /**
   * Temperature sensors.
   */
  typedef enum nvmlTemperatureSensors_enum
  {
      NVML_TEMPERATURE_GPU = 0, //!< Temperature sensor for the GPU die

      // Keep this last
      NVML_TEMPERATURE_COUNT    //!< Count of temperature sensors
  } nvmlTemperatureSensors_t;
  /**
   * Compute mode.
   *
   * NVML_COMPUTEMODE_EXCLUSIVE_PROCESS was added in CUDA 4.0.
   * Earlier CUDA versions supported a single exclusive mode,
   * which is equivalent to NVML_COMPUTEMODE_EXCLUSIVE_THREAD in CUDA 4.0 and beyond.
   */
  typedef enum nvmlComputeMode_enum
  {
      NVML_COMPUTEMODE_DEFAULT           = 0, //!< Default compute mode -- multiple contexts per device
      NVML_COMPUTEMODE_EXCLUSIVE_THREAD  = 1, //!< Support Removed
      NVML_COMPUTEMODE_PROHIBITED        = 2, //!< Compute-prohibited mode -- no contexts per device
      NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3, //!< Compute-exclusive-process mode -- only one context per device, usable from multiple threads at a time

      // Keep this last
      NVML_COMPUTEMODE_COUNT                  //!< Count of compute modes
  } nvmlComputeMode_t;
  /**
   * ECC bit types.
   *
   * @deprecated See \ref nvmlMemoryErrorType_t for a more flexible type
   */
  #define nvmlEccBitType_t nvmlMemoryErrorType_t

  /**
   * Single bit ECC errors
   *
   * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_CORRECTED
   */
  #define NVML_SINGLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_CORRECTED

  /**
   * Double bit ECC errors
   *
   * @deprecated Mapped to \ref NVML_MEMORY_ERROR_TYPE_UNCORRECTED
   */
  #define NVML_DOUBLE_BIT_ECC NVML_MEMORY_ERROR_TYPE_UNCORRECTED
  /**
   * Memory error types
   */
  typedef enum nvmlMemoryErrorType_enum
  {
      /**
       * A memory error that was corrected
       *
       * For ECC errors, these are single bit errors
       * For Texture memory, these are errors fixed by resend
       */
      NVML_MEMORY_ERROR_TYPE_CORRECTED = 0,

      /**
       * A memory error that was not corrected
       *
       * For ECC errors, these are double bit errors
       * For Texture memory, these are errors where the resend fails
       */
      NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1,

      // Keep this last
      NVML_MEMORY_ERROR_TYPE_COUNT //!< Count of memory error types
  } nvmlMemoryErrorType_t;
  /**
   * ECC counter types.
   *
   * Note: Volatile counts are reset each time the driver loads. On Windows this is once per boot. On Linux this can be more frequent.
   *       On Linux the driver unloads when no active clients exist. If persistence mode is enabled or there is always a driver
   *       client active (e.g. X11), then Linux also sees per-boot behavior. If not, volatile counts are reset each time a compute app
   *       is run.
   */
  typedef enum nvmlEccCounterType_enum
  {
      NVML_VOLATILE_ECC  = 0, //!< Volatile counts are reset each time the driver loads.
      NVML_AGGREGATE_ECC = 1, //!< Aggregate counts persist across reboots (i.e. for the lifetime of the device)

      // Keep this last
      NVML_ECC_COUNTER_TYPE_COUNT //!< Count of memory counter types
  } nvmlEccCounterType_t;
  /**
   * Clock types.
   *
   * All speeds are in MHz.
   */
  typedef enum nvmlClockType_enum
  {
      NVML_CLOCK_GRAPHICS = 0, //!< Graphics clock domain
      NVML_CLOCK_SM       = 1, //!< SM clock domain
      NVML_CLOCK_MEM      = 2, //!< Memory clock domain
      NVML_CLOCK_VIDEO    = 3, //!< Video encoder/decoder clock domain

      // Keep this last
      NVML_CLOCK_COUNT         //!< Count of clock types
  } nvmlClockType_t;
  /**
   * Clock Ids. These are used in combination with nvmlClockType_t
   * to specify a single clock value.
   */
  typedef enum nvmlClockId_enum
  {
      NVML_CLOCK_ID_CURRENT            = 0, //!< Current actual clock value
      NVML_CLOCK_ID_APP_CLOCK_TARGET   = 1, //!< Target application clock
      NVML_CLOCK_ID_APP_CLOCK_DEFAULT  = 2, //!< Default application clock target
      NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3, //!< OEM-defined maximum clock rate

      // Keep this last
      NVML_CLOCK_ID_COUNT                   //!< Count of Clock Ids.
  } nvmlClockId_t;
  /**
   * Driver models.
   *
   * Windows only.
   */
  typedef enum nvmlDriverModel_enum
  {
      NVML_DRIVER_WDDM = 0, //!< WDDM driver model -- GPU treated as a display device
      NVML_DRIVER_WDM  = 1  //!< WDM (TCC) model (recommended) -- GPU treated as a generic device
  } nvmlDriverModel_t;
  /**
   * Allowed PStates.
   *
   * States 0 through 15 are numbered by their enumerator value;
   * NVML_PSTATE_UNKNOWN sits outside that range at 32.
   */
  typedef enum nvmlPStates_enum
  {
      NVML_PSTATE_0       = 0,  //!< Performance state 0 -- Maximum Performance
      NVML_PSTATE_1       = 1,  //!< Performance state 1
      NVML_PSTATE_2       = 2,  //!< Performance state 2
      NVML_PSTATE_3       = 3,  //!< Performance state 3
      NVML_PSTATE_4       = 4,  //!< Performance state 4
      NVML_PSTATE_5       = 5,  //!< Performance state 5
      NVML_PSTATE_6       = 6,  //!< Performance state 6
      NVML_PSTATE_7       = 7,  //!< Performance state 7
      NVML_PSTATE_8       = 8,  //!< Performance state 8
      NVML_PSTATE_9       = 9,  //!< Performance state 9
      NVML_PSTATE_10      = 10, //!< Performance state 10
      NVML_PSTATE_11      = 11, //!< Performance state 11
      NVML_PSTATE_12      = 12, //!< Performance state 12
      NVML_PSTATE_13      = 13, //!< Performance state 13
      NVML_PSTATE_14      = 14, //!< Performance state 14
      NVML_PSTATE_15      = 15, //!< Performance state 15 -- Minimum Performance
      NVML_PSTATE_UNKNOWN = 32  //!< Unknown performance state
  } nvmlPstates_t;
  /**
   * GPU Operation Mode
   *
   * GOM allows reducing power usage and optimizing GPU throughput by disabling GPU features.
   *
   * Each GOM is designed to meet specific user needs.
   */
  typedef enum nvmlGom_enum
  {
      NVML_GOM_ALL_ON  = 0, //!< Everything is enabled and running at full speed

      NVML_GOM_COMPUTE = 1, //!< Designed for running only compute tasks. Graphics operations
                            //!< are not allowed

      NVML_GOM_LOW_DP  = 2  //!< Designed for running graphics applications that don't require
                            //!< high bandwidth double precision
  } nvmlGpuOperationMode_t;
  /**
   * Available infoROM objects.
   */
  typedef enum nvmlInforomObject_enum
  {
      NVML_INFOROM_OEM   = 0, //!< An object defined by OEM
      NVML_INFOROM_ECC   = 1, //!< The ECC object determining the level of ECC support
      NVML_INFOROM_POWER = 2, //!< The power management object

      // Keep this last
      NVML_INFOROM_COUNT      //!< This counts the number of infoROM objects the driver knows about
  } nvmlInforomObject_t;
  /**
   * Return values for NVML API calls.
   *
   * NVML_SUCCESS is 0; every other value names an error. The listed codes run
   * contiguously from 1 to 20, with NVML_ERROR_UNKNOWN set apart at 999.
   */
  typedef enum nvmlReturn_enum
  {
      NVML_SUCCESS                       = 0,   //!< The operation was successful
      NVML_ERROR_UNINITIALIZED           = 1,   //!< NVML was not first initialized with nvmlInit()
      NVML_ERROR_INVALID_ARGUMENT        = 2,   //!< A supplied argument is invalid
      NVML_ERROR_NOT_SUPPORTED           = 3,   //!< The requested operation is not available on target device
      NVML_ERROR_NO_PERMISSION           = 4,   //!< The current user does not have permission for operation
      NVML_ERROR_ALREADY_INITIALIZED     = 5,   //!< Deprecated: Multiple initializations are now allowed through ref counting
      NVML_ERROR_NOT_FOUND               = 6,   //!< A query to find an object was unsuccessful
      NVML_ERROR_INSUFFICIENT_SIZE       = 7,   //!< An input argument is not large enough
      NVML_ERROR_INSUFFICIENT_POWER      = 8,   //!< A device's external power cables are not properly attached
      NVML_ERROR_DRIVER_NOT_LOADED       = 9,   //!< NVIDIA driver is not loaded
      NVML_ERROR_TIMEOUT                 = 10,  //!< User provided timeout passed
      NVML_ERROR_IRQ_ISSUE               = 11,  //!< NVIDIA Kernel detected an interrupt issue with a GPU
      NVML_ERROR_LIBRARY_NOT_FOUND       = 12,  //!< NVML Shared Library couldn't be found or loaded
      NVML_ERROR_FUNCTION_NOT_FOUND      = 13,  //!< Local version of NVML doesn't implement this function
      NVML_ERROR_CORRUPTED_INFOROM       = 14,  //!< infoROM is corrupted
      NVML_ERROR_GPU_IS_LOST             = 15,  //!< The GPU has fallen off the bus or has otherwise become inaccessible
      NVML_ERROR_RESET_REQUIRED          = 16,  //!< The GPU requires a reset before it can be used again
      NVML_ERROR_OPERATING_SYSTEM        = 17,  //!< The GPU control device has been blocked by the operating system/cgroups
      NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,  //!< RM detects a driver/library version mismatch
      NVML_ERROR_IN_USE                  = 19,  //!< An operation cannot be performed because the GPU is currently in use
      NVML_ERROR_NO_DATA                 = 20,  //!< No data
      NVML_ERROR_UNKNOWN                 = 999  //!< An internal driver error occurred
  } nvmlReturn_t;
  /**
   * Memory locations
   *
   * See \ref nvmlDeviceGetMemoryErrorCounter
   */
  typedef enum nvmlMemoryLocation_enum
  {
      NVML_MEMORY_LOCATION_L1_CACHE       = 0, //!< GPU L1 Cache
      NVML_MEMORY_LOCATION_L2_CACHE       = 1, //!< GPU L2 Cache
      NVML_MEMORY_LOCATION_DEVICE_MEMORY  = 2, //!< GPU Device Memory
      NVML_MEMORY_LOCATION_REGISTER_FILE  = 3, //!< GPU Register File
      NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4, //!< GPU Texture Memory
      NVML_MEMORY_LOCATION_TEXTURE_SHM    = 5, //!< Shared memory

      // Keep this last
      NVML_MEMORY_LOCATION_COUNT               //!< This counts the number of memory locations the driver knows about
  } nvmlMemoryLocation_t;
  /**
   * Causes for page retirement
   */
  typedef enum nvmlPageRetirementCause_enum
  {
      NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 0, //!< Page was retired due to multiple single bit ECC errors
      NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR           = 1, //!< Page was retired due to a double bit ECC error

      // Keep this last
      NVML_PAGE_RETIREMENT_CAUSE_COUNT                               //!< Count of page retirement causes
  } nvmlPageRetirementCause_t;
  /**
   * API types that allow changes to default permission restrictions
   */
  typedef enum nvmlRestrictedAPI_enum
  {
      NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS  = 0, //!< APIs that change application clocks, see nvmlDeviceSetApplicationsClocks
                                                       //!< and see nvmlDeviceResetApplicationsClocks
      NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, //!< APIs that enable/disable Auto Boosted clocks
                                                       //!< see nvmlDeviceSetAutoBoostedClocksEnabled

      // Keep this last
      NVML_RESTRICTED_API_COUNT                        //!< Count of restricted API types
  } nvmlRestrictedAPI_t;
  659. /** @} */
  660. /***************************************************************************************************/
  661. /** @defgroup nvmlUnitStructs Unit Structs
  662. * @{
  663. */
  664. /***************************************************************************************************/
  /**
   * Handle to a unit, expressed as a pointer to struct nvmlUnit_st.
   */
  typedef struct nvmlUnit_st* nvmlUnit_t;
  /**
   * Description of HWBC entry
   */
  typedef struct nvmlHwbcEntry_st
  {
      unsigned int hwbcId;      //!< Id of the HWBC entry
      char firmwareVersion[32]; //!< Firmware version, held in a 32-byte character buffer
  } nvmlHwbcEntry_t;
  /**
   * Fan state enum.
   */
  typedef enum nvmlFanState_enum
  {
      NVML_FAN_NORMAL = 0, //!< Fan is working properly
      NVML_FAN_FAILED = 1  //!< Fan has failed
  } nvmlFanState_t;
  /**
   * Led color enum.
   */
  typedef enum nvmlLedColor_enum
  {
      NVML_LED_COLOR_GREEN = 0, //!< GREEN, indicates good health
      NVML_LED_COLOR_AMBER = 1  //!< AMBER, indicates problem
  } nvmlLedColor_t;
  690. /**
  691. * LED states for an S-class unit.
  692. */
  693. typedef struct nvmlLedState_st
  694. {
  695. char cause[256]; //!< If amber, a text description of the cause
  696. nvmlLedColor_t color; //!< GREEN or AMBER
  697. } nvmlLedState_t;
  /**
   * Static S-class unit info.
   *
   * Every field is a fixed 96-byte character buffer.
   */
  typedef struct nvmlUnitInfo_st
  {
      char name[96];            //!< Product name
      char id[96];              //!< Product identifier
      char serial[96];          //!< Product serial number
      char firmwareVersion[96]; //!< Firmware version
  } nvmlUnitInfo_t;
  /**
   * Power usage information for an S-class unit.
   * The power supply state is a human readable string that equals "Normal" or contains
   * a combination of "Abnormal" plus one or more of the following:
   *
   *    - High voltage
   *    - Fan failure
   *    - Heatsink temperature
   *    - Current limit
   *    - Voltage below UV alarm threshold
   *    - Low-voltage
   *    - SI2C remote off command
   *    - MOD_DISABLE input
   *    - Short pin transition
   */
  typedef struct nvmlPSUInfo_st
  {
      char state[256];      //!< The power supply state
      unsigned int current; //!< PSU current (A)
      unsigned int voltage; //!< PSU voltage (V)
      unsigned int power;   //!< PSU power draw (W)
  } nvmlPSUInfo_t;
  730. /**
  731. * Fan speed reading for a single fan in an S-class unit (one element of \ref nvmlUnitFanSpeeds_t).
  732. */
  733. typedef struct nvmlUnitFanInfo_st
  734. {
  735. unsigned int speed; //!< Fan speed (RPM)
  736. nvmlFanState_t state; //!< Indicates whether the fan is working properly (see \ref nvmlFanState_t)
  737. } nvmlUnitFanInfo_t;
  738. /**
  739. * Fan speed readings for an entire S-class unit, as returned by \ref nvmlUnitGetFanSpeedInfo.
  740. */
  741. typedef struct nvmlUnitFanSpeeds_st
  742. {
  743. nvmlUnitFanInfo_t fans[24]; //!< Fan speed data for each fan (array capacity is 24 entries)
  744. unsigned int count; //!< Number of fans in unit
  745. } nvmlUnitFanSpeeds_t;
  746. /** @} */
  747. /***************************************************************************************************/
  748. /** @addtogroup nvmlEvents
  749. * @{
  750. */
  751. /***************************************************************************************************/
  752. /**
  753. * Handle to an event set, represented as a pointer to struct nvmlEventSet_st.
  754. */
  755. typedef struct nvmlEventSet_st* nvmlEventSet_t;
  756. /** @defgroup nvmlEventType Event Types
  757. * @{
  758. * Event types about which the user can be notified.
  759. * See the descriptions of the particular functions for details.
  760. *
  761. * See \ref nvmlDeviceRegisterEvents and \ref nvmlDeviceGetSupportedEventTypes to check which devices
  762. * support each event.
  763. *
  764. * Types can be combined with the bitwise OR operator '|' when passed to \ref nvmlDeviceRegisterEvents
  765. */
  766. //! Event about single bit ECC errors
  767. /**
  768. * \note A corrected texture memory error is not an ECC error, so it does not generate a single bit event
  769. */
  770. #define nvmlEventTypeSingleBitEccError 0x0000000000000001LL
  771. //! Event about double bit ECC errors
  772. /**
  773. * \note An uncorrected texture memory error is not an ECC error, so it does not generate a double bit event
  774. */
  775. #define nvmlEventTypeDoubleBitEccError 0x0000000000000002LL
  776. //! Event about PState changes
  777. /**
  778. * \note On Fermi architecture, PState changes are also an indicator that the GPU is throttling down due to
  779. * no work being executed on the GPU, power capping or thermal capping. In a typical situation, a
  780. * Fermi-based GPU should stay in P0 for the duration of the execution of the compute process.
  781. */
  782. #define nvmlEventTypePState 0x0000000000000004LL
  783. //! Event that an Xid critical error occurred
  784. #define nvmlEventTypeXidCriticalError 0x0000000000000008LL
  785. //! Event about clock changes
  786. /**
  787. * Kepler only
  788. */
  789. #define nvmlEventTypeClock 0x0000000000000010LL
  790. //! Mask with no events
  791. #define nvmlEventTypeNone 0x0000000000000000LL
  792. //! Mask of all events (bitwise OR of every event type defined in this group)
  793. #define nvmlEventTypeAll (nvmlEventTypeNone \
  794. | nvmlEventTypeSingleBitEccError \
  795. | nvmlEventTypeDoubleBitEccError \
  796. | nvmlEventTypePState \
  797. | nvmlEventTypeClock \
  798. | nvmlEventTypeXidCriticalError \
  799. )
  800. /** @} */
  801. /**
  802. * Information about an event that has occurred
  803. */
  804. typedef struct nvmlEventData_st
  805. {
  806. nvmlDevice_t device; //!< Specific device where the event occurred
  807. unsigned long long eventType; //!< Information about what specific event occurred
  808. unsigned long long eventData; //!< Stores last XID error for the device in the event of nvmlEventTypeXidCriticalError,
  809. //!< eventData is 0 for any other event. eventData is set as 999 for unknown xid error.
  810. } nvmlEventData_t;
  811. /** @} */
  812. /***************************************************************************************************/
  813. /** @addtogroup nvmlClocksThrottleReasons
  814. * @{
  815. */
  816. /***************************************************************************************************/
  817. /** Nothing is running on the GPU and the clocks are dropping to Idle state
  818. * \note This limiter may be removed in a later release
  819. */
  820. #define nvmlClocksThrottleReasonGpuIdle 0x0000000000000001LL
  821. /** GPU clocks are limited by the current setting of applications clocks
  822. *
  823. * @see nvmlDeviceSetApplicationsClocks
  824. * @see nvmlDeviceGetApplicationsClock
  825. */
  826. #define nvmlClocksThrottleReasonApplicationsClocksSetting 0x0000000000000002LL
  827. /**
  828. * @deprecated Renamed to \ref nvmlClocksThrottleReasonApplicationsClocksSetting
  829. * as the name describes the situation more accurately.
  830. */
  831. #define nvmlClocksThrottleReasonUserDefinedClocks nvmlClocksThrottleReasonApplicationsClocksSetting
  832. /** SW Power Scaling algorithm is reducing the clocks below the requested clocks
  833. *
  834. * @see nvmlDeviceGetPowerUsage
  835. * @see nvmlDeviceSetPowerManagementLimit
  836. * @see nvmlDeviceGetPowerManagementLimit
  837. */
  838. #define nvmlClocksThrottleReasonSwPowerCap 0x0000000000000004LL
  839. /** HW Slowdown (reducing the core clocks by a factor of 2 or more) is engaged
  840. *
  841. * This is an indicator of:
  842. * - Temperature being too high
  843. * - External Power Brake Assertion is triggered (e.g. by the system power supply)
  844. * - Power draw is too high and Fast Trigger protection is reducing the clocks
  845. * - May also be reported during PState or clock change
  846. * - This behavior may be removed in a later release.
  847. *
  848. * @see nvmlDeviceGetTemperature
  849. * @see nvmlDeviceGetTemperatureThreshold
  850. * @see nvmlDeviceGetPowerUsage
  851. */
  852. #define nvmlClocksThrottleReasonHwSlowdown 0x0000000000000008LL
  853. /** Sync Boost
  854. *
  855. * This GPU has been added to a Sync boost group with nvidia-smi or DCGM in
  856. * order to maximize performance per watt. All GPUs in the sync boost group
  857. * will boost to the minimum possible clocks across the entire group. Look at
  858. * the throttle reasons for other GPUs in the system to see why those GPUs are
  859. * holding this one at lower clocks.
  860. *
  861. */
  862. #define nvmlClocksThrottleReasonSyncBoost 0x0000000000000010LL
  863. /** Some other unspecified factor is reducing the clocks (bit 63 of the mask) */
  864. #define nvmlClocksThrottleReasonUnknown 0x8000000000000000LL
  865. /** Bit mask representing no clocks throttling
  866. *
  867. * Clocks are as high as possible.
  868. */
  869. #define nvmlClocksThrottleReasonNone 0x0000000000000000LL
  870. /** Bit mask representing all supported clocks throttling reasons.
  871. * New reasons might be added to this list in the future
  872. */
  873. #define nvmlClocksThrottleReasonAll (nvmlClocksThrottleReasonNone \
  874. | nvmlClocksThrottleReasonGpuIdle \
  875. | nvmlClocksThrottleReasonApplicationsClocksSetting \
  876. | nvmlClocksThrottleReasonSwPowerCap \
  877. | nvmlClocksThrottleReasonHwSlowdown \
  878. | nvmlClocksThrottleReasonSyncBoost \
  879. | nvmlClocksThrottleReasonUnknown \
  880. )
  881. /** @} */
  882. /***************************************************************************************************/
  883. /** @defgroup nvmlAccountingStats Accounting Statistics
  884. * @{
  885. *
  886. * Set of APIs designed to provide per process information about usage of GPU.
  887. *
  888. * @note All accounting statistics and accounting mode live in nvidia driver and reset
  889. * to default (Disabled) when driver unloads.
  890. * It is advised to run with persistence mode enabled.
  891. *
  892. * @note Enabling accounting mode has no negative impact on the GPU performance.
  893. */
  894. /***************************************************************************************************/
  895. /**
  896. * Describes the accounting statistics of a single process.
  897. */
  898. typedef struct nvmlAccountingStats_st {
  899. unsigned int gpuUtilization; //!< Percent of time over the process's lifetime during which one or more kernels were executing on the GPU.
  900. //!< Utilization stats just like those returned by \ref nvmlDeviceGetUtilizationRates but for the lifetime of a
  901. //!< process (not just the last sample period).
  902. //!< Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not supported
  903. unsigned int memoryUtilization; //!< Percent of time over the process's lifetime during which global (device) memory was being read or written.
  904. //!< Set to NVML_VALUE_NOT_AVAILABLE if nvmlDeviceGetUtilizationRates is not supported
  905. unsigned long long maxMemoryUsage; //!< Maximum total memory in bytes that was ever allocated by the process.
  906. //!< Set to NVML_VALUE_NOT_AVAILABLE if nvmlProcessInfo_t->usedGpuMemory is not supported
  907. unsigned long long time; //!< Amount of time in ms during which the compute context was active. The time is reported as 0 if
  908. //!< the process is not terminated
  909. unsigned long long startTime; //!< CPU Timestamp in usec representing start time for the process
  910. unsigned int isRunning; //!< Flag to represent if the process is running (1 for running, 0 for terminated)
  911. unsigned int reserved[5]; //!< Reserved for future use
  912. } nvmlAccountingStats_t;
  913. /** @} */
  914. /***************************************************************************************************/
  915. /** @defgroup nvmlInitializationAndCleanup Initialization and Cleanup
  916. * This chapter describes the methods that handle NVML initialization and cleanup.
  917. * It is the user's responsibility to call \ref nvmlInit() before calling any other methods, and
  918. * nvmlShutdown() once NVML is no longer being used.
  919. * @{
  920. */
  921. /***************************************************************************************************/
  922. /**
  923. * Initialize NVML, but don't initialize any GPUs yet.
  924. *
  925. * \note In NVML 5.319 new nvmlInit_v2 has replaced nvmlInit"_v1" (default in NVML 4.304 and older) that
  926. * did initialize all GPU devices in the system.
  927. *
  928. * This allows NVML to communicate with a GPU
  929. * when other GPUs in the system are unstable or in a bad state. When using this API, GPUs are
  930. * discovered and initialized in nvmlDeviceGetHandleBy* functions instead.
  931. *
  932. * \note To contrast nvmlInit_v2 with nvmlInit"_v1", NVML 4.304 nvmlInit"_v1" will fail when any detected GPU is in
  933. * a bad or unstable state.
  934. *
  935. * For all products.
  936. *
  937. * This method should be called once before invoking any other methods in the library.
  938. * A reference count of the number of initializations is maintained. Shutdown only occurs
  939. * when the reference count reaches zero.
  940. *
  941. * @return
  942. * - \ref NVML_SUCCESS if NVML has been properly initialized
  943. * - \ref NVML_ERROR_DRIVER_NOT_LOADED if NVIDIA driver is not running
  944. * - \ref NVML_ERROR_NO_PERMISSION if NVML does not have permission to talk to the driver
  945. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  946. */
  947. nvmlReturn_t DECLDIR nvmlInit(void);
  948. /**
  949. * Shut down NVML by releasing all GPU resources previously allocated with \ref nvmlInit().
  950. *
  951. * For all products.
  952. *
  953. * This method should be called after NVML work is done, once for each call to \ref nvmlInit().
  954. * A reference count of the number of initializations is maintained. Shutdown only occurs
  955. * when the reference count reaches zero. For backwards compatibility, no error is reported if
  956. * nvmlShutdown() is called more times than nvmlInit().
  957. *
  958. * @return
  959. * - \ref NVML_SUCCESS if NVML has been properly shut down
  960. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  961. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  962. */
  963. nvmlReturn_t DECLDIR nvmlShutdown(void);
  964. /** @} */
  965. /***************************************************************************************************/
  966. /** @defgroup nvmlErrorReporting Error reporting
  967. * This chapter describes helper functions for error reporting routines.
  968. * @{
  969. */
  970. /***************************************************************************************************/
  971. /**
  972. * Helper method for converting NVML error codes into readable strings.
  973. *
  974. * For all products.
  975. *
  976. * @param result NVML error code to convert
  977. *
  978. * @return String representation of the error.
  979. * NOTE(review): ownership/lifetime of the returned string is not stated here; confirm before freeing or modifying it.
  980. */
  981. const DECLDIR char* nvmlErrorString(nvmlReturn_t result);
  982. /** @} */
  983. /***************************************************************************************************/
  984. /** @defgroup nvmlConstants Constants
  985. * @{
  986. */
  987. /***************************************************************************************************/
  988. /**
  989. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetInforomVersion and \ref nvmlDeviceGetInforomImageVersion
  990. */
  991. #define NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE 16
  992. /**
  993. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetUUID
  994. */
  995. #define NVML_DEVICE_UUID_BUFFER_SIZE 80
  996. /**
  997. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetBoardPartNumber
  998. */
  999. #define NVML_DEVICE_PART_NUMBER_BUFFER_SIZE 80
  1000. /**
  1001. * Buffer size (in characters, including the NULL terminator) guaranteed to be large enough for \ref nvmlSystemGetDriverVersion
  1002. */
  1003. #define NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE 80
  1004. /**
  1005. * Buffer size (in characters, including the NULL terminator) guaranteed to be large enough for \ref nvmlSystemGetNVMLVersion
  1006. */
  1007. #define NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE 80
  1008. /**
  1009. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetName
  1010. */
  1011. #define NVML_DEVICE_NAME_BUFFER_SIZE 64
  1012. /**
  1013. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetSerial
  1014. */
  1015. #define NVML_DEVICE_SERIAL_BUFFER_SIZE 30
  1016. /**
  1017. * Buffer size guaranteed to be large enough for \ref nvmlDeviceGetVbiosVersion
  1018. */
  1019. #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32
  1020. /** @} */
  1021. /***************************************************************************************************/
  1022. /** @defgroup nvmlSystemQueries System Queries
  1023. * This chapter describes the queries that NVML can perform against the local system. These queries
  1024. * are not device-specific.
  1025. * @{
  1026. */
  1027. /***************************************************************************************************/
  1028. /**
  1029. * Retrieves the version of the system's graphics driver.
  1030. *
  1031. * For all products.
  1032. *
  1033. * The version identifier is an alphanumeric string. It will not exceed 80 characters in length
  1034. * (including the NULL terminator). See \ref nvmlConstants::NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE.
  1035. *
  1036. * @param version Caller-provided buffer in which to return the version identifier
  1037. * @param length The maximum allowed length of the string returned in \a version
  1038. *
  1039. * @return
  1040. * - \ref NVML_SUCCESS if \a version has been set
  1041. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1042. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
  1043. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1044. */
  1045. nvmlReturn_t DECLDIR nvmlSystemGetDriverVersion(char *version, unsigned int length);
  1046. /**
  1047. * Retrieves the version of the NVML library.
  1048. *
  1049. * For all products.
  1050. *
  1051. * The version identifier is an alphanumeric string. It will not exceed 80 characters in length
  1052. * (including the NULL terminator). See \ref nvmlConstants::NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE.
  1053. *
  1054. * @param version Caller-provided buffer in which to return the version identifier
  1055. * @param length The maximum allowed length of the string returned in \a version
  1056. *
  1057. * @return
  1058. * - \ref NVML_SUCCESS if \a version has been set
  1059. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
  1060. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1061. */
  1062. nvmlReturn_t DECLDIR nvmlSystemGetNVMLVersion(char *version, unsigned int length);
  1063. /**
  1064. * Gets the name of the process with the provided process id.
  1065. *
  1066. * For all products.
  1067. *
  1068. * The returned process name is cropped to the provided length.
  1069. * The name string is encoded in ANSI.
  1070. *
  1071. * @param pid The identifier of the process
  1072. * @param name Reference in which to return the process name
  1073. * @param length The maximum allowed length of the string returned in \a name
  1074. *
  1075. * @return
  1076. * - \ref NVML_SUCCESS if \a name has been set
  1077. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1078. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a name is NULL or \a length is 0.
  1079. * - \ref NVML_ERROR_NOT_FOUND if the process doesn't exist
  1080. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  1081. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1082. */
  1083. nvmlReturn_t DECLDIR nvmlSystemGetProcessName(unsigned int pid, char *name, unsigned int length);
  1084. /** @} */
  1085. /***************************************************************************************************/
  1086. /** @defgroup nvmlUnitQueries Unit Queries
  1087. * This chapter describes the queries that NVML can perform against each unit. For S-class systems only.
  1088. * In each case the device is identified with an nvmlUnit_t handle. This handle is obtained by
  1089. * calling \ref nvmlUnitGetHandleByIndex().
  1090. * @{
  1091. */
  1092. /***************************************************************************************************/
  1093. /**
  1094. * Retrieves the number of units in the system. The count defines the valid index range for \ref nvmlUnitGetHandleByIndex().
  1095. *
  1096. * For S-class products.
  1097. *
  1098. * @param unitCount Reference in which to return the number of units
  1099. *
  1100. * @return
  1101. * - \ref NVML_SUCCESS if \a unitCount has been set
  1102. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1103. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unitCount is NULL
  1104. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1105. */
  1106. nvmlReturn_t DECLDIR nvmlUnitGetCount(unsigned int *unitCount);
  1107. /**
  1108. * Acquire the handle for a particular unit, based on its index.
  1109. *
  1110. * For S-class products.
  1111. *
  1112. * Valid indices are derived from the \a unitCount returned by \ref nvmlUnitGetCount().
  1113. * For example, if \a unitCount is 2 the valid indices are 0 and 1, corresponding to UNIT 0 and UNIT 1.
  1114. *
  1115. * The order in which NVML enumerates units is not guaranteed to be consistent between reboots.
  1116. *
  1117. * @param index The index of the target unit, >= 0 and < \a unitCount
  1118. * @param unit Reference in which to return the unit handle
  1119. *
  1120. * @return
  1121. * - \ref NVML_SUCCESS if \a unit has been set
  1122. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1123. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a unit is NULL
  1124. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1125. */
  1126. nvmlReturn_t DECLDIR nvmlUnitGetHandleByIndex(unsigned int index, nvmlUnit_t *unit);
  1127. /**
  1128. * Retrieves the static information associated with a unit.
  1129. *
  1130. * For S-class products.
  1131. *
  1132. * See \ref nvmlUnitInfo_t for details on available unit info.
  1133. *
  1134. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1135. * @param info Reference in which to return the unit information
  1136. *
  1137. * @return
  1138. * - \ref NVML_SUCCESS if \a info has been populated
  1139. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1140. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a info is NULL
  1141. */
  1142. nvmlReturn_t DECLDIR nvmlUnitGetUnitInfo(nvmlUnit_t unit, nvmlUnitInfo_t *info);
  1143. /**
  1144. * Retrieves the LED state associated with this unit.
  1145. *
  1146. * For S-class products.
  1147. *
  1148. * See \ref nvmlLedState_t for details on allowed states.
  1149. *
  1150. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1151. * @param state Reference in which to return the current LED state
  1152. *
  1153. * @return
  1154. * - \ref NVML_SUCCESS if \a state has been set
  1155. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1156. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a state is NULL
  1157. * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
  1158. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1159. *
  1160. * @see nvmlUnitSetLedState()
  1161. */
  1162. nvmlReturn_t DECLDIR nvmlUnitGetLedState(nvmlUnit_t unit, nvmlLedState_t *state);
  1163. /**
  1164. * Retrieves the PSU stats for the unit.
  1165. *
  1166. * For S-class products.
  1167. *
  1168. * See \ref nvmlPSUInfo_t for details on available PSU info.
  1169. *
  1170. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1171. * @param psu Reference in which to return the PSU information
  1172. *
  1173. * @return
  1174. * - \ref NVML_SUCCESS if \a psu has been populated
  1175. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1176. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a psu is NULL
  1177. * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
  1178. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1179. */
  1180. nvmlReturn_t DECLDIR nvmlUnitGetPsuInfo(nvmlUnit_t unit, nvmlPSUInfo_t *psu);
  1181. /**
  1182. * Retrieves the temperature readings for the unit, in degrees C.
  1183. *
  1184. * For S-class products.
  1185. *
  1186. * Depending on the product, readings may be available for intake (type=0),
  1187. * exhaust (type=1) and board (type=2).
  1188. *
  1189. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1190. * @param type The type of reading to take (0 = intake, 1 = exhaust, 2 = board)
  1191. * @param temp Reference in which to return the temperature reading for the requested \a type
  1192. *
  1193. * @return
  1194. * - \ref NVML_SUCCESS if \a temp has been populated
  1195. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1196. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit or \a type is invalid or \a temp is NULL
  1197. * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
  1198. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1199. */
  1200. nvmlReturn_t DECLDIR nvmlUnitGetTemperature(nvmlUnit_t unit, unsigned int type, unsigned int *temp);
  1201. /**
  1202. * Retrieves the fan speed readings for the unit.
  1203. *
  1204. * For S-class products.
  1205. *
  1206. * See \ref nvmlUnitFanSpeeds_t for details on available fan speed info.
  1207. *
  1208. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1209. * @param fanSpeeds Reference in which to return the fan speed information
  1210. *
  1211. * @return
  1212. * - \ref NVML_SUCCESS if \a fanSpeeds has been populated
  1213. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1214. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid or \a fanSpeeds is NULL
  1215. * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
  1216. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1217. */
  1218. nvmlReturn_t DECLDIR nvmlUnitGetFanSpeedInfo(nvmlUnit_t unit, nvmlUnitFanSpeeds_t *fanSpeeds);
  1219. /**
  1220. * Retrieves the set of GPU devices that are attached to the specified unit.
  1221. *
  1222. * For S-class products.
  1223. *
  1224. * The \a deviceCount argument is expected to be set to the size of the input \a devices array.
  1225. *
  1226. * @param unit The handle of the target unit (see \ref nvmlUnitGetHandleByIndex)
  1227. * @param deviceCount Reference in which to provide the \a devices array size, and
  1228. * to return the number of attached GPU devices
  1229. * @param devices Reference in which to return the references to the attached GPU devices
  1230. *
  1231. * @return
  1232. * - \ref NVML_SUCCESS if \a deviceCount and \a devices have been populated
  1233. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1234. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a deviceCount indicates that the \a devices array is too small
  1235. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit is invalid, or either of \a deviceCount or \a devices is NULL
  1236. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1237. */
  1238. nvmlReturn_t DECLDIR nvmlUnitGetDevices(nvmlUnit_t unit, unsigned int *deviceCount, nvmlDevice_t *devices);
  1239. /**
  1240. * Retrieves the IDs and firmware versions for any Host Interface Cards (HICs) in the system.
  1241. *
  1242. * For S-class products.
  1243. *
  1244. * The \a hwbcCount argument is expected to be set to the size of the input \a hwbcEntries array.
  1245. * The HIC must be connected to an S-class system for it to be reported by this function.
  1246. *
  1247. * @param hwbcCount Reference in which to provide the size of the \a hwbcEntries array; populated on success (presumably with the number of entries returned; confirm)
  1248. * @param hwbcEntries Array in which to return one \ref nvmlHwbcEntry_t per HIC
  1249. *
  1250. * @return
  1251. * - \ref NVML_SUCCESS if \a hwbcCount and \a hwbcEntries have been populated
  1252. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1253. * - \ref NVML_ERROR_INVALID_ARGUMENT if either \a hwbcCount or \a hwbcEntries is NULL
  1254. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a hwbcCount indicates that the \a hwbcEntries array is too small
  1255. */
  1256. nvmlReturn_t DECLDIR nvmlSystemGetHicVersion(unsigned int *hwbcCount, nvmlHwbcEntry_t *hwbcEntries);
  1257. /** @} */
  1258. /***************************************************************************************************/
  1259. /** @defgroup nvmlDeviceQueries Device Queries
  1260. * This chapter describes the queries that NVML can perform against each device.
  1261. * In each case the device is identified with an nvmlDevice_t handle. This handle is obtained by
  1262. * calling one of \ref nvmlDeviceGetHandleByIndex(), \ref nvmlDeviceGetHandleBySerial(),
  1263. * \ref nvmlDeviceGetHandleByPciBusId(), or \ref nvmlDeviceGetHandleByUUID().
  1264. * @{
  1265. */
  1266. /***************************************************************************************************/
  1267. /**
  1268. * Retrieves the number of compute devices in the system. A compute device is a single GPU.
  1269. *
  1270. * For all products.
  1271. *
  1272. * Note: New nvmlDeviceGetCount_v2 (default in NVML 5.319) returns the count of all devices in the system
  1273. * even if nvmlDeviceGetHandleByIndex_v2 returns NVML_ERROR_NO_PERMISSION for such a device.
  1274. * Update your code to handle this error, or use the NVML 4.304 or older nvml header file.
  1275. * For backward binary compatibility reasons the _v1 version of the API is still present in the shared
  1276. * library.
  1277. * The old _v1 version of nvmlDeviceGetCount doesn't count devices that NVML has no permission to talk to.
  1278. *
  1279. * @param deviceCount Reference in which to return the number of accessible devices
  1280. *
  1281. * @return
  1282. * - \ref NVML_SUCCESS if \a deviceCount has been set
  1283. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1284. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a deviceCount is NULL
  1285. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1286. */
  1287. nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *deviceCount);
  1288. /**
  1289. * Acquire the handle for a particular device, based on its index.
  1290. *
  1291. * For all products.
  1292. *
  1293. * Valid indices are derived from the \a accessibleDevices count returned by
  1294. * \ref nvmlDeviceGetCount(). For example, if \a accessibleDevices is 2 the valid indices
  1295. * are 0 and 1, corresponding to GPU 0 and GPU 1.
  1296. *
  1297. * The order in which NVML enumerates devices is not guaranteed to be consistent between reboots. For that reason it
  1298. * is recommended that devices be looked up by their PCI ids or UUID. See
  1299. * \ref nvmlDeviceGetHandleByUUID() and \ref nvmlDeviceGetHandleByPciBusId().
  1300. *
  1301. * Note: The NVML index may not correlate with other APIs, such as the CUDA device index.
  1302. *
  1303. * Starting from NVML 5, this API causes NVML to initialize the target GPU.
  1304. * NVML may initialize additional GPUs if:
  1305. * - The target GPU is an SLI slave
  1306. *
  1307. * Note: New nvmlDeviceGetCount_v2 (default in NVML 5.319) returns the count of all devices in the system
  1308. * even if nvmlDeviceGetHandleByIndex_v2 returns NVML_ERROR_NO_PERMISSION for such a device.
  1309. * Update your code to handle this error, or use the NVML 4.304 or older nvml header file.
  1310. * For backward binary compatibility reasons the _v1 version of the API is still present in the shared
  1311. * library.
  1312. * The old _v1 version of nvmlDeviceGetCount doesn't count devices that NVML has no permission to talk to.
  1313. *
  1314. * This means that nvmlDeviceGetHandleByIndex_v2 and _v1 can return different devices for the same index.
  1315. * If you don't touch the macros that map old (_v1) versions to _v2 versions at the top of the file you don't
  1316. * need to worry about that.
  1317. *
  1318. * @param index The index of the target GPU, >= 0 and < \a accessibleDevices
  1319. * @param device Reference in which to return the device handle
  1320. *
  1321. * @return
  1322. * - \ref NVML_SUCCESS if \a device has been set
  1323. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1324. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a index is invalid or \a device is NULL
  1325. * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables
  1326. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to talk to this device
  1327. * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs
  1328. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1329. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1330. *
  1331. * @see nvmlDeviceGetIndex
  1332. * @see nvmlDeviceGetCount
  1333. */
  1334. nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
  1335. /**
  1336. * Acquire the handle for a particular device, based on its board serial number.
  1337. *
  1338. * For Fermi &tm; or newer fully supported devices.
  1339. *
  1340. * This number corresponds to the value printed directly on the board, and to the value returned by
  1341. * \ref nvmlDeviceGetSerial().
  1342. *
  1343. * @deprecated Since more than one GPU can exist on a single board, this function is deprecated in favor
  1344. * of \ref nvmlDeviceGetHandleByUUID.
  1345. * For dual GPU boards this function will return NVML_ERROR_INVALID_ARGUMENT.
  1346. *
  1347. * Starting from NVML 5, this API causes NVML to initialize the target GPU.
  1348. * NVML may initialize additional GPUs as it searches for the target GPU.
  1349. *
  1350. * @param serial The board serial number of the target GPU
  1351. * @param device Reference in which to return the device handle
  1352. *
  1353. * @return
  1354. * - \ref NVML_SUCCESS if \a device has been set
  1355. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1356. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a serial is invalid, \a device is NULL or more than one
  1357. * device has the same serial (dual GPU boards)
  1358. * - \ref NVML_ERROR_NOT_FOUND if \a serial does not match a valid device on the system
  1359. * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables
  1360. * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs
  1361. * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible
  1362. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1363. *
  1364. * @see nvmlDeviceGetSerial
  1365. * @see nvmlDeviceGetHandleByUUID
  1366. */
  1367. nvmlReturn_t DECLDIR nvmlDeviceGetHandleBySerial(const char *serial, nvmlDevice_t *device);
  1368. /**
  1369. * Acquire the handle for a particular device, based on the globally unique immutable UUID associated with each device.
  1370. *
  1371. * For all products.
  1372. *
  1373. * @param uuid The UUID of the target GPU
  1374. * @param device Reference in which to return the device handle
  1375. *
  1376. * Starting from NVML 5, this API causes NVML to initialize the target GPU.
  1377. * NVML may initialize additional GPUs as it searches for the target GPU.
  1378. *
  1379. * @return
  1380. * - \ref NVML_SUCCESS if \a device has been set
  1381. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1382. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a uuid is invalid or \a device is NULL
  1383. * - \ref NVML_ERROR_NOT_FOUND if \a uuid does not match a valid device on the system
  1384. * - \ref NVML_ERROR_INSUFFICIENT_POWER if any attached devices have improperly attached external power cables
  1385. * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs
  1386. * - \ref NVML_ERROR_GPU_IS_LOST if any GPU has fallen off the bus or is otherwise inaccessible
  1387. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1388. *
  1389. * @see nvmlDeviceGetUUID
  1390. */
  1391. nvmlReturn_t DECLDIR nvmlDeviceGetHandleByUUID(const char *uuid, nvmlDevice_t *device);
  1392. /**
  1393. * Acquire the handle for a particular device, based on its PCI bus id.
  1394. *
  1395. * For all products.
  1396. *
  1397. * This value corresponds to the nvmlPciInfo_t::busId returned by \ref nvmlDeviceGetPciInfo().
  1398. *
  1399. * Starting from NVML 5, this API causes NVML to initialize the target GPU.
  1400. * NVML may initialize additional GPUs if:
  1401. * - The target GPU is an SLI slave
  1402. *
  1403. * \note NVML 4.304 and older versions of nvmlDeviceGetHandleByPciBusId"_v1" return NVML_ERROR_NOT_FOUND
  1404. * instead of NVML_ERROR_NO_PERMISSION.
  1405. *
  1406. * @param pciBusId The PCI bus id of the target GPU
  1407. * @param device Reference in which to return the device handle
  1408. *
  1409. * @return
  1410. * - \ref NVML_SUCCESS if \a device has been set
  1411. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1412. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciBusId is invalid or \a device is NULL
  1413. * - \ref NVML_ERROR_NOT_FOUND if \a pciBusId does not match a valid device on the system
  1414. * - \ref NVML_ERROR_INSUFFICIENT_POWER if the attached device has improperly attached external power cables
  1415. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to talk to this device
  1416. * - \ref NVML_ERROR_IRQ_ISSUE if NVIDIA kernel detected an interrupt issue with the attached GPUs
  1417. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1418. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1419. */
  1420. nvmlReturn_t DECLDIR nvmlDeviceGetHandleByPciBusId(const char *pciBusId, nvmlDevice_t *device);
  1421. /**
  1422. * Retrieves the name of this device.
  1423. *
  1424. * For all products.
  1425. *
  1426. * The name is an alphanumeric string that denotes a particular product, e.g. Tesla &tm; C2070. It will not
  1427. * exceed 64 characters in length (including the NUL terminator). See \ref
  1428. * nvmlConstants::NVML_DEVICE_NAME_BUFFER_SIZE.
  1429. *
  1430. * @param device The identifier of the target device
  1431. * @param name Reference in which to return the product name
  1432. * @param length The maximum allowed length of the string returned in \a name
  1433. *
  1434. * @return
  1435. * - \ref NVML_SUCCESS if \a name has been set
  1436. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1437. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a name is NULL
  1438. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1439. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1440. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1441. */
  1442. nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length);
  1443. /**
  1444. * Retrieves the brand of this device.
  1445. *
  1446. * For all products.
  1447. *
  1448. * The type is a member of \ref nvmlBrandType_t defined above.
  1449. *
  1450. * @param device The identifier of the target device
  1451. * @param type Reference in which to return the product brand type
  1452. *
  1453. * @return
  1454. * - \ref NVML_SUCCESS if \a type has been set
  1455. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1456. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a type is NULL
  1457. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1458. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1459. */
  1460. nvmlReturn_t DECLDIR nvmlDeviceGetBrand(nvmlDevice_t device, nvmlBrandType_t *type);
  1461. /**
  1462. * Retrieves the NVML index of this device.
  1463. *
  1464. * For all products.
  1465. *
  1466. * Valid indices are derived from the \a accessibleDevices count returned by
  1467. * \ref nvmlDeviceGetCount(). For example, if \a accessibleDevices is 2 the valid indices
  1468. * are 0 and 1, corresponding to GPU 0 and GPU 1.
  1469. *
  1470. * The order in which NVML enumerates devices has no guarantees of consistency between reboots. For that reason, it
  1471. * is recommended that devices be looked up by their PCI bus ids or GPU UUIDs. See
  1472. * \ref nvmlDeviceGetHandleByPciBusId() and \ref nvmlDeviceGetHandleByUUID().
  1473. *
  1474. * Note: The NVML index may not correlate with other APIs, such as the CUDA device index.
  1475. *
  1476. * @param device The identifier of the target device
  1477. * @param index Reference in which to return the NVML index of the device
  1478. *
  1479. * @return
  1480. * - \ref NVML_SUCCESS if \a index has been set
  1481. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1482. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a index is NULL
  1483. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1484. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1485. *
  1486. * @see nvmlDeviceGetHandleByIndex()
  1487. * @see nvmlDeviceGetCount()
  1488. */
  1489. nvmlReturn_t DECLDIR nvmlDeviceGetIndex(nvmlDevice_t device, unsigned int *index);
  1490. /**
  1491. * Retrieves the globally unique board serial number associated with this device's board.
  1492. *
  1493. * For all products with an inforom.
  1494. *
  1495. * The serial number is an alphanumeric string that will not exceed 30 characters (including the NUL terminator).
  1496. * This number matches the serial number tag that is physically attached to the board. See \ref
  1497. * nvmlConstants::NVML_DEVICE_SERIAL_BUFFER_SIZE.
  1498. *
  1499. * @param device The identifier of the target device
  1500. * @param serial Reference in which to return the board/module serial number
  1501. * @param length The maximum allowed length of the string returned in \a serial
  1502. *
  1503. * @return
  1504. * - \ref NVML_SUCCESS if \a serial has been set
  1505. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1506. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a serial is NULL
  1507. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1508. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1509. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1510. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1511. */
  1512. nvmlReturn_t DECLDIR nvmlDeviceGetSerial(nvmlDevice_t device, char *serial, unsigned int length);
  1513. /**
  1514. * Retrieves an array of unsigned longs (sized to cpuSetSize) of bitmasks with the ideal CPU affinity for the device.
  1515. * For example, with 32-bit unsigned longs, if processors 0, 1, 32, and 33 are ideal for the device and
  1516. * cpuSetSize == 2, then cpuSet[0] = 0x3 and cpuSet[1] = 0x3.
  1517. *
  1518. * For Kepler &tm; or newer fully supported devices.
  1519. * Supported on Linux only.
  1520. *
  1521. * @param device The identifier of the target device
  1522. * @param cpuSetSize The size of the cpuSet array that is safe to access
  1523. * @param cpuSet Array reference in which to return a bitmask of CPUs, 64 CPUs per
  1524. * unsigned long on 64-bit machines, 32 on 32-bit machines
  1525. *
  1526. * @return
  1527. * - \ref NVML_SUCCESS if \a cpuSet has been filled
  1528. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1529. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a cpuSetSize == 0, or \a cpuSet is NULL
  1530. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1531. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1532. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1533. */
  1534. nvmlReturn_t DECLDIR nvmlDeviceGetCpuAffinity(nvmlDevice_t device, unsigned int cpuSetSize, unsigned long *cpuSet);
  1535. /**
  1536. * Sets the ideal affinity for the calling thread and device using the guidelines
  1537. * given in nvmlDeviceGetCpuAffinity(). Note, this is a change as of version 8.0.
  1538. * Older versions set the affinity for a calling process and all children.
  1539. * Currently supports up to 64 processors.
  1540. *
  1541. * For Kepler &tm; or newer fully supported devices.
  1542. * Supported on Linux only.
  1543. *
  1544. * @param device The identifier of the target device
  1545. *
  1546. * @return
  1547. * - \ref NVML_SUCCESS if the calling thread has been successfully bound
  1548. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1549. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  1550. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1551. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1552. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1553. */
  1554. nvmlReturn_t DECLDIR nvmlDeviceSetCpuAffinity(nvmlDevice_t device);
  1555. /**
  1556. * Clear all affinity bindings for the calling thread. Note, this is a change as of version
  1557. * 8.0 as older versions cleared the affinity for a calling process and all children.
  1558. *
  1559. * For Kepler &tm; or newer fully supported devices.
  1560. * Supported on Linux only.
  1561. *
  1562. * @param device The identifier of the target device
  1563. *
  1564. * @return
  1565. * - \ref NVML_SUCCESS if the calling thread has been successfully unbound
  1566. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  1567. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1568. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1569. */
  1570. nvmlReturn_t DECLDIR nvmlDeviceClearCpuAffinity(nvmlDevice_t device);
  1571. /**
  1572. * Retrieve the common ancestor for two devices.
  1573. * For all products.
  1574. * Supported on Linux only.
  1575. *
  1576. * @param device1 The identifier of the first device
  1577. * @param device2 The identifier of the second device
  1578. * @param pathInfo Reference in which to return the \ref nvmlGpuTopologyLevel_t that gives the path type
  1579. *
  1580. * @return
  1581. * - \ref NVML_SUCCESS if \a pathInfo has been set
  1582. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device1, or \a device2 is invalid, or \a pathInfo is NULL
  1583. * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature
  1584. * - \ref NVML_ERROR_UNKNOWN if an error has occurred in underlying topology discovery
  1585. */
  1586. nvmlReturn_t DECLDIR nvmlDeviceGetTopologyCommonAncestor(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo);
  1587. /**
  1588. * Retrieve the set of GPUs that are nearest to a given device at a specific interconnectivity level.
  1589. * For all products.
  1590. * Supported on Linux only.
  1591. *
  1592. * @param device The identifier of the target device
  1593. * @param level The \ref nvmlGpuTopologyLevel_t level to search for other GPUs
  1594. * @param count When zero, is set to the number of matching GPUs such that \a deviceArray
  1595. * can be malloc'd. When non-zero, \a deviceArray will be filled with \a count
  1596. * number of device handles.
  1597. * @param deviceArray An array of device handles for GPUs found at \a level
  1598. *
  1599. * @return
  1600. * - \ref NVML_SUCCESS if \a deviceArray or \a count (if initially zero) has been set
  1601. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a level, or \a count is invalid, or \a deviceArray is NULL with a non-zero \a count
  1602. * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature
  1603. * - \ref NVML_ERROR_UNKNOWN if an error has occurred in underlying topology discovery
  1604. */
  1605. nvmlReturn_t DECLDIR nvmlDeviceGetTopologyNearestGpus(nvmlDevice_t device, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray);
  1606. /**
  1607. * Retrieve the set of GPUs that have a CPU affinity with the given CPU number.
  1608. * For all products.
  1609. * Supported on Linux only.
  1610. *
  1611. * @param cpuNumber The CPU number
  1612. * @param count When zero, is set to the number of matching GPUs such that \a deviceArray
  1613. * can be malloc'd. When non-zero, \a deviceArray will be filled with \a count
  1614. * number of device handles.
  1615. * @param deviceArray An array of device handles for GPUs found with affinity to \a cpuNumber
  1616. *
  1617. * @return
  1618. * - \ref NVML_SUCCESS if \a deviceArray or \a count (if initially zero) has been set
  1619. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a cpuNumber, or \a count is invalid, or \a deviceArray is NULL with a non-zero \a count
  1620. * - \ref NVML_ERROR_NOT_SUPPORTED if the device or OS does not support this feature
  1621. * - \ref NVML_ERROR_UNKNOWN if an error has occurred in underlying topology discovery
  1622. */
  1623. nvmlReturn_t DECLDIR nvmlSystemGetTopologyGpuSet(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray);
  1624. /**
  1625. * Retrieve the status for a given p2p capability index between a given pair of GPUs.
  1626. *
  1627. * @param device1 The first device
  1628. * @param device2 The second device
  1629. * @param p2pIndex p2p Capability Index being looked for between \a device1 and \a device2
  1630. * @param p2pStatus Reference in which to return the status of the \a p2pIndex
  1631. * between \a device1 and \a device2
  1632. * @return
  1633. * - \ref NVML_SUCCESS if \a p2pStatus has been populated
  1634. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device1 or \a device2 or \a p2pIndex is invalid or \a p2pStatus is NULL
  1635. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1636. */
  1637. nvmlReturn_t DECLDIR nvmlDeviceGetP2PStatus(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex,nvmlGpuP2PStatus_t *p2pStatus);
  1638. /**
  1639. * Retrieves the globally unique immutable UUID associated with this device, as a 5 part hexadecimal string,
  1640. * that augments the immutable, board serial identifier.
  1641. *
  1642. * For all products.
  1643. *
  1644. * The UUID is a globally unique identifier. It is the only available identifier for pre-Fermi-architecture products.
  1645. * It does NOT correspond to any identifier printed on the board. It will not exceed 80 characters in length
  1646. * (including the NUL terminator). See \ref nvmlConstants::NVML_DEVICE_UUID_BUFFER_SIZE.
  1647. *
  1648. * @param device The identifier of the target device
  1649. * @param uuid Reference in which to return the GPU UUID
  1650. * @param length The maximum allowed length of the string returned in \a uuid
  1651. *
  1652. * @return
  1653. * - \ref NVML_SUCCESS if \a uuid has been set
  1654. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1655. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a uuid is NULL
  1656. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1657. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1658. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1659. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1660. */
  1661. nvmlReturn_t DECLDIR nvmlDeviceGetUUID(nvmlDevice_t device, char *uuid, unsigned int length);
  1662. /**
  1663. * Retrieves the minor number for the device. The minor number for the device is such that the NVIDIA device node file for
  1664. * each GPU will have the form /dev/nvidia[minor number].
  1665. *
  1666. * For all products.
  1667. * Supported on Linux only.
  1668. *
  1669. * @param device The identifier of the target device
  1670. * @param minorNumber Reference in which to return the minor number for the device
  1671. * @return
  1672. * - \ref NVML_SUCCESS if the minor number is successfully retrieved
  1673. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1674. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minorNumber is NULL
  1675. * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device
  1676. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1677. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1678. */
  1679. nvmlReturn_t DECLDIR nvmlDeviceGetMinorNumber(nvmlDevice_t device, unsigned int *minorNumber);
  1680. /**
  1681. * Retrieves the device board part number which is programmed into the board's InfoROM.
  1682. *
  1683. * For all products.
  1684. *
  1685. * @param device Identifier of the target device
  1686. * @param partNumber Reference to the buffer in which to return the board part number
  1687. * @param length Length of the \a partNumber buffer
  1688. *
  1689. * @return
  1690. * - \ref NVML_SUCCESS if \a partNumber has been set
  1691. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1692. * - \ref NVML_ERROR_NOT_SUPPORTED if the needed VBIOS fields have not been filled
  1693. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a partNumber is NULL
  1694. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1695. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1696. */
  1697. nvmlReturn_t DECLDIR nvmlDeviceGetBoardPartNumber(nvmlDevice_t device, char* partNumber, unsigned int length);
  1698. /**
  1699. * Retrieves the version information for the device's infoROM object.
  1700. *
  1701. * For all products with an inforom.
  1702. *
  1703. * Fermi and higher parts have non-volatile on-board memory for persisting device info, such as aggregate
  1704. * ECC counts. The version of the data structures in this memory may change from time to time. It will not
  1705. * exceed 16 characters in length (including the NUL terminator).
  1706. * See \ref nvmlConstants::NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE.
  1707. *
  1708. * See \ref nvmlInforomObject_t for details on the available infoROM objects.
  1709. *
  1710. * @param device The identifier of the target device
  1711. * @param object The target infoROM object
  1712. * @param version Reference in which to return the infoROM version
  1713. * @param length The maximum allowed length of the string returned in \a version
  1714. *
  1715. * @return
  1716. * - \ref NVML_SUCCESS if \a version has been set
  1717. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1718. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
  1719. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1720. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have an infoROM
  1721. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1722. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1723. *
  1724. * @see nvmlDeviceGetInforomImageVersion
  1725. */
  1726. nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t device, nvmlInforomObject_t object, char *version, unsigned int length);
  1727. /**
  1728. * Retrieves the global infoROM image version.
  1729. *
  1730. * For all products with an inforom.
  1731. *
  1732. * The image version, just like the VBIOS version, uniquely describes the exact version of the infoROM flashed on the board,
  1733. * in contrast to the infoROM object version, which is only an indicator of supported features.
  1734. * The version string will not exceed 16 characters in length (including the NUL terminator).
  1735. * See \ref nvmlConstants::NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE.
  1736. *
  1737. * @param device The identifier of the target device
  1738. * @param version Reference in which to return the infoROM image version
  1739. * @param length The maximum allowed length of the string returned in \a version
  1740. *
  1741. * @return
  1742. * - \ref NVML_SUCCESS if \a version has been set
  1743. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1744. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a version is NULL
  1745. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  1746. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have an infoROM
  1747. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1748. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1749. *
  1750. * @see nvmlDeviceGetInforomVersion
  1751. */
  1752. nvmlReturn_t DECLDIR nvmlDeviceGetInforomImageVersion(nvmlDevice_t device, char *version, unsigned int length);
  1753. /**
  1754. * Retrieves the checksum of the configuration stored in the device's infoROM.
  1755. *
  1756. * For all products with an inforom.
  1757. *
  1758. * Can be used to make sure that two GPUs have the exact same configuration.
  1759. * The current checksum takes into account the configuration stored in the PWR and ECC infoROM objects.
  1760. * The checksum can change between driver releases or when the user changes configuration (e.g. disable/enable ECC).
  1761. *
  1762. * @param device The identifier of the target device
  1763. * @param checksum Reference in which to return the infoROM configuration checksum
  1764. *
  1765. * @return
  1766. * - \ref NVML_SUCCESS if \a checksum has been set
  1767. * - \ref NVML_ERROR_CORRUPTED_INFOROM if the device's checksum couldn't be retrieved due to infoROM corruption
  1768. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1769. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a checksum is NULL
  1770. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1771. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1772. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1773. */
  1774. nvmlReturn_t DECLDIR nvmlDeviceGetInforomConfigurationChecksum(nvmlDevice_t device, unsigned int *checksum);
  1775. /**
  1776. * Reads the infoROM from the flash and verifies the checksums.
  1777. *
  1778. * For all products with an inforom.
  1779. *
  1780. * @param device The identifier of the target device
  1781. *
  1782. * @return
  1783. * - \ref NVML_SUCCESS if the infoROM is not corrupted
  1784. * - \ref NVML_ERROR_CORRUPTED_INFOROM if the device's infoROM is corrupted
  1785. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1786. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1787. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1788. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1789. */
  1790. nvmlReturn_t DECLDIR nvmlDeviceValidateInforom(nvmlDevice_t device);
  1791. /**
  1792. * Retrieves the display mode for the device.
  1793. *
  1794. * For all products.
  1795. *
  1796. * This function indicates whether a physical display (e.g. a monitor) is currently connected to
  1797. * any of the device's connectors.
  1798. *
  1799. * See \ref nvmlEnableState_t for details on allowed modes.
  1800. *
  1801. * @param device The identifier of the target device
  1802. * @param display Reference in which to return the display mode
  1803. *
  1804. * @return
  1805. * - \ref NVML_SUCCESS if \a display has been set
  1806. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1807. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a display is NULL
  1808. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1809. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1810. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1811. */
  1812. nvmlReturn_t DECLDIR nvmlDeviceGetDisplayMode(nvmlDevice_t device, nvmlEnableState_t *display);
  1813. /**
  1814. * Retrieves the display active state for the device.
  1815. *
  1816. * For all products.
  1817. *
  1818. * This function indicates whether a display is initialized on the device.
  1819. * For example, whether an X Server is attached to this device and has allocated memory for the screen.
  1820. *
  1821. * A display can be active even when no monitor is physically attached.
  1822. *
  1823. * See \ref nvmlEnableState_t for details on allowed modes.
  1824. *
  1825. * @param device The identifier of the target device
  1826. * @param isActive Reference in which to return the display active state
  1827. *
  1828. * @return
  1829. * - \ref NVML_SUCCESS if \a isActive has been set
  1830. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1831. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isActive is NULL
  1832. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1833. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1834. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1835. */
  1836. nvmlReturn_t DECLDIR nvmlDeviceGetDisplayActive(nvmlDevice_t device, nvmlEnableState_t *isActive);
  1837. /**
  1838. * Retrieves the persistence mode associated with this device.
  1839. *
  1840. * For all products.
  1841. * For Linux only.
  1842. *
  1843. * When driver persistence mode is enabled, the driver software state is not torn down when the last
  1844. * client disconnects. By default, this feature is disabled.
  1845. *
  1846. * See \ref nvmlEnableState_t for details on allowed modes.
  1847. *
  1848. * @param device The identifier of the target device
  1849. * @param mode Reference in which to return the current driver persistence mode
  1850. *
  1851. * @return
  1852. * - \ref NVML_SUCCESS if \a mode has been set
  1853. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1854. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL
  1855. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1856. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1857. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1858. *
  1859. * @see nvmlDeviceSetPersistenceMode()
  1860. */
  1861. nvmlReturn_t DECLDIR nvmlDeviceGetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t *mode);
  1862. /**
  1863. * Retrieves the PCI attributes of this device.
  1864. *
  1865. * For all products.
  1866. *
  1867. * See \ref nvmlPciInfo_t for details on the available PCI info.
  1868. *
  1869. * @param device The identifier of the target device
  1870. * @param pci Reference in which to return the PCI info (\ref nvmlPciInfo_t)
  1871. *
  1872. * @return
  1873. * - \ref NVML_SUCCESS if \a pci has been populated
  1874. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1875. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pci is NULL
  1876. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1877. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1878. */
  1879. nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci);
  1880. /**
  1881. * Retrieves the maximum PCIe link generation possible with this device and system.
  1882. *
  1883. * For example, for a generation 2 PCIe device attached to a generation 1 PCIe bus, the max link generation this function will
  1884. * report is generation 1.
  1885. *
  1886. * For Fermi &tm; or newer fully supported devices.
  1887. *
  1888. * @param device The identifier of the target device
  1889. * @param maxLinkGen Reference in which to return the max PCIe link generation
  1890. *
  1891. * @return
  1892. * - \ref NVML_SUCCESS if \a maxLinkGen has been populated
  1893. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1894. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a maxLinkGen is NULL
  1895. * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available
  1896. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1897. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1898. */
  1899. nvmlReturn_t DECLDIR nvmlDeviceGetMaxPcieLinkGeneration(nvmlDevice_t device, unsigned int *maxLinkGen);
  1900. /**
  1901. * Retrieves the maximum PCIe link width possible with this device and system.
  1902. *
  1903. * For example, for a device with a 16x PCIe bus width attached to a 8x PCIe system bus, this function will report
  1904. * a max link width of 8.
  1905. *
  1906. * For Fermi &tm; or newer fully supported devices.
  1907. *
  1908. * @param device The identifier of the target device
  1909. * @param maxLinkWidth Reference in which to return the max PCIe link width
  1910. *
  1911. * @return
  1912. * - \ref NVML_SUCCESS if \a maxLinkWidth has been populated
  1913. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1914. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a maxLinkWidth is NULL
  1915. * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available
  1916. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1917. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1918. */
  1919. nvmlReturn_t DECLDIR nvmlDeviceGetMaxPcieLinkWidth(nvmlDevice_t device, unsigned int *maxLinkWidth);
  1920. /**
  1921. * Retrieves the current PCIe link generation
  1922. *
  1923. * For Fermi &tm; or newer fully supported devices.
  1924. *
  1925. * @param device The identifier of the target device
  1926. * @param currLinkGen Reference in which to return the current PCIe link generation
  1927. *
  1928. * @return
  1929. * - \ref NVML_SUCCESS if \a currLinkGen has been populated
  1930. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1931. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a currLinkGen is null
  1932. * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available
  1933. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1934. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1935. */
  1936. nvmlReturn_t DECLDIR nvmlDeviceGetCurrPcieLinkGeneration(nvmlDevice_t device, unsigned int *currLinkGen);
  1937. /**
  1938. * Retrieves the current PCIe link width
  1939. *
  1940. * For Fermi &tm; or newer fully supported devices.
  1941. *
  1942. * @param device The identifier of the target device
  1943. * @param currLinkWidth Reference in which to return the current PCIe link width
  1944. *
  1945. * @return
  1946. * - \ref NVML_SUCCESS if \a currLinkWidth has been populated
  1947. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1948. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a currLinkWidth is null
  1949. * - \ref NVML_ERROR_NOT_SUPPORTED if PCIe link information is not available
  1950. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1951. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1952. */
  1953. nvmlReturn_t DECLDIR nvmlDeviceGetCurrPcieLinkWidth(nvmlDevice_t device, unsigned int *currLinkWidth);
  1954. /**
  1955. * Retrieve PCIe utilization information.
  1956. * This function queries a byte counter over a 20ms interval, and thus the value returned is the
  1957. * PCIe throughput over that interval.
  1958. *
  1959. * For Maxwell &tm; or newer fully supported devices.
  1960. *
  1961. * This method is not supported on virtualized GPU environments.
  1962. *
  1963. * @param device The identifier of the target device
  1964. * @param counter The specific counter that should be queried \ref nvmlPcieUtilCounter_t
  1965. * @param value Reference in which to return throughput in KB/s
  1966. *
  1967. * @return
  1968. * - \ref NVML_SUCCESS if \a value has been set
  1969. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1970. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a counter is invalid, or \a value is NULL
  1971. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1972. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1973. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1974. */
  1975. nvmlReturn_t DECLDIR nvmlDeviceGetPcieThroughput(nvmlDevice_t device, nvmlPcieUtilCounter_t counter, unsigned int *value);
  1976. /**
  1977. * Retrieve the PCIe replay counter.
  1978. *
  1979. * For Kepler &tm; or newer fully supported devices.
  1980. *
  1981. * @param device The identifier of the target device
  1982. * @param value Reference in which to return the counter's value
  1983. *
  1984. * @return
  1985. * - \ref NVML_SUCCESS if \a value has been set
  1986. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  1987. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a value is NULL
  1988. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  1989. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  1990. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  1991. */
  1992. nvmlReturn_t DECLDIR nvmlDeviceGetPcieReplayCounter(nvmlDevice_t device, unsigned int *value);
  1993. /**
  1994. * Retrieves the current clock speeds for the device.
  1995. *
  1996. * For Fermi &tm; or newer fully supported devices.
  1997. *
  1998. * See \ref nvmlClockType_t for details on available clock information.
  1999. *
  2000. * @param device The identifier of the target device
  2001. * @param type Identify which clock domain to query
  2002. * @param clock Reference in which to return the clock speed in MHz
  2003. *
  2004. * @return
  2005. * - \ref NVML_SUCCESS if \a clock has been set
  2006. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2007. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clock is NULL
  2008. * - \ref NVML_ERROR_NOT_SUPPORTED if the device cannot report the specified clock
  2009. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2010. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2011. */
  2012. nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
  2013. /**
  2014. * Retrieves the maximum clock speeds for the device.
  2015. *
  2016. * For Fermi &tm; or newer fully supported devices.
  2017. *
  2018. * See \ref nvmlClockType_t for details on available clock information.
  2019. *
  2020. * \note On GPUs from Fermi family current P0 clocks (reported by \ref nvmlDeviceGetClockInfo) can differ from max clocks
  2021. * by a few MHz.
  2022. *
  2023. * @param device The identifier of the target device
  2024. * @param type Identify which clock domain to query
  2025. * @param clock Reference in which to return the clock speed in MHz
  2026. *
  2027. * @return
  2028. * - \ref NVML_SUCCESS if \a clock has been set
  2029. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2030. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clock is NULL
  2031. * - \ref NVML_ERROR_NOT_SUPPORTED if the device cannot report the specified clock
  2032. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2033. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2034. */
  2035. nvmlReturn_t DECLDIR nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
  2036. /**
  2037. * Retrieves the current setting of a clock that applications will use unless an overspec situation occurs.
  2038. * Can be changed using \ref nvmlDeviceSetApplicationsClocks.
  2039. *
  2040. * For Kepler &tm; or newer fully supported devices.
  2041. *
  2042. * @param device The identifier of the target device
  2043. * @param clockType Identify which clock domain to query
  2044. * @param clockMHz Reference in which to return the clock in MHz
  2045. *
  2046. * @return
  2047. * - \ref NVML_SUCCESS if \a clockMHz has been set
  2048. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2049. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid
  2050. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2051. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2052. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2053. */
  2054. nvmlReturn_t DECLDIR nvmlDeviceGetApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz);
  2055. /**
  2056. * Retrieves the default applications clock that GPU boots with or
  2057. * defaults to after \ref nvmlDeviceResetApplicationsClocks call.
  2058. *
  2059. * For Kepler &tm; or newer fully supported devices.
  2060. *
  2061. * @param device The identifier of the target device
  2062. * @param clockType Identify which clock domain to query
  2063. * @param clockMHz Reference in which to return the default clock in MHz
  2064. *
  2065. * @return
  2066. * - \ref NVML_SUCCESS if \a clockMHz has been set
  2067. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2068. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid
  2069. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2070. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2071. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2072. *
  2073. * \see nvmlDeviceGetApplicationsClock
  2074. */
  2075. nvmlReturn_t DECLDIR nvmlDeviceGetDefaultApplicationsClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz);
  2076. /**
  2077. * Resets the application clock to the default value
  2078. *
  2079. * This is the applications clock that will be used after system reboot or driver reload.
  2080. * Default value is constant, but the current value can be changed using \ref nvmlDeviceSetApplicationsClocks.
  2081. *
  2082. * On Pascal and newer hardware, if clocks were previously locked with \ref nvmlDeviceSetApplicationsClocks,
  2083. * this call will unlock clocks. This returns clocks to their default behavior of automatically boosting above
  2084. * base clocks as thermal limits allow.
  2085. *
  2086. * @see nvmlDeviceGetApplicationsClock
  2087. * @see nvmlDeviceSetApplicationsClocks
  2088. *
  2089. * For Fermi &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices.
  2090. *
  2091. * @param device The identifier of the target device
  2092. *
  2093. * @return
  2094. * - \ref NVML_SUCCESS if new settings were successfully set
  2095. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2096. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  2097. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2098. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2099. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2100. */
  2101. nvmlReturn_t DECLDIR nvmlDeviceResetApplicationsClocks(nvmlDevice_t device);
  2102. /**
  2103. * Retrieves the clock speed for the clock specified by the clock type and clock ID.
  2104. *
  2105. * For Kepler &tm; or newer fully supported devices.
  2106. *
  2107. * @param device The identifier of the target device
  2108. * @param clockType Identify which clock domain to query
  2109. * @param clockId Identify which clock in the domain to query
  2110. * @param clockMHz Reference in which to return the clock in MHz
  2111. *
  2112. * @return
  2113. * - \ref NVML_SUCCESS if \a clockMHz has been set
  2114. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2115. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid
  2116. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2117. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2118. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2119. */
  2120. nvmlReturn_t DECLDIR nvmlDeviceGetClock(nvmlDevice_t device, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz);
  2121. /**
  2122. * Retrieves the customer defined maximum boost clock speed specified by the given clock type.
  2123. *
  2124. * For newer than Maxwell &tm; fully supported devices.
  2125. *
  2126. * @param device The identifier of the target device
  2127. * @param clockType Identify which clock domain to query
  2128. * @param clockMHz Reference in which to return the clock in MHz
  2129. *
  2130. * @return
  2131. * - \ref NVML_SUCCESS if \a clockMHz has been set
  2132. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2133. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clockMHz is NULL or \a clockType is invalid
  2134. * - \ref NVML_ERROR_NOT_SUPPORTED if the device or the \a clockType on this device does not support this feature
  2135. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2136. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2137. */
  2138. nvmlReturn_t DECLDIR nvmlDeviceGetMaxCustomerBoostClock(nvmlDevice_t device, nvmlClockType_t clockType, unsigned int *clockMHz);
  2139. /**
  2140. * Retrieves the list of possible memory clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks.
  2141. *
  2142. * For Kepler &tm; or newer fully supported devices.
  2143. *
  2144. * @param device The identifier of the target device
  2145. * @param count Reference in which to provide the \a clocksMHz array size, and
  2146. * to return the number of elements
  2147. * @param clocksMHz Reference in which to return the clocks in MHz
  2148. *
  2149. * @return
  2150. * - \ref NVML_SUCCESS if \a count and \a clocksMHz have been populated
  2151. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2152. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a count is NULL
  2153. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2154. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small (\a count is set to the number of
  2155. * required elements)
  2156. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2157. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2158. *
  2159. * @see nvmlDeviceSetApplicationsClocks
  2160. * @see nvmlDeviceGetSupportedGraphicsClocks
  2161. */
  2162. nvmlReturn_t DECLDIR nvmlDeviceGetSupportedMemoryClocks(nvmlDevice_t device, unsigned int *count, unsigned int *clocksMHz);
  2163. /**
  2164. * Retrieves the list of possible graphics clocks that can be used as an argument for \ref nvmlDeviceSetApplicationsClocks.
  2165. *
  2166. * For Kepler &tm; or newer fully supported devices.
  2167. *
  2168. * @param device The identifier of the target device
  2169. * @param memoryClockMHz Memory clock for which to return possible graphics clocks
  2170. * @param count Reference in which to provide the \a clocksMHz array size, and
  2171. * to return the number of elements
  2172. * @param clocksMHz Reference in which to return the clocks in MHz
  2173. *
  2174. * @return
  2175. * - \ref NVML_SUCCESS if \a count and \a clocksMHz have been populated
  2176. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2177. * - \ref NVML_ERROR_NOT_FOUND if the specified \a memoryClockMHz is not a supported frequency
  2178. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a count is NULL
  2179. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2180. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small
  2181. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2182. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2183. *
  2184. * @see nvmlDeviceSetApplicationsClocks
  2185. * @see nvmlDeviceGetSupportedMemoryClocks
  2186. */
  2187. nvmlReturn_t DECLDIR nvmlDeviceGetSupportedGraphicsClocks(nvmlDevice_t device, unsigned int memoryClockMHz, unsigned int *count, unsigned int *clocksMHz);
  2188. /**
  2189. * Retrieve the current state of Auto Boosted clocks on a device and store it in \a isEnabled
  2190. *
  2191. * For Kepler &tm; or newer fully supported devices.
  2192. *
  2193. * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates
  2194. * to maximize performance as thermal limits allow.
  2195. *
  2196. * On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks.
  2197. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost
  2198. * behavior.
  2199. *
  2200. * @param device The identifier of the target device
  2201. * @param isEnabled Where to store the current state of Auto Boosted clocks of the target device
  2202. * @param defaultIsEnabled Where to store the default Auto Boosted clocks behavior of the target device that the device will
  2203. * revert to when no applications are using the GPU
  2204. *
  2205. * @return
  2206. * - \ref NVML_SUCCESS If \a isEnabled has been set with the Auto Boosted clocks state of \a device
  2207. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2208. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isEnabled is NULL
  2209. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks
  2210. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2211. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2212. *
  2213. */
  2214. nvmlReturn_t DECLDIR nvmlDeviceGetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled);
  2215. /**
  2216. * Try to set the current state of Auto Boosted clocks on a device.
  2217. *
  2218. * For Kepler &tm; or newer fully supported devices.
  2219. *
  2220. * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates
  2221. * to maximize performance as thermal limits allow. Auto Boosted clocks should be disabled if fixed clock
  2222. * rates are desired.
  2223. *
  2224. * Non-root users may use this API by default but can be restricted by root from using this API by calling
  2225. * \ref nvmlDeviceSetAPIRestriction with apiType=NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS.
  2226. * Note: Persistence Mode is required to modify current Auto Boost settings, therefore, it must be enabled.
  2227. *
  2228. * On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks.
  2229. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost
  2230. * behavior.
  2231. *
  2232. * @param device The identifier of the target device
  2233. * @param enabled What state to try to set Auto Boosted clocks of the target device to
  2234. *
  2235. * @return
  2236. * - \ref NVML_SUCCESS If the Auto Boosted clocks were successfully set to the state specified by \a enabled
  2237. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2238. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  2239. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks
  2240. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2241. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2242. *
  2243. */
  2244. nvmlReturn_t DECLDIR nvmlDeviceSetAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled);
  2245. /**
  2246. * Try to set the default state of Auto Boosted clocks on a device. This is the default state that Auto Boosted clocks will
  2247. * return to when no compute processes (e.g. CUDA applications which have an active context) are running
  2248. *
  2249. * For Kepler &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices.
  2250. * Requires root/admin permissions.
  2251. *
  2252. * Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates
  2253. * to maximize performance as thermal limits allow. Auto Boosted clocks should be disabled if fixed clock
  2254. * rates are desired.
  2255. *
  2256. * On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks.
  2257. * Use \ref nvmlDeviceSetApplicationsClocks and \ref nvmlDeviceResetApplicationsClocks to control Auto Boost
  2258. * behavior.
  2259. *
  2260. * @param device The identifier of the target device
  2261. * @param enabled What state to try to set default Auto Boosted clocks of the target device to
  2262. * @param flags Flags that change the default behavior. Currently Unused.
  2263. *
  2264. * @return
  2265. * - \ref NVML_SUCCESS If the Auto Boosted clock's default state was successfully set to the state specified by \a enabled
  2266. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2267. * - \ref NVML_ERROR_NO_PERMISSION If the calling user does not have permission to change Auto Boosted clock's default state.
  2268. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  2269. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support Auto Boosted clocks
  2270. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2271. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2272. *
  2273. */
  2274. nvmlReturn_t DECLDIR nvmlDeviceSetDefaultAutoBoostedClocksEnabled(nvmlDevice_t device, nvmlEnableState_t enabled, unsigned int flags);
  2275. /**
  2276. * Retrieves the intended operating speed of the device's fan.
  2277. *
  2278. * Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, the
  2279. * output will not match the actual fan speed.
  2280. *
  2281. * For all discrete products with dedicated fans.
  2282. *
  2283. * The fan speed is expressed as a percent of the maximum, i.e. full speed is 100%.
  2284. *
  2285. * @param device The identifier of the target device
  2286. * @param speed Reference in which to return the fan speed percentage
  2287. *
  2288. * @return
  2289. * - \ref NVML_SUCCESS if \a speed has been set
  2290. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2291. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a speed is NULL
  2292. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a fan
  2293. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2294. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2295. */
  2296. nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t device, unsigned int *speed);
  2297. /**
  2298. * Retrieves the current temperature readings for the device, in degrees C.
  2299. *
  2300. * For all products.
  2301. *
  2302. * See \ref nvmlTemperatureSensors_t for details on available temperature sensors.
  2303. *
  2304. * @param device The identifier of the target device
  2305. * @param sensorType Flag that indicates which sensor reading to retrieve
  2306. * @param temp Reference in which to return the temperature reading
  2307. *
  2308. * @return
  2309. * - \ref NVML_SUCCESS if \a temp has been set
  2310. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2311. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a sensorType is invalid or \a temp is NULL
  2312. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have the specified sensor
  2313. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2314. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2315. */
  2316. nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp);
  2317. /**
  2318. * Retrieves the temperature threshold for the GPU with the specified threshold type in degrees C.
  2319. *
  2320. * For Kepler &tm; or newer fully supported devices.
  2321. *
  2322. * See \ref nvmlTemperatureThresholds_t for details on available temperature thresholds.
  2323. *
  2324. * @param device The identifier of the target device
  2325. * @param thresholdType The type of threshold value queried
  2326. * @param temp Reference in which to return the temperature reading
  2327. * @return
  2328. * - \ref NVML_SUCCESS if \a temp has been set
  2329. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2330. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a thresholdType is invalid or \a temp is NULL
  2331. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not have a temperature sensor or is unsupported
  2332. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2333. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2334. */
  2335. nvmlReturn_t DECLDIR nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp);
  2336. /**
  2337. * Retrieves the current performance state for the device.
  2338. *
  2339. * For Fermi &tm; or newer fully supported devices.
  2340. *
  2341. * See \ref nvmlPstates_t for details on allowed performance states.
  2342. *
  2343. * @param device The identifier of the target device
  2344. * @param pState Reference in which to return the performance state reading
  2345. *
  2346. * @return
  2347. * - \ref NVML_SUCCESS if \a pState has been set
  2348. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2349. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pState is NULL
  2350. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2351. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2352. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2353. */
  2354. nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t device, nvmlPstates_t *pState);
  2355. /**
  2356. * Retrieves current clocks throttling reasons.
  2357. *
  2358. * For all fully supported products.
  2359. *
  2360. * \note More than one bit can be enabled at the same time. Multiple reasons can be affecting clocks at once.
  2361. *
  2362. * @param device The identifier of the target device
  2363. * @param clocksThrottleReasons Reference in which to return bitmask of active clocks throttle
  2364. * reasons
  2365. *
  2366. * @return
  2367. * - \ref NVML_SUCCESS if \a clocksThrottleReasons has been set
  2368. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2369. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a clocksThrottleReasons is NULL
  2370. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2371. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2372. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2373. *
  2374. * @see nvmlClocksThrottleReasons
  2375. * @see nvmlDeviceGetSupportedClocksThrottleReasons
  2376. */
  2377. nvmlReturn_t DECLDIR nvmlDeviceGetCurrentClocksThrottleReasons(nvmlDevice_t device, unsigned long long *clocksThrottleReasons);
  2378. /**
  2379. * Retrieves bitmask of supported clocks throttle reasons that can be returned by
  2380. * \ref nvmlDeviceGetCurrentClocksThrottleReasons
  2381. *
  2382. * For all fully supported products.
  2383. *
  2384. * This method is not supported on virtualized GPU environments.
  2385. *
  2386. * @param device The identifier of the target device
  2387. * @param supportedClocksThrottleReasons Reference in which to return bitmask of supported
  2388. * clocks throttle reasons
  2389. *
  2390. * @return
  2391. * - \ref NVML_SUCCESS if \a supportedClocksThrottleReasons has been set
  2392. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2393. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a supportedClocksThrottleReasons is NULL
  2394. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2395. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2396. *
  2397. * @see nvmlClocksThrottleReasons
  2398. * @see nvmlDeviceGetCurrentClocksThrottleReasons
  2399. */
  2400. nvmlReturn_t DECLDIR nvmlDeviceGetSupportedClocksThrottleReasons(nvmlDevice_t device, unsigned long long *supportedClocksThrottleReasons);
  2401. /**
  2402. * Deprecated: Use \ref nvmlDeviceGetPerformanceState. This function exposes an incorrect generalization.
  2403. *
  2404. * Retrieve the current performance state for the device.
  2405. *
  2406. * For Fermi &tm; or newer fully supported devices.
  2407. *
  2408. * See \ref nvmlPstates_t for details on allowed performance states.
  2409. *
  2410. * @param device The identifier of the target device
  2411. * @param pState Reference in which to return the performance state reading
  2412. *
  2413. * @return
  2414. * - \ref NVML_SUCCESS if \a pState has been set
  2415. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2416. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a pState is NULL
  2417. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2418. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2419. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2420. */
  2421. nvmlReturn_t DECLDIR nvmlDeviceGetPowerState(nvmlDevice_t device, nvmlPstates_t *pState);
  2422. /**
  2423. * This API has been deprecated.
  2424. *
  2425. * Retrieves the power management mode associated with this device.
  2426. *
  2427. * For products from the Fermi family.
  2428. * - Requires \a NVML_INFOROM_POWER version 3.0 or higher.
  2429. *
  2430. * For products from the Kepler or newer families.
  2431. * - Does not require \a NVML_INFOROM_POWER object.
  2432. *
  2433. * This flag indicates whether any power management algorithm is currently active on the device. An
  2434. * enabled state does not necessarily mean the device is being actively throttled -- only that
  2435. * the driver will do so if the appropriate conditions are met.
  2436. *
  2437. * See \ref nvmlEnableState_t for details on allowed modes.
  2438. *
  2439. * @param device The identifier of the target device
  2440. * @param mode Reference in which to return the current power management mode
  2441. *
  2442. * @return
  2443. * - \ref NVML_SUCCESS if \a mode has been set
  2444. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2445. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL
  2446. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2447. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2448. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2449. */
  2450. nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementMode(nvmlDevice_t device, nvmlEnableState_t *mode);
  2451. /**
  2452. * Retrieves the power management limit associated with this device.
  2453. *
  2454. * For Fermi &tm; or newer fully supported devices.
  2455. *
  2456. * The power limit defines the upper boundary for the card's power draw. If
  2457. * the card's total power draw reaches this limit the power management algorithm kicks in.
  2458. *
  2459. * This reading is only available if power management mode is supported.
  2460. * See \ref nvmlDeviceGetPowerManagementMode.
  2461. *
  2462. * @param device The identifier of the target device
  2463. * @param limit Reference in which to return the power management limit in milliwatts
  2464. *
  2465. * @return
  2466. * - \ref NVML_SUCCESS if \a limit has been set
  2467. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2468. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a limit is NULL
  2469. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2470. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2471. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2472. */
  2473. nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit);
  2474. /**
  2475. * Retrieves information about possible values of power management limits on this device.
  2476. *
  2477. * For Kepler &tm; or newer fully supported devices.
  2478. *
  2479. * @param device The identifier of the target device
  2480. * @param minLimit Reference in which to return the minimum power management limit in milliwatts
  2481. * @param maxLimit Reference in which to return the maximum power management limit in milliwatts
  2482. *
  2483. * @return
  2484. * - \ref NVML_SUCCESS if \a minLimit and \a maxLimit have been set
  2485. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2486. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a minLimit or \a maxLimit is NULL
  2487. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2488. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2489. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2490. *
  2491. * @see nvmlDeviceSetPowerManagementLimit
  2492. */
  2493. nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit);
  2494. /**
  2495. * Retrieves default power management limit on this device, in milliwatts.
  2496. * Default power management limit is a power management limit that the device boots with.
  2497. *
  2498. * For Kepler &tm; or newer fully supported devices.
  2499. *
  2500. * @param device The identifier of the target device
  2501. * @param defaultLimit Reference in which to return the default power management limit in milliwatts
  2502. *
  2503. * @return
  2504. * - \ref NVML_SUCCESS if \a defaultLimit has been set
  2505. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2506. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a defaultLimit is NULL
  2507. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2508. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2509. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2510. */
  2511. nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementDefaultLimit(nvmlDevice_t device, unsigned int *defaultLimit);
  2512. /**
  2513. * Retrieves power usage, in milliwatts, for this GPU and its associated circuitry (e.g. memory).
  2514. *
  2515. * For Fermi &tm; or newer fully supported devices.
  2516. *
  2517. * On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
  2518. *
  2519. * It is only available if power management mode is supported. See \ref nvmlDeviceGetPowerManagementMode.
  2520. *
  2521. * @param device The identifier of the target device
  2522. * @param power Reference in which to return the power usage information, in milliwatts
  2523. *
  2524. * @return
  2525. * - \ref NVML_SUCCESS if \a power has been populated
  2526. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2527. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a power is NULL
  2528. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support power readings
  2529. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2530. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2531. */
  2532. nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *power);
  2533. /**
  2534. * Get the effective power limit that the driver enforces after taking into account all limiters
  2535. *
  2536. * Note: This can be different from the \ref nvmlDeviceGetPowerManagementLimit if other limits are set elsewhere.
  2537. * This includes the out of band power limit interface.
  2538. *
  2539. * For Kepler &tm; or newer fully supported devices.
  2540. *
  2541. * @param device The device to communicate with
  2542. * @param limit Reference in which to return the power management limit in milliwatts
  2543. *
  2544. * @return
  2545. * - \ref NVML_SUCCESS if \a limit has been set
  2546. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2547. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a limit is NULL
  2548. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2549. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2550. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2551. */
  2552. nvmlReturn_t DECLDIR nvmlDeviceGetEnforcedPowerLimit(nvmlDevice_t device, unsigned int *limit);
  2553. /**
  2554. * Retrieves the current GOM and pending GOM (the one that GPU will switch to after reboot).
  2555. *
  2556. * For GK110 M-class and X-class Tesla &tm; products from the Kepler family.
  2557. * Modes \ref NVML_GOM_LOW_DP and \ref NVML_GOM_ALL_ON are supported on fully supported GeForce products.
  2558. * Not supported on Quadro &reg; and Tesla &tm; C-class products.
  2559. *
  2560. * @param device The identifier of the target device
  2561. * @param current Reference in which to return the current GOM
  2562. * @param pending Reference in which to return the pending GOM
  2563. *
  2564. * @return
  2565. * - \ref NVML_SUCCESS if \a current and \a pending have been populated
  2566. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2567. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a current or \a pending is NULL
  2568. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2569. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2570. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2571. *
  2572. * @see nvmlGpuOperationMode_t
  2573. * @see nvmlDeviceSetGpuOperationMode
  2574. */
  2575. nvmlReturn_t DECLDIR nvmlDeviceGetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t *current, nvmlGpuOperationMode_t *pending);
  2576. /**
  2577. * Retrieves the amount of used, free and total memory available on the device, in bytes.
  2578. *
  2579. * For all products.
  2580. *
  2581. * Enabling ECC reduces the amount of total available memory, due to the extra required parity bits.
  2582. * Under WDDM most device memory is allocated and managed on startup by Windows.
  2583. *
  2584. * Under Linux and Windows TCC, the reported amount of used memory is equal to the sum of memory allocated
  2585. * by all active channels on the device.
  2586. *
  2587. * See \ref nvmlMemory_t for details on available memory info.
  2588. *
  2589. * @param device The identifier of the target device
  2590. * @param memory Reference in which to return the memory information
  2591. *
  2592. * @return
  2593. * - \ref NVML_SUCCESS if \a memory has been populated
  2594. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2595. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memory is NULL
  2596. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2597. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2598. */
  2599. nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory);
  2600. /**
  2601. * Retrieves the current compute mode for the device.
  2602. *
  2603. * For all products.
  2604. *
  2605. * See \ref nvmlComputeMode_t for details on allowed compute modes.
  2606. *
  2607. * @param device The identifier of the target device
  2608. * @param mode Reference in which to return the current compute mode
  2609. *
  2610. * @return
  2611. * - \ref NVML_SUCCESS if \a mode has been set
  2612. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2613. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL
  2614. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2615. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2616. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2617. *
  2618. * @see nvmlDeviceSetComputeMode()
  2619. */
  2620. nvmlReturn_t DECLDIR nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMode_t *mode);
  2621. /**
  2622. * Retrieves the current and pending ECC modes for the device.
  2623. *
  2624. * For Fermi &tm; or newer fully supported devices.
  2625. * Only applicable to devices with ECC.
  2626. * Requires \a NVML_INFOROM_ECC version 1.0 or higher.
  2627. *
  2628. * Changing ECC modes requires a reboot. The "pending" ECC mode refers to the target mode following
  2629. * the next reboot.
  2630. *
  2631. * See \ref nvmlEnableState_t for details on allowed modes.
  2632. *
  2633. * @param device The identifier of the target device
  2634. * @param current Reference in which to return the current ECC mode
  2635. * @param pending Reference in which to return the pending ECC mode
  2636. *
  2637. * @return
  2638. * - \ref NVML_SUCCESS if \a current and \a pending have been set
  2639. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2640. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or either \a current or \a pending is NULL
  2641. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2642. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2643. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2644. *
  2645. * @see nvmlDeviceSetEccMode()
  2646. */
  2647. nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t device, nvmlEnableState_t *current, nvmlEnableState_t *pending);
  2648. /**
  2649. * Retrieves the device boardId from 0-N.
  2650. * Devices with the same boardId indicate GPUs connected to the same PLX. Use in conjunction with
  2651. * \ref nvmlDeviceGetMultiGpuBoard() to decide if they are on the same board as well.
  2652. * The boardId returned is a unique ID for the current configuration. Uniqueness and ordering across
  2653. * reboots and system configurations are not guaranteed (e.g. if a Tesla K40c returns 0x100 and
  2654. * the two GPUs on a Tesla K10 in the same system return 0x200, it is not guaranteed they will
  2655. * always return those values but they will always be different from each other).
  2656. *
  2657. *
  2658. * For Fermi &tm; or newer fully supported devices.
  2659. *
  2660. * @param device The identifier of the target device
  2661. * @param boardId Reference in which to return the device's board ID
  2662. *
  2663. * @return
  2664. * - \ref NVML_SUCCESS if \a boardId has been set
  2665. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2666. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a boardId is NULL
  2667. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2668. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2669. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2670. */
  2671. nvmlReturn_t DECLDIR nvmlDeviceGetBoardId(nvmlDevice_t device, unsigned int *boardId);
  2672. /**
  2673. * Retrieves whether the device is on a Multi-GPU Board
  2674. * Devices that are on multi-GPU boards will set \a multiGpuBool to a non-zero value.
  2675. *
  2676. * For Fermi &tm; or newer fully supported devices.
  2677. *
  2678. * @param device The identifier of the target device
  2679. * @param multiGpuBool Reference in which to return a zero or non-zero value
  2680. * to indicate whether the device is on a multi GPU board
  2681. *
  2682. * @return
  2683. * - \ref NVML_SUCCESS if \a multiGpuBool has been set
  2684. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2685. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a multiGpuBool is NULL
  2686. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2687. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2688. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2689. */
  2690. nvmlReturn_t DECLDIR nvmlDeviceGetMultiGpuBoard(nvmlDevice_t device, unsigned int *multiGpuBool);
  2691. /**
  2692. * Retrieves the total ECC error counts for the device.
  2693. *
  2694. * For Fermi &tm; or newer fully supported devices.
  2695. * Only applicable to devices with ECC.
  2696. * Requires \a NVML_INFOROM_ECC version 1.0 or higher.
  2697. * Requires ECC Mode to be enabled.
  2698. *
  2699. * The total error count is the sum of errors across each of the separate memory systems, i.e. the total set of
  2700. * errors across the entire device.
  2701. *
  2702. * See \ref nvmlMemoryErrorType_t for a description of available error types.\n
  2703. * See \ref nvmlEccCounterType_t for a description of available counter types.
  2704. *
  2705. * @param device The identifier of the target device
  2706. * @param errorType Flag that specifies the type of the errors.
  2707. * @param counterType Flag that specifies the counter-type of the errors.
  2708. * @param eccCounts Reference in which to return the specified ECC errors
  2709. *
  2710. * @return
  2711. * - \ref NVML_SUCCESS if \a eccCounts has been set
  2712. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2713. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a errorType or \a counterType is invalid, or \a eccCounts is NULL
  2714. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2715. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2716. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2717. *
  2718. * @see nvmlDeviceClearEccErrorCounts()
  2719. */
  2720. nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, unsigned long long *eccCounts);
  2721. /**
  2722. * Retrieves the detailed ECC error counts for the device.
  2723. *
  2724. * @deprecated This API supports only a fixed set of ECC error locations
  2725. * On different GPU architectures different locations are supported
  2726. * See \ref nvmlDeviceGetMemoryErrorCounter
  2727. *
  2728. * For Fermi &tm; or newer fully supported devices.
  2729. * Only applicable to devices with ECC.
  2730. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to report aggregate location-based ECC counts.
  2731. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to report all other ECC counts.
  2732. * Requires ECC Mode to be enabled.
  2733. *
  2734. * Detailed errors provide separate ECC counts for specific parts of the memory system.
  2735. *
  2736. * Reports zero for unsupported ECC error counters when a subset of ECC error counters are supported.
  2737. *
  2738. * See \ref nvmlMemoryErrorType_t for a description of available error types.\n
  2739. * See \ref nvmlEccCounterType_t for a description of available counter types.\n
  2740. * See \ref nvmlEccErrorCounts_t for a description of provided detailed ECC counts.
  2741. *
  2742. * @param device The identifier of the target device
  2743. * @param errorType Flag that specifies the type of the errors.
  2744. * @param counterType Flag that specifies the counter-type of the errors.
  2745. * @param eccCounts Reference in which to return the specified ECC errors
  2746. *
  2747. * @return
  2748. * - \ref NVML_SUCCESS if \a eccCounts has been populated
  2749. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2750. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a errorType or \a counterType is invalid, or \a eccCounts is NULL
  2751. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2752. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2753. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2754. *
  2755. * @see nvmlDeviceClearEccErrorCounts()
  2756. */
  2757. nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t device, nvmlMemoryErrorType_t errorType, nvmlEccCounterType_t counterType, nvmlEccErrorCounts_t *eccCounts);
  2758. /**
  2759. * Retrieves the requested memory error counter for the device.
  2760. *
  2761. * For Fermi &tm; or newer fully supported devices.
  2762. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to report aggregate location-based memory error counts.
  2763. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to report all other memory error counts.
  2764. *
  2765. * Only applicable to devices with ECC.
  2766. *
  2767. * Requires ECC Mode to be enabled.
  2768. *
  2769. * See \ref nvmlMemoryErrorType_t for a description of available memory error types.\n
  2770. * See \ref nvmlEccCounterType_t for a description of available counter types.\n
  2771. * See \ref nvmlMemoryLocation_t for a description of available counter locations.\n
  2772. *
  2773. * @param device The identifier of the target device
  2774. * @param errorType Flag that specifies the type of error.
  2775. * @param counterType Flag that specifies the counter-type of the errors.
  2776. * @param locationType Specifies the location of the counter.
  2777. * @param count Reference in which to return the ECC counter
  2778. *
  2779. * @return
  2780. * - \ref NVML_SUCCESS if \a count has been populated
  2781. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2782. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a errorType, \a counterType or \a locationType is
  2783. * invalid, or \a count is NULL
  2784. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support ECC error reporting in the specified memory
  2785. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2786. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2787. */
  2788. nvmlReturn_t DECLDIR nvmlDeviceGetMemoryErrorCounter(nvmlDevice_t device, nvmlMemoryErrorType_t errorType,
  2789. nvmlEccCounterType_t counterType,
  2790. nvmlMemoryLocation_t locationType, unsigned long long *count);
  2791. /**
  2792. * Retrieves the current utilization rates for the device's major subsystems.
  2793. *
  2794. * For Fermi &tm; or newer fully supported devices.
  2795. *
  2796. * See \ref nvmlUtilization_t for details on available utilization rates.
  2797. *
  2798. * \note During driver initialization when ECC is enabled one can see high GPU and Memory Utilization readings.
  2799. * This is caused by ECC Memory Scrubbing mechanism that is performed during driver initialization.
  2800. *
  2801. * @param device The identifier of the target device
  2802. * @param utilization Reference in which to return the utilization information
  2803. *
  2804. * @return
  2805. * - \ref NVML_SUCCESS if \a utilization has been populated
  2806. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2807. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a utilization is NULL
  2808. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2809. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2810. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2811. */
  2812. nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t device, nvmlUtilization_t *utilization);
  2813. /**
  2814. * Retrieves the current utilization and sampling size in microseconds for the Encoder
  2815. *
  2816. * For Kepler &tm; or newer fully supported devices.
  2817. *
  2818. * @param device The identifier of the target device
  2819. * @param utilization Reference to an unsigned int for encoder utilization info
  2820. * @param samplingPeriodUs Reference to an unsigned int for the sampling period in microseconds
  2821. *
  2822. * @return
  2823. * - \ref NVML_SUCCESS if \a utilization has been populated
  2824. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2825. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a utilization is NULL, or \a samplingPeriodUs is NULL
  2826. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2827. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2828. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2829. */
  2830. nvmlReturn_t DECLDIR nvmlDeviceGetEncoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs);
  2831. /**
  2832. * Retrieves the current utilization and sampling size in microseconds for the Decoder
  2833. *
  2834. * For Kepler &tm; or newer fully supported devices.
  2835. *
  2836. * @param device The identifier of the target device
  2837. * @param utilization Reference to an unsigned int for decoder utilization info
  2838. * @param samplingPeriodUs Reference to an unsigned int for the sampling period in microseconds
  2839. *
  2840. * @return
  2841. * - \ref NVML_SUCCESS if \a utilization has been populated
  2842. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2843. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a utilization is NULL, or \a samplingPeriodUs is NULL
  2844. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  2845. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2846. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2847. */
  2848. nvmlReturn_t DECLDIR nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsigned int *utilization, unsigned int *samplingPeriodUs);
  2849. /**
  2850. * Retrieves the current and pending driver model for the device.
  2851. *
  2852. * For Fermi &tm; or newer fully supported devices.
  2853. * For Windows only.
  2854. *
  2855. * On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode. If a display is attached
  2856. * to the device it must run in WDDM mode. TCC mode is preferred if a display is not attached.
  2857. *
  2858. * See \ref nvmlDriverModel_t for details on available driver models.
  2859. *
  2860. * @param device The identifier of the target device
  2861. * @param current Reference in which to return the current driver model
  2862. * @param pending Reference in which to return the pending driver model
  2863. *
  2864. * @return
  2865. * - \ref NVML_SUCCESS if \a current and/or \a pending have been set
  2866. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2867. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or both \a current and \a pending are NULL
  2868. * - \ref NVML_ERROR_NOT_SUPPORTED if the platform is not Windows
  2869. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2870. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2871. *
  2872. * @see nvmlDeviceSetDriverModel()
  2873. */
  2874. nvmlReturn_t DECLDIR nvmlDeviceGetDriverModel(nvmlDevice_t device, nvmlDriverModel_t *current, nvmlDriverModel_t *pending);
  2875. /**
  2876. * Get VBIOS version of the device.
  2877. *
  2878. * For all products.
  2879. *
  2880. * The VBIOS version may change from time to time. It will not exceed 32 characters in length
  2881. * (including the NULL terminator). See \ref nvmlConstants::NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE.
  2882. *
  2883. * @param device The identifier of the target device
  2884. * @param version Reference to which to return the VBIOS version
  2885. * @param length The maximum allowed length of the string returned in \a version
  2886. *
  2887. * @return
  2888. * - \ref NVML_SUCCESS if \a version has been set
  2889. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2890. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a version is NULL
  2891. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a length is too small
  2892. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2893. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2894. */
  2895. nvmlReturn_t DECLDIR nvmlDeviceGetVbiosVersion(nvmlDevice_t device, char *version, unsigned int length);
  2896. /**
  2897. * Get Bridge Chip Information for all the bridge chips on the board.
  2898. *
  2899. * For all fully supported products.
  2900. * Only applicable to multi-GPU products.
  2901. *
  2902. * @param device The identifier of the target device
  2903. * @param bridgeHierarchy Reference to the returned bridge chip Hierarchy
  2904. *
  2905. * @return
  2906. * - \ref NVML_SUCCESS if bridge chip exists
  2907. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2908. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, or \a bridgeHierarchy is NULL
  2909. * - \ref NVML_ERROR_NOT_SUPPORTED if bridge chip not supported on the device
  2910. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2911. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2912. *
  2913. */
  2914. nvmlReturn_t DECLDIR nvmlDeviceGetBridgeChipInfo(nvmlDevice_t device, nvmlBridgeChipHierarchy_t *bridgeHierarchy);
  2915. /**
  2916. * Get information about processes with a compute context on a device
  2917. *
  2918. * For Fermi &tm; or newer fully supported devices.
  2919. *
  2920. * This function returns information only about running compute processes (e.g. CUDA applications which have
  2921. * an active context). Any graphics applications (e.g. using OpenGL, DirectX) won't be listed by this function.
  2922. *
  2923. * To query the current number of running compute processes, call this function with *infoCount = 0. The
  2924. * return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if none are running. For this call
  2925. * \a infos is allowed to be NULL.
  2926. *
  2927. * The usedGpuMemory field returned is all of the memory used by the application.
  2928. *
  2929. * Keep in mind that information returned by this call is dynamic and the number of elements might change in
  2930. * time. Allocate more space for \a infos table in case new compute processes are spawned.
  2931. *
  2932. * @param device The identifier of the target device
  2933. * @param infoCount Reference in which to provide the \a infos array size, and
  2934. * to return the number of returned elements
  2935. * @param infos Reference in which to return the process information
  2936. *
  2937. * @return
  2938. * - \ref NVML_SUCCESS if \a infoCount and \a infos have been populated
  2939. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2940. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a infoCount indicates that the \a infos array is too small
  2941. * \a infoCount will contain minimal amount of space necessary for
  2942. * the call to complete
  2943. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, either of \a infoCount or \a infos is NULL
  2944. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2945. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2946. *
  2947. * @see \ref nvmlSystemGetProcessName
  2948. */
  2949. nvmlReturn_t DECLDIR nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos);
  2950. /**
  2951. * Get information about processes with a graphics context on a device
  2952. *
  2953. * For Kepler &tm; or newer fully supported devices.
  2954. *
  2955. * This function returns information only about graphics based processes
  2956. * (e.g. applications using OpenGL, DirectX).
  2957. *
  2958. * To query the current number of running graphics processes, call this function with *infoCount = 0. The
  2959. * return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if none are running. For this call
  2960. * \a infos is allowed to be NULL.
  2961. *
  2962. * The usedGpuMemory field returned is all of the memory used by the application.
  2963. *
  2964. * Keep in mind that information returned by this call is dynamic and the number of elements might change in
  2965. * time. Allocate more space for \a infos table in case new graphics processes are spawned.
  2966. *
  2967. * @param device The identifier of the target device
  2968. * @param infoCount Reference in which to provide the \a infos array size, and
  2969. * to return the number of returned elements
  2970. * @param infos Reference in which to return the process information
  2971. *
  2972. * @return
  2973. * - \ref NVML_SUCCESS if \a infoCount and \a infos have been populated
  2974. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2975. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a infoCount indicates that the \a infos array is too small
  2976. * \a infoCount will contain minimal amount of space necessary for
  2977. * the call to complete
  2978. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, either of \a infoCount or \a infos is NULL
  2979. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  2980. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  2981. *
  2982. * @see \ref nvmlSystemGetProcessName
  2983. */
  2984. nvmlReturn_t DECLDIR nvmlDeviceGetGraphicsRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos);
  2985. /**
  2986. * Check if the GPU devices are on the same physical board.
  2987. *
  2988. * For all fully supported products.
  2989. *
  2990. * @param device1 The first GPU device
  2991. * @param device2 The second GPU device
  2992. * @param onSameBoard Reference in which to return the status.
  2993. * Non-zero indicates that the GPUs are on the same board.
  2994. *
  2995. * @return
  2996. * - \ref NVML_SUCCESS if \a onSameBoard has been set
  2997. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  2998. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device1 or \a device2 is invalid or \a onSameBoard is NULL
  2999. * - \ref NVML_ERROR_NOT_SUPPORTED if this check is not supported by the device
  3000. * - \ref NVML_ERROR_GPU_IS_LOST if either GPU has fallen off the bus or is otherwise inaccessible
  3001. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3002. */
  3003. nvmlReturn_t DECLDIR nvmlDeviceOnSameBoard(nvmlDevice_t device1, nvmlDevice_t device2, int *onSameBoard);
  3004. /**
  3005. * Retrieves the root/admin permissions on the target API. See \a nvmlRestrictedAPI_t for the list of supported APIs.
  3006. * If an API is restricted only root users can call that API. See \a nvmlDeviceSetAPIRestriction to change current permissions.
  3007. *
  3008. * For all fully supported products.
  3009. *
  3010. * @param device The identifier of the target device
  3011. * @param apiType Target API type for this operation
  3012. * @param isRestricted Reference in which to return the current restriction
  3013. * NVML_FEATURE_ENABLED indicates that the API is root-only
  3014. * NVML_FEATURE_DISABLED indicates that the API is accessible to all users
  3015. *
  3016. * @return
  3017. * - \ref NVML_SUCCESS if \a isRestricted has been set
  3018. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3019. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a apiType is incorrect or \a isRestricted is NULL
  3020. * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device or the device does not support
  3021. * the feature that is being queried (E.G. Enabling/disabling Auto Boosted clocks is
  3022. * not supported by the device)
  3023. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3024. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3025. *
  3026. * @see nvmlRestrictedAPI_t
  3027. */
  3028. nvmlReturn_t DECLDIR nvmlDeviceGetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t *isRestricted);
  3029. /**
  3030. * Gets recent samples for the GPU.
  3031. *
  3032. * For Kepler &tm; or newer fully supported devices.
  3033. *
  3034. * Based on type, this method can be used to fetch the power, utilization or clock samples maintained in the buffer by
  3035. * the driver.
  3036. *
  3037. * Power, Utilization and Clock samples are returned as type "unsigned int" for the union nvmlValue_t.
  3038. *
  3039. * To get the number of samples that the user needs to allocate, invoke the method with samples set to NULL.
  3040. * The returned sampleCount will provide the number of samples that can be queried. The user needs to
  3041. * allocate a buffer of size sampleCount * sizeof(nvmlSample_t).
  3042. *
  3043. * lastSeenTimeStamp represents CPU timestamp in microseconds. Set it to 0 to fetch all the samples maintained by the
  3044. * underlying buffer. Set lastSeenTimeStamp to one of the timeStamps retrieved from the data of the previous query
  3045. * to get more recent samples.
  3046. *
  3047. * This method fetches the number of entries which can be accommodated in the provided samples array, and the
  3048. * reference sampleCount is updated to indicate how many samples were actually retrieved. The advantage of using this
  3049. * method for samples in contrast to polling via existing methods is to get higher frequency data at lower polling cost.
  3050. *
  3051. * @param device The identifier for the target device
  3052. * @param type Type of sampling event
  3053. * @param lastSeenTimeStamp Return only samples with timestamp greater than lastSeenTimeStamp.
  3054. * @param sampleValType Output parameter to represent the type of sample value as described in nvmlSampleVal_t
  3055. * @param sampleCount Reference to provide the number of elements which can be queried in samples array
  3056. * @param samples Reference in which samples are returned
  3057. * @return
  3058. * - \ref NVML_SUCCESS if samples are successfully retrieved
  3059. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3060. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a sampleCount is NULL or
  3061. * the value referenced by \a sampleCount is 0 for non-NULL \a samples
  3062. * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device
  3063. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3064. * - \ref NVML_ERROR_NOT_FOUND if sample entries are not found
  3065. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3066. */
  3067. nvmlReturn_t DECLDIR nvmlDeviceGetSamples(nvmlDevice_t device, nvmlSamplingType_t type, unsigned long long lastSeenTimeStamp,
  3068. nvmlValueType_t *sampleValType, unsigned int *sampleCount, nvmlSample_t *samples);
  3069. /**
  3070. * Gets Total, Available and Used size of BAR1 memory.
  3071. *
  3072. * BAR1 is used to map the FB (device memory) so that it can be directly accessed by the CPU or by 3rd party
  3073. * devices (peer-to-peer on the PCIE bus).
  3074. *
  3075. * For Kepler &tm; or newer fully supported devices.
  3076. *
  3077. * @param device The identifier of the target device
  3078. * @param bar1Memory Reference in which BAR1 memory
  3079. * information is returned.
  3080. *
  3081. * @return
  3082. * - \ref NVML_SUCCESS if BAR1 memory is successfully retrieved
  3083. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3084. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a bar1Memory is NULL
  3085. * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device
  3086. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3087. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3088. *
  3089. */
  3090. nvmlReturn_t DECLDIR nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory);
  3091. /**
  3092. * Gets the duration of time during which the device was throttled (lower than requested clocks) due to power
  3093. * or thermal constraints.
  3094. *
  3095. * The method is important to users who are trying to understand if their GPUs throttle at any point during their applications. The
  3096. * difference in violation times at two different reference times gives the indication of GPU throttling event.
  3097. *
  3098. * Violation for thermal capping is not supported at this time.
  3099. *
  3100. * For Kepler &tm; or newer fully supported devices.
  3101. *
  3102. * @param device The identifier of the target device
  3103. * @param perfPolicyType Represents Performance policy which can trigger GPU throttling
  3104. * @param violTime Reference to which violation time related information is returned
  3105. *
  3106. *
  3107. * @return
  3108. * - \ref NVML_SUCCESS if violation time is successfully retrieved
  3109. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3110. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a perfPolicyType is invalid, or \a violTime is NULL
  3111. * - \ref NVML_ERROR_NOT_SUPPORTED if this query is not supported by the device
  3112. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3113. *
  3114. */
  3115. nvmlReturn_t DECLDIR nvmlDeviceGetViolationStatus(nvmlDevice_t device, nvmlPerfPolicyType_t perfPolicyType, nvmlViolationTime_t *violTime);
  3116. /**
  3117. * @}
  3118. */
  3119. /** @addtogroup nvmlAccountingStats
  3120. * @{
  3121. */
  3122. /**
  3123. * Queries the state of per process accounting mode.
  3124. *
  3125. * For Kepler &tm; or newer fully supported devices.
  3126. *
  3127. * See \ref nvmlDeviceGetAccountingStats for more details.
  3128. * See \ref nvmlDeviceSetAccountingMode
  3129. *
  3130. * @param device The identifier of the target device
  3131. * @param mode Reference in which to return the current accounting mode
  3132. *
  3133. * @return
  3134. * - \ref NVML_SUCCESS if the mode has been successfully retrieved
  3135. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3136. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is NULL
  3137. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3138. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3139. */
  3140. nvmlReturn_t DECLDIR nvmlDeviceGetAccountingMode(nvmlDevice_t device, nvmlEnableState_t *mode);
  3141. /**
  3142. * Queries process's accounting stats.
  3143. *
  3144. * For Kepler &tm; or newer fully supported devices.
  3145. *
  3146. * Accounting stats capture GPU utilization and other statistics across the lifetime of a process.
  3147. * Accounting stats can be queried during life time of the process and after its termination.
  3148. * The time field in \ref nvmlAccountingStats_t is reported as 0 during the lifetime of the process and
  3149. * updated to actual running time after its termination.
  3150. * Accounting stats are kept in a circular buffer; newly created processes overwrite information about old
  3151. * processes.
  3152. *
  3153. * See \ref nvmlAccountingStats_t for description of each returned metric.
  3154. * List of processes that can be queried can be retrieved from \ref nvmlDeviceGetAccountingPids.
  3155. *
  3156. * @note Accounting Mode needs to be on. See \ref nvmlDeviceGetAccountingMode.
  3157. * @note Only compute and graphics applications stats can be queried. Monitoring applications stats can't be
  3158. * queried since they don't contribute to GPU utilization.
  3159. * @note In case of pid collision stats of only the latest process (that terminated last) will be reported
  3160. *
  3161. * @warning On Kepler devices per process statistics are accurate only if there's one process running on a GPU.
  3162. *
  3163. * @param device The identifier of the target device
  3164. * @param pid Process Id of the target process to query stats for
  3165. * @param stats Reference in which to return the process's accounting stats
  3166. *
  3167. * @return
  3168. * - \ref NVML_SUCCESS if stats have been successfully retrieved
  3169. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3170. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a stats is NULL
  3171. * - \ref NVML_ERROR_NOT_FOUND if process stats were not found
  3172. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature or accounting mode is disabled
  3173. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3174. *
  3175. * @see nvmlDeviceGetAccountingBufferSize
  3176. */
  3177. nvmlReturn_t DECLDIR nvmlDeviceGetAccountingStats(nvmlDevice_t device, unsigned int pid, nvmlAccountingStats_t *stats);
  3178. /**
  3179. * Queries list of processes that can be queried for accounting stats. The list of processes returned
  3180. * can be in running or terminated state.
  3181. *
  3182. * For Kepler &tm; or newer fully supported devices.
  3183. *
  3184. * To just query the number of processes ready to be queried, call this function with *count = 0 and
  3185. * pids=NULL. The return code will be NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if list is empty.
  3186. *
  3187. * For more details see \ref nvmlDeviceGetAccountingStats.
  3188. *
  3189. * @note In case of PID collision some processes might not be accessible before the circular buffer is full.
  3190. *
  3191. * @param device The identifier of the target device
  3192. * @param count Reference in which to provide the \a pids array size, and
  3193. * to return the number of elements ready to be queried
  3194. * @param pids Reference in which to return list of process ids
  3195. *
  3196. * @return
  3197. * - \ref NVML_SUCCESS if pids were successfully retrieved
  3198. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3199. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a count is NULL
  3200. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature or accounting mode is disabled
  3201. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a count is too small (\a count is set to
  3202. * expected value)
  3203. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3204. *
  3205. * @see nvmlDeviceGetAccountingBufferSize
  3206. */
  3207. nvmlReturn_t DECLDIR nvmlDeviceGetAccountingPids(nvmlDevice_t device, unsigned int *count, unsigned int *pids);
  3208. /**
  3209. * Returns the number of processes that the circular buffer with accounting pids can hold.
  3210. *
  3211. * For Kepler &tm; or newer fully supported devices.
  3212. *
  3213. * This is the maximum number of processes that accounting information will be stored for before information
  3214. * about oldest processes will get overwritten by information about new processes.
  3215. *
  3216. * @param device The identifier of the target device
  3217. * @param bufferSize Reference in which to provide the size (in number of elements)
  3218. * of the circular buffer for accounting stats.
  3219. *
  3220. * @return
  3221. * - \ref NVML_SUCCESS if buffer size was successfully retrieved
  3222. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3223. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a bufferSize is NULL
  3224. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature or accounting mode is disabled
  3225. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3226. *
  3227. * @see nvmlDeviceGetAccountingStats
  3228. * @see nvmlDeviceGetAccountingPids
  3229. */
  3230. nvmlReturn_t DECLDIR nvmlDeviceGetAccountingBufferSize(nvmlDevice_t device, unsigned int *bufferSize);
  3231. /** @} */
  3232. /** @addtogroup nvmlDeviceQueries
  3233. * @{
  3234. */
  3235. /**
  3236. * Returns the list of retired pages by source, including pages that are pending retirement.
  3237. * The address information provided from this API is the hardware address of the page that was retired. Note
  3238. * that this does not match the virtual address used in CUDA, but will match the address information in XID 63.
  3239. *
  3240. * For Kepler &tm; or newer fully supported devices.
  3241. *
  3242. * @param device The identifier of the target device
  3243. * @param cause Filter page addresses by cause of retirement
  3244. * @param pageCount Reference in which to provide the \a addresses buffer size, and
  3245. * to return the number of retired pages that match \a cause
  3246. * Set to 0 to query the size without allocating an \a addresses buffer
  3247. * @param addresses Buffer to write the page addresses into
  3248. *
  3249. * @return
  3250. * - \ref NVML_SUCCESS if \a pageCount was populated and \a addresses was filled
  3251. * - \ref NVML_ERROR_INSUFFICIENT_SIZE if \a pageCount indicates the buffer is not large enough to store all the
  3252. * matching page addresses. \a pageCount is set to the needed size.
  3253. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3254. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid, \a pageCount is NULL, \a cause is invalid, or
  3255. * \a addresses is NULL
  3256. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3257. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3258. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3259. */
  3260. nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageRetirementCause_t cause,
  3261. unsigned int *pageCount, unsigned long long *addresses);
  3262. /**
  3263. * Check if any pages are pending retirement and need a reboot to fully retire.
  3264. *
  3265. * For Kepler &tm; or newer fully supported devices.
  3266. *
  3267. * @param device The identifier of the target device
  3268. * @param isPending Reference in which to return the pending status
  3269. *
  3270. * @return
  3271. * - \ref NVML_SUCCESS if \a isPending was populated
  3272. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3273. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a isPending is NULL
  3274. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3275. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3276. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3277. */
  3278. nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPagesPendingStatus(nvmlDevice_t device, nvmlEnableState_t *isPending);
  3279. /** @} */
  3280. /***************************************************************************************************/
  3281. /** @defgroup nvmlUnitCommands Unit Commands
  3282. * This chapter describes NVML operations that change the state of the unit. For S-class products.
  3283. * Each of these requires root/admin access. Non-admin users will see an NVML_ERROR_NO_PERMISSION
  3284. * error code when invoking any of these methods.
  3285. * @{
  3286. */
  3287. /***************************************************************************************************/
  3288. /**
  3289. * Set the LED state for the unit. The LED can be either green (0) or amber (1).
  3290. *
  3291. * For S-class products.
  3292. * Requires root/admin permissions.
  3293. *
  3294. * This operation takes effect immediately.
  3295. *
  3296. *
  3297. * <b>Current S-Class products don't provide unique LEDs for each unit. As such, both front
  3298. * and back LEDs will be toggled in unison regardless of which unit is specified with this command.</b>
  3299. *
  3300. * See \ref nvmlLedColor_t for available colors.
  3301. *
  3302. * @param unit The identifier of the target unit
  3303. * @param color The target LED color
  3304. *
  3305. * @return
  3306. * - \ref NVML_SUCCESS if the LED color has been set
  3307. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3308. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a unit or \a color is invalid
  3309. * - \ref NVML_ERROR_NOT_SUPPORTED if this is not an S-class product
  3310. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3311. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3312. *
  3313. * @see nvmlUnitGetLedState()
  3314. */
  3315. nvmlReturn_t DECLDIR nvmlUnitSetLedState(nvmlUnit_t unit, nvmlLedColor_t color);
  3316. /** @} */
  3317. /***************************************************************************************************/
  3318. /** @defgroup nvmlDeviceCommands Device Commands
  3319. * This chapter describes NVML operations that change the state of the device.
  3320. * Each of these requires root/admin access. Non-admin users will see an NVML_ERROR_NO_PERMISSION
  3321. * error code when invoking any of these methods.
  3322. * @{
  3323. */
  3324. /***************************************************************************************************/
  3325. /**
  3326. * Set the persistence mode for the device.
  3327. *
  3328. * For all products.
  3329. * For Linux only.
  3330. * Requires root/admin permissions.
  3331. *
  3332. * The persistence mode determines whether the GPU driver software is torn down after the last client
  3333. * exits.
  3334. *
  3335. * This operation takes effect immediately. It is not persistent across reboots. After each reboot the
  3336. * persistence mode is reset to "Disabled".
  3337. *
  3338. * See \ref nvmlEnableState_t for available modes.
  3339. *
  3340. * @param device The identifier of the target device
  3341. * @param mode The target persistence mode
  3342. *
  3343. * @return
  3344. * - \ref NVML_SUCCESS if the persistence mode was set
  3345. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3346. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is invalid
  3347. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  3348. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3349. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3350. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3351. *
  3352. * @see nvmlDeviceGetPersistenceMode()
  3353. */
  3354. nvmlReturn_t DECLDIR nvmlDeviceSetPersistenceMode(nvmlDevice_t device, nvmlEnableState_t mode);
  3355. /**
  3356. * Set the compute mode for the device.
  3357. *
  3358. * For all products.
  3359. * Requires root/admin permissions.
  3360. *
  3361. * The compute mode determines whether a GPU can be used for compute operations and whether it can
  3362. * be shared across contexts.
  3363. *
  3364. * This operation takes effect immediately. Under Linux it is not persistent across reboots and
  3365. * always resets to "Default". Under Windows it is persistent.
  3366. *
  3367. * Under Windows, compute mode may only be set to DEFAULT when running in WDDM.
  3368. *
  3369. * See \ref nvmlComputeMode_t for details on available compute modes.
  3370. *
  3371. * @param device The identifier of the target device
  3372. * @param mode The target compute mode
  3373. *
  3374. * @return
  3375. * - \ref NVML_SUCCESS if the compute mode was set
  3376. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3377. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is invalid
  3378. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  3379. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3380. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3381. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3382. *
  3383. * @see nvmlDeviceGetComputeMode()
  3384. */
  3385. nvmlReturn_t DECLDIR nvmlDeviceSetComputeMode(nvmlDevice_t device, nvmlComputeMode_t mode);
  3386. /**
  3387. * Set the ECC mode for the device.
  3388. *
  3389. * For Kepler &tm; or newer fully supported devices.
  3390. * Only applicable to devices with ECC.
  3391. * Requires \a NVML_INFOROM_ECC version 1.0 or higher.
  3392. * Requires root/admin permissions.
  3393. *
  3394. * The ECC mode determines whether the GPU enables its ECC support.
  3395. *
  3396. * This operation takes effect after the next reboot.
  3397. *
  3398. * See \ref nvmlEnableState_t for details on available modes.
  3399. *
  3400. * @param device The identifier of the target device
  3401. * @param ecc The target ECC mode
  3402. *
  3403. * @return
  3404. * - \ref NVML_SUCCESS if the ECC mode was set
  3405. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3406. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a ecc is invalid
  3407. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  3408. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3409. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3410. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3411. *
  3412. * @see nvmlDeviceGetEccMode()
  3413. */
  3414. nvmlReturn_t DECLDIR nvmlDeviceSetEccMode(nvmlDevice_t device, nvmlEnableState_t ecc);
  3415. /**
  3416. * Clear the ECC error and other memory error counts for the device.
  3417. *
  3418. * For Kepler &tm; or newer fully supported devices.
  3419. * Only applicable to devices with ECC.
  3420. * Requires \a NVML_INFOROM_ECC version 2.0 or higher to clear aggregate location-based ECC counts.
  3421. * Requires \a NVML_INFOROM_ECC version 1.0 or higher to clear all other ECC counts.
  3422. * Requires root/admin permissions.
  3423. * Requires ECC Mode to be enabled.
  3424. *
  3425. * Sets all of the specified ECC counters to 0, including both detailed and total counts.
  3426. *
  3427. * This operation takes effect immediately.
  3428. *
  3429. * See \ref nvmlMemoryErrorType_t for details on available counter types.
  3430. *
  3431. * @param device The identifier of the target device
  3432. * @param counterType Flag that indicates which type of errors should be cleared.
  3433. *
  3434. * @return
  3435. * - \ref NVML_SUCCESS if the error counts were cleared
  3436. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3437. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a counterType is invalid
  3438. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  3439. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3440. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3441. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3442. *
  3443. * @see
  3444. * - nvmlDeviceGetDetailedEccErrors()
  3445. * - nvmlDeviceGetTotalEccErrors()
  3446. */
  3447. nvmlReturn_t DECLDIR nvmlDeviceClearEccErrorCounts(nvmlDevice_t device, nvmlEccCounterType_t counterType);
  3448. /**
  3449. * Set the driver model for the device.
  3450. *
  3451. * For Fermi &tm; or newer fully supported devices.
  3452. * For Windows only.
  3453. * Requires root/admin permissions.
  3454. *
  3455. * On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode. If a display is attached
  3456. * to the device it must run in WDDM mode.
  3457. *
  3458. * It is possible to force the change to WDM (TCC) while the display is still attached with a force flag (nvmlFlagForce).
  3459. * This should only be done if the host is subsequently powered down and the display is detached from the device
  3460. * before the next reboot.
  3461. *
  3462. * This operation takes effect after the next reboot.
  3463. *
  3464. * Windows driver model may only be set to WDDM when running in DEFAULT compute mode.
  3465. *
  3466. * Changing the driver model to WDDM is not supported when the GPU doesn't support graphics acceleration or
  3467. * will not support it after reboot. See \ref nvmlDeviceSetGpuOperationMode.
  3468. *
  3469. * See \ref nvmlDriverModel_t for details on available driver models.
  3470. * See \ref nvmlFlagDefault and \ref nvmlFlagForce
  3471. *
  3472. * @param device The identifier of the target device
  3473. * @param driverModel The target driver model
  3474. * @param flags Flags that change the default behavior
  3475. *
  3476. * @return
  3477. * - \ref NVML_SUCCESS if the driver model has been set
  3478. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3479. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a driverModel is invalid
  3480. * - \ref NVML_ERROR_NOT_SUPPORTED if the platform is not Windows or the device does not support this feature
  3481. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3482. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3483. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3484. *
  3485. * @see nvmlDeviceGetDriverModel()
  3486. */
  3487. nvmlReturn_t DECLDIR nvmlDeviceSetDriverModel(nvmlDevice_t device, nvmlDriverModel_t driverModel, unsigned int flags);
  3488. /**
  3489. * Set clocks that applications will lock to.
  3490. *
  3491. * Sets the clocks that compute and graphics applications will be running at.
  3492. * e.g. CUDA driver requests these clocks during context creation which means this property
  3493. * defines clocks at which CUDA applications will be running unless some overspec event
  3494. * occurs (e.g. over power, over thermal or external HW brake).
  3495. *
  3496. * Can be used as a setting to request constant performance.
  3497. *
  3498. * On Pascal and newer hardware, this will automatically disable automatic boosting of clocks.
  3499. *
  3500. * On K80 and newer Kepler and Maxwell GPUs, users desiring fixed performance should also call
  3501. * \ref nvmlDeviceSetAutoBoostedClocksEnabled to prevent clocks from automatically boosting
  3502. * above the clock value being set.
  3503. *
  3504. * For Kepler &tm; or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices.
  3505. * Requires root/admin permissions.
  3506. *
  3507. * See \ref nvmlDeviceGetSupportedMemoryClocks and \ref nvmlDeviceGetSupportedGraphicsClocks
  3508. * for details on how to list available clocks combinations.
  3509. *
  3510. * After system reboot or driver reload applications clocks go back to their default value.
  3511. * See \ref nvmlDeviceResetApplicationsClocks.
  3512. *
  3513. * @param device The identifier of the target device
  3514. * @param memClockMHz Requested memory clock in MHz
  3515. * @param graphicsClockMHz Requested graphics clock in MHz
  3516. *
  3517. * @return
  3518. * - \ref NVML_SUCCESS if new settings were successfully set
  3519. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3520. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a memClockMHz and \a graphicsClockMHz
  3521. * are not a valid clock combination
  3522. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3523. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3524. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3525. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3526. */
  3527. nvmlReturn_t DECLDIR nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int memClockMHz, unsigned int graphicsClockMHz);
  3528. /**
  3529. * Set new power limit of this device.
  3530. *
  3531. * For Kepler &tm; or newer fully supported devices.
  3532. * Requires root/admin permissions.
  3533. *
  3534. * See \ref nvmlDeviceGetPowerManagementLimitConstraints to check the allowed ranges of values.
  3535. *
  3536. * \note Limit is not persistent across reboots or driver unloads.
  3537. * Enable persistence mode to prevent the driver from unloading when no application is using the device.
  3538. *
  3539. * @param device The identifier of the target device
  3540. * @param limit Power management limit in milliwatts to set
  3541. *
  3542. * @return
  3543. * - \ref NVML_SUCCESS if \a limit has been set
  3544. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3545. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a limit is out of range
  3546. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support this feature
  3547. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3548. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3549. *
  3550. * @see nvmlDeviceGetPowerManagementLimitConstraints
  3551. * @see nvmlDeviceGetPowerManagementDefaultLimit
  3552. */
  3553. nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit);
  3554. /**
  3555. * Sets new GOM. See \a nvmlGpuOperationMode_t for details.
  3556. *
  3557. * For GK110 M-class and X-class Tesla &tm; products from the Kepler family.
  3558. * Modes \ref NVML_GOM_LOW_DP and \ref NVML_GOM_ALL_ON are supported on fully supported GeForce products.
  3559. * Not supported on Quadro &reg; and Tesla &tm; C-class products.
  3560. * Requires root/admin permissions.
  3561. *
  3562. * Changing GOMs requires a reboot.
  3563. * The reboot requirement might be removed in the future.
  3564. *
  3565. * Compute only GOMs don't support graphics acceleration. Under Windows, switching to these GOMs when
  3566. * pending driver model is WDDM is not supported. See \ref nvmlDeviceSetDriverModel.
  3567. *
  3568. * @param device The identifier of the target device
  3569. * @param mode Target GOM
  3570. *
  3571. * @return
  3572. * - \ref NVML_SUCCESS if \a mode has been set
  3573. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3574. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a mode is incorrect
  3575. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support GOM or specific mode
  3576. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3577. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3578. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3579. *
  3580. * @see nvmlGpuOperationMode_t
  3581. * @see nvmlDeviceGetGpuOperationMode
  3582. */
  3583. nvmlReturn_t DECLDIR nvmlDeviceSetGpuOperationMode(nvmlDevice_t device, nvmlGpuOperationMode_t mode);
  3584. /**
  3585. * Changes the root/admin restrictions on certain APIs. See \a nvmlRestrictedAPI_t for the list of supported APIs.
  3586. * This method can be used by a root/admin user to give non-root/admin access to certain otherwise-restricted APIs.
  3587. * The new setting lasts for the lifetime of the NVIDIA driver; it is not persistent. See \a nvmlDeviceGetAPIRestriction
  3588. * to query the current restriction settings.
  3589. *
  3590. * For Kepler &tm; or newer fully supported devices.
  3591. * Requires root/admin permissions.
  3592. *
  3593. * @param device The identifier of the target device
  3594. * @param apiType Target API type for this operation
  3595. * @param isRestricted The target restriction
  3596. *
  3597. * @return
  3598. * - \ref NVML_SUCCESS if \a isRestricted has been set
  3599. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3600. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid or \a apiType incorrect
  3601. * - \ref NVML_ERROR_NOT_SUPPORTED if the device does not support changing API restrictions or the device does not support
  3602. * the feature that api restrictions are being set for (E.G. Enabling/disabling auto
  3603. * boosted clocks is not supported by the device)
  3604. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3605. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3606. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3607. *
  3608. * @see nvmlRestrictedAPI_t
  3609. */
  3610. nvmlReturn_t DECLDIR nvmlDeviceSetAPIRestriction(nvmlDevice_t device, nvmlRestrictedAPI_t apiType, nvmlEnableState_t isRestricted);
  3611. /**
  3612. * @}
  3613. */
  3614. /** @addtogroup nvmlAccountingStats
  3615. * @{
  3616. */
  3617. /**
  3618. * Enables or disables per process accounting.
  3619. *
  3620. * For Kepler &tm; or newer fully supported devices.
  3621. * Requires root/admin permissions.
  3622. *
  3623. * @note This setting is not persistent and will default to disabled after driver unloads.
  3624. * Enable persistence mode to be sure the setting doesn't switch off to disabled.
  3625. *
  3626. * @note Enabling accounting mode has no negative impact on the GPU performance.
  3627. *
  3628. * @note Disabling accounting clears all accounting pids information.
  3629. *
  3630. * See \ref nvmlDeviceGetAccountingMode
  3631. * See \ref nvmlDeviceGetAccountingStats
  3632. * See \ref nvmlDeviceClearAccountingPids
  3633. *
  3634. * @param device The identifier of the target device
  3635. * @param mode The target accounting mode
  3636. *
  3637. * @return
  3638. * - \ref NVML_SUCCESS if the new mode has been set
  3639. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3640. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a mode are invalid
  3641. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3642. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3643. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3644. */
  3645. nvmlReturn_t DECLDIR nvmlDeviceSetAccountingMode(nvmlDevice_t device, nvmlEnableState_t mode);
  3646. /**
  3647. * Clears accounting information about all processes that have already terminated.
  3648. *
  3649. * For Kepler &tm; or newer fully supported devices.
  3650. * Requires root/admin permissions.
  3651. *
  3652. * See \ref nvmlDeviceGetAccountingMode
  3653. * See \ref nvmlDeviceGetAccountingStats
  3654. * See \ref nvmlDeviceSetAccountingMode
  3655. *
  3656. * @param device The identifier of the target device
  3657. *
  3658. * @return
  3659. * - \ref NVML_SUCCESS if accounting information has been cleared
  3660. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3661. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device is invalid
  3662. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3663. * - \ref NVML_ERROR_NO_PERMISSION if the user doesn't have permission to perform this operation
  3664. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3665. */
  3666. nvmlReturn_t DECLDIR nvmlDeviceClearAccountingPids(nvmlDevice_t device);
  3667. /** @} */
  3668. /***************************************************************************************************/
  3669. /** @defgroup NvLink NvLink Methods
  3670. * This chapter describes methods that NVML can perform on NVLINK enabled devices.
  3671. * @{
  3672. */
  3673. /***************************************************************************************************/
  3674. /**
  3675. * Retrieves the state of the device's NvLink for the link specified
  3676. *
  3677. * For newer than Maxwell &tm; fully supported devices.
  3678. *
  3679. * @param device The identifier of the target device
  3680. * @param link Specifies the NvLink link to be queried
  3681. * @param isActive \a nvmlEnableState_t where NVML_FEATURE_ENABLED indicates that
  3682. * the link is active and NVML_FEATURE_DISABLED indicates it
  3683. * is inactive
  3684. *
  3685. * @return
  3686. * - \ref NVML_SUCCESS if \a isActive has been set
  3687. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3688. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a isActive is NULL
  3689. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3690. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3691. */
  3692. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t *isActive);
  3693. /**
  3694. * Retrieves the version of the device's NvLink for the link specified
  3695. *
  3696. * For newer than Maxwell &tm; fully supported devices.
  3697. *
  3698. * @param device The identifier of the target device
  3699. * @param link Specifies the NvLink link to be queried
  3700. * @param version Requested NvLink version
  3701. *
  3702. * @return
  3703. * - \ref NVML_SUCCESS if \a version has been set
  3704. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3705. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a version is NULL
  3706. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3707. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3708. */
  3709. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkVersion(nvmlDevice_t device, unsigned int link, unsigned int *version);
  3710. /**
  3711. * Retrieves the requested capability from the device's NvLink for the link specified
  3712. * Please refer to the \a nvmlNvLinkCapability_t structure for the specific caps that can be queried
  3713. * The return value should be treated as a boolean.
  3714. *
  3715. * For newer than Maxwell &tm; fully supported devices.
  3716. *
  3717. * @param device The identifier of the target device
  3718. * @param link Specifies the NvLink link to be queried
  3719. * @param capability Specifies the \a nvmlNvLinkCapability_t to be queried
  3720. * @param capResult A boolean for the queried capability indicating that feature is available
  3721. *
  3722. * @return
  3723. * - \ref NVML_SUCCESS if \a capResult has been set
  3724. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3725. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a capability is invalid or \a capResult is NULL
  3726. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3727. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3728. */
  3729. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkCapability(nvmlDevice_t device, unsigned int link,
  3730. nvmlNvLinkCapability_t capability, unsigned int *capResult);
  3731. /**
  3732. * Retrieves the PCI information for the remote node on a NvLink link
  3733. * Note: pciSubSystemId is not filled in this function and is indeterminate
  3734. *
  3735. * For newer than Maxwell &tm; fully supported devices.
  3736. *
  3737. * @param device The identifier of the target device
  3738. * @param link Specifies the NvLink link to be queried
  3739. * @param pci \a nvmlPciInfo_t of the remote node for the specified link
  3740. *
  3741. * @return
  3742. * - \ref NVML_SUCCESS if \a pci has been set
  3743. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3744. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid or \a pci is NULL
  3745. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3746. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3747. */
  3748. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t *pci);
  3749. /**
  3750. * Retrieves the specified error counter value
  3751. * Please refer to \a nvmlNvLinkErrorCounter_t for error counters that are available
  3752. *
  3753. * For newer than Maxwell &tm; fully supported devices.
  3754. *
  3755. * @param device The identifier of the target device
  3756. * @param link Specifies the NvLink link to be queried
  3757. * @param counter Specifies the NvLink counter to be queried
  3758. * @param counterValue Returned counter value
  3759. *
  3760. * @return
  3761. * - \ref NVML_SUCCESS if \a counterValue has been set
  3762. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3763. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid or \a counterValue is NULL
  3764. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3765. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3766. */
  3767. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkErrorCounter(nvmlDevice_t device, unsigned int link,
  3768. nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue);
  3769. /**
  3770. * Resets all error counters to zero
  3771. * Please refer to \a nvmlNvLinkErrorCounter_t for the list of error counters that are reset
  3772. *
  3773. * For newer than Maxwell &tm; fully supported devices.
  3774. *
  3775. * @param device The identifier of the target device
  3776. * @param link Specifies the NvLink link whose error counters are to be reset
  3777. *
  3778. * @return
  3779. * - \ref NVML_SUCCESS if the reset is successful
  3780. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3781. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device or \a link is invalid
  3782. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3783. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3784. */
  3785. nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkErrorCounters(nvmlDevice_t device, unsigned int link);
  3786. /**
  3787. * Set the NVLINK utilization counter control information for the specified counter, 0 or 1.
  3788. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition. Performs a reset
  3789. * of the counters if the reset parameter is non-zero.
  3790. *
  3791. * For newer than Maxwell &tm; fully supported devices.
  3792. *
  3793. * @param device The identifier of the target device
  3794. * @param counter Specifies the counter that should be set (0 or 1).
  3795. * @param link Specifies the NvLink link to be queried
  3796. * @param control A reference to the \a nvmlNvLinkUtilizationControl_t to set
  3797. * @param reset Resets the counters on set if non-zero
  3798. *
  3799. * @return
  3800. * - \ref NVML_SUCCESS if the control has been set successfully
  3801. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3802. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, \a link, or \a control is invalid
  3803. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3804. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3805. */
  3806. nvmlReturn_t DECLDIR nvmlDeviceSetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter,
  3807. nvmlNvLinkUtilizationControl_t *control, unsigned int reset);
  3808. /**
  3809. * Get the NVLINK utilization counter control information for the specified counter, 0 or 1.
  3810. * Please refer to \a nvmlNvLinkUtilizationControl_t for the structure definition
  3811. *
  3812. * For newer than Maxwell &tm; fully supported devices.
  3813. *
  3814. * @param device The identifier of the target device
  3815. * @param counter Specifies the counter that should be queried (0 or 1).
  3816. * @param link Specifies the NvLink link to be queried
  3817. * @param control A reference to the \a nvmlNvLinkUtilizationControl_t to place information
  3818. *
  3819. * @return
  3820. * - \ref NVML_SUCCESS if \a control has been successfully retrieved
  3821. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3822. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, \a link, or \a control is invalid
  3823. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3824. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3825. */
  3826. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationControl(nvmlDevice_t device, unsigned int link, unsigned int counter,
  3827. nvmlNvLinkUtilizationControl_t *control);
  3828. /**
  3829. * Retrieve the NVLINK utilization counter based on the current control for a specified counter.
  3830. * In general it is good practice to use \a nvmlDeviceSetNvLinkUtilizationControl
  3831. * before reading the utilization counters as they have no default state
  3832. *
  3833. * For newer than Maxwell &tm; fully supported devices.
  3834. *
  3835. * @param device The identifier of the target device
  3836. * @param link Specifies the NvLink link to be queried
  3837. * @param counter Specifies the counter that should be read (0 or 1).
  3838. * @param rxcounter Receive counter return value
  3839. * @param txcounter Transmit counter return value
  3840. *
  3841. * @return
  3842. * - \ref NVML_SUCCESS if \a rxcounter and \a txcounter have been successfully set
  3843. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3844. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a counter, or \a link is invalid or \a rxcounter or \a txcounter are NULL
  3845. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3846. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3847. */
  3848. nvmlReturn_t DECLDIR nvmlDeviceGetNvLinkUtilizationCounter(nvmlDevice_t device, unsigned int link, unsigned int counter,
  3849. unsigned long long *rxcounter, unsigned long long *txcounter);
  3850. /**
  3851. * Freeze the NVLINK utilization counters
  3852. * Both the receive and transmit counters are operated on by this function
  3853. *
  3854. * For newer than Maxwell &tm; fully supported devices.
  3855. *
  3856. * @param device The identifier of the target device
  3857. * @param link Specifies the NvLink link to be queried
  3858. * @param counter Specifies the counter that should be frozen (0 or 1).
  3859. * @param freeze NVML_FEATURE_ENABLED = freeze the receive and transmit counters
  3860. * NVML_FEATURE_DISABLED = unfreeze the receive and transmit counters
  3861. *
  3862. * @return
  3863. * - \ref NVML_SUCCESS if counters were successfully frozen or unfrozen
  3864. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3865. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, \a counter, or \a freeze is invalid
  3866. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3867. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3868. */
  3869. nvmlReturn_t DECLDIR nvmlDeviceFreezeNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link,
  3870. unsigned int counter, nvmlEnableState_t freeze);
  3871. /**
  3872. * Reset the NVLINK utilization counters
  3873. * Both the receive and transmit counters are operated on by this function
  3874. *
  3875. * For newer than Maxwell &tm; fully supported devices.
  3876. *
  3877. * @param device The identifier of the target device
  3878. * @param link Specifies the NvLink link to be reset
  3879. * @param counter Specifies the counter that should be reset (0 or 1)
  3880. *
  3881. * @return
  3882. * - \ref NVML_SUCCESS if counters were successfully reset
  3883. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3884. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a device, \a link, or \a counter is invalid
  3885. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  3886. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3887. */
  3888. nvmlReturn_t DECLDIR nvmlDeviceResetNvLinkUtilizationCounter (nvmlDevice_t device, unsigned int link, unsigned int counter);
  3889. /** @} */
  3890. /***************************************************************************************************/
  3891. /** @defgroup nvmlEvents Event Handling Methods
  3892. * This chapter describes methods that NVML can perform against each device to register and wait for
  3893. * some event to occur.
  3894. * @{
  3895. */
  3896. /***************************************************************************************************/
  3897. /**
  3898. * Create an empty set of events.
  3899. * Event set should be freed by \ref nvmlEventSetFree
  3900. *
  3901. * For Fermi &tm; or newer fully supported devices.
  3902. * @param set Reference in which to return the event handle
  3903. *
  3904. * @return
  3905. * - \ref NVML_SUCCESS if the event set has been created and returned in \a set
  3906. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3907. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a set is NULL
  3908. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3909. *
  3910. * @see nvmlEventSetFree
  3911. */
  3912. nvmlReturn_t DECLDIR nvmlEventSetCreate(nvmlEventSet_t *set);
  3913. /**
  3914. * Starts recording of events on a specified device and adds the events to the specified \ref nvmlEventSet_t
  3915. *
  3916. * For Fermi &tm; or newer fully supported devices.
  3917. * Ecc events are available only on ECC enabled devices (see \ref nvmlDeviceGetTotalEccErrors)
  3918. * Power capping events are available only on Power Management enabled devices (see \ref nvmlDeviceGetPowerManagementMode)
  3919. *
  3920. * For Linux only.
  3921. *
  3922. * \b IMPORTANT: Operations on \a set are not thread safe
  3923. *
  3924. * This call starts recording of events on specific device.
  3925. * All events that occurred before this call are not recorded.
  3926. * Checking if some event occurred can be done with \ref nvmlEventSetWait
  3927. *
  3928. * If function reports NVML_ERROR_UNKNOWN, event set is in undefined state and should be freed.
  3929. * If function reports NVML_ERROR_NOT_SUPPORTED, event set can still be used. None of the requested eventTypes
  3930. * are registered in that case.
  3931. *
  3932. * @param device The identifier of the target device
  3933. * @param eventTypes Bitmask of \ref nvmlEventType to record
  3934. * @param set Set to which to add new event types
  3935. *
  3936. * @return
  3937. * - \ref NVML_SUCCESS if the event has been set
  3938. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3939. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a eventTypes is invalid or \a set is NULL
  3940. * - \ref NVML_ERROR_NOT_SUPPORTED if the platform does not support this feature or some of requested event types
  3941. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3942. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3943. *
  3944. * @see nvmlEventType
  3945. * @see nvmlDeviceGetSupportedEventTypes
  3946. * @see nvmlEventSetWait
  3947. * @see nvmlEventSetFree
  3948. */
  3949. nvmlReturn_t DECLDIR nvmlDeviceRegisterEvents(nvmlDevice_t device, unsigned long long eventTypes, nvmlEventSet_t set);
  3950. /**
  3951. * Returns information about events supported on device
  3952. *
  3953. * For Fermi &tm; or newer fully supported devices.
  3954. *
  3955. * Events are not supported on Windows. So this function returns an empty mask in \a eventTypes on Windows.
  3956. *
  3957. * @param device The identifier of the target device
  3958. * @param eventTypes Reference in which to return bitmask of supported events
  3959. *
  3960. * @return
  3961. * - \ref NVML_SUCCESS if the eventTypes has been set
  3962. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3963. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a eventTypes is NULL
  3964. * - \ref NVML_ERROR_GPU_IS_LOST if the target GPU has fallen off the bus or is otherwise inaccessible
  3965. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3966. *
  3967. * @see nvmlEventType
  3968. * @see nvmlDeviceRegisterEvents
  3969. */
  3970. nvmlReturn_t DECLDIR nvmlDeviceGetSupportedEventTypes(nvmlDevice_t device, unsigned long long *eventTypes);
  3971. /**
  3972. * Waits on events and delivers events
  3973. *
  3974. * For Fermi &tm; or newer fully supported devices.
  3975. *
  3976. * If some events are ready to be delivered at the time of the call, function returns immediately.
  3977. * If there are no events ready to be delivered, function sleeps till event arrives
  3978. * but not longer than specified timeout. This function in certain conditions can return before
  3979. * specified timeout passes (e.g. when interrupt arrives)
  3980. *
  3981. * In case of xid error, the function returns the most recent xid error type seen by the system. If there are multiple
  3982. * xid errors generated before nvmlEventSetWait is invoked then the last seen xid error type is returned for all
  3983. * xid error events.
  3984. *
  3985. * @param set Reference to set of events to wait on
  3986. * @param data Reference in which to return event data
  3987. * @param timeoutms Maximum amount of wait time in milliseconds for registered event
  3988. *
  3989. * @return
  3990. * - \ref NVML_SUCCESS if the data has been set
  3991. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  3992. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a data is NULL
  3993. * - \ref NVML_ERROR_TIMEOUT if no event arrived in specified timeout or interrupt arrived
  3994. * - \ref NVML_ERROR_GPU_IS_LOST if a GPU has fallen off the bus or is otherwise inaccessible
  3995. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  3996. *
  3997. * @see nvmlEventType
  3998. * @see nvmlDeviceRegisterEvents
  3999. */
  4000. nvmlReturn_t DECLDIR nvmlEventSetWait(nvmlEventSet_t set, nvmlEventData_t * data, unsigned int timeoutms);
  4001. /**
  4002. * Releases events in the set
  4003. *
  4004. * For Fermi &tm; or newer fully supported devices.
  4005. *
  4006. * @param set Reference to events to be released
  4007. *
  4008. * @return
  4009. * - \ref NVML_SUCCESS if the event has been successfully released
  4010. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  4011. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  4012. *
  4013. * @see nvmlDeviceRegisterEvents
  4014. */
  4015. nvmlReturn_t DECLDIR nvmlEventSetFree(nvmlEventSet_t set);
  4016. /** @} */
  4017. /***************************************************************************************************/
  4018. /** @defgroup nvmlZPI Drain states
  4019. * This chapter describes methods that NVML can perform against each device to control their drain state
  4020. * and recognition by NVML and NVIDIA kernel driver. These methods can be used with out-of-band tools to
  4021. * power on/off GPUs, enable robust reset scenarios, etc.
  4022. * @{
  4023. */
  4024. /***************************************************************************************************/
  4025. /**
  4026. * Modify the drain state of a GPU. This method forces a GPU to no longer accept new incoming requests.
  4027. * Any new NVML process will no longer see this GPU. Persistence mode for this GPU must be turned off before
  4028. * this call is made.
  4029. * Must be called as administrator.
  4030. * For Linux only.
  4031. *
  4032. * For newer than Maxwell &tm; fully supported devices.
  4033. * Some Kepler devices supported.
  4034. *
  4035. * @param pciInfo The PCI address of the GPU drain state to be modified
  4036. * @param newState The drain state that should be entered, see \ref nvmlEnableState_t
  4037. *
  4038. * @return
  4039. * - \ref NVML_SUCCESS if the drain state was successfully modified
  4040. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  4041. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciInfo or \a newState is invalid
  4042. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  4043. * - \ref NVML_ERROR_NO_PERMISSION if the calling process has insufficient permissions to perform operation
  4044. * - \ref NVML_ERROR_IN_USE if the device has persistence mode turned on
  4045. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  4046. */
  4047. nvmlReturn_t DECLDIR nvmlDeviceModifyDrainState (nvmlPciInfo_t *pciInfo, nvmlEnableState_t newState);
  4048. /**
  4049. * Query the drain state of a GPU. This method is used to check if a GPU is in a currently draining
  4050. * state.
  4051. * For Linux only.
  4052. *
  4053. * For newer than Maxwell &tm; fully supported devices.
  4054. * Some Kepler devices supported.
  4055. *
  4056. * @param pciInfo The PCI address of the GPU drain state to be queried
  4057. * @param currentState The current drain state for this GPU, see \ref nvmlEnableState_t
  4058. *
  4059. * @return
  4060. * - \ref NVML_SUCCESS if \a currentState has been set
  4061. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  4062. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciInfo or \a currentState is invalid
  4063. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  4064. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  4065. */
  4066. nvmlReturn_t DECLDIR nvmlDeviceQueryDrainState (nvmlPciInfo_t *pciInfo, nvmlEnableState_t *currentState);
  4067. /**
  4068. * This method will remove the specified GPU from the view of both NVML and the NVIDIA kernel driver
  4069. * as long as no other processes are attached. If other processes are attached, this call will return
  4070. * NVML_ERROR_IN_USE and the GPU will be returned to its original "draining" state. Note: the
  4071. * only situation where a process can still be attached after nvmlDeviceModifyDrainState() is called
  4072. * to initiate the draining state is if that process was using, and is still using, a GPU before the
  4073. * call was made. Also note, persistence mode counts as an attachment to the GPU thus it must be disabled
  4074. * prior to this call.
  4075. *
  4076. * For long-running NVML processes please note that this will change the enumeration of current GPUs.
  4077. * For example, if there are four GPUs present and GPU1 is removed, the new enumeration will be 0-2.
  4078. * Also, device handles after the removed GPU will not be valid and must be re-established.
  4079. * Must be run as administrator.
  4080. * For Linux only.
  4081. *
  4082. * For newer than Maxwell &tm; fully supported devices.
  4083. * Some Kepler devices supported.
  4084. *
  4085. * @param pciInfo The PCI address of the GPU to be removed
  4086. *
  4087. * @return
  4088. * - \ref NVML_SUCCESS if the GPU was successfully removed
  4089. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  4090. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciInfo is invalid
  4091. * - \ref NVML_ERROR_NOT_SUPPORTED if the device doesn't support this feature
  4092. * - \ref NVML_ERROR_IN_USE if the device is still in use and cannot be removed
  4093. */
  4094. nvmlReturn_t DECLDIR nvmlDeviceRemoveGpu (nvmlPciInfo_t *pciInfo);
  4095. /**
  4096. * Request the OS and the NVIDIA kernel driver to rediscover a portion of the PCI subsystem looking for GPUs that
  4097. * were previously removed. The portion of the PCI tree can be narrowed by specifying a domain, bus, and device.
  4098. * If all are zeroes then the entire PCI tree will be searched. Please note that for long-running NVML processes
  4099. * the enumeration will change based on how many GPUs are discovered and where they are inserted in bus order.
  4100. *
  4101. * In addition, all newly discovered GPUs will be initialized and their ECC scrubbed which may take several seconds
  4102. * per GPU. Also, all device handles are no longer guaranteed to be valid post discovery.
  4103. *
  4104. * Must be run as administrator.
  4105. * For Linux only.
  4106. *
  4107. * For newer than Maxwell &tm; fully supported devices.
  4108. * Some Kepler devices supported.
  4109. *
  4110. * @param pciInfo The PCI tree to be searched. Only the domain, bus, and device
  4111. * fields are used in this call.
  4112. *
  4113. * @return
  4114. * - \ref NVML_SUCCESS if the rediscovery of GPUs completed successfully
  4115. * - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
  4116. * - \ref NVML_ERROR_INVALID_ARGUMENT if \a pciInfo is invalid
  4117. * - \ref NVML_ERROR_NOT_SUPPORTED if the operating system does not support this feature
  4118. * - \ref NVML_ERROR_OPERATING_SYSTEM if the operating system is denying this feature
  4119. * - \ref NVML_ERROR_NO_PERMISSION if the calling process has insufficient permissions to perform operation
  4120. * - \ref NVML_ERROR_UNKNOWN on any unexpected error
  4121. */
  4122. nvmlReturn_t DECLDIR nvmlDeviceDiscoverGpus (nvmlPciInfo_t *pciInfo);
  4123. /** @} */
  4124. /**
  4125. * NVML API versioning support
  4126. */
  4127. #if defined(__NVML_API_VERSION_INTERNAL)
  4128. #undef nvmlDeviceGetPciInfo
  4129. #undef nvmlDeviceGetCount
  4130. #undef nvmlDeviceGetHandleByIndex
  4131. #undef nvmlDeviceGetHandleByPciBusId
  4132. #undef nvmlInit
  4133. #endif
  4134. #ifdef __cplusplus
  4135. }
  4136. #endif
  4137. #endif