def helper_test_inject_instance_fields(handle, gpuIds):
    """Inject a GPU-level field value and check how it is reported per entity.

    A group containing the first GPU, one of its GPU instances and one of its
    compute instances is watched for DCGM_FI_DEV_ECC_DBE_VOL_TOTAL. The value
    2 is then injected for the GPU entity only, and the latest values for all
    three entities are read back:

    * the GPU entity must report the injected value 2;
    * the instance and compute-instance entities must report
      DCGM_ST_NO_DATA.

    Args:
        handle: DCGM handle passed through to every dcgm_agent call.
        gpuIds: Sequence of GPU ids; only gpuIds[0] is used.

    Raises:
        AssertionError: if any entity reports an unexpected value or status.
    """
    # NOTE(review): the meaning of the trailing (1, 1) arguments is defined by
    # ensure_instance_ids, which is not visible here -- confirm against it.
    instances, cis = ensure_instance_ids(handle, gpuIds[0], 1, 1)

    # dict.keys() is a non-subscriptable view on Python 3, so take the first
    # key through an iterator; this yields the same key as keys()[0] did on
    # Python 2.
    firstInstanceId = next(iter(instances))
    lastCIId = next(iter(cis))

    fieldId = dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_TOTAL

    # Set up the watches on these groups
    groupId = dcgm_agent.dcgmGroupCreate(
        handle, dcgm_structs.DCGM_GROUP_EMPTY, 'tien')
    fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(handle, [fieldId], 'kal')

    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU, gpuIds[0])
    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU_I, firstInstanceId)
    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU_CI, lastCIId)

    dcgm_agent.dcgmWatchFields(handle, groupId, fieldGroupId, 1, 100, 100)

    # Inject the value 2 for the GPU entity only.
    # NOTE(review): the positional 5 is presumably a time offset -- confirm
    # against dcgm_internal_helpers.inject_value.
    dcgm_internal_helpers.inject_value(
        handle, gpuIds[0], fieldId, 2, 5,
        isInt=True, verifyInsertion=True,
        entityType=dcgm_fields.DCGM_FE_GPU)

    # Read the values to make sure they were stored properly
    entities = [
        dcgm_structs.c_dcgmGroupEntityPair_t(),
        dcgm_structs.c_dcgmGroupEntityPair_t(),
        dcgm_structs.c_dcgmGroupEntityPair_t()
    ]

    entities[0].entityGroupId = dcgm_fields.DCGM_FE_GPU_I
    entities[0].entityId = firstInstanceId
    entities[1].entityGroupId = dcgm_fields.DCGM_FE_GPU_CI
    entities[1].entityId = lastCIId
    entities[2].entityGroupId = dcgm_fields.DCGM_FE_GPU
    entities[2].entityId = gpuIds[0]

    fieldIds = [fieldId]

    values = dcgm_agent.dcgmEntitiesGetLatestValues(
        handle, entities, fieldIds, 0)

    for v in values:
        if v.entityGroupId == dcgm_fields.DCGM_FE_GPU:
            assert v.value.i64 == 2, "Failed to inject value 2 for entity %u from group %u" % (
                v.entityId, v.entityGroupId)
        else:
            # Nothing was injected for the instance / compute instance, so
            # they are expected to carry no data at all.
            assert (
                v.status == dcgm_structs.DCGM_ST_NO_DATA
            ), "Injected meaningless value %u for entity %u from group %u" % (
                v.value.i64, v.entityId, v.entityGroupId)
def _WatchFieldGroup(self):
    """Start watching this object's field group and fetch the first values.

    Registers a watch for self._fieldGroup on self._groupId using the stored
    update frequency, max keep age and max keep samples, forces a field
    update, and then calls self.GetMore().

    Raises:
        Whatever dcgm_structs._dcgmCheckReturn raises for a failing return
        code from either DCGM call.
    """
    watchStatus = dcgm_agent.dcgmWatchFields(
        self._handle,
        self._groupId,
        self._fieldGroup.fieldGroupId,
        self._updateFreq,
        self._maxKeepAge,
        self._maxKeepSamples)
    # Throws an exception on error
    dcgm_structs._dcgmCheckReturn(watchStatus)

    # Force a refresh of the fields so there are initial values to fetch.
    # NOTE(review): the literal 1 is presumably a "wait for update" flag --
    # confirm against dcgm_agent.dcgmUpdateAllFields.
    updateStatus = dcgm_agent.dcgmUpdateAllFields(self._handle, 1)
    # Throws an exception on error
    dcgm_structs._dcgmCheckReturn(updateStatus)

    # The initial update will fetch from startTimestamp
    self.GetMore()
def WatchFields(self, fieldGroup, updateFreq, maxKeepAge, maxKeepSamples):
    """Watch the fields of ``fieldGroup`` on this object's group.

    Args:
        fieldGroup: Object exposing a ``fieldGroupId`` attribute.
        updateFreq: Forwarded unchanged to dcgm_agent.dcgmWatchFields.
        maxKeepAge: Forwarded unchanged to dcgm_agent.dcgmWatchFields.
        maxKeepSamples: Forwarded unchanged to dcgm_agent.dcgmWatchFields.

    Raises:
        Whatever dcgm_structs._dcgmCheckReturn raises for a failing return
        code.
    """
    status = dcgm_agent.dcgmWatchFields(
        self._dcgmHandle.handle,
        self._groupId,
        fieldGroup.fieldGroupId,
        updateFreq,
        maxKeepAge,
        maxKeepSamples)
    # Convert a failing return code into an exception.
    dcgm_structs._dcgmCheckReturn(status)