def test_dcgm_policy_inject_retiredpages_standalone(handle, gpuIds):
    """ Verifies that we can inject an error into the retired pages counters and receive a callback """
    # Build a policy that fires once more than 5 pages have been retired.
    policy = dcgm_structs.c_dcgmPolicy_v1()
    policy.version = dcgm_structs.dcgmPolicy_version1
    policy.condition = dcgm_structs.DCGM_POLICY_COND_MAX_PAGES_RETIRED
    policy.parms[2].tag = 1
    policy.parms[2].val.llval = 5

    # find a GPU that supports ECC and retired pages (otherwise internal test will ignore it)
    group = pydcgm.DcgmHandle(handle).GetSystem().GetGroupWithGpuIds("test1", gpuIds)
    group.policy.Set(policy)

    callbackQueue = Queue.Queue()
    c_callback = create_c_callback(callbackQueue)
    group.policy.Register(dcgm_structs.DCGM_POLICY_COND_MAX_PAGES_RETIRED,
                          finishCallback=c_callback)

    # Inject a DBE retired-pages count above the policy threshold, timestamped
    # in the future so it becomes the latest sample.
    numPages = 10
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = dcgm_fields.DCGM_FI_DEV_RETIRED_DBE
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    fv.ts = int((time.time() + 60) * 1000000.0)  # set the injected data into the future
    fv.value.i64 = numPages
    assert dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuIds[0], fv) == dcgm_structs.DCGM_ST_OK

    # inject a SBE too so that the health check code gets past its internal checks
    fv.fieldId = dcgm_fields.DCGM_FI_DEV_RETIRED_SBE
    assert dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuIds[0], fv) == dcgm_structs.DCGM_ST_OK

    # wait for the policy manager to call back
    try:
        callbackData = callbackQueue.get(timeout=POLICY_CALLBACK_TIMEOUT_SECS)
    except Queue.Empty:
        assert False, "Callback never happened"

    # check that the callback occurred with the correct arguments
    assert dcgm_structs.DCGM_POLICY_COND_MAX_PAGES_RETIRED == callbackData.condition, \
        ("error callback was not for a retired pages, got: %s" % callbackData.condition)
    assert numPages == callbackData.val.mpr.dbepages, \
        'Expected %s errors but got %s' % (numPages, callbackData.val.mpr.dbepages)
def test_dcgm_injection_multi_fetch_remote(handle, gpuIds):
    """ Verify that multi-fetches work with the agent """
    gpuId = gpuIds[0]
    NinjectValues = 10
    firstTs = get_usec_since_1970()
    lastTs = 0
    injectedValues = []

    # Inject the values we're going to fetch, one microsecond apart.
    for i in range(NinjectValues):
        fvGood = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
        fvGood.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
        fvGood.fieldId = dcgm_fields.DCGM_FI_DEV_ECC_PENDING
        fvGood.status = 0
        fvGood.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
        fvGood.ts = firstTs + i
        fvGood.value.i64 = 1 + i
        # This will throw an exception if it fails
        dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvGood)
        injectedValues.append(fvGood)
        lastTs = fvGood.ts

    # Fetch in both orders with no timestamp bounds (0/0 = everything).
    # maxCount is deliberately bigger than NinjectValues so we can verify that
    # only the injected rows come back.
    maxCount = 2 * NinjectValues
    for order in (dcgm_structs.DCGM_ORDER_ASCENDING,
                  dcgm_structs.DCGM_ORDER_DESCENDING):
        fvFetched = dcgm_agent_internal.dcgmGetMultipleValuesForField(
            handle, gpuId, fvGood.fieldId, maxCount, 0, 0, order)
        assert len(fvFetched) == NinjectValues, "Expected %d rows. Got %d" % (
            NinjectValues, len(fvFetched))
        helper_verify_multi_values(fvFetched, order, injectedValues)
def helper_inject_vgpu_configuration(handle, gpuId, eccModeVal, powerLimitVal,
                                     computeModeVal):
    """
    Helper method to inject configuration to Cachemanager.

    Any of eccModeVal / powerLimitVal / computeModeVal may be None, in which
    case the corresponding field is not injected.

    :param handle: DCGM handle
    :param gpuId: GPU to inject values for
    :param eccModeVal: int64 value for DCGM_FI_DEV_ECC_CURRENT, or None
    :param powerLimitVal: double value for DCGM_FI_DEV_POWER_MGMT_LIMIT, or None
    :param computeModeVal: int64 value for DCGM_FI_DEV_COMPUTE_MODE, or None
    """
    def _inject(fieldId, fieldType, value):
        # Inject a single field value, timestamped slightly in the future so
        # it is treated as the most recent sample.
        fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
        fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
        fv.fieldId = fieldId
        fv.status = 0
        fv.fieldType = ord(fieldType)
        fv.ts = int((time.time() + 1) * 1000000.0)  # set the injected data into the future
        if fieldType == dcgm_fields.DCGM_FT_DOUBLE:
            fv.value.dbl = value
        else:
            fv.value.i64 = value
        ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fv)
        assert (ret == dcgm_structs.DCGM_ST_OK)

    # 'is not None' (not '!= None'): identity test is the Pythonic None check.
    if eccModeVal is not None:
        _inject(dcgm_fields.DCGM_FI_DEV_ECC_CURRENT,
                dcgm_fields.DCGM_FT_INT64, eccModeVal)
    if powerLimitVal is not None:
        _inject(dcgm_fields.DCGM_FI_DEV_POWER_MGMT_LIMIT,
                dcgm_fields.DCGM_FT_DOUBLE, powerLimitVal)
    if computeModeVal is not None:
        _inject(dcgm_fields.DCGM_FI_DEV_COMPUTE_MODE,
                dcgm_fields.DCGM_FT_INT64, computeModeVal)
def test_collectd_basic_integration(handle, gpuIds):
    """ Verifies that we can inject specific data and get that same data back """
    dcgmHandle = pydcgm.DcgmHandle(handle)
    dcgmSystem = dcgmHandle.GetSystem()

    specificFieldIds = [
        dcgm_fields.DCGM_FI_DEV_RETIRED_DBE,
        dcgm_fields.DCGM_FI_DEV_RETIRED_SBE,
        dcgm_fields.DCGM_FI_DEV_POWER_VIOLATION,
        dcgm_fields.DCGM_FI_DEV_THERMAL_VIOLATION
    ]
    fieldValues = [1, 5, 1000, 9000]

    # Inject every (field, value) pair for every GPU under test.
    for gpuId in gpuIds:
        for fieldId, value in zip(specificFieldIds, fieldValues):
            fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
            fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
            fv.fieldId = fieldId
            fv.status = 0
            fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
            fv.ts = int((time.time() + 10) * 1000000.0)  # set the injected data into the future
            fv.value.i64 = value
            ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fv)
            assert (ret == dcgm_structs.DCGM_ST_OK)

    # Drive the collectd plugin through its lifecycle hooks.
    gvars = collectd_tester_globals.gvars
    assert 'config' in gvars
    gvars['config']()
    assert 'init' in gvars
    gvars['init']()
    assert 'read' in gvars
    gvars['read']()
    assert 'out' in gvars
    outDict = gvars['out']
    assert 'shutdown' in gvars
    # gvars['shutdown']()

    # Verify that we can read back the fields we watch.
    for gpuId in gpuIds:
        assert str(gpuId) in outDict
        gpuDict = outDict[str(gpuId)]
        for fieldId, value in zip(specificFieldIds, fieldValues):
            fieldTag = dcgmSystem.fields.GetFieldById(fieldId).tag
            assert fieldTag in gpuDict
            assert gpuDict[fieldTag] == value
def inject_field_value_fp64(handle, gpuId, fieldId, value, offset):
    """Inject one FP64 sample for fieldId, timestamped offset seconds from now.

    Returns the status code from dcgmInjectFieldValue.
    """
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = fieldId
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_DOUBLE)
    fv.ts = int((time.time() + offset) * 1000000.0)
    fv.value.dbl = value
    return dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fv)
def helper_test_dcgm_injection_summaries(handle, gpuIds):
    """Inject a known ramp of ECC SBE totals and verify field summaries."""
    gpuId = gpuIds[0]
    fieldId = dcgm_fields.DCGM_FI_DEV_ECC_SBE_AGG_TOTAL

    # Watch the field we're inserting into
    dcgm_agent_internal.dcgmWatchFieldValue(handle, gpuId, fieldId, 1, 3600.0,
                                            10000)

    handleObj = pydcgm.DcgmHandle(handle=handle)
    systemObj = handleObj.GetSystem()

    # Inject ten increasing samples (0..9), one microsecond apart.
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = fieldId
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    baseTime = get_usec_since_1970()
    for i in range(10):
        fv.ts = baseTime + i
        fv.value.i64 = i
        ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fv)
        assert (ret == dcgm_structs.DCGM_ST_OK)

    time.sleep(1)
    systemObj.UpdateAllFields(1)

    summaryMask = (dcgm_structs.DCGM_SUMMARY_MIN
                   | dcgm_structs.DCGM_SUMMARY_MAX
                   | dcgm_structs.DCGM_SUMMARY_AVG
                   | dcgm_structs.DCGM_SUMMARY_DIFF)

    # A window entirely before the injected samples must return no data.
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        request = dcgm_agent.dcgmGetFieldSummary(handle, fieldId,
                                                 dcgm_fields.DCGM_FE_GPU,
                                                 gpuId, summaryMask,
                                                 baseTime - 60, baseTime - 30)

    # Now adjust the time so we get values
    request = dcgm_agent.dcgmGetFieldSummary(handle, fieldId,
                                             dcgm_fields.DCGM_FE_GPU, gpuId,
                                             summaryMask, 0, 0)
    assert (request.response.values[0].i64 == 0)  # min of 0..9
    assert (request.response.values[1].i64 == 9)  # max of 0..9
    assert (request.response.values[2].i64 == 4)  # avg of 0..9 (truncated)
    assert (request.response.values[3].i64 == 9)  # diff: last - first
def test_dcgm_policy_inject_nvlinkerror_standalone(handle, gpuIds):
    """ Verifies that we can inject an error into the NVLINK error and receive a callback """
    # Policy that fires on any NVLINK error.
    newPolicy = dcgm_structs.c_dcgmPolicy_v1()
    newPolicy.version = dcgm_structs.dcgmPolicy_version1
    newPolicy.condition = dcgm_structs.DCGM_POLICY_COND_NVLINK
    newPolicy.parms[5].tag = 0
    newPolicy.parms[5].val.boolean = True

    # find a GPU that supports nvlink (otherwise internal test will ignore it)
    dcgmHandle = pydcgm.DcgmHandle(handle)
    dcgmSystem = dcgmHandle.GetSystem()
    group = dcgmSystem.GetGroupWithGpuIds('test1', gpuIds)
    group.policy.Set(newPolicy)

    callbackQueue = Queue.Queue()
    c_callback = create_c_callback(callbackQueue)
    group.policy.Register(dcgm_structs.DCGM_POLICY_COND_NVLINK,
                          finishCallback=c_callback)

    # Inject one CRC FLIT error, timestamped in the future so it is the
    # latest sample.
    field = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    field.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    field.fieldId = dcgm_fields.DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL
    field.status = 0
    field.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    field.ts = int((time.time() + 60) * 1000000.0)  # set the injected data into the future
    field.value.i64 = 1
    ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuIds[0], field)
    assert (ret == dcgm_structs.DCGM_ST_OK)

    # wait for the policy manager to call back
    try:
        callbackData = callbackQueue.get(timeout=POLICY_CALLBACK_TIMEOUT_SECS)
    except Queue.Empty:
        assert False, "Callback never happened"

    # check that the callback occurred with the correct arguments
    assert(dcgm_structs.DCGM_POLICY_COND_NVLINK == callbackData.condition), \
        ("NVLINK error callback was not for a NVLINK error, got: %s" % callbackData.condition)
    assert(dcgm_fields.DCGM_FI_DEV_NVLINK_CRC_FLIT_ERROR_COUNT_TOTAL == callbackData.val.nvlink.fieldId), \
        ("Expected 130 fieldId but got %s" % callbackData.val.nvlink.fieldId)
    # BUGFIX: message previously said "PCI error" (copy-paste from the PCI
    # test); this is the NVLINK counter.
    assert (
        1 == callbackData.val.nvlink.counter
    ), 'Expected 1 NVLINK error but got %s' % callbackData.val.nvlink.counter
def helper_dcgm_policy_inject_eccerror(handle, gpuIds):
    """ Verifies that we can inject an error into the ECC counters and receive a callback """
    # Policy that fires on any double-bit ECC error.
    policy = dcgm_structs.c_dcgmPolicy_v1()
    policy.version = dcgm_structs.dcgmPolicy_version1
    policy.condition = dcgm_structs.DCGM_POLICY_COND_DBE
    policy.parms[0].tag = 0
    policy.parms[0].val.boolean = True

    group = pydcgm.DcgmHandle(handle).GetSystem().GetGroupWithGpuIds("test1", gpuIds)
    group.policy.Set(policy)

    # the order of the callbacks will change once implementation is complete
    callbackQueue = Queue.Queue()
    c_callback = create_c_callback(callbackQueue)
    group.policy.Register(dcgm_structs.DCGM_POLICY_COND_DBE, c_callback, None)

    # inject an error into ECC
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_DEV
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    fv.ts = int((time.time() + 60) * 1000000.0)  # set the injected data into the future
    fv.value.i64 = 1
    logger.debug("injecting %s for gpuId %d" % (str(fv), gpuIds[0]))
    assert dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuIds[0], fv) == dcgm_structs.DCGM_ST_OK

    # wait for the policy manager to call back
    try:
        callbackData = callbackQueue.get(timeout=POLICY_CALLBACK_TIMEOUT_SECS)
    except Queue.Empty:
        assert False, "Callback never happened"

    # check that the callback occurred with the correct arguments
    assert dcgm_structs.DCGM_POLICY_COND_DBE == callbackData.condition, \
        ("error callback was not for a DBE error, got: %s" % callbackData.condition)
    assert 1 == callbackData.val.dbe.numerrors, \
        'Expected 1 DBE error but got %s' % callbackData.val.dbe.numerrors
    assert dcgm_structs.c_dcgmPolicyConditionDbe_t.LOCATIONS['DEVICE'] == callbackData.val.dbe.location, \
        'got: %s' % callbackData.val.dbe.location
def helper_dcgm_policy_inject_pcierror(handle, gpuIds):
    """ Verifies that we can inject an error into the PCI counters and receive a callback """
    # Policy that fires when the PCIe replay counter exceeds 0.
    policy = dcgm_structs.c_dcgmPolicy_v1()
    policy.version = dcgm_structs.dcgmPolicy_version1
    policy.condition = dcgm_structs.DCGM_POLICY_COND_PCI
    policy.parms[1].tag = 1
    policy.parms[1].val.llval = 0

    gpuId = gpuIds[0]
    group = pydcgm.DcgmGroup(pydcgm.DcgmHandle(handle), groupName="test1",
                             groupType=dcgm_structs.DCGM_GROUP_EMPTY)
    group.AddGpu(gpuId)
    group.policy.Set(policy)

    callbackQueue = Queue.Queue()
    c_callback = create_c_callback(callbackQueue)
    group.policy.Register(dcgm_structs.DCGM_POLICY_COND_PCI,
                          finishCallback=c_callback)

    # Inject one PCIe replay, timestamped in the future so it is the latest
    # sample.
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = dcgm_fields.DCGM_FI_DEV_PCIE_REPLAY_COUNTER
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    fv.ts = int((time.time() + 60) * 1000000.0)  # set the injected data into the future
    fv.value.i64 = 1
    assert dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fv) == dcgm_structs.DCGM_ST_OK

    # wait for the policy manager to call back
    try:
        callbackData = callbackQueue.get(timeout=POLICY_CALLBACK_TIMEOUT_SECS)
    except Queue.Empty:
        assert False, "Callback never happened"

    # check that the callback occurred with the correct arguments
    assert dcgm_structs.DCGM_POLICY_COND_PCI == callbackData.condition, \
        ("PCI error callback was not for a PCI error, got: %s" % callbackData.condition)
    assert 1 == callbackData.val.pci.counter, \
        'Expected 1 PCI error but got %s' % callbackData.val.pci.counter
def test_reading_specific_data(handle, gpuIds):
    """ Verifies that we can inject specific data and get that same data back """
    dcgmHandle = pydcgm.DcgmHandle(handle)
    dcgmSystem = dcgmHandle.GetSystem()

    specificFieldIds = [
        dcgm_fields.DCGM_FI_DEV_RETIRED_DBE,
        dcgm_fields.DCGM_FI_DEV_POWER_VIOLATION,
        dcgm_fields.DCGM_FI_DEV_THERMAL_VIOLATION,
    ]
    fieldValues = [1, 1000, 9000]

    # Inject one known value per field on the first GPU.
    for fieldId, value in zip(specificFieldIds, fieldValues):
        fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
        fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
        fv.fieldId = fieldId
        fv.status = 0
        fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
        fv.ts = int((time.time() + 10) * 1000000.0)  # set the injected data into the future
        fv.value.i64 = value
        ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuIds[0], fv)
        assert (ret == dcgm_structs.DCGM_ST_OK)

    # Read back through DcgmReader and compare against what was injected.
    # pylint: disable=undefined-variable
    dr = DcgmReader(fieldIds=specificFieldIds)
    dr.SetHandle(handle)
    latest = dr.GetLatestGpuValuesAsFieldIdDict()

    assert len(latest[gpuIds[0]]) == len(specificFieldIds)
    for fieldId, value in zip(specificFieldIds, fieldValues):
        assert latest[gpuIds[0]][fieldId] == value
def helper_test_dcgm_policy_inject_xiderror(handle, gpuIds):
    """ Verifies that we can inject an XID error and receive a callback """
    # Policy that fires on any XID error.
    policy = dcgm_structs.c_dcgmPolicy_v1()
    policy.version = dcgm_structs.dcgmPolicy_version1
    policy.condition = dcgm_structs.DCGM_POLICY_COND_XID
    policy.parms[6].tag = 0
    policy.parms[6].val.boolean = True

    dcgmHandle = pydcgm.DcgmHandle(handle)

    # Find a GPU that reports XID errors; skip the test if none does.
    validDeviceId = -1
    for candidate in gpuIds:
        fvSupported = dcgm_agent_internal.dcgmGetLatestValuesForFields(
            handle, candidate, [dcgm_fields.DCGM_FI_DEV_XID_ERRORS, ])
        if fvSupported[0].value.i64 != dcgmvalue.DCGM_INT64_NOT_SUPPORTED:
            validDeviceId = candidate
            break
    if validDeviceId == -1:
        test_utils.skip_test(
            "Can only run if at least one GPU that supports XID errors is present"
        )

    group = pydcgm.DcgmGroup(dcgmHandle, groupName="test1",
                             groupType=dcgm_structs.DCGM_GROUP_EMPTY)
    group.AddGpu(validDeviceId)
    group.policy.Set(policy)

    callbackQueue = Queue.Queue()
    c_callback = create_c_callback(callbackQueue)
    group.policy.Register(dcgm_structs.DCGM_POLICY_COND_XID,
                          finishCallback=c_callback)

    # Inject XID 16, timestamped in the future so it is the latest sample.
    fv = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fv.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fv.fieldId = dcgm_fields.DCGM_FI_DEV_XID_ERRORS
    fv.status = 0
    fv.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    fv.ts = int((time.time() + 60) * 1000000.0)  # set the injected data into the future
    fv.value.i64 = 16
    assert dcgm_agent_internal.dcgmInjectFieldValue(handle, validDeviceId, fv) == dcgm_structs.DCGM_ST_OK

    # wait for the policy manager to call back
    try:
        callbackData = callbackQueue.get(timeout=POLICY_CALLBACK_TIMEOUT_SECS)
    except Queue.Empty:
        assert False, "Callback never happened"

    # check that the callback occurred with the correct arguments
    assert dcgm_structs.DCGM_POLICY_COND_XID == callbackData.condition, \
        ("XID error callback was not for a XID error, got: %s" % callbackData.condition)
    assert 16 == callbackData.val.xid.errnum, (
        'Expected XID error 16 but got %s' % callbackData.val.xid.errnum)
# NOTE(review): this fragment continues a function whose beginning is outside
# this view — groupId, c_callback, validDevice, callbackCalled, statusHandle
# and powerLimit_set are presumably defined earlier in that function; confirm
# against the full file.
requestId = dcgm_agent.dcgmPolicyRegister(
    handle, groupId, dcgm_structs.DCGM_POLICY_COND_MAX_PAGES_RETIRED,
    c_callback, c_callback)
assert requestId is not None

# inject an error into page retirement
field = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
field.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
field.fieldId = dcgm_fields.DCGM_FI_DEV_RETIRED_DBE
field.status = 0
field.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
field.ts = int((time.time() + 11) * 1000000.0)  # set the injected data into the future
field.value.i64 = 10
ret = dcgm_agent_internal.dcgmInjectFieldValue(handle, validDevice, field)
assert (ret == dcgm_structs.DCGM_ST_OK)

# Wait up to 60 seconds for the policy callback to fire.
now = time.time()
while not callbackCalled:
    # BUGFIX: was `time.time() == now + 60`, an exact float equality that is
    # essentially never true, so the timeout never triggered and this loop
    # could busy-spin forever. Compare with >= and sleep briefly instead.
    if time.time() >= now + 60:  # wait 60 seconds
        print("Timeout waiting for callback")
        sys.exit(1)
    time.sleep(0.1)

print("Callback successfully received.")

## Verify that configuration is auto-enforced after GPU reset
#dcgm_agent.dcgmStatusClear(statusHandle)
#ret = dcgm_agent.dcgmConfigEnforce(handle, groupId, statusHandle)
helper_verify_power_value_standalone(handle, groupId, powerLimit_set)
def test_dcgm_injection_agent(handle, gpuIds):
    """ Verifies that injection works with the agent host engine """
    gpuId = gpuIds[0]

    #Make a base value that is good for starters
    fvGood = dcgm_structs_internal.c_dcgmInjectFieldValue_v1()
    fvGood.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    fvGood.fieldId = dcgm_fields.DCGM_FI_DEV_ECC_CURRENT
    fvGood.status = 0
    fvGood.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    fvGood.ts = get_usec_since_1970()
    fvGood.value.i64 = 1

    # Cache-manager sample count before injection, so we can verify below
    # that injecting actually added a sample.
    fieldInfoBefore = dcgm_agent_internal.dcgmGetCacheManagerFieldInfo(
        handle, gpuId, fvGood.fieldId)
    countBefore = fieldInfoBefore.numSamples

    #This will throw an exception if it fails
    dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvGood)

    fieldInfoAfter = dcgm_agent_internal.dcgmGetCacheManagerFieldInfo(
        handle, gpuId, fvGood.fieldId)
    countAfter = fieldInfoAfter.numSamples

    assert countAfter > countBefore, "Expected countAfter %d > countBefore %d after injection" % (
        countAfter, countBefore)

    #Fetch the value we just inserted and verify its attributes are the same
    fvFetched = dcgm_agent_internal.dcgmGetLatestValuesForFields(
        handle, gpuId, [
            fvGood.fieldId,
        ])[0]
    helper_verify_fv_equal(fvFetched, fvGood)

    # NOTE: fvAlsoGood and fvBad below are aliases of fvGood (same object, not
    # copies) — every mutation through them also changes fvGood, which is why
    # fvGood.fieldType is explicitly restored after the bad-type test.
    #Should be able to insert a null timestamp.
    #The agent will just use "now"
    fvAlsoGood = fvGood
    fvAlsoGood.ts = 0
    #This will thrown an exception if it fails
    dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvAlsoGood)

    #Now make some attributes bad and expect an error
    fvBad = fvGood
    fvBad.fieldType = ord(dcgm_fields.DCGM_FT_DOUBLE)
    with test_utils.assert_raises(
            dcgmExceptionClass(dcgm_structs.DCGM_ST_BADPARAM)):
        dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvBad)
    fvGood.fieldType = ord(dcgm_fields.DCGM_FT_INT64)
    """ TODO: DCGM-2130 - Restore this test when protobuf is removed
    #Now make some attributes bad and expect an error
    fvBad = fvGood
    fvBad.version = 0
    with test_utils.assert_raises(dcgmExceptionClass(dcgm_structs.DCGM_ST_VER_MISMATCH)):
        dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvBad)
    fvGood.version = dcgm_structs_internal.dcgmInjectFieldValue_version1
    """
    # An out-of-range fieldId must be rejected as a bad parameter.
    fvBad = fvGood
    fvBad.fieldId = dcgm_fields.DCGM_FI_MAX_FIELDS + 100
    with test_utils.assert_raises(
            dcgmExceptionClass(dcgm_structs.DCGM_ST_BADPARAM)):
        dcgm_agent_internal.dcgmInjectFieldValue(handle, gpuId, fvBad)