Пример #1
0
def helper_test_inject_instance_fields(handle, gpuIds):
    '''
    Verify that a value injected for a GPU entity is not reported for the
    other entities watched alongside it.

    The first GPU in gpuIds, one DCGM_FE_GPU_I entity and one DCGM_FE_GPU_CI
    entity (ids taken from ensure_instance_ids) are added to a watched group.
    The value 2 is then injected for DCGM_FI_DEV_ECC_DBE_VOL_TOTAL on the GPU
    entity only. Reading the latest values back must yield 2 for the GPU and a
    DCGM_ST_NO_DATA status for every other entity.

    handle -- DCGM handle forwarded to every dcgm_agent call
    gpuIds -- sequence of GPU ids; only gpuIds[0] is used

    Raises AssertionError if any value read back does not match the above.
    '''
    instances, cis = ensure_instance_ids(handle, gpuIds[0], 1, 1)
    # Materialize the keys before indexing: on Python 3 dict.keys() returns a
    # view that does not support [0]. list(...) behaves the same on Python 2.
    firstInstanceId = list(instances.keys())[0]
    # NOTE(review): despite its name this is the FIRST id returned, not the last
    lastCIId = list(cis.keys())[0]

    fieldId = dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_TOTAL

    # Set up the watches on these groups
    groupId = dcgm_agent.dcgmGroupCreate(handle, dcgm_structs.DCGM_GROUP_EMPTY,
                                         'tien')
    fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(handle, [fieldId], 'kal')

    dcgm_agent.dcgmGroupAddEntity(handle, groupId, dcgm_fields.DCGM_FE_GPU,
                                  gpuIds[0])
    dcgm_agent.dcgmGroupAddEntity(handle, groupId, dcgm_fields.DCGM_FE_GPU_I,
                                  firstInstanceId)
    dcgm_agent.dcgmGroupAddEntity(handle, groupId, dcgm_fields.DCGM_FE_GPU_CI,
                                  lastCIId)
    dcgm_agent.dcgmWatchFields(handle, groupId, fieldGroupId, 1, 100, 100)

    # Inject only for the GPU entity; the instance entities get nothing
    dcgm_internal_helpers.inject_value(
        handle,
        gpuIds[0],
        fieldId,
        2,
        5,
        isInt=True,
        verifyInsertion=True,
        entityType=dcgm_fields.DCGM_FE_GPU)

    # Read the values to make sure they were stored properly
    entities = [
        dcgm_structs.c_dcgmGroupEntityPair_t(),
        dcgm_structs.c_dcgmGroupEntityPair_t(),
        dcgm_structs.c_dcgmGroupEntityPair_t()
    ]

    entities[0].entityGroupId = dcgm_fields.DCGM_FE_GPU_I
    entities[0].entityId = firstInstanceId
    entities[1].entityGroupId = dcgm_fields.DCGM_FE_GPU_CI
    entities[1].entityId = lastCIId
    entities[2].entityGroupId = dcgm_fields.DCGM_FE_GPU
    entities[2].entityId = gpuIds[0]

    values = dcgm_agent.dcgmEntitiesGetLatestValues(handle, entities,
                                                    [fieldId], 0)
    for v in values:
        if v.entityGroupId == dcgm_fields.DCGM_FE_GPU:
            assert v.value.i64 == 2, "Failed to inject value 2 for entity %u from group %u" % (
                v.entityId, v.entityGroupId)
        else:
            # Nothing was injected for the non-GPU entities, so they must
            # report no data rather than a value.
            assert (
                v.status == dcgm_structs.DCGM_ST_NO_DATA
            ), "Injected meaningless value %u for entity %u from group %u" % (
                v.value.i64, v.entityId, v.entityGroupId)
Пример #2
0
 def __init__(self, dcgmHandle, name="", fieldIds=None, fieldGroupId=None):
     '''
     Wrap an existing field group, or create a new one.

     dcgmHandle   -- handle object; its .handle attribute is passed to
                     dcgm_agent when a new field group is created
     name         -- name given to a newly created field group
     fieldIds     -- list of field ids for the group; omitted or None means
                     an empty list
     fieldGroupId -- if not None, this id is stored as-is and no field group
                     is created
     '''
     if fieldIds is None:
         #A literal [] default would be one list shared by every instance
         #constructed without fieldIds, so build a fresh one per call
         fieldIds = []

     self.name = name
     self.fieldIds = fieldIds
     self._dcgmHandle = dcgmHandle
     if fieldGroupId is not None:
         self.fieldGroupId = fieldGroupId
     else:
         self.fieldGroupId = None  #Assign here so the destructor doesn't fail if the call below fails
         self.fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
             self._dcgmHandle.handle, fieldIds, name)
Пример #3
0
def test_dcgm_connection_client_cleanup(handle, gpuIds):
    '''
    Make sure that resources that were allocated by a client are cleaned up

    A group and a field group are created over a second, non-persistent
    connection. After that connection is closed, looking either of them up
    through the original handle is expected to raise.

    handle -- DCGM handle used to query for the resources after the cleanup
    gpuIds -- unused by this test
    '''
    fieldGroupFieldIds = [
        dcgm_fields.DCGM_FI_DEV_GPU_TEMP,
    ]

    #Get a 2nd connection which we'll check for cleanup. Use the raw APIs so we can explicitly cleanup
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.version = dcgm_structs.c_dcgmConnectV2Params_version
    connectParams.persistAfterDisconnect = 0
    cleanupHandle = dcgm_agent.dcgmConnect_v2('localhost', connectParams)

    try:
        groupId = dcgm_agent.dcgmGroupCreate(cleanupHandle,
                                             dcgm_structs.DCGM_GROUP_EMPTY,
                                             'clientcleanupgroup')

        fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
            cleanupHandle, fieldGroupFieldIds, 'clientcleanupfieldgroup')
    finally:
        #Disconnect our second handle. This should cause the cleanup to occur.
        #Done in a finally so the raw connection is not leaked if one of the
        #create calls above raises.
        dcgm_agent.dcgmDisconnect(cleanupHandle)

    time.sleep(1.0)  #Allow connection cleanup to occur since it's asynchronous

    #Try to retrieve the field group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        dcgm_agent.dcgmFieldGroupGetInfo(handle, fieldGroupId)

    #Try to retrieve the group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(
                dcgm_structs.DCGM_ST_NOT_CONFIGURED)):
        dcgm_agent.dcgmGroupGetInfo(handle, groupId)