Пример #1
0
def createUsageRecord(log_entry, hostname, user_map, project_map, missing_user_mappings):
    """
    Creates a Usage Record object given a Torque log entry.

    Parameters:
      log_entry: mapping holding the fields of one Torque log entry. The keys
          read are 'jobid', 'user', 'queue', 'ctime', 'start', 'end', 'group',
          'resources_used.cput', 'resources_used.walltime',
          'Resource_List.nodes' and 'exec_host'; a missing key raises KeyError.
      hostname: name of the machine the record is reported for. It is used as
          a prefix of the record id and, for purely numeric job ids, as a
          suffix of the local job id.
      user_map: mapping from local user name to global user name.
      project_map: mapping from local account (group) name to project name.
      missing_user_mappings: dict that is updated in place; every user name
          not found in user_map is recorded as a key with value True.

    Returns:
      The populated usagerecord.UsageRecord instance.
    """

    # extract data from the workload trace (log_entry)
    job_id       = log_entry['jobid']
    user_name    = log_entry['user']
    queue        = log_entry['queue']
    submit_time  = int(log_entry['ctime'])
    start_time   = int(log_entry['start'])
    end_time     = int(log_entry['end'])
    account_name = log_entry['group']
    # getSeconds / getCoreCount are helpers defined elsewhere in this module
    utilized_cpu = getSeconds(log_entry['resources_used.cput'])
    wall_time    = getSeconds(log_entry['resources_used.walltime'])
    core_count   = getCoreCount(log_entry['Resource_List.nodes'])
    # exec_host entries are separated by '+'; the part before '/' is the host
    # name. The set removes hosts that are listed more than once.
    hosts        = list(set(hc.split('/')[0] for hc in log_entry['exec_host'].split('+')))

    # clean data and create various composite entries from the work load trace
    if job_id.isdigit() and hostname is not None:
        job_identifier = job_id + '.' + hostname
    else:
        job_identifier = job_id
    # NOTE(review): hostname is tested for None above but concatenated
    # unconditionally here, so a None hostname raises TypeError - confirm
    # whether callers can actually pass None.
    fqdn_job_id = hostname + ':' + job_identifier

    # remember users without a mapping so the caller can report them
    if user_name not in user_map:
        missing_user_mappings[user_name] = True

    vo_info = []
    if account_name is not None:
        mapped_project = project_map.get(account_name)
        if mapped_project is not None:
            voi = usagerecord.VOInformation()
            voi.type = 'lrmsurgen-projectmap'
            voi.name = mapped_project
            # without this append the mapped project never reaches the record
            vo_info.append(voi)

    ## fill in usage record fields
    ur = usagerecord.UsageRecord()
    ur.record_id        = fqdn_job_id
    ur.local_job_id     = job_identifier
    ur.global_job_id    = fqdn_job_id
    ur.local_user_id    = user_name
    ur.global_user_name = user_map.get(user_name)  # None when the user is unmapped
    ur.machine_name     = hostname
    ur.queue            = queue
    ur.processors       = core_count
    ur.node_count       = len(hosts)
    ur.host             = ','.join(hosts)
    ur.submit_time      = usagerecord.epoch2isoTime(submit_time)
    ur.start_time       = usagerecord.epoch2isoTime(start_time)
    ur.end_time         = usagerecord.epoch2isoTime(end_time)
    ur.cpu_duration     = utilized_cpu
    ur.wall_duration    = wall_time
    ur.project_name     = account_name
    ur.vo_info         += vo_info

    return ur
Пример #2
0
def createUsageRecord(log_entry, hostname, user_map, project_map, maui_server_host, missing_user_mappings):
    """
    Creates a Usage Record object given a Maui log entry.

    Parameters:
      log_entry: sequence of string fields of one Maui log entry, addressed
          by position. The indexes read are 0 (job id), 3 (user), 7 (requested
          class), 8 (submit time), 10 (start time), 11 (end time),
          21 (allocated tasks), 25 (account), 29 (utilized cpu),
          31 (multiplied by the task count to get the core count) and
          37 (':'-separated host list).
      hostname: name of the machine the record is reported for; used as the
          prefix of the record id.
      user_map: mapping from local user name to global user name.
      project_map: mapping from local account name to project name.
      maui_server_host: host name appended to purely numeric job ids; may be
          None, in which case the job id is used unchanged.
      missing_user_mappings: dict that is updated in place; every user name
          not found in user_map is recorded as a key with value True.

    Returns:
      The populated usagerecord.UsageRecord instance.
    """

    # extract data from the workload trace (log_entry)

    job_id       = log_entry[0]
    user_name    = log_entry[3]
    req_class    = log_entry[7]
    submit_time  = int(log_entry[8])
    start_time   = int(log_entry[10])
    end_time     = int(log_entry[11])
    alo_tasks    = int(log_entry[21])
    account_name = log_entry[25]
    utilized_cpu = float(log_entry[29])
    core_count   = int(log_entry[31])*alo_tasks
    hosts        = log_entry[37].split(':')

    # clean data and create various composite entries from the work load trace

    if job_id.isdigit() and maui_server_host is not None:
        job_identifier = job_id + '.' + maui_server_host
    else:
        job_identifier = job_id
    fqdn_job_id = hostname + ':' + job_identifier

    # remember users without a mapping so the caller can report them
    if user_name not in user_map:
        missing_user_mappings[user_name] = True

    # strip the surrounding brackets and keep only the part before any ':'
    queue = req_class.replace('[','').replace(']','')
    if ':' in queue:
        queue = queue.split(':')[0]

    # '[NONE]' is the trace's placeholder for "no account"
    if account_name == '[NONE]':
        account_name = None

    vo_info = []
    if account_name is not None:
        mapped_project = project_map.get(account_name)
        if mapped_project is not None:
            voi = usagerecord.VOInformation()
            voi.type = 'lrmsurgen-projectmap'
            voi.name = mapped_project
            # without this append the mapped project never reaches the record
            vo_info.append(voi)

    wall_time = end_time - start_time

    # okay, this is somewhat ridiculous and complicated:
    # When compiled on linux, maui will think that it will only get cputime reading
    # from the master node. To compensate for this it multiplies the utilized cpu field
    # with the number of tasks. However on most newer torque installations the correct
    # cpu utilization is reported. When combined this creates abnormally high cpu time
    # values for parallel jobs. The following heuristic tries to compensate for this,
    # by checking if the cpu time is higher than wall_time * cpus (which it never should
    # be), and then correct the number. However this will not work for jobs with very
    # low efficiency

    if utilized_cpu > wall_time * alo_tasks:
        utilized_cpu /= alo_tasks

    ## fill in usage record fields

    ur = usagerecord.UsageRecord()

    ur.record_id = fqdn_job_id

    ur.local_job_id = job_identifier
    ur.global_job_id = fqdn_job_id

    ur.local_user_id = user_name
    ur.global_user_name = user_map.get(user_name)  # None when the user is unmapped

    ur.machine_name = hostname
    ur.queue = queue

    ur.processors = core_count
    ur.node_count = len(hosts)
    ur.host = ','.join(hosts)

    ur.submit_time = usagerecord.epoch2isoTime(submit_time)
    ur.start_time  = usagerecord.epoch2isoTime(start_time)
    ur.end_time    = usagerecord.epoch2isoTime(end_time)

    ur.cpu_duration = utilized_cpu
    ur.wall_duration = wall_time

    ur.project_name = account_name
    ur.vo_info += vo_info

    return ur