Пример #1
0
    def getSlurmDBJob(jid, req_fields=DEF_REQ_FLD):
        """Look up one job in the Slurm accounting DB and fill in requested fields.

        The record is fetched through ``pyslurm.slurmdb_jobs().get`` and then
        augmented in place: ``user_id`` is always added, and every name in
        ``req_fields`` that is not already a key of the record is derived via
        ``PyslurmQuery.MAP_JOB2DBJ``.

        A ``MAP_JOB2DBJ`` entry is either a plain DB field name (value is
        copied as-is) or a two-item sequence ``[db_field, convert_func]``
        (value is passed through ``convert_func`` first).

        Args:
            jid: Job id used both as the query filter and as the key into the
                query result.
            req_fields: Iterable of field names the caller wants present in
                the returned record.

        Returns:
            The (mutated) job record, or ``None`` when the query returns no
            entry for ``jid``. Requested fields that cannot be derived are
            logged and left absent rather than raising.
        """
        job = pyslurm.slurmdb_jobs().get(jobids=[jid]).get(jid, None)
        if not job:  # cannot find
            return None

        job['user_id'] = MyTool.getUid(job['user'])
        for f in req_fields:
            if f in job:
                continue
            if f not in PyslurmQuery.MAP_JOB2DBJ:  # cannot be converted
                logger.error(
                    "Cannot find/map reqested job field {} in job {}".format(
                        f, job))
                continue

            mapping = PyslurmQuery.MAP_JOB2DBJ[f]
            # isinstance (not an exact type comparison) so list subclasses and
            # tuple pairs are unpacked too instead of being used as a key.
            if isinstance(mapping, (list, tuple)):
                db_fld, cvtFunc = mapping
            else:
                db_fld, cvtFunc = mapping, None

            if db_fld not in job:
                # Same policy as an unmappable field: report and keep going
                # instead of aborting the whole lookup with a KeyError.
                logger.error(
                    "Job field {} maps to DB field {} which is missing in job {}"
                    .format(f, db_fld, job))
                continue

            value = job[db_fld]
            job[f] = value if cvtFunc is None else cvtFunc(value)
        return job
Пример #2
0
def test1(user):
    """Print the generated usage data of ``user`` for a 30-day window.

    The window bounds come from ``MyTool.getStartStopTS(days=30)`` and the
    user name is resolved to a uid with ``MyTool.getUid`` before being handed
    to ``gendata_user``; whatever that returns is printed verbatim.
    """
    banner = "Test user {}'s history".format(user)
    print(banner)

    period = MyTool.getStartStopTS(days=30)
    begin_ts, end_ts = period
    user_id = MyTool.getUid(user)

    report = gendata_user(user_id, begin_ts, end_ts)
    print(report)
Пример #3
0
def display_user_GPU(user_name):
    """Print a GPU utilization report for the running jobs of ``user_name``.

    Steps:
      1. Resolve the user name to a uid and fetch the user's current jobs.
      2. Ask ``PyslurmQuery.getJobAllocGPU`` for each job's GPU allocation,
         used here as a mapping ``node name -> list of GPU ids``.
      3. Fetch GPU time series from ``BrightRestClient().getGPU`` for all
         involved nodes, starting at the earliest start time among the jobs
         that hold GPUs.
      4. Print one row per allocated GPU with the average over the job's run
         time and over the last 5, 10 and 30 minutes (values are multiplied
         by 100 for display).

    Nothing is returned; all output goes to stdout. The function returns
    early with a message if the user is unknown or has no running jobs.

    Args:
        user_name: Login name of the user to report on.
    """
    ts = int(time.time())
    # Fixed: the original passed the undefined name `user` here (NameError).
    uid = MyTool.getUid(user_name)
    ts_str = MyTool.getTsString(ts)
    # NOTE(review): a uid of 0 is also falsy and is reported as missing;
    # confirm what MyTool.getUid returns for an unknown user.
    if not uid:
        print("{} User {} does not exist.".format(ts_str, user_name))
        # Fixed: without this return the code went on to query jobs for an
        # invalid uid.
        return

    user_job_lst = PyslurmQuery.getUserCurrJobs(uid)
    if not user_job_lst:
        print("{} User {} does not have running jobs.".format(
            ts_str, user_name))
        return

    node_dict = PyslurmQuery.getAllNodes()
    job_gpu_d = {
        job['job_id']: PyslurmQuery.getJobAllocGPU(job, node_dict)
        for job in user_job_lst
    }

    # De-duplicate node names (keeping first-seen order) so a node shared by
    # several jobs is counted and queried only once.
    u_node = list(dict.fromkeys(
        node_name for g_alloc_d in job_gpu_d.values()
        for node_name in g_alloc_d))
    gpu_lists = [
        g_lst for g_alloc_d in job_gpu_d.values()
        for g_lst in g_alloc_d.values()
    ]
    u_gpu_cnt = sum(len(g_lst) for g_lst in gpu_lists)
    g_union = set().union(*gpu_lists)

    print("{} User {} has {} running jobs,\talloc {} GPUs on {} GPU nodes.".
          format(ts_str, user_name, len(user_job_lst), u_gpu_cnt,
                 len(u_node)))

    # get gpu data
    if u_node:  # GPU nodes allocated
        # u_node is non-empty only if at least one job has a GPU allocation,
        # so min() never sees an empty sequence.
        earliest_start = min(job['start_time'] for job in user_job_lst
                             if job_gpu_d[job['job_id']])
        gpu_data = BrightRestClient().getGPU(u_node,
                                             earliest_start,
                                             list(g_union),
                                             msec=False)
    else:
        gpu_data = {}

    print("\t{:10}{:20}{:>16}{:>20}{:>25}".format("Jid", "Job run time",
                                                  "Node.GPU", "Job avg util",
                                                  "Avg util (5,10,30min)"))
    row_fmt = "\t{:<10}{:20}{:>16}{:>20.2f}{:>10.2f},{:>6.2f},{:>6.2f}"
    for job in user_job_lst:
        jid = job['job_id']
        j_start = job['start_time']
        j_run_time = str(datetime.timedelta(seconds=ts - j_start))
        if not job_gpu_d[jid]:  # job not using GPU
            print("\t{:<10}{:20}{:>16}".format(jid, j_run_time, 'No GPU'))
            continue

        # Averaging windows: whole job run, then last 5 / 10 / 30 minutes.
        window_starts = (j_start, ts - 5 * 60, ts - 10 * 60, ts - 30 * 60)
        j_first_ln = True
        for node, g_lst in job_gpu_d[jid].items():
            for g in g_lst:
                g_name = '{}.gpu{}'.format(node, g)
                g_data = gpu_data[g_name]
                g_pcts = [
                    MyTool.getTimeSeqAvg(g_data, begin, ts) * 100
                    for begin in window_starts
                ]
                # Job id and run time are shown only on the job's first row.
                if j_first_ln:
                    jid_col, time_col = jid, j_run_time
                    j_first_ln = False
                else:
                    jid_col, time_col = '', ''
                print(row_fmt.format(jid_col, time_col, g_name, *g_pcts))