示例#1
0
def process_hold(clusters, dry_run=False):
    """Release all held jobs found in the showq data and track release counts.

    For every job whose job type is in RELEASEJOB_SUPPORTED_HOLDTYPES an
    ``mjobctl -u <jobid>`` style command is issued (or only logged when
    ``dry_run`` is set). A per-job release counter is stored under the
    ``'_release'`` key and persisted through the release-job cache, so the
    counter survives between runs. Jobs that are not in hold now and have no
    release history are dropped from the data that gets cached.

    Args:
        clusters: dict mapping a cluster name to a dict that holds at least
            the keys ``'spath'`` (showq path) and ``'mpath'`` (mjobctl path).
            The ``'path'`` key of every entry is overwritten by this function.
        dry_run: when true, the release commands are logged, not executed.

    Returns:
        A tuple ``(release_jobids, stats)`` where ``release_jobids`` is the
        list of job ids for which a release was issued and ``stats`` is a dict
        with the keys ``'peruser'`` (max jobs in hold for one user),
        ``'total'`` (total jobs in hold) and ``'release'`` (max release count
        of a single job).
    """
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    # The loops below prune empty entries from the very dicts they walk over,
    # so each one iterates over a list() snapshot of the items. Iterating the
    # live view while popping raises RuntimeError on Python 3.
    for user, clusterdata in list(queue_information.items()):
        oldclusterdata = old_queue_information.get(user, {})
        totaluser = 0
        for cluster, data in list(clusterdata.items()):
            olddata = oldclusterdata.get(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict: jobid -> previous release count
            oldjobs = {
                j['DRMJID']: j['_release']
                for jt in olddata.values()
                for j in jt
            }
            for jobtype, jobs in list(data.items()):
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']

                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        # negative or missing history counts as 0 releases
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        logger.info(
                            "Releasing job %s cluster %s for the %s-th time." %
                            (jid, cluster, release))
                        if dry_run:
                            logger.info("Dry run %s" % cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    elif jid in oldjobs:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        job['_release'] = oldjobs[jid]
                    else:
                        # not previously in hold, remove it
                        removeids.append(idx)

                # remove the jobs (in reverse order so the indices stay valid)
                for remove_idx in reversed(removeids):
                    jobs.pop(remove_idx)

                # cleanup
                if not jobs:
                    data.pop(jobtype)
            # cleanup
            if not data:
                clusterdata.pop(cluster)
        # cleanup
        if not clusterdata:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    logger.info(
        "Release statistics: total jobs in hold %(total)s; max in hold per user %(peruser)s; max releases per job %(release)s"
        % stats)

    # update and close
    # NOTE(review): the cache is also updated in dry-run mode, so release
    # counters increase even though no job was released -- confirm intended.
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats