def __init__(self, clusters, cache_pickle=False, dry_run=False):
    """Set up a checkjob-flavoured MoabCommand for the given clusters.

    @param clusters: dict mapping cluster name to its configuration dict
    @param cache_pickle: passed through to MoabCommand (cache results on disk)
    @param dry_run: passed through to MoabCommand (log instead of execute)
    """
    MoabCommand.__init__(self, cache_pickle=cache_pickle, dry_run=dry_run)
    # parser/info class used to interpret checkjob output
    self.info = CheckjobInfo
    self.clusters = clusters
def process_hold(clusters, dry_run=False):
    """Process a filtered queueinfo dict: release held jobs via mjobctl.

    Fetches current showq data, releases every job in a supported hold
    type (tracking how often each job has been released via a FileCache),
    prunes jobs that are no longer relevant, and persists the new state.

    @param clusters: dict mapping cluster name to config dict; must carry
        'spath' (showq binary) and 'mpath' (mjobctl binary) entries.
        NOTE(review): the 'path' key of each cluster dict is mutated here.
    @param dry_run: if True, only log the mjobctl commands.
    @return: tuple (list of released DRM job ids, stats dict with keys
        'total', 'peruser', 'release')
    """
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    # iterate over snapshots (list(...)) because entries are pop()ed below;
    # mutating a dict while iterating its items() view raises RuntimeError
    # on Python 3
    for user, clusterdata in list(queue_information.items()):
        oldclusterdata = old_queue_information.setdefault(user, {})
        totaluser = 0
        for cluster, data in list(clusterdata.items()):
            olddata = oldclusterdata.setdefault(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict
            oldjobs = {j['DRMJID']: j['_release']
                       for jt in olddata.values() for j in jt}
            for jobtype, jobs in list(data.items()):
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']
                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        # lazy %-args: only formatted if the record is emitted
                        logger.info("Releasing job %s cluster %s for the %s-th time.",
                                    jid, cluster, release)
                        if dry_run:
                            logger.info("Dry run %s", cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    else:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        try:
                            release = oldjobs[jid]
                            job['_release'] = release
                        except KeyError:
                            # not previously in hold, remove it
                            removeids.append(idx)
                # remove the jobs (in reverse order so indices stay valid)
                for remove_idx in removeids[::-1]:
                    jobs.pop(remove_idx)
                # cleanup
                if len(jobs) == 0:
                    data.pop(jobtype)
            # cleanup
            if len(data) == 0:
                clusterdata.pop(cluster)
        # cleanup
        if len(clusterdata) == 0:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    # a single dict arg is used as the %(name)s mapping by logging
    logger.info("Release statistics: total jobs in hold %(total)s; "
                "max in hold per user %(peruser)s; max releases per job %(release)s",
                stats)

    # update and close
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats
# NOTE(review): this definition of process_hold duplicates an earlier,
# near-identical one in this file; at import time this later definition
# shadows the earlier one. Confirm which copy is intended and drop the other.
def process_hold(clusters, dry_run=False):
    """Process a filtered queueinfo dict: release held jobs via mjobctl.

    Fetches current showq data, releases every job in a supported hold
    type (tracking how often each job has been released via a FileCache),
    prunes jobs that are no longer relevant, and persists the new state.

    @param clusters: dict mapping cluster name to config dict; must carry
        'spath' (showq binary) and 'mpath' (mjobctl binary) entries.
        NOTE(review): the 'path' key of each cluster dict is mutated here.
    @param dry_run: if True, only log the mjobctl commands.
    @return: tuple (list of released DRM job ids, stats dict with keys
        'total', 'peruser', 'release')
    """
    releasejob_cache = FileCache(RELEASEJOB_CACHE_FILE)

    # get the showq data
    for data in clusters.values():
        data['path'] = data['spath']  # showq path
    showq = Showq(clusters, cache_pickle=True)
    (queue_information, _, _) = showq.get_moab_command_information()

    # release the jobs, prepare the command
    m = MoabCommand(cache_pickle=False, dry_run=dry_run)
    for data in clusters.values():
        data['path'] = data['mpath']  # mjobctl path
    m.clusters = clusters

    # read the previous data
    ts_data = releasejob_cache.load('queue_information')
    if ts_data is None:
        old_queue_information = {}
    else:
        (_, old_queue_information) = ts_data

    stats = {
        'peruser': 0,
        'total': 0,
        'release': 0,
    }

    release_jobids = []

    # iterate over snapshots (list(...)) because entries are pop()ed below;
    # mutating a dict while iterating its items() view raises RuntimeError
    # on Python 3
    for user, clusterdata in list(queue_information.items()):
        oldclusterdata = old_queue_information.setdefault(user, {})
        totaluser = 0
        for cluster, data in list(clusterdata.items()):
            olddata = oldclusterdata.setdefault(cluster, {})
            # DRMJID is supposed to be unique
            # get all oldjobids in one dict
            oldjobs = {j['DRMJID']: j['_release']
                       for jt in olddata.values() for j in jt}
            for jobtype, jobs in list(data.items()):
                removeids = []
                for idx, job in enumerate(jobs):
                    jid = job['DRMJID']
                    if jobtype in RELEASEJOB_SUPPORTED_HOLDTYPES:
                        totaluser += 1
                        release = max(oldjobs.get(jid, 0), 0) + 1
                        job['_release'] = release
                        stats['release'] = max(stats['release'], release)
                        release_jobids.append(jid)
                        # release the job
                        cmd = [m.clusters[cluster]['path'], '-u', jid]
                        # lazy %-args: only formatted if the record is emitted
                        logger.info("Releasing job %s cluster %s for the %s-th time.",
                                    jid, cluster, release)
                        if dry_run:
                            logger.info("Dry run %s", cmd)
                        else:
                            m._run_moab_command(cmd, cluster, [])
                    else:
                        # keep historical data, eg a previously released job could be idle now
                        # but keep the counter in case it gets held again
                        try:
                            release = oldjobs[jid]
                            job['_release'] = release
                        except KeyError:
                            # not previously in hold, remove it
                            removeids.append(idx)
                # remove the jobs (in reverse order so indices stay valid)
                for remove_idx in removeids[::-1]:
                    jobs.pop(remove_idx)
                # cleanup
                if len(jobs) == 0:
                    data.pop(jobtype)
            # cleanup
            if len(data) == 0:
                clusterdata.pop(cluster)
        # cleanup
        if len(clusterdata) == 0:
            queue_information.pop(user)

        # update stats
        stats['peruser'] = max(stats['peruser'], totaluser)
        stats['total'] += totaluser

    # a single dict arg is used as the %(name)s mapping by logging
    logger.info("Release statistics: total jobs in hold %(total)s; "
                "max in hold per user %(peruser)s; max releases per job %(release)s",
                stats)

    # update and close
    releasejob_cache.update('queue_information', queue_information, 0)
    releasejob_cache.close()

    return release_jobids, stats