def fix_split(name):
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    assert queue.qsize() == 1
    item_list = queue.get()
    queue.extend(item_list)
    logger.info(queue)
def _queue_jds(name):
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    jds = {}
    for i in range(queue.qsize()):
        jd = queue.peek(i)
        jds[i] = jd
    return jds
def _queue_ids(name):
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    ids = []
    for i in range(queue.qsize()):
        jd = queue.peek(i)
        job_id = jd['job_id']
        ids.append(job_id)
    return ids
def fill(dst, template, count):
    """Fill the queue in `dst` with `count` copies of the JSON job descriptor
    in `template`, assigning consecutive job_ids when the template has one."""
    queue = QueueDir(dst)
    with open(template) as fh:
        jd_template = json.load(fh)
    jd_min = None
    if 'job_id' in jd_template:
        jd_min = jd_template['job_id']
    assert count > 0
    for i in range(count):
        jd = copy(jd_template)
        if jd_min is not None:
            jd['job_id'] = jd_min + i
        queue.put(jd)
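# Example (hypothetical usage): populate a queue directory from a JSON job
# descriptor template. The names "mc01" and "jd_template.json" are
# illustrative only. If the template carries a 'job_id', the generated
# descriptors get consecutive ids starting from that value.
#
#   fill("mc01", "jd_template.json", count=100)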
def reset_fail(name):
    """Move all job descriptors from a '<name>.fail' queue back into the
    original queue, removing any leftover output directories first."""
    assert os.path.exists(name) and os.path.isdir(name)
    name = name.rstrip('/')
    assert name.endswith('.fail')
    origname = name.replace('.fail', '')
    groupname = os.path.basename(origname)
    qfail = QueueDir(name)
    qorig = QueueDir(origname)
    for jd in qfail:
        outdir = 'output-%s/%d' % (groupname, jd['job_id'])
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        qorig.put(jd)
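# Example (hypothetical usage): requeue everything from a *.fail queue back
# into the original queue, discarding partial output first. The queue name
# "mc01.fail" is illustrative only.
#
#   reset_fail("mc01.fail")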
def check_dupes(name, do_remove=False):
    """Report job descriptors with duplicate job_id in a queue; with
    do_remove=True, delete all but the last duplicate file."""
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    queue_files = queue.list_files()
    jds = {}
    for i in range(queue.qsize()):
        jd = queue.peek(i)
        key = jd['job_id']
        jd_rec = {'file': queue_files[i], 'jd': jd, 'id': i}
        if key in jds:
            jds[key].append(jd_rec)
        else:
            jds[key] = [jd_rec]
    for key, dupes in jds.iteritems():
        if len(dupes) > 1:
            print "Dupes: %s" % dupes
            if do_remove:
                for jd_rec in dupes[0:-1]:
                    print "remove: %s" % jd_rec['file']
                    os.remove(jd_rec['file'])  # hack
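# Example (hypothetical usage): first report duplicate job_ids in a queue,
# then run again with do_remove=True to drop all but the last copy. The
# queue name "mc01" is illustrative only.
#
#   check_dupes("mc01")
#   check_dupes("mc01", do_remove=True)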
def stat_host(basedir, name, exptotal=None):
    """Collect queue sizes for all known queue suffixes of `name` under
    `basedir`, plus the jobs currently held in the '.locker' file."""
    stat = {WORK: 0}
    total_count = 0
    for key, suff in queue_exts.iteritems():
        queue_dir = "%s/%s%s" % (basedir, name, suff)
        if os.path.exists(queue_dir):
            q = QueueDir(queue_dir)
            stat[key] = q.qsize()
            total_count += stat[key]
        else:
            stat[key] = 0
    if os.path.exists("%s/%s.locker" % (basedir, name)):
        with open("%s/%s.locker" % (basedir, name)) as fh:
            queue_work = cPickle.load(fh)
        stat[WORK] = len(queue_work)
        total_count += len(queue_work)
    stat[TOTAL] = total_count
    update_calc_stat(stat, exptotal)
    stat['name'] = name
    return stat
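# Example (hypothetical usage): gather per-host queue statistics. The names
# "runs" and "mc01" are illustrative, and treating exptotal as the expected
# job total passed on to update_calc_stat() is an assumption.
#
#   stat = stat_host("runs", "mc01", exptotal=1000)
#   print stat['name'], stat[TOTAL]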
def mv(src, dst, count=None, put_back=False):
    """Move up to `count` job descriptors from queue `src` to queue `dst`;
    with put_back=True the descriptors are also returned to `src`."""
    assert src is not None and dst is not None
    assert os.path.exists(src) and os.path.isdir(src)
    q_src = QueueDir(src)
    q_dst = QueueDir(dst, default_mask=q_src.mask)
    if count is None:
        count = q_src.qsize()
    if count > 0:
        jds = q_src.get_n(count)
        q_dst.extend(jds)
        if put_back:
            q_src.extend(jds)
    else:
        logger.warn("WARN: empty source queue")
    logger.info("SRC: %s" % q_src)
    logger.info("DST: %s" % q_dst)
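# Example (hypothetical usage): move 10 job descriptors between queues, or
# copy them by putting them back into the source. The queue names are
# illustrative only.
#
#   mv("mc01", "mc02", count=10)
#   mv("mc01", "mc01.backup", put_back=True)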
def unlock(name):
    """Release job descriptors held in '<name>.locker': jobs with output go
    to the '.success' queue, the rest go back to the original queue."""
    lockfile = "%s.locker" % name
    assert os.path.exists(name) and os.path.isdir(name)
    jds = _lock_ids(lockfile)
    assert len(jds) > 0
    queue_orig = QueueDir(name)
    queue_succ = QueueDir(name + ".success")
    for job_id, jd in jds.iteritems():
        if _has_output(name, jd):
            queue_succ.put(jd)
            logger.info("%d -> success" % job_id)
        else:
            queue_orig.put(jd)
            logger.info("%d -> orig" % job_id)
    os.remove(lockfile)
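# Example (hypothetical usage): release jobs left behind in "mc01.locker"
# after a crashed worker, routing finished jobs to mc01.success and the rest
# back to mc01. The queue name is illustrative only.
#
#   unlock("mc01")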
def check_success(start_id, stop_id):
    """Find jobs recorded as successful but missing output (via
    _find_no_output) and move them from the success queue to the matching
    fail queue."""
    pool = multiprocessing.Pool(POOL_SIZE)
    group_names = ["mc%02d" % i for i in range(1, 21)]
    print group_names
    unsuccessful = pool.map(_find_no_output, group_names)
    unsuccessful = [x for x in unsuccessful if len(x) > 0]
    print unsuccessful
    with open("no_output.dump", 'w') as fh:
        cPickle.dump(unsuccessful, fh)
    for unx in unsuccessful:
        queue_succ_name = unx.keys()[0].split(':')[0]
        queue_succ = QueueDir(queue_succ_name)
        queue_fail = QueueDir(queue_succ_name.replace('success', 'fail'))
        for key in sorted(unx.keys(), key=lambda x: int(x.split(':')[1]), reverse=True):
            id = int(key.split(':')[1])
            jd = unx[key]
            print "%s -> fail (%d)" % (key, jd['job_id'])
            queue_fail.put(jd)
            queue_succ.remove(id)
def fix_interrupts(name):
    """Move interrupted jobs that actually produced output from a fail queue
    back into the matching success queue."""
    assert os.path.exists(name) and os.path.isdir(name)
    assert name.endswith('fail')
    queue_fail = QueueDir(name)
    queue_success = QueueDir(name.replace('fail', 'success'))
    restore_count = 0
    queue_fail_size = queue_fail.qsize()
    fail_files = queue_fail.list_files()
    success_cache = {}
    for i in range(queue_success.qsize()):
        jd = queue_success.peek(i)
        key = jd['job_id']
        jd_rec = {'jd': jd, 'id': i}
        success_cache[key] = jd_rec
    for i in range(queue_fail.qsize() - 1, -1, -1):
        jd = queue_fail.peek(i)
        if _has_output(name, jd):
            if jd['job_id'] in success_cache:
                print "WARN: already in success (%s)" % fail_files[i]
                continue
            print "seemsOK: %d" % jd['job_id']
            restore_count += 1
            queue_fail.remove(i)
            jd['ex_status'] = jd['status']
            jd['status'] = 'SUCCESS'
            queue_success.put(jd)
    print "restored %d JDs of %d" % (restore_count, queue_fail_size)
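# Example (hypothetical usage): rescue interrupted jobs that did produce
# output from a fail queue into the matching success queue. The queue name
# "mc01.fail" is illustrative only.
#
#   fix_interrupts("mc01.fail")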