예제 #1
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def fix_split(name):
    """Take the single item out of a queue and feed it back via ``extend``.

    The queue opened on directory ``name`` must report a size of exactly
    one.  That item is fetched with ``get()`` and passed to ``extend()`` on
    the same queue; the queue object is then logged.

    NOTE(review): presumably the single item is a list of job descriptors
    that was stored as one entry by mistake -- confirm against QueueDir.

    Raises:
        AssertionError: if ``name`` is not an existing directory, or the
            queue does not hold exactly one item.
    """
    assert os.path.exists(name)
    assert os.path.isdir(name)
    target = QueueDir(name)
    assert target.qsize() == 1
    target.extend(target.get())
    logger.info(target)
예제 #2
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def _queue_jds(name):
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    jds = {}
    for i in range(queue.qsize()):
        jd = queue.peek(i)
        jds[i] = jd
    return jds
예제 #3
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def _queue_ids(name):
    assert os.path.exists(name) and os.path.isdir(name)
    queue = QueueDir(name)
    ids = []
    for i in range(queue.qsize()):
        jd = queue.peek(i)
        job_id = jd['job_id']
        ids.append(job_id)
    return ids
예제 #4
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def fill(dst, template, count):
    """Put ``count`` copies of a JSON job-descriptor template into a queue.

    Args:
        dst: location handed to ``QueueDir`` for the destination queue.
        template: path of a JSON file holding the descriptor template.
        count: number of descriptors to enqueue; must be positive.

    If the template has a ``'job_id'`` entry that is not ``None``, the i-th
    copy (counting from 0) gets ``job_id = template job_id + i``; otherwise
    the copies are enqueued unchanged.  Copies are shallow.

    Raises:
        AssertionError: if ``count`` is not greater than zero (checked only
            after the queue has been opened and the template loaded).
    """
    target = QueueDir(dst)
    with open(template) as template_file:
        base_jd = json.load(template_file)
    first_id = base_jd['job_id'] if 'job_id' in base_jd else None
    assert count > 0
    for offset in range(count):
        job = copy(base_jd)
        if first_id is not None:
            job['job_id'] = first_id + offset
        target.put(job)
예제 #5
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def reset_fail(name):
    """Move every job descriptor of a ``.fail`` queue back to its origin queue.

    ``name`` is the directory of the failed-jobs queue and must end with
    ``.fail`` (trailing slashes are ignored).  The origin queue is the same
    path with that suffix removed.  For each descriptor found by iterating
    the fail queue, the directory ``output-<group>/<job_id>`` (relative to
    the current working directory, where ``<group>`` is the basename of the
    origin queue) is deleted if it exists, and the descriptor is put into
    the origin queue.

    Raises:
        AssertionError: if ``name`` is not an existing directory or does
            not end with ``.fail``.
    """
    assert os.path.exists(name) and os.path.isdir(name)
    name = name.rstrip('/')
    suffix = '.fail'
    assert name.endswith(suffix)
    # Strip only the trailing suffix.  str.replace() would also remove any
    # '.fail' occurring earlier in the path (e.g. in a parent directory).
    origname = name[:-len(suffix)]
    groupname = os.path.basename(origname)

    qfail = QueueDir(name)
    qorig = QueueDir(origname)
    for jd in qfail:
        outdir = 'output-%s/%d' % (groupname, jd['job_id'])
        # Drop stale output of the failed attempt before re-queueing.
        if os.path.exists(outdir):
            shutil.rmtree(outdir)
        qorig.put(jd)
예제 #6
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def check_dupes(name, do_remove=False):
    """Report, and optionally delete, queue entries that share a ``job_id``.

    Every position of the queue opened on directory ``name`` is read with
    ``peek`` and grouped by its ``'job_id'``.  Each group with more than one
    entry is printed.  When ``do_remove`` is true, the file of every entry
    in the group except the last one is deleted with ``os.remove``.

    Args:
        name: queue directory; must exist.
        do_remove: delete the files of all but the last duplicate.

    Raises:
        AssertionError: if ``name`` is not an existing directory.
    """
    assert os.path.exists(name)
    assert os.path.isdir(name)
    queue = QueueDir(name)
    files = queue.list_files()
    by_job_id = {}
    for pos in range(queue.qsize()):
        jd = queue.peek(pos)
        job_id = jd['job_id']
        # assumes list_files() is ordered like peek() positions -- TODO confirm
        record = {'file': files[pos], 'jd': jd, 'id': pos}
        by_job_id.setdefault(job_id, []).append(record)
    for records in by_job_id.itervalues():
        if len(records) <= 1:
            continue
        print("Dupes: %s" % records)
        if not do_remove:
            continue
        for stale in records[:-1]:
            print("remove: %s" % stale['file'])
            os.remove(stale['file'])  # hack
예제 #7
0
def stat_host(basedir, name, exptotal=None):
    """Collect per-queue job counts for the queue group ``name``.

    For every ``(key, suffix)`` pair in ``queue_exts`` the directory
    ``<basedir>/<name><suffix>`` is looked up; its queue size is stored
    under ``key``, or 0 when the directory does not exist.  If
    ``<basedir>/<name>.locker`` exists, it is unpickled and its length is
    stored under ``WORK``.  The sum of all counts goes under ``TOTAL``, then
    ``update_calc_stat(stat, exptotal)`` is called and ``'name'`` is set.

    Args:
        basedir: directory containing the queue directories.
        name: base name of the queue group.
        exptotal: forwarded unchanged to ``update_calc_stat``.

    Returns:
        The statistics dict described above.
    """
    counts = {WORK: 0}
    grand_total = 0
    for key, suff in queue_exts.iteritems():
        path = "%s/%s%s" % (basedir, name, suff)
        if not os.path.exists(path):
            counts[key] = 0
            continue
        counts[key] = QueueDir(path).qsize()
        grand_total += counts[key]

    locker_path = "%s/%s.locker" % (basedir, name)
    if os.path.exists(locker_path):
        with open(locker_path) as locker_file:
            in_progress = len(cPickle.load(locker_file))
        counts[WORK] = in_progress
        grand_total += in_progress

    counts[TOTAL] = grand_total
    update_calc_stat(counts, exptotal)
    counts['name'] = name
    return counts
예제 #8
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def mv(src, dst, count=None, put_back=False):
    """Transfer job descriptors from queue ``src`` to queue ``dst``.

    Args:
        src: existing source queue directory.
        dst: destination queue location; the queue is opened with the
            source queue's ``mask`` as its ``default_mask``.
        count: how many descriptors to take; ``None`` means the current
            size of the source queue.
        put_back: when true, the taken descriptors are also re-added to the
            source queue, so they end up in both queues.

    A warning is logged instead when the effective count is not positive.
    Both queues are logged at the end.

    Raises:
        AssertionError: if ``src`` or ``dst`` is ``None``, or ``src`` is
            not an existing directory.
    """
    assert src is not None
    assert dst is not None
    assert os.path.exists(src)
    assert os.path.isdir(src)
    source = QueueDir(src)
    target = QueueDir(dst, default_mask=source.mask)
    if count is None:
        count = source.qsize()
    if not count > 0:
        logger.warn("WARN: empty source queue")
    else:
        taken = source.get_n(count)
        target.extend(taken)
        if put_back:
            source.extend(taken)
    logger.info("SRC: %s" % source)
    logger.info("DST: %s" % target)
예제 #9
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def unlock(name):
    """Redistribute the jobs recorded in ``<name>.locker`` and delete that file.

    The lock file is read with ``_lock_ids`` into a mapping of job id to job
    descriptor.  Each descriptor for which ``_has_output(name, jd)`` is true
    is put into the ``<name>.success`` queue; every other one is put back
    into the queue at ``name``.  Each decision is logged, and the lock file
    is removed once all descriptors have been placed.

    Raises:
        AssertionError: if ``name`` is not an existing directory, or the
            lock file yields no jobs.
    """
    lock_path = "%s.locker" % name
    assert os.path.exists(name)
    assert os.path.isdir(name)
    locked = _lock_ids(lock_path)
    assert len(locked) > 0
    queue_orig = QueueDir(name)
    queue_succ = QueueDir(name + ".success")
    for job_id, jd in locked.iteritems():
        if _has_output(name, jd):
            target, message = queue_succ, "%d -> success"
        else:
            target, message = queue_orig, "%d -> orig"
        target.put(jd)
        logger.info(message % job_id)
    os.remove(lock_path)
예제 #10
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def check_success(start_id, stop_id):
    """Move "successful" jobs that have no output into the fail queues.

    ``_find_no_output`` is run in a process pool for the groups ``mc01`` to
    ``mc20``.  Each non-empty result is a mapping whose keys have the form
    ``"<success queue name>:<position>"`` and whose values are job
    descriptors.  The non-empty results are pickled to ``no_output.dump``
    in the current directory.  Then every listed descriptor is put into the
    matching fail queue and removed from the success queue by position.

    Args:
        start_id: not used by this function.
        stop_id: not used by this function.
    """
    pool = multiprocessing.Pool(POOL_SIZE)
    try:
        group_names = ["mc%02d" % i for i in range(1, 21)]
        print(group_names)
        unsuccessful = pool.map(_find_no_output, group_names)
    finally:
        # Release the worker processes explicitly instead of relying on
        # garbage collection of the pool object.
        pool.close()
        pool.join()
    unsuccessful = [x for x in unsuccessful if len(x) > 0]
    print(unsuccessful)
    with open("no_output.dump", 'w') as fh:
        cPickle.dump(unsuccessful, fh)
    for unx in unsuccessful:
        # All keys of one result are taken to name the same success queue;
        # the queue name is read from the first key.
        queue_succ_name = next(iter(unx)).split(':')[0]
        queue_succ = QueueDir(queue_succ_name)
        queue_fail = QueueDir(queue_succ_name.replace('success', 'fail'))
        # Highest position first -- presumably so that removing an entry
        # does not shift the positions still to be processed.
        ordered_keys = sorted(unx.keys(),
                              key=lambda x: int(x.split(':')[1]),
                              reverse=True)
        for key in ordered_keys:
            position = int(key.split(':')[1])
            jd = unx[key]
            print("%s -> fail (%d)" % (key, jd['job_id']))
            queue_fail.put(jd)
            queue_succ.remove(position)
예제 #11
0
파일: qmgr.py 프로젝트: anaderi/skygrid
def fix_interrupts(name):
    """Restore failed jobs that actually produced output to the success queue.

    ``name`` is the directory of a fail queue and must end with ``fail``;
    the success queue is the same path with that trailing ``fail`` replaced
    by ``success``.  The fail queue is walked from its last position to its
    first.  A descriptor for which ``_has_output(name, jd)`` is true is
    removed from the fail queue, gets its old ``'status'`` saved under
    ``'ex_status'`` and ``'status'`` set to ``'SUCCESS'``, and is put into
    the success queue.  A descriptor whose ``job_id`` was already in the
    success queue at the start is left in place with a warning.

    Raises:
        AssertionError: if ``name`` is not an existing directory or does
            not end with ``fail``.
    """
    assert os.path.exists(name) and os.path.isdir(name)
    suffix = 'fail'
    assert name.endswith(suffix)
    queue_fail = QueueDir(name)
    # Swap only the trailing suffix.  str.replace() would also rewrite any
    # 'fail' occurring earlier in the path (e.g. in a parent directory).
    queue_success = QueueDir(name[:-len(suffix)] + 'success')
    restore_count = 0
    queue_fail_size = queue_fail.qsize()
    fail_files = queue_fail.list_files()

    # Job ids present in the success queue before anything is restored.
    success_ids = set()
    for i in range(queue_success.qsize()):
        success_ids.add(queue_success.peek(i)['job_id'])

    # Walk backwards -- presumably so that removing position i does not
    # shift the positions still to be visited.
    for i in range(queue_fail_size - 1, -1, -1):
        jd = queue_fail.peek(i)
        if not _has_output(name, jd):
            continue
        if jd['job_id'] in success_ids:
            print("WARN: already in success (%s)" % fail_files[i])
            continue
        print("seemsOK: %d" % jd['job_id'])
        restore_count += 1
        queue_fail.remove(i)
        jd['ex_status'] = jd['status']
        jd['status'] = 'SUCCESS'
        queue_success.put(jd)
    print("restored %d JDs of %d" % (restore_count, queue_fail_size))