def check_brick_done(brick):
    """Report progress on *brick* and finalize it once all blobs are in.

    A status line is printed for the brick whenever more than
    ``brick_status_period`` seconds (measured with ``time.time()``) have
    passed since its last status line; the first call for a brick only
    starts that timer.

    The brick is finished when the number of stored results plus the number
    of cancelled blobs reaches the brick's blob count.  At that point the
    final checkpoint is written, the QDO task is set to SUCCEEDED, the
    brick's stored results are dropped, and ``(brick, nresults)`` is put on
    ``finished_bricks``.

    Returns None.  Nothing is finalized while the blob count is unknown or
    blobs are still outstanding.
    """
    nblobs, taskid = get_brick_nblobs(brick)
    ncancelled = len(brick_cancelled.get(brick, ()))
    # The brick may have no entry in allresults (ndone is then 0), so it is
    # never indexed directly -- doing so used to raise KeyError at
    # finalization when every blob had been cancelled.
    brickresults = allresults.get(brick, {})
    ndone = len(brickresults)

    tnow = time.time()
    last_status = last_brick_status.setdefault(brick, tnow)
    if tnow - last_status > brick_status_period:
        print('Brick', brick, ':', ncancelled, 'cancelled,', ndone,
              'done, total', (nblobs if nblobs is not None else '(unknown)'))
        last_brick_status[brick] = tnow

    if nblobs is None:
        # Blob count not known yet; cannot decide whether we are done.
        return
    if ndone + ncancelled < nblobs:
        return

    # Done this brick!  Set qdo state=Succeeded
    checkpoint_fn = opt.checkpoint % dict(brick=brick)
    R = [dict(brickname=brick, iblob=iblob, result=res)
         for iblob, res in brickresults.items()]
    print('Writing final checkpoint', checkpoint_fn)
    _write_checkpoint(R, checkpoint_fn)
    print('Setting QDO task to Succeeded:', brick)
    q.set_task_state(taskid, qdo.Task.SUCCEEDED)
    # Drop the results only after the checkpoint and task state are updated;
    # pop() tolerates a brick that never had an entry.
    allresults.pop(brick, None)
    finished_bricks.put((brick, len(R)))
def check_brick_done(brick):
    """Finalize *brick* once every blob is either finished or cancelled.

    The brick is finished when the number of stored results plus the number
    of cancelled blobs reaches the brick's blob count.  At that point the
    final checkpoint is written, the QDO task is set to SUCCEEDED, the
    brick's stored results are dropped, and ``(brick, nresults)`` is put on
    ``finished_bricks``.

    Returns None.  Does nothing while the blob count is unknown or blobs
    are still outstanding.
    """
    nblobs, taskid = get_brick_nblobs(brick)
    if nblobs is None:
        # Blob count not known yet; cannot decide whether we are done.
        return

    ncancelled = len(brick_cancelled.get(brick, ()))
    # The brick may have no entry in allresults (no results stored for it),
    # so look it up with a default instead of indexing, which raised
    # KeyError.
    brickresults = allresults.get(brick, {})
    if len(brickresults) + ncancelled < nblobs:
        return

    # Done this brick!  Set qdo state=Succeeded
    checkpoint_fn = opt.checkpoint % dict(brick=brick)
    R = [dict(brickname=brick, iblob=iblob, result=res)
         for iblob, res in brickresults.items()]
    print('Writing final checkpoint', checkpoint_fn)
    _write_checkpoint(R, checkpoint_fn)
    print('Setting QDO task to Succeeded:', brick)
    q.set_task_state(taskid, qdo.Task.SUCCEEDED)
    # Drop the results only after the checkpoint and task state are updated;
    # pop() tolerates a brick that never had an entry.
    allresults.pop(brick, None)
    finished_bricks.put((brick, len(R)))
def output_thread(queuename, outqueue, checkpointqueue, blobsizes,
                  finished_bricks, opt):
    """Collect blob results per brick, checkpoint them, and finish bricks.

    Runs an endless loop (this function never returns) that:

    * writes an interim checkpoint for every brick whose result count has
      changed, whenever more than ``opt.checkpoint_period`` seconds have
      passed since the previous round of interim checkpoints;
    * drains ``checkpointqueue`` of previously checkpointed results;
    * waits up to 60 seconds for one message on ``outqueue`` and records it;
    * after each update, checks whether the affected brick is complete
      (results + cancelled blobs >= blob count) and, if so, writes the
      final checkpoint, sets the QDO task to SUCCEEDED and reports the
      brick on ``finished_bricks``.

    Parameters
    ----------
    queuename
        Name passed to ``qdo.connect``.
    outqueue
        Queue of ``(brick, iblob, msg)`` tuples.  ``msg`` is the string
        ``'cancel'``, ``None`` (empty work packet), or a pickled result.
    checkpointqueue
        Queue of ``(brick, iblob, result)`` tuples, already unpickled.
    blobsizes
        Queue of ``(brick, nblobs, qdo_taskid)`` tuples.
    finished_bricks
        Queue onto which ``(brick, nresults)`` is put for each finished
        brick.
    opt
        Options object; ``opt.checkpoint`` is a ``%``-format string taking
        a ``brick`` key, ``opt.checkpoint_period`` is in seconds.
    """
    try:
        import setproctitle
        setproctitle.setproctitle('farm: output')
    except Exception:
        # Best effort: the process title is purely cosmetic.
        pass

    import qdo
    q = qdo.connect(queuename)

    # brick -> {iblob: result}
    allresults = {}

    # Stored values from the 'blobsizes' queue.
    # brick -> (nblobs, qdo_taskid)
    brick_info = {}

    # Local mapping of brickname -> [set of cancelled blob ids]
    brick_cancelled = {}

    def get_brick_nblobs(brick, defnblobs=None):
        """Return ``(nblobs, taskid)`` for *brick*.

        If the brick is not known yet, first absorb everything currently
        waiting on ``blobsizes``.  Falls back to ``(defnblobs, None)`` when
        the brick is still unknown afterwards.
        """
        if brick not in brick_info:
            try:
                while True:
                    br, nb, tid = blobsizes.get(block=False)
                    brick_info[br] = (nb, tid)
            except queue.Empty:
                pass
        return brick_info.get(brick, (defnblobs, None))

    def check_brick_done(brick):
        """Finalize *brick* if all of its blobs are finished or cancelled."""
        nblobs, taskid = get_brick_nblobs(brick)
        if nblobs is None:
            # Blob count not known yet; cannot decide whether we are done.
            return
        ncancelled = len(brick_cancelled.get(brick, ()))
        # A brick can reach this point with no stored results (e.g. a
        # 'cancel' message arrived first), so do not index allresults
        # directly -- that used to raise KeyError.
        brickresults = allresults.get(brick, {})
        if len(brickresults) + ncancelled < nblobs:
            return
        # Done this brick!  Set qdo state=Succeeded
        checkpoint_fn = opt.checkpoint % dict(brick=brick)
        R = [dict(brickname=brick, iblob=iblob, result=res)
             for iblob, res in brickresults.items()]
        print('Writing final checkpoint', checkpoint_fn)
        _write_checkpoint(R, checkpoint_fn)
        print('Setting QDO task to Succeeded:', brick)
        q.set_task_state(taskid, qdo.Task.SUCCEEDED)
        # Drop the results only after the checkpoint and task state are
        # updated; pop() tolerates a brick that never had an entry.
        allresults.pop(brick, None)
        finished_bricks.put((brick, len(R)))

    last_checkpoint = time.time()
    # brick -> number of results contained in its last interim checkpoint
    last_checkpoint_size = {}

    while True:
        tnow = time.time()
        dt = tnow - last_checkpoint
        if dt > opt.checkpoint_period:
            for brick, brickresults in allresults.items():
                # Skip bricks that have not changed since their last
                # interim checkpoint.
                if last_checkpoint_size.get(brick) == len(brickresults):
                    continue
                checkpoint_fn = opt.checkpoint % dict(brick=brick)
                R = [dict(brickname=brick, iblob=iblob, result=res)
                     for iblob, res in brickresults.items()]
                last_checkpoint_size[brick] = len(brickresults)
                nblobs, _ = get_brick_nblobs(brick, '(unknown)')
                print('Writing interim checkpoint', checkpoint_fn, ':',
                      len(brickresults), 'of', nblobs, 'results')
                _write_checkpoint(R, checkpoint_fn)
            last_checkpoint = tnow

        # Read any checkpointed results sent by the input thread
        c = Counter()
        while True:
            try:
                (brick, iblob, res) = checkpointqueue.get(block=False)
            except Exception:
                # Queue empty (or unreadable): stop draining.  Unlike a bare
                # "except:", this lets KeyboardInterrupt/SystemExit through.
                break
            allresults.setdefault(brick, {})[iblob] = res
            c[brick] += 1
        for brick, _ in c.most_common():
            check_brick_done(brick)

        try:
            brick, iblob, msg = outqueue.get(timeout=60)
        except Exception:
            # timeout -- go around again so interim checkpoints still get
            # written while idle.
            continue

        if msg == 'cancel':
            brick_cancelled.setdefault(brick, set()).add(iblob)
            debug('Output thread: got cancel for brick', brick,
                  'blob', iblob)
        else:
            if msg is None:
                # short-cut empty work packet.
                continue
            # Worker sent a blob result.
            # NOTE(review): pickle.loads can execute arbitrary code; this
            # assumes outqueue only carries data from trusted workers.
            result = pickle.loads(msg)
            if result is None:
                ### FIXME -- ???
                continue
            allresults.setdefault(brick, {})[iblob] = result
        # Both cancels and results change the completion count.
        check_brick_done(brick)