class WorkSet(object):
    """A single client's unit of work: a disk-backed queue pair plus counters."""

    def __init__(self, wsdir, wsid, writing=False):
        self.wsid = wsid
        self.qdir = os.path.join(wsdir, str(self.wsid))
        if writing:
            # salvage partially-written queue files before opening for write
            FileEnqueue.recover(self.qdir)
            self.enq = FileEnqueue(self.qdir, buffer=200)
        else:
            self.enq = DummyFileEnqueue(self.qdir)
        self.deq = FileDequeue(self.qdir)
        self.running = True
        self.scheduledcount = 0
        self.checkedoutcount = 0
        self.finishedcount = 0
        self.activecount = 0

    def flush(self):
        # _flush() should be part of close(), but not now
        self.enq._flush()
        self.enq.close()

    def shutdown(self):
        self.flush()
        self.deq.close()

    def get_status(self):
        """Return a dict snapshot of this work set's counters."""
        return dict(
            id=self.wsid,
            running=self.running,
            scheduled=self.scheduledcount,
            checkedout=self.checkedoutcount,
            finished=self.finishedcount,
        )

    def schedule(self, curi):
        """Queue curi for later checkout."""
        self.enq.queue(curi)
        self.scheduledcount += 1

    def checkout(self, n):
        """Take up to n items off the dequeue side; [] when not running."""
        if not self.running:
            return []
        batch = []
        while len(batch) < n:
            item = self.deq.get(timeout=0.001)
            if item is None:
                # dequeue exhausted: close the enqueue side so buffered
                # items get flushed out and become readable
                self.enq.close()
                break
            batch.append(item)
        self.checkedoutcount += len(batch)
        return batch

    def deschedule(self, furi):
        """Record completion of a previously checked-out item."""
        self.finishedcount += 1
class IncomingQueue(object):
    """Disk-backed FIFO of incoming CURIs: one enqueue file plus a dequeue reader."""
    # default maxsize 1GB - this would be too big for multi-queue
    # settings
    def __init__(self, qdir, noupdate=False, norecover=False, **kw):
        # ensure qdir directory exists
        self.qdir = qdir
        if not os.path.isdir(self.qdir):
            os.makedirs(self.qdir)
        self.addedcount = 0
        self.processedcount = 0
        self.rqfile = None
        self.qfiles = None
        if not norecover:
            # salvage partially-written queue files from a prior crash
            FileEnqueue.recover(self.qdir)
        self.init_queues(**kw)

    def init_queues(self, buffsize=0, maxsize=1000*1000*1000):
        # dequeue side
        self.rqfile = FileDequeue(self.qdir)
        # single queue file, no asynchronous writes
        self.qfiles = [FileEnqueue(self.qdir, buffer=buffsize,
                                   maxsize=maxsize)]

    @property
    def buffsize(self):
        # BUG FIX: the original lacked the `return`, so reading this
        # property always yielded None.
        return self.qfiles[0].buffer_size

    @buffsize.setter
    def buffsize(self, v):
        for enq in self.qfiles:
            enq.buffer_size = v

    def __del__(self):
        self.shutdown()

    def close(self, blocking=True):
        # getattr guard: __del__ may run after a failed __init__ that
        # never set self.qfiles
        if getattr(self, 'qfiles', None):
            for q in self.qfiles:
                q.close(blocking=blocking)

    def flush(self):
        if getattr(self, 'qfiles', None):
            for q in self.qfiles:
                q._flush()

    def shutdown(self):
        if getattr(self, 'rqfile', None):
            self.rqfile.close()
        # _flush should be part of close, but not now.
        self.flush()
        # BUG FIX: __init__ never creates write_executor, so the original
        # unconditional self.write_executor.shutdown() raised
        # AttributeError on every shutdown (including from __del__).
        executor = getattr(self, 'write_executor', None)
        if executor is not None:
            executor.shutdown()
        self.close()

    def get_status(self):
        """Return a dict snapshot of counters and queue-file state."""
        buffered = (sum(enq.buffered_count for enq in self.qfiles)
                    if self.qfiles else 0)
        r = dict(addedcount=self.addedcount,
                 processedcount=self.processedcount,
                 bufferedcount=buffered)
        # BUG FIX: the original dereferenced self.rqfile unconditionally
        # while also carrying a dead `if self.rqfile:` guard that merely
        # re-set queuefilecount; it crashed whenever rqfile was None.
        if self.rqfile:
            r['queuefilecount'] = self.rqfile.qfile_count()
            r['dequeue'] = self.rqfile.get_status()
        return r

    def add(self, curis):
        """Queue every CURI in curis; return dict(processed=<count>)."""
        result = dict(processed=0)
        for curi in curis:
            enq = self.qfiles[0]
            enq.queue(curi)
            self.addedcount += 1
            result['processed'] += 1
        return result

    def get(self, timeout=0.0):
        """Pop one CURI (or falsy when empty within timeout seconds)."""
        o = self.rqfile.get(timeout)
        # if queue exhausted, try closing current enq
        # leave busy queues
        if not o:
            self.close(blocking=False)
        if o:
            self.processedcount += 1
        return o