class Pool(object):
    """Worker pool fed through a shared joinable work queue.

    Functions are registered under a tag with ``set_job`` and executed by
    queueing argument sets with ``add_job_data``.  Workers are asked to exit
    by queueing one ``"DIE"`` sentinel per worker.
    """

    def __init__(self, n_workers):
        """Create the work queue and start ``n_workers`` workers.

        Args:
            n_workers: number of ``PoolWorker`` instances to start.
        """
        self._work_queue = JoinableQueue()
        self._workers = []
        self._func_list = []
        for _ in range(n_workers):
            thr = PoolWorker(self._work_queue)
            thr.start()
            self._workers.append(thr)
        logger.info("Started %s Threads", n_workers)

    def join(self):
        """Queue one ``"DIE"`` sentinel per worker, then join every worker."""
        logger.debug("Waiting for all threads to complete.")
        for _ in self._workers:
            self._work_queue.put("DIE")
        for thr in self._workers:
            thr.join()

    def wait(self):
        """Block until every queued item has been marked done."""
        logger.debug("Starting Wait to finish current queue.")
        progress = ProgressPrint()
        progress.start()
        try:
            self._work_queue.join()
        finally:
            # Stop the progress printer even if the join is interrupted.
            progress.stop()

    def terminate(self):
        """Discard all pending work and ask every worker to exit."""
        logger.debug("Terminating all threads")
        try:
            while True:
                self._work_queue.get_nowait()
                # Each discarded item must still be accounted for, otherwise
                # a later wait() would block forever on the joinable queue.
                self._work_queue.task_done()
        except queue.Empty:
            logger.debug("Queue is Empty")
        for _ in self._workers:
            self._work_queue.put("DIE")

    def _get_job(self, tag):
        """Return the function registered under ``tag``, or None if unknown."""
        for func in self._func_list:
            if func.tag == tag:
                return func.function
        return None

    def add_job_data(self, *args, **kwargs):
        """Queue one invocation of the job registered under ``tag``.

        Args:
            *args: positional arguments for the job function.
            **kwargs: keyword arguments for the job function; must contain
                ``tag``, which selects the job and is removed before queueing.

        Raises:
            KeyError: if ``tag`` is not supplied.
        """
        tag = kwargs.pop("tag")
        # NOTE(review): an unregistered tag queues a Job whose function is
        # None; confirm how PoolWorker handles that.
        job = self._get_job(tag)
        self._work_queue.put(Job(job, args, kwargs))
        logger.debug("Added %s job with %s", tag, args)

    def set_job(self, func, tag):
        """Register ``func`` so it can be selected later by ``tag``."""
        self._func_list.append(JobInfo(func, tag))
class MMapPool(object):
    """Fixed-size pool of anonymous memory maps addressed by index.

    Indices are handed out by ``new`` and given back through ``free``, which
    only queues the index; the queue is folded back into the free set the
    next time ``new`` or ``join`` runs.
    """

    def __init__(self, n, mmap_size):
        """Allocate ``n`` anonymous maps of ``mmap_size`` bytes each."""
        self.n = n
        self.mmap_size = mmap_size
        maps = []
        for _ in range(n):
            maps.append(mmap.mmap(-1, mmap_size))
        self.pool = maps
        self.free_mmaps = set(range(n))
        self.free_queue = JoinableQueue()

    def new(self):
        """Reserve a map and return ``(index, mmap)``.

        Blocks until an index is released when none is currently free.
        """
        if len(self.free_mmaps) == 0:
            # Nothing available: wait for somebody to release an index.
            released = self.free_queue.get()
            self.free_mmaps.add(released)
            self.free_queue.task_done()

        # Collect whatever else has been released in the meantime.
        draining = True
        while draining:
            try:
                released = self.free_queue.get_nowait()
            except Empty:
                draining = False
            else:
                self.free_mmaps.add(released)
                self.free_queue.task_done()

        mmap_idx = self.free_mmaps.pop()
        return mmap_idx, self.pool[mmap_idx]

    def join(self):
        """Block until every one of the ``n`` maps has been released."""
        while self.n > len(self.free_mmaps):
            released = self.free_queue.get()
            self.free_mmaps.add(released)
            self.free_queue.task_done()

    def get(self, idx):
        """Return the map stored at ``idx``."""
        return self.pool[idx]

    def free(self, idx):
        """Queue ``idx`` for release back to the pool."""
        self.free_queue.put(idx)
class MMapPool(object):
    """Fixed-size pool of anonymous memory maps addressed by index.

    Indices are handed out by ``new`` and given back through ``free``, which
    only queues the index; the queue is folded back into the free set the
    next time ``new`` or ``join`` runs.
    """

    def __init__(self, n, mmap_size):
        """Allocate ``n`` anonymous maps of ``mmap_size`` bytes each."""
        self.n = n
        self.mmap_size = mmap_size
        maps = []
        for _ in range(n):
            maps.append(mmap.mmap(-1, mmap_size))
        self.pool = maps
        self.free_mmaps = set(range(n))
        self.free_queue = JoinableQueue()

    def new(self):
        """Reserve a map and return ``(index, mmap)``.

        Blocks until an index is released when none is currently free.
        """
        if len(self.free_mmaps) == 0:
            # Nothing available: wait for somebody to release an index.
            released = self.free_queue.get()
            self.free_mmaps.add(released)
            self.free_queue.task_done()

        # Collect whatever else has been released in the meantime.
        draining = True
        while draining:
            try:
                released = self.free_queue.get_nowait()
            except Empty:
                draining = False
            else:
                self.free_mmaps.add(released)
                self.free_queue.task_done()

        mmap_idx = self.free_mmaps.pop()
        return mmap_idx, self.pool[mmap_idx]

    def join(self):
        """Block until every one of the ``n`` maps has been released."""
        while self.n > len(self.free_mmaps):
            released = self.free_queue.get()
            self.free_mmaps.add(released)
            self.free_queue.task_done()

    def get(self, idx):
        """Return the map stored at ``idx``."""
        return self.pool[idx]

    def free(self, idx):
        """Queue ``idx`` for release back to the pool."""
        self.free_queue.put(idx)
class RelayAgent(object):
    """Dummy relay agent.

    This agent subscribes to the '/decision_result' topic and puts the
    received data into a bounded queue.  The queued data is replayed, one
    item per call, when its `step()` method is called.
    """

    def __init__(self, queue_len):
        """Create the queue of capacity ``queue_len`` and subscribe."""
        self.q = Queue(queue_len)
        rospy.Subscriber('/decision_result', Int16, self.__mover)

    def __mover(self, data):
        """Subscriber callback: enqueue ``data.data``, dropping the oldest
        queued item first when the queue is full."""
        try:
            key = data.data
            if self.q.full():
                self.q.get_nowait()
                self.q.task_done()
            self.q.put(key, timeout=0.1)
        except Exception as e:
            # Best effort: a failed enqueue is reported, never raised into
            # the subscriber.  ``e`` is formatted directly because
            # ``e.message`` does not exist on Python 3.
            print("[__mover]: action enque failed. {}".format(e))
        return

    def step(self, *args, **kwargs):
        """Block until an action is available and return ``(action, {})``.

        All positional and keyword arguments are ignored.
        """
        while True:
            try:
                action = self.q.get(timeout=0.1)
                self.q.task_done()
                break
            except Exception:
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit can still break out of this retry loop.
                print("[step]: get action failed")
                time.sleep(0.1)
        return action, {}

    def set_session(self, *args):
        """No-op; accepts and ignores any arguments."""
        return
def process_modules_worker(cls, queue: multiprocessing.JoinableQueue) -> None:
    """Consume ``(irc, message)`` items from ``queue`` forever.

    Each item is dispatched to ``irc.process_private_message(irc, message)``
    unless the message is empty.  Every dequeued item is marked done, so
    ``queue.join()`` on the producer side can complete.

    Args:
        cls: unused by the body.
        queue: joinable queue yielding indexable ``(irc, message)`` items.

    Raises:
        Whatever ``process_private_message`` raises; the item is still
        marked done before the exception propagates.
    """
    while True:
        # A blocking get replaces the empty()/get_nowait() spin, which burned
        # CPU and could raise Empty when another worker won the race.
        item = queue.get()
        try:
            print(f"{os.getpid()}")
            irc: IrcClient = item[0]
            message: str = item[1]
            if message:
                irc.process_private_message(irc, message)
        finally:
            # Always account for the item, including skipped empty messages.
            queue.task_done()
class _PrPipe(object):
    """Custom pipe manager to capture the output of processes and store them
    in dedicated queues.

    Clients register their own queues.
    """

    def __init__(self, pipeHandle):
        """
        Args:
            pipeHandle (pipe): Pipe to monitor for records
        """
        self.id = ''.join(
            [random.choice('0123456789ABCDEF') for x in range(6)])

        self.queue = JoinableQueue(MAX_QUEUE_LENGTH)

        self.process = Process(target=self.enqueue_output,
                               kwargs={
                                   "out": pipeHandle,
                                   "queue": self.queue
                               })
        self.process.daemon = True
        self.process.start()

        self.clientQueuesLock = Lock()
        self.clientQueues = dict()
        self.lastClientId = 0

    # Class contains Locks and Queues which cannot be pickled
    def __getstate__(self):
        """Prevent _PrPipe from being pickled across Processes

        Raises:
            Exception
        """
        raise Exception("Don't pickle me!")

    def enqueue_output(self, out, queue):
        """Copy lines from a given pipe handle into the main queue

        Runs in a separate process, started by __init__.  Lines are decoded
        as UTF-8; the pipe is closed once readline returns b''.

        Args:
            out (pipe): Pipe to read from
            queue (Queue): Queue to write to
        """
        for line in iter(out.readline, b''):
            queue.put(line.decode('utf-8'))
        out.close()

    def publish(self):
        """Push messages from the main queue to all client queues

        Must be triggered by an external mechanism
        Typically triggered by getLine or wait
        """
        try:
            while not self.queue.empty():
                with self.clientQueuesLock:
                    line = self.queue.get_nowait()
                    for q in list(self.clientQueues.values()):
                        q.put(line)
                    self.queue.task_done()
        except Empty:
            pass

    def getQueue(self, clientId):
        """Retrieve a client's Queue proxy object

        Args:
            clientId (string): ID of the client

        Returns:
            QueueProxy

        Raises:
            KeyError: if the client is not registered
        """
        return self.clientQueues[text(clientId)]

    def isEmpty(self, clientId=None):
        """Checks whether the primary Queue or any clients' Queues are empty

        Returns True ONLY if ALL queues are empty if clientId is None
        Returns True ONLY if both main queue and specified client queue are
        empty when clientId is provided

        Args:
            clientId (string): ID of the client

        Returns:
            bool
        """
        if clientId is not None:
            return self.queue.empty() \
                and self.getQueue(clientId).empty()
        else:
            empty = self.queue.empty()
            with self.clientQueuesLock:
                for q in list(self.clientQueues.values()):
                    empty = empty and q.empty()
            return empty

    def is_alive(self):
        """Check whether the process managing the pipe > Queue movement
        is still active

        Returns:
            bool
        """
        return self.process.is_alive()

    def getLine(self, clientId):
        """Retrieve a line from a given client's Queue

        Args:
            clientId (string): ID of the client

        Returns:
            <element from Queue>

        Raises:
            Empty
        """
        # Pull any newer lines
        self.publish()

        # Throws Empty
        q = self.getQueue(clientId)
        line = q.get_nowait()
        q.task_done()
        return line

    def registerClientQueue(self, queueProxy):
        """Attach an additional Queue proxy to this _PrPipe

        All elements published() from now on will also be added to this Queue
        Returns the clientId for the new client, which must be used in all
        future interaction with this _PrPipe

        Args:
            queueProxy (QueueProxy): Proxy object to a Queue we should populate

        Returns:
            string. The client's ID for access to this queue
        """
        with self.clientQueuesLock:
            # Allocate the id under the lock so two concurrent registrations
            # can never be handed the same clientId.
            clientId = self.lastClientId + 1
            self.lastClientId = clientId
            self.clientQueues[text(clientId)] = queueProxy

        return text(clientId)

    def unRegisterClientQueue(self, clientId):
        """Detach a Queue proxy from this _PrPipe

        Returns the clientId that was removed

        Args:
            clientId (string): ID of the client

        Returns:
            string. ID of the client queue

        Raises:
            KeyError: if the client is not registered
        """
        # Queues are stored under text(clientId); normalise before popping so
        # an integer id removes the same entry registerClientQueue created.
        key = text(clientId)
        with self.clientQueuesLock:
            self.clientQueues.pop(key)

        return key

    def destructiveAudit(self):
        """Print a line from each client Queue attached to this _PrPipe

        This is a destructive operation, as it *removes* a line from each Queue
        """
        # Snapshot the ids and release the lock before reading: getLine()
        # calls publish(), which takes clientQueuesLock itself and would
        # deadlock on a non-reentrant lock if it were still held here.
        with self.clientQueuesLock:
            clientIds = list(self.clientQueues)

        for clientId in clientIds:
            try:
                print("clientId " + text(clientId) + ": " +
                      self.getLine(clientId))
            except (Empty, KeyError):
                # KeyError: the client was unregistered after the snapshot.
                print("clientId " + text(clientId) + " is empty")
class Debugger:
    """Queues debug lines and prints them from a single background thread."""

    def __init__(self):
        """Create the line queue and start the printing thread."""
        self.q = JoinableQueue()
        self.terminate_flag = False
        self.printing_thread = Thread(target=self.threaded_method)
        self.printing_thread.start()

    def threaded_method(self):
        """Print queued lines until ``terminate_flag`` is set, then drain.

        ``None`` entries are consumed without being printed.  Every dequeued
        entry is marked done so that ``join()`` can return.
        """
        # Main loop: only sleep when there is nothing to print, so a burst of
        # lines is not throttled to one line per SLEEP_TIME.
        while not self.terminate_flag:
            try:
                line = self.q.get_nowait()
            except queue.Empty:
                sleep(SLEEP_TIME)
                continue
            try:
                if line is not None:
                    print(line)
            finally:
                self.q.task_done()

        # Remaining lines: drain without blocking.  A blocking get() here
        # would hang the thread forever once the queue ran dry.
        while True:
            try:
                line = self.q.get_nowait()
            except queue.Empty:
                break
            try:
                if line is not None:
                    print(line)
            finally:
                self.q.task_done()

    def join(self):
        """Wait for all queued lines to be printed and stop the thread."""
        self.q.join()
        self.terminate_flag = True
        self.printing_thread.join()

    def debug(self, channel: str, *args: Union[list, str]):
        """Debugging print and logging functions

        Records information for debugging by printing or logging to disk.
        args is a list of arguments to be formatted. Various channels
        can be toggled on or off from settings.DEBUG_CHANNELS: dict.
        Channels not found in the dict will be printed by default.

        Usage:
        debug("channel", "message")
        debug("channel", object)
        debug("channel", "message: {}, {}", ["list", thing_to_format])

        respective outputs:
        [channel] message
        [channel] object.__repr__()
        [channel] message with brackets: list, thing_to_format.__repr__()

        With exactly two arguments the second is unpacked into format();
        with three or more, everything after the message is passed to
        format() directly.  With no arguments only the channel tag is
        queued.

        By formatting once inside debug(), format() is only called if
        printing is turned on.

        Remember to include [ ] around the items to be formatted.

        Note that one iteration of this code spawned a separate thread for
        every debug() call. The printing system call could not keep up and
        threads piled up and eventually crashed the program. Either threads
        must be managed with pools or print statements can be allowed to slow
        down the program (they can be turned off in settings).
        """
        # TODO: Use settings.ROLE for per client and server debugging?
        if settings.DEBUG_PRINTING and self.channel_active(channel):
            n = len(args)
            if n == 0:
                # Previously fell through to args[0] and raised IndexError.
                s = "[{}]".format(channel)
            elif n == 1:
                s = "[{}]\t\t{}".format(channel, args[0])
            elif n == 2:
                message = str(args[0])
                s = "[{}]\t{}".format(channel, message.format(*args[1]))
            else:
                message = str(args[0])
                s = "[{}]\t{}".format(channel, message.format(*args[1:]))
            # Print message to console (via the printing thread)
            self.q.put(s)
        if settings.DEBUG_LOGGING and self.channel_active(channel):
            # TODO: Output stuff to a log file
            pass

    def channel_active(self, channel: str) -> bool:
        """Whether to print or log for a debug channel

        Channels that are not found are debugged by default
        """
        if channel in settings.DEBUG_CHANNELS:
            return settings.DEBUG_CHANNELS[channel]
        return True  # default for unknown channels
class CAN:
    """Bounded per-source frame buffers with drop-oldest writes.

    Each ``update_*`` method discards one queued frame when its buffer is
    full and then appends a new frame; each ``get_*`` method returns the next
    queued frame, or a fallback frame when the buffer is empty.
    """

    def __init__(self):
        # Range readings; the most recently returned frame is remembered so
        # get_range_frame() can repeat it when nothing new is queued.
        self.distance_buffer = JoinableQueue(100)
        self.last_received_range_frame = RangeCANFrame(0, 0)

        # RFID readings
        self.RFID_buffer = JoinableQueue(100)

        # Bluetooth remote-control commands
        self.btrc_buffer = JoinableQueue(100)

        # WiFi control-center messages
        self.wfcc_buffer = JoinableQueue(100)

        # WiFi train-state messages
        self.wfts_buffer = JoinableQueue(100)

    def update_distance_buffer(self, distance_to_obstacle, timestamp):
        """Queue a range frame, discarding one queued frame if full."""
        buf = self.distance_buffer
        if buf.full():
            buf.get()
        frame = RangeCANFrame(distance_to_obstacle, timestamp)
        buf.put(frame)

    def update_RFID_buffer(self, RFID, timestamp):
        """Queue an RFID frame, discarding one queued frame if full."""
        buf = self.RFID_buffer
        if buf.full():
            buf.get()
        frame = RFIDCANFrame(RFID, timestamp)
        buf.put(frame)

    def update_btrc_buffer(self, btrc_command, timestamp):
        """Queue a remote-control frame, discarding one queued frame if full."""
        buf = self.btrc_buffer
        if buf.full():
            buf.get()
        frame = BluetoothRemoteControlCANFrame(btrc_command, timestamp)
        buf.put(frame)

    def update_wfcc_buffer(self, wfcc_message, timestamp):
        """Queue a control-center frame, discarding one queued frame if full."""
        buf = self.wfcc_buffer
        if buf.full():
            buf.get()
        frame = WiFiControlCenterCANFrame(wfcc_message, timestamp)
        buf.put(frame)

    def update_wfts_buffer(self, most_recent_position, mode, state, decision,
                           timestamp):
        """Queue a train-state frame, discarding one queued frame if full."""
        buf = self.wfts_buffer
        if buf.full():
            buf.get()
        frame = WiFiTrainStateCANFrame(most_recent_position, mode, state,
                                       decision, timestamp)
        buf.put(frame)

    def get_range_frame(self):
        """Return the next range frame, or the last one returned if empty."""
        buf = self.distance_buffer
        if buf.empty():
            return self.last_received_range_frame
        self.last_received_range_frame = buf.get_nowait()
        return self.last_received_range_frame

    def get_RFID_frame(self):
        """Return the next RFID frame, or a "NaRFID" frame if empty."""
        buf = self.RFID_buffer
        if buf.empty():
            return RFIDCANFrame("NaRFID", 0)
        return buf.get_nowait()

    def get_btrc_frame(self):
        """Return the next remote-control frame, or a "None" frame if empty."""
        buf = self.btrc_buffer
        if buf.empty():
            return BluetoothRemoteControlCANFrame("None", 0)
        return buf.get_nowait()

    def get_wfcc_frame(self):
        """Return the next control-center frame, or a "None" frame if empty.

        Unlike the getters above, this one dequeues with a blocking get().
        """
        buf = self.wfcc_buffer
        if buf.empty():
            return WiFiControlCenterCANFrame("None", 0)
        return buf.get()

    def get_wfts_frame(self):
        """Return the next train-state frame, or an "UNKNOWN" frame if empty.

        Unlike the first three getters, this one dequeues with a blocking
        get().
        """
        buf = self.wfts_buffer
        if buf.empty():
            return WiFiTrainStateCANFrame(-1, "UNKNOWN", "UNKNOWN", "NO", 0)
        return buf.get()
class MessageServer(object):
    """Local HTTP server for interacting with the extension"""

    def __init__(self):
        global MESSAGE_SERVER
        MESSAGE_SERVER = self
        self.thread = None
        self.messages = JoinableQueue()
        self.config = None
        # Set by stop(); initialised here so it can be read before stop().
        self.must_exit = False
        # IOLoop owned by the server thread; recorded by run() so that stop()
        # can reach it from another thread.
        self.ioloop = None
        self.__is_started = threading.Event()

    def get_message(self, timeout):
        """Get a single message from the queue

        Blocks for up to ``timeout`` seconds; the queue's Empty exception
        propagates if nothing arrives in time.
        """
        message = self.messages.get(block=True, timeout=timeout)
        self.messages.task_done()
        return message

    def flush_messages(self):
        """Flush all of the pending messages"""
        try:
            while True:
                self.messages.get_nowait()
                self.messages.task_done()
        except Exception:
            # Expected exit path: the queue reports Empty once drained.
            # Kept broad on purpose so flushing stays best-effort.
            pass

    def handle_message(self, message):
        """Add a received message to the queue"""
        self.messages.put(message)

    def start(self):
        """Start running the server in a background thread

        Waits up to 30 seconds for the server thread to report it is
        listening.
        """
        self.__is_started.clear()
        self.thread = threading.Thread(target=self.run)
        self.thread.daemon = True
        self.thread.start()
        self.__is_started.wait(timeout=30)

    def stop(self):
        """Stop running the server"""
        logging.debug("Shutting down extension server")
        self.must_exit = True
        if self.thread is not None:
            # Use the loop the server thread is actually running.  Looking it
            # up from this thread can return a different loop on Tornado >= 5,
            # in which case the stop callback never reaches the server and
            # the join below blocks forever.
            ioloop = self.ioloop
            if ioloop is None:
                ioloop = tornado.ioloop.IOLoop.instance()
            ioloop.add_callback(ioloop.stop)
            self.thread.join()
        self.thread = None
        self.ioloop = None
        logging.debug("Extension server stopped")

    def is_ok(self):
        """Check that the server is responding

        Polls http://127.0.0.1:8888/ping for up to 30 seconds, sleeping 5
        seconds between failed attempts.

        Returns:
            bool: True once the server answers 'pong', False on timeout.
        """
        import requests
        if (sys.version_info >= (3, 0)):
            from time import monotonic
        else:
            from monotonic import monotonic
        end_time = monotonic() + 30
        server_ok = False
        # Bypass any configured proxy for the loopback request.
        proxies = {"http": None, "https": None}
        while not server_ok and monotonic() < end_time:
            try:
                response = requests.get('http://127.0.0.1:8888/ping',
                                        timeout=10, proxies=proxies)
                if response.text == 'pong':
                    server_ok = True
            except Exception:
                pass
            if not server_ok:
                time.sleep(5)
        return server_ok

    def run(self):
        """Main server loop"""
        logging.debug('Starting extension server on port 8888')
        try:
            # This runs in a background thread, which needs its own
            # asyncio event loop.
            asyncio.set_event_loop(asyncio.new_event_loop())
        except Exception:
            pass
        application = tornado.web.Application([(r"/.*",
                                                TornadoRequestHandler)])
        application.listen(8888, '127.0.0.1')
        # Record this thread's loop before signalling readiness so stop()
        # always sees it once start() has returned.
        self.ioloop = tornado.ioloop.IOLoop.current()
        self.__is_started.set()
        self.ioloop.start()
class _PrPipe(object):
    """Custom pipe manager to capture the output of processes and store them
    in dedicated queues.

    Clients register their own queues.
    """

    def __init__(self, pipeHandle):
        """
        Args:
            pipeHandle (pipe): Pipe to monitor for records
        """
        self._initializeLogging()

        self.id = \
            ''.join([random.choice('0123456789ABCDEF') for x in range(6)])

        self.queue = JoinableQueue(settings.config["MAX_QUEUE_LENGTH"])
        self.inboundQueueLock = Lock()

        self.process = Process(target=self.enqueue_output,
                               kwargs={
                                   "out": pipeHandle,
                                   "queue": self.queue
                               })
        self.process.daemon = True
        self.process.start()

        self.clientQueuesLock = Lock()
        self.clientQueues = dict()
        self.lastClientId = 0

    # Class contains Locks and Queues which cannot be pickled
    def __getstate__(self):
        """Prevent _PrPipe from being pickled across Processes

        Raises:
            Exception
        """
        raise Exception("Don't pickle me!")

    def _initializeLogging(self):
        """Create the module logger with a NullHandler, once per instance."""
        if hasattr(self, '_log'):
            if self._log is not None:
                return

        # Logging
        self._log = logging.getLogger(__name__)
        self.addLoggingHandler(logging.NullHandler())

    def addLoggingHandler(self, handler):
        """Attach ``handler`` to this pipe's logger."""
        self._log.addHandler(handler)

    def enqueue_output(self, out, queue):
        """Copy lines from a given pipe handle into the main queue

        Runs in a separate process, started by __init__. Closes pipe when
        done reading.

        Args:
            out (pipe): Pipe to read from
            queue (Queue): Queue to write to
        """
        with self.inboundQueueLock:
            for line in iter(out.readline, ''):
                self._log.debug("Enqueing line of length {}".format(len(line)))
                lineContent = ContentWrapper(line)
                queue.put(lineContent)
                queueStatus = queue.empty()
                self._log.debug("Queue reporting empty as '{}' after adding "
                                "line of length {}".format(
                                    queueStatus, len(lineContent)))

                # Wait until the queue reports the added content
                while queue.empty():
                    time.sleep(0.001)

                # If the queue originally reported that it was empty, report
                # that it's now showing the new content
                if queueStatus:
                    self._log.debug("Queue now reporting the added content")

            self._log.debug("Closing pipe handle")
            out.close()

    def publish(self):
        """Push messages from the main queue to all client queues

        Must be triggered by an external mechanism
        Typically triggered by getLine or wait
        """
        with self.inboundQueueLock:
            try:
                while not self.queue.empty():
                    with self.clientQueuesLock:
                        line = self.queue.get_nowait()
                        for q in list(self.clientQueues.values()):
                            q.put(line)
                        self.queue.task_done()
            except Empty:
                pass

    def getQueue(self, clientId):
        """Retrieve a client's Queue proxy object

        Args:
            clientId (string): ID of the client

        Returns:
            QueueProxy

        Raises:
            KeyError: if the client is not registered
        """
        return self.clientQueues[text(clientId)]

    def isEmpty(self, clientId=None):
        """Checks whether the primary Queue or any clients' Queues are empty

        Returns True ONLY if ALL queues are empty if clientId is None
        Returns True ONLY if both main queue and specified client queue are
        empty when clientId is provided

        Args:
            clientId (string): ID of the client

        Returns:
            bool
        """
        with self.inboundQueueLock:
            if clientId is not None:
                empty = self.queue.empty() \
                    and self.getQueue(clientId).empty()
            else:
                empty = self.queue.empty()
                with self.clientQueuesLock:
                    for q in list(self.clientQueues.values()):
                        empty = empty and q.empty()
            self._log.debug("Reporting queue empty: {}".format(empty))
            return empty

    def is_alive(self):
        """Check whether the process managing the pipe > Queue movement
        is still active

        Returns:
            bool
        """
        return self.process.is_alive()

    def getLine(self, clientId):
        """Retrieve a line from a given client's Queue

        Args:
            clientId (string): ID of the client

        Returns:
            The ``value`` attribute of the element taken from the Queue

        Raises:
            Empty
        """
        # Pull any newer lines
        self.publish()

        # Throws Empty
        q = self.getQueue(clientId)
        line = q.get_nowait()
        q.task_done()

        self._log.debug("Returning line")
        return line.value

    def registerClientQueue(self, queueProxy):
        """Attach an additional Queue proxy to this _PrPipe

        All elements published() from now on will also be added to this Queue
        Returns the clientId for the new client, which must be used in all
        future interaction with this _PrPipe

        Args:
            queueProxy (QueueProxy): Proxy object to a Queue we should populate

        Returns:
            string. The client's ID for access to this queue
        """
        with self.clientQueuesLock:
            # Allocate the id under the lock so two concurrent registrations
            # can never be handed the same clientId.
            clientId = self.lastClientId + 1
            self.lastClientId = clientId
            self.clientQueues[text(clientId)] = queueProxy

        return text(clientId)

    def unRegisterClientQueue(self, clientId):
        """Detach a Queue proxy from this _PrPipe

        Unknown ids are ignored.

        Args:
            clientId (string): ID of the client

        Returns:
            string. ID of the client queue
        """
        # Membership is tested with text(clientId), so the pop must use the
        # same normalised key; popping the raw id raised KeyError for ints.
        key = text(clientId)
        with self.clientQueuesLock:
            self.clientQueues.pop(key, None)

        return key

    def destructiveAudit(self):
        """Print a line from each client Queue attached to this _PrPipe

        This is a destructive operation, as it *removes* a line from each Queue
        """
        # Snapshot the ids and release the lock before reading: getLine()
        # calls publish(), which takes inboundQueueLock and then
        # clientQueuesLock.  Holding clientQueuesLock here would both invert
        # that order and re-acquire a non-reentrant lock.
        with self.clientQueuesLock:
            clientIds = list(self.clientQueues)

        for clientId in clientIds:
            try:
                self._log.info("clientId {}: {}".format(
                    text(clientId), self.getLine(clientId)))
            except (Empty, KeyError):
                # KeyError: the client was unregistered after the snapshot.
                self._log.info("clientId {} is empty".format(
                    text(clientId)))
class ParasolBatchSystem(AbstractBatchSystem):
    """The interface for Parasol.
    """

    def __init__(self, config, maxCpus, maxMemory):
        """Kill any jobs left over for this results file, reset the parasol
        batch, and start the background process that reports finished jobs.

        Args:
            config: object whose ``attrib`` mapping provides
                "parasol_command" and "job_tree".
            maxCpus: passed to the parent constructor; also caps the cpus in
                use at once (see issueJob).
            maxMemory: passed to the parent constructor; not enforced here.
        """
        AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory)  # Call the parent constructor
        if maxMemory != sys.maxint:
            logger.critical("A max memory has been specified for the parasol batch system class of %i, but currently this batchsystem interface does not support such limiting" % maxMemory)
        # Keep the name of the results file for the pstat2 command..
        self.parasolCommand = config.attrib["parasol_command"]
        self.parasolResultsFile = getParasolResultsFileName(config.attrib["job_tree"])
        # Reset the job queue and results (initially, we do this again once we've killed the jobs)
        # Raw strings: "\s" in a plain literal is an invalid escape sequence.
        self.queuePattern = re.compile(r"q\s+([0-9]+)")
        self.runningPattern = re.compile(r"r\s+([0-9]+)\s+[\S]+\s+[\S]+\s+([0-9]+)\s+[\S]+")
        self.killJobs(self.getIssuedJobIDs())  # Kill any jobs on the current stack
        logger.info("Going to sleep for a few seconds to kill any existing jobs")
        time.sleep(5)  # Give batch system a second to sort itself out.
        logger.info("Removed any old jobs from the queue")
        # Reset the job queue and results
        exitValue = popenParasolCommand("%s -results=%s clear sick" % (self.parasolCommand, self.parasolResultsFile), False)[0]
        if exitValue is not None:
            logger.critical("Could not clear sick status of the parasol batch %s" % self.parasolResultsFile)
        exitValue = popenParasolCommand("%s -results=%s flushResults" % (self.parasolCommand, self.parasolResultsFile), False)[0]
        if exitValue is not None:
            logger.critical("Could not flush the parasol batch %s" % self.parasolResultsFile)
        # Truncate the results file.
        open(self.parasolResultsFile, 'w').close()
        logger.info("Reset the results queue")
        # Stuff to allow max cpus to be work
        self.outputQueue1 = Queue()
        self.outputQueue2 = Queue()
        worker = Process(target=getUpdatedJob, args=(self.parasolResultsFile, self.outputQueue1, self.outputQueue2))
        worker.daemon = True
        worker.start()
        self.usedCpus = 0
        self.jobIDsToCpu = {}

    def issueJob(self, command, memory, cpu):
        """Issues parasol with job commands.

        Blocks while the cpus in use (including this job) exceed maxCpus, and
        retries every 5 seconds until parasol reports a job id.

        Returns:
            int: the parasol job id.
        """
        self.checkResourceRequest(memory, cpu)
        pattern = re.compile("your job ([0-9]+).*")
        parasolCommand = "%s -verbose -ram=%i -cpu=%i -results=%s add job '%s'" % (self.parasolCommand, memory, cpu, self.parasolResultsFile, command)
        # Deal with the cpus
        self.usedCpus += cpu
        while True:  # Process finished results with no wait
            try:
                jobID = self.outputQueue1.get_nowait()
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
                assert self.usedCpus >= 0
                self.outputQueue1.task_done()
            except Empty:
                break
        while self.usedCpus > self.maxCpus:  # If we are still waiting
            self.usedCpus -= self.jobIDsToCpu.pop(self.outputQueue1.get())
            assert self.usedCpus >= 0
            self.outputQueue1.task_done()
        # Now keep going
        while True:
            line = popenParasolCommand(parasolCommand)[1][0]
            match = pattern.match(line)
            if match is not None:
                # This is because parasol add job will return success, even
                # if the job was not properly issued!
                break
            logger.info("We failed to properly add the job, we will try again after a sleep")
            time.sleep(5)
        jobID = int(match.group(1))
        self.jobIDsToCpu[jobID] = cpu
        logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
        logger.debug("Issued the job command: %s with (parasol) job id: %i " % (parasolCommand, jobID))
        return jobID

    def killJobs(self, jobIDs):
        """Kills the given jobs, represented as Job ids, then checks they are
        dead by checking they are not in the list of issued jobs.

        Retries every 5 seconds until none of ``jobIDs`` is still issued.
        """
        while True:
            for jobID in jobIDs:
                exitValue = popenParasolCommand("%s remove job %i" % (self.parasolCommand, jobID), runUntilSuccessful=False)[0]
                # %s, not %i: __init__ treats a None exit value as success,
                # and "%i" % None raises TypeError.
                logger.info("Tried to remove jobID: %i, with exit value: %s" % (jobID, exitValue))
            runningJobs = self.getIssuedJobIDs()
            if set(jobIDs).difference(set(runningJobs)) == set(jobIDs):
                return
            time.sleep(5)
            logger.critical("Tried to kill some jobs, but something happened and they are still going, so I'll try again")

    def getIssuedJobIDs(self):
        """Gets the list of jobs issued to parasol.

        Only jobs whose last field equals this batch's results file count.
        """
        # Example issued job, first field is jobID, last is the results file
        # 31816891 localhost benedictpaten 2009/07/23 10:54:09 python ~/Desktop/out.txt
        issuedJobs = set()
        for line in popenParasolCommand("%s -extended list jobs" % self.parasolCommand)[1]:
            if line != '':
                tokens = line.split()
                if tokens[-1] == self.parasolResultsFile:
                    jobID = int(tokens[0])
                    issuedJobs.add(jobID)
        return list(issuedJobs)

    def getRunningJobIDs(self):
        """Returns map of running jobIDs and the time they have been running.
        """
        # Example lines..
        # r 5410186 benedictpaten jobTreeSlave 1247029663 localhost
        # r 5410324 benedictpaten jobTreeSlave 1247030076 localhost
        runningJobs = {}
        issuedJobs = self.getIssuedJobIDs()
        for line in popenParasolCommand("%s -results=%s pstat2 " % (self.parasolCommand, self.parasolResultsFile))[1]:
            if line != '':
                match = self.runningPattern.match(line)
                if match is not None:
                    jobID = int(match.group(1))
                    startTime = int(match.group(2))
                    if jobID in issuedJobs:  # It's one of our jobs
                        runningJobs[jobID] = time.time() - startTime
        return runningJobs

    def getUpdatedJob(self, maxWait):
        """Return the next entry from the second output queue, or None if
        nothing arrives within ``maxWait``."""
        jobID = self.getFromQueueSafely(self.outputQueue2, maxWait)
        if jobID is not None:
            self.outputQueue2.task_done()
        return jobID

    def getRescueJobFrequency(self):
        """Parasol leaks jobs, but rescuing jobs involves calls to parasol list
        jobs and pstat2, making it expensive.
        """
        return 5400  # Once every 90 minutes
class ProcessBlock(Process, ABC):
    """ The abstract class for a block/process in an execution pipeline

    A block takes objects from its parent's queue (``family.parent.objs``),
    transforms them with ``process_obj`` and publishes the result on its own
    queue (``self.objs``) for its children.  Control is driven by three named
    events ("cancel", "requeue", "stop") plus a master event that is set
    whenever any of them is raised.
    """

    # Arbitrary timeout for blocking queue operations
    _poll_interval = 1

    def __init__(self, *args, parent=None, queue_size=0, **kwargs):
        """
        Args:
            parent: the parent block, or None for a block without parent
            queue_size: maximum size passed to this block's object queue
            *args, **kwargs: forwarded to the Process constructor
        """
        super().__init__(*args, **kwargs)

        # Events (in the order they should be checked)
        self.events = OrderedDict([
            ("cancel", Event()),
            ("requeue", Event()),
            ("stop", Event()),
        ])

        # Corresponding event handlers
        self.event_handlers = {
            "cancel": self._cancel_handler,
            "requeue": self._requeue_handler,
            "stop": self._stop_handler,
        }

        # Master event, to be set after any other event
        self.event = Event()

        # The family of the processblock
        # The siblings list is a copy of the parent's children at this point.
        siblings = copy(parent.family.children) if parent is not None else []
        self.family = BlockFamily(parent, siblings, [])
        # Link family with self
        self.family.link(self)

        # The object queue
        self.objs = JoinableQueue(queue_size)

        # List of objects that were canceled and need re-processing
        self._canceled_objs = deque()

        # Logging facility
        self.logger = getLogger(self.name)

        # Object currently processed
        self._obj = None

    def start(self):
        """ Re-run the Process constructor with the current name, then start

        NOTE(review): presumably this resets the Process bookkeeping so the
        block can be started again -- confirm.
        """
        super().__init__(name=self.name)
        super().start()

    @abstractmethod
    def process_obj(self, obj):
        """ The actual work a block wants to perform on a object

        run() publishes the return value to the children and handles
        ProcessingError and EventInterrupt raised from here.
        """
        raise NotImplementedError()

    def _stop_handler(self):
        """ Send the "end object" (None) to every child """
        self.logger.debug("sending the 'end object' to child processes...")
        # One None per alive child, all placed on this block's own queue.
        for _ in self.family.alive_children():
            self.objs.put(None)

    def cancel(self):
        """ Set the cancel event and the master event """
        self.events["cancel"].set()
        self.event.set()

    def _cancel_handler(self):
        """ Cancel children's objects and re-queue them in self._canceled_objs
        """
        self.logger.debug("ask children to requeue their objects")
        for child in self.family.alive_children():
            child.events["requeue"].set()
            child.event.set()

        self.logger.debug("fetching canceled objects...")
        # Keep draining our own queue until it is empty AND no alive child
        # still has its requeue event set.
        while (self.objs.qsize() != 0 or
               any(child.events["requeue"].is_set() for child in
                   self.family.alive_children())):
            try:
                obj = self.objs.get_nowait()
                self.objs.task_done()
            except Empty:
                continue
            # 'End objects' found in the queue are dropped here.
            if obj is not None:
                self._canceled_objs.append(obj)

        # To be able to stop without the parent block sending an 'end object'
        if self.events["stop"].is_set():
            self._canceled_objs.append(None)
            self.events["stop"].clear()

        # Clear the event
        self.events["cancel"].clear()

    def _requeue_handler(self):
        """ Requeue every object managed by the block or one of its children

        Everything is put back on the parent's queue, and this method then
        blocks until the parent's queue has been joined.
        """
        for child in self.family.alive_children():
            child.events["requeue"].set()
            child.event.set()

        self.logger.debug("requeueing objects...")
        # Hand back the object that was being processed, if any.
        if self._obj is not None:
            self.family.parent.objs.put(self._obj)
            self._obj = None

        while (self.objs.qsize() != 0 or
               any(child.events["requeue"].is_set() for child in
                   self.family.alive_children())):
            try:
                obj = self.objs.get_nowait()
                self.objs.task_done()
            except Empty:
                # Do not waste that time
                if self._canceled_objs:
                    obj = self._canceled_objs.popleft()
                else:
                    continue
            if obj is not None:
                self.family.parent.objs.put(obj)

        # NOTE(review): _canceled_objs is iterated but not emptied here, so
        # these objects stay available to get_obj() -- confirm intended.
        for obj in filter(lambda x: x is not None, self._canceled_objs):
            self.family.parent.objs.put(obj)

        self.logger.debug("wait for parent to fetch all the objects...")
        self.family.parent.objs.join()

        # Processblock was potentially stopped
        self.events["stop"].clear()

        # Clear the event
        self.events["requeue"].clear()

    def _process_events(self, ignore=()):
        """ Process events

        The order in which events are processed is important

        Args:
            ignore: names of events whose handlers must not run

        Returns:
            True  --- if an Event was processed
            False --- otherwise
        """
        self.logger.debug("process events...")
        # The master event is the cheap check; nothing else is inspected
        # unless it is set.
        if not self.event.is_set():
            return False
        self.event.clear()

        event_processed = False
        for event_name in self.events:
            if event_name in ignore:
                continue
            if self.events[event_name].is_set():
                self.logger.debug("processing '%s' event", event_name)
                self.event_handlers[event_name]()
                event_processed = True
        return event_processed

    def get_obj(self, timeout=None):
        """ Get an object from the parent block

        Previously canceled objects are served first.

        Raises:
            Empty: if the parent's queue yields nothing within `timeout`
        """
        self.logger.debug("get an object to process...")
        try:
            return self._canceled_objs.popleft()
        except IndexError:
            obj = self.family.parent.objs.get(timeout=timeout)
            # Marked done as soon as it is fetched, not once processed.
            self.family.parent.objs.task_done()
            return obj

    def try_publish_obj(self, obj, poll_interval=None):
        """ Publish `obj` to child blocks (unless `obj` is None)

        Returns:
            True if `obj` was published
            False if an event occured before `obj` was published
        """
        if obj is None:
            return True
        if not self.family.children:
            self.logger.debug("no one to pass '%s' onto", obj)
            return True

        self.logger.debug("publish '%s'", obj)
        # Retry the put every `poll_interval` until it succeeds or the master
        # event is set.
        while not self.event.is_set():
            try:
                self.objs.put(obj, timeout=poll_interval)
            except Full:
                continue
            return True

        # An event occured
        self.logger.debug("publication was interrupted by an event")
        return False

    def _cleanup(self):
        """ Tell parent and siblings we stop and exit cleanly """
        # Set their master events so they re-check their state.
        if self.family.parent is not None:
            self.family.parent.event.set()
        for sibling in self.family.siblings:
            sibling.event.set()

        self.logger.debug("waiting for child processes...")
        for child in self.family.children:
            child.join()

    def run(self):
        """ Launch child blocks and process objects """
        # Launch child blocks
        # Children are started here in order to build a gracefull process tree
        self.logger.debug("start %d child(ren)", len(self.family.children))
        for child in self.family.children:
            child.start()

        # Outer loop: re-entered when `stop` is cleared while waiting below.
        while not self.events["stop"].is_set():
            # Processing loop
            while not self.events["stop"].is_set():
                # Process exterior events
                if self._process_events():
                    continue

                # Find an object to process
                if self._obj is None:
                    try:
                        self._obj = self.get_obj(timeout=self._poll_interval)
                    except Empty:
                        continue

                    if self._obj is None:
                        self.logger.debug("received the 'end object'")
                        self.events["stop"].set()
                        self.event.set()
                        continue

                obj = self._obj

                # Process the object
                self.logger.debug("process '%s'", obj)
                try:
                    obj = self.process_obj(obj)
                except ProcessingError as exc:
                    # NOTE(review): self._obj is left set, so the same object
                    # is processed again on the next iteration -- confirm the
                    # retry is intended.
                    self.logger.warning(exc)
                    continue
                except EventInterrupt:
                    # An event occurred, process it
                    continue

                # Publish the processed object, check for events periodically
                if self.try_publish_obj(obj,
                                        poll_interval=self._poll_interval):
                    # Object was published, or did not need to be
                    self._obj = None

            # Process the stop event (which is ignored in the loop underneath)
            self._process_events()

            # Wait for the entire family to stop, unless `stop` gets cleared
            while (self.events["stop"].is_set() and
                   not self.family.is_stopped()):
                self.event.wait()
                self._process_events(ignore=("stop",))

        # Process is exiting, there is no turning back
        # Every sibling/child process will shortly do so too (or already have)
        self._cleanup()
        self.logger.debug("terminating")
class ParasolBatchSystem(AbstractBatchSystem):
    """The interface for Parasol.

    Jobs are submitted with the configured parasol command and tracked through
    a results file. A background process (target ``getUpdatedJob``) is handed
    the results file and two queues: ``outputQueue1`` is read here to release
    cpus of finished jobs, ``outputQueue2`` is read by getUpdatedBatchJob.
    """

    #Matches the line printed by "parasol add" when a batchjob was really issued
    _issuedJobPattern = re.compile(r"your batchjob ([0-9]+).*")

    def __init__(self, config, maxCpus, maxMemory):
        """Kills leftover jobs of this results file, resets the parasol batch
        and starts the background process that reports finished jobs.
        """
        AbstractBatchSystem.__init__(self, config, maxCpus, maxMemory) #Call the parent constructor
        if maxMemory != sys.maxint:
            logger.warn("A max memory has been specified for the parasol batch system class of %i, but currently "
                        "this batchsystem interface does not support such limiting" % maxMemory)
        #Keep the name of the results file for the pstat2 command..
        self.parasolCommand = config.attrib["parasol_command"]
        self.parasolResultsFile = getParasolResultsFileName(config.attrib["job_store"])
        #Reset the batchjob queue and results (initially, we do this again once we've killed the jobs)
        self.queuePattern = re.compile(r"q\s+([0-9]+)")
        self.runningPattern = re.compile(r"r\s+([0-9]+)\s+[\S]+\s+[\S]+\s+([0-9]+)\s+[\S]+")
        self.killBatchJobs(self.getIssuedBatchJobIDs()) #Kill any jobs on the current stack
        logger.info("Going to sleep for a few seconds to kill any existing jobs")
        time.sleep(5) #Give batch system a second to sort itself out.
        logger.info("Removed any old jobs from the queue")
        #Reset the batchjob queue and results
        #NOTE(review): the "is not None" tests assume popenParasolCommand reports
        #success as None - confirm against its definition.
        exitValue = popenParasolCommand("%s -results=%s clear sick" % (self.parasolCommand, self.parasolResultsFile), False)[0]
        if exitValue is not None:
            logger.warn("Could not clear sick status of the parasol batch %s" % self.parasolResultsFile)
        exitValue = popenParasolCommand("%s -results=%s flushResults" % (self.parasolCommand, self.parasolResultsFile), False)[0]
        if exitValue is not None:
            logger.warn("Could not flush the parasol batch %s" % self.parasolResultsFile)
        open(self.parasolResultsFile, 'w').close() #Truncate the results file
        logger.info("Reset the results queue")
        #Stuff to allow max cpus to be work
        self.outputQueue1 = Queue()
        self.outputQueue2 = Queue()
        worker = Process(target=getUpdatedJob, args=(self.parasolResultsFile, self.outputQueue1, self.outputQueue2))
        worker.daemon = True
        worker.start()
        self.usedCpus = 0
        self.jobIDsToCpu = {}

    def issueBatchJob(self, command, memory, cpu):
        """Issues parasol with batchjob commands.

        Blocks while more than maxCpus cpus are in use, then retries the
        submission every 5 seconds until parasol prints a batchjob id.
        Returns the (parasol) batchjob id as an int.
        """
        self.checkResourceRequest(memory, cpu)
        parasolCommand = "%s -verbose -ram=%i -cpu=%i -results=%s add batchjob '%s'" % \
            (self.parasolCommand, memory, cpu, self.parasolResultsFile, command)
        #Deal with the cpus
        self.usedCpus += cpu
        while True: #Process finished results with no wait
            try:
                jobID = self.outputQueue1.get_nowait()
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
                assert self.usedCpus >= 0
                self.outputQueue1.task_done()
            except Empty:
                break
        while self.usedCpus > self.maxCpus: #If we are still waiting
            self.usedCpus -= self.jobIDsToCpu.pop(self.outputQueue1.get())
            assert self.usedCpus >= 0
            self.outputQueue1.task_done()
        #Now keep going
        while True:
            line = popenParasolCommand(parasolCommand)[1][0]
            match = self._issuedJobPattern.match(line)
            if match is not None:
                #This is because parasol add batchjob will return success, even if the batchjob was not properly issued!
                break
            logger.info("We failed to properly add the batchjob, we will try again after a sleep")
            time.sleep(5)
        jobID = int(match.group(1))
        self.jobIDsToCpu[jobID] = cpu
        logger.debug("Got the parasol batchjob id: %s from line: %s" % (jobID, line))
        logger.debug("Issued the batchjob command: %s with (parasol) batchjob id: %i " % (parasolCommand, jobID))
        return jobID

    def killBatchJobs(self, jobIDs):
        """Kills the given jobs, represented as Batchjob ids, then checks they are dead by checking
        they are not in the list of issued jobs. Retries every 5 seconds until none is left.
        """
        jobIDs = list(jobIDs)
        toKill = set(jobIDs)
        while True:
            for jobID in jobIDs:
                exitValue = popenParasolCommand("%s remove batchjob %i" % (self.parasolCommand, jobID),
                                                runUntilSuccessful=False)[0]
                #%s rather than %i for the exit value, so a None exit value cannot break the logging
                logger.info("Tried to remove jobID: %i, with exit value: %s" % (jobID, exitValue))
            if toKill.isdisjoint(self.getIssuedBatchJobIDs()):
                return
            time.sleep(5)
            logger.warn("Tried to kill some jobs, but something happened and they are still going, so I'll try again")

    def getIssuedBatchJobIDs(self):
        """Gets the list of jobs issued to parasol.
        """
        #Example issued batchjob, first field is jobID, last is the results file
        #31816891 localhost benedictpaten 2009/07/23 10:54:09 python ~/Desktop/out.txt
        issuedJobs = set()
        for line in popenParasolCommand("%s -extended list jobs" % self.parasolCommand)[1]:
            tokens = line.split()
            #Skips empty and whitespace-only lines, which have no last field to compare
            if tokens and tokens[-1] == self.parasolResultsFile:
                issuedJobs.add(int(tokens[0]))
        return list(issuedJobs)

    def getRunningBatchJobIDs(self):
        """Returns map of running jobIDs and the time they have been running.
        """
        #Example lines..
        #r 5410186 benedictpaten worker 1247029663 localhost
        #r 5410324 benedictpaten worker 1247030076 localhost
        runningJobs = {}
        issuedJobs = set(self.getIssuedBatchJobIDs()) #A set, it is probed once per pstat2 line
        for line in popenParasolCommand("%s -results=%s pstat2 " % (self.parasolCommand, self.parasolResultsFile))[1]:
            if line != '':
                match = self.runningPattern.match(line)
                if match is not None:
                    jobID = int(match.group(1))
                    startTime = int(match.group(2))
                    if jobID in issuedJobs: #It's one of our jobs
                        runningJobs[jobID] = time.time() - startTime
        return runningJobs

    def getUpdatedBatchJob(self, maxWait):
        """Returns whatever getFromQueueSafely gets from the second output queue within maxWait;
        a non-None value is acknowledged on the queue first.
        """
        jobID = self.getFromQueueSafely(self.outputQueue2, maxWait)
        if jobID is not None:
            self.outputQueue2.task_done()
        return jobID

    @classmethod
    def getRescueBatchJobFrequency(cls):
        """Parasol leaks jobs, but rescuing jobs involves calls to parasol list jobs and pstat2,
        making it expensive.
        """
        return 5400 #Once every 90 minutes
print('Processed {}0K files'.format(progress // 10000)) elapsed = time.time() - timestamp if progress % 100000 == 0 or elapsed > 60 * 30: print('Restarting workers') workers = _restart_workers(argv.workers, workers, sources, targets) timestamp = time.time() except csv.Error as e: print(e) workers = _restart_workers(0, workers, sources, targets) except KeyboardInterrupt: # Allow ^C to interrupt from any thread. print('Keyboard interrupt') try: while True: sources.get_nowait() except: pass sources.close() for wrk in workers: wrk.terminate() targets.put(False) writer.join() # targets.join()
class MultiProcCompressTool(BaseCompressTool):
    """
    Compress tool that spreads the work over several worker processes.

    Every worker owns one task queue; all workers share one result queue.
    Besides Task objects a task queue carries control items:
    a float sets the worker's polling interval, the string "stop" ends it.
    """

    _procs = None
    _np = 0
    _np_limit = 0
    _task_queues = None
    _result_queue = None

    # Polling interval (seconds) of workers while no compression is running
    _idle_sleep = 0.01
    # Polling interval (seconds) of workers and of the result loop while compressing
    _busy_sleep = 0.001

    def checkCpuLimit(self):
        """
        Work out the number of worker processes

        @return: cpu_count(), capped by the "cpu_limit" option when that is positive
        """
        limit = self.getOption("cpu_limit")
        if limit:
            self._np_limit = int(limit)
        self._np = cpu_count()
        if self._np_limit > 0:
            self._np = min(self._np, self._np_limit)
        return self._np

    def init(self):
        """
        Create the queues and start one worker process per queue

        @return: self
        """
        self._procs = []
        self._task_queues = []
        self._np = self.checkCpuLimit()
        self._result_queue = JoinableQueue()
        for n in range(self._np):
            tq = JoinableQueue()
            self._task_queues.append(tq)
            p = Process(target=self._worker, name="Compressor-%s" % n,
                        args=(tq, self._result_queue,))
            p.start()
            self._procs.append(p)
        return self

    def stop(self):
        """
        Ask the workers to stop, terminate those still alive after 50 rounds

        Every round puts "stop" on each task queue and waits 0.1 second.

        @return: self
        """
        count = 50
        alive = True
        while alive:
            for n in range(self._np):
                self._task_queues[n].put_nowait("stop")
            sleep(0.1)
            alive = any(self._procs[n].is_alive() for n in range(self._np))
            count -= 1
            if count <= 0:
                break
        for n in range(self._np):
            if self._procs[n].is_alive():
                self._procs[n].terminate()
        return self

    def _worker(self, in_queue, out_queue):
        """
        Worker process loop: poll the task queue until "stop" arrives

        @param in_queue: {multiprocessing.JoinableQueue}
        @param out_queue: {multiprocessing.JoinableQueue}

        @var task: Task

        @return:
        """
        sleep_wait = self._idle_sleep
        while True:
            try:
                task = in_queue.get_nowait()
            except Exception:
                # Nothing to do right now (or queue not readable) - poll again.
                # Not a bare except: KeyboardInterrupt/SystemExit must end the worker
                task = None

            if task is None:
                sleep(sleep_wait)
                continue

            if isinstance(task, float):
                # Control item: new polling interval
                sleep_wait = task
                in_queue.task_done()
                sleep(sleep_wait)
                continue

            if isinstance(task, str) and task == "stop":
                in_queue.task_done()
                break

            if isinstance(task, Task):
                result = Result()
                result.cdata, result.method = self._compressData(task.data)
                result.key = task.key
                out_queue.put_nowait(result)

            # Acknowledge everything taken from the queue, unknown items included,
            # so the queue's unfinished-task counter can not get stuck
            in_queue.task_done()
        return

    def compressData(self, dataToCompress):
        """
        Compress data and yield it back, one item per key, in completion order

        @param dataToCompress: dict { hash id: bytes data }

        @return generator of ( hash id, (compressed data (bytes), compresion method (string)) )
        """
        start_time = time()

        nkeys = len(dataToCompress)

        # Switch workers to fast polling
        for n in range(self._np):
            self._task_queues[n].put_nowait(self._busy_sleep)

        # Deal tasks to the workers round-robin
        for i, (key, data) in enumerate(dataToCompress.items()):
            task = Task()
            task.key = key
            task.data = data
            self._task_queues[i % self._np].put_nowait(task)

        gotKeys = 0
        while gotKeys < nkeys:
            try:
                res = self._result_queue.get_nowait()
            except Exception:
                # No result yet; KeyboardInterrupt is deliberately not swallowed
                res = None

            if res is None:
                sleep(self._busy_sleep)
                continue

            if isinstance(res, Result):
                self._result_queue.task_done()
                yield res.key, (res.cdata, res.method,)
                gotKeys += 1

        # Back to slow polling
        for n in range(self._np):
            self._task_queues[n].put_nowait(self._idle_sleep)

        self.time_spent_compressing = time() - start_time
        return
def main(factor=2):
    """
    Download every URL listed in downloads.txt with cores * factor processes
    and print the totals the processes reported.

    E.G: if total cores is 2, no of processes to be spawned is 2 * factor.
    Each process runs `download` and receives the URL queue, the result queue,
    the two timing queues and its index.
    """
    def drain_total(source, total=None):
        """
        Add up everything currently in `source`, starting from `total`.

        Timing queues hold time objects that can not be added to 0, so the
        first item becomes the start value. Returns `total` unchanged
        (None by default) when the queue is empty.
        """
        while True:
            try:
                item = source.get_nowait()
            except Empty:
                return total
            total = item if total is None else total + item

    files_to_download = JoinableQueue()
    result_queue = JoinableQueue()
    time_taken = JoinableQueue()
    time_taken_to_read_from_queue = JoinableQueue()

    with open('downloads.txt', 'r') as f:
        for to_download in f:
            files_to_download.put_nowait(to_download.split('\n')[0])
    files_to_download_size = files_to_download.qsize()

    cores = cpu_count()
    no_of_processes = cores * factor
    # One None per process is queued after the URLs
    for _ in xrange(no_of_processes):
        files_to_download.put_nowait(None)

    jobs = []
    for name in xrange(no_of_processes):
        p = Process(target=download,
                    args=(files_to_download, result_queue, time_taken,
                          time_taken_to_read_from_queue, name))
        p.start()
        jobs.append(p)

    # NOTE(review): the multiprocessing docs warn that joining a process before
    # the queues it wrote to are drained can deadlock - confirm the amount of
    # data `download` puts on the queues stays small.
    for job in jobs:
        job.join()

    print(result_queue.qsize())

    total_downloaded_urls = drain_total(result_queue, 0)

    total_time = drain_total(time_taken)
    print("{0} processes on {1} core machine took {2} time to download {3} "
          "urls".format(no_of_processes, cores, total_time,
                        total_downloaded_urls))

    queue_reading_time = drain_total(time_taken_to_read_from_queue)
    print("{0} processes on {1} core machine took {2} time to read {3} "
          "urls from queue".format(no_of_processes, cores, queue_reading_time,
                                   files_to_download_size))
def main(factor=2):
    """
    Download every URL listed in downloads.txt with cores * factor processes
    and print the totals the processes reported.

    E.G: if total cores is 2, no of processes to be spawned is 2 * factor.
    Each process runs `download` and receives the URL queue, the result queue,
    the two timing queues and its index.
    """
    def sum_queue(pending, start=None):
        """
        Fold all items currently in `pending` into one total.

        The timing queues carry time objects, which can not be added to 0;
        with start=None the first item seeds the total. An empty queue
        yields `start` itself.
        """
        total = start
        while True:
            try:
                value = pending.get_nowait()
            except Empty:
                break
            if total is None:
                total = value
            else:
                total += value
        return total

    files_to_download = JoinableQueue()
    result_queue = JoinableQueue()
    time_taken = JoinableQueue()
    time_taken_to_read_from_queue = JoinableQueue()

    with open('downloads.txt', 'r') as f:
        for to_download in f:
            files_to_download.put_nowait(to_download.split('\n')[0])
    files_to_download_size = files_to_download.qsize()

    cores = cpu_count()
    no_of_processes = cores * factor
    # One None per process is queued after the URLs
    for _ in xrange(no_of_processes):
        files_to_download.put_nowait(None)

    jobs = []
    for name in xrange(no_of_processes):
        p = Process(target=download,
                    args=(files_to_download, result_queue, time_taken,
                          time_taken_to_read_from_queue, name))
        p.start()
        jobs.append(p)

    # NOTE(review): the multiprocessing docs warn that joining a process before
    # the queues it wrote to are drained can deadlock - confirm the amount of
    # data `download` puts on the queues stays small.
    for job in jobs:
        job.join()

    print(result_queue.qsize())

    total_downloaded_urls = sum_queue(result_queue, 0)

    # Stays None (instead of raising NameError) if no process reported a time
    total_time = sum_queue(time_taken)
    print("{0} processes on {1} core machine took {2} time to download {3} "
          "urls".format(no_of_processes, cores, total_time,
                        total_downloaded_urls))

    queue_reading_time = sum_queue(time_taken_to_read_from_queue)
    print("{0} processes on {1} core machine took {2} time to read {3} "
          "urls from queue".format(no_of_processes, cores, queue_reading_time,
                                   files_to_download_size))
class ProcessPool(object):
    """Pool of ``Worker`` objects fed through a bounded task queue.

    Tasks are ``(callable, args, kwargs)`` tuples. Results and exceptions are
    read back from ``result_queue`` and ``exc_queue``.
    """

    def __init__(self, pool_size=None, max_load_single_proc=3,
                 interval=None, logger_name=None):
        """Create the queues, start the workers and the sentinel thread.

        :param pool_size: number of workers; falsy means get_default_size()
        :param max_load_single_proc: task queue capacity per worker
        :param interval: stored on the pool as ``interval``
        :param logger_name: logger to use, defaults to this module's name
        """
        name = logger_name or __name__
        self.logger = logging.getLogger(name)
        if pool_size:
            self.pool_size = pool_size
        else:
            self.pool_size = self.get_default_size()
        self.max_load_single_proc = max_load_single_proc
        self.interval = interval
        self.reject_add_task = False
        capacity = self.pool_size * self.max_load_single_proc
        self.task_queue = JoinableQueue(capacity)
        self.result_queue = JoinableQueue()
        self.exc_queue = JoinableQueue()
        self.pool = []
        self.state = RUNNING
        self.init_worker_pool()
        self.sentinel_thread()

    def spawn(self, worker_class, *args, **kwargs):
        """Queue ``worker_class`` with its arguments as one task.

        :raises ProcessPoolException: if ``worker_class`` is not callable
        """
        if callable(worker_class):
            self.add_task((worker_class, args, kwargs))
            return
        raise ProcessPoolException('pool worker class must be callable')

    def add_task(self, task):
        """Put ``task`` on the task queue.

        :raises ProcessPoolException: once join_queue() has closed the pool
        """
        if self.reject_add_task:
            raise ProcessPoolException('closed queue not allowed add task')
        self.task_queue.put(task)

    def get_default_size(self):
        """Return 2 for up to two cpus, otherwise 80% of the cpus plus one."""
        cpus = cpu_count()
        if cpus <= 2:
            return 2
        return int(cpus * 0.8) + 1

    def init_worker_pool(self):
        """Create ``pool_size`` daemon workers, then start all of them."""
        for _ in range(self.pool_size):
            worker = Worker(self)
            worker.daemon = True
            self.pool.append(worker)
        for worker in self.pool:
            worker.start()

    def sentinel_thread(self):
        """Create and start the daemon SentinelThread for this pool."""
        sentinel = SentinelThread(self)
        self.sentinel = sentinel
        sentinel.daemon = True
        sentinel.start()

    def join_queue(self):
        """Refuse new tasks, then wait until the task queue is joined."""
        self.reject_add_task = True
        self.task_queue.join()

    def stop(self):
        """Stop the sentinel and every worker, then empty the pool."""
        self.sentinel.stop()
        self.sentinel.join()
        for worker in self.pool:
            worker.stop()
        for worker in self.pool:
            if not worker.is_alive():
                continue
            worker.join()
        del self.pool[:]
        self.state = STOPPED

    def join(self, raise_error=False):
        """Wait for all queued tasks, then stop the pool.

        :param raise_error: when true, re-raise the first exception found on
            the exception queue, if there is one
        """
        self.join_queue()
        self.stop()
        if not raise_error:
            return
        try:
            first_error, _ = self.exc_queue.get_nowait()
        except Queue.Empty:
            return
        raise first_error

    @property
    def exceptions(self):
        """List of ``(error, traceback)`` pairs taken from the exception queue.

        A non-empty list from an earlier access is returned as is; the queue
        is read again only while that list is still empty.
        """
        collected = getattr(self, '_exceptions', [])
        if not collected:
            while True:
                try:
                    err, tb = self.exc_queue.get_nowait()
                except Queue.Empty:
                    break
                collected.append((err, tb))
            self._exceptions = collected
        return collected

    @property
    def results(self):
        """List of items taken from the result queue.

        A non-empty list from an earlier access is returned as is; the queue
        is read again only while that list is still empty.
        """
        collected = getattr(self, '_results', [])
        if not collected:
            while True:
                try:
                    item = self.result_queue.get_nowait()
                except Queue.Empty:
                    break
                collected.append(item)
            self._results = collected
        return collected
class ImagesBatcher(AbstractDataBatcher):
    """
    Creates a sequence of data batches for training or validation.

    Batch descriptions sampled by the data sampler go onto `task_queue`;
    loaded batches are taken from `batch_queue`. Both queues are handed to the
    BatchDiskReader instances (presumably those turn tasks into batches -
    verify against BatchDiskReader).
    """

    def __init__(
            self,
            queue_size,
            batch_size,
            data_sampler,
            image_processor=None,
            audio_processor=None,
            single_epoch=False,
            cache_data=False,  # TODO: implement me!
            disk_reader_process_num=1):
        """
        :param queue_size: size of the task and batch queues; -1 sizes them
            to hold one epoch (dataset size // batch size + 1)
        :param batch_size: size of batches generated
        :param data_sampler: knows how to sample batches from dataset
        :param image_processor: passed to every BatchDiskReader
        :param audio_processor: passed to every BatchDiskReader
        :param single_epoch: if enabled, image batcher finish one epoch with
            None batch
        :param cache_data: not used yet
        :param disk_reader_process_num: how many disk readers do we need?
        """
        # NOTE(review): super(AbstractDataBatcher, self) skips
        # AbstractDataBatcher.__init__ itself; super(ImagesBatcher, self) is
        # probably what was meant - confirm before changing.
        super(AbstractDataBatcher, self).__init__()

        # set parameters
        self.batch_size = batch_size
        self.epoch_is_finished = False
        # number of tasks queued whose batch has not been consumed yet
        self.batch_queue_balance = 0
        if single_epoch:
            self.sampler_external_info = type('sampler_external_info',
                                              (object, ),
                                              dict(single_epoch=True))
        else:
            self.sampler_external_info = None

        # parse given dataset and init data sampler
        self.data_sampler = data_sampler

        # set queues
        if queue_size == -1:
            # floor division: a queue size has to be an integer
            queue_size = int(
                self.data_sampler.dataset_size() // self.batch_size) + 1
        self.task_queue = JoinableQueue(queue_size)
        self.batch_queue = JoinableQueue(queue_size)

        # init batch disk readers (they are started in start())
        self.data_readers = []
        print('disk_reader_process_num:', disk_reader_process_num)
        for _ in range(disk_reader_process_num):
            self.data_readers.append(
                BatchDiskReader(self.task_queue, self.batch_queue,
                                image_processor, audio_processor))

    def start(self):
        """ Starts the disk readers and queues the first tasks """
        self.epoch_is_finished = False

        # start batch disk readers
        for reader in self.data_readers:
            reader.start()

        # fill task queue with batches to start async reading from disk
        self.fill_task_queue()

    def fill_task_queue(self):
        """
        Queues sampled batches until the task queue is full or the sampler
        returns None, which marks the epoch as finished. Errors are logged,
        not raised.
        """
        try:
            while not self.task_queue.full():
                batch = self.data_sampler.sampling(self.batch_size,
                                                   self.sampler_external_info)
                if batch is None:
                    self.epoch_is_finished = True
                    break
                self.task_queue.put_nowait(batch)
                self.batch_queue_balance += 1
        except Exception as e:  # Queue.Full:
            logger.error("ImagesBatcher: %s", e)

    def next_batch(self):
        """
        Returns next batch from data

        Returns None once when the epoch is finished and all its batches were
        consumed; the next epoch is queued at that point.
        """
        if self.epoch_is_finished and self.batch_queue_balance == 0:
            self.epoch_is_finished = False
            self.fill_task_queue()
            return None

        batch = self.batch_queue.get(block=True)
        self.batch_queue.task_done()
        self.batch_queue_balance -= 1
        if not self.epoch_is_finished:
            # fill task queue
            self.fill_task_queue()
        return batch

    def update_sampler(self, target, logits, step, summary_writer):
        """ Forwards the labels and logits to the sampler's update(), if any """
        if hasattr(self.data_sampler, 'update'):
            labels = target.cpu().data.numpy()
            # the sampler's return value is currently ignored
            self.data_sampler.update(labels, logits, step, summary_writer)

    def _discard_pending(self, pending):
        """ Removes what is in `pending` right now, returns how many items """
        removed = 0
        try:
            while True:
                pending.get_nowait()
                removed += 1
                pending.task_done()
        except Exception:
            # best effort: the queue is empty or not readable any more
            pass
        return removed

    def clear_queue(self):
        """ Drops all queued tasks and loaded batches, then queues new tasks """
        # dropped items will never be consumed, so take them out of the
        # balance; otherwise the end of the epoch is never detected
        self.batch_queue_balance -= self._discard_pending(self.task_queue)
        self.batch_queue_balance -= self._discard_pending(self.batch_queue)
        self.fill_task_queue()

    def _any_reader_alive(self):
        """ True while at least one disk reader is alive """
        return any(reader.is_alive() for reader in self.data_readers)

    def finish(self):
        """ Deactivates the readers, empties both queues and joins everything """
        for data_reader in self.data_readers:
            data_reader.deactivate()

        # non-blocking: a reader may take the last task after an empty() check
        self._discard_pending(self.task_queue)

        # liveness is re-checked on every pass, so the loop ends once the
        # readers are gone and the batch queue is empty
        while not self.batch_queue.empty() or self._any_reader_alive():
            try:
                self.batch_queue.get(timeout=1)
                self.batch_queue.task_done()
            except Exception:
                # nothing arrived within the timeout
                pass

        self.task_queue.join()
        self.batch_queue.join()
        for data_reader in self.data_readers:
            data_reader.join()
class MultiProcCompressTool(BaseCompressTool):
    """
    Compress tool that spreads the work over several worker processes.

    Every worker owns one task queue; all workers share one result queue.
    Besides Task objects a task queue carries control items:
    a float sets the worker's polling interval, the string "stop" ends it.
    """

    _procs = None
    _np = 0
    _np_limit = 0
    _task_queues = None
    _result_queue = None

    # Polling interval (seconds) of workers while no compression is running
    _idle_sleep = 0.1
    # Polling interval (seconds) of workers and of the result loop while compressing
    _busy_sleep = 0.001

    def checkCpuLimit(self):
        """
        Work out the number of worker processes

        @return: cpu_count(), capped by the "cpu_limit" option when that is positive
        """
        limit = self.getOption("cpu_limit")
        if limit:
            self._np_limit = int(limit)
        self._np = cpu_count()
        if self._np_limit > 0:
            self._np = min(self._np, self._np_limit)
        return self._np

    def init(self):
        """
        Create the queues and start one worker process per task queue

        @return: self
        """
        self._procs = []
        self._task_queues = []
        self._np = self.checkCpuLimit()
        # Not used by the methods of this class; kept as an attribute
        self._task_queue = JoinableQueue()
        self._result_queue = JoinableQueue()
        for n in range(self._np):
            tq = JoinableQueue()
            self._task_queues.append(tq)
            p = Process(target=self._worker, name="Compressor-%s" % n,
                        args=(tq, self._result_queue,))
            p.start()
            self._procs.append(p)
        return self

    def stop(self):
        """
        Ask the workers to stop, terminate those still alive after 50 rounds

        Every round puts "stop" on each task queue and waits 0.1 second.

        @return: self
        """
        count = 50
        alive = True
        while alive:
            for n in range(self._np):
                self._task_queues[n].put_nowait("stop")
            sleep(0.1)
            alive = any(self._procs[n].is_alive() for n in range(self._np))
            count -= 1
            if count <= 0:
                break
        for n in range(self._np):
            if self._procs[n].is_alive():
                self._procs[n].terminate()
        return self

    def _worker(self, in_queue, out_queue):
        """
        Worker process loop: poll the task queue until "stop" arrives

        @param in_queue: {multiprocessing.JoinableQueue}
        @param out_queue: {multiprocessing.JoinableQueue}

        @var task: Task

        @return:
        """
        sleep_wait = self._idle_sleep
        while True:
            try:
                task = in_queue.get_nowait()
            except Exception:
                # Nothing to do right now (or queue not readable) - poll again.
                # Not a bare except: KeyboardInterrupt/SystemExit must end the worker
                task = None

            if task is None:
                sleep(sleep_wait)
                continue

            if isinstance(task, float):
                # Control item: new polling interval
                sleep_wait = task
                in_queue.task_done()
                sleep(sleep_wait)
                continue

            if isinstance(task, str) and task == "stop":
                in_queue.task_done()
                break

            if isinstance(task, Task):
                result = Result()
                result.cdata, result.method = self._compressData(task.data)
                result.key = task.key
                out_queue.put_nowait(result)

            # Acknowledge everything taken from the queue, unknown items included,
            # so the queue's unfinished-task counter can not get stuck
            in_queue.task_done()
        return

    def compressData(self, dataToCompress):
        """
        Compress data and yield it back, one item per key, in completion order

        @param dataToCompress: dict { hash id: bytes data }

        @return generator of ( hash id, (compressed data (bytes), compresion method (string)) )
        """
        start_time = time()

        nkeys = len(dataToCompress)

        # Switch workers to fast polling
        for n in range(self._np):
            self._task_queues[n].put_nowait(self._busy_sleep)

        # Deal tasks to the workers round-robin
        for i, (key, data) in enumerate(dataToCompress.items()):
            task = Task()
            task.key = key
            task.data = data
            self._task_queues[i % self._np].put_nowait(task)

        gotKeys = 0
        while gotKeys < nkeys:
            try:
                res = self._result_queue.get_nowait()
            except Exception:
                # No result yet; KeyboardInterrupt is deliberately not swallowed
                res = None

            if res is None:
                sleep(self._busy_sleep)
                continue

            if isinstance(res, Result):
                self._result_queue.task_done()
                yield res.key, (res.cdata, res.method,)
                gotKeys += 1

        # Back to slow polling
        for n in range(self._np):
            self._task_queues[n].put_nowait(self._idle_sleep)

        self.time_spent_compressing = time() - start_time
        return