def _worker(reader: DatasetReader, input_queue: Queue, output_queue: Queue, index: int) -> None: """ A worker that pulls filenames off the input queue, uses the dataset reader to read them, and places the generated instances on the output queue. When there are no filenames left on the input queue, it puts its ``index`` on the output queue and doesn't do anything else. """ # Keep going until you get a file_path that's None. while True: file_path = input_queue.get() if file_path is None: # Put my index on the queue to signify that I'm finished output_queue.put(index) break logger.info(f"reading instances from {file_path}") for instance in reader.read(file_path): output_queue.put(instance)
def _worker( reader: DatasetReader, input_queue: Queue, output_queue: Queue, num_active_workers: Value, num_inflight_items: Value, worker_id: int, ) -> None: """ A worker that pulls filenames off the input queue, uses the dataset reader to read them, and places the generated instances on the output queue. When there are no filenames left on the input queue, it decrements num_active_workers to signal completion. """ logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}") # Keep going until you get a file_path that's None. while True: file_path = input_queue.get() if file_path is None: # It's important that we close and join the queue here before # decrementing num_active_workers. Otherwise our parent may join us # before the queue's feeder thread has passed all buffered items to # the underlying pipe resulting in a deadlock. # # See: # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines output_queue.close() output_queue.join_thread() # Decrementing is not atomic. # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value. with num_active_workers.get_lock(): num_active_workers.value -= 1 logger.info(f"Reader worker {worker_id} finished") break logger.info(f"reading instances from {file_path}") for instance in reader.read(file_path): with num_inflight_items.get_lock(): num_inflight_items.value += 1 output_queue.put(instance)