def _instance_worker(self, worker_id: int, queue: mp.JoinableQueue, lock) -> None:
    """
    Worker entry point: read instances and stream them to `queue`.

    Every item placed on the queue is an `(instance, error)` pair:

    - `(instance, None)` for each instance that was read,
    - `(None, (repr(exc), formatted_traceback))` if reading failed,
    - `(None, None)` as the final sentinel marking this worker as finished.

    The first instance is inspected and a `ValueError` is raised (and then
    reported through the queue like any other error) if any of its
    `TextField`s already has token indexers attached.

    Parameters:
        worker_id: index of this worker, passed to the reader via `WorkerInfo`.
        queue: joinable queue shared with the consumer.
        lock: lock installed as the `Tqdm` lock for this worker.
    """
    Tqdm.set_lock(lock)
    try:
        self.reader._set_worker_info(WorkerInfo(self.num_workers, worker_id))
        instances = self.reader.read(self.data_path)
        first_instance_pending: bool = True
        for instance in instances:
            if first_instance_pending:
                # Only the first instance is checked: TextFields must not carry
                # token_indexers here, because we don't want to be duplicating
                # those by sending them across processes.
                for field_name, field in instance.fields.items():
                    if not isinstance(field, TextField) or field._token_indexers is None:
                        continue
                    raise ValueError(
                        f"Found a TextField ({field_name}) with token_indexers already "
                        "applied, but you're using num_workers > 0 in your data loader. "
                        "Make sure your dataset reader's text_to_instance() method doesn't "
                        "add any token_indexers to the TextFields it creates. Instead, the token_indexers "
                        "should be added to the instances in the apply_token_indexers() method of your "
                        "dataset reader (which you'll have to implement if you haven't done "
                        "so already)."
                    )
                first_instance_pending = False
            queue.put((instance, None))
    except Exception as exc:
        # Ship the failure to the consumer instead of dying silently.
        error_info = (repr(exc), traceback.format_exc())
        queue.put((None, error_info))

    # Sentinel: tells the consumer that this worker has nothing more to send.
    queue.put((None, None))

    # Block until every queued item has been marked done, so this process
    # can exit safely.
    queue.join()
def _batch_worker(self, worker_id: int, queue: mp.JoinableQueue, lock, rx: Connection) -> None:
    """
    Worker entry point: read instances, batch them, and stream batches to `queue`.

    Items are sent as `(batch, error)` pairs: `(batch, None)` for each batch,
    `(None, (repr(exc), formatted_traceback))` on failure, and finally the
    `(None, None)` sentinel. Batches and errors go through
    `self._safe_queue_put`; when that returns a falsy value the parent process
    has exited and the worker returns immediately without sending the sentinel.

    Parameters:
        worker_id: index of this worker, passed to the reader via `WorkerInfo`.
        queue: joinable queue shared with the consumer.
        lock: lock installed as the `Tqdm` lock for this worker.
        rx: connection forwarded to `_safe_queue_put` (presumably how the
            worker notices the parent is gone; confirm in `_safe_queue_put`).
    """
    Tqdm.set_lock(lock)
    try:
        self.reader._set_worker_info(WorkerInfo(self.num_workers, worker_id))
        instances = self.reader.read(self.data_path)
        batches = self._instances_to_batches(
            instances, move_to_device=self._worker_cuda_safe
        )
        for batch in batches:
            delivered = self._safe_queue_put(worker_id, (batch, None), queue, rx)
            if not delivered:
                # Couldn't put item on queue because parent process has exited.
                return
    except Exception as exc:
        error_info = (repr(exc), traceback.format_exc())
        if not self._safe_queue_put(worker_id, (None, error_info), queue, rx):
            return

    # Sentinel: tells the consumer (main thread) that this worker is finished.
    queue.put((None, None))

    # Block until every queued item has been marked done, so this process
    # can exit safely.
    queue.join()
def _start_batch_workers(self, queue: mp.JoinableQueue, ctx) -> List[BaseProcess]:
    """
    Start `self.num_workers` daemon processes that each run `self._batch_worker`.

    Parameters:
        queue: joinable queue handed to every worker.
        ctx: multiprocessing context used to create the worker processes
            (and the lock shared with them).

    Returns:
        The started worker processes, in worker-id order.
    """
    # Create the shared tqdm lock from the same context that spawns the
    # workers: a lock built by the default (fork) context cannot be passed to
    # processes started with the spawn or forkserver start methods.
    Tqdm.set_lock(ctx.RLock())
    workers: List[BaseProcess] = []
    for worker_id in range(self.num_workers):
        worker: BaseProcess = ctx.Process(
            target=self._batch_worker,
            args=(worker_id, queue, Tqdm.get_lock()),
            daemon=True,
        )
        worker.start()
        workers.append(worker)
    return workers
def _start_batch_workers(
    self, queue: mp.JoinableQueue, ctx
) -> Tuple[List[BaseProcess], List[Connection]]:
    """
    Start `self.num_workers` daemon processes that each run `self._batch_worker`.

    A one-way pipe is created per worker: the receiving end is passed to the
    worker and the sending end is kept and returned to the caller.

    Parameters:
        queue: joinable queue handed to every worker.
        ctx: multiprocessing context used to create the processes, the pipes
            and the lock shared with the workers.

    Returns:
        A `(workers, txs)` pair: the started processes and the sending ends of
        their pipes, both in worker-id order.
    """
    # Create the shared tqdm lock from the same context that spawns the
    # workers: a lock built by the default (fork) context cannot be passed to
    # processes started with the spawn or forkserver start methods.
    Tqdm.set_lock(ctx.RLock())
    workers: List[BaseProcess] = []
    txs: List[Connection] = []
    for worker_id in range(self.num_workers):
        rx, tx = ctx.Pipe(duplex=False)
        worker: BaseProcess = ctx.Process(
            target=self._batch_worker,
            args=(worker_id, queue, Tqdm.get_lock(), rx),
            daemon=True,
        )
        worker.start()
        workers.append(worker)
        txs.append(tx)
    return workers, txs
def _batch_worker(self, worker_id: int, queue: mp.JoinableQueue, lock) -> None:
    """
    Worker entry point: read instances, batch them, and stream batches to `queue`.

    Every item placed on the queue is a `(batch, error)` pair:

    - `(batch, None)` for each batch produced,
    - `(None, (repr(exc), formatted_traceback))` if reading or batching failed,
    - `(None, None)` as the final sentinel marking this worker as finished.

    Parameters:
        worker_id: index of this worker, passed to the reader via `WorkerInfo`.
        queue: joinable queue shared with the consumer.
        lock: lock installed as the `Tqdm` lock for this worker.
    """
    Tqdm.set_lock(lock)
    try:
        self.reader._set_worker_info(WorkerInfo(self.num_workers, worker_id))
        instances = self.reader.read(self.data_path)
        batches = self._instances_to_batches(
            instances, move_to_device=self._worker_cuda_safe
        )
        for batch in batches:
            queue.put((batch, None))
    except Exception as exc:
        # Ship the failure to the consumer instead of dying silently.
        error_info = (repr(exc), traceback.format_exc())
        queue.put((None, error_info))

    # Sentinel: tells the consumer (main thread) that this worker is finished.
    queue.put((None, None))

    # Block until every queued item has been marked done, so this process
    # can exit safely.
    queue.join()