    def _instance_worker(self, worker_id: int, queue: mp.JoinableQueue, lock) -> None:
        Tqdm.set_lock(lock)
        try:
            self.reader._set_worker_info(WorkerInfo(self.num_workers, worker_id))
            instances = self.reader.read(self.data_path)
            checked_for_token_indexers: bool = False
            for instance in instances:
                # Check the first instance to make sure it doesn't contain any TextFields with
                # token_indexers because we don't want to be duplicating those by sending
                # them across processes.
                if not checked_for_token_indexers:
                    for field_name, field in instance.fields.items():
                        if isinstance(field, TextField) and field._token_indexers is not None:
                            raise ValueError(
                                f"Found a TextField ({field_name}) with token_indexers already "
                                "applied, but you're using num_workers > 0 in your data loader. "
                                "Make sure your dataset reader's text_to_instance() method doesn't "
                                "add any token_indexers to the TextFields it creates. Instead, the token_indexers "
                                "should be added to the instances in the apply_token_indexers() method of your "
                                "dataset reader (which you'll have to implement if you haven't done "
                                "so already)."
                            )
                    checked_for_token_indexers = True
                queue.put((instance, None))
        except Exception as e:
            queue.put((None, (repr(e), traceback.format_exc())))

        # Indicate to the consumer that this worker is finished.
        queue.put((None, None))

        # Wait until this process can safely exit.
        queue.join()
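
The check above enforces the pattern multiprocess loading expects from a dataset reader: text_to_instance() builds TextFields without token_indexers, and the indexers are attached later in apply_token_indexers(), which runs in the main process. A minimal sketch of a reader written that way, assuming AllenNLP 2.x's DatasetReader API (the reader class, field name, and file format are illustrative, not from this listing):

from typing import Dict, Iterable

from allennlp.data import DatasetReader, Instance, Token, TokenIndexer
from allennlp.data.fields import TextField


class LineReader(DatasetReader):
    """Illustrative reader: one whitespace-tokenized instance per line of a text file."""

    def __init__(self, token_indexers: Dict[str, TokenIndexer], **kwargs) -> None:
        super().__init__(
            manual_distributed_sharding=True, manual_multiprocess_sharding=True, **kwargs
        )
        self._token_indexers = token_indexers

    def _read(self, file_path: str) -> Iterable[Instance]:
        with open(file_path) as data_file:
            # shard_iterable uses the WorkerInfo set by the loader, so each
            # worker only yields its own slice of the lines.
            for line in self.shard_iterable(data_file):
                yield self.text_to_instance(line.strip())

    def text_to_instance(self, text: str) -> Instance:
        # No token_indexers on the TextField, so instances stay cheap to
        # pickle and send back over the worker queue.
        return Instance({"tokens": TextField([Token(t) for t in text.split()])})

    def apply_token_indexers(self, instance: Instance) -> None:
        # Runs in the main process, after instances come back from the workers.
        instance.fields["tokens"].token_indexers = self._token_indexers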
Example #2
    def _batch_worker(self, worker_id: int, queue: mp.JoinableQueue, lock,
                      rx: Connection) -> None:
        Tqdm.set_lock(lock)
        try:
            self.reader._set_worker_info(
                WorkerInfo(self.num_workers, worker_id))
            instances = self.reader.read(self.data_path)
            for batch in self._instances_to_batches(
                    instances, move_to_device=self._worker_cuda_safe):
                if self._safe_queue_put(worker_id, (batch, None), queue, rx):
                    continue
                else:
                    # Couldn't put item on queue because parent process has exited.
                    return
        except Exception as e:
            if not self._safe_queue_put(
                worker_id, (None, (repr(e), traceback.format_exc())), queue, rx
            ):
                return

        # Indicate to the consumer (main thread) that this worker is finished.
        queue.put((None, None))

        # Wait until this process can safely exit.
        queue.join()
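
_safe_queue_put is referenced here but not part of this listing. A sketch of one way to implement it, assuming the parent holds the tx end of each worker's pipe and sends on it (or closes it) only when it wants the worker to stop: attempt the put with a short timeout, and between attempts poll rx to see whether the parent has signalled shutdown.

    # Assumes "from queue import Full" at module level; multiprocessing queues
    # raise queue.Full when a timed put() gives up.
    def _safe_queue_put(
        self, worker_id: int, item, queue: mp.JoinableQueue, rx: Connection
    ) -> bool:
        # Returns True once the item is enqueued, or False if the parent appears
        # to have stopped consuming. worker_id is unused here; it is kept only to
        # match the call sites above.
        while True:
            if rx.poll():
                # The parent sent something over the pipe: stop producing.
                return False
            try:
                queue.put(item, True, 0.1)
                return True
            except Full:
                continue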
    def _start_batch_workers(self, queue: mp.JoinableQueue, ctx) -> List[BaseProcess]:
        # Create one shared tqdm lock in the parent and hand it to every worker
        # so their progress bars don't write over each other.
        Tqdm.set_lock(mp.RLock())
        workers: List[BaseProcess] = []
        for worker_id in range(self.num_workers):
            worker: BaseProcess = ctx.Process(
                target=self._batch_worker, args=(worker_id, queue, Tqdm.get_lock()), daemon=True
            )
            worker.start()
            workers.append(worker)
        return workers
Example #4
    def _start_batch_workers(
        self, queue: mp.JoinableQueue, ctx
    ) -> Tuple[List[BaseProcess], List[Connection]]:
        Tqdm.set_lock(mp.RLock())
        workers: List[BaseProcess] = []
        txs: List[Connection] = []
        for worker_id in range(self.num_workers):
            # One pipe per worker: the worker keeps the receiving (rx) end so it can
            # notice when the parent wants it to stop; the parent keeps the tx end.
            rx, tx = ctx.Pipe(duplex=False)
            worker: BaseProcess = ctx.Process(
                target=self._batch_worker,
                args=(worker_id, queue, Tqdm.get_lock(), rx),
                daemon=True,
            )
            worker.start()
            workers.append(worker)
            txs.append(tx)
        return workers, txs
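
On the parent side, those tx connections give the loader a way to shut workers down even when they are blocked trying to put onto a full queue: send something on each pipe (which the _safe_queue_put sketch above would notice via rx.poll()) and then reap the processes. A sketch, with _join_workers as an illustrative name:

    def _join_workers(self, workers: List[BaseProcess], txs: List[Connection]) -> None:
        # Tell each worker to stop producing; _safe_queue_put sees the message
        # via rx.poll() and returns False, so the worker's batch loop exits.
        for tx in txs:
            try:
                tx.send("stop")
            except OSError:
                pass  # pipe already closed; that worker has gone away
        # Give the workers a moment to finish, then force-kill any stragglers
        # (for example, one still blocked in its final queue.join()).
        for worker in workers:
            worker.join(timeout=2)
            if worker.is_alive():
                worker.terminate()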
    def _batch_worker(self, worker_id: int, queue: mp.JoinableQueue, lock) -> None:
        Tqdm.set_lock(lock)
        try:
            self.reader._set_worker_info(WorkerInfo(self.num_workers, worker_id))
            instances = self.reader.read(self.data_path)
            for batch in self._instances_to_batches(
                instances, move_to_device=self._worker_cuda_safe
            ):
                queue.put((batch, None))
        except Exception as e:
            queue.put((None, (repr(e), traceback.format_exc())))

        # Indicate to the consumer (main thread) that this worker is finished.
        queue.put((None, None))

        # Wait until this process can safely exit.
        queue.join()
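
Tying the simpler, pipe-less variants together, the loader's iterator has to start the workers, drain the queue, re-raise any error a worker reported, count the (None, None) sentinels to know when every worker has finished, and call task_done() for each item so the workers' trailing queue.join() can return. A sketch of such a consumer, with the start method and queue bound chosen arbitrarily:

    def __iter__(self):
        # Sketch of the consumer side for the pipe-less worker variants above.
        ctx = mp.get_context("fork")  # or "spawn", depending on platform/config
        queue: mp.JoinableQueue = ctx.JoinableQueue(100)  # bounded so workers can't run far ahead
        workers = self._start_batch_workers(queue, ctx)
        try:
            done_count = 0
            while done_count < self.num_workers:
                batch, worker_error = queue.get()
                if worker_error is not None:
                    # Surface the failure a worker reported as (repr, traceback).
                    e_repr, tb = worker_error
                    queue.task_done()
                    raise RuntimeError(f"batch worker failed: {e_repr}\n{tb}")
                if batch is None:
                    done_count += 1  # one worker sent its (None, None) "finished" sentinel
                else:
                    yield batch
                # Every get() is matched by task_done() so the workers' trailing
                # queue.join() can return and let them exit cleanly.
                queue.task_done()
        finally:
            # Reap the worker processes; terminate any that are still stuck.
            for worker in workers:
                worker.join(timeout=2)
                if worker.is_alive():
                    worker.terminate()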