def test_partition_by_cores(num_cpus_mock):
    """Check how ``core.partition_by_cores`` splits a 5-item list per CPU count.

    ``num_cpus_mock`` is a mock whose ``return_value`` is set to the CPU count
    under test (presumably injected by a patch decorator outside this view --
    confirm against the test module).
    """
    items = [0, 1, 2, 3, 4]

    # (reported CPU count, expected partitioning of ``items``)
    cases = (
        (1, [[0, 1, 2, 3, 4]]),
        (2, [[0, 1, 2], [3, 4]]),
        (3, [[0, 1], [2, 3], [4]]),
        # Four CPUs still yield three chunks for five items.
        (4, [[0, 1], [2, 3], [4]]),
    )

    for cpu_count, expected in cases:
        num_cpus_mock.return_value = cpu_count
        assert core.partition_by_cores(items) == expected
def process_all(self, texts: Collection[str]) -> List[List[str]]:
    """Process a list of `texts`.

    With ``self.n_cpus <= 1`` the whole input is handed to
    ``self._process_all_1`` directly. Otherwise the input is split with
    ``partition_by_cores(texts, self.n_cpus)``, each partition is processed by
    ``self._process_all_1`` in a ``ProcessPoolExecutor`` with ``self.n_cpus``
    workers, and the per-partition results are concatenated in partition
    order.

    Args:
        texts: The texts to process.

    Returns:
        One processed entry per input text, in input order.
    """
    if self.n_cpus <= 1:
        return self._process_all_1(texts)

    with ProcessPoolExecutor(self.n_cpus) as e:
        per_partition = e.map(
            self._process_all_1, partition_by_cores(texts, self.n_cpus)
        )
        # Flatten in a single pass. `sum(lists, [])` would build a new list
        # for every partition, copying all previously accumulated items.
        return [item for part in per_partition for item in part]
def _process_in_parallel( self, texts: Collection[str] ) -> Tuple[List[np.ndarray], typing.Mapping[str, int]]: if self.n_cpus <= 1: if len(texts) == 0: return [[]], Counter() all_tokens, unk_list = zip(*self._process_on_one_core(texts)) return list(all_tokens), reduce(lambda x, y: merge_dicts_(x, y)[0], unk_list, {}) with ProcessPoolExecutor(self.n_cpus) as e: all_tokens = [] all_unks = {} for from_one_core in e.map(self._process_on_one_core, partition_by_cores(texts, self.n_cpus)): for tokens, unks in from_one_core: all_tokens.append(tokens) merge_dicts_(all_unks, unks) return all_tokens, all_unks