def __init__(self):
    self.datadir = Path.cwd() / config["DATADIR"]
    self.rawdir = self.datadir / "raw"
    self.traindir = self.datadir / "processed"
    self.testrawdir = self.datadir / "test"
    self.testprocdir = self.datadir / "testprocdir"

    # Extract raw frames from the train/test videos once.
    if not self.rawdir.exists():
        print("Generating frames ..")
        self.rawdir.mkdir(exist_ok=False)
        self.read_images(config["TRAIN_VID"], self.rawdir)
    if not self.testrawdir.exists():
        print("Generating frames for test ..")
        self.testrawdir.mkdir(exist_ok=False)
        self.read_images(config["TESTS_VID"], self.testrawdir)

    images = list(self.rawdir.rglob('*.jpg'))
    images_test = list(self.testrawdir.rglob('*.jpg'))
    _labels = [
        float(x)
        for x in (self.datadir / config["TRAIN_Y"]).read_text().splitlines()
    ]
    self.labels = torch.Tensor(_labels)

    # Preprocess frames in parallel; draining .result() through tqdm both
    # drives the workers and re-raises any exception they hit.
    if not self.traindir.exists():
        print("Processing images ..")
        self.traindir.mkdir(exist_ok=False)
        pool = ProcessPool(max_workers=config["NUM_THREADS"])
        for _ in tqdm(pool.map(self.transformations, images).result()):
            pass
    if not self.testprocdir.exists():
        print("Processing test images ..")
        self.testprocdir.mkdir(exist_ok=False)
        pool = ProcessPool(max_workers=config["NUM_THREADS"])
        for _ in tqdm(pool.map(self.transformations, images_test).result()):
            pass
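
# A minimal, self-contained sketch of the preprocessing pattern used in
# __init__ above (hedged: resize_image, the directory name and the worker
# count are hypothetical stand-ins, not taken from the source). The worker
# must live at module level so the ProcessPool can pickle it, and draining
# future.result() is what actually propagates worker exceptions.
from pathlib import Path

from pebble import ProcessPool


def resize_image(path):
    # Placeholder: real code would load, transform, and re-save the frame.
    return path


if __name__ == '__main__':
    images = list(Path('raw').rglob('*.jpg'))
    with ProcessPool(max_workers=4) as pool:
        future = pool.map(resize_image, images)
        for _ in future.result():  # raises here if any worker failed
            pass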
def find_tlds(self):
    dom_list = [self.known_domain + '.' + tld for tld in self.tld_list]
    try:
        pool = ThreadPool(max_workers=self.max_workers,
                          max_tasks=self.max_tasks)
        future = pool.map(self.check_tld, dom_list, timeout=self.timeout)
        pool.close()
        pool.join()
        # map() returns a future; iterate .result() to get the actual values
        # rather than printing the future object itself.
        print(list(future.result()))
    except Exception as e:
        print(repr(e))
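
# Hedged sketch of the lookup pattern find_tlds relies on (check_tld and the
# TLD list here are placeholders). pebble's map() hands back a future, so the
# values only appear once future.result() is iterated; a per-item TimeoutError
# is raised for any domain whose check exceeded the map timeout.
from concurrent.futures import TimeoutError

from pebble import ThreadPool


def check_tld(domain):
    return domain  # placeholder for an actual DNS/WHOIS lookup


domains = ['example.' + tld for tld in ('com', 'net', 'org')]
with ThreadPool(max_workers=8) as pool:
    future = pool.map(check_tld, domains, timeout=5)
    iterator = future.result()
    while True:
        try:
            print(next(iterator))
        except StopIteration:
            break
        except TimeoutError:
            print('lookup timed out')  # this domain's result is lost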
def process_urls(paths, n_processes, prefix=COMMON_CRAWL_URL,
                 max_failures=100, num_progress_reports=50):
    print(f"Using {n_processes} parallel processes")
    failed_paths = []
    bios = []
    time0 = time.time()
    path_name = (paths[0] + '///').split('/')[1]
    num_progress_reports = max(
        1, min(num_progress_reports, len(paths) // n_processes))
    done = 0
    pool = ProcessPool(n_processes)
    for i, paths_chunk in enumerate(chunks(paths, num_progress_reports)):
        ans = pool.map(bios_from_wet_url,
                       [prefix + path for path in paths_chunk],
                       timeout=1200)
        iterator = ans.result()
        for p in paths_chunk + ["done"]:
            try:
                a = next(iterator)
                assert p != "done"
                if a is not None:
                    bios += [dict(path=p, **b) for b in a]
                continue
            except StopIteration:
                assert p == "done"
                break
            except Exception as error:
                print("--------------------\n" * 10 +
                      f"function raised {error}")
                failed_paths.append(p)
        done += len(paths_chunk)
        pct = (i + 1) / num_progress_reports
        eta = (time.time() - time0) * (1 / pct - 1) / 60 / 60
        print(
            f"{eta:.1f} hours left, {done:,}/{len(paths):,} done ({pct:.0%}),",
            f"{int(len(bios)/pct):,} estimated bios, {path_name}")
        if len(failed_paths) > 0:
            print(f" {len(failed_paths):,} failed paths")
        if len(failed_paths) > max_failures:
            break
    pool.close()
    return dedup_exact(bios), failed_paths  # dedup_exact is new!
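
# process_urls above assumes a chunks(seq, n) helper that splits the path
# list into n pieces, one per progress report. A minimal version consistent
# with that usage (hedged: the original helper is not shown in the source):
def chunks(seq, n):
    """Yield up to n contiguous, near-evenly sized chunks of seq."""
    size, rem = divmod(len(seq), n)
    start = 0
    for i in range(n):
        stop = start + size + (1 if i < rem else 0)
        if stop > start:
            yield seq[start:stop]
        start = stop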
class PebbleMap(PySAT):
    name = 'PySAT Concurrency: PebbleMap'

    def __init__(self, **kwargs):
        self.pool = None
        super().__init__(**kwargs)

    def initialize(self, solver, **kwargs):
        if self.pool is not None:
            kwargs['output'].debug(2, 2, 'Pool already inited')
        else:
            self.pool = ProcessPool(max_workers=self.processes,
                                    initializer=self.init_func,
                                    initargs=(solver, kwargs['instance']))
            kwargs['output'].debug(
                2, 2, 'Init pool with %d processes' % self.processes)

    def process(self, tasks: List[Task], **kwargs) -> List[Result]:
        output = kwargs['output']
        results = []
        future = self.pool.map(self.solve_func, tasks)
        # timer = Timer(20., future.cancel, ())
        # timer.start()
        try:
            for result in future.result():
                results.append(result)
                output.debug(2, 3, 'Already solved %d tasks' % len(results))
        except Exception as e:
            output.debug(0, 1, 'Error while fetching pool results: %s' % e)
        # if timer.is_alive():
        #     timer.cancel()
        if not self.keep:
            self.terminate()
        return [
            result.set_value(self.measure.get(result)) for result in results
        ]

    def terminate(self):
        if self.pool:
            self.pool.stop()
            self.pool.join()
            self.pool = None
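
# Standalone sketch of the initializer/initargs pattern PebbleMap uses
# (hedged: init_worker, solve_task and the argument values are invented for
# illustration and are not PySAT's API). The initializer runs once per worker
# process, so heavyweight state is shipped a single time instead of with
# every task.
from pebble import ProcessPool

_state = {}


def init_worker(solver, instance):
    # Executed once in each worker process at startup.
    _state['solver'] = solver
    _state['instance'] = instance


def solve_task(task):
    # Per-task work that reads the process-local state.
    return _state['solver'], task


if __name__ == '__main__':
    with ProcessPool(max_workers=2, initializer=init_worker,
                     initargs=('some-solver', 'instance.cnf')) as pool:
        for result in pool.map(solve_task, range(4)).result():
            print(result)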
def propagate(self, tasks: List[Task], **kwargs) -> List[Result]:
    output, instance = kwargs['output'], kwargs['instance']
    pool = ProcessPool(
        max_workers=self.processes,
        initializer=propagate_init,
        initargs=(self.propagator, instance)
    )
    results = []
    future = pool.map(propagate_solve, tasks)
    try:
        for result in future.result():
            results.append(result)
            output.debug(2, 3, 'Already solved %d tasks' % len(results))
    except Exception as e:
        output.debug(0, 1, 'Error while fetching pool results: %s' % e)

    pool.stop()
    pool.join()
    return [result.set_value(self.measure.get(result)) for result in results]
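
# Note on the shutdown above: pebble's close() refuses new jobs but lets
# queued ones run, while stop() also drops queued jobs; both are followed by
# join(). propagate() can use stop() safely because every result has already
# been drained from future.result(). A minimal illustration (sleeper is a
# placeholder):
import time

from pebble import ProcessPool


def sleeper(x):
    time.sleep(0.1)
    return x


if __name__ == '__main__':
    pool = ProcessPool(max_workers=1)
    future = pool.map(sleeper, range(3))
    results = list(future.result())  # drain first, as propagate() does
    pool.stop()  # nothing left in the queue to drop at this point
    pool.join()
    print(results)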
if __name__ == '__main__':
    num_trials = 100
    graph_size = 100
    num_workers = cpu_count()
    pool = ProcessPool(max_workers=num_workers)
    results = []
    invader = np.random.randint(graph_size)
    graphs = [complete_graph(graph_size) for _ in range(num_trials)]
    print('Generated graphs')

    # Initializing graph nodes
    for G in graphs:
        for node in G:
            G.nodes[node]['name'] = 'C' if node == invader else 'D'
    print('Initialised graphs')

    future = pool.map(evolve, graphs, timeout=100)
    iterator = future.result()
    set_result = []
    # with tqdm(total=num_trials, desc='Finished', leave=False) as pbar:
    while True:
        try:
            result = next(iterator)
            print(result)
            results.append(result)
        except StopIteration:
            break
        except TimeoutError as error:
            # print("function took longer than %d seconds" % error.args[1])
            pass
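
# The commented-out tqdm line above suggests a progress bar was planned.
# One hedged way to wire it into the same collection loop (tqdm only; the
# pebble semantics are unchanged, and timed-out trials simply yield nothing):
from concurrent.futures import TimeoutError

from tqdm import tqdm


def collect(iterator, total):
    results = []
    with tqdm(total=total, desc='Finished', leave=False) as pbar:
        while True:
            try:
                results.append(next(iterator))
                pbar.update(1)
            except StopIteration:
                break
            except TimeoutError:
                pbar.update(1)  # count the timed-out trial, keep no result
    return results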