def test_process_pool_join_futures_timeout(self):
    """Process Pool Spawn TimeoutError is raised if join on long tasks."""
    pool = ProcessPool()
    for _ in range(2):
        pool.schedule(long_function)
    pool.close()
    self.assertRaises(TimeoutError, pool.join, 0.4)
    pool.stop()
    pool.join()

def test_process_pool_join_workers(self):
    """Process Pool Spawn no worker is running after join."""
    pool = ProcessPool(max_workers=4)
    pool.schedule(function, args=[1])
    pool.stop()
    pool.join()
    self.assertEqual(len(pool._pool_manager.worker_manager.workers), 0)

def test_process_pool_close_stopped(self):
    """Process Pool Fork is stopped after close."""
    pool = ProcessPool(max_workers=1)
    pool.schedule(function, args=[1])
    pool.close()
    pool.join()
    self.assertFalse(pool.active)

def test_process_pool_stop_stopped(self):
    """Process Pool Spawn is stopped after stop."""
    pool = ProcessPool()
    pool.schedule(function, args=[1])
    pool.stop()
    pool.join()
    self.assertFalse(pool.active)

def test_process_pool_stop_large_data(self):
    """Process Pool Spawn is stopped if large data is sent on the channel."""
    data = "a" * 4098 * 1024
    pool = ProcessPool(initializer=long_initializer)
    pool.schedule(function, args=[data])
    pool.stop()
    pool.join()
    self.assertFalse(pool.active)

def test_process_pool_stop_large_data(self):
    """Process Pool Fork is stopped if large data is sent on the channel."""
    data = "a" * 1098 * 1024 * 50  # 50 Mb
    pool = ProcessPool(max_workers=1)
    pool.schedule(function, args=[data])
    pool.stop()
    pool.join()
    self.assertFalse(pool.active)

def test_process_pool_stop_futures(self):
    """Process Pool Spawn not all futures are performed on stop."""
    futures = []
    pool = ProcessPool()
    for index in range(10):
        futures.append(pool.schedule(function, args=[index]))
    pool.stop()
    pool.join()
    self.assertTrue(len([f for f in futures if not f.done()]) > 0)

def test_process_pool_close_futures(self):
    """Process Pool Spawn all futures are performed on close."""
    futures = []
    pool = ProcessPool()
    for index in range(10):
        futures.append(pool.schedule(function, args=[index]))
    pool.close()
    pool.join()
    # map() is lazy in Python 3; iterate explicitly so the assertions actually run
    for future in futures:
        self.assertTrue(future.done())

def test_process_pool_stop_stopped_callback(self):
    """Process Pool Spawn is stopped in callback."""
    with ProcessPool(max_workers=1, context=mp_context) as pool:
        def stop_pool_callback(_):
            pool.stop()

        future = pool.schedule(function, args=[1])
        future.add_done_callback(stop_pool_callback)

        with self.assertRaises(RuntimeError):
            for index in range(10):
                time.sleep(0.1)
                pool.schedule(long_function, args=[index])

    self.assertFalse(pool.active)

def process_all(func, arr, timeout_func=None, total=None, max_workers=None, timeout=None):
    with ProcessPool() as pool:
        future = pool.map(func, arr, timeout=timeout)
        iterator = future.result()
        results = []
        for i in progress_bar(range(len(arr)), total=len(arr)):
            try:
                result = next(iterator)
                if result:
                    results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                if timeout_func:
                    timeout_func(arr[i], error.args[1])
        return results

class Processor:
    conf = None
    api = None
    log_q = None

    def __init__(self, recording_type, processing_state, process_func, num_workers):
        self.recording_type = recording_type
        self.processing_state = processing_state
        self.process_func = process_func
        self.num_workers = num_workers
        self.pool = ProcessPool(num_workers, initializer=logs.init_worker, initargs=(self.log_q,))
        self.in_progress = {}

    def poll(self):
        self.reap_completed()
        if len(self.in_progress) >= self.num_workers:
            return True

        recording = self.api.next_job(self.recording_type, self.processing_state)
        if recording:
            logger.debug(
                "scheduling %s (%s: %s)",
                recording["id"],
                recording["type"],
                self.processing_state,
            )
            future = self.pool.schedule(self.process_func, (recording, self.conf))
            self.in_progress[recording["id"]] = future
            return True
        return False

    def reap_completed(self):
        for recording_id, future in list(self.in_progress.items()):
            if future.done():
                del self.in_progress[recording_id]
                err = future.exception()
                if err:
                    msg = f"{self.recording_type}.{self.processing_state} processing of {recording_id} failed: {err}"
                    tb = getattr(err, "traceback", None)
                    if tb:
                        msg += f":\n{tb}"
                    logger.error(msg)

def manager(date_str):
    # unpack and list daily zip
    vol_zip = f'{vol_root}/{RADAR_ID:02}/{date_str[0:4]}/vol/{RADAR_ID:02}_{date_str}.pvol.zip'
    temp_dir = True
    vol_ffn_list = file_util.unpack_zip(vol_zip)

    for arg_slice in file_util.chunks(vol_ffn_list, NCPU):
        with ProcessPool() as pool:
            future = pool.map(buffer, arg_slice, timeout=360)
            iterator = future.result()
            while True:
                try:
                    _ = next(iterator)
                except StopIteration:
                    break
                except TimeoutError as error:
                    print("function took longer than %d seconds" % error.args[1])
                except ProcessExpired as error:
                    print("%s. Exit code: %d" % (error, error.exitcode))
                except TypeError as error:
                    # TypeError has no exitcode attribute; just report the error
                    print("TypeError: %s" % error)
                except Exception:
                    traceback.print_exc()

    # import time
    # for vol_ffn in vol_ffn_list:
    #     start = time.time()
    #     torrentfields(vol_ffn)
    #     end = time.time()
    #     print('timer', end - start)

    # #run retrieval
    # i = 0
    # n_files = len(vol_ffn_list)
    # for flist_chunk in file_util.chunks(vol_ffn_list, NCPU):  #CUSTOM RANGE USED
    #     bag = db.from_sequence(flist_chunk).map(buffer)
    #     _ = bag.compute()
    #     i += NCPU
    #     del bag
    #     print('processed: ' + str(round(i/n_files*100,2)))

    # clean up
    temp_vol_dir = os.path.dirname(vol_ffn_list[0])
    if '/tmp' in temp_vol_dir:
        os.system('rm -rf ' + temp_vol_dir)

def dump_packs(artifact_manager: ArtifactsManager, pool: ProcessPool) -> List[ProcessFuture]:
    """ Create futures which conditionally dump content/Packs.

    Args:
        artifact_manager: Artifacts manager object.
        pool: Process pool to schedule new processes.

    Returns:
        List[ProcessFuture]: List of pebble futures to wait for.
    """
    futures = []
    for pack_name, pack in artifact_manager.content.packs.items():
        if pack_name not in IGNORED_PACKS:
            futures.append(pool.schedule(dump_pack, args=(artifact_manager, pack)))

    return futures

def start_workers(self):
    """
    Start the pool and workers
    :return: The pool instance
    """
    with self._start_lock:
        if self._pool is None:
            # Start the process pool
            log_queue = om.manager.get_in_queue()
            self._pool = ProcessPool(self.MAX_WORKERS,
                                     max_tasks=20,
                                     initializer=init_worker,
                                     initargs=(log_queue,))

    return self._pool

def ProcessPoolHandler() -> ProcessPool:
    """ Process pool handler which terminates all processes in case of an Exception.

    Yields:
        ProcessPool: Pebble process pool.
    """
    with ProcessPool(max_workers=3) as pool:
        try:
            yield pool
        except Exception:
            logging.exception("Gracefully release all resources due to Error...")
            raise
        finally:
            pool.close()
            pool.join()

def muti():
    results = []
    errorcode = []
    with ProcessPool(max_workers=8) as pool:
        totallist = []
        for i in range(Ilist):
            aList[i] = 2.0 + 0.2 * i
            for m in range(PressureS):
                Ipressure[m] = m * 2 + 1
                for t in range(tempertureS):
                    Itemperture[t] = 300 + 50 * t
                    totallist.append((aList[i], Ipressure[m], Itemperture[t]))

        future = pool.map(flamespeedcal, totallist, timeout=10000)
        iterator = future.result()
        while True:
            try:
                result = next(iterator)
                results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                errorcode.append("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                errorcode.append("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                errorcode.append("function raised %s" % error)
                errorcode.append(error.traceback)

    with open("finaloutputdataO2N2.csv", 'w') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            "u(m/s)", "T(K)", "rho(kg/m3)", "pressure", "H2", "H", "O", "O2",
            "OH", "H2O", "HO2", "H2O2", "C", "CH", "CH2", "CH2(S)", "CH3",
            "CH4", "CO", "CO2", "HCO", "CH2O", "CH2OH", "CH3O", "CH3OH",
            "C2H", "C2H2", "C2H3", "C2H4", "C2H5", "C2H6", "HCCO", "CH2CO",
            "HCCOH", "N", "NH", "NH2", "NH3", "NNH", "NO", "NO2", "N2O",
            "HNO", "CN", "HCN", "H2CN", "HCNN", "HCNO", "HOCN", "HNCO",
            "NCO", "N2", "AR", "C3H7", "C3H8", "CH2CHO", "CH3CHO"
        ])
        writer.writerows(results)

    if totallist == []:
        pass
    else:
        errorfile = open("errorcodeO2N2.txt", "w")
        errorfile.write(str(errorcode))

def main(year: int) -> None:
    """
    It calls the production line and manages it. Buffer function that is used
    to catch any problem with the processing line without screwing the whole
    multiprocessing stuff.

    Parameters:
    ===========
    year: int
        Year to process.
    """
    flist = glob.glob(os.path.join(INPATH, f"{year}/**/*.nc"))
    outlist = glob.glob(os.path.join(OUTPATH, f"v2021/ppi/{year}/**/*.nc"))
    oset = set([f[-18:-3] for f in outlist])
    iset = set([f[-18:-3] for f in flist])
    datelist = [*oset ^ iset]
    if len(datelist) == 0:
        print(f"No file to process for {YEAR}.")
        return None
    print(f"{year}: {len(datelist)} files to process.")

    inflist = []
    for d in datelist:
        inflist.append([f for f in flist if d in f][0])

    for fchunk in chunks(inflist, NCPUS):
        with ProcessPool() as pool:
            future = pool.map(buffer, fchunk, timeout=360)
            iterator = future.result()
            while True:
                try:
                    _ = next(iterator)
                except StopIteration:
                    break
                except TimeoutError as error:
                    print("function took longer than %d seconds" % error.args[1])
                except ProcessExpired as error:
                    print("%s. Exit code: %d" % (error, error.exitcode))
                except TypeError:
                    continue
                except Exception:
                    traceback.print_exc()

def test_process_pool_map_timeout(self):
    """Process Pool Fork map with timeout."""
    raised = []
    elements = [1, 2, 3]

    with ProcessPool(max_workers=1) as pool:
        future = pool.map(long_function, elements, timeout=0.1)
        generator = future.result()
        while True:
            try:
                next(generator)
            except TimeoutError as error:
                raised.append(error)
            except StopIteration:
                break

    self.assertTrue(all((isinstance(e, TimeoutError) for e in raised)))

def test_process_pool_map_broken_pool(self):
    """Process Pool Forkserver Broken Pool."""
    elements = [1, 2, 3]

    with ProcessPool(max_workers=1, context=mp_context) as pool:
        future = pool.map(long_function, elements, timeout=1)
        generator = future.result()
        pool._context.state = ERROR
        while True:
            try:
                next(generator)
            except TimeoutError as error:
                self.assertFalse(pool.active)
                future.cancel()
                break
            except StopIteration:
                break

def test_process_pool_map_error(self):
    """Process Pool Forkserver errors do not stop the iteration."""
    raised = None
    elements = [1, 'a', 3]

    with ProcessPool(max_workers=1, context=mp_context) as pool:
        future = pool.map(function, elements)
        generator = future.result()
        while True:
            try:
                next(generator)
            except TypeError as error:
                raised = error
            except StopIteration:
                break

    self.assertTrue(isinstance(raised, TypeError))

def start(self):
    """start of the program"""
    # get domains from file
    self.get_domains()
    # create a process pool for parallel processing
    with ProcessPool(max_workers=5, max_tasks=10) as pool:
        for i in self.domains:
            future = pool.schedule(self.check_domain, args=[i], timeout=self.timeout)
            future.item = i
            future.add_done_callback(self.task_done)
    # add objects to the database with which a connection could not be established
    try:
        self.run_buffer()
    except Exception as e:
        print(f'run_buffer error: {e}')

def main():
    install_logging('Update_Tests_step.log', include_process_name=True)
    existing_test_playbooks = load_test_data_from_conf_json()
    with ProcessPool(max_workers=os.cpu_count(), max_tasks=100) as pool:
        for pack_name in os.listdir(PACKS_DIR):
            future_object = pool.schedule(generate_pack_tests_configuration,
                                          args=(pack_name, existing_test_playbooks),
                                          timeout=20)
            future_object.add_done_callback(update_new_conf_json)
    add_to_conf_json(NEW_CONF_JSON_OBJECT)
    logging.success(f'Added {len(NEW_CONF_JSON_OBJECT)} tests to the conf.json')
    logging.success(f'Added the following objects to the conf.json:\n{pformat(NEW_CONF_JSON_OBJECT)}')

def find_tlds(self):
    dom_list = [self.known_domain + '.' + tld for tld in self.tld_list]
    try:
        pool = ThreadPool(max_workers=self.max_workers, max_tasks=self.max_tasks)
        results = pool.map(self.check_tld, dom_list, timeout=self.timeout)
        pool.close()
        pool.join()
        print(results)
    except Exception as e:
        print(repr(e))
        pass

def run_mc_region_level(
    input_allc_files,
    input_bed_file,
    output_prefix,
    bed_file_name_column=False,
    contexts=CONTEXTS,
    compress=True,
    cap=2,
    overwrite=False,
    nprocs=1,
    timeout=None,
):
    """
    run mc_region_level in parallel
    """
    # assume certain structures in the inputs and outputs
    # allc_xxx.tsv.gz -> output_prefix + "_" + allc_xxx.tsv.gz
    # but the output_files should remove .gz suffix at first
    output_files = [
        output_prefix + "_" + os.path.basename(input_allc_file).replace('.tsv.gz', '.tsv')
        for input_allc_file in input_allc_files]

    nprocs = min(nprocs, len(input_allc_files))
    logging.info("""Begin run_mc_region_level.\n
                 Number of processes:{}\n
                 Number of allc_files:{}\n
                 Bed file: {}\n
                 """.format(nprocs, len(input_allc_files), input_bed_file))

    # parallelized processing
    with ProcessPool(max_workers=nprocs, max_tasks=10) as pool:
        for input_allc_file, output_file in zip(input_allc_files, output_files):
            future = pool.schedule(
                mc_region_level_worker,
                args=(input_allc_file, output_file, input_bed_file),
                kwargs={
                    'bed_file_name_column': bed_file_name_column,
                    'contexts': contexts,
                    'compress': compress,
                    'cap': cap,
                    'overwrite': overwrite,
                },
                timeout=timeout)
            future.add_done_callback(utils.task_done)
    # end parallel
    return

def generated_chunked_parallelized_results(self, partially_bound_function, tasks, n_processes, chunksize=1):
    with ProcessPool(n_processes, max_tasks=1) as pool:
        future = pool.map(
            partially_bound_function,
            [list(task_batch) for task_batch in Batch(tasks, chunksize)],
        )
        iterator = future.result()
        while True:
            try:
                yield next(iterator)
            except StopIteration:
                break
            except Exception:
                logging.exception('Child failure')

def play_generation(self):
    with ProcessPool(max_workers=cpu_count() - 1) as pool:
        # TODO it takes too much time, does it really run concurrently?
        generation_thread_partial = partial(generation_threed, self)
        future = pool.map(generation_thread_partial, list(self.population.keys()), timeout=60 * 5)
        iterator = future.result()
        # print(list(iterator))  # debug print removed: it would exhaust the iterator before the loop below
        while True:
            try:
                result = next(iterator)
                self.results[result[0]] = result[1], result[2]
                print("*", end='')
            except StopIteration:
                print("\n" + "_" * 50)
                break
            except TimeoutError as error:
                print(f"function took longer than {error.args[1]} seconds", flush=True)

def parallel_load_image_tensor(image_file_paths, target_size, batch_count):
    batched_image_file_paths = construct_batch_arguments(
        image_file_paths, target_size, batch_count)

    standarized_images = [
        np.array([]).reshape([0, target_size[0], target_size[1], 3])
    ]
    is_successful = []

    with ProcessPool(max_workers=config.MULTI_CORE_COUNT) as pool:
        future = pool.map(load_image_tensor_task,
                          batched_image_file_paths,
                          timeout=config.TIMEOUT_SECS,
                          chunksize=config.CHUNK_SIZE)
        iterator = future.result()

        i = 0
        while True:
            try:
                standardized_image_batch, is_successful_batch = next(iterator)
                standarized_images.append(standardized_image_batch)
                is_successful.append(is_successful_batch)
            except StopIteration:
                break
            except TimeoutError as error:
                is_successful.append(
                    np.zeros([len(batched_image_file_paths[i][0])]).astype(bool))
                print("function took longer than %d seconds" % error.args[1])

            print("RESULTS PROCESSED = " + str(i) + " / " +
                  str(len(batched_image_file_paths) / config.CHUNK_SIZE))
            i = i + 1

    print("finished mapping")
    print("images retrieved = " + str(len(standarized_images)))

    standarized_images = np.concatenate(standarized_images)
    is_successful = np.concatenate(is_successful)
    return standarized_images, is_successful

def calculate_all_packs_dependencies(pack_dependencies_result: dict, id_set: dict, packs: list) -> None:
    """
    Calculates the pack dependencies and adds them to 'pack_dependencies_result' in parallel.
    First - the method generates the full dependency graph.
    Then - using a process pool we extract the dependencies of each pack and add them
    to the 'pack_dependencies_result'

    Args:
        pack_dependencies_result: The dict to which the results should be added
        id_set: The id_set content
        packs: The packs that should be part of the dependencies calculation
    """
    def add_pack_metadata_results(future: ProcessFuture) -> None:
        """
        This is a callback that should be called once the result of the future is ready.
        The results include: first_level_dependencies, all_level_dependencies, pack_name
        Using these results we write the dependencies
        """
        try:
            first_level_dependencies, all_level_dependencies, pack_name = future.result()  # blocks until results ready
            logging.debug(f'Got dependencies for pack {pack_name}\n: {pformat(all_level_dependencies)}')
            pack_dependencies_result[pack_name] = {
                "dependencies": first_level_dependencies,
                "displayedImages": list(first_level_dependencies.keys()),
                "allLevelDependencies": all_level_dependencies,
                "path": os.path.join(PACKS_FOLDER, pack_name),
                "fullPath": os.path.abspath(os.path.join(PACKS_FOLDER, pack_name))
            }
        except Exception:
            logging.exception('Failed to collect pack dependencies results')

    # Generating one graph with dependencies for all packs
    dependency_graph = get_all_packs_dependency_graph(id_set, packs)

    with ProcessPool(max_workers=cpu_count(), max_tasks=100) as pool:
        for pack in dependency_graph:
            future_object = pool.schedule(calculate_single_pack_dependencies,
                                          args=(pack, dependency_graph),
                                          timeout=10)
            future_object.add_done_callback(add_pack_metadata_results)

def watch(queue_endpoint, loop=True):
    logging.info(f"Started watching {queue_endpoint}")
    session = requests.Session()
    # Retries for up to 2 minutes, by default
    retry = Retry(connect=30, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount(queue_endpoint, adapter)
    with ProcessPool(max_tasks=50) as pool:
        while True:
            logging.debug(f"Polling {queue_endpoint}")
            try:
                result = session.get(
                    queue_endpoint,
                    params={"started": False, "page_size": 100},
                    auth=get_auth(),
                )
            except requests.exceptions.ConnectionError:
                logging.exception("Connection error; sleeping for 15 mins")
                time.sleep(60 * 15)
            result.raise_for_status()
            jobs = result.json()
            for job in jobs["results"]:
                assert (
                    job["operation"] == "generate_cohort"
                ), f"The only currently-supported operation is `generate_cohort`, not `{job['operation']}`"
                response = requests.patch(job["url"], json={"started": True}, auth=get_auth())
                response.raise_for_status()
                future = pool.schedule(
                    run_job,
                    (job,),
                    timeout=6 * HOUR,
                )
                future.job = job
                future.add_done_callback(report_result)
            if loop:
                time.sleep(POLL_INTERVAL)
            else:
                break

def run_allc_count_contexts(
    input_allc_files,
    output_prefix,
    compress=True,
    overwrite=False,
    nprocs=1,
    timeout=None,
):
    """
    run bin_allc in parallel
    """
    # assume certain structures in the inputs and outputs
    # allc_xxx.tsv.gz -> output_prefix + "_" + allc_xxx.tsv.gz
    # but the output_files should remove .gz suffix at first
    nprocs = min(nprocs, len(input_allc_files))
    logging.info("""Begin run bin allc.\n
                 Number of processes:{}\n
                 Number of allc_files:{}\n
                 """.format(nprocs, len(input_allc_files)))

    output_files = [
        output_prefix + "_" + os.path.basename(input_allc_file).replace('.tsv.gz', '.tsv')
        for input_allc_file in input_allc_files]

    output_dir = os.path.dirname(output_prefix)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # parallelized processing
    with ProcessPool(max_workers=nprocs, max_tasks=10) as pool:
        for input_allc_file, output_file in zip(input_allc_files, output_files):
            future = pool.schedule(
                allc_count_context_worker_wrap,
                args=(input_allc_file, output_file,),
                kwargs={
                    'compress': compress,
                    'overwrite': overwrite,
                },
                timeout=timeout)
            future.add_done_callback(utils.task_done)
    # end parallel
    return

def _parallel_init(eval_func, iterable, metab_index, base_biomass, model, weight_fraction):
    """
    This function runs the evaluation function in parallel with 3 arguments.
    It is used twice: first to get the metabolite that the model can produce,
    second to verify the solvability of the generated individuals (multiple metabolites)
    """
    processes = 4
    metab_index_iter = repeat(metab_index)
    base_biomass_iter = repeat(base_biomass)
    model_iter = repeat(model)
    weight_fraction_iter = repeat(weight_fraction)
    with ProcessPool(max_workers=processes, max_tasks=4) as pool:
        future = pool.map(eval_func, iterable, metab_index_iter, base_biomass_iter,
                          model_iter, weight_fraction_iter, timeout=400)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
                result = 0, 100
                all_results.append(result)
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of remote process

    return all_results

def process(**kwargs):
    assert isdir(kwargs['database'])
    path = join(kwargs['database'], "*{}.hdf".format(extensions_dict[kwargs['action']]))
    files = list(map(lambda file: abspath(file), glob(path)))
    assert len(files) > 0
    args = list(map(lambda file: merge_dicts({'file': file}, kwargs), files))
    with ProcessPool(max_workers=cpu_count()) as pool:
        future = pool.map(main_entrance_point, args, timeout=timeout_dict[kwargs['action']])
        iterator = future.result()
        while True:
            try:
                next(iterator)
            except StopIteration:
                break
            except TimeoutError as error:
                pass
                # print("function took longer than %d seconds" % error.args[1])
    time.sleep(0.5)

class PebbleExecutor(concurrent.futures.Executor):
    def __init__(self, max_workers, timeout=None):
        self.pool = ProcessPool(max_workers=max_workers)
        self.timeout = timeout

    def submit(self, fn, *args, **kwargs):
        return self.pool.schedule(fn, args=args, timeout=self.timeout)

    def map(self, func, *iterables, timeout=None, chunksize=1):
        raise NotImplementedError("This wrapper does not support `map`.")

    def shutdown(self, wait=True):
        if wait:
            log.info("Closing workers...")
            self.pool.close()
        else:
            log.info("Ending workers...")
            self.pool.stop()
        self.pool.join()
        log.info("Workers joined.")

def propagate(self, tasks: List[Task], **kwargs) -> List[Result]:
    output, instance = kwargs['output'], kwargs['instance']
    pool = ProcessPool(
        max_workers=self.processes,
        initializer=propagate_init,
        initargs=(self.propagator, instance)
    )
    results = []
    future = pool.map(propagate_solve, tasks)
    try:
        for result in future.result():
            results.append(result)
            output.debug(2, 3, 'Already solved %d tasks' % len(results))
    except Exception as e:
        output.debug(0, 1, 'Error while fetching pool results: %s' % e)

    pool.stop()
    pool.join()
    return [result.set_value(self.measure.get(result)) for result in results]

def main():
    args.dump_root = Path(args.dump_root)
    args.dump_root.mkdir_p()

    n_scenes = len(data_loader.scenes)
    print('Found {} potential scenes'.format(n_scenes))
    print('Retrieving frames')
    if args.num_threads == 1:
        for scene in tqdm(data_loader.scenes):
            dump_example(args, scene)
    else:
        with ProcessPool(max_workers=args.num_threads) as pool:
            tasks = pool.map(dump_example, [args] * n_scenes, data_loader.scenes)
            try:
                for _ in tqdm(tasks.result(), total=n_scenes):
                    pass
            except KeyboardInterrupt as e:
                tasks.cancel()
                raise e

    print('Generating train val lists')
    np.random.seed(8964)
    # to avoid DataFlow snooping, we will make two cameras of the same scene
    # to fall in the same set, train or val
    subdirs = args.dump_root.dirs()
    canonic_prefixes = set([subdir.basename()[:-2] for subdir in subdirs])
    with open(args.dump_root / 'train.txt', 'w') as tf:
        with open(args.dump_root / 'val.txt', 'w') as vf:
            for pr in tqdm(canonic_prefixes):
                corresponding_dirs = args.dump_root.dirs('{}*'.format(pr))
                if np.random.random() < 0.1:
                    for s in corresponding_dirs:
                        vf.write('{}\n'.format(s.name))
                else:
                    for s in corresponding_dirs:
                        tf.write('{}\n'.format(s.name))
                        if args.with_depth and args.no_train_gt:
                            for gt_file in s.files('*.npy'):
                                gt_file.remove_p()

def crack_zip(file_path):
    logging.info('[7z] Decrypting 7z file')
    dict_txt_files = glob.glob(
        rf"./logged_in/archive_cracker/dictionaries/*.txt"
    )  # List of dictionaries from the folder
    if len(dict_txt_files) == 0:
        logging.error('[7z] Dict not found')
        exit(1)

    future_list = []
    with ProcessPool(max_workers=2, max_tasks=1000) as pool:
        future_list.append(pool.schedule(SevenZip(file_path).brute_crack))
        for dict_path in dict_txt_files:
            future_list.append(
                pool.schedule(SevenZip(file_path).check_zip, args=(dict_path,)))
            time.sleep(0.3)

        found = False
        # from concurrent.futures import ProcessPoolExecutor, wait, FIRST_COMPLETED
        # done, not_done = wait(thread_list, timeout=6, return_when=FIRST_COMPLETED)  # Alternative
        while not found:
            if len(future_list) == 0:
                break
            for f in future_list:
                if f.done():
                    ret = f.result()
                    if ret is None:
                        f.cancel()
                        future_list.remove(f)
                        continue
                    else:
                        found = True
                        for _f in future_list:  # Clear all processes left
                            _f.cancel()
                            future_list.remove(_f)
                        pool.stop()
                        return ret
                else:
                    continue

def parallelize(partially_bound_function, tasks, n_processes):
    num_successes = 0
    num_failures = 0
    results = []
    with ProcessPool(n_processes, max_tasks=1) as pool:
        future = pool.map(partially_bound_function, tasks)
        iterator = future.result()
        results = []
        while True:
            try:
                result = next(iterator)
            except StopIteration:
                break
            except Exception:
                logging.exception('Child failure')
                num_failures += 1
            else:
                results.append(result)
                num_successes += 1

    logging.info("Done. successes: %s, failures: %s", num_successes, num_failures)
    return results

def run_parallel_tests(self):
    assert not self.futures
    assert not self.temporary_folders
    with ProcessPool(max_workers=self.parallel_tests) as pool:
        order = 1
        self.timeout_count = 0
        while self.state != None:
            # do not create too many states
            if len(self.futures) >= self.parallel_tests:
                wait(self.futures, return_when=FIRST_COMPLETED)

            quit_loop = self.process_done_futures()
            if quit_loop:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success

            folder = tempfile.mkdtemp(prefix=self.TEMP_PREFIX, dir=self.root)
            test_env = TestEnvironment(
                self.state, order, self.test_script, folder,
                self.current_test_case,
                self.test_cases ^ {self.current_test_case},
                self.current_pass.transform, self.pid_queue)
            future = pool.schedule(test_env.run, timeout=self.timeout)
            self.temporary_folders[future] = folder
            self.futures.append(future)
            order += 1
            state = self.current_pass.advance(self.current_test_case, self.state)
            # we are at the end of enumeration
            if state == None:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success
            else:
                self.state = state

def main():
    range_list = list(range(10))
    range_list.extend(range(10, 0, -1))
    randoclass = RandoClass()
    with ProcessPool() as pool:
        future = pool.map(function, range_list, itertools.repeat(randoclass),
                          timeout=5)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of remote process

    return all_results

class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    # Document parsers can go crazy on memory usage when parsing some very
    # specific HTML / PDF documents. Sometimes when this happens the operating
    # system does an out of memory (OOM) kill of a "randomly chosen" process.
    #
    # We limit the memory which can be used by parsing processes to this constant
    #
    # The feature was tested in test_pebble_limit_memory_usage.py
    MEMORY_LIMIT = get_memory_limit()

    def __init__(self):
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:
                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue, self.MEMORY_LIMIT))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        """
        # Start the worker processes if needed
        self.start_workers()

        filename = write_http_response_to_temp_file(http_response)

        apply_args = (process_document_parser,
                      filename,
                      self.DEBUG)

        # Push the task to the workers
        try:
            future = self._pool.schedule(apply_with_return_error,
                                         args=(apply_args,),
                                         timeout=self.PARSER_TIMEOUT)
        except RuntimeError as rte:
            # Remove the temp file used to send data to the process
            remove_file_if_exists(filename)

            # We get here when the pebble pool management thread dies and
            # suddenly starts answering all calls with:
            #
            # RuntimeError('Unexpected error within the Pool')
            #
            # The scan needs to stop because we can't parse any more
            # HTTP responses, which is a very critical part of the process
            msg = str(rte)
            raise ScanMustStopException(msg)

        try:
            process_result = future.result()
        except TimeoutError:
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise TimeoutError(msg % args)
        except ProcessExpired:
            # We reach here when the process died because of an error, we
            # handle this just like when the parser takes a lot of time and
            # we're unable to retrieve an answer from it
            msg = ('One of the parser processes died unexpectedly, this could'
                   ' be because of a bug, the operating system triggering OOM'
                   ' kills, etc. The scanner will continue with the next'
                   ' document, but the scan results might be inconsistent.')
            raise TimeoutError(msg)
        finally:
            # Remove the temp file used to send data to the process, we already
            # have the result, so this file is not needed anymore
            remove_file_if_exists(filename)

        # We still need to perform some error handling here...
        if isinstance(process_result, Error):
            if isinstance(process_result.exc_value, MemoryError):
                msg = ('The parser exceeded the memory usage limit of %s bytes'
                       ' while trying to parse "%s". The parser was stopped in'
                       ' order to prevent OOM issues.')
                args = (self.MEMORY_LIMIT, http_response.get_url())
                om.out.debug(msg % args)
                raise MemoryError(msg % args)

            process_result.reraise()

        try:
            parser_output = load_object_from_temp_file(process_result)
        except Exception as e:
            msg = 'Failed to deserialize sub-process result. Exception: "%s"'
            args = (e,)
            raise Exception(msg % args)

class MultiProcessingDocumentParser(object):
    """
    A document parser that performs all its tasks in different processes and
    returns results to the main process.

    Also implements a parsing timeout just in case the parser enters an infinite
    loop.

    :author: Andres Riancho ([email protected])
    """
    DEBUG = core_profiling_is_enabled()
    MAX_WORKERS = 2 if is_running_on_ci() else (multiprocessing.cpu_count() / 2) or 1

    # Increasing the timeout when profiling is enabled seems to fix issue #9713
    #
    # https://github.com/andresriancho/w3af/issues/9713
    PROFILING_ENABLED = (user_wants_memory_profiling() or
                         user_wants_pytracemalloc() or
                         user_wants_cpu_profiling())

    # in seconds
    PARSER_TIMEOUT = 60 * 3 if PROFILING_ENABLED else 10

    def __init__(self):
        self._pool = None
        self._start_lock = threading.RLock()

    def start_workers(self):
        """
        Start the pool and workers
        :return: The pool instance
        """
        with self._start_lock:
            if self._pool is None:
                # Start the process pool
                log_queue = om.manager.get_in_queue()
                self._pool = ProcessPool(self.MAX_WORKERS,
                                         max_tasks=20,
                                         initializer=init_worker,
                                         initargs=(log_queue,))

        return self._pool

    def stop_workers(self):
        """
        Stop the pool workers
        :return: None
        """
        if self._pool is not None:
            self._pool.stop()
            self._pool.join()
            self._pool = None

    def get_document_parser_for(self, http_response):
        """
        Get a document parser for http_response

        This parses the http_response in a pool worker. This method has two
        features:
            * We can kill the worker if the parser is taking too long
            * We can have different parsers

        :param http_response: The http response instance
        :return: An instance of DocumentParser
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_document_parser,
                      http_response,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            parser_output = future.result()
        except TimeoutError:
            # Act just like when there is no parser
            msg = ('[timeout] The parser took more than %s seconds'
                   ' to complete parsing of "%s", killed it!')
            args = (self.PARSER_TIMEOUT, http_response.get_url())
            raise BaseFrameworkException(msg % args)
        else:
            if isinstance(parser_output, Error):
                parser_output.reraise()

        return parser_output

    def get_tags_by_filter(self, http_response, tags, yield_text=False):
        """
        Return Tag instances for the tags which match the `tags` filter,
        parsing and all lxml stuff is done in another process and the Tag
        instances are sent to the main process (the one calling this method)
        through a pipe

        Some things to note:
            * Not all responses can be parsed, so I need to call DocumentParser
              and handle exceptions

            * The parser selected by DocumentParser might not have tags, and
              it might not have get_tags_by_filter. In this case just return
              an empty list

            * Just like get_document_parser_for we have a timeout in place,
              when we hit the timeout just return an empty list, this is not
              the best thing to do, but makes the plugin code easier to write
              (plugins would ignore this anyways)

        :param tags: The filter
        :param yield_text: Should we yield the tag text?
        :return: A list of Tag instances as defined in sgml.py

        :see: SGMLParser.get_tags_by_filter
        """
        # Start the worker processes if needed
        self.start_workers()

        apply_args = (process_get_tags_by_filter,
                      http_response,
                      tags,
                      yield_text,
                      self.DEBUG)

        # Push the task to the workers
        future = self._pool.schedule(apply_with_return_error,
                                     args=(apply_args,),
                                     timeout=self.PARSER_TIMEOUT)

        try:
            filtered_tags = future.result()
        except TimeoutError:
            # We hit a timeout, return an empty list
            return []
        else:
            # There was an exception in the parser, maybe the HTML was really
            # broken, or it wasn't an HTML at all.
            if isinstance(filtered_tags, Error):
                return []

        return filtered_tags