def start_ranking_stream():
    print('Starting stream...')
    tickers = [
        "$GILD", "$UNP", "$UTX", "$HPQ", "$V", "$CSCO", "$SLB", "$AMGN",
        "$BA", "$COP", "$CMCSA", "$BMY", "$VZ", "$T", "$UNH"
    ]
    executor = ProcessPool()
    futures = []
    try:
        for ticker in tickers:
            futures.append(executor.schedule(rank_work, args=[ticker]))
        print(futures)
        return futures
    except Exception as e:
        logger.exception(e)
        raise e
def run_allc_count_contexts(
    input_allc_files,
    output_prefix,
    compress=True,
    overwrite=False,
    nprocs=1,
    timeout=None,
):
    """Run bin_allc in parallel."""
    # Assumes a certain structure for inputs and outputs:
    # allc_xxx.tsv.gz -> output_prefix + "_" + allc_xxx.tsv.gz,
    # with the .gz suffix dropped from the output file names.
    nprocs = min(nprocs, len(input_allc_files))
    logging.info("Begin run bin allc.\n"
                 "Number of processes: {}\n"
                 "Number of allc_files: {}\n".format(nprocs, len(input_allc_files)))
    output_files = [
        output_prefix + "_" + os.path.basename(input_allc_file).replace('.tsv.gz', '.tsv')
        for input_allc_file in input_allc_files]

    output_dir = os.path.dirname(output_prefix)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # parallelized processing
    with ProcessPool(max_workers=nprocs, max_tasks=10) as pool:
        for input_allc_file, output_file in zip(input_allc_files, output_files):
            future = pool.schedule(allc_count_context_worker_wrap,
                                   args=(input_allc_file, output_file),
                                   kwargs={
                                       'compress': compress,
                                       'overwrite': overwrite,
                                   },
                                   timeout=timeout)
            future.add_done_callback(utils.task_done)
    # end parallel
    return
def main():
    install_logging('Update_Tests_step.log', include_process_name=True)
    existing_test_playbooks = load_test_data_from_conf_json()
    with ProcessPool(max_workers=os.cpu_count()) as pool:
        for pack_name in os.listdir(PACKS_DIR):
            logging.debug(f'Collecting pack: {pack_name} tests to add to conf.json')
            future_object = pool.schedule(generate_pack_tests_configuration,
                                          args=(pack_name, existing_test_playbooks),
                                          timeout=30)
            future_object.add_done_callback(update_new_conf_json)
            logging.debug(f'Successfully added pack: {pack_name} test to conf.json')
    add_to_conf_json(NEW_CONF_JSON_OBJECT)
    logging.success(f'Added {len(NEW_CONF_JSON_OBJECT)} tests to the conf.json')
    logging.success(
        f'Added the following objects to the conf.json:\n{pformat(NEW_CONF_JSON_OBJECT)}'
    )
def process_urls(paths, n_processes, prefix=COMMON_CRAWL_URL, max_failures=100,
                 num_progress_reports=50):
    print(f"Using {n_processes} parallel processes")
    failed_paths = []
    bios = []
    time0 = time.time()
    path_name = (paths[0] + '///').split('/')[1]
    num_progress_reports = max(1, min(num_progress_reports, len(paths) // n_processes))
    done = 0
    pool = ProcessPool(n_processes)
    for i, paths_chunk in enumerate(chunks(paths, num_progress_reports)):
        ans = pool.map(bios_from_wet_url,
                       [prefix + path for path in paths_chunk],
                       timeout=1200)
        iterator = ans.result()
        for p in paths_chunk + ["done"]:
            try:
                a = next(iterator)
                assert p != "done"
                if a is not None:
                    bios += [dict(path=p, **b) for b in a]
                continue
            except StopIteration:
                assert p == "done"
                break
            except Exception as error:
                print("--------------------\n" * 10 + f"function raised {error}")
                failed_paths.append(p)
        done += len(paths_chunk)
        pct = (i + 1) / num_progress_reports
        eta = (time.time() - time0) * (1 / pct - 1) / 60 / 60
        print(
            f"{eta:.1f} hours left, {done:,}/{len(paths):,} done ({pct:.0%}),",
            f"{int(len(bios) / pct):,} estimated bios, {path_name}"
        )
        if len(failed_paths) > 0:
            print(f" {len(failed_paths):,} failed paths")
            if len(failed_paths) > max_failures:
                break
    pool.close()
    return dedup_exact(bios), failed_paths  # dedup_exact is new!
def main():
    args.dump_root = Path(args.dump_root)
    args.dump_root.mkdir_p()

    n_scenes = len(data_loader.scenes)
    print('Found {} potential scenes'.format(n_scenes))
    print('Retrieving frames')
    if args.num_threads == 1:
        for scene in tqdm(data_loader.scenes):
            dump_example(args, scene)
    else:
        with ProcessPool(max_workers=args.num_threads) as pool:
            tasks = pool.map(dump_example, [args] * n_scenes, data_loader.scenes)
            try:
                for _ in tqdm(tasks.result(), total=n_scenes):
                    pass
            except KeyboardInterrupt as e:
                tasks.cancel()
                raise e

    print('Generating train val lists')
    np.random.seed(8964)
    # To avoid data snooping, make sure two cameras of the same scene
    # fall in the same set, train or val.
    subdirs = args.dump_root.dirs()
    canonic_prefixes = set([subdir.basename()[:-2] for subdir in subdirs])
    with open(args.dump_root / 'train.txt', 'w') as tf:
        with open(args.dump_root / 'val.txt', 'w') as vf:
            for pr in tqdm(canonic_prefixes):
                corresponding_dirs = args.dump_root.dirs('{}*'.format(pr))
                if np.random.random() < 0.1:
                    for s in corresponding_dirs:
                        vf.write('{}\n'.format(s.name))
                else:
                    for s in corresponding_dirs:
                        tf.write('{}\n'.format(s.name))
                        if args.with_depth and args.no_train_gt:
                            for gt_file in s.files('*.npy'):
                                gt_file.remove_p()
def process(**kwargs):
    assert isdir(kwargs['database'])
    path = join(kwargs['database'],
                "*{}.hdf".format(extensions_dict[kwargs['action']]))
    files = list(map(lambda file: abspath(file), glob(path)))
    assert len(files) > 0
    args = list(map(lambda file: merge_dicts({'file': file}, kwargs), files))
    with ProcessPool(max_workers=cpu_count()) as pool:
        future = pool.map(main_entrance_point, args,
                          timeout=timeout_dict[kwargs['action']])
        iterator = future.result()
        while True:
            try:
                next(iterator)
            except StopIteration:
                break
            except TimeoutError as error:
                pass  # print("function took longer than %d seconds" % error.args[1])
    time.sleep(0.5)
def _parallel_init(eval_func, iterable, metab_index, base_biomass, model, weight_fraction):
    """
    Run the evaluation function in parallel over the iterable, with the
    remaining arguments repeated for every task.

    It is used twice: first to find the metabolites that the model can produce,
    then to verify the solvability of the generated individuals
    (multiple metabolites).
    """
    processes = 4
    metab_index_iter = repeat(metab_index)
    base_biomass_iter = repeat(base_biomass)
    model_iter = repeat(model)
    weight_fraction_iter = repeat(weight_fraction)
    with ProcessPool(max_workers=processes, max_tasks=4) as pool:
        future = pool.map(eval_func, iterable, metab_index_iter, base_biomass_iter,
                          model_iter, weight_fraction_iter, timeout=400)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
                result = 0, 100
                all_results.append(result)
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of the remote process
    return all_results
def crack_zip(file_path):
    logging.info('[7z] Decrypting 7z file')
    # List of dictionaries from the folder
    dict_txt_files = glob.glob("./logged_in/archive_cracker/dictionaries/*.txt")
    if len(dict_txt_files) == 0:
        logging.error('[7z] Dict not found')
        exit(1)
    future_list = []
    with ProcessPool(max_workers=2, max_tasks=1000) as pool:
        future_list.append(pool.schedule(SevenZip(file_path).brute_crack))
        for dict_path in dict_txt_files:
            future_list.append(
                pool.schedule(SevenZip(file_path).check_zip, args=(dict_path,)))
            time.sleep(0.3)
        found = False
        # from concurrent.futures import ProcessPoolExecutor, wait, FIRST_COMPLETED
        # done, not_done = wait(thread_list, timeout=6, return_when=FIRST_COMPLETED)  # Alternative
        while not found:
            if len(future_list) == 0:
                break
            for f in future_list[:]:  # iterate over a copy, the list is mutated below
                if f.done():
                    ret = f.result()
                    if ret is None:
                        f.cancel()
                        future_list.remove(f)
                        continue
                    else:
                        found = True
                        for _f in future_list[:]:  # cancel all processes left
                            _f.cancel()
                            future_list.remove(_f)
                        pool.stop()
                        return ret
                else:
                    continue
def parallelize(partially_bound_function, tasks, n_processes):
    num_successes = 0
    num_failures = 0
    results = []
    with ProcessPool(n_processes, max_tasks=1) as pool:
        future = pool.map(partially_bound_function, tasks)
        iterator = future.result()
        while True:
            try:
                result = next(iterator)
            except StopIteration:
                break
            except Exception:
                logging.exception('Child failure')
                num_failures += 1
            else:
                results.append(result)
                num_successes += 1
    logging.info("Done. successes: %s, failures: %s", num_successes, num_failures)
    return results
def calculate_all_packs_dependencies(pack_dependencies_result: dict, id_set: dict, packs: list) -> None:
    """
    Calculates the pack dependencies and adds them to 'pack_dependencies_result' in parallel.

    First the method generates the full dependency graph.
    Then, using a process pool, it extracts the dependencies of each pack and adds them
    to 'pack_dependencies_result'.

    Args:
        pack_dependencies_result: The dict to which the results should be added
        id_set: The id_set content
        packs: The packs that should be part of the dependencies calculation
    """
    def add_pack_metadata_results(future: ProcessFuture) -> None:
        """
        A callback that is called once the result of the future is ready.

        The results include: first_level_dependencies, all_level_dependencies, pack_name.
        Using these results we write the dependencies.
        """
        try:
            first_level_dependencies, all_level_dependencies, pack_name = future.result()  # blocks until results are ready
            logging.debug(f'Got dependencies for pack {pack_name}\n: {pformat(all_level_dependencies)}')
            pack_dependencies_result[pack_name] = {
                "dependencies": first_level_dependencies,
                "displayedImages": list(first_level_dependencies.keys()),
                "allLevelDependencies": all_level_dependencies,
                "path": os.path.join(PACKS_FOLDER, pack_name),
                "fullPath": os.path.abspath(os.path.join(PACKS_FOLDER, pack_name))
            }
        except Exception:
            logging.exception('Failed to collect pack dependencies results')

    # Generate one graph with dependencies for all packs
    dependency_graph = get_all_packs_dependency_graph(id_set, packs)
    with ProcessPool(max_workers=cpu_count(), max_tasks=100) as pool:
        for pack in dependency_graph:
            future_object = pool.schedule(calculate_single_pack_dependencies,
                                          args=(pack, dependency_graph),
                                          timeout=10)
            future_object.add_done_callback(add_pack_metadata_results)
def run_parallel_tests(self):
    assert not self.futures
    assert not self.temporary_folders
    with ProcessPool(max_workers=self.parallel_tests) as pool:
        order = 1
        self.timeout_count = 0
        while self.state is not None:
            # do not create too many states
            if len(self.futures) >= self.parallel_tests:
                wait(self.futures, return_when=FIRST_COMPLETED)

            quit_loop = self.process_done_futures()
            if quit_loop:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success

            folder = tempfile.mkdtemp(prefix=self.TEMP_PREFIX, dir=self.root)
            test_env = TestEnvironment(
                self.state, order, self.test_script, folder,
                self.current_test_case,
                self.test_cases ^ {self.current_test_case},
                self.current_pass.transform, self.pid_queue)
            future = pool.schedule(test_env.run, timeout=self.timeout)
            self.temporary_folders[future] = folder
            self.futures.append(future)
            order += 1
            state = self.current_pass.advance(self.current_test_case, self.state)
            # we are at the end of enumeration
            if state is None:
                success = self.wait_for_first_success()
                self.terminate_all(pool)
                return success
            else:
                self.state = state
def main(date_range) -> None:
    """
    Loop over dates:
    1/ Unzip archives.
    2/ Generate clutter mask for given date.
    3/ Generate composite mask.
    4/ Get the 95th percentile of the clutter reflectivity.
    5/ Save data for the given date.
    6/ Remove unzipped file and go to next iteration.

    Parameters:
    ===========
    date_range: Iter
        List of dates to process
    """
    print(crayons.green(f"RCA processing for radar {RID}."))
    print(crayons.green(f"Between {START_DATE} and {END_DATE}."))
    print(crayons.green(f"Data will be saved in {OUTPATH}."))

    with ProcessPool(max_workers=16, max_tasks=2) as pool:
        future = pool.map(process_date, date_range, timeout=180)
        iterator = future.result()
        while True:
            try:
                _ = next(iterator)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception:
                traceback.print_exc()
    return None
def update_extensions(archivedir, parallel, forums_ext_ids, ext_ids, timeout, verbose, start_pystuck):
    ext_with_forums = list(set(forums_ext_ids))
    ext_without_forums = list(set(ext_ids) - set(forums_ext_ids))
    tups = [(ext_id, True) for ext_id in ext_with_forums] + \
           [(ext_id, False) for ext_id in ext_without_forums]
    random.shuffle(tups)
    log_info("Updating {} extensions ({} including forums, {} excluding forums)".format(
        len(tups), len(ext_with_forums), len(ext_without_forums)))

    with MysqlProcessBackend(
            None,
            read_default_file=const_mysql_config_file(),
            charset='utf8mb4') as con:
        results = []
        with ProcessPool(max_workers=parallel,
                         initializer=init_process,
                         initargs=(verbose, start_pystuck, RequestManager(parallel))) as pool:
            future = pool.map(update_extension,
                              [(archivedir, con, extid, archive) for extid, archive in tups],
                              chunksize=1,
                              timeout=timeout)
            iterator = future.result()
            for ext_id in ext_ids:
                try:
                    results.append(next(iterator))
                except StopIteration:
                    break
                except TimeoutError as error:
                    log_warning("WorkerException: Processing of %s took longer than %d seconds"
                                % (ext_id, error.args[1]))
                    results.append(UpdateResult(ext_id, False, None, None, None, None, None, None, None, error))
                except ProcessExpired as error:
                    log_warning("WorkerException: %s (%s), exit code: %d" % (error, ext_id, error.exitcode))
                    results.append(UpdateResult(ext_id, False, None, None, None, None, None, None, None, error))
                except Exception as error:
                    log_warning("WorkerException: Processing %s raised %s" % (ext_id, error))
                    log_warning(error.traceback)  # Python's traceback of the remote process
                    results.append(UpdateResult(ext_id, False, None, None, None, None, None, None, None, error))
    return results
def test_parallel(self):
    from pebble import ProcessPool, ProcessExpired
    from concurrent.futures import TimeoutError
    from tqdm import tqdm

    pbar = tqdm(total=50)
    with ProcessPool(max_workers=16) as pool:
        future = pool.map(fibonacci, range(30), timeout=10)
        results = future.result()
        all_results = []
        while True:
            try:
                result = next(results)
                all_results.append(result)
                pbar.update(1)
            except StopIteration:
                break
            except TimeoutError as error:
                all_results.append(None)
                pbar.update(1)
                print("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                all_results.append(None)
                pbar.update(1)
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                all_results.append(None)
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of the remote process
    print(all_results)
    print(len(all_results))
    import pdb
    pdb.set_trace()
def main() -> None:
    flist = sorted(glob.glob(os.path.join(INPATH, "*.hdf")))
    if len(flist) == 0:
        raise FileNotFoundError(f"No file found in {INPATH}")
    print(f"Found {len(flist)} files in {INPATH}")

    for flist_chunk in chunks(flist, 16):
        with ProcessPool() as pool:
            future = pool.map(buffer, flist_chunk, timeout=600)
            iterator = future.result()
            while True:
                try:
                    _ = next(iterator)
                except StopIteration:
                    break
                except TimeoutError as error:
                    print("function took longer than %d seconds" % error.args[1])
                except ProcessExpired as error:
                    print("%s. Exit code: %d" % (error, error.exitcode))
                except Exception:
                    traceback.print_exc()
    return None
def pebble_map(toolbox_evaluate, pop, initial_pop, model, base_biomass, exp_ess, distance, processes):
    print(processes)
    initial_pop_iter = repeat(initial_pop)
    model_iter = repeat(model)
    base_biomass_iter = repeat(base_biomass)
    exp_ess_iter = repeat(exp_ess)
    distance_iter = repeat(distance)
    with ProcessPool(processes) as pool:
        future = pool.map(toolbox_evaluate, pop, initial_pop_iter, model_iter,
                          base_biomass_iter, exp_ess_iter, distance_iter,
                          timeout=40)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
                result = 0, 100
                all_results.append(result)
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of the remote process
    return all_results
def main():
    range_list = list(range(10))
    range_list.extend(range(10, 0, -1))
    randoclass = RandoClass()
    with ProcessPool() as pool:
        future = pool.map(function, range_list, itertools.repeat(randoclass),
                          timeout=5)
        iterator = future.result()
        all_results = []
        while True:
            try:
                result = next(iterator)
                all_results.append(result)
            except StopIteration:
                break
            except TimeoutError as error:
                print("function took longer than %d seconds" % error.args[1])
            except ProcessExpired as error:
                print("%s. Exit code: %d" % (error, error.exitcode))
            except Exception as error:
                print("function raised %s" % error)
                print(error.traceback)  # Python's traceback of the remote process
    return all_results
def convert_dataset(final_model, depth_dir, images_root_folder, occ_dir,
                    dataset_output_dir, video_output_dir, ffmpeg,
                    pose_scale=1, interpolated_frames=[], metadata=None, images_list=None,
                    threads=8, downscale=None, compressed=True, width=None,
                    visualization=False, video=False, verbose=0, **env):
    dataset_output_dir.makedirs_p()
    video_output_dir.makedirs_p()
    if video:
        visualization = True
    cameras_colmap, images_colmap, _ = rm.read_model(final_model, '.txt')
    # image_df = pd.DataFrame.from_dict(images, orient="index").set_index("id")
    if metadata is not None:
        metadata = metadata.set_index("db_id", drop=False).sort_values("time")
        framerate = metadata["framerate"].values[0]
        # image_df = image_df.reindex(metadata.index)
        images_list = metadata["image_path"].values
    else:
        assert images_list is not None
        framerate = None
        video = False

    # Discard images and cameras that are not represented by the image list
    images_colmap = {i.name: i for k, i in images_colmap.items() if i.name in images_list}
    cameras_ids = set([i.camera_id for i in images_colmap.values()])
    cameras_colmap = {k: cameras_colmap[k] for k in cameras_ids}

    if downscale is None:
        assert width is not None

    rescaled_cameras = rescale_and_save_cameras(cameras_colmap, images_colmap,
                                                dataset_output_dir, width, downscale)
    poses = save_poses(images_colmap, images_list, dataset_output_dir, pose_scale)

    depth_maps = []
    occ_maps = []
    interpolated = []
    imgs = []
    registered = []
    depth_shapes = []
    for i in images_list:
        img_path = images_root_folder / i
        imgs.append(img_path)
        fname = img_path.basename()
        depth_path = depth_dir / fname
        occ_path = occ_dir / fname
        if compressed:
            depth_path += ".gz"
            occ_path += ".gz"
        if i in images_colmap:
            assert depth_path.isfile()
            registered.append(True)
            if occ_path.isfile():
                occ_maps.append(occ_path)
            else:
                occ_maps.append(None)
            depth_maps.append(depth_path)
            camera = cameras_colmap[images_colmap[i].camera_id]
            depth_shapes.append((camera.height, camera.width))
            if i in interpolated_frames:
                if verbose > 2:
                    print("Image {} was interpolated".format(fname))
                interpolated.append(True)
            else:
                interpolated.append(False)
        else:
            if verbose > 2:
                print("Image {} was not registered".format(fname))
            registered.append(False)
            depth_maps.append(None)
            occ_maps.append(None)
            interpolated.append(False)
            depth_shapes.append(None)

    print('{}/{} Frames not registered ({:.2f}%)'.format(
        len(images_list) - sum(registered), len(images_list),
        100 * (1 - sum(registered) / len(images_list))))
    print('{}/{} Frames interpolated ({:.2f}%)'.format(
        sum(interpolated), len(images_list),
        100 * sum(interpolated) / len(images_list)))

    if threads == 1:
        for i, d, o, ds, n in tqdm(zip(imgs, depth_maps, occ_maps, depth_shapes, interpolated),
                                   total=len(imgs)):
            process_one_frame(i, d, o, ds, dataset_output_dir, video_output_dir,
                              downscale, n, visualization, viz_width=1920)
    else:
        with ProcessPool(max_workers=threads) as pool:
            tasks = pool.map(process_one_frame, imgs, depth_maps, occ_maps, depth_shapes,
                             [dataset_output_dir] * len(imgs),
                             [video_output_dir] * len(imgs),
                             [downscale] * len(imgs), interpolated,
                             [visualization] * len(imgs), [1920] * len(imgs))
            try:
                for _ in tqdm(tasks.result(), total=len(imgs)):
                    pass
            except KeyboardInterrupt as e:
                tasks.cancel()
                raise e

    if metadata is not None:
        wanted_keys = ['image_path', 'time', 'height', 'width', 'camera_model', 'camera_id']
        filtered_metadata = metadata[wanted_keys].copy()
        filtered_metadata['interpolated'] = interpolated
        filtered_metadata['registered'] = registered
        for i, j in product(range(3), range(4)):
            filtered_metadata['pose{}{}'.format(i, j)] = poses[:, i, j]
        filtered_metadata["fx"] = np.NaN
        filtered_metadata["fy"] = np.NaN
        filtered_metadata["cx"] = np.NaN
        filtered_metadata["cy"] = np.NaN
        for cam_id in filtered_metadata["camera_id"].unique():
            if cam_id not in rescaled_cameras.keys():
                continue
            cam = rescaled_cameras[cam_id]
            rows = filtered_metadata["camera_id"] == cam_id
            filtered_metadata.loc[rows, "fx"] = cam.params[0]
            if "SIMPLE" in cam.model or "RADIAL" in cam.model:
                filtered_metadata.loc[rows, "fy"] = cam.params[0]
                filtered_metadata.loc[rows, "cx"] = cam.params[1]
                filtered_metadata.loc[rows, "cy"] = cam.params[2]
            else:
                filtered_metadata.loc[rows, "fy"] = cam.params[1]
                filtered_metadata.loc[rows, "cx"] = cam.params[2]
                filtered_metadata.loc[rows, "cy"] = cam.params[3]
        filtered_metadata.to_csv(dataset_output_dir / 'metadata.csv')

    not_registered = [i + '\n' for i, r in zip(images_list, registered) if not r]
    with open(dataset_output_dir / 'not_registered.txt', 'w') as f:
        f.writelines(not_registered)

    if video:
        video_path = str(video_output_dir.parent /
                         '{}_groundtruth_viz.mp4'.format(video_output_dir.stem))
        glob_pattern = str(video_output_dir / '*.png')
        ffmpeg.create_video(video_path, glob_pattern, True, framerate)
        video_output_dir.rmtree_p()
def test_process_pool_map_zero_chunk(self):
    """Process Pool Fork map chunksize 0."""
    with ProcessPool(max_workers=1) as pool:
        with self.assertRaises(ValueError):
            pool.map(function, [], chunksize=0)
def test_process_pool_ignoring_sigterm(self):
    """Process Pool Fork ignored SIGTERM signals are handled on Unix."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(sigterm_function, timeout=0.2)
        with self.assertRaises(TimeoutError):
            future.result()
def test_process_pool_expired_worker(self):
    """Process Pool Fork unexpected death of worker raises ProcessExpired."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(suicide_function)
        self.assertRaises(ProcessExpired, future.result)
def test_process_pool_stopped(self):
    """Process Pool Fork is not active once stopped."""
    with ProcessPool(max_workers=1) as pool:
        pool.schedule(function, args=[1])
    self.assertFalse(pool.active)
def test_process_pool_join_running(self):
    """Process Pool Fork RuntimeError is raised if an active pool is joined."""
    with ProcessPool(max_workers=1) as pool:
        pool.schedule(function, args=[1])
        self.assertRaises(RuntimeError, pool.join)
def test_process_pool_initializer(self):
    """Process Pool Fork initializer is correctly run."""
    with ProcessPool(initializer=initializer, initargs=[1]) as pool:
        future = pool.schedule(initializer_function)
        self.assertEqual(future.result(), 1)
def test_process_pool_running(self):
    """Process Pool Fork is active if a future is scheduled."""
    with ProcessPool(max_workers=1) as pool:
        pool.schedule(function, args=[1])
        self.assertTrue(pool.active)
def test_process_pool_timeout(self):
    """Process Pool Fork future raises TimeoutError if the task times out."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(long_function, timeout=0.1)
        self.assertRaises(TimeoutError, future.result)
def test_process_pool_pickling_error_result(self):
    """Process Pool Fork result pickling errors are raised by future.result."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(pickle_error_function)
        self.assertRaises((pickle.PicklingError, TypeError), future.result)
def test_process_pool_pickling_error_task(self):
    """Process Pool Fork task pickling errors are raised by future.result."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(function, args=[threading.Lock()])
        self.assertRaises((pickle.PicklingError, TypeError), future.result)
def test_process_pool_error(self):
    """Process Pool Fork errors are raised by future.result."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(error_function)
        self.assertRaises(Exception, future.result)
def test_process_pool_single_future(self):
    """Process Pool Fork single future."""
    with ProcessPool(max_workers=1) as pool:
        future = pool.schedule(function, args=[1],
                               kwargs={'keyword_argument': 1})
        self.assertEqual(future.result(), 2)