def determine_brats_postprocessing(folder_with_preds, folder_with_gt, postprocessed_output_dir, processes=8,
                                   thresholds=(0, 10, 50, 100, 200, 500, 750, 1000, 1500, 2500, 10000),
                                   replace_with=2):
    # find pairs
    nifti_gt = subfiles(folder_with_gt, suffix=".nii.gz", sort=True)
    p = Pool(processes)
    nifti_pred = subfiles(folder_with_preds, suffix='.nii.gz', sort=True)

    results = p.starmap_async(load_niftis_threshold_compute_dice,
                              zip(nifti_gt, nifti_pred, [thresholds] * len(nifti_pred)))
    results = results.get()

    all_dc_per_threshold = {}
    for t in thresholds:
        all_dc_per_threshold[t] = np.array([i[1][t] for i in results])
        print(t, np.mean(all_dc_per_threshold[t]))

    means = [np.mean(all_dc_per_threshold[t]) for t in thresholds]
    best_threshold = thresholds[np.argmax(means)]
    print('best', best_threshold, means[np.argmax(means)])

    maybe_mkdir_p(postprocessed_output_dir)

    p.starmap(apply_brats_threshold, zip(nifti_pred, [postprocessed_output_dir] * len(nifti_pred),
                                         [best_threshold] * len(nifti_pred), [replace_with] * len(nifti_pred)))

    p.close()
    p.join()

    save_pickle((thresholds, means, best_threshold, all_dc_per_threshold),
                join(postprocessed_output_dir, "threshold.pkl"))

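# Shape note for the snippet above: each starmap_async result is later indexed as
# i[1][t], so load_niftis_threshold_compute_dice (not shown here) must return a pair
# whose second element maps each threshold to a Dice score, e.g.
# ("case_0001.nii.gz", {0: 0.91, 10: 0.91, 50: 0.92, ...}).
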
def harvest(out_dir, existing_dir=0, start_id=0, stop_id=100000, verbose=False):
    g_verbose = verbose
    if verbose:
        print("Begin harvesting")
    if os.path.exists(out_dir):
        if existing_dir == 0:
            exit("Directory '" + out_dir + "' exists")
        elif not os.path.isdir(out_dir):
            exit("Error: '" + out_dir + "' is not a directory")
        elif existing_dir == -1:
            rmtree(out_dir)
            os.mkdir(out_dir)
    else:
        os.mkdir(out_dir)

    # Fetch records
    arg_list = [(i, out_dir, verbose) for i in range(start_id, stop_id)]
    mp = Pool(32)
    mp.starmap(fetch_write, arg_list, chunksize=1)
    mp.close()
    mp.join()
    if verbose:
        print("Harvesting complete")

def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
        num_threads=default_num_threads, force_separate_z=None):
    print("Initializing to run preprocessing")
    print("npz folder:", input_folder_with_cropped_npz)
    print("output_folder:", output_folder)
    list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, True, None, ".npz", True)
    assert len(list_of_cropped_npz_files) != 0, "set list of files first"
    maybe_mkdir_p(output_folder)
    all_args = []
    num_stages = len(target_spacings)

    # we need to know which classes are present in this dataset so that we can precompute where these classes are
    # located. This is needed for oversampling foreground
    all_classes = load_pickle(join(input_folder_with_cropped_npz, 'dataset_properties.pkl'))['all_classes']

    for i in range(num_stages):
        output_folder_stage = os.path.join(output_folder, data_identifier + "_stage%d" % i)
        maybe_mkdir_p(output_folder_stage)
        spacing = target_spacings[i]
        for j, case in enumerate(list_of_cropped_npz_files):
            case_identifier = get_case_identifier_from_npz(case)
            args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z, all_classes
            all_args.append(args)
    p = Pool(num_threads)
    p.starmap(self._run_internal, all_args)
    p.close()
    p.join()

def ExtractAllEnvelopes(LPF=False, CUTOFF=100):
    # # In case you need to print numpy outputs:
    # numpy.set_printoptions(threshold=numpy.inf, suppress=True)
    TotalTime = time.time()

    # Get all the GFB.npy files under resources/f2cnn
    gfbFiles = glob.glob(join("resources", "f2cnn", "*", "*.GFB.npy"))
    if not gfbFiles:
        # check before the banner below, which indexes gfbFiles[0]
        print("ERROR: NO .GFB.npy FILES FOUND, PLEASE GENERATE FILTERED OUTPUTS")
        exit(-1)
    print("\n###############################\nExtracting Envelopes from files in '{}'."
          .format(split(gfbFiles[0])[0]))
    if LPF:
        print("Using Low Pass Filtering with a cutoff at {}Hz".format(CUTOFF))
    else:
        print("Not using Low Pass Filtering")
    print(len(gfbFiles), ".GFB.npy files found")

    # Usage of multiprocessing, to reduce computing time
    proc = cpu_count()
    counter = Value('i', 0)
    multiproc_pool = Pool(processes=proc, initializer=InitProcesses, initargs=(counter,))
    # Pack all the arguments
    arguments = zip(gfbFiles, repeat(len(gfbFiles)), repeat(LPF), repeat(CUTOFF))
    multiproc_pool.starmap(ExtractAndSaveEnvelope, arguments)

    print("Extracted Envelopes from all files.")
    print('          Total time:', time.time() - TotalTime)
    print('')

def calculate_unperturbated_empiricals(
    default_vs30,
    extended_period,
    fsf,
    im_config,
    n_processes,
    sim_root,
    empirical_im_logger: Logger = get_basic_logger(),
):
    events = load_fault_selection_file(fsf)
    empirical_im_logger.debug(
        f"Loaded {len(events)} events from the fault selection file"
    )
    events = [
        name if count == 1 else get_realisation_name(name, 1)
        for name, count in events.items()
    ]
    tasks = create_event_tasks(
        events, sim_root, im_config, default_vs30, extended_period, empirical_im_logger
    )

    pool = Pool(min(n_processes, len(tasks)))
    empirical_im_logger.debug("Running empirical im calculations")
    pool.starmap(calculate_empirical, tasks)
    pool.close()
    pool.join()
    empirical_im_logger.debug("Empirical ims calculated")

def process_experiment(_experiment, _overwrite=False):
    _arguments = [
        (_experiment, int(_series.split('_')[1]), _overwrite)
        for _series in paths.image_files(paths.serieses(_experiment))
    ]
    _p = Pool(CPUS_TO_USE)
    _p.starmap(process_series, _arguments)
    _p.close()

def get_scores(dataset, res, gts, weights, n_threads=4):
    """
    :param dataset: ['flickrstyle', 'flickrstyle']
    :param res: ['candidate1', 'candidate2']
    :param gts: {0: ['sent1', 'sent2'], 1: ['sent3', 'sent4']}
    :param weights: {'cider': 0.5, 'bleu': 0.5}
    :return:
    """
    score = 0.
    if n_threads <= 0:  # single process
        _dataset = dict(enumerate(dataset))
        if weights['cider'] > 0:
            score_cider = _compute_cider(_dataset, gts, res)
            score = score_cider * weights['cider'] + score
        if weights['bleu'] > 0:
            score_bleu4 = _compute_bleu(_dataset, gts, res)
            score = score_bleu4 * weights['bleu'] + score
    else:  # parallel
        def _get_chunk_index(n_samples, n_chunks):
            chunk_size = n_samples // n_chunks
            r = n_samples % n_chunks
            sizes = [chunk_size + 1 if i < r else chunk_size for i in range(n_chunks)]
            chunks = []
            i = 0
            for size in sizes:
                chunks.append(range(i, i + size))
                i += size
            return chunks

        global pool
        if pool is None:
            # initialize the worker pool, and initialize each worker process
            pool = Pool(processes=n_threads, initializer=init_style_scorer, initargs=[_init_list])
        chunk_index = _get_chunk_index(n_samples=len(res), n_chunks=n_threads)
        chunked_args = []
        for i in range(n_threads):
            _dataset = {}
            _gts = {}
            _res = OrderedDict()
            for _i in chunk_index[i]:
                _dataset[_i] = dataset[_i]
                _gts[_i] = gts[_i]
                _res[_i] = res[_i]
            chunked_args.append([_dataset, _gts, _res])
        if weights['cider'] > 0:
            score_cider = pool.starmap(func=_compute_cider, iterable=chunked_args)
            score_cider = np.concatenate(score_cider)
            score = score_cider * weights['cider'] + score
        if weights['bleu'] > 0:
            score_bleu4 = pool.starmap(func=_compute_bleu, iterable=chunked_args)
            score_bleu4 = np.concatenate(score_bleu4)
            score = score_bleu4 * weights['bleu'] + score
    return score

def process_experiments(_experiments, _overwrite=False):
    _arguments = []
    for _tuple in load.experiments_serieses_as_tuples(_experiments):
        _experiment, _series_id = _tuple
        _arguments.append((_experiment, _series_id, _overwrite))

    _p = Pool(CPUS_TO_USE)
    _p.starmap(process_series, _arguments)
    _p.close()

def apply_threshold_to_folder(folder_in, folder_out, threshold, replace_with, processes=24):
    maybe_mkdir_p(folder_out)
    niftis = subfiles(folder_in, suffix='.nii.gz', join=True)
    p = Pool(processes)
    p.starmap(apply_brats_threshold, zip(niftis, [folder_out] * len(niftis), [threshold] * len(niftis),
                                         [replace_with] * len(niftis)))
    p.close()
    p.join()

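# apply_brats_threshold is defined elsewhere in the codebase. Below is a minimal
# sketch of a worker with a matching (fname, out_dir, threshold, replace_with)
# signature; the rule it assumes (relabel the enhancing-tumor class when fewer than
# `threshold` voxels are predicted) and the label value 3 are assumptions of this
# sketch, not taken from the original.
import os
import SimpleITK as sitk

def apply_brats_threshold_sketch(fname, out_dir, threshold, replace_with):
    img = sitk.ReadImage(fname)
    arr = sitk.GetArrayFromImage(img)
    mask = arr == 3  # assumed: label 3 = enhancing tumor
    if mask.sum() < threshold:
        # too few enhancing voxels -> likely a false positive, relabel them
        arr[mask] = replace_with
    out = sitk.GetImageFromArray(arr)
    out.CopyInformation(img)
    sitk.WriteImage(out, os.path.join(out_dir, os.path.basename(fname)))
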
def process_experiments(_experiments, _pairs=True):
    _tuples = []
    for _experiment in _experiments:
        _tuples += load.experiment_groups_as_tuples(_experiment)

    _p = Pool(CPUS_TO_USE)
    if _pairs:
        _answers = _p.starmap(process_group_pairs, _tuples)
    else:
        _answers = _p.starmap(process_group_single_cells, _tuples)
    _p.close()

def compress_everything(output_base, num_processes=8):
    p = Pool(num_processes)
    tasks = subfolders(output_base, join=False)
    tasknames = [i.split('/')[-1] for i in tasks]
    args = []
    for t, tn in zip(tasks, tasknames):
        args.append((join(output_base, tn + ".zip"), join(output_base, t)))
    p.starmap(compress_folder, args)
    p.close()
    p.join()

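# compress_folder is not shown; a sketch built on the standard library would be
# (the helper name is hypothetical):
import shutil

def compress_folder_sketch(zip_file, folder):
    # shutil.make_archive appends ".zip" itself, so strip the extension first
    shutil.make_archive(zip_file[:-len(".zip")], 'zip', root_dir=folder)
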
def update(self, export='csv', path='out'):
    stock_codes = []
    for file in os.listdir(os.path.join(path, 'raw_data')):
        if not file.endswith('csv'):
            continue
        stock_code = file[:-4]
        stock_codes.append(stock_code)
    pool = Pool(10)
    params = [(code, path) for code in stock_codes]
    if export.lower() in ['csv']:
        pool.starmap(self.update_single_code, params)
    pool.close()
    pool.join()

def crawl_companies_files(options, workers_num=10, include_companies=None, from_date=None):
    """
    :param options: the crawler options, including the PhantomJS or Chromium driver
        configuration used to navigate through the listing if needed
    :param workers_num:
    :param include_companies:
    :param from_date:
    :return:
    """
    companies_files = []
    pool = Pool(processes=workers_num)

    try:
        # Obtain the ccvm codes of all the listed companies
        ccvm_codes = [r.ccvm for r in BovespaCompany.objects.only(["ccvm"]).all()]
        ccvm_codes = sorted(ccvm_codes)

        _logger.debug("Processing the files of {0} companies from {1}".format(
            len(ccvm_codes),
            "{0:%Y-%m-%d}".format(from_date) if from_date else "THE BEGINNING"))

        func_params = []
        for ccvm_code in ccvm_codes:
            if include_companies and ccvm_code not in include_companies:
                continue
            for doc_type in DOC_TYPES:
                func_params.append([ccvm_code, options, doc_type, from_date])

        # call_results = pool.starmap(obtain_company_files, func_params)
        pool.starmap(obtain_company_files, func_params)

        # Merge all the responses into a single list
        # companies_files += list(itertools.chain.from_iterable(call_results))
    except TimeoutError:
        print("Timeout error")
        traceback.print_exc()
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()

def run(self, target_spacings, input_folder_with_cropped_npz, output_folder, data_identifier,
        num_threads=default_num_threads, force_separate_z=None):
    """
    :param target_spacings: list of lists [[1.25, 1.25, 5]]
    :param input_folder_with_cropped_npz: dim: c, x, y, z | npz_file['data'] np.savez_compressed(fname.npz, data=arr)
    :param output_folder:
    :param num_threads:
    :param force_separate_z: None
    :return:
    """
    print("Initializing to run preprocessing")
    print("npz folder:", input_folder_with_cropped_npz)
    print("output_folder:", output_folder)
    list_of_cropped_npz_files = subfiles(input_folder_with_cropped_npz, False, None, ".npz", True)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    num_stages = len(target_spacings)
    if not isinstance(num_threads, (list, tuple, np.ndarray)):
        num_threads = [num_threads] * num_stages
    assert len(num_threads) == num_stages

    # we need to know which classes are present in this dataset so that we can precompute where these classes are
    # located. This is needed for oversampling foreground
    all_classes = load_pickle(input_folder_with_cropped_npz + "/" + 'dataset_properties.pkl')['all_classes']

    for i in range(num_stages):
        all_args = []
        output_folder_stage = output_folder + "/" + data_identifier + "_stage%d" % i
        if not os.path.isdir(output_folder_stage):
            os.makedirs(output_folder_stage)
        spacing = target_spacings[i]
        for j, case in enumerate(list_of_cropped_npz_files):
            case_identifier = get_case_identifier_from_npz(case)
            args = spacing, case_identifier, output_folder_stage, input_folder_with_cropped_npz, force_separate_z, all_classes
            all_args.append(args)
        p = Pool(num_threads[i])
        p.starmap(self._run_internal, all_args)
        p.close()
        p.join()

def convert_labels_back_to_BraTS_2018_2019_convention(input_folder: str, output_folder: str, num_processes: int = 12):
    """
    reads all prediction files (nifti) in the input folder, converts the labels back to BraTS convention and saves the
    result in output_folder

    :param input_folder:
    :param output_folder:
    :return:
    """
    maybe_mkdir_p(output_folder)
    nii = subfiles(input_folder, suffix='.nii.gz', join=False)
    p = Pool(num_processes)
    p.starmap(load_convert_save, zip(nii, [input_folder] * len(nii), [output_folder] * len(nii)))
    p.close()
    p.join()

def main(ext):
    # arg_list is a list of tuples, each tuple is one call to mol_to_sdf
    # arg_list = [(i["path"], i["filename"][:-4]) for i in find_files(
    #     "/".join(os.path.abspath("").split("/")[:-1]) + "/datasets/dss_tox/DSSToxAll_20151019/ToxAll/", "mol$")]
    arg_list = [(i["path"], i["filename"][:-(len(ext) + 1)]) for i in find_files(
        "/".join(os.path.abspath("").split("/")[:-1]) + "/datasets/activity_cliffs/", ext)]
    mp = Pool(number_of_processes)
    # mp.starmap(mol_to_sdf, arg_list, chunksize=1)
    # mp.starmap(del_mol, arg_list, chunksize=1)
    # mp.starmap(del_xyz, arg_list, chunksize=1)
    mp.starmap(mol2_to_sdf, arg_list, chunksize=1)
    # mp.starmap(del_sdf, arg_list, chunksize=1)
    mp.close()
    mp.join()

def crawl_listed_companies(options, workers_num=10):
    companies = []
    pool = Pool(processes=workers_num)

    try:
        func_params = []
        for letter in COMPANIES_LISTING_SEARCHER_LETTERS:
            func_params.append([letter, options])

        call_results = pool.starmap(update_listed_companies, func_params)

        # Merge all the responses into a single list
        companies += list(itertools.chain.from_iterable(call_results))

        return companies
    except TimeoutError:
        print("Timeout error")
        traceback.print_exc()
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()

def main(keyword, page):
    pool = Pool()
    # num = [x*10 for x in range(0, page)]
    # baidu expects result offsets in multiples of 10; _360 expects plain page indices
    num = [[keyword, offset] for offset in map(lambda x: x * 10, range(page))]
    numm = [[keyword, page_no] for page_no in range(page)]
    tmp_L = pool.starmap(baidu, num)
    for x in tmp_L:
        for a in x:
            _write(str(a))
    tmp_K = pool.starmap(_360, numm)
    for k in tmp_K:
        for t in k:
            print(t)
            _write(str(t))
    pool.close()
    pool.join()

def parallel_cv_loop(func, cv, parallel=True):
    """
    Performs a parallel training loop over the cv train_idx and test_idxs.

    Example:
        - func will usually be a class that contains df, labels info but whose __call__
          method will run a single training loop given train_idx, test_idx
        - This will run func.__call__(train_idx, test_idx) for each idx pair in cv and
          return the results

    Args:
        func (object): Class that has information relating to data, labels and takes a
            __call__(train_idx, test_idx) to run the loop.
        cv (list): List of [(train_idx, test_idx), ...] pairs.
        parallel (bool): Set to False for a plain for loop (allows for debugging).

    Return:
        (list): A list of whatever func outputs, one entry per cv idx pair.
    """
    if parallel:
        pool = Pool(len(cv))
        results = pool.starmap(func, cv)
        pool.close()
    else:
        results = []
        for args in cv:
            results.append(func(*args))
    return results

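# A hedged usage sketch for parallel_cv_loop: `func` needs to be picklable, so it is
# typically a module-level class holding the data. Everything below (_MeanBaseline,
# the toy data) is illustrative, not part of the original code.
import numpy as np

class _MeanBaseline:
    def __init__(self, X, y):
        self.X, self.y = X, y

    def __call__(self, train_idx, test_idx):
        # stand-in for "fit on train, score on test"
        return float(np.abs(self.y[test_idx] - self.y[train_idx].mean()).mean())

X, y = np.random.rand(100, 5), np.random.rand(100)
cv = [(np.arange(80), np.arange(80, 100)), (np.arange(20, 100), np.arange(20))]
scores = parallel_cv_loop(_MeanBaseline(X, y), cv, parallel=False)  # parallel=True spawns one process per fold
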
def main():
    getTournyIDs = []
    tournyIDs = []
    data = requests.get("https://majestic.battlefy.com/hearthstone-masters/tournaments?start={}T08:00:42.465Z&end={}T08:00:42.465Z".format(START_DATE, END_DATE)).json()
    for tourny in data:
        getTournyIDs.append(tourny['_id'])
    num_tourneys = len(getTournyIDs)

    for tourny in getTournyIDs:
        data = requests.get("https://majestic.battlefy.com/tournaments/{}/".format(tourny)).json()
        tournyIDs.append(data['stageIDs'][0])

    filepath = "csv.csv"
    print("Parsing {} tournaments using up to {} processes.".format(num_tourneys, NUM_PROCESSSES))
    p = Pool(processes=NUM_PROCESSSES)
    returnData = p.starmap(parseTournament, zip(tournyIDs, getTournyIDs, range(num_tourneys)))
    p.close()
    p.join()

    print("Parsing Complete\nStart CSV file write:")
    with open(filepath, "a", newline='\n', encoding='utf-8') as csvfile:
        if os.stat(filepath).st_size == 0:
            csvfile.write(",K,D,D,D")  # TournyID, Name, Deck, Deck, Deck
        for processString in returnData:
            split = processString.split(",")
            for line in split:
                csvfile.write("{}".format(line))
                csvfile.write(",")
    print("Finish CSV File Write")

def add_demographics_threaded(df, basedate):
    num_splits = 4
    dfs = np.array_split(df, num_splits)
    pool = Pool(processes=num_splits)
    names = ['_thread' + str(i) + '_' for i in range(num_splits)]
    basedate = [basedate] * num_splits
    sk_dfs = pool.starmap(add_demographics, zip(dfs, basedate, names))
    pool.close()
    pool.join()
    return pd.concat(sk_dfs)

def run_asynch_test(function, dataset, iterations, poolsize=10):
    p = Pool(poolsize)
    iteration_list = list(range(0, iterations))
    dataset_list = list(repeat(dataset, iterations))
    results = p.starmap(function, zip(dataset_list, iteration_list))
    p.close()
    p.join()
    trained = [result[0] for result in results]
    random = [result[1] for result in results]
    return trained, random

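# Usage note for run_asynch_test: `function` must be picklable (module-level) and
# return a (trained_metric, random_metric) pair for one iteration, e.g.
# (the function name here is hypothetical):
# trained, random = run_asynch_test(train_and_evaluate_once, my_dataset, iterations=50)
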
def evaluate_regions(folder_predicted: str, folder_gt: str, regions: dict, processes=default_num_threads):
    region_names = list(regions.keys())
    files_in_pred = subfiles(folder_predicted, suffix='.nii.gz', join=False)
    files_in_gt = subfiles(folder_gt, suffix='.nii.gz', join=False)
    have_no_gt = [i for i in files_in_pred if i not in files_in_gt]
    assert len(have_no_gt) == 0, "Some files in folder_predicted have no ground truth in folder_gt"
    have_no_pred = [i for i in files_in_gt if i not in files_in_pred]
    if len(have_no_pred) > 0:
        print("WARNING! Some files in folder_gt were not predicted (not present in folder_predicted)!")

    files_in_gt.sort()
    files_in_pred.sort()

    # run for all cases
    full_filenames_gt = [join(folder_gt, i) for i in files_in_pred]
    full_filenames_pred = [join(folder_predicted, i) for i in files_in_pred]

    p = Pool(processes)
    res = p.starmap(evaluate_case, zip(full_filenames_pred, full_filenames_gt,
                                       [list(regions.values())] * len(files_in_gt)))
    p.close()
    p.join()

    all_results = {r: [] for r in region_names}
    with open(join(folder_predicted, 'summary.csv'), 'w') as f:
        f.write("casename")
        for r in region_names:
            f.write(",%s" % r)
        f.write("\n")
        for i in range(len(files_in_pred)):
            f.write(files_in_pred[i][:-7])
            result_here = res[i]
            for k, r in enumerate(region_names):
                dc = result_here[k]
                f.write(",%02.4f" % dc)
                all_results[r].append(dc)
            f.write("\n")

        f.write('mean')
        for r in region_names:
            f.write(",%02.4f" % np.nanmean(all_results[r]))
        f.write("\n")
        f.write('median')
        for r in region_names:
            f.write(",%02.4f" % np.nanmedian(all_results[r]))
        f.write("\n")

        f.write('mean (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.mean(tmp))
        f.write("\n")
        f.write('median (nan is 1)')
        for r in region_names:
            tmp = np.array(all_results[r])
            tmp[np.isnan(tmp)] = 1
            f.write(",%02.4f" % np.median(tmp))
        f.write("\n")

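# evaluate_case receives (pred_file, gt_file, list_of_region_label_tuples) and, given
# the indexing above, must return one Dice score per region in the same order. A
# sketch under that assumption (returning NaN for regions empty in both images
# matches the "nan is 1" handling in the summary):
import numpy as np
import SimpleITK as sitk

def evaluate_case_sketch(file_pred, file_gt, regions):
    pred = sitk.GetArrayFromImage(sitk.ReadImage(file_pred))
    gt = sitk.GetArrayFromImage(sitk.ReadImage(file_gt))
    dices = []
    for region_labels in regions:  # each region is a tuple of labels, e.g. (1, 2, 3)
        p = np.isin(pred, region_labels)
        g = np.isin(gt, region_labels)
        denom = p.sum() + g.sum()
        dices.append(2 * (p & g).sum() / denom if denom > 0 else np.nan)
    return dices
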
def FilterAllOrganisedFiles():
    TotalTime = time.time()

    # Get all the WAV files under resources
    # wavFiles = glob.glob(join("resources", "f2cnn", "*", "*.WAV"))
    wavFiles = glob.glob(os.path.join("resources", "f2cnn", "**", "*.WAV"))
    if not wavFiles:
        # check before the banner below, which indexes wavFiles[0]
        print("NO WAV FILES FOUND, PLEASE ORGANIZE FILES")
        exit(-1)
    print("\n###############################\nApplying FilterBank to files in '{}'."
          .format(os.path.split(wavFiles[0])[0]))
    print(len(wavFiles), "files found")

    # #### READING CONFIG FILE
    config = ConfigParser()
    config.read('configF2CNN.conf')
    framerate = config.getint('FILTERBANK', 'FRAMERATE')
    nchannels = config.getint('FILTERBANK', 'NCHANNELS')
    lowcutoff = config.getint('FILTERBANK', 'LOW_FREQ')

    # ##### PREPARATION OF FILTERBANK
    # CENTER FREQUENCIES ON ERB SCALE
    CENTER_FREQUENCIES = filters.centre_freqs(framerate, nchannels, lowcutoff)
    # Filter coefficients for a Gammatone filterbank
    FILTERBANK_COEFFICIENTS = filters.make_erb_filters(framerate, CENTER_FREQUENCIES)

    # Usage of multiprocessing, to reduce computing time
    proc = cpu_count()
    counter = Value('i', 0)
    multiproc_pool = Pool(processes=proc, initializer=InitProcesses,
                          initargs=(FILTERBANK_COEFFICIENTS, counter,))
    multiproc_pool.starmap(GammatoneFiltering, zip(wavFiles, repeat(len(wavFiles))))

    print("Filtered and Saved all files.")
    print('          Total time:', time.time() - TotalTime)
    print('')

def main(argv):
    # parse command line
    parser = argparse.ArgumentParser(description="scrape national wikipedia data on coronavirus",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-z', '--tgz', action="store_true", default=False,
                        help="compress result directory to a single tgz file")
    parser.add_argument('-t', '--timeout', action="store", default=10, type=float,
                        help="http fetch timeout")
    parser.add_argument('-w', '--wikiprefix', action="store", default="https://en.m.wikipedia.org",
                        help="URL prefix of localized wikipedia pages")
    parser.add_argument('-l', '--listurl', action="store",
                        default="https://en.m.wikipedia.org/wiki/Template:2019%E2%80%9320_coronavirus_pandemic_data#covid19-container",
                        help="URL with list of countries/territories to fetch")
    parser.add_argument('-v', '--verbose', action="store_true", default=False,
                        help="print more info")
    parser.add_argument('-j', '--threads', action="store", default=128, type=int,
                        help="parallel download threads")
    args = parser.parse_args(argv)

    # create output directory
    outdir = slugify("output-" + datetime.now(tz=None).strftime("%d-%b-%Y (%H:%M:%S.%f)"),
                     replacements=[['%', '_percent_'], [':', '-']])
    os.mkdir(outdir)

    # fetch list of country pages in parallel
    # we get the country list from the table in args.listurl
    country_list = requests.get(args.listurl).text
    country_soup = BeautifulSoup(country_list, 'lxml')
    loc_tbl = country_soup.find('table')
    rows = loc_tbl.find_all('tr')
    # end the list of rows when we see class="sortbottom" in a <tr> tag
    idx = next(i for i, tr in enumerate(rows) if tr.has_attr('class') and 'sortbottom' in tr['class'])
    rows = rows[0:idx - 1]
    # in order to parallelize, need to convert soup back to strings
    rows = [str(tr) for tr in rows]

    pool = Pool(args.threads)
    # fetch_countries takes two arguments, so we zip the rows with the constant args into pairs
    poolargs = [outdir, args.wikiprefix, args.timeout, args.verbose]
    results = pool.starmap(fetch_countries, zip(rows, repeat(poolargs)))
    # remove non-errors, keeping only the failures
    errors = list(filter(None, results))

    if args.tgz:
        outfile = outdir + '.tgz'
        os.system('tar czf ' + outfile + ' ' + outdir)
        os.system('rm -rf ' + outdir)
        if args.verbose:
            cprint('created output file ' + outfile)

    if len(errors) > 0:
        cprint("errors encountered fetching these: " + str(errors), 'red')
    else:
        print("all pages fetched successfully")

def aggregate_simulation_empirical_im_permutations(
    fsf, n_processes, sim_root, version, logger: Logger = get_basic_logger()
):
    events = load_fault_selection_file(fsf)
    logger.debug(f"Loaded {len(events)} events from the fault selection file")
    events = [
        name if count == 1 else get_realisation_name(name, 1)
        for name, count in events.items()
    ]

    worker_pool = Pool(n_processes)
    worker_pool.starmap(
        agg_emp_perms,
        [
            (
                pathlib.Path(get_empirical_dir(sim_root, event)),
                event,
                version,
                get_realisation_logger(logger, event).name,
            )
            for event in events
        ],
    )
    worker_pool.close()
    worker_pool.join()

def match_object_ids(ra, dec, limit_angle='2 arcsec', name_order=None):
    """Get the id from Simbad for every object in a RA, Dec list."""
    # Perform the queries in parallel to hide the online query overhead
    func = partial(simbad_query_id, name_order=name_order, limit_angle=limit_angle)
    p = Pool(MAX_PARALLEL_QUERY)
    results = p.starmap(func, list(zip(ra, dec)))
    p.close()
    p.join()
    return results

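# Argument-shape note: with the keyword arguments frozen by functools.partial, each
# starmap call unpacks one (ra, dec) pair positionally, i.e. it is equivalent to the
# serial loop
# results = [simbad_query_id(r, d, name_order=name_order, limit_angle=limit_angle)
#            for r, d in zip(ra, dec)]
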
def evaluator(ref: ndarray, deg: ndarray, sr: int, pool: Pool = None) -> float:
    length = len(ref)
    assert length == len(deg)
    return sum(pool.starmap(func=func, iterable=((r, d, sr) for (r, d) in zip(ref, deg)))) / length

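# `func` above is a module-level worker (it must live at module scope to be picklable)
# that scores one (reference, degraded) pair at sample rate sr. The actual metric is
# not shown in this snippet; a placeholder with the expected signature:
import numpy as np

def func(r: np.ndarray, d: np.ndarray, sr: int) -> float:
    # stand-in score: negative mean squared error (an assumption, not the original metric)
    n = min(len(r), len(d))
    return float(-np.mean((r[:n] - d[:n]) ** 2))
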
def main(): """Used to test the Archipelago class. Tests 3 random seeds between 0 and 65535 with weathering values of 1, 3, 5 and sea_level values between -20 and 32 in steps of 4. """ t = TicToc() t.tic() args = [] for seed in random.sample(range(0, int("0xFFFF", 16)), 3): for weathering in [1, 3, 5]: for sea_level in range(-20, 32, 4): sea_level = sea_level / 100 # range() can't be used to generate a list of floats args.append([seed, weathering, sea_level]) pool = Pool(multiprocessing.cpu_count()) print("Total archipelagos being generated:", len(args)) pool.starmap(test, args) pool.close() pool.join() t.toc() print("Total time elapsed (seconds): {0:.2f}".format(t.elapsed))
def train_all_models_lgb_combined(combined_model_name, models_with_folds):
    X_all_combined = []
    y_all_combined = []

    requests = []
    for model_with_folds in models_with_folds:
        for model_name, fold in model_with_folds:
            requests.append((model_name, fold))
            # results.append(load_one_model(requests[-1]))

    pool = Pool(40)
    with utils.timeit_context('load all data'):
        results = pool.starmap(load_train_data, requests)

    for model_with_folds in models_with_folds:
        X_combined = []
        y_combined = []
        for model_name, fold in model_with_folds:
            X, y, video_ids = results[requests.index((model_name, fold))]
            print(model_name, fold, X.shape)
            X_combined.append(X)
            y_combined.append(y)
        X_all_combined.append(np.row_stack(X_combined))
        y_all_combined.append(np.row_stack(y_combined))

    X = np.column_stack(X_all_combined)
    y = y_all_combined[0]
    print(X.shape, y.shape)

    y_cat = np.argmax(y, axis=1)
    print(X.shape, y.shape)
    print(np.unique(y_cat))

    with utils.timeit_context('fit'):
        param = {
            'num_leaves': 50,
            'objective': 'multiclass',
            'max_depth': 5,
            'learning_rate': .05,
            'max_bin': 300,
            'num_class': NB_CAT,
            'metric': ['multi_logloss']
        }
        model = lgb.train(param, lgb.Dataset(X, label=y_cat), num_boost_round=260)

    pickle.dump(model, open(f"../output/lgb_combined_{combined_model_name}.pkl", "wb"))

def _process(cls, spectrum, filter_spectrum, *args):
    """This private class method processes a spectrum with a given filter and parameters.

    .. note::
        If no filter spectrum is provided, None is passed as the filter spectrum
        point to all processes.

    This method uses multiprocessing.
    """
    if filter_spectrum:
        resampled_filter_spectrum = filter_spectrum.resample(spectrum)
        resampled_filter_lines = resampled_filter_spectrum.lines
    else:
        resampled_filter_lines = repeat(None, len(spectrum.lines))

    data = zip(spectrum.lines, resampled_filter_lines, repeat(args))
    p = Pool()
    y_values = p.starmap(cls._func, data)
    p.close()
    p.join()

    lines = zip(spectrum.x_values, y_values)
    return type(spectrum)(lines, interpolation=spectrum.interpolation)

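# Argument-shape note for the snippet above: repeat(args) keeps the extra parameters
# packed, so each worker call is cls._func(line, filter_line, args) and _func must
# unpack the tuple itself. _func also has to be picklable, typically a staticmethod
# or a module-level function.
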
article = Article(URL_PREFIX + "/wiki/Special:Export/Template:Periodic_table")
categories = []
params = []
for row in article.get_table("table 1"):
    for key, value in row.items():
        segments = [segment.strip() for segment in value.split(";")]
        if len(segments) >= 7:
            if segments[5].lower() not in categories:
                categories.append(segments[5].lower())
            params.append(
                (
                    segments[1],
                    segments[7].replace(" ", "_") if len(segments) > 7 else segments[1].capitalize(),
                    ionization_energies,
                    element_names,
                    categories.index(segments[5].lower()),
                )
            )

pool = Pool(processes=multiprocessing.cpu_count() * 2)
json_data = pool.starmap(parse, params)
pool.close()
pool.join()

# Save
with open(OUTPUT_JSON, "w+") as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4, ensure_ascii=False)