elif 'ServerNotFoundError' in e.args[0] or \ 'Unable to find the server' in e.args[0] or \ 'getaddrinfo failed' in e.args[0] or \ 'connection attempt failed' in e.args[0]: log.lprint('Waiting 30 secs...'.format(str(wait))) time.sleep(wait) continue else: continue # all extracted dictionaries to file if not Handler(OUTFILE).file_exists(): Handler.write_to_csv(temp_dicts, header=True, append=False, outfile=OUTFILE) else: Handler.write_to_csv(temp_dicts, header=False, append=True, outfile=OUTFILE) time2 = datetime.datetime.now() log.lprint( 'Time taken for site {s} ({ii} of {nn}): {t} seconds'.format( s=str(temp_dicts[0]['site']), ii=str(site_count + 1), nn=str(n_samp),
if len(num_list) > 1: md_vec = [md_vec[x] for x in num_list] # find all MD values that are less than cutoff percentile loc = list(i for i, x in enumerate(md_vec) if (x <= np.percentile(md_vec, md_cutoff) and x != np.nan)) out_samples += list(binned_samp_dicts[i] for i in loc) else: out_samples += binned_samp_dicts else: Opt.cprint('Too few samples for cleaning') out_samples += binned_samp_dicts Opt.cprint('After Mahalanobis dist removal of all samp above {} percentile: {}'.format(str(md_cutoff), str(len(out_samples)))) else: out_samples = out_list Handler.write_to_csv(out_samples, outfile) Opt.cprint('Done!')
res3 = ax1.hist(out_decid_frac_list, color='#FFA500', edgecolor='black', bins=int(nbins)) # save plot fig3.savefig(outdir + '/sample_distribution_plot_v{}.png'.format(version), bbox_inches='tight') ''' out_samp_index = list(range(len(out_decid_frac_samp))) np.random.shuffle(out_samp_index) out_decid_frac_samp = list(out_decid_frac_samp[i] for i in out_samp_index) # write csv file Handler.write_to_csv(out_decid_frac_samp, outfile) # write shp file--------- attribute_types = {'site': 'str', 'year': 'int', 'decid_frac': 'float'} trn_data = list() print('Total {} sites'.format(str(len(out_decid_frac_samp)))) ntrn = int((trn_perc * len(out_decid_frac_samp)) / 100.0) nval = len(out_decid_frac_samp) - ntrn # randomly select training samples based on number trn_sites = Sublist(range(len(out_decid_frac_samp))).random_selection(ntrn) # get the rest of samples as validation samples