Example #1
                elif 'ServerNotFoundError' in e.args[0] or \
                        'Unable to find the server' in e.args[0] or \
                        'getaddrinfo failed' in e.args[0] or \
                        'connection attempt failed' in e.args[0]:
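                    # transient network error: wait and retry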

                    log.lprint('Waiting {} secs...'.format(str(wait)))
                    time.sleep(wait)
                    continue

            else:
                continue

        # write all extracted dictionaries to file
        if not Handler(OUTFILE).file_exists():
            Handler.write_to_csv(temp_dicts,
                                 header=True,
                                 append=False,
                                 outfile=OUTFILE)

        else:
            Handler.write_to_csv(temp_dicts,
                                 header=False,
                                 append=True,
                                 outfile=OUTFILE)

        time2 = datetime.datetime.now()

        # time1 is assumed to be recorded at the start of the site loop (not shown in this excerpt)
        log.lprint(
            'Time taken for site {s} ({ii} of {nn}): {t} seconds'.format(
                s=str(temp_dicts[0]['site']),
                ii=str(site_count + 1),
                nn=str(n_samp),
                t=str((time2 - time1).total_seconds())))

                if len(num_list) > 1:

                    md_vec = [md_vec[x] for x in num_list]

                    # find all MD values at or below the cutoff percentile, excluding NaNs
                    loc = list(i for i, x in enumerate(md_vec)
                               if (x <= np.percentile(md_vec, md_cutoff) and not np.isnan(x)))

                    out_samples += list(binned_samp_dicts[i] for i in loc)

                else:
                    out_samples += binned_samp_dicts

            else:

                Opt.cprint('Too few samples for cleaning')

                out_samples += binned_samp_dicts

        Opt.cprint('After Mahalanobis dist removal of all samples above {} percentile: {}'.format(
            str(md_cutoff), str(len(out_samples))))
    else:
        out_samples = out_list

    Handler.write_to_csv(out_samples, outfile)

    Opt.cprint('Done!')
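
As a rough illustration of the percentile-based Mahalanobis filtering used in the snippet above, here is a minimal, self-contained sketch; the function name filter_by_mahalanobis, the samples argument, and the demo data are hypothetical and not part of the original code.

import numpy as np


def filter_by_mahalanobis(samples, md_cutoff=95.0):
    """Keep rows whose Mahalanobis distance from the sample mean is at or
    below the md_cutoff percentile of all distances (illustrative sketch)."""
    arr = np.asarray(samples, dtype=float)
    mean_vec = arr.mean(axis=0)

    # pseudo-inverse guards against a singular covariance matrix
    inv_cov = np.linalg.pinv(np.cov(arr, rowvar=False))

    # squared Mahalanobis distance of each row from the mean
    diff = arr - mean_vec
    md_vec = np.sqrt(np.einsum('ij,jk,ik->i', diff, inv_cov, diff))

    # keep only rows at or below the cutoff percentile
    keep = np.where(md_vec <= np.percentile(md_vec, md_cutoff))[0]
    return [samples[i] for i in keep]


if __name__ == '__main__':
    rng = np.random.default_rng(42)
    demo = rng.normal(size=(100, 3)).tolist()
    print(len(filter_by_mahalanobis(demo, md_cutoff=90.0)))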



Example #3
    res3 = ax1.hist(out_decid_frac_list,
                    color='#FFA500',
                    edgecolor='black',
                    bins=int(nbins))

    # save plot
    fig3.savefig(outdir + '/sample_distribution_plot_v{}.png'.format(version), bbox_inches='tight')
    '''
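    # shuffle the sample order before writing outputs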
    out_samp_index = list(range(len(out_decid_frac_samp)))
    np.random.shuffle(out_samp_index)

    out_decid_frac_samp = list(out_decid_frac_samp[i] for i in out_samp_index)

    # write csv file
    Handler.write_to_csv(out_decid_frac_samp, outfile)

    # write shp file---------
    attribute_types = {'site': 'str', 'year': 'int', 'decid_frac': 'float'}

    trn_data = list()

    print('Total {} sites'.format(str(len(out_decid_frac_samp))))

    ntrn = int((trn_perc * len(out_decid_frac_samp)) / 100.0)
    nval = len(out_decid_frac_samp) - ntrn

    # randomly select training samples based on number
    trn_sites = Sublist(range(len(out_decid_frac_samp))).random_selection(ntrn)

    # get the rest of samples as validation samples