def plot_obs_pred(obs, pred, radius, loglog, ax = None, inset = False, sites = None):
    """Generic function to generate an observed vs predicted figure with 1:1 line

    obs, pred -- observed and predicted values; assumed to support boolean-mask
        indexing (e.g. numpy arrays).
    radius -- point-density radius forwarded to plot_color_by_pt_dens.
    loglog -- if truthy, use log-log axes and compute R^2 on log10 values,
        restricted to pairs where both obs and pred are nonzero.
    ax -- matplotlib axis to draw on; a new 3.5x3.5-inch figure/axis is
        created when None.
    inset -- if True, add an inset histogram of per-site r^2 values.
    sites -- per-point site labels; only used when inset is True.

    Returns the axis the plot was drawn on.
    """
    if not ax:
        fig = plt.figure(figsize = (3.5, 3.5))
        ax = plt.subplot(111)

    # Lower limit sits just below the smallest positive value; the upper limit
    # is padded more generously on log axes than on linear ones.
    axis_min = 0.9 * min(list(obs[obs > 0]) + list(pred[pred > 0]))
    if loglog:
        axis_max = 3 * max(list(obs)+list(pred))
    else:
        axis_max = 1.1 * max(list(obs)+list(pred))
    macroecotools.plot_color_by_pt_dens(np.array(pred), np.array(obs), radius, loglog=loglog, plot_obj = ax)
    # 1:1 reference line and square limits.
    plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
    plt.xlim(axis_min, axis_max)
    plt.ylim(axis_min, axis_max)
    ax.tick_params(axis = 'both', which = 'major', labelsize = 6)
    if loglog:
        # (obs != 0) * (pred != 0) is an element-wise AND mask keeping only
        # pairs where both values are nonzero, so log10 is defined for both.
        plt.annotate(r'$R^2$ = %0.2f' %macroecotools.obs_pred_rsquare(np.log10(obs[(obs != 0) * (pred != 0)]), np.log10(pred[(obs != 0) * (pred != 0)])),
                     xy = (0.05, 0.85), xycoords = 'axes fraction', fontsize = 7)
    else:
        plt.annotate(r'$R^2$ = %0.2f' %macroecotools.obs_pred_rsquare(obs, pred),
                     xy = (0.05, 0.85), xycoords = 'axes fraction', fontsize = 7)
    if inset:
        # Inset in the lower-right corner (loc=4) holding the per-site r^2
        # histogram; tick labels are stripped below to keep it compact.
        axins = inset_axes(ax, width="30%", height="30%", loc=4)
        if loglog:
            hist_mete_r2(sites[(obs != 0) * (pred != 0)], np.log10(obs[(obs != 0) * (pred != 0)]), 
                         np.log10(pred[(obs != 0) * (pred != 0)]))
        else:
            hist_mete_r2(sites, obs, pred)
        plt.setp(axins, xticks=[], yticks=[])
    return ax
def bootstrap_SDR(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for ISD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to one file on disk for R^2.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    # Restrict to the focal site and clean it for AGSNE-style analysis.
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)    
    # State variables: G genera, S species, N individuals, E total energy.
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    
    # Per-species parameters [m, n]: n is the species' abundance and m the
    # number of species in the same genus.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        n = len(dat_sp)
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, n])
        
    # Observed/predicted SDR values for this site; first output column is the
    # empirical R^2 on log10 scale, followed by Niter bootstrap R^2 values.
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' + model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred']
    obs = pred_obs[pred_obs['site'] == site]['obs'] 
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    
    # Individual-level size distributions, one per model.
    # NOTE(review): the 2/3 exponent assumes true division (Python 3 or
    # `from __future__ import division`); under classic Py2 integer division
    # 2/3 would evaluate to 0 - confirm against the file header.
    iisd_agsne = mete_distributions.theta_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    # Rescale dbh so the smallest individual has size 1 (SSNT parameterization).
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled ** 1) - N))
    iisd_ssnt_1 = ssnt_isd_bounded(2/3, N / (sum(dbh_scaled ** (2/3)) - N))
    dist_for_model = {'ssnt_0': iisd_ssnt_0, 'ssnt_1': iisd_ssnt_1, 'asne': iisd_asne, 'agsne': iisd_agsne}
    dist = dist_for_model[model]
        
    for i in range(Niter):
        # Resample each species' individuals and recompute its mean size
        # measure. NOTE(review): rvs call signatures differ by model - for
        # 'asne' n is passed twice (presumably (n, size)) and for 'agsne' the
        # genus richness m is also passed; confirm against the distribution
        # classes in mete_distributions.
        if model in ['ssnt_0', 'ssnt_1']: obs_boot = np.array([np.mean((dist.rvs(par[1])) ** 2) for par in par_list]) # Here par[1] is n for each species
        elif model == 'asne': 
            obs_boot = np.array([np.mean(np.array(dist.rvs(par[1], par[1]))) for par in par_list])
        else:
            obs_boot = np.array([np.mean(np.array(dist.rvs(par[1], par[1], par[0]))) for par in par_list])
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
    
    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
Пример #3
0
def fig5(SADModels):
    """ This function generates a 2x2 figure, with these subplots:
        One subplot for each model:
            r-squared vs. N
            list of r-squared values and list of Ns
            plotted against each other
    """
    fig = plt.figure()

    for i, model in enumerate(SADModels):

        fig.add_subplot(2, 2, i+1)

        obs_pred_data = import_obs_pred_data(mydir + '/Results/' + model + '.txt')
        obs = ((obs_pred_data["obs"]))
        pred = ((obs_pred_data["pred"]))
        site = ((obs_pred_data["site"]))

        obs_data = []
        pred_data = []

        for sites in np.unique(site):
            obs_data.append(obs[sites==site])
            pred_data.append(pred[sites==site])

        Ns = []
        r2s = []

        for j, sad in enumerate(obs_data):

            r2 = macroecotools.obs_pred_rsquare(np.array(sad), np.array(pred_data[j]))
            r2s.append(r2)
            N = sum(sad) # Find Total Abundance
            Ns.append(N)

        plt.scatter(np.log(Ns).tolist(), r2s, color='Maroon', label=model, alpha = 0.5) # label is for the legend
        plt.xlabel('Log Abundance', fontsize=8)
        plt.ylabel('Rsquared Value', fontsize=8)
        plt.subplots_adjust(wspace = .35, hspace = .35)
        plt.axhline(y = 0)
        
        if model == 'SimBrokenStick':
            plt.title("Broken Stick R^2 v N", fontsize = 10)

        elif model == 'SimLogNormInt':
            plt.title("Log Norm R^2 v N", fontsize = 10)

        elif model == 'SimpleRandomFraction':
            plt.title("Random Fraction R^2 v N", fontsize = 10)

        elif model == 'SimParetoInt':
            plt.title("Pareto Int R^2 v N", fontsize = 10)

        print model + ': Done'

    
        # insert code to plot a legend
    plt.savefig('/Users/Nathan_Hillis/GitHub/SADModels/Results/R2vN.png', dpi=600, bbox_inches = 'tight', pad_inches=0.03)
    plt.show()
    return
def bootstrap_SAD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for SAD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    # Restrict to the focal site and clean it for AGSNE-style analysis.
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)    
    # State variables: G genera, S species, N individuals, E total energy.
    G, S, N, E = get_GSNE(dat_clean)
    # SSNT uses the untruncated log-series; ASNE uses the default (truncated).
    beta_ssnt = mete.get_beta(S, N, version = 'untruncated')
    beta_asne = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    sad_agsne = mete_distributions.sad_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    # Model SAD for each of the four candidate models; both SSNT variants
    # share the same (untruncated log-series) SAD.
    dist_for_model = {'ssnt_0': stats.logser(np.exp(-beta_ssnt)), 
                      'ssnt_1': stats.logser(np.exp(-beta_ssnt)), 
                      'asne': md.trunc_logser(np.exp(-beta_asne), N),
                      'agsne': sad_agsne}
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' + model + '.csv')
    # Reversed with [::-1] so abundances are ascending, matching the
    # sorted bootstrap samples drawn below (RAD files presumably store
    # rank-abundance in descending order - confirm against the writer).
    pred = pred_obs[pred_obs['site'] == site]['pred'][::-1]
    obs = pred_obs[pred_obs['site'] == site]['obs'][::-1]
    
    # Empirical statistics: R^2 on log10 abundances and the KS distance
    # between the empirical CDF and the model CDF.
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    emp_cdf = mtools.get_emp_cdf(obs)
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    
    for i in range(Niter):
        # Draw S abundances from the model SAD and recompute both statistics
        # against the same predicted RAD.
        obs_boot = np.array(sorted(dist.rvs(S)))
        cdf_boot = np.array([dist.cdf(x) for x in obs_boot])
        emp_cdf_boot = mtools.get_emp_cdf(obs_boot)
        out_list_rsquare.append(str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))
        out_list_ks.append(str(max(abs(emp_cdf_boot - np.array(cdf_boot)))))
    
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt', ",".join(str(x) for x in out_list_ks))
Пример #5
0
def obs_pred_r2_analysis(datasets, data_dir='./data/'):
    """Calculated the coefficients of determination for the METE SAD"""
    for dataset in datasets:
        # Load observed and predicted abundances for this dataset and report
        # the coefficient of determination on log10-transformed values.
        data = import_obs_pred_data(data_dir + dataset + '_obs_pred.csv')
        loglog_R2 = macroecotools.obs_pred_rsquare(np.log10(data["obs"]),
                                                   np.log10(data["pred"]))
        print("%s: log-log R^2 = %s" % (dataset.upper(), loglog_R2))
Пример #6
0
def obs_pred_r2_multi(methods, datasets, data_dir='/home/kenlocey/data1/'): # TAKEN FROM THE mete_sads.py script
    print 'generating 1:1 line R-square values for dataset(s)'

    for i, dataset in enumerate(datasets):
        for j, method in enumerate(methods):
            obs_pred_data = import_obs_pred_data(data_dir + dataset + '/' + dataset + '_obs_pred.txt')
            obs = ((obs_pred_data["obs"]))
            pred = ((obs_pred_data["pred"]))
            print method, dataset,' ', macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
def plot_r2_comp(name_site_combo,
                 dat_dir='./out_files/',
                 out_fig_dir='./out_figs/'):
    """Plot r2 of the three patterns separately for each community.

    name_site_combo - iterable of (dat_name, site) pairs identifying communities
    dat_dir - directory holding the <dat>_obs_pred_<pattern>_<model>.csv files
    out_fig_dir - directory where r2_comp.png is written

    Draws one panel per pattern (SAD, ISD, SDR); each panel scatters the r^2
    of AGSNE/SSNT_N/SSNT_M against the r^2 of ASNE with a 1:1 line.
    """
    models = ['asne', 'agsne', 'ssnt_0', 'ssnt_1']
    model_names = ['ASNE', 'AGSNE', 'SSNT_N', 'SSNT_M']
    patterns = ['rad', 'isd', 'sdr']
    pattern_names = ['SAD', 'ISD', 'SDR']
    col_list = ['b', '#787878', 'r']
    symbol_list = ['o', 's', '*']

    fig = plt.figure(figsize=(10.5, 3.5))
    for i, pattern in enumerate(patterns):
        # r^2 per model (keyed) plus a pooled list used for axis limits.
        r2_dic = {'asne': [], 'agsne': [], 'ssnt_0': [], 'ssnt_1': []}
        r2_list = []
        for j, model in enumerate(models):
            for dat_name, site in name_site_combo:
                pred_obs_model_pattern = wk.import_obs_pred_data(dat_dir +
                                                                 dat_name +
                                                                 '_obs_pred_' +
                                                                 pattern +
                                                                 '_' + model +
                                                                 '.csv')
                pred_obs_site = pred_obs_model_pattern[
                    pred_obs_model_pattern['site'] == site]
                r2 = mtools.obs_pred_rsquare(np.log10(pred_obs_site['obs']),
                                             np.log10(pred_obs_site['pred']))
                r2_dic[model].append(r2)
                r2_list.append(r2)

        ax = plt.subplot(1, 3, i + 1)
        # Scatter the three non-ASNE models (indices 1..3) against ASNE.
        for j in range(1, 4):
            model = models[j]
            plt.scatter(r2_dic['asne'],
                        r2_dic[model],
                        s=20,
                        marker=symbol_list[j - 1],
                        facecolors=col_list[j - 1],
                        edgecolors='none',
                        label=model_names[j])
        # Pad limits by 10% outward; the sign checks keep the padding outward
        # when the extreme value is negative.
        min_val, max_val = min(r2_list), max(r2_list)
        if min_val < 0: axis_min = 1.1 * min_val
        else: axis_min = 0.9 * min_val
        if max_val < 0: axis_max = 0.9 * max_val
        else: axis_max = 1.1 * max_val
        plt.plot([axis_min, axis_max], [axis_min, axis_max], 'k-')
        plt.xlim(axis_min, axis_max)
        plt.ylim(axis_min, axis_max)
        ax.tick_params(axis='both', which='major', labelsize=6)
        ax.set_xlabel(r'$R^2$ of ASNE', labelpad=4, size=10)
        ax.set_ylabel(r'$R^2$ of the other models', labelpad=4, size=10)
        ax.set_title(pattern_names[i], size=16)
        if i == 0: ax.legend(loc=2, prop={'size': 10})

    plt.subplots_adjust(left=0.08, wspace=0.3)
    plt.tight_layout()
    plt.savefig(out_fig_dir + 'r2_comp.png', dpi=400)
Пример #8
0
def obs_pred_r2_multi(methods, data_dir = mydir + '/results/'): 
    # TAKEN FROM THE mete_sads.py script
    print 'generating 1:1 line R-square values for dataset(s)'

    for j, method in enumerate(methods):
        obs_pred_data = import_obs_pred_data(data_dir + dataset + '/' + dataset + '_obs_pred.txt')
        obs = ((obs_pred_data["obs"]))
        pred = ((obs_pred_data["pred"]))
        print method,' ', macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
Пример #9
0
def figSuppp(figname = 'SuppFig3', data_dir=mydir, radius=2):
    """Two-panel obs-vs-pred figure (Nmax and S) with density-colored points.

    figname - file name (without extension); saved under mydir + 'figures/'
    data_dir - accepted for signature compatibility; paths below are built
        from mydir
    radius - point-density radius for plot_color_by_pt_dens
    """
    fig = plt.figure()
    plot_dim = 2
    count = 0

    IN_Obs_Pred = importData.import_NSR2_data(mydir + \
        'data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt')
    N = np.asarray(list(((IN_Obs_Pred["N"]))))
    S = np.asarray(list(((IN_Obs_Pred["S"]))))
    NmaxObs = np.asarray(list(((IN_Obs_Pred["NmaxObs"]))))
    NmaxPred = []
    SPred = []
    for i in range(len(N)):
        # Build the predictor once per community and reuse it for both
        # predictions; the original constructed an identical predictS object
        # twice per iteration.
        pred_i = importPredictS.predictS(N[i], NmaxObs[i], predictNmax=True)
        NmaxPred.append(pred_i.getNmax())
        SPred.append(pred_i.getS())
    NmaxPred = np.asarray(NmaxPred)
    SPred = np.asarray(SPred)
    toIteratePred = [NmaxPred, SPred]
    toIterateObs = [NmaxObs, S]
    for x in range(2):
        axis_min = 0
        axis_max = 2 * max(toIteratePred[x])
        # Panels share one row: subplot grid is (plot_dim-1) x plot_dim.
        ax = fig.add_subplot(plot_dim-1, plot_dim, count+1)
        if x == 0:
            ax.set_title(r"$\mathbf{N_{max}}$")
        else:
            ax.set_title(r"$\mathbf{S}$")

        macroecotools.plot_color_by_pt_dens(toIteratePred[x], toIterateObs[x], radius, loglog=1,
                        plot_obj=plt.subplot(plot_dim-1,plot_dim,count+1))
        # 1:1 reference line.
        plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
        plt.xlim(axis_min, axis_max)
        plt.ylim(0, axis_max)
        # In-panel modified r^2 on log10-transformed values.
        r2_all = macroecotools.obs_pred_rsquare(np.log10(toIterateObs[x]), np.log10(toIteratePred[x]))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r',r2_all , p=2)
        plt.text(0.18, 0.93, r2text,  fontsize=10,
            horizontalalignment='center',
            verticalalignment='center',transform = ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=7)
        plt.subplots_adjust(wspace=0.5, hspace=0.3)

        #axins = inset_axes(ax, width="30%", height="30%", loc=4)

        ax.set(adjustable='box-forced', aspect='equal')
        #plt.setp(axins, xticks=[], yticks=[])

        count += 1
    fig.text(0.50, 0.04, r'$Predicted$', ha='center', va='center')
    fig.text(0.05, 0.5, r'$Observed$', ha='center', va='center', rotation='vertical')
    fig_name = str(mydir + 'figures/' + figname + '.png')
    plt.savefig(fig_name, dpi=600)#, bbox_inches = 'tight')#, pad_inches=0)
    plt.close()
Пример #10
0
def hist_mete_r2(sites, obs, pred):
    """Plot a histogram of per-site obs-pred r^2 values, as fractions of sites.

    sites - site label for each obs/pred pair
    obs, pred - arrays supporting boolean-mask indexing

    Fixes vs. the original:
    - iterate np.unique(sites) so each site contributes one r^2 (the raw
      `sites` loop recomputed the same r^2 once per record), matching the
      sibling hist_mete_r2 definitions in this file;
    - float() the denominator: under Python 2, numpy-int counts divided by
      an int floor to 0, flattening the plotted histogram.
    """
    r2s = []
    for site in np.unique(sites):
        obs_site = obs[sites==site]
        pred_site = pred[sites==site]
        r2 = macroecotools.obs_pred_rsquare(obs_site, pred_site)
        r2s.append(r2)
    hist_r2 = np.histogram(r2s, range=(0, 1))
    # Shift bin edges right by one bin width and drop the last edge to get
    # one x position per bin.
    xvals = hist_r2[1] + (hist_r2[1][1] - hist_r2[1][0])
    xvals = xvals[0:len(xvals)-1]
    yvals = hist_r2[0] / float(len(r2s))
    plt.plot(xvals, yvals, 'k-', linewidth=2)
    plt.axis([0, 1, 0, 1])
Пример #11
0
def hist_mete_r2(sites, obs, pred):  # TAKEN FROM Macroecotools or the mete_sads.py script used for White et al. (2012)
    """Generate a kernel density estimate of the r^2 values for obs-pred plots"""
    # One obs-vs-pred r^2 per unique site.
    r2s = [macroecotools.obs_pred_rsquare(obs[sites == s], pred[sites == s])
           for s in np.unique(sites)]

    counts, edges = np.histogram(r2s, range=(0, 1))
    bin_width = edges[1] - edges[0]
    # Shift all edges right by one bin width, then drop the final edge so
    # there is one x position per histogram bin.
    xvals = (edges + bin_width)[:-1]
    yvals = counts
    plt.plot(xvals, yvals, 'k-', linewidth=2)
    plt.axis([0, 1, 0, 1.1 * max(yvals)])
Пример #12
0
def hist_mete_r2(sites, obs, pred):  # TAKEN FROM Macroecotools or the mete_sads.py script used for White et al. (2012)
    """Generate a kernel density estimate of the r^2 values for obs-pred plots"""
    # Compute one obs-vs-pred r^2 per unique site label.
    site_r2 = []
    for current in np.unique(sites):
        mask = sites == current
        site_r2.append(macroecotools.obs_pred_rsquare(obs[mask], pred[mask]))

    freq, bins = np.histogram(site_r2, range=(0, 1))
    step = bins[1] - bins[0]
    # One x position per bin: each left edge shifted right by one bin width.
    x = [edge + step for edge in bins[:-1]]
    y = freq
    plt.plot(x, y, 'k-', linewidth=2)
    plt.axis([0, 1, 0, 1.1 * max(y)])
Пример #13
0
def generate_obs_pred_data(datasets, methods):
    """For each method/dataset pair, read per-site SADs, build the predicted
    RAD ('geom' or 'mete'), and print the per-site log-log r^2.

    NOTE(review): all file-writing lines are commented out, so this currently
    only prints diagnostics to stdout.
    """

    for method in methods:
        for dataset in datasets:

            gN = 0  # running total of individuals across sites
            #OUT = open(mydir+'/data/'+method+'_'+dataset+'_obs_pred.txt','w+')
            IN = mydir+'/MicroMETE/data/'+dataset+'_SADs.txt'
            # First pass just counts lines so a countdown can be printed.
            num_lines = sum(1 for line in open(IN))

            for line in open(IN):

                # One site per line: whitespace-separated integer abundances.
                line = line.split()
                obs = map(int, line)
                # NOTE(review): x > 1 drops abundance-1 species (singletons),
                # not just zeros - confirm this filter is intentional.
                obs = list([x for x in obs if x > 1])

                N = sum(obs)
                # NOTE(review): gN accumulates before the S < 10 skip below,
                # so skipped sites still count toward N(HMP) - confirm.
                gN += N
                print N
                S = len(obs)

                # Skip species-poor sites.
                if S < 10:
                    continue

                # Rank abundances from most to least abundant.
                obs.sort()
                obs.reverse()
                print method, dataset, N, S, 'countdown:', num_lines,

                if method == 'geom': # Predicted geometric series
                    pred = predRADs.get_GeomSeries(N, S, False) # False mean no zeros allowed

                elif method == 'mete': # Predicted log-series
                    logSeries = mete.get_mete_rad(S, N)
                    pred = logSeries[0]

                r2 = macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
                print " r2:", r2

                # write to file, by cite, observed and expected ranked abundances
                #for i, sp in enumerate(pred):
                #    print>> OUT, obs[i], pred[i]

                num_lines -= 1

            print 'N(HMP): ',gN
            #OUT.close()

        print dataset
0
def plot_obs_pred_sad(SADModels, data_dir, radius=2): 
    # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)        ########################################################################################

    """Multiple obs-predicted plotter

    SADModels - model names; one subplot (2x2 grid) per model, data read
        from data_dir + <model>.txt
    data_dir - directory holding the obs-pred text files
    radius - point-density radius for plot_color_by_pt_dens

    Saves the figure to mydir + '/Results/obs_pred_plots.png' and shows it.
    """
    fig = plt.figure()

    for i, model in enumerate(SADModels):

        fig.add_subplot(2, 2, i+1)

        obs_pred_data = import_obs_pred_data(data_dir + model + '.txt') 
        site = ((obs_pred_data["site"]))
        obs = ((obs_pred_data["obs"]))
        pred = ((obs_pred_data["pred"]))

        # Axis limits padded around the observed range (log axes below).
        axis_min = 0.5 * min(obs)
        axis_max = 2 * max(obs)

        macroecotools.plot_color_by_pt_dens(pred, obs, radius, loglog=1,
                        plot_obj=plt.subplot(2, 2, i+1))

        # 1:1 reference line.
        plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
        plt.xlim(axis_min, axis_max)
        plt.ylim(axis_min, axis_max)

        plt.tick_params(axis='both', which='major', labelsize=8)
        plt.subplots_adjust(wspace=0.5, hspace=0.3)

        # Overall log-log r^2 is printed, not annotated on the panel.
        r2 = macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
        print model, r2

        # Create inset for histogram of site level r^2 values
        #axins = inset_axes(ax, width="30%", height="30%", loc=4)
        #hist_mete_r2(site, np.log10(obs), np.log10(pred))
        #plt.setp(axins, xticks=[], yticks=[])

        plt.title(model)
        #plt.text(1, 2000,  r'$R^2$' + '='+ str(round(r2,3)))
        plt.ylabel('Observed abundance',rotation='90',fontsize=12)
        plt.xlabel('Predicted abundance',fontsize=12)

    plt.savefig(mydir+'/Results/obs_pred_plots.png', dpi=600)#, bbox_inches = 'tight')#, pad_inches=0)
    plt.show()
def plot_r2_comp(name_site_combo, dat_dir = './out_files/', out_fig_dir = './out_figs/'):
    """Plot r2 of the three patterns separately for each community.

    name_site_combo - iterable of (dat_name, site) pairs
    dat_dir - directory with the <dat>_obs_pred_<pattern>_<model>.csv files
    out_fig_dir - directory where r2_comp.png is saved

    One panel per pattern (SAD/ISD/SDR), each scattering the non-ASNE models'
    r^2 values against ASNE's with a 1:1 line.
    """
    models = ['asne', 'agsne', 'ssnt_0', 'ssnt_1']
    model_names = ['ASNE', 'AGSNE', 'SSNT_N', 'SSNT_M']
    patterns = ['rad', 'isd', 'sdr']
    pattern_names = ['SAD', 'ISD', 'SDR']
    col_list = ['b', '#787878', 'r']
    symbol_list = ['o', 's', '*']
    
    fig = plt.figure(figsize = (10.5, 3.5))
    for i, pattern in enumerate(patterns):
        # Per-model r^2 values, plus a pooled list used only for axis limits.
        r2_dic = {'asne':[], 'agsne':[], 'ssnt_0':[], 'ssnt_1':[]}
        r2_list = []
        for j, model in enumerate(models):
            for dat_name, site in name_site_combo:
                pred_obs_model_pattern = wk.import_obs_pred_data(dat_dir + dat_name + '_obs_pred_' + pattern + '_' + model + '.csv')
                pred_obs_site = pred_obs_model_pattern[pred_obs_model_pattern['site'] == site]
                r2 = mtools.obs_pred_rsquare(np.log10(pred_obs_site['obs']), np.log10(pred_obs_site['pred']))
                r2_dic[model].append(r2)
                r2_list.append(r2)
        
        ax = plt.subplot(1, 3, i + 1)
        # models[1..3] plotted against the ASNE baseline.
        for j in range(1, 4):
            model = models[j]
            plt.scatter(r2_dic['asne'], r2_dic[model], s = 20, marker = symbol_list[j - 1], facecolors = col_list[j - 1], 
                        edgecolors = 'none', label = model_names[j])
        # 10% outward padding on each side; sign checks keep the pad outward
        # for negative extremes.
        min_val, max_val = min(r2_list), max(r2_list)
        if min_val < 0: axis_min = 1.1 * min_val
        else: axis_min = 0.9 * min_val
        if max_val < 0: axis_max = 0.9 * max_val
        else: axis_max= 1.1 * max_val    
        plt.plot([axis_min, axis_max], [axis_min, axis_max], 'k-')     
        plt.xlim(axis_min, axis_max)
        plt.ylim(axis_min, axis_max)
        ax.tick_params(axis = 'both', which = 'major', labelsize = 6)
        ax.set_xlabel(r'$R^2$ of ASNE', labelpad = 4, size = 10)
        ax.set_ylabel(r'$R^2$ of the other models', labelpad = 4, size = 10)
        ax.set_title(pattern_names[i], size = 16)
        if i == 0: ax.legend(loc = 2, prop = {'size': 10})
        
    plt.subplots_adjust(left = 0.08, wspace = 0.3)
    plt.tight_layout()
    plt.savefig(out_fig_dir + 'r2_comp.png', dpi = 400)  
Пример #16
0
def create_null_dataset(Svals, Nvals, Niter, dataset_name, data_dir='./data/',
                        dic_filename='beta_lookup_table.pck', return_obs_pred=0):
    """Create simulated fits to uniform abundance distribution data
    
    Create list of coefficients of determination for simulated observed vs.
    predicted abundance relationships for a dataset. If these values are
    similar to those observed then the constraints alone largely determine the
    fit to the data. If they are weaker than the observed then the application
    of maximum entropy is also important.
    
    Svals : a list of values of observed species richnesses to match. Each row
            is a community (e.g., a site, year, etc.)
    Nvals : a list of values of observed community abundances to match. The
            ordering of rows should match that of Svals so that each row
            represents the S and N values for a single community.
    Niter: number of simulations
    dataset_name : short code that will indicate the name of the dataset in
                    the output file names
    data_dir : directory in which to store output    
    dic_filename : pickle file with the precomputed beta lookup table
    return_obs_pred : if 1, also return the final iteration's pooled
                    (sim_obs, sim_pred) lists
    
    """
    # NOTE(review): 'wb' mode for csv.writer and itertools.izip below are
    # Python 2 idioms; this function is not Python 3 compatible as written.
    resultfile = open(data_dir + dataset_name + '_sim_r2.csv', 'wb')
    out = csv.writer(resultfile, dialect = 'excel')
    dic_beta = mete.get_beta_dict(dic_filename)    
    for i in range(Niter):
        # A fresh worker pool per iteration; each (S, N) pair is simulated in
        # parallel via sim_null_curry.
        pool = multiprocessing.Pool()
        curried_args = itertools.izip(Svals, Nvals, itertools.repeat(dic_beta))
        site_sim_results = pool.map(sim_null_curry, curried_args)
        pool.close()
        
        # Pool observed/predicted abundances across all sites and record this
        # iteration's log-log r^2 as one CSV row (iteration index, r^2).
        sim_obs = []
        sim_pred = []
        for site in site_sim_results:
            sim_obs.extend((site[0]))
            sim_pred.extend((site[1]))
        r2 = macroecotools.obs_pred_rsquare(np.array(np.log10(sim_obs)),
                                       np.array(np.log10(sim_pred)))
        results = ((np.column_stack((i, r2))))
        out.writerows(results)
    resultfile.close()
    if return_obs_pred == 1:
        # Only the last iteration's pooled lists survive the loop.
        return sim_obs, sim_pred
Пример #17
0
def plot_obs_pred_sad(methods, datasets, data_dir='/home/kenlocey/data1/', radius=2): # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)        ########################################################################################

    """Multiple obs-predicted plotter

    methods - prediction methods ('mete' -> log-series, else geometric series)
    datasets - dataset names; data read from
        mydir + '/data/truedata/<method>_<dataset>_obs_pred.txt'
    data_dir - unused here beyond the signature; paths are built from mydir
    radius - point-density radius for plot_color_by_pt_dens
    """
    for i, dataset in enumerate(datasets):
        for j, method in enumerate(methods):

            #if method == 'mete' and dataset == 'EMP': continue

            obs_pred_data = import_obs_pred_data(mydir+'/data/truedata/'+method+'_'+dataset+'_obs_pred.txt')
            #site = ((obs_pred_data["site"]))
            obs = ((obs_pred_data["obs"]))
            pred = ((obs_pred_data["pred"]))

            # Axis limits padded around the observed range (log axes below).
            axis_min = 0.5 * min(obs)
            axis_max = 2 * max(obs)

            # Subplot index depends on the method only, so panels are
            # overwritten across datasets; the figure is saved once at the end.
            macroecotools.plot_color_by_pt_dens(pred, obs, radius, loglog=1,
                            plot_obj=plt.subplot(2, 2, j+1))

            # 1:1 reference line.
            plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
            plt.xlim(axis_min, axis_max)
            plt.ylim(axis_min, axis_max)

            plt.tick_params(axis='both', which='major', labelsize=8)
            plt.subplots_adjust(wspace=0.5, hspace=0.3)

            r2 = macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
            print method, dataset, r2

            #Create inset for histogram of site level r^2 values
            #axins = inset_axes(ax, width="30%", height="30%", loc=4)
            #hist_mete_r2(site, np.log10(obs), np.log10(pred))
            #plt.setp(axins, xticks=[], yticks=[])

            if method == 'mete': plt.title("Log-series")
            else: plt.title("Geometric series")
            plt.text(1, 2000,  r'$R^2$' + '='+ str(round(r2,3)))
            plt.ylabel('Observed abundance',rotation='90',fontsize=12)
            plt.xlabel('Predicted abundance',fontsize=12)
    plt.savefig(mydir+'/obs_pred_plots.png', dpi=600)#, bbox_inches = 'tight')#, pad_inches=0)
Пример #18
0
def plot_sim_results(datasets, colors, data_dir='./data/'):
    """Overlay the simulated-r^2 density and the observed r^2 per dataset."""
    all_lowerbounds = {'bbs': -0.7, 'cbc': -2.3, 'fia': 0, 'gentry': 0, 'mcdb': -0.75, 'nabc': -0.5}
    lowerbounds = [all_lowerbounds[dataset] for dataset in datasets]
    fig = plt.figure()
    longdashes = [10, 5]
    for i, dataset in enumerate(datasets):
        sim_data = import_sim_data(data_dir + dataset + '_sim_r2.csv')
        obs_pred_data = import_obs_pred_data(data_dir + dataset +
                                             '_obs_pred.csv')
        obs_r2 = macroecotools.obs_pred_rsquare(np.log10(obs_pred_data['obs']),
                                                np.log10(obs_pred_data['pred']))
        # Kernel density estimate of the simulated r^2 values, evaluated on a
        # grid and truncated where the density is effectively zero.
        density = stats.kde.gaussian_kde(sim_data['r2'])
        xvals = np.arange(lowerbounds[i], 1, 0.01)
        yvals = density.evaluate(xvals)
        keep = yvals > 0.000001
        xvals = xvals[keep]
        yvals = yvals[keep]
        ax = fig.add_subplot(3, 2, i + 1)
        curve, = plt.plot(xvals, yvals, 'k--', linewidth=2, color=colors[i])
        curve.set_dashes(longdashes)
        # Vertical marker at the observed r^2 for this dataset.
        plt.plot([obs_r2, obs_r2], [0, max(yvals)], color=colors[i], linewidth=2)
        plt.axis([lowerbounds[i], 1, 0, 1.1 * max(yvals)])
    plt.savefig('sim_results.png', dpi=400, bbox_inches = 'tight', pad_inches=0)
Пример #19
0
import numpy as np
import matplotlib.pylab as plt
import sys

from macroecotools import plot_color_by_pt_dens, obs_pred_rsquare

def plot_obs_pred(obs_pred_data, adj=0, dest_file='./obs_pred.png'):
    """Density-colored obs-vs-pred scatter with a 1:1 line, saved to dest_file."""
    pred_vals = obs_pred_data['pred'] + adj
    obs_vals = obs_pred_data['obs'] + adj
    plot_color_by_pt_dens(pred_vals, obs_vals, 3, loglog=1)
    # 1:1 reference line spanning the range of the predicted values.
    lo = min(pred_vals)
    hi = max(pred_vals)
    plt.loglog([lo, hi], [lo, hi], 'k-')
    plt.savefig(dest_file, dpi = 400)

if len(sys.argv) > 1:
    datasets = [sys.argv[1]]
else:
    datasets = ['bbs_2012', 'bbs_2008_2012', 'cbc', 'gentry', 'naba']


for dataset in datasets:
    for datatype in ['fit', 'test']:
        for predtype in ['rad']:
            obs_pred_data = read_csv('./results/' + dataset + '_' + predtype+ '_' + datatype + '_obs_pred.csv')
            adj = 0
            log_pred = [log(float(i + adj)) for i in obs_pred_data['pred'].values]
            log_obs = [log(float(i + adj)) for i in obs_pred_data['obs'].values]                  
            print obs_pred_rsquare(np.array(log_pred), np.array(log_obs))
            fig_name = './figs/' + dataset + '_' + datatype +'_obs_pred_' + predtype + '.png'
            plot_obs_pred(obs_pred_data, adj=adj, dest_file=fig_name)

Пример #20
0
def Supp(figname = 'Supp', data_dir=mydir, radius=2):
    # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)
    """Multiple obs-predicted plotter

    Draws two density-colored obs-vs-pred panels - the 75:25 simulation
    predictions and the lognormal MLE predictions - each with a 1:1 line and
    an in-panel r^2, then saves the figure.

    figname - output file name (without extension); saved under
        mydir + 'figures/'
    data_dir - accepted for signature compatibility; paths below use mydir
    radius - point-density radius for plot_color_by_pt_dens
    """
    fig = plt.figure()
    count = 0

    plot_dim = 2
    IN_Obs_Pred = importData.import_obs_pred_data(mydir + \
        'data/ObsPred/Stratified/lognorm_75_25_obs_pred_stratify_test.txt')
    site = np.asarray(list(((IN_Obs_Pred["site"]))))
    obs = np.asarray(list(((IN_Obs_Pred["obs"]))))
    pred7525 = np.asarray(list(((IN_Obs_Pred["pred7525"]))))
    predPln = np.asarray(list(((IN_Obs_Pred["predPln"]))))
    # One panel per prediction set: 75:25 simulation, then lognormal MLE.
    toIterate = [pred7525, predPln]
    for x in range(2):
        axis_min = 0
        axis_max = 2 * max(obs)
        #print plot_dim
        ax = fig.add_subplot(plot_dim, plot_dim, count+1)
        if x == 0:
            ax.set_title(r"$\mathbf{75:25\, Simulation}$")
        else:
            ax.set_title(r"$\mathbf{Lognormal\, MLE}$")

        macroecotools.plot_color_by_pt_dens(toIterate[x], obs, radius, loglog=1,
                        plot_obj=plt.subplot(plot_dim,plot_dim,count+1))
        #
        #plt.text(0.1, 0.9,'matplotlib', ha='center', va='center', transform=ax.transAxes)


        # 1:1 reference line.
        plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')

        plt.xlim(0, axis_max)
        plt.ylim(0, axis_max)
        #r2s = ((INh2["R2"]))
        #r2s = r2s.astype(float)
        # insert r2 of all data
        r2_all = macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(toIterate[x]))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r',r2_all , p=2)

        plt.text(0.18, 0.93, r2text,  fontsize=10,
            horizontalalignment='center',
            verticalalignment='center',transform = ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=7)
        plt.subplots_adjust(wspace=0.5, hspace=0.3)

        # Empty inset axes; the histogram that would fill it is commented out.
        axins = inset_axes(ax, width="30%", height="30%", loc=4)

        #hist_r2 = np.histogram(r2s, range=(0, 1))
        #xvals = hist_r2[1] + (hist_r2[1][1] - hist_r2[1][0])
        #xvals = xvals[0:len(xvals)-1]
        #yvals = hist_r2[0]
        #plt.plot(xvals, yvals, 'k-', linewidth=2)
        #plt.axis([0, 1, 0, 1.1 * max(yvals)])
        ax.set(adjustable='box-forced', aspect='equal')
        #plt.setp(axins, xticks=[], yticks=[])

        count += 1
    fig.text(0.50, 0.04, r'$Predicted \; rank-abundance$', ha='center', va='center')
    fig.text(0.05, 0.5, r'$Observed \; rank-abundance$', ha='center', va='center', rotation='vertical')
    fig_name = str(mydir + 'figures/' + figname + '.png')
    plt.savefig(fig_name, dpi=600)#, bbox_inches = 'tight')#, pad_inches=0)
    plt.close()
def bootstrap_ISD(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for ISD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    # Candidate individual size distributions (ISDs) for the four models.
    isd_agsne = mete_distributions.psi_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    isd_asne = mete_distributions.psi_epsilon_approx(S, N, E)
    # dbh rescaled so the smallest individual has size exactly 1.
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    isd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled**1) - N))
    # Use 2.0 / 3 rather than 2 / 3: under Python 2 without
    # "from __future__ import division", 2 / 3 evaluates to 0 and would
    # silently change the SSNT transformation exponent.
    isd_ssnt_1 = ssnt_isd_bounded(2.0 / 3,
                                  N / (sum(dbh_scaled**(2.0 / 3)) - N))
    dist_for_model = {
        'ssnt_0': isd_ssnt_0,
        'ssnt_1': isd_ssnt_1,
        'asne': isd_asne,
        'agsne': isd_agsne
    }
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_isd_' +
                                       model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred']
    obs = pred_obs[pred_obs['site'] == site]['obs']

    # First fields of each output row: dataset, site, empirical statistic.
    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))
    ]
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare),
                     new_line=False)
    emp_cdf = mtools.get_emp_cdf(obs)
    # KS statistic: maximal distance between empirical and model CDFs.
    out_list_ks = [
        dat_name, site,
        str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))
    ]
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks),
                     new_line=False)

    num_pools = 8  # Assuming that 8 pools are to be created
    for i in xrange(Niter):
        obs_boot = []
        cdf_boot = []
        # Draw ISD samples in parallel until at least N individuals accumulate.
        while len(obs_boot) < N:
            pool = multiprocessing.Pool(num_pools)
            out_sample = pool.map(wk.generate_isd_sample,
                                  [dist for j in xrange(num_pools)])
            for combo in out_sample:
                cdf_sublist, sample_sublist = combo
                obs_boot.extend(sample_sublist)
                cdf_boot.extend(cdf_sublist)
            pool.close()
            pool.join()
        if model in ['asne', 'agsne']:
            obs_boot = np.sort(obs_boot[:N])**0.5  # Convert to diameter
        else:
            obs_boot = np.sort(obs_boot[:N])
        sample_rsquare = mtools.obs_pred_rsquare(np.log10(obs_boot),
                                                 np.log10(pred))
        sample_ks = max(abs(emp_cdf - np.sort(cdf_boot[:N])))

        # Append each bootstrap statistic to the same row, comma-separated.
        wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt',
                         "".join([',', str(sample_rsquare)]),
                         new_line=False)
        wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt',
                         "".join([',', str(sample_ks)]),
                         new_line=False)

    # Terminate the rows. NOTE(review): the ks file additionally gets the
    # empirical ks row written again after the tab — confirm downstream
    # parsers expect this duplicate before changing it.
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt', '\t')
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt', '\t')
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in out_list_ks))
def bootstrap_SAD(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """Bootstrap analysis of the SAD under one of the four candidate models.

    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output
    Niter - number of bootstrap samples

    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    """
    dat_name, site = name_site_combo
    raw = wk.import_raw_data(in_dir + dat_name + '.csv')
    site_data = clean_data_agsne(raw[raw['site'] == site])
    G, S, N, E = get_GSNE(site_data)

    # Model-specific SAD, chosen by dispatching on the model name.
    beta_untrunc = mete.get_beta(S, N, version='untruncated')
    beta_trunc = mete.get_beta(S, N)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    agsne_pars = [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ]
    dist = {
        'ssnt_0': stats.logser(np.exp(-beta_untrunc)),
        'ssnt_1': stats.logser(np.exp(-beta_untrunc)),
        'asne': md.trunc_logser(np.exp(-beta_trunc), N),
        'agsne': mete_distributions.sad_agsne([G, S, N, E], agsne_pars)
    }[model]

    obs_pred = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_rad_' +
                                       model + '.csv')
    site_rows = obs_pred[obs_pred['site'] == site]
    # Reverse the RAD so abundances run from rarest to most abundant.
    pred = site_rows['pred'][::-1]
    obs = site_rows['obs'][::-1]

    # Each output row starts with dataset, site, and the empirical statistic.
    rsquare_row = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))
    ]
    emp_cdf = mtools.get_emp_cdf(obs)
    ks_row = [
        dat_name, site,
        str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))
    ]

    # Bootstrap: resample S abundances from the fitted SAD and re-score.
    for _ in range(Niter):
        boot_abd = np.array(sorted(dist.rvs(S)))
        boot_cdf = np.array([dist.cdf(x) for x in boot_abd])
        boot_emp_cdf = mtools.get_emp_cdf(boot_abd)
        rsquare_row.append(
            str(mtools.obs_pred_rsquare(np.log10(boot_abd), np.log10(pred))))
        ks_row.append(str(max(abs(boot_emp_cdf - np.array(boot_cdf)))))

    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in rsquare_row))
    wk.write_to_file(out_dir + 'SAD_bootstrap_' + model + '_ks.txt',
                     ",".join(str(x) for x in ks_row))
Пример #23
0
def fig4(figname="Fig4", data_dir=mydir, radius=2, saveAs="eps"):
    """Generate a 2x2 figure of observed vs. predicted Nmax for four SAD models.

    Each panel plots predicted against observed maximum abundance (Nmax) on
    log-log axes with a 1:1 line and a modified r^2 annotation; the figure is
    saved to <mydir>/figures/<figname>_RGB.<saveAs>.

    figname - base name of the output figure file
    data_dir - directory containing the data/NSR2/... result files
    radius - point-density radius passed to plot_color_by_pt_dens
    saveAs - output image format (e.g. 'eps', 'png')
    """
    fig = plt.figure()
    fig.subplots_adjust(bottom=0.15)
    plot_dim = 1
    count = 0

    # Observed N, S, and Nmax from the stratified lognormal (pln) results.
    IN_Obs_Pred = importData.import_NSR2_data(mydir + "data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt")
    N = np.asarray(list(((IN_Obs_Pred["N"]))))
    S = np.asarray(list(((IN_Obs_Pred["S"]))))
    NmaxObs = np.asarray(list(((IN_Obs_Pred["NmaxObs"]))))
    # Hard-coded Nmax-vs-N regression coefficients, one per model (same order
    # as `models`). NOTE(review): presumably from a prior fit — confirm source.
    models = ["geom", "lognorm", "mete", "zipf"]
    modelSlopes = [0.647520323289, 0.942904468437, 0.769214774397, 0.954497727096]
    modelInterepts = [0.116508916992, 0.292527611072, 0.19240314275, 0.189954627996]
    for g, model in enumerate(models):
        NmaxPred = []
        SPred = []
        for i in range(len(N)):
            # Predicted Nmax from the model-specific regression on N.
            NmaxPred_i = mo.predictS(N[i], NmaxObs[i], predictNmax=True).getNmax(
                b=modelInterepts[g], slope=modelSlopes[g]
            )
            SPred_i = mo.predictS(N[i], NmaxObs[i], predictNmax=True).getS()
            NmaxPred.append(NmaxPred_i)
            SPred.append(SPred_i)
        NmaxPred = np.asarray(NmaxPred)
        SPred = np.asarray(SPred)
        axis_min = 0
        axis_max = 2 * max(NmaxObs)
        ax = fig.add_subplot(2, 2, count + 1)
        # Bootstrap (stratified test) results for the current model; file name
        # pattern differs for zipf (mle) and lognorm (pln).
        if model == "zipf":
            OUT2 = importData.import_NSR2_data(
                data_dir + "data/NSR2/Stratified_Test/" + model + "_mle_NSR2_stratify.txt"
            )
        elif model == "lognorm":
            OUT2 = importData.import_NSR2_data(
                data_dir + "data/NSR2/Stratified_Test/" + model + "_pln_NSR2_stratify.txt"
            )
        else:
            OUT2 = importData.import_NSR2_data(data_dir + "data/NSR2/Stratified_Test/" + model + "_NSR2_stratify.txt")

        # NOTE(review): loaded but not used below in this function — verify
        # before removing.
        NmaxObs_BS = np.asarray(list(((OUT2["NmaxObs"]))))
        NmaxPred_BS = np.asarray(list(((OUT2["NmaxPred"]))))

        # Human-readable panel titles for each model.
        if model == "geom":
            ax.set_title("Broken-stick")
        elif model == "lognorm":
            ax.set_title("Lognormal")
        elif model == "mete":
            ax.set_title("Log-series")
        elif model == "zipf":
            ax.set_title("Zipf")
        macroecotools.plot_color_by_pt_dens(NmaxPred, NmaxObs, radius, loglog=1, plot_obj=plt.subplot(2, 2, count + 1))
        # 1:1 line for visual comparison of observed vs. predicted.
        plt.plot([axis_min, axis_max], [axis_min, axis_max], "k-")
        plt.xlim(axis_min, axis_max)
        plt.ylim(0, axis_max)
        # Modified r^2 between log10 observed and predicted Nmax.
        r2_all = macroecotools.obs_pred_rsquare(np.log10(NmaxObs), np.log10(NmaxPred))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format("r", r2_all, p=2)
        plt.text(
            0.72,
            0.12,
            r2text,
            fontsize=13,
            horizontalalignment="center",
            verticalalignment="center",
            transform=ax.transAxes,
        )
        plt.tick_params(axis="both", which="major", labelsize=12)
        plt.subplots_adjust(wspace=0.00001, hspace=0.3)
        ax.set(adjustable="box-forced", aspect="equal")

        count += 1
    # Shared axis labels for the whole 2x2 grid.
    fig.text(0.50, 0.055, "Predicted, " + r"$log_{10}(N_{max})$", ha="center", va="center", fontsize=19)
    fig.text(
        0.09, 0.5, "Observed, " + r"$log_{10}(N_{max})$", ha="center", va="center", rotation="vertical", fontsize=19
    )
    fig_name = str(mydir + "figures/" + figname + "_RGB." + saveAs)
    plt.savefig(fig_name, dpi=600, format=saveAs)  # , bbox_inches = 'tight')#, pad_inches=0)
    plt.close()
def bootstrap_SDR(name_site_combo,
                  model,
                  in_dir='./data/',
                  out_dir='./out_files/',
                  Niter=200):
    """A general function of bootstrapping for SDR applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to one file on disk for R^2.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)

    # For each species record [m, n]: m = number of congeneric species in its
    # genus, n = abundance of the species.
    par_list = []
    for sp in np.unique(dat_clean['sp']):
        dat_sp = dat_clean[dat_clean['sp'] == sp]
        n = len(dat_sp)
        genus_sp = dat_sp['genus'][0]
        m = len(np.unique(dat_clean[dat_clean['genus'] == genus_sp]['sp']))
        par_list.append([m, n])

    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_sdr_' +
                                       model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred']
    obs = pred_obs[pred_obs['site'] == site]['obs']
    # Output row starts with dataset, site, and the empirical R^2.
    out_list_rsquare = [
        dat_name, site,
        str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))
    ]

    # Intraspecific (within-species) size distributions for the four models.
    iisd_agsne = mete_distributions.theta_agsne([G, S, N, E], [
        lambda1, beta, lambda3,
        agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3
    ])
    iisd_asne = mete_distributions.theta_epsilon(S, N, E)
    # dbh rescaled so the smallest individual has size exactly 1.
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    iisd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled**1) - N))
    # Use 2.0 / 3 rather than 2 / 3: under Python 2 without
    # "from __future__ import division", 2 / 3 evaluates to 0 and would
    # silently change the SSNT transformation exponent.
    iisd_ssnt_1 = ssnt_isd_bounded(2.0 / 3,
                                   N / (sum(dbh_scaled**(2.0 / 3)) - N))
    dist_for_model = {
        'ssnt_0': iisd_ssnt_0,
        'ssnt_1': iisd_ssnt_1,
        'asne': iisd_asne,
        'agsne': iisd_agsne
    }
    dist = dist_for_model[model]

    # Bootstrap: for each species draw n sizes from its model distribution
    # and take the mean squared size (energy); then re-score R^2 against pred.
    for i in range(Niter):
        if model in ['ssnt_0', 'ssnt_1']:
            obs_boot = np.array([
                np.mean((dist.rvs(par[1]))**2) for par in par_list
            ])  # Here par[1] is n for each species
        elif model == 'asne':
            obs_boot = np.array([
                np.mean(np.array(dist.rvs(par[1], par[1]))) for par in par_list
            ])
        else:
            obs_boot = np.array([
                np.mean(np.array(dist.rvs(par[1], par[1], par[0])))
                for par in par_list
            ])
        out_list_rsquare.append(
            str(mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))))

    wk.write_to_file(out_dir + 'SDR_bootstrap_' + model + '_rsquare.txt',
                     ",".join(str(x) for x in out_list_rsquare))
Пример #25
0
# Force matplotlib to not use any Xwindows backend.
matplotlib.use('Agg')

from pandas import read_csv
from math import log
import numpy as np
import matplotlib.pylab as plt
import sys

from macroecotools import plot_color_by_pt_dens, obs_pred_rsquare

def plot_obs_pred(obs_pred_data, dest_file='./obs_pred.png'):
    """Plot observed vs. predicted values colored by point density.

    Draws a log-log density scatter of obs against pred, overlays a 1:1
    line spanning the range of the predictions, and saves the figure to
    dest_file.
    """
    preds = obs_pred_data['pred']
    plot_color_by_pt_dens(preds, obs_pred_data['obs'], 3, loglog=1)
    lo = min(preds)
    hi = max(preds)
    plt.loglog([lo, hi], [lo, hi], 'k-')  # 1:1 reference line
    plt.savefig(dest_file, dpi=400)

# Allow a single dataset to be named on the command line; otherwise run all.
if len(sys.argv) > 1:
    datasets = [sys.argv[1]]
else:
    datasets = ['bbs_2012', 'bbs_2008_2012', 'cbc', 'fia', 'gentry', 'naba']

# For every dataset, report r^2 of observed vs. predicted rarity for both
# the fit and the test subsets, then save the obs-pred figure.
for dataset in datasets:
    for datatype in ['fit', 'test']:
        for predtype in ['rare']:
            obs_pred_data = read_csv('./results/' + dataset + '_' + predtype+ '_' + datatype + '_obs_pred.csv')
            print obs_pred_rsquare(np.array(obs_pred_data['obs'].values), np.array(obs_pred_data['pred'].values))
            fig_name = './figs/' + dataset + '_' + datatype +'_obs_pred_rarity.png'
            plot_obs_pred(obs_pred_data, dest_file=fig_name)

def fig4(figname = 'Fig4', data_dir=mydir, radius=1.5, saveAs = 'png'):
    """Generate a 2x2 observed-vs-predicted Nmax figure for four SAD models.

    Unlike the constant-coefficient variant, the Nmax regression slope and
    intercept for each model are read from the stratified-test NSR2 files
    (mean of the per-site values). The figure is saved to
    <mydir>/figures/<figname>_RGB.<saveAs>.

    figname - base name of the output figure file
    data_dir - directory containing the data/NSR2/... result files
    radius - point-density radius passed to plot_color_by_pt_dens
    saveAs - output image format (e.g. 'png', 'eps')
    """
    fig = plt.figure()
    fig.subplots_adjust(bottom= 0.15)
    plot_dim = 1
    count = 0
    models = ['geom', 'lognorm', 'mete', 'zipf']
    #modelSlopes = [0.647520323289, 0.942904468437, 0.769214774397, 0.954497727096]
    #modelInterepts = [0.116508916992, 0.292527611072, 0.19240314275, 0.189954627996]
    modelSlopes = []
    modelInterepts = []


    for g, model in enumerate(models):

        # Load observed results (Stratified) and test results (Stratified_Test)
        # for the current model; file name patterns differ per model.
        if model == 'geom':
            IN_Obs_Pred = importData.import_NSR2_data(mydir + \
                'data/NSR2/Stratified/geom_NSR2_stratify.txt')
            nsr2 = importData.import_NSR2_data(data_dir + \
            'data/NSR2/Stratified_Test/' + model + '_NSR2_stratify.txt')


        elif model == 'lognorm':
            IN_Obs_Pred = importData.import_NSR2_data(mydir + \
                'data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt')
            nsr2 = importData.import_NSR2_data(data_dir + \
            'data/NSR2/Stratified_Test/' + model + '_'+  'pln' + '_NSR2_stratify.txt')


        elif model == 'mete':
            IN_Obs_Pred = importData.import_NSR2_data(mydir + \
                'data/NSR2/Stratified/mete_NSR2_stratify.txt')
            nsr2 = importData.import_NSR2_data(data_dir + \
            'data/NSR2/Stratified_Test/' + model + '_NSR2_stratify.txt')


        elif model == 'zipf':
            IN_Obs_Pred = importData.import_NSR2_data(mydir + \
                'data/NSR2/Stratified/zipf_mle_NSR2_stratify.txt')
            nsr2 = importData.import_NSR2_data(data_dir + \
            'data/NSR2/Stratified_Test/' + model + '_mle' + '_NSR2_stratify.txt')

        # Mean regression coefficients across sites; intercept is stored in
        # log10 form, hence the 10 ** back-transform.
        N = np.asarray(list(((IN_Obs_Pred["N"]))))
        N_All = np.asarray(list(((nsr2["N"]))))
        domSlope = np.mean(((nsr2["NmaxPredSlope"])))
        domIntercept =  10 ** np.mean(((nsr2["NmaxPredIntercept"])))

        NmaxObs = np.asarray(list(((IN_Obs_Pred["NmaxObs"]))))
        NmaxObsAll = np.asarray(list(((nsr2["NmaxObs"]))))

        # Predicted Nmax for each site from the mean regression on N.
        NmaxPred = []
        NmaxPredAll = []
        for i in range(len(N)):
            NmaxPred_i = mo.predictNmax(N[i]).getNmax(b = domIntercept, slope = domSlope)
            NmaxPred.append(NmaxPred_i)

        NmaxPred = np.asarray(NmaxPred)
        # Drop sites with extreme observed Nmax (>= 200000) before plotting.
        NmaxPred_obs = [k for k in zip(NmaxObs, NmaxPred) if k[0] < 200000 ]
        NmaxObs = np.asarray([k[0] for k in NmaxPred_obs])
        NmaxPred = np.asarray([k[1] for k in NmaxPred_obs])

        axis_min = 10
        axis_max = 1000000
        ax = fig.add_subplot(2, 2, count+1)

        # Human-readable panel titles for each model.
        if model == 'geom':
            ax.set_title("Broken-stick")
        elif model == 'lognorm':
            ax.set_title("Lognormal")
        elif model == 'mete':
            ax.set_title("Log-series")
        elif model == 'zipf':
            ax.set_title("Zipf")

        #plot_color_by_pt_dens(NmaxPred, NmaxObs, radius, loglog=1,
        #                plot_obj=plt.subplot(2,2,count+1))
        #if model == 'lognorm':
        #    radius =  1.3
        macroecotools.plot_color_by_pt_dens(NmaxPred, NmaxObs, radius, loglog=1,
                        plot_obj=plt.subplot(2,2,count+1))
        # 1:1 line plus fixed axis limits shared by all four panels.
        plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
        plt.xlim(axis_min, axis_max)
        plt.ylim(axis_min, axis_max)
        ax.set_xlim(axis_min, axis_max)
        ax.set_ylim(axis_min, axis_max )
        # Modified r^2 between log10 observed and predicted Nmax.
        r2_all = macroecotools.obs_pred_rsquare(np.log10(NmaxObs), np.log10(NmaxPred))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r',r2_all , p=2)
        plt.text(0.72, 0.12, r2text,  fontsize=13,
            horizontalalignment='center',
            verticalalignment='center',transform = ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=12)
        plt.subplots_adjust(wspace=0.00001, hspace=0.3)
        ax.set(adjustable='box-forced', aspect='equal')
        count += 1

    # Shared axis labels for the whole 2x2 grid.
    fig.text(0.50, 0.055 , 'Predicted, ' +r'$log_{10}(N_{max})$', ha='center', va='center', fontsize = 19)
    fig.text(0.09, 0.5, 'Observed, ' +r'$log_{10}(N_{max})$', ha='center', va='center', rotation='vertical',\
        fontsize = 19)
    fig_name = str(mydir + 'figures/' + figname + '_RGB.' + saveAs)
    plt.savefig(fig_name, dpi=600, format = saveAs)#, bbox_inches = 'tight')#, pad_inches=0)
    plt.close()
Пример #27
0
                          3,
                          loglog=1)
    plt.loglog(
        [min(obs_pred_data['pred'] + adj),
         max(obs_pred_data['pred'] + adj)],
        [min(obs_pred_data['pred'] + adj),
         max(obs_pred_data['pred'] + adj)], 'k-')
    plt.savefig(dest_file, dpi=400)


# Allow a single dataset to be named on the command line; otherwise run all.
if len(sys.argv) > 1:
    datasets = [sys.argv[1]]
else:
    datasets = ['bbs_2012', 'bbs_2008_2012', 'cbc', 'fia', 'gentry', 'naba']

# For every dataset, report r^2 of log-transformed observed vs. predicted
# SAD abundances for both fit and test subsets, then save the figure.
for dataset in datasets:
    for datatype in ['fit', 'test']:
        for predtype in ['sad']:
            obs_pred_data = read_csv('./results/' + dataset + '_' + predtype +
                                     '_' + datatype + '_obs_pred_norm.csv')
            # Shift abundances by 1 so zeros survive the log transform.
            adj = 1
            log_pred = [
                log(float(i + adj)) for i in obs_pred_data['pred'].values
            ]
            log_obs = [
                log(float(i + adj)) for i in obs_pred_data['obs'].values
            ]
            print obs_pred_rsquare(np.array(log_pred), np.array(log_obs))
            fig_name = './figs/' + dataset + '_' + datatype + '_obs_pred_' + predtype + '.png'
            plot_obs_pred(obs_pred_data, adj=adj, dest_file=fig_name)
def bootstrap_ISD(name_site_combo, model, in_dir = './data/', out_dir = './out_files/', Niter = 200):
    """A general function of bootstrapping for ISD applying to all four models. 
    
    Inputs:
    name_site_combo: a list with dat_name and site
    model - takes one of four values 'ssnt_0', 'ssnt_1', 'asne', or 'agsne'
    in_dir - directory of raw data
    out_dir - directory used both in input (obs_pred.csv file) and output 
    Niter - number of bootstrap samples
    
    Output:
    Writes to disk, with one file for R^2 and one for KS statistic.
    
    """
    dat_name, site = name_site_combo
    dat = wk.import_raw_data(in_dir + dat_name + '.csv')
    dat_site = dat[dat['site'] == site]
    dat_clean = clean_data_agsne(dat_site)
    G, S, N, E = get_GSNE(dat_clean)
    lambda1, beta, lambda3 = agsne.get_agsne_lambdas(G, S, N, E)
    # Candidate individual size distributions (ISDs) for the four models.
    isd_agsne = mete_distributions.psi_agsne([G, S, N, E], [lambda1, beta, lambda3, agsne.agsne_lambda3_z(lambda1, beta, S) / lambda3])
    isd_asne = mete_distributions.psi_epsilon_approx(S, N, E)
    # dbh rescaled so the smallest individual has size exactly 1.
    dbh_scaled = np.array(dat_clean['dbh'] / min(dat_clean['dbh']))
    isd_ssnt_0 = ssnt_isd_bounded(1, N / (sum(dbh_scaled ** 1) - N))
    # Use 2.0/3 rather than 2/3: under Python 2 without "from __future__
    # import division", 2/3 evaluates to 0 and would silently change the
    # SSNT transformation exponent.
    isd_ssnt_1 = ssnt_isd_bounded(2.0/3, N / (sum(dbh_scaled ** (2.0/3)) - N))
    dist_for_model = {'ssnt_0': isd_ssnt_0, 'ssnt_1': isd_ssnt_1, 'asne': isd_asne, 'agsne': isd_agsne}
    dist = dist_for_model[model]
    pred_obs = wk.import_obs_pred_data(out_dir + dat_name + '_obs_pred_isd_' + model + '.csv')
    pred = pred_obs[pred_obs['site'] == site]['pred']
    obs = pred_obs[pred_obs['site'] == site]['obs']

    # First fields of each output row: dataset, site, empirical statistic.
    out_list_rsquare = [dat_name, site, str(mtools.obs_pred_rsquare(np.log10(obs), np.log10(pred)))]
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt', ",".join(str(x) for x in out_list_rsquare), new_line = False)
    emp_cdf = mtools.get_emp_cdf(obs)
    # KS statistic: maximal distance between empirical and model CDFs.
    out_list_ks = [dat_name, site, str(max(abs(emp_cdf - np.array([dist.cdf(x) for x in obs]))))]
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt', ",".join(str(x) for x in out_list_ks), new_line = False)

    num_pools = 8  # Assuming that 8 pools are to be created
    for i in xrange(Niter):
        obs_boot = []
        cdf_boot = []
        # Draw ISD samples in parallel until at least N individuals accumulate.
        while len(obs_boot) < N:
            pool = multiprocessing.Pool(num_pools)
            out_sample = pool.map(wk.generate_isd_sample, [dist for j in xrange(num_pools)])
            for combo in out_sample:
                cdf_sublist, sample_sublist = combo
                obs_boot.extend(sample_sublist)
                cdf_boot.extend(cdf_sublist)
            pool.close()
            pool.join()
        if model in ['asne', 'agsne']: obs_boot = np.sort(obs_boot[:N]) ** 0.5 # Convert to diameter
        else: obs_boot = np.sort(obs_boot[:N])
        sample_rsquare = mtools.obs_pred_rsquare(np.log10(obs_boot), np.log10(pred))
        sample_ks = max(abs(emp_cdf - np.sort(cdf_boot[:N])))

        # Append each bootstrap statistic to the same row, comma-separated.
        wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt', "".join([',', str(sample_rsquare)]), new_line = False)
        wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt', "".join([',', str(sample_ks)]), new_line = False)

    # Terminate the rows. NOTE(review): the ks file additionally gets the
    # empirical ks row written again after the tab — confirm downstream
    # parsers expect this duplicate before changing it.
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_rsquare.txt', '\t')
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt', '\t')
    wk.write_to_file(out_dir + 'ISD_bootstrap_' + model + '_ks.txt', ",".join(str(x) for x in out_list_ks))
Пример #29
0
def sample_lines_mete_geom_test(datasets, SAD_number, iterations, percents):
    """Subsample SADs from each dataset and score METE vs. broken-stick fits.

    For the SAD_number sites with the largest N in each dataset, repeatedly
    draw multinomial subsamples at each fraction in `percents`, fit the METE
    log-series RAD and the broken-stick (geometric-series) RAD to each
    subsample, and write the per-site mean summary statistics to
    <mydir>/SubSampled-Data/<dataset>_{geom,mete}_SubSampled_Data.txt.

    datasets - list of dataset names ('HMP', 'MGRAST', '95', '97', '99', ...)
    SAD_number - number of sites (ranked by N, descending) per dataset
    iterations - number of successful subsamples required per percent
    percents - list of sampling fractions of N to draw
    """
    #percents = [0.500000, 0.250000, 0.125000, 0.062500, 0.031250, 0.015625]
    SAD_number = int(SAD_number)
    iterations = int(iterations)
    methods = ['geom', 'mete']
    for i, dataset in enumerate(datasets):
        signal.signal(signal.SIGALRM, timeout_handler)
        # Resolve the SAD input file and the matching NSR2 results per dataset.
        if dataset == 'MGRAST':
            # fix subset l8r
            IN = mydir  + dataset + '-Data' + '/MGRAST/MGRAST-SADs.txt'
            nsr2_data_mete_geom = gf.import_NSR2_data(mydir + 'NSR2/' + 'mete_MGRAST_NSR2.txt')
        elif dataset == '95' or dataset == '97' or dataset == '99':
            IN = mydir  + dataset + '-Data/' + str(dataset) + '/MGRAST-' + str(dataset) + '-SADs.txt'
            nsr2_data_mete_geom = gf.import_NSR2_data(mydir + 'NSR2/' + 'mete_MGRAST'+dataset+'_NSR2.txt')
        elif dataset == 'HMP':
            IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs_NAP.txt'
            nsr2_data_mete_geom = gf.import_NSR2_data(mydir + 'NSR2/' + 'mete_'+dataset+'_NSR2.txt')
        else:
            IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs.txt'
            nsr2_data_mete_geom = gf.import_NSR2_data(mydir + 'NSR2/' + 'mete_'+dataset+'_NSR2.txt')

        # Rank sites by N (descending) and keep the top SAD_number of them.
        nsr2_data_mete_geom_N_site = np.column_stack((nsr2_data_mete_geom["site"], nsr2_data_mete_geom["N"]))
        nsr2_data_mete_geom_sorted = nsr2_data_mete_geom_N_site[nsr2_data_mete_geom_N_site[:,1].argsort()[::-1]]
        nsr2_data_mete_geom_top100 = nsr2_data_mete_geom_N_site[nsr2_data_mete_geom_N_site[:,1].argsort()[::-1]][:SAD_number,]
        # Get the SAD numbers
        mete_geom_numbers = nsr2_data_mete_geom_top100[:,0]
        mete_geom_numbers = mete_geom_numbers.astype(int)

        OUT1 = open(mydir + 'SubSampled-Data' + '/' + dataset + '_geom_SubSampled_Data.txt', 'w+')
        OUT2 = open(mydir + 'SubSampled-Data' + '/' + dataset + '_mete_SubSampled_Data.txt', 'w+')
        num_lines = sum(1 for line in open(IN))
        test_lines = 0
        succeess_lines_geom = SAD_number
        succeess_lines_mete = SAD_number
        # Keep consuming candidate sites until enough succeed for both models.
        while (succeess_lines_geom > 0) and (succeess_lines_mete > 0):
            site = nsr2_data_mete_geom_sorted[test_lines,0]
            # Scan the SAD file for the line whose index matches the site id.
            for j,line in enumerate(open(IN)):
                if (j != site):
                    continue
                else:
                    if dataset == "HMP":
                        line = line.strip().split(',')
                        line = [x.strip(' ') for x in line]
                        line = [x.strip('[]') for x in line]
                        site_name = line[0]
                        line.pop(0)
                    else:
                        line = eval(line)
                obs = map(int, line)
                # Calculate relative abundance of each OTU
                # Use that as weights
                N_0 = float(sum(obs))
                S_0 = len(obs)
                N_max = max(obs)
                # Skip sites that are too small or degenerate.
                if S_0 < 10 or N_0 <= S_0:
                    test_lines += 1
                    continue
                line_ra = map(lambda x: x/N_0, obs)
                # Calculate relative abundance of each OTU
                # Use that as weights
                sample_sizes = map(lambda x: round(x*N_0), percents)
                if any(sample_size <= 10 for sample_size in sample_sizes)  == True:
                    test_lines += 1
                    continue
                gm_lines = SAD_number
                geom_means = [N_0, S_0, N_max]
                mete_means = [N_0, S_0, N_max]
                print dataset, N_0, S_0, ' countdown: ', succeess_lines_geom
                # separate this. get percents for Zipf and mete/geom
                # then go on with the sampling
                failed_percents = 0
                for k, percent in enumerate(percents):
                    sample_size = round(percent * N_0)
                    if sample_size <= 10 or failed_percents > 0:
                        continue
                    mg_iter = iterations

                    N_max_list_mg = []
                    N_0_list_mg = []
                    S_0_list_mg = []
                    r2_list_BS = []
                    r2_list_METE = []
                    iter_count_current = 0
                    iter_count = iterations
                    fail_threshold = 20
                    iter_failed = 0
                    # Draw subsamples until `iterations` succeed or too many fail.
                    while (mg_iter > 0) and (iter_failed < fail_threshold):
                        sample_k = np.random.multinomial(sample_size, line_ra, size = None)
                        sample_k_sorted = -np.sort( -sample_k[sample_k != 0] )
                        N_k = sum(sample_k_sorted)
                        S_k = sample_k_sorted.size
                        if S_k < 10 or N_k <= S_k:
                            iter_failed += 1
                            continue
                        N_max_k = max(sample_k_sorted)
                        # Fit the METE log-series RAD and score it.
                        logSeries = mete.get_mete_rad(S_k, N_k)
                        pred_mete = logSeries[0]
                        r2_mete = macroecotools.obs_pred_rsquare(np.log10(sample_k_sorted), np.log10(pred_mete))
                        pred_BS = get_GeomSeries(N_k, S_k, False) # False mean no zeros allowed
                        r2_BS = macroecotools.obs_pred_rsquare(np.log10(sample_k_sorted), np.log10(pred_BS))
                        r2_list = [r2_mete, r2_BS]
                        # Reject draws that produce non-finite r2 values.
                        if any( (r2 == -float('inf') ) or (r2 == float('inf') ) or (r2 == float('Nan') ) for r2 in r2_list):
                            #mg_iter += 1
                            iter_failed += 1
                            continue
                        N_max_list_mg.append(N_max_k)
                        N_0_list_mg.append(N_k)
                        S_0_list_mg.append(S_k)
                        r2_list_BS.append(r2_BS)
                        r2_list_METE.append(r2_mete)
                        mg_iter -= 1

                    # Discard the site if the subsampling did not fully succeed.
                    if len(N_max_list_mg) != iterations:
                        test_lines += 1
                        continue
                    N_0_mg_mean = np.mean(N_0_list_mg)
                    geom_means.append(N_0_mg_mean)
                    mete_means.append(N_0_mg_mean)

                    S_0_mean = np.mean(S_0_list_mg)
                    geom_means.append(S_0_mean)
                    mete_means.append(S_0_mean)

                    N_max_mg_mean = np.mean(N_max_list_mg)
                    geom_means.append(N_max_mg_mean)
                    mete_means.append(N_max_mg_mean)

                    r2_BS_mg_mean = np.mean(r2_list_BS)
                    geom_means.append(r2_BS_mg_mean)
                    r2_METE_mg_mean = np.mean(r2_list_METE)
                    mete_means.append(r2_METE_mg_mean)

                '''Now we check if the lists are the right length
                there are 6 iterations for the percentage
                mete/ geom, append four items each iteration.
                4*6 = 24, add three original = 27
                likewise, for zipf, (5*6) + 3 = 33 '''
                test_lines += 1
                # Only write rows whose stats are complete for all 6 percents.
                if (len(geom_means) == 27):
                    succeess_lines_geom -= 1
                    geom_means_str = ' '.join(map(str, geom_means))
                    #OUT1.write(','.join(map(repr, geom_means_str[i]))
                    print>> OUT1, j, geom_means_str
                if (len(mete_means) == 27):
                    succeess_lines_mete -= 1
                    mete_means_str = ' '.join(map(str, mete_means))
                    print>> OUT2, j, mete_means_str
                print dataset, percent
Пример #30
0
def fig5(SADModels):
    """ This function generates a 2x2 figure, with these subplots:
        One subplot for each model:
            r-squared vs. N
            list of r-squared values and list of Ns
            plotted against each other
    """
    fig = plt.figure()

    for i, model in enumerate(SADModels):

        fig.add_subplot(2, 2, i + 1)

        obs_pred_data = import_obs_pred_data(mydir + '/Results/' + model +
                                             '.txt')
        obs = ((obs_pred_data["obs"]))
        pred = ((obs_pred_data["pred"]))
        site = ((obs_pred_data["site"]))

        obs_data = []
        pred_data = []

        for sites in np.unique(site):
            obs_data.append(obs[sites == site])
            pred_data.append(pred[sites == site])

        Ns = []
        r2s = []

        for j, sad in enumerate(obs_data):

            r2 = macroecotools.obs_pred_rsquare(np.array(sad),
                                                np.array(pred_data[j]))
            r2s.append(r2)
            N = sum(sad)  # Find Total Abundance
            Ns.append(N)

        plt.scatter(np.log(Ns).tolist(),
                    r2s,
                    color='Maroon',
                    label=model,
                    alpha=0.5)  # label is for the legend
        plt.xlabel('Log Abundance', fontsize=8)
        plt.ylabel('Rsquared Value', fontsize=8)
        plt.subplots_adjust(wspace=.35, hspace=.35)
        plt.axhline(y=0)

        if model == 'SimBrokenStick':
            plt.title("Broken Stick R^2 v N", fontsize=10)

        elif model == 'SimLogNormInt':
            plt.title("Log Norm R^2 v N", fontsize=10)

        elif model == 'SimpleRandomFraction':
            plt.title("Random Fraction R^2 v N", fontsize=10)

        elif model == 'SimParetoInt':
            plt.title("Pareto Int R^2 v N", fontsize=10)

        print model + ': Done'

        # insert code to plot a legend
    plt.savefig('/Users/Nathan_Hillis/GitHub/SADModels/Results/R2vN.png',
                dpi=600,
                bbox_inches='tight',
                pad_inches=0.03)
    plt.show()
    return
 def test_func(ab, dist_name, *pars):
     """Return the r-squared between the observed abundances (sorted
     descending) and the prediction from `dist_name` with parameters `pars`."""
     predicted = get_pred_multi_dists(len(ab), dist_name, *pars)
     observed = sorted(ab, reverse = True)
     return macroecotools.obs_pred_rsquare(observed, predicted)
Пример #32
0
# r^2 accumulators for the log-series (METE) and Poisson lognormal fits
mete_r2s = []
pln_r2s = []

# Randomise the order so the ~20 SADs analysed below are a random subset
shuffle(RADs)
for i, obs in enumerate(RADs):

    N = int(sum(obs))
    S = int(len(obs))

    # NOTE(review): under Python 2, obs.count(1) / len(obs) is integer
    # division, so the singleton-fraction test only excludes SADs made
    # entirely of singletons -- confirm whether a float ratio was intended.
    if S > 4 and N > 10 and obs.count(1) / len(obs) < 0.5:

        # log-series (METE) prediction, compared on the log10 scale
        result = mete.get_mete_rad(S, N)
        pred1 = np.log10(result[0])
        obs1 = np.log10(obs)
        mete_r2 = mct.obs_pred_rsquare(np.array(obs1), np.array(pred1))
        mete_r2s.append(mete_r2)

        # Poisson lognormal prediction, same comparison
        pred = get_rad_from_obs(obs, 'pln')
        pred1 = np.log10(pred)
        pln_r2 = mct.obs_pred_rsquare(np.array(obs1), np.array(pred1))
        pln_r2s.append(pln_r2)

        print i, 'N:', N, ' S:', S, ' n:', len(
            pln_r2s), ' |  mete:', mete_r2, '  pln:', pln_r2

    # Stop after ~20 successful fits
    if len(pln_r2s) > 20: break

# Kernel-density estimate of the log-series r^2 distribution
kernel = 0.5
D = get_kdens_choose_kernel(mete_r2s, kernel)
plt.plot(D[0], D[1], color='0.5', lw=3, alpha=0.99, label='log-series')
Пример #33
0
# NOTE(review): `data`, `fig` and `ax1` are assumed to be defined earlier
# in the script; this fragment draws one log-series panel of a 2x2 figure.
site = np.asarray(list(((data["site"]))))
obs = np.asarray(list(((data["obs"]))))
pred = np.asarray(list(((data["pred"]))))

axis_min = 0
axis_max = 2 * max(obs)

# Density-coloured observed vs. predicted scatter on log-log axes
radius=2
mct.plot_color_by_pt_dens(pred, obs, radius, loglog=1, plot_obj=ax1)

# 1:1 reference line
plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')

plt.xlim(0, axis_max)
plt.ylim(0, axis_max)

# Overall fit measured on log10-transformed abundances
r2_all = mct.obs_pred_rsquare(np.log10(obs), np.log10(pred))
r2text = r"${}^{{2}} = {:.{p}f} $".format('r',r2_all , p=2)

# Annotations placed in data coordinates
plt.text(2, 30000, r2text,  fontsize=14)
plt.text(28, 800000, 'Log-series',  fontsize=14)
plt.text(5, 0.1, 'Predicted rank-abundance', fontsize=10)
plt.text(0.1, 60000, 'Observed rank-abundance', rotation='vertical', fontsize=10)

plt.tick_params(axis='both', which='major', labelsize=7)
#plt.subplots_adjust(wspace=0.5, hspace=0.3)
#axins = inset_axes(ax, width="30%", height="30%", loc=4)
#plt.setp(axins, xticks=[], yticks=[])



ax2 = fig.add_subplot(2, 2, 2)
Пример #34
0
def fig4(figname = 'Fig4', data_dir=mydir, radius=2):
    fig = plt.figure()
    plot_dim = 1
    count = 0

    IN_Obs_Pred = importData.import_NSR2_data(mydir + \
        'data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt')
    N = np.asarray(list(((IN_Obs_Pred["N"]))))
    S = np.asarray(list(((IN_Obs_Pred["S"]))))
    NmaxObs = np.asarray(list(((IN_Obs_Pred["NmaxObs"]))))
    # order
    models = ['geom', 'lognorm', 'mete', 'zipf']
    modelSlopes = [0.647520323289, 0.942904468437, 0.769214774397, 0.954497727096]
    modelInterepts = [0.116508916992, 0.292527611072, 0.19240314275, 0.189954627996]
    for g, model in enumerate(models):
        NmaxPred = []
        SPred = []
        for i in range(len(N)):
            NmaxPred_i = importPredictS.predictS(N[i], NmaxObs[i], \
                predictNmax=True).getNmax(b = modelInterepts[g], slope = modelSlopes[g])
            SPred_i = importPredictS.predictS(N[i], NmaxObs[i], predictNmax=True).getS()
            NmaxPred.append(NmaxPred_i)
            SPred.append(SPred_i)
        NmaxPred = np.asarray(NmaxPred)
        SPred = np.asarray(SPred)
        axis_min = 0
        axis_max = 2 * max(NmaxObs)
        ax = fig.add_subplot(2, 2, count+1)
        #ax.set_title(r"$\mathbf{N_{max}}$", y=1.03)
        if model == 'geom':
            ax.set_title(r"$\mathbf{Broken-stick}$")
        elif model == 'lognorm':
            ax.set_title(r"$\mathbf{Lognormal}$")
        elif model == 'mete':
            ax.set_title(r"$\mathbf{Log-series}$")
        elif model == 'zipf':
            ax.set_title(r"$\mathbf{Zipf}$")
        macroecotools.plot_color_by_pt_dens(NmaxPred, NmaxObs, radius, loglog=1,
                        plot_obj=plt.subplot(2,2,count+1))
        plt.plot([axis_min, axis_max],[axis_min, axis_max], 'k-')
        plt.xlim(axis_min, axis_max)
        plt.ylim(0, axis_max)
        print max(NmaxPred)
        r2_all = macroecotools.obs_pred_rsquare(np.log10(NmaxObs), np.log10(NmaxPred))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r',r2_all , p=2)
        plt.text(0.22, 0.91, r2text,  fontsize=13,
            horizontalalignment='center',
            verticalalignment='center',transform = ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=7)
        plt.subplots_adjust(wspace=0.5, hspace=0.3)

        #axins = inset_axes(ax, width="30%", height="30%", loc=4)

        ax.set(adjustable='box-forced', aspect='equal')
        #plt.setp(axins, xticks=[], yticks=[])

        count += 1
    fig.text(0.50, 0.04, r'$Predicted\, log_{10}(N_{max})$', ha='center', va='center', fontsize = 16)
    fig.text(0.04, 0.5, r'$Observed\,log_{10}(N_{max})$', ha='center', va='center', rotation='vertical',\
        fontsize = 16)
    fig_name = str(mydir + 'figures/' + figname + '.png')
    plt.savefig(fig_name, dpi=600)#, bbox_inches = 'tight')#, pad_inches=0)
    plt.close()
Пример #35
0
def generate_obs_pred_data(datasets, methods, size):

    for method in methods:
        for dataset in datasets:
            #OUT1 = open(mydir + "ObsPred/" + method +'_'+dataset+'_obs_pred.txt','w+')
            #OUT2 = open(mydir + "NSR2/" + method +'_'+dataset+'_NSR2.txt','w+')
            #OUT1 = open(mydir + "ObsPred/" + method +'_'+dataset+'_obs_pred_subset.txt','w+')
            #OUT2 = open(mydir + "NSR2/" + method +'_'+dataset+'_NSR2_subset.txt','w+')

            if dataset == "HMP":
                IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs.txt'
                num_lines = sum(1 for line in open(IN))
                OUT1 = open(mydir + "ObsPred/" + method +'_'+dataset+'_obs_pred.txt','w+')
                OUT2 = open(mydir + "NSR2/" + method +'_'+dataset+'_NSR2.txt','w+')
            elif dataset == 'EMPclosed' or dataset == 'EMPpen':
                IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs.txt'
                num_lines = sum(1 for line in open(IN))
                random_sites = np.random.randint(num_lines,size=size)
                num_lines = size
                OUT1 = open(mydir + "ObsPred/" + method +'_'+dataset+'_obs_pred_subset.txt','w+')
                OUT2 = open(mydir + "NSR2/" + method +'_'+dataset+'_NSR2_subset.txt','w+')
                num_lines = sum(1 for line in open(IN))
            else:
                IN = mydir + 'MGRAST-Data/' + dataset +  '/' + 'MGRAST-' + dataset + '-SADs.txt'
                num_lines = sum(1 for line in open(IN))
                OUT1 = open(mydir + "ObsPred/" + method +'_'+ 'MGRAST' + dataset+'_obs_pred.txt','w+')
                OUT2 = open(mydir + "NSR2/" + method +'_'+ 'MGRAST' + dataset+'_NSR2.txt','w+')

            for j,line in enumerate(open(IN)):
                if dataset == "HMP":
                    line = line.split()
                elif size == 0:
                    line = eval(line)
                else:
                    line = eval(line)
                    if j not in random_sites:
                        continue
                #line.strip("[]")
                #line.split()
                obs = map(int, line)

                N = sum(obs)
                S = len(obs)

                if S < 10 or N <= S:
                    num_lines += 1
                    continue

                obs.sort()
                obs.reverse()
                print method, dataset, N, S, 'countdown:', num_lines,

                if method == 'geom': # Predicted geometric series
                    pred = get_GeomSeries(N, S, False) # False mean no zeros allowed

                elif method == 'mete': # Predicted log-series
                    logSeries = mete.get_mete_rad(S, N)
                    pred = logSeries[0]

                r2 = macroecotools.obs_pred_rsquare(np.log10(obs), np.log10(pred))
                print " r2:", r2
                if r2 == -float('inf') or r2 == float('inf') or r2 == float('Nan'):
                    print r2 + " is Nan or inf, removing..."
                    continue
                print>> OUT2, j, N, S, r2
                # write to file, by cite, observed and expected ranked abundances
                for i, sp in enumerate(pred):
                    print>> OUT1, j, obs[i], pred[i]


                num_lines -= 1

            OUT1.close()

        print dataset
Пример #36
0
from macroecotools import plot_color_by_pt_dens, obs_pred_rsquare


def plot_obs_pred(obs_pred_data, dest_file='./obs_pred.png'):
    """Density-coloured observed vs. predicted scatter with a 1:1 line,
    saved to dest_file at 400 dpi."""
    pred = obs_pred_data['pred']
    obs = obs_pred_data['obs']
    plot_color_by_pt_dens(pred, obs, 3, loglog=1)
    # 1:1 reference line spanning the range of predicted values
    endpoints = [min(pred), max(pred)]
    plt.loglog(endpoints, endpoints, 'k-')
    plt.savefig(dest_file, dpi=400)


# A single dataset name may be given on the command line; otherwise run all
if len(sys.argv) > 1:
    datasets = [sys.argv[1]]
else:
    datasets = ['bbs_2012', 'bbs_2008_2012', 'cbc', 'fia', 'gentry', 'naba']

# Plot observed vs. predicted rarity for the fit and test partitions
for dataset in datasets:
    for datatype in ['fit', 'test']:
        for predtype in ['rare']:
            obs_pred_data = read_csv('./results/' + dataset + '_' + predtype +
                                     '_' + datatype + '_obs_pred.csv')
            # report the overall fit before drawing the figure
            print obs_pred_rsquare(np.array(obs_pred_data['obs'].values),
                                   np.array(obs_pred_data['pred'].values))
            fig_name = './figs/' + dataset + '_' + datatype + '_obs_pred_rarity.png'
            plot_obs_pred(obs_pred_data, dest_file=fig_name)
Пример #37
0
 def test_func(ab, dist_name, *pars):
     """R^2 of the descending-sorted abundances against the prediction
     drawn from `dist_name` with parameters `pars`."""
     obs_sorted = sorted(ab, reverse=True)
     pred = get_pred_multi_dists(len(ab), dist_name, *pars)
     return macroecotools.obs_pred_rsquare(obs_sorted, pred)
Пример #38
0
# NOTE(review): mete_r2s and pln_r2s are assumed to be initialised
# earlier in the script; this fragment only appends to them.
# Randomise the order so the ~200 SADs analysed below are a random subset
shuffle(RADs)
for i, obs in enumerate(RADs):

    N = int(sum(obs))
    S = int(len(obs))

    print i, N, S, len(pln_r2s)

    # Only moderately sized communities: >= 10 species, 50 < N < 10000
    if S >= 10 and N > 50:

        if N < 10000:

            # log-series (METE) prediction and its fit (raw scale)
            result = mete.get_mete_rad(S, N)
            predRAD = result[0]
            mete_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            mete_r2s.append(mete_r2)

            #zipf_pred = dist.zipf(obs)
            #predRAD = zipf_pred.from_cdf()
            #zipf_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            #zipf_r2s.append(zipf_r2)

            # Poisson lognormal prediction and its fit (raw scale)
            predRAD = get_rad_from_obs(obs, 'pln')
            pln_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            pln_r2s.append(pln_r2)

    # Stop once about 200 SADs have been fit
    if len(pln_r2s) > 200: break

# NOTE(review): 111 here is a figure number, not a subplot spec
fig = plt.figure(111)
kernel = 0.5
Пример #39
0
def test_zipf_num_est(datasets, estimators, SAD_number, iterations, fail_threshold):
    """Subsample the largest SADs in each dataset and re-fit the zipf model.

    For each dataset, the SAD_number SADs with the largest N (taken from a
    previous zipf NSR2 fit) are repeatedly subsampled via multinomial draws;
    each subsample is re-fit with every estimator and the averaged
    N, S, Nmax, r^2 and gamma statistics are written to
    '<dataset>_zipf_<estimator>_SubSampled_Data.txt'.

    datasets       -- dataset name strings (select the input SAD files)
    estimators     -- zipf parameter estimators to evaluate
    SAD_number     -- number of top-N SADs to process per dataset
    iterations     -- successful subsamples required per percentage
    fail_threshold -- fit timeouts tolerated before a percentage is abandoned
    """
    percents = [0.500000, 0.250000, 0.125000, 0.062500, 0.031250, 0.015625]
    for dataset in datasets:
        # SIGALRM aborts zipf fits that hang (see signal.alarm below)
        signal.signal(signal.SIGALRM, gf.timeout_handler)
        if dataset == 'MGRAST':
            # fix subset l8r
            IN = mydir  + dataset + '-Data' + '/MGRAST/MGRAST-SADs.txt'
            nsr2_data_zipf = gf.import_NSR2_data(mydir + 'NSR2/' + 'zipf_MGRAST_NSR2.txt')
        elif dataset == '95' or dataset == '97' or dataset == '99':
            IN = mydir  + dataset + '-Data/' + str(dataset) + '/MGRAST-' + str(dataset) + '-SADs.txt'
            nsr2_data_zipf = gf.import_NSR2_data(mydir + 'NSR2/' +'zipf_MGRAST'+dataset+'_NSR2.txt')
        elif dataset == 'HMP':
            IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs_NAP.txt'
            nsr2_data_zipf = gf.import_NSR2_data(mydir + 'NSR2/' +  'zipf_'+dataset+'_NSR2.txt')
        else:
            IN = mydir  + dataset + '-Data' + '/' + dataset +'-SADs.txt'
            nsr2_data_zipf = gf.import_NSR2_data(mydir + 'NSR2/' +  'zipf_'+dataset+'_NSR2.txt')

        # (site, N) pairs for every previously fitted SAD
        nsr2_data_zipf_N_site = np.column_stack((nsr2_data_zipf["site"], nsr2_data_zipf["N"]))
        # Sort these arrays (descending by N)
        nsr2_data_zipf_sorted = nsr2_data_zipf_N_site[nsr2_data_zipf_N_site[:,1].argsort()[::-1]]
        nsr2_data_zipf_top100 = nsr2_data_zipf_sorted[:SAD_number,]
        # Get the SAD numbers
        zipf_numbers = nsr2_data_zipf_top100[:,0]
        zipf_numbers = zipf_numbers.astype(int)
        successful_SADs_samplings = SAD_number
        for estimator in estimators:
            OUT = open(mydir + 'SubSampled-Data' + '/' + dataset + '_zipf_' + \
                str(estimator) + '_SubSampled_Data.txt', 'w+')
            num_lines = sum(1 for line in open(IN))
            test_lines = 0
            succeess_lines = SAD_number
            # Walk down the N-sorted site list until SAD_number SADs succeed
            while succeess_lines > 0:
                site = nsr2_data_zipf_sorted[test_lines,0]
                for j,line in enumerate(open(IN)):
                    # Scan the file for the one line matching this site
                    if (j != site):
                        continue
                    else:
                        if dataset == "HMP":
                            line = line.strip().split(',')
                            line = [x.strip(' ') for x in line]
                            line = [x.strip('[]') for x in line]
                            site_name = line[0]
                            line.pop(0)
                        else:
                            line = eval(line)
                    obs = map(int, line)
                    # Calculate relative abundance of each OTU
                    # Use that as weights
                    N_0 = float(sum(obs))
                    S_0 = len(obs)
                    N_max = max(obs)
                    # Skip SADs that are too small or degenerate
                    if S_0 < 10 or N_0 <= S_0:
                        test_lines += 1
                        continue
                    line_ra = map(lambda x: x/N_0, obs)
                    # Absolute subsample sizes implied by each percentage
                    sample_sizes = map(lambda x: round(x*N_0), percents)
                    if any(sample_size <= 10 for sample_size in sample_sizes)  == True:
                        test_lines += 1
                        continue
                    zipf_means = [N_0, S_0, N_max]
                    failed_percents = 0
                    for k, percent in enumerate(percents):
                        # Once one percentage has failed, skip the rest
                        if failed_percents > 0:
                            continue
                        N_max_list_zipf = []
                        N_0_list_zipf = []
                        S_0_list_zipf = []
                        r2_list_zipf = []
                        gamma_list = []
                        iter_count_current = 0
                        iter_count = iterations
                        iter_failed = 0
                        while iter_count > 0 and iter_failed < fail_threshold:
                            # NOTE(review): sample_sizes[0] is used for every
                            # k, so all percents draw the same sample size;
                            # sample_sizes[k] was likely intended -- confirm.
                            sample_size_k = sample_sizes[0]
                            sample_k = np.random.multinomial(sample_size_k, line_ra, size = None)
                            sample_k_sorted = -np.sort( -sample_k[sample_k != 0] )
                            N_0_k = sum(sample_k_sorted)
                            S_0_k = sample_k_sorted.size
                            # NOTE(review): this continue does not decrement
                            # iter_count, so a run of degenerate samples
                            # loops indefinitely -- confirm.
                            if S_0_k < 10 or N_0_k <= S_0_k:
                                continue
                            N_max_k = max(sample_k_sorted)
                            iter_count_current += 1
                            # Start the timer; after 2 seconds a SIGALRM signal is sent.
                            signal.alarm(2)
                            # This try/except loop ensures that
                            #   you'll catch TimeoutException when it's sent.
                            #start_time = time.time()
                            try:
                                # Whatever your function that might hang
                                zipf_class = gf.zipf(sample_k_sorted, estimator)
                                pred_tuple = zipf_class.from_cdf()
                                Zipf_solve_line = zipf_class.zipf_solver(sample_k_sorted)
                                rv = stats.zipf(Zipf_solve_line)
                                pred_zipf = pred_tuple[0]
                                gamma = pred_tuple[1]
                                r2_zipf = macroecotools.obs_pred_rsquare(np.log10(sample_k_sorted), np.log10(pred_zipf))
                                # NOTE(review): r2_zipf == float('Nan') is
                                # always False (NaN != NaN), so NaN fits are
                                # not actually filtered here -- confirm.
                                if (r2_zipf == -float('inf') ) or (r2_zipf == float('inf') ) or (r2_zipf == float('Nan') ):
                                    continue
                                else:
                                    r2_list_zipf.append(r2_zipf)
                                    gamma_list.append(gamma)
                                    N_max_list_zipf.append(N_max_k)
                                    N_0_list_zipf.append(N_0_k)
                                    S_0_list_zipf.append(S_0_k)

                            except gf.TimeoutException:
                                print "Line " + str(j) + ": " + str(estimator) + " timed out"
                                iter_count -= 1
                                # threshold is checked before incrementing,
                                # so the percent is abandoned one timeout late
                                if iter_failed >= fail_threshold:
                                    failed_percents += 1
                                iter_failed += 1
                                continue # continue the for loop if function takes more than x seconds
                            else:
                                iter_count -= 1
                                #print("--- %s seconds ---" % (time.time() - start_time))
                                # Reset the alarm.  NOTE(review): the alarm is
                                # only cancelled on this success path, not
                                # after a timeout -- confirm that is intended.
                                signal.alarm(0)


                        # Keep only percents where every iteration succeeded
                        if len(N_0_list_zipf) != iterations:
                            test_lines += 1
                            continue
                        N_0_zipf_mean = np.mean(N_0_list_zipf)
                        zipf_means.append(N_0_zipf_mean)

                        S_0_zipf_mean = np.mean(S_0_list_zipf)
                        zipf_means.append(S_0_zipf_mean)

                        N_max_zipf_mean = np.mean(N_max_list_zipf)
                        zipf_means.append(N_max_zipf_mean)

                        r2_zipf_mean = np.mean(r2_list_zipf)
                        zipf_means.append(r2_zipf_mean)

                        gamma_zipf_mean = np.mean(gamma_list)
                        zipf_means.append(gamma_zipf_mean)

                    '''Now we check if the lists are the right length
                    there are 6 iterations for the percentage
                    mete/ geom, append four items each iteration.
                    4*6 = 24, add three original = 27
                    likewise, for zipf, (5*6) + 3 = 33 '''
                    if len(zipf_means) == 33:
                        test_lines += 1
                        succeess_lines -= 1
                        zipf_means_str = ' '.join(map(str, zipf_means))
                        #OUT1.write(','.join(map(repr, geom_means_str[i]))
                        print>> OUT, j, zipf_means_str
                        print "Line " + str(j) + ": " + str(succeess_lines) + " SADs to go!"
                    else:
                        test_lines += 1
                #print estimator
            print dataset
Пример #40
0
print 'Number of RADs:', len(RADs)
# r^2 accumulators, one list per candidate model
mete_r2s = []
pln_r2s = []
zipf_r2s = []

ct = 0
# Randomise so the subset analysed below is not biased by input order
shuffle(RADs)
for obs in RADs:
    N = int(sum(obs))
    S = int(len(obs))
    s = obs.count(1)  # number of singleton species (currently unused)

    if S > 9 and N > 9:
        ct += 1
        # log-series (METE) fit, scored on the raw abundance scale
        pred = mete.get_mete_rad(S, N)[0]
        mete_r2 = mct.obs_pred_rsquare(obs, np.array(pred))
        mete_r2s.append(mete_r2)

        # Poisson lognormal fit, scored on the log10 scale.
        # NOTE(review): the two models are scored on different scales
        # (raw vs. log10), so their r^2 values are not directly
        # comparable -- confirm this is intended.
        pred = get_pln_from_obs(obs, 'pln')
        pred = np.log10(pred)
        obs1 = np.log10(obs)
        pln_r2 = mct.obs_pred_rsquare(obs1, pred)
        pln_r2s.append(pln_r2)

        print ct, 'N:', N, ' S:', S, ' n:', len(
            pln_r2s), ' |  mete:', mete_r2, '  pln:', pln_r2
    # minct is assumed to be defined earlier in the script
    if len(pln_r2s) > minct: break

# Kernel-density estimate of the lognormal r^2 values
kernel = 0.5
D = get_kdens_choose_kernel(pln_r2s, kernel)
plt.plot(D[0],
# NOTE(review): this plt.plot call is truncated in this fragment
Пример #41
0
shuffle(RADs)
for i, obs in enumerate(RADs):

    N = int(sum(obs))
    S = int(len(obs))

    print i, N, S, len(pln_r2s)

    if S >= 10 and N > 50:

        if N < 10000:

            result = mete.get_mete_rad(S, N)
            predRAD = result[0]
            mete_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            mete_r2s.append(mete_r2)

            #zipf_pred = dist.zipf(obs)
            #predRAD = zipf_pred.from_cdf()
            #zipf_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            #zipf_r2s.append(zipf_r2)

            predRAD = get_rad_from_obs(obs, 'pln')
            pln_r2 = mct.obs_pred_rsquare(np.array(obs), np.array(predRAD))
            pln_r2s.append(pln_r2)

    if len(pln_r2s) > 200: break


fig = plt.figure(111)