def fig2(n=352899, figname='Fig2', data_dir=mydir,
         stratify=True, radius=2, remove=0, zipfType='mle', RGF=False,
         lognormType='pln', saveAs='eps'):
    """Draw a 2x2 grid of observed-vs-predicted abundance plots, one per model.

    For each model in ('geom', 'lognorm', 'mete', 'zipf') the function:

    * loads the obs/pred table and the per-site NSR2 table from
      ``data_dir + 'data/.../Stratified/'``;
    * if the table has more than ``n`` rows, keeps a random sample of ``n``
      rows (drawn without replacement from ``np.random``);
    * draws the points with ``macroecotools.plot_color_by_pt_dens`` on
      log-log axes together with a 1:1 line;
    * annotates the panel with the mean of the ``R2`` column of the
      ``Stratified_Test`` NSR2 table and prints that mean and its standard
      deviation;
    * adds an inset (``loc=4``) showing a histogram of the ``R2`` column of
      the ``Stratified`` NSR2 table.

    The figure is saved to ``mydir + 'figures/' + figname + '_RGB.' + saveAs``
    (note: the output location uses the module-level ``mydir``, not
    ``data_dir``).

    Parameters
    ----------
    n : int or 'all'
        Maximum number of points per panel; ``'all'`` disables subsampling.
    figname : str
        Stem of the output file name.
    data_dir : str
        Directory prefix (with trailing separator) of the input data.
    radius : number
        Passed through to ``macroecotools.plot_color_by_pt_dens``.
    zipfType, lognormType : str
        Tags inserted into the Zipf / lognormal obs-pred file names.
    saveAs : str
        Output format and file extension handed to ``plt.savefig``.
    stratify, remove, RGF
        Accepted for backward compatibility; not used by this function.
    """
    # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)
    methods = ['geom', 'lognorm', 'mete', 'zipf']
    titles = {'geom': "Broken-stick", 'lognorm': "Lognormal",
              'mete': "Log-series", 'zipf': "Zipf"}

    # File-name tags that follow the method name; models not listed get ''.
    obs_pred_tag = {'zipf': '_' + zipfType, 'lognorm': '_' + lognormType}
    nsr2_tag = {'zipf': '_mle', 'lognorm': '_' + lognormType}
    # NOTE(review): the bootstrap (Stratified_Test) tags are hard-coded and
    # ignore zipfType / lognormType -- kept as in the original; confirm.
    nsr2_test_tag = {'zipf': '_mle', 'lognorm': '_pln'}

    fig = plt.figure()
    plot_dim = 2
    fig.subplots_adjust(bottom=0.30)

    for count, method in enumerate(methods):
        obs_pred_data = importData.import_obs_pred_data(
            data_dir + 'data/ObsPred/Stratified/' + method
            + obs_pred_tag.get(method, '') + '_obs_pred_stratify.txt')
        INh2 = importData.import_NSR2_data(
            data_dir + 'data/NSR2/Stratified/' + method
            + nsr2_tag.get(method, '') + '_NSR2_stratify.txt')

        obs = np.asarray(list(obs_pred_data["obs"]))
        pred = np.asarray(list(obs_pred_data["pred"]))

        # Thin very large tables to at most n randomly chosen points.
        if n != 'all' and len(obs) > n:
            keep = np.random.choice(len(obs), size=n, replace=False)
            obs = obs[keep]
            pred = pred[keep]

        # The Zipf panel is scaled by its predictions, the others by the
        # observations.
        axis_min = 0
        axis_max = 2 * max(pred) if method == 'zipf' else 2 * max(obs)

        ax = fig.add_subplot(plot_dim, plot_dim, count + 1)
        NSR2_BS = importData.import_NSR2_data(
            data_dir + 'data/NSR2/Stratified_Test/' + method
            + nsr2_test_tag.get(method, '') + '_NSR2_stratify.txt')

        ax.set_title(titles[method])
        print('%d %d' % (len(pred), len(obs)))
        macroecotools.plot_color_by_pt_dens(
            pred, obs, radius, loglog=1,
            plot_obj=plt.subplot(plot_dim, plot_dim, count + 1))
        plt.plot([axis_min, axis_max], [axis_min, axis_max], 'k-')
        plt.xlim(0, axis_max)
        plt.ylim(0, axis_max)

        r2s = INh2["R2"].astype(float)

        # Mean / spread of the bootstrap R2 values. The original took
        # np.std of the scalar mean, which is always 0.
        boot_r2 = NSR2_BS["R2"]
        r2_all = np.mean(boot_r2)
        print(method + ' mean = ' + str(r2_all))
        print(method + ' std dev = ' + str(np.std(boot_r2)))

        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r', r2_all, p=2)
        text_x = 0.25 if method == 'geom' else 0.22
        plt.text(text_x, 0.90, r2text, fontsize=14,
                 horizontalalignment='center', verticalalignment='center',
                 transform=ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=10)
        plt.subplots_adjust(wspace=0.0000000001, hspace=0.5)

        # Inset: histogram of the per-site R2 values as a line. The x values
        # are the bin edges shifted by one bin width with the last dropped
        # (i.e. each bin's upper edge), exactly as in the original.
        axins = inset_axes(ax, width="30%", height="30%", loc=4)
        yvals, edges = np.histogram(r2s, range=(0, 1))
        xvals = (edges + (edges[1] - edges[0]))[:-1]
        plt.plot(xvals, yvals, 'k-', linewidth=2)
        plt.axis([0, 1, 0, 1.1 * max(yvals)])

        # NOTE(review): 'box-forced' is only accepted by older Matplotlib
        # releases; newer ones expect adjustable='box'.
        ax.set(adjustable='box-forced', aspect='equal')
        plt.setp(axins, xticks=[], yticks=[])

    plt.tight_layout(pad=1.5, w_pad=0.8, h_pad=0.8)
    fig.text(0.50, 0.03, 'Predicted abundance', ha='center', va='center',
             fontsize=16)
    fig.text(0.08, 0.5, 'Observed abundance', ha='center', va='center',
             rotation='vertical', fontsize=16)
    fig_name = str(mydir + 'figures/' + figname + '_RGB.' + saveAs)
    plt.savefig(fig_name, dpi=600, format=saveAs)
    plt.close()
def figS1(n=35289, figname='FigS1', data_dir=mydir, radius=2, zipfType='mle',
          saveAs='eps', lognormType='pln'):
    """Draw a 3x4 grid of observed-vs-predicted plots (dataset x model).

    Rows correspond to the datasets '95', '97', '99' (labelled
    "MG-RAST <dataset>%") and columns to the models 'geom', 'lognorm',
    'mete', 'zipf'. Each panel shows:

    * the obs/pred points (randomly thinned to at most ``n`` rows) drawn
      with ``macroecotools.plot_color_by_pt_dens`` plus a 1:1 line;
    * the mean of the ``R2`` column of the dataset-specific table under
      ``data/NSR2/Stratified_Test/SequenceSimilarity/``;
    * an inset (``loc=4``) histogram of the ``R2`` column of the
      ``data/NSR2/Stratified/`` table.

    The figure is saved to ``mydir + 'figures/' + figname + '_RGB.' + saveAs``.

    Parameters
    ----------
    n : int or 'all'
        Maximum number of points per panel; ``'all'`` disables subsampling.
    figname : str
        Stem of the output file name.
    data_dir : str
        Directory prefix (with trailing separator) of the input data.
    radius : number
        Passed through to ``macroecotools.plot_color_by_pt_dens``.
    zipfType, lognormType : str
        Tags inserted into the Zipf / lognormal file names.
    saveAs : str
        Output format and file extension handed to ``plt.savefig``.
    """
    methods = ['geom', 'lognorm', 'mete', 'zipf']
    datasets = ['95', '97', '99']
    titles = {'geom': "Broken-stick", 'lognorm': "Lognormal",
              'mete': "Log-series", 'zipf': "Zipf"}

    # File-name tags that follow the method name; models not listed get ''.
    obs_pred_tag = {'zipf': '_' + zipfType, 'lognorm': '_' + lognormType}
    nsr2_tag = {'zipf': '_mle', 'lognorm': '_' + lognormType}

    fig = plt.figure()
    rows = len(datasets)
    columns = len(methods)

    # NOTE(review): the obs/pred and Stratified NSR2 paths do not depend on
    # the dataset, so every row plots the same underlying table (only the
    # random subsample and the annotated mean R2 differ). Confirm that this
    # is intended. Because the inputs are identical across rows they are
    # read from disk once per method and reused.
    loaded = {}

    count = 0
    for i, dataset in enumerate(datasets):
        for j, method in enumerate(methods):
            print(count)

            if method not in loaded:
                obs_pred_data = importData.import_obs_pred_data(
                    data_dir + 'data/ObsPred/Stratified/' + method
                    + obs_pred_tag.get(method, '') + '_obs_pred_stratify.txt')
                INh2 = importData.import_NSR2_data(
                    data_dir + 'data/NSR2/Stratified/' + method
                    + nsr2_tag.get(method, '') + '_NSR2_stratify.txt')
                loaded[method] = (
                    np.asarray(list(obs_pred_data["obs"])),
                    np.asarray(list(obs_pred_data["pred"])),
                    INh2["R2"].astype(float),
                )
            obs_full, pred_full, r2s = loaded[method]

            # Thin very large tables to at most n randomly chosen points;
            # otherwise work on copies so the cached arrays stay untouched.
            if n != 'all' and len(obs_full) > n:
                keep = np.random.choice(len(obs_full), size=n, replace=False)
                obs = obs_full[keep]
                pred = pred_full[keep]
            else:
                obs = obs_full.copy()
                pred = pred_full.copy()

            print("number of points " + str(len(obs)))

            # The Zipf panel is scaled by its predictions, the others by
            # the observations.
            axis_min = 0
            axis_max = 2 * max(pred) if method == 'zipf' else 2 * max(obs)

            ax = fig.add_subplot(rows, columns, count + 1)
            if i == 0:
                # Column headers on the top row only.
                ax.set_title(titles[method])
            if j == 0:
                # Row labels on the first column only.
                ax.set_ylabel("MG-RAST " + dataset + "%", rotation=90, size=12)

            macroecotools.plot_color_by_pt_dens(
                pred, obs, radius, loglog=1,
                plot_obj=plt.subplot(rows, columns, count + 1))
            plt.plot([axis_min, axis_max], [axis_min, axis_max], 'k-')
            plt.xlim(0, axis_max)
            plt.ylim(0, axis_max)

            mean_r2s = np.mean(r2s)
            std_r2s = np.std(r2s)
            print(method + ' ' + dataset)
            print("Mean r2 " + str(mean_r2s))
            print("Standard dev. " + str(std_r2s))

            getR2 = importData.import_NSR2_data(
                data_dir + 'data/NSR2/Stratified_Test/SequenceSimilarity/'
                + method + nsr2_tag.get(method, '') + '_' + dataset
                + '_NSR2_stratify.txt')
            r2_mean = np.mean(getR2["R2"])

            # The broken-stick panel is annotated with one extra decimal and
            # a slightly shifted label.
            is_geom = method == 'geom'
            r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format(
                'r', r2_mean, p=3 if is_geom else 2)
            plt.text(0.28 if is_geom else 0.25, 0.90, r2text, fontsize=10,
                     horizontalalignment='center',
                     verticalalignment='center', transform=ax.transAxes)
            plt.tick_params(axis='both', which='major', labelsize=8)
            plt.subplots_adjust(wspace=0.0000000001, hspace=0.5)

            # Inset: histogram of the per-site R2 values as a line. The x
            # values are the bin edges shifted by one bin width with the
            # last dropped, exactly as in the original.
            axins = inset_axes(ax, width="30%", height="30%", loc=4)
            yvals, edges = np.histogram(r2s, range=(0, 1))
            xvals = (edges + (edges[1] - edges[0]))[:-1]
            plt.plot(xvals, yvals, 'k-', linewidth=2)
            plt.axis([0, 1, 0, 1.1 * max(yvals)])

            # NOTE(review): 'box-forced' is only accepted by older
            # Matplotlib releases; newer ones expect adjustable='box'.
            ax.set(adjustable='box-forced', aspect='equal')
            plt.setp(axins, xticks=[], yticks=[])

            count += 1

    plt.tight_layout(pad=1.5, w_pad=0.8, h_pad=0.8)
    fig.subplots_adjust(left=0.1)
    fig.text(0.50, 0.02, 'Predicted abundance', ha='center', va='center',
             fontsize=14)
    fig.text(0.03, 0.5, 'Observed abundance', ha='center', va='center',
             rotation='vertical', fontsize=14)
    fig_name = str(mydir + 'figures/' + figname + '_RGB.' + saveAs)
    plt.savefig(fig_name, dpi=600, format=saveAs)
    plt.close()
def Supp(figname='Supp', data_dir=mydir, radius=2):
    """Plot observed vs. predicted values for two lognormal predictions.

    Reads ``data/ObsPred/Stratified/lognorm_75_25_obs_pred_stratify_test.txt``
    under ``data_dir`` and draws two panels in a 2x2 subplot grid: the
    ``pred7525`` column ("75:25 Simulation") and the ``predPln`` column
    ("Lognormal MLE"), each against the ``obs`` column. Each panel is drawn
    with ``macroecotools.plot_color_by_pt_dens`` on log-log axes with a 1:1
    line and is annotated with
    ``macroecotools.obs_pred_rsquare(log10(obs), log10(pred))``.

    The figure is saved as ``mydir + 'figures/' + figname + '.png'``.

    Parameters
    ----------
    figname : str
        Stem of the output file name.
    data_dir : str
        Directory prefix (with trailing separator) of the input data.
        Previously this argument was ignored and ``mydir`` was always used;
        the default is unchanged.
    radius : number
        Passed through to ``macroecotools.plot_color_by_pt_dens``.
    """
    # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)
    fig = plt.figure()
    plot_dim = 2

    IN_Obs_Pred = importData.import_obs_pred_data(
        data_dir
        + 'data/ObsPred/Stratified/lognorm_75_25_obs_pred_stratify_test.txt')
    obs = np.asarray(list(IN_Obs_Pred["obs"]))
    pred7525 = np.asarray(list(IN_Obs_Pred["pred7525"]))
    predPln = np.asarray(list(IN_Obs_Pred["predPln"]))

    panels = [
        (pred7525, r"$\mathbf{75:25\, Simulation}$"),
        (predPln, r"$\mathbf{Lognormal\, MLE}$"),
    ]

    # Both panels share the same axis range, derived from the observations.
    axis_min = 0
    axis_max = 2 * max(obs)

    for count, (predicted, title) in enumerate(panels):
        ax = fig.add_subplot(plot_dim, plot_dim, count + 1)
        ax.set_title(title)

        macroecotools.plot_color_by_pt_dens(
            predicted, obs, radius, loglog=1,
            plot_obj=plt.subplot(plot_dim, plot_dim, count + 1))
        plt.plot([axis_min, axis_max], [axis_min, axis_max], 'k-')
        plt.xlim(0, axis_max)
        plt.ylim(0, axis_max)

        r2_all = macroecotools.obs_pred_rsquare(np.log10(obs),
                                                np.log10(predicted))
        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format('r', r2_all, p=2)
        plt.text(0.18, 0.93, r2text, fontsize=10,
                 horizontalalignment='center', verticalalignment='center',
                 transform=ax.transAxes)
        plt.tick_params(axis='both', which='major', labelsize=7)
        plt.subplots_adjust(wspace=0.5, hspace=0.3)

        # NOTE(review): the inset is still created although nothing is drawn
        # into it any more (the histogram code was disabled), so each panel
        # gets an empty inset box. Kept to preserve the rendered figure;
        # confirm whether it should be removed.
        inset_axes(ax, width="30%", height="30%", loc=4)

        # NOTE(review): 'box-forced' is only accepted by older Matplotlib
        # releases; newer ones expect adjustable='box'.
        ax.set(adjustable='box-forced', aspect='equal')

    fig.text(0.50, 0.04, r'$Predicted \; rank-abundance$', ha='center',
             va='center')
    fig.text(0.05, 0.5, r'$Observed \; rank-abundance$', ha='center',
             va='center', rotation='vertical')
    fig_name = str(mydir + 'figures/' + figname + '.png')
    plt.savefig(fig_name, dpi=600)
    plt.close()
def figS2(
    n=35289,
    figname="FigS2",
    data_dir=mydir,
    stratify=True,
    radius=2,
    remove=1,
    zipfType="mle",
    RGF=False,
    saveAs="eps",
    lognormType="pln",
):
    """Draw the 2x2 observed-vs-predicted grid from the ``Remove_1s`` tables.

    For each model in ('geom', 'lognorm', 'mete', 'zipf') the function:

    * loads the obs/pred table and the per-site NSR2 table from the
      ``Remove_1s/Stratified/`` directories under ``data_dir``;
    * if the table has more than ``n`` rows, keeps a random sample of ``n``
      rows (drawn without replacement from ``np.random``);
    * draws the points with ``macroecotools.plot_color_by_pt_dens`` on
      log-log axes together with a 1:1 line;
    * annotates the panel with the mean of the ``R2`` column of the
      ``Stratified_Test/Remove_1s/`` NSR2 table;
    * adds an inset (``loc=4``) histogram of the ``R2`` column of the
      ``Remove_1s/Stratified/`` NSR2 table.

    The figure is saved to ``mydir + 'figures/' + figname + '_RGB.' + saveAs``.

    Parameters
    ----------
    n : int or 'all'
        Maximum number of points per panel; ``'all'`` disables subsampling.
    figname : str
        Stem of the output file name.
    data_dir : str
        Directory prefix (with trailing separator) of the input data.
    radius : number
        Passed through to ``macroecotools.plot_color_by_pt_dens``.
    zipfType, lognormType : str
        Tags inserted into the Zipf / lognormal obs-pred file names.
    saveAs : str
        Output format and file extension handed to ``plt.savefig``.
    stratify, remove, RGF
        Accepted for backward compatibility; not used by this function.
    """
    # TAKEN FROM THE mete_sads.py script used for White et al. (2012)
    # Used for Figure 3 Locey and White (2013)
    methods = ["geom", "lognorm", "mete", "zipf"]
    titles = {
        "geom": "Broken-stick",
        "lognorm": "Lognormal",
        "mete": "Log-series",
        "zipf": "Zipf",
    }

    # File-name tags that follow the method name; models not listed get "".
    obs_pred_tag = {"zipf": "_" + zipfType, "lognorm": "_" + lognormType}
    nsr2_tag = {"zipf": "_mle", "lognorm": "_" + lognormType}
    # NOTE(review): the bootstrap (Stratified_Test) tags are hard-coded and
    # ignore zipfType / lognormType -- kept as in the original; confirm.
    nsr2_test_tag = {"zipf": "_mle", "lognorm": "_pln"}

    fig = plt.figure()
    plot_dim = 2
    fig.subplots_adjust(bottom=0.30)

    for count, method in enumerate(methods):
        obs_pred_data = importData.import_obs_pred_data(
            data_dir
            + "data/ObsPred/Remove_1s/Stratified/"
            + method
            + obs_pred_tag.get(method, "")
            + "_obs_pred_1_stratify.txt"
        )
        INh2 = importData.import_NSR2_data(
            data_dir
            + "data/NSR2/Remove_1s/Stratified/"
            + method
            + nsr2_tag.get(method, "")
            + "_NSR2_1_stratify.txt"
        )

        obs = np.asarray(list(obs_pred_data["obs"]))
        pred = np.asarray(list(obs_pred_data["pred"]))

        # Thin very large tables to at most n randomly chosen points.
        if n != "all" and len(obs) > n:
            keep = np.random.choice(len(obs), size=n, replace=False)
            obs = obs[keep]
            pred = pred[keep]

        # The Zipf panel is scaled by its predictions, the others by the
        # observations.
        axis_min = 0
        axis_max = 2 * max(pred) if method == "zipf" else 2 * max(obs)

        ax = fig.add_subplot(plot_dim, plot_dim, count + 1)
        NSR2_BS = importData.import_NSR2_data(
            data_dir
            + "data/NSR2/Stratified_Test/Remove_1s/"
            + method
            + nsr2_test_tag.get(method, "")
            + "_NSR2_1_stratify.txt"
        )

        ax.set_title(titles[method])
        macroecotools.plot_color_by_pt_dens(
            pred, obs, radius, loglog=1,
            plot_obj=plt.subplot(plot_dim, plot_dim, count + 1)
        )
        plt.plot([axis_min, axis_max], [axis_min, axis_max], "k-")
        plt.xlim(0, axis_max)
        plt.ylim(0, axis_max)

        r2s = INh2["R2"].astype(float)

        # Mean of the bootstrap R2 values, shown as the panel annotation.
        r2_all = np.mean(NSR2_BS["R2"])
        print(method)

        r2text = r"${}^{{2}}_{{m}} = {:.{p}f} $".format("r", r2_all, p=2)
        plt.text(
            0.25 if method == "geom" else 0.22,
            0.90,
            r2text,
            fontsize=14,
            horizontalalignment="center",
            verticalalignment="center",
            transform=ax.transAxes,
        )
        plt.tick_params(axis="both", which="major", labelsize=10)
        plt.subplots_adjust(wspace=0.0000000001, hspace=0.5)

        # Inset: histogram of the per-site R2 values as a line. The x values
        # are the bin edges shifted by one bin width with the last dropped,
        # exactly as in the original.
        axins = inset_axes(ax, width="30%", height="30%", loc=4)
        yvals, edges = np.histogram(r2s, range=(0, 1))
        xvals = (edges + (edges[1] - edges[0]))[:-1]
        plt.plot(xvals, yvals, "k-", linewidth=2)
        plt.axis([0, 1, 0, 1.1 * max(yvals)])

        # NOTE(review): 'box-forced' is only accepted by older Matplotlib
        # releases; newer ones expect adjustable='box'.
        ax.set(adjustable="box-forced", aspect="equal")
        plt.setp(axins, xticks=[], yticks=[])

    plt.tight_layout(pad=1.5, w_pad=0.8, h_pad=0.8)
    fig.text(0.50, 0.02, "Predicted abundance", ha="center", va="center",
             fontsize=16)
    fig.text(0.08, 0.5, "Observed abundance", ha="center", va="center",
             rotation="vertical", fontsize=16)
    fig_name = str(mydir + "figures/" + figname + "_RGB." + saveAs)
    plt.savefig(fig_name, dpi=600, format=saveAs)
    plt.close()
def getLogNormSim(testNumber=100, sample_size=1000):
    """Simulate 75-25 lognormal SADs for randomly chosen sites and save them.

    Sites are drawn at random, without replacement, from the unique sites of
    the 'Stratified' lognormal NSR2 table. For every NSR2 row of the drawn
    site, ``simLogNormFile.simLogNorm(N, S, sample_size).SimLogNormInt()`` is
    run; a result whose length differs from ``S`` is discarded. The loop
    stops after ``testNumber`` accepted simulations, or earlier if no
    candidate sites remain.

    The output file
    ``mydir + 'data/ObsPred/Stratified/lognorm_75_25_obs_pred_stratify_test.txt'``
    receives one space-separated line of four integers per matching row of
    the lognormal obs/pred table: site, ``obs``, ``pred`` (from the
    ``lognorm_pln`` table) and the next simulated 75-25 value.

    Parameters
    ----------
    testNumber : int
        Number of accepted simulations to collect.
    sample_size : int
        Passed through to ``simLogNormFile.simLogNorm``.

    Notes
    -----
    ``models.timeout_handler`` is registered for ``SIGALRM`` because some
    SADs can take a very long time to generate.
    NOTE(review): no alarm is armed and no timeout exception is caught in
    this function, so the timeout only takes effect if the simulation code
    arms it itself -- confirm.
    """
    IN_NSR2 = importData.import_NSR2_data(
        mydir + 'data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt')
    IN_Obs_Pred = importData.import_obs_pred_data(
        mydir + 'data/ObsPred/Stratified/lognorm_pln_obs_pred_stratify.txt')
    out_path = (mydir
                + 'data/ObsPred/Stratified/lognorm_75_25_obs_pred_stratify_test.txt')

    siteNSR2 = np.asarray(list(IN_NSR2["site"]))
    N = np.asarray(list(IN_NSR2["N"]))
    S = np.asarray(list(IN_NSR2["S"]))
    siteObsPred = np.asarray(list(IN_Obs_Pred["site"]))
    obs = np.asarray(list(IN_Obs_Pred["obs"]))
    pred = np.asarray(list(IN_Obs_Pred["pred"]))

    uniqueSites = np.unique(siteNSR2)
    pred7525 = []   # simulated abundances, concatenated over accepted runs
    sites7525 = []  # site of each entry in pred7525

    signal.signal(signal.SIGALRM, models.timeout_handler)

    count = testNumber
    while count > 0:
        if len(uniqueSites) == 0:
            # Previously this surfaced as an opaque ValueError from randint
            # and lost everything simulated so far.
            print('no sites left; ' + str(count)
                  + ' requested simulations were not completed')
            break

        index = random.randint(0, len(uniqueSites) - 1)
        randomSite = uniqueSites[index]
        uniqueSites = np.delete(uniqueSites, index)

        for i in np.flatnonzero(siteNSR2 == randomSite):
            site = siteNSR2[i]
            N_i = N[i]
            S_i = S[i]
            SAD = simLogNormFile.simLogNorm(N_i, S_i, sample_size).SimLogNormInt()
            if len(SAD) != S_i:
                # Simulation did not return the requested number of species.
                continue
            print('countdown: ' + str(count))
            print('%s %s %s' % (site, S_i, len(SAD)))
            count -= 1
            for abundance in SAD:
                pred7525.append(abundance)
                sites7525.append(site)

    # Simulated sites in order of first appearance, so that the rows written
    # below line up with the order of pred7525.
    first_rows = np.unique(sites7525, return_index=True)[1]
    uniqueSites7525 = [sites7525[k] for k in sorted(first_rows)]

    # Index the obs/pred rows by integer site id once instead of rescanning
    # the whole table for every simulated site.
    rows_by_site = {}
    for r, s in enumerate(siteObsPred):
        rows_by_site.setdefault(int(s), []).append(r)

    # The file is opened only now, so a failure during the (long) simulation
    # phase no longer truncates an existing output file, and it is closed
    # even if writing fails.
    with open(out_path, 'w') as OUT:
        countTest = 0
        for uniqueSite7525 in uniqueSites7525:
            for r in rows_by_site.get(uniqueSite7525, ()):
                # The site column is the site being written; the original
                # indexed the per-species list with the unique-site counter
                # and so mislabelled most rows.
                OUT.write('%d %d %d %d\n' % (int(uniqueSite7525),
                                             int(obs[r]),
                                             int(pred[r]),
                                             int(pred7525[countTest])))
                countTest += 1