def test_unequal_group_sizes(self):
    """Check ANOSIM on unequal group sizes for both label sets."""
    expected = pd.Series(
        index=self.exp_index,
        data=['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999])

    # Both groupings are compared against the same expected series, so
    # relabeling the groups must not change any reported value.
    for grouping in (self.grouping_unequal,
                     self.grouping_unequal_relabeled):
        np.random.seed(0)
        observed = anosim(self.dm_unequal, grouping)
        self.assert_series_equal(observed, expected)
def test_no_ties(self):
    """Check the seeded ANOSIM result for the tie-free distance matrix."""
    expected_values = ['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999]
    expected = pd.Series(expected_values, index=self.exp_index,
                         name='ANOSIM results')

    # Seed the global NumPy RNG so the permutation-based p-value is fixed.
    np.random.seed(0)
    self.assert_series_equal(
        anosim(self.dm_no_ties, self.grouping_equal), expected)
def test_ties(self):
    """Check ANOSIM on the tied matrix is repeatable across input forms."""
    expected = pd.Series(
        index=self.exp_index,
        data=['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999])

    # Two ways of describing the same groups: a plain grouping vector and
    # a data frame column. Each is run twice with the same seed, so the
    # result must be reproducible and identical for both input forms.
    runs = (
        lambda: anosim(self.dm_ties, self.grouping_equal),
        lambda: anosim(self.dm_ties, self.df, column='Group'),
    )
    for run in runs:
        for _ in range(2):
            np.random.seed(0)
            self.assert_series_equal(run(), expected)
def beta_diversity_pcoa(biom_fp, method="braycurtis", permutations=99, dim=2,
                        col='method', colormap={'expected': 'red',
                                                'rdp': 'seagreen',
                                                'sortmerna': 'gray',
                                                'uclust': 'blue',
                                                'blast': 'purple'}):
    '''From biom table, compute a distance matrix; run PCoA; generate a
    PCoA plot; and test for group differences with ANOSIM.

    biom_fp: path
        Path to biom.Table containing sample metadata.
    method: str
        skbio.Diversity method used to build the distance matrix.
    permutations: int
        Number of permutations to perform for anosim tests.
    dim: int
        Number of dimensions to plot. ``dim == 2`` draws the 2-D bokeh
        plot; any other value falls through to the skbio plot.
    col: str
        metadata name to use for distinguishing groups for anosim tests and
        pcoa plots.
    colormap: dict
        map groups names (must be group names in col) to colors used for
        plots. Only used for the 2-D plot. The default dict is only read,
        never modified, so sharing it between calls is harmless.

    Returns the tuple ``(s_md, results, pc, dm)``: sample metadata, the
    anosim result, the PCoA result and the distance matrix. The anosim
    statistic and p-value are also printed.
    '''
    dm, s_md = make_distance_matrix(biom_fp, method=method)

    # pcoa
    pc = pcoa(dm)

    # anosim tests
    results = anosim(dm, s_md, column=col, permutations=permutations)
    print('R = ', results['test statistic'], '; P = ', results['p-value'])

    if dim == 2:
        # bokeh pcoa plots
        # .loc replaces the removed DataFrame.ix indexer (label selection).
        pc123 = pc.samples.loc[:, ["PC1", "PC2", "PC3"]]
        smd_merge = s_md.merge(pc123, left_index=True, right_index=True)
        # Colour by the requested grouping column, the same one the anosim
        # test above used, rather than a hard-coded 'method' column.
        smd_merge['Color'] = [colormap[x] for x in smd_merge[col]]
        # Positional access is spelled out with .iloc because these objects
        # are label-indexed; a bare integer [] lookup relies on deprecated
        # positional fallback.
        title = smd_merge['reference'].iloc[0]
        labels = ['PC {0} ({1:.2f})'.format(d + 1,
                                            pc.proportion_explained.iloc[d])
                  for d in range(0, 2)]
        circle_plot_from_dataframe(smd_merge, "PC1", "PC2", title,
                                   columns=["method", "sample_id", "params"],
                                   color="Color", labels=labels)
    else:
        # skbio pcoa plots, grouped by the same column as the anosim test
        pcoa_plot_skbio(pc, s_md, col=col)

    return s_md, results, pc, dm
def main(args):
    """Compute the ANOSIM statistic for one table and print it as a CSV row.

    ``args.data`` is read as a delimited table whose first column is the
    index. Rows with a null ``taxon`` are dropped, Euclidean distances are
    computed over every remaining column except ``taxon``, and ANOSIM is
    run with ``taxon`` as the grouping and no permutations. The result is
    written to stdout as a single header-less row labelled
    ``args.data_name``.
    """
    data_df = pd.read_table(args.data, index_col=0)
    data_df_nonnull = data_df[data_df['taxon'].notnull()]

    # A pandas Index has no ``remove`` method (the original call raised
    # AttributeError); ``drop`` returns a new Index without the label.
    val_cols = data_df_nonnull.columns.drop('taxon')

    dm = DistanceMatrix(
        squareform(pdist(data_df_nonnull[val_cols], metric='euclidean')))
    a = anosim(dm, data_df_nonnull['taxon'], permutations=0)

    # Turn the result Series into a one-row frame keyed by the data name.
    a_df = pd.DataFrame(a).T
    a_df.index = [args.data_name]
    a_df.to_csv(sys.stdout, header=None)
eigen3 = eigen['PC3'].values print(eigen) print(eigen1) df_fins = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0) print(df_fins) df_fins.reset_index() df_fins = df_fins[['sal']] print(df_fins) #df_fin.to_csv("test6.tsv", sep="\t", header=1) df_fins['Observed OTUs'] = adiv_obs_otuss df_fins['Faith PD'] = adiv_faith_pds anosims = anosim(wu_dms, df_fins, column='sal', permutations=999) print(anosims['test statistic']) print(anosims['p-value']) print(df_fins.corr(method="spearman")) print(adiv_obs_otuss) figs = plt.figure() #plt.close('all') #plt.subplot(1,3,1) figs = wu_pcs.plot(df_fins, 'sal', axis_labels=('PC1' + str(eigen1) + '%', 'PC2' + str(eigen2) + '%', 'PC3' + str(eigen3) + '%'),
def beta_diversity(TaXon_table_xlsx, width, heigth, cmap, meta_data_to_test,
                   taxonomic_level, path_to_outdirs, template, font_size,
                   diss_metric):
    """Plot a sample-by-sample dissimilarity heatmap with an ANOSIM summary.

    Reads the TaXon table and its ``<stem>_metadata.xlsx`` companion from
    ``<path_to_outdirs>/Meta_data_table``, drops samples whose value in
    ``meta_data_to_test`` is empty, sums the reads per ``taxonomic_level``,
    computes ``diss_metric`` distances between samples and runs ANOSIM
    (999 permutations) against the metadata. The heatmap is written as
    pdf/html and the distance matrix as xlsx under
    ``<path_to_outdirs>/Beta_diversity``.

    Raises RuntimeError (after a popup) when the metadata is unique for
    every sample or identical for all samples. Shows an error popup and
    returns when the sample names of both tables do not match.
    """
    import pandas as pd
    import numpy as np
    from skbio.diversity import beta_diversity
    from skbio.stats.distance import anosim
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import webbrowser

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    metadata_xlsx = Path("".join([
        str(path_to_outdirs), "/", "Meta_data_table", "/",
        TaXon_table_xlsx.stem, "_metadata.xlsx"]))

    taxon_df = pd.read_excel(TaXon_table_xlsx, header=0).fillna("unidentified")
    # sample columns start at position 10 of the TaXon table
    taxon_samples = taxon_df.columns.tolist()[10:]
    meta_df = pd.read_excel(metadata_xlsx, header=0).fillna("nan")
    meta_samples = meta_df['Samples'].tolist()
    metadata_list = meta_df[meta_data_to_test].values.tolist()
    metadata_loc = meta_df.columns.tolist().index(meta_data_to_test)

    ## drop samples with metadata called nan (= empty)
    drop_samples = [
        record[0] for record in meta_df.values.tolist()
        if record[metadata_loc] == "nan"
    ]

    if drop_samples:
        ## filter the TaXon table
        taxon_df = taxon_df.drop(drop_samples, axis=1)
        taxon_samples = taxon_df.columns.tolist()[10:]
        ## also remove empty OTUs (rows whose reads are all 0)
        header = taxon_df.columns.tolist()
        non_empty_rows = [
            record for record in taxon_df.values.tolist()
            if set(record[10:]) != {0}
        ]
        taxon_df = pd.DataFrame(non_empty_rows, columns=header)
        ## filter the metadata table the same way
        meta_df = pd.DataFrame(
            [
                record for record in meta_df.values.tolist()
                if record[0] not in drop_samples
            ],
            columns=meta_df.columns.tolist())
        meta_samples = meta_df['Samples'].tolist()
        metadata_list = meta_df[meta_data_to_test].values.tolist()

    ## the title keeps the requested level; sequence-variant style levels
    ## are grouped by the "ID" column instead
    taxon_title = taxonomic_level
    if taxonomic_level in ("ASVs", "ESVs", "OTUs", "zOTUs"):
        taxonomic_level = "ID"

    # the test needs at least two groups and at least one repeated group
    n_categories = len(set(meta_df[meta_data_to_test]))
    if n_categories == len(meta_df['Samples'].tolist()):
        sg.Popup(
            "The meta data is unique for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError
    if n_categories == 1:
        sg.Popup(
            "The meta data is similar for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    # both tables must describe exactly the same samples
    if sorted(taxon_samples) != sorted(meta_samples):
        sg.PopupError(
            "Error: The samples between the taxon table and meta table do not match!",
            keep_on_top=True)
        return

    ## collect samples for plot (metadata order, matching metadata_list)
    samples = meta_samples

    ## extract the relevant data
    taxon_df = taxon_df[[taxonomic_level] + samples]

    ## sum the reads of every sample, keep the first taxon label per group
    aggregation_functions = {sample: 'sum' for sample in samples}
    aggregation_functions[taxonomic_level] = 'first'

    ## create condensed dataframe
    condensed_df = taxon_df.groupby(
        taxon_df[taxonomic_level]).aggregate(aggregation_functions)
    if 'unidentified' in condensed_df.index:
        condensed_df = condensed_df.drop('unidentified')

    ## collect reads: one row of counts per sample
    reads_per_sample = condensed_df[samples].transpose().values.tolist()

    ## calculate dissimilarity distances and test them against the metadata
    dissimilarity_dm = beta_diversity(diss_metric, reads_per_sample, samples)
    anosim_results = anosim(dissimilarity_dm, metadata_list, permutations=999)
    anosim_r = round(anosim_results['test statistic'], 5)
    anosim_p = anosim_results['p-value']
    textbox = "".join([
        "Anosim (", meta_data_to_test, ", ", taxon_title, ")<br>",
        "R = ", str(anosim_r), "<br>", "p = ", str(anosim_p)])

    matrix = dissimilarity_dm.data
    matrix_df = pd.DataFrame(matrix, index=samples, columns=samples)

    # create plot
    color_label = diss_metric + " distance"
    fig = px.imshow(matrix,
                    x=samples,
                    y=samples,
                    color_continuous_scale=cmap,
                    labels=dict(color=color_label))
    fig.update_layout(height=int(heigth),
                      width=int(width),
                      template=template,
                      showlegend=True,
                      title=textbox,
                      font_size=font_size,
                      title_font_size=font_size)

    # finish script: all three outputs share one file stem
    output_stem = "".join([
        str(path_to_outdirs), "/", "Beta_diversity", "/",
        TaXon_table_xlsx.stem, "_", meta_data_to_test, "_", taxon_title,
        "_", diss_metric])
    output_pdf = Path(output_stem + ".pdf")
    output_html = Path(output_stem + ".html")
    output_xlsx = Path(output_stem + ".xlsx")
    fig.write_image(str(output_pdf))
    fig.write_html(str(output_html))
    matrix_df.to_excel(output_xlsx)

    ## ask to show plot
    if sg.PopupYesNo('Show plot?', keep_on_top=True) == "Yes":
        webbrowser.open('file://' + str(output_html))

    ## write to log file
    sg.Popup("Beta diversity estimate are found in",
             path_to_outdirs,
             "/Beta_diversity/",
             title="Finished",
             keep_on_top=True)
    from taxontabletools.create_log import ttt_log
    ttt_log("beta diversity", "analysis", TaXon_table_xlsx.name,
            output_pdf.name, meta_data_to_test, path_to_outdirs)
eigen3 = eigen['PC3'].values print(eigen) print(eigen1) df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0) print(df_fin) df_fin.reset_index() df_fin = df_fin[['true_lat']] print(df_fin) #df_fin.to_csv("test6.tsv", sep="\t", header=1) df_fin['Observed OTUs'] = adiv_obs_otus df_fin['Faith PD'] = adiv_faith_pd anosim_lat = anosim(wu_dm, df_fin, column='true_lat', permutations=999) print(anosim_lat['test statistic']) print(anosim_lat['p-value']) print(df_fin.corr(method="spearman")) print(adiv_obs_otus) fig = plt.figure() #plt.close('all') #plt.subplot(1,3,1) fig = wu_pc.plot(df_fin, 'true_lat', axis_labels=('PC1' + str(eigen1) + '%', 'PC2' + str(eigen2) + '%', 'PC3' + str(eigen3) + '%'),
its - 1][k].cluster_label field_plot = np.ma.masked_array(field_plot, field_plot == -10000) #%% Determine from which clusters the data is part of: nomask = np.where(~field_plot.mask) field_plot = field_plot[nomask] args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], Flats, Flons) Flabels = field_plot[args] args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], FlatsDino, FlonsDino) Dinolabels = field_plot[args] #%% if (len(np.unique(Dinolabels)) > 1): Dano = anosim(DistanceMatrix(Dinotaxdist), Dinolabels.astype(str), permutations=perm) DinoP[its] = list(Dano)[5] DinoR[its] = list(Dano)[4] if (len(np.unique(Flabels)) > 1): Fano = anosim(DistanceMatrix(Ftaxdist), Flabels.astype(str), permutations=perm) FP[its] = list(Fano)[5] FR[its] = list(Fano)[4] #%% Save file with ANOSIM results np.savez('ANOSIM_hierarchicalclus%s_sp%d_perm%d_its%d_mlat%d.npz' % (season, sp, perm, iterations, maxlat), ForamP=FP, DinoP=DinoP,
sample_id = each_sample_split[0] sample_group = each_sample_split[1] sample_id_list.append(sample_id) sample_group_list.append(sample_group) # read in data as dataframe df = pd.read_csv(infile_data, sep='\t') # get list of list from dataframe lol_data_in = [] for col_id in sample_id_list: column_num_list = (df[col_id].values).tolist() lol_data_in.append(column_num_list) # calculate distance matrix dist_arrary = pairwise_distances(lol_data_in, lol_data_in, metric=distance_metric) # add sample id to distance matrix dist_matrix = DistanceMatrix(dist_arrary, sample_id_list) # perform anosim test anosim_test = anosim(dist_matrix, sample_group_list, permutations=999) print(anosim_test) print() # perform permanova test permanova_test = permanova(dist_matrix, sample_group_list, permutations=999) print(permanova_test)
eigen3 = eigen['PC3'].values print(eigen) print(eigen1) df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0) print(df_fin) df_fin.reset_index() df_fin = df_fin[['region']] print(df_fin) #df_fin.to_csv("test6.tsv", sep="\t", header=1) df_fin['Observed OTUs'] = adiv_obs_otus df_fin['Faith PD'] = adiv_faith_pd anosim = anosim(wu_dm, df_fin, column='region', permutations=999) print(anosim['test statistic']) print(anosim['p-value']) print(df_fin.corr(method="spearman")) #print(adiv_obs_otus) fig = plt.figure() #plt.close('all') #plt.subplot(1,3,1) fig = wu_pc.plot(df_fin, 'region', axis_labels=('PC1' + str(eigen1) + '%', 'PC2' + str(eigen2) + '%', 'PC3' + str(eigen3) + '%'),
def test_no_permutations(self):
    """Check that zero permutations yields the statistic with a NaN p-value."""
    expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0],
                         index=self.exp_index,
                         name='ANOSIM results')
    # No seeding here: with permutations=0 the expected p-value is NaN.
    observed = anosim(self.dm_no_ties, self.grouping_equal, permutations=0)
    self.assert_series_equal(observed, expected)
eigen3 = eigen['PC3'].values print(eigen) print(eigen1) df_fint = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0) print(df_fint) df_fint.reset_index() df_fint = df_fint[['temp']] print(df_fint) #df_fin.to_csv("test6.tsv", sep="\t", header=1) df_fint['Observed OTUs'] = adiv_obs_otust df_fint['Faith PD'] = adiv_faith_pdt anosimt = anosim(wu_dmt, df_fint, column='temp', permutations=999) print(anosimt['test statistic']) print(anosimt['p-value']) print(df_fint.corr(method="spearman")) print(adiv_obs_otust) figt = plt.figure() #plt.close('all') #plt.subplot(1,3,1) figt = wu_pct.plot(df_fint, 'temp', axis_labels=('PC1' + str(eigen1) + '%', 'PC2' + str(eigen2) + '%', 'PC3' + str(eigen3) + '%'),
# Build one list of floats per column of `rows`, skipping column 0 and
# row 0 (presumably a label column and a header row — TODO confirm against
# the code that fills `rows`), and append it to `samples`.
for a in range(len(rows[0])):
    if a > 0:
        this_sample = []
        for b in range(len(rows)):
            if b > 0:
                this_sample.append(float(rows[b][a]))
        samples.append(this_sample)
# The bare string below is disabled code kept as a string literal; it is
# evaluated and discarded, so it has no effect at runtime.
""" only_samples = ['LR', 'SR'] new_samples, new_names = [], [] for a in range(len(sample_names)): for b in range(len(only_samples)): if sample_names[a] == only_samples[b]: new_samples.append(samples[a]) new_names.append(sample_names[a]) samples = new_samples sample_names = new_names print(len(samples), len(sample_names)) """
# Distance matrix over the sample vectors using the `braycurtis` metric.
sam_dm = dm.from_iterable(samples, metric=braycurtis)
# Run the three tests with `sample_names` as the grouping vector
# (column=None) and 999 permutations each, printing every result.
pdisp = permdisp(sam_dm, sample_names, column=None, test='median', permutations=999)
print(pdisp)
asim = anosim(sam_dm, sample_names, column=None, permutations=999)
print(asim)
perm = permanova(sam_dm, sample_names, column=None, permutations=999)
print(perm)
eigen3 = eigen['PC3'].values print(eigen) print(eigen1) df_find = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0) print(df_find) df_find.reset_index() df_find = df_find[['depth_group_50']] print(df_find) #df_fin.to_csv("test6.tsv", sep="\t", header=1) df_find['Observed OTUs'] = adiv_obs_otusd df_find['Faith PD'] = adiv_faith_pdd anosimd = anosim(wu_dmd, df_find, column='depth_group_50', permutations=999) print(anosimd['test statistic']) print(anosimd['p-value']) print(df_find.corr(method="spearman")) print(adiv_obs_otusd) figd = plt.figure() #plt.close('all') #plt.subplot(1,3,1) figd = wu_pcd.plot(df_find, 'depth_group_50', axis_labels=('PC1' + str(eigen1) + '%', 'PC2' + str(eigen2) + '%', 'PC3' + str(eigen3) + '%'),
def PCoA_analysis(TaXon_table_xlsx, meta_data_to_test, taxonomic_level, width,
                  height, pcoa_s, path_to_outdirs, template, font_size,
                  color_discrete_sequence, pcoa_dissimilarity):
    """Run a PCoA plus ANOSIM test on a TaXon table and plot selected axes.

    Reads the TaXon table and its ``<stem>_metadata.xlsx`` companion from
    ``<path_to_outdirs>/Meta_data_table``, drops samples whose value in
    ``meta_data_to_test`` is empty, sums the reads per ``taxonomic_level``,
    computes ``pcoa_dissimilarity`` distances, runs the PCoA and an ANOSIM
    test (999 permutations), then opens a window in which the user picks
    the axes to plot. Two checked axes give a 2D scatter, three a 3D
    scatter; output files go to ``<path_to_outdirs>/PCoA_plots/<stem>/``.

    Parameters
    ----------
    TaXon_table_xlsx : path to the TaXon table (sample columns start at
        position 10).
    meta_data_to_test : name of the metadata column used as categories.
    taxonomic_level : column to aggregate on; "ASVs", "ESVs", "OTUs" and
        "zOTUs" are aggregated on the "ID" column.
    width, height : figure size, converted with ``int``.
    pcoa_s : marker size, converted with ``int``.
    path_to_outdirs : project output directory.
    template, font_size, color_discrete_sequence : passed to plotly.
    pcoa_dissimilarity : metric name passed to skbio ``beta_diversity``.

    Raises
    ------
    RuntimeError
        After a popup, when the metadata is unique for every sample or
        identical for all samples.
    """
    import pandas as pd
    import numpy as np
    from skbio.diversity import beta_diversity
    from skbio.stats.ordination import pcoa
    from skbio.stats.distance import anosim
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import os
    import webbrowser
    from itertools import combinations

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    Meta_data_table_xlsx = Path(
        str(path_to_outdirs) + "/" + "Meta_data_table" + "/" +
        TaXon_table_xlsx.stem + "_metadata.xlsx")
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx,
                                   header=0).fillna("unidentified")
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx,
                                       header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()
    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()
    metadata_loc = Meta_data_table_df.columns.tolist().index(meta_data_to_test)

    ## drop samples with metadata called nan (= empty)
    drop_samples = [
        i[0] for i in Meta_data_table_df.values.tolist()
        if i[metadata_loc] == "nan"
    ]

    if drop_samples:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs (rows whose reads are all 0)
        columns = TaXon_table_df.columns.tolist()
        row_filter_list = [
            row for row in TaXon_table_df.values.tolist()
            if set(row[10:]) != {0}
        ]
        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
        Meta_data_table_df = pd.DataFrame(
            [
                i for i in Meta_data_table_df.values.tolist()
                if i[0] not in drop_samples
            ],
            columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    ## create a y axis title text
    taxon_title = taxonomic_level.lower()

    ## adjust taxonomic level if neccessary
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxon_title = taxonomic_level
        taxonomic_level = "ID"

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == len(
            Meta_data_table_df['Samples'].tolist()):
        sg.Popup(
            "The meta data is unique for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == 1:
        sg.Popup(
            "The meta data is similar for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    # both tables must describe exactly the same samples
    if sorted(TaXon_table_samples) != sorted(Meta_data_table_samples):
        sg.PopupError(
            "The sample of both the TaXon table and the metadata table have to match!"
        )
        return

    samples = Meta_data_table_samples

    ## extract the relevant data
    TaXon_table_df = TaXon_table_df[[taxonomic_level] + samples]
    ## sum the reads of every sample, keep the first taxon label per group
    aggregation_functions = {sample: 'sum' for sample in samples}
    aggregation_functions[taxonomic_level] = 'first'
    ## create condensed dataframe
    TaXon_table_df = TaXon_table_df.groupby(
        TaXon_table_df[taxonomic_level]).aggregate(aggregation_functions)
    if 'unidentified' in TaXon_table_df.index:
        TaXon_table_df = TaXon_table_df.drop('unidentified')

    data = TaXon_table_df[samples].transpose().values.tolist()
    jc_dm = beta_diversity(pcoa_dissimilarity, data, samples)
    ordination_result = pcoa(jc_dm)
    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

    anosim_results = anosim(jc_dm, metadata_list, permutations=999)
    anosim_r = round(anosim_results['test statistic'], 5)
    anosim_p = anosim_results['p-value']
    textbox = meta_data_to_test + ", " + taxon_title + "<br>Anosim " + "R = " + str(
        anosim_r) + " " + "p = " + str(anosim_p)

    ###########################################################################
    # nested helpers

    def slices(items, size):
        """Yield consecutive chunks of ``items`` with at most ``size`` entries."""
        for start in range(0, len(items), size):
            yield items[start:start + size]

    def ensure_plot_directory():
        """Create (if missing) and return the per-table plot subfolder."""
        dirName = Path(
            str(path_to_outdirs) + "/" + "PCoA_plots" + "/" +
            TaXon_table_xlsx.stem + "/")
        if not os.path.exists(dirName):
            os.mkdir(dirName)
        return dirName

    def mesh_dataframe(df_pcoa, columns):
        """Return a frame listing both end points of every same-category pair."""
        combinations_list = []
        # Visit every category once, in order of first appearance. Iterating
        # the raw column would emit each category's pairs once per member.
        for metadata in dict.fromkeys(df_pcoa["Metadata"]):
            ## collect all entries for the respective metadata
            arr = df_pcoa.loc[df_pcoa['Metadata'] == metadata][
                columns].to_numpy()
            ## every pair becomes two consecutive rows (a line segment)
            for entry in combinations(arr, 2):
                combinations_list.append(list(entry[0]))
                combinations_list.append(list(entry[1]))
        return pd.DataFrame(combinations_list, columns=columns)

    def export_and_report(fig, dirName, suffix, closing_text,
                          exported_axes=None):
        """Write the figure (and optionally the coordinates), then notify and log."""
        output_stem = str(
            dirName) + "/" + meta_data_to_test + "_" + taxon_title + suffix
        output_pdf = Path(output_stem + ".pdf")
        output_html = Path(output_stem + ".html")
        ## write files
        fig.write_image(str(output_pdf))
        fig.write_html(str(output_html))
        if exported_axes is not None:
            output_xlsx = Path(output_stem + ".xlsx")
            ordination_result.samples[exported_axes].to_excel(output_xlsx)
        ## ask to show file
        answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
        if answer == "Yes":
            webbrowser.open('file://' + str(output_html))
        ## print closing text
        sg.Popup(closing_text, title="Finished", keep_on_top=True)
        ## write to log
        from taxontabletools.create_log import ttt_log
        ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                output_pdf.name, meta_data_to_test, path_to_outdirs)

    def add_category_traces(fig, df_pcoa, x_axis, y_axis, row, col,
                            **trace_options):
        """Add one marker trace per metadata category to a matrix cell."""
        df = df_pcoa[[x_axis, y_axis, "Metadata", "Sample"]]
        for metadata in set(metadata_list):
            df_metadata = df[df['Metadata'] == metadata]
            fig.add_trace(go.Scatter(
                x=df_metadata[x_axis].values.tolist(),
                y=df_metadata[y_axis].values.tolist(),
                mode='markers',
                name=metadata,
                text=df_metadata["Sample"].values.tolist(),
                **trace_options),
                          row=row,
                          col=col)

    def add_axis_label(fig, axis, row, col, annotate_in_cell=True):
        """Put an empty trace on a diagonal cell and annotate it with the axis name."""
        fig.add_trace(go.Scatter(), row=row, col=col)
        fig.update_layout(template=template,
                          font_size=font_size,
                          title_font_size=font_size)
        text = axis + " (" + str(
            round(ordination_result.proportion_explained[axis] * 100,
                  2)) + " %)"
        if annotate_in_cell:
            fig.add_annotation(text=text, showarrow=False, row=row, col=col)
        else:
            fig.add_annotation(text=text, showarrow=False)

    ###########################################################################
    # create window to ask for PCoA axis to test

    # collect the PCoA proportion explained values
    proportion_explained_list = []
    for i, pcoa_axis in enumerate(ordination_result.proportion_explained):
        if round(pcoa_axis * 100, 2) >= 1:
            proportion_explained_list.append("PC" + str(i + 1) + " (" +
                                             str(round(pcoa_axis * 100, 2)) +
                                             " %)")

    # ten checkboxes per row
    pcoa_axis_checkboxes = list(
        slices([
            sg.Checkbox(name, key=name, size=(15, 1))
            for name in proportion_explained_list
        ], 10))

    pcoa_window_layout = [
        [sg.Text('Check up to four axes to be displayed')],
        [sg.Frame(layout=pcoa_axis_checkboxes, title='')],
        [sg.Text('Only axes >= 1 % explained variance are shown')],
        [sg.CB("Connect categories", default=True, key="draw_mesh")],
        [sg.Text('')],
        [sg.Button('Plot', key='Plot')],
        [sg.Button('Back')],
    ]

    pcoa_window = sg.Window('PCoA axis', pcoa_window_layout, keep_on_top=True)

    while True:
        event, values = pcoa_window.read()

        # Test the event BEFORE touching `values`: closing the window
        # returns values=None, and indexing it would raise a TypeError.
        if event is None or event == 'Back':
            break

        draw_mesh = values["draw_mesh"]

        if event == 'Plot':
            ## create a subfolder for better sorting and overview
            dirName = ensure_plot_directory()

            # collect the pcoa axis values (keys of the checked axis boxes)
            axis_to_plot = [
                key for key, value in values.items()
                if value and "PC" in key
            ]

            # pass on only if two pcoa axes were checked
            if len(axis_to_plot) == 2:
                # the second checked axis goes on x, the first on y
                cat1 = axis_to_plot[1].split()[0]
                cat2 = axis_to_plot[0].split()[0]

                df_pcoa = ordination_result.samples[[cat1, cat2]]
                df_pcoa.insert(
                    2, "Metadata",
                    Meta_data_table_df[meta_data_to_test].values.tolist(),
                    True)
                df_pcoa.insert(
                    3, "Samples",
                    Meta_data_table_df["Samples"].values.tolist(), True)

                if draw_mesh:
                    ## connect all samples that share a category
                    plot_df = mesh_dataframe(
                        df_pcoa, [cat1, cat2, "Metadata", "Samples"])
                    trace_mode = "markers+lines"
                else:
                    plot_df = df_pcoa
                    trace_mode = "markers"

                fig = px.scatter(
                    plot_df,
                    x=cat1,
                    y=cat2,
                    color="Metadata",
                    text="Samples",
                    title=textbox,
                    color_discrete_sequence=color_discrete_sequence)
                fig.update_traces(marker_size=int(pcoa_s), mode=trace_mode)
                fig.update_layout(height=int(height),
                                  width=int(width),
                                  template=template,
                                  showlegend=True,
                                  font_size=font_size,
                                  title_font_size=font_size)
                fig.update_xaxes(title=axis_to_plot[1])
                fig.update_yaxes(title=axis_to_plot[0])

                closing_text = "\n" + "PCoA plots are found in: " + str(
                    path_to_outdirs) + "/PCoA_plots/"
                export_and_report(fig, dirName, "", closing_text,
                                  exported_axes=[cat1, cat2])
                break

            elif len(axis_to_plot) == 3:
                cat1 = axis_to_plot[0].split()[0]
                cat2 = axis_to_plot[1].split()[0]
                cat3 = axis_to_plot[2].split()[0]

                df_pcoa = ordination_result.samples[[cat1, cat2, cat3]]
                df_pcoa.insert(
                    3, "Metadata",
                    Meta_data_table_df[meta_data_to_test].values.tolist(),
                    True)
                df_pcoa.insert(
                    4, "Samples",
                    Meta_data_table_df["Samples"].values.tolist(), True)

                ## check if lines are to be drawn between the dots
                if draw_mesh:
                    plot_df = mesh_dataframe(
                        df_pcoa, [cat1, cat2, cat3, "Metadata", "Samples"])
                    trace_options = dict(mode="markers+lines",
                                         line=dict(width=0.5))
                else:
                    plot_df = df_pcoa
                    trace_options = dict(mode="markers")

                ## draw the plot
                fig = px.scatter_3d(
                    plot_df,
                    x=cat1,
                    y=cat2,
                    z=cat3,
                    color="Metadata",
                    text="Samples",
                    title=textbox,
                    color_discrete_sequence=color_discrete_sequence)
                fig.update_traces(marker_size=int(pcoa_s), **trace_options)
                fig.update_layout(height=int(height),
                                  width=int(width),
                                  template=template,
                                  title=textbox,
                                  showlegend=True,
                                  font_size=font_size,
                                  title_font_size=font_size)
                fig.update_layout(
                    scene=dict(xaxis_title=axis_to_plot[0],
                               yaxis_title=axis_to_plot[1],
                               zaxis_title=axis_to_plot[2]))

                closing_text = "PCoA plots are found in: " + str(
                    path_to_outdirs) + "/PCoA_plots/"
                # export all three plotted axes (the third one used to be
                # left out of the xlsx file)
                export_and_report(fig, dirName, "_3d", closing_text,
                                  exported_axes=[cat1, cat2, cat3])
                break

            else:
                sg.Popup("Please choose not more than 3 PCoA axes",
                         title="Error",
                         keep_on_top=True)

        # NOTE(review): no element of the layout above emits 'Plot matrix',
        # so this branch looks unreachable from this window — confirm before
        # relying on it.
        if event == 'Plot matrix':
            if len(proportion_explained_list) >= 4:
                ## create a subfolder for better sorting and overview
                dirName = ensure_plot_directory()

                df_pcoa = ordination_result.samples[[
                    "PC1", "PC2", "PC3", "PC4"
                ]]
                df_pcoa.insert(
                    4, "Metadata",
                    Meta_data_table_df[meta_data_to_test].values.tolist(),
                    True)
                df_pcoa.insert(
                    5, "Sample",
                    Meta_data_table_df["Samples"].values.tolist(), True)

                fig = make_subplots(rows=4, cols=4)

                ########### row 1 ###########
                # the PC1 label is added without a row/col position
                add_axis_label(fig, "PC1", 1, 1, annotate_in_cell=False)
                fig.update_xaxes(showticklabels=False, showgrid=False)
                fig.update_yaxes(showticklabels=False, showgrid=False)
                # only the first scatter cell contributes legend entries
                add_category_traces(fig, df_pcoa, "PC1", "PC2", 1, 2)
                add_category_traces(fig, df_pcoa, "PC1", "PC3", 1, 3,
                                    showlegend=False)
                add_category_traces(fig, df_pcoa, "PC1", "PC4", 1, 4,
                                    showlegend=False)
                fig.update_traces(marker_size=int(pcoa_s), mode="markers")
                fig.update_xaxes(showgrid=False, row=1, col=4)
                fig.update_yaxes(showgrid=False, row=1, col=4)

                ########### row 2 ###########
                add_axis_label(fig, "PC2", 2, 2)
                add_category_traces(fig, df_pcoa, "PC2", "PC3", 2, 3,
                                    showlegend=False)
                add_category_traces(fig, df_pcoa, "PC2", "PC4", 2, 4,
                                    showlegend=False)

                ########### row 3 ###########
                add_axis_label(fig, "PC3", 3, 3)
                add_category_traces(fig, df_pcoa, "PC3", "PC4", 3, 4,
                                    showlegend=False)

                ########### row 4 ###########
                add_axis_label(fig, "PC4", 4, 4)

                ######################
                fig.update_xaxes(showline=True,
                                 mirror=True,
                                 linewidth=1,
                                 linecolor='black')
                fig.update_yaxes(showline=True,
                                 mirror=True,
                                 linewidth=1,
                                 linecolor='black')
                fig.update_traces(marker_size=int(pcoa_s), mode="markers")
                # finish plot matrix
                fig.update_layout(height=1000, width=1000, title_text=textbox)

                closing_text = "\n" + "PCoA plots are found in: " + str(
                    path_to_outdirs) + "/PCoA_plots/"
                export_and_report(fig, dirName, "_matrix", closing_text)
                break
            else:
                sg.Popup(
                    "There must be at least 4 PCoA axis available to plot the matrix!"
                )

    pcoa_window.close()
title="CoMA", text= "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem." ) eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"]) eig_dm["Explained"] = pc.proportion_explained eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum() if metric == "minkowski": eig_dm.to_csv("eigenvalues_" + mname + "_p" + str(p) + ".txt", sep="\t") else: eig_dm.to_csv("eigenvalues_" + mname + ".txt", sep="\t") #Statistics anos = anosim(div, map_DF, column=var, permutations=999) perm = permanova(div, map_DF, column=var, permutations=999) if metric == "minkowski": stat_file = "statistics_" + mname + "_p" + str(p) + "_" + var + ".txt" else: stat_file = "statistics_" + mname + "_" + var + ".txt" with open(stat_file, "w") as st: st.write("ANOSIM\tPermutations: 999\n\n") st.write("R\t" + str(anos["test statistic"]) + "\n") st.write("p-value\t" + str(anos["p-value"]) + "\n\n") st.write("PERMANOVA\tPermutations: 999\n\n") st.write("F\t" + str(perm["test statistic"]) + "\n") st.write("p-value\t" + str(perm["p-value"]) + "\n\n")