def db(
    meta: Metadata, folder: str, rank: str = "genus", threads: int = 1
) -> JSONDirectory:
    """Create a model database from a set of SBML files.

    Parameters
    ----------
    meta
        Metadata describing the models. It is converted to a DataFrame and
        its column names are lower-cased; afterwards every name in
        ``REQ_FIELDS`` must be present. The metadata index is used as the
        model id, and each id must have a matching ``<id>.xml`` in `folder`.
    folder
        Directory that holds the SBML files.
    rank
        Metadata column used to group the models. Each group is collapsed
        with ``reduce_group`` and the resulting rows are keyed by this column.
    threads
        Passed through to ``workflow`` together with the per-row arguments.

    Returns
    -------
    JSONDirectory
        The directory object whose manifest has been written with one row
        per value of `rank`.

    Raises
    ------
    ValueError
        If a required column is missing, or if a model listed in the
        metadata has no corresponding file in `folder`.
    """
    meta = meta.to_dataframe()
    meta.columns = meta.columns.str.lower()
    if not REQ_FIELDS.isin(meta.columns).all():
        raise ValueError(
            "Metadata File needs to have the following "
            "columns %s." % ", ".join(REQ_FIELDS)
        )

    meta["id"] = meta.index
    meta["file"] = meta.id + ".xml"

    # Vectorized membership test instead of a per-row scan of the listing.
    missing = ~meta.file.isin(os.listdir(folder))
    if missing.any():
        # Report plain file names rather than the repr of a pandas Series.
        raise ValueError(
            "The following models are in the Metadata but not "
            "in the folder: %s" % ", ".join(map(str, meta.file[missing]))
        )

    meta = meta.groupby(rank).apply(reduce_group).reset_index(drop=True)
    meta.index = meta[rank]

    json_dir = JSONDirectory()
    # One task per row: (rank value, row, output path for that value, folder).
    args = [
        (tid, row, str(json_dir.json_files.path_maker(model_id=tid)), folder)
        for tid, row in meta.iterrows()
    ]
    # NOTE(review): presumably _summarize_models writes the JSON file at the
    # path given in each task -- confirm against its definition.
    workflow(_summarize_models, args, threads)

    # From here on the manifest refers to the summarized models, not to the
    # original SBML files.
    meta["file"] = meta.index + ".json"
    meta["id"] = meta.index
    meta["summary_rank"] = rank
    meta.to_csv(json_dir.manifest.path_maker(), index=False)
    return json_dir
def denoise_stats(
        output_dir: str,
        stats: qiime2.Metadata,
        plot_type: str = 'line',
        labels: str = None,
        style: str = 'whitegrid',
        context: str = 'talk') -> None:
    """Plot how many reads survive each denoising step and render the result.

    Writes ``line_graph.png``/``line_graph.pdf`` (percentage of reads
    remaining per processing step, one line per run) and
    ``bar_plot.png``/``bar_plot.pdf`` (read counts per step, one group of
    overlaid bars per run) into `output_dir`, then renders the
    ``denoise_assets/index.html`` template into the same directory.

    Parameters
    ----------
    output_dir
        Directory that receives the plots and the rendered template.
    stats
        Denoising statistics. The value is not really metadata; the
        annotation is ``qiime2.Metadata`` because that is the transformer
        the DADA2 plugin uses to turn DADA2Stats into a DataFrame.
    plot_type
        Currently not read by this function; both plots are always written.
    labels
        When truthy, `stats` is loaded with ``load_df_labels(stats, labels)``
        instead of ``load_df(stats)``.
    style
        Passed to ``seaborn.set_style``.
    context
        Passed to ``seaborn.set_context``.
    """
    if not labels:
        stats = load_df(stats)
    else:
        stats = load_df_labels(stats, labels)
    stats = pd.concat(stats)

    numeric = ['denoised', 'filtered', 'input', 'non-chimeric']
    stats[numeric] = stats[numeric].apply(pd.to_numeric)

    # Collapse to one row of read counts per run id.
    stats = stats.groupby('id').sum()
    stats = stats.drop(columns=[
        'percentage of input passed filter',
        'percentage of input non-chimeric'
    ])

    # Long format: one row per (run id, processing step).
    df = pd.melt(stats.reset_index(),
                 id_vars='id',
                 var_name='step',
                 value_name='read_number')

    # NOTE(review): percentages are relative to the largest read count over
    # all runs, not to each run's own input -- confirm this is intended.
    input_read_number = df['read_number'].max()
    df['% of Reads Remaining'] = df['read_number'] / input_read_number * 100

    step_order = {'input': 0, 'filtered': 1, 'denoised': 2, 'non-chimeric': 3}
    df['order'] = df['step'].apply(lambda x: step_order[x])
    df = df.reset_index()
    df['Run Number'] = 'Run ' + df['id']

    # Honour the caller's choices; the defaults match the values that were
    # previously hard-coded here.
    sns.set_style(style)
    sns.set_context(context)

    # --- line graph ------------------------------------------------------
    line_graph = sns.lineplot(data=df,
                              y='% of Reads Remaining',
                              x='order',
                              hue='Run Number')
    plt.ylim(0, 100)
    plt.xlim(0, 4)
    # Ticks every 0.5 from 0 to 4. Only the integer positions that correspond
    # to a step carry a name; the label list is padded so that it has exactly
    # one entry per tick, which matplotlib requires.
    tick_positions = [x / 2 for x in range(0, 9)]
    tick_labels = ['Input', '', 'Filtered', '', 'Denoised', '',
                   'Non-chimeric', '', '']
    plt.xticks(tick_positions, tick_labels)
    plt.xlabel('Processing Steps')
    plt.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)
    # TODO: allow the caller to supply a plot title (or provide a default).
    line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.png'),
                              bbox_inches='tight')
    line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.pdf'),
                              bbox_inches='tight')
    plt.gcf().clear()

    # --- bar graph -------------------------------------------------------
    # One x position per run. The melted frame keeps the row order of
    # ``stats`` within every step, so ``stats.index`` labels the bars
    # correctly and has the same length as the positions.
    positions = range(len(stats))
    run_ids = stats.index.values
    colors = ['darkorange', 'orange', 'sandybrown', 'navajowhite']
    step_names = ['Input', 'Filtered', 'Denoised', 'Non-chimeric']
    # Bars are drawn on top of each other in processing order. A 'merged'
    # step is not plotted.
    plotted_steps = ['input', 'filtered', 'denoised', 'non-chimeric']
    for step, color in zip(plotted_steps, colors):
        plt.bar(positions,
                df[df['step'] == step]['read_number'],
                color=color,
                edgecolor='white',
                width=1)
    plt.xticks(positions, run_ids, fontweight='bold')
    plt.xlabel('Method')
    plt.ylabel('Sequencing Depth')
    plt.legend(step_names, bbox_to_anchor=(1.05, 1), loc=2)
    plt.savefig(os.path.join(output_dir, 'bar_plot.png'),
                bbox_inches='tight')
    plt.savefig(os.path.join(output_dir, 'bar_plot.pdf'),
                bbox_inches='tight')
    plt.gcf().clear()

    index = os.path.join(TEMPLATES, 'denoise_assets', 'index.html')
    q2templates.render(index, output_dir)