示例#1
0
def db(
    meta: Metadata, folder: str, rank: str = "genus", threads: int = 1
) -> JSONDirectory:
    """Create a model database from a set of SBML files.

    Parameters
    ----------
    meta : Metadata
        Model metadata. It is converted with ``to_dataframe()``, its column
        names are lower-cased, and it must then contain every column listed
        in ``REQ_FIELDS``. The index provides the model IDs.
    folder : str
        Directory that must contain one ``<id>.xml`` file for every model ID
        in the metadata.
    rank : str
        Metadata column used to group the models; each group is collapsed
        with ``reduce_group`` and yields one ``<rank value>.json`` entry.
    threads : int
        Forwarded to ``workflow`` when running ``_summarize_models``.

    Returns
    -------
    JSONDirectory
        The directory holding the generated files and the manifest CSV.

    Raises
    ------
    ValueError
        If a required metadata column is missing, or if a model listed in
        the metadata has no matching ``.xml`` file in ``folder``.
    """
    meta = meta.to_dataframe()
    meta.columns = meta.columns.str.lower()
    if not REQ_FIELDS.isin(meta.columns).all():
        raise ValueError(
            "Metadata File needs to have the following "
            "columns %s." % ", ".join(REQ_FIELDS)
        )
    meta["id"] = meta.index
    meta["file"] = meta["id"] + ".xml"

    # Vectorised membership test instead of a per-row scan of the listing.
    missing = ~meta["file"].isin(os.listdir(folder))
    if missing.any():
        # Report only the file names, not the repr of a pandas Series.
        raise ValueError(
            "The following models are in the Metadata but not "
            "in the folder: %s" % ", ".join(meta.loc[missing, "file"])
        )

    # One row per value of `rank`, indexed by that value.
    meta = meta.groupby(rank).apply(reduce_group).reset_index(drop=True)
    meta.index = meta[rank]

    json_dir = JSONDirectory()
    args = [
        (tid, row, str(json_dir.json_files.path_maker(model_id=tid)), folder)
        for tid, row in meta.iterrows()
    ]
    workflow(_summarize_models, args, threads)

    # The manifest describes the summarized output rather than the inputs,
    # so `file` and `id` are overwritten with the rank-level values.
    meta["file"] = meta.index + ".json"
    meta["id"] = meta.index
    meta["summary_rank"] = rank
    meta.to_csv(json_dir.manifest.path_maker(), index=False)

    return json_dir
示例#2
0
def denoise_stats(
        output_dir: str,
        # `stats` is not really Metadata; this annotation selects the
        # transformer the DADA2 plugin uses to turn DADA2Stats into a
        # pandas DataFrame.
        stats: qiime2.Metadata,
        plot_type: str = 'line',
        labels: str = None,
        style: str = 'whitegrid',
        context: str = 'talk') -> None:
    """Plot read retention across denoising steps for one or more runs.

    Writes ``line_graph.png``/``.pdf`` (percentage of each run's input reads
    remaining after every step) and ``bar_plot.png``/``.pdf`` (absolute read
    counts per step) into ``output_dir``, then renders the
    ``denoise_assets/index.html`` template there.

    Parameters
    ----------
    output_dir : str
        Directory that receives the figures and the rendered template.
    stats
        Denoising statistics, loaded with ``load_df`` or, when ``labels`` is
        given, ``load_df_labels``. The loaded frames must provide an ``id``
        column plus the ``input``, ``filtered``, ``denoised`` and
        ``non-chimeric`` columns.
    plot_type : str
        Currently unused; both plots are always produced.
    labels : str
        Optional labels forwarded to ``load_df_labels``.
    style : str
        Seaborn style name passed to ``sns.set_style``.
    context : str
        Seaborn context name passed to ``sns.set_context``.
    """
    frames = load_df_labels(stats, labels) if labels else load_df(stats)
    stats = pd.concat(frames)

    steps = ['input', 'filtered', 'denoised', 'non-chimeric']
    stats[steps] = stats[steps].apply(pd.to_numeric)
    # Aggregate only the plotted steps: any other column (percentages,
    # 'merged', ...) would otherwise become an unknown step after melting.
    stats = stats.groupby('id')[steps].sum()

    df = pd.melt(stats.reset_index(),
                 id_vars='id',
                 var_name='step',
                 value_name='read_number')
    # Each run is normalised by its own input read count, not by the
    # largest count found across all runs.
    run_input = df['id'].map(stats['input'])
    df['% of Reads Remaining'] = df['read_number'] / run_input * 100
    df['order'] = df['step'].map({step: i for i, step in enumerate(steps)})
    df['Run Number'] = 'Run ' + df['id'].astype(str)

    sns.set_style(style)
    sns.set_context(context)

    line_graph = sns.lineplot(data=df,
                              y='% of Reads Remaining',
                              x='order',
                              hue='Run Number')

    plt.ylim(0, 100)
    plt.xlim(0, 4)
    # Steps sit at 0, 1, 2, 3; the half positions carry empty labels. The
    # number of tick positions must equal the number of labels.
    tick_labels = [
        'Input', '', 'Filtered', '', 'Denoised', '', 'Non-chimeric'
    ]
    plt.xticks([x / 2 for x in range(len(tick_labels))], tick_labels)
    plt.xlabel('Processing Steps')
    plt.legend(loc='center left', bbox_to_anchor=(1.25, 0.5), ncol=1)

    line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.png'),
                              bbox_inches='tight')
    line_graph.figure.savefig(os.path.join(output_dir, 'line_graph.pdf'),
                              bbox_inches='tight')
    plt.gcf().clear()

    # Bar plot: the four steps are drawn on top of each other at the same
    # x positions, in processing order, one bar position per run.
    positions = range(len(stats))
    colors = ['darkorange', 'orange', 'sandybrown', 'navajowhite']
    legend_labels = ['Input', 'Filtered', 'Denoised', 'Non-chimeric']
    for step, color in zip(steps, colors):
        plt.bar(positions,
                df.loc[df['step'] == step, 'read_number'],
                color=color,
                edgecolor='white',
                width=1)

    # One label per run; the melted frame repeats every id once per step,
    # so the labels come from the aggregated index instead.
    plt.xticks(positions, list(stats.index), fontweight='bold')
    plt.xlabel('Method')
    plt.ylabel('Sequencing Depth')

    plt.legend(legend_labels, bbox_to_anchor=(1.05, 1), loc=2)

    plt.savefig(os.path.join(output_dir, 'bar_plot.png'), bbox_inches='tight')
    plt.savefig(os.path.join(output_dir, 'bar_plot.pdf'), bbox_inches='tight')
    plt.gcf().clear()

    index = os.path.join(TEMPLATES, 'denoise_assets', 'index.html')
    q2templates.render(index, output_dir)