Exemplo n.º 1
0
def scatter_g19_score(fc):
    """Build a log-scale scatter of 'Covid Score' per sample for one flowcell.

    Args:
        fc: Flowcell identifier passed to ``sf.load_covid_data``; ``None``
            means nothing has been selected.

    Returns:
        An empty dict when ``fc`` is ``None``. Otherwise the figure returned
        by ``sf.fix_plot``: only the samples whose ``run_sample_id`` starts
        with 'G' or 'H' (but not 'Ht') when any exist, else every sample.
    """
    if fc is None:
        return {}

    counts = sf.load_covid_data([fc], 'c19_read_counts.hdr.tsv')
    # Presumably the tiny offset keeps zero ratios plottable on the log axis.
    counts['Covid Score'] = counts['covid_ratio'] + 0.000001
    # NOTE(review): this adds a *ratio* to three count columns and the result
    # is not referenced again in this function -- confirm whether a covid
    # count column was intended.
    counts['total reads'] = (counts['covid_ratio'] + counts['rnase_count'] +
                             counts['spikein_count'] + counts['unknown'])
    counts['replicate_flags'] = counts['replicate_flags'].replace(np.nan, "not available")

    sample_ids = counts.run_sample_id.str
    is_g_sample = (sample_ids.startswith(('G', 'H'), na=False)
                   & ~sample_ids.startswith('Ht', na=False))
    g_samples = counts[is_g_sample].sort_values('Covid Score', ascending=False)

    if not g_samples.empty:
        fig = sf.fix_plot(px.scatter(g_samples, color='replicate_flags',
                                     x='run_sample_id', y='Covid Score'))
        fig.update_layout(yaxis_type="log",
                          title_text="Individual clinical samples for this FC and the flags for each replicate")
        return fig

    # No G/H samples on this flowcell: fall back to plotting everything.
    fig = sf.fix_plot(px.scatter(counts, color='replicate_flags',
                                 x='run_sample_id', y='Covid Score'))
    fig.update_layout(yaxis_type="log",
                      title_text="All Samples for this FC and the flags for each replicate")
    # Some sample ids are long integers; force a categorical axis so they
    # are not laid out numerically.
    fig.update_xaxes(type='category')
    return fig
Exemplo n.º 2
0
def pos_g19ratio(fc_click, my_type, start_date, end_date):
    """Scatter ``median_covid_ratio`` over time for one control sample type.

    Args:
        fc_click: Click counter/flag; any falsy value returns an empty figure.
        my_type: Value matched against the ``sample_type`` column; also used
            as the prefix of the figure title.
        start_date: Exclusive lower bound of the run-date window.
        end_date: Exclusive upper bound of the run-date window.

    Returns:
        An empty dict when ``fc_click`` is falsy, otherwise the figure
        returned by ``sf.fix_plot`` with a log-scaled y axis.
    """
    if not fc_click:
        return {}

    fclist_df = pd.DataFrame(sf.fc_list(), columns=['runid'])
    # The first six characters of a runid are read as YYMMDD. Parsing with an
    # explicit format avoids guessing the field order from a re-assembled
    # 'MM-DD-YY' string.
    fclist_df['date'] = pd.to_datetime(fclist_df.runid.str[:6],
                                       format='%y%m%d')
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)
    fclist_df = fclist_df[(fclist_df['date'] > window_start)
                          & (fclist_df['date'] < window_end)]
    fclist = list(fclist_df.runid.unique())

    # Load call data only for the runs that fall inside the window.
    calls = sf.load_covid_data(fclist, 'c19_call.hdr.tsv')
    calls['date'] = pd.to_datetime(calls.runid.str[:6], format='%y%m%d')
    sample_df = calls[calls['sample_type'] == my_type]

    fig = sf.fix_plot(
        px.scatter(sample_df,
                   x='date',
                   y='median_covid_ratio',
                   color='flags'))
    fig_title = my_type + " control samples per day"
    fig.update_layout(yaxis_type="log", title_text=fig_title)
    return fig
Exemplo n.º 3
0
def total_read_bar(fc, metric, label):
    """Bar chart of one pool-stats metric per pooling position for a flowcell.

    Args:
        fc: Flowcell identifier passed to ``sf.load_covid_data``; ``None``
            means nothing has been selected.
        metric: Column of the pool stats table plotted on the y axis and
            used as the y-axis title.
        label: Unused in this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        An empty dict when ``fc`` is ``None``, otherwise the figure returned
        by ``sf.fix_plot``.
    """
    if fc is None:
        # Return an empty figure if no flowcell is selected.
        return {}

    pool_stats = sf.load_covid_data([fc], 'pool_stats.hdr.tsv')
    fig = px.bar(pool_stats,
                 x='pos_pooling', y=metric, color='pos_pooling',
                 color_discrete_sequence=px.colors.sequential.Agsunset)
    fig = fig.update_layout(yaxis_title=metric)
    return sf.fix_plot(fig)
Exemplo n.º 4
0
def data_scatter(fcs, metric):
    """Plot compiled flowcell metrics, either over time or per flowcell.

    Args:
        fcs: Collection of flowcell ids to compare, or ``None`` to plot the
            metrics of every flowcell.
        metric: Value matched against the ``Metric`` column; the literal
            "All" disables the filter.

    Returns:
        The figure returned by ``sf.fix_plot``: a log-scale scatter of
        ``Value`` by ``date`` when ``fcs`` is ``None``, otherwise a grouped
        log-scale bar chart of ``Value`` by ``Metric`` coloured by flowcell.
    """
    df = sf.compile_fc_metrics()
    show_everything = fcs is None

    # NOTE(review): ``isin`` needs a list-like; a bare string would raise.
    sub_df = df if show_everything else df[df.Flowcell.isin(fcs)]
    if metric != "All":
        sub_df = sub_df[sub_df.Metric == metric]

    if show_everything:
        fig = px.scatter(sub_df, x="date", y="Value", color="Metric")
        fig.update_layout(title="all metrics", yaxis_type="log")
    else:
        fig = px.bar(sub_df,
                     x='Metric',
                     y='Value',
                     barmode='group',
                     color='Flowcell')
        fig.update_layout(yaxis_type="log")

    return sf.fix_plot(fig)
Exemplo n.º 5
0
def read_type_bar(fc):
    """Coloured bar chart of the read-type percentages per pooling position.

    Every pool-stats column whose name contains 'percent', except
    'percent_usable_reads', is melted into one long table and drawn as one
    colour per column.

    Args:
        fc: Flowcell identifier passed to ``sf.load_covid_data``; ``None``
            means nothing has been selected.

    Returns:
        An empty dict when ``fc`` is ``None``, otherwise the figure returned
        by ``sf.fix_plot``.
    """
    if fc is None:
        # Return an empty figure if no flowcell is selected.
        return {}

    stack_colors = ['#273c75', '#00a8ff',
                    '#c23616', '#e84118',
                    '#7f8fa6', '#e1b12c',
                    '#fbc531']
    pool_stats = sf.load_covid_data([fc], 'pool_stats.hdr.tsv')
    # Filter the excluded column while collecting instead of list.remove(),
    # which raises ValueError when the column is missing from the table.
    percent_cols = [col for col in pool_stats.columns
                    if 'percent' in col and col != 'percent_usable_reads']
    long_df = pool_stats[percent_cols + ['pos_pooling']].melt(
        id_vars='pos_pooling')
    fig = px.bar(long_df,
                 x='pos_pooling',
                 y='value',
                 color='variable',
                 color_discrete_sequence=stack_colors,
                 width=1200)
    fig = fig.update_layout(yaxis_title='Percent of Reads')
    return sf.fix_plot(fig)
Exemplo n.º 6
0
def _call_control_scatter(frame):
    """Scatter median_covid_ratio by date with the shared legend and 0.01 line."""
    fig = sf.fix_plot(
        px.scatter(frame,
                   x='date',
                   y='median_covid_ratio',
                   color='flags',
                   height=400))
    fig.update_layout(legend=dict(orientation='h',
                                  yanchor='bottom',
                                  y=1.02,
                                  xanchor='right',
                                  x=1,
                                  font=dict(size=14)))
    # Dotted red reference line at 0.01 spanning the full plot width.
    fig.add_shape(type='line',
                  xref='paper',
                  yref='y',
                  x0=0,
                  y0=0.01,
                  x1=1,
                  y1=0.01,
                  line=dict(color='Red', width=2, dash='dot'))
    return fig


def _label_call_samples(fig, frame, log_y=False):
    """Annotate rows that still have a run_sample_id, stacking same-date labels."""
    previous_date = None
    yshift = 15
    for _, row in frame[frame.run_sample_id.notna()].iterrows():
        # Consecutive rows on the same date are pushed up by 20px each so
        # that their labels do not overlap.
        yshift = yshift + 20 if row['date'] == previous_date else 15
        ratio = row['median_covid_ratio']
        fig.add_annotation(x=row['date'],
                           # The original passed log10 of the value for the
                           # log-scaled figure and the raw value otherwise.
                           y=np.log10(ratio) if log_y else ratio,
                           text=row['run_sample_id'],
                           showarrow=False,
                           yshift=yshift)
        previous_date = row['date']


def samples_plot(n_clicks, start_date, end_date):
    """Build the positive-control and NTC scatter plots from the call pickle.

    Samples whose ``flags`` equal 'pass' have their ``run_sample_id``
    blanked, so only the remaining samples are labelled on the plots.

    Args:
        n_clicks: Click counter; any falsy value returns two empty figures.
        start_date: Inclusive lower bound of the run-date window.
        end_date: Inclusive upper bound of the run-date window.

    Returns:
        A ``(positive, negative)`` tuple of figures, or ``({}, {})`` when
        ``n_clicks`` is falsy.
    """
    if not n_clicks:
        return {}, {}

    # NOTE(review): unpickling can execute arbitrary code; this hard-coded
    # path must only ever point at a trusted file.
    samples = pd.read_pickle(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/c19_call.pickle'
    ).dropna(subset=['run_sample_id'])
    # The first six characters of a runid are read as YYMMDD.
    samples['date'] = pd.to_datetime(samples.runid.str[:6], format='%y%m%d')
    samples = samples[(samples['date'] >= pd.to_datetime(start_date))
                      & (samples['date'] <= pd.to_datetime(end_date))]

    # Work on explicit copies: mutating a filtered slice in place is chained
    # assignment and is not guaranteed to stick.
    positive_df = samples[samples['sample_type'] == 'Positive'].copy()
    # Zero is replaced by 0.0001 before taking logs for the log-scaled axis.
    positive_df['median_covid_ratio'] = (
        positive_df['median_covid_ratio'].replace(0, 0.0001))
    positive_df.loc[positive_df['flags'] == 'pass', 'run_sample_id'] = np.nan
    # Log-axis ranges are given in log10 units.
    max_pos = np.log10(positive_df['median_covid_ratio'].max() * 1.5)

    negative_df = samples[samples['sample_type'] == 'NTC'].copy()
    negative_df.loc[negative_df['flags'] == 'pass', 'run_sample_id'] = np.nan

    positive = _call_control_scatter(positive_df)
    positive.update_yaxes(type="log",
                          range=[-4, max_pos],
                          tickmode='auto',
                          nticks=5)
    _label_call_samples(positive, positive_df, log_y=True)

    negative = _call_control_scatter(negative_df)
    negative.update_traces(marker_size=15)
    negative.update_yaxes(zeroline=True, zerolinecolor='black')
    _label_call_samples(negative, negative_df)

    return positive, negative
Exemplo n.º 7
0
def _replicate_control_scatter(frame):
    """Scatter covid_ratio by date, coloured by replicate flag, with the 0.01 line."""
    fig = sf.fix_plot(
        px.scatter(frame,
                   x='date',
                   y='covid_ratio',
                   color='replicate_flags',
                   height=400))
    # Dotted red reference line at 0.01 spanning the full plot width.
    fig.add_shape(type='line',
                  xref='paper',
                  yref='y',
                  x0=0,
                  y0=0.01,
                  x1=1,
                  y1=0.01,
                  line=dict(color='Red', width=2, dash='dot'))
    return fig


def samples_plot(n_clicks, start_date, end_date):
    """Build the positive-control and NTC scatter plots from the read-count pickle.

    Args:
        n_clicks: Click counter; any falsy value returns two empty figures.
        start_date: Inclusive lower bound of the run-date window.
        end_date: Inclusive upper bound of the run-date window.

    Returns:
        A ``(positive, negative)`` tuple of figures, or ``({}, {})`` when
        ``n_clicks`` is falsy.
    """
    if not n_clicks:
        return {}, {}

    # NOTE(review): unpickling can execute arbitrary code; this hard-coded
    # path must only ever point at a trusted file.
    samples = pd.read_pickle(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/c19_read_counts.pickle'
    ).dropna(subset=['run_sample_id'])
    # The first six characters of a runid are read as YYMMDD.
    samples['date'] = pd.to_datetime(samples.runid.str[:6], format='%y%m%d')
    samples = samples[(samples['date'] >= pd.to_datetime(start_date))
                      & (samples['date'] <= pd.to_datetime(end_date))]

    # Copy before mutating: an in-place replace on a filtered slice is
    # chained assignment and is not guaranteed to stick.
    positive_df = samples[samples['sample_type'] == 'Positive'].copy()
    # Zero is replaced by 0.0001 before taking logs for the log-scaled axis.
    positive_df['covid_ratio'] = positive_df['covid_ratio'].replace(0, 0.0001)
    # A log axis range is expressed in log10 units, so the upper bound must
    # use log10; the natural log overshoots the intended limit.
    max_pos = np.log10(positive_df['covid_ratio'].max() * 1.01)

    negative_df = samples[samples['sample_type'] == 'NTC']

    positive = _replicate_control_scatter(positive_df)
    positive.update_yaxes(type="log",
                          range=[-4, max_pos],
                          tickmode='auto',
                          nticks=5)

    negative = _replicate_control_scatter(negative_df)
    negative.update_traces(marker_size=15)
    negative.update_yaxes(zeroline=True, zerolinecolor='black')

    return positive, negative
Exemplo n.º 8
0
def make_pl_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Strip plot of a pool-level metric over time with reference lines.

    Horizontal dotted lines are added for the metric's threshold (when it
    is listed in the thresholds CSV), its mean, and +/-1 and +/-2 standard
    deviations taken from ``Mean_Table.csv``.

    Args:
        fc_click: Click counter/flag; any falsy value returns an empty figure.
        my_metric: Pool-stats column to plot; also looked up in the
            thresholds and mean tables.
        label: Unused in this function; kept so the signature stays
            compatible with existing callers.
        start_date: Exclusive lower bound of the run-date window.
        end_date: Exclusive upper bound of the run-date window.

    Returns:
        An empty dict when ``fc_click`` is falsy or no run falls inside the
        window, otherwise the annotated figure.

    Raises:
        IndexError: If ``my_metric`` has no row in ``Mean_Table.csv``.
    """
    if not fc_click:
        return {}

    pl_df = sf.compile_data_generic('pool_stats.hdr.tsv').sort_values(
        ['runid', 'pos_pooling'], ascending=[False, True])
    # The first six characters of a runid are read as YYMMDD.
    pl_df['date'] = pd.to_datetime(pl_df.runid.str[:6], format='%y%m%d')
    pl_df = pl_df[(pl_df['date'] > pd.to_datetime(start_date))
                  & (pl_df['date'] < pd.to_datetime(end_date))]
    if pl_df.empty:
        # Nothing to plot; the reference lines below need a first and last
        # date and would otherwise fail with IndexError.
        return {}

    # Threshold table, indexed by metric name.
    metrics = pd.read_csv('thresholds_20200912.csv')
    metrics.set_index("metric", inplace=True)
    # Mean and standard deviation for the requested metric.
    mean_table = pd.read_csv('Mean_Table.csv')
    metric_stats = mean_table.loc[mean_table['index'] == my_metric]
    mean1 = metric_stats['mean'].iloc[0]
    stdev1 = metric_stats['std'].iloc[0]

    fig = sf.fix_plot(
        px.strip(pl_df,
                 x='date',
                 y=my_metric,
                 hover_name="runid",
                 log_y=True))

    # (legend name, y value, line colour) for each horizontal reference
    # line, in the order the traces are added.
    reference_lines = []
    if my_metric in metrics.index:
        reference_lines.append(
            ('Threshold', metrics.loc[my_metric, 'threshold'], 'red'))
    reference_lines.extend([
        ('Mean', mean1, 'rgb(127, 60, 141)'),
        ('+1 StDev', mean1 + stdev1, 'rgb(17, 165, 121)'),
        ('-1 StDev', mean1 - stdev1, 'rgb(242, 183, 1)'),
        ('+2 StDev', mean1 + (2 * stdev1), 'rgb(128, 186, 90)'),
        ('-2 StDev', mean1 - (2 * stdev1), 'rgb(230, 131, 16)'),
    ])

    # Each line runs between the dates of the first and last rows.
    first_date = pl_df['date'].iloc[0]
    last_date = pl_df['date'].iloc[-1]
    fig.add_traces([
        go.Scatter(x=[first_date, last_date],
                   y=[level, level],
                   name=name,
                   line=dict(color=color, width=4, dash='dot'))
        for name, level, color in reference_lines
    ])
    fig = fig.update_layout(yaxis_title=my_metric)
    return fig
Exemplo n.º 9
0
def make_fc_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Line plot of a flowcell-level metric over time with reference lines.

    Horizontal dotted lines are added for the metric's minimum/maximum
    thresholds (when defined), its mean, and +/-1 and +/-2 standard
    deviations from ``Mean_Table.csv``. Runs outside the threshold band are
    annotated with the tail of their runid.

    Args:
        fc_click: Click counter/flag; any falsy value returns an empty figure.
        my_metric: Column of the interop table to plot; also looked up in
            the thresholds and mean tables.
        label: Unused in this function; kept so the signature stays
            compatible with existing callers.
        start_date: Exclusive lower bound of the run-date window.
        end_date: Exclusive upper bound of the run-date window.

    Returns:
        An empty dict when ``fc_click`` is falsy or no run falls inside the
        window, otherwise the annotated figure.

    Raises:
        IndexError: If ``my_metric`` has no row in ``Mean_Table.csv``.
    """
    if not fc_click:
        return {}

    # Flowcell data that fills the graph, newest runid first.
    fc_df = sf.compile_data_generic('interop_db.hdr.tsv').sort_values(
        'runid', ascending=False)
    # The first six characters of a runid are read as YYMMDD.
    fc_df['date'] = pd.to_datetime(fc_df.runid.str[:6], format='%y%m%d')
    fc_df = fc_df[(fc_df['date'] > pd.to_datetime(start_date))
                  & (fc_df['date'] < pd.to_datetime(end_date))]
    if fc_df.empty:
        # Nothing to plot; the reference lines below need a first and last
        # date and would otherwise fail with IndexError.
        return {}

    metrics = pd.read_csv(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/thresholds_20200912.csv'
    )
    # NOTE(review): rows 7-14 are dropped by their hard-coded position in
    # the CSV -- confirm this still matches the file layout.
    metrics.drop(index=[7, 8, 9, 10, 11, 12, 13, 14], inplace=True)
    metrics.set_index('metric', inplace=True)
    metrics.loc[:, 'min_threshold'] = metrics['threshold']
    metrics.loc[:, 'max_threshold'] = np.nan
    metrics.loc['cluster_density', 'max_threshold'] = 335000

    # Mean and standard deviation for the requested metric.
    mean_table = pd.read_csv('Mean_Table.csv')
    metric_stats = mean_table.loc[mean_table['index'] == my_metric]
    mean1 = metric_stats['mean'].iloc[0]
    stdev1 = metric_stats['std'].iloc[0]

    fig = sf.fix_plot(
        px.line(fc_df, hover_name="runid", x='date', y=my_metric))

    # (legend name, y value, line colour) for each horizontal reference
    # line, in the order the traces are added.
    reference_lines = []
    has_thresholds = my_metric in metrics.index
    if has_thresholds:
        min_threshold = metrics.loc[my_metric, 'min_threshold']
        max_threshold = metrics.loc[my_metric, 'max_threshold']
        # As before, a zero minimum draws no line. A missing (NaN) minimum
        # is also skipped now: NaN is truthy and used to add an empty trace.
        if pd.notna(min_threshold) and min_threshold:
            reference_lines.append(
                ('Minimum Threshold', min_threshold, 'red'))
        if pd.notna(max_threshold):
            reference_lines.append(
                ('Maximum Threshold', max_threshold, 'red'))

    neg_stdv2 = mean1 - (2 * stdev1)
    if neg_stdv2 < 0:
        # Only the -2 StDev level is clamped at zero.
        neg_stdv2 = 0
    reference_lines.extend([
        ('Mean', mean1, 'rgb(127, 60, 141)'),
        ('+1 StDev', mean1 + stdev1, 'rgb(17, 165, 121)'),
        ('-1 StDev', mean1 - stdev1, 'rgb(242, 183, 1)'),
        ('+2 StDev', mean1 + (2 * stdev1), 'rgb(128, 186, 90)'),
        ('-2 StDev', neg_stdv2, 'rgb(230, 131, 16)'),
    ])

    # Each line runs between the dates of the first and last rows.
    first_date = fc_df['date'].iloc[0]
    last_date = fc_df['date'].iloc[-1]
    fig.add_traces([
        go.Scatter(x=[first_date, last_date],
                   y=[level, level],
                   name=name,
                   line=dict(color=color, width=4, dash='dot'))
        for name, level, color in reference_lines
    ])
    fig = fig.update_layout(yaxis_title=my_metric)

    # Label every run outside the threshold band. Metrics with no threshold
    # row are skipped explicitly rather than by swallowing a KeyError.
    # Comparisons against a NaN bound are False, so a missing bound never
    # flags a run.
    if has_thresholds:
        values = fc_df[my_metric]
        out_of_band = (values < min_threshold) | (values > max_threshold)
        for _, row in fc_df[out_of_band].iterrows():
            fig.add_annotation(x=row['date'],
                               y=row[my_metric],
                               text='<b>{}</b>'.format(row['runid'][22:]),
                               showarrow=False,
                               yshift=-10)

    return fig