Exemplos de compile_data_generic em Python, exemplos de functions.shared_functions.compile_data_generic em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: make_mean.py Projeto: gh-bshih/g19_analysis

def get_mean_table(start_dt):
    """Summarise flowcell and pool metrics for runs between start_dt and today.

    Loads 'interop_db.hdr.tsv' and 'pool_stats.hdr.tsv' through
    ``compile_data_generic``, keeps the rows whose run date lies strictly
    between ``start_dt`` and ``dt.today()``, and stacks the transposed
    ``describe()`` output of both tables (object-dtype columns excluded).

    Args:
        start_dt: Exclusive lower bound of the date window; anything
            ``pd.to_datetime`` accepts.

    Returns:
        A DataFrame with one row per described column: the flowcell rows
        first, then the pool rows. Each half keeps its own 0-based index,
        and the described column's name is in the column that
        ``reset_index()`` creates.
    """
    window_start = pd.to_datetime(start_dt)
    window_end = pd.to_datetime(dt.today())

    def load_window(filename, sort_by, ascending):
        # Load one table, sort it, derive the run date and apply the window.
        table = compile_data_generic(filename).sort_values(
            sort_by, ascending=ascending)
        # NOTE(review): assumes runid starts with a YYMMDD stamp. The previous
        # code rebuilt the same six characters as 'MM-DD-YY' and relied on a
        # unit-less astype(np.datetime64), which newer pandas rejects; an
        # explicit format keeps the parse unambiguous.
        table['date'] = pd.to_datetime(table['runid'].str[:6],
                                       format='%y%m%d')
        in_window = (table['date'] > window_start) & (table['date'] < window_end)
        return table[in_window]

    fc_df = load_window('interop_db.hdr.tsv', 'runid', False)
    pl_df = load_window('pool_stats.hdr.tsv', ['runid', 'pos_pooling'],
                        [False, True])

    mean_fc = fc_df.describe(exclude=[object]).T.reset_index()  # transpose
    mean_pl = pl_df.describe(exclude=[object]).T.reset_index()  # transpose
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # equivalent and, like append, keeps each frame's original index.
    return pd.concat([mean_fc, mean_pl], sort=False)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: plate_maps.py Projeto: gh-bshih/g19_analysis

def compile_data():
    """Return the 'c19_read_counts.hdr.tsv' table sorted by runid, descending."""
    return sf.compile_data_generic('c19_read_counts.hdr.tsv').sort_values(
        by='runid', ascending=False)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: plate_maps.py Projeto: gh-bshih/g19_analysis

def compile_fcs():
    """Return what sf.compile_data_generic yields for 'autoqc_report.hdr.tsv'."""
    return sf.compile_data_generic('autoqc_report.hdr.tsv')

Exemplo n.º 4

0

Exibir arquivo

Arquivo: detections_viz.py Projeto: gh-bshih/g19_analysis

# -*- coding: utf-8 -*-

import dash_core_components as dcc
import dash_html_components as html

import functions.shared_functions as sf
import plotly.express as px
from app import app
from dash.dependencies import Input, Output
# Internal module imports
from settings import (POOL_METRICS)

# def compile_pool_metrics():
# Module-level table, loaded once at import time: runid descending, then
# pos_pooling ascending within each runid.
df = sf.compile_data_generic('c19_read_counts.hdr.tsv')
df = df.sort_values(by=['runid', 'pos_pooling'], ascending=[False, True])


def fix_plot(fig):
    fig.update_layout(plot_bgcolor='white',
                      xaxis=dict(showline=True,
                                 showgrid=True,
                                 showticklabels=True,
                                 linecolor='rgb(204, 204, 204)',
                                 gridcolor='rgb(204, 204, 204)'),
                      yaxis=dict(showline=True,
                                 showgrid=True,
                                 showticklabels=True,
                                 linecolor='rgb(204, 204, 204)',
                                 gridcolor='rgb(204, 204, 204)'))
    try:
        fig.update_traces(marker={

Exemplo n.º 5

0

Exibir arquivo

def make_pl_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Build the pool-level strip plot of one metric with reference lines.

    The plot shows ``my_metric`` from 'pool_stats.hdr.tsv' against run date
    on a log y-axis, restricted to runs strictly between ``start_date`` and
    ``end_date``. Dotted horizontal lines mark the threshold (when the
    metric has a row in 'thresholds_20200912.csv'), the mean, and +/-1 and
    +/-2 standard deviations read from 'Mean_Table.csv'.

    Args:
        fc_click: Falsy to skip all work and return an empty dict.
        my_metric: Column of the pool table to plot; also the key looked up
            in the threshold and mean tables.
        label: Unused; kept for interface compatibility.
        start_date: Exclusive lower bound, anything pd.to_datetime accepts.
        end_date: Exclusive upper bound, anything pd.to_datetime accepts.

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise the figure. When no run
        falls inside the date window the figure carries no reference lines.

    Raises:
        IndexError: If 'Mean_Table.csv' has no row for ``my_metric``.
    """
    if not fc_click:
        return {}

    pl_df = sf.compile_data_generic('pool_stats.hdr.tsv').sort_values(
        ['runid', 'pos_pooling'], ascending=[False, True])
    # NOTE(review): assumes runid starts with a YYMMDD stamp. The previous
    # code rebuilt the same characters as 'MM-DD-YY' and used a unit-less
    # astype(np.datetime64), which newer pandas rejects.
    pl_df['date'] = pd.to_datetime(pl_df['runid'].str[:6], format='%y%m%d')
    pl_df = pl_df[(pl_df['date'] > pd.to_datetime(start_date))
                  & (pl_df['date'] < pd.to_datetime(end_date))]

    # get threshold metrics data
    metrics = pd.read_csv('thresholds_20200912.csv').set_index("metric")
    # get mean data
    mean_table = pd.read_csv('Mean_Table.csv')
    stats = mean_table.loc[mean_table['index'] == my_metric]
    mean1 = stats['mean'].iloc[0]
    stdev1 = stats['std'].iloc[0]

    fig = sf.fix_plot(
        px.strip(pl_df,
                 x='date',
                 y=my_metric,
                 hover_name="runid",
                 log_y=True))
    fig = fig.update_layout(yaxis_title=my_metric)

    # With no runs in the window there are no end points to span; the old
    # code raised IndexError on .iloc[0] here.
    if pl_df.empty:
        return fig

    # Every reference line spans the first and last row of the sorted table.
    x_span = [pl_df['date'].iloc[0], pl_df['date'].iloc[-1]]

    def add_level(value, name, color):
        # Draw one dotted horizontal line across the plotted date span.
        fig.add_traces(
            go.Scatter(x=x_span,
                       y=[value, value],
                       name=name,
                       line=dict(color=color, width=4, dash='dot')))

    # add threshold line
    if my_metric in metrics.index:
        add_level(metrics.loc[my_metric, 'threshold'], 'Threshold', 'red')
    # add mean
    add_level(mean1, 'Mean', 'rgb(127, 60, 141)')
    # add standard deviation bands (trace order matches the legend order)
    add_level(mean1 + stdev1, '+1 StDev', 'rgb(17, 165, 121)')
    add_level(mean1 - stdev1, '-1 StDev', 'rgb(242, 183, 1)')
    add_level(mean1 + (2 * stdev1), '+2 StDev', 'rgb(128, 186, 90)')
    add_level(mean1 - (2 * stdev1), '-2 StDev', 'rgb(230, 131, 16)')

    return fig

Exemplo n.º 6

0

Exibir arquivo

def make_fc_lm_plot(fc_click, my_metric, label, start_date, end_date):
    """Build the flowcell-level line plot of one metric with reference lines.

    The plot shows ``my_metric`` from 'interop_db.hdr.tsv' against run date,
    restricted to runs strictly between ``start_date`` and ``end_date``.
    Dotted horizontal lines mark the minimum/maximum thresholds (when the
    metric has a threshold row), the mean, and +/-1 and +/-2 standard
    deviations read from 'Mean_Table.csv' (the -2 line is clamped at 0).
    Runs whose value falls outside the thresholds are annotated with
    ``runid[22:]``.

    Args:
        fc_click: Falsy to skip all work and return an empty dict.
        my_metric: Column of the flowcell table to plot; also the key looked
            up in the threshold and mean tables.
        label: Unused; kept for interface compatibility.
        start_date: Exclusive lower bound, anything pd.to_datetime accepts.
        end_date: Exclusive upper bound, anything pd.to_datetime accepts.

    Returns:
        ``{}`` when ``fc_click`` is falsy, otherwise the figure. When no run
        falls inside the date window the figure carries no reference lines.

    Raises:
        IndexError: If 'Mean_Table.csv' has no row for ``my_metric``.
    """
    if not fc_click:
        return {}

    # get fc data to fill the graph
    fc_df = sf.compile_data_generic('interop_db.hdr.tsv').sort_values(
        'runid', ascending=False)
    # NOTE(review): assumes runid starts with a YYMMDD stamp. The previous
    # code rebuilt the same characters as 'MM-DD-YY' and used a unit-less
    # astype(np.datetime64), which newer pandas rejects.
    fc_df['date'] = pd.to_datetime(fc_df['runid'].str[:6], format='%y%m%d')
    fc_df = fc_df[(fc_df['date'] > pd.to_datetime(start_date))
                  & (fc_df['date'] < pd.to_datetime(end_date))]

    # get threshold metrics data
    # NOTE(review): absolute path and hard-coded row labels 7-14 are kept
    # as-is; the drop raises KeyError if the CSV has fewer rows - confirm
    # against the current thresholds file.
    metrics = pd.read_csv(
        '/ghds/groups/labdesk/bshih/c19dash/c19_dashboard/thresholds_20200912.csv'
    )
    metrics = metrics.drop(index=list(range(7, 15))).set_index('metric')
    metrics['min_threshold'] = metrics['threshold']
    metrics['max_threshold'] = np.nan
    metrics.loc['cluster_density', 'max_threshold'] = 335000

    # get mean data
    mean_table = pd.read_csv('Mean_Table.csv')
    stats = mean_table.loc[mean_table['index'] == my_metric]
    mean1 = stats['mean'].iloc[0]
    stdev1 = stats['std'].iloc[0]

    # create figure first with fc data
    fig = sf.fix_plot(
        px.line(fc_df, hover_name="runid", x='date', y=my_metric))
    fig = fig.update_layout(yaxis_title=my_metric)

    # With no runs in the window there are no end points to span; the old
    # code raised IndexError on .iloc[0] here.
    if fc_df.empty:
        return fig

    # Every reference line spans the first and last row of the sorted table.
    x_span = [fc_df['date'].iloc[0], fc_df['date'].iloc[-1]]

    def add_level(value, name, color):
        # Draw one dotted horizontal line across the plotted date span.
        fig.add_traces(
            go.Scatter(x=x_span,
                       y=[value, value],
                       name=name,
                       line=dict(color=color, width=4, dash='dot')))

    has_thresholds = my_metric in metrics.index
    if has_thresholds:
        min_threshold = metrics.loc[my_metric, 'min_threshold']
        max_threshold = metrics.loc[my_metric, 'max_threshold']
        # NaN is truthy, so test for it explicitly; a zero threshold is
        # still skipped, as before.
        if pd.notna(min_threshold) and min_threshold:
            add_level(min_threshold, 'Minimum Threshold', 'red')
        if pd.notna(max_threshold):
            add_level(max_threshold, 'Maximum Threshold', 'red')

    # add mean
    add_level(mean1, 'Mean', 'rgb(127, 60, 141)')

    # add standard deviation bands (trace order matches the legend order)
    neg_stdv2 = mean1 - (2 * stdev1)
    if neg_stdv2 < 0:
        neg_stdv2 = 0
    add_level(mean1 + stdev1, '+1 StDev', 'rgb(17, 165, 121)')
    add_level(mean1 - stdev1, '-1 StDev', 'rgb(242, 183, 1)')
    add_level(mean1 + (2 * stdev1), '+2 StDev', 'rgb(128, 186, 90)')
    add_level(neg_stdv2, '-2 StDev', 'rgb(230, 131, 16)')

    # Label runs outside the thresholds. Metrics without a threshold row get
    # no labels (previously handled by swallowing the KeyError).
    if has_thresholds:
        values = fc_df[my_metric]
        # Comparisons against a NaN bound are False, so a missing maximum
        # never flags a run.
        flagged = fc_df[(values < min_threshold) | (values > max_threshold)]
        for run_date, value, runid in zip(flagged['date'],
                                          flagged[my_metric],
                                          flagged['runid']):
            fig.add_annotation(x=run_date,
                               y=value,
                               text='<b>{}</b>'.format(runid[22:]),
                               showarrow=False,
                               yshift=-10)

    return fig