Exemplo n.º 1
0
    def generate_correction_range(self):
        """
        Plot RT vs max(ri-rt) - min(ri-rt).

        For every sample in ``self.features_dfs`` the per-row difference
        between its RI and RT columns is computed.  The spread of those
        differences across samples (row-wise maximum minus row-wise minimum)
        is then plotted against the target RT column of
        ``self.standards_dfs[0]``, ordered by that RT.

        Returns:
            The Figure holding the single spread trace, with the shared
            layout, zones and axis titles applied.
        """
        standards = self.standards_dfs[0]
        rt_col = self.COLUMNS['RT']
        ri_col = self.COLUMNS['RI']

        fig = Figure()

        # One column per sample holding (RI - RT), aligned on the standards
        # index.
        sample_diffs = pd.DataFrame(index=standards.index)
        for sample in self.features_dfs:
            name = self._get_name(sample)
            sample_diffs[name] = sample[(name, ri_col)] - sample[(name, rt_col)]

        # Reduce along axis=1 so there is one spread value per row; the
        # default column-wise reduction yields one value per sample and does
        # not line up with the RT values used for x.
        spread = sample_diffs.max(axis=1) - sample_diffs.min(axis=1)

        # sort_values returns a new frame, so the result has to be kept.
        # RT and spread live in the same frame so they are reordered together.
        plot_data = pd.DataFrame({
            'rt': standards[(self.COLUMNS['TARGET'], rt_col)],
            'spread': spread,
        }).sort_values(by='rt')

        fig.add_trace(
            Scattergl(x=plot_data['rt'],
                      y=plot_data['spread'],
                      mode='lines+markers',
                      **self.scatter_shared_attrs))

        fig.update_layout(**self.standard_scatter_layout)

        self.__zones__(fig)
        fig.update_xaxes(title=dict(text='Retention Time (s)', font_size=18),
                         showticklabels=True)
        fig.update_yaxes(
            title=dict(text='max(ri-rt) - min(ri-rt)', font_size=18))
        fig.update_layout(
            title_text='RT Correction error difference plot across samples')
        return fig
def look_at_ss_rank_plot():
    """Scatter the short squeeze ranking of every stock against its target.

    For each ticker in ``sh_int_stocks`` whose frame in ``sh_int`` has more
    than ``hist_points`` rows, the 'Short Squeeze Ranking' column and the
    ``targ_col`` column are passed to ``create_hist_feats`` with
    ``hist_points=1`` and ``future=10``.  The resulting features and targets
    of all tickers are concatenated, flattened and plotted as one WebGL
    scatter via ``plot``.

    Raises:
        ValueError: from ``np.concatenate`` when no ticker has enough rows,
            because there is then nothing to concatenate.
    """
    # make giant combination of all stocks and look at correlation between
    # shortsqueeze rating 10 days and price change 10 days later.
    # no correlation, but this isn't super helpful, because it should be the
    # ranking on the day new data comes out vs the price change 10 days later.
    #
    # NOTE: a leftover column-index lookup on sh_int['A'] used to sit here
    # (the author recorded the index of 'Short Squeeze Ranking' as 145).  Its
    # result was discarded, so it only made the function fail when ticker 'A'
    # was absent; it has been removed.
    all_feats, all_targs = [], []
    for s in sh_int_stocks:
        stock_df = sh_int[s]
        if stock_df.shape[0] > hist_points:  # make_all_sh_future did this too
            new_feats, new_targs, _, _ = create_hist_feats(
                stock_df['Short Squeeze Ranking'].values.reshape(-1, 1),
                stock_df[targ_col].values,
                stock_df.index.values,
                hist_points=1,
                future=10)
            all_feats.append(new_feats)
            all_targs.append(new_targs)

    all_feats_np = np.concatenate(all_feats).flatten()
    all_targs_np = np.concatenate(all_targs).flatten()

    trace = Scattergl(x=all_feats_np,
                      y=all_targs_np,
                      mode='markers',
                      marker=dict(color='#FFBAD2', line=dict(width=1)))
    plot([trace], filename='webgl')
Exemplo n.º 3
0
    def generate_istd_correlation(self):
        """Build a figure of RI against RT with one marker trace per sample.

        Every frame in ``self.features_dfs`` contributes a trace whose x
        values are its RT column and whose y values are its RI column.  Each
        trace takes the next colour from ``self.sample_colors``.

        Returns:
            The Figure with the traces, legend placement and titles applied.
        """
        fig = Figure()

        for features in self.features_dfs:
            name = self._get_name(features)
            color = next(self.sample_colors)
            rt_key = (name, self.COLUMNS['RT'])
            rts = features.loc[:, rt_key]
            ri_key = (name, self.COLUMNS['RI'])
            ris = features.loc[:, ri_key]

            # NOTE: a per-sample linear-regression overlay (dashed fit line
            # plus an R^2 annotation) was sketched here at one point but is
            # not drawn; only the raw markers are plotted.
            markers = Scattergl(x=rts,
                                y=ris,
                                name=name,
                                hovertext=rts.index,
                                legendgroup=name,
                                line_color=color,
                                mode='markers',
                                **self.scatter_shared_attrs)
            fig.add_trace(markers)

        # Legend anchored by its top-left corner at the plot's right edge.
        legend_placement = {
            'x': 1.0,
            'y': 1.0,
            'xanchor': 'left',
            'yanchor': 'top',
            'orientation': 'v',
        }
        fig.update_layout(**self.scatter_layout, legend=legend_placement)
        fig.update_xaxes(showticklabels=True)
        fig.update_yaxes(title={'text': 'Retention Index', 'font_size': 18})
        fig.update_layout(title={
            'text': 'RT vs RI Correlation of correction ISTDs',
            'font_size': 18,
        })
        return fig
Exemplo n.º 4
0
def create_traces(data, datetimes, n_channels, use_gradient):
    """Create one Scattergl line trace per channel.

    Arguments:
        data: Indexed as ``data[:, i]`` to obtain the y values of channel i.
        datetimes: x values shared by every trace.
        n_channels (int): Number of traces to create.
        use_gradient: When truthy, recolour the traces from blue (first
            channel) towards red (last channel); otherwise all are blue.
    Returns:
        list: The traces, in channel order; trace i is bound to y axis
        ``y{i + 1}``.
    """
    logger.info("Creating traces")

    traces = []
    for channel in range(n_channels):
        line_style = {'color': 'rgb(0, 0, 255)', 'width': 0.7}
        traces.append(
            Scattergl(x=datetimes,
                      y=data[:, channel],
                      yaxis=f'y{channel + 1}',
                      line=line_style))

    if use_gradient:
        for channel, trace in enumerate(traces):
            red = 255*channel/n_channels
            blue = 255*(1-channel/n_channels)
            trace.line['color'] = f'rgb({red}, 0, {blue})'

    logger.info("Trace created")
    return traces
Exemplo n.º 5
0
# Marker symbols, indexed by zero-based biosample number.
_TSNE_MARKER_SYMBOLS = [
    'circle', 'square', 'cross', 'triangle-up', 'triangle-down',
    'octagon', 'star', 'diamond'
]
# Colour used for every title, legend and annotation font.
_TSNE_TEXT_COLOR = 'rgba(1,2,2,1)'


def _tsne_axis(title, title_size):
    """Return the axis settings shared by all tSNE axes (2D and 3D)."""
    return {
        'title': title,
        'titlefont': {
            'color': _TSNE_TEXT_COLOR,
            'size': title_size
        },
        'type': 'linear',
        'ticks': '',
        'showticklabels': False,
        'tickwidth': 0,
        'showline': True,
        'showgrid': False,
        'zeroline': False,
        'linecolor': 'black',
        'linewidth': 0.5,
        'mirror': True
    }


def _tsne_layout_3d():
    """Return the Layout used for the 3D cluster plot."""
    return Layout(autosize=False,
                  width=900,
                  height=700,
                  title='3D Cell Cluster',
                  titlefont={
                      'color': _TSNE_TEXT_COLOR,
                      'size': 20
                  },
                  margin={
                      'l': 100,
                      'r': 150,
                      'b': 100,
                      't': 75
                  },
                  scene={
                      'camera': {
                          'eye': dict(x=1.2, y=1.5, z=0.7),
                          'center': dict(x=0.25, z=-0.1)
                      },
                      'aspectmode': 'data',
                      'xaxis': _tsne_axis('tSNE 1', 12),
                      'yaxis': _tsne_axis('tSNE 2', 12),
                      'zaxis': _tsne_axis('tSNE 3', 12)
                  })


def _tsne_layout_2d():
    """Return the Layout used for the 2D cluster plot."""
    return Layout(
        autosize=True,
        showlegend=True,
        width=900,
        height=700,
        margin={
            'l': 100,
            'r': 150,
            'b': 75,
            't': 75,
            'pad': 20
        },
        legend={
            'orientation': 'v',
            'traceorder': 'grouped',
            'tracegroupgap': 10,
            'x': 1.03,
            'font': {
                'color': _TSNE_TEXT_COLOR,
                'size': 12
            },
        },
        xaxis=_tsne_axis('tSNE 1', 16),
        yaxis=_tsne_axis('tSNE 2', 16),
        title='Cell clusters',
        titlefont={
            'color': _TSNE_TEXT_COLOR,
            'size': 20
        },
        annotations=[{
            'text': 'Cluster',
            'x': 0,
            'y': -0.2,
            'ax': 0,
            'ay': 0,
            'showarrow': False,
            'font': {
                'color': _TSNE_TEXT_COLOR,
                'size': 16
            },
            'xref': 'paper',
            'yref': 'paper',
            'xanchor': 'left',
            'yanchor': 'bottom',
            'textangle': 0,
        }])


def _max_int_field(points, field):
    """Return the largest ``int(point[field])`` found in ``points``."""
    return int(max(points, key=lambda p: int(p[field]))[field])


def _new_cluster_trace(trace_cls, axes, point, grouping, biosample, color,
                       marker_size):
    """Create an empty marker trace of ``trace_cls`` for ``point``'s group.

    ``axes`` names the coordinate properties ('x', 'y', optionally 'z') that
    are initialised to empty lists alongside ``text``.
    """
    coords = {axis: list() for axis in axes}
    return trace_cls(
        text=list(),
        mode='markers',
        visible=True,
        name=point['cluster_name'] + " Sample" + str(biosample + 1),
        legendgroup=point[grouping],
        marker={
            'color': color,
            'size': marker_size,
            'symbol':
            _TSNE_MARKER_SYMBOLS[biosample],  # Eran and Fangming 09/12/2017
            'line': {
                'width': 1,
                'color': 'black'
            }
        },
        hoverinfo='text',
        **coords)


def get_cluster_plot(species, grouping):
    """Generate tSNE cluster plot traces and layouts.

    3D points are preferred; when ``get_3D_cluster_points`` returns nothing
    the 2D points from ``get_cluster_points`` are used instead.  Points are
    bucketed into one trace per (cluster, biosample) combination.

    Side effects: rebinds the module globals ``trace_colors`` (mapping
    ``'cluster_color_<n>'`` to the positions of the matching traces in
    key-sorted order) and ``num_colors``.

    Arguments:
        species (str): Name of species.
        grouping (str): Which variable to use for grouping. cluster_ordered,
            biosample, layer or cluster_biosample. Falls back to
            "cluster_ordered" when the first point lacks this key.
    Returns:
        dict: ``{'traces_2d', 'layout2d'}`` when only 2D points exist,
        otherwise ``{'traces_2d', 'traces_3d', 'layout2d', 'layout3d'}``.
        The trace values are OrderedDicts keyed by
        ``cluster_ordered + max_cluster * (biosample - 1)``.
    Raises:
        FailToGraphException: when neither 3D nor 2D points are available.
        KeyError: for species 'mmu' when any of the trace keys 17-22 that get
            restyled is absent.
    """
    global trace_colors
    global num_colors

    points_3d = get_3D_cluster_points(species)
    has_3d = bool(points_3d)
    points = points_3d if has_3d else get_cluster_points(species)
    if not points:
        raise FailToGraphException
    if grouping not in points[0]:
        grouping = "cluster_ordered"
        print("**** Using cluster_ordered")

    layout2d = _tsne_layout_2d()

    trace_colors = dict()
    traces_2d = OrderedDict()
    traces_3d = OrderedDict()

    max_cluster = _max_int_field(points, 'cluster_ordered') + 1
    if species == 'mmu':
        max_cluster = 16
    num_colors = _max_int_field(points, grouping) + 1
    colors = generate_cluster_colors(num_colors)

    for point in points:
        cluster_num = int(point['cluster_ordered'])
        biosample = int(point.get('biosample', 1)) - 1
        trace_key = cluster_num + max_cluster * biosample
        color = colors[int(point[grouping]) - 1]

        # Build a trace only the first time its key is seen; passing a fresh
        # trace to setdefault() would construct one per point and throw
        # nearly all of them away.
        if trace_key not in traces_2d:
            traces_2d[trace_key] = _new_cluster_trace(
                Scattergl, ('x', 'y'), point, grouping, biosample, color, 7)
        trace2d = traces_2d[trace_key]
        trace2d['x'].append(point['tsne_x'])
        trace2d['y'].append(point['tsne_y'])
        trace2d['text'].append(
            build_hover_text({
                'Cell': point.get('samp', 'N/A'),
                'Layer': point.get('layer', 'N/A'),
                'Biosample': point.get('biosample', 'N/A'),
                'Cluster': str(cluster_num)
            }))

        if has_3d:
            if trace_key not in traces_3d:
                traces_3d[trace_key] = _new_cluster_trace(
                    Scatter3d, ('x', 'y', 'z'), point, grouping, biosample,
                    color, 4)
            trace3d = traces_3d[trace_key]
            trace3d['x'].append(point['tsne_1'])
            trace3d['y'].append(point['tsne_2'])
            trace3d['z'].append(point['tsne_3'])
            # The 3D trace reuses the hover text of its 2D counterpart.
            trace3d['text'] = trace2d['text']

    # Keys are unique ints, so sorting the items orders traces by key.
    for i, (key, value) in enumerate(sorted(traces_2d.items())):
        trace_str = 'cluster_color_' + str(int(value['legendgroup']) - 1)
        trace_colors.setdefault(trace_str, []).append(i)

    if species == 'mmu':
        # Traces 17-22 are drawn large and black and start hidden (shown in
        # the legend only).
        for i in range(17, 23, 1):
            restyled = [traces_2d[i]]
            if has_3d:
                restyled.append(traces_3d[i])
            for trace in restyled:
                trace['marker']['size'] = 15
                trace['marker']['symbol'] = _TSNE_MARKER_SYMBOLS[
                    i % len(_TSNE_MARKER_SYMBOLS)]
                trace['marker']['color'] = 'black'
                trace['visible'] = "legendonly"

    if not has_3d:
        return {'traces_2d': traces_2d, 'layout2d': layout2d}

    return {
        'traces_2d': traces_2d,
        'traces_3d': traces_3d,
        'layout2d': layout2d,
        'layout3d': _tsne_layout_3d()
    }
Exemplo n.º 6
0
    def alignment_plot_eic(self):
        """
        Returns a Figure representing the extracted ion chromatogram for the
        given samples. Plots rt (top subplot) and ri (bottom subplot) vs
        intensity per standard.

        For every sample in ``self.rawdata_dfs`` and every target of
        ``self.standards_dfs[0]``, the rows whose precursor m/z lies within
        ``self.mass_tolerance`` of the target mass are selected and their
        intensities summed per RT and per RI value.

        Side effects: resets and then updates ``self.max_x`` and
        ``self.max_y``, which are used as the upper axis bounds.
        """
        standards = self.standards_dfs[0]
        target_col = self.COLUMNS['TARGET']
        rt_col = self.COLUMNS['RT']
        ri_col = self.COLUMNS['RI']
        intensity_col = self.COLUMNS['INTENSITY']

        masses = dict(standards.loc[:, (target_col, self.COLUMNS['MASS'])])
        rts = dict(standards.loc[:, (target_col, rt_col)].sort_values())

        fig = make_subplots(rows=2,
                            cols=1,
                            vertical_spacing=0.15,
                            shared_xaxes=True,
                            shared_yaxes=True)

        self.max_x = 0
        self.max_y = 0
        # chromatogram plot
        for rawdata_df in self.rawdata_dfs:
            sample_name = self._get_name(rawdata_df)
            rawdata_df = rawdata_df.xs(sample_name, axis=1, drop_level=True)

            for tgt, rt in rts.items():
                target_color = self.target_colors[tgt]

                # Take the absolute value of the mass difference BEFORE
                # comparing; abs() around the whole comparison leaves a
                # one-sided filter that admits every lighter ion.
                in_window = (rawdata_df[self.COLUMNS['PRECMZ']] -
                             masses[tgt]).abs() <= self.mass_tolerance
                target_rows = rawdata_df[in_window]

                # rt v intensity; x comes from the grouped index so that it
                # always has the same length as y.
                summed_rt = target_rows.groupby(rt_col)[intensity_col].sum()
                fig.add_trace(
                    Scattergl(
                        x=summed_rt.index,
                        y=summed_rt,
                        legendgroup=tgt,
                        marker_color=target_color,
                        name=f"{rt:.2f} {re.sub(self.INCHIKEY_REGEX, '', tgt)}",
                        mode='lines',
                        **self.scatter_shared_attrs),
                    col=1,
                    row=1,
                )

                # ri v intensity; plotted against RI, not RT.
                summed_ri = target_rows.groupby(ri_col)[intensity_col].sum()
                fig.add_trace(
                    Scattergl(x=summed_ri.index,
                              y=summed_ri,
                              legendgroup=tgt,
                              marker_color=target_color,
                              showlegend=False,
                              mode='lines',
                              **self.scatter_shared_attrs),
                    col=1,
                    row=2,
                )

                # Track the tallest peak over every target, skipping targets
                # with no matching rows.
                for summed in (summed_rt, summed_ri):
                    if not summed.empty:
                        self.max_y = max(self.max_y, summed.max())

            self.max_x = max(self.max_x, rawdata_df[ri_col].max(),
                             rawdata_df[rt_col].max())

        fig.update_layout(paper_bgcolor='white',
                          **self.standard_scatter_layout)

        fig.update_yaxes(title_text='Intensity',
                         tickformat='e',
                         range=[0, self.max_y],
                         showticklabels=True)
        fig.update_xaxes(range=[0, self.max_x], showticklabels=True)
        fig.update_layout(title_text="ISTD alignment plot (EIC)")
        return fig
def plot_results(mod,
                 tr_feats,
                 tr_targs,
                 te_feats=None,
                 te_targs=None,
                 folder=None,
                 test_lin_preds=None):
    """Write predictions-vs-actual scatter plots for a fitted model to HTML.

    The train plot is always written.  When ``te_feats`` is given a second
    plot for the test set is written too, optionally with ``test_lin_preds``
    overlaid as a 'linear fit' line.  Files go to ``plot_dir`` or, when
    ``folder`` is given, to ``plot_dir + folder`` (created if missing).

    Arguments:
        mod: Object providing ``predict`` and ``evaluate``.
        tr_feats: Training features passed to ``mod``.
        tr_targs: Training targets; ``shape[0]`` is used to colour markers by
            position.
        te_feats: Optional test features.
        te_targs: Test targets, required when ``te_feats`` is given.
        folder: Optional sub-folder name below ``plot_dir``.
        test_lin_preds: Optional y values for the 'linear fit' line.
    """
    # TODO: subplot with actual and predicted returns
    has_test = te_feats is not None

    train_preds = mod.predict(tr_feats)[:, 0]
    train_score = mod.evaluate(tr_feats, tr_targs)
    if has_test:
        title = 'train preds vs actual (score = ' + str(train_score) + ')'
    else:
        title = 'full train preds vs actual (score = ' + str(train_score) + ')'

    train_marker = dict(color=list(range(tr_targs.shape[0])),
                        colorscale='Portland',
                        showscale=True,
                        opacity=0.5)
    train_trace = Scattergl(x=train_preds,
                            y=tr_targs,
                            mode='markers',
                            name='preds vs actual',
                            marker=train_marker)
    fig = Figure(data=[train_trace],
                 layout=Layout(title=title,
                               xaxis=dict(title='predictions'),
                               yaxis=dict(title='actual')))

    # Resolve the output location once; the sub-folder is created on demand.
    if folder is None:
        out_prefix = plot_dir
    else:
        if not os.path.exists(plot_dir + folder):
            os.mkdir(plot_dir + folder)
        out_prefix = plot_dir + folder + '/'

    if has_test:
        train_file = 'train_preds_vs_actual.html'
    else:
        train_file = 'full_train_preds_vs_actual.html'
    plot(fig,
         filename=out_prefix + train_file,
         auto_open=False,
         show_link=False)

    if not has_test:
        return

    test_preds = mod.predict(te_feats)[:, 0]
    test_score = mod.evaluate(te_feats, te_targs)
    test_marker = dict(color=list(range(te_targs.shape[0])),
                       colorscale='Portland',
                       showscale=True,
                       opacity=0.5)
    test_data = [
        Scattergl(x=test_preds,
                  y=te_targs,
                  mode='markers',
                  name='preds vs actual',
                  marker=test_marker)
    ]
    if test_lin_preds is not None:
        test_data.append(
            Scatter(x=test_preds,
                    y=test_lin_preds,
                    mode='lines',
                    name='linear fit'))

    test_layout = Layout(title='test preds vs actual (score = ' +
                         str(test_score) + ')',
                         xaxis=dict(title='predictions'),
                         yaxis=dict(title='actual'))
    fig = Figure(data=test_data, layout=test_layout)
    plot(fig,
         filename=out_prefix + 'test_preds_vs_actual.html',
         auto_open=False,
         show_link=False)
Exemplo n.º 8
0
from planet import Planet
import plotly.offline as plt
from plotly.graph_objs import Scattergl

# Model of the Earth ("La tierra" is Spanish for "the Earth").
# NOTE(review): the meaning of each positional argument is defined by
# planet.Planet, which is not visible here -- presumably eccentricity,
# semi-major axis, orbital period and mass among others; confirm there.
earth = Planet(
    "La tierra",
    0.017,
    1,
    365.26,
    5.9742 * 10**24,
    0,
    0,
    101.22,
)

# NOTE(review): 200 is presumably the number of sampled orbit points -- confirm
# against Planet.get_orbit_2bodies.
orbit = earth.get_orbit_2bodies(200)

# Column 0 of the orbit array is plotted on x and column 1 on y.
orbit_trace = Scattergl(x=orbit[:, 0], y=orbit[:, 1])
plt.plot(dict(data=[orbit_trace]))
Exemplo n.º 9
0
    def generate_rt_shift(self) -> Figure:
        """
        Overlay, for every raw-data sample, summed intensity against retention
        time (solid line) and against retention index (dashed line).

        Each sample contributes two line traces that share a legend group and a
        colour taken from ``self.sample_colors``. ``self.max_x`` and
        ``self.max_y`` are raised to the largest values seen while plotting;
        only ``self.max_x`` is used here, to set the x-axis range.

            Returns:
                fig: A plotly.graph_objs.Figure holding all traces on a single
                set of axes, with a horizontal legend placed below the plot.

        """
        rt_key = self.COLUMNS['RT']
        ri_key = self.COLUMNS['RI']
        intensity_key = self.COLUMNS['INTENSITY']

        figure = Figure()
        plot_height = 100 + 50 * (len(self.rawdata_dfs) // 7)

        for sample_df in self.rawdata_dfs:
            colour = next(self.sample_colors)
            # The first top-level column label is taken as the sample name.
            label = sample_df.columns.unique(level=0).values[0]
            signal = sample_df.xs(label, axis=1, drop_level=True)

            # Intensity summed per retention time (solid line).
            by_rt = signal.groupby(rt_key)[intensity_key].sum().reset_index()
            rt_trace = Scattergl(x=by_rt[rt_key],
                                 y=by_rt[intensity_key],
                                 legendgroup=label,
                                 marker_color=colour,
                                 name='RT ' + label,
                                 mode='lines',
                                 **self.scatter_shared_attrs)
            figure.add_trace(rt_trace)

            # Intensity summed per retention index (dashed line).
            by_ri = signal.groupby(ri_key)[intensity_key].sum().reset_index()
            ri_trace = Scattergl(x=by_ri[ri_key],
                                 y=by_ri[intensity_key],
                                 legendgroup=label,
                                 marker_color=colour,
                                 line=dict(dash='dash'),
                                 name='RI ' + label,
                                 mode='lines',
                                 **self.scatter_shared_attrs)
            figure.add_trace(ri_trace)

            # Track the running extremes on the instance.
            self.max_x = max(self.max_x, signal[ri_key].max(),
                             signal[rt_key].max())
            self.max_y = max(self.max_y, by_ri[intensity_key].max(),
                             by_rt[intensity_key].max())

        # Horizontal legend centred underneath the plotting area.
        legend_below = dict(x=0.5,
                            y=-0.1,
                            xanchor='center',
                            yanchor='top',
                            orientation='h')
        figure.update_layout(**self.scatter_layout,
                             xaxis_showticklabels=True,
                             height=plot_height,
                             legend=legend_below)

        figure.update_xaxes(title_text='RT (solid) - RI (dashed)',
                            range=[0, self.max_x * 1.1])
        figure.update_yaxes(title_text='Intensity', tickformat='e')
        figure.update_layout(title_text="Retention Time correction comparison")
        return figure
    def alignment_plot_tic(self):
        """
        Plot summed intensity per sample on two stacked subplots.

        The top row shows intensity against retention time, the bottom row
        intensity against retention index. Both rows are created with shared
        x and y axes and are given the same axis ranges, derived from the
        largest values found across all samples.

            Returns:
                fig: The two-row plotly figure produced by ``make_subplots``.
        """
        # FIXME Fix this plot. Must have to do with the x axis
        rt_key = self.COLUMNS['RT']
        ri_key = self.COLUMNS['RI']
        intensity_key = self.COLUMNS['INTENSITY']

        figure = make_subplots(rows=2,
                               cols=1,
                               vertical_spacing=0.15,
                               shared_xaxes=True,
                               shared_yaxes=True)
        x_limit = 0
        y_limit = 0
        plot_height = 1000 + (50 * (len(self.rawdata_dfs) // 7))

        for sample_df in self.rawdata_dfs:
            colour = next(self.sample_colors)
            # The first top-level column label is taken as the sample name.
            label = sample_df.columns.unique(level=0).values[0]
            signal = sample_df.xs(label, axis=1, drop_level=True)
            dash_style = next(self.line_types)

            # Top row: intensity summed per retention time.
            by_rt = signal.groupby(rt_key)[intensity_key].sum().reset_index()
            rt_trace = Scattergl(x=by_rt[rt_key],
                                 y=by_rt[intensity_key],
                                 legendgroup=label,
                                 line=dict(width=1),
                                 marker_color=colour,
                                 name='RT ' + label,
                                 mode='lines',
                                 **self.scatter_shared_attrs)
            figure.add_trace(rt_trace, row=1, col=1)

            # Bottom row: intensity summed per retention index.
            by_ri = signal.groupby(ri_key)[intensity_key].sum().reset_index()
            ri_trace = Scattergl(x=by_ri[ri_key],
                                 y=by_ri[intensity_key],
                                 legendgroup=label,
                                 line=dict(dash=dash_style, width=1),
                                 marker_color=colour,
                                 name='RI ' + label,
                                 mode='lines',
                                 **self.scatter_shared_attrs)
            figure.add_trace(ri_trace, row=2, col=1)

            # Running extremes used for the axis ranges below.
            x_limit = max(x_limit, signal[ri_key].max(),
                          signal[rt_key].max())
            y_limit = max(y_limit, by_ri[intensity_key].max(),
                          by_rt[intensity_key].max())

        # Horizontal legend centred underneath the plotting area.
        legend_below = dict(x=0.5,
                            y=-0.1,
                            xanchor='center',
                            yanchor='top',
                            orientation='h')
        figure.update_layout(**self.scatter_layout,
                             paper_bgcolor='white',
                             height=plot_height,
                             legend=legend_below)

        figure.update_xaxes(title_text='RT (solid) - RI (dashed)',
                            range=[0, x_limit * 1.1],
                            showticklabels=True)
        # Leave a small margin below zero and above the tallest peak.
        figure.update_yaxes(title_text='Intensity',
                            tickformat='e',
                            range=[y_limit * -0.05, y_limit * 1.1])
        figure.update_layout(title_text="Retention Time correction comparison")
        return figure
Exemplo n.º 11
0
    def generate_rt_ri_delta_from_reference(self):
        """
        Creates two plots showing the difference in retention time (top) and
        retention index (bottom) versus the library reference for each
        internal standard.

        Every sample in ``self.standards_dfs`` is sorted by the target
        retention time and passed to ``self._calc_diff``. For each result one
        trace per subplot is added at y = 0 whose error bars carry the
        ``rt_diff`` (top) or ``ri_diff`` (bottom) values. Dotted horizontal
        lines are drawn at ``+self.ri_tolerance`` and ``-self.ri_tolerance``,
        and both y axes share a range spanning the extremes of the
        ``rt_diff``, ``ri_diff`` and ``corr_diff`` columns.

            Returns:
                fig: The two-row plotly figure produced by ``make_subplots``.
        """

        fig = make_subplots(rows=2,
                            cols=1,
                            vertical_spacing=0.05,
                            shared_xaxes=True)

        target_rt = (self.COLUMNS['TARGET'], self.COLUMNS['RT'])
        deltas_dfs = []
        for s in self.standards_dfs:
            s = s.sort_values([target_rt], axis='index')
            sample_name = self._get_name(s)
            deltas_dfs.append(self._calc_diff(s, sample_name))

        # Start both bounds at 0 so the zero baseline is always inside the
        # plotted range.
        min_y = 0
        max_y = 0

        for sample in deltas_dfs:
            # The name must be read before the top column level is dropped.
            sample_name = self._get_name(sample)
            sample_color = next(self.sample_colors)
            size = len(sample.index.values)
            sample.columns = sample.columns.droplevel(0)

            for diff_col in ('rt_diff', 'ri_diff', 'corr_diff'):
                min_y = min(min_y, sample[diff_col].min())
                max_y = max(max_y, sample[diff_col].max())

            # Row 1 shows the RT deviation, row 2 the RI deviation.
            for row, prefix, diff_col in ((1, 'RT ', 'rt_diff'),
                                          (2, 'RI ', 'ri_diff')):
                fig.add_trace(
                    Scattergl(x=self._fix_names(sample.index.values),
                              y=[0] * size,
                              name=prefix + sample_name,
                              legendgroup=sample_name,
                              line_color=sample_color,
                              error_y=dict(
                                  type='data',
                                  symmetric=False,
                                  array=sample[diff_col],
                              ),
                              **self.scatter_shared_attrs),
                    col=1,
                    row=row,
                )

        # Decorate the figure once, after all traces exist. add_hline appends
        # new shapes on every call, so issuing it per sample would stack
        # duplicate semi-transparent tolerance lines on top of each other; the
        # layout and axis settings only depend on the final min_y / max_y.
        tolerance_line = dict(color='rgba(0,0,0,0.75)', dash='dot', width=1)
        fig.add_hline(y=self.ri_tolerance, line=tolerance_line)
        fig.add_hline(y=-self.ri_tolerance, line=tolerance_line)

        fig.update_layout(
            **self.standard_scatter_layout,
            height=3000,
        )
        fig.update_xaxes(type='category')
        # Only the bottom subplot shows the (shared) category tick labels.
        fig['layout']['xaxis'].update(showticklabels=False)
        fig['layout']['xaxis2'].update(
            showticklabels=True,
            tickangle=60,
            title=dict(text='ISTD Name (Sorted L-R by increasing RT)',
                       font_size=18))

        y_range = [min_y * 1.1, max_y * 1.1]
        fig['layout']['yaxis'].update(title=dict(
            text='Lib vs sample RT Difference', font_size=18),
                                      range=y_range)
        fig['layout']['yaxis2'].update(title=dict(
            text='Lib vs sample RI Difference', font_size=18),
                                       range=y_range)
        fig.update_layout(
            title_text=
            'Retention Time and Index deviations from the Reference (library)'
        )
        return fig
    def generate_plot(self) -> Figure:
        """
        Build a chromatogram figure showing the shift produced by retention
        time correction, annotated with the internal standards found.

        For every raw-data sample two line traces are drawn: summed intensity
        against retention time (solid) and against retention index (dashed).
        For every standards frame two marker traces are added at the
        standards' intensities: 'x' symbols at their retention times and
        'cross' symbols at their retention indices, each with horizontal
        error bars taken from ``self._calc_diff``. ``self.max_x`` and
        ``self.max_y`` are raised to the largest values seen and used for
        the axis ranges.

            Returns:
                fig: The plotly.graph_objs.Figure returned by
                ``self._add_zones_and_std_lines`` after layout is applied.
        """
        rt_key = self.COLUMNS['RT']
        ri_key = self.COLUMNS['RI']
        intensity_key = self.COLUMNS['INTENSITY']

        figure = Figure()
        plot_height = 1000 + (len(self.rawdata_dfs) * 50 // 7)

        # Chromatogram lines, two per sample.
        for sample_df in self.rawdata_dfs:
            # TODO Have this cycle reset every time generate_plot is called
            colour = next(self.sample_colors)
            label = sample_df.columns.unique(level=0).values[0]
            signal = sample_df.xs(label, axis=1, drop_level=True)
            # The line-type cycle is still advanced once per sample so its
            # state stays in step, even though the dash style is fixed below.
            next(self.line_types)

            # Intensity summed per retention time (solid line).
            by_rt = signal.groupby(rt_key)[intensity_key].sum().reset_index()
            rt_line = Scattergl(x=by_rt[rt_key],
                                y=by_rt[intensity_key],
                                opacity=self.rt_line2_opacity,
                                legendgroup=label,
                                line=dict(width=1),
                                marker_color=colour,
                                name='RT ' + label,
                                mode='lines',
                                **self.scatter_shared_attrs)
            figure.add_trace(rt_line)

            # Intensity summed per retention index (dashed line).
            by_ri = signal.groupby(ri_key)[intensity_key].sum().reset_index()
            ri_line = Scattergl(x=by_ri[ri_key],
                                y=by_ri[intensity_key],
                                opacity=self.ri_line2_opacity,
                                legendgroup=label,
                                marker_color=colour,
                                mode='lines',
                                line=dict(dash='dash', width=1),
                                name='RI ' + label,
                                **self.scatter_shared_attrs)
            figure.add_trace(ri_line)

            # Track the running extremes on the instance.
            self.max_x = max(self.max_x, signal[ri_key].max(),
                             signal[rt_key].max())
            self.max_y = max(self.max_y, by_ri[intensity_key].max(),
                             by_rt[intensity_key].max())

        # Markers for the found ISTDs: 'x' at RT, 'cross' at RI.
        for istd_df in self.standards_dfs:
            # NOTE(review): the second top-level column label is used as the
            # sample name here -- presumably the first one is the target /
            # library level; confirm against how standards_dfs is built.
            label = istd_df.columns.unique(level=0).values[1]
            colour = next(self.sample_colors)

            istd_intensity = istd_df.loc[:, (label, intensity_key)]
            istd_rt = istd_df.loc[:, (label, rt_key)]
            istd_ri = istd_df.loc[:, (label, ri_key)]

            # Horizontal error bars come from the per-standard differences.
            flat_diffs = self._calc_diff(istd_df, label).droplevel(level=0,
                                                                   axis=1)
            rt_errors = flat_diffs[self.COLUMNS['RT_DIFF']]
            ri_errors = flat_diffs[self.COLUMNS['RI_DIFF']]

            rt_markers = Scattergl(
                x=istd_rt,
                y=istd_intensity,
                showlegend=False,
                legendgroup=label,
                text=istd_df.index,
                mode='markers',
                name=label,
                marker=dict(size=10,
                            symbol='x',
                            opacity=self.rt_marker_opacity,
                            color=colour,
                            line=dict(color=colour, width=1)),
                error_x=dict(array=rt_errors, symmetric=False),
                **self.scatter_shared_attrs)
            figure.add_trace(rt_markers)

            ri_markers = Scattergl(
                x=istd_ri,
                y=istd_intensity,
                showlegend=False,
                legendgroup=label,
                text=istd_df.index,
                line=dict(dash='dash', width=1),
                mode='markers',
                name=label,
                marker=dict(size=10,
                            symbol='cross',
                            opacity=self.ri_marker_opacity,
                            color=colour,
                            line=dict(color=colour, width=1)),
                error_x=dict(array=ri_errors, symmetric=False),
                **self.scatter_shared_attrs)
            figure.add_trace(ri_markers)

        figure = self._add_zones_and_std_lines(figure)

        figure.update_layout(paper_bgcolor='white',
                             xaxis_showticklabels=True,
                             height=plot_height,
                             **self.standard_scatter_layout)
        # Override the legend coming from the shared layout: horizontal and
        # centred underneath the plotting area.
        legend_below = dict(x=0.5,
                            y=-0.1,
                            xanchor='center',
                            yanchor='top',
                            orientation='h')
        figure.update_layout(legend=legend_below)
        figure.update_layout(
            title_text="Annotated ISTDs before and after RT Correction")
        figure.update_xaxes(title_text='RT (solid) - RI (dashed)',
                            range=[0, self.max_x * 1.1])
        # Leave a small margin below zero and above the tallest peak.
        figure.update_yaxes(title_text='Intensity',
                            tickformat='e',
                            range=[self.max_y * -0.05, self.max_y * 1.1])
        return figure