# Example 1
def analyze_plate(d, h, cats):
    """Generate the standard set of output figures for one (dose-bearing) dataset."""
    # TODO(review): output organization could still be tightened up

    # build the replicate consensus; this also saves the gct and the SC plot
    cons_d, cons_h = pa.assemble_consensus(d, h, cats, save=True, sc=True)
    cons_h = gt.gen_label(cons_h, 'nd')
    cons_d.name = d.name

    # quick first-pass tSNE across two parameter settings
    dim_reduct.tsne2(cons_d, cons_h, px=10, lr=[10, 150], inter=True)

    # standalone dendrogram (no heatmap attached)
    dim_reduct.make_dendrogram(cons_d, labels=cons_h.label, outpath=True)

    # correlation matrix of the combined zscores sorted by name and dose;
    # follow-ups can then plot the sweep or the clustered version
    corr_kwargs = dict(title='dflt',
                       sort=True,
                       outpath=True,
                       sparselabel=True,
                       grid=True,
                       labels=cons_h.name)
    plottools.plot_correlation_matrix(cons_d, cons_h, **corr_kwargs)

    if 'd' in cats:
        # dose plates: landmark-concentration plotting (currently disabled)
        newcats = cats.replace('d', '')
        outpath = gt.dflt_outpath(fldr_name='landmark concs')
        # plottools.plot_landmark_concs(cons_d, cons_h, cats=newcats, genes='all', labels='dose', outpath=outpath)

        # find genes that move and plot their dose-response curves (30 per plate)
        plottools.plot_ex_genes(d, h, n=10, mode='med')
# Example 2
def plot_veh_matrices(path='dflt',
                      batch='A',
                      up=.75,
                      low=0,
                      kind='euclid',
                      getcells=True):
    """Gather the vehicle matrix and plot it both plain and clustered (euclidean)."""
    veh_d, veh_h = get_vehicle_matrix(path=path, batch=batch, getcells=getcells)
    veh_h = gt.gen_label(veh_h, 'pb')

    # dendrogram of the collapsed vehicle profiles
    dim_reduct.make_dendrogram(veh_d, labels=veh_h.label)

    # arguments common to both matrix plots
    shared = dict(ptype=kind, outpath=True)

    plottools.plot_correlation_matrix(veh_d,
                                      veh_h,
                                      lower=low,
                                      upper=up,
                                      title=f'vehicles {kind}',
                                      sparselabel=False,
                                      **shared)

    plottools.plot_clustered_correl(veh_d,
                                    veh_h,
                                    title=f'clustered vehicles {kind}',
                                    **shared)
# Example 3
def run_plate_analysis(mode='ind', cats='nd', path='dflt'):
    """Run the standard analysis on ZSVCQNORM plate files.

    mode: 'ind' analyzes each plate individually, 'comb' concatenates all
          plates into one dataset first (most useful for plates with doses).
    cats: breakdown categories forwarded to labeling and analysis (e.g. 'nd').
    path: folder to search; 'dflt' resolves to <desktop>/newQC.

    Raises ValueError for an unrecognized mode (previously a silent no-op).
    """
    path = gt.check_dfltarg(path, os.path.join(gt.check_desktop(), 'newQC'))

    fl = gt.globit(path, '*ZSVCQNORM*')
    print(fl)

    if mode == 'comb':
        dl, hl = [], []
        for i, f in enumerate(fl):
            d, h = gct.extractgct(f)
            if i == 0:
                # derive the combined dataset name from the first plate
                try:
                    pname = d.name + '+'
                except AttributeError:
                    # no .name on the data — fall back to the header address
                    pname = h.addr[0].split(':')[0] + '+'
            if len(h.batch.unique()) > 1:
                # disambiguate sample labels when one plate has multiple batches
                h.plate = h.plate + h.batch
            # define labels (should plate be added here too?)
            h = gt.gen_label(h, cats)
            dl.append(d)
            hl.append(h)
        try:
            d = pd.concat(dl, axis=1)
            d.name = pname
        except ValueError:
            # pd.concat raises ValueError when given an empty list
            sys.exit('no gct file plates to analyze')
        h = pd.concat(hl, axis=0)

        analyze_plate(d, h, cats)

    elif mode == 'ind':
        for f in fl:
            d, h = gct.extractgct(f)
            # define labels (should plate be added here too?)
            h = gt.gen_label(h, cats)

            analyze_plate(d, h, cats)

    else:
        raise ValueError(f"mode must be 'ind' or 'comb', got {mode!r}")
# Example 4
def get_vehicle_matrix(path='dflt', batch='all', delim=':', getcells=False):
    """Load control files under *path* and collapse vehicle wells to medians.

    batch: 'all' collapses each batch separately, 'A' keeps only batch A,
           anything else collapses the whole plate into one median profile.
    delim: separator used when composing labels.
    getcells: if True, re-predict cell identity per profile and append it
              to each label.
    Returns (vdf, vh): median data with label columns, and the header rows.
    """
    path = gt.check_dfltarg(path, os.path.join(gt.check_desktop(), 'newQC'))

    flv = gt.globit(path, '*Qctrl*')
    if len(flv) == 0:
        # no dedicated control files — fall back to the regular qnorms
        flv = gt.globit(path, '*_QNORM_*')

    dlist, hlist = [], []
    for f in flv:
        d, h = gct.extractgct(f)
        h['plate'] = h['plate'].apply(lambda x: x[:6])
        d, h = gt.dsub(d, h, {'type': 'vehicle'})
        if batch == 'all':
            for b in h.batch.unique():
                ds, hs = gt.dsub(d, h, {'batch': b})
                med = ds.median(axis=1)
                hs = gt.gen_label(hs, 'pb', delim=delim)
                dlist.append(med)
                hlist.append(hs.iloc[0])
        elif batch == 'A':
            ds, hs = gt.dsub(d, h, {'batch': 'A'})
            med = ds.median(axis=1)
            hs = gt.gen_label(hs, 'pb', delim=delim)
            dlist.append(med)
            hlist.append(hs.iloc[0])
        else:
            med = d.median(axis=1)
            # bug fix: was gen_label(hs, ...) but hs is undefined in this
            # branch — label from the full header h instead
            hs = gt.gen_label(h, 'p', delim=delim)
            dlist.append(med)
            hlist.append(hs.iloc[0])

    vdf = pd.concat(dlist, axis=1)
    vh = pd.DataFrame(hlist)
    # columns must be labeled BEFORE predict_cells, which indexes vdf by label
    vdf.columns = vh.label
    if getcells is True:
        vh['cell2'] = vh.label.apply(lambda x: predict_cells(vdf[x]))
        vh['label'] = vh.label + delim + vh.cell2
    # re-apply in case getcells extended the labels
    vdf.columns = vh.label
    return vdf, vh
# Example 5
def assemble_consensus(df,
                       h,
                       cats,
                       sc=True,
                       legend='brief',
                       plot=False,
                       skyl=False,
                       n=None,
                       save=False,
                       ret=True,
                       test=False):
    """ tool to assemble replicate zscore consensus, pass df, header and the breakdown categories 'nd' for instance
    will return the consolidated df and header file

    can pass in multiple gct files as a single string via F6 shortcut

    ccs will calculate the zscore correlation of replicates, and insert that into header df
    plot will use seaborn pairplot to visualize the calculated rep correlations above
    skyl controls skyline plot generation, can be True to plot all ind reps plus consensus
    n argument is a limiter to only consider treatments with enough replicates, including into consensus gct!!
    save will save the consensus gct file
    """

    # a string argument is treated as gct file path(s) to load; a DataFrame
    # is used as-is (bug fix: previously the DataFrame path printed a
    # spurious 'error in loading dataframe' message)
    if isinstance(df, str):
        df, h = gct.extractgct(df)

    outpath = gt.dflt_outpath(fldr_name='output figs')
    try:
        pname = df.name
    except AttributeError:
        # data has no .name — fall back to the header address
        pname = h.addr[0].split(':')[0]
    outpath = os.path.join(outpath, pname)
    try:
        os.mkdir(outpath)
    except OSError:
        # best-effort: directory may already exist
        pass

    # split the dataset into replicate groups by the requested categories
    subs = breakdown(df, h, cats, dic=False)

    con_data = pd.DataFrame(index=df.index)
    # extra header rows computed per consensus column
    addnl = ['corr', 'all ccs', 'prom', 'all proms', 'porder']
    con_header = pd.DataFrame(index=list(h.columns.values) + addnl)

    for ds, hs in subs:
        if n is not None:
            # skip treatments without enough replicates
            if len(ds.columns) < n:
                print('not enough reps', hs.iloc[0])
                continue

        c = consensus(ds, name='first')
        con_data = pd.concat([con_data, c], axis=1)

        # seed the consensus annotation from the first replicate
        new_annot = hs.iloc[0, :].copy().T
        new_annot.well = hs['well'].values
        new_annot.addr = hs['addr'].values

        # pairwise pearson correlations between all replicate columns
        corrs = []
        for i in range(len(ds.columns)):
            for j in range(1 + i, len(ds.columns)):
                corrs.append(
                    round(ds.iloc[:, i].corr(ds.iloc[:, j], method='pearson'),
                          2))
        if len(corrs) == 0:
            # single replicate — no correlation to report
            new_annot['corr'] = np.nan
            new_annot['all ccs'] = np.nan
        elif len(corrs) == 1:
            new_annot['corr'] = round(corrs[0], 2)
            new_annot['all ccs'] = corrs
        else:
            # summarize with the 75th percentile of pairwise ccs
            new_annot['corr'] = round(np.percentile(corrs, 75), 2)
            new_annot['all ccs'] = corrs
        # (removed dead code: a decimal.Decimal conversion of corrs whose
        # result was never used)
        new_annot['corr'] = pd.to_numeric(new_annot['corr'])

        # 'promiscuity': total absolute zscore per replicate, 75th pctile
        proms = abs(ds).sum(axis=0).round().values
        porder = hs['well'].values
        new_annot['prom'] = round(np.percentile(proms, 75))
        new_annot['all proms'] = proms
        new_annot['porder'] = porder

        if plot is True:
            # pairplot of replicate zscores, annotated with batch
            ds.columns = [x + ' - ' + hs.loc[x]['batch'] for x in ds.columns]
            ax = sns.pairplot(ds)
            myoutpath = os.path.join(outpath, 'rep zs scatter')
            try:
                os.mkdir(myoutpath)
            except OSError:
                pass
            plt.savefig(
                os.path.join(myoutpath, h.plate[0] + '-' + ds.name + '.png'))
            plt.close()

        con_header = pd.concat([con_header, new_annot], axis=1)

        if skyl is True:
            # skyline plot of the replicate group
            myoutpath = os.path.join(outpath, 'skyline')
            try:
                os.mkdir(myoutpath)
            except OSError:
                pass
            try:
                name = hs.iloc[0]['name'] + '-' + str(
                    hs.iloc[0]['dose']) + '-' + hs.iloc[0]['batch']
            except KeyError:
                # no dose column on this plate
                name = hs.iloc[0]['name'] + '-' + hs.iloc[0]['batch']
            name = name.replace('.', ',')
            title = pname + '-' + name
            myoutpath = os.path.join(myoutpath, title)
            skyline.new_skyline(ds, title=title, outpath=myoutpath)

        if test is True:
            # test mode: only process the first replicate group
            break

    con_header = con_header.T

    if sc is True:
        # sc (selectivity/correlation) scatter: corr vs prom per consensus
        try:
            pname = df.name
        except AttributeError:
            pname = h.addr[0].split(':')[0]
        title = pname + ' sc plot'
        outpath = gt.dflt_outpath(fn=pname + '_scplot.png')
        kwargs = {'x': 'corr', 'y': 'prom', 'data': con_header}

        kwargs.update({'alpha': .75, 'style': 'type', 'legend': legend})

        if 'd' in cats:
            # dose plates: size points by dose
            kwargs['hue'] = 'name'
            kwargs['size'] = 'dose'
            kwargs['sizes'] = (40, 400)
        # this is experimental
        else:
            kwargs['sizes'] = 50  # NOTE: scalar, not a (min, max) range
            kwargs['hue'] = 'name'

        g = sns.relplot(**kwargs)
        g.fig.suptitle(title)
        g.fig.set_size_inches(7, 5)
        if legend is not None:
            for lh in g._legend.legendHandles:
                lh.set_alpha(.75)
        g.savefig(outpath, bbox_inches='tight')
        plt.close()

        # interactive html version of the same scatter
        con_header = gt.gen_label(con_header, 'nb')
        newfig, newax = dim_reduct.seaborn_scatter(con_header,
                                                   title,
                                                   outpath,
                                                   x='corr',
                                                   y='prom',
                                                   ptype='ax',
                                                   save=False)
        dim_reduct.html_scatter(newfig, newax, con_header, 'corr', 'prom',
                                title)
        plt.close()

    con_data.name = df.name

    if save is True:
        gct.save_headergct(con_data, con_header,
                           gt.dflt_outpath(fn=df.name + '_consensus.gct'))
    if ret is True:
        return con_data, con_header