def analyze_plate(d, h, cats):
    """ take a dataset (assumed to include doses) and generate standard output figs """
    # should worry about organizing output too
    # create consensus, create SC plot
    dc, hc = pa.assemble_consensus(d, h, cats, save=True, sc=True)
    hc = gt.gen_label(hc, 'nd')
    dc.name = d.name
    # tSNE simple first pass, two parameters
    dim_reduct.tsne2(dc, hc, px=10, lr=[10, 150], inter=True)
    # create general dendrogram (only, no heatmap)
    dim_reduct.make_dendrogram(dc, labels=hc.label, outpath=True)
    # plot correlation matrix of the sorted combined zs by name and dose
    # can then follow up to plot the sweep or the clustered
    plottools.plot_correlation_matrix(dc, hc, title='dflt', sort=True, outpath=True,
                                      sparselabel=True, grid=True, labels=hc.name)
    if 'd' in cats:
        # plot landmark concs
        newcats = cats.replace('d', '')
        outpath = gt.dflt_outpath(fldr_name='landmark concs')
        #plottools.plot_landmark_concs(dc, hc, cats=newcats, genes='all', labels='dose', outpath=outpath)
        # call combo function to find genes that move and plot those dose-response plots (30 per plate)
        plottools.plot_ex_genes(d, h, n=10, mode='med')
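
# Example usage (a sketch, not part of the pipeline): the gct filename below is hypothetical;
# analyze_plate expects a zscore dataframe and header as returned by gct.extractgct, with the
# header labeled by the same categories passed as cats ('nd' = name + dose), mirroring the
# 'ind' branch of run_plate_analysis.
#   d, h = gct.extractgct('PLATE1_ZSVCQNORM.gct')
#   h = gt.gen_label(h, 'nd')
#   analyze_plate(d, h, 'nd')
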
def plot_veh_matrices(path='dflt', batch='A', up=.75, low=0, kind='euclid', getcells=True):
    """ gather data for vehicle matrix and plot it, regular and clustered, in euclidean """
    d, h = get_vehicle_matrix(path=path, batch=batch, getcells=getcells)
    h = gt.gen_label(h, 'pb')
    dim_reduct.make_dendrogram(d, labels=h.label)
    plottools.plot_correlation_matrix(d, h, lower=low, upper=up, ptype=kind, title=f'vehicles {kind}',
                                      outpath=True, sparselabel=False)
    plottools.plot_clustered_correl(d, h, ptype=kind, title=f'clustered vehicles {kind}', outpath=True)
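
# Example usage (sketch): plot vehicle similarity matrices for batch 'A' from the default
# newQC folder; up/low are assumed to bound the plotted range, since they are passed through
# as plot_correlation_matrix's upper/lower arguments.
#   plot_veh_matrices(path='dflt', batch='A', up=.75, low=0, kind='euclid', getcells=True)
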
def run_plate_analysis(mode='ind', cats='nd', path='dflt'):
    """ runs standard analysis on either each plate individually ('ind') or all together ('comb').
    most useful for plates with doses. the default path will be newQC on the desktop """
    path = gt.check_dfltarg(path, os.path.join(gt.check_desktop(), 'newQC'))
    fl = gt.globit(path, '*ZSVCQNORM*')
    print(fl)
    if mode == 'comb':
        dl, hl = [], []
        for i, f in enumerate(fl):
            d, h = gct.extractgct(f)
            if i == 0:
                try:
                    pname = d.name + '+'
                except:
                    pname = h.addr[0].split(':')[0] + '+'
            if len(h.batch.unique()) > 1:
                # fix sample labels for plate/batch
                h.plate = h.plate + h.batch
            # define labels (should I add plate?)
            h = gt.gen_label(h, cats)
            dl.append(d)
            hl.append(h)
        try:
            d = pd.concat(dl, axis=1)
            d.name = pname
        except ValueError:
            sys.exit('no gct file plates to analyze')
        h = pd.concat(hl, axis=0)
        analyze_plate(d, h, cats)
    elif mode == 'ind':
        for f in fl:
            d, h = gct.extractgct(f)
            # define labels (should I add plate?)
            h = gt.gen_label(h, cats)
            analyze_plate(d, h, cats)
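
# Example usage (sketch): analyze every ZSVCQNORM plate under ~/Desktop/newQC, first each
# plate on its own and then all plates pooled into a single combined run; the arguments shown
# are the function defaults, spelled out for clarity.
#   run_plate_analysis(mode='ind', cats='nd', path='dflt')
#   run_plate_analysis(mode='comb', cats='nd', path='dflt')
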
def get_vehicle_matrix(path='dflt', batch='all', delim=':', getcells=False):
    """ for the path, load all files and collapse vehicles to assemble the matrix.
    batch can be 'all', or 'A' to just take the first batch. getcells will re-predict cells """
    path = gt.check_dfltarg(path, os.path.join(gt.check_desktop(), 'newQC'))
    flv = gt.globit(path, '*Qctrl*')
    if len(flv) == 0:
        flv = gt.globit(path, '*_QNORM_*')  # should put in a check to extract from regular qnorms
    dlist, hlist = [], []
    for f in flv:
        d, h = gct.extractgct(f)
        h['plate'] = h['plate'].apply(lambda x: x[:6])
        d, h = gt.dsub(d, h, {'type': 'vehicle'})
        if batch == 'all':
            for b in h.batch.unique():
                ds, hs = gt.dsub(d, h, {'batch': b})
                med = ds.median(axis=1)
                hs = gt.gen_label(hs, 'pb', delim=delim)
                dlist.append(med)
                hlist.append(hs.iloc[0])
        elif batch == 'A':
            ds, hs = gt.dsub(d, h, {'batch': 'A'})
            med = ds.median(axis=1)
            hs = gt.gen_label(hs, 'pb', delim=delim)
            dlist.append(med)
            hlist.append(hs.iloc[0])
        else:
            med = d.median(axis=1)
            hs = gt.gen_label(h, 'p', delim=delim)
            dlist.append(med)
            hlist.append(hs.iloc[0])
    vdf = pd.concat(dlist, axis=1)
    vh = pd.DataFrame(hlist)
    vdf.columns = vh.label
    if getcells is True:
        vh['cell2'] = vh.label.apply(lambda x: predict_cells(vdf[x]))
        vh['label'] = vh.label + delim + vh.cell2
        vdf.columns = vh.label
    return vdf, vh
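
# Example usage (sketch): collapse vehicle wells from every Qctrl/QNORM file under the default
# newQC folder into per plate/batch median profiles; with getcells=True the cell identity is
# re-predicted via predict_cells and appended to each label.
#   vdf, vh = get_vehicle_matrix(path='dflt', batch='all', getcells=False)
#   print(vdf.shape)
#   print(vh.label.tolist())
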
def assemble_consensus(df, h, cats, sc=True, legend='brief', plot=False, skyl=False, n=None,
                       save=False, ret=True, test=False):
    """ tool to assemble replicate zscore consensus; pass df, header and the breakdown categories
    ('nd' for instance). will return the consolidated df and header file.
    can pass in multiple gct files as a single string via F6 shortcut

    replicate zscore correlations are always calculated and inserted into the header df
    ('corr' and 'all ccs' fields)

    plot will use seaborn pairplot to visualize the calculated rep correlations above

    skyl controls skyline plot generation, can be True to plot all ind reps plus consensus

    n argument is a limiter to only consider treatments with enough replicates for inclusion
    into the consensus gct!!

    save will save the consensus gct file """

    if isinstance(df, str):
        df, h = gct.extractgct(df)

    outpath = gt.dflt_outpath(fldr_name='output figs')
    try:
        pname = df.name
    except:
        pname = h.addr[0].split(':')[0]
    outpath = os.path.join(outpath, pname)
    try:
        os.mkdir(outpath)
    except:
        pass

    subs = breakdown(df, h, cats, dic=False)

    con_data = pd.DataFrame(index=df.index)
    addnl = []
    addnl.extend(['corr', 'all ccs'])
    addnl.extend(['prom', 'all proms', 'porder'])
    if addnl != []:
        con_header = pd.DataFrame(index=list(h.columns.values) + addnl)
    else:
        con_header = pd.DataFrame(index=h.columns)

    for ds, hs in subs:
        if n is not None:
            if len(ds.columns) < n:
                print('not enough reps', hs.iloc[0])
                continue

        c = consensus(ds, name='first')
        con_data = pd.concat([con_data, c], axis=1)

        new_annot = hs.iloc[0, :].copy().T
        new_annot.well = hs['well'].values
        new_annot.addr = hs['addr'].values

        corrs = []
        for i in range(len(ds.columns)):
            for j in range(1 + i, len(ds.columns)):
                corrs.append(round(ds.iloc[:, i].corr(ds.iloc[:, j], method='pearson'), 2))
        if len(corrs) == 0:
            # print('corrs = na')
            # print(hs.iloc[0].values)
            new_annot['corr'] = np.nan
            new_annot['all ccs'] = np.nan
        elif len(corrs) == 1:
            new_annot['corr'] = round(corrs[0], 2)
            new_annot['all ccs'] = corrs
        else:
            new_annot['corr'] = round(np.percentile(corrs, 75), 2)
            new_annot['all ccs'] = corrs
        corrs = [decimal.Decimal(x) for x in corrs]
        new_annot['corr'] = pd.to_numeric(new_annot['corr'])

        proms = abs(ds).sum(axis=0).round().values
        porder = hs['well'].values
        new_annot['prom'] = round(np.percentile(proms, 75))
        new_annot['all proms'] = proms
        new_annot['porder'] = porder

        if plot is True:
            ds.columns = [x + ' - ' + hs.loc[x]['batch'] for x in ds.columns]
            ax = sns.pairplot(ds)
            myoutpath = os.path.join(outpath, 'rep zs scatter')
            try:
                os.mkdir(myoutpath)
            except:
                pass
            plt.savefig(os.path.join(myoutpath, h.plate[0] + '-' + ds.name + '.png'))
            plt.close()

        con_header = pd.concat([con_header, new_annot], axis=1)

        if skyl is True:
            myoutpath = os.path.join(outpath, 'skyline')
            try:
                os.mkdir(myoutpath)
            except:
                pass
            try:
                name = hs.iloc[0]['name'] + '-' + str(hs.iloc[0]['dose']) + '-' + hs.iloc[0]['batch']
            except:
                name = hs.iloc[0]['name'] + '-' + hs.iloc[0]['batch']
            name = name.replace('.', ',')
            title = pname + '-' + name
            myoutpath = os.path.join(myoutpath, title)
            skyline.new_skyline(ds, title=title, outpath=myoutpath)

        if test is True:
            break

    con_header = con_header.T

    if sc is True:
        try:
            pname = df.name
        except:
            pname = h.addr[0].split(':')[0]
        title = pname + ' sc plot'
        outpath = gt.dflt_outpath(fn=pname + '_scplot.png')
        kwargs = {'x': 'corr', 'y': 'prom', 'data': con_header}
        kwargs.update({'alpha': .75, 'style': 'type', 'legend': legend})
        if 'd' in cats:
            kwargs['hue'] = 'name'
            kwargs['size'] = 'dose'
            kwargs['sizes'] = (40, 400)
            # this is experimental
        else:
            kwargs['sizes'] = (50)
            kwargs['hue'] = 'name'
        g = sns.relplot(**kwargs)
        g.fig.suptitle(title)
        g.fig.set_size_inches(7, 5)
        if legend is not None:
            for lh in g._legend.legendHandles:
                lh.set_alpha(.75)
        g.savefig(outpath, bbox_inches='tight')
        plt.close()

        con_header = gt.gen_label(con_header, 'nb')
        newfig, newax = dim_reduct.seaborn_scatter(con_header, title, outpath, x='corr', y='prom',
                                                   ptype='ax', save=False)
        dim_reduct.html_scatter(newfig, newax, con_header, 'corr', 'prom', title)
        plt.close()

    con_data.name = df.name

    if save is True:
        gct.save_headergct(con_data, con_header, gt.dflt_outpath(fn=df.name + '_consensus.gct'))
    if ret is True:
        return con_data, con_header
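
# Example usage (sketch): build a name+dose ('nd') consensus from one extracted ZSVCQNORM gct
# (filename hypothetical), saving the consensus gct and the SC (corr vs prom) plot; the
# returned header gains the 'corr', 'all ccs', 'prom', 'all proms' and 'porder' fields.
#   d, h = gct.extractgct('PLATE1_ZSVCQNORM.gct')
#   con_d, con_h = assemble_consensus(d, h, 'nd', sc=True, save=True, skyl=False)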