def single_array(gids, unnorm_eluts, sp, min_count=1, remove_multi_base=False,
        norm_rows=False):
    """
    Build one wide array of log-scaled elution profiles, one row per gid.

    unnorm_eluts: [el.NormElut(f, sp=sp, norm_cols=False, norm_rows=False) for f in fs]

    gids: ids looked up in each elution's baseid2inds; row i of the result
        belongs to gids[i].
    sp, min_count: passed through to elutions_containing_prots, which picks
        the elutions that are used.
    remove_multi_base: accepted for interface compatibility; not used here.
    norm_rows: forwarded to ut.normalize_fracs for every elution.

    Returns an array of shape (len(gids), total columns of the used
    elutions).  The used elutions are laid side by side in order.  Within an
    elution, a gid's row is the column-wise maximum over the rows listed in
    baseid2inds[gid]; gids missing from the elution stay zero.  Values are
    log2 of the percentage of that elution's largest value, clipped to
    [0, 10].
    """
    use_eluts = elutions_containing_prots(unnorm_eluts, sp, gids, min_count)
    print("%s eluts with proteins" % len(use_eluts))
    ncols = sum(e.normarr.shape[1] for e in use_eluts)
    bigarr = np.zeros((len(gids), ncols))
    startcol = 0
    for e in use_eluts:
        freqarr = ut.normalize_fracs(e.normarr, norm_rows=norm_rows)
        nfracs = freqarr.shape[1]
        temparr = np.zeros((len(gids), nfracs))
        for i, gid in enumerate(gids):
            if gid in e.baseid2inds:
                inds = list(e.baseid2inds[gid])
                temparr[i, :] = np.max(freqarr[inds, :], axis=0)
        # np.max raises on an empty array, so treat "nothing there" as 0.
        frac_max = np.max(temparr) if temparr.size else 0
        # An all-zero slab would be scaled by 0/0 and turn into NaN; leave it
        # as zeros, which is also what the clip yields for absent signal.
        if frac_max != 0:
            # log2(0) = -inf is expected for empty cells and is clipped to 0.
            with np.errstate(divide='ignore'):
                temparr = np.clip(np.log2(temparr * 100 / frac_max), 0, 10)
        bigarr[:, startcol:startcol + nfracs] = temparr
        startcol += nfracs
    return bigarr
def pdist_score(mat, metric='euclidean', norm_rows=True, norm_cols=True):
    """
    Score every pair of rows of mat as 1 minus their distance.

    mat is first passed through ut.normalize_fracs with norm_rows and
    norm_cols, then pairwise distances are computed with the given scipy
    pdist metric and expanded to a square matrix.  NaN distances are replaced
    by np.nan_to_num before subtracting, so a NaN distance scores as 1.
    """
    normalized = ut.normalize_fracs(mat, norm_rows, norm_cols)
    square_dists = spatial.distance.squareform(
        spatial.distance.pdist(normalized, metric=metric))
    return 1 - np.nan_to_num(square_dists)
def __init__(self, filename, sp_base="Hs", norm_rows=False, norm_cols=False):
    """
    Load an elution file and keep a normalized copy of its matrix.

    filename: passed to load_elution.
    sp_base: base identifier handed to sc.orth_indices.
    norm_rows, norm_cols: forwarded to ut.normalize_fracs.

    Sets prots, filename, normarr, pinv (ut.list_inv_to_dict of the protein
    list) and baseid2inds (result of sc.orth_indices).
    """
    elution = load_elution(filename)
    proteins = elution.prots
    self.prots = proteins
    self.filename = elution.filename
    self.normarr = ut.normalize_fracs(
        elution.mat, norm_rows=norm_rows, norm_cols=norm_cols)
    self.pinv = ut.list_inv_to_dict(proteins)
    # The target is the first two characters of the file's short name
    # (presumably a species code such as the "Hs" default -- confirm).
    target_code = ut.shortname(elution.filename)[:2]
    self.baseid2inds = sc.orth_indices(sp_base, target_code, proteins, False)
def __init__(self, filename, sp_base='Hs', norm_rows=False, norm_cols=False):
    """
    Read the elution in filename and store its normalized matrix and indices.

    filename: passed to load_elution.
    sp_base: base identifier handed to sc.orth_indices.
    norm_rows, norm_cols: forwarded to ut.normalize_fracs.

    Attributes set: prots, filename, normarr, pinv, baseid2inds.
    """
    loaded = load_elution(filename)
    self.prots = loaded.prots
    self.filename = loaded.filename
    normalize_opts = {'norm_rows': norm_rows, 'norm_cols': norm_cols}
    self.normarr = ut.normalize_fracs(loaded.mat, **normalize_opts)
    self.pinv = ut.list_inv_to_dict(loaded.prots)
    # Two-character prefix of the file's short name; presumably the species
    # the file belongs to -- confirm against the naming scheme.
    short = ut.shortname(loaded.filename)
    self.baseid2inds = sc.orth_indices(sp_base, short[:2], loaded.prots,
            False)
def plot_bigprofiles(prots, pids, unnorm_eluts, sp='Hs', min_count=1,
        remove_multi_base=False, gt=None, eluts_per_plot=10, do_cluster=True,
        label_trans=None, do_plot_tree=False, rename_fracs=None, colors=None,
        **kwargs):
    """
    Plot elution profiles for a set of proteins across many elutions.

    supply EITHER prots OR protids, set other to None
    unnorm_eluts: [el.NormElut(f, sp=sp, norm_cols=False, norm_rows=False) for f in fs]

    prots: names, translated to ids through gt.name2id (so gt is required
        when prots is given); or None.
    pids: ids, used when prots is None.
    sp, min_count: passed through to elutions_containing_prots.
    remove_multi_base: accepted for interface compatibility; not used here.
    gt: object with name2id / id2name lookups; when None, rows are labeled
        with the ids themselves.
    eluts_per_plot: number of elutions drawn side by side in each subplot.
    do_cluster: reorder the ids with cluster_ids first; do_plot_tree and any
        extra keyword arguments are forwarded to it.
    label_trans: optional dict translating displayed labels.
    rename_fracs: optional dict translating elution short names.
    colors: optional sequence passed to plot_big_single, reversed along with
        the ids.

    The caller's pids and colors are not modified.  Returns the number of
    subplots drawn.
    """
    import plotting as pl
    if prots is not None:
        pids = [gt.name2id[p] for p in prots]
    if do_cluster:
        print("clustering")
        pids = cluster_ids(pids, unnorm_eluts, sp, gt=gt, do_plot=do_plot_tree,
                **kwargs)
    if gt is not None:
        # Re-order names to match pids.  Ids without a name keep the id as
        # label so that the labels stay aligned with the plotted rows.
        prots = [gt.id2name[pid] if pid in gt.id2name else pid
                for pid in pids]
    else:
        prots = pids
        print("No gene names provided--labeling with ids.")
    if label_trans:
        print("Translating names for display.")
        # Translate displayed names from base ids according to provided dict
        prots = [label_trans.get(p, p) for p in prots]
    # Put them top to bottom.  Reversed copies are used on purpose: prots may
    # be the very same list as pids (reversing both in place would cancel
    # out), and the caller's pids / colors must not be flipped.
    prots = list(prots)[::-1]
    pids = list(pids)[::-1]
    if colors is not None:
        colors = list(colors)[::-1]
    print("%s proteins" % len(pids))
    use_eluts = elutions_containing_prots(unnorm_eluts, sp, pids, min_count)
    # True division before ceil: integer division would floor first and drop
    # the last, partially filled group of elutions.
    nplots = int(np.ceil(len(use_eluts) / float(eluts_per_plot)))
    maxfracs = 0
    for iplot in range(nplots):
        pl.subplot(nplots, 1, iplot + 1)
        plot_eluts = use_eluts[iplot * eluts_per_plot:
                (iplot + 1) * eluts_per_plot]
        frac_names = [ut.shortname(e.filename) for e in plot_eluts]
        if rename_fracs:
            frac_names = [rename_fracs.get(n, n) for n in frac_names]
        startcols = [0]
        for e in plot_eluts:
            freqarr = ut.normalize_fracs(e.normarr, norm_rows=False)
            # Largest value among the rows of the requested proteins in this
            # elution.
            protsmax = max(np.max(freqarr[r])
                    for p in pids if p in e.baseid2inds
                    for r in e.baseid2inds[p])
            plot_big_single(freqarr, pids, e.baseid2inds, protsmax,
                    startcols[-1], colors=colors)
            startcols.append(startcols[-1] + freqarr.shape[1])
        label_ys(prots)
        label_xs(startcols, frac_names)
        pl.grid(False)
        maxfracs = max(maxfracs, startcols[-1])
    # Give every subplot the same x range so columns line up vertically.
    for iplot in range(nplots):
        pl.subplot(nplots, 1, iplot + 1)
        pl.xlim(0, maxfracs)
    if prots:
        # Float division: the spacing is meant to shrink as rows are added,
        # not to collapse to 0 as soon as there are more than 5 rows.
        pl.subplots_adjust(hspace=5.0 / len(prots))
    return nplots