def __call__(self, **kw):
    """Plot signal scores over a set of features and register the output.

    Options read from ``kw``:
      * ``features``: the feature track; ``signals``: one signal track or a
        list of them.
      * ``upstream`` / ``downstream``: flank sizes (defaults ``_upstr`` /
        ``_downstr``); ``nbins``: bins per feature (default ``_nbins``).
      * ``noclust``: when true, heatmap rows are not reordered.
      * ``ymin`` / ``ymax``: optional plot limits (ignored if not numeric).
      * ``output``: an entry of ``output_list`` (first entry if unknown);
        ``'archive'`` bundles the PDF and tab-delimited data in a tar.gz.
      * ``mode``: an entry of ``plot_types`` (heatmap, average line plot or
        mosaic of per-feature line plots).

    Returns the value of ``self.display_time()``.
    Raises ``ValueError`` when no scores were collected or the mode is not
    implemented.
    """
    def make_X_labels(X, start, end, strand, down, up):
        """Convert the relative bin positions `X` into coordinates for one feature.

        `X` is split at its 0 and 1 entries into an upstream part, the
        feature body and a downstream part.  `down` and `up` below 1 are
        scaled by the feature length.  Positions are counted from `start`,
        or backwards from `end` when `strand` is not positive.
        """
        flen = end - start
        i0 = (list(where(X == 0)[0]) + [-1])[0] + 1
        i1 = (list(where(X == 1)[0]) + [len(X) - 1])[0] + 1
        i2 = len(X) - i1
        istep = 0.5 / (i1 - i0)
        if down < 1:
            down *= flen
        if up < 1:
            up *= flen
        # The array product is evaluated before the division, so an empty
        # flank (i0 == 0 or i2 == 0) divides an empty array and stays empty.
        Xup = (array(range(-i0, 0)) + .5) * up / i0
        Xb = (X[i0:i1] + istep) * flen
        Xdown = flen + (array(range(i2)) + .5) * down / i2
        if strand is None or strand > 0:
            return start + concatenate([Xup, Xb, Xdown])
        return end - concatenate([Xup, Xb, Xdown])

    def flank(key, default):
        """Return the (size, bin count) pair for flank option `key`, or `default` if unset."""
        if kw.get(key) is None:
            return default
        size = int(kw[key])
        if size > 50:
            return (size, 5)
        if size > 0:
            return (size, 1)
        return (0, 0)

    def add_table(fname, rows):
        """Write a header of `X` plus one line per (name, values) row, then add the file to the archive."""
        _datf = self.temporary_path(fname=fname)
        with open(_datf, "w") as dff:
            dff.write("\t".join([""] + [str(x) for x in X]) + "\n")
            for rowname, values in rows:
                dff.write("\t".join([rowname] + [str(x) for x in values]) + "\n")
        # Added only once the table file is closed and fully flushed.
        tarfh.add(_datf, arcname=os.path.basename(_datf))

    chrmeta = "guess"
    features = track(kw.get('features'), chrmeta=chrmeta)
    signals = kw.get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig) for sig in signals]
    snames = [sig.name for sig in signals]

    upstr = flank("upstream", _upstr)
    downstr = flank("downstream", _downstr)
    if kw.get("nbins") is not None:
        nbins = max(1, int(kw["nbins"]))
    else:
        nbins = _nbins
    if kw.get("noclust") is not None:
        noclust = str(kw["noclust"]).lower() in ['1', 'true', 't', 'on']
    else:
        noclust = False
    try:
        ymin = float(kw.get('ymin'))
    except (ValueError, TypeError):
        ymin = None
    try:
        ymax = float(kw.get('ymax'))
    except (ValueError, TypeError):
        ymax = None

    # Collect the score matrix chromosome by chromosome.
    labels = None
    data = None
    for chrom in features.chrmeta:
        if 'name' in features.fields:
            _fread = features.read(chrom)
        else:
            _fread = add_name_field(features.read(chrom))
        _l, _d = feature_matrix([s.read(chrom) for s in signals], _fread,
                                segment=True, nbins=nbins,
                                upstream=upstr, downstream=downstr)
        if _d.size == 0:
            continue
        if data is None:
            labels = _l
            data = _d
        else:
            labels = concatenate((labels, _l))
            data = vstack((data, _d))
    # Fail before any output file is created or opened.
    if data is None:
        raise ValueError("No data")

    outf = str(kw.get('output'))
    if outf not in output_list:
        outf = output_list[0]
    pdf = self.temporary_path(fname='plot_features.pdf')
    mode = kw.get('mode', 0)
    if str(mode) in [str(x[0]) for x in plot_types]:
        mode = int(mode)
    X = array(range(-upstr[1] + 1, nbins + downstr[1] + 1)) / (1.0 * nbins)
    nsig = data.shape[-1]

    tarfh = None
    if outf == 'archive':
        tarname = self.temporary_path(fname='plot_features.tar.gz')
        tarfh = tarfile.open(tarname, "w:gz")
    try:
        if mode in plot_types[0]:  # heatmap
            if 'name' in features.fields:
                _fread = features.read(fields=['chr', 'start', 'end', 'name'])
            else:
                _fread = add_name_field(features.read(fields=['chr', 'start', 'end']))
            order = [where(labels == feat[3])[0][0] for feat in _fread]
            # One heatmap per signal: the first opens the PDF, the last closes it.
            for n in range(nsig):
                heatmap(data[order, :, n], output=pdf,
                        new=(n == 0), last=(n == nsig - 1),
                        rows=labels[order], columns=X, main=snames[n],
                        orderRows=not noclust, orderCols=False,
                        ymin=ymin, ymax=ymax)
            if tarfh is not None:
                for n, sn in enumerate(snames):
                    add_table(sn + "_data.txt",
                              ((labels[k], data[k, :, n]) for k in order))
        elif mode in plot_types[1]:  # average lineplot
            Y = data.mean(axis=0)
            if ymin is None:
                ymin = min([x.min() for x in Y] + [0])
            if ymax is None:
                ymax = max([x.max() for x in Y])
            lineplot(X, [Y[:, n] for n in range(nsig)], output=pdf,
                     new=True, last=True, legend=snames, ylim=(ymin, ymax))
            if tarfh is not None:
                add_table("lineplot_data.txt",
                          ((sn, Y[:, n]) for n, sn in enumerate(snames)))
        elif mode in plot_types[2]:  # mosaic
            mfrow = [4, 3]
            nplot = min(data.shape[0], max_pages * mfrow[0] * mfrow[1])
            if ymin is None:
                ymin = min([data.min(), 0])
            if ymax is None:
                ymax = data.max()
            _f = ['chr', 'start', 'end']
            _si = None
            if 'strand' in features.fields:
                _f.append('strand')
                _si = 3
            if 'name' in features.fields:
                _fread = features.read(fields=_f + ['name'])
            else:
                _fread = add_name_field(features.read(fields=_f))
            order = []
            for nf, feat in enumerate(_fread):
                reg = where(labels == feat[-1])[0][0]
                order.append(reg)
                X1 = make_X_labels(X, feat[1], feat[2],
                                   feat[_si] if _si is not None else None,
                                   downstr[0], upstr[0])
                Y = [data[reg, :, n] for n in range(nsig)]
                is_last = nf >= nplot - 1
                opts = dict(output=pdf, new=(nf == 0), last=is_last,
                            main=labels[reg], ylim=(ymin, ymax),
                            xlim=(X1[0], X1[-1]))
                if nf == 0:
                    opts['mfrow'] = mfrow
                if is_last:
                    # A plot can be first and last at once (single feature);
                    # it must still close the PDF.
                    opts['legend'] = snames
                lineplot(X1, Y, **opts)
                if is_last:
                    break
            if tarfh is not None:
                for n, sn in enumerate(snames):
                    add_table(sn + "_data.txt",
                              ((labels[k], data[k, :, n]) for k in order))
        else:
            raise ValueError("Mode not implemented: %s" % mode)
        if tarfh is not None:
            tarfh.add(pdf, arcname=os.path.basename(pdf))
    finally:
        # Close the archive on error paths too.
        if tarfh is not None:
            tarfh.close()

    if outf == 'archive':
        self.new_file(tarname, 'data_archive')
    else:
        self.new_file(pdf, 'plot_features')
    return self.display_time()
def __call__(self, **kw):
    """Plot signal scores over a set of features and register the output.

    Options read from ``kw``:
      * ``features``: the feature track; ``SigMulti``: a mapping whose
        ``signals`` entry is one signal track or a list of them.
      * ``upstream`` / ``downstream``: flank sizes (defaults ``_upstr`` /
        ``_downstr``); ``nbins``: bins per feature (default ``_nbins``).
      * ``noclust``: when true, heatmap rows are not reordered.
      * ``ymin`` / ``ymax``: optional plot limits (ignored if not numeric).
      * ``output``: an entry of ``output_list`` (first entry if unknown);
        ``'archive'`` bundles the PDF and tab-delimited data in a tar.gz.
      * ``mode``: an entry of ``plot_types`` (heatmap, average line plot or
        mosaic of per-feature line plots).

    Returns the value of ``self.display_time()``.
    Raises ``ValueError`` when no scores were collected or the mode is not
    implemented.
    """
    def make_X_labels(X, start, end, strand, down, up):
        """Convert the relative bin positions `X` into coordinates for one feature.

        `X` is split at its 0 and 1 entries into an upstream part, the
        feature body and a downstream part.  `down` and `up` below 1 are
        scaled by the feature length.  Positions are counted from `start`,
        or backwards from `end` when `strand` is not positive.
        """
        flen = end - start
        i0 = (list(where(X == 0)[0]) + [-1])[0] + 1
        i1 = (list(where(X == 1)[0]) + [len(X) - 1])[0] + 1
        i2 = len(X) - i1
        istep = 0.5 / (i1 - i0)
        if down < 1:
            down *= flen
        if up < 1:
            up *= flen
        # The array product is evaluated before the division, so an empty
        # flank (i0 == 0 or i2 == 0) divides an empty array and stays empty.
        Xup = (array(range(-i0, 0)) + .5) * up / i0
        Xb = (X[i0:i1] + istep) * flen
        Xdown = flen + (array(range(i2)) + .5) * down / i2
        if strand is None or strand > 0:
            return start + concatenate([Xup, Xb, Xdown])
        return end - concatenate([Xup, Xb, Xdown])

    def flank(key, default):
        """Return the (size, bin count) pair for flank option `key`, or `default` if unset."""
        if kw.get(key) is None:
            return default
        size = int(kw[key])
        if size > 50:
            return (size, 5)
        if size > 0:
            return (size, 1)
        return (0, 0)

    def add_table(fname, rows):
        """Write a header of `X` plus one line per (name, values) row, then add the file to the archive."""
        _datf = self.temporary_path(fname=fname)
        with open(_datf, "w") as dff:
            dff.write("\t".join([""] + [str(x) for x in X]) + "\n")
            for rowname, values in rows:
                dff.write("\t".join([rowname] + [str(x) for x in values]) + "\n")
        # Added only once the table file is closed and fully flushed.
        tarfh.add(_datf, arcname=os.path.basename(_datf))

    chrmeta = "guess"
    features = track(kw.get('features'), chrmeta=chrmeta)
    signals = kw.get('SigMulti', {}).get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig) for sig in signals]
    snames = [sig.name for sig in signals]

    upstr = flank("upstream", _upstr)
    downstr = flank("downstream", _downstr)
    if kw.get("nbins") is not None:
        nbins = max(1, int(kw["nbins"]))
    else:
        nbins = _nbins
    if kw.get("noclust") is not None:
        noclust = str(kw["noclust"]).lower() in ['1', 'true', 't', 'on']
    else:
        noclust = False
    try:
        ymin = float(kw.get('ymin'))
    except (ValueError, TypeError):
        ymin = None
    try:
        ymax = float(kw.get('ymax'))
    except (ValueError, TypeError):
        ymax = None

    # Collect the score matrix chromosome by chromosome.
    labels = None
    data = None
    for chrom in features.chrmeta:
        if 'name' in features.fields:
            _fread = features.read(chrom)
        else:
            _fread = add_name_field(features.read(chrom))
        _l, _d = feature_matrix([s.read(chrom) for s in signals], _fread,
                                segment=True, nbins=nbins,
                                upstream=upstr, downstream=downstr)
        if _d.size == 0:
            continue
        if data is None:
            labels = _l
            data = _d
        else:
            labels = concatenate((labels, _l))
            data = vstack((data, _d))
    # Fail before any output file is created or opened.
    if data is None:
        raise ValueError("No data")

    outf = str(kw.get('output'))
    if outf not in output_list:
        outf = output_list[0]
    pdf = self.temporary_path(fname='plot_features.pdf')
    mode = kw.get('mode', 0)
    if str(mode) in [str(x[0]) for x in plot_types]:
        mode = int(mode)
    X = array(range(-upstr[1] + 1, nbins + downstr[1] + 1)) / (1.0 * nbins)
    nsig = data.shape[-1]

    tarfh = None
    if outf == 'archive':
        tarname = self.temporary_path(fname='plot_features.tar.gz')
        tarfh = tarfile.open(tarname, "w:gz")
    try:
        if mode in plot_types[0]:  # heatmap
            if 'name' in features.fields:
                _fread = features.read(fields=['chr', 'start', 'end', 'name'])
            else:
                _fread = add_name_field(features.read(fields=['chr', 'start', 'end']))
            order = [where(labels == feat[3])[0][0] for feat in _fread]
            # One heatmap per signal: the first opens the PDF, the last closes it.
            for n in range(nsig):
                heatmap(data[order, :, n], output=pdf,
                        new=(n == 0), last=(n == nsig - 1),
                        rows=labels[order], columns=X, main=snames[n],
                        orderRows=not noclust, orderCols=False,
                        ymin=ymin, ymax=ymax)
            if tarfh is not None:
                for n, sn in enumerate(snames):
                    add_table(sn + "_data.txt",
                              ((labels[k], data[k, :, n]) for k in order))
        elif mode in plot_types[1]:  # average lineplot
            Y = data.mean(axis=0)
            if ymin is None:
                ymin = min([x.min() for x in Y] + [0])
            if ymax is None:
                ymax = max([x.max() for x in Y])
            lineplot(X, [Y[:, n] for n in range(nsig)], output=pdf,
                     new=True, last=True, legend=snames, ylim=(ymin, ymax))
            if tarfh is not None:
                add_table("lineplot_data.txt",
                          ((sn, Y[:, n]) for n, sn in enumerate(snames)))
        elif mode in plot_types[2]:  # mosaic
            mfrow = [4, 3]
            nplot = min(data.shape[0], max_pages * mfrow[0] * mfrow[1])
            if ymin is None:
                ymin = min([data.min(), 0])
            if ymax is None:
                ymax = data.max()
            _f = ['chr', 'start', 'end']
            _si = None
            if 'strand' in features.fields:
                _f.append('strand')
                _si = 3
            if 'name' in features.fields:
                _fread = features.read(fields=_f + ['name'])
            else:
                _fread = add_name_field(features.read(fields=_f))
            order = []
            for nf, feat in enumerate(_fread):
                reg = where(labels == feat[-1])[0][0]
                order.append(reg)
                X1 = make_X_labels(X, feat[1], feat[2],
                                   feat[_si] if _si is not None else None,
                                   downstr[0], upstr[0])
                Y = [data[reg, :, n] for n in range(nsig)]
                is_last = nf >= nplot - 1
                opts = dict(output=pdf, new=(nf == 0), last=is_last,
                            main=labels[reg], ylim=(ymin, ymax),
                            xlim=(X1[0], X1[-1]))
                if nf == 0:
                    opts['mfrow'] = mfrow
                if is_last:
                    # A plot can be first and last at once (single feature);
                    # it must still close the PDF.
                    opts['legend'] = snames
                lineplot(X1, Y, **opts)
                if is_last:
                    break
            if tarfh is not None:
                for n, sn in enumerate(snames):
                    add_table(sn + "_data.txt",
                              ((labels[k], data[k, :, n]) for k in order))
        else:
            raise ValueError("Mode not implemented: %s" % mode)
        if tarfh is not None:
            tarfh.add(pdf, arcname=os.path.basename(pdf))
    finally:
        # Close the archive on error paths too.
        if tarfh is not None:
            tarfh.close()

    if outf == 'archive':
        self.new_file(tarname, 'data_archive')
    else:
        self.new_file(pdf, 'plot_features')
    return self.display_time()
def __call__(self, **kw):
    """Draw a pairs plot of the signals over a set of features.

    Options read from ``kw``:
      * ``feature_type``: 0 gene bodies, 1 promoters, 2 exons, 3 custom
        track given in ``features``.
      * ``assembly``: assembly identifier, required unless ``feature_type``
        is 3.
      * ``signals``: one signal track or a list of them.
      * ``upstream`` / ``downstream``: promoter extent (defaults
        ``prom_up_def`` / ``prom_down_def``).
      * ``highlights``: optional track or list of tracks; their scores are
        appended after the main features (density mode only).
      * ``mode``: 1 for correlation, 0 for density.
      * ``cormax``: maximum lag for the correlation (default ``_cormax``).
      * ``individual``: in correlation mode, compute one correlation per
        feature instead of one over all features.

    Registers the PDF, plus a table of correlation maxima in correlation
    mode, and returns the value of ``self.display_time()``.
    Raises ``ValueError`` on inconsistent options, an unknown feature type
    or mode, or when no data was collected.
    """
    def chrom_scores(feat, chrom):
        """Return ``score_array`` output for the signals over stream `feat` on `chrom`."""
        if "name" not in feat.fields:
            feat = add_name_field(feat)
        means = score_by_feature([s.read(chrom) for s in signals], feat)
        return score_array(means, means.fields[len(feat.fields):])

    feature_type = int(kw.get("feature_type") or 0)
    individual = kw.get("individual", False)
    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(individual, string_types):
        individual = individual.lower() in ["1", "true", "t", "on"]
    if individual and int(kw["mode"]) != 1:
        raise ValueError("Only correlation plots can work with the 'individual' option.")

    assembly_id = kw.get("assembly") or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        raise ValueError("Please specify an assembly")

    signals = kw.get("signals", [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]
    snames = [sig.name for sig in signals]

    # `features` is a callable mapping a chromosome name to a feature stream.
    if feature_type == 0:  # bodies
        features = genes
    elif feature_type == 1:  # promoters
        prom_pars = {
            "before_start": int(kw.get("upstream") or prom_up_def),
            "after_start": int(kw.get("downstream") or prom_down_def),
            "on_strand": True,
        }
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:  # exons
        features = exons
    elif feature_type == 3:  # custom track
        _t = track(kw.get("features"), chrmeta=chrmeta)
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        raise ValueError("Feature type not known: %i" % feature_type)

    # Normalise before wrapping, otherwise None would become [None] and be
    # handed to track().
    highlights = kw.get("highlights")
    if highlights is None:
        highlights = []
    elif not isinstance(highlights, list):
        highlights = [highlights]
    highlights = [track(hi, chrmeta=chrmeta) for hi in highlights]

    pdf = self.temporary_path(fname="plot_pairs.pdf")
    narr = None
    set_index = []
    set_labels = []
    _new = True
    mode = int(kw["mode"])
    if mode == 1:  # correl
        cormax = int(kw.get("cormax") or _cormax)
        xarr = array(range(-cormax, cormax + 1))
        _f = ["chr", "start", "end", "score"]
        regions = [x[:3] for chrom in chrmeta
                   for x in sorted_stream(features(chrom))]
        table = self.temporary_path(fname="table.txt")
        with open(table, "w") as t:
            t.write("\t".join(["chr", "start", "end", "max(correlation)", "lag_max"]) + "\n")
            if individual:
                for nplot, feature in enumerate(regions):
                    # Plot the previous feature here; the most recent one
                    # is drawn by the closing pairs() call below.
                    if narr is not None and nplot < _MAX_PLOTS_:
                        pairs(narr, xarr, labels=snames, output=pdf, new=_new, last=False)
                        _new = False
                    narr = correlation([s.read(fields=_f) for s in signals],
                                       [feature], (-cormax, cormax), True)
                    list_corr = list(narr[0][0])
                    max_corr = max(list_corr)
                    lag_max = list_corr.index(max_corr) - cormax
                    t.write("\t".join([str(x) for x in feature[:3] + (max_corr, lag_max)]) + "\n")
            else:
                narr = correlation([s.read(fields=_f) for s in signals],
                                   regions, (-cormax, cormax), True)
                list_corr = list(narr[0][0])
                max_corr = max(list_corr)
                lag_max = list_corr.index(max_corr) - cormax
                t.write("\t".join(["-", "-", "-"] + [str(max_corr), str(lag_max)]) + "\n")
    elif mode == 0:  # density
        xarr = None
        for chrom in chrmeta:
            _n, _l = chrom_scores(features(chrom), chrom)
            if _n.size == 0:
                continue
            narr = _n if narr is None else vstack((narr, _n))
        # Check before touching narr.shape so the intended error is raised.
        if narr is None:
            raise ValueError("No data")
        # set_index records the row count after the main features and after
        # each highlight track.
        set_index = [narr.shape[0]]
        for hitrack in highlights:
            for chrom in chrmeta:
                _n, _l = chrom_scores(hitrack.read(chrom), chrom)
                if _n.size == 0:
                    continue
                narr = vstack((narr, _n))
                set_labels.extend(_l)
            set_index.append(narr.shape[0])
    else:
        raise ValueError("Mode not implemented: %s" % kw["mode"])

    if narr is None:
        raise ValueError("No data")
    pairs(narr, xarr, labels=snames, output=pdf,
          highlights=[set_index, set_labels], new=_new, last=True)
    if mode == 1:
        self.new_file(table, "table")
    self.new_file(pdf, "plot_pairs")
    return self.display_time()
def __call__(self, **kw):
    """Draw a pairs plot of the signals over a set of features.

    Options read from ``kw``:
      * ``feature_type``: 0 gene bodies, 1 promoters, 2 exons, 3 custom
        track given in ``features``.
      * ``assembly``: assembly identifier, required unless ``feature_type``
        is 3.
      * ``SigMulti``: mapping whose ``signals`` entry is one signal track or
        a list of them.
      * ``upstream`` / ``downstream``: promoter extent (defaults
        ``prom_up_def`` / ``prom_down_def``).
      * ``HiMulti``: mapping whose ``highlights`` entry is an optional track
        or list of tracks; their scores are appended after the main
        features (density mode only).
      * ``mode``: 0 for correlation, 1 for density.
      * ``cormax``: maximum lag for the correlation (default ``_cormax``).

    Registers the PDF and returns the value of ``self.display_time()``.
    Raises ``ValueError`` on a missing assembly, an unknown feature type or
    mode, or when no data was collected.
    """
    def chrom_scores(feat, chrom):
        """Return ``score_array`` output for the signals over stream `feat` on `chrom`."""
        if 'name' not in feat.fields:
            feat = add_name_field(feat)
        means = score_by_feature([s.read(chrom) for s in signals], feat)
        return score_array(means, means.fields[len(feat.fields):])

    feature_type = int(kw.get('feature_type') or 0)
    assembly_id = kw.get('assembly') or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        raise ValueError("Please specify an assembly")

    signals = kw.get('SigMulti', {}).get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]
    snames = [sig.name for sig in signals]

    # `features` is a callable mapping a chromosome name to a feature stream.
    if feature_type == 0:  # bodies
        features = genes
    elif feature_type == 1:  # promoters
        prom_pars = {
            'before_start': int(kw.get('upstream') or prom_up_def),
            'after_start': int(kw.get('downstream') or prom_down_def),
            'on_strand': True,
        }
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:  # exons
        features = exons
    elif feature_type == 3:  # custom track
        _t = track(kw.get('features'), chrmeta=chrmeta)
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        raise ValueError("Feature type not known: %i" % feature_type)

    # Normalise before wrapping, otherwise None would become [None] and be
    # handed to track().
    highlights = kw.get('HiMulti', {}).get('highlights', [])
    if highlights is None:
        highlights = []
    elif not isinstance(highlights, list):
        highlights = [highlights]
    highlights = [track(hi, chrmeta=chrmeta) for hi in highlights]

    pdf = self.temporary_path(fname='plot_pairs.pdf')
    narr = None
    set_index = []
    set_labels = []
    mode = int(kw['mode'])
    if mode == 0:  # correl
        cormax = int(kw.get('cormax') or _cormax)
        xarr = array(range(-cormax, cormax + 1))
        srtdchrom = sorted(chrmeta.keys())
        regions = [x[:3] for chrom in srtdchrom
                   for x in sorted_stream(features(chrom))]
        _f = ['chr', 'start', 'end', 'score']
        narr = correlation([s.read(fields=_f) for s in signals],
                           regions, (-cormax, cormax), True)
    elif mode == 1:  # density
        xarr = None
        for chrom in chrmeta:
            _n, _l = chrom_scores(features(chrom), chrom)
            if _n.size == 0:
                continue
            narr = _n if narr is None else vstack((narr, _n))
        # Check before touching narr.shape so the intended error is raised.
        if narr is None:
            raise ValueError("No data")
        # set_index records the row count after the main features and after
        # each highlight track.
        set_index = [narr.shape[0]]
        for hitrack in highlights:
            for chrom in chrmeta:
                _n, _l = chrom_scores(hitrack.read(chrom), chrom)
                if _n.size == 0:
                    continue
                narr = vstack((narr, _n))
                set_labels.extend(_l)
            set_index.append(narr.shape[0])
    else:
        raise ValueError("Mode not implemented: %s" % kw['mode'])

    if narr is None:
        raise ValueError("No data")
    pairs(narr, xarr, labels=snames, output=pdf,
          highlights=[set_index, set_labels])
    self.new_file(pdf, 'plot_pairs')
    return self.display_time()
def __call__(self, **kw):
    """Draw a pairs plot of the signals over a set of features.

    Options read from ``kw``:
      * ``feature_type``: 0 gene bodies, 1 promoters, 2 exons, 3 custom
        track given in ``features``.
      * ``assembly``: assembly identifier, required unless ``feature_type``
        is 3.
      * ``SigMulti``: mapping whose ``signals`` entry is one signal track or
        a list of them.
      * ``upstream`` / ``downstream``: promoter extent (defaults
        ``prom_up_def`` / ``prom_down_def``).
      * ``HiMulti``: mapping whose ``highlights`` entry is an optional track
        or list of tracks; their scores are appended after the main
        features (density mode only).
      * ``mode``: 0 for correlation, 1 for density.
      * ``cormax``: maximum lag for the correlation (default ``_cormax``).

    Registers the PDF and returns the value of ``self.display_time()``.
    Raises ``ValueError`` on a missing assembly, an unknown feature type or
    mode, or when no data was collected.
    """
    def chrom_scores(feat, chrom):
        """Return ``score_array`` output for the signals over stream `feat` on `chrom`."""
        if 'name' not in feat.fields:
            feat = add_name_field(feat)
        means = score_by_feature([s.read(chrom) for s in signals], feat)
        return score_array(means, means.fields[len(feat.fields):])

    feature_type = int(kw.get('feature_type') or 0)
    assembly_id = kw.get('assembly') or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        raise ValueError("Please specify an assembly")

    signals = kw.get('SigMulti', {}).get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]
    snames = [sig.name for sig in signals]

    # `features` is a callable mapping a chromosome name to a feature stream.
    if feature_type == 0:  # bodies
        features = genes
    elif feature_type == 1:  # promoters
        prom_pars = {
            'before_start': int(kw.get('upstream') or prom_up_def),
            'after_start': int(kw.get('downstream') or prom_down_def),
            'on_strand': True,
        }
        features = lambda c: neighborhood(genes(c), **prom_pars)
    elif feature_type == 2:  # exons
        features = exons
    elif feature_type == 3:  # custom track
        _t = track(kw.get('features'), chrmeta=chrmeta)
        chrmeta = _t.chrmeta
        features = _t.read
    else:
        raise ValueError("Feature type not known: %i" % feature_type)

    # Normalise before wrapping, otherwise None would become [None] and be
    # handed to track().
    highlights = kw.get('HiMulti', {}).get('highlights', [])
    if highlights is None:
        highlights = []
    elif not isinstance(highlights, list):
        highlights = [highlights]
    highlights = [track(hi, chrmeta=chrmeta) for hi in highlights]

    pdf = self.temporary_path(fname='plot_pairs.pdf')
    narr = None
    set_index = []
    set_labels = []
    mode = int(kw['mode'])
    if mode == 0:  # correl
        cormax = int(kw.get('cormax') or _cormax)
        xarr = array(range(-cormax, cormax + 1))
        srtdchrom = sorted(chrmeta.keys())
        regions = [x[:3] for chrom in srtdchrom
                   for x in sorted_stream(features(chrom))]
        _f = ['chr', 'start', 'end', 'score']
        narr = correlation([s.read(fields=_f) for s in signals],
                           regions, (-cormax, cormax), True)
    elif mode == 1:  # density
        xarr = None
        for chrom in chrmeta:
            _n, _l = chrom_scores(features(chrom), chrom)
            if _n.size == 0:
                continue
            narr = _n if narr is None else vstack((narr, _n))
        # Check before touching narr.shape so the intended error is raised.
        if narr is None:
            raise ValueError("No data")
        # set_index records the row count after the main features and after
        # each highlight track.
        set_index = [narr.shape[0]]
        for hitrack in highlights:
            for chrom in chrmeta:
                _n, _l = chrom_scores(hitrack.read(chrom), chrom)
                if _n.size == 0:
                    continue
                narr = vstack((narr, _n))
                set_labels.extend(_l)
            set_index.append(narr.shape[0])
    else:
        raise ValueError("Mode not implemented: %s" % kw['mode'])

    if narr is None:
        raise ValueError("No data")
    pairs(narr, xarr, labels=snames, output=pdf,
          highlights=[set_index, set_labels])
    self.new_file(pdf, 'plot_pairs')
    return self.display_time()