def __call__(self, **kw):
    """Merge a forward and a reverse track into one shifted score track.

    Keyword arguments read from ``kw``:
        forward, reverse: the two inputs, each opened with ``track.track``.
        assembly: optional; passed as ``chrmeta`` when opening both inputs.
        shift: integer (default 0). Forward features are moved by ``+shift``
            and reverse features by ``-shift``. A negative value asks for
            the shift to be detected from the cross-correlation of the two
            inputs.

    The merged scores are written chromosome by chromosome to a temporary
    sql track which is then registered under the name 'density_merged'.

    Returns:
        1

    Raises:
        ValueError: if an assembly is given but neither input carries
            chromosome metadata, or if automatic shift detection fails.
    """
    def _shift(stream, shift):
        """Return a stream with the 'start' and 'end' fields moved by `shift`."""
        istart = stream.fields.index('start')
        iend = stream.fields.index('end')
        i1 = min(istart, iend)
        i2 = max(istart, iend)

        def _apply_shift(x):
            # Rebuild the item around the two shifted coordinates; every
            # other field is copied untouched.
            return x[:i1] + (x[i1] + shift,) + x[i1 + 1:i2] + (x[i2] + shift,) + x[i2 + 1:]

        return track.FeatureStream((_apply_shift(x) for x in stream), fields=stream.fields)

    assembly = kw.get('assembly') or None
    tfwd = track.track(kw.get('forward'), chrmeta=assembly)
    trev = track.track(kw.get('reverse'), chrmeta=assembly)
    try:
        if not assembly:
            # btrack does the job, take the max of both chromosome lengths
            chrmeta = tfwd.chrmeta
            for k, v in trev.chrmeta.iteritems():
                chrmeta.setdefault(k, {})['length'] = max(v['length'],
                                                          chrmeta.get(k, {}).get('length', 0))
        elif tfwd.chrmeta:
            # For sql files, btrack doesn't make it,
            chrmeta = tfwd.chrmeta
        elif trev.chrmeta:
            # so one can contain the info while the second does not.
            chrmeta = trev.chrmeta
        else:
            # In case nothing works - should not happen
            raise ValueError("Must specify an assembly.")

        shiftval = int(kw.get('shift', 0))
        if shiftval < 0:
            # Determine shift automatically: use the first chromosome whose
            # forward/reverse cross-correlation peaks above 0.2.
            shiftval = None
            xcor_lim = 300
            for chrom, v in chrmeta.iteritems():
                chrsize = v['length']
                # The lag window never exceeds 1% of the chromosome, and it
                # only shrinks from one chromosome to the next.
                xcor_lim = min(xcor_lim, 0.01 * chrsize)
                xcor = correlation([tfwd.read(chrom), trev.read(chrom)],
                                   regions=(1, chrsize),
                                   limits=(-xcor_lim, xcor_lim))
                max_xcor_idx = xcor.argmax()
                if xcor[max_xcor_idx] > 0.2:
                    # Presumably turns the peak index into a lag and halves
                    # it because each strand is moved by `shiftval` - confirm
                    # against the indexing convention of `correlation`.
                    shiftval = (max_xcor_idx - xcor_lim - 1) / 2
                    break
            # NOTE: a detected shift of exactly 0 is treated as a failure too.
            if not shiftval:
                raise ValueError("Unable to detect shift automatically. "
                                 "Must specify a shift value.")

        output = self.temporary_path(fname='density_merged', ext='sql')
        fields = ['chr', 'start', 'end', 'score']
        tout = track.track(output, format='sql', fields=fields, chrmeta=chrmeta,
                           info={'datatype': 'quantitative'})
        try:
            # The first chromosome creates the output, later ones append.
            mode = 'write'
            for chrom in chrmeta:
                # `shiftval` is a single number shared by all chromosomes.
                merged = merge_scores([_shift(tfwd.read(selection=chrom), shiftval),
                                       _shift(trev.read(selection=chrom), -shiftval)])
                tout.write(merged, chrom=chrom, mode=mode, clip=True)
                mode = 'append'
        finally:
            tout.close()
    finally:
        # Release the inputs even when detection or writing fails.
        trev.close()
        tfwd.close()
    self.new_file(output, 'density_merged')
    return 1
def __call__(self, **kw):
    """Draw a pairs plot of several signal tracks over a set of features.

    Keyword arguments read from ``kw``:
        feature_type: 0 gene bodies (default), 1 promoters, 2 exons,
            3 custom feature track.
        assembly: assembly identifier given to ``genrep.Assembly``. Required
            for every feature type except 3, where the custom track provides
            the chromosome metadata.
        signals: one signal file or a list of them.
        upstream, downstream: promoter extent for feature_type 1; fall back
            to ``prom_up_def`` / ``prom_down_def``.
        features: the custom feature track for feature_type 3.
        mode: 0 for cross-correlation, 1 for per-feature density. Required.

    The plot is written to a temporary pdf registered as 'plot_pairs'.

    Returns:
        The value of ``self.display_time()``.

    Raises:
        ValueError: if the assembly is missing where it is needed, the
            feature type or mode is unknown, or no data could be collected.
    """
    feature_type = int(kw.get('feature_type') or 0)
    assembly_id = kw.get('assembly') or None
    chrmeta = "guess"
    if assembly_id:
        assembly = genrep.Assembly(assembly_id)
        chrmeta = assembly.chrmeta
        genes = assembly.gene_track
        exons = assembly.exon_track
    elif feature_type != 3:
        # Genes, promoters and exons all come from the assembly; only the
        # custom feature track (type 3) can work without one.
        raise ValueError("Please specify an assembly")

    signals = kw.get('signals', [])
    if not isinstance(signals, list):
        signals = [signals]
    snames = [os.path.splitext(os.path.basename(sig))[0] for sig in signals]
    signals = [track(sig, chrmeta=chrmeta) for sig in signals]
    opened = list(signals)  # every track to release once the plot is done
    try:
        if feature_type == 0:  # bodies
            features = genes
        elif feature_type == 1:  # promoters
            prom_pars = {'before_start': int(kw.get('upstream') or prom_up_def),
                         'after_start': int(kw.get('downstream') or prom_down_def),
                         'on_strand': True}
            features = lambda c: neighborhood(genes(c), **prom_pars)
        elif feature_type == 2:  # exons
            features = exons
        elif feature_type == 3:  # custom track
            _t = track(kw.get('features'), chrmeta=chrmeta)
            opened.append(_t)
            chrmeta = _t.chrmeta
            features = _t.read
        else:
            raise ValueError("Feature type not known: %i" % feature_type)

        pdf = self.temporary_path(fname='plot_pairs.pdf')
        narr = None
        mode = int(kw['mode'])
        if mode == 0:  # correl
            xarr = array(range(-cormax, cormax + 1))
            srtdchrom = sorted(chrmeta.keys())
            # Flatten the features of all chromosomes, keeping only the
            # first three fields of each.
            features = [x[:3] for chrom in srtdchrom
                        for x in sorted_stream(features(chrom))]
            _f = ['chr', 'start', 'end', 'score']
            narr = correlation([s.read(fields=_f) for s in signals],
                               features, (-cormax, cormax), True)
        elif mode == 1:  # density
            xarr = None
            for chrom in chrmeta:
                feat = features(chrom)
                means = score_by_feature([s.read(chrom) for s in signals], feat)
                # Fields added after the feature's own fields (presumably
                # one score per signal).
                mf = means.fields[len(feat.fields):]
                _n, _l = score_array(means, mf)
                if _n.size == 0:
                    continue
                narr = _n if narr is None else vstack((narr, _n))
        else:
            raise ValueError("Mode not implemented: %s" % kw['mode'])
        if narr is None:
            raise ValueError("No data")
        pairs(narr, xarr, labels=snames, output=pdf)
    finally:
        for _trk in opened:
            _trk.close()
    self.new_file(pdf, 'plot_pairs')
    return self.display_time()