def test_get_features_by_feature(frag3, frag6, region3, region6):
    from pybedtools import BedTool

    for f in [frag6]:
        t = Track.load(f)
        for r in [region3, region6]:
            result = [len(x[1]) for x in t._get_features_by_feature(BedTool(r))]
            assert [0, 1, 3] == sorted(result)
def tracks():
    ftypes = [
        "bam", "bed", "wig", "bg", "bw",
        "wig.gz", "bg.gz", "bigWig", "bedGraph",
    ]  # , "bed.gz"
    my_tracks = []
    for ftype in ftypes:
        fname = "tests/data/profile." + ftype
        my_tracks.append(Track.load(fname))
    return my_tracks
def __init__(self, bamfile, height=1, color=None, bgmode=None, alpha=None,
             fragmentsize=200, rmdup=True, rmrepeats=True, **kwargs):
    self.height = height
    self.track = Track.load(bamfile, fragmentsize=fragmentsize, rmdup=rmdup,
                            rmrepeats=rmrepeats)
    self.ymax = None
    self.bgmode = bgmode
    self.scalepm = kwargs.get("adjscale", False)
    self.show_scale = kwargs.get("show_scale", True)
    if color:
        self.color = color
    else:
        self.color = "#a7004b"
    if alpha:
        self.alpha = alpha
    else:
        self.alpha = 1
    self.fragmentsize = fragmentsize
    self.rmdup = rmdup
    self.rmrepeats = rmrepeats
    self.name = kwargs.get("name")
def test_fetch_to_counts(frag3, frag6, region3, region6):
    from pybedtools import BedTool

    for f in [frag3, frag6]:
        t = Track.load(f)
        for r in [region3, region6]:
            overlap = [x for x in t.fetch_to_counts(BedTool(r))]
            assert 3 == len(overlap)
            counts = sorted([len(x[1]) + len(x[2]) for x in overlap])
            assert [0, 1, 3] == counts
def load_heatmap_data(featurefile, datafile, bins=100, up=5000, down=5000,
                      rmdup=True, rpkm=False, rmrepeats=True, fragmentsize=None,
                      dynam=False, guard=None):
    if guard is None:
        guard = []
    # Open the temporary BED file in text mode so it works on both py2 and py3
    tmp = tempfile.NamedTemporaryFile(mode="w+", encoding="utf-8", delete=False,
                                      prefix="fluff")
    regions = []
    order = {}
    count = 0
    hashcounter = 0
    if not guard and dynam:
        filt = True
    else:
        filt = False
    for i, line in enumerate(open(featurefile)):
        if line.startswith("#") or line[:5] == "track":
            hashcounter += 1
            continue
        vals = line.strip().split("\t")
        strand = "+"
        gene = ""
        if len(vals) >= 6:
            strand = vals[5]
        if len(vals) >= 4:
            gene = vals[3]
        middle = int((int(vals[2]) + int(vals[1])) / 2)
        start, end = middle, middle
        if strand == "+":
            start -= up
            end += down
        else:
            start -= down
            end += up
        if filt:
            if start >= 0:
                guard.append(True)
            else:
                guard.append(False)
        if not filt and start >= 0:
            if not dynam or guard[i - hashcounter]:
                regions.append([vals[0], start, end, gene, strand])
                order["{0}:{1}-{2}".format(vals[0], start, end)] = count
                count += 1
                tmp.write("{0}\t{1}\t{2}\t{3}\t0\t{4}\n".format(
                    vals[0], start, end, gene, strand))
    tmp.flush()
    track = Track.load(datafile, rmdup=rmdup, rmrepeats=rmrepeats,
                       fragmentsize=fragmentsize)
    result = track.binned_stats(tmp.name, bins, split=True, rpkm=rpkm)
    # Retrieve original order; keep only the per-bin values (skip chrom, start, end)
    r_data = np.array([[float(x) for x in row[3:]] for row in result])
    return os.path.basename(datafile), regions, r_data, guard
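# A minimal usage sketch for load_heatmap_data, assuming a BED file of features
# and the BAM test track used elsewhere in this file. "tests/data/peaks.bed" is
# a hypothetical placeholder, not a file shipped with the package.
def _example_heatmap_call():
    # 100 bins across 5 kb up- and downstream of each feature midpoint
    name, regions, data, guard = load_heatmap_data(
        "tests/data/peaks.bed",    # hypothetical feature file (BED)
        "tests/data/profile.bam",  # read data; rmdup/rmrepeats apply to BAM input
        bins=100, up=5000, down=5000,
        rmdup=True, rmrepeats=True, rpkm=False,
    )
    # data is an (n_regions x bins) numpy array of per-bin scores
    return name, regions, data, guard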
def load_cluster_data(clust_file, datafiles, bins, rpkm, rmdup, rmrepeats,
                      fragmentsize=None):
    data = {}
    for datafile in datafiles:
        track = Track.load(datafile, rmdup=rmdup, rmrepeats=rmrepeats,
                           fragmentsize=fragmentsize)
        result = track.binned_stats(clust_file, bins, split=True, rpkm=rpkm)
        # key: "chrom:start-end", value: list of per-bin scores
        data[os.path.basename(datafile)] = dict(
            ["{0}:{1}-{2}".format(vals[0], vals[1], vals[2]),
             [float(x) for x in vals[3:]]]
            for vals in result
        )
    return data
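# A minimal usage sketch for load_cluster_data, assuming clust_file is a BED
# file of (clustered) regions and datafiles are read tracks such as BAM files.
# "clusters.bed" is a hypothetical placeholder; profile.bam is the test track
# referenced in the tracks() fixture above.
def _example_cluster_call():
    data = load_cluster_data(
        "clusters.bed",              # hypothetical clustered regions (BED)
        ["tests/data/profile.bam"],  # one entry per track to quantify
        bins=100, rpkm=False, rmdup=True, rmrepeats=True,
    )
    # data["profile.bam"]["chrom:start-end"] -> list of per-bin scores
    return data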
def test_fragmentsize(region_fs, fragments):
    track = Track.load(fragments)
    result = track.binned_stats(region_fs, 4)
    assert 1 == len(result)
    counts = [int(x) for x in result[0].split("\t")[-4:]]
    assert [1, 0, 0, 1] == counts

    track.fragmentsize = 50
    result = track.binned_stats(region_fs, 4)
    counts = [int(x) for x in result[0].split("\t")[-4:]]
    assert [1, 0, 0, 1] == counts

    track.fragmentsize = 100
    result = track.binned_stats(region_fs, 4)
    counts = [int(x) for x in result[0].split("\t")[-4:]]
    assert [1, 1, 1, 1] == counts

    track.fragmentsize = 200
    result = track.binned_stats(region_fs, 4)
    counts = [int(x) for x in result[0].split("\t")[-4:]]
    assert [2, 2, 2, 2] == counts