def get_seq(self, chrom, start, end, strand):
    """Return the sequence of `chrom` from `start` through `end`.

    `chrom` is first converted to the naming used by this object's own
    keys.  The result is passed through `reverse_comp` when `strand` is "-".

    Args:
        chrom: chromosome name, in either naming convention.
        start: first position to fetch.
        end: last position to fetch; `end + 1` is handed to `fetch`, so this
            is inclusive if `fetch` is half-open (presumably pysam-style --
            TODO confirm).
        strand: "-" requests the reverse complement; any other value
            returns the sequence as fetched.
    """
    contig = match_chrom_format(chrom, self.keys())
    sequence = self.fasta.fetch(contig, start, end + 1)
    return reverse_comp(sequence) if strand == "-" else sequence
def get_seq(self, chrom, start, end, strand):
    """Fetch the bases covering `start`..`end` on `chrom`.

    The chromosome name is normalised against `self.keys()` before the
    lookup, and the fetched bases are reverse-complemented for the "-"
    strand.

    Args:
        chrom: chromosome name in either naming convention.
        start: first position requested.
        end: last position requested (the fetch is issued with `end + 1`).
        strand: "-" to get the reverse complement, anything else for the
            sequence exactly as fetched.
    """
    matched_chrom = match_chrom_format(chrom, self.keys())
    bases = self.fasta.fetch(matched_chrom, start, end + 1)

    if strand != "-":
        return bases
    return reverse_comp(bases)
def fetch_from_tabix(path, chrom, start, end):
    """Yield one parsed record per line of a tabix file overlapping a region.

    Args:
        path: location of the tabix-indexed file.
        chrom: chromosome name; converted to the naming used by the file's
            own contigs before querying.
        start: region start passed to `TabixFile.fetch`.
        end: region end passed to `TabixFile.fetch`.

    Yields:
        The result of `tx_from_bedfields` for each overlapping line, after
        the line has been split into fields.
    """
    import pysam

    bed = pysam.TabixFile(path)
    try:
        chrom = match_chrom_format(chrom, bed.contigs)
        for locus in bed.fetch(chrom, start, end):
            # NOTE(review): split() breaks on any whitespace, so a field
            # containing spaces would be divided -- confirm inputs never
            # contain such fields.
            yield tx_from_bedfields(locus.split())
    finally:
        # Release the file handle once the generator is exhausted, closed
        # or garbage-collected; previously it was never closed.
        bed.close()
def fetch_from_bigbed(path, chrom, start, end):
    """Yield one parsed record per bigBed entry overlapping a region.

    Args:
        path: location of the bigBed file.
        chrom: chromosome name; converted to the naming used by the file's
            own chromosomes before querying.
        start: region start passed to `entries`.
        end: region end passed to `entries`.

    Yields:
        The result of `tx_from_bedfields` for each overlapping entry, built
        from the chromosome, the entry's start and end, and the remaining
        whitespace-split fields.  Nothing is yielded for an empty region.

    Raises:
        AssertionError: if the file is not a bigBed file (note that this
            check is skipped when Python runs with -O).
    """
    import pyBigWig

    bed = pyBigWig.open(path)
    try:
        assert bed.isBigBed(), "Oops, for some reason I was expecting a bed file: {}".format(path)

        chrom = match_chrom_format(chrom, bed.chroms().keys())

        # pyBigWig returns None rather than an empty list when no entry
        # overlaps the region; iterating None would raise TypeError.
        entries = bed.entries(chrom, start, end) or []
        for cur_start, cur_end, bed_line in entries:
            fields = bed_line.split()
            yield tx_from_bedfields([chrom, cur_start, cur_end] + fields)
    finally:
        # Release the file handle once the generator is exhausted, closed
        # or garbage-collected; previously it was never closed.
        bed.close()
def fetch_from_bigbed(path, chrom, start, end):
    """Generate parsed records for the bigBed entries that overlap a region.

    Args:
        path: location of the bigBed file.
        chrom: chromosome name; it is matched to the naming convention of
            the chromosomes listed in the file before the query is made.
        start: region start handed to `entries`.
        end: region end handed to `entries`.

    Yields:
        `tx_from_bedfields(...)` for every overlapping entry, given the
        chromosome, entry start, entry end and the entry's remaining
        whitespace-separated fields.  An empty region yields nothing.

    Raises:
        AssertionError: when `path` is not a bigBed file (the check does
            not run under `python -O`).
    """
    import pyBigWig

    bed = pyBigWig.open(path)
    try:
        assert bed.isBigBed(), "Oops, for some reason I was expecting a bed file: {}".format(path)

        chrom = match_chrom_format(chrom, bed.chroms().keys())

        # entries() gives back None, not [], when the region holds no
        # entries, so substitute an empty list before looping.
        overlapping = bed.entries(chrom, start, end) or []
        for cur_start, cur_end, bed_line in overlapping:
            yield tx_from_bedfields([chrom, cur_start, cur_end] + bed_line.split())
    finally:
        # The handle used to be left open; close it when iteration ends.
        bed.close()
def tally_reads(self, bam):
    """Record what every piled-up read shows at each position of the region.

    Walks the pileup of `bam` over `self.chrom`:`self.start`-`self.end`
    (truncated to that region) and calls `self.add_count(position, label)`
    once per observation, where `label` is:

    * "DEL" when the read has a deletion at the position,
    * the read's base at the position, unless that base is "N",
    * "INS" (in addition to the above) when `indel > 0` for the read.

    Reads that skip the position (`is_refskip`) contribute nothing.

    Args:
        bam: an open alignment file providing `references` and `pileup`
            (presumably a pysam AlignmentFile -- TODO confirm).
    """
    chrom = match_chrom_format(self.chrom, bam.references)

    for pileupcolumn in bam.pileup(chrom, self.start, self.end, truncate=True):
        position = pileupcolumn.pos
        for pileupread in pileupcolumn.pileups:
            if pileupread.is_refskip:
                continue

            if pileupread.is_del:
                self.add_count(position, "DEL")
            else:
                # query_sequence is None for reads stored without bases;
                # indexing it would raise TypeError, so skip the base call.
                sequence = pileupread.alignment.query_sequence
                if sequence is not None:
                    nuc = sequence[pileupread.query_position]
                    if nuc != "N":
                        self.add_count(position, nuc)

            if pileupread.indel > 0:
                self.add_count(position, "INS")
def layout(self, scale):
    """Bin the bigwig signal across the visible region into one series.

    The region `scale.start`..`scale.end` is cut into bins of
    `(scale.end - scale.start) / self.nbins` positions (at least 1).  For
    each bin the first value returned by `self.bigwig.stats` is plotted at
    the bin's midpoint.  The result is stored in `self.series["vals"]`, and
    `self.min_y` / `self.max_y` are set to the extremes of the binned values.

    Bins for which the bigwig reports no data keep a value of None in the
    series but are ignored when computing `min_y` / `max_y`; both are None
    when no bin has data.

    Args:
        scale: object providing `chrom`, `start` and `end` for the region
            being drawn; also forwarded to the parent class's `layout`.
    """
    super().layout(scale)

    binsize = max(1, int((scale.end - scale.start) / self.nbins))
    chrom = match_chrom_format(scale.chrom, self.bigwig.chroms().keys())

    x = []
    y = []
    for bin_start in range(scale.start, scale.end, binsize):
        # Clamp the final bin to the requested region: an unclamped bin can
        # run past the end of the chromosome, which pyBigWig rejects.
        bin_end = min(bin_start + binsize, scale.end)
        values = self.bigwig.stats(chrom, bin_start, bin_end)
        x.append((bin_start + bin_end) / 2)
        y.append(values[0])

    self.series = {"vals": Series(x, y, color="black")}

    # stats() reports None for bins without data; None cannot be ordered
    # against numbers, so leave those bins out of the range calculation.
    observed = [value for value in y if value is not None]
    self.min_y = min(observed, default=None)
    self.max_y = max(observed, default=None)
def layout(self, scale):
    """Build and lay out one sub-track per read category in the region.

    Every read fetched from `self.bam` over the region described by `scale`
    is classified with `self.keyfn`.  For each distinct category, taken in
    sorted order, a `self.bam_track_class` track is created (named through
    `self.category_label_fn`), restricted to that category via
    `include_read_fn`, laid out, and appended to `self.subtracks`.
    `self.height` becomes the sum of the sub-track heights, each followed by
    `self.space_between`.

    Args:
        scale: object providing `chrom`, `start` and `end`; stored as
            `self.scale` and passed on to every sub-track's `layout`.
    """
    self.scale = scale

    contig = match_chrom_format(self.scale.chrom, self.bam.references)
    reads = self.bam.fetch(contig, self.scale.start, self.scale.end)
    ordered_categories = sorted({self.keyfn(read) for read in reads})

    # NOTE(review): sub-tracks are appended without clearing the list here;
    # confirm layout() is not expected to run twice on the same track.
    self.height = 0
    for category in ordered_categories:
        subtrack = self.bam_track_class(
            self.bam_path, name=self.category_label_fn(category))
        subtrack.include_read_fn = _get_filter_fn(self.keyfn, category)
        subtrack.layout(scale)

        self.height += subtrack.height + self.space_between
        self.subtracks.append(subtrack)
def tally_reads(self, bam):
    """Count the observations made by piled-up reads across this region.

    Iterates the pileup of `bam` over `self.chrom`, `self.start` and
    `self.end` (with `truncate=True`) and reports each observation through
    `self.add_count(position, label)`:

    * reads flagged `is_refskip` are ignored entirely;
    * reads flagged `is_del` are counted as "DEL";
    * other reads are counted under the base they carry at the position,
      except for "N";
    * reads with `indel > 0` are additionally counted as "INS".

    Args:
        bam: an open alignment file exposing `references` and `pileup`
            (presumably a pysam AlignmentFile -- TODO confirm).
    """
    region_chrom = match_chrom_format(self.chrom, bam.references)
    columns = bam.pileup(region_chrom, self.start, self.end, truncate=True)

    for pileupcolumn in columns:
        position = pileupcolumn.pos
        for pileupread in pileupcolumn.pileups:
            if pileupread.is_refskip:
                continue

            if pileupread.is_del:
                self.add_count(position, "DEL")
            else:
                # A read stored without bases has query_sequence None;
                # guard so that indexing it cannot raise TypeError.
                read_bases = pileupread.alignment.query_sequence
                if read_bases is not None:
                    nuc = read_bases[pileupread.query_position]
                    if nuc != "N":
                        self.add_count(position, nuc)

            if pileupread.indel > 0:
                self.add_count(position, "INS")
def match_chrom_format(self, chrom):
    """
    Return `chrom` rewritten to follow the chromosome naming used by the bam
    file being visualized (for example "chr14" versus "14").
    """
    bam_contigs = self.bam.references
    # Delegates to the two-argument match_chrom_format helper.
    return match_chrom_format(chrom, bam_contigs)
def get_seq(self, chrom, start, end, strand):
    """Return the slice of contig `chrom` running from `start` through `end`.

    The name is normalised against `self.keys()`, the contig is looked up
    in `self.names_to_contigs`, and the slice `[start:end + 1]` is taken, so
    `end` is inclusive.  The slice is passed through `reverse_comp` when
    `strand` is "-".

    Args:
        chrom: chromosome name in either naming convention.
        start: index of the first element to include.
        end: index of the last element to include.
        strand: "-" requests the reverse complement; any other value
            returns the slice unchanged.
    """
    contig_name = match_chrom_format(chrom, self.keys())
    contig = self.names_to_contigs[contig_name]
    window = contig[start:end + 1]
    return reverse_comp(window) if strand == "-" else window
def get_seq(self, chrom, start, end, strand):
    """Slice the stored contig for `chrom` over `start`..`end` inclusive.

    Args:
        chrom: chromosome name; matched to the naming of `self.keys()`
            before the lookup in `self.names_to_contigs`.
        start: index of the first element of the slice.
        end: index of the last element of the slice (`end + 1` is used as
            the exclusive slice bound).
        strand: when "-", the slice is reverse-complemented before being
            returned; otherwise it is returned as sliced.
    """
    matched_chrom = match_chrom_format(chrom, self.keys())
    bases = self.names_to_contigs[matched_chrom][start:end + 1]

    if strand != "-":
        return bases
    return reverse_comp(bases)