def fetch_intervals(self, genome_range):
    """
    Fetch BigWig intervals within input chromosome range.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Range to query; it is decomposed with ``split_genome_range``.
        (accepted types assumed from the sibling methods -- TODO confirm)

    Returns
    -------
    intval_table : pandas.core.frame.DataFrame
        One row per interval with the columns
        ``chromsome``, ``start``, ``end`` and ``score``.
        The table is empty when nothing overlaps the range.
    """
    chrom, start, end = split_genome_range(genome_range)

    # The file may use the other chromosome naming flavour; only the query
    # uses the converted name, the table keeps the name that was asked for.
    if chrom in self.bw.chroms():
        query_chrom = chrom
    else:
        query_chrom = change_chrom_names(chrom)

    intervals = self.bw.intervals(query_chrom, start, end)
    if intervals is None:
        # pyBigWig-style readers hand back None (not an empty tuple) when no
        # interval overlaps the range; without this guard len() would raise.
        intervals = ()

    intval_table = pd.DataFrame(
        {
            "chromsome": [chrom] * len(intervals),
            "start": [s for s, _, _ in intervals],
            "end": [e for _, e, _ in intervals],
            "score": [v for _, _, v in intervals],
        },
        columns=['chromsome', 'start', 'end', 'score'])
    return intval_table
def fetch_intervals(self, genome_range):
    """
    Fetch the annotation records overlapping a genome range.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Range to query in ``self.bgz_file``.

    Returns
    -------
    intervals : pandas.core.frame.DataFrame
        Annotation interval table with the nine columns listed below plus
        a ``gene_name`` column extracted from ``attribute``
        (empty string when no gene name could be extracted).
    """
    chrom, start, end = split_genome_range(genome_range)
    columns = [
        'seqname', 'source', 'feature', 'start', 'end', 'score', 'strand',
        'frame', 'attribute'
    ]
    rows = list(tabix_query(self.bgz_file, chrom, start, end))
    df = pd.DataFrame(rows, columns=columns)
    df['start'] = df['start'].astype(int)
    df['end'] = df['end'].astype(int)

    # Take the text between "gene_name " and the next space, then drop the
    # surrounding quote / semicolon characters.
    gene_name = df['attribute'].str.extract(
        ".*gene_name (.*?) ").iloc[:, 0].str.strip('\";')
    # Assign the filled result back instead of calling
    # ``df['gene_name'].fillna(..., inplace=True)``: an in-place fill on a
    # selected column acts on a temporary under pandas Copy-on-Write and
    # would leave the NaN values in ``df``.
    df['gene_name'] = gene_name.fillna("")
    return df
def __load(self, genome_range):
    """
    Read the BED records of a genome range into ``self.interval_tree``.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Range to query in ``self.bgz_file``; decomposed with
        ``split_genome_range``.
        (accepted types assumed from the sibling methods -- TODO confirm)

    Returns
    -------
    valid_intervals : int
        Number of records counted while iterating the query result.
    min_score, max_score
        Smallest and largest ``bed.score`` seen. They keep their initial
        values ``inf`` / ``-inf`` when no record was read.
    """
    valid_intervals = 0
    interval_tree = self.interval_tree
    max_score = float('-inf')
    min_score = float('inf')
    chrom, start, end = split_genome_range(genome_range)
    try:
        bed_file_h = ReadBed(query_bed(self.bgz_file, chrom, start, end))
    except StopIteration:
        # Building the reader raised StopIteration: report and return the
        # untouched initial counters.
        log.info(f"No records in the range {str(genome_range)}")
        return valid_intervals, min_score, max_score
    # Remember the file type reported by the reader.
    self.bed_type = bed_file_h.file_type
    for bed in bed_file_h:
        # Track the score extremes over all records of this query.
        if bed.score < min_score:
            min_score = bed.score
        if bed.score > max_score:
            max_score = bed.score
        # One IntervalTree per chromosome, created on first use.
        if bed.chromosome not in interval_tree:
            interval_tree[bed.chromosome] = IntervalTree()
        itv = Interval(bed.start, bed.end, bed)
        # NOTE(review): `interval_tree` is the container indexed by
        # chromosome, so this presumably tests the interval against the
        # chromosome keys rather than against the chromosome's own tree --
        # confirm whether `interval_tree[bed.chromosome]` was intended.
        if itv not in interval_tree:
            interval_tree[bed.chromosome].add(itv)
        # NOTE(review): original nesting of this counter is not recoverable
        # from the collapsed source; placed at loop level -- confirm.
        valid_intervals += 1
    return valid_intervals, min_score, max_score
def fetch_data(self, gr, **kwargs):
    """
    Return the track data, either as a dense matrix or as column means.

    Parameters
    ----------
    gr : {str, GenomeRange}
        Genome range; it is parsed with ``split_genome_range`` but the
        parsed parts are not used to subset ``self.data``.
        (accepted types assumed from the sibling methods -- TODO confirm)
    **kwargs
        Accepted and ignored.

    Returns
    -------
    data
        ``self.data.todense()`` when the ``style`` property is
        ``'heatmap'``, otherwise the flattened result of
        ``self.data.mean(0)``.
    """
    # The parts are unused; the call is kept so the range is still parsed.
    _chrom, _start, _end = split_genome_range(gr)
    matrix = self.data
    if self.properties['style'] != 'heatmap':
        column_means = matrix.mean(0)
        return np.asarray(column_means).flatten()
    return matrix.todense()
def plot_genome_range(self, ax, genome_range):
    """
    Draw this track for the given genome range.

    The range is split into chromosome, start and end, which are then
    forwarded to ``self.plot`` together with the axis.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axis handed to ``self.plot``.

    genome_range : {str, GenomeRange}
        Genome range to plot.
    """
    region = split_genome_range(genome_range)
    seq_name, begin, stop = region
    self.plot(ax, seq_name, begin, stop)
def fetch_intervals(self, genome_range):
    """
    Fetch intervals within input chromosome range.

    The range is first loaded with ``self.load_range``; the intervals are
    then looked up in ``self.interval_tree``, retrying with the name given
    by ``change_chrom_names`` when the chromosome is not present.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Range to fetch.
        (accepted types assumed from the sibling methods -- TODO confirm)

    Returns
    -------
    intval_table
        Result of ``self.intervals2dataframe`` on the sorted hits
        (on an empty list when the chromosome is unknown).
    """
    self.load_range(genome_range)
    chrom, start, end = split_genome_range(genome_range)
    trees = self.interval_tree

    # Fall back to the alternative chromosome name when needed.
    if chrom not in trees:
        chrom = change_chrom_names(chrom)

    if chrom in trees:
        hits = sorted(trees[chrom][start:end])
    else:
        hits = []
    return self.intervals2dataframe(hits)
def fetch_intervals(self, genome_range: Union[str, GenomeRange]):
    """
    Fetch intervals within input chromosome range.

    Parameters
    ----------
    genome_range : {str, GenomeRange}
        Range to fetch.

    Returns
    -------
    intval_table : pandas.core.frame.DataFrame
        Table with the columns ``chromsome``, ``start``, ``end``, ``score``
        built from the rows returned by ``self.__load``.
    """
    chrom, start, end = split_genome_range(genome_range)
    records = self.__load(GenomeRange(chrom, start, end))
    if len(records) == 0:
        # Nothing found: query once more under the alternative
        # chromosome name.
        alt_chrom = change_chrom_names(chrom)
        records = self.__load(GenomeRange(alt_chrom, start, end))
    return pd.DataFrame(
        records, columns=['chromsome', 'start', 'end', 'score'])
def fetch_intervals(self, genome_range: GenomeRange):
    """
    Fetch the alignment records overlapping a genome range.

    Parameters
    ----------
    genome_range : GenomeRange
        Range to query in ``self.indexed_bam``.

    Returns
    -------
    df : pandas.core.frame.DataFrame
        One row per record returned by ``query_bam``, with the twelve
        columns named below. ``flag``, ``pos`` and ``mapq`` are cast to
        ``int`` when the table has at least one row.
    """
    chrom, start, end = split_genome_range(genome_range)
    records = list(
        query_bam(self.indexed_bam, chrom, start, end, split=True))
    # https://samtools.github.io/hts-specs/SAMv1.pdf
    sam_columns = [
        "qname", "flag", "rname", "pos", "mapq", "cigar",
        "rnext", "pnext", "tlen", "seq", "qual", "options"
    ]
    df = pd.DataFrame(records, columns=sam_columns)
    if not df.shape[0] > 0:
        return df
    for numeric_col in ('flag', 'pos', 'mapq'):
        df[numeric_col] = df[numeric_col].astype(int)
    return df
def fetch_data(self, gr: GenomeRange, **kwargs):
    """
    Fetch the BigWig intervals of a genome range.

    Parameters
    ----------
    gr : GenomeRange
        Range to fetch.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    intervals : pandas.core.frame.DataFrame
        BigWig interval table as returned by ``self.bw.fetch_intervals``,
        with a column named ``value`` (if any) renamed to ``score``.
    """
    chrom, start, end = split_genome_range(gr)
    if chrom not in self.bw.chromsizes:
        chrom = change_chrom_names(chrom)
    table = self.bw.fetch_intervals(chrom, start, end)

    names = list(table.columns)
    try:
        value_pos = names.index('value')
    except ValueError:
        # No 'value' column: hand the table back untouched.
        return table
    names[value_pos] = 'score'
    table.columns = names
    return table
def fetch_data(self, gr: GenomeRange, **kwargs):
    """
    Fetch the BigWig intervals of a genome range.

    Parameters
    ----------
    gr : GenomeRange
        Range to fetch.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    intervals : pandas.core.frame.DataFrame
        BigWig interval table with the columns
        ``chromsome``, ``start``, ``end`` and ``score``.
        The chromosome column carries the name actually used for the
        query, i.e. the converted name when the requested one is not in
        the file. The table is empty when nothing overlaps the range.
    """
    chrom, start, end = split_genome_range(gr)
    if chrom not in self.bw.chroms():
        chrom = change_chrom_names(chrom)

    intervals = self.bw.intervals(chrom, start, end)
    if intervals is None:
        # pyBigWig-style readers hand back None (not an empty tuple) when no
        # interval overlaps the range; without this guard len() would raise.
        intervals = ()

    intval_table = pd.DataFrame(
        {
            "chromsome": [chrom] * len(intervals),
            "start": [s for s, _, _ in intervals],
            "end": [e for _, e, _ in intervals],
            "score": [v for _, _, v in intervals],
        },
        columns=['chromsome', 'start', 'end', 'score'])
    return intval_table