Пример #1
0
    def fetch_intervals(self, genome_range):
        """
        Fetch BigWig intervals within input chromosome range.

        Parameters
        ----------
        genome_range
            Range to query; it is decomposed by ``split_genome_range`` into
            chromosome, start and end.

        Return
        ------
        intval_table : pandas.core.frame.DataFrame
            Table with the columns 'chromsome', 'start', 'end' and 'score',
            one row per interval. The table has no rows when nothing
            overlaps the range.
        """
        chrom, start, end = split_genome_range(genome_range)
        # Query with the name produced by change_chrom_names when the file
        # does not list the requested one; the output table still reports
        # the chromosome name the caller asked for.
        if chrom in self.bw.chroms():
            query_chrom = chrom
        else:
            query_chrom = change_chrom_names(chrom)

        # intervals() may hand back None instead of an empty sequence when
        # nothing overlaps the range (pyBigWig does this); normalise it so
        # len() and the unpacking below do not fail.
        intervals = self.bw.intervals(query_chrom, start, end) or ()

        col_chrom = [chrom] * len(intervals)
        if intervals:
            # Transpose the (start, end, value) triples into three columns.
            col_start, col_end, col_score = map(list, zip(*intervals))
        else:
            col_start, col_end, col_score = [], [], []

        intval_table = pd.DataFrame(
            {
                "chromsome": col_chrom,
                "start": col_start,
                "end": col_end,
                "score": col_score,
            },
            columns=['chromsome', 'start', 'end', 'score'])

        return intval_table
Пример #2
0
    def fetch_intervals(self, genome_range):
        """
        Fetch the annotation records that fall within a genome range.

        Parameters
        ----------
        genome_range : {str, GenomeRange}

        Return
        ------
        intervals : pandas.core.frame.DataFrame
            Annotation interval table. It holds the nine columns listed in
            ``columns`` below, with 'start' and 'end' cast to int, plus a
            'gene_name' column parsed out of 'attribute' (empty string where
            the pattern does not match).
        """
        chrom, start, end = split_genome_range(genome_range)
        rows = list(tabix_query(self.bgz_file, chrom, start, end))
        columns = [
            'seqname', 'source', 'feature', 'start', 'end', 'score', 'strand',
            'frame', 'attribute'
        ]
        df = pd.DataFrame(rows, columns=columns)
        df['start'] = df['start'].astype(int)
        df['end'] = df['end'].astype(int)
        # Take the token following "gene_name " and drop the surrounding
        # quote / semicolon characters.
        gene_name = df['attribute'].str.extract(
            ".*gene_name (.*?) ").iloc[:, 0].str.strip('\";')
        # Assign the filled Series back instead of calling
        # fillna(inplace=True) on df['gene_name']: the in-place call acts on
        # a column selection, which warns on recent pandas and leaves df
        # unchanged when Copy-on-Write is active.
        df['gene_name'] = gene_name.fillna("")
        return df
Пример #3
0
    def __load(self, genome_range):
        """
        Load the BED records of a genome range into ``self.interval_tree``.

        Records are read from ``self.bgz_file`` through ``query_bed`` and
        ``ReadBed``; each one is stored as an ``Interval(start, end, record)``
        in the tree kept for its chromosome. ``self.bed_type`` is set from
        the reader's ``file_type``.

        Parameters
        ----------
        genome_range
            Range to load; decomposed by ``split_genome_range``.

        Return
        ------
        valid_intervals : int
            Number of records read in the range (records already present in
            the tree are counted as well).
        min_score, max_score
            Smallest and largest ``score`` among the records read. They stay
            at ``inf`` / ``-inf`` when no record was read.
        """
        valid_intervals = 0
        interval_tree = self.interval_tree
        max_score = float('-inf')
        min_score = float('inf')

        chrom, start, end = split_genome_range(genome_range)
        try:
            bed_file_h = ReadBed(query_bed(self.bgz_file, chrom, start, end))
        except StopIteration:
            # Building the reader raised StopIteration, which this method
            # treats as "no records in the range".
            log.info(f"No records in the range {str(genome_range)}")
            return valid_intervals, min_score, max_score
        self.bed_type = bed_file_h.file_type

        for bed in bed_file_h:
            min_score = min(min_score, bed.score)
            max_score = max(max_score, bed.score)

            if bed.chromosome not in interval_tree:
                interval_tree[bed.chromosome] = IntervalTree()
            chrom_tree = interval_tree[bed.chromosome]

            itv = Interval(bed.start, bed.end, bed)
            # Check for duplicates in the chromosome's own tree; testing the
            # chromosome-keyed mapping (as before) could never find an
            # interval and so never skipped anything.
            if itv not in chrom_tree:
                chrom_tree.add(itv)
            valid_intervals += 1
        return valid_intervals, min_score, max_score
Пример #4
0
    def fetch_data(self, gr, **kwargs):
        """
        Return the track data as a dense matrix or as a 1-D mean profile.

        Parameters
        ----------
        gr
            Genome range; it is only passed through ``split_genome_range``,
            the returned data does not depend on its components.

        Return
        ------
        ``self.data.todense()`` when the 'style' property is 'heatmap',
        otherwise the flattened mean of ``self.data`` along axis 0.
        """
        # The three components are not used below; the unpacking is kept so
        # that a range which does not split into three parts still raises.
        _chrom, _start, _end = split_genome_range(gr)
        matrix = self.data

        if self.properties['style'] != 'heatmap':
            profile = np.asarray(matrix.mean(0)).flatten()
            return profile
        return matrix.todense()
Пример #5
0
    def plot_genome_range(self, ax, genome_range):
        """
        Draw this track on ``ax`` for the given genome range.

        The range is decomposed with ``split_genome_range`` and the pieces
        are forwarded to ``self.plot``.

        Parameters
        ----------
        ax: matplotlib.axes.Axes
            Axis handed on to ``self.plot``.

        genome_range : {str, GenomeRange}
            Genome range to plot.
        """
        region = split_genome_range(genome_range)
        seqname, begin, stop = region
        self.plot(ax, seqname, begin, stop)
Пример #6
0
 def fetch_intervals(self, genome_range):
     """
     Return the intervals overlapping a genome range as a table.

     ``self.load_range`` is called first, then the chromosome is looked up
     in ``self.interval_tree`` -- under the name given by
     ``change_chrom_names`` if the original one is absent. The sorted
     overlapping intervals (or an empty list when neither name is present)
     are converted with ``self.intervals2dataframe``.
     """
     self.load_range(genome_range)
     chrom, start, end = split_genome_range(genome_range)
     tree = self.interval_tree
     if chrom not in tree:
         chrom = change_chrom_names(chrom)
     hits = sorted(tree[chrom][start:end]) if chrom in tree else []
     return self.intervals2dataframe(hits)
Пример #7
0
    def fetch_intervals(self, genome_range: Union[str, GenomeRange]):
        """
        Build the interval table for a genome range.

        Rows are loaded for the requested chromosome; when that yields
        nothing, the load is retried once with the chromosome name returned
        by ``change_chrom_names``. The rows are returned as a DataFrame with
        the columns 'chromsome', 'start', 'end' and 'score'.
        """
        chrom, start, end = split_genome_range(genome_range)

        rows = self.__load(GenomeRange(chrom, start, end))
        if len(rows) == 0:
            # Nothing under this name: try the alternative spelling.
            alt_chrom = change_chrom_names(chrom)
            rows = self.__load(GenomeRange(alt_chrom, start, end))

        return pd.DataFrame(
            rows, columns=['chromsome', 'start', 'end', 'score'])
Пример #8
0
    def fetch_intervals(self, genome_range: GenomeRange):
        """
        Return the alignment records of a genome range as a DataFrame.

        Records come from ``query_bam`` on ``self.indexed_bam`` (called with
        ``split=True``). The table has one column per entry of the field
        list below; 'flag', 'pos' and 'mapq' are cast to int when the table
        has at least one row.
        """
        chrom, start, end = split_genome_range(genome_range)
        records = list(
            query_bam(self.indexed_bam, chrom, start, end, split=True))

        # Column names follow the SAM specification:
        # https://samtools.github.io/hts-specs/SAMv1.pdf
        sam_fields = [
            "qname", "flag", "rname", "pos", "mapq", "cigar", "rnext", "pnext",
            "tlen", "seq", "qual", "options"
        ]
        table = pd.DataFrame(records, columns=sam_fields)
        if table.shape[0] > 0:
            for int_field in ("flag", "pos", "mapq"):
                table[int_field] = table[int_field].astype(int)
        return table
Пример #9
0
    def fetch_data(self, gr: GenomeRange, **kwargs):
        """
        Fetch the interval table of a genome range, with 'value' as 'score'.

        Parameters
        ----------
        gr : GenomeRange

        Return
        ------
        intervals : pandas.core.frame.DataFrame
            The table returned by ``self.bw.fetch_intervals``; its first
            column named 'value', if any, is renamed to 'score' (the column
            labels are replaced on that object itself).
        """
        chrom, start, end = split_genome_range(gr)
        # Fall back to the name from change_chrom_names when the file's
        # chromsizes do not list the requested chromosome.
        if chrom not in self.bw.chromsizes:
            chrom = change_chrom_names(chrom)

        table = self.bw.fetch_intervals(chrom, start, end)
        names = list(table.columns)
        try:
            names[names.index('value')] = 'score'
        except ValueError:
            # No 'value' column: keep the labels as they are.
            pass
        table.columns = names

        return table
Пример #10
0
    def fetch_data(self, gr: GenomeRange, **kwargs):
        """
        Fetch the BigWig intervals of a genome range as a table.

        Parameters
        ----------
        gr : GenomeRange

        Return
        ------
        intervals : pandas.core.frame.DataFrame
            BigWig interval table with the columns 'chromsome', 'start',
            'end' and 'score', one row per interval. The chromosome column
            holds the name actually used for the query. The table has no
            rows when nothing overlaps the range.
        """
        chrom, start, end = split_genome_range(gr)
        # Fall back to the name from change_chrom_names when the file does
        # not list the requested chromosome.
        if chrom not in self.bw.chroms():
            chrom = change_chrom_names(chrom)

        # intervals() may hand back None instead of an empty sequence when
        # nothing overlaps the range (pyBigWig does this); normalise it so
        # len() and the unpacking below do not fail.
        intervals = self.bw.intervals(chrom, start, end) or ()

        col_chrom = [chrom] * len(intervals)
        if intervals:
            # Transpose the (start, end, value) triples into three columns.
            col_start, col_end, col_score = map(list, zip(*intervals))
        else:
            col_start, col_end, col_score = [], [], []

        intval_table = pd.DataFrame(
            {
                "chromsome": col_chrom,
                "start": col_start,
                "end": col_end,
                "score": col_score,
            },
            columns=['chromsome', 'start', 'end', 'score'])

        return intval_table