Пример #1
0
    def __process_loop_file(self):
        """
        Parse the loop file into one interval tree per chromosome.

        Every data line must hold at least six whitespace-separated fields
        ``chr1 x1 x2 chr2 y1 y2``. An optional seventh field is split on
        commas, converted to integers and handed to ``rgb2hex`` to obtain the
        loop color; without it ``self.DEFAULT_COLOR`` is used. Records whose
        two chromosomes differ are ignored, and blank lines are skipped.

        Return
        ------
        interval_tree : dict
            Maps a chromosome name to an ``IntervalTree``. Each interval spans
            ``x1`` to ``y2`` and carries the ``LoopInverval`` record as data.
        """
        interval_tree = {}

        with opener(self.properties['file']) as f:
            for idx, line in enumerate(f):
                line = to_string(line)
                # skip header line
                if idx == 0 and self.__is_header(line):
                    continue

                fields = line.split()
                # a blank line (e.g. a trailing newline at the end of the
                # file) has no fields; unpacking it would raise ValueError
                if not fields:
                    continue

                chr1, x1, x2, chr2, y1, y2, *other = fields
                x1, x2, y1, y2 = (int(v) for v in (x1, x2, y1, y2))

                # skip inter-chromosome interaction
                if chr1 != chr2:
                    continue

                # trees are keyed by the "chr"-prefixed form of the name
                chromosome = chr1
                if not chromosome.startswith("chr"):
                    chromosome = change_chrom_names(chromosome)
                if chromosome not in interval_tree:
                    interval_tree[chromosome] = IntervalTree()

                if other:
                    rgb = [int(channel) for channel in other[0].split(",")]
                    color = rgb2hex(*rgb)
                else:
                    color = self.DEFAULT_COLOR

                # the record keeps the chromosome names as written in the file
                loop = self.LoopInverval(chr1, x1, x2, chr2, y1, y2, color)
                interval_tree[chromosome].add(Interval(x1, y2, loop))

        return interval_tree
Пример #2
0
    def fetch_intervals(self, genome_range):
        """
        Fetch BigWig intervals within input chromosome range.

        Parameters
        ----------
        genome_range
            Range understood by ``split_genome_range``.

        Return
        ------
        intval_table : pandas.core.frame.DataFrame
            Columns ``chromsome``, ``start``, ``end`` and ``score``. The
            chromosome column holds the name as requested, even when the
            file was queried under the alternative name. The table is empty
            when nothing overlaps the range.
        """
        chrom, start, end = split_genome_range(genome_range)
        if chrom not in self.bw.chroms():
            chrom_ = change_chrom_names(chrom)
        else:
            chrom_ = chrom

        intervals = self.bw.intervals(chrom_, start, end)
        # NOTE(review): self.bw looks like a pyBigWig handle, whose
        # intervals() returns None rather than an empty tuple when nothing
        # overlaps the range - confirm. Normalising here keeps len() and the
        # iteration below from raising TypeError in that case.
        if intervals is None:
            intervals = ()

        intval_table = pd.DataFrame(
            {
                "chromsome": [chrom] * len(intervals),
                "start": [s for s, _, _ in intervals],
                "end": [e for _, e, _ in intervals],
                "score": [v for _, _, v in intervals],
            },
            columns=['chromsome', 'start', 'end', 'score'])

        return intval_table
Пример #3
0
    def fetch_data(self, gr: GenomeRange, **kwargs) -> pd.DataFrame:
        """
        Load the rows overlapping ``gr`` into a score table.

        When nothing is found under ``gr.chrom`` the lookup is retried once
        with the name returned by ``change_chrom_names``.

        Parameters
        ----------
        gr : GenomeRange
            Region to load. Its ``chrom`` attribute is swapped only for the
            duration of the retry and is restored before returning.

        Return
        ------
        pandas.core.frame.DataFrame
            Table with columns ``chromsome``, ``start``, ``end``, ``score``.
        """
        rows = self.load(gr)
        if len(rows) == 0:
            # Retry under the alternative chromosome name. The caller's range
            # object is put back afterwards so that it is not left renamed
            # (previously the rename leaked out, even when the retry found
            # nothing or raised).
            original_chrom = gr.chrom
            gr.chrom = change_chrom_names(original_chrom)
            try:
                rows = self.load(gr)
            finally:
                gr.chrom = original_chrom

        return pd.DataFrame(rows,
                            columns=['chromsome', 'start', 'end', 'score'])
Пример #4
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Plots the boundaries as triangles in the given ax.

        Each interval overlapping the region becomes an isosceles triangle
        whose base is the interval and whose height equals its length. When
        the chromosome is unknown under both its given and its alternative
        name, nothing is drawn and the "No regions found" warning is logged.

        Parameters
        ----------
        ax
            Matplotlib axes to draw on; also stored as ``self.ax``.
        chrom_region : str
            Chromosome name.
        start_region, end_region : int
            Plotted range; used for the interval query and the x limits.
        """
        self.ax = ax

        from matplotlib.patches import Polygon
        ymax = 0.001
        valid_regions = 0
        if chrom_region not in self.interval_tree:
            orig = chrom_region
            chrom_region = change_chrom_names(chrom_region)
            log.info(
                'Chromosome name: {} does not exists. Changing name to {}'.
                format(orig, chrom_region))

        if chrom_region in self.interval_tree:
            regions = sorted(
                self.interval_tree[chrom_region][start_region:end_region])
        else:
            # unknown under either name: fall through to the warning below
            # instead of failing with KeyError
            regions = []

        for region in regions:
            #       /\
            #      /  \
            #     /    \
            # _____________________
            #    x1 x2 x3
            x1 = region.begin
            x3 = region.end
            y1 = 0
            y2 = x3 - x1  # apex height equals the interval length
            x2 = x1 + float(y2) / 2

            rgb, edgecolor = self.get_rgb_and_edge_color(region.data)

            triangle = Polygon(np.array([[x1, y1], [x2, y2], [x3, y1]]),
                               closed=True,
                               facecolor=rgb,
                               edgecolor=edgecolor)
            ax.add_artist(triangle)
            valid_regions += 1

            ymax = max(ymax, y2)

        if valid_regions == 0:
            log.warning("No regions found for Track {}.".format(
                self.properties['name']))

        ax.set_xlim(start_region, end_region)
        if 'orientation' in self.properties and self.properties[
                'orientation'] == 'inverted':
            # inverted tracks point downwards, so the y axis is flipped
            ax.set_ylim(ymax, 0)
        else:
            ax.set_ylim(0, ymax)

        self.plot_label()
Пример #5
0
    def __get_vlines(self, chrom, start, end):
        """
        Return the sorted start positions of the vertical lines in a range.

        The lookup is done in ``self.vlines_intval_tree`` over
        ``start - 1`` to ``end + 1``. If the chromosome is not present, the
        name returned by ``change_chrom_names`` is tried; if that is missing
        as well an empty list is returned.
        """
        # membership is tested on the mapping itself; copying its keys into
        # a list first is unnecessary
        if chrom not in self.vlines_intval_tree:
            chrom = change_chrom_names(chrom)
        if chrom not in self.vlines_intval_tree:
            # no lines known for this chromosome under either name
            return []

        hits = self.vlines_intval_tree[chrom][start - 1:end + 1]
        return [region.begin for region in sorted(hits)]
Пример #6
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Plot the scores of a region as a filled curve on ``ax``.

        Scores are read from the first element of each interval's data and
        placed at the interval midpoint. When ``properties['extra'][0]`` is
        ``'4C'`` vertical lines and a connecting line are drawn on top of the
        fill. A chromosome unknown under both names yields an empty plot.

        Parameters
        ----------
        ax
            Matplotlib axes to draw on; also stored as ``self.ax``.
        chrom_region : str
            Chromosome name.
        start_region, end_region : int
            Plotted range.
        """
        self.ax = ax

        if chrom_region not in self.interval_tree:
            chrom_region = change_chrom_names(chrom_region)

        if chrom_region in self.interval_tree:
            # the query is padded by 10000 on both sides of the plotted range
            regions = sorted(
                self.interval_tree[chrom_region][start_region - 10000:end_region + 10000])
        else:
            # unknown chromosome: draw nothing instead of raising KeyError
            regions = []

        score_list = [float(region.data[0]) for region in regions]
        pos_list = [region.begin + (region.end - region.begin) / 2
                    for region in regions]

        if 'color' not in self.properties:
            self.properties['color'] = PlotBedGraph.DEFAULT_COLOR

        if 'extra' in self.properties and self.properties['extra'][0] == '4C':
            # draw a vertical line for each fragment region center
            self.ax.fill_between(pos_list, score_list,
                                 facecolor=self.properties['color'],
                                 edgecolor='none')
            self.ax.vlines(pos_list, [0], score_list, color='olive', linewidth=0.5)
            self.ax.plot(pos_list, score_list, '-', color='slateblue', linewidth=0.7)
        else:
            try:
                self.ax.fill_between(pos_list, score_list, facecolor=self.properties['color'])
            except ValueError:
                log.warning("Invalid color {} for {}. "
                            "Using gray instead.".format(self.properties['color'], self.properties['file']))
                self.ax.fill_between(pos_list, score_list, facecolor='gray')

        self.ax.set_frame_on(False)
        self.ax.axes.get_xaxis().set_visible(False)
        self.ax.axes.get_yaxis().set_visible(False)
        self.ax.set_xlim(start_region, end_region)

        ymax = self.properties['max_value']
        ymin = self.properties['min_value']
        self.ax.set_ylim(ymin, ymax)

        # the data range is shown unless explicitly switched off
        hide_range = ('show_data_range' in self.properties
                      and self.properties['show_data_range'] == 'no')
        if not hide_range:
            self.plot_data_range(ymin, ymax, self.properties['data_range_style'])

        self.plot_label()
Пример #7
0
    def __get_regions(self, chrom, start, end):
        """
        Return ``(begin, end, data)`` tuples for the intervals near a range.

        The lookup in ``self.interval_tree`` is padded by 10000 on both
        sides and the result is sorted. If the chromosome is not present,
        the name returned by ``change_chrom_names`` is tried; if that is
        missing as well an empty list is returned.
        """
        # membership is tested on the mapping itself; copying its keys into
        # a list first is unnecessary
        if chrom not in self.interval_tree:
            chrom = change_chrom_names(chrom)
        if chrom not in self.interval_tree:
            # nothing known for this chromosome under either name
            return []

        hits = self.interval_tree[chrom][start - 10000:end + 10000]
        return [(region.begin, region.end, region.data)
                for region in sorted(hits)]
Пример #8
0
 def fetch_intervals(self, genome_range):
     """
     Return the intervals overlapping ``genome_range`` as a table.

     The range is loaded first via ``load_range``. The chromosome is looked
     up under its given name and, failing that, under the name returned by
     ``change_chrom_names``; an unknown chromosome gives an empty interval
     list. The result is built by ``intervals2dataframe``.
     """
     self.load_range(genome_range)
     chrom, start, end = split_genome_range(genome_range)

     if chrom not in self.interval_tree:
         # fall back to the alternative spelling of the chromosome name
         chrom = change_chrom_names(chrom)

     if chrom in self.interval_tree:
         overlapping = sorted(self.interval_tree[chrom][start:end])
     else:
         overlapping = []

     return self.intervals2dataframe(overlapping)
Пример #9
0
    def fetch_intervals(self, genome_range: Union[str, GenomeRange]):
        """
        Return the rows overlapping ``genome_range`` as a score table.

        The rows are loaded for the chromosome name as given; when that
        yields nothing, one more attempt is made with the name returned by
        ``change_chrom_names``. The result has the columns ``chromsome``,
        ``start``, ``end`` and ``score``.
        """
        chrom, start, end = split_genome_range(genome_range)

        rows = self.__load(GenomeRange(chrom, start, end))
        if len(rows) == 0:
            # nothing under this name: retry with the alternative spelling
            alternative = change_chrom_names(chrom)
            rows = self.__load(GenomeRange(alternative, start, end))

        column_names = ['chromsome', 'start', 'end', 'score']
        return pd.DataFrame(rows, columns=column_names)
Пример #10
0
    def plot(self, ax, chrom_region, region_start, region_end):
        """
        Makes an arc connecting two points on a linear scale representing
        interactions between Hi-C bins.

        One half-ellipse is added per interval overlapping the region; its
        width is the interval length and its height is fixed. Intervals that
        start before and end after the region are skipped. A chromosome
        unknown under both names results in no arcs being drawn.

        Parameters
        ----------
        ax
            Matplotlib axes to draw on; also stored as ``self.ax``.
        chrom_region : str
            Chromosome name.
        region_start, region_end : int
            Plotted range.
        """
        self.ax = ax

        from matplotlib.patches import Arc
        height = 1
        count = 0
        if chrom_region not in self.interval_tree:
            chrom_region = change_chrom_names(chrom_region)

        if chrom_region in self.interval_tree:
            arcs_in_region = sorted(
                self.interval_tree[chrom_region][region_start:region_end])
        else:
            # unknown chromosome: draw nothing instead of raising KeyError
            arcs_in_region = []

        for interval in arcs_in_region:
            # skip arcs whose start and end are outside the plotted region
            if interval.begin < region_start and interval.end > region_end:
                continue

            if 'line_width' in self.properties:
                line_width = float(self.properties['line_width'])
            else:
                # without a configured width, scale it with the interval data
                line_width = 0.5 * np.sqrt(interval.data)

            diameter = (interval.end - interval.begin)
            center = (interval.begin + interval.end) / 2
            count += 1
            ax.plot([center], [diameter])
            ax.add_patch(Arc((center, 0), diameter,
                             height*2, 0, 0, 180, color=self.properties['color'], lw=line_width))

        # increase the height slightly to avoid cropping of the arcs.
        height += height * 0.05
        log.debug("{} were arcs plotted".format(count))
        if 'orientation' in self.properties and self.properties['orientation'] == 'inverted':
            ax.set_ylim(height, 0.001)
        else:
            ax.set_ylim(-0.001, height)

        ax.set_xlim(region_start, region_end)
        log.debug('title is {}'.format(self.properties['title']))

        self.plot_label()
Пример #11
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Plots the boundaries as triangles or boxes in the given ax.

        Which shape is drawn depends on ``self.track_type``: triangles for
        ``HiC:window``/``HiC:triangular`` and for the BigWig, BedGraph,
        ABCompartment and Arcs types, boxes for other ``HiC*`` types and for
        ``HicCompare``. When no interval overlaps the region (including an
        unknown chromosome) nothing is drawn and a warning is logged.

        Parameters
        ----------
        ax
            Matplotlib axes; stored as ``self.ax``.
        chrom_region : str
            Chromosome name.
        start_region, end_region : int
            Plotted range.
        """
        self.load_range(f"{chrom_region}:{start_region}-{end_region}")
        self.ax = ax
        genome_range = GenomeRange(chrom_region, start_region, end_region)
        self._genome_range = genome_range

        if chrom_region not in self.interval_tree:
            orig = chrom_region
            chrom_region = change_chrom_names(chrom_region)
            log.debug(
                'Chromosome name: {} does not exists. Changing name to {}'.
                format(orig, chrom_region))

        if chrom_region in self.interval_tree:
            current_regions = sorted(
                self.interval_tree[chrom_region][start_region:end_region])
        else:
            # unknown under either name: fall through to the warning below
            # instead of failing with KeyError
            current_regions = []

        # default=0 keeps an empty selection from raising ValueError, so the
        # "No regions found" warning at the end can actually be reached
        ymax = max((region.end - region.begin for region in current_regions),
                   default=0)
        for region in current_regions:
            if self.track_type.startswith('HiC'):

                if self.track_type in ('HiC:window', 'HiC:triangular'):
                    # triangular Hi-C views are scaled by the plotted range
                    depth = (end_region - start_region) / 2
                    ymax = (end_region - start_region)
                    self.__plot_triangular(region, ymax, depth)
                else:
                    self.__plot_box(region)

            elif self.track_type == 'HicCompare':
                self.__plot_box(region)

            elif self.track_type in [
                    'BigWig', 'BedGraph', 'ABCompartment', 'Arcs'
            ]:
                # the triangle depth follows the current y limits of the axes
                depth_neg, depth_pos = ax.get_ylim()
                if ('orientation' in self.track.properties) and (
                        self.track.properties['orientation'] == 'inverted'):
                    depth = depth_neg
                else:
                    depth = depth_pos
                self.__plot_triangular(region, ymax, depth)

        if len(current_regions) == 0:
            log.warning("No regions found for Coverage {}.".format(
                self.properties['name']))
Пример #12
0
    def fetch_data(self, gr: GenomeRange, **kwargs):
        """
        Query the BigWig reader for a region and return its interval table.

        Parameters
        ----------
        gr : GenomeRange
            Region to query.

        Return
        ------
        table : pandas.core.frame.DataFrame
            Table returned by the reader, with the first ``value`` column
            (if there is one) renamed to ``score``.
        """
        chrom, start, end = split_genome_range(gr)
        if chrom not in self.bw.chromsizes:
            # the file does not know this name; try the alternative spelling
            chrom = change_chrom_names(chrom)

        table = self.bw.fetch_intervals(chrom, start, end)

        names = list(table.columns)
        try:
            names[names.index('value')] = 'score'
        except ValueError:
            # no 'value' column present: the names stay as they are
            pass
        table.columns = names

        return table
Пример #13
0
    def fetch_data(self, gr: GenomeRange, **kwargs):
        """
        Fetch the BigWig intervals overlapping a region.

        Parameters
        ----------
        gr : GenomeRange
            Region to query.

        Return
        ------
        intervals : pandas.core.frame.DataFrame
            BigWig interval table with the columns ``chromsome``, ``start``,
            ``end`` and ``score``. The chromosome column holds the name the
            file was queried with. The table is empty when nothing overlaps
            the region.
        """
        chrom, start, end = split_genome_range(gr)
        if chrom not in self.bw.chroms():
            chrom = change_chrom_names(chrom)

        intervals = self.bw.intervals(chrom, start, end)
        # NOTE(review): self.bw looks like a pyBigWig handle, whose
        # intervals() returns None rather than an empty tuple when nothing
        # overlaps the range - confirm. Normalising here keeps len() and the
        # iteration below from raising TypeError in that case.
        if intervals is None:
            intervals = ()

        intval_table = pd.DataFrame(
            {
                "chromsome": [chrom] * len(intervals),
                "start": [s for s, _, _ in intervals],
                "end": [e for _, e, _ in intervals],
                "score": [v for _, _, v in intervals],
            },
            columns=['chromsome', 'start', 'end', 'score'])

        return intval_table
Пример #14
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Draw the loops overlapping a region as rectangles on ``ax``.

        The drawing style and depth ratio are copied from the parent track's
        properties. For the triangular and window styles one rectangle
        rotated by 45 degrees is drawn per loop, unless its ``y`` position
        reaches the depth limit. For the matrix style an upper and/or lower
        rectangle is drawn according to ``properties['side']``. Nothing is
        drawn when the chromosome is unknown under both names.

        Parameters
        ----------
        ax
            Matplotlib axes to add the rectangles to.
        chrom_region : str
            Chromosome name.
        start_region, end_region : int
            Plotted range. The interval query uses the parent track's
            ``fetch_region`` instead when that attribute exists.
        """

        def add_rectangle(x, y, w, h, fill, facecolor, edgecolor, **extra):
            # single place where the shared rectangle options are applied
            ax.add_patch(
                Rectangle((x, y),
                          w,
                          h,
                          fill=fill,
                          alpha=self.properties['alpha'],
                          facecolor=facecolor,
                          edgecolor=edgecolor,
                          linewidth=self.properties['line_width'],
                          linestyle=self.properties['line_style'],
                          **extra))

        if chrom_region not in self.interval_tree:
            chrom_region = change_chrom_names(chrom_region)
        if chrom_region not in self.interval_tree:
            # no loops known for this chromosome under either name; return
            # quietly instead of raising KeyError
            return

        if hasattr(self.track, 'fetch_region'):
            start_fetch = self.track.fetch_region.start
            end_fetch = self.track.fetch_region.end
        else:
            start_fetch, end_fetch = start_region, end_region

        for intval in sorted(
                self.interval_tree[chrom_region][start_fetch:end_fetch]):
            loop = intval.data

            # 'rgb' / 'bed_rgb' mean: take the color stored with the loop
            if self.properties['color'] in ('rgb', 'bed_rgb'):
                color = loop.color
            else:
                color = self.properties['color']

            if self.properties['fill_color'] in ('rgb', 'bed_rgb'):
                fill_color = loop.color
            else:
                fill_color = self.properties['fill_color']

            fill = self.properties['fill'] == 'yes'

            # follow the parent track's current style settings
            self.properties['style'] = self.track.properties['style']
            self.properties['depth_ratio'] = self.track.properties[
                'depth_ratio']
            style = self.properties['style']

            if style == STYLE_TRIANGULAR or style == STYLE_WINDOW:

                depth_ratio = self.properties['depth_ratio']
                if depth_ratio == DEPTH_FULL:
                    depth_ratio = 1

                region_length = (end_region - start_region)
                depth_full = region_length * 0.5
                depth_limit = depth_full * depth_ratio

                x, y, (w, h) = self.__get_position_and_size(
                    loop.x1,
                    loop.x2,
                    loop.y1,
                    loop.y2,
                    style=style)

                # loops at or beyond the visible depth are not drawn
                if y >= depth_limit:
                    continue

                add_rectangle(x, y, w, h, fill, fill_color, color, angle=45)

            elif style == STYLE_MATRIX:

                # upper first, then lower; 'both' draws the two of them
                for side in ("upper", "lower"):
                    if self.properties['side'] == side or self.properties[
                            'side'] == 'both':
                        x, y, (w, h) = self.__get_position_and_size(
                            loop.x1,
                            loop.x2,
                            loop.y1,
                            loop.y2,
                            style=STYLE_MATRIX,
                            side=side)
                        add_rectangle(x, y, w, h, fill, fill_color, color)
Пример #15
0
    def plot(self, ax, chrom_region, start_region, end_region):
        """
        Plot the intervals overlapping a region, stacked into rows.

        Each interval is put into the first row whose last occupied position
        lies before the interval start, so that intervals (and their labels,
        when drawn) do not overlap. When the chromosome is unknown under both
        names nothing is drawn and the "No intervals were found" warning is
        logged.

        Parameters
        ----------
        ax
            Matplotlib axes to draw on.
        chrom_region : str
            Chromosome name.
        start_region, end_region : int
            Plotted range.
        """
        self.counter = 0
        self.small_relative = 0.004 * (end_region - start_region)
        self.__get_length_w(ax.get_figure().get_figwidth(), start_region,
                            end_region)
        if 'global_max_row' in self.properties and self.properties[
                'global_max_row'] == 'yes':
            self.__get_max_num_row(self.len_w, self.small_relative)

        if chrom_region not in self.interval_tree:
            chrom_region = change_chrom_names(chrom_region)

        if chrom_region in self.interval_tree:
            genes_overlap = sorted(
                self.interval_tree[chrom_region][start_region:end_region])
        else:
            # unknown under either name: fall through to the warning below
            # instead of failing with KeyError
            genes_overlap = []

        if self.properties['labels'] == 'auto':
            # turn labels off when too many intervals are visible.
            self.is_draw_labels = len(genes_overlap) <= 60

        max_ypos = 0
        # check for the number of other intervals that overlap
        #    with the given interval
        #            1         2
        #  012345678901234567890123456
        #  1=========       4=========
        #       2=========
        #         3============
        #
        # for 1 row_last_position = [9]
        # for 2 row_last_position = [9, 14]
        # for 3 row_last_position = [9, 14, 19]
        # for 4 row_last_position = [26, 14, 19]

        row_last_position = [
        ]  # each entry in this list contains the end position
        # of genomic interval. The list index is the row
        # in which the genomic interval was plotted.
        # Any new genomic interval that wants to be plotted,
        # knows the row to use by finding the list index that
        # is larger than its start

        # check for overlapping genes including
        # label size (if plotted)

        for region in genes_overlap:
            # BED12 gene format with exon locations at the end
            # chrX    20850   23076   CG17636-RA      0       -       20850   23017   0       3       946,765,64,     0,1031,2162,
            #
            # BED9
            # bed with rgb at end
            # chr2L   0       70000   ID_5    0.26864549832   .       0       70000   51,160,44
            #
            # BED6
            # bed without rgb
            # chr2L   0       70000   ID_5    0.26864549832   .
            self.counter += 1
            bed = region.data

            if self.is_draw_labels:
                num_name_characters = len(
                    bed.name
                ) + 2  # +2 to account for an space before and after the name
                bed_extended_end = int(bed.end +
                                       (num_name_characters * self.len_w))
            else:
                bed_extended_end = (bed.end + 2 * self.small_relative)

            # get smallest free row: the first row that ends before bed.start
            free_row = next((idx
                             for idx, value in enumerate(row_last_position)
                             if value < bed.start), None)
            if free_row is None:
                # every row is still occupied (or none exists yet): open one
                free_row = len(row_last_position)
                row_last_position.append(bed_extended_end)
            else:
                row_last_position[free_row] = bed_extended_end

            rgb, edgecolor = self.get_rgb_and_edge_color(bed)

            ypos = self.__get_y_pos(free_row)

            # do not plot if the maximum interval rows to plot is reached
            if 'gene_rows' in self.properties and free_row >= int(
                    self.properties['gene_rows']):
                continue

            if ypos > max_ypos:
                max_ypos = ypos

            if self.bed_type == 'bed12':
                if self.properties['style'] == 'flybase':
                    self.__draw_gene_with_introns_flybase_style(
                        ax, bed, ypos, rgb, edgecolor)
                else:
                    self.__draw_gene_with_introns(ax, bed, ypos, rgb,
                                                  edgecolor)
            else:
                self.__draw_gene_simple(ax, bed, ypos, rgb, edgecolor)

            # a label is only written for intervals fully inside the region
            if (self.is_draw_labels and bed.start > start_region
                    and bed.end < end_region):
                ax.text(bed.end + self.small_relative,
                        ypos + (float(self.properties['interval_height']) / 2),
                        bed.name,
                        horizontalalignment='left',
                        verticalalignment='center',
                        fontproperties=self.fp)

        if self.counter == 0:
            log.warning(
                "*Warning* No intervals were found for file {} "
                "in Track '{}' for the interval plotted ({}:{}-{}).\n".format(
                    self.properties['file'], self.properties['name'],
                    chrom_region, start_region, end_region))

        ymax = 0

        if 'global_max_row' in self.properties and self.properties[
                'global_max_row'] == 'yes':
            ymin = self.max_num_row[chrom_region] * self.row_scale

        elif 'gene_rows' in self.properties:
            ymin = int(self.properties['gene_rows']) * self.row_scale
        else:
            ymin = max_ypos + self.properties['interval_height']

        log.debug("ylim {},{}".format(ymin, ymax))
        # the axis is inverted (thus, ymax < ymin)
        ax.set_ylim(ymin, ymax)

        # the 'domain' and 'collapsed' displays use fixed y limits instead
        if 'display' in self.properties:
            if self.properties['display'] == 'domain':
                ax.set_ylim(-5, 205)
            elif self.properties['display'] == 'collapsed':
                ax.set_ylim(-5, 105)

        ax.set_xlim(start_region, end_region)

        self.plot_label()