def __process_loop_file(self):
    """
    Parse the loop file named by ``self.properties['file']`` into one
    IntervalTree per chromosome.

    Every data line must hold at least six whitespace-separated fields,
    ``chr1 x1 x2 chr2 y1 y2``. An optional seventh field is read as an
    ``R,G,B`` triple and converted with ``rgb2hex``; without it
    ``self.DEFAULT_COLOR`` is used. Records whose two anchors lie on
    different chromosomes are dropped. Blank lines are ignored.

    Returns
    -------
    interval_tree : dict
        Maps chromosome name to an IntervalTree. Each interval spans
        ``x1`` to ``y2`` and carries a ``self.LoopInverval`` record.
    """
    interval_tree = {}

    with opener(self.properties['file']) as f:
        for idx, line in enumerate(f):
            line = to_string(line)
            # skip header line
            if idx == 0 and self.__is_header(line):
                continue

            fields = line.split()
            if not fields:
                # A blank line (typically a trailing newline) has no fields
                # and would break the unpacking below.
                continue

            chr1, x1, x2, chr2, y1, y2, *other = fields
            x1, x2, y1, y2 = (int(value) for value in (x1, x2, y1, y2))

            # skip inter-chromosome interaction
            if chr1 != chr2:
                continue

            # Names without a "chr" prefix are passed through
            # change_chrom_names before being used as the dictionary key.
            chromosome = chr1
            if not chromosome.startswith("chr"):
                chromosome = change_chrom_names(chromosome)
            if chromosome not in interval_tree:
                interval_tree[chromosome] = IntervalTree()

            if other:
                color = rgb2hex(*(int(channel) for channel in other[0].split(",")))
            else:
                color = self.DEFAULT_COLOR

            # The record keeps the chromosome names exactly as written in
            # the file; only the dictionary key is normalised.
            loop = self.LoopInverval(chr1, x1, x2, chr2, y1, y2, color)
            interval_tree[chromosome].add(Interval(x1, y2, loop))

    return interval_tree
def fetch_intervals(self, genome_range):
    """
    Fetch BigWig intervals within input chromosome range.

    Parameters
    ----------
    genome_range
        Region to query; it is decomposed with ``split_genome_range``.

    Returns
    -------
    intval_table : pandas.DataFrame
        One row per interval with the columns ``chromsome`` (sic),
        ``start``, ``end`` and ``score``. The chromosome column holds the
        name as requested, even when the file was queried under the
        alternative name. The table is empty when nothing overlaps.
    """
    chrom, start, end = split_genome_range(genome_range)
    if chrom not in self.bw.chroms():
        chrom_ = change_chrom_names(chrom)
    else:
        chrom_ = chrom

    # The reader may return None rather than an empty sequence when nothing
    # overlaps the range (pyBigWig does this); normalise so that len() and
    # iteration below keep working.
    intervals = self.bw.intervals(chrom_, start, end) or ()

    intval_table = pd.DataFrame(
        {
            "chromsome": [chrom] * len(intervals),
            "start": [s for s, _, _ in intervals],
            "end": [e for _, e, _ in intervals],
            "score": [v for _, _, v in intervals],
        },
        columns=['chromsome', 'start', 'end', 'score'])

    return intval_table
def fetch_data(self, gr: GenomeRange, **kwargs) -> pd.DataFrame:
    """
    Load the rows overlapping ``gr`` and return them as a table.

    When the first lookup yields nothing, the lookup is repeated with the
    chromosome name converted by ``change_chrom_names``. The retry works on
    a copy, so the caller's ``gr`` is left untouched.

    Parameters
    ----------
    gr : GenomeRange
        Region to load.
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    pandas.DataFrame
        Columns ``chromsome`` (sic), ``start``, ``end``, ``score``.
    """
    import copy

    rows = self.load(gr)
    if len(rows) == 0:
        # Retry under the alternative chromosome naming without mutating
        # the range object that belongs to the caller.
        alt_gr = copy.copy(gr)
        alt_gr.chrom = change_chrom_names(gr.chrom)
        rows = self.load(alt_gr)
    return pd.DataFrame(rows, columns=['chromsome', 'start', 'end', 'score'])
def plot(self, ax, chrom_region, start_region, end_region):
    """
    Plots the boundaries as triangles in the given ax.

    Each interval overlapping ``start_region:end_region`` becomes an
    isosceles triangle whose base is the interval and whose apex height
    equals the interval length. The y-limits are set to the tallest
    triangle (reversed when ``properties['orientation']`` is
    ``'inverted'``). When the chromosome is unknown under both naming
    conventions nothing is drawn and a warning is logged.
    """
    self.ax = ax

    from matplotlib.patches import Polygon
    ymax = 0.001
    valid_regions = 0
    if chrom_region not in self.interval_tree:
        orig = chrom_region
        chrom_region = change_chrom_names(chrom_region)
        log.info(
            'Chromosome name: {} does not exists. Changing name to {}'.
            format(orig, chrom_region))

    # The renamed chromosome may be missing as well; treat that as "no
    # regions" instead of failing with a KeyError.
    if chrom_region in self.interval_tree:
        regions = sorted(
            self.interval_tree[chrom_region][start_region:end_region])
    else:
        regions = []

    for region in regions:
        #       /\
        #      /  \
        #     /    \
        # _____________________
        #    x1 x2 x3
        x1 = region.begin
        x2 = x1 + float(region.end - region.begin) / 2
        x3 = region.end
        y1 = 0
        y2 = (region.end - region.begin)

        rgb, edgecolor = self.get_rgb_and_edge_color(region.data)

        triangle = Polygon(np.array([[x1, y1], [x2, y2], [x3, y1]]),
                           closed=True,
                           facecolor=rgb,
                           edgecolor=edgecolor)
        ax.add_artist(triangle)
        valid_regions += 1

        ymax = max(ymax, y2)

    if valid_regions == 0:
        log.warning("No regions found for Track {}.".format(
            self.properties['name']))

    ax.set_xlim(start_region, end_region)
    if 'orientation' in self.properties and self.properties[
            'orientation'] == 'inverted':
        ax.set_ylim(ymax, 0)
    else:
        ax.set_ylim(0, ymax)

    self.plot_label()
def __get_vlines(self, chrom, start, end):
    """
    Return the sorted start positions of the vertical lines that overlap
    ``start - 1`` to ``end + 1`` on ``chrom``.

    If ``chrom`` is not a key of ``self.vlines_intval_tree`` the name
    returned by ``change_chrom_names`` is tried; if that is missing as
    well, an empty list is returned.
    """
    if chrom not in self.vlines_intval_tree:
        chrom = change_chrom_names(chrom)
    if chrom not in self.vlines_intval_tree:
        # Unknown under both naming conventions: nothing to draw.
        return []

    hits = self.vlines_intval_tree[chrom][start - 1:end + 1]
    return [region.begin for region in sorted(hits)]
def plot(self, ax, chrom_region, start_region, end_region):
    """
    Draw the scores of the region as a filled curve on ``ax``.

    Intervals are fetched with a 10000 bp margin on both sides of the
    requested region; each contributes one point at its midpoint with
    ``region.data[0]`` as the value. When ``properties['extra'][0]`` is
    ``'4C'`` the fill is overlaid with one vertical line per point and a
    connecting line. The y-limits come from ``properties['min_value']``
    and ``properties['max_value']``. A chromosome that is unknown under
    both naming conventions yields an empty plot.
    """
    self.ax = ax

    if chrom_region not in self.interval_tree:
        chrom_region = change_chrom_names(chrom_region)

    # The renamed chromosome may be missing too; plot nothing rather than
    # fail with a KeyError.
    if chrom_region in self.interval_tree:
        regions = sorted(
            self.interval_tree[chrom_region][start_region - 10000:end_region + 10000])
    else:
        regions = []

    score_list = [float(region.data[0]) for region in regions]
    pos_list = [region.begin + (region.end - region.begin) / 2
                for region in regions]

    if 'color' not in self.properties:
        self.properties['color'] = PlotBedGraph.DEFAULT_COLOR

    if 'extra' in self.properties and self.properties['extra'][0] == '4C':
        # draw a vertical line for each fragment region center
        self.ax.fill_between(pos_list, score_list,
                             facecolor=self.properties['color'],
                             edgecolor='none')
        self.ax.vlines(pos_list, [0], score_list, color='olive', linewidth=0.5)
        self.ax.plot(pos_list, score_list, '-', color='slateblue', linewidth=0.7)
    else:
        try:
            self.ax.fill_between(pos_list, score_list,
                                 facecolor=self.properties['color'])
        except ValueError:
            log.warning("Invalid color {} for {}. "
                        "Using gray instead.".format(self.properties['color'],
                                                     self.properties['file']))
            self.ax.fill_between(pos_list, score_list, facecolor='gray')

    self.ax.set_frame_on(False)
    self.ax.axes.get_xaxis().set_visible(False)
    self.ax.axes.get_yaxis().set_visible(False)
    self.ax.set_xlim(start_region, end_region)

    ymax = self.properties['max_value']
    ymin = self.properties['min_value']
    self.ax.set_ylim(ymin, ymax)

    # by default show the data range
    hide_range = ('show_data_range' in self.properties
                  and self.properties['show_data_range'] == 'no')
    if not hide_range:
        self.plot_data_range(ymin, ymax, self.properties['data_range_style'])

    self.plot_label()
def __get_regions(self, chrom, start, end):
    """
    Return ``(begin, end, data)`` tuples for the intervals overlapping the
    requested range widened by 10000 bp on both sides, in sorted order.

    If ``chrom`` is not a key of ``self.interval_tree`` the name returned
    by ``change_chrom_names`` is tried; if that is missing as well, an
    empty list is returned.
    """
    if chrom not in self.interval_tree:
        chrom = change_chrom_names(chrom)
    if chrom not in self.interval_tree:
        # Unknown under both naming conventions: no regions.
        return []

    hits = self.interval_tree[chrom][start - 10000:end + 10000]
    return [(region.begin, region.end, region.data) for region in sorted(hits)]
def fetch_intervals(self, genome_range):
    """
    Return the intervals overlapping ``genome_range`` as a table.

    The range is first handed to ``self.load_range``. The chromosome is
    then looked up in ``self.interval_tree``, falling back to the name
    produced by ``change_chrom_names``. A chromosome found under neither
    name gives an empty interval list. The sorted intervals are converted
    with ``self.intervals2dataframe``.
    """
    self.load_range(genome_range)
    chrom, start, end = split_genome_range(genome_range)

    if chrom not in self.interval_tree:
        chrom = change_chrom_names(chrom)

    overlapping = []
    if chrom in self.interval_tree:
        overlapping = sorted(self.interval_tree[chrom][start:end])

    return self.intervals2dataframe(overlapping)
def fetch_intervals(self, genome_range: Union[str, GenomeRange]):
    """
    Return the rows overlapping ``genome_range`` as a table.

    The range is split into chromosome, start and end and loaded through
    ``self.__load``. If that yields no rows, the load is repeated once
    with the chromosome name converted by ``change_chrom_names``.

    Returns
    -------
    pandas.DataFrame
        Columns ``chromsome`` (sic), ``start``, ``end``, ``score``.
    """
    chrom, start, end = split_genome_range(genome_range)

    rows = self.__load(GenomeRange(chrom, start, end))
    if not len(rows):
        # Nothing found: retry under the alternative chromosome name.
        chrom = change_chrom_names(chrom)
        fallback_range = GenomeRange(chrom, start, end)
        rows = self.__load(fallback_range)

    return pd.DataFrame(rows, columns=['chromsome', 'start', 'end', 'score'])
def plot(self, ax, chrom_region, region_start, region_end):
    """
    Makes an arc connecting two points on a linear scale representing
    interactions between Hi-C bins.

    One half-ellipse (0 to 180 degrees) is drawn per interval overlapping
    the region. Its width is the interval length and its height is fixed,
    so all arcs are equally tall. The line width is
    ``properties['line_width']`` when present, otherwise
    ``0.5 * sqrt(interval.data)``. A chromosome that is unknown under both
    naming conventions results in no arcs.
    """
    self.ax = ax

    from matplotlib.patches import Arc
    height = 1
    count = 0

    if chrom_region not in self.interval_tree:
        chrom_region = change_chrom_names(chrom_region)

    # The renamed chromosome may be missing too; draw nothing rather than
    # fail with a KeyError.
    if chrom_region in self.interval_tree:
        arcs_in_region = sorted(
            self.interval_tree[chrom_region][region_start:region_end])
    else:
        arcs_in_region = []

    for interval in arcs_in_region:
        # skip arcs whose start and end are outside the plotted region
        if interval.begin < region_start and interval.end > region_end:
            continue

        if 'line_width' in self.properties:
            line_width = float(self.properties['line_width'])
        else:
            line_width = 0.5 * np.sqrt(interval.data)

        diameter = (interval.end - interval.begin)
        center = (interval.begin + interval.end) / 2
        count += 1
        ax.plot([center], [diameter])
        # angle/theta1/theta2 are keyword-only in current matplotlib, so
        # they must not be passed positionally.
        ax.add_patch(Arc((center, 0), diameter, height * 2,
                         angle=0, theta1=0, theta2=180,
                         color=self.properties['color'], lw=line_width))

    # leave a little headroom above the unit height used for the limits
    height += height * 0.05
    log.debug("{} were arcs plotted".format(count))
    if 'orientation' in self.properties and self.properties['orientation'] == 'inverted':
        ax.set_ylim(height, 0.001)
    else:
        ax.set_ylim(-0.001, height)

    ax.set_xlim(region_start, region_end)
    log.debug('title is {}'.format(self.properties['title']))
    self.plot_label()
def plot(self, ax, chrom_region, start_region, end_region):
    """
    Plots the boundaries as triangles in the given ax.

    The requested range is loaded with ``self.load_range`` and remembered
    in ``self._genome_range``. Each overlapping interval is then drawn
    according to ``self.track_type``: triangular for the ``HiC:window`` /
    ``HiC:triangular`` types and for ``BigWig``, ``BedGraph``,
    ``ABCompartment`` and ``Arcs``; as a box for the remaining ``HiC*``
    types and for ``HicCompare``. When no interval overlaps the region
    (or the chromosome is unknown under both naming conventions) a warning
    is logged and nothing is drawn.
    """
    self.load_range(f"{chrom_region}:{start_region}-{end_region}")
    self.ax = ax

    genome_range = GenomeRange(chrom_region, start_region, end_region)
    self._genome_range = genome_range

    if chrom_region not in self.interval_tree:
        orig = chrom_region
        chrom_region = change_chrom_names(chrom_region)
        log.debug(
            'Chromosome name: {} does not exists. Changing name to {}'.
            format(orig, chrom_region))

    if chrom_region in self.interval_tree:
        current_regions = sorted(
            self.interval_tree[chrom_region][start_region:end_region])
    else:
        current_regions = []

    if not current_regions:
        # Bail out before max(): it raises ValueError on an empty sequence,
        # which previously made this warning unreachable.
        log.warning("No regions found for Coverage {}.".format(
            self.properties['name']))
        return

    ymax = max(region.end - region.begin for region in current_regions)

    for region in current_regions:
        if self.track_type.startswith('HiC'):
            if self.track_type in ('HiC:window', 'HiC:triangular'):
                depth = (end_region - start_region) / 2
                ymax = (end_region - start_region)
                self.__plot_triangular(region, ymax, depth)
            else:
                self.__plot_box(region)
        elif self.track_type == 'HicCompare':
            self.__plot_box(region)
        elif self.track_type in [
                'BigWig', 'BedGraph', 'ABCompartment', 'Arcs'
        ]:
            # The depth follows the current y-limits of the axes; which end
            # is used depends on the orientation of the parent track.
            depth_neg, depth_pos = ax.get_ylim()
            if ('orientation' in self.track.properties) and (
                    self.track.properties['orientation'] == 'inverted'):
                depth = depth_neg
            else:
                depth = depth_pos
            self.__plot_triangular(region, ymax, depth)
def fetch_data(self, gr: GenomeRange, **kwargs):
    """
    Fetch the intervals of a genome range as a table.

    Parameters
    ----------
    gr : GenomeRange
        Region to query. If its chromosome is not listed in
        ``self.bw.chromsizes`` the name given by ``change_chrom_names``
        is queried instead.

    Return
    ------
    intervals : pandas.core.frame.DataFrame
        BigWig interval table as returned by ``self.bw.fetch_intervals``,
        with its first ``value`` column (if any) renamed to ``score``.
    """
    chrom, start, end = split_genome_range(gr)
    query_chrom = chrom if chrom in self.bw.chromsizes else change_chrom_names(chrom)

    intervals = self.bw.fetch_intervals(query_chrom, start, end)

    names = list(intervals.columns)
    for position, name in enumerate(names):
        if name == 'value':
            # Only the first match is renamed.
            names[position] = 'score'
            break
    intervals.columns = names

    return intervals
def fetch_data(self, gr: GenomeRange, **kwargs):
    """
    Fetch the BigWig intervals of a genome range as a table.

    Parameters
    ----------
    gr : GenomeRange
        Region to query. If its chromosome is not listed in
        ``self.bw.chroms()`` the name given by ``change_chrom_names`` is
        used for the query and reported in the result.

    Return
    ------
    intervals : pandas.core.frame.DataFrame
        BigWig interval table with the columns ``chromsome`` (sic),
        ``start``, ``end`` and ``score``; empty when nothing overlaps.
    """
    chrom, start, end = split_genome_range(gr)
    if chrom not in self.bw.chroms():
        chrom = change_chrom_names(chrom)

    # The reader may return None rather than an empty sequence when nothing
    # overlaps the range (pyBigWig does this); normalise so that len() and
    # iteration below keep working.
    intervals = self.bw.intervals(chrom, start, end) or ()

    intval_table = pd.DataFrame(
        {
            "chromsome": [chrom] * len(intervals),
            "start": [s for s, _, _ in intervals],
            "end": [e for _, e, _ in intervals],
            "score": [v for _, _, v in intervals],
        },
        columns=['chromsome', 'start', 'end', 'score'])

    return intval_table
def plot(self, ax, chrom_region, start_region, end_region):
    """
    Draw every loop overlapping the region as a rectangle on ``ax``.

    The interval query uses ``self.track.fetch_region`` when the parent
    track has one, otherwise the plotted region. ``style`` and
    ``depth_ratio`` are copied from the parent track's properties into
    ``self.properties`` before drawing. In the triangular/window styles
    each loop is a rectangle rotated by 45 degrees, and loops at or above
    the depth limit are skipped; in the matrix style a rectangle is drawn
    on the ``upper`` and/or ``lower`` side according to
    ``properties['side']``. A ``color`` / ``fill_color`` of ``'rgb'`` or
    ``'bed_rgb'`` means "use the loop's own color". Nothing is drawn when
    the chromosome is unknown under both naming conventions.
    """
    if chrom_region not in self.interval_tree:
        chrom_region = change_chrom_names(chrom_region)
    if chrom_region not in self.interval_tree:
        # Unknown under both naming conventions: no loops to draw.
        return

    if hasattr(self.track, 'fetch_region'):
        start_fetch = self.track.fetch_region.start
        end_fetch = self.track.fetch_region.end
    else:
        start_fetch, end_fetch = start_region, end_region

    intervals = sorted(
        self.interval_tree[chrom_region][start_fetch:end_fetch])
    if not intervals:
        return

    # Everything below up to the loop is identical for all loops, so it is
    # computed once instead of per interval.
    props = self.properties
    props['style'] = self.track.properties['style']
    props['depth_ratio'] = self.track.properties['depth_ratio']
    style = props['style']

    fill = props['fill'] == 'yes'
    edge_from_loop = props['color'] in ('rgb', 'bed_rgb')
    face_from_loop = props['fill_color'] in ('rgb', 'bed_rgb')

    is_rotated = style == STYLE_TRIANGULAR or style == STYLE_WINDOW
    if is_rotated:
        depth_ratio = props['depth_ratio'] if props[
            'depth_ratio'] != DEPTH_FULL else 1
        region_length = (end_region - start_region)
        depth_full = region_length * 0.5
        depth_limit = depth_full * depth_ratio

    def patch_kwargs(edge, face):
        # Appearance options shared by every rectangle.
        return dict(fill=fill,
                    alpha=props['alpha'],
                    facecolor=face,
                    edgecolor=edge,
                    linewidth=props['line_width'],
                    linestyle=props['line_style'])

    for intval in intervals:
        loop = intval.data
        color = loop.color if edge_from_loop else props['color']
        fill_color = loop.color if face_from_loop else props['fill_color']

        if is_rotated:
            x, y, (w, h) = self.__get_position_and_size(
                loop.x1, loop.x2, loop.y1, loop.y2, style=style)
            if y >= depth_limit:
                continue
            ax.add_patch(Rectangle((x, y), w, h, angle=45,
                                   **patch_kwargs(color, fill_color)))
        elif style == STYLE_MATRIX:
            # upper rectangle first, then lower
            for side in ("upper", "lower"):
                if props['side'] == side or props['side'] == 'both':
                    x, y, (w, h) = self.__get_position_and_size(
                        loop.x1, loop.x2, loop.y1, loop.y2,
                        style=STYLE_MATRIX, side=side)
                    ax.add_patch(Rectangle((x, y), w, h,
                                           **patch_kwargs(color, fill_color)))
def plot(self, ax, chrom_region, start_region, end_region):
    """
    Draw the BED intervals overlapping the region, stacked into rows so
    that intervals (plus their labels, when drawn) do not overlap.

    Each interval is placed in the lowest-numbered row whose last occupied
    position lies before the interval's start; a new row is opened when
    none is free. Intervals that land in a row at or beyond
    ``properties['gene_rows']`` are not drawn. With
    ``properties['labels'] == 'auto'`` labels are switched off when more
    than 60 intervals are visible. The y-axis is inverted (row 0 on top).
    A chromosome that is unknown under both naming conventions is treated
    as having no intervals, and a warning is logged.
    """
    self.counter = 0
    self.small_relative = 0.004 * (end_region - start_region)
    self.__get_length_w(ax.get_figure().get_figwidth(), start_region,
                        end_region)
    if 'global_max_row' in self.properties and self.properties[
            'global_max_row'] == 'yes':
        self.__get_max_num_row(self.len_w, self.small_relative)

    if chrom_region not in self.interval_tree.keys():
        chrom_region = change_chrom_names(chrom_region)

    # The renamed chromosome may be missing too; fall through to the
    # "no intervals" warning instead of failing with a KeyError.
    if chrom_region in self.interval_tree:
        genes_overlap = sorted(
            self.interval_tree[chrom_region][start_region:end_region])
    else:
        genes_overlap = []

    if self.properties['labels'] == 'auto':
        # turn labels off when too many intervals are visible.
        self.is_draw_labels = len(genes_overlap) <= 60

    max_ypos = 0
    # check for the number of other intervals that overlap
    #    with the given interval
    #            1         2
    #  012345678901234567890123456
    #  1=========       4=========
    #       2=========
    #         3============
    #
    # for 1 row_last_position = [9]
    # for 2 row_last_position = [9, 14]
    # for 3 row_last_position = [9, 14, 19]
    # for 4 row_last_position = [26, 14, 19]

    # Each entry in this list contains the end position of a genomic
    # interval. The list index is the row in which the genomic interval
    # was plotted. Any new genomic interval that wants to be plotted knows
    # the row to use by finding the first list index whose stored end is
    # smaller than its start.
    row_last_position = []

    # check for overlapping genes including
    # label size (if plotted)
    for region in genes_overlap:
        # BED12 gene format with exon locations at the end
        # chrX 20850 23076 CG17636-RA 0 - 20850 23017 0 3 946,765,64, 0,1031,2162,
        #
        # BED9
        # bed with rgb at end
        # chr2L 0 70000 ID_5 0.26864549832 . 0 70000 51,160,44
        #
        # BED6
        # bed without rgb
        # chr2L 0 70000 ID_5 0.26864549832 .
        self.counter += 1
        bed = region.data

        if self.is_draw_labels:
            # +2 to account for a space before and after the name
            num_name_characters = len(bed.name) + 2
            bed_extended_end = int(bed.end +
                                   (num_name_characters * self.len_w))
        else:
            bed_extended_end = (bed.end + 2 * self.small_relative)

        # get smallest free row: the first row whose last occupied position
        # lies before this interval's start
        free_row = next(
            (idx for idx, last_end in enumerate(row_last_position)
             if last_end < bed.start),
            None)
        if free_row is None:
            free_row = len(row_last_position)
            row_last_position.append(bed_extended_end)
        else:
            row_last_position[free_row] = bed_extended_end

        rgb, edgecolor = self.get_rgb_and_edge_color(bed)

        ypos = self.__get_y_pos(free_row)

        # do not plot if the maximum interval rows to plot is reached
        # (the row stays reserved in row_last_position nonetheless)
        if 'gene_rows' in self.properties and free_row >= int(
                self.properties['gene_rows']):
            continue

        if ypos > max_ypos:
            max_ypos = ypos

        if self.bed_type == 'bed12':
            if self.properties['style'] == 'flybase':
                self.__draw_gene_with_introns_flybase_style(
                    ax, bed, ypos, rgb, edgecolor)
            else:
                self.__draw_gene_with_introns(ax, bed, ypos, rgb,
                                              edgecolor)
        else:
            self.__draw_gene_simple(ax, bed, ypos, rgb, edgecolor)

        # Labels are only written for intervals lying strictly inside the
        # plotted region.
        if (self.is_draw_labels and bed.start > start_region
                and bed.end < end_region):
            ax.text(bed.end + self.small_relative,
                    ypos + (float(self.properties['interval_height']) / 2),
                    bed.name,
                    horizontalalignment='left',
                    verticalalignment='center',
                    fontproperties=self.fp)

    if self.counter == 0:
        log.warning(
            "*Warning* No intervals were found for file {} "
            "in Track '{}' for the interval plotted ({}:{}-{}).\n".format(
                self.properties['file'], self.properties['name'],
                chrom_region, start_region, end_region))

    ymax = 0

    if 'global_max_row' in self.properties and self.properties[
            'global_max_row'] == 'yes':
        ymin = self.max_num_row[chrom_region] * self.row_scale
    elif 'gene_rows' in self.properties:
        ymin = int(self.properties['gene_rows']) * self.row_scale
    else:
        # float() matches how interval_height is read for the label offset
        # above, so a string-valued property does not break the addition.
        ymin = max_ypos + float(self.properties['interval_height'])

    log.debug("ylim {},{}".format(ymin, ymax))
    # the axis is inverted (thus, ymax < ymin)
    ax.set_ylim(ymin, ymax)

    if 'display' in self.properties:
        if self.properties['display'] == 'domain':
            ax.set_ylim(-5, 205)
        elif self.properties['display'] == 'collapsed':
            ax.set_ylim(-5, 105)

    ax.set_xlim(start_region, end_region)

    self.plot_label()