def test_rangelabel(self):
    """Check label parsing/formatting round-trip and `unpack_range`.

    The label "chr1:123-456" is expected to parse to ("chr1", 122, 456)
    and to format back to the same label text.
    """
    parsed = rangelabel.from_label("chr1:123-456", keep_gene=False)
    self.assertEqual(tuple(parsed), ("chr1", 122, 456))
    self.assertEqual(rangelabel.to_label(parsed), "chr1:123-456")

    # unpack_range: accepts a bare chromosome name, a label string, or a
    # tuple (with or without a trailing 4th field) -- each paired here with
    # the (chrom, start, end) it should yield.
    cases = (
        ["chr1", ("chr1", None, None)],
        ["chr1:100-123", ("chr1", 99, 123)],
        [("chr1", 100, 123), ("chr1", 100, 123)],
        [("chr1", 100, 123, "A"), ("chr1", 100, 123)],
    )
    for region, expect in cases:
        self.assertEqual(rangelabel.unpack_range(region), expect)
def translate_region_to_bins(region, bins):
    """Convert a genomic region into bin-index coordinates.

    Returns a `Region` of (chrom, start, end), the same layout that
    `unpack_range` produces. A `None` region, or a region with neither
    start nor end, is passed through without translation. Otherwise a
    missing start is treated as 0 and a missing end as +infinity before
    looking up positions among the chromosome's bin start coordinates.
    """
    if region is None:
        return Region(None, None, None)

    chrom, start, end = unpack_range(region)
    if start is None and end is None:
        # Whole-chromosome selection: nothing to translate
        return Region(chrom, start, end)

    lower = 0 if start is None else start
    upper = float("inf") if end is None else end

    # NB: only bin start positions matter here
    on_chrom = bins.data.chromosome == chrom
    c_bin_starts = bins.data.loc[on_chrom, "start"].values
    r_start, r_end = np.searchsorted(c_bin_starts, [lower, upper])
    return Region(chrom, r_start, r_end)
def select_range_genes(cnarr, segments, variants, show_range, show_gene,
                       window_width):
    """Determine which datapoints to show based on the given options.

    Behaviors::

        start/end   show_gene
           +           +       given region + genes; err if any gene outside it
           -           +       window +/- around genes
           +           -       given region, highlighting any genes within it
           -           -       whole chromosome, no genes

    If `show_range` is a chromosome name only, no start/end positions, then the
    whole chromosome will be shown.

    If region start/end coordinates are given and `show_gene` is '' or ',' (or
    all commas, etc.), then instead of highlighting all genes in the selection,
    no genes will be highlighted.

    Parameters
    ----------
    cnarr, segments, variants
        Data containers to subset; any of them may be falsy. The first
        truthy one is used to look up the chromosome's last end position
        when the range has no end coordinate.
    show_range
        Region specifier passed to `unpack_range`.
    show_gene
        None, or a comma-separated string of gene names.
    window_width
        Margin added on each side of the displayed window when it is derived
        from genes or from a small gene-less selection.

    Returns
    -------
    tuple
        (sel_probes, sel_segs, sel_snvs, window_coords, gene_ranges, chrom)

    Raises
    ------
    ValueError
        If the coordinate range is empty or inverted, if the named genes map
        to more than one chromosome, or if a named gene lies outside the
        specified region.
    """
    chrom, start, end = unpack_range(show_range)
    if start is None and end is None:
        # Either the specified range is only chrom, no start-end, or gene names
        # were given
        window_coords = ()
    else:
        # Viewing region coordinates were specified -- take them as given
        # Fill in open-ended ranges' endpoints
        if start is None or start < 0:
            start = 0
        if not end:
            # Default selection endpoint to the maximum chromosome position
            end = (cnarr or segments or variants
                   ).filter(chromosome=chrom).end.iat[-1]
        if end <= start:
            raise ValueError(
                "Coordinate range {}:{}-{} (from {}) has size <= 0".format(
                    chrom, start, end, show_range))
        window_coords = (start, end)

    gene_ranges = []
    if show_gene is None:
        if window_coords:
            if cnarr:
                # Highlight all genes within the given range
                gene_ranges = plots.gene_coords_by_range(
                    cnarr, chrom, start, end)[chrom]
            if not gene_ranges and (end - start) < 10 * window_width:
                # No genes in the selected region, so if the selection is small
                # (i.e. <80% of the displayed window, <10x window padding),
                # highlight the selected region itself.
                # (To prevent this, use show_gene='' or window_width=0)
                logging.info("No genes found in selection; will highlight the "
                             "selected region itself instead")
                gene_ranges = [(start, end, "Selection")]
                window_coords = (max(0, start - window_width),
                                 end + window_width)
    else:
        # Build a real list: on Python 3 `filter()` returns an iterator, which
        # is always truthy, so an empty/all-commas `show_gene` would otherwise
        # never be recognized as "no genes requested".
        gene_names = [name for name in show_gene.split(',') if name]
        if gene_names:
            # Scan for probes matching the specified gene(s)
            gene_coords = plots.gene_coords_by_name(cnarr or segments,
                                                    gene_names)
            if len(gene_coords) > 1:
                raise ValueError("Genes %s are split across chromosomes %s"
                                 % (show_gene, list(gene_coords.keys())))
            g_chrom, gene_ranges = gene_coords.popitem()
            if chrom:
                # Confirm that the selected chromosomes match
                core.assert_equal(
                    "Chromosome also selected by region (-c) "
                    "does not match",
                    **{
                        "chromosome": chrom,
                        "gene(s)": g_chrom
                    })
            else:
                chrom = g_chrom

            gene_ranges.sort()
            if window_coords:
                # Verify all genes fit in the given window
                for gene_start, gene_end, gene_name in gene_ranges:
                    if not (start <= gene_start and gene_end <= end):
                        raise ValueError("Selected gene %s (%s:%d-%d) "
                                         "is outside specified region %s"
                                         % (gene_name, chrom, gene_start,
                                            gene_end, show_range))
            elif not show_range:
                # Set the display window to the selected genes +/- a margin
                window_coords = (max(0, gene_ranges[0][0] - window_width),
                                 gene_ranges[-1][1] + window_width)

    # Prune plotted elements to the selected region
    sel_probes = (cnarr.in_range(chrom, *window_coords)
                  if cnarr else CNA([]))
    sel_segs = (segments.in_range(chrom, *window_coords, mode='trim')
                if segments else CNA([]))
    sel_snvs = (variants.in_range(chrom, *window_coords)
                if variants else None)
    logging.info(
        "Showing %d probes and %d selected genes in region %s",
        len(sel_probes), len(gene_ranges),
        (chrom + ":%d-%d" % window_coords if window_coords else chrom))

    return sel_probes, sel_segs, sel_snvs, window_coords, gene_ranges, chrom
def select_range_genes(cnarr, segments, variants, show_range, show_gene,
                       window_width):
    """Determine which datapoints to show based on the given options.

    Behaviors::

        start/end   show_gene
           +           +       given region + genes; err if any gene outside it
           -           +       window +/- around genes
           +           -       given region, highlighting any genes within it
           -           -       whole chromosome, no genes

    If `show_range` is a chromosome name only, no start/end positions, then the
    whole chromosome will be shown.

    If region start/end coordinates are given and `show_gene` is '' or ',' (or
    all commas, etc.), then instead of highlighting all genes in the selection,
    no genes will be highlighted.

    Returns a 6-tuple: (selected probes, selected segments, selected variants
    or None, window coordinates -- `()` or `(start, end)` --, gene ranges as a
    list of `(start, end, name)`, chromosome name).

    Raises ValueError if the coordinate range has size <= 0, if the requested
    genes are found on more than one chromosome, or if a requested gene falls
    outside the explicitly given region.
    """
    chrom, start, end = unpack_range(show_range)
    if start is None and end is None:
        # Either the specified range is only chrom, no start-end, or gene names
        # were given
        window_coords = ()
    else:
        # Viewing region coordinates were specified -- take them as given
        # Fill in open-ended ranges' endpoints
        if start is None:
            start = 0
        elif start < 0:
            start = 0
        if not end:
            # Default selection endpoint to the maximum chromosome position
            end = (cnarr or segments or variants
                   ).filter(chromosome=chrom).end.iat[-1]
        if end <= start:
            raise ValueError("Coordinate range {}:{}-{} (from {}) has size <= 0"
                             .format(chrom, start, end, show_range))
        window_coords = (start, end)

    gene_ranges = []
    if show_gene is None:
        if window_coords:
            if cnarr:
                # Highlight all genes within the given range
                gene_ranges = plots.gene_coords_by_range(cnarr, chrom, start,
                                                         end)[chrom]
            if not gene_ranges and (end - start) < 10 * window_width:
                # No genes in the selected region, so if the selection is small
                # (i.e. <80% of the displayed window, <10x window padding),
                # highlight the selected region itself.
                # (To prevent this, use show_gene='' or window_width=0)
                logging.info("No genes found in selection; will highlight the "
                             "selected region itself instead")
                gene_ranges = [(start, end, "Selection")]
                window_coords = (max(0, start - window_width),
                                 end + window_width)

    else:
        # `filter()` yields a lazy iterator on Python 3 whose truth value is
        # always True; convert to a list so that '' / ',' really means
        # "highlight no genes", as the docstring promises.
        gene_names = list(filter(None, show_gene.split(',')))
        if gene_names:
            # Scan for probes matching the specified gene(s)
            gene_coords = plots.gene_coords_by_name(cnarr or segments,
                                                    gene_names)
            if len(gene_coords) > 1:
                raise ValueError("Genes %s are split across chromosomes %s"
                                 % (show_gene, list(gene_coords.keys())))
            g_chrom, gene_ranges = gene_coords.popitem()
            if chrom:
                # Confirm that the selected chromosomes match
                core.assert_equal("Chromosome also selected by region (-c) "
                                  "does not match",
                                  **{"chromosome": chrom,
                                     "gene(s)": g_chrom})
            else:
                chrom = g_chrom

            gene_ranges.sort()
            if window_coords:
                # Verify all genes fit in the given window
                for gene_start, gene_end, gene_name in gene_ranges:
                    if not (start <= gene_start and gene_end <= end):
                        raise ValueError("Selected gene %s (%s:%d-%d) "
                                         "is outside specified region %s"
                                         % (gene_name, chrom, gene_start,
                                            gene_end, show_range))
            elif not show_range:
                # Set the display window to the selected genes +/- a margin
                window_coords = (max(0, gene_ranges[0][0] - window_width),
                                 gene_ranges[-1][1] + window_width)

    # Prune plotted elements to the selected region
    sel_probes = (cnarr.in_range(chrom, *window_coords)
                  if cnarr else CNA([]))
    sel_segs = (segments.in_range(chrom, *window_coords, mode='trim')
                if segments else CNA([]))
    sel_snvs = (variants.in_range(chrom, *window_coords)
                if variants else None)
    logging.info("Showing %d probes and %d selected genes in region %s",
                 len(sel_probes), len(gene_ranges),
                 (chrom + ":%d-%d" % window_coords if window_coords else chrom))

    return sel_probes, sel_segs, sel_snvs, window_coords, gene_ranges, chrom
def do_heatmap(cnarrs, show_range=None, do_desaturate=False, by_bin=False):
    """Plot copy number for multiple samples as a heatmap.

    Parameters
    ----------
    cnarrs
        Sequence of per-sample copy number arrays; one heatmap row is drawn
        per element, labelled with its `sample_id`.
    show_range
        Optional region specifier. When given, only that chromosome/region
        is laid out; otherwise all chromosomes are laid out side by side.
    do_desaturate
        Forwarded to `plots.cvg2rgb` when converting log2 values to colors.
    by_bin
        If true, positions are remapped with
        `plots.update_binwise_positions_simple` and the x-axis is labelled
        in bins instead of Mb.

    Returns
    -------
    The matplotlib axis the heatmap was drawn on.

    Raises
    ------
    ValueError
        If `by_bin` is used with a sub-chromosomal `show_range` but none of
        the inputs qualifies for mapping the region to bin coordinates.
    """
    _fig, axis = plt.subplots()
    set_colorbar(axis)

    # List sample names on the y-axis
    axis.set_yticks([i + 0.5 for i in range(len(cnarrs))])
    axis.set_yticklabels([c.sample_id for c in cnarrs])
    axis.set_ylim(0, len(cnarrs))
    axis.invert_yaxis()
    axis.set_ylabel("Samples")
    if hasattr(axis, 'set_facecolor'):
        # matplotlib >= 2.0
        axis.set_facecolor('#DDDDDD')
    else:
        # Older matplotlib
        axis.set_axis_bgcolor('#DDDDDD')

    if by_bin and show_range:
        try:
            a_cnarr = next(c for c in cnarrs if "probes" not in c)
        except StopIteration:
            r_chrom, r_start, r_end = unpack_range(show_range)
            if r_start is not None or r_end is not None:
                raise ValueError("Need at least 1 .cnr input file if --by-bin "
                                 "(by_bin) and --chromosome (show_range) are "
                                 "both used to specify a sub-chromosomal "
                                 "region.")
        else:
            logging.info("Using sample %s to map %s to bin coordinates",
                         a_cnarr.sample_id, show_range)
            r_chrom, r_start, r_end = plots.translate_region_to_bins(
                show_range, a_cnarr)
    else:
        r_chrom, r_start, r_end = unpack_range(show_range)
    if r_start is not None or r_end is not None:
        logging.info("Showing log2 ratios in range %s:%d-%s",
                     r_chrom, r_start or 0, r_end or '*')
    elif r_chrom:
        logging.info("Showing log2 ratios on chromosome %s", r_chrom)

    # Closes over do_desaturate
    def cna2df(cna):
        """Extract a dataframe of plotting points from a CopyNumArray."""
        points = cna.data.loc[:, ["start", "end"]]
        points["color"] = cna.log2.apply(plots.cvg2rgb,
                                         args=(do_desaturate, ))
        return points

    # Group each file's probes/segments by chromosome
    sample_data = [collections.defaultdict(list) for _c in cnarrs]
    # Calculate the size (max endpoint value) of each chromosome
    chrom_sizes = collections.OrderedDict()
    for i, cnarr in enumerate(cnarrs):
        if by_bin:
            cnarr = plots.update_binwise_positions_simple(cnarr)

        if r_chrom:
            subcna = cnarr.in_range(r_chrom, r_start, r_end, mode="trim")
            sample_data[i][r_chrom] = cna2df(subcna)
            chrom_sizes[r_chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                       chrom_sizes.get(r_chrom, 0))
        else:
            for chrom, subcna in cnarr.by_chromosome():
                sample_data[i][chrom] = cna2df(subcna)
                # Compare against the size recorded so far for *this*
                # chromosome (r_chrom is empty in this branch), so the
                # maximum across all samples is kept.
                chrom_sizes[chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                         chrom_sizes.get(chrom, 0))

    # Closes over axis
    def plot_sample_chrom(i, sample):
        """Draw the given coordinates and colors as a horizontal series."""
        xranges = [(start, end - start)
                   for start, end in zip(sample.start, sample.end)]
        bars = BrokenBarHCollection(xranges, (i, i + 1),
                                    edgecolors="none",
                                    facecolors=sample["color"])
        axis.add_collection(bars)

    if show_range:
        # Lay out only the selected chromosome
        # Set x-axis the chromosomal positions (in Mb), title as the selection
        if by_bin:
            MB = 1
            axis.set_xlabel("Position (bin)")
        else:
            MB = plots.MB
            axis.set_xlabel("Position (Mb)")
        axis.set_xlim((r_start or 0) * MB,
                      (r_end or chrom_sizes[r_chrom]) * MB)
        axis.set_title(show_range)
        axis.tick_params(which='both', direction='out')
        axis.get_xaxis().tick_bottom()
        axis.get_yaxis().tick_left()
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            crow = sample[r_chrom]
            if not len(crow):
                logging.warning("Sample #%d has no datapoints in selection %s",
                                i + 1, show_range)
            crow["start"] *= MB
            crow["end"] *= MB
            plot_sample_chrom(i, crow)

    else:
        # Lay out chromosome dividers and x-axis labels
        # (Just enough padding to avoid overlap with the divider line)
        chrom_offsets = plots.plot_x_dividers(axis, chrom_sizes, 1)
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            for chrom, curr_offset in chrom_offsets.items():
                crow = sample[chrom]
                if len(crow):
                    crow["start"] += curr_offset
                    crow["end"] += curr_offset
                    plot_sample_chrom(i, crow)
                else:
                    logging.warning("Sample #%d has no datapoints", i + 1)

    return axis
def do_heatmap(cnarrs, show_range=None, do_desaturate=False):
    """Plot copy number for multiple samples as a heatmap.

    Parameters
    ----------
    cnarrs
        Sequence of per-sample copy number arrays; one heatmap row is drawn
        per element, labelled with its `sample_id`.
    show_range
        Optional region specifier understood by `unpack_range`. When given,
        only that chromosome/region is laid out (x-axis in Mb); otherwise
        all chromosomes are laid out side by side.
    do_desaturate
        Forwarded to `plots.cvg2rgb` when converting log2 values to colors.

    Returns
    -------
    The matplotlib axis the heatmap was drawn on.
    """
    _fig, axis = plt.subplots()
    set_colorbar(axis)

    # List sample names on the y-axis
    axis.set_yticks([i + 0.5 for i in range(len(cnarrs))])
    axis.set_yticklabels([c.sample_id for c in cnarrs])
    axis.set_ylim(0, len(cnarrs))
    axis.invert_yaxis()
    axis.set_ylabel("Samples")
    # `set_axis_bgcolor` only exists in older matplotlib releases; prefer
    # `set_facecolor` whenever the axis provides it.
    if hasattr(axis, 'set_facecolor'):
        axis.set_facecolor('#DDDDDD')
    else:
        axis.set_axis_bgcolor('#DDDDDD')

    r_chrom, r_start, r_end = unpack_range(show_range)
    if r_start is not None or r_end is not None:
        # r_start may be None for an open-ended range; %d needs a number
        logging.info("Showing log2 ratios in range %s:%d-%s",
                     r_chrom, r_start or 0, r_end or '*')
    elif r_chrom:
        logging.info("Showing log2 ratios on chromosome %s", r_chrom)

    # Closes over do_desaturate
    def cna2df(cna):
        """Extract a dataframe of plotting points from a CopyNumArray."""
        points = cna.data.loc[:, ["start", "end"]]
        points["color"] = cna.log2.apply(plots.cvg2rgb,
                                         args=(do_desaturate, ))
        return points

    # Group each file's probes/segments by chromosome
    sample_data = [collections.defaultdict(list) for _c in cnarrs]
    # Calculate the size (max endpoint value) of each chromosome
    chrom_sizes = collections.OrderedDict()
    for i, cnarr in enumerate(cnarrs):
        if r_chrom:
            subcna = cnarr.in_range(r_chrom, r_start, r_end, mode="trim")
            sample_data[i][r_chrom] = cna2df(subcna)
            chrom_sizes[r_chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                       chrom_sizes.get(r_chrom, 0))
        else:
            for chrom, subcna in cnarr.by_chromosome():
                sample_data[i][chrom] = cna2df(subcna)
                # Look up the running maximum for *this* chromosome; r_chrom
                # is empty in this branch and would always yield 0.
                chrom_sizes[chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                         chrom_sizes.get(chrom, 0))

    # Closes over axis
    def plot_sample_chrom(i, sample):
        """Draw the given coordinates and colors as a horizontal series."""
        xranges = [(start, end - start)
                   for start, end in zip(sample.start, sample.end)]
        bars = BrokenBarHCollection(xranges, (i, i + 1),
                                    edgecolors="none",
                                    facecolors=sample["color"])
        axis.add_collection(bars)

    if show_range:
        # Lay out only the selected chromosome
        # Set x-axis the chromosomal positions (in Mb), title as the selection
        axis.set_xlim((r_start or 0) * plots.MB,
                      (r_end or chrom_sizes[r_chrom]) * plots.MB)
        axis.set_title(show_range)
        axis.set_xlabel("Position (Mb)")
        axis.tick_params(which='both', direction='out')
        axis.get_xaxis().tick_bottom()
        axis.get_yaxis().tick_left()
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            crow = sample[r_chrom]
            crow["start"] *= plots.MB
            crow["end"] *= plots.MB
            plot_sample_chrom(i, crow)

    else:
        # Lay out chromosome dividers and x-axis labels
        # (Just enough padding to avoid overlap with the divider line)
        chrom_offsets = plots.plot_x_dividers(axis, chrom_sizes, 1)
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            for chrom, curr_offset in chrom_offsets.items():
                crow = sample[chrom]
                if len(crow):
                    crow["start"] += curr_offset
                    crow["end"] += curr_offset
                    plot_sample_chrom(i, crow)

    return axis
def do_heatmap(cnarrs, show_range=None, do_desaturate=False, by_bin=False):
    """Plot copy number for multiple samples as a heatmap.

    Each element of `cnarrs` becomes one row, labelled by its `sample_id`.
    With `show_range`, only the selected chromosome/region is drawn;
    without it, every chromosome is drawn, separated by dividers. With
    `by_bin`, positions are first remapped by
    `plots.update_binwise_positions_simple` and the x-axis is in bins
    rather than Mb. `do_desaturate` is forwarded to `plots.cvg2rgb`.

    Returns the matplotlib axis that was drawn on.

    Raises ValueError when `by_bin` is combined with a sub-chromosomal
    `show_range` and no input is usable for translating the region to bin
    coordinates.
    """
    _fig, axis = plt.subplots()
    set_colorbar(axis)

    # List sample names on the y-axis
    axis.set_yticks([i + 0.5 for i in range(len(cnarrs))])
    axis.set_yticklabels([c.sample_id for c in cnarrs])
    axis.set_ylim(0, len(cnarrs))
    axis.invert_yaxis()
    axis.set_ylabel("Samples")
    if hasattr(axis, 'set_facecolor'):
        # matplotlib >= 2.0
        axis.set_facecolor('#DDDDDD')
    else:
        # Older matplotlib
        axis.set_axis_bgcolor('#DDDDDD')

    if by_bin and show_range:
        try:
            a_cnarr = next(c for c in cnarrs if "probes" not in c)
        except StopIteration:
            r_chrom, r_start, r_end = unpack_range(show_range)
            if r_start is not None or r_end is not None:
                raise ValueError("Need at least 1 .cnr input file if --by-bin "
                                 "(by_bin) and --chromosome (show_range) are "
                                 "both used to specify a sub-chromosomal "
                                 "region.")
        else:
            logging.info("Using sample %s to map %s to bin coordinates",
                         a_cnarr.sample_id, show_range)
            r_chrom, r_start, r_end = plots.translate_region_to_bins(
                show_range, a_cnarr)
    else:
        r_chrom, r_start, r_end = unpack_range(show_range)
    if r_start is not None or r_end is not None:
        logging.info("Showing log2 ratios in range %s:%d-%s",
                     r_chrom, r_start or 0, r_end or '*')
    elif r_chrom:
        logging.info("Showing log2 ratios on chromosome %s", r_chrom)

    # Closes over do_desaturate
    def cna2df(cna):
        """Extract a dataframe of plotting points from a CopyNumArray."""
        points = cna.data.loc[:, ["start", "end"]]
        points["color"] = cna.log2.apply(plots.cvg2rgb, args=(do_desaturate,))
        return points

    # Group each file's probes/segments by chromosome
    sample_data = [collections.defaultdict(list) for _c in cnarrs]
    # Calculate the size (max endpoint value) of each chromosome
    chrom_sizes = collections.OrderedDict()
    for i, cnarr in enumerate(cnarrs):
        if by_bin:
            cnarr = plots.update_binwise_positions_simple(cnarr)

        if r_chrom:
            subcna = cnarr.in_range(r_chrom, r_start, r_end, mode="trim")
            sample_data[i][r_chrom] = cna2df(subcna)
            chrom_sizes[r_chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                       chrom_sizes.get(r_chrom, 0))
        else:
            for chrom, subcna in cnarr.by_chromosome():
                sample_data[i][chrom] = cna2df(subcna)
                # Must key on `chrom`, not `r_chrom` (falsy in this branch):
                # otherwise the previous samples' sizes are ignored and the
                # last sample's endpoint wins instead of the maximum.
                chrom_sizes[chrom] = max(subcna.end.iat[-1] if subcna else 0,
                                         chrom_sizes.get(chrom, 0))

    # Closes over axis
    def plot_sample_chrom(i, sample):
        """Draw the given coordinates and colors as a horizontal series."""
        xranges = [(start, end - start)
                   for start, end in zip(sample.start, sample.end)]
        bars = BrokenBarHCollection(xranges, (i, i+1),
                                    edgecolors="none",
                                    facecolors=sample["color"])
        axis.add_collection(bars)

    if show_range:
        # Lay out only the selected chromosome
        # Set x-axis the chromosomal positions (in Mb), title as the selection
        if by_bin:
            MB = 1
            axis.set_xlabel("Position (bin)")
        else:
            MB = plots.MB
            axis.set_xlabel("Position (Mb)")
        axis.set_xlim((r_start or 0) * MB,
                      (r_end or chrom_sizes[r_chrom]) * MB)
        axis.set_title(show_range)
        axis.tick_params(which='both', direction='out')
        axis.get_xaxis().tick_bottom()
        axis.get_yaxis().tick_left()
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            crow = sample[r_chrom]
            if not len(crow):
                logging.warning("Sample #%d has no datapoints in selection %s",
                                i+1, show_range)
            crow["start"] *= MB
            crow["end"] *= MB
            plot_sample_chrom(i, crow)

    else:
        # Lay out chromosome dividers and x-axis labels
        # (Just enough padding to avoid overlap with the divider line)
        chrom_offsets = plots.plot_x_dividers(axis, chrom_sizes, 1)
        # Plot the individual probe/segment coverages
        for i, sample in enumerate(sample_data):
            for chrom, curr_offset in chrom_offsets.items():
                crow = sample[chrom]
                if len(crow):
                    crow["start"] += curr_offset
                    crow["end"] += curr_offset
                    plot_sample_chrom(i, crow)
                else:
                    logging.warning("Sample #%d has no datapoints", i+1)

    return axis
def chromosome_scatter(cnarr, segments, variants, show_range, show_gene,
                       antitarget_marker, do_trend, window_width, y_min, y_max,
                       title, segment_color):
    """Plot a specified region on one chromosome.

    Possibilities::

             Options | Shown
        ------------ | --------
        -c      | -g | Genes | Region
        ------- | -- | ----- | ------
        -       | +  | given | auto: gene(s) + margin
        chr     | -  | none  | whole chrom
        chr     | +  | given | whole chrom
        chr:s-e | -  | all   | given
        chr:s-e | +  | given | given

    Parameters
    ----------
    cnarr, segments, variants
        Data to plot; any may be falsy. CNV data (`cnarr`/`segments`) is
        drawn with `cnv_on_chromosome`, variants with `snv_on_chromosome`.
    show_range
        Region specifier passed to `unpack_range` (the "-c" option above).
    show_gene
        Comma-separated gene names (the "-g" option above), or falsy.
    window_width
        Margin added on each side of the gene span or the given region.
    antitarget_marker, do_trend, y_min, y_max, segment_color
        Forwarded to the plotting helpers.
    title
        Plot title; if None, "<sample_id> <chrom>" is used.

    Raises
    ------
    ValueError
        If no requested gene is found, if the requested genes lie on more
        than one chromosome, or if the window around the requested genes
        extends beyond the given region.
    """
    chrom, start, end = unpack_range(show_range)
    window_coords = ()
    genes = []
    if show_gene:
        gene_names = show_gene.split(',')
        # Scan for probes matching the specified gene
        gene_coords = plots.gene_coords_by_name(cnarr or segments, gene_names)
        if not gene_coords:
            # Distinguish "nothing matched" from "matched several
            # chromosomes" so the error message is not misleading
            raise ValueError("Genes %s were not found" % show_gene)
        if len(gene_coords) != 1:
            raise ValueError("Genes %s are split across chromosomes %s"
                             % (show_gene, list(gene_coords.keys())))
        g_chrom, genes = gene_coords.popitem()
        if chrom:
            # Confirm that the selected chromosomes match
            core.assert_equal(
                "Chromosome also selected by region (-c) "
                "does not match",
                **{
                    "chromosome": chrom,
                    "gene(s)": g_chrom
                })
        else:
            chrom = g_chrom
        # Set the display window to the selected genes +/- a margin
        genes.sort()
        window_coords = (max(0, genes[0][0] - window_width),
                         genes[-1][1] + window_width)

    if start is not None or end is not None:
        # Open-ended range with only an end coordinate: begin at position 0
        # (a None start cannot be compared or subtracted below)
        if start is None:
            start = 0
        # Default selection endpoint to the maximum chromosome position
        if not end:
            end = (cnarr or segments or variants
                   ).filter(chromosome=chrom).end.iat[-1]
        if window_coords:
            # Genes were specified, & window was set around them
            if start > window_coords[0] or end < window_coords[1]:
                raise ValueError("Selected gene region " + chrom +
                                 (":%d-%d" % window_coords) +
                                 " is outside specified region " +
                                 show_range)
        window_coords = (max(0, start - window_width), end + window_width)
        if cnarr and not genes:
            genes = plots.gene_coords_by_range(cnarr, chrom,
                                               start, end)[chrom]
        if not genes and window_width > (end - start) / 10.0:
            # No genes in the selected region, so highlight the region
            # itself (unless the selection is ~entire displayed window)
            logging.info("No genes found in selection; will show the "
                         "selected region itself instead")
            genes = [(start, end, "Selection")]
    elif show_range and window_coords:
        # Specified range is only chrom, no start-end
        # Reset window around selected genes to show the whole chromosome
        window_coords = ()

    # Prune plotted elements to the selected region
    sel_probes = (cnarr.in_range(chrom, *window_coords)
                  if cnarr else CNA([]))
    sel_seg = (segments.in_range(chrom, *window_coords, mode='trim')
               if segments else CNA([]))
    sel_snvs = (variants.in_range(chrom, *window_coords)
                if variants else None)
    logging.info(
        "Showing %d probes and %d selected genes in region %s",
        len(sel_probes), len(genes),
        (chrom + ":%d-%d" % window_coords if window_coords else chrom))

    # Create plots
    if cnarr or segments:
        # Plot CNVs at chromosome level
        if variants:
            # Lay out top 3/5 for the CN scatter, bottom 2/5 for SNP plot
            axgrid = pyplot.GridSpec(5, 1, hspace=.5)
            axis = pyplot.subplot(axgrid[:3])
            axis2 = pyplot.subplot(axgrid[3:], sharex=axis)
            # Plot allele freqs for only the selected region
            snv_on_chromosome(axis2, sel_snvs, sel_seg, genes, do_trend)
        else:
            _fig, axis = pyplot.subplots()
            axis.set_xlabel("Position (Mb)")
        cnv_on_chromosome(axis, sel_probes, sel_seg, genes,
                          antitarget_marker=antitarget_marker,
                          do_trend=do_trend, x_limits=window_coords,
                          y_min=y_min, y_max=y_max,
                          segment_color=segment_color)
    elif variants:
        # Only plot SNVs in a single-panel layout
        _fig, axis = pyplot.subplots()
        snv_on_chromosome(axis, sel_snvs, sel_seg, genes, do_trend)

    if title is None:
        title = "%s %s" % ((cnarr or segments or variants).sample_id, chrom)
    axis.set_title(title)