def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """
    Draw the HQ base-fraction histogram and wrap it in a plot group.

    The figure is written to output_dir as "<Constants.P_HQ>.png" through
    save_figure_with_thumbnail; the paths stored on the Plot are relative
    to output_dir.

    :param hqbasefraction_dist: mapping with 'NumBins', 'BinCounts' and
        'BinWidth' entries whose values are read from ``.metavalue``
    :param output_dir: directory that receives the image files
    :returns: list holding one PlotGroup that contains the HQ plot
    """
    group_id, plot_id = Constants.PG_HQ, Constants.P_HQ
    title = get_plot_title(spec, group_id, plot_id)
    xlabel = get_plot_xlabel(spec, group_id, plot_id)
    ylabel = get_plot_ylabel(spec, group_id, plot_id)

    num_bins = int(hqbasefraction_dist['NumBins'].metavalue)
    counts = []
    for bin_count in hqbasefraction_dist['BinCounts']:
        counts.append(int(bin_count.metavalue))
    # Bar positions are the bin index divided by the number of bins.
    positions = [float(index) / float(num_bins) for index in range(num_bins)]
    declared_width = float(hqbasefraction_dist['BinWidth'].metavalue)

    figure, axes = get_fig_axes_lpr()
    axes.bar(positions, counts,
             color=get_green(0),
             edgecolor=get_green(0),
             width=(declared_width * 0.75))
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)

    image_name = "{p}.png".format(p=plot_id)
    image_path = os.path.join(output_dir, image_name)
    png_base, thumbnail_base = save_figure_with_thumbnail(
        figure, image_path, dpi=DEFAULT_DPI)

    plot = Plot(plot_id,
                os.path.relpath(png_base, output_dir),
                title=title,
                caption=title,
                thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[plot])]
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """
    Build the read-length plot group, one bar chart per distribution.

    Each distribution is passed through lenDistShaper, drawn as a bar
    chart and saved (with a thumbnail) in output_dir as
    "<P_LENGTH_PREFIX><index>.png". The group thumbnail is the thumbnail
    of the last chart that was rendered.

    :param PlotConstants: namespace providing PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX
    :param title: title and caption for every plot, and the group title
    :param readLenDists: iterable of read-length distributions exposing
        ``labels``, ``bins`` and ``binWidth``
    :param readQualDists: read-quality distributions; currently unused
        (see the FIXME at the end of the function)
    :param output_dir: directory that receives the image files; image
        paths in the result are relative to it
    :param dpi: resolution handed to save_figure_with_thumbnail
    :param lenDistShaper: optional callable applied to each distribution
        before plotting; defaults to
        ``continuous_dist_shaper(readLenDists, trim_excess=True)``
    :returns: list containing the single read-length PlotGroup
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Stays None when there is nothing to plot. Previously the group
    # thumbnail was read from a loop variable, which raised
    # UnboundLocalError for an empty readLenDists.
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Reshaping must neither drop nor invent reads.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)

        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(get_plot_xlabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        len_axes.set_ylabel(get_plot_ylabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))

        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title,
                 caption=title,
                 thumbnail=group_thumbnail))

    group_kwargs = dict(title=title, plots=length_plots)
    if group_thumbnail is not None:
        # Only pass a thumbnail when one exists; PlotGroup is built
        # without one elsewhere in this module.
        group_kwargs['thumbnail'] = group_thumbnail

    # FIXME: read-quality plots aren't useful yet, so readQualDists is
    # ignored. The disabled implementation (one bar chart per entry,
    # saved as "<P_QUAL_PREFIX><index>.png" and grouped under
    # PlotConstants.PG_QUAL) was unreachable and has been removed; restore
    # it from version control when the plots are wanted.
    return [PlotGroup(PlotConstants.PG_LENGTH, **group_kwargs)]
def to_readlen_plotgroup(readlen_dist, output_dir):
    """
    Plot a read-length distribution and return it as a plot group.

    Bar positions are derived from the bin index and bin width, then
    passed through ``reshape`` together with the bin counts. Bars are
    only drawn when the distribution contains at least one read; the
    labelled (possibly empty) figure is saved either way as
    "<Constants.P_READLENGTH>.png" in output_dir.

    :param readlen_dist: distribution exposing ``numBins``, ``bins`` and
        ``binWidth``
    :param output_dir: directory that receives the image files
    :returns: list holding one PlotGroup with the read-length plot
    """
    group_id, plot_id = Constants.PG_READLENGTH, Constants.P_READLENGTH
    title = get_plot_title(spec, group_id, plot_id)
    xlabel = get_plot_xlabel(spec, group_id, plot_id)
    ylabel = get_plot_ylabel(spec, group_id, plot_id)

    num_bins = readlen_dist.numBins
    raw_heights = readlen_dist.bins
    raw_width = readlen_dist.binWidth
    raw_edges = [float(index) * raw_width for index in range(num_bins)]
    edges, heights, bar_width = reshape(readlen_dist, raw_edges, raw_heights)

    figure, axes = get_fig_axes_lpr()
    has_reads = sum(readlen_dist.bins) > 0
    if has_reads:
        axes.bar(edges, heights,
                 color=get_green(0),
                 edgecolor=get_green(0),
                 width=(bar_width * 0.75))
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    # Counts are whole reads, so keep the y ticks on integers.
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))

    image_path = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        figure, image_path, dpi=DEFAULT_DPI)

    plot = Plot(plot_id,
                os.path.relpath(png_base, output_dir),
                title=title,
                caption=title,
                thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[plot])]
def _get_plot_view_configs(self):
    """
    Any change to the 'raw' view of a report plot should be made here.

    Three histogram plots are configured, with labels and titles looked
    up in the report spec:

    1. Subread concordance
    2. Subread length
    3. Read length

    :returns: dict mapping each plot id to its PlotViewProperties
    """
    green = (get_green(3), get_green(2))
    blue = (get_blue(3), get_blue(2))
    # (plot id, plot group id, image file name, (fill, edge) colors)
    histograms = (
        (Constants.P_SUBREAD_CONCORDANCE, Constants.PG_SUBREAD_CONCORDANCE,
         'mapped_subread_concordance_histogram.png', green),
        (Constants.P_SUBREAD_LENGTH, Constants.PG_SUBREAD_LENGTH,
         'mapped_subreadlength_histogram.png', blue),
        (Constants.P_READLENGTH, Constants.PG_READLENGTH,
         'mapped_readlength_histogram.png', blue),
    )

    configs = {}
    for plot_id, group_id, image_name, (fill, edge) in histograms:
        view = PlotViewProperties(
            plot_id,
            group_id,
            generate_plot,
            image_name,
            xlabel=get_plot_xlabel(spec, group_id, plot_id),
            ylabel=get_plot_ylabel(spec, group_id, plot_id),
            color=fill,
            edgecolor=edge,
            use_group_thumb=True,
            plot_group_title=get_plot_title(spec, group_id, plot_id))
        configs[view.plot_id] = view
    return configs
def _plot_view_configs():
    """
    Any change to the 'raw' view of a report plot should be made here.

    Configures the pre- and post-filter read-length histograms and the
    pre- and post-filter read-score histograms. Only the post-filter
    views are flagged as the thumbnail of their plot group.

    :returns: dict mapping each plot id to its PlotViewProperties
    """
    def _length_view(plot_id, image_name, title, **extra):
        # Shared settings of the two read-length histograms.
        return PlotViewProperties(plot_id,
                                  Constants.PG_READ_LENGTH,
                                  custom_read_length_histogram,
                                  image_name,
                                  xlabel="ReadLength",
                                  ylabel="Reads",
                                  rlabel="bp > Read Length",
                                  title=title,
                                  color=get_green(3),
                                  edgecolor=get_green(2),
                                  plot_group_title="Polymerase Read Length",
                                  **extra)

    def _score_view(plot_id, image_name, title, **extra):
        # Shared settings of the two read-score histograms.
        return PlotViewProperties(plot_id,
                                  Constants.PG_READ_SCORE,
                                  custom_read_accuracy_histogram,
                                  image_name,
                                  xlabel="Read Quality",
                                  ylabel="Reads",
                                  rlabel="bp > Read Quality",
                                  title=title,
                                  color=get_blue(3),
                                  edgecolor=get_blue(2),
                                  plot_group_title="Polymerase Read Quality",
                                  **extra)

    views = (
        _length_view(Constants.P_PRE_FILTER_READ_LENGTH_HIST,
                     Constants.I_PRE_FILTER_READ_LENGTH,
                     "Pre-Filter"),
        _length_view(Constants.P_POST_FILTER_READ_LENGHT_HIST,
                     Constants.I_POST_FILTER_READ_LENGTH,
                     "Post-Filter",
                     use_group_thumb=True),
        _score_view(Constants.P_PRE_FILTER_READ_SCORE_HIST,
                    Constants.I_PRE_FILTER_READ_SCORE,
                    "Pre-Filter"),
        _score_view(Constants.P_POST_FILTER_READ_SCORE_HIST,
                    Constants.I_POST_FILTER_READ_SCORE,
                    "Post-Filter",
                    use_group_thumb=True),
    )

    # Key by plot id to make the views easier to access.
    return dict((view.plot_id, view) for view in views)
def _get_plot_view_configs(self):
    """
    Any change to the 'raw' view of a report plot should be made here.

    Three histogram plots are configured, with hard-coded labels:

    1. Subread accuracy
    2. Subread length
    3. Read length

    :returns: dict mapping each plot id to its PlotViewProperties
    """
    accuracy_view = PlotViewProperties(
        Constants.P_SUBREAD_ACCURACY,
        Constants.PG_SUBREAD_ACCURACY,
        generate_plot,
        'mapped_subread_accuracy_histogram.png',
        xlabel="Concordance",
        ylabel="Subreads",
        color=get_green(3),
        edgecolor=get_green(2),
        use_group_thumb=True,
        plot_group_title="Mapped Subread Accuracy")

    subread_length_view = PlotViewProperties(
        Constants.P_SUBREAD_LENGTH,
        Constants.PG_SUBREAD_LENGTH,
        generate_plot,
        'mapped_subreadlength_histogram.png',
        xlabel="Subread Length",
        ylabel="Subreads",
        use_group_thumb=True,
        color=get_blue(3),
        edgecolor=get_blue(2),
        plot_group_title="Mapped Subread Length")

    read_length_view = PlotViewProperties(
        Constants.P_READLENGTH,
        Constants.PG_READLENGTH,
        generate_plot,
        'mapped_readlength_histogram.png',
        xlabel="Read Length",
        ylabel="Reads",
        color=get_blue(3),
        edgecolor=get_blue(2),
        use_group_thumb=True,
        plot_group_title="Mapped Polymerase Read Length")

    configs = {}
    for view in (accuracy_view, subread_length_view, read_length_view):
        configs[view.plot_id] = view
    return configs
def __init__(self, plot_id, plot_group_id, plot_func, image_name,
             title=None, xlabel=None, ylabel=None, rlabel=None,
             color=get_green(2), edgecolor=get_green(3), thumb=None,
             use_group_thumb=False, plot_group_title=None):
    """
    Store the display settings of one plot and of the group it belongs to.

    FIXME This should be easier to use... Mixing the Plot and PlotGroup

    :param plot_id: id of the plot
    :param plot_group_id: id of the plot group the plot belongs to
    :param plot_func: plotting function; must be a plain function or a
        functools.partial, have the signature
        (aggregator, plot_view, output_dir) and return a (fig, ax) tuple
    :param image_name: name of the image file
    :param title: plot title
    :param xlabel: x axis label
    :param ylabel: y axis label
    :param rlabel: right x axis label
    :param color: bar color
    :param edgecolor: bar edge color
    :param thumb: thumbnail, stored as ``_thumb``
    :param use_group_thumb: whether this plot is the one used as the
        thumbnail of its PlotGroup (True/False)
    :param plot_group_title: title used for the plot group
    :raises TypeError: if plot_func is neither a function nor a partial
    """
    accepted_types = (types.FunctionType, functools.partial)
    if not isinstance(plot_func, accepted_types):
        message = "plot_func requies a function, Got type {t} for {f}".format(
            t=type(plot_func), f=plot_func)
        raise TypeError(message)

    # Identity of the plot and of its group.
    self.plot_id = plot_id
    self.plot_group_id = plot_group_id
    self.plot_group_title = plot_group_title

    # How the plot is produced and where it is stored.
    self.plot_func = plot_func
    self.image_name = image_name
    self._thumb = thumb
    self.use_group_thumb = use_group_thumb

    # Appearance.
    self.title = title
    self.xlabel = xlabel
    self.ylabel = ylabel
    self.rlabel = rlabel
    self.color = color
    self.edgecolor = edgecolor
def __init__(self, plot_id, plot_group_id, plot_func, image_name,
             title=None, xlabel=None, ylabel=None, rlabel=None,
             color=get_green(2), edgecolor=get_green(3), thumb=None,
             use_group_thumb=False, plot_group_title=None):
    """
    Record how a single plot, and the plot group around it, is rendered.

    FIXME This should be easier to use... Mixing the Plot and PlotGroup

    :param plot_func: must be a plain function or a functools.partial
        with the signature (aggregator, plot_view, output_dir) that
        returns a (fig, ax) tuple
    :param rlabel: right x axis label
    :param use_group_thumb: True if this plot provides the thumbnail of
        its PlotGroup
    :param plot_group_title: title used for the plot group
    :raises TypeError: if plot_func is neither a function nor a partial
    """
    is_function = isinstance(plot_func, types.FunctionType)
    is_partial = isinstance(plot_func, functools.partial)
    if not (is_function or is_partial):
        template = "plot_func requies a function, Got type {t} for {f}"
        raise TypeError(template.format(t=type(plot_func), f=plot_func))

    self.plot_func = plot_func

    # Plot-level settings.
    self.plot_id = plot_id
    self.image_name = image_name
    self.title = title
    self.xlabel = xlabel
    self.ylabel = ylabel
    self.rlabel = rlabel
    self.color = color
    self.edgecolor = edgecolor
    self._thumb = thumb

    # Plot-group-level settings.
    self.plot_group_id = plot_group_id
    self.plot_group_title = plot_group_title
    self.use_group_thumb = use_group_thumb
def _create_bars(contig_variants):
    """
    Build one bar model per variant type from the per-contig rows.

    Items 1, 2 and 3 of every row in ``contig_variants.variants`` supply
    the insertion, deletion and substitution values respectively.

    :param contig_variants: (ContigVariants)
    :returns: tuple of pbreports.plot.helper.Bar objects, ordered
        (insertions, deletions, substitutions)
    """
    def _column(index):
        # Pull one field out of every variant row.
        return np.array([row[index] for row in contig_variants.variants])

    insertions = _column(1)
    deletions = _column(2)
    substitutions = _column(3)

    return (
        PH.Bar(insertions, 'Insertions', color=PH.get_blue(3)),
        PH.Bar(deletions, 'Deletions', color=PH.get_green(3)),
        PH.Bar(substitutions, 'Substitutions', color=PH.get_orange()),
    )
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds the adapter report: a table with the adapter-dimer and
    short-insert percentages, and one bar chart per median insert
    distribution. Charts are saved in output_dir as
    "interAdapterDist<index>.png".

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises RuntimeError: if no median insert distributions are available
    """
    # TODO: make dpi matter
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    for i, ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # A real list, so the x positions do not depend on map()
        # returning a list.
        positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(positions, ins_len_dist.bins,
               color=get_green(0),
               edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel('Median Distance Between Adapters')
        ax.set_ylabel('Reads')
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        # Image paths are stored relative to output_dir. Without an
        # explicit start, relpath resolves against the current working
        # directory and the report breaks when run from elsewhere.
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # The group thumbnail is that of the last chart; the guard above
    # guarantees at least one chart was rendered.
    plot_groups = [PlotGroup("adapter_xml_plot_group",
                             title="Observed Insert Length Distribution",
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    columns = [Column("adaper_xml_conditions", None,
                      ('Adapter Dimers (0-10bp)',
                       'Short Inserts (11-100bp)')),
               Column("adaper_xml_results", None,
                      (adapter_dimers, short_inserts))]

    tables = [Table("adapter_xml_table", "Adapter Statistics", columns)]

    report = Report("adapter_xml_report",
                    title="Adapter Report",
                    tables=tables,
                    attributes=None,
                    plotgroups=plot_groups)
    return report
_custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3), ) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "bp > Read Score"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3), ) create_npasses_plot = functools.partial( create_plot, _make_histogram, Constants.P_NPASSES, ( get_plot_xlabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), get_plot_ylabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), ), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17", )
def to_report(filtered_csv, output_dir, dpi=72, thumb_dpi=20):
    """
    Run Report

    Streams the filtered-subread CSV through two sets of aggregators
    (records that passed the filter, and all records), then builds the
    subread-length plot group and the report attributes.

    :param filtered_csv: path to the CSV file; its first line is a header
    :param output_dir: directory handed to to_plot_groups
    :param dpi: not used in this function
    :param thumb_dpi: not used in this function
    :returns: the result of ``meta_rpt.apply_view`` on the built Report
    :raises NoSubreadsFound: if the all-records length sum is zero
    :raises NoSubreadsPassedFilter: if no record passed the filter
    """
    validate_file(filtered_csv)
    validate_dir(output_dir)

    aggregators = {
        'nbases': SumAggregator('length'),
        'nreads': CountAggregator('length'),
        'mean_subreadlength': MeanSubreadLengthAggregator('length'),
        'max_readlength': MaxAggregator('length'),
        'n50': N50Aggregator('length'),
        'readlength_histogram': HistogramAggregator('length', 0, 100,
                                                    nbins=10000),
        'subread': SubreadLengthHistogram(dx=100)
    }

    def passed_filter(record):
        return record.passed_filter is True

    passed_filter_func = functools.partial(_apply, [passed_filter],
                                           aggregators.values())

    # NOTE(review): 'raw_nreads' is a SumAggregator over 'length', not a
    # count; it is only compared against zero below. Confirm the intent.
    all_subread_aggregators = {
        'raw_nreads': SumAggregator('length'),
        'max_raw_readlength': MaxAggregator('length'),
        'raw_readlength_histogram': HistogramAggregator('length', 0, 100,
                                                        nbins=10000)
    }
    all_filter_func = functools.partial(_apply, [null_filter],
                                        all_subread_aggregators.values())

    with open(filtered_csv, 'r') as handle:
        # Consume the header line so that only records are aggregated.
        # validate_header(header)
        handle.readline()
        applyer(to_record, handle, [passed_filter_func, all_filter_func])

    every_aggregator = itertools.chain(aggregators.values(),
                                       all_subread_aggregators.values())
    for aggregator in every_aggregator:
        log.info(aggregator)

    # Check if any reads are found
    if all_subread_aggregators['raw_nreads'].attribute == 0:
        raise NoSubreadsFound(
            "No subreads found in {f}".format(f=filtered_csv))

    # Now check that at least one of them passed the filter
    if aggregators['nreads'].attribute == 0:
        msg = "No subreads passed the filter in {f}.".format(f=filtered_csv)
        raise NoSubreadsPassedFilter(msg)

    # this is where you change the plotting options
    meta_plot = meta_rpt.get_meta_plotgroup(
        Constants.PG_SUBREAD_LENGTH).get_meta_plot(Constants.P_POST_FILTER)
    plot_view = PlotViewProperties(
        Constants.P_POST_FILTER,
        Constants.PG_SUBREAD_LENGTH,
        custom_subread_length_histogram,
        Constants.I_FILTER_SUBREADS_HIST,
        xlabel=meta_plot.xlabel,
        ylabel=meta_plot.ylabel["L"],
        rlabel=meta_plot.ylabel["R"],
        thumb="filtered_subread_report_thmb.png",
        use_group_thumb=True,
        plot_group_title="",
        color=get_green(3),
        edgecolor=get_green(2))

    plot_groups = to_plot_groups({'subread': plot_view},
                                 output_dir,
                                 {'subread': aggregators['subread']})

    def to_a(name):
        return aggregators[name].attribute

    attributes = _to_attributes(to_a('nreads'),
                                to_a('nbases'),
                                to_a('mean_subreadlength'),
                                to_a('n50'))

    report = Report(Constants.R_ID,
                    title="Subread filtering",
                    plotgroups=plot_groups,
                    attributes=attributes)
    log.debug(str(report))
    return meta_rpt.apply_view(report)
return plot # These functions create signatures (datum, axis_labels, nbins, barcolor _custom_read_length_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Length", 1000000) _custom_read_accuracy_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Predicted Accuracy", 1000000) # These functions need to generate a function with signature (datum, # output_dir, dpi=) create_readlength_plot = functools.partial(__create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, ("Read Length", "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial(__create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, ("Quality", "Reads", "bp > Predicted Accuracy"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial(__create_plot, _make_histogram, Constants.P_NPASSES, ("Number of Passes", "Reads"), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial(__create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER, ("Number of passes", "Predicted accuracy (Phred QV)"), None, Constants.I_CCS_SCATTER_PLOT, get_blue(3)) def to_report(ccs_subread_set, output_dir): bam_files = list(ccs_subread_set.toExternalFiles()) log.info("Generating report from files: {f}".format(f=bam_files)) movie_results = []
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """
    Build the read-length plot group, one bar chart per distribution.

    Each distribution is passed through lenDistShaper, drawn as a bar
    chart labelled from the report metadata and saved (with a thumbnail)
    in output_dir as "<P_LENGTH_PREFIX><index>.png". The group thumbnail
    is the thumbnail of the last chart that was rendered.

    :param PlotConstants: namespace providing PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX
    :param title: accepted for interface compatibility; not used here
    :param readLenDists: iterable of read-length distributions exposing
        ``labels``, ``bins`` and ``binWidth``
    :param readQualDists: read-quality distributions; currently unused
        (see the FIXME at the end of the function)
    :param output_dir: directory that receives the image files; image
        paths in the result are relative to it
    :param dpi: resolution handed to save_figure_with_thumbnail
    :param lenDistShaper: optional callable applied to each distribution
        before plotting; defaults to
        ``continuous_dist_shaper(readLenDists, trim_excess=True)``
    :returns: list containing the single read-length PlotGroup
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Stays None when there is nothing to plot. Previously the group
    # thumbnail was read from a loop variable, which raised
    # UnboundLocalError for an empty readLenDists.
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Reshaping must neither drop nor invent reads.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)

        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).xlabel)
        len_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).ylabel)

        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=group_thumbnail))

    group_kwargs = dict(plots=length_plots)
    if group_thumbnail is not None:
        # Only pass a thumbnail when one exists.
        group_kwargs['thumbnail'] = group_thumbnail

    # FIXME: read-quality plots aren't useful yet, so readQualDists is
    # ignored. The disabled implementation (one bar chart per entry,
    # saved as "<P_QUAL_PREFIX><index>.png" and grouped under
    # PlotConstants.PG_QUAL) was unreachable and has been removed. When
    # restoring it, take the x label from PG_QUAL/P_QUAL; the removed
    # code used the read-length label.
    return [PlotGroup(PlotConstants.PG_LENGTH, **group_kwargs)]
def to_report(filtered_csv, output_dir, dpi=72, thumb_dpi=20):
    """
    Run Report

    Reads the filtered-subread CSV once, feeding every record to the
    "passed filter" aggregators and to the "all records" aggregators,
    then assembles the plot group and attributes of the report.

    :param filtered_csv: path to the CSV file; its first line is a header
    :param output_dir: directory handed to to_plot_groups
    :param dpi: not used in this function
    :param thumb_dpi: not used in this function
    :returns: the result of ``meta_rpt.apply_view`` on the built Report
    :raises NoSubreadsFound: if the all-records length sum is zero
    :raises NoSubreadsPassedFilter: if no record passed the filter
    """
    validate_file(filtered_csv)
    validate_dir(output_dir)

    # Aggregators that only see records which passed the filter.
    aggregators = {'nbases': SumAggregator('length'),
                   'nreads': CountAggregator('length'),
                   'mean_subreadlength': MeanSubreadLengthAggregator('length'),
                   'max_readlength': MaxAggregator('length'),
                   'n50': N50Aggregator('length'),
                   'readlength_histogram': HistogramAggregator('length', 0, 100, nbins=10000),
                   'subread': SubreadLengthHistogram(dx=100)}

    def _passed(record):
        return record.passed_filter is True

    on_passed = functools.partial(_apply, [_passed], aggregators.values())

    # Aggregators that see every record.
    all_subread_aggregators = {'raw_nreads': SumAggregator('length'),
                               'max_raw_readlength': MaxAggregator('length'),
                               'raw_readlength_histogram': HistogramAggregator('length', 0, 100, nbins=10000)}
    on_all = functools.partial(_apply, [null_filter],
                               all_subread_aggregators.values())

    with open(filtered_csv, 'r') as csv_file:
        # Skip the header line.
        # validate_header(header)
        csv_file.readline()
        applyer(to_record, csv_file, [on_passed, on_all])

    for aggregator in itertools.chain(aggregators.values(),
                                      all_subread_aggregators.values()):
        log.info(aggregator)

    # Check if any reads are found
    raw_total = all_subread_aggregators['raw_nreads'].attribute
    if raw_total == 0:
        raise NoSubreadsFound(
            "No subreads found in {f}".format(f=filtered_csv))

    # Now check that something passed the filter
    passed_total = aggregators['nreads'].attribute
    if passed_total == 0:
        raise NoSubreadsPassedFilter(
            "No subreads passed the filter in {f}.".format(f=filtered_csv))

    # this is where you change the plotting options
    labels = meta_rpt.get_meta_plotgroup(
        Constants.PG_SUBREAD_LENGTH).get_meta_plot(Constants.P_POST_FILTER)
    plot_view = PlotViewProperties(Constants.P_POST_FILTER,
                                   Constants.PG_SUBREAD_LENGTH,
                                   custom_subread_length_histogram,
                                   Constants.I_FILTER_SUBREADS_HIST,
                                   xlabel=labels.xlabel,
                                   ylabel=labels.ylabel["L"],
                                   rlabel=labels.ylabel["R"],
                                   thumb="filtered_subread_report_thmb.png",
                                   use_group_thumb=True,
                                   plot_group_title="",
                                   color=get_green(3),
                                   edgecolor=get_green(2))

    view_config_d = {'subread': plot_view}
    id_aggregators = {'subread': aggregators['subread']}
    plot_groups = to_plot_groups(view_config_d, output_dir, id_aggregators)

    attribute_names = ('nreads', 'nbases', 'mean_subreadlength', 'n50')
    attribute_values = [aggregators[name].attribute
                        for name in attribute_names]
    attributes = _to_attributes(*attribute_values)

    report = Report(Constants.R_ID,
                    title="Subread filtering",
                    plotgroups=plot_groups,
                    attributes=attributes)
    log.debug(str(report))
    return meta_rpt.apply_view(report)
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds the filtering-stats report: a table of polymerase read metrics
    derived from the binned read-length and read-quality distributions,
    plus one bar chart per distribution saved in output_dir.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises RuntimeError: if no read-length distributions are available
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # stats_xml should be a dataset ...
    dset = DataSet(stats_xml)
    dataset_uuids = [dset.uuid]
    if not dset.metadata.summaryStats:
        # ... but if it isn't, no problem: an sts file was provided, which
        # will generate a new random uuid, so none is reported.
        dset.loadStats(stats_xml)
        dataset_uuids = []

    stats = dset.metadata.summaryStats
    if not stats.readLenDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    def _bar_plots(dists, xlabel, image_pattern, plot_id_pattern):
        # Render one bar chart per distribution; return the Plot objects
        # and the thumbnail path of the last chart rendered.
        rendered = []
        last_thumbnail = None
        for index, dist in enumerate(dists):
            figure, axes = get_fig_axes_lpr()
            axes.bar(dist.labels, dist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(dist.binWidth * 0.75))
            axes.set_xlabel(xlabel)
            axes.set_ylabel('Reads')
            image_path = os.path.join(output_dir,
                                      image_pattern.format(i=index))
            png_base, last_thumbnail = save_figure_with_thumbnail(
                figure, image_path, dpi=dpi)
            rendered.append(
                Plot(plot_id_pattern.format(i=index),
                     os.path.relpath(png_base, output_dir),
                     thumbnail=os.path.relpath(last_thumbnail, output_dir)))
        return rendered, last_thumbnail

    # Build the stats table:
    total_bases = 0
    total_reads = 0
    approx_read_lens = []
    # if a merge failed there may be more than one dist:
    for length_dist in stats.readLenDists:
        bins = length_dist.bins
        width = length_dist.binWidth
        lowest = length_dist.minBinValue
        total_bases += _total_from_bins(bins, lowest, width)
        total_reads += sum(bins)

        # N50: approximate every read by a representative value of its bin
        last_index = len(bins) - 1
        for index, count in enumerate(bins):
            value = (index * width) + lowest
            if index != last_index:
                # use the bin average, except for the last bin, which
                # just uses its lower value
                # NOTE(review): under Python 2 this is integer division
                # when binWidth is an int - confirm that is intended.
                value = value + width / 2
            approx_read_lens.extend([value] * count)
            # TODO(mdsmith)(2016-02-09) make sure maxOutlierValue is updated
            # during a merge /todo
            # but pop off that last value and replace it with the
            # maxOutlierValue:
            # approx_read_lens.pop()
            # approx_read_lens.append(length_dist.maxBinValue)
    n50 = np.round(compute_n50(approx_read_lens))

    score_total = 0
    score_count = 0
    for quality_dist in stats.readQualDists:
        score_total += _total_from_bins(quality_dist.bins,
                                        quality_dist.minBinValue,
                                        quality_dist.binWidth)
        score_count += sum(quality_dist.bins)

    # Means default to 0 when there is nothing to average.
    # NOTE(review): "/" truncates under Python 2 if both operands are
    # ints - verify the return type of _total_from_bins.
    if total_reads != 0:
        mean_length = np.round(total_bases / total_reads, decimals=2)
    else:
        mean_length = 0
    if score_count != 0:
        mean_quality = np.round(score_total / score_count, decimals=2)
    else:
        mean_quality = 0

    row_names = ["Polymerase Read Bases",
                 "Polymerase Reads",
                 "Polymerase Read N50",
                 "Polymerase Read Length",
                 "Polymerase Read Quality"]
    row_values = [np.round(total_bases, decimals=2),
                  total_reads,
                  n50,
                  mean_length,
                  mean_quality]

    # ReadLen distribution to barplot:
    length_plots, length_thumbnail = _bar_plots(stats.readLenDists,
                                                'Read Length',
                                                "readLenDist{i}.png",
                                                "filter_len_xml_plot_{i}")
    plot_groups = [PlotGroup("filter_len_xml_plot_group",
                             title="Polymerase Read Length",
                             plots=length_plots,
                             thumbnail=os.path.relpath(length_thumbnail,
                                                       output_dir))]

    # ReadQual distribution to barplot (this group gets no thumbnail):
    quality_plots, _ = _bar_plots(stats.readQualDists,
                                  'Read Quality',
                                  "readQualDist{i}.png",
                                  "filter_qual_xml_plot_{i}")
    plot_groups.append(PlotGroup("filter_qual_xml_plot_group",
                                 title="Polymerase Read Quality",
                                 plots=quality_plots))

    # build the report:
    columns = [
        Column("filter_names_column", header="Metrics", values=row_names),
        Column("filter_stats_column", header="Values", values=row_values),
    ]
    tables = [Table("filter_xml_table", "Filtering Statistics", columns)]

    return Report("filtering_stats_xml_report",
                  title="Filtering stats XML report",
                  tables=tables,
                  attributes=None,
                  plotgroups=plot_groups,
                  dataset_uuids=dataset_uuids)
return plot # These functions create signatures (data, axis_labels, nbins, barcolor _custom_read_length_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Length", 1000000) _custom_read_accuracy_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Score", 1000000) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial(create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, ("Read Length", "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial(create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, ("Quality", "Reads", "bp > Read Score"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial(create_plot, _make_histogram, Constants.P_NPASSES, ("Number of Passes", "Reads"), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial(create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER, ("Number of passes", "Read Score as Phred QV"), None, Constants.I_CCS_SCATTER_PLOT, get_blue(3)) def to_report(ccs_subread_set, output_dir): bam_files = list(ccs_subread_set.toExternalFiles()) log.info("Generating report from files: {f}".format(f=bam_files)) movie_results = []
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds the adapter report from a SubreadSet (or an sts.xml loaded
    into one). The adapter-dimer and short-insert percentages become
    report attributes. One bar chart per median insert distribution is
    still rendered into output_dir, but the resulting plot group is not
    attached to the report.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises IOError: if no median insert distributions are available
    """
    # TODO: make dpi matter
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction, decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction, decimals=2)

    # Pull some histograms (may have dupes (unmergeable distributions)):
    plots = []
    shaper = continuous_dist_shaper(stats.medianInsertDists)
    for index, raw_dist in enumerate(stats.medianInsertDists):
        shaped = shaper(raw_dist)

        # make a bar chart:
        figure, axes = get_fig_axes_lpr()
        positions = [float(label) for label in shaped.labels]
        axes.bar(positions, shaped.bins,
                 color=get_green(0),
                 edgecolor=get_green(0),
                 width=(shaped.binWidth * 0.75))
        axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(Constants.PG_ADAPTER).get_meta_plot(
                Constants.P_ADAPTER).xlabel)
        axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(Constants.PG_ADAPTER).get_meta_plot(
                Constants.P_ADAPTER).ylabel)

        image_path = os.path.join(
            output_dir, "interAdapterDist{i}.png".format(i=index))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            figure, image_path, dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=index),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # Built as before, but deliberately not passed to the Report below
    # (plotgroups are currently disabled for this report).
    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=[])  # plotgroups=plot_groups)
    return meta_rpt.apply_view(report)
"Mb > Read Score", sys.maxint) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 50, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "reads > Read Score"), 100, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial( create_plot, make_histogram, Constants.P_NPASSES, (get_plot_xlabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), get_plot_ylabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES)), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial( create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER, (get_plot_xlabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER), get_plot_ylabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER)), None, Constants.I_CCS_SCATTER_PLOT, get_blue(3)) def to_report(ccs_set, output_dir):
_custom_histogram_with_cdf, "Mb > Read Score", 1000000) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "bp > Read Score"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial( create_plot, _make_histogram, Constants.P_NPASSES, (get_plot_xlabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), get_plot_ylabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES)), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial( create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER, (get_plot_xlabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER), get_plot_ylabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER)), None, Constants.I_CCS_SCATTER_PLOT, get_blue(3)) def to_report(ccs_set, output_dir):
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """
    Build the adapter report for an already-loaded dataset.

    Report attributes are the adapter-dimer and short-insert percentages
    and, when exactly one local base rate distribution is present, its
    ``sampleMed`` value. One bar chart per median insert distribution is
    rendered into output_dir, but the resulting plot group is not
    attached to the report.

    :param dset: dataset whose ``metadata.summaryStats`` is populated
    :param output_dir: directory that receives the image files
    :param dpi: resolution handed to save_figure_with_thumbnail
    :returns: the result of ``spec.apply_view`` on the built Report
    :raises InvalidStatsError: if no median insert distributions exist
    """
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction, decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction, decimals=2)
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    if Constants.BASE_RATE_DIST not in stats.tags:
        log.warn("No local base rate distribution available")
    else:
        base_rate_dists = stats[Constants.BASE_RATE_DIST]
        if len(base_rate_dists) > 1:
            log.warn("Dataset was merged, local base rate not applicable")
        else:
            attributes.append(
                Attribute(Constants.A_BASE_RATE,
                          base_rate_dists[0].sampleMed))

    # Pull some histograms (may have dupes (unmergeable distributions)):
    plots = []
    shaper = continuous_dist_shaper(stats.medianInsertDists)
    for index, raw_dist in enumerate(stats.medianInsertDists):
        shaped = shaper(raw_dist)

        # make a bar chart:
        figure, axes = get_fig_axes_lpr()
        positions = [float(label) for label in shaped.labels]
        axes.bar(positions, shaped.bins,
                 color=get_green(0),
                 edgecolor=get_green(0),
                 width=(shaped.binWidth * 0.75))
        axes.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        axes.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))

        image_path = os.path.join(
            output_dir, "interAdapterDist{i}.png".format(i=index))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            figure, image_path, dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=index),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # Built as before, but deliberately not passed to the Report below
    # (plotgroups are currently disabled for this report).
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]

    report = Report(Constants.R_ID,
                    attributes=attributes,
                    tables=[])  # plotgroups=plot_groups)
    return spec.apply_view(report)