def to_reports(subreads, output_dir):
    output_files = []
    log.info("Loading {f}".format(f=subreads))
    ds = SubreadSet(subreads)
    ds.loadStats()
    for base, module in [("filter_stats_xml", filter_stats_xml),
                         ("adapter_xml", adapter_xml),
                         ("loading_xml", loading_xml),
                         ("control", control)]:
        constants = getattr(module, "Constants")
        task_id = constants.TOOL_ID
        to_report = getattr(module, "to_report_impl")
        try:
            rpt_output_dir = os.path.join(output_dir, base)
            os.mkdir(rpt_output_dir)
            file_name = os.path.join(rpt_output_dir,
                                     "{b}.json".format(b=base))
            report = to_report(ds, rpt_output_dir)
            log.info("Writing {f}".format(f=file_name))
            report.write_json(file_name)
            output_files.append(DataStoreFile(
                uuid=report.uuid,
                source_id=task_id,
                type_id=FileTypes.REPORT.file_type_id,
                path=file_name,
                is_chunked=False,
                name=base))
        except InvalidStatsError:
            log.error("This dataset lacks some required statistics")
            log.error("Skipping generation of {b} report".format(b=base))
    datastore = DataStore(output_files)
    return datastore
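# Hedged usage sketch, not part of the original module: driving to_reports
# from a small script. The subreadset path and output directory are
# hypothetical placeholders, and the module-level imports used above
# (os, SubreadSet, DataStore, ...) are assumed to be in scope.
# DataStore.write_json is the pbcommand call for serializing the datastore.
def _example_run_to_reports():
    out_dir = "/tmp/dataset_reports"  # placeholder
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    # "movie.subreadset.xml" is a placeholder input path
    datastore = to_reports("movie.subreadset.xml", out_dir)
    datastore.write_json(os.path.join(out_dir, "datastore.json"))
    return datastore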
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")
    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)
    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))
    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper))
    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
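# Minimal sketch of the shared-shaper pattern used above, assuming a
# SubreadSet whose summaryStats are already loaded: building one
# continuous_dist_shaper over all of the length distributions gives every
# histogram in the report identical binning.
def _example_shared_len_dist_shaper(dset):
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    shaper = continuous_dist_shaper(alldists, trim_excess=True)
    # every reshaped distribution now shares the same bin layout
    return [shaper(dist) for dist in alldists]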
def to_report(stats_xml, output_dir):
    log.info("Starting {f} v{v}".format(f=os.path.basename(__file__),
                                        v=__version__))
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    dset.loadStats()
    if stats_xml.endswith(".sts.xml"):
        dset.loadStats(stats_xml)
    return to_report_impl(dset, output_dir)
def to_report(stats_xml, output_dir):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if stats_xml.endswith(".sts.xml"):
        dset.loadStats(stats_xml)
    dset.loadStats()
    return to_report_impl(dset, output_dir)
def to_report(stats_xml, output_dir, dpi=DEFAULT_DPI):
    # TODO: make dpi matter
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    return to_report_impl(dset, output_dir, dpi)
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")
    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))
    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir))
    # build the report:
    report = Report("raw_data_report",
                    title="Raw Data Report",
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return report
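# Hedged usage sketch: generating the raw data report above and serializing
# it with Report.write_json (the same pbcommand call used by to_reports).
# Both paths are illustrative placeholders.
def _example_raw_data_report():
    report = to_report("movie.subreadset.xml", "/tmp/raw_data_report")
    report.write_json("/tmp/raw_data_report/raw_data_report.json")
    return report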
def test_reports_with_fixed_bins(self):
    # TODO readQualDists are currently unpopulated, turn back on when
    # they're repopulated
    # for dist_name, nbins in zip(['medianInsertDists', 'readLenDists',
    #                              'readQualDists'], [200, 200, 50]):
    for dist_name, nbins in zip(['medianInsertDists', 'readLenDists'],
                                [200, 200]):
        ss = SubreadSet()
        ss.loadStats(get_fixed_bin_sts())
        ss2 = SubreadSet()
        ss2.loadStats(get_fixed_bin_sts())
        # shift ss2
        mdist = getattr(ss2.metadata.summaryStats, dist_name)[0].bins
        mdist = [0, 0, 0] + mdist[:-3]
        getattr(ss2.metadata.summaryStats, dist_name)[0].bins = mdist
        ss3 = ss + ss2
        ss4 = SubreadSet()
        ss4.loadStats(get_fixed_bin_sts())
        # zero out ss4
        mdist = getattr(ss4.metadata.summaryStats, dist_name)[0].bins
        mdist = [0 for _ in mdist]
        getattr(ss4.metadata.summaryStats, dist_name)[0].bins = mdist
        dists = getattr(ss4.metadata.summaryStats, dist_name)
        self.assertEqual(len(dists), 1)
        for n in [0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500]:
            ds = continuous_dist_shaper(dists, nbins=n)
            fixed_dists = [ds(dist) for dist in dists]
            self.assertEqual(len(dists[0].bins), nbins)
            self.assertEqual(len(fixed_dists[0].bins), nbins)
            self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))
        sss = [ss, ss2, ss3]
        for sset in sss:
            dists = getattr(sset.metadata.summaryStats, dist_name)
            self.assertEqual(len(dists), 1)
            # a requested nbins of 0 falls back to a no-op
            ops = [1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500]
            no_ops = [0]
            for n in no_ops:
                ds = continuous_dist_shaper(dists, nbins=n)
                fixed_dists = [ds(dist) for dist in dists]
                self.assertEqual(len(dists[0].bins), nbins)
                self.assertEqual(len(fixed_dists[0].bins), nbins)
                self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))
            for n in ops:
                ds = continuous_dist_shaper(dists, nbins=n)
                fixed_dists = [ds(dist) for dist in dists]
                self.assertEqual(len(dists[0].bins), nbins)
                self.assertEqual(len(fixed_dists[0].bins), n)
                self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))
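# Sketch of the invariants the test above exercises, restated as a standalone
# helper under the same assumptions (distributions loaded from a fixed-bin
# sts.xml): reshaping conserves the total count, a populated distribution
# adopts the requested bin count for nbins >= 1, and nbins == 0 (or an empty
# distribution) leaves the original binning intact.
def _check_shaper_invariants(dists, nbins):
    shaper = continuous_dist_shaper(dists, nbins=nbins)
    for dist in dists:
        fixed = shaper(dist)
        # total counts are conserved by any reshaping
        assert sum(fixed.bins) == sum(dist.bins)
        if nbins >= 1 and sum(dist.bins) > 0:
            # populated distributions adopt the requested bin count
            assert len(fixed.bins) == nbins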
def test_stats_metadata(self):
    ds = DataSet(data.getBam())
    ds.loadStats(data.getStats())
    self.assertEqual(ds.metadata.summaryStats.prodDist.numBins, 4)
    self.assertEqual(ds.metadata.summaryStats.prodDist.bins,
                     [1576, 901, 399, 0])
    ds1 = DataSet(data.getXml(8))
    ds1.loadStats(data.getStats())
    ds2 = DataSet(data.getXml(11))
    ds2.loadStats(data.getStats())
    ds3 = ds1 + ds2
    self.assertEqual(ds1.metadata.summaryStats.prodDist.bins,
                     [1576, 901, 399, 0])
    self.assertEqual(ds2.metadata.summaryStats.prodDist.bins,
                     [1576, 901, 399, 0])
    self.assertEqual(ds3.metadata.summaryStats.prodDist.bins,
                     [3152, 1802, 798, 0])
    self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                     [0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45, 54,
                      73, 77, 97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0])
    self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                     [0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45, 54,
                      73, 77, 97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0])
    self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                     [0, 124, 78, 72, 58, 74, 38, 58, 74, 64, 64, 80, 90,
                      108, 146, 154, 194, 190, 98, 34, 4, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0])

    # Let's check some manual values
    ds1 = DataSet(data.getXml(8))
    ds1.loadStats(data.getStats())
    ds2 = DataSet(data.getXml(11))
    ds2.loadStats(data.getStats())
    ds1.metadata.summaryStats.readLenDist.bins = (
        [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
    self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                     [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
    ds1.metadata.summaryStats.readLenDist.minBinValue = 10
    ds1.metadata.summaryStats.readLenDist.binWidth = 10
    ds2.metadata.summaryStats.readLenDist.bins = (
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
    self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
    ds2.metadata.summaryStats.readLenDist.minBinValue = 20
    ds2.metadata.summaryStats.readLenDist.binWidth = 10
    ds3 = ds1 + ds2
    self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                     [0, 10, 10, 9, 8, 7, 5, 3, 2, 1, 0, 1, 1])

    # now let's swap
    ds1 = DataSet(data.getXml(8))
    ds1.loadStats(data.getStats())
    ds2 = DataSet(data.getXml(11))
    ds2.loadStats(data.getStats())
    ds1.metadata.summaryStats.readLenDist.bins = (
        [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
    self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                     [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
    ds1.metadata.summaryStats.readLenDist.minBinValue = 20
    ds1.metadata.summaryStats.readLenDist.binWidth = 10
    ds2.metadata.summaryStats.readLenDist.bins = (
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
    self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                     [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
    ds2.metadata.summaryStats.readLenDist.minBinValue = 10
    ds2.metadata.summaryStats.readLenDist.binWidth = 10
    ds3 = ds1 + ds2
    self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                     [0, 1, 11, 10, 9, 8, 7, 5, 3, 1, 0, 1, 1])

    # now let's do some non-overlapping
    ds1 = DataSet(data.getXml(8))
    ds1.loadStats(data.getStats())
    ds2 = DataSet(data.getXml(11))
    ds2.loadStats(data.getStats())
    ds1.metadata.summaryStats.readLenDist.bins = [1, 1, 1]
    self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                     [1, 1, 1])
    ds1.metadata.summaryStats.readLenDist.minBinValue = 10
    ds1.metadata.summaryStats.readLenDist.binWidth = 10
    ds2.metadata.summaryStats.readLenDist.bins = [2, 2, 2]
    self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                     [2, 2, 2])
    ds2.metadata.summaryStats.readLenDist.minBinValue = 50
    ds2.metadata.summaryStats.readLenDist.binWidth = 10
    ds3 = ds1 + ds2
    self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                     [1, 1, 1, 0, 2, 2, 2])

    # now let's test the subdataset metadata retention:
    ss = SubreadSet(data.getXml(10))
    ss.loadStats(data.getStats(0))
    ss.loadStats(data.getStats(1))
    self.assertEqual(153168.0, ss.metadata.summaryStats.numSequencingZmws)
    self.assertEqual(
        2876.0, ss.subdatasets[0].metadata.summaryStats.numSequencingZmws)
    self.assertEqual(
        150292.0, ss.subdatasets[1].metadata.summaryStats.numSequencingZmws)
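# Pure-Python sketch (an illustration, not pbcore's actual implementation) of
# the fixed-width histogram merge asserted above: align both histograms on a
# common origin and add bin-wise, padding any gap with zero bins. It
# reproduces both the overlapping cases and the non-overlapping case in the
# test.
def _merge_fixed_width_bins(bins_a, min_a, bins_b, min_b, width):
    start = min(min_a, min_b)
    end = max(min_a + width * len(bins_a), min_b + width * len(bins_b))
    merged = [0] * ((end - start) // width)
    for bins, min_value in ((bins_a, min_a), (bins_b, min_b)):
        offset = (min_value - start) // width
        for i, count in enumerate(bins):
            merged[offset + i] += count
    return merged

# _merge_fixed_width_bins([1, 1, 1], 10, [2, 2, 2], 50, 10)
# -> [1, 1, 1, 0, 2, 2, 2], matching the assertion above.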
def get_fixed_bin_dataset():
    sfn = get_fixed_bin_sts()
    sset = SubreadSet()
    sset.loadStats(sfn)
    return sset
def to_report(stats_xml, output_dir, dpi=72):
    # TODO: make dpi matter
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats:
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        ax.bar(map(float, ins_len_dist.labels), ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_rpt.get_meta_plotgroup(Constants.PG_ADAPTER)
                      .get_meta_plot(Constants.P_ADAPTER).xlabel)
        ax.set_ylabel(meta_rpt.get_meta_plotgroup(Constants.PG_ADAPTER)
                      .get_meta_plot(Constants.P_ADAPTER).ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    attributes = [Attribute(i, v) for i, v in
                  zip([Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
                      [adapter_dimers, short_inserts])]

    tables = []

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables,
                    # plotgroups=plot_groups,
                    )

    return meta_rpt.apply_view(report)