Example #1
def to_reports(subreads, output_dir):
    output_files = []
    log.info("Loading {f}".format(f=subreads))
    ds = SubreadSet(subreads)
    ds.loadStats()
    for base, module in [("filter_stats_xml", filter_stats_xml),
                         ("adapter_xml", adapter_xml),
                         ("loading_xml", loading_xml),
                         ("control", control)]:
        constants = getattr(module, "Constants")
        task_id = constants.TOOL_ID
        to_report = getattr(module, "to_report_impl")
        try:
            rpt_output_dir = os.path.join(output_dir, base)
            os.mkdir(rpt_output_dir)
            file_name = os.path.join(rpt_output_dir, "{b}.json".format(b=base))
            report = to_report(ds, rpt_output_dir)
            log.info("Writing {f}".format(f=file_name))
            report.write_json(file_name)
            output_files.append(DataStoreFile(
                uuid=report.uuid,
                source_id=task_id,
                type_id=FileTypes.REPORT.file_type_id,
                path=file_name,
                is_chunked=False,
                name=base))
        except InvalidStatsError as e:
            log.error("This dataset lacks some required statistics: {e}".format(e=e))
            log.error("Skipping generation of {b} report".format(b=base))
    datastore = DataStore(output_files)
    return datastore
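For orientation, a minimal driver sketch (not part of the original listing): the input path and output directory below are placeholders, and it assumes to_reports is callable in its module context; DataStore.write_json is pbcommand's serializer for the returned datastore.

import logging

# Hedged usage sketch -- paths are placeholders, not real inputs.
logging.basicConfig(level=logging.INFO)
datastore = to_reports("movie.subreadset.xml", "reports")
# Persist the collected per-report entries as a datastore JSON:
datastore.write_json("reports/datastore.json")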
Example #2
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)

    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(
        to_insert_stats_attributes(
            readLenDists=dset.metadata.summaryStats.insertReadLenDists,
            readQualDists=dset.metadata.summaryStats.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(
        to_insert_stats_plots(
            readLenDists=dset.metadata.summaryStats.insertReadLenDists,
            readQualDists=dset.metadata.summaryStats.insertReadQualDists,
            output_dir=output_dir,
            lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
Example #3
def to_report(stats_xml, output_dir):
    log.info("Starting {f} v{v}".format(f=os.path.basename(__file__),
                                        v=__version__))
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    dset.loadStats()
    if stats_xml.endswith(".sts.xml"):
        dset.loadStats(stats_xml)
    return to_report_impl(dset, output_dir)
Example #4
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)

    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")


    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
Example #5
def to_report(stats_xml, output_dir):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if stats_xml.endswith(".sts.xml"):
        dset.loadStats(stats_xml)
    dset.loadStats()
    return to_report_impl(dset, output_dir)
Example #6
def to_report(stats_xml, output_dir, dpi=DEFAULT_DPI):
    # TODO: make dpi matter
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    return to_report_impl(dset, output_dir, dpi)
Example #7
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir))

    # build the report:
    report = Report("raw_data_report",
                    title="Raw Data Report",
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)

    return report
Example #8
    def test_reports_with_fixed_bins(self):
        # TODO readQualDists are currently unpopulated, turn back on when
        # they're repopulated
        # for dist_name, nbins in zip(['medianInsertDists', 'readLenDists',
        #                             'readQualDists'], [200, 200, 50]):
        for dist_name, nbins in zip(['medianInsertDists', 'readLenDists'],
                                    [200, 200]):
            ss = SubreadSet()
            ss.loadStats(get_fixed_bin_sts())

            ss2 = SubreadSet()
            ss2.loadStats(get_fixed_bin_sts())

            # shift ss2
            mdist = getattr(ss2.metadata.summaryStats, dist_name)[0].bins
            mdist = [0, 0, 0] + mdist[:-3]
            getattr(ss2.metadata.summaryStats, dist_name)[0].bins = mdist

            ss3 = ss + ss2

            ss4 = SubreadSet()
            ss4.loadStats(get_fixed_bin_sts())

            # shift ss4
            mdist = getattr(ss4.metadata.summaryStats, dist_name)[0].bins
            mdist = [0 for _ in mdist]
            getattr(ss4.metadata.summaryStats, dist_name)[0].bins = mdist

            dists = getattr(ss4.metadata.summaryStats, dist_name)
            self.assertEqual(len(dists), 1)
            for n in [0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500]:
                ds = continuous_dist_shaper(dists, nbins=n)
                fixed_dists = [ds(dist) for dist in dists]
                self.assertEqual(len(dists[0].bins), nbins)
                self.assertEqual(len(fixed_dists[0].bins), nbins)
                self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))

            sss = [ss, ss2, ss3]

            for sset in sss:
                dists = getattr(sset.metadata.summaryStats, dist_name)
                self.assertEqual(len(dists), 1)
                # 0, requested nbins > numBins falls back to no-op
                ops = [1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500]
                no_ops = [0]
                for n in no_ops:
                    ds = continuous_dist_shaper(dists, nbins=n)
                    fixed_dists = [ds(dist) for dist in dists]
                    self.assertEqual(len(dists[0].bins), nbins)
                    self.assertEqual(len(fixed_dists[0].bins), nbins)
                    self.assertEqual(sum(dists[0].bins),
                                     sum(fixed_dists[0].bins))

                for n in ops:
                    ds = continuous_dist_shaper(dists, nbins=n)
                    fixed_dists = [ds(dist) for dist in dists]
                    self.assertEqual(len(dists[0].bins), nbins)
                    self.assertEqual(len(fixed_dists[0].bins), n)
                    self.assertEqual(sum(dists[0].bins),
                                     sum(fixed_dists[0].bins))
Example #9
    def test_reports_with_fixed_bins(self):
        # TODO readQualDists are currently unpopulated, turn back on when
        # they're repopulated
        # for dist_name, nbins in zip(['medianInsertDists', 'readLenDists',
        #                             'readQualDists'], [200, 200, 50]):
        for dist_name, nbins in zip(["medianInsertDists", "readLenDists"], [200, 200]):
            ss = SubreadSet()
            ss.loadStats(get_fixed_bin_sts())

            ss2 = SubreadSet()
            ss2.loadStats(get_fixed_bin_sts())

            # shift ss2
            mdist = getattr(ss2.metadata.summaryStats, dist_name)[0].bins
            mdist = [0, 0, 0] + mdist[:-3]
            getattr(ss2.metadata.summaryStats, dist_name)[0].bins = mdist

            ss3 = ss + ss2

            ss4 = SubreadSet()
            ss4.loadStats(get_fixed_bin_sts())

            # shift ss4
            mdist = getattr(ss4.metadata.summaryStats, dist_name)[0].bins
            mdist = [0 for _ in mdist]
            getattr(ss4.metadata.summaryStats, dist_name)[0].bins = mdist

            dists = getattr(ss4.metadata.summaryStats, dist_name)
            self.assertEqual(len(dists), 1)
            for n in [0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500]:
                ds = continuous_dist_shaper(dists, nbins=n)
                fixed_dists = [ds(dist) for dist in dists]
                self.assertEqual(len(dists[0].bins), nbins)
                self.assertEqual(len(fixed_dists[0].bins), nbins)
                self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))

            sss = [ss, ss2, ss3]

            for sset in sss:
                dists = getattr(sset.metadata.summaryStats, dist_name)
                self.assertEqual(len(dists), 1)
                # 0, requested nbins > numBins falls back to no-op
                ops = [1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500]
                no_ops = [0]
                for n in no_ops:
                    ds = continuous_dist_shaper(dists, nbins=n)
                    fixed_dists = [ds(dist) for dist in dists]
                    self.assertEqual(len(dists[0].bins), nbins)
                    self.assertEqual(len(fixed_dists[0].bins), nbins)
                    self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))

                for n in ops:
                    ds = continuous_dist_shaper(dists, nbins=n)
                    fixed_dists = [ds(dist) for dist in dists]
                    self.assertEqual(len(dists[0].bins), nbins)
                    self.assertEqual(len(fixed_dists[0].bins), n)
                    self.assertEqual(sum(dists[0].bins), sum(fixed_dists[0].bins))
Example #10
    def test_stats_metadata(self):
        ds = DataSet(data.getBam())
        ds.loadStats(data.getStats())
        self.assertEqual(ds.metadata.summaryStats.prodDist.numBins, 4)
        self.assertEqual(ds.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds3 = ds1 + ds2
        self.assertEqual(ds1.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        self.assertEqual(ds2.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        self.assertEqual(ds3.metadata.summaryStats.prodDist.bins,
                         [3152, 1802, 798, 0])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45,
                          54, 73, 77, 97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0,
                          0, 0, 0, 0, 0])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45,
                          54, 73, 77, 97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0,
                          0, 0, 0, 0, 0])
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [0, 124, 78, 72, 58, 74, 38, 58, 74, 64, 64, 80, 90,
                          108, 146, 154, 194, 190, 98, 34, 4, 0, 0, 0, 0, 0,
                          0, 0, 0, 0, 0, 0])
        # Let's check some manual values
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = (
            [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 10
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = (
            [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 20
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 10, 9, 8, 7, 5, 3, 2, 1, 0, 1, 1])
        # now let's swap
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = (
            [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 20
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = (
            [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 10
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 11, 10, 9, 8, 7, 5, 3, 1, 0, 1, 1])

        # now let's do some non-overlapping
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = (
            [1, 1, 1])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [1, 1, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 10
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = (
            [2, 2, 2])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [2, 2, 2])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 50
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [1, 1, 1, 0, 2, 2, 2])

        # now let's test the subdataset metadata retention:
        ss = SubreadSet(data.getXml(10))
        ss.loadStats(data.getStats(0))
        ss.loadStats(data.getStats(1))
        self.assertEqual(153168.0, ss.metadata.summaryStats.numSequencingZmws)
        self.assertEqual(
            2876.0, ss.subdatasets[0].metadata.summaryStats.numSequencingZmws)
        self.assertEqual(
            150292.0, ss.subdatasets[1].metadata.summaryStats.numSequencingZmws)
Example #11
def get_fixed_bin_dataset():
    sfn = get_fixed_bin_sts()
    sset = SubreadSet()
    sset.loadStats(sfn)
    return sset
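As an illustrative pairing of this fixture with continuous_dist_shaper (the nbins value and the mass-preservation check mirror test_reports_with_fixed_bins above; the snippet itself is an assumption, not from the original):

# Reshape the fixed-bin read-length distributions to 200 bins, following
# the pattern exercised in test_reports_with_fixed_bins.
sset = get_fixed_bin_dataset()
dists = sset.metadata.summaryStats.readLenDists
shaper = continuous_dist_shaper(dists, nbins=200)
fixed_dists = [shaper(d) for d in dists]
# Reshaping preserves total counts, as the tests assert:
assert sum(dists[0].bins) == sum(fixed_dists[0].bins)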
Example #12
def to_report(stats_xml, output_dir, dpi=72):
    # TODO: make dpi matter
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats:
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # list() guards against map returning an iterator under Python 3:
        ax.bar(list(map(float, ins_len_dist.labels)), ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        meta_plot = meta_rpt.get_meta_plotgroup(
            Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER)
        ax.set_xlabel(meta_plot.xlabel)
        ax.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base, output_dir))]
    attributes = [Attribute(i, v) for i, v in
                  zip([Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
                      [adapter_dimers, short_inserts])]

    tables = []

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables)  # plotgroups=plot_groups intentionally omitted

    return meta_rpt.apply_view(report)
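A brief, hypothetical invocation of this adapter-report variant, assuming the module-level log, meta_rpt, and Constants it references are in scope; the sts.xml path is a placeholder:

# Placeholder input; Report.write_json serializes the report to disk.
rpt = to_report("movie.sts.xml", "adapter_report", dpi=72)
rpt.write_json("adapter_report/adapter.report.json")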
Example #13
def get_fixed_bin_dataset():
    sfn = get_fixed_bin_sts()
    sset = SubreadSet()
    sset.loadStats(sfn)
    return sset
Example #14
    def test_stats_metadata(self):
        ds = DataSet(data.getBam())
        ds.loadStats(data.getStats())
        self.assertEqual(ds.metadata.summaryStats.prodDist.numBins, 4)
        self.assertEqual(ds.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds3 = ds1 + ds2
        self.assertEqual(ds1.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        self.assertEqual(ds2.metadata.summaryStats.prodDist.bins,
                         [1576, 901, 399, 0])
        self.assertEqual(ds3.metadata.summaryStats.prodDist.bins,
                         [3152, 1802, 798, 0])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins, [
            0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45, 54, 73, 77, 97,
            95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins, [
            0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45, 54, 73, 77, 97,
            95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ])
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins, [
            0, 124, 78, 72, 58, 74, 38, 58, 74, 64, 64, 80, 90, 108, 146, 154,
            194, 190, 98, 34, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        ])
        # Let's check some manual values
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = ([
            0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1
        ])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 10
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = ([
            0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1
        ])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 20
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 10, 9, 8, 7, 5, 3, 2, 1, 0, 1, 1])
        # now let's swap
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = ([
            0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1
        ])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 20
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = ([
            0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1
        ])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 10
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [0, 1, 11, 10, 9, 8, 7, 5, 3, 1, 0, 1, 1])

        # now let's do some non-overlapping
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())
        ds1.metadata.summaryStats.readLenDist.bins = ([1, 1, 1])
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins, [1, 1, 1])
        ds1.metadata.summaryStats.readLenDist.minBinValue = 10
        ds1.metadata.summaryStats.readLenDist.binWidth = 10
        ds2.metadata.summaryStats.readLenDist.bins = ([2, 2, 2])
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins, [2, 2, 2])
        ds2.metadata.summaryStats.readLenDist.minBinValue = 50
        ds2.metadata.summaryStats.readLenDist.binWidth = 10
        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         [1, 1, 1, 0, 2, 2, 2])

        # now let's test the subdataset metadata retention:
        ss = SubreadSet(data.getXml(10))
        ss.loadStats(data.getStats(0))
        ss.loadStats(data.getStats(1))
        self.assertEqual(153168.0, ss.metadata.summaryStats.numSequencingZmws)
        self.assertEqual(
            2876.0, ss.subdatasets[0].metadata.summaryStats.numSequencingZmws)
        self.assertEqual(
            150292.0,
            ss.subdatasets[1].metadata.summaryStats.numSequencingZmws)