def test_reports_with_fixed_bins(self):
    """Check continuous_dist_shaper on distributions loaded from fixed-bin stats.

    For each distribution name, data sets are built from
    get_fixed_bin_sts(): an untouched one, one whose bins are shifted right
    by three positions, the sum of those two, and one whose bins are all
    zeroed.  Each is re-shaped with a range of requested bin counts, and
    the resulting bin count and bin total are asserted.
    """
    # TODO readQualDists are currently unpopulated; add
    # ('readQualDists', 50) back to this table when they're repopulated.
    cases = (('medianInsertDists', 200), ('readLenDists', 200))

    def fresh_set():
        # Every data set starts from the same fixed-bin stats.
        loaded = SubreadSet()
        loaded.loadStats(get_fixed_bin_sts())
        return loaded

    def dists_of(sset, name):
        return getattr(sset.metadata.summaryStats, name)

    def check_shaping(dists, requested, orig_len, want_len):
        # The input must keep `orig_len` bins, the shaped output must have
        # `want_len` bins, and the bin totals must agree.
        shaper = continuous_dist_shaper(dists, nbins=requested)
        shaped = [shaper(d) for d in dists]
        self.assertEqual(len(dists[0].bins), orig_len)
        self.assertEqual(len(shaped[0].bins), want_len)
        self.assertEqual(sum(dists[0].bins), sum(shaped[0].bins))

    for dist_name, nbins in cases:
        ss = fresh_set()
        ss2 = fresh_set()

        # shift ss2: three leading zeros, last three bins dropped
        shifted = [0, 0, 0] + dists_of(ss2, dist_name)[0].bins[:-3]
        dists_of(ss2, dist_name)[0].bins = shifted

        ss3 = ss + ss2

        ss4 = fresh_set()

        # blank out ss4: same number of bins, every count zero
        zeroed = [0] * len(dists_of(ss4, dist_name)[0].bins)
        dists_of(ss4, dist_name)[0].bins = zeroed

        # For the all-zero distribution the shaped bin count is expected
        # to stay at nbins for every requested size.
        dists = dists_of(ss4, dist_name)
        self.assertEqual(len(dists), 1)
        for n in (0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500):
            check_shaping(dists, n, nbins, nbins)

        for sset in (ss, ss2, ss3):
            dists = dists_of(sset, dist_name)
            self.assertEqual(len(dists), 1)
            # requesting 0 bins is expected to be a no-op
            for n in (0,):
                check_shaping(dists, n, nbins, nbins)
            # every other request is expected to yield exactly n bins
            for n in (1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500):
                check_shaping(dists, n, nbins, n)
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    :param PlotConstants: object providing the PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX identifiers used for plot/group ids and file names
    :param title: title (and caption) applied to each plot and to the group
    :param readLenDists: iterable of length distributions to plot
    :param readQualDists: quality distributions; only referenced by the
        disabled code after the return statement
    :param output_dir: directory the PNG files are written to; paths stored
        in the report objects are relative to it
    :param dpi: forwarded to save_figure_with_thumbnail
    :param lenDistShaper: callable applied to each distribution before
        plotting; built from readLenDists when None

    :return: list containing a single PlotGroup for the length plots
    """
    length_plots = []
    # Thumbnail of the most recently rendered plot.  Stays None when
    # readLenDists is empty, so building the PlotGroup below no longer
    # fails on an unbound local in that case.
    group_thumbnail = None
    # ReadLen distribution to barplot:
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # re-shaping must not change the total of the bins
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(get_plot_xlabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        len_axes.set_ylabel(get_plot_ylabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title, caption=title,
                 thumbnail=group_thumbnail))
    plot_groups = [
        PlotGroup(PlotConstants.PG_LENGTH,
                  title=title,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
    return plot_groups

    # FIXME these aren't useful yet
    # Everything below is intentionally unreachable; it is kept so the
    # quality plots can be re-enabled by removing the return above.
    qual_plots = []
    # ReadQual distribution to barplot:
    shaper = continuous_dist_shaper(readQualDists, trim_excess=True)
    for i, orig_rqualdist in enumerate(readQualDists):
        rqualdist = shaper(orig_rqualdist)
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels, rqualdist.bins,
                      color=get_green(0), edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        qual_axes.set_xlabel(get_plot_xlabel(spec, PlotConstants.PG_QUAL,
                                             PlotConstants.P_QUAL))
        qual_axes.set_ylabel(get_plot_ylabel(spec, PlotConstants.PG_QUAL,
                                             PlotConstants.P_QUAL))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_QUAL_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(qual_fig, png_fn,
                                                              dpi=dpi)
        qual_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_QUAL),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(
        PlotGroup(PlotConstants.PG_QUAL,
                  plots=qual_plots))
    return plot_groups
    def test_reports_with_fixed_bins(self):
        """Re-shape fixed-bin distributions and verify bin counts and totals.

        Per distribution name, data sets loaded from get_fixed_bin_sts()
        are used as-is, with the bins shifted right by three, summed
        together, and with all bins zeroed.  continuous_dist_shaper is then
        applied with several requested bin counts.
        """
        # TODO readQualDists are currently unpopulated; extend both tuples
        # below with 'readQualDists' / 50 when they're repopulated.
        names = ("medianInsertDists", "readLenDists")
        expected_bins = (200, 200)

        def load_stats_set():
            # Each data set is loaded independently from the fixed-bin stats.
            sset = SubreadSet()
            sset.loadStats(get_fixed_bin_sts())
            return sset

        def run_checks(dists, nbins, requests):
            # `requests` holds (requested nbins, expected shaped length)
            # pairs; the source distribution must keep `nbins` bins and the
            # bin total must be preserved by shaping.
            self.assertEqual(len(dists), 1)
            for requested, shaped_len in requests:
                ds = continuous_dist_shaper(dists, nbins=requested)
                reshaped = [ds(dist) for dist in dists]
                self.assertEqual(len(dists[0].bins), nbins)
                self.assertEqual(len(reshaped[0].bins), shaped_len)
                self.assertEqual(sum(dists[0].bins), sum(reshaped[0].bins))

        for dist_name, nbins in zip(names, expected_bins):
            ss = load_stats_set()
            ss2 = load_stats_set()

            # shift ss2: prepend three empty bins, drop the last three
            original_bins = getattr(ss2.metadata.summaryStats,
                                    dist_name)[0].bins
            getattr(ss2.metadata.summaryStats, dist_name)[0].bins = (
                [0, 0, 0] + original_bins[:-3])

            ss3 = ss + ss2

            ss4 = load_stats_set()

            # empty ss4: keep the bin count, zero every bin
            original_bins = getattr(ss4.metadata.summaryStats,
                                    dist_name)[0].bins
            getattr(ss4.metadata.summaryStats, dist_name)[0].bins = (
                [0] * len(original_bins))

            # all-zero distribution: shaped length is expected to stay nbins
            run_checks(
                getattr(ss4.metadata.summaryStats, dist_name),
                nbins,
                [(n, nbins)
                 for n in (0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500)])

            # a request of 0 is expected to be a no-op; any other request
            # is expected to produce exactly that many bins
            requests = [(0, nbins)] + [
                (n, n)
                for n in (1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500)]
            for sset in (ss, ss2, ss3):
                run_checks(getattr(sset.metadata.summaryStats, dist_name),
                           nbins, requests)
示例#4
0
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Opens ``stats_xml`` as a SubreadSet; when that data set has no summary
    stats, the same path is loaded into it as a stats file.  Attributes and
    plot groups for the read and insert-read distributions are collected
    into a Report, which is returned after ``meta_rpt.apply_view``.

    :param stats_xml: path given to SubreadSet and, if needed, loadStats
    :type stats_xml: str
    :param output_dir: directory handed to the plotting helpers
    :type output_dir: str
    :param dpi: accepted but not referenced in this function body
    :type dpi: int

    :rtype: Report
    :raises IOError: when no read length distributions are available
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # stats_xml should be a dataset, in which case its uuid is reported:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    if not dset.metadata.summaryStats:
        # ... but an sts file works too; it would generate a new random
        # uuid, so no dataset uuid is reported for it
        dset.loadStats(stats_xml)
        dataset_uuids = []

    stats = dset.metadata.summaryStats
    if not stats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # One shaper built from all length distributions is shared by every
    # plot so that they all look the same.
    len_dist_shaper = continuous_dist_shaper(
        stats.readLenDists[:] + stats.insertReadLenDists[:],
        trim_excess=True)

    attr = to_read_stats_attributes(readLenDists=stats.readLenDists,
                                    readQualDists=stats.readQualDists)
    insert_attr = to_insert_stats_attributes(
        readLenDists=stats.insertReadLenDists,
        readQualDists=stats.insertReadQualDists)
    attr.extend(insert_attr)

    plot_groups = to_read_stats_plots(readLenDists=stats.readLenDists,
                                      readQualDists=stats.readQualDists,
                                      output_dir=output_dir,
                                      lenDistShaper=len_dist_shaper)
    insert_plot_groups = to_insert_stats_plots(
        readLenDists=stats.insertReadLenDists,
        readQualDists=stats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(insert_plot_groups)

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
示例#5
0
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds a Report from the read and insert-read distributions found in
    the summary stats of ``stats_xml`` and returns it after applying the
    ``meta_rpt`` view.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises IOError: when the summary stats have no read length
        distributions

    NOTE(review): ``dpi`` is not referenced anywhere in this body.
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # Normally stats_xml is a dataset and its uuid goes into the report.
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    if not dset.metadata.summaryStats:
        # Otherwise treat it as an sts file; that will generate a new
        # random uuid, so the report carries no dataset uuids.
        dset.loadStats(stats_xml)
        dataset_uuids = []

    summary = dset.metadata.summaryStats
    if not summary.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # All length distributions in this report should look the same, so a
    # single shaper is made here and passed to both plotting calls.
    combined_len_dists = (summary.readLenDists[:] +
                          summary.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(combined_len_dists,
                                             trim_excess=True)
    shared_plot_args = dict(output_dir=output_dir,
                            lenDistShaper=len_dist_shaper)

    attr = to_read_stats_attributes(
        readLenDists=summary.readLenDists,
        readQualDists=summary.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=summary.insertReadLenDists,
        readQualDists=summary.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=summary.readLenDists,
        readQualDists=summary.readQualDists,
        **shared_plot_args)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=summary.insertReadLenDists,
        readQualDists=summary.insertReadQualDists,
        **shared_plot_args))

    # build the report:
    return meta_rpt.apply_view(
        Report(meta_rpt.id,
               title=meta_rpt.title,
               attributes=attr,
               plotgroups=plot_groups,
               dataset_uuids=dataset_uuids))
示例#6
0
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Loads ``stats_xml`` as a SubreadSet (falling back to loading it as a
    stats file when the data set has no summary stats), derives the
    adapter-dimer and short-insert percentages, renders one bar plot per
    median-insert distribution into ``output_dir``, and returns the report
    after ``meta_rpt.apply_view``.

    :param stats_xml: path given to SubreadSet and, if needed, loadStats
    :type stats_xml: str
    :param output_dir: directory the PNG files are written to
    :type output_dir: str
    :param dpi: forwarded to save_figure_with_thumbnail
    :type dpi: int

    :rtype: Report
    :raises IOError: when no median insert distributions are available
    """
    # TODO: make dpi matter
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats (fractions converted to percentages, 2 decimals):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    # The axis labels are identical for every plot, so look them up once.
    meta_plot = meta_rpt.get_meta_plotgroup(
        Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart; the x positions are materialised as a list
        # because map() yields a lazy iterator on Python 3, which is not
        # array-like
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_plot.xlabel)
        ax.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # NOTE(review): the plot group is built but deliberately not attached
    # to the report (see the commented-out keyword below); the PNG files
    # are still written.  thumbnail_base is always bound here because the
    # guard above ensures at least one distribution.
    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]
    attributes = [
        Attribute(attr_id, value)
        for attr_id, value in zip(
            [Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
            [adapter_dimers, short_inserts])
    ]

    tables = []

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables,
                    )  # plotgroups=plot_groups)

    return meta_rpt.apply_view(report)
示例#7
0
def _to_read_stats_plots(PlotConstants,
                         title,
                         readLenDists,
                         readQualDists,
                         output_dir,
                         dpi=72,
                         lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    :param PlotConstants: object providing the PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX identifiers used for plot/group ids and file names
    :param title: not referenced in this function body
    :param readLenDists: iterable of length distributions to plot
    :param readQualDists: quality distributions; only referenced by the
        disabled code after the return statement
    :param output_dir: directory the PNG files are written to; paths stored
        in the report objects are relative to it
    :param dpi: forwarded to save_figure_with_thumbnail
    :param lenDistShaper: callable applied to each distribution before
        plotting; built from readLenDists when None

    :return: list containing a single PlotGroup for the length plots
    """
    length_plots = []
    # Thumbnail of the most recently rendered plot.  Stays None when
    # readLenDists is empty, so building the PlotGroup below no longer
    # fails on an unbound local in that case.
    group_thumbnail = None
    # ReadLen distribution to barplot:
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # re-shaping must not change the total of the bins
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels,
                     rlendist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        # one lookup serves both axis labels
        meta_plot = meta_rpt.get_meta_plotgroup(
            PlotConstants.PG_LENGTH).get_meta_plot(PlotConstants.P_LENGTH)
        len_axes.set_xlabel(meta_plot.xlabel)
        len_axes.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(
            output_dir, "{p}{i}.png".format(i=i,
                                            p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig,
                                                              png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=group_thumbnail))
    plot_groups = [
        PlotGroup(PlotConstants.PG_LENGTH,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
    return plot_groups

    # FIXME these aren't useful yet
    # Everything below is intentionally unreachable; it is kept so the
    # quality plots can be re-enabled by removing the return above.
    qual_plots = []
    # ReadQual distribution to barplot:
    shaper = continuous_dist_shaper(readQualDists, trim_excess=True)
    for i, orig_rqualdist in enumerate(readQualDists):
        rqualdist = shaper(orig_rqualdist)
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels,
                      rqualdist.bins,
                      color=get_green(0),
                      edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        # both labels come from the quality plot's metadata (the x label
        # previously used the length plot's ids)
        qual_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).xlabel)
        qual_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).ylabel)
        png_fn = os.path.join(
            output_dir, "{p}{i}.png".format(i=i,
                                            p=PlotConstants.P_QUAL_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(qual_fig,
                                                              png_fn,
                                                              dpi=dpi)
        qual_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_QUAL),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(PlotGroup(PlotConstants.PG_QUAL, plots=qual_plots))
    return plot_groups
示例#8
0
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """Build the adapter report for a data set with loaded summary stats.

    Derives the adapter-dimer and short-insert percentages, adds the local
    base rate attribute when exactly one base-rate distribution is present,
    renders one bar plot per median-insert distribution into
    ``output_dir``, and returns the report after ``spec.apply_view``.

    :param dset: data set whose ``metadata.summaryStats`` is read
    :param output_dir: directory the PNG files are written to
    :param dpi: forwarded to save_figure_with_thumbnail

    :return: result of ``spec.apply_view`` on the assembled Report
    :raises InvalidStatsError: when no median insert distributions are
        available
    """
    if not dset.metadata.summaryStats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats (fractions converted to percentages, 2 decimals):
    adapter_dimers = np.round(100.0 *
                              dset.metadata.summaryStats.adapterDimerFraction,
                              decimals=2)
    short_inserts = np.round(100.0 *
                             dset.metadata.summaryStats.shortInsertFraction,
                             decimals=2)
    attributes = [
        Attribute(attr_id, value)
        for attr_id, value in zip(
            [Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
            [adapter_dimers, short_inserts])
    ]

    if Constants.BASE_RATE_DIST in dset.metadata.summaryStats.tags:
        dist = dset.metadata.summaryStats[Constants.BASE_RATE_DIST]
        if len(dist) > 1:
            # Logger.warn is a deprecated alias; use warning()
            log.warning("Dataset was merged, local base rate not applicable")
        else:
            base_rate = dist[0].sampleMed
            attributes.append(Attribute(Constants.A_BASE_RATE, base_rate))
    else:
        log.warning("No local base rate distribution available")

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart; the x positions are materialised as a list
        # because map() yields a lazy iterator on Python 3, which is not
        # array-like
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0),
               edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        ax.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig,
                                                              png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # NOTE(review): the plot group is built but deliberately not attached
    # to the report (see the commented-out keyword below); the PNG files
    # are still written.  thumbnail_base is always bound here because the
    # guard at the top ensures at least one distribution.
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]
    tables = []

    report = Report(
        Constants.R_ID,
        attributes=attributes,
        tables=tables,
    )  # plotgroups=plot_groups)

    return spec.apply_view(report)