def test_to_dict(self):
        """
        Check that ``Report.to_dict`` namespaces the ids of child elements.

        Attributes, plot groups, plots, tables and columns are all expected
        to carry their parent's id as a dotted prefix in the serialized dict.
        """
        report = Report("redfang")
        for attr_id, attr_value in (("a", "b"), ("a2", "b2")):
            report.add_attribute(Attribute(attr_id, attr_value))

        plot_group = PlotGroup("pgid")
        for plot_id, image in (("pid", "anImg"), ("pid2", "anImg2")):
            plot_group.add_plot(Plot(plot_id, image))
        report.add_plotgroup(plot_group)

        table = Table("tabid")
        table.add_column(Column("c1"))
        report.add_table(table)

        serialized = report.to_dict()

        log.debug("\n" + pformat(serialized))

        self.assertEqual("redfang", serialized["id"])

        attributes = serialized["attributes"]
        self.assertEqual("redfang.a", attributes[0]["id"])
        self.assertEqual("redfang.a2", attributes[1]["id"])

        first_group = serialized["plotGroups"][0]
        self.assertEqual("redfang.pgid", first_group["id"])
        self.assertEqual("redfang.pgid.pid", first_group["plots"][0]["id"])
        self.assertEqual("redfang.pgid.pid2", first_group["plots"][1]["id"])

        first_table = serialized["tables"][0]
        self.assertEqual("redfang.tabid", first_table["id"])
        self.assertEqual("redfang.tabid.c1", first_table["columns"][0]["id"])
示例#2
0
    def test_to_dict_multi(self):
        """
        Serialize a report that holds several plot groups and tables.

        Every child id in the ``to_dict`` output is expected to be prefixed
        with the id of its parent element. The ``repr`` of the report is
        also checked to be non-None.
        """
        report = Report('redfang')
        report.add_attribute(Attribute('a', 'b'))
        report.add_attribute(Attribute('a2', 'b2'))

        first_group = PlotGroup('pgid')
        first_group.add_plot(Plot('pid', 'anImg'))
        first_group.add_plot(Plot('pid2', 'anImg2'))
        report.add_plotgroup(first_group)

        second_group = PlotGroup('pgid2')
        second_group.add_plot(Plot('pid2', 'anImg2'))
        second_group.add_plot(Plot('pid22', 'anImg22'))
        report.add_plotgroup(second_group)

        first_table = Table('tabid')
        first_table.add_column(Column('c1'))
        report.add_table(first_table)

        second_table = Table('tabid2')
        second_table.add_column(Column('c2'))
        report.add_table(second_table)

        serialized = report.to_dict()

        log.debug(str(serialized))

        self.assertEqual('redfang', serialized['id'])
        self.assertEqual('redfang.a', serialized['attributes'][0]['id'])
        self.assertEqual('redfang.a2', serialized['attributes'][1]['id'])

        group0 = serialized['plotGroups'][0]
        self.assertEqual('redfang.pgid', group0['id'])
        self.assertEqual('redfang.pgid.pid', group0['plots'][0]['id'])
        self.assertEqual('redfang.pgid.pid2', group0['plots'][1]['id'])

        group1 = serialized['plotGroups'][1]
        self.assertEqual('redfang.pgid2', group1['id'])
        self.assertEqual('redfang.pgid2.pid2', group1['plots'][0]['id'])
        self.assertEqual('redfang.pgid2.pid22', group1['plots'][1]['id'])

        table0 = serialized['tables'][0]
        self.assertEqual('redfang.tabid', table0['id'])
        self.assertEqual('redfang.tabid.c1', table0['columns'][0]['id'])

        table1 = serialized['tables'][1]
        self.assertEqual('redfang.tabid2', table1['id'])
        self.assertEqual('redfang.tabid2.c2', table1['columns'][0]['id'])

        log.info(repr(report))
        self.assertIsNotNone(repr(report))
示例#3
0
    def test_to_dict(self):
        """
        Verify id prefixing in the dict produced by ``Report.to_dict``.

        Each sub element (attribute, plot group, plot, table, column) should
        be emitted with its parent's id prepended to its own.
        """
        report = Report('redfang')
        report.add_attribute(Attribute('a', 'b'))
        report.add_attribute(Attribute('a2', 'b2'))

        plot_group = PlotGroup('pgid')
        plot_group.add_plot(Plot('pid', 'anImg'))
        plot_group.add_plot(Plot('pid2', 'anImg2'))
        report.add_plotgroup(plot_group)

        table = Table('tabid')
        table.add_column(Column('c1'))
        report.add_table(table)

        as_dict = report.to_dict()

        log.debug("\n" + pformat(as_dict))

        self.assertEqual('redfang', as_dict['id'])
        self.assertEqual('redfang.a', as_dict['attributes'][0]['id'])
        self.assertEqual('redfang.a2', as_dict['attributes'][1]['id'])

        group = as_dict['plotGroups'][0]
        self.assertEqual('redfang.pgid', group['id'])
        self.assertEqual('redfang.pgid.pid', group['plots'][0]['id'])
        self.assertEqual('redfang.pgid.pid2', group['plots'][1]['id'])

        table_dict = as_dict['tables'][0]
        self.assertEqual('redfang.tabid', table_dict['id'])
        self.assertEqual('redfang.tabid.c1', table_dict['columns'][0]['id'])
示例#4
0
    def test_get_plotgroup_by_id(self):
        """A plot group added to a report can be fetched back by its id."""
        expected_group = PlotGroup('pgid1')
        expected_group.add_plot(Plot('pid1', 'anImg'))
        report = Report('redfang')
        report.add_plotgroup(expected_group)

        self.assertEqual(report.get_plotgroup_by_id('pgid1'), expected_group)
示例#5
0
    def test_get_plotgroup_by_id_with_bad_id(self):
        """Looking up a plot group id that was never added yields None."""
        known_group = PlotGroup('pgid1')
        known_group.add_plot(Plot('pid1', 'anImg'))
        report = Report('redfang')
        report.add_plotgroup(known_group)

        self.assertIsNone(
            report.get_plotgroup_by_id('id_that_does_not_exist'))
示例#6
0
    def test_get_plotgroup_by_id(self):
        """``get_plotgroup_by_id`` returns the group registered under that id."""
        report = Report('redfang')
        registered = PlotGroup('pgid1')
        registered.add_plot(Plot('pid1', 'anImg'))
        report.add_plotgroup(registered)

        fetched = report.get_plotgroup_by_id('pgid1')
        self.assertEqual(fetched, registered)
示例#7
0
    def test_get_plotgroup_by_id_with_bad_id(self):
        """``get_plotgroup_by_id`` returns None for an unknown id."""
        report = Report('redfang')
        registered = PlotGroup('pgid1')
        registered.add_plot(Plot('pid1', 'anImg'))
        report.add_plotgroup(registered)

        missing = report.get_plotgroup_by_id('id_that_does_not_exist')
        self.assertIsNone(missing)
示例#8
0
    def test_get_plot_by_id(self):
        """A plot can be reached through its plot group by id."""
        expected_plot = Plot('pid1', 'anImg')
        group = PlotGroup('pgid1')
        group.add_plot(expected_plot)
        report = Report('redfang')
        report.add_plotgroup(group)

        fetched_group = report.get_plotgroup_by_id('pgid1')
        fetched_plot = fetched_group.get_plot_by_id('pid1')
        assert fetched_plot == expected_plot
示例#9
0
    def test_get_plot_by_id_with_bad_id(self):
        """Asking a plot group for an unknown plot id yields None."""
        group = PlotGroup('pgid1')
        group.add_plot(Plot('pid1', 'anImg'))
        report = Report('redfang')
        report.add_plotgroup(group)

        fetched_group = report.get_plotgroup_by_id('pgid1')
        missing_plot = fetched_group.get_plot_by_id('id_that_does_not_exist')
        assert missing_plot is None
示例#10
0
    def test_to_dict_multi(self):
        """
        Exercise ``Report.to_dict`` with two plot groups and two tables.

        All nested ids in the output should be prefixed with the id of the
        enclosing element; ``repr`` of the report must not be None.
        """
        report = Report("redfang")
        for attr_id, attr_value in (("a", "b"), ("a2", "b2")):
            report.add_attribute(Attribute(attr_id, attr_value))

        # Plot groups are built and registered one after another, in order.
        group_specs = (
            ("pgid", (("pid", "anImg"), ("pid2", "anImg2"))),
            ("pgid2", (("pid2", "anImg2"), ("pid22", "anImg22"))),
        )
        for group_id, plot_specs in group_specs:
            group = PlotGroup(group_id)
            for plot_id, image in plot_specs:
                group.add_plot(Plot(plot_id, image))
            report.add_plotgroup(group)

        for table_id, column_id in (("tabid", "c1"), ("tabid2", "c2")):
            table = Table(table_id)
            table.add_column(Column(column_id))
            report.add_table(table)

        out = report.to_dict()

        log.debug(str(out))

        self.assertEqual("redfang", out["id"])
        self.assertEqual("redfang.a", out["attributes"][0]["id"])
        self.assertEqual("redfang.a2", out["attributes"][1]["id"])

        group0 = out["plotGroups"][0]
        self.assertEqual("redfang.pgid", group0["id"])
        self.assertEqual("redfang.pgid.pid", group0["plots"][0]["id"])
        self.assertEqual("redfang.pgid.pid2", group0["plots"][1]["id"])

        group1 = out["plotGroups"][1]
        self.assertEqual("redfang.pgid2", group1["id"])
        self.assertEqual("redfang.pgid2.pid2", group1["plots"][0]["id"])
        self.assertEqual("redfang.pgid2.pid22", group1["plots"][1]["id"])

        table0 = out["tables"][0]
        self.assertEqual("redfang.tabid", table0["id"])
        self.assertEqual("redfang.tabid.c1", table0["columns"][0]["id"])

        table1 = out["tables"][1]
        self.assertEqual("redfang.tabid2", table1["id"])
        self.assertEqual("redfang.tabid2.c2", table1["columns"][0]["id"])

        log.info(repr(report))
        self.assertIsNotNone(repr(report))
    def test_basic(self):
        """A fully populated plot group serializes and passes validation."""
        first = Plot("p1", "image.png", thumbnail="thumb.png")
        second = Plot("p2", "image2.png", thumbnail="thumb2.png")
        group = PlotGroup(
            "my_id",
            title="My Plots",
            legend="Legend",
            thumbnail=first.thumbnail,
            plots=[first, second],
        )

        serialized = group.to_dict()
        validate_plot_group(serialized)
        self.assertIsNotNone(serialized)
示例#12
0
def to_report(ccs_set, output_dir):
    """
    Build the CCS report for a dataset.

    Per-movie statistics are gathered from ``ccs_set``, pooled into
    read-length, accuracy and pass-count arrays, and turned into four
    single-plot plot groups. A movie table (plus a barcode table when
    ``ccs_set.isBarcoded`` is true) and the attributes derived from the
    movie results complete the report.

    :param ccs_set: dataset providing ``toExternalFiles()``, ``isBarcoded``
                    and ``uuid``
    :param output_dir: directory handed to the plot-creation helpers
    :return: the report after ``meta_rpt.apply_view`` has been applied
    """
    log.info("Generating report from files: {f}".format(
        f=list(ccs_set.toExternalFiles())))
    bam_stats, movie_names = _stats_from_dataset(ccs_set)
    movie_results = _stats_to_movie_results(bam_stats, movie_names)
    log.debug("\n" + pformat(movie_results))

    # Pool the per-movie arrays into one array per metric.
    readlengths = np.concatenate([m.read_lengths for m in movie_results])
    accuracies = np.concatenate([m.accuracies for m in movie_results])
    num_passes = np.concatenate([m.num_passes for m in movie_results])

    # All plots are created first (in this order), then wrapped in groups.
    group_plots = [
        (Constants.PG_READLENGTH,
         create_readlength_plot(readlengths, output_dir)),
        (Constants.PG_ACCURACY,
         create_accuracy_plot(accuracies, output_dir)),
        (Constants.PG_NPASSES,
         create_npasses_plot(num_passes, output_dir)),
        (Constants.PG_SCATTER,
         create_scatter_plot((num_passes, accuracies), output_dir)),
    ]
    plotgroups = [
        PlotGroup(group_id, plots=[plot], thumbnail=plot.thumbnail)
        for group_id, plot in group_plots
    ]

    movie_table = _movie_results_to_table(movie_results)
    log.info(str(movie_table))
    tables = [movie_table]
    if ccs_set.isBarcoded:
        tables.append(_make_barcode_table(bam_stats, ccs_set))

    attributes = _movie_results_to_attributes(movie_results)

    report = Report(Constants.R_ID,
                    title="CCS Report",
                    tables=tables,
                    attributes=attributes,
                    plotgroups=plotgroups,
                    dataset_uuids=(ccs_set.uuid, ))

    return meta_rpt.apply_view(report)
    def test_basic(self):
        """Build a two-plot group, serialize it and validate the result."""
        plots = [
            Plot('p1', 'image.png', thumbnail='thumb.png'),
            Plot('p2', 'image2.png', thumbnail='thumb2.png'),
        ]
        # The group reuses the first plot's thumbnail as its own.
        group = PlotGroup('my_id', title="My Plots", legend="Legend",
                          thumbnail=plots[0].thumbnail, plots=plots)

        as_dict = group.to_dict()
        validate_plot_group(as_dict)
        self.assertIsNotNone(as_dict)
    def test_to_dict(self):
        """
        Test plotGroup to_dict function.

        Checks that id, title, legend and thumbnail are carried into the
        dict unchanged and that the single added plot is listed.
        """
        a = PlotGroup('123', title='foo title', legend='foo legend',
                      thumbnail='foo thumbnail')
        a.add_plot(Plot('id', 'i1', caption='a caption'))

        d = a.to_dict()
        log.debug(pformat(d))

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual('123', d['id'])
        self.assertEqual('foo title', d['title'])
        self.assertEqual('foo legend', d['legend'])
        self.assertEqual('foo thumbnail', d['thumbnail'])
        self.assertEqual(1, len(d['plots']))
        log.info(a)
        self.assertIsNotNone(repr(a))
示例#15
0
 def _create_coverage_histo_plot_grp(self, stats, output_dir):
     """
     Create the plot group holding the coverage histogram.

     The histogram figure is saved (with a thumbnail) as
     ``coverage_histogram.png`` inside ``output_dir``; only the base names
     of the saved files are stored on the plot and plot group.

     :param stats: (ReferenceStats) see _get_reference_coverage_stats
     :param output_dir: (string) where to write images
     :return: PlotGroup containing the single coverage histogram plot
     """
     figure, _ = self._create_histogram(stats)
     saved_paths = save_figure_with_thumbnail(
         figure, os.path.join(output_dir, 'coverage_histogram.png'))
     image_name, thumbnail_name = [
         os.path.basename(path) for path in saved_paths
     ]

     group_id = Constants.PG_COVERAGE_HIST
     plot_id = Constants.P_COVERAGE_HIST
     histogram_plot = Plot(
         plot_id,
         image_name,
         caption=get_plot_caption(self.spec, group_id, plot_id),
         title=get_plot_title(self.spec, group_id, plot_id))
     return PlotGroup(group_id,
                      thumbnail=thumbnail_name,
                      plots=[histogram_plot],
                      title=get_plotgroup_title(self.spec, group_id))
示例#16
0
def to_readlen_plotgroup(readlen_dist, output_dir):
    """
    Plot a read-length distribution as a bar chart and wrap it in a
    plot group.

    :param readlen_dist: distribution object exposing ``numBins``, ``bins``
                         and ``binWidth``
    :param output_dir: directory the PNG and its thumbnail are written to;
                       stored image paths are relative to it
    :return: list containing a single PlotGroup
    """
    plot_name = get_plot_title(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)
    x_label = get_plot_xlabel(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)
    y_label = get_plot_ylabel(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)
    nbins = readlen_dist.numBins
    heights = readlen_dist.bins
    bin_width = readlen_dist.binWidth
    # range() instead of the Python 2-only xrange(): works on both versions.
    edges = [float(bn) * bin_width for bn in range(nbins)]
    edges, heights, bin_width = reshape(readlen_dist, edges, heights)
    fig, ax = get_fig_axes_lpr()
    # Bars are only drawn when at least one bin is non-empty; otherwise the
    # axes are left blank but still labelled.
    if sum(readlen_dist.bins) > 0:
        ax.bar(edges, heights, color=get_green(0),
               edgecolor=get_green(0), width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    png_fn = os.path.join(
        output_dir, "{p}.png".format(p=Constants.P_READLENGTH))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    readlen_plot = Plot(Constants.P_READLENGTH,
                        os.path.relpath(png_base, output_dir),
                        title=plot_name, caption=plot_name,
                        thumbnail=os.path.relpath(thumbnail_base, output_dir))
    plot_groups = [PlotGroup(Constants.PG_READLENGTH, plots=[readlen_plot])]
    return plot_groups
    def test_basic(self):
        """Serialize a titled, two-plot group and run it through validation."""
        thumbnailed = Plot('p1', 'image.png', thumbnail='thumb.png')
        other = Plot('p2', 'image2.png', thumbnail='thumb2.png')
        group_kwargs = dict(title="My Plots",
                            legend="Legend",
                            thumbnail=thumbnailed.thumbnail,
                            plots=[thumbnailed, other])
        plot_group = PlotGroup('my_id', **group_kwargs)

        result = plot_group.to_dict()
        validate_plot_group(result)
        self.assertIsNotNone(result)
示例#18
0
def fasta_to_plot_group(fasta_file, output_dir):
    """
    Draw a histogram of sequence lengths in a FASTA file.

    The figure is saved as ``sequence_length_hist.png`` in ``output_dir``
    and referenced by file name from the returned plot group.

    :param fasta_file: path handed to ``FastaReader``
    :param output_dir: directory the PNG is written to
    :return: PlotGroup with id ``reference_hist`` holding the one plot
    """
    with FastaReader(fasta_file) as reader:
        lengths = [len(record.sequence) for record in reader]

    from pbreports.plot.helper import get_fig_axes  #pylint: disable=import-error
    from pbcommand.models.report import PlotGroup, Plot
    fig, ax = get_fig_axes()

    # With a single sequence, an explicit range of +/- 1 around the one
    # value is passed to hist().
    hist_kwargs = {}
    if len(lengths) == 1:
        only_length = lengths[0]
        hist_kwargs["range"] = (only_length - 1, only_length + 1)
    ax.hist(lengths, **hist_kwargs)

    ax.set_title("Sequence Length Histogram")
    ax.set_xlabel("Sequence Length")

    image_name = "sequence_length_hist.png"
    fig.savefig(os.path.join(output_dir, image_name))
    return PlotGroup("reference_hist", "Sequence Lengths",
                     plots=[Plot("sequence_lengths", image_name)])
示例#19
0
def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """
    Plot the HQ base fraction distribution as a bar chart.

    :param hqbasefraction_dist: mapping with ``'NumBins'``, ``'BinCounts'``
                                and ``'BinWidth'`` entries whose values
                                expose ``metavalue``
    :param output_dir: directory the PNG and thumbnail are written to;
                       stored image paths are relative to it
    :return: list containing a single PlotGroup
    """
    plot_name = get_plot_title(spec, Constants.PG_HQ, Constants.P_HQ)
    x_label = get_plot_xlabel(spec, Constants.PG_HQ, Constants.P_HQ)
    y_label = get_plot_ylabel(spec, Constants.PG_HQ, Constants.P_HQ)
    nbins = int(hqbasefraction_dist['NumBins'].metavalue)
    bin_counts = hqbasefraction_dist['BinCounts']
    heights = [int(bc.metavalue) for bc in bin_counts]
    # Left edges are spread evenly over [0, 1). range() replaces the
    # Python 2-only xrange() and works on both versions.
    edges = [float(bn) / float(nbins) for bn in range(nbins)]
    bin_width = float(hqbasefraction_dist['BinWidth'].metavalue)
    fig, ax = get_fig_axes_lpr()
    ax.bar(edges,
           heights,
           color=get_green(0),
           edgecolor=get_green(0),
           width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    png_fn = os.path.join(output_dir, "{p}.png".format(p=Constants.P_HQ))
    png_base, thumbnail_base = save_figure_with_thumbnail(fig,
                                                          png_fn,
                                                          dpi=DEFAULT_DPI)
    hq_plot = Plot(Constants.P_HQ,
                   os.path.relpath(png_base, output_dir),
                   title=plot_name,
                   caption=plot_name,
                   thumbnail=os.path.relpath(thumbnail_base, output_dir))
    plot_groups = [PlotGroup(Constants.PG_HQ, plots=[hq_plot])]
    return plot_groups
示例#20
0
def addQmodMotifHist(csvFile,
                     kinData,
                     outputFolder,
                     dpi=DEFAULT_DPI,
                     max_motifs=10):
    """
    Create the modification-QV motif histogram plot group.

    Nearly identical to addQmodHist, apart from also taking the
    motif_summary.csv file name.

    :param csvFile: motif summary CSV passed to ``plotMotifHist``
    :param kinData: kinetics data passed to ``plotMotifHist``
    :param outputFolder: directory the image and thumbnail are saved in
    :param dpi: resolution used when saving the figure
    :param max_motifs: forwarded to ``plotMotifHist``
    :return: PlotGroup whose plot and thumbnail use the saved base names
    """
    # Generate modification detection plot
    fig, _ = plotMotifHist(csvFile, kinData, max_motifs=max_motifs)

    target = os.path.join(outputFolder, Constants.I_MOTIFS_QMOD)
    png, thumbpng = PH.save_figure_with_thumbnail(fig, target, dpi=dpi)

    log.info((png, thumbpng))

    thumbnail_name = os.path.basename(thumbpng)
    motif_plot = Plot(Constants.P_MOD_QV,
                      image=os.path.basename(png),
                      thumbnail=thumbnail_name)

    return PlotGroup(Constants.PG_MOD_QV,
                     title=get_plotgroup_title(spec, Constants.PG_MOD_QV),
                     plots=[motif_plot],
                     thumbnail=thumbnail_name)
示例#21
0
def make_report(in_fn, out_dir='.', bounds=None, nolegend=False,
                reference=None, dpi=60, name=None):
    """AlignmentToPng Report

    Convert an input bam or DataSet XML file to a figure of Concordance vs.
    Subread Length and return a report referencing that figure.

    Args:
        in_fn: the bam, DataSet XML or cmp.h5 file to turn into a length vs
               concordance plot
        out_dir: the output directory the image is written to
        name: the image file name inside out_dir (no full path filenames!);
              defaults to the input's base name with a ``.png`` extension
        bounds: the figure limits (in xmin:xmax:ymin:ymax)
        nolegend: exclude the figure legend
        reference: the reference to use in the figure. Default of all
                   references
        dpi: the dots per inch (resolution) of the figure

    Returns:
        Report with a single plot group holding the generated plot.
    """

    data = _read_in_file(in_fn, reference)
    report = Report('alignment_to_png_report')

    if not name:
        input_stem = os.path.splitext(os.path.basename(in_fn))[0]
        name = '%s.png' % input_stem
    png_fn = os.path.join(out_dir, name)
    _make_plot(data, png_fn, bounds, dpi, nolegend)

    # The report stores only the image's base name, not its full path.
    alignment_plot = Plot('alignment_to_png_plot', os.path.basename(png_fn))
    report.add_plotgroup(
        PlotGroup(Constants.PLOT_GROUP_ID, plots=[alignment_plot]))
    return report
    def test_to_dict(self):
        """Check the top-level fields produced by PlotGroup.to_dict."""
        plot_group = PlotGroup('123',
                               title='foo title',
                               legend='foo legend',
                               thumbnail='foo thumbnail')
        plot_group.add_plot(Plot('id', 'i1', caption='a caption'))

        serialized = plot_group.to_dict()
        log.debug(pformat(serialized))

        expected_fields = (
            ('id', '123'),
            ('title', 'foo title'),
            ('legend', 'foo legend'),
            ('thumbnail', 'foo thumbnail'),
        )
        for key, expected in expected_fields:
            assert expected == serialized[key]
        assert 1 == len(serialized['plots'])
        log.info(plot_group)
        assert repr(plot_group) is not None
    def test_to_dict(self):
        """
        Test plotGroup to_dict function.

        The id, title, legend and thumbnail given to the constructor must
        appear unchanged in the dict, along with the one added plot.
        """
        a = PlotGroup('123',
                      title='foo title',
                      legend='foo legend',
                      thumbnail='foo thumbnail')
        a.add_plot(Plot('id', 'i1', caption='a caption'))

        d = a.to_dict()
        log.debug(pformat(d))

        # assertEquals is a deprecated alias that Python 3.12 removed;
        # assertEqual is available on every supported version.
        self.assertEqual('123', d['id'])
        self.assertEqual('foo title', d['title'])
        self.assertEqual('foo legend', d['legend'])
        self.assertEqual('foo thumbnail', d['thumbnail'])
        self.assertEqual(1, len(d['plots']))
        log.info(a)
        self.assertIsNotNone(repr(a))
示例#24
0
def _get_plot_group_length(control_data, sample_data, output_dir):
    """
    Create the read-length plot group and return it.

    The figure built from the control and sample data is saved as
    ``control_non-control_readlength.png`` in ``output_dir``; the group's
    thumbnail is the base name of the saved thumbnail file.
    """
    image_name = 'control_non-control_readlength.png'
    figure = _create_length_figure(control_data, sample_data)
    saved = save_figure_with_thumbnail(
        figure, os.path.join(output_dir, image_name))
    # Index 1 of the save result is the thumbnail path.
    thumbnail_path = saved[1]
    return PlotGroup(Constants.PG_LENGTH,
                     thumbnail=os.path.basename(thumbnail_path),
                     plots=[Plot(Constants.P_LENGTH, image_name)])
示例#25
0
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary TXT file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file.

    outDir --- directory handed to the read length plot helper.

    Returns the report after ``meta_rpt.apply_view`` has been applied.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the dataset once: collect the read lengths and the UUID from the
    # same handle, and close it even if reading fails. (Previously the file
    # was opened a second time just for the UUID and never closed.)
    reader = ContigSet(inReadsFN)
    try:
        readlengths = np.array([len(rec.sequence) for rec in reader])
        dataset_uuids = [reader.uuid]
    finally:
        reader.close()

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    attributes = _report_to_attributes(inSummaryFN)
    # NOTE(review): the loaded report is not used below; the call is kept
    # in case it is relied on to validate the summary file -- confirm.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?
    r = load_report_from_json(inSummaryFN)

    # NOTE(review): the table is only logged, it is not attached to the
    # report -- confirm that is intended.
    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of ID, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    title=meta_rpt.title,
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
示例#26
0
def make_report(hq_transcripts_file, lq_transcripts_file, output_dir):
    """
    Build the transcript report from HQ and LQ transcript sets.

    Read lengths and read scores of both sets are pooled (HQ records
    first) and plotted as two histograms; the record counts of each set
    become the report attributes.

    :param hq_transcripts_file: path opened as a strict TranscriptSet
    :param lq_transcripts_file: path opened as a strict TranscriptSet
    :param output_dir: directory handed to the plot helpers
    :return: the report after ``spec.apply_view`` has been applied
    """
    log.info("Plotting read length histogram from files: {h} {l}".
             format(h=hq_transcripts_file, l=lq_transcripts_file))

    # Collect read lengths and average qvs
    ds_hq = TranscriptSet(hq_transcripts_file, strict=True)
    ds_lq = TranscriptSet(lq_transcripts_file, strict=True)
    lengths = []
    hq_qvs = []
    lq_qvs = []
    # Each dataset is walked once; scores go to the list paired with it.
    for dataset, scores in ((ds_hq, hq_qvs), (ds_lq, lq_qvs)):
        for record in dataset:
            lengths.append(float(record.qLen))
            scores.append(record.readScore)
    readlengths = np.array(lengths)
    avgqvs = np.array(hq_qvs + lq_qvs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)
    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, output_dir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    report = Report(Constants.R_ID,
                    attributes=[
                        Attribute(Constants.A_N_HQ_ID, value=len(ds_hq)),
                        Attribute(Constants.A_N_LQ_ID, value=len(ds_lq)),
                    ],
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=[ds_hq.uuid, ds_lq.uuid])
    return spec.apply_view(report)
示例#27
0
def make_plots(bc_groups, base_dir):
    """
    Generate all plots, both 1D and 2D, and return a list of PlotGroups.

    Groups labelled ``Constants.LABEL_NONE`` are excluded; the remaining
    groups are ordered by descending ``n_reads`` before plotting.

    :param bc_groups: iterable of barcode groups exposing ``label`` and
                      ``n_reads``
    :param base_dir: directory handed to every plot helper
    :return: list of three PlotGroups (stats, barcode quality, 2D histograms)
    """
    groups = [g for g in bc_groups if g.label != Constants.LABEL_NONE]
    # key=/reverse= replaces the Python 2-only cmp-function form of sort();
    # the sort stays stable, so ties keep their original relative order.
    groups.sort(key=lambda g: g.n_reads, reverse=True)
    plot_nreads = make_nreads_line_plot(groups, base_dir)
    log.info("Generating 1D histograms...")
    plot_nreads_hist = make_nreads_histogram(groups, base_dir)
    plot_rl = make_readlength_histogram(groups, base_dir)
    log.info("Generating barcode quality score plots...")
    plot_bq = make_bcqual_histogram(groups, base_dir)
    log.info("Generating 2D histograms...")
    plot_rl2d = make_readlength_hist2d(groups, base_dir)
    # Separate name for the 2D plot so it no longer shadows the 1D one.
    plot_bq2d = make_bcqual_hist2d(groups, base_dir)
    return [
        PlotGroup(Constants.PG_STATS,
                  plots=[plot_nreads, plot_nreads_hist, plot_rl]),
        PlotGroup(Constants.PG_BQ, plots=[plot_bq]),
        PlotGroup(Constants.PG_HIST2D, plots=[plot_rl2d, plot_bq2d])
    ]
示例#28
0
def _to_plot_group(d):
    """
    Convert a plot-group dict into a PlotGroup.

    ``d['id']`` is required; ``legend``, ``thumbnail`` and ``title``
    default to None and a missing ``plots`` key yields an empty plot list.
    """
    group_id = _to_id(d['id'])
    # is title optional? It is treated as such here.
    plots = [_to_plot(plot_dict) for plot_dict in d.get('plots', [])]
    return PlotGroup(group_id,
                     title=d.get('title'),
                     legend=d.get('legend'),
                     plots=plots,
                     thumbnail=d.get('thumbnail'))
示例#29
0
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    Builds the kinetics plot group (scatter plot and histogram) from the
    base modifications HDF5 file and writes the report as JSON.

    :param modifications_h5: path to the base modifications HDF5 file
    :param report: file name of the JSON report, relative to output_dir
    :param output_dir: directory for the plots and the JSON report
    :param dpi: resolution handed to the plot helpers
    :return: 0
    """
    # Open read-only and as a context manager so the HDF5 handle is always
    # released, even if plotting or writing the report raises.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)
        pg = PlotGroup(Constants.PG_KIN,
                       title=get_plotgroup_title(spec, Constants.PG_KIN),
                       thumbnail=scatter.thumbnail,
                       plots=[scatter, hist])
        rpt = Report(spec.id, plotgroups=[pg])
        rpt = spec.apply_view(rpt)
        rpt.write_json(os.path.join(output_dir, report))
    return 0
示例#30
0
 def apply_plotgroup_view(self, plotgroup):
     """
     Return a new PlotGroup with this view's defaults applied.

     The new group takes this object's id. A legend or title that is None
     on ``plotgroup`` is replaced by this object's value, and every plot is
     passed through the ``apply_plot_view`` of the meta plot with the same
     id.
     """
     if plotgroup.legend is None:
         legend = self.legend
     else:
         legend = plotgroup.legend
     if plotgroup.title is None:
         title = self.title
     else:
         title = plotgroup.title
     thumbnail = plotgroup.thumbnail
     viewed_plots = [
         self.get_meta_plot(plot.id).apply_plot_view(plot)
         for plot in plotgroup.plots
     ]
     return PlotGroup(self.id,
                      title=title,
                      legend=legend,
                      thumbnail=thumbnail,
                      plots=viewed_plots)
示例#31
0
def _create_variants_plot_grp(top_contigs, var_map, output_dir):
    """
    Create the plot group that contains variants plots of the top contigs.

    Contigs without an entry in ``var_map`` are skipped. The legend file
    and the group thumbnail are produced while handling the first plotted
    contig; later figures are saved without a thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param var_map: (dict string:ContigVariants) mapping of contig.header to ContigVariants object
    :param output_dir: (string) where to write images
    :return: PlotGroup with one plot per contig found in ``var_map``
    """
    plots = []
    thumbnail = None
    legend = None
    for contig in top_contigs:
        if contig.header not in var_map:
            # no coverage of this contig
            continue
        contig_variants = var_map[contig.header]
        bars = _create_bars(contig_variants)
        if legend is None:
            legend = _get_legend_file(bars, output_dir)

        fig, _ = _create_contig_fig_ax(bars, _get_x_labels(contig_variants))

        image_path = os.path.join(output_dir, contig_variants.file_name)
        if thumbnail is not None:
            fig.savefig(image_path, dpi=DEFAULT_DPI)
        else:
            saved = PH.save_figure_with_thumbnail(fig, image_path)
            thumbnail = os.path.basename(saved[1])

        # Plot ids are numbered by how many plots precede this one.
        plot_id = 'coverage_variants_{i}'.format(i=str(len(plots)))
        caption = "Observed variants across {c}".format(
            c=contig_variants.name)
        plots.append(Plot(plot_id,
                          os.path.basename(image_path),
                          title=caption,
                          caption=caption))
        plt.close(fig)

    return PlotGroup(Constants.PG_VARIANTS,
                     thumbnail=thumbnail,
                     legend=legend,
                     plots=plots)
示例#32
0
    def _create_coverage_plot_grp(self, top_contigs, cov_map, output_dir):
        """
        Create the plot group that contains the coverage plots of the top contigs.

        Contigs without an entry in ``cov_map`` are skipped (and logged).
        The group thumbnail is produced while saving the first plotted
        contig; later figures are saved without a thumbnail.

        :param top_contigs: (list of Contig objects) sorted by contig size
        :param cov_map: (dict string:ContigCoverage) mapping of contig.id to ContigCoverage object
        :param output_dir: (string) where to write images
        :return: PlotGroup with one plot per contig found in ``cov_map``
        """
        plots = []
        thumbnail = None
        log.debug('Creating plots for {n} top contig(s)'.format(
            n=str(len(top_contigs))))
        for contig in top_contigs:
            if contig.id not in cov_map:
                # no coverage of this contig
                log.debug(
                    'contig {c} has no coverage info '.format(c=contig.id))
                continue
            contig_coverage = cov_map[contig.id]
            fig, _ = self._create_contig_plot(contig_coverage)

            image_path = os.path.join(output_dir, contig_coverage.file_name)
            if thumbnail is not None:
                fig.savefig(image_path)
            else:
                saved = save_figure_with_thumbnail(fig, image_path)
                thumbnail = os.path.basename(saved[1])
            plt.close(fig)

            # Plot ids are numbered by how many plots precede this one.
            plot_id = "coverage_contig_{i}".format(i=str(len(plots)))
            group_spec = self.spec.get_plotgroup_spec(Constants.PG_COVERAGE)
            plot_spec = group_spec.get_plot_spec(Constants.P_COVERAGE)
            caption_template = plot_spec.caption + " {c}."
            plots.append(
                Plot(plot_id,
                     os.path.basename(image_path),
                     caption_template.format(c=contig_coverage.name),
                     title=caption_template.format(c=contig_coverage.name)))

        return PlotGroup(Constants.PG_COVERAGE,
                         title=get_plotgroup_title(
                             self.spec, Constants.PG_COVERAGE),
                         thumbnail=thumbnail,
                         plots=plots)
示例#33
0
def to_report(fastq_files, qv_hist=None, readlength_hist=None):
    """
    Generate a histogram of read lengths and quality values.

    :param fastq_files: FASTQ inputs passed to ``fastq_files_to_stats``
    :param qv_hist: path to save the QV histogram to, or None to skip it
    :param readlength_hist: path to save the read length histogram to, or
                            None to skip it
    :return: the report after ``spec.apply_view`` has been applied
    """
    fastq_stats = fastq_files_to_stats(fastq_files)
    stats = list(fastq_stats.values())

    table = _generate_table(stats)
    log.debug(str(table))

    # Each figure is closed right after it is saved. The previous single
    # plt.close(fig) after both branches raised NameError when neither
    # histogram was requested and leaked the first figure when both were.
    if qv_hist is not None:
        fig, _ = to_qv_histogram(stats)
        fig.savefig(qv_hist)
        plt.close(fig)
    if readlength_hist is not None:
        fig, _ = to_read_length_histogram(stats)
        fig.savefig(readlength_hist)
        plt.close(fig)
    # NOTE(review): the plot is still created when readlength_hist is None,
    # as before -- confirm whether the plot group should be omitted then.
    readlength_hist_plot = Plot(Constants.P_RL, readlength_hist)
    plotgroup = PlotGroup(Constants.PG_RL, plots=[readlength_hist_plot])
    # Fixed typo: "Constats" raised NameError on every call.
    report = Report(Constants.R_ID, tables=[table], plotgroups=[plotgroup])
    return spec.apply_view(report)
示例#34
0
def _to_workflow_report(job_resources, bg, workflow_opts, task_opts, state, was_successful, plot_images):
    """
    Copy the plot images into the job's image directory and build the
    "workflow_report" Report that references them.

    Only ``job_resources.images`` and ``plot_images`` are read here; the
    remaining parameters are accepted but not used by this function.

    :param job_resources: object whose ``images`` attribute is the
        destination directory for the copied images
    :param plot_images: iterable of image paths; when falsy the report has
        no plot groups
    :return: Report with zero or one plot group ("workflow_state_plots")
    """
    if not plot_images:
        return Report("workflow_report", plotgroups=[])

    images_dir = job_resources.images
    images_dir_name = os.path.basename(images_dir)

    workflow_plots = []
    for index, source_image in enumerate(plot_images):
        image_name = os.path.basename(source_image)
        shutil.copy(source_image, os.path.join(images_dir, image_name))
        # The report references the image relative to the directory that
        # contains the images folder, e.g. images/my-image.png
        relative_image = os.path.join(images_dir_name, image_name)
        workflow_plots.append(Plot("plot_{i}".format(i=index), relative_image))

    state_plots = PlotGroup("workflow_state_plots", plots=workflow_plots)
    return Report("workflow_report", plotgroups=[state_plots])
示例#35
0
def to_mod_report(motif_summary_csv, output_dir):
    """
    Build the modifications report from a motif summary CSV.

    The CSV is loaded with ``readModificationCsvGz``; the plots returned by
    ``addQmodPlot`` and ``addQmodHist`` (in that order) form the single plot
    group of the report.

    :param motif_summary_csv: input passed to ``readModificationCsvGz``
    :param output_dir: directory passed to both plot helpers
    :return: the report after ``spec.apply_view`` has been applied to it
    """
    kin_data = readModificationCsvGz(motif_summary_csv)

    qmod_plots = [
        addQmodPlot(kin_data, output_dir),
        addQmodHist(kin_data, output_dir),
    ]
    plot_group = PlotGroup(
        Constants.PG_MOD,
        title=get_plotgroup_title(spec, Constants.PG_MOD),
        plots=qmod_plots)

    return spec.apply_view(Report(Constants.R_ID, plotgroups=[plot_group]))
示例#36
0
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Create the polished assembly report and write it to disk.

    :param report: (str) file name of the JSON report, joined onto output_dir
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param output_dir: (str) directory receiving the plot, the JSON report
        and the coverage-vs-quality CSV
    :return: 0
    """
    script_name = os.path.basename(__file__)
    log.info("Starting version {f} v{x}".format(x=__version__, f=script_name))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    # The return value is not used; presumably this fills coverage into
    # `contigs` in place -- verify against _get_contig_coverage.
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")
    coverage_plot = _coverage_vs_quality_plot(contigs, output_dir)
    coverage_group = PlotGroup(Constants.PG_COVERAGE,
                               thumbnail=coverage_plot.thumbnail,
                               plots=[coverage_plot])

    rep = Report(Constants.R_ID)
    rep.add_attribute(Attribute(Constants.A_N_CONTIGS, len(contigs)))

    # Every length-based attribute is computed from the same ascending list.
    sorted_lengths = sorted(contig.length for contig in contigs.values())
    attribute_builders = (
        _get_att_max_contig_length,
        _get_att_n_50_contig_length,
        _get_att_sum_contig_lengths,
        _get_att_esize_contig_length,
    )
    for build_attribute in attribute_builders:
        rep.add_attribute(build_attribute(sorted_lengths))

    rep.add_plotgroup(coverage_group)
    rep = spec.apply_view(rep)

    rep.write_json(os.path.join(output_dir, report))
    _write_coverage_vs_quality_csv(contigs, output_dir)

    return 0
示例#37
0
def to_rl_overlay_plot(numunfilteredbasecalls_dist, readlen_dist, output_dir):
    """
    Overlay the "Unfiltered" and "Polymerase" histograms on one set of axes,
    save the figure together with a thumbnail and wrap it in a plot group.

    :param numunfilteredbasecalls_dist: mapping read through its 'BinCounts'
        (iterable of items with ``metavalue``) and 'BinWidth' entries
    :param readlen_dist: mapping with the same two entries
    :param output_dir: directory the PNG is written to; image paths in the
        returned plot are relative to it
    :return: list containing the single PlotGroup
    """
    title = get_plot_title(spec, Constants.PG_RRL, Constants.P_RRL)
    xlabel = get_plot_xlabel(spec, Constants.PG_RRL, Constants.P_RRL)
    ylabel = get_plot_ylabel(spec, Constants.PG_RRL, Constants.P_RRL)

    unfiltered_counts = [
        int(entry.metavalue)
        for entry in numunfilteredbasecalls_dist['BinCounts']
    ]
    polymerase_counts = [
        int(entry.metavalue) for entry in readlen_dist['BinCounts']
    ]

    # Upper edge of each histogram: number of bins times the bin width.
    unfiltered_width = int(numunfilteredbasecalls_dist['BinWidth'].metavalue)
    unfiltered_max = len(unfiltered_counts) * unfiltered_width
    polymerase_width = int(readlen_dist['BinWidth'].metavalue)
    polymerase_max = len(polymerase_counts) * polymerase_width

    series = (
        ("Unfiltered",
         expand_data(unfiltered_counts, unfiltered_max),
         unfiltered_counts,
         unfiltered_max),
        ("Polymerase",
         expand_data(polymerase_counts, polymerase_max),
         polymerase_counts,
         polymerase_max),
    )

    fig, ax = get_fig_axes_lpr()
    for label, values, counts, upper in series:
        ax.hist(values,
                label=label,
                histtype='stepfilled',
                alpha=0.3,
                bins=len(counts),
                range=[0, upper])
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()

    png_path = os.path.join(output_dir, "{p}.png".format(p=Constants.P_RRL))
    saved = save_figure_with_thumbnail(fig, png_path, dpi=DEFAULT_DPI)
    png_base, thumbnail_base = saved

    image_rel = os.path.relpath(png_base, output_dir)
    thumbnail_rel = os.path.relpath(thumbnail_base, output_dir)
    overlay_plot = Plot(Constants.P_RRL,
                        image_rel,
                        title=title,
                        caption=title,
                        thumbnail=thumbnail_rel)
    return [PlotGroup(Constants.PG_RRL, plots=[overlay_plot])]
示例#38
0
def to_plotgroup(plot_json, output_dir):
    """
    Render the per-sample figure, save it with a thumbnail and return the
    plot group that wraps it.

    With at least one sample the figure is a grid of one row per sample and
    two columns, samples taken in sorted key order, with a
    "Deletions"/"Insertions" legend. With no samples an empty figure is
    saved instead.

    :param plot_json: mapping of sample -> data handed to ``add_subplots``
    :param output_dir: directory the PNG is written to; image paths in the
        returned plot are relative to it
    :return: PlotGroup holding the single plot
    """
    n_samples = len(plot_json)
    if not n_samples:
        fig = plt.figure()
    else:
        fig, ax = plt.subplots(n_samples,
                               2,
                               figsize=(15, n_samples * 5),
                               squeeze=False)
        by_sample = collections.OrderedDict(sorted(plot_json.items()))
        # One running maximum per column; presumably updated in place by
        # add_subplots -- verify against that helper.
        y_max = [0, 0]
        for row_index, (sample, data) in enumerate(by_sample.iteritems()):
            add_subplots(fig, ax, sample, data, row_index, y_max)
        label_rows(fig, ax, by_sample.keys())
        label_columns(fig, ax)
        # Give all rows of a column the same upper limit, with 10% headroom.
        for row in xrange(n_samples):
            for col in (0, 1):
                ax[row, col].set_ylim(top=y_max[col] * 1.1)
        deletion_patch = mpatches.Patch(color='#FF7E79', linewidth=0)
        insertion_patch = mpatches.Patch(color='#A9D18E', linewidth=0)
        fig.legend((deletion_patch, insertion_patch),
                   ("Deletions", "Insertions"),
                   "upper left",
                   fontsize=15)

    title = get_plot_title(spec, Constants.PG_SV, Constants.P_SV)
    png_path = os.path.join(output_dir, "{p}.png".format(p=Constants.P_SV))
    saved = save_figure_with_thumbnail(fig,
                                       png_path,
                                       dpi=DEFAULT_DPI,
                                       bbox_inches='tight')
    png_base, thumbnail_base = saved

    sv_plot = Plot(Constants.P_SV,
                   os.path.relpath(png_base, output_dir),
                   title=title,
                   caption=title,
                   thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return PlotGroup(Constants.PG_SV, plots=[sv_plot])
示例#39
0
def make_modifications_report(modifications_h5,
                              report,
                              output_dir,
                              dpi=DEFAULT_DPI):
    """
    Entry point to report generation.

    :param modifications_h5: path of the HDF5 file, opened with h5py
    :param report: file name of the JSON report, joined onto output_dir
    :param output_dir: directory passed to the plot helpers and receiving
        the JSON report
    :param dpi: passed through to ``get_qmod_plot`` and ``get_qmod_hist``
    :return: 0
    :raises ImportError: if h5py is not installed
    """
    try:
        import h5py
    except ImportError:
        raise ImportError("This module requires that h5py be installed")
    # Open explicitly read-only (the input is only read here) and let the
    # context manager close the handle even if a plot helper raises.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
 def _test():
     """Add two plots that share the id 'id' to the plot group 'foo'."""
     group = PlotGroup('foo')
     for image in ('i1', 'i2'):
         group.add_plot(Plot('id', image))
示例#41
0
    def to_report(self, output_dir, report_id=Constants.R_ID):
        """
        Run the statistics models over the movies and assemble the Report.

        This needs to be cleaned up. Keeping the old interface for testing
        purposes.

        :param output_dir: directory handed to ``to_plot_groups`` and
            ``self.add_more_plots``
        :param report_id: id given to the returned Report
        :return: Report built from the total attributes, the plot groups,
            the movie statistics table and ``self.dataset_uuids``
        """
        started_at = time.time()

        log.info("Found {n} movies.".format(n=len(self.movies)))

        log.info("Working from {n} alignment file{s}: {f}".format(
            n=len(self.alignment_file_list),
            s='s' if len(self.alignment_file_list) > 1 else '',
            f=self.alignment_file_list))

        # make this a dict {attribute_key_name:Aggregator} so it's easy to
        # access the instances after they've been computed.
        # there's duplicated keys in the attributes?
        # number_of_aligned_reads/mapped_reads_n
        _total_aggregators = self._get_total_aggregators()
        # The "total" model accepts every record.
        null_filter = lambda r: True
        total_model = StatisticsModel(
            _total_aggregators.values(), filter_func=null_filter)

        # need to create specific instances for a given movie. This is used to
        # create the mapping reports stats table
        movie_models = {}

        def _my_filter(movie_name1, movie_name2):
            return movie_name1 == movie_name2

        for movie in self.movies:
            # Fresh aggregator instances per movie, one per column class.
            ags = [k() for k in self.COLUMN_AGGREGATOR_CLASSES]
            # Note this WILL NOT work because of how scope works in python
            # filter_by_movie_func = lambda m_name: movie.name == m_name
            # functools.partial binds the current movie as the first argument
            # instead.
            _my_filter_func = functools.partial(_my_filter, movie)
            model = StatisticsModel(ags, filter_func=_my_filter_func)
            movie_models[movie] = model

        # The statistic models that will be run
        # NOTE(review): list + dict.values() (and the iteritems() calls
        # below) only work where values() returns a list, i.e. Python 2.
        all_models = [total_model] + movie_models.values()
        log.debug(all_models)

        # Run all the analysis. Now the aggregators can be accessed

        analyze_movies(self.movies, self.alignment_file_list, all_models)

        # temp structure used to create the report table. The order is
        # important

        # add total values
        _to_a = lambda k: _total_aggregators[k].attribute
        _row = [_to_a(n) for n in self.COLUMN_ATTR]
        _row.insert(0, 'All Movies')
        movie_datum = [_row]

        # Add each individual movie stats
        for movie_name_, model_ in movie_models.iteritems():
            _row = [movie_name_]
            for a in model_.aggregators:
                _row.append(a.attribute)
            movie_datum.append(_row)
        log.info(movie_datum)

        # create the Report table

        table = self._to_table(movie_datum)

        # Log the per-movie aggregators, then the totals.
        for movie_name, model in movie_models.iteritems():
            log.info("Movie name {n}".format(n=movie_name))
            for a in model.aggregators:
                log.info(movie_name + " " + repr(a))

        log.info("")
        log.info("Total models")
        for a in total_model.aggregators:
            log.info(a)

        attributes = get_attributes(_total_aggregators)

        log.info("Attributes from streaming mapping Report.")
        for a in attributes:
            log.info(a)

        plot_config_views = self._get_plot_view_configs()
        plot_groups = []

        # Histogram and rainbow plot groups are only built when the dataset
        # is non-empty.
        ds = openDataFile(self.alignment_file)
        ds.updateCounts()
        if len(ds) > 0:
            # keeping the ids independent requires a bit of dictionary madness
            # {report_id:HistogramAggregator}
            id_to_aggregators = {k: _total_aggregators[v]
                                 for k, v in self.HISTOGRAM_IDS.iteritems()}
            plot_groups = to_plot_groups(plot_config_views, output_dir,
                                         id_to_aggregators)
            rb_pg = PlotGroup(Constants.PG_RAINBOW)
            # NOTE(review): the bare file name is passed to make_rainbow_plot
            # without output_dir -- looks like the image is written relative
            # to the current working directory; confirm this is intended.
            rb_png = "mapped_concordance_vs_read_length.png"
            make_rainbow_plot(self.alignment_file, rb_png)
            rb_plt = Plot(Constants.P_RAINBOW, rb_png,
                          caption=get_plot_caption(spec, Constants.PG_RAINBOW,
                                                   Constants.P_RAINBOW))
            rb_pg.add_plot(rb_plt)
            plot_groups.append(rb_pg)
        # Called in both cases, with whatever plot groups exist so far.
        self.add_more_plots(plot_groups, output_dir)

        tables = [table]
        report = Report(report_id,
                        attributes=attributes,
                        plotgroups=plot_groups,
                        tables=tables,
                        dataset_uuids=self.dataset_uuids)

        log.debug(report)

        run_time = time.time() - started_at
        log.info("Completed running in {s:.2f} sec.".format(s=run_time))
        return report