def test_make_adapter_report_sts_xml(self):
    """ Test make_adapter_report with an sts.xml file """
    # NOTE: the docstring above is emitted through log.info() via __doc__,
    # so its text is part of the test's runtime output.
    #
    # Histogram generation is pbcore.io.dataset's responsibility and should
    # be tested there; this test only inspects the resulting report.
    try:
        log.info(
            TestXMLstatsRpts.test_make_adapter_report_sts_xml.__doc__)
        report = make_adapter_report(data.getStats(1),
                                     self.get_output_dir())
        first_table = json.loads(report.to_json())['tables'][0]
        name_col = first_table['columns'][0]
        value_col = first_table['columns'][1]

        # First column: row labels, checked in row order.
        expected_labels = ('Adapter Dimers (0-10bp)',
                           'Short Inserts (11-100bp)')
        for row, label in enumerate(expected_labels):
            self.assertEqual(label, name_col['values'][row])

        # Second column: both rows are expected to be exactly 0.0.
        for row in (0, 1):
            self.assertEqual(0.0, value_col['values'][row])

        # The report is also expected to have written its plot to disk.
        plot_path = os.path.join(self.get_output_dir(),
                                 'interAdapterDist0.png')
        self.assertTrue(os.path.exists(plot_path))
    except:
        # Log the traceback for the record, then let the failure propagate.
        log.error(traceback.format_exc())
        raise
def test_make_loading_report_with_sts_xml(self):
    """
    Check the first table of the loading report built from an sts.xml.

    For each of the first five columns the header, the id and the first
    value are compared against fixed expectations.
    """
    report = make_loading_report(data.getStats(0))
    table = json.loads(report.to_json())["tables"][0]
    # Fetch all five columns up front so a short table fails immediately.
    columns = [table["columns"][idx] for idx in range(5)]

    id_prefix = "loading_xml_report.loading_xml_table."
    # (header, id suffix, expected first value, tolerance);
    # a tolerance of None means the value must match exactly.
    expectations = [
        ("Collection Context", "collection_context", "NA", None),
        ("Productive ZMWs", "productive_zmws", 2876, None),
        ("Productivity 0 (%)", "productivity_0", 54.798, 0.0003),
        ("Productivity 1 (%)", "productivity_1", 31.328, 0.0003),
        ("Productivity 2 (%)", "productivity_2", 13.873, 0.0003),
    ]
    for column, (header, suffix, value, tolerance) in zip(columns,
                                                          expectations):
        self.assertEqual(header, column["header"])
        self.assertEqual(id_prefix + suffix, column["id"])
        if tolerance is None:
            self.assertEqual(value, column["values"][0])
        else:
            self.assertAlmostEqual(value, column["values"][0],
                                   delta=tolerance)
def test_loadstats_cli(self):
    """
    Run ``dataset loadstats`` on the command line and check its output.

    The command is given data.getXml(7) and data.getStats() and writes to a
    file inside a fresh temporary directory. The command and the output
    path are handed to self._run_cmd_with_output, after which the output is
    opened as an AlignmentSet whose summaryStats metadata must be truthy.

    The temporary directory is removed when the test finishes, whether it
    passed or failed, so repeated runs do not accumulate scratch
    directories.
    """
    # Local import: only the cleanup below needs shutil.
    import shutil

    scratch_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    try:
        outfile = os.path.join(scratch_dir, 'withStats.alignmentset.xml')
        cmd = "dataset loadstats {d} {s} --outfile {o}".format(
            o=outfile, d=data.getXml(7), s=data.getStats())
        self._run_cmd_with_output(cmd, outfile)
        aln = AlignmentSet(outfile)
        assert aln.metadata.summaryStats
    finally:
        # mkdtemp leaves deletion to the caller; never mask a test failure
        # with a cleanup error.
        shutil.rmtree(scratch_dir, ignore_errors=True)
def test_loadstats_cli(self):
    """
    Run ``dataset loadstats`` on the command line and check its output.

    The command is given data.getXml(8) and data.getStats() and writes to a
    file inside a fresh temporary directory. The test requires a zero exit
    code, an existing output file, and truthy summaryStats metadata on the
    AlignmentSet opened from that file.

    The temporary directory is removed when the test finishes, whether it
    passed or failed.
    """
    # Local import: only the cleanup below needs shutil.
    import shutil

    workdir = tempfile.mkdtemp(suffix="dataset-unittest")
    try:
        outfile = os.path.join(workdir, 'withStats.alignmentset.xml')
        cmd = "dataset loadstats {d} {s} --outfile {o}".format(
            o=outfile, d=data.getXml(8), s=data.getStats())
        log.debug(cmd)
        # Only the second element (used as the exit code) is needed here.
        # NOTE(review): the other two look like captured output and an
        # error message -- confirm against backticks().
        _out, exit_code, _msg = backticks(cmd)
        self.assertEqual(exit_code, 0,
                         "command failed: {c}".format(c=cmd))
        self.assertTrue(os.path.exists(outfile),
                        "missing output file: {o}".format(o=outfile))
        aln = AlignmentSet(outfile)
        self.assertTrue(aln.metadata.summaryStats)
    finally:
        # mkdtemp leaves deletion to the caller; never mask a test failure
        # with a cleanup error.
        shutil.rmtree(workdir, ignore_errors=True)
def test_loadstats_cli(self):
    """
    Run ``dataset loadstats`` on the command line and check its output.

    The command is given data.getXml(8) and data.getStats() and writes to a
    file inside a fresh temporary directory. The test requires a zero exit
    code, an existing output file, and truthy summaryStats metadata on the
    AlignmentSet opened from that file.

    The temporary directory is registered for removal as soon as it is
    created, so it is cleaned up however the test ends.
    """
    # Local import: only the cleanup registration needs shutil.
    import shutil

    tmp_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    # NOTE(review): relies on the enclosing class being a
    # unittest.TestCase (it already uses assertEqual/assertTrue).
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)

    outfile = os.path.join(tmp_dir, 'withStats.alignmentset.xml')
    cmd = "dataset loadstats {d} {s} --outfile {o}".format(
        o=outfile, d=data.getXml(8), s=data.getStats())
    log.debug(cmd)
    # Only the second element (used as the exit code) is needed here.
    # NOTE(review): the other two look like captured output and an error
    # message -- confirm against backticks().
    _out, return_code, _msg = backticks(cmd)
    self.assertEqual(return_code, 0,
                     "command failed: {c}".format(c=cmd))
    self.assertTrue(os.path.exists(outfile),
                    "missing output file: {o}".format(o=outfile))
    aln = AlignmentSet(outfile)
    self.assertTrue(aln.metadata.summaryStats)
def test_make_loading_report_with_sts_xml(self):
    """ Test the content of the loading report generated from a sts.xml """
    # NOTE: the docstring above is emitted through log.info() via __doc__,
    # so its text is part of the test's runtime output.
    try:
        log.info(
            TestXMLstatsRpts.test_make_loading_report_with_sts_xml.__doc__)
        report = make_loading_report(data.getStats(0))
        table = json.loads(report.to_json())['tables'][0]
        # Fetch all five columns up front so a short table fails
        # before any content assertion runs.
        columns = [table['columns'][pos] for pos in range(5)]

        id_root = 'loading_xml_report.loading_xml_table.'
        # (header, id suffix, expected first value, tolerance);
        # tolerance None means an exact comparison.
        wanted = (
            ('Collection Context', 'collection_context', 'NA', None),
            ('Productive ZMWs', 'productive_zmws', 2876, None),
            ('Productivity 0 (%)', 'productivity_0', 54.80, .0003),
            ('Productivity 1 (%)', 'productivity_1', 31.33, .0003),
            ('Productivity 2 (%)', 'productivity_2', 13.87, .0003),
        )
        for col, (header, suffix, first_value, tol) in zip(columns,
                                                           wanted):
            self.assertEqual(header, col['header'])
            self.assertEqual(id_root + suffix, col['id'])
            if tol is None:
                self.assertEqual(first_value, col['values'][0])
            else:
                self.assertAlmostEqual(first_value, col['values'][0],
                                       delta=tol)
    except:
        # Log the traceback for the record, then let the failure propagate.
        log.error(traceback.format_exc())
        raise
def test_adapter_exit_code_0(self):
    """
    Run the adapter_xml command on an sts.xml and require exit code 0.
    """
    # The docstring above is what gets logged here (previously the method
    # had no docstring, so this logged None).
    log.info(TestXMLstatsRpts.test_adapter_exit_code_0.__doc__)
    sts_xml = data.getStats(0)
    # NOTE(review): 'foo.json' is a relative path, so the command will
    # presumably write its report into the current working directory.
    cmd = 'adapter_xml {c} {r}'.format(r='foo.json', c=sts_xml)
    # NOTE(review): the three values are treated as (output, exit code,
    # message) based on how they are used below -- confirm against
    # backticks().
    output, exit_code, message = backticks(cmd)

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print("COMMAND: {c}".format(c=cmd))
    log.info(cmd)
    print("o: {o}".format(o=output))
    print("c: {c}".format(c=exit_code))
    print("m: {m}".format(m=message))

    # Compare by value: `is not 0` tests object identity, which only works
    # by accident of small-int caching.
    if exit_code != 0:
        log.error(message)
        log.error(output)
    self.assertEqual(0, exit_code)
def test_make_filter_stats_report_sts_xml(self):
    """ Test the content of the filter report generated from an sts.xml """
    # NOTE: the docstring above is emitted through log.info() via __doc__,
    # so its text is part of the test's runtime output.
    try:
        log.info(
            TestXMLstatsRpts.test_make_filter_stats_report_sts_xml.__doc__)
        report = make_filter_report(data.getStats(0),
                                    self.get_output_dir())
        table = json.loads(report.to_json())['tables'][0]
        names_col = table['columns'][0]
        stats_col = table['columns'][1]
        id_root = 'filtering_stats_xml_report.filter_xml_table.'

        # First column: the metric names, in row order.
        self.assertEqual('Metrics', names_col['header'])
        self.assertEqual(id_root + 'filter_names_column', names_col['id'])
        metric_names = (
            'Polymerase Read Bases',
            'Polymerase Reads',
            'Polymerase Read N50',
            'Polymerase Read Length',
            'Polymerase Read Quality',
        )
        for row, metric in enumerate(metric_names):
            self.assertEqual(metric, names_col['values'][row])

        # Second column: the metric values, compared exactly.
        self.assertEqual('Values', stats_col['header'])
        self.assertEqual(id_root + 'filter_stats_column', stats_col['id'])
        metric_values = (4464266.29, 901, 6570, 4954.79, 0.83)
        for row, number in enumerate(metric_values):
            self.assertEqual(number, stats_col['values'][row])

        # Both plots are expected to exist in the output directory.
        for png_name in ('readLenDist0.png', 'readQualDist0.png'):
            png_path = os.path.join(self.get_output_dir(), png_name)
            self.assertTrue(os.path.exists(png_path))

        # these are from a raw STS file
        self.assertEqual(len(report._dataset_uuids), 0,
                         "Incorrect report datasets uuids")
    except:
        # Log the traceback for the record, then let the failure propagate.
        log.error(traceback.format_exc())
        raise
def test_make_loading_report_with_sts_xml(self):
    """
    Verify header, id and first value of the loading report's columns.

    The report is built from data.getStats(0), serialised to JSON, and the
    first five columns of its first table are compared against fixed
    expectations.
    """
    report_json = make_loading_report(data.getStats(0)).to_json()
    table = json.loads(report_json)['tables'][0]
    # Pull out every column before asserting anything about its content.
    col0 = table['columns'][0]
    col1 = table['columns'][1]
    col2 = table['columns'][2]
    col3 = table['columns'][3]
    col4 = table['columns'][4]

    def check(column, header, column_id, first_value, delta=None):
        # Header and id always match exactly; the value matches exactly
        # unless a tolerance is supplied.
        self.assertEqual(header, column['header'])
        self.assertEqual(column_id, column['id'])
        if delta is None:
            self.assertEqual(first_value, column['values'][0])
        else:
            self.assertAlmostEqual(first_value, column['values'][0],
                                   delta=delta)

    check(col0, 'Collection Context',
          'loading_xml_report.loading_xml_table.collection_context', 'NA')
    check(col1, 'Productive ZMWs',
          'loading_xml_report.loading_xml_table.productive_zmws', 2876)
    check(col2, 'Productivity 0 (%)',
          'loading_xml_report.loading_xml_table.productivity_0',
          54.798, delta=.0003)
    check(col3, 'Productivity 1 (%)',
          'loading_xml_report.loading_xml_table.productivity_1',
          31.328, delta=.0003)
    check(col4, 'Productivity 2 (%)',
          'loading_xml_report.loading_xml_table.productivity_2',
          13.873, delta=.0003)
def test_make_loading_report_with_sts_xml(self):
    """
    Compare the loading report's first table with the expected columns.

    The report is generated from data.getStats(0); for each of the first
    five columns the header, the id and the first value are checked.
    """
    sts_path = data.getStats(0)
    report = make_loading_report(sts_path)
    parsed = json.loads(report.to_json())
    first_table = parsed['tables'][0]
    # Collect all five columns first so a short table fails immediately.
    cols = [first_table['columns'][n] for n in range(5)]

    headers = [
        'Collection Context',
        'Productive ZMWs',
        'Productivity 0 (%)',
        'Productivity 1 (%)',
        'Productivity 2 (%)',
    ]
    ids = [
        'loading_xml_report.loading_xml_table.collection_context',
        'loading_xml_report.loading_xml_table.productive_zmws',
        'loading_xml_report.loading_xml_table.productivity_0',
        'loading_xml_report.loading_xml_table.productivity_1',
        'loading_xml_report.loading_xml_table.productivity_2',
    ]
    first_values = ['NA', 2876, 54.798, 31.328, 13.873]
    # The first two columns are compared exactly; the three productivity
    # percentages are compared within a tolerance of .0003.
    exact_count = 2

    for n, col in enumerate(cols):
        self.assertEqual(headers[n], col['header'])
        self.assertEqual(ids[n], col['id'])
        if n < exact_count:
            self.assertEqual(first_values[n], col['values'][0])
        else:
            self.assertAlmostEqual(first_values[n], col['values'][0],
                                   delta=.0003)
def test_stats_metadata(self):
    """
    Exercise summary-stats metadata on datasets and on merged datasets.

    Covers, in order: stats loaded onto a BAM-backed DataSet; adding two
    datasets that carry the same stats; three read-length merge scenarios
    with hand-set bins and minBinValue; and per-subdataset stats on a
    SubreadSet after two loadStats calls.
    """

    def load_pair():
        # Fresh copies of the two XML fixtures, each with the default
        # stats file loaded.
        first = DataSet(data.getXml(8))
        first.loadStats(data.getStats())
        second = DataSet(data.getXml(11))
        second.loadStats(data.getStats())
        return first, second

    def override_read_len(dset, bins, min_bin_value):
        # Replace the read-length bins, confirm they read back unchanged,
        # then set where the histogram starts; bin width is always 10.
        dset.metadata.summaryStats.readLenDist.bins = list(bins)
        self.assertEqual(dset.metadata.summaryStats.readLenDist.bins,
                         bins)
        dset.metadata.summaryStats.readLenDist.minBinValue = min_bin_value
        dset.metadata.summaryStats.readLenDist.binWidth = 10

    def check_manual_merge(bins1, min1, bins2, min2, merged_bins):
        first, second = load_pair()
        override_read_len(first, bins1, min1)
        override_read_len(second, bins2, min2)
        total = first + second
        self.assertEqual(total.metadata.summaryStats.readLenDist.bins,
                         merged_bins)

    # Stats loaded directly onto a BAM-backed dataset.
    bam_ds = DataSet(data.getBam())
    bam_ds.loadStats(data.getStats())
    self.assertEqual(bam_ds.metadata.summaryStats.prodDist.numBins, 4)
    self.assertEqual(bam_ds.metadata.summaryStats.prodDist.bins,
                     [1576, 901, 399, 0])

    # Adding two datasets with identical stats: the operands must still
    # report their own bins and the sum must report doubled counts.
    ds1, ds2 = load_pair()
    ds3 = ds1 + ds2
    prod_bins = [1576, 901, 399, 0]
    self.assertEqual(ds1.metadata.summaryStats.prodDist.bins, prod_bins)
    self.assertEqual(ds2.metadata.summaryStats.prodDist.bins, prod_bins)
    self.assertEqual(ds3.metadata.summaryStats.prodDist.bins,
                     [3152, 1802, 798, 0])
    read_len_bins = [0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45,
                     54, 73, 77, 97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0]
    self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                     read_len_bins)
    self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                     read_len_bins)
    self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                     [0, 124, 78, 72, 58, 74, 38, 58, 74, 64, 64, 80, 90,
                      108, 146, 154, 194, 190, 98, 34, 4, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0])

    # Lets check some manual values
    check_manual_merge(
        [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1], 10,
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1], 20,
        [0, 10, 10, 9, 8, 7, 5, 3, 2, 1, 0, 1, 1])

    # now lets swap
    check_manual_merge(
        [0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1], 20,
        [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1], 10,
        [0, 1, 11, 10, 9, 8, 7, 5, 3, 1, 0, 1, 1])

    # now lets do some non-overlapping
    check_manual_merge(
        [1, 1, 1], 10,
        [2, 2, 2], 50,
        [1, 1, 1, 0, 2, 2, 2])

    # now lets test the subdataset metadata retention:
    ss = SubreadSet(data.getXml(10))
    ss.loadStats(data.getStats(0))
    ss.loadStats(data.getStats(1))
    self.assertEqual(153168.0, ss.metadata.summaryStats.numSequencingZmws)
    self.assertEqual(
        2876.0,
        ss.subdatasets[0].metadata.summaryStats.numSequencingZmws)
    self.assertEqual(
        150292.0,
        ss.subdatasets[1].metadata.summaryStats.numSequencingZmws)
def test_stats_metadata(self):
    """
    Check summary-stats metadata before and after combining datasets.

    Steps, in order: load stats onto a BAM-backed DataSet; add two
    datasets carrying identical stats; run three table-driven read-length
    merge scenarios with hand-set bins; and check per-subdataset ZMW
    counts on a SubreadSet after two loadStats calls.
    """
    # Stats loaded straight onto a BAM-backed dataset.
    from_bam = DataSet(data.getBam())
    from_bam.loadStats(data.getStats())
    self.assertEqual(from_bam.metadata.summaryStats.prodDist.numBins, 4)
    self.assertEqual(from_bam.metadata.summaryStats.prodDist.bins,
                     [1576, 901, 399, 0])

    # Two datasets with the same stats, and their sum.
    ds1 = DataSet(data.getXml(8))
    ds1.loadStats(data.getStats())
    ds2 = DataSet(data.getXml(11))
    ds2.loadStats(data.getStats())
    ds3 = ds1 + ds2

    single_prod = [1576, 901, 399, 0]
    single_read_len = [
        0, 62, 39, 36, 29, 37, 19, 29, 37, 32, 32, 40, 45, 54, 73, 77,
        97, 95, 49, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]
    # Operands are checked after the addition as well as the result.
    for dset, prod_expected in ((ds1, single_prod),
                                (ds2, single_prod),
                                (ds3, [3152, 1802, 798, 0])):
        self.assertEqual(dset.metadata.summaryStats.prodDist.bins,
                         prod_expected)
    for dset, read_len_expected in (
            (ds1, single_read_len),
            (ds2, single_read_len),
            (ds3, [
                0, 124, 78, 72, 58, 74, 38, 58, 74, 64, 64, 80, 90, 108,
                146, 154, 194, 190, 98, 34, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0
            ])):
        self.assertEqual(dset.metadata.summaryStats.readLenDist.bins,
                         read_len_expected)

    # Each scenario: ((ds1 bins, ds1 minBinValue),
    #                 (ds2 bins, ds2 minBinValue),
    #                 expected bins of ds1 + ds2).
    # binWidth is 10 in every scenario.
    scenarios = (
        # Lets check some manual values
        (([0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1], 10),
         ([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1], 20),
         [0, 10, 10, 9, 8, 7, 5, 3, 2, 1, 0, 1, 1]),
        # now lets swap
        (([0, 10, 9, 8, 7, 6, 4, 2, 1, 0, 0, 1], 20),
         ([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1], 10),
         [0, 1, 11, 10, 9, 8, 7, 5, 3, 1, 0, 1, 1]),
        # now lets do some non-overlapping
        (([1, 1, 1], 10),
         ([2, 2, 2], 50),
         [1, 1, 1, 0, 2, 2, 2]),
    )
    for (bins1, min1), (bins2, min2), combined in scenarios:
        # Start every scenario from freshly loaded datasets.
        ds1 = DataSet(data.getXml(8))
        ds1.loadStats(data.getStats())
        ds2 = DataSet(data.getXml(11))
        ds2.loadStats(data.getStats())

        # Assign a copy so the read-back check compares against an
        # independent list.
        ds1.metadata.summaryStats.readLenDist.bins = list(bins1)
        self.assertEqual(ds1.metadata.summaryStats.readLenDist.bins,
                         bins1)
        ds1.metadata.summaryStats.readLenDist.minBinValue = min1
        ds1.metadata.summaryStats.readLenDist.binWidth = 10

        ds2.metadata.summaryStats.readLenDist.bins = list(bins2)
        self.assertEqual(ds2.metadata.summaryStats.readLenDist.bins,
                         bins2)
        ds2.metadata.summaryStats.readLenDist.minBinValue = min2
        ds2.metadata.summaryStats.readLenDist.binWidth = 10

        ds3 = ds1 + ds2
        self.assertEqual(ds3.metadata.summaryStats.readLenDist.bins,
                         combined)

    # now lets test the subdataset metadata retention:
    ss = SubreadSet(data.getXml(10))
    ss.loadStats(data.getStats(0))
    ss.loadStats(data.getStats(1))
    self.assertEqual(153168.0, ss.metadata.summaryStats.numSequencingZmws)
    zmws_per_subdataset = (2876.0, 150292.0)
    for position, zmw_count in enumerate(zmws_per_subdataset):
        self.assertEqual(
            zmw_count,
            ss.subdatasets[position].metadata.summaryStats
            .numSequencingZmws)