def test_subreadset_consolidate(self):
    """Check that consolidating a SubreadSet preserves its records.

    Two scenarios are run against the dataset built from
    ``data.getXml(10)`` and ``data.getXml(13)`` (two external files):

    * ``consolidateBams`` called directly, then the merged file is
      reopened as a new ``SubreadSet``;
    * ``SubreadSet.consolidate`` called on the dataset itself, which is
      then compared against a freshly loaded, non-consolidated copy.

    Each scenario writes into its own temporary directory, which is
    removed when the test finishes.
    """
    # Local import so the block does not rely on a module-level import.
    import shutil

    def new_outfn():
        # A fresh directory per scenario; registered for removal so the
        # test no longer leaves merged BAM files behind.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        self.addCleanup(shutil.rmtree, outdir, ignore_errors=True)
        return os.path.join(outdir, 'merged.bam')

    def assert_same_reads(left, right):
        # zip() stops at the shorter input, so the length comparison
        # afterwards is what catches missing or extra records.
        for read1, read2 in zip(sorted(left), sorted(right)):
            self.assertEqual(read1, read2)
        self.assertEqual(len(left), len(right))

    log.debug("Test methods directly")
    aln = SubreadSet(data.getXml(10), data.getXml(13))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = new_outfn()
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = SubreadSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    assert_same_reads(aln, consAln)

    log.debug("Test through API")
    aln = SubreadSet(data.getXml(10), data.getXml(13))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = new_outfn()
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    # consolidate() is expected to repoint the dataset at the merged file.
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = SubreadSet(data.getXml(10), data.getXml(13))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    assert_same_reads(aln, nonCons)
def test_alignmentset_consolidate(self):
    """Check that consolidating an AlignmentSet preserves its records.

    Scenarios, all starting from ``data.getXml(12)`` (two external files):

    * ``consolidateBams`` called directly;
    * ``AlignmentSet.consolidate``, unfiltered;
    * ``consolidate`` with an ``rname`` equality filter (7 records);
    * ``consolidate`` with an ``accuracy`` filter (174 of 177 records);
    * the ``dataset.py consolidate`` command line, which must exit 0.

    Each scenario writes into its own temporary directory, which is
    removed when the test finishes.
    """
    # Local import so the block does not rely on a module-level import.
    import shutil

    def new_outdir():
        # A fresh directory per scenario; registered for removal so the
        # test no longer leaves merged files behind.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        self.addCleanup(shutil.rmtree, outdir, ignore_errors=True)
        return outdir

    def assert_same_reads(left, right, materialize=False):
        # zip() stops at the shorter input, so the length comparison
        # afterwards is what catches missing or extra records.
        for read1, read2 in zip(sorted(left), sorted(right)):
            self.assertEqual(read1, read2)
        if materialize:
            # Filtered scenarios count records by iterating the datasets
            # rather than through len() of the dataset object.
            self.assertEqual(len(list(left)), len(list(right)))
        else:
            self.assertEqual(len(left), len(right))

    def check_filtered(expected, name, op, value):
        # Consolidate a filtered dataset and compare it with an equally
        # filtered, non-consolidated copy. The requirement list is built
        # anew for each dataset so the two never share an object.
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(list(aln)), expected)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = os.path.join(new_outdir(), 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        assert_same_reads(aln, nonCons, materialize=True)

    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = os.path.join(new_outdir(), 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    assert_same_reads(aln, consAln)

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = os.path.join(new_outdir(), 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    assert_same_reads(aln, nonCons)

    log.debug("Test with cheap filter")
    check_filtered(7, 'rname', '=', 'B.vulgatus.5')

    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools, so the '!=' scenario stays disabled:
    # check_filtered(7, 'rname', '!=', 'B.vulgatus.5')
    # NOTE(review): the disabled code expected 7 records, the same count as
    # the '=' filter above; confirm the right count before re-enabling.

    log.debug("Test with expensive filter")
    check_filtered(174, 'accuracy', '>', '.85')

    log.debug("Test cli")
    outdir = new_outdir()
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset.py consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                      d=datafile,
                                                      x=xmlfile)
    log.debug(cmd)
    # Only the middle element of the returned triple is checked here.
    _out, rcode, _msg = backticks(cmd)
    self.assertEqual(rcode, 0)
def test_alignmentset_consolidate(self):
    """Check that consolidating an AlignmentSet preserves its records.

    Scenarios, all starting from ``data.getXml(12)`` (two external files):

    * ``consolidateBams`` called directly with ``useTmp=False``;
    * ``consolidateBams`` called directly with ``useTmp=True``;
    * ``AlignmentSet.consolidate``, unfiltered, followed by writing the
      consolidated dataset out as XML;
    * ``consolidate`` with an ``rname`` equality filter (7 records);
    * ``consolidate`` with an ``accuracy`` filter (174 of 177 records).

    Each scenario writes into its own temporary directory, which is
    removed when the test finishes.
    """
    # Local import so the block does not rely on a module-level import.
    import shutil

    def new_outdir():
        # A fresh directory per scenario; registered for removal so the
        # test no longer leaves merged files behind.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        self.addCleanup(shutil.rmtree, outdir, ignore_errors=True)
        return outdir

    def assert_same_reads(left, right, materialize=False):
        # zip() stops at the shorter input, so the length comparison
        # afterwards is what catches missing or extra records.
        for read1, read2 in zip(sorted(left), sorted(right)):
            self.assertEqual(read1, read2)
        if materialize:
            # Filtered scenarios count records by iterating the datasets
            # rather than through len() of the dataset object.
            self.assertEqual(len(list(left)), len(list(right)))
        else:
            self.assertEqual(len(left), len(right))

    def check_direct(useTmp):
        # Merge through consolidateBams and reopen the merged file.
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = os.path.join(new_outdir(), 'merged.bam')
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                        useTmp=useTmp)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        assert_same_reads(aln, consAln)

    def check_filtered(expected, name, op, value):
        # Consolidate a filtered dataset and compare it with an equally
        # filtered, non-consolidated copy. The requirement list is built
        # anew for each dataset so the two never share an object.
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(list(aln)), expected)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = os.path.join(new_outdir(), 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        assert_same_reads(aln, nonCons, materialize=True)

    log.debug("Test methods directly")
    check_direct(useTmp=False)

    log.debug("Test methods directly in tmp")
    check_direct(useTmp=True)

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = os.path.join(new_outdir(), 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    assert_same_reads(aln, nonCons)

    # Test that it is a valid xml: writing the consolidated dataset must
    # not raise.
    xmlfile = os.path.join(new_outdir(), "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    check_filtered(7, 'rname', '=', 'B.vulgatus.5')

    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools, so the '!=' scenario stays disabled:
    # check_filtered(7, 'rname', '!=', 'B.vulgatus.5')
    # NOTE(review): the disabled code expected 7 records, the same count as
    # the '=' filter above; confirm the right count before re-enabling.

    log.debug("Test with expensive filter")
    check_filtered(174, 'accuracy', '>', '.85')
def test_alignmentset_consolidate(self):
    """Check that consolidating an AlignmentSet preserves its records.

    Scenarios, all starting from ``data.getXml(12)`` (two external files):

    * ``consolidateBams`` called directly;
    * ``AlignmentSet.consolidate``, unfiltered, followed by writing the
      consolidated dataset out as XML;
    * ``consolidate`` with an ``rname`` equality filter (7 records);
    * ``consolidate`` with an ``accuracy`` filter (174 of 177 records);
    * the ``dataset consolidate`` command line, which must exit 0.

    Each scenario writes into its own temporary directory, which is
    removed when the test finishes.
    """
    # Local import so the block does not rely on a module-level import.
    import shutil

    def new_outdir():
        # A fresh directory per scenario; registered for removal so the
        # test no longer leaves merged files behind.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        self.addCleanup(shutil.rmtree, outdir, ignore_errors=True)
        return outdir

    def assert_same_reads(left, right, materialize=False):
        # zip() stops at the shorter input, so the length comparison
        # afterwards is what catches missing or extra records.
        for read1, read2 in zip(sorted(left), sorted(right)):
            self.assertEqual(read1, read2)
        if materialize:
            # Filtered scenarios count records by iterating the datasets
            # rather than through len() of the dataset object.
            self.assertEqual(len(list(left)), len(list(right)))
        else:
            self.assertEqual(len(left), len(right))

    def check_filtered(expected, name, op, value):
        # Consolidate a filtered dataset and compare it with an equally
        # filtered, non-consolidated copy. The requirement list is built
        # anew for each dataset so the two never share an object.
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(list(aln)), expected)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = os.path.join(new_outdir(), 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(**{name: [(op, value)]})
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        assert_same_reads(aln, nonCons, materialize=True)

    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = os.path.join(new_outdir(), 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    assert_same_reads(aln, consAln)

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = os.path.join(new_outdir(), 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    assert_same_reads(aln, nonCons)

    # Test that it is a valid xml: writing the consolidated dataset must
    # not raise.
    xmlfile = os.path.join(new_outdir(), "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    check_filtered(7, 'rname', '=', 'B.vulgatus.5')

    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools, so the '!=' scenario stays disabled:
    # check_filtered(7, 'rname', '!=', 'B.vulgatus.5')
    # NOTE(review): the disabled code expected 7 records, the same count as
    # the '=' filter above; confirm the right count before re-enabling.

    log.debug("Test with expensive filter")
    check_filtered(174, 'accuracy', '>', '.85')

    log.debug("Test cli")
    outdir = new_outdir()
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                   d=datafile,
                                                   x=xmlfile)
    log.debug(cmd)
    # Only the middle element of the returned triple is checked here.
    _out, rcode, _msg = backticks(cmd)
    self.assertEqual(rcode, 0)