def test_subset_filter(self):
    """Modulo ('subset') filter on ZMW hole numbers: verify the filtered
    record count, the filter's string rendering, and that the serialized
    XML contains the expected Property element."""
    ds2 = AlignmentSet(data.getXml(7))
    assert len(ds2) == 92
    modvalue = 8
    # manually:
    hns = ds2.index.holeNumber
    assert np.count_nonzero(hns % modvalue == 0) == 26
    # dset filters:
    ds2.filters.addRequirement(zm=[('=', '0', modvalue)])
    assert len(ds2) == 26
    # written:
    filtstr = '( Uint32Cast(zm) % 8 = 0 )'
    assert str(ds2.filters) == filtstr
    filtxmlstr = ('<pbbase:Property Hash="Uint32Cast" Modulo="8" '
                  'Name="zm" Operator="=" Value="0"/>')
    fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
    ds2.write(fn)
    # any() short-circuits on the first matching line instead of scanning
    # the rest of the file after a hit
    with open(fn, 'r') as ifh:
        found = any(filtxmlstr in line for line in ifh)
    assert found
def _make_alignmentset(file_name=None):
    """Write a single-BAM AlignmentSet XML and return its path.

    If *file_name* is None, a temporary path is generated.
    """
    if file_name is None:
        file_name = tempfile.NamedTemporaryFile(
            suffix=".alignmentset.xml").name
    source_bam = pbcore.data.getBamAndCmpH5()[0]
    AlignmentSet(source_bam).write(file_name)
    return file_name
def test_membership_filter_with_equal_operator(self):
    """'='/'==' with a collection operand acts as a membership filter on
    ZMW hole numbers and qnames, for ndarray, list, and set operands,
    including a write/read round-trip of the filter."""
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('=', hns)])
    self.assertEqual(len(list(aln)), 5)
    # '==' with every hole number keeps all records
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)
    aln.filters.addRequirement(zm=[('==', hns)])
    self.assertEqual(len(list(aln)), 177)
    # large operand: every hole number repeated 10000x
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)
    hns = [n for _ in range(10000) for n in hns]
    hns = np.array(hns)
    aln.filters.addRequirement(zm=[('==', hns)])
    self.assertEqual(len(list(aln)), 177)
    # list operand
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = list(hns)
    aln.filters.addRequirement(zm=[('==', hns)])
    self.assertEqual(len(list(aln)), 5)
    # set operand
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = set(hns)
    aln.filters.addRequirement(zm=[('==', hns)])
    self.assertEqual(len(list(aln)), 5)
    # qname membership
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    qnames = [r.qName for r in aln[:10]]
    aln.filters.addRequirement(qname=[('==', qnames)])
    self.assertEqual(len(list(aln)), 10)
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    qnames = [r.qName for r in aln[:1]]
    aln.filters.addRequirement(qname=[('==', qnames)])
    self.assertEqual(len(list(aln)), 1)
    # round-trip: filter survives write/read
    fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('==', hns)])
    aln.write(fn)
    aln.close()
    aln2 = AlignmentSet(fn)
    self.assertEqual(len(list(aln2)), 5)
def test_uuid(self):
    """newUuid() changes the UUID; a write/read round-trip preserves it."""
    empty_ds = AlignmentSet()
    previous = empty_ds.uuid
    empty_ds.newUuid()
    self.assertNotEqual(previous, empty_ds.uuid)
    aln = AlignmentSet(data.getXml(no=8))
    expected_uuid = aln.uuid
    outXml = os.path.join(
        tempfile.mkdtemp(suffix="dataset-doctest"), 'tempfile.xml')
    aln.write(outXml)
    reloaded = AlignmentSet(outXml)
    self.assertEqual(reloaded.uuid, expected_uuid)
def test_uuid(self):
    """newUuid() must change the UUID; a write/read round-trip keeps it."""
    ds = AlignmentSet()
    old = ds.uuid
    _ = ds.newUuid()
    # idiomatic inequality instead of 'assert not old == ds.uuid'
    assert old != ds.uuid
    aln = AlignmentSet(data.getXml(7))
    oldUuid = aln.uuid
    outdir = tempfile.mkdtemp(suffix="dataset-doctest")
    outXml = os.path.join(outdir, 'tempfile.xml')
    aln.write(outXml)
    aln = AlignmentSet(outXml)
    assert aln.uuid == oldUuid
def test_write(self):
    """Round-trip an AlignmentSet through XML, validate the file, and
    check the reloaded dataset compares equal."""
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfile = os.path.join(outdir, 'tempfile.xml')
    ds1 = AlignmentSet(data.getBam())
    ds1.write(outfile)
    log.debug('Validated file: {f}'.format(f=outfile))
    validateFile(outfile)
    ds2 = AlignmentSet(outfile)
    self.assertTrue(ds1 == ds2)
    # Should fail when strict:
    # NOTE(review): no assertRaises here, so the expected failure is not
    # actually asserted -- confirm intent.
    ds3 = AlignmentSet(data.getBam())
    ds3.write(outfile)
def test_write(self):
    """Write an AlignmentSet XML, validate it, and verify that reading it
    back yields an equal dataset."""
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfile = os.path.join(outdir, 'tempfile.xml')
    ds1 = AlignmentSet(data.getBam())
    ds1.write(outfile)
    log.debug('Validated file: {f}'.format(f=outfile))
    validateFile(outfile)
    ds2 = AlignmentSet(outfile)
    self.assertTrue(ds1 == ds2)
    # Should fail when strict:
    # NOTE(review): nothing asserts the failure (no assertRaises) --
    # confirm this overwrite is expected to succeed here.
    ds3 = AlignmentSet(data.getBam())
    ds3.write(outfile)
def test_uuid(self):
    """A regenerated UUID differs from the old one; persisting the dataset
    keeps its UUID stable."""
    ds = AlignmentSet()
    original_uuid = ds.uuid
    ds.newUuid()
    self.assertNotEqual(original_uuid, ds.uuid)
    outdir = tempfile.mkdtemp(suffix="dataset-doctest")
    outXml = os.path.join(outdir, 'tempfile.xml')
    aln = AlignmentSet(data.getXml(no=8))
    persisted_uuid = aln.uuid
    aln.write(outXml)
    self.assertEqual(AlignmentSet(outXml).uuid, persisted_uuid)
def test_membership_filter(self):
    """'in' membership filters on ZMW hole numbers and qnames, with
    ndarray, list, and set operands, plus a write/read round-trip."""
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('in', hns)])
    self.assertEqual(len(list(aln)), 5)
    # all hole numbers: nothing filtered out
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)
    aln.filters.addRequirement(zm=[('in', hns)])
    self.assertEqual(len(list(aln)), 177)
    # large operand: every hole number repeated 10000x
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)
    hns = [n for _ in range(10000) for n in hns]
    hns = np.array(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    self.assertEqual(len(list(aln)), 177)
    # list operand
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = list(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    self.assertEqual(len(list(aln)), 5)
    # set operand
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = set(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    self.assertEqual(len(list(aln)), 5)
    # qname membership
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    qnames = [r.qName for r in aln[:10]]
    aln.filters.addRequirement(qname=[('in', qnames)])
    self.assertEqual(len(list(aln)), 10)
    # round-trip: filter survives write/read
    fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('in', hns)])
    aln.write(fn)
    aln.close()
    aln2 = AlignmentSet(fn)
    self.assertEqual(len(list(aln2)), 5)
def reDefineFilter(aset):
    """
    pbbam can't handle reference filters, so recast a reference filter
    into an explicit membership filter over (at most 1000 randomly
    sampled) ZMW hole numbers, drop the 'rname' filter, and rewrite
    *aset* in place.
    """
    alignments = AlignmentSet(aset)
    zmws = np.unique(alignments.index['holeNumber'])
    # cap the filter size; NOTE(review): random.sample is unseeded here,
    # so the resulting filter is nondeterministic -- confirm acceptable
    if len(zmws) > 1000:
        zmws = np.array(random.sample(zmws, 1000))
    if zmws.size > 0:
        # only make the swap if zmws is not empty
        alignments.filters.addRequirement(zm=[('=', zmws)])
    else:
        # otherwise make the filter nonsense
        alignments.filters.addRequirement(zm=[('=', -1)])
    # remove ref name filter for pbbam compatibility
    alignments.filters.removeRequirement('rname')
    alignments.write(aset)
def to_chunked_alignmentset_files(alignmentset_path, reference_path,
                                  max_total_nchunks, chunk_key, dir_name,
                                  base_name, ext):
    """Split an AlignmentSet into contig-based chunks, write each chunk
    XML into *dir_name*, and yield a PipelineChunk per chunk.

    :param alignmentset_path: path to the input AlignmentSet XML
    :param reference_path: ReferenceSet path (validated, and recorded in
        each chunk under '$chunk.reference_id')
    :param max_total_nchunks: upper bound on the number of chunks
    :param chunk_key: key under which the chunk's dataset path is stored
    :param dir_name: output directory for chunk XML files
    :param base_name: prefix for chunk file names
    :param ext: file extension for chunk files
    """
    dset = AlignmentSet(alignmentset_path, strict=True)
    dset_chunks = dset.split(contigs=True, maxChunks=max_total_nchunks,
                             breakContigs=True)
    # sanity checking only: constructing with strict=True validates the
    # file; the unused binding was dropped
    ReferenceSet(reference_path, strict=True)
    d = {}
    # loop variable renamed so it no longer shadows the outer 'dset'
    for i, chunk_ds in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        chunk_ds.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        d['$chunk.reference_id'] = reference_path
        c = PipelineChunk(chunk_id, **d)
        yield c
def to_chunked_alignmentset_files(alignmentset_path, reference_path,
                                  max_total_nchunks, chunk_key, dir_name,
                                  base_name, ext):
    """Split an AlignmentSet into contig-based chunks, write each chunk
    XML into *dir_name*, and yield a PipelineChunk per chunk.

    :param alignmentset_path: path to the input AlignmentSet XML
    :param reference_path: ReferenceSet path (validated, and recorded in
        each chunk under '$chunk.reference_id')
    :param max_total_nchunks: upper bound on the number of chunks
    :param chunk_key: key under which the chunk's dataset path is stored
    :param dir_name: output directory for chunk XML files
    :param base_name: prefix for chunk file names
    :param ext: file extension for chunk files
    """
    dset = AlignmentSet(alignmentset_path, strict=True)
    dset_chunks = dset.split(contigs=True, maxChunks=max_total_nchunks,
                             breakContigs=True)
    # sanity checking only: constructing with strict=True validates the
    # file; the unused binding was dropped
    ReferenceSet(reference_path, strict=True)
    d = {}
    # loop variable renamed so it no longer shadows the outer 'dset'
    for i, chunk_ds in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        chunk_ds.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        d['$chunk.reference_id'] = reference_path
        c = PipelineChunk(chunk_id, **d)
        yield c
def run(alignmentset, referenceset, fofn, max_nchunks):
    """Split *alignmentset* into contig chunks, write each chunk XML into
    the CWD with relative paths, and record the chunk file names in
    *fofn* (one per line)."""
    #'python -m pbcoretools.tasks.scatter_alignments_reference alignment_ds ds_reference json_out'
    # NOTE(review): dir_name is unused -- chunk paths are relative to CWD
    dir_name = os.getcwd()
    dset = AlignmentSet(alignmentset, strict=True)
    dset_chunks = dset.split(contigs=True, maxChunks=max_nchunks,
                             breakContigs=True)
    # referenceset is used only for sanity checking.
    ReferenceSet(referenceset, strict=True)
    chunk_fns = []
    for i, dset in enumerate(dset_chunks):
        chunk_name = 'chunk_alignmentset_{}.alignmentset.xml'.format(i)
        #chunk_fn = os.path.join(dir_name, chunk_name)
        chunk_fn = chunk_name  # rel to CWD
        # overwrite any stale chunk from a previous run
        if os.path.exists(chunk_fn):
            os.unlink(chunk_fn)
        dset.write(chunk_fn, relPaths=True)
        chunk_fns.append(chunk_fn)
    with open(fofn, 'w') as ofs:
        for fn in chunk_fns:
            ofs.write('{}\n'.format(fn))
    log.info('Wrote {} chunks into "{}"'.format(len(dset_chunks), fofn))
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet into one BAM via
    consolidateBams (with and without a temp dir) and via the dataset
    API, then check the consolidated reads match the originals under
    cheap (rname) and expensive (accuracy) filters."""
    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                    useTmp=False)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(consAln))
    log.debug("Test methods directly in tmp")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                    useTmp=True)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(consAln))
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet through the dataset API, under
    no filter, a cheap (rname) filter, and an expensive (accuracy)
    filter, then exercise the 'dataset consolidate' CLI."""
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                   d=datafile,
                                                   x=xmlfile)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
def setUpClass(cls):
    """Create a temporary AlignmentSet XML shared by this class's tests."""
    xml_path = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    aln_ds = AlignmentSet(pbcore.data.getBamAndCmpH5()[0], strict=True)
    aln_ds.write(xml_path)
    cls.alignment_set_xml = xml_path
def _get_input_file(cls):
    """Write a two-BAM AlignmentSet (including the empty BAM) to a
    temporary dataset XML and return its path."""
    dataset = AlignmentSet(cls.ALIGNMENTS, TestEmptyBAM.ALIGNMENTS,
                           strict=True)
    out_xml = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    dataset.write(out_xml)
    return out_xml
def test_membership_filter(self):
    """'in' membership filters on ZMW hole numbers and qnames (including
    partial qnames), with ndarray, list, and set operands, plus a
    write/read round-trip of the filter."""
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('in', hns)])
    assert len(list(aln)) == 5
    # all hole numbers: nothing filtered out
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)
    aln.filters.addRequirement(zm=[('in', hns)])
    assert len(list(aln)) == 177
    # large operand: every hole number repeated 10000x
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)
    hns = [n for _ in range(10000) for n in hns]
    hns = np.array(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    assert len(list(aln)) == 177
    # list operand
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = list(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    assert len(list(aln)) == 5
    # set operand
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)[:1]
    hns = set(hns)
    aln.filters.addRequirement(zm=[('in', hns)])
    assert len(list(aln)) == 5
    # qname membership
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    qnames = [r.qName for r in aln[:10]]
    aln.filters.addRequirement(qname=[('in', qnames)])
    assert len(list(aln)) == 10
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    qnames = [r.qName for r in aln[:1]]
    aln.filters.addRequirement(qname=[('in', qnames)])
    assert len(list(aln)) == 1
    # test partial qnames:
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    qnames = ['/'.join(r.qName.split('/')[:2]) for r in aln[:1]]
    assert qnames == ['pbalchemy1GbRSIIsim0/6']
    aln.filters.addRequirement(qname=[('in', qnames)])
    assert len(list(aln)) == 7
    # round-trip: filter survives write/read
    fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    hns = np.unique(aln.index.holeNumber)[:1]
    aln.filters.addRequirement(zm=[('in', hns)])
    aln.write(fn)
    aln.close()
    aln2 = AlignmentSet(fn)
    assert len(list(aln2)) == 5
def _get_input_file(cls):
    """Serialize the class's alignments BAM to a temporary dataset XML
    and return its path."""
    out_xml = tempfile.NamedTemporaryFile(
        suffix=".alignmentset.xml").name
    AlignmentSet(cls.ALIGNMENTS, strict=True).write(out_xml)
    return out_xml
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet via consolidateBams and via the
    dataset API, under no filter, a cheap (rname) filter, and an
    expensive (accuracy) filter, then exercise the CLI."""
    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(consAln))
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                   d=datafile,
                                                   x=xmlfile)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet through the dataset API under
    various filters, and with per-resource 'reference' attributes set on
    one or both external resources."""
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(11))
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    assert len(list(aln)) == 7
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    assert len(list(aln)) == 7
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(list(aln)) == 174
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    nonCons = aln.copy()
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    #nonCons = AlignmentSet(data.getXml(11))
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    assert aln.externalResources[0].reference == reference
    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    #nonCons = AlignmentSet(data.getXml(11))
    # NOTE(review): nonCons below is still the copy from the
    # one-reference section above -- confirm that is intentional
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    assert aln.externalResources[0].reference == reference
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet (pbtestdata 'aligned-ds-2')
    through the dataset API, with a cheap (length) filter and an
    expensive (accuracy) filter, then exercise the CLI."""
    log.debug("Test through API")
    aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(list(aln)) == 21
    aln.filters.addRequirement(length=[(">=", 10000)])
    assert len(list(aln)) == 10
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    nonCons.filters.addRequirement(length=[(">=", 10000)])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    assert len(list(aln)) == 7
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(list(aln)) == 174
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))
    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(11),
                                                   d=datafile,
                                                   x=xmlfile)
    log.debug(cmd)
    subprocess.check_call(cmd.split())
def test_alignmentset_consolidate(self):
    """Consolidate a two-BAM AlignmentSet through the dataset API under
    various filters, and with per-resource 'reference' attributes set on
    one or both external resources."""
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "apimerged.bam")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)
    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test with not refname filter")
    # This isn't trivial with bamtools
    """
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    """
    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    nonCons = aln.copy()
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    #nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    self.assertEqual(aln.externalResources[0].reference, reference)
    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    #nonCons = AlignmentSet(data.getXml(12))
    # NOTE(review): nonCons below is still the copy from the
    # one-reference section above -- confirm that is intentional
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    self.assertEqual(aln.externalResources[0].reference, reference)
def setUpClass(cls):
    """Run the base-class fixture, then write the input AlignmentSet XML."""
    super(TestPbreportMappingStats, cls).setUpClass()
    input_bam = pbcore.data.getBamAndCmpH5()[0]
    AlignmentSet(input_bam, strict=True).write(cls.INPUT_FILES[0])