Пример #1
0
    def test_subset_filter(self):
        ds2 = AlignmentSet(data.getXml(7))
        assert len(ds2) == 92
        modvalue = 8

        # manually:
        hns = ds2.index.holeNumber
        assert np.count_nonzero(hns % modvalue == 0) == 26

        # dset filters:
        ds2.filters.addRequirement(zm=[('=', '0', modvalue)])
        assert len(ds2) == 26

        # written:
        filtstr = '( Uint32Cast(zm) % 8 = 0 )'
        assert str(ds2.filters) == filtstr

        filtxmlstr = ('<pbbase:Property Hash="Uint32Cast" Modulo="8" '
                      'Name="zm" Operator="=" Value="0"/>')
        fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
        ds2.write(fn)
        with open(fn, 'r') as ifh:
            found = False
            for line in ifh:
                if filtxmlstr in line:
                    found = True
        assert found
def _make_alignmentset(file_name=None):
    bam = pbcore.data.getBamAndCmpH5()[0]
    ds = AlignmentSet(bam)
    if file_name is None:
        file_name = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    ds.write(file_name)
    return file_name
Пример #3
0
    def test_membership_filter_with_equal_operator(self):
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('=', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)
        aln.filters.addRequirement(zm=[('==', hns)])
        self.assertEqual(len(list(aln)), 177)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)
        hns = [n for _ in range(10000) for n in hns]
        hns = np.array(hns)
        aln.filters.addRequirement(zm=[('==', hns)])
        self.assertEqual(len(list(aln)), 177)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = list(hns)
        aln.filters.addRequirement(zm=[('==', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = set(hns)
        aln.filters.addRequirement(zm=[('==', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        qnames = [r.qName for r in aln[:10]]
        aln.filters.addRequirement(qname=[('==', qnames)])
        self.assertEqual(len(list(aln)), 10)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        qnames = [r.qName for r in aln[:1]]
        aln.filters.addRequirement(qname=[('==', qnames)])
        self.assertEqual(len(list(aln)), 1)

        fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('==', hns)])
        aln.write(fn)
        aln.close()
        aln2 = AlignmentSet(fn)
        self.assertEqual(len(list(aln2)), 5)
    def test_uuid(self):
        ds = AlignmentSet()
        old = ds.uuid
        _ = ds.newUuid()
        self.assertNotEqual(old, ds.uuid)

        aln = AlignmentSet(data.getXml(no=8))
        oldUuid = aln.uuid
        outdir = tempfile.mkdtemp(suffix="dataset-doctest")
        outXml = os.path.join(outdir, 'tempfile.xml')
        aln.write(outXml)
        aln = AlignmentSet(outXml)
        self.assertEqual(aln.uuid, oldUuid)
Пример #5
0
    def test_uuid(self):
        ds = AlignmentSet()
        old = ds.uuid
        _ = ds.newUuid()
        assert not old == ds.uuid

        aln = AlignmentSet(data.getXml(7))
        oldUuid = aln.uuid
        outdir = tempfile.mkdtemp(suffix="dataset-doctest")
        outXml = os.path.join(outdir, 'tempfile.xml')
        aln.write(outXml)
        aln = AlignmentSet(outXml)
        assert aln.uuid == oldUuid
Пример #6
0
    def test_write(self):
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfile = os.path.join(outdir, 'tempfile.xml')
        ds1 = AlignmentSet(data.getBam())
        ds1.write(outfile)
        log.debug('Validated file: {f}'.format(f=outfile))
        validateFile(outfile)
        ds2 = AlignmentSet(outfile)
        self.assertTrue(ds1 == ds2)

        # Should fail when strict:
        ds3 = AlignmentSet(data.getBam())
        ds3.write(outfile)
Пример #7
0
    def test_write(self):
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfile = os.path.join(outdir, 'tempfile.xml')
        ds1 = AlignmentSet(data.getBam())
        ds1.write(outfile)
        log.debug('Validated file: {f}'.format(f=outfile))
        validateFile(outfile)
        ds2 = AlignmentSet(outfile)
        self.assertTrue(ds1 == ds2)

        # Should fail when strict:
        ds3 = AlignmentSet(data.getBam())
        ds3.write(outfile)
    def test_uuid(self):
        ds = AlignmentSet()
        old = ds.uuid
        _ = ds.newUuid()
        self.assertNotEqual(old, ds.uuid)

        aln = AlignmentSet(data.getXml(no=8))
        oldUuid = aln.uuid
        outdir = tempfile.mkdtemp(suffix="dataset-doctest")
        outXml = os.path.join(outdir, 'tempfile.xml')
        aln.write(outXml)
        aln = AlignmentSet(outXml)
        self.assertEqual(aln.uuid, oldUuid)
Пример #9
0
    def test_membership_filter(self):
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('in', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)
        aln.filters.addRequirement(zm=[('in', hns)])
        self.assertEqual(len(list(aln)), 177)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)
        hns = [n for _ in range(10000) for n in hns]
        hns = np.array(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        self.assertEqual(len(list(aln)), 177)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = list(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = set(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        self.assertEqual(len(list(aln)), 5)

        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        qnames = [r.qName for r in aln[:10]]
        aln.filters.addRequirement(qname=[('in', qnames)])
        self.assertEqual(len(list(aln)), 10)

        fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('in', hns)])
        aln.write(fn)
        aln.close()
        aln2 = AlignmentSet(fn)
        self.assertEqual(len(list(aln2)), 5)
def reDefineFilter(aset):
    """
    pbbam can't handle reference filters.
    so I have to recast a reference filter
    into a list of zmws
    """
    alignments = AlignmentSet(aset)
    zmws = np.unique(alignments.index['holeNumber'])
    if len(zmws) > 1000:
        zmws = np.array(random.sample(zmws, 1000))
    if zmws.size > 0:  # only make the swap if zmws is not empty
        alignments.filters.addRequirement(zm=[('=', zmws)])
    else:  # otherwise make the filter nonsense
        alignments.filters.addRequirement(zm=[('=', -1)])

    # remove ref name filter for pbbam compatibility
    alignments.filters.removeRequirement('rname')
    alignments.write(aset)
Пример #11
0
def to_chunked_alignmentset_files(alignmentset_path, reference_path,
                                  max_total_nchunks, chunk_key, dir_name,
                                  base_name, ext):
    dset = AlignmentSet(alignmentset_path, strict=True)
    dset_chunks = dset.split(contigs=True, maxChunks=max_total_nchunks,
                             breakContigs=True)

    # sanity checking
    reference_set = ReferenceSet(reference_path, strict=True)
    d = {}
    for i, dset in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        dset.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        d['$chunk.reference_id'] = reference_path
        c = PipelineChunk(chunk_id, **d)
        yield c
Пример #12
0
def to_chunked_alignmentset_files(alignmentset_path, reference_path,
                                  max_total_nchunks, chunk_key, dir_name,
                                  base_name, ext):
    dset = AlignmentSet(alignmentset_path, strict=True)
    dset_chunks = dset.split(contigs=True,
                             maxChunks=max_total_nchunks,
                             breakContigs=True)

    # sanity checking
    reference_set = ReferenceSet(reference_path, strict=True)
    d = {}
    for i, dset in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        dset.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        d['$chunk.reference_id'] = reference_path
        c = PipelineChunk(chunk_id, **d)
        yield c
Пример #13
0
def run(alignmentset, referenceset, fofn, max_nchunks):
    #'python -m pbcoretools.tasks.scatter_alignments_reference alignment_ds ds_reference json_out'
    dir_name = os.getcwd()
    dset = AlignmentSet(alignmentset, strict=True)
    dset_chunks = dset.split(contigs=True,
                             maxChunks=max_nchunks,
                             breakContigs=True)

    # referenceset is used only for sanity checking.
    ReferenceSet(referenceset, strict=True)

    chunk_fns = []
    for i, dset in enumerate(dset_chunks):
        chunk_name = 'chunk_alignmentset_{}.alignmentset.xml'.format(i)
        #chunk_fn = os.path.join(dir_name, chunk_name)
        chunk_fn = chunk_name  # rel to CWD
        if os.path.exists(chunk_fn):
            os.unlink(chunk_fn)
        dset.write(chunk_fn, relPaths=True)
        chunk_fns.append(chunk_fn)
    with open(fofn, 'w') as ofs:
        for fn in chunk_fns:
            ofs.write('{}\n'.format(fn))
    log.info('Wrote {} chunks into "{}"'.format(len(dset_chunks), fofn))
    def test_alignmentset_consolidate(self):
        log.debug("Test methods directly")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                        useTmp=False)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(consAln))

        log.debug("Test methods directly in tmp")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                        useTmp=True)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(consAln))

        log.debug("Test through API")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(list(aln)), 174)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))
    def test_alignmentset_consolidate(self):

        log.debug("Test through API")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(list(aln)), 174)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test cli")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "merged.bam")
        xmlfile = os.path.join(outdir, "merged.xml")
        cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                          d=datafile,
                                                          x=xmlfile)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
 def setUpClass(cls):
     ds = AlignmentSet(pbcore.data.getBamAndCmpH5()[0], strict=True)
     cls.alignment_set_xml = tempfile.NamedTemporaryFile(
         suffix=".alignmentset.xml").name
     ds.write(cls.alignment_set_xml)
 def _get_input_file(cls):
     ds_xml = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
     bams = [cls.ALIGNMENTS, TestEmptyBAM.ALIGNMENTS]
     ds = AlignmentSet(*bams, strict=True)
     ds.write(ds_xml)
     return ds_xml
Пример #18
0
    def test_membership_filter(self):
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('in', hns)])
        assert len(list(aln)) == 5

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)
        aln.filters.addRequirement(zm=[('in', hns)])
        assert len(list(aln)) == 177

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)
        hns = [n for _ in range(10000) for n in hns]
        hns = np.array(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        assert len(list(aln)) == 177

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = list(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        assert len(list(aln)) == 5

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)[:1]
        hns = set(hns)
        aln.filters.addRequirement(zm=[('in', hns)])
        assert len(list(aln)) == 5

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        qnames = [r.qName for r in aln[:10]]
        aln.filters.addRequirement(qname=[('in', qnames)])
        assert len(list(aln)) == 10

        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        qnames = [r.qName for r in aln[:1]]
        aln.filters.addRequirement(qname=[('in', qnames)])
        assert len(list(aln)) == 1

        # test partial qnames:
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        qnames = ['/'.join(r.qName.split('/')[:2]) for r in aln[:1]]
        assert qnames == ['pbalchemy1GbRSIIsim0/6']
        aln.filters.addRequirement(qname=[('in', qnames)])
        assert len(list(aln)) == 7

        fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        hns = np.unique(aln.index.holeNumber)[:1]
        aln.filters.addRequirement(zm=[('in', hns)])
        aln.write(fn)
        aln.close()
        aln2 = AlignmentSet(fn)
        assert len(list(aln2)) == 5
 def _get_input_file(cls):
     ds_xml = tempfile.NamedTemporaryFile(
         suffix=".alignmentset.xml").name
     ds = AlignmentSet(cls.ALIGNMENTS, strict=True)
     ds.write(ds_xml)
     return ds_xml
Пример #20
0
    def test_alignmentset_consolidate(self):
        log.debug("Test methods directly")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(consAln))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(consAln))

        log.debug("Test through API")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(list(aln)), 174)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test cli")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "merged.bam")
        xmlfile = os.path.join(outdir, "merged.xml")
        cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                       d=datafile,
                                                       x=xmlfile)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
Пример #21
0
    def test_alignmentset_consolidate(self):
        log.debug("Test through API")
        aln = AlignmentSet(data.getXml(11))
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(aln) == len(nonCons)

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        assert len(list(aln)) == 7
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        assert len(list(aln)) == 7
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        assert len(list(aln)) == 174
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))

        log.debug("Test with one reference")
        aln = AlignmentSet(data.getXml(11))
        reference = upstreamData.getFasta()
        aln.externalResources[0].reference = reference
        nonCons = aln.copy()
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        #nonCons = AlignmentSet(data.getXml(11))
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(aln) == len(nonCons)
        assert aln.externalResources[0].reference == reference

        log.debug("Test with two references")
        aln = AlignmentSet(data.getXml(11))
        reference = upstreamData.getFasta()
        for extRes in aln.externalResources:
            extRes.reference = reference
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        #nonCons = AlignmentSet(data.getXml(11))
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(aln) == len(nonCons)
        assert aln.externalResources[0].reference == reference
Пример #22
0
    def test_alignmentset_consolidate(self):

        log.debug("Test through API")
        aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(aln) == len(nonCons)

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
        assert len(list(aln)) == 21
        aln.filters.addRequirement(length=[(">=", 10000)])
        assert len(list(aln)) == 10
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
        nonCons.filters.addRequirement(length=[(">=", 10000)])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        assert len(list(aln)) == 7
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        assert len(list(aln)) == 174
        assert len(aln.toExternalFiles()) == 2
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        assert len(nonCons.toExternalFiles()) == 2
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            assert read1 == read2
        assert len(list(aln)) == len(list(nonCons))

        log.debug("Test cli")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "merged.bam")
        xmlfile = os.path.join(outdir, "merged.xml")
        cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(11),
                                                       d=datafile,
                                                       x=xmlfile)
        log.debug(cmd)
        subprocess.check_call(cmd.split())
Пример #23
0
    def test_alignmentset_consolidate(self):
        log.debug("Test through API")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        # Test that it is a valid xml:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "apimerged.bam")
        xmlfile = os.path.join(outdir, "apimerged.xml")
        log.debug(xmlfile)
        aln.write(xmlfile)

        log.debug("Test with cheap filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test with not refname filter")
        # This isn't trivial with bamtools
        """
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(list(aln)), 7)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(rname=[('!=', 'B.vulgatus.5')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))
        """

        log.debug("Test with expensive filter")
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(list(aln)), 174)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(accuracy=[('>', '.85')])
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

        log.debug("Test with one reference")
        aln = AlignmentSet(data.getXml(12))
        reference = upstreamData.getFasta()
        aln.externalResources[0].reference = reference
        nonCons = aln.copy()
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        #nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))
        self.assertEqual(aln.externalResources[0].reference, reference)

        log.debug("Test with two references")
        aln = AlignmentSet(data.getXml(12))
        reference = upstreamData.getFasta()
        for extRes in aln.externalResources:
            extRes.reference = reference
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        #nonCons = AlignmentSet(data.getXml(12))
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))
        self.assertEqual(aln.externalResources[0].reference, reference)
 def setUpClass(cls):
     super(TestPbreportMappingStats, cls).setUpClass()
     ds = AlignmentSet(pbcore.data.getBamAndCmpH5()[0], strict=True)
     ds.write(cls.INPUT_FILES[0])