def test_split_zmws_targetsize(self):
    """Split the unaligned BAM dataset by ZMW for several target sizes.

    For every target size the chunks must together still hold all records,
    and the sorted number of distinct hole numbers per chunk must match the
    expected distribution.
    """
    N_RECORDS = 117
    N_ZMWS = 48
    test_file = upstreamdata.getUnalignedBam()
    ds1 = openDataFile(test_file)
    assert len([r for r in ds1]) == N_RECORDS
    assert len(ds1) == N_RECORDS
    assert len(set(ds1.index.holeNumber)) == N_ZMWS

    # (targetSize, expected chunk count, expected sorted ZMW count per chunk)
    scenarios = (
        (1000, 1, [48]),  # with no split
        (25, 2, [24, 24]),  # with a split
        (5, 10, [4, 4, 5, 5, 5, 5, 5, 5, 5, 5]),  # with a split
    )
    for target_size, n_chunks, exp in scenarios:
        dss = list(ds1.split(targetSize=target_size, zmws=True))
        assert len(dss) == n_chunks
        # Count records both by iterating and via len() of each chunk.
        assert sum([len([r for r in chunk]) for chunk in dss]) == N_RECORDS
        assert sum([len(chunk) for chunk in dss]) == N_RECORDS
        obs = sorted([len(set(chunk.index.holeNumber)) for chunk in dss])
        assert exp == obs
def test_subreadset_from_bam(self):
    """Build a non-strict SubreadSet from a BAM and write it out as XML.

    The test passes as long as construction and writing raise no exception.
    """
    # DONE control experiment for bug 28698
    bam = upstreamData.getUnalignedBam()
    ds1 = SubreadSet(bam, strict=False)
    # Keep the temporary file object alive while writing: its name stays
    # reserved for this test and the file is removed when the block exits.
    with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
        fn = tmp.name
        log.debug(fn)
        ds1.write(fn)
def test_get_dataset_uuid(self):
    """Check getDataSetUuid on a written dataset file and on junk content.

    A freshly written SubreadSet XML must report the dataset's own UUID;
    after the file is overwritten with non-dataset text the result must be
    None.
    """
    ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
    # Hold the temporary file open for the whole test so the name stays
    # reserved and the file is cleaned up on exit.
    with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
        ds_file = tmp.name
        ds.write(ds_file)
        uuid = getDataSetUuid(ds_file)
        self.assertEqual(uuid, ds.uuid)

        # Replace the XML with text that is not a dataset at all.
        with open(ds_file, "w") as out:
            out.write("hello world!")
        uuid = getDataSetUuid(ds_file)
        self.assertIsNone(uuid)
def test_get_dataset_uuid(self):
    """Check getDataSetUuid on a written dataset file and on junk content.

    A freshly written SubreadSet XML must report the dataset's own UUID;
    after the file is overwritten with non-dataset text the result must be
    None.
    """
    ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
    # Hold the temporary file open for the whole test so the name stays
    # reserved and the file is cleaned up on exit.
    with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
        ds_file = tmp.name
        ds.write(ds_file)
        uuid = getDataSetUuid(ds_file)
        assert uuid == ds.uuid

        # Replace the XML with text that is not a dataset at all.
        with open(ds_file, "w") as out:
            out.write("hello world!")
        uuid = getDataSetUuid(ds_file)
        assert uuid is None
def test_split_zmws_targetsize(self):
    """Split the unaligned BAM dataset by ZMW for several target sizes.

    For every target size the chunks must together still hold all records,
    and the sorted number of distinct hole numbers per chunk must match the
    expected distribution.
    """
    N_RECORDS = 117
    N_ZMWS = 48
    test_file = upstreamdata.getUnalignedBam()
    ds1 = openDataFile(test_file)
    self.assertEqual(len([r for r in ds1]), N_RECORDS)
    self.assertEqual(len(ds1), N_RECORDS)
    self.assertEqual(len(set(ds1.index.holeNumber)), N_ZMWS)

    # (targetSize, expected chunk count, expected sorted ZMW count per chunk)
    scenarios = (
        (1000, 1, [48]),  # with no split
        (25, 2, [24, 24]),  # with a split
        (5, 10, [4, 4, 5, 5, 5, 5, 5, 5, 5, 5]),  # with a split
    )
    for target_size, n_chunks, exp in scenarios:
        dss = ds1.split(targetSize=target_size, zmws=True)
        self.assertEqual(len(dss), n_chunks)
        # Count records both by iterating and via len() of each chunk.
        self.assertEqual(
            sum([len([r for r in chunk]) for chunk in dss]), N_RECORDS)
        self.assertEqual(sum([len(chunk) for chunk in dss]), N_RECORDS)
        obs = sorted([len(set(chunk.index.holeNumber)) for chunk in dss])
        self.assertListEqual(exp, obs)
def test_split_zmws(self):
    """Split the unaligned BAM dataset by ZMW into a requested chunk count.

    Checks that record totals are preserved, that asking for 12 chunks
    yields only 2 with the expected ZMW ranges, and that no hole number of
    the source dataset falls strictly between two consecutive chunk ranges.
    """
    N_RECORDS = 117
    test_file = upstreamdata.getUnalignedBam()
    ds1 = openDataFile(test_file)
    self.assertEqual(len([r for r in ds1]), N_RECORDS)
    self.assertEqual(len(ds1), N_RECORDS)

    dss = ds1.split(chunks=1, zmws=True)
    self.assertEqual(len(dss), 1)
    self.assertEqual(sum([len([r for r in ds_]) for ds_ in dss]), N_RECORDS)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)

    # We have a lower limit on the number of zmws, now
    dss = ds1.split(chunks=12, zmws=True)
    self.assertEqual(len(dss), 2)
    self.assertEqual(sum([len([r for r in ds_]) for ds_ in dss]), N_RECORDS)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    movie = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0'
    self.assertEqual(dss[0].zmwRanges, [(movie, 1650, 32328)])
    self.assertEqual(dss[-1].zmwRanges, [(movie, 32560, 54396)])

    # Drop the leading name so each entry is a (start, end) pair.
    ranges = sorted([c.zmwRanges[0][1:] for c in dss])
    interspans = []
    last = None
    for rg in ranges:
        if last is not None:
            # Gap between the previous range's end and this range's start;
            # consecutive ranges must not share a boundary value.
            interspans.append((last, rg[0]))
            self.assertNotEqual(last, rg[0])
        last = rg[1]

    hole_numbers = ds1.index.holeNumber
    for gap_start, gap_end in interspans:
        # No record may sit strictly inside a gap between chunks.
        inside_gap = np.logical_and(hole_numbers < gap_end,
                                    hole_numbers > gap_start)
        self.assertEqual(len(np.nonzero(inside_gap)[0]), 0)
def test_split_zmws(self):
    """Split the unaligned BAM dataset by ZMW into a requested chunk count.

    Checks that record totals are preserved, that asking for 12 chunks
    yields only 2 with the expected ZMW ranges, and that no hole number of
    the source dataset falls strictly between two consecutive chunk ranges.
    """
    N_RECORDS = 117
    test_file = upstreamdata.getUnalignedBam()
    ds1 = openDataFile(test_file)
    self.assertEqual(len([r for r in ds1]), N_RECORDS)
    self.assertEqual(len(ds1), N_RECORDS)

    dss = ds1.split(chunks=1, zmws=True)
    self.assertEqual(len(dss), 1)
    self.assertEqual(sum([len([r for r in ds_]) for ds_ in dss]), N_RECORDS)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)

    # We have a lower limit on the number of zmws, now
    dss = ds1.split(chunks=12, zmws=True)
    self.assertEqual(len(dss), 2)
    self.assertEqual(sum([len([r for r in ds_]) for ds_ in dss]), N_RECORDS)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    movie = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0'
    self.assertEqual(dss[0].zmwRanges, [(movie, 1650, 32328)])
    self.assertEqual(dss[-1].zmwRanges, [(movie, 32560, 54396)])

    # Drop the leading name so each entry is a (start, end) pair.
    ranges = sorted([c.zmwRanges[0][1:] for c in dss])
    interspans = []
    last = None
    for rg in ranges:
        if last is not None:
            # Gap between the previous range's end and this range's start;
            # consecutive ranges must not share a boundary value.
            interspans.append((last, rg[0]))
            self.assertNotEqual(last, rg[0])
        last = rg[1]

    hole_numbers = ds1.index.holeNumber
    for gap_start, gap_end in interspans:
        # No record may sit strictly inside a gap between chunks.
        inside_gap = np.logical_and(hole_numbers < gap_end,
                                    hole_numbers > gap_start)
        self.assertEqual(len(np.nonzero(inside_gap)[0]), 0)
def __init__(self):
    """Open the BAM and bax.h5 fixtures and cache the first read of each."""
    bam_reader = BamReader(data.getUnalignedBam())
    bax_reader = BaxH5Reader(data.getBaxForBam())
    self.bam = bam_reader
    self.bax = bax_reader
    # First subread from the bax file and first record from the BAM.
    self.baxRead0 = next(bax_reader.subreads())
    self.bamRead0 = next(iter(bam_reader))
def test_get_dataset_metatype(self):
    """Check that a written SubreadSet XML reports the SubreadSet metatype."""
    ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
    # Hold the temporary file open for the whole test so the name stays
    # reserved and the file is cleaned up on exit.
    with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
        ds_file = tmp.name
        ds.write(ds_file)
        meta_type = getDataSetMetaType(ds_file)
        assert meta_type == "PacBio.DataSet.SubreadSet"
def test_context_filters(self):
    """Check filtered record counts for a series of ``cx`` requirements.

    Each case adds one or more ``cx`` requirements (one ``addRequirement``
    call per entry), checks the resulting index length, removes the ``cx``
    requirement again and checks that all 117 records are visible.
    """
    ss = SubreadSet(upstreamdata.getUnalignedBam())
    self.assertEqual(set(ss.index.contextFlag), {0, 1, 2, 3})
    records_per_flag = [
        len(np.flatnonzero(ss.index.contextFlag == cx))
        for cx in sorted(set(ss.index.contextFlag))
    ]
    self.assertEqual(records_per_flag, [15, 33, 32, 37])
    self.assertEqual(len(ss.index), 117)

    # (requirements, expected filtered length)
    cases = [
        # no adapters/barcodes
        ([[('=', 0)]], 15),
        # no adapters/barcodes
        ([[('=', 'NO_LOCAL_CONTEXT')]], 15),
        # some adapters/barcodes
        ([[('!=', 0)]], 102),
        # adapter before
        ([[('&', 1)]], 70),
        # adapter before
        ([[('&', 'ADAPTER_BEFORE')]], 70),
        # adapter after
        ([[('&', 2)]], 69),
        # adapter before or after
        ([[('&', 3)]], 102),
        # adapter before or after
        ([[('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')]], 102),
        # adapter before or after but not both
        ([[('!=', 0)], [('~', 1), ('~', 2)]], 65),
        # adapter before or after
        ([[('&', 1), ('&', 2)]], 102),
        # adapter before and after
        ([[('&', 1)], [('&', 2)]], 37),
        # adapter before but not after
        ([[('&', 1)], [('~', 2)]], 33),
        # no adapter before
        ([[('~', 1)]], 47),
        # no adapter before or after
        ([[('~', 1)], [('~', 2)]], 15),
        # no adapter before or after
        ([[('~', 3)]], 15),
    ]
    for requirements, expected in cases:
        for requirement in requirements:
            ss.filters.addRequirement(cx=requirement)
        self.assertEqual(len(ss.index), expected)
        ss.filters.removeRequirement('cx')
        self.assertEqual(len(ss.index), 117)
def test_context_filters(self):
    """Check filtered record counts for a series of ``cx`` requirements.

    Every scenario adds its ``cx`` requirements, checks the filtered index
    length, removes the ``cx`` requirement and checks that the index is
    back to the full 117 records.
    """
    ss = SubreadSet(upstreamdata.getUnalignedBam())
    assert set(ss.index.contextFlag) == {0, 1, 2, 3}
    flag_counts = [
        len(np.flatnonzero(ss.index.contextFlag == cx))
        for cx in sorted(set(ss.index.contextFlag))
    ]
    assert flag_counts == [15, 33, 32, 37]
    assert len(ss.index) == 117

    def check(expected, *requirements):
        # One addRequirement call per entry, then verify and reset.
        for requirement in requirements:
            ss.filters.addRequirement(cx=requirement)
        assert len(ss.index) == expected
        ss.filters.removeRequirement('cx')
        assert len(ss.index) == 117

    # no adapters/barcodes
    check(15, [('=', 0)])
    # no adapters/barcodes
    check(15, [('=', 'NO_LOCAL_CONTEXT')])
    # some adapters/barcodes
    check(102, [('!=', 0)])
    # adapter before
    check(70, [('&', 1)])
    # adapter before
    check(70, [('&', 'ADAPTER_BEFORE')])
    # adapter after
    check(69, [('&', 2)])
    # adapter before or after
    check(102, [('&', 3)])
    # adapter before or after
    check(102, [('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')])
    # adapter before or after but not both
    check(65, [('!=', 0)], [('~', 1), ('~', 2)])
    # adapter before or after
    check(102, [('&', 1), ('&', 2)])
    # adapter before and after
    check(37, [('&', 1)], [('&', 2)])
    # adapter before but not after
    check(33, [('&', 1)], [('~', 2)])
    # no adapter before
    check(47, [('~', 1)])
    # no adapter before or after
    check(15, [('~', 1)], [('~', 2)])
    # no adapter before or after
    check(15, [('~', 3)])
def setup_class(cls):
    """Open the unaligned BAM fixture once and cache its first record."""
    reader = BamReader(data.getUnalignedBam())
    cls.bam = reader
    cls.bamRead0 = next(iter(reader))
def test_get_dataset_metatype(self):
    """Check that a written SubreadSet XML reports the SubreadSet metatype."""
    ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
    # Hold the temporary file open for the whole test so the name stays
    # reserved and the file is cleaned up on exit.
    with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
        ds_file = tmp.name
        ds.write(ds_file)
        meta_type = getDataSetMetaType(ds_file)
        self.assertEqual(meta_type, "PacBio.DataSet.SubreadSet")
def __init__(self):
    """Open both readers and cache the entry each returns for key 1650."""
    # NOTE(review): 1650 is presumably a ZMW hole number - confirm.
    hole = 1650
    stitcher = ZmwReadStitcher(getUnalignedBam())
    bas_reader = BasH5Reader(getBaxForBam())
    self.V = stitcher
    self.B = bas_reader
    self.VZ = stitcher[hole]
    self.BZ = bas_reader[hole]
def test_context_filters(self):
    """Check filtered record counts for a series of ``cx`` requirements.

    Each scenario lists the requirement sets to add (one ``addRequirement``
    call each) and the index length expected afterwards; the ``cx``
    requirement is then removed and the full 117 records must reappear.
    """
    ss = SubreadSet(upstreamdata.getUnalignedBam())
    self.assertEqual(set(ss.index.contextFlag), {0, 1, 2, 3})
    observed_counts = [
        len(np.flatnonzero(ss.index.contextFlag == cx))
        for cx in sorted(set(ss.index.contextFlag))
    ]
    self.assertEqual(observed_counts, [15, 33, 32, 37])
    self.assertEqual(len(ss.index), 117)

    scenarios = (
        # no adapters/barcodes
        (15, ([('=', 0)],)),
        # no adapters/barcodes
        (15, ([('=', 'NO_LOCAL_CONTEXT')],)),
        # some adapters/barcodes
        (102, ([('!=', 0)],)),
        # adapter before
        (70, ([('&', 1)],)),
        # adapter before
        (70, ([('&', 'ADAPTER_BEFORE')],)),
        # adapter after
        (69, ([('&', 2)],)),
        # adapter before or after
        (102, ([('&', 3)],)),
        # adapter before or after
        (102, ([('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')],)),
        # adapter before or after but not both
        (65, ([('!=', 0)], [('~', 1), ('~', 2)])),
        # adapter before or after
        (102, ([('&', 1), ('&', 2)],)),
        # adapter before and after
        (37, ([('&', 1)], [('&', 2)])),
        # adapter before but not after
        (33, ([('&', 1)], [('~', 2)])),
        # no adapter before
        (47, ([('~', 1)],)),
        # no adapter before or after
        (15, ([('~', 1)], [('~', 2)])),
        # no adapter before or after
        (15, ([('~', 3)],)),
    )
    for n_expected, to_add in scenarios:
        for cx_requirement in to_add:
            ss.filters.addRequirement(cx=cx_requirement)
        self.assertEqual(len(ss.index), n_expected)
        ss.filters.removeRequirement('cx')
        self.assertEqual(len(ss.index), 117)
def __init__(self):
    """Open the BAM and bax.h5 fixtures and cache the first read of each."""
    self.bam, self.bax = (
        BamReader(data.getUnalignedBam()),
        BaxH5Reader(data.getBaxForBam()),
    )
    # First subread from the bax file, then first record from the BAM.
    bax_subreads = self.bax.subreads()
    self.baxRead0 = next(bax_subreads)
    bam_records = iter(self.bam)
    self.bamRead0 = next(bam_records)
def setup_class(self):
    """Open the BAM and bax.h5 fixtures and cache the first read of each.

    NOTE(review): a hook named ``setup_class`` is presumably called with the
    test class, so ``self`` here would be the class rather than an
    instance - confirm before renaming the parameter.
    """
    bam_reader = BamReader(data.getUnalignedBam())
    bax_reader = BaxH5Reader(data.getBaxForBam())
    self.bam = bam_reader
    self.bax = bax_reader
    # First subread from the bax file and first record from the BAM.
    self.baxRead0 = next(bax_reader.subreads())
    self.bamRead0 = next(iter(bam_reader))