def testSmallDecode(self):
        """
        Test modified fraction estimation around a known modification in lambda.

        Summarizes the 14900-15100 window of "lambda_NEB3011" and requires
        every fraction reported at template positions 14982 and 14991 to be
        greater than 0.5.
        """

        # First methylated A in lambda:
        # strand            motif onTarget seqid   tpl
        #      0    GCACNNNNNNGTT       On     1 14983

        lo, hi = 14900, 15100
        window = ReferenceWindow(1, "lambda_NEB3011", lo, hi)
        bounds = (lo, hi)

        self.kw._prepForReferenceWindow(window)
        summary = self.kw._summarizeReferenceRegion(bounds, True, False)

        # Keep only the fraction-related fields of sites that carry a 'frac'
        # entry and sit on one of the two positions of interest.
        wanted_keys = ('frac', 'fracLow', 'fracUp', 'tpl', 'strand')
        target_sites = (14982, 14991)
        m6a_hits = []
        for site in summary:
            if 'frac' in site and site['tpl'] in target_sites:
                m6a_hits.append({key: site[key] for key in wanted_keys})
        print(m6a_hits)

        # NOTE(review): if no site matches, this loop performs no assertion
        # and the test passes vacuously -- confirm whether that is intended.
        for hit in m6a_hits:
            self.assertGreater(hit["frac"], 0.5)
Пример #2
0
 def test_private_api(self):
     """Exercise private helpers of ``self.kw`` on reference window 50-100.

     Asserts that ``readsInRange`` yields 301 reads for the window, then
     calls ``_fetchChunks``, ``_loadRawIpds`` and ``_chunkRawIpds`` to
     confirm they run without raising.  Their return values are only
     logged, never asserted on.
     """
     start = 50
     end = 100
     REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
     referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
     rir = list(
         self.kw.caseCmpH5.readsInRange(referenceWindow.refName,
                                        referenceWindow.start,
                                        referenceWindow.end))
     self.assertEqual(len(rir), 301)
     # Result deliberately discarded: the call is made only to check it runs.
     self.kw._fetchChunks(REF_GROUP_ID, (start, end), self.kw.caseCmpH5)
     # Scale factor is the reciprocal of the first read group's frame rate.
     factor = 1.0 / self.ds.readGroupTable[0].FrameRate
     rawIpds = self.kw._loadRawIpds(rir, start, end, factor)
     # Diagnostic output only, so log it at INFO on the module logger rather
     # than at CRITICAL on the root logger.
     log.info("rawIpds length: %d", len(rawIpds))
     # XXX note that this is very dependent on the exact order of reads
     # found by readsInRange(), which may be altered by changes to the
     # implementation of the dataset API.  It should, however, remain
     # consistent across equivalent input types.
     # XXX 2015-08-28 disabling this for now because it will change if the
     # dataset contains multiple .bam files
     #self.assertEqual("%.4f" % rawIpds[0][2], "0.2665")
     log.info(rawIpds)
     # Result deliberately discarded: the call is made only to check it runs.
     self.kw._chunkRawIpds(rawIpds)
Пример #3
0
    def test_small_decode(self):
        """Test for known modifications near the start of H. pylori genome.

        Decodes the 50-100 window and expects exactly two m6A calls (tpl 88
        and 90) and one m4C call (tpl 59).  Every decoded modification must
        sit on one of those positions: 88 on strand 0, 59 or 90 otherwise.
        """
        # XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
        # The tpl values asserted below are each one less than the positions
        # listed above (presumably 0-based vs. 1-based -- TODO confirm).
        start = 50
        end = 100
        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
        bounds = (start, end)

        self.kw._prepForReferenceWindow(referenceWindow)
        kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
        mods = self.kw._decodePositiveControl(kinetics, bounds)
        log.info(mods)

        # Verify that we detect m6A mods at 88 and 90
        m6AMods = [
            x for x in mods
            if x['modification'] == 'm6A' and x['tpl'] in (88, 90)
        ]
        self.assertEqual(len(m6AMods), 2)
        # Verify that we detect the m4C mod at 59
        m4CMods = [
            x for x in mods
            if x['modification'] == 'm4C' and x['tpl'] in (59, )
        ]
        self.assertEqual(len(m4CMods), 1)
        # No decoded modification may fall outside the expected positions.
        for x in mods:
            if x['strand'] == 0:
                self.assertEqual(x['tpl'], 88)
            else:
                # assertIn reports the offending value on failure.
                self.assertIn(x['tpl'], (59, 90))
Пример #4
0
    def test_small_decode(self):
        """Decode reference window 985-1065 and expect four modifications."""
        ref_name = "gi|12057207|gb|AE001439.1|"
        bounds = (985, 1065)
        window = ReferenceWindow(0, ref_name, bounds[0], bounds[1])

        self.kw._prepForReferenceWindow(window)
        summary = self.kw._summarizeReferenceRegion(bounds, False, True)
        decoded = self.kw._decodePositiveControl(summary, bounds)
        self.assertEqual(len(decoded), 4)
Пример #5
0
 def test_small_decode(self):
     """Pin the first IPD ratio and the decode count for window 50-100.

     The first summarized entry's ``ipdRatio`` must format to "1.06460"
     and the decode step must return three modifications.
     """
     ref_name = "gi|12057207|gb|AE001439.1|"
     bounds = (50, 100)
     window = ReferenceWindow(0, ref_name, bounds[0], bounds[1])
     self.kw._prepForReferenceWindow(window)
     summary = self.kw._summarizeReferenceRegion(bounds, False, True)
     # XXX this value depends on the exact order of reads returned by
     # readsInRange(), which may change if the dataset API implementation
     # changes.  It should not be affected by stochastic effects, however.
     first_ratio = "%.5f" % summary[0]['ipdRatio']
     self.assertEqual(first_ratio, "1.06460")
     decoded = self.kw._decodePositiveControl(summary, bounds)
     self.assertEqual(len(decoded), 3)
Пример #6
0
    def testSmallDecode(self):
        """
        Decode modifications around a known modification in lambda.

        Runs the decode over the 14900-15100 window of "lambda_NEB3011" and
        expects exactly two m6A calls at template positions 14982 and 14991.
        """

        # First methylated A in lambda:
        # strand            motif onTarget seqid   tpl
        #      0    GCACNNNNNNGTT       On     1 14983

        lo, hi = 14900, 15100
        window = ReferenceWindow(1, "lambda_NEB3011", lo, hi)
        bounds = (lo, hi)

        self.kw._prepForReferenceWindow(window)
        summary = self.kw._summarizeReferenceRegion(bounds, False, True)
        mods = self.kw._decodePositiveControl(summary, bounds)
        print(mods)

        # Count the m6A calls that land on the two expected positions.
        expected_sites = (14982, 14991)
        hit_count = 0
        for call in mods:
            if call['modification'] == 'm6A' and call['tpl'] in expected_sites:
                hit_count += 1
        self.assertEqual(hit_count, 2)