Exemplo n.º 1
0
    def testTwoReadsTwoLandmarksSameOffsets(self):
        """
        Adding two reads with identical sequences (each with the same two
        landmarks at the same offsets) must result in a single key in the
        backend, with both subject indices listed under that key.

        The mirrored key A3:A2:-23 is not expected: it would describe the
        same two landmarks and the same separation, just with the sign
        changed, and so would be redundant.
        """
        sequence = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
        params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
        backend = Backend()
        backend.configure(params)
        for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
            backend.addSubject(AARead(readId, sequence), subjectIndex)
        key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
        expected = {
            key: {
                '0': [[0, 9, 23, 13]],
                '1': [[0, 9, 23, 13]],
            },
        }
        self.assertEqual(expected, backend.d)
Exemplo n.º 2
0
    def getFractionOfStructuresCovered(self):
        """
        Return the fraction of reads in self.structureFile for which the
        'AC ' + self.structureType landmark finder finds at least one
        landmark.

        The result is the number of reads with one or more landmarks divided
        by the total number of reads, or 0.0 if no reads were seen.
        """
        specifier = DatabaseSpecifier()
        db = specifier.getDatabaseFromKeywords(
            trigPoints=[],
            landmarks=['AC ' + self.structureType],
            acAlphaHelixFilename=self.acAlphaHelixFilename,
            acAlphaHelix310Filename=self.acAlphaHelix310Filename,
            acAlphaHelixCombinedFilename=self.acAlphaHelixCombinedFilename,
            acAlphaHelixPiFilename=self.acAlphaHelixPiFilename,
            acExtendedStrandFilename=self.acExtendedStrandFilename)

        backend = Backend()
        backend.configure(db.dbParams)

        reads = FastaReads(self.structureFile, readClass=AAReadWithX,
                           checkAlphabet=0)

        total = 0
        hit = 0
        for read in reads:
            total += 1
            # A read only counts as covered if scanning finds a landmark.
            if len(backend.scan(read).landmarks) == 0:
                continue
            hit += 1

        if not total:
            # No reads at all: avoid dividing by zero.
            return 0.0
        return hit / total
Exemplo n.º 3
0
    def testFindOneMatchingHashInOneLocation(self):
        """
        A subject containing one matching hash at a single location must be
        reported by find() with the expected landmark and trig point, a hash
        count of one, and no non-matching hashes.
        """
        sequence = 'AFRRRFRRRFASAASA'
        subject = AARead('subject', sequence)
        query = AARead('query', sequence)
        backend = Backend()
        backend.configure(DatabaseParameters(
            landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
        backend.addSubject(subject, '0')
        matches, hashCount, nonMatchingHashes = backend.find(query)

        expectedMatches = {
            '0': [
                {
                    'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                    'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                    'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                    'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
                },
            ],
        }
        self.assertEqual(expectedMatches, matches)
        self.assertEqual(1, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Exemplo n.º 4
0
    def testFindWithIdenticalNonMatchingHashes(self):
        """
        When True is passed as the second argument to find() (the
        storeFullAnalysis flag), a hash that occurs twice in the query but
        does not match the subject must be returned with both of its
        occurrences.
        """
        def helixAt(offset):
            return Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2)

        def peakAt(offset):
            return TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset)

        subject = AARead('subject', 'F')
        query = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')
        backend = Backend()
        backend.configure(DatabaseParameters(
            landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks],
            maxDistance=10))
        backend.addSubject(subject, '0')
        matches, hashCount, nonMatchingHashes = backend.find(query, True)

        expectedNonMatching = {
            'A2:P:10': [
                [helixAt(1), peakAt(11)],
                [helixAt(23), peakAt(33)],
            ],
        }
        self.assertEqual({}, matches)
        self.assertEqual(2, hashCount)
        self.assertEqual(expectedNonMatching, nonMatchingHashes)
Exemplo n.º 5
0
    def testFindOneMatchingHashInTwoLocations(self):
        """
        find() must return both matches when the single subject shares a
        hash with the query and that hash occurs in two locations.
        """
        subject = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
        query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
        backend = Backend()
        backend.configure(DatabaseParameters(
            landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
        backend.addSubject(subject, '0')
        matches, hashCount, nonMatchingHashes = backend.find(query)

        firstMatch = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
        }
        secondMatch = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
        }
        self.assertEqual({'0': [firstMatch, secondMatch]}, matches)

        self.assertEqual(14, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Exemplo n.º 6
0
    def testFindTwoMatchingInSameSubject(self):
        """
        When the query is identical to the subject and yields two hashes,
        find() must report both of them as matches for that subject.
        """
        sequence = 'FRRRFRRRFASAASA'
        subject = AARead('subject', sequence)
        query = AARead('query', sequence)
        backend = Backend()
        backend.configure(DatabaseParameters(landmarks=[AlphaHelix],
                                             trigPoints=[Peaks]))
        backend.addSubject(subject, '0')
        matches, hashCount, nonMatchingHashes = backend.find(query)

        matchAtPeak10 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
        }
        matchAtPeak13 = {
            'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
            'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
            'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
        }
        self.assertEqual({'0': [matchAtPeak10, matchAtPeak13]}, matches)
        self.assertEqual(2, hashCount)
        self.assertEqual({}, nonMatchingHashes)
Exemplo n.º 7
0
    def testChecksumAfterSaveRestore(self):
        """
        A backend that receives one sequence, is saved and restored, and
        then receives a second sequence must end up with the same checksum
        as a backend that receives the same two sequences without the
        intervening save/restore.
        """
        firstSequence = 'FRRRFRRRFASAASA'
        secondSequence = 'MMMMMMMMMFRRRFR'

        # Backend whose population is interrupted by a save/restore cycle.
        interrupted = Backend()
        interrupted.configure(
            DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                               trigPoints=[Peaks, Troughs]),
            'name1', 0)
        interrupted.addSubject(AARead('id1', firstSequence), '0')
        saved = StringIO()
        interrupted.save(saved)
        saved.seek(0)
        interrupted = Backend.restore(saved)
        interrupted.addSubject(AARead('id2', secondSequence), '1')

        # Backend populated in one go.
        uninterrupted = Backend()
        uninterrupted.configure(
            DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                               trigPoints=[Peaks, Troughs]),
            'name2', 0)
        uninterrupted.addSubject(AARead('id1', firstSequence), '0')
        uninterrupted.addSubject(AARead('id2', secondSequence), '1')

        self.assertEqual(interrupted.checksum(), uninterrupted.checksum())
Exemplo n.º 8
0
    def testSaveContentIncludesExpectedKeysAndValues(self):
        """
        The JSON state written by a backend's save method must contain the
        expected keys, each with the expected value.
        """
        backend = Backend()
        backend.configure(
            DatabaseParameters(landmarks=[], trigPoints=[],
                               limitPerLandmark=16, maxDistance=17,
                               minDistance=18, distanceBase=19.0),
            'backend', 33)
        fp = StringIO()
        backend.save(fp)
        fp.seek(0)

        # The results of these two restore calls are discarded; they are
        # made to read past what precedes the backend's own state
        # (presumably the saved parameters and subject store).
        DatabaseParameters.restore(fp)
        SubjectStore.restore(fp)
        stateLine = fp.readline()
        # Drop the final character of the line before parsing the JSON.
        state = loads(stateLine[:-1])

        # Keys
        self.assertEqual(
            {'checksum', 'd', 'name', '_totalCoveredResidues'},
            set(state.keys()))

        # Values
        self.assertEqual(backend.checksum(), state['checksum'])
        self.assertEqual({}, state['d'])
        self.assertEqual('backend', state['name'])
        self.assertEqual(0, state['_totalCoveredResidues'])
Exemplo n.º 9
0
 def testCollectReadHashes(self):
     """
     getHashes must return a dict keyed by hash string, in which each
     value lists the [landmark, second feature] pairs that produced that
     hash.
     """
     backend = Backend()
     backend.configure(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], distanceBase=1.0))
     query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFASAASA')
     hashes = backend.getHashes(backend.scan(query))

     # Expected features, indexed by their offset in the query.
     helix = {
         offset: Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2)
         for offset in (0, 15)
     }
     peak = {
         offset: TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset)
         for offset in (10, 13, 25, 28)
     }
     expected = {
         'A2:P:28': [[helix[0], peak[28]]],
         'A2:P:25': [[helix[0], peak[25]]],
         'A2:P:13': [[helix[0], peak[13]], [helix[15], peak[28]]],
         'A2:P:10': [[helix[0], peak[10]], [helix[15], peak[25]]],
         'A2:P:-5': [[helix[15], peak[10]]],
         'A2:P:-2': [[helix[15], peak[13]]],
         'A2:A2:15': [[helix[0], helix[15]]],
     }
     self.assertEqual(expected, hashes)
Exemplo n.º 10
0
 def testPrint(self):
     """
     print_ must return the expected multi-line description of a backend
     to which a single subject has been added.
     """
     backend = Backend()
     backend.configure(DatabaseParameters(
         landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks, Troughs],
         limitPerLandmark=16, maxDistance=10, minDistance=0,
         distanceBase=1))
     backend.addSubject(AARead('subject-id', 'FRRRFRRRFASAASA'), '0')
     # Note that the final line carries no trailing newline.
     expectedOutput = (
         'Name: backend\n'
         'Hash count: 3\n'
         'Checksum: 2751160351\n'
         'Subjects (with offsets) by hash:\n'
         '  A2:P:10\n'
         '    0 [[0, 9, 10, 1]]\n'
         '  A2:T:4\n'
         '    0 [[0, 9, 4, 1]]\n'
         '  A2:T:8\n'
         '    0 [[0, 9, 8, 1]]\n'
         'Landmark symbol counts:\n'
         '  AlphaHelix (A2): 3\n'
         'Trig point symbol counts:\n'
         '  Peaks (P): 1\n'
         '  Troughs (T): 2')
     self.assertEqual(expectedOutput, backend.print_())
Exemplo n.º 11
0
 def testInitialBackendIsEmpty(self):
     """
     A configured backend to which no reads have been added must have an
     empty index.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
     self.assertEqual({}, backend.d)
Exemplo n.º 12
0
 def testParametersAreStored(self):
     """
     After configure is called, the backend's dbParams attribute must be
     the very same parameters object that was passed in.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     self.assertIs(params, backend.dbParams)
Exemplo n.º 13
0
 def testInitialChecksum(self):
     """
     The backend checksum must be the value that was passed to its
     configure method.
     """
     backend = Backend()
     backend.configure(DatabaseParameters(landmarks=[], trigPoints=[]),
                       'backend', 10)
     self.assertEqual(10, backend.checksum())
Exemplo n.º 14
0
 def testOneReadOneLandmarkOnePeakNoTrigFinders(self):
     """
     With no trig point finders in use, adding a subject that has one
     landmark and one peak must add nothing to the backend.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     subject = AARead('id', 'FRRRFRRRFASA')
     backend.addSubject(subject, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 15
0
 def testOneReadOneLandmark(self):
     """
     Adding a subject that has just one landmark must add nothing to the
     backend.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     subject = AARead('id', 'FRRRFRRRF')
     backend.addSubject(subject, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 16
0
 def testOneReadOneLandmarkTwoPeaksSevereMinDistance(self):
     """
     Under a severe minimum distance (100), adding a subject that has one
     landmark and two peaks must add no keys to the backend.
     """
     backend = Backend()
     backend.configure(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[Peaks], minDistance=100))
     subject = AARead('id', 'FRRRFRRRFASAASA')
     backend.addSubject(subject, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 17
0
 def testAddSameSubjectIncreasesBackendSize(self):
     """
     Adding an identical subject a second time must not increase the
     backend's subject count, because the backend's subject store detects
     duplicates.

     NOTE: despite this method's name, the behaviour checked is that the
     count stays at one.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     # Add the same id and sequence twice; the count must be 1 each time.
     for _ in range(2):
         backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
         self.assertEqual(1, backend.subjectCount())
Exemplo n.º 18
0
 def testHashWithFeatureOnRight(self):
     """
     The backend hash function must return the expected (positive offset)
     hash when the second feature lies to the right of the first.
     """
     backend = Backend()
     backend.configure(DatabaseParameters(landmarks=[], trigPoints=[]))
     first = Landmark('name', 'A', 20, 0)
     second = TrigPoint('name', 'B', 30)
     # The features are 10 apart; the hash embeds the scaled distance.
     expected = 'A:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 19
0
 def testHashWithSymbolDetail(self):
     """
     The backend hash function must return the expected value when the
     landmark it is given carries a repeat count (5 here, which appears
     appended to the landmark symbol in the expected hash).
     """
     backend = Backend()
     backend.configure(DatabaseParameters(landmarks=[], trigPoints=[]))
     first = Landmark('name', 'A', 20, 0, 5)
     second = TrigPoint('name', 'B', 30)
     expected = 'A5:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 20
0
 def testScan(self):
     """
     The scan method must return a ScannedRead instance.
     """
     read = AARead('subject', 'FRRRFRRRFASAASA')
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
     backend.addSubject(read, '0')
     self.assertIsInstance(backend.scan(read), ScannedRead)
Exemplo n.º 21
0
 def testCollectReadHashesWithOneLandmark(self):
     """
     getHashes must return an empty dict for a read that has only one
     landmark (and no trig points, as no trig point finders are in use).
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     scanned = backend.scan(AARead('query', 'FRRRFRRRF'))
     hashes = backend.getHashes(scanned)
     self.assertEqual({}, hashes)
Exemplo n.º 22
0
 def testHashWithFeatureOnRightAndNonDefaultDistanceBase(self):
     """
     With a non-default distance base (1.5), the backend hash function
     must return the expected hash when the second feature lies to the
     right of the first.
     """
     distanceBase = 1.5
     backend = Backend()
     backend.configure(DatabaseParameters(
         landmarks=[], trigPoints=[], distanceBase=distanceBase))
     first = Landmark('name', 'A', 20, 0)
     second = TrigPoint('name', 'B', 30)
     expected = 'A:B:' + str(scaleLog(10, distanceBase))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 23
0
 def testAddSubjectReturnsCorrectResult(self):
     """
     When one subject is added, addSubject must return three values:
     whether the subject already existed (False here), the index of the
     added subject ('0' here), and a hash count (0 here).
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     result = backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
     preExisting, subjectIndex, hashCount = result
     self.assertFalse(preExisting)
     self.assertEqual('0', subjectIndex)
     self.assertEqual(0, hashCount)
Exemplo n.º 24
0
 def testTwoReadsTwoLandmarksLimitZeroPairsPerLandmark(self):
     """
     With limitPerLandmark set to zero, adding two identical reads that
     each have two landmarks must add no keys to the backend.
     """
     sequence = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
     backend = Backend()
     backend.configure(DatabaseParameters(
         landmarks=[AlphaHelix], trigPoints=[], limitPerLandmark=0))
     for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
         backend.addSubject(AARead(readId, sequence), subjectIndex)
     self.assertEqual({}, backend.d)
Exemplo n.º 25
0
 def testSaveRestoreWithNonDefaultParameters(self):
     """
     Saving a backend that was configured with non-default parameters and
     then restoring it must produce a backend with the same parameters.

     The check relies on DatabaseParameters.compare, which this test
     expects to return None when the two parameter sets agree.
     """
     dbParams = DatabaseParameters(landmarks=[], trigPoints=[],
                                   limitPerLandmark=16, maxDistance=17,
                                   minDistance=18, distanceBase=19.0)
     be = Backend()
     be.configure(dbParams)
     fp = StringIO()
     be.save(fp)
     # Rewind so that restore reads what save just wrote.
     fp.seek(0)
     # Call restore on the class (as the other save/restore test does)
     # rather than through the instance that was just saved, so it is
     # clear that the result is a newly built backend.
     result = Backend.restore(fp)
     self.assertIsNone(dbParams.compare(result.dbParams))
Exemplo n.º 26
0
 def __init__(self, histogram, query, subject, dbParams, findParams=None):
     """
     Store the histogram, the query and subject lengths, the find
     parameters, and the sets of all features found in the query and in
     the subject.

     histogram: stored as-is on the instance.
     query: a read; its length is recorded and it is scanned for features.
     subject: an object with a length and a 'read' attribute; the length
         is recorded and the 'read' attribute is scanned for features.
     dbParams: the parameters used to configure the scanning Backend.
     findParams: the find parameters to store. If this is false-ish (e.g.
         None), a default FindParameters instance is stored instead.
     """
     self._histogram = histogram
     self._queryLen = len(query)
     self._subjectLen = len(subject)

     # Function-level import (presumably to avoid a circular import at
     # module load time - TODO confirm).
     from light.parameters import FindParameters
     if not findParams:
         findParams = FindParameters()
     self._findParams = findParams

     from light.backend import Backend
     scanner = Backend()
     scanner.configure(dbParams)

     def allFeatures(read):
         # Every landmark and trig point the scan finds, as one set.
         scanned = scanner.scan(read)
         return set(scanned.landmarks + scanned.trigPoints)

     self._allQueryFeatures = allFeatures(query)
     self._allSubjectFeatures = allFeatures(subject.read)
Exemplo n.º 27
0
 def testOneReadTwoLandmarks(self):
     """
     Adding one subject that has two landmarks must add exactly one key
     to the backend.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
     subject = AARead('id', 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF')
     backend.addSubject(subject, '0')
     key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
     expected = {
         key: {'0': [[0, 9, 23, 13]]},
     }
     self.assertEqual(expected, backend.d)
Exemplo n.º 28
0
 def testOneReadOneLandmarkOnePeak(self):
     """
     Adding one subject that has one landmark and one peak must add
     exactly one pair to the backend.
     """
     backend = Backend()
     backend.configure(
         DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
     subject = AARead('id', 'FRRRFRRRFASA')
     backend.addSubject(subject, '0')
     key = 'A2:P:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     expected = {
         key: {'0': [[0, 9, 10, 1]]},
     }
     self.assertEqual(expected, backend.d)
Exemplo n.º 29
0
    def __init__(self, histogram, query, subject, dbParams):
        """
        Store the histogram, the query and subject lengths, and the
        features that take part in the hashes of the query and of the
        subject.

        histogram: stored as-is on the instance.
        query: a read; its length is recorded and it is scanned and hashed.
        subject: an object with a length and a 'read' attribute; the length
            is recorded and the 'read' attribute is scanned and hashed.
        dbParams: the parameters used to configure the scanning Backend.
        """
        self._histogram = histogram
        self._queryLen = len(query)
        self._subjectLen = len(subject)

        # Function-level import (presumably to avoid a circular import at
        # module load time - TODO confirm).
        from light.backend import Backend
        scanner = Backend()
        scanner.configure(dbParams)

        def hashFeatures(read):
            # Scan the read, compute its hashes, then let getHashFeatures
            # extract the features from those hashes.
            scanned = scanner.scan(read)
            hashes = scanner.getHashes(scanned)
            return getHashFeatures(hashes)

        self._allQueryFeatures = hashFeatures(query)
        self._allSubjectFeatures = hashFeatures(subject.read)
Exemplo n.º 30
0
    def testFindNoMatch(self):
        """
        A query that yields no hashes must produce no matches, a hash count
        of zero, and no non-matching hashes, even though the backend holds
        a subject.
        """
        backend = Backend()
        backend.configure(DatabaseParameters(landmarks=[AlphaHelix],
                                             trigPoints=[Peaks]))
        backend.addSubject(AARead('subject', 'FRRRFRRRFASAASA'), '0')
        result = backend.find(AARead('query', 'FRRR'))
        matches, hashCount, nonMatchingHashes = result

        self.assertEqual({}, matches)
        self.assertEqual(0, hashCount)
        self.assertEqual({}, nonMatchingHashes)