Exemplo n.º 1
0
    def getFractionOfStructuresCovered(self):
        """
        Compute the fraction of reads in self.structureFile in which at
        least one landmark is found.

        A database is obtained with no trig points and a single landmarks
        entry ('AC ' followed by self.structureType). A backend configured
        with that database's parameters then scans every read in the file.

        @return: The number of reads with one or more landmarks divided by
            the total number of reads, or 0.0 if the file yields no reads.
        """
        database = DatabaseSpecifier().getDatabaseFromKeywords(
            trigPoints=[],
            landmarks=['AC ' + self.structureType],
            acAlphaHelixFilename=self.acAlphaHelixFilename,
            acAlphaHelix310Filename=self.acAlphaHelix310Filename,
            acAlphaHelixCombinedFilename=self.acAlphaHelixCombinedFilename,
            acAlphaHelixPiFilename=self.acAlphaHelixPiFilename,
            acExtendedStrandFilename=self.acExtendedStrandFilename)

        scanner = Backend()
        scanner.configure(database.dbParams)

        structures = FastaReads(self.structureFile, readClass=AAReadWithX,
                                checkAlphabet=0)

        # count ends up as the number of reads seen (it stays 0 when the
        # file is empty); covered counts the reads with any landmark.
        covered = count = 0
        for count, structure in enumerate(structures, start=1):
            if len(scanner.scan(structure).landmarks) > 0:
                covered += 1

        if not count:
            return 0.0
        return covered / count
Exemplo n.º 2
0
 def testInitialBackendIsEmpty(self):
     """
     A backend that has been configured, but to which no subjects have
     been added, must have an empty hash dictionary.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     self.assertEqual({}, backend.d)
Exemplo n.º 3
0
 def testParametersAreStored(self):
     """
     After configure is called, the backend's dbParams attribute must be
     the very same parameters object that was passed in.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     # Identity, not just equality: the object itself must be kept.
     self.assertIs(params, backend.dbParams)
Exemplo n.º 4
0
 def testInitialChecksum(self):
     """
     The checksum reported by a backend must be the value that was passed
     to its configure method (10 here, alongside the name 'backend').
     """
     params = DatabaseParameters(landmarks=[], trigPoints=[])
     backend = Backend()
     backend.configure(params, 'backend', 10)
     self.assertEqual(10, backend.checksum())
Exemplo n.º 5
0
 def testHashWithFeatureOnRight(self):
     """
     When the second feature lies to the right of the first, the backend
     hash function must return the two symbols followed by the scaled
     (positive) distance between the features.
     """
     params = DatabaseParameters(landmarks=[], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     first = Landmark('name', 'A', 20, 0)
     second = TrigPoint('name', 'B', 30)
     # The expected distance is 10, scaled with the default distance base.
     expected = 'A:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 6
0
 def testAddSameSubjectIncreasesBackendSize(self):
     """
     Adding an identical subject more than once must leave the backend
     subject count unchanged, because duplicates are detected.

     Note: despite the word 'Increases' in this test's name, the
     assertions require the count to stay at 1 after the second add.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     # Add the same id and sequence twice; the count must be 1 each time.
     for _ in range(2):
         backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
         self.assertEqual(1, backend.subjectCount())
Exemplo n.º 7
0
 def testHashWithSymbolDetail(self):
     """
     When the landmark is built with an additional fifth argument (5
     here), the backend hash function must include that detail after the
     landmark symbol, giving 'A5' rather than 'A'.
     """
     params = DatabaseParameters(landmarks=[], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     first = Landmark('name', 'A', 20, 0, 5)
     second = TrigPoint('name', 'B', 30)
     expected = 'A5:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 8
0
 def testHashWithFeatureOnRightAndNonDefaultDistanceBase(self):
     """
     With a database distance base of 1.5 and the second feature to the
     right of the first, the backend hash function must scale the
     distance using that base rather than the default.
     """
     params = DatabaseParameters(landmarks=[], trigPoints=[],
                                 distanceBase=1.5)
     backend = Backend()
     backend.configure(params)
     first = Landmark('name', 'A', 20, 0)
     second = TrigPoint('name', 'B', 30)
     # Same base (1.5) as given to the database parameters above.
     expected = 'A:B:' + str(scaleLog(10, 1.5))
     self.assertEqual(expected, backend.hash(first, second))
Exemplo n.º 9
0
 def testAddSubjectReturnsCorrectResult(self):
     """
     When one subject is added, addSubject must return three values:
     whether the subject already existed (False here), the index of the
     added subject ('0' here), and a hash count (0 here).
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRF')
     existed, index, nHashes = backend.addSubject(read, '0')
     self.assertFalse(existed)
     self.assertEqual('0', index)
     self.assertEqual(0, nHashes)
Exemplo n.º 10
0
    def __init__(self, dbParams, backend=None, filePrefix=None):
        """
        Store the database parameters and a backend, then expose a fixed
        set of the backend's methods directly on this instance.

        @param dbParams: The database parameters. Stored as self.dbParams
            and used to configure a backend if one has to be created here.
        @param backend: An existing backend to use. If it is not truthy, a
            new Backend is created with filePrefix and configured with
            dbParams.
        @param filePrefix: Passed to Backend when one is created here, and
            stored as self._filePrefix in all cases.
        """
        self.dbParams = dbParams
        # NOTE(review): this is a truthiness test, not an 'is None' test,
        # so a falsy backend object would be replaced too - confirm that
        # this is intended.
        if not backend:
            backend = Backend(filePrefix=filePrefix)
            backend.configure(dbParams)
        self._backend = backend
        self._filePrefix = filePrefix

        # Delegate these calls straight to the backend by storing its
        # bound methods as attributes of this instance.
        delegated = ('addSubject', 'getIndexBySubject', 'getSubjectByIndex',
                     'getSubjects', 'subjectCount', 'hashCount',
                     'totalResidues', 'totalCoveredResidues', 'checksum')
        for name in delegated:
            setattr(self, name, getattr(backend, name))
Exemplo n.º 11
0
    def testTwoReadsTwoLandmarksSameOffsets(self):
        """
        Two reads with identical sequences, each with two landmarks at the
        same offsets, must produce a single key in the backend, with both
        subject indices listed under that key.

        A3:A2:-23 must not appear as a key: it would be redundant, being
        the same two landmarks with the same separation and only the sign
        changed.
        """
        params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
        backend = Backend()
        backend.configure(params)
        residues = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
        for readId, index in (('id1', '0'), ('id2', '1')):
            backend.addSubject(AARead(readId, residues), index)

        key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
        expected = {
            key: {
                '0': [[0, 9, 23, 13]],
                '1': [[0, 9, 23, 13]],
            },
        }
        self.assertEqual(expected, backend.d)
Exemplo n.º 12
0
    def __init__(self, **kwargs):
        """
        Build a configured backend from database keyword arguments and
        record the names of its landmark and trig point finders.

        @param kwargs: Keyword arguments passed on to
            DatabaseSpecifier.getDatabaseFromKeywords. If 'landmarks' is
            absent it defaults to ALL_LANDMARK_CLASSES plus those
            DEV_LANDMARK_CLASSES whose NAME starts with 'PDB '. If
            'trigPoints' is absent it defaults to every class in
            ALL_TRIG_CLASSES whose NAME is not 'Volume'.
        """
        # Fill in default finders only for keys the caller did not give.
        if 'landmarks' not in kwargs:
            pdbFinders = [finder for finder in DEV_LANDMARK_CLASSES
                          if finder.NAME.startswith('PDB ')]
            kwargs['landmarks'] = ALL_LANDMARK_CLASSES + pdbFinders
        if 'trigPoints' not in kwargs:
            kwargs['trigPoints'] = [finder for finder in ALL_TRIG_CLASSES
                                    if finder.NAME != 'Volume']

        database = DatabaseSpecifier().getDatabaseFromKeywords(**kwargs)
        backend = Backend()
        params = database.dbParams
        backend.configure(params)
        self._backend = backend

        # Landmark finder names come first, then trig point finder names.
        landmarkNames = params.landmarkFinderNames()
        self._names = landmarkNames + params.trigPointFinderNames()
Exemplo n.º 13
0
 def testNoOverlapDefaultDistanceBase(self):
     """
     With the default distance base, the indices covered by the GOR4
     alpha helix finder and those covered by the GOR4 beta strand finder
     on self.READ must not have any index in common.
     """
     helixBackend = Backend()
     helixParams = DatabaseParameters(landmarks=[GOR4AlphaHelix],
                                      trigPoints=[])
     helixBackend.configure(helixParams)

     strandBackend = Backend()
     strandParams = DatabaseParameters(landmarks=[GOR4BetaStrand],
                                       trigPoints=[])
     strandBackend.configure(strandParams)

     helixScan = helixBackend.scan(self.READ)
     strandScan = strandBackend.scan(self.READ)
     shared = helixScan.coveredIndices() & strandScan.coveredIndices()
     self.assertEqual(0, len(shared))
Exemplo n.º 14
0
    def testFindOneMatchingHashInOneLocation(self):
        """
        When the subject and the query share a sequence that yields a
        single hash at a single location, find must report exactly that
        one match for the subject, a hash count of 1 and no non-matching
        hashes.
        """
        residues = 'AFRRRFRRRFASAASA'
        subject = AARead('subject', residues)
        query = AARead('query', residues)
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], maxDistance=11)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(subject, '0')
        found, nHashes, unmatched = backend.find(query)

        # Query and subject are identical, so the same features are
        # expected on both sides of the match.
        helix = Landmark('AlphaHelix', 'A', 1, 9, 2)
        peak = TrigPoint('Peaks', 'P', 11)
        expected = {
            '0': [{
                'queryLandmark': helix,
                'queryTrigPoint': peak,
                'subjectLandmark': helix,
                'subjectTrigPoint': peak,
            }],
        }
        self.assertEqual(expected, found)
        self.assertEqual(1, nHashes)
        self.assertEqual({}, unmatched)
Exemplo n.º 15
0
    def testSaveContentIncludesExpectedKeysAndValues(self):
        """
        The JSON state written by a backend's save method must contain
        exactly the expected keys, each with the expected value.
        """
        params = DatabaseParameters(landmarks=[], trigPoints=[],
                                    limitPerLandmark=16, maxDistance=17,
                                    minDistance=18, distanceBase=19.0)
        backend = Backend()
        backend.configure(params, 'backend', 33)
        saved = StringIO()
        backend.save(saved)
        saved.seek(0)

        # NOTE(review): presumably these two restores consume the sections
        # of the saved data that precede the backend's own state line -
        # confirm against Backend.save.
        DatabaseParameters.restore(saved)
        SubjectStore.restore(saved)
        # Drop the final character of the line before parsing it.
        state = loads(saved.readline()[:-1])

        # Keys
        expectedKeys = {'checksum', 'd', 'name', '_totalCoveredResidues'}
        self.assertEqual(expectedKeys, set(state))

        # Values
        self.assertEqual(backend.checksum(), state['checksum'])
        self.assertEqual({}, state['d'])
        self.assertEqual('backend', state['name'])
        self.assertEqual(0, state['_totalCoveredResidues'])
Exemplo n.º 16
0
 def testPrint(self):
     """
     The backend's print_ method must return the expected multi-line
     summary: name, hash count, checksum, the subjects and offsets for
     each hash, and the landmark and trig point symbol counts.
     """
     read = AARead('subject-id', 'FRRRFRRRFASAASA')
     params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                 trigPoints=[Peaks, Troughs],
                                 limitPerLandmark=16, maxDistance=10,
                                 minDistance=0, distanceBase=1)
     backend = Backend()
     backend.configure(params)
     backend.addSubject(read, '0')
     # The final line has no trailing newline.
     wanted = (
         'Name: backend\n'
         'Hash count: 3\n'
         'Checksum: 2751160351\n'
         'Subjects (with offsets) by hash:\n'
         '  A2:P:10\n'
         '    0 [[0, 9, 10, 1]]\n'
         '  A2:T:4\n'
         '    0 [[0, 9, 4, 1]]\n'
         '  A2:T:8\n'
         '    0 [[0, 9, 8, 1]]\n'
         'Landmark symbol counts:\n'
         '  AlphaHelix (A2): 3\n'
         'Trig point symbol counts:\n'
         '  Peaks (P): 1\n'
         '  Troughs (T): 2')
     self.assertEqual(wanted, backend.print_())
Exemplo n.º 17
0
    def testFindTwoMatchingInSameSubject(self):
        """
        When two hashes of the query match the same subject, find must
        report both matches under that subject, a hash count of 2 and no
        non-matching hashes.
        """
        residues = 'FRRRFRRRFASAASA'
        subject = AARead('subject', residues)
        query = AARead('query', residues)
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(subject, '0')
        found, nHashes, unmatched = backend.find(query)

        # Both matches pair the same helix with a different peak, and
        # query and subject features coincide as the sequences are equal.
        helix = Landmark('AlphaHelix', 'A', 0, 9, 2)
        peaks = (TrigPoint('Peaks', 'P', 10), TrigPoint('Peaks', 'P', 13))
        expected = {
            '0': [
                {
                    'queryLandmark': helix,
                    'queryTrigPoint': peak,
                    'subjectLandmark': helix,
                    'subjectTrigPoint': peak,
                }
                for peak in peaks
            ],
        }
        self.assertEqual(expected, found)
        self.assertEqual(2, nHashes)
        self.assertEqual({}, unmatched)
Exemplo n.º 18
0
    def testFindWithIdenticalNonMatchingHashes(self):
        """
        When True is passed as the second argument of find (the full
        analysis flag), query hashes that match nothing must be returned,
        with every occurrence of an identical hash listed under one key.
        """
        subject = AARead('subject', 'F')
        query = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks], maxDistance=10)
        backend = Backend()
        backend.configure(params)
        backend.addSubject(subject, '0')
        found, nHashes, unmatched = backend.find(query, True)

        self.assertEqual({}, found)
        self.assertEqual(2, nHashes)

        # The same helix/peak pair occurs twice in the query, with the
        # peak ten positions after the helix offset in both cases.
        expected = {
            'A2:P:10': [
                [Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2),
                 TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset + 10)]
                for offset in (1, 23)
            ],
        }
        self.assertEqual(expected, unmatched)
Exemplo n.º 19
0
    def testFindOneMatchingHashInTwoLocations(self):
        """
        When one subject matches and the matching hash occurs at two
        locations, find must report both locations for that subject.
        """
        subject = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
        query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
        params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(subject, '0')
        found, nHashes, unmatched = backend.find(query)

        # The subject features sit one position to the right of the
        # corresponding query features.
        queryHelix = Landmark('AlphaHelix', 'A', 0, 9, 2)
        subjectHelix = Landmark('AlphaHelix', 'A', 1, 9, 2)
        expected = {
            '0': [
                {
                    'queryLandmark': queryHelix,
                    'queryTrigPoint': TrigPoint('Peaks', 'P', queryPeak),
                    'subjectLandmark': subjectHelix,
                    'subjectTrigPoint': TrigPoint('Peaks', 'P', subjectPeak),
                }
                for queryPeak, subjectPeak in ((10, 11), (13, 14))
            ],
        }
        self.assertEqual(expected, found)

        self.assertEqual(14, nHashes)
        self.assertEqual({}, unmatched)
Exemplo n.º 20
0
 def testNoOverlapDistanceBaseOne(self):
     """
     With a distance base of 1.0 (which should do no scaling), the indices
     covered by the GOR4 alpha helix finder and those covered by the GOR4
     beta strand finder on self.READ must not have any index in common.
     """
     helixBackend = Backend()
     helixParams = DatabaseParameters(landmarks=[GOR4AlphaHelix],
                                      trigPoints=[],
                                      distanceBase=1.0)
     helixBackend.configure(helixParams)

     strandBackend = Backend()
     strandParams = DatabaseParameters(landmarks=[GOR4BetaStrand],
                                       trigPoints=[],
                                       distanceBase=1.0)
     strandBackend.configure(strandParams)

     helixScan = helixBackend.scan(self.READ)
     strandScan = strandBackend.scan(self.READ)
     shared = helixScan.coveredIndices() & strandScan.coveredIndices()
     self.assertEqual(0, len(shared))
Exemplo n.º 21
0
 def testCollectReadHashes(self):
     """
     The getHashes method must return a dict whose keys are hashes of
     (landmark, trig point) pairs and whose values list every pair of
     features in the read that produced that hash.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], distanceBase=1.0)
     backend = Backend()
     backend.configure(params)
     read = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFASAASA')
     hashes = backend.getHashes(backend.scan(read))

     helix0 = Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 0, 9, 2)
     helix15 = Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 15, 9, 2)
     peak10 = TrigPoint(Peaks.NAME, Peaks.SYMBOL, 10)
     peak13 = TrigPoint(Peaks.NAME, Peaks.SYMBOL, 13)
     peak25 = TrigPoint(Peaks.NAME, Peaks.SYMBOL, 25)
     peak28 = TrigPoint(Peaks.NAME, Peaks.SYMBOL, 28)

     # The last component of each key is the second feature's offset
     # minus the first's, so it is negative for peaks left of a helix.
     expected = {
         'A2:P:28': [[helix0, peak28]],
         'A2:P:25': [[helix0, peak25]],
         'A2:P:13': [[helix0, peak13], [helix15, peak28]],
         'A2:P:10': [[helix0, peak10], [helix15, peak25]],
         'A2:P:-5': [[helix15, peak10]],
         'A2:P:-2': [[helix15, peak13]],
         'A2:A2:15': [[helix0, helix15]],
     }
     self.assertEqual(expected, hashes)
Exemplo n.º 22
0
 def testTwoReadsTwoLandmarksLimitZeroPairsPerLandmark(self):
     """
     With limitPerLandmark set to zero, adding two reads with identical
     sequences (each with two landmarks) must not add any key to the
     backend dictionary.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[], limitPerLandmark=0)
     backend = Backend()
     backend.configure(params)
     residues = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
     for readId, index in (('id1', '0'), ('id2', '1')):
         backend.addSubject(AARead(readId, residues), index)
     self.assertEqual({}, backend.d)
Exemplo n.º 23
0
 def testScan(self):
     """
     Scanning a read with a backend must return a ScannedRead instance.
     """
     read = AARead('subject', 'FRRRFRRRFASAASA')
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks])
     backend = Backend()
     backend.configure(params)
     backend.addSubject(read, '0')
     self.assertIsInstance(backend.scan(read), ScannedRead)
Exemplo n.º 24
0
    def __init__(self, histogram, query, subject, dbParams):
        """
        Record the histogram and the query and subject lengths, then
        collect the features of all hashes in the query and in the
        subject.

        @param histogram: Stored as self._histogram.
        @param query: The query read. Its length is stored and it is
            scanned for hashes.
        @param subject: The subject. Its length is stored and its read
            attribute is scanned for hashes.
        @param dbParams: The database parameters used to configure the
            backend that does the scanning.
        """
        self._histogram = histogram
        self._queryLen = len(query)
        self._subjectLen = len(subject)

        # NOTE(review): imported inside the method rather than at module
        # level, presumably to avoid a circular import - confirm.
        from light.backend import Backend
        scanner = Backend()
        scanner.configure(dbParams)

        queryHashes = scanner.getHashes(scanner.scan(query))
        self._allQueryFeatures = getHashFeatures(queryHashes)

        # The subject wraps its read, so scan subject.read, not subject.
        subjectHashes = scanner.getHashes(scanner.scan(subject.read))
        self._allSubjectFeatures = getHashFeatures(subjectHashes)
Exemplo n.º 25
0
 def testOneReadOneLandmark(self):
     """
     Adding a single subject that has only one landmark must leave the
     backend dictionary empty.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRF')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 26
0
 def testOneReadOneLandmarkOnePeakNoTrigFinders(self):
     """
     Adding a single subject that has one landmark and one peak must
     leave the backend dictionary empty when no trig point finders are
     in use.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[])
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRFASA')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 27
0
    def __init__(self, histogram, query, subject, dbParams, weights=None):
        """
        Record the histogram, the query and subject lengths and the
        weights, then collect the features of all hashes in the query and
        in the subject.

        @param histogram: Stored as self._histogram.
        @param query: The query read. Its length is stored and it is
            scanned for hashes.
        @param subject: The subject. Its length is stored and its read
            attribute is scanned for hashes.
        @param dbParams: The database parameters used to configure the
            backend that does the scanning.
        @param weights: The weights to store as self._weights. If None,
            self.DEFAULT_WEIGHTS is used.
        """
        self._histogram = histogram
        self._queryLen = len(query)
        self._subjectLen = len(subject)

        if weights is None:
            self._weights = self.DEFAULT_WEIGHTS
        else:
            self._weights = weights

        # NOTE(review): imported inside the method rather than at module
        # level, presumably to avoid a circular import - confirm.
        from light.backend import Backend
        scanner = Backend()
        scanner.configure(dbParams)

        queryHashes = scanner.getHashes(scanner.scan(query))
        self._allQueryFeatures = getHashFeatures(queryHashes)

        # The subject wraps its read, so scan subject.read, not subject.
        subjectHashes = scanner.getHashes(scanner.scan(subject.read))
        self._allSubjectFeatures = getHashFeatures(subjectHashes)
Exemplo n.º 28
0
 def testSaveRestoreWithNonDefaultParameters(self):
     """
     Saving a backend that has non-default parameters and then restoring
     it must give a backend whose parameters compare as identical to the
     originals (compare returns None).
     """
     params = DatabaseParameters(landmarks=[], trigPoints=[],
                                 limitPerLandmark=16, maxDistance=17,
                                 minDistance=18, distanceBase=19.0)
     backend = Backend()
     backend.configure(params)
     saved = StringIO()
     backend.save(saved)
     # Rewind so that restore reads from the start of what was saved.
     saved.seek(0)
     restored = backend.restore(saved)
     difference = params.compare(restored.dbParams)
     self.assertIs(None, difference)
Exemplo n.º 29
0
 def testOneReadOneLandmarkTwoPeaksSevereMinDistance(self):
     """
     Adding a single subject that has one landmark and two peaks must
     leave the backend dictionary empty when a severe minimum distance
     (100 here) is imposed.
     """
     params = DatabaseParameters(landmarks=[AlphaHelix],
                                 trigPoints=[Peaks], minDistance=100)
     backend = Backend()
     backend.configure(params)
     read = AARead('id', 'FRRRFRRRFASAASA')
     backend.addSubject(read, '0')
     self.assertEqual({}, backend.d)
Exemplo n.º 30
0
    def testFindNoMatch(self):
        """
        A query that yields no hashes ('FRRR' here) must produce no
        matches, a hash count of zero and no non-matching hashes, even
        though a subject has been added to the backend.
        """
        subject = AARead('subject', 'FRRRFRRRFASAASA')
        query = AARead('query', 'FRRR')
        params = DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks])
        backend = Backend()
        backend.configure(params)
        backend.addSubject(subject, '0')
        found, nHashes, unmatched = backend.find(query)

        self.assertEqual({}, found)
        self.assertEqual(0, nHashes)
        self.assertEqual({}, unmatched)