def getFractionOfStructuresCovered(self):
    """
    Compute the fraction of known structures that are matched by at least
    one substring in the subset under evaluation.

    Each read in C{self.structureFile} is scanned with a backend whose only
    landmark finder is 'AC ' + C{self.structureType} (no trig point
    finders). A read counts as covered if the scan finds one or more
    landmarks.

    Returns the number of covered reads divided by the number of reads, or
    0.0 if the structure file contains no reads.
    """
    database = DatabaseSpecifier().getDatabaseFromKeywords(
        trigPoints=[],
        landmarks=['AC ' + self.structureType],
        acAlphaHelixFilename=self.acAlphaHelixFilename,
        acAlphaHelix310Filename=self.acAlphaHelix310Filename,
        acAlphaHelixCombinedFilename=self.acAlphaHelixCombinedFilename,
        acAlphaHelixPiFilename=self.acAlphaHelixPiFilename,
        acExtendedStrandFilename=self.acExtendedStrandFilename)

    scanner = Backend()
    scanner.configure(database.dbParams)

    structures = FastaReads(self.structureFile, readClass=AAReadWithX,
                            checkAlphabet=0)

    covered = 0
    seen = 0
    # enumerate leaves 'seen' at zero when the file holds no reads.
    for seen, structure in enumerate(structures, start=1):
        if len(scanner.scan(structure).landmarks) > 0:
            covered += 1

    if not seen:
        return 0.0
    return covered / seen
def testInitialBackendIsEmpty(self):
    """
    A configured backend to which no subjects have been added must have
    an empty index dictionary.
    """
    params = DatabaseParameters(landmarks=[AlphaHelix],
                                trigPoints=[Peaks])
    backend = Backend()
    backend.configure(params)

    self.assertEqual({}, backend.d)
def testParametersAreStored(self):
    """
    After configure is called, the backend's dbParams attribute must be
    the very same parameters object that was passed in.
    """
    params = DatabaseParameters(landmarks=[AlphaHelix],
                                trigPoints=[Peaks])
    backend = Backend()
    backend.configure(params)

    self.assertIs(params, backend.dbParams)
def testInitialChecksum(self):
    """
    The backend checksum must equal the checksum value that was passed
    to its configure method.
    """
    params = DatabaseParameters(landmarks=[], trigPoints=[])
    backend = Backend()
    backend.configure(params, 'backend', 10)

    self.assertEqual(10, backend.checksum())
def testHashWithFeatureOnRight(self):
    """
    When the trig point lies to the right of the landmark, the hash
    function must produce a hash with the expected (positive) scaled
    distance.
    """
    backend = Backend()
    backend.configure(DatabaseParameters(landmarks=[], trigPoints=[]))

    first = Landmark('name', 'A', 20, 0)
    second = TrigPoint('name', 'B', 30)

    # The features are 10 apart (offsets 20 and 30).
    expected = 'A:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
    self.assertEqual(expected, backend.hash(first, second))
def testAddSameSubjectIncreasesBackendSize(self):
    """
    Adding an identical subject a second time must not increase the
    backend's subject count, because the backend subject store detects
    duplicates. (Note that, despite this test's name, the count is
    expected to stay at one.)
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))

    # Add the same id/sequence twice; the count must be 1 both times.
    for _ in range(2):
        backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
        self.assertEqual(1, backend.subjectCount())
def testHashWithSymbolDetail(self):
    """
    When the landmark has a repeat count, the hash function must include
    that count after the landmark symbol in the hash.
    """
    backend = Backend()
    backend.configure(DatabaseParameters(landmarks=[], trigPoints=[]))

    # The final Landmark argument (5) is the repeat count.
    first = Landmark('name', 'A', 20, 0, 5)
    second = TrigPoint('name', 'B', 30)

    expected = 'A5:B:' + str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
    self.assertEqual(expected, backend.hash(first, second))
def testHashWithFeatureOnRightAndNonDefaultDistanceBase(self):
    """
    With a non-default distance base (1.5) and the trig point to the
    right of the landmark, the hash function must scale the distance
    using that base.
    """
    base = 1.5
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[], trigPoints=[], distanceBase=base))

    first = Landmark('name', 'A', 20, 0)
    second = TrigPoint('name', 'B', 30)

    expected = 'A:B:' + str(scaleLog(10, base))
    self.assertEqual(expected, backend.hash(first, second))
def testAddSubjectReturnsCorrectResult(self):
    """
    When one subject is added, addSubject must return a 3-tuple holding
    whether the subject already existed (it did not), the index of the
    added subject ('0' here), and a hash count (zero here).
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))

    result = backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')
    alreadyPresent, index, nHashes = result

    self.assertFalse(alreadyPresent)
    self.assertEqual('0', index)
    self.assertEqual(0, nHashes)
def __init__(self, dbParams, backend=None, filePrefix=None):
    """
    Initialize around either a caller-supplied backend or a newly
    created one.

    dbParams is stored on the instance and, when no backend is supplied,
    is used to configure the newly created backend.

    backend, if not None, is used as is (it is not re-configured here).
    If None, a new Backend is created with filePrefix and configured
    with dbParams.

    filePrefix is stored on the instance and passed to the Backend
    constructor when a backend has to be created.
    """
    self.dbParams = dbParams
    self._filePrefix = filePrefix

    # Compare against None explicitly. A plain truthiness test would
    # silently discard a supplied backend that happens to evaluate as
    # false (e.g., one whose class defines __len__ or __bool__).
    if backend is None:
        backend = Backend(filePrefix=filePrefix)
        backend.configure(dbParams)
    self._backend = backend

    # Most of our implementation comes directly from our backend.
    for method in ('addSubject', 'getIndexBySubject', 'getSubjectByIndex',
                   'getSubjects', 'subjectCount', 'hashCount',
                   'totalResidues', 'totalCoveredResidues', 'checksum'):
        setattr(self, method, getattr(self._backend, method))
def testTwoReadsTwoLandmarksSameOffsets(self):
    """
    When two reads with identical sequences are added, each having two
    landmarks at the same offsets, the backend must hold exactly one key,
    whose value lists both subjects.

    Only the first-to-second landmark pairing (A2:A3) is expected; the
    reversed pairing of the same two landmarks does not appear as a
    separate key.
    """
    sequence = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    backend.addSubject(AARead('id1', sequence), '0')
    backend.addSubject(AARead('id2', sequence), '1')

    key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
    expected = {
        key: {
            '0': [[0, 9, 23, 13]],
            '1': [[0, 9, 23, 13]],
        },
    }
    self.assertEqual(expected, backend.d)
def __init__(self, **kwargs):
    """
    Build a configured backend from database keyword arguments, filling
    in default finders when the caller does not specify them.

    If 'landmarks' is absent from kwargs, it defaults to all landmark
    classes plus those development landmark classes whose NAME starts
    with 'PDB '. If 'trigPoints' is absent, it defaults to all trig point
    classes except the one named 'Volume'.

    The (possibly augmented) kwargs are passed to
    DatabaseSpecifier().getDatabaseFromKeywords, and the resulting
    database parameters are used to configure the backend and to record
    the landmark and trig point finder names.
    """
    if 'landmarks' not in kwargs:
        pdbFinders = [cls for cls in DEV_LANDMARK_CLASSES
                      if cls.NAME.startswith('PDB ')]
        kwargs['landmarks'] = ALL_LANDMARK_CLASSES + pdbFinders

    if 'trigPoints' not in kwargs:
        kwargs['trigPoints'] = [cls for cls in ALL_TRIG_CLASSES
                                if cls.NAME != 'Volume']

    database = DatabaseSpecifier().getDatabaseFromKeywords(**kwargs)

    self._backend = Backend()
    self._backend.configure(database.dbParams)

    landmarkNames = database.dbParams.landmarkFinderNames()
    trigPointNames = database.dbParams.trigPointFinderNames()
    self._names = landmarkNames + trigPointNames
def testNoOverlapDefaultDistanceBase(self):
    """
    With the default distance base, the indices covered by landmarks from
    the GOR4 alpha helix finder and those covered by landmarks from the
    GOR4 beta strand finder must not intersect.
    """
    covered = []
    for finder in (GOR4AlphaHelix, GOR4BetaStrand):
        backend = Backend()
        backend.configure(
            DatabaseParameters(landmarks=[finder], trigPoints=[]))
        covered.append(backend.scan(self.READ).coveredIndices())

    helixIndices, strandIndices = covered
    self.assertEqual(0, len(helixIndices & strandIndices))
def testFindOneMatchingHashInOneLocation(self):
    """
    When the query shares a single hash with one subject, and that hash
    occurs at a single location, find must report exactly that match.
    """
    sequence = 'AFRRRFRRRFASAASA'
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks],
                           maxDistance=11))
    backend.addSubject(AARead('subject', sequence), '0')

    found, nHashes, unmatched = backend.find(AARead('query', sequence))

    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
        ],
    }
    self.assertEqual(expected, found)
    self.assertEqual(1, nHashes)
    self.assertEqual({}, unmatched)
def testSaveContentIncludesExpectedKeysAndValues(self):
    """
    The JSON state written by a backend's save method must contain the
    expected keys, with the expected values.
    """
    params = DatabaseParameters(landmarks=[], trigPoints=[],
                                limitPerLandmark=16, maxDistance=17,
                                minDistance=18, distanceBase=19.0)
    backend = Backend()
    backend.configure(params, 'backend', 33)

    buf = StringIO()
    backend.save(buf)
    buf.seek(0)

    # Consume the saved parameters and subject store, so that the next
    # line read is the backend's own state.
    DatabaseParameters.restore(buf)
    SubjectStore.restore(buf)
    line = buf.readline()
    saved = loads(line[:-1])

    # Keys
    expectedKeys = set(['checksum', 'd', 'name', '_totalCoveredResidues'])
    self.assertEqual(expectedKeys, set(saved.keys()))

    # Values
    self.assertEqual(backend.checksum(), saved['checksum'])
    self.assertEqual({}, saved['d'])
    self.assertEqual('backend', saved['name'])
    self.assertEqual(0, saved['_totalCoveredResidues'])
def testPrint(self):
    """
    The print_ method must return the expected textual summary of a
    backend holding one subject.
    """
    params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                trigPoints=[Peaks, Troughs],
                                limitPerLandmark=16, maxDistance=10,
                                minDistance=0, distanceBase=1)
    backend = Backend()
    backend.configure(params)
    backend.addSubject(AARead('subject-id', 'FRRRFRRRFASAASA'), '0')

    # NOTE(review): the leading whitespace inside the indented literals
    # below is reproduced exactly as found; confirm it against the actual
    # indentation produced by print_().
    expected = (
        'Name: backend\n'
        'Hash count: 3\n'
        'Checksum: 2751160351\n'
        'Subjects (with offsets) by hash:\n'
        ' A2:P:10\n'
        ' 0 [[0, 9, 10, 1]]\n'
        ' A2:T:4\n'
        ' 0 [[0, 9, 4, 1]]\n'
        ' A2:T:8\n'
        ' 0 [[0, 9, 8, 1]]\n'
        'Landmark symbol counts:\n'
        ' AlphaHelix (A2): 3\n'
        'Trig point symbol counts:\n'
        ' Peaks (P): 1\n'
        ' Troughs (T): 2')

    self.assertEqual(expected, backend.print_())
def testFindTwoMatchingInSameSubject(self):
    """
    When the query shares two hashes with a single subject, find must
    report both matches for that subject.
    """
    sequence = 'FRRRFRRRFASAASA'
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
    backend.addSubject(AARead('subject', sequence), '0')

    found, nHashes, unmatched = backend.find(AARead('query', sequence))

    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
            },
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
            },
        ],
    }
    self.assertEqual(expected, found)
    self.assertEqual(2, nHashes)
    self.assertEqual({}, unmatched)
def testFindWithIdenticalNonMatchingHashes(self):
    """
    When True is passed as the second argument to find (to store the full
    analysis), query hashes that match nothing must be returned, with
    identical hashes from different query locations grouped under the
    same key.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks], maxDistance=10))
    backend.addSubject(AARead('subject', 'F'), '0')

    query = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')
    found, nHashes, unmatched = backend.find(query, True)

    self.assertEqual({}, found)
    self.assertEqual(2, nHashes)

    firstPair = [Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 1, 9, 2),
                 TrigPoint(Peaks.NAME, Peaks.SYMBOL, 11)]
    secondPair = [Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 23, 9, 2),
                  TrigPoint(Peaks.NAME, Peaks.SYMBOL, 33)]
    self.assertEqual({'A2:P:10': [firstPair, secondPair]}, unmatched)
def testFindOneMatchingHashInTwoLocations(self):
    """
    When one subject matches the query and the matching hash occurs in
    two locations, find must report both locations for that subject.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                           trigPoints=[Peaks]))
    backend.addSubject(
        AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA'), '0')

    query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
    found, nHashes, unmatched = backend.find(query)

    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
            },
        ],
    }
    self.assertEqual(expected, found)
    self.assertEqual(14, nHashes)
    self.assertEqual({}, unmatched)
def testNoOverlapDistanceBaseOne(self):
    """
    With a distance base of 1.0, the indices covered by landmarks from
    the GOR4 alpha helix finder and those covered by landmarks from the
    GOR4 beta strand finder must not intersect.
    """
    covered = []
    for finder in (GOR4AlphaHelix, GOR4BetaStrand):
        backend = Backend()
        backend.configure(
            DatabaseParameters(landmarks=[finder], trigPoints=[],
                               distanceBase=1.0))
        covered.append(backend.scan(self.READ).coveredIndices())

    helixIndices, strandIndices = covered
    self.assertEqual(0, len(helixIndices & strandIndices))
def testCollectReadHashes(self):
    """
    The getHashes method must return a dict, keyed by hash, whose values
    are lists of the [landmark, trig point] pairs that produced that
    hash.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks],
                           distanceBase=1.0))

    scanned = backend.scan(
        AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFASAASA'))
    hashes = backend.getHashes(scanned)

    def helix(offset):
        return Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2)

    def peak(offset):
        return TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset)

    expected = {
        'A2:P:28': [[helix(0), peak(28)]],
        'A2:P:25': [[helix(0), peak(25)]],
        'A2:P:13': [[helix(0), peak(13)], [helix(15), peak(28)]],
        'A2:P:10': [[helix(0), peak(10)], [helix(15), peak(25)]],
        'A2:P:-5': [[helix(15), peak(10)]],
        'A2:P:-2': [[helix(15), peak(13)]],
        'A2:A2:15': [[helix(0), helix(15)]],
    }
    self.assertEqual(expected, hashes)
def testTwoReadsTwoLandmarksLimitZeroPairsPerLandmark(self):
    """
    With limitPerLandmark set to zero, adding two reads with identical
    sequences (each with two landmarks) must leave the backend's
    dictionary empty.
    """
    sequence = 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[],
                           limitPerLandmark=0))
    backend.addSubject(AARead('id1', sequence), '0')
    backend.addSubject(AARead('id2', sequence), '1')

    self.assertEqual({}, backend.d)
def testScan(self):
    """
    The scan method must return a ScannedRead instance.
    """
    read = AARead('subject', 'FRRRFRRRFASAASA')
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
    backend.addSubject(read, '0')

    self.assertIsInstance(backend.scan(read), ScannedRead)
def __init__(self, histogram, query, subject, dbParams):
    """
    Store the histogram and the query and subject lengths, and compute
    the features of all hashes in both the query and the subject.

    A backend configured with dbParams is used to scan the query and the
    subject's read (subject.read) and to get the hashes of each; the
    hashes are then passed to getHashFeatures.
    """
    self._histogram = histogram
    self._queryLen = len(query)
    self._subjectLen = len(subject)

    # Imported here, not at module level, as in the original code
    # (presumably to avoid a circular import; confirm).
    from light.backend import Backend

    scanner = Backend()
    scanner.configure(dbParams)

    def allFeatures(read):
        # Scan the read, collect all its hashes, extract their features.
        return getHashFeatures(scanner.getHashes(scanner.scan(read)))

    self._allQueryFeatures = allFeatures(query)
    self._allSubjectFeatures = allFeatures(subject.read)
def testOneReadOneLandmark(self):
    """
    Adding a single subject that has just one landmark must leave the
    backend's dictionary empty.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    backend.addSubject(AARead('id', 'FRRRFRRRF'), '0')

    self.assertEqual({}, backend.d)
def testOneReadOneLandmarkOnePeakNoTrigFinders(self):
    """
    Adding a single subject with one landmark and one peak must leave the
    backend's dictionary empty when no trig point finders are in use.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    backend.addSubject(AARead('id', 'FRRRFRRRFASA'), '0')

    self.assertEqual({}, backend.d)
def __init__(self, histogram, query, subject, dbParams, weights=None):
    """
    Store the histogram, the query and subject lengths and the weights,
    and compute the features of all hashes in both the query and the
    subject.

    If weights is None, self.DEFAULT_WEIGHTS is used.

    A backend configured with dbParams is used to scan the query and the
    subject's read (subject.read) and to get the hashes of each; the
    hashes are then passed to getHashFeatures.
    """
    self._histogram = histogram
    self._queryLen = len(query)
    self._subjectLen = len(subject)

    if weights is None:
        self._weights = self.DEFAULT_WEIGHTS
    else:
        self._weights = weights

    # Imported here, not at module level, as in the original code
    # (presumably to avoid a circular import; confirm).
    from light.backend import Backend

    scanner = Backend()
    scanner.configure(dbParams)

    def allFeatures(read):
        # Scan the read, collect all its hashes, extract their features.
        return getHashFeatures(scanner.getHashes(scanner.scan(read)))

    self._allQueryFeatures = allFeatures(query)
    self._allSubjectFeatures = allFeatures(subject.read)
def testSaveRestoreWithNonDefaultParameters(self):
    """
    Saving a backend that has non-default parameters and then restoring
    it must give a backend whose parameters compare equal to the
    originals (compare returns None).
    """
    params = DatabaseParameters(landmarks=[], trigPoints=[],
                                limitPerLandmark=16, maxDistance=17,
                                minDistance=18, distanceBase=19.0)
    backend = Backend()
    backend.configure(params)

    buf = StringIO()
    backend.save(buf)
    buf.seek(0)
    restored = backend.restore(buf)

    difference = params.compare(restored.dbParams)
    self.assertIs(None, difference)
def testOneReadOneLandmarkTwoPeaksSevereMinDistance(self):
    """
    Adding a single subject with one landmark and two peaks must leave
    the backend's dictionary empty when a severe minimum distance (100)
    is imposed.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks],
                           minDistance=100))
    backend.addSubject(AARead('id', 'FRRRFRRRFASAASA'), '0')

    self.assertEqual({}, backend.d)
def testFindNoMatch(self):
    """
    A query that matches nothing in a backend holding one subject must
    produce no matches, a hash count of zero, and no non-matching hashes.
    """
    backend = Backend()
    backend.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
    backend.addSubject(AARead('subject', 'FRRRFRRRFASAASA'), '0')

    found, nHashes, unmatched = backend.find(AARead('query', 'FRRR'))

    self.assertEqual({}, found)
    self.assertEqual(0, nHashes)
    self.assertEqual({}, unmatched)