def testChecksumAfterSaveRestore(self):
    """
    Adding a sequence, saving and restoring the backend, and then adding
    a second sequence must give the same checksum as adding both sequences
    to a backend with no save/restore in between.
    """
    seq1 = 'FRRRFRRRFASAASA'
    seq2 = 'MMMMMMMMMFRRRFR'

    # A backend that is saved and restored between the two additions.
    interruptedParams = DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks, Troughs])
    interrupted = Backend()
    interrupted.configure(interruptedParams, 'name1', 0)
    interrupted.addSubject(AARead('id1', seq1), '0')
    saved = StringIO()
    interrupted.save(saved)
    saved.seek(0)
    interrupted = Backend.restore(saved)
    interrupted.addSubject(AARead('id2', seq2), '1')

    # A backend that receives both sequences back to back.
    straightParams = DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks, Troughs])
    straight = Backend()
    straight.configure(straightParams, 'name2', 0)
    straight.addSubject(AARead('id1', seq1), '0')
    straight.addSubject(AARead('id2', seq2), '1')

    self.assertEqual(interrupted.checksum(), straight.checksum())
def testFindOneMatchingHashInOneLocation(self):
    """
    A query identical to the single stored subject must produce exactly
    one match (one hash, occurring at one location) and no non-matching
    hashes.
    """
    residues = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    params = DatabaseParameters(landmarks=[AlphaHelix],
                                trigPoints=[Peaks], maxDistance=11)
    be = Backend()
    be.configure(params)
    be.addSubject(subject, '0')

    matches, hashCount, nonMatchingHashes = be.find(query)

    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
        ],
    }
    self.assertEqual(expected, matches)
    self.assertEqual(1, hashCount)
    self.assertEqual({}, nonMatchingHashes)
def testSaveContentIncludesExpectedKeysAndValues(self):
    """
    The JSON state written by a backend's save method must contain the
    expected keys and values.
    """
    params = DatabaseParameters(landmarks=[], trigPoints=[],
                                limitPerLandmark=16, maxDistance=17,
                                minDistance=18, distanceBase=19.0)
    be = Backend()
    be.configure(params, 'backend', 33)
    saved = StringIO()
    be.save(saved)
    saved.seek(0)

    # NOTE(review): the results of these two restore calls are discarded;
    # presumably they just consume the sections saved ahead of the
    # backend's own state line - confirm against Backend.save.
    DatabaseParameters.restore(saved)
    SubjectStore.restore(saved)

    # The last character of the line (presumably the newline) is dropped
    # before the JSON is parsed.
    state = loads(saved.readline()[:-1])

    # Keys
    expectedKeys = {'checksum', 'd', 'name', '_totalCoveredResidues'}
    self.assertEqual(expectedKeys, set(state))

    # Values
    self.assertEqual(be.checksum(), state['checksum'])
    self.assertEqual({}, state['d'])
    self.assertEqual('backend', state['name'])
    self.assertEqual(0, state['_totalCoveredResidues'])
def testFindOneMatchingHashInTwoLocations(self):
    """
    With one stored subject, the two expected landmark / trig point
    pairings (the same helix paired with two different peaks) must be
    reported, together with the expected hash count and no non-matching
    hashes.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks]))
    be.addSubject(AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA'), '0')
    query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')

    matches, hashCount, nonMatchingHashes = be.find(query)

    firstPairing = {
        'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
        'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
        'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
    }
    secondPairing = {
        'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
        'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
        'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
        'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
    }
    self.assertEqual({'0': [firstPairing, secondPairing]}, matches)
    self.assertEqual(14, hashCount)
    self.assertEqual({}, nonMatchingHashes)
def testPrint(self):
    """
    The backend's print_ method must return the expected textual summary
    for a backend holding a single subject.
    """
    params = DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                trigPoints=[Peaks, Troughs],
                                limitPerLandmark=16, maxDistance=10,
                                minDistance=0, distanceBase=1)
    be = Backend()
    be.configure(params)
    be.addSubject(AARead('subject-id', 'FRRRFRRRFASAASA'), '0')

    # Adjacent literals are concatenated into one string; only the final
    # line has no trailing newline.
    wanted = (
        'Name: backend\n'
        'Hash count: 3\n'
        'Checksum: 2751160351\n'
        'Subjects (with offsets) by hash:\n'
        ' A2:P:10\n'
        ' 0 [[0, 9, 10, 1]]\n'
        ' A2:T:4\n'
        ' 0 [[0, 9, 4, 1]]\n'
        ' A2:T:8\n'
        ' 0 [[0, 9, 8, 1]]\n'
        'Landmark symbol counts:\n'
        ' AlphaHelix (A2): 3\n'
        'Trig point symbol counts:\n'
        ' Peaks (P): 1\n'
        ' Troughs (T): 2')
    self.assertEqual(wanted, be.print_())
def testFindWithIdenticalNonMatchingHashes(self):
    """
    When True is passed as the second argument to find(), a hash that
    occurs twice in the query but matches nothing in the backend must be
    reported once, with both of its occurrences listed.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix, BetaStrand],
                                    trigPoints=[Peaks], maxDistance=10))
    be.addSubject(AARead('subject', 'F'), '0')
    query = AARead('query', 'AFRRRFRRRFASAAAAAAAAAAAFRRRFRRRFASA')

    matches, hashCount, nonMatchingHashes = be.find(query, True)

    self.assertEqual({}, matches)
    self.assertEqual(2, hashCount)

    firstOccurrence = [
        Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 1, 9, 2),
        TrigPoint(Peaks.NAME, Peaks.SYMBOL, 11),
    ]
    secondOccurrence = [
        Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, 23, 9, 2),
        TrigPoint(Peaks.NAME, Peaks.SYMBOL, 33),
    ]
    self.assertEqual({'A2:P:10': [firstOccurrence, secondOccurrence]},
                     nonMatchingHashes)
def testCollectReadHashes(self):
    """
    The getHashes method must return a dict keyed by hash, where each
    value lists the pairs of features in the read that produced that
    hash.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], distanceBase=1.0))
    scanned = be.scan(AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFASAASA'))
    hashes = be.getHashes(scanned)

    def helix(offset):
        return Landmark(AlphaHelix.NAME, AlphaHelix.SYMBOL, offset, 9, 2)

    def peak(offset):
        return TrigPoint(Peaks.NAME, Peaks.SYMBOL, offset)

    expected = {
        'A2:P:28': [[helix(0), peak(28)]],
        'A2:P:25': [[helix(0), peak(25)]],
        'A2:P:13': [[helix(0), peak(13)], [helix(15), peak(28)]],
        'A2:P:10': [[helix(0), peak(10)], [helix(15), peak(25)]],
        'A2:P:-5': [[helix(15), peak(10)]],
        'A2:P:-2': [[helix(15), peak(13)]],
        'A2:A2:15': [[helix(0), helix(15)]],
    }
    self.assertEqual(expected, hashes)
def testFindTwoMatchingInSameSubject(self):
    """
    When the query equals the stored subject and two hashes match, both
    matches must be reported against that subject.
    """
    residues = 'FRRRFRRRFASAASA'
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks]))
    be.addSubject(AARead('subject', residues), '0')

    matches, hashCount, nonMatchingHashes = be.find(
        AARead('query', residues))

    # Query and subject are identical, so each match has the same
    # features on both sides.
    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', peakOffset),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', peakOffset),
            }
            for peakOffset in (10, 13)
        ],
    }
    self.assertEqual(expected, matches)
    self.assertEqual(2, hashCount)
    self.assertEqual({}, nonMatchingHashes)
def testTwoReadsTwoLandmarksSameOffsets(self):
    """
    Adding two identical reads, each with two landmarks at the same
    offsets, must produce a single key in the backend whose value lists
    both reads.

    Only the A2:A3 pairing is expected; the same two landmarks taken in
    the opposite order (with a negated distance) must not show up as a
    second key.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
        be.addSubject(
            AARead(readId, 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'),
            subjectIndex)

    key = 'A2:A3:' + str(scaleLog(23, _DEFAULT_DISTANCE_BASE))
    expected = {
        key: {
            '0': [[0, 9, 23, 13]],
            '1': [[0, 9, 23, 13]],
        },
    }
    self.assertEqual(expected, be.d)
def getFractionOfStructuresCovered(self):
    """
    Return the fraction of the reads in self.structureFile in which a
    scan finds at least one landmark.

    The only landmark finder used is 'AC ' + self.structureType, with
    no trig point finders.

    @return: hit / total, or 0.0 if the file yields no reads.
    """
    db = DatabaseSpecifier().getDatabaseFromKeywords(
        trigPoints=[],
        landmarks=['AC ' + self.structureType],
        acAlphaHelixFilename=self.acAlphaHelixFilename,
        acAlphaHelix310Filename=self.acAlphaHelix310Filename,
        acAlphaHelixCombinedFilename=self.acAlphaHelixCombinedFilename,
        acAlphaHelixPiFilename=self.acAlphaHelixPiFilename,
        acExtendedStrandFilename=self.acExtendedStrandFilename)
    scanner = Backend()
    scanner.configure(db.dbParams)

    covered = seen = 0
    structures = FastaReads(self.structureFile, readClass=AAReadWithX,
                            checkAlphabet=0)
    for structure in structures:
        seen += 1
        if len(scanner.scan(structure).landmarks) > 0:
            covered += 1

    # Guard against dividing by zero when there are no reads.
    if not seen:
        return 0.0
    return covered / seen
def testNoOverlapDefaultDistanceBase(self):
    """
    With the distance base left at its default, the indices covered by
    GOR4 alpha helix landmarks and by GOR4 beta strand landmarks in
    self.READ must not overlap.
    """
    backends = []
    for finder in (GOR4AlphaHelix, GOR4BetaStrand):
        be = Backend()
        be.configure(DatabaseParameters(landmarks=[finder], trigPoints=[]))
        backends.append(be)

    helixScan, strandScan = [be.scan(self.READ) for be in backends]
    helixIndices = helixScan.coveredIndices()
    strandIndices = strandScan.coveredIndices()
    shared = helixIndices & strandIndices
    self.assertEqual(0, len(shared))
def testInitialBackendIsEmpty(self):
    """
    A configured backend to which nothing has been added must have an
    empty index dictionary.
    """
    be = Backend()
    be.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
    self.assertEqual({}, be.d)
def testParametersAreStored(self):
    """
    After configure is called, the backend's dbParams attribute must be
    the very same parameters object that was passed in.
    """
    params = DatabaseParameters(landmarks=[AlphaHelix],
                                trigPoints=[Peaks])
    be = Backend()
    be.configure(params)
    # Identity, not just equality, is required.
    self.assertIs(params, be.dbParams)
def testInitialChecksum(self):
    """
    The backend checksum must equal the value passed as the third
    argument to its configure method.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[], trigPoints=[]),
                 'backend', 10)
    self.assertEqual(10, be.checksum())
def testOneReadOneLandmarkOnePeakNoTrigFinders(self):
    """
    A subject containing one landmark and one peak must add nothing to
    the backend when no trig point finders are configured.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    read = AARead('id', 'FRRRFRRRFASA')
    be.addSubject(read, '0')
    self.assertEqual({}, be.d)
def testOneReadOneLandmark(self):
    """
    A subject with just a single landmark (and no trig point finders in
    use) must add nothing to the backend.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    read = AARead('id', 'FRRRFRRRF')
    be.addSubject(read, '0')
    self.assertEqual({}, be.d)
def testScan(self):
    """
    Scanning a read with the backend must produce a ScannedRead
    instance.
    """
    read = AARead('subject', 'FRRRFRRRFASAASA')
    be = Backend()
    be.configure(
        DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[Peaks]))
    be.addSubject(read, '0')
    self.assertIsInstance(be.scan(read), ScannedRead)
def testAddSameSubjectIncreasesBackendSize(self):
    """
    Adding an identical subject a second time must NOT increase the
    backend's subject count (despite what this test's name suggests):
    the count is 1 after the first addition and still 1 after the
    second.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    # Add the same subject twice, checking the count after each addition.
    for _ in range(2):
        be.addSubject(AARead('id', 'FRRRFRRRF'), '0')
        self.assertEqual(1, be.subjectCount())
def testHashWithSymbolDetail(self):
    """
    When the landmark passed to the backend's hash method carries a
    symbol detail (5 here), that detail must appear after the landmark
    symbol in the resulting hash.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[], trigPoints=[]))
    # The features are 10 apart (offsets 20 and 30).
    scaled = str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
    result = be.hash(Landmark('name', 'A', 20, 0, 5),
                     TrigPoint('name', 'B', 30))
    self.assertEqual('A5:B:' + scaled, result)
def testOneReadOneLandmarkTwoPeaksSevereMinDistance(self):
    """
    A subject with one landmark and two peaks must add no keys to the
    backend when a very large minimum distance (100) is required.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix],
                                    trigPoints=[Peaks], minDistance=100))
    read = AARead('id', 'FRRRFRRRFASAASA')
    be.addSubject(read, '0')
    self.assertEqual({}, be.d)
def testHashWithFeatureOnRight(self):
    """
    When the second feature lies to the right of the first, the
    backend's hash method must return a hash built from the (positive)
    scaled distance between them.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[], trigPoints=[]))
    # The trig point (offset 30) is 10 to the right of the landmark (20).
    scaled = str(scaleLog(10, _DEFAULT_DISTANCE_BASE))
    result = be.hash(Landmark('name', 'A', 20, 0),
                     TrigPoint('name', 'B', 30))
    self.assertEqual('A:B:' + scaled, result)
def testNoOverlapDistanceBaseOne(self):
    """
    With a distance base of 1.0, the indices covered by GOR4 alpha helix
    landmarks and by GOR4 beta strand landmarks in self.READ must not
    overlap.
    """
    backends = []
    for finder in (GOR4AlphaHelix, GOR4BetaStrand):
        be = Backend()
        be.configure(DatabaseParameters(landmarks=[finder], trigPoints=[],
                                        distanceBase=1.0))
        backends.append(be)

    helixScan, strandScan = [be.scan(self.READ) for be in backends]
    helixIndices = helixScan.coveredIndices()
    strandIndices = strandScan.coveredIndices()
    shared = helixIndices & strandIndices
    self.assertEqual(0, len(shared))
def testCollectReadHashesWithOneLandmark(self):
    """
    The getHashes method must return an empty dict for a read in which
    only a single landmark is found (there is nothing to pair it with).
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    scanned = be.scan(AARead('query', 'FRRRFRRRF'))
    hashes = be.getHashes(scanned)
    self.assertEqual({}, hashes)
def __init__(self, dbParams, backend=None, filePrefix=None):
    """
    Store the parameters and attach a backend, then expose a fixed set
    of the backend's methods directly on this instance.

    @param dbParams: The database parameters. Saved on the instance and
        used to configure a newly created backend.
    @param backend: An existing backend to use. If it is falsy (the
        default is None), a new Backend is created with filePrefix and
        configured with dbParams. A backend that is passed in is used
        as given and is not configured here.
    @param filePrefix: Passed to Backend when one is created, and saved
        on the instance.
    """
    self.dbParams = dbParams

    needOwnBackend = not backend
    self._backend = (Backend(filePrefix=filePrefix) if needOwnBackend
                     else backend)
    if needOwnBackend:
        self._backend.configure(dbParams)

    self._filePrefix = filePrefix

    # Most of our implementation comes directly from our backend, so its
    # bound methods are simply re-exported under the same names.
    delegated = (
        'addSubject', 'getIndexBySubject', 'getSubjectByIndex',
        'getSubjects', 'subjectCount', 'hashCount', 'totalResidues',
        'totalCoveredResidues', 'checksum',
    )
    for name in delegated:
        setattr(self, name, getattr(self._backend, name))
def testAddSubjectReturnsCorrectResult(self):
    """
    When a new subject is added, addSubject must return three values:
    whether the subject already existed (False here), the index of the
    subject ('0' here) and a hash count (0 here).
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[]))
    result = be.addSubject(AARead('id', 'FRRRFRRRF'), '0')
    preExisting, subjectIndex, hashCount = result
    self.assertFalse(preExisting)
    self.assertEqual('0', subjectIndex)
    self.assertEqual(0, hashCount)
def testHashWithFeatureOnRightAndNonDefaultDistanceBase(self):
    """
    With a non-default distance base (1.5) and the second feature to the
    right of the first, the backend's hash method must scale the
    distance using that base.
    """
    base = 1.5
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[], trigPoints=[],
                                    distanceBase=1.5))
    scaled = str(scaleLog(10, base))
    result = be.hash(Landmark('name', 'A', 20, 0),
                     TrigPoint('name', 'B', 30))
    self.assertEqual('A:B:' + scaled, result)
def testTwoReadsTwoLandmarksLimitZeroPairsPerLandmark(self):
    """
    With limitPerLandmark set to zero, adding two identical reads (each
    with two landmarks) must leave the backend dictionary empty.
    """
    be = Backend()
    be.configure(DatabaseParameters(landmarks=[AlphaHelix], trigPoints=[],
                                    limitPerLandmark=0))
    for readId, subjectIndex in (('id1', '0'), ('id2', '1')):
        be.addSubject(
            AARead(readId, 'FRRRFRRRFAAAAAAAAAAAAAAFRRRFRRRFRRRF'),
            subjectIndex)
    self.assertEqual({}, be.d)
def __init__(self, histogram, query, subject, dbParams, findParams=None):
    """
    Record the histogram and sequence lengths, and collect the full set
    of features (landmarks plus trig points) found in both the query
    and the subject.

    @param histogram: Stored as-is on the instance.
    @param query: The query read. Its length is recorded and it is
        scanned for features.
    @param subject: The subject. Its length is recorded and its 'read'
        attribute is scanned for features.
    @param dbParams: Parameters used to configure the backend that does
        the scanning.
    @param findParams: Find parameters. If falsy (the default is None),
        a default FindParameters instance is used.
    """
    # NOTE(review): these imports are kept local to the function as in
    # the original, presumably to avoid a circular import - confirm.
    from light.parameters import FindParameters
    from light.backend import Backend

    self._histogram = histogram
    self._queryLen = len(query)
    self._subjectLen = len(subject)
    self._findParams = findParams or FindParameters()

    scanner = Backend()
    scanner.configure(dbParams)

    def allFeatures(read):
        # Every landmark and trig point found in the read, as one set.
        scanned = scanner.scan(read)
        return set(scanned.landmarks + scanned.trigPoints)

    self._allQueryFeatures = allFeatures(query)
    self._allSubjectFeatures = allFeatures(subject.read)
def testSaveRestoreWithNonDefaultParameters(self):
    """
    Saving a backend configured with non-default parameters and then
    restoring it must give a backend whose parameters compare as
    identical to the originals (compare returns None).
    """
    params = DatabaseParameters(landmarks=[], trigPoints=[],
                                limitPerLandmark=16, maxDistance=17,
                                minDistance=18, distanceBase=19.0)
    be = Backend()
    be.configure(params)
    saved = StringIO()
    be.save(saved)
    saved.seek(0)
    restored = be.restore(saved)
    self.assertIsNone(params.compare(restored.dbParams))
def __init__(self, histogram, query, subject, dbParams):
    """
    Record the histogram and sequence lengths, and collect the features
    that take part in hashes for both the query and the subject.

    @param histogram: Stored as-is on the instance.
    @param query: The query read. Its length is recorded and it is
        scanned and hashed.
    @param subject: The subject. Its length is recorded and its 'read'
        attribute is scanned and hashed.
    @param dbParams: Parameters used to configure the backend that does
        the scanning and hashing.
    """
    # NOTE(review): the import is kept local to the function as in the
    # original, presumably to avoid a circular import - confirm.
    from light.backend import Backend

    self._histogram = histogram
    self._queryLen = len(query)
    self._subjectLen = len(subject)

    scanner = Backend()
    scanner.configure(dbParams)

    def hashedFeatures(read):
        # Scan the read, compute its hashes, then pull out the features
        # that those hashes refer to.
        hashes = scanner.getHashes(scanner.scan(read))
        return getHashFeatures(hashes)

    self._allQueryFeatures = hashedFeatures(query)
    self._allSubjectFeatures = hashedFeatures(subject.read)