def testFindOneMatchingSignificant(self):
    """
    When significanceFraction is 0.0, finding a query that is identical to
    the only subject in the database must result in a single match being
    reported for that subject.
    """
    residues = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
    database.addSubject(subject)
    result = database.find(query, FindParameters(significanceFraction=0.0))

    # The query and the subject have the same sequence, so the expected
    # features are the same on both sides of the match.
    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
        ],
    }
    self.assertEqual(expected, result.matches)
def testFindOneMatchingSignificantWithSubjectIndicesIncludingIt(self):
    """
    Passing a non-empty subjectIndices set that contains the index of the
    matching subject (alongside other indices that match nothing) must
    still result in the match for that subject being found.
    """
    residues = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=11))
    database.addSubject(subject)
    result = database.find(
        query, FindParameters(significanceFraction=0.0),
        subjectIndices={'0', 'x', 'y'})

    # Only index '0' refers to the subject that was added above.
    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 11),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
        ],
    }
    self.assertEqual(expected, result.matches)
def testFindTwoMatchingInSameSubject(self):
    """
    When the query shares two hashes with a subject, both of them must
    appear in the matches reported for that subject.
    """
    residues = 'FRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[Peaks]))
    database.addSubject(subject)
    result = database.find(query)

    # One landmark is paired with two different trig points (offsets 10
    # and 13), giving two matches for the same subject.
    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 10),
            },
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 13),
            },
        ],
    }
    self.assertEqual(expected, result.matches)
def testFindOneMatchingInsignificant(self):
    """
    A subject that matches the query must be reported in the matches, but
    it must not be among the significant subjects when find is called with
    its default find parameters.
    """
    subject = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
    query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    database.addSubject(subject)
    result = database.find(query)

    # The subject features sit one residue later than the query features.
    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
            },
        ],
    }
    self.assertEqual(expected, result.matches)

    significant = list(result.significantSubjects())
    self.assertEqual(0, len(significant))
def testFindBug493Minimal(self):
    """
    A minimal failing test case for
    https://github.com/acorg/light-matter/issues/493

    There are no explicit assertions: the test passes provided that
    normalizeBin can be called on every significant bin without an
    exception being raised.
    """
    query = SSAARead(
        '2HLA:A',
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV',
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE')

    subject = SSAARead(
        '3D2U:A',
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT',
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-')

    database = Database(DatabaseParameters(
        landmarks=['PDB ExtendedStrand'], trigPoints=[],
        limitPerLandmark=50, distanceBase=1.1))
    _, subjectIndex, _ = database.addSubject(subject)

    result = database.find(
        query, FindParameters(significanceFraction=0.01),
        storeFullAnalysis=True)

    queryLength = len(query)
    for binInfo in result.analysis[subjectIndex]['significantBins']:
        normalizeBin(binInfo['bin'], queryLength)
def testSymmetricFindScoresDifferingSubjectAndQuery(self):
    """
    Matching sequence A against sequence B must give the same best bin
    score as matching B against A, even when the two sequences produce
    different numbers of hashes and the score is not 1.0.
    """
    subject = AARead('subject', 'AFRRRFRRRFASAASAFRRRFRRRF')
    query = AARead('query', 'FRRRFRRRFASAVVVVVV')
    findParams = FindParameters(significanceFraction=0.0)

    # Forward direction: the subject is in the database, the query is
    # looked up.
    forwardDb = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    _, forwardIndex, _ = forwardDb.addSubject(subject)
    hashCount1 = forwardDb.getSubjectByIndex(forwardIndex).hashCount
    forwardResult = forwardDb.find(query, findParams)
    score1 = forwardResult.analysis['0']['bestBinScore']

    # Reverse direction: a fresh database holds the query and the subject
    # is looked up.
    reverseDb = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    _, reverseIndex, _ = reverseDb.addSubject(query)
    hashCount2 = reverseDb.getSubjectByIndex(reverseIndex).hashCount
    reverseResult = reverseDb.find(subject, findParams)
    score2 = reverseResult.analysis['0']['bestBinScore']

    self.assertNotEqual(hashCount1, hashCount2)
    self.assertEqual(score1, score2)
    self.assertNotEqual(1.0, score1)
def testSymmetricFindScoresSameSubjectAndQuery(self):
    """
    Matching sequence A against sequence B must give the same best bin
    score as matching B against A, and that score must be 1.0 when the
    two sequences are identical.
    """
    residues = 'AFRRRFRRRFASAASAFRRRFRRRF'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    findParams = FindParameters(significanceFraction=0.0)

    # Forward direction: look up the query in a database holding the
    # subject.
    forwardDb = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    forwardDb.addSubject(subject)
    score1 = forwardDb.find(
        query, findParams).analysis['0']['bestBinScore']

    # Reverse direction: look up the subject in a fresh database holding
    # the query.
    reverseDb = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    reverseDb.addSubject(query)
    score2 = reverseDb.find(
        subject, findParams).analysis['0']['bestBinScore']

    self.assertEqual(score1, score2)
    self.assertEqual(1.0, score1)
def testInitialDatabaseHasNoSubjectInfo(self):
    """
    A newly created database, to which no subjects have been added, must
    not return any subjects from getSubjects.
    """
    database = Database(DatabaseParameters())
    subjects = list(database.getSubjects())
    self.assertEqual([], subjects)
def testInitialStatistics(self):
    """
    A newly created database must report zero subjects, zero total
    residues and zero covered residues.
    """
    database = Database(DatabaseParameters())
    self.assertEqual(0, database.subjectCount())
    self.assertEqual(0, database.totalResidues())
    self.assertEqual(0, database.totalCoveredResidues())
def testGetSubjectHashCount(self):
    """
    After a subject is added, the Subject instance returned by
    getSubjectByIndex must have the correct hash count.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    read = AARead('id', 'FRRRFRRRFAFRRRFRRRF')
    _, subjectIndex, _ = database.addSubject(read)
    stored = database.getSubjectByIndex(subjectIndex)
    self.assertEqual(1, stored.hashCount)
def testGetSubjectBySubject(self):
    """
    After a subject is added, getIndexBySubject must return its index
    when given an identical (but separately constructed) subject.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    _, subjectIndex, _ = database.addSubject(AARead('id', 'FRRRFRRRF'))
    lookup = Subject(AARead('id', 'FRRRFRRRF'))
    self.assertEqual(subjectIndex, database.getIndexBySubject(lookup))
def testAddSameSubjectReturnsSameIndex(self):
    """
    Adding an identical subject more than once must return the same
    subject index each time.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    firstIndex = database.addSubject(AARead('id', 'FRRRFRRRF'))[1]
    secondIndex = database.addSubject(AARead('id', 'FRRRFRRRF'))[1]
    self.assertEqual(firstIndex, secondIndex)
def testAddNewBackend(self):
    """
    The parameters returned when a new backend is added must be the very
    same object as the parameters the database was created with.
    """
    originalParams = DatabaseParameters(landmarks=[AlphaHelix],
                                        trigPoints=[Peaks])
    database = Database(originalParams)
    # Only the third element (the parameters) of the result is examined.
    _, _, returnedParams = database.addBackend()
    self.assertIs(originalParams, returnedParams)
def testGetSubjectByIndex(self):
    """
    After a subject is added, getSubjectByIndex must return an equal
    subject when given the index that addSubject returned.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    read = AARead('id', 'FRRRFRRRF')
    _, subjectIndex, _ = database.addSubject(read)
    expected = Subject(AARead('id', 'FRRRFRRRF'))
    self.assertEqual(expected, database.getSubjectByIndex(subjectIndex))
def testReconnectSameNameBackend(self):
    """
    If a backend tries to connect but re-uses an existing backend name, a
    BackendException must be raised.
    """
    dbParams = DatabaseParameters(landmarks=[AlphaHelix])
    db = Database(dbParams)
    # Only the name given to the first backend is needed, in order to
    # attempt a second connection under that same name.
    name, _, _ = db.addBackend()
    # A raw string is used so the backslash reaches the regex engine as an
    # escaped dot, rather than forming an invalid string escape sequence.
    error = r"^Backend %r is already connected\.$" % name
    six.assertRaisesRegex(self, BackendException, error, db.addBackend,
                          name)
def testAddSameSubjectReturnsCorrectPreExisting(self):
    """
    Adding an identical subject twice must return a pre-existing value of
    False the first time and True the second time.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    firstPreExisting = database.addSubject(AARead('id', 'FRRRFRRRF'))[0]
    secondPreExisting = database.addSubject(AARead('id', 'FRRRFRRRF'))[0]
    self.assertEqual([False, True],
                     [firstPreExisting, secondPreExisting])
def testFindWithOneUnreconnectedBackend(self):
    """
    If a database has one unreconnected backend, calling find() must raise
    BackendException with the expected error message.
    """
    dbParams = DatabaseParameters(landmarks=[AlphaHelix])
    db = Database(dbParams)
    # Register a backend name directly in the disconnected backends so
    # that find() sees a backend that has not reconnected.
    db.disconnectedBackends['dummy'] = None
    query = AARead('id', 'AAA')
    # A raw string is used so the backslash reaches the regex engine as an
    # escaped dot, rather than forming an invalid string escape sequence.
    error = r"^Backend 'dummy' has not reconnected\.$"
    six.assertRaisesRegex(self, BackendException, error, db.find, query)
def testChecksumEmptyDatabase(self):
    """
    With no subjects added, the database checksum must equal the checksum
    of its parameters updated with the default backend name.
    """
    params = DatabaseParameters()
    expectedChecksum = Checksum(params.checksum).update(
        [Backend.DEFAULT_NAME])
    database = Database(params)
    self.assertEqual(expectedChecksum.value, database.checksum())
def testFindNoMatching(self):
    """
    Finding a query that does not match the subject in the database must
    produce no matches.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[Peaks]))
    database.addSubject(AARead('subject', 'FRRRFRRRFASAASA'))
    result = database.find(AARead('query', 'FRRR'))
    self.assertEqual({}, result.matches)
def testOneReadTwoLandmarksGetSubjects(self):
    """
    Adding one subject with two landmarks (and hence one hash) must make
    that subject, with a hash count of 1, available from getSubjects.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    database.addSubject(AARead('id', 'FRRRFRRRFAFRRRFRRRF'))
    stored = list(database.getSubjects())[0]
    expected = Subject(AARead('id', 'FRRRFRRRFAFRRRFRRRF'), 1)
    self.assertEqual(expected, stored)
    self.assertEqual(1, stored.hashCount)
def testAddSubjects(self):
    """
    Measure how long it takes to add SUBJECT_COUNT subjects to a database.
    The elapsed time (as measured by time.time) is stored in self.details.
    """
    # The read and the database are built before the clock is started, so
    # that only the addSubject calls are timed.
    read = AARead('id', self.FEATURE_SEQUENCE * self.FEATURE_COUNT)
    params = DatabaseParameters(landmarks=self.LANDMARKS,
                                trigPoints=self.TRIG_POINTS)
    database = Database(dbParams=params)

    began = time.time()
    for _ in range(self.SUBJECT_COUNT):
        database.addSubject(read)
    self.details = time.time() - began
def testAddSubjectReturnsIndexAndPreExisting(self):
    """
    When a single subject is added, addSubject must return a false
    pre-existing flag (the subject was not already in the database), the
    index ('0') of the added subject, and a hash count of zero.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    outcome = database.addSubject(AARead('id', 'FRRRFRRRF'))
    preExisting, subjectIndex, hashCount = outcome
    self.assertFalse(preExisting)
    self.assertEqual('0', subjectIndex)
    self.assertEqual(0, hashCount)
def testOneReadOneLandmarkGetSubjects(self):
    """
    Adding one subject with just one landmark (and hence no hashes) must
    make exactly one subject, with a hash count of 0, available from
    getSubjects.
    """
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    database.addSubject(AARead('id', 'FRRRFRRRF'))
    stored = list(database.getSubjects())
    self.assertEqual(1, len(stored))
    onlySubject = stored[0]
    self.assertEqual(Subject(AARead('id', 'FRRRFRRRF')), onlySubject)
    self.assertEqual(0, onlySubject.hashCount)
def testFindNoneMatchingNoTrigPoint(self):
    """
    With a single landmark and no trig point finders, no matches must be
    found, even though the query and the subject have the same sequence.
    """
    residues = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[]))
    database.addSubject(subject)
    self.assertEqual({}, database.find(query).matches)
def testFindMatchAfterSaveRestore(self):
    """
    The matches found for a query must be the same before a database is
    saved and after it has been restored from that save.
    """
    subject = AARead('subject', 'AFRRRFRRRFASAASAVVVVVVASAVVVASA')
    query = AARead('query', 'FRRRFRRRFASAASAFRRRFRRRFFRRRFRRRFFRRRFRRRF')
    original = Database(DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand], trigPoints=[Peaks]))
    original.addSubject(subject)
    resultBefore = original.find(query)

    expected = {
        '0': [
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 10),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 11),
            },
            {
                'queryLandmark': Landmark('AlphaHelix', 'A', 0, 9, 2),
                'queryTrigPoint': TrigPoint('Peaks', 'P', 13),
                'subjectLandmark': Landmark('AlphaHelix', 'A', 1, 9, 2),
                'subjectTrigPoint': TrigPoint('Peaks', 'P', 14),
            },
        ],
    }
    self.assertEqual(expected, resultBefore.matches)

    # Round-trip the database through an in-memory file.
    saved = StringIO()
    original.save(saved)
    saved.seek(0)
    restored = Database.restore(saved)

    resultAfter = restored.find(query)
    self.assertEqual(expected, resultAfter.matches)
def testFindNoneMatchingTooSmallDistance(self):
    """
    With maxDistance set to 1, no matches must be found, even though the
    query and the subject have the same sequence.
    """
    residues = 'AFRRRFRRRFASAASA'
    subject = AARead('subject', residues)
    query = AARead('query', residues)
    database = Database(DatabaseParameters(
        landmarks=[AlphaHelix], trigPoints=[Peaks], maxDistance=1))
    database.addSubject(subject)
    self.assertEqual({}, database.find(query).matches)
def testPrint(self):
    """
    The print_ method must produce the expected summary of a database
    that has non-default parameters and one subject added.
    """
    # Show the complete diff if the (long) expected string does not match.
    self.maxDiff = None
    read = AARead('subject', 'FRRRFRRRFASAASA')
    params = DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand],
        trigPoints=[Peaks, Troughs],
        limitPerLandmark=16,
        maxDistance=10,
        minDistance=0,
        distanceBase=1,
        randomLandmarkDensity=0.6,
        randomTrigPointDensity=0.4,
        acAlphaHelixFilename='xxx',
        acAlphaHelix310Filename='yyy',
        acAlphaHelixCombinedFilename='bbb',
        acAlphaHelixPiFilename='zzz',
        acExtendedStrandFilename='aaa')
    database = Database(params)
    database.addSubject(read)
    # NOTE(review): the leading whitespace inside these literals is kept
    # exactly as found; confirm it matches the indentation that print_
    # actually emits.
    expected = (
        'Parameters:\n'
        ' Landmark finders:\n'
        ' AlphaHelix\n'
        ' BetaStrand\n'
        ' Trig point finders:\n'
        ' Peaks\n'
        ' Troughs\n'
        ' Limit per landmark: 16\n'
        ' Max distance: 10\n'
        ' Min distance: 0\n'
        ' Distance base: 1.000000\n'
        ' Feature length base: 1.350000\n'
        ' Random landmark density: 0.600000\n'
        ' Random trig point density: 0.400000\n'
        ' AC AlphaHelix filename: xxx\n'
        ' AC AlphaHelix 3-10 filename: yyy\n'
        ' AC AlphaHelix Combined filename: bbb\n'
        ' AC AlphaHelix pi filename: zzz\n'
        ' AC ExtendedStrand filename: aaa\n'
        'Connector class: SimpleConnector\n'
        'Subject count: 1\n'
        'Hash count: 3\n'
        'Total residues: 15\n'
        'Coverage: 73.33%\n'
        'Checksum: 3162290961\n'
        'Connector:')
    self.assertEqual(expected, database.print_())
def testFindBug493(self):
    """
    Failing test case for
    https://github.com/acorg/light-matter/issues/493

    There are no explicit assertions: the test passes provided that
    normalizeBin can be called on every significant bin without an
    exception being raised.
    """
    query = SSAARead(
        '2HLA:A',
        'GSHSMRYFYTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDR'
        'NTRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQMMYGCDVGSDGRFLRGYRQDAYDGKDYI'
        'ALKEDLRSWTAADMAAQTTKHKWEAAHVAEQWRAYLEGTCVEWLRRYLENGKETLQRTDAPK'
        'THMTHHAVSDHEATLRCWALSFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWVAVV'
        'VPSGQEQRYTCHVQHEGLPKPL',
        '--EEEEEEEEEE--TTSS--EEEEEEEETTEEEEEEETTSTT-S-EE-SHHHHTS-HHHHHH'
        'HHHHHHHHHHHHHHHHHHHHHHTT--TTS--EEEEEEEEEE-TTS-EEEEEEEEEETTEEEE'
        'EE-TTSS-EEESSHHHHHHHHHHHHTTTHHHHHHHHHTHHHHHHHHHHHHHHHHHT--B--E'
        'EEEEEEE-SSSEEEEEEEEEEEBSS-EEEEEEETTEEE-TTEEE---EE-SSS-EEEEEEEE'
        'EETT-GGGEEEEEEETTB-S--')

    subject = SSAARead(
        '3D2U:A',
        'HVLRYGYTGIFDDTSHMTLTVVGIFDGQHFFTYHVQSSDKASSRANGTISWMANVSAAYPTY'
        'LDGERAKGDLIFNQTEQNLLELEIALGYRSQSVLTWTHECNTTENGSFVAGYEGFGWDGETL'
        'MELKDNLTLWTGPNYEISWLKQQKTYIDGKIKNISEGDTTIQRNYLKGNCTQWSVIYSGFQP'
        'PVTHPVVKGGVRNQNDNRAEAFCTSYGFFPGEIQITFIHYGDKVPEDSEPQCNPLLPTLDGT'
        'FHQGCYVAIFSNQNYTCRVTHGNWTVEIPISVT',
        '-EEEEEEEEEESSSS-EEEEEEEEETTEEEEEEEEESS-SSS-EEEE-STHHHHHHHHSTTH'
        'HHHHHHHHHHHHHHHHHHHHHHHHHH--SS--EEEEEEEEEE-TT--EEEEEEEEEETTEEE'
        'EEE-TTS---B---TTT-GGGGGHHHHHHHHHT--SHHHHHHHHHHHTHHHHHHHHHHHHS-'
        '--B--EEEEEEEEEETTEEEEEEEEEEEBSS--EEEEEEESS---TT---EE---EE-TTS-'
        'EEEEEEEEEETTSEEEEEEE-SS-EEEEEEE--')

    landmarkNames = [
        'PDB AlphaHelix',
        'PDB AlphaHelix_3_10',
        'PDB AlphaHelix_pi',
        'PDB ExtendedStrand',
        'AminoAcidsLm',
    ]
    trigPointNames = [
        'AminoAcids',
        'Peaks',
        'Troughs',
        'IndividualPeaks',
        'IndividualTroughs',
    ]
    database = Database(DatabaseParameters(
        landmarks=landmarkNames, trigPoints=trigPointNames,
        featureLengthBase=1.01, maxDistance=10000, limitPerLandmark=50,
        distanceBase=1.1))
    _, subjectIndex, _ = database.addSubject(subject)

    result = database.find(
        query, FindParameters(significanceFraction=0.01),
        storeFullAnalysis=True)

    queryLength = len(query)
    for binInfo in result.analysis[subjectIndex]['significantBins']:
        normalizeBin(binInfo['bin'], queryLength)
def testSaveRestoreEmpty(self):
    """
    When asked to save and then restore an empty database, the correct
    database must result.
    """
    dbParams = DatabaseParameters(landmarks=[AlphaHelix],
                                  trigPoints=[Peaks])
    db = Database(dbParams)
    # Round-trip the (empty) database through an in-memory file.
    fp = StringIO()
    db.save(fp)
    fp.seek(0)
    # restore is called on the class, as in testFindMatchAfterSaveRestore,
    # to make it clear that the restored database is built only from the
    # saved data and not from the state of db.
    result = Database.restore(fp)
    self.assertEqual(0, result.subjectCount())
    self.assertEqual(0, result.totalCoveredResidues())
    self.assertEqual(0, result.totalResidues())
    # The assertion requires compare to return None for the restored
    # parameters.
    self.assertIs(None, dbParams.compare(result.dbParams))
def testPrintNoHashes(self):
    """
    The print_ method must produce the expected summary when the only
    subject in the database is an empty sequence that yields no hashes.
    """
    # Show the complete diff if the (long) expected string does not match.
    self.maxDiff = None
    read = AARead('subject', '')
    params = DatabaseParameters(
        landmarks=[AlphaHelix, BetaStrand],
        trigPoints=[Peaks, Troughs],
        limitPerLandmark=16,
        maxDistance=10,
        minDistance=0,
        distanceBase=1)
    database = Database(params)
    database.addSubject(read)
    # NOTE(review): the leading whitespace inside these literals is kept
    # exactly as found; confirm it matches the indentation that print_
    # actually emits.
    expected = (
        'Parameters:\n'
        ' Landmark finders:\n'
        ' AlphaHelix\n'
        ' BetaStrand\n'
        ' Trig point finders:\n'
        ' Peaks\n'
        ' Troughs\n'
        ' Limit per landmark: 16\n'
        ' Max distance: 10\n'
        ' Min distance: 0\n'
        ' Distance base: 1.000000\n'
        ' Feature length base: 1.350000\n'
        ' Random landmark density: 0.100000\n'
        ' Random trig point density: 0.100000\n'
        ' AC AlphaHelix filename: ac-alpha-helix-substrings-20-0.85\n'
        ' AC AlphaHelix 3-10 filename: ac-alpha-helix-3-10-substrings-'
        '1-0.5\n'
        ' AC AlphaHelix Combined filename: ac-alpha-helix-combined-'
        'substrings-20-0.85\n'
        ' AC AlphaHelix pi filename: ac-alpha-helix-pi-substrings-1-0.5\n'
        ' AC ExtendedStrand filename: ac-extended-strand-substrings-'
        '10-0.5\n'
        'Connector class: SimpleConnector\n'
        'Subject count: 1\n'
        'Hash count: 0\n'
        'Total residues: 0\n'
        'Coverage: 0.00%\n'
        'Checksum: 3786852290\n'
        'Connector:')
    self.assertEqual(expected, database.print_())