def setUp(self):
    """ Build the extractor instance that the tests access as self.me """
    extractor = BaselineMutationExtractor()
    self.me = extractor
class BaselineMutationExtractorTests(TestCase):
    """ Tests of the BaselineMutationExtractor class

        Each test uses the extractor built in setUp (self.me).  Calling the
        extractor on a string is expected to return a dict mapping
        PointMutation objects to the number of times they were mentioned.
    """

    # First / second element of every entry yielded by iterating the
    # amino-acid map.
    _single_letter_aa_codes = [
        aa[0] for aa in amino_acid_three_to_one_letter_map]
    _triple_letter_aa_codes = [
        aa[1] for aa in amino_acid_three_to_one_letter_map]

    def setUp(self):
        """ Initialize some objects for use in the tests """
        self.me = BaselineMutationExtractor()

    def test_init(self):
        """BME: __init__ returns without error """
        # Only construction is under test, so the instance is discarded.
        BaselineMutationExtractor()

    def test_call_no_mutations(self):
        """BME: extraction functions with no extraction-worthy data """
        self.assertEqual(self.me(''), {})
        self.assertEqual(self.me('There is no mutation data here.'), {})
        self.assertEqual(self.me('T64 is almost a valid mutation.'), {})
        self.assertEqual(self.me('So is 42S.'), {})

    def test_call_single_mutation(self):
        """BME:extraction functions when one mutation is present """
        expected = {PointMutation(42, 'S', 'T'): 1}
        self.assertEqual(self.me('S42T'), expected)
        self.assertEqual(self.me('The S42T mutation was made.'), expected)

    def test_call_boundaries_required(self):
        """BME:match boundaries are recognized """
        expected = {PointMutation(42, 'S', 'T'): 1}
        self.assertEqual(self.me('S42T'), expected)
        # Extra characters glued to either side must prevent a match.
        self.assertEqual(self.me('S42Test'), {})
        self.assertEqual(self.me('S42-Test mutation was made.'), {})
        self.assertEqual(self.me('gfS42T'), {})
        self.assertEqual(self.me('S42Thr'), {})

    def test_call_punc_ignored(self):
        """BME:punctuation ignored in mutation words """
        expected = {PointMutation(42, 'S', 'T'): 1}
        # internal punctuation
        self.assertEqual(self.me('S42-T'), expected)
        # leading punctuation
        self.assertEqual(self.me('?S42T'), expected)
        # trailing punctuation
        self.assertEqual(self.me('S42T?'), expected)
        # all punctuation marks; the backslash is written as '\\' so the
        # literal holds a single backslash without relying on an invalid
        # escape sequence ('\|')
        self.assertEqual(
            self.me('!@#$%^&*()~`"\';:.,><?/{}[]\\|+=-_S42T'), expected)

    def test_call_multiple_mutations(self):
        """BME:extraction functions when more than one mutation is present """
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 1,
        }
        self.assertEqual(self.me('S42T and W36Y'), expected)
        self.assertEqual(self.me('S42T W36Y'), expected)

    def test_call_count(self):
        """BME:counting of mentions works """
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 1,
        }
        self.assertEqual(self.me('S42T and W36Y'), expected)

        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 2,
        }
        self.assertEqual(self.me('S42T, W36Y, and W36Y'), expected)

        # The three-letter form Trp36Tyr is counted as a mention of W36Y.
        expected = {
            PointMutation(42, 'S', 'T'): 1,
            PointMutation(36, 'W', 'Y'): 3,
        }
        self.assertEqual(
            self.me('S42T, W36Y, Trp36Tyr, and W36Y'), expected)

    def test_call_three_to_one_letter_map(self):
        """BME:identical Mutation objects created for varied matches"""
        expected = {PointMutation(42, 'A', 'G'): 1}
        self.assertEqual(self.me('The A42G mutation was made.'), expected)
        self.assertEqual(self.me('The Ala42Gly mutation was made.'), expected)
        self.assertEqual(
            self.me('The A42 to glycine mutation was made.'), expected)

    def test_regex_case_sensitive(self):
        """BME:regex case sensitive functions as expected"""
        one_letter = self.me._word_regexs[0]
        three_letter = self.me._word_regexs[1]
        full_name = self.me._string_regexs[3]

        # one-letter abbreviations must be uppercase
        self.assertEqual(one_letter.match('a64t'), None)
        self.assertEqual(one_letter.match('A64t'), None)
        self.assertEqual(one_letter.match('a64T'), None)
        self.assertEqual(one_letter.match('A64T').group(), 'A64T')

        # three-letter abbreviations must be titlecase
        self.assertEqual(three_letter.match('ala64gly'), None)
        self.assertEqual(three_letter.match('ALA64GLY'), None)
        self.assertEqual(three_letter.match('aLa64gLy'), None)
        self.assertEqual(three_letter.match('Ala64Gly').group(), 'Ala64Gly')

        # full names must be lowercase or titlecase
        self.assertEqual(
            full_name.match('Ala64 to glycine').group(), 'Ala64 to glycine')
        self.assertEqual(
            full_name.match('Ala64 to Glycine').group(), 'Ala64 to Glycine')
        self.assertEqual(full_name.match('Ala64 to GLYCINE'), None)
        self.assertEqual(full_name.match('Ala64 to glYcine'), None)

    def test_one_letter_match(self):
        """BME:regex identifies one-letter codes"""
        self.assertEqual(
            self.me._word_regexs[0].match('A64G').group(), 'A64G')

    def test_three_letter_match(self):
        """BME:regex identifies three-letter codes"""
        self.assertEqual(
            self.me._word_regexs[1].match('Ala64Gly').group(), 'Ala64Gly')

    def test_varied_digit_length(self):
        """BME:regex identifies mutations w/ different location lengths"""
        one_letter = self.me._word_regexs[0]
        self.assertEqual(one_letter.match('A4G').group(), 'A4G')
        self.assertEqual(one_letter.match('A64G').group(), 'A64G')
        self.assertEqual(one_letter.match('A864G').group(), 'A864G')
        self.assertEqual(one_letter.match('A8864G').group(), 'A8864G')

    def test_word_boundary_requirement(self):
        """BME:regex requires word boundaries surrounding mutation"""
        # Every word-level regex must reject these inputs.
        for regex in self.me._word_regexs:
            self.assertEqual(regex.match('TheAla64Glymut'), None)
            self.assertEqual(regex.match('Ala64Gly/p53634'), None)

    def test_mix_one_three_letter_match(self):
        """BME:regex ignores one/three letter code mixes"""
        # Every word-level regex must reject these inputs.
        for regex in self.me._word_regexs:
            self.assertEqual(regex.match('Ala64G'), None)
            self.assertEqual(regex.match('A64Gly'), None)

    def test_preprocess_words(self):
        """BME:word-level preprocessing functions as expected"""
        r = "this is a t64g mutation."
        expected = ['this', 'is', 'a', 't64g', 'mutation']
        self.assertEqual(self.me._preprocess_words(r), expected)

        # A word made only of punctuation is kept as an empty string.
        r = "this is ! t64g mutation."
        expected = ['this', 'is', '', 't64g', 'mutation']
        self.assertEqual(self.me._preprocess_words(r), expected)

        r = ""
        expected = []
        self.assertEqual(self.me._preprocess_words(r), expected)

    def test_preprocess_sentences(self):
        """BME:sentence-level preprocessing functions as expected"""
        r = "This is a test. The T65->Y mutation"
        expected = ['This is a test', 'The T65Y mutation']
        self.assertEqual(self.me._preprocess_sentences(r), expected)

    def test_replace_regex(self):
        """BME: replace regex functions as expected"""
        replace = self.me._replace_regex
        self.assertEqual(replace.sub('', ''), '')
        self.assertEqual(replace.sub('', 'a46t'), 'a46t')
        self.assertEqual(replace.sub('', 'a46->t'), 'a46t')
        self.assertEqual(replace.sub('', 'A234-T'), 'A234T')
        self.assertEqual(replace.sub('', 'A(234)T'), 'A234T')
        self.assertEqual(
            replace.sub('', 'The Gly64->Thr mutation.'),
            'The Gly64Thr mutation')

    def test_ten_word_match(self):
        """BME: ten-word pattern functions as expected """
        expected = {PointMutation(42, 'S', 'A'): 1}
        self.assertEqual(self.me('Ser42 was mutated to Ala'), expected)
        self.assertEqual(self.me('S42 was mutated to Ala'), expected)
        self.assertEqual(self.me('Ser42 was mutated to alanine'), expected)
        self.assertEqual(self.me('the S42 was mutated to alanine'), expected)
        self.assertEqual(self.me('S42 was mutated to alanine'), expected)
        # Tenth word is alanine, so it's a match
        self.assertEqual(self.me('S42 a a a a a a a a a alanine'), expected)
        # Eleventh word is alanine, so no match
        self.assertEqual(self.me('S42 a a a a a a a a a a alanine'), {})