def test_type(self):
    """VectorFromMatches should return correct type of vector

    With no constructor argument the result is checked to be an
    ImmutableBitvector; with constructor=MutableBitvector the result is
    checked to be a MutableBitvector that supports item assignment.
    """
    default_vec = VectorFromMatches('a', 'a')
    self.assertEqual(default_vec, Bitvector('1'))
    self.assertTrue(isinstance(default_vec, ImmutableBitvector))

    mutable_vec = VectorFromMatches('a', 'a', constructor=MutableBitvector)
    self.assertEqual(mutable_vec, Bitvector('1'))
    # Flipping the only bit must be reflected in later comparisons.
    mutable_vec[0] = 0
    self.assertEqual(mutable_vec, Bitvector('0'))
    self.assertTrue(isinstance(mutable_vec, MutableBitvector))
def testSingleBasePattern(self):
    """VectorFromMatches should match every matching char in string

    Each row gives a sequence together with the expected string form of
    the vector for the single-character patterns 'a' and 'b'.
    """
    cases = [
        # (sequence, expected for 'a', expected for 'b')
        ('', '', ''),
        ('a', '1', '0'),
        ('b', '0', '1'),
        ('aaa', '111', '000'),
        ('bbb', '000', '111'),
        ('aba', '101', '010'),
        ('bab', '010', '101'),
    ]
    for seq, expected_a, expected_b in cases:
        self.assertEqual(str(VectorFromMatches(seq, 'a')), expected_a)
        self.assertEqual(str(VectorFromMatches(seq, 'b')), expected_b)
def testMultiBasePattern(self):
    """VectorFromMatches should match multi-char string matches

    The pattern 'aba' is applied three ways per sequence: with no third
    argument and with a third argument of 1 (both compared against the
    overlapping expectation), then with a third argument of 0 (compared
    against the discrete expectation).
    """
    pattern = 'aba'
    cases = [
        # (sequence, expected overlapping, expected discrete)
        ('', '', ''),
        ('a', '0', '0'),
        ('aba', '111', '111'),
        ('abab', '1110', '1110'),
        ('ababa', '11111', '11100'),
        ('ababab', '111110', '111000'),
        ('abaaba', '111111', '111111'),
        ('aaba', '0111', '0111'),
    ]
    for seq, expected_overlap, expected_discrete in cases:
        by_default = VectorFromMatches(seq, pattern)
        self.assertEqual(str(by_default), expected_overlap)

        with_one = VectorFromMatches(seq, pattern, 1)
        self.assertEqual(str(with_one), expected_overlap)

        with_zero = VectorFromMatches(seq, pattern, 0)
        self.assertEqual(str(with_zero), expected_discrete)
def testSingleBaseRegex(self):
    """VectorFromMatches should match every matching character in regex

    Every matcher is exercised in two equivalent spellings, a compiled
    regex and a list of characters, and both must yield the same
    expected string for each sequence.
    """
    sequences = ['', 'a', 'b', 'aaa', 'bbb', 'aba', 'bab', 'axb', 'xxx']
    # Expected results are positionally aligned with ``sequences``.
    matchers = [
        # (compiled regex, equivalent character list, expected per sequence)
        (
            re.compile('a'),
            ['a'],
            ['', '1', '0', '111', '000', '101', '010', '100', '000'],
        ),
        (
            re.compile('b'),
            ['b'],
            ['', '0', '1', '000', '111', '010', '101', '001', '000'],
        ),
        (
            re.compile('a|b'),
            ['a', 'b'],
            ['', '1', '1', '111', '111', '111', '111', '101', '000'],
        ),
    ]
    for idx, seq in enumerate(sequences):
        for regex, chars, expected in matchers:
            # the regex form first ...
            from_regex = VectorFromMatches(seq, regex)
            self.assertEqual(str(from_regex), expected[idx])
            # ... then the list form, given a fresh list on every call
            from_list = VectorFromMatches(seq, list(chars))
            self.assertEqual(str(from_list), expected[idx])
def testMultiBaseRegex(self):
    """VectorFromMatches should match every matching combination of chars

    For each pattern the table holds one (overlapping, non-overlapping)
    pair of expected strings per sequence, in the same order as
    ``sequences``. The overlapping value is checked with a third
    argument of 1 and the non-overlapping value with 0.
    """
    sequences = ['aaabbb', 'aaaxbbb', 'ababab', 'abaabaabab']
    patterns = ['aaa', 'bbb', 'aba', 'aaa|bbb', 'aaa|aab']
    expected = {
        'aaa': [
            ('111000', '111000'),          # aaabbb
            ('1110000', '1110000'),        # aaaxbbb
            ('000000', '000000'),          # ababab
            ('0000000000', '0000000000'),  # abaabaabab
        ],
        'bbb': [
            ('000111', '000111'),          # aaabbb
            ('0000111', '0000111'),        # aaaxbbb
            ('000000', '000000'),          # ababab
            ('0000000000', '0000000000'),  # abaabaabab
        ],
        'aba': [
            ('000000', '000000'),          # aaabbb
            ('0000000', '0000000'),        # aaaxbbb
            ('111110', '111000'),          # ababab
            ('1111111110', '1111111110'),  # abaabaabab
        ],
        'aaa|bbb': [
            ('111111', '111111'),          # aaabbb
            ('1110111', '1110111'),        # aaaxbbb
            ('000000', '000000'),          # ababab
            ('0000000000', '0000000000'),  # abaabaabab
        ],
        'aaa|aab': [
            ('111100', '111000'),          # aaabbb
            ('1110000', '1110000'),        # aaaxbbb
            ('000000', '000000'),          # ababab
            ('0011111100', '0011111100'),  # abaabaabab
        ],
    }
    for idx, seq in enumerate(sequences):
        for pat in patterns:
            compiled = re.compile(pat)
            want_overlap, want_no_overlap = expected[pat][idx]

            overlapping = VectorFromMatches(seq, compiled, 1)
            self.assertEqual(str(overlapping), want_overlap)

            non_overlapping = VectorFromMatches(seq, compiled, 0)
            self.assertEqual(str(non_overlapping), want_no_overlap)
def testEmptyPattern(self):
    """VectorFromMatches empty pattern should return zeroes for len(string)

    The expected string form is one '0' per character of the input
    sequence, which is the empty string for an empty sequence.
    """
    for seq in ('', 'a', 'aa', 'aaaaaaaaaa'):
        all_zeroes = '0' * len(seq)
        result = VectorFromMatches(seq, '')
        self.assertEqual(str(result), all_zeroes)
def testBothEmpty(self):
    """VectorFromMatches empty string/pattern should return empty vector

    Checked through the string form of the result, which must be ''.
    """
    empty_vec = VectorFromMatches('', '')
    self.assertEqual(str(empty_vec), '')