示例#1
0
def get_numbers_from_tree(tree):
    """Collect capture-group texts that look like numbers from ``tree``.

    ``tree`` is split on newlines. Blank lines, and lines whose stripped
    form starts with ``<`` and ends with ``>``, are skipped. For every
    other line a regex source string is obtained from
    ``regex.build_regex(line)``, compiled (compiled patterns are memoised
    in the ``compiled_regexes`` mapping defined outside this function) and
    matched against the unstripped line. Each capture group whose text is
    accepted by ``numbers_compiled_regex.match`` is appended to the result.

    Args:
        tree: Newline-separated text.

    Returns:
        A list of the accepted group texts, in line order and then in
        group order. Empty when nothing qualifies.
    """
    res = []
    for line in tree.split('\n'):
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith('<') and stripped.endswith('>'):
            continue

        text_regex = regex.build_regex(line)
        if text_regex not in compiled_regexes:
            compiled_regexes[text_regex] = re.compile(text_regex)
        compiled_regex = compiled_regexes[text_regex]

        match = compiled_regex.match(line)
        if match is None:
            continue

        # groups() already excludes group 0 (the whole match) and yields
        # groups 1..N, so iterating it directly also covers the last group,
        # which an index loop over range(len(groups())) never reaches.
        for text in match.groups():
            if text is None:
                # Group did not participate in the match; nothing to test.
                continue
            if numbers_compiled_regex.match(text) is not None:
                res.append(text)

    return res
示例#2
0
 def test_zero_or_one(self):
     """Check that ``ba?`` matches 'ba' and 'b' and does not match 'aa'."""
     pattern = regex.build_regex("ba?")
     for accepted in ("ba", "b"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "aa"))
示例#3
0
 def test_zero_or_one(self):
     """Check that ``ba?`` matches 'ba' and 'b' and does not match 'aa'."""
     pattern = regex.build_regex("ba?")
     for accepted in ("ba", "b"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "aa"))
示例#4
0
 def test_match_many(self):
     """Check ``ab[cde]fg`` against each bracket member and against none.

     'abcfg', 'abdfg' and 'abefg' must match; 'abfg' must not.
     """
     pattern = regex.build_regex("ab[cde]fg")
     for accepted in ("abcfg", "abdfg", "abefg"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "abfg"))
示例#5
0
 def test_match_many(self):
     """Check ``ab[cde]fg`` against each bracket member and against none.

     'abcfg', 'abdfg' and 'abefg' must match; 'abfg' must not.
     """
     pattern = regex.build_regex("ab[cde]fg")
     for accepted in ("abcfg", "abdfg", "abefg"):
         self.assertTrue(regex.match(pattern, accepted))
     self.assertFalse(regex.match(pattern, "abfg"))
示例#6
0
def build_regexes_for_all_unique_trees():
    """Build one regex per text line for every entry in the unique-tree set.

    Prints the number of entries in ``html_analysis[unique_trees]``. For
    each entry, its tree text is looked up in
    ``html_analysis[unique_trees_actual]`` under the key returned by
    ``get_adler_crc``. The text is split on newlines and turned into a list
    with one item per line: ``None`` for blank lines and for lines whose
    stripped form starts with ``<`` and ends with ``>``, otherwise the
    result of ``regex.build_regex`` on the stripped line with surrounding
    double quotes removed. The list is handed to ``add_regex_for_tree``.
    """
    def _regex_for(raw_line):
        # One list slot per source line; None marks "nothing to match here".
        text = raw_line.strip()
        if not text:
            return None
        if text.startswith('<') and text.endswith('>'):
            return None
        return regex.build_regex(text.strip('"'))

    print('len(html_analysis[unique_trees]):',
          len(html_analysis[unique_trees]))
    for unique_tree in html_analysis[unique_trees]:
        crc_key = get_adler_crc(unique_tree)
        tree_text = html_analysis[unique_trees_actual][crc_key]
        line_regexes = [_regex_for(raw) for raw in tree_text.split('\n')]
        add_regex_for_tree(unique_tree, line_regexes)
示例#7
0
 def test_one_or_more_exception(self):
     """Check that building a pattern from a lone ``+`` raises SyntaxError."""
     # assertRaises fails the test when no exception is raised and lets
     # any other exception type propagate as an error.
     with self.assertRaises(SyntaxError):
         regex.build_regex("+")
示例#8
0
 def test_multi_many3(self):
     """Check that ``.[cd]+.f`` does not match 'cddcdccdchef'."""
     pattern = regex.build_regex(".[cd]+.f")
     self.assertFalse(regex.match(pattern, "cddcdccdchef"))
示例#9
0
 def test_one_or_more3(self):
     """Check that ``a+`` does not match a string containing no 'a'."""
     pattern = regex.build_regex("a+")
     self.assertFalse(regex.match(pattern, "b"))
示例#10
0
 def test_one_or_more3(self):
     """Check that ``a+`` does not match a string containing no 'a'."""
     pattern = regex.build_regex("a+")
     self.assertFalse(regex.match(pattern, "b"))
示例#11
0
 def test_match_any_end(self):
     """Check that a trailing ``.`` in ``ab.`` matches the 'k' of 'abk'."""
     pattern = regex.build_regex("ab.")
     self.assertTrue(regex.match(pattern, "abk"))
示例#12
0
 def test_match_zero_or_more_matches_any(self):
     """Check that ``a.*c`` matches a long string running from 'a' to 'c'."""
     subject = "aasdhfjkli ieuxnreu;anjanxeearunjkljadsxnfldjc"
     pattern = regex.build_regex("a.*c")
     self.assertTrue(regex.match(pattern, subject))
示例#13
0
 def test_zero_or_one_with_many(self):
     """Check that ``h[abc]?d`` matches 'had'."""
     pattern = regex.build_regex("h[abc]?d")
     self.assertTrue(regex.match(pattern, "had"))
示例#14
0
 def test_dont_match_any1(self):
     """Check that ``ab[^cde]fg`` matches 'abhfg' ('h' is not in the set)."""
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertTrue(regex.match(pattern, "abhfg"))
示例#15
0
 def test_dont_match_any1(self):
     """Check that ``ab[^cde]fg`` matches 'abhfg' ('h' is not in the set)."""
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertTrue(regex.match(pattern, "abhfg"))
示例#16
0
 def test_multi_many3(self):
     """Check that ``.[cd]+.f`` does not match 'cddcdccdchef'."""
     pattern = regex.build_regex(".[cd]+.f")
     self.assertFalse(regex.match(pattern, "cddcdccdchef"))
示例#17
0
 def test_dont_match_any(self):
     """Check that ``ab[^cde]fg`` does not match 'abcdefg'."""
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertFalse(regex.match(pattern, "abcdefg"))
示例#18
0
 def test_dont_match1(self):
     """Check that ``ab!cdef`` does not match 'abcdef'."""
     # Presumably ``!c`` means "any character except c" -- confirm against
     # the regex module.
     pattern = regex.build_regex("ab!cdef")
     self.assertFalse(regex.match(pattern, "abcdef"))
示例#19
0
 def test_dont_match(self):
     """Check that ``ab!cdef`` matches 'abqdef'."""
     # Presumably ``!c`` means "any character except c" -- confirm against
     # the regex module.
     pattern = regex.build_regex("ab!cdef")
     self.assertTrue(regex.match(pattern, "abqdef"))
示例#20
0
 def test_multi_many5(self):
     """Check that ``.+`` matches the non-empty string 'aaasdf'."""
     pattern = regex.build_regex(".+")
     self.assertTrue(regex.match(pattern, "aaasdf"))
示例#21
0
 def test_single_letter_at_end_of_long_string(self):
     """Check that ``e`` matches 'abcde', where 'e' is the last character."""
     pattern = regex.build_regex("e")
     self.assertTrue(regex.match(pattern, "abcde"))
示例#22
0
 def test_dont_match1(self):
     """Check that ``ab!cdef`` does not match 'abcdef'."""
     # Presumably ``!c`` means "any character except c" -- confirm against
     # the regex module.
     pattern = regex.build_regex("ab!cdef")
     self.assertFalse(regex.match(pattern, "abcdef"))
示例#23
0
 def test_letter_not_in_string(self):
     """Check that ``e`` does not match 'abcd', which contains no 'e'."""
     pattern = regex.build_regex("e")
     self.assertFalse(regex.match(pattern, "abcd"))
示例#24
0
 def test_multi_many5(self):
     """Check that ``.+`` matches the non-empty string 'aaasdf'."""
     pattern = regex.build_regex(".+")
     self.assertTrue(regex.match(pattern, "aaasdf"))
示例#25
0
 def test_char_pattern_in_string(self):
     """Check that the literal ``This`` is found inside a longer sentence."""
     pattern = regex.build_regex("This")
     self.assertTrue(regex.match(pattern, "Is This The Real Life"))
示例#26
0
 def test_match_zero_or_more_end1(self):
     """Check that ``a*b*c*`` matches 'aaaaabbbbbbbc'."""
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, "aaaaabbbbbbbc"))
示例#27
0
 def test_match_any_char(self):
     """Check that a lone ``.`` matches the single character 'a'."""
     pattern = regex.build_regex(".")
     self.assertTrue(regex.match(pattern, "a"))
示例#28
0
 def test_escaped_chars(self):
     """Check that backslash-escaped '*' and '.' match the literal text '*.'."""
     # Raw string: backslash-star and backslash-dot are not valid Python
     # escape sequences, so a plain literal triggers an invalid-escape
     # warning. The raw form has the same four-character value.
     pattern = regex.build_regex(r"\*\.")
     self.assertTrue(regex.match(pattern, "*."))
示例#29
0
 def test_match_any_char_longer(self):
     """Check that ``a.c`` matches 'abc'."""
     pattern = regex.build_regex("a.c")
     self.assertTrue(regex.match(pattern, "abc"))
示例#30
0
 def test_multi_many1(self):
     """Check that ``[cd]*`` matches 'cddcdccdce'."""
     pattern = regex.build_regex("[cd]*")
     self.assertTrue(regex.match(pattern, "cddcdccdce"))
示例#31
0
 def test_match_beginning(self):
     """Check that a leading ``.`` in ``.bc`` matches the 'a' of 'abc'."""
     pattern = regex.build_regex(".bc")
     self.assertTrue(regex.match(pattern, "abc"))
示例#32
0
 def test_one_or_more_exception(self):
     """Check that building a pattern from a lone ``+`` raises SyntaxError."""
     # assertRaises fails the test when no exception is raised and lets
     # any other exception type propagate as an error.
     with self.assertRaises(SyntaxError):
         regex.build_regex("+")
示例#33
0
 def test_match_any_end(self):
     """Check that a trailing ``.`` in ``ab.`` matches the 'k' of 'abk'."""
     pattern = regex.build_regex("ab.")
     self.assertTrue(regex.match(pattern, "abk"))
示例#34
0
 def test_multi_many1(self):
     """Check that ``[cd]*`` matches 'cddcdccdce'."""
     pattern = regex.build_regex("[cd]*")
     self.assertTrue(regex.match(pattern, "cddcdccdce"))
示例#35
0
 def test_single_letter_at_end_of_long_string(self):
     """Check that ``e`` matches 'abcde', where 'e' is the last character."""
     pattern = regex.build_regex("e")
     self.assertTrue(regex.match(pattern, "abcde"))
示例#36
0
 def test_multi_many4(self):
     """Check that ``.[cd]+.f`` matches 'cddcdccdchf'."""
     pattern = regex.build_regex(".[cd]+.f")
     self.assertTrue(regex.match(pattern, "cddcdccdchf"))
示例#37
0
 def test_letter_not_in_string(self):
     """Check that ``e`` does not match 'abcd', which contains no 'e'."""
     pattern = regex.build_regex("e")
     self.assertFalse(regex.match(pattern, "abcd"))
示例#38
0
 def test_dont_match(self):
     """Check that ``ab!cdef`` matches 'abqdef'."""
     # Presumably ``!c`` means "any character except c" -- confirm against
     # the regex module.
     pattern = regex.build_regex("ab!cdef")
     self.assertTrue(regex.match(pattern, "abqdef"))
示例#39
0
 def test_match_any_char(self):
     """Check that a lone ``.`` matches the single character 'a'."""
     pattern = regex.build_regex(".")
     self.assertTrue(regex.match(pattern, "a"))
示例#40
0
 def test_dont_match_any(self):
     """Check that ``ab[^cde]fg`` does not match 'abcdefg'."""
     pattern = regex.build_regex("ab[^cde]fg")
     self.assertFalse(regex.match(pattern, "abcdefg"))
示例#41
0
 def test_match_beginning(self):
     """Check that a leading ``.`` in ``.bc`` matches the 'a' of 'abc'."""
     pattern = regex.build_regex(".bc")
     self.assertTrue(regex.match(pattern, "abc"))
示例#42
0
 def test_single_letter(self):
     """Check that the one-character pattern ``a`` matches the string 'a'."""
     pattern = regex.build_regex("a")
     self.assertTrue(regex.match(pattern, "a"))
示例#43
0
 def test_multi_many4(self):
     """Check that ``.[cd]+.f`` matches 'cddcdccdchf'."""
     pattern = regex.build_regex(".[cd]+.f")
     self.assertTrue(regex.match(pattern, "cddcdccdchf"))
示例#44
0
 def test_zero_or_one_with_many(self):
     """Check that ``h[abc]?d`` matches 'had'."""
     pattern = regex.build_regex("h[abc]?d")
     self.assertTrue(regex.match(pattern, "had"))
示例#45
0
 def test_match_zero_or_more_begining(self):
     """Check that ``a*b*c`` matches 'bbbbbbbc', which has no leading 'a'."""
     pattern = regex.build_regex("a*b*c")
     self.assertTrue(regex.match(pattern, "bbbbbbbc"))
示例#46
0
 def test_char_pattern_in_string(self):
     """Check that the literal ``This`` is found inside a longer sentence."""
     pattern = regex.build_regex("This")
     self.assertTrue(regex.match(pattern, "Is This The Real Life"))
示例#47
0
 def test_match_zero_or_more_end1(self):
     """Check that ``a*b*c*`` matches 'aaaaabbbbbbbc'."""
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, "aaaaabbbbbbbc"))
示例#48
0
 def test_match_any_char_longer(self):
     """Check that ``a.c`` matches 'abc'."""
     pattern = regex.build_regex("a.c")
     self.assertTrue(regex.match(pattern, "abc"))
示例#49
0
 def test_match_zero_or_more_matches_empty(self):
     """Check that ``a*b*c*`` matches the empty string."""
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, ""))
示例#50
0
 def test_single_letter(self):
     """Check that the one-character pattern ``a`` matches the string 'a'."""
     pattern = regex.build_regex("a")
     self.assertTrue(regex.match(pattern, "a"))
示例#51
0
 def test_match_zero_or_more_matches_any(self):
     """Check that ``a.*c`` matches a long string running from 'a' to 'c'."""
     subject = "aasdhfjkli ieuxnreu;anjanxeearunjkljadsxnfldjc"
     pattern = regex.build_regex("a.*c")
     self.assertTrue(regex.match(pattern, subject))
示例#52
0
 def test_match_zero_or_more_begining(self):
     """Check that ``a*b*c`` matches 'bbbbbbbc', which has no leading 'a'."""
     pattern = regex.build_regex("a*b*c")
     self.assertTrue(regex.match(pattern, "bbbbbbbc"))
示例#53
0
 def test_match_complicated(self):
     """Check a sentence-length pattern mixing literals, ``.`` and ``*``."""
     sentence = "a dog was walking down the street"
     pattern = regex.build_regex("a .og was* wa*lking down .h. stre*t")
     self.assertTrue(regex.match(pattern, sentence))
示例#54
0
 def test_match_zero_or_more_matches_empty(self):
     """Check that ``a*b*c*`` matches the empty string."""
     pattern = regex.build_regex("a*b*c*")
     self.assertTrue(regex.match(pattern, ""))
示例#55
0
 def test_escaped_chars(self):
     """Check that backslash-escaped '*' and '.' match the literal text '*.'."""
     # Raw string: backslash-star and backslash-dot are not valid Python
     # escape sequences, so a plain literal triggers an invalid-escape
     # warning. The raw form has the same four-character value.
     pattern = regex.build_regex(r"\*\.")
     self.assertTrue(regex.match(pattern, "*."))
示例#56
0
 def test_match_complicated(self):
     """Check a sentence-length pattern mixing literals, ``.`` and ``*``."""
     sentence = "a dog was walking down the street"
     pattern = regex.build_regex("a .og was* wa*lking down .h. stre*t")
     self.assertTrue(regex.match(pattern, sentence))
示例#57
0
 def test_exception(self):
     """Check that building the pattern ``**.`` raises SyntaxError."""
     # assertRaises fails the test when no exception is raised and lets
     # any other exception type propagate as an error.
     with self.assertRaises(SyntaxError):
         regex.build_regex("**.")
示例#58
0
 def test_exception(self):
     """Check that building the pattern ``**.`` raises SyntaxError."""
     # assertRaises fails the test when no exception is raised and lets
     # any other exception type propagate as an error.
     with self.assertRaises(SyntaxError):
         regex.build_regex("**.")
示例#59
0
 def test_one_or_more2(self):
     """Check that ``a+`` matches 'bab', where the 'a' sits mid-string."""
     pattern = regex.build_regex("a+")
     self.assertTrue(regex.match(pattern, "bab"))