def test_escaped_space_parsing(self):
    # Parse `\ a` and expect two nodes below the root, the first holding
    # the escaped space and typed Other.Suspicious.
    tree = Regex.get_parse_tree(r'\ a')
    nodes = list(find_all(tree))[1:]  # skip root
    rendered = '\n'.join(fmttree(tree))
    print(rendered)
    self.assertEqual(2, len(nodes))
    escaped = nodes[0]
    self.assertEqual(r'\ ', escaped.data)
    self.assertEqual(Other.Suspicious, escaped.type)
def test_charclass_parsing(self):
    # `[ a]` should produce three nodes below the root; the last two carry
    # the space and the letter respectively.
    tree = Regex.get_parse_tree(r'[ a]')
    nodes = list(find_all(tree))[1:]  # skip root
    rendered = '\n'.join(fmttree(tree))
    print(rendered)
    self.assertEqual(3, len(nodes))
    space_node = nodes[1]
    letter_node = nodes[2]
    self.assertEqual(r' ', space_node.data)
    self.assertEqual(r'a', letter_node.data)
def test_toknum_too_many(self):
    # Two capture groups against a single token: expect exactly one report
    # whose leading fields are ("107", logging.INFO, 0).
    tree = Regex.get_parse_tree("((a)b)")
    problems = []
    bygroups_check_toknum(tree, problems, (Text, ))
    print(problems)
    self.assertEqual(len(problems), 1)
    expected_head = ("107", logging.INFO, 0)
    self.assertEqual(expected_head, problems[0][:3])
def test_charclass(pat):
    # Cross-check the character codes regexlint reports for the first node
    # of `pat` against the codes derived from sre_parse's view of the same
    # pattern.
    regexlint_version = (
        Regex().get_parse_tree(pat).children[0].matching_character_codes)
    sre_parsed = sre_parse.parse(pat)
    print(sre_parsed)

    first_op = sre_parsed[0]
    # A bare (op, int) entry is handed over whole; otherwise only the
    # argument list of the first op is expanded.
    sre_chars = sre_parsed if isinstance(first_op[1], int) else first_op[1]
    print('inner', sre_chars)

    golden = list(expand_sre_in(sre_chars))
    order_matters = True
    try:
        negated = (first_op[0] == sre_constants.NOT_LITERAL
                   or first_op[1][0][0] == sre_constants.NEGATE)
        if negated:
            # Negated form: the reference set becomes the complement within
            # 0..255, and ordering is no longer compared.
            golden = [code for code in range(256) if code not in golden]
            order_matters = False
    except TypeError:
        # Indexing into a non-sequence argument lands here; treat the
        # pattern as not negated.
        pass

    print('sre_parse', golden)
    print('regexlint', regexlint_version)
    if not order_matters:
        print('extra:', sorted(set(regexlint_version) - set(golden)))
        print('missing:', sorted(set(golden) - set(regexlint_version)))
        assert sorted(golden) == sorted(regexlint_version)
    else:
        assert golden == regexlint_version
def test_out_of_order_alternation_with_anchor_after(self):
    # `(a|ab)\b`: with the anchor following the group, no prefix-ordering
    # report is expected.
    tree = Regex.get_parse_tree(r"(a|ab)\b")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    check_prefix_ordering(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_capture_group_in_repetition(self):
    # `(a)+((b)|c)*` is expected to yield three reports from the
    # capture-group-in-repetition checker.
    tree = Regex.get_parse_tree(r"(a)+((b)|c)*")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Text, Text)
    bygroups_check_no_capture_group_in_repetition(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 3)
def test_empty_alternation_in_root(self):
    # special case because linenum is bogus on root.
    tree = Regex.get_parse_tree(r"a|")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    check_no_empty_alternations(tree, problems)
    # The trailing empty branch should be reported exactly once.
    self.assertEqual(len(problems), 1)
def test_unnecessary_m_flag(self):
    # `(?m).` is expected to draw a single report from check_bad_flags with
    # leading fields ("113", logging.WARNING, 0).
    tree = Regex.get_parse_tree(r"(?m).")
    problems = []
    check_bad_flags(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    expected_head = ("113", logging.WARNING, 0)
    self.assertEqual(expected_head, problems[0][:3])
def test_consecutive_dots(self):
    # `a...` should produce one report with leading fields
    # ("111", logging.WARNING, 1).
    tree = Regex.get_parse_tree("a...")
    problems = []
    check_no_consecutive_dots(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    expected_head = ("111", logging.WARNING, 1)
    self.assertEqual(expected_head, problems[0][:3])
def test_single_entry_optional_charclass(self):
    # A one-member class followed by `?` is expected to be reported once,
    # at INFO level.
    tree = Regex.get_parse_tree(r"0[0]?")
    problems = []
    check_charclass_len(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    level = problems[0][1]
    self.assertEqual(logging.INFO, level)
def test_single_entry_charclass(self):
    # A bare one-member class is expected to be reported once, at WARNING
    # level.
    tree = Regex.get_parse_tree(r"[0]")
    problems = []
    check_charclass_len(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    level = problems[0][1]
    self.assertEqual(logging.WARNING, level)
def test_manual_empty_string_when_pop(self):
    # default() is handled in cmdline.py
    pattern = r""
    tree = Regex.get_parse_tree(pattern)
    problems = []
    rule = (pattern, Token, "#pop")
    manual_check_for_empty_string_match(tree, problems, rule)
    print(problems)
    # An empty pattern combined with "#pop" should be reported once.
    self.assertEqual(len(problems), 1)
def test_charclass_simplify_insensitive3(self):
    # With re.I | re.A, `[eE]` is expected to produce one report whose
    # message contains "-> e".
    flags = re.I | re.A
    tree = Regex.get_parse_tree(r"[eE]", flags)
    problems = []
    check_charclass_simplify(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    message = problems[0][3]
    self.assertTrue("-> e" in message, message)
def test_manual_zerowidth_match(self):
    # This one shouldn't produce an error.
    pattern = r"$\b"
    tree = Regex.get_parse_tree(pattern)
    problems = []
    rule = (pattern, Token, "#pop")
    manual_check_for_empty_string_match(tree, problems, rule)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_charclass_parsing(self):
    # `[ a]` should produce three nodes below the root; the last two carry
    # the space and the letter respectively.
    r = Regex.get_parse_tree(r'[ a]')
    l = list(find_all(r))[1:]  # skip root
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n'.join(fmttree(r)))
    self.assertEqual(3, len(l))
    self.assertEqual(r' ', l[1].data)
    self.assertEqual(r'a', l[2].data)
def test_escaped_space_parsing(self):
    # Parse `\ a` and expect two nodes below the root, the first holding
    # the escaped space and typed Other.Suspicious.
    r = Regex.get_parse_tree(r'\ a')
    l = list(find_all(r))[1:]  # skip root
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n'.join(fmttree(r)))
    self.assertEqual(2, len(l))
    self.assertEqual(r'\ ', l[0].data)
    self.assertEqual(Other.Suspicious, l[0].type)
def test_out_of_order_crazy_complicated(self):
    # A long operator alternation; exactly one prefix-ordering report is
    # expected from the part that is still enabled.
    pattern = r"""(!=|#|&&|&|\(|\)|\*|\+|,|-|-\.)"""
    # Disabled tail of the alternation, kept for reference:
    # |->|\.|\.\.|::|:=|:>|:|;;|;|<|<-|=|>|>]|>}|\?|\?\?|\[|\[<|\[>|\[\||]|_|`|{|{<|\||\|]|}|~)''')
    tree = Regex.get_parse_tree(pattern)
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    check_prefix_ordering(tree, problems)
    self.assertEqual(len(problems), 1)
def test_charclass_parsing(self):
    # `[ a]` should produce three nodes below the root; the last two carry
    # the space and the letter respectively.
    tree = Regex.get_parse_tree(r"[ a]")
    nodes = list(find_all(tree))[1:]  # skip root
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    self.assertEqual(3, len(nodes))
    space_node = nodes[1]
    letter_node = nodes[2]
    self.assertEqual(r" ", space_node.data)
    self.assertEqual(r"a", letter_node.data)
def test_bygroups_check_overlap_but_none_for_token(self):
    # One of the four groups is mapped to None; the overlap checker is
    # expected to stay silent.
    tree = Regex.get_parse_tree(r"(<(%)?)(\w+)((?(2)%)>)")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Punctuation, None, Name, Punctuation)
    bygroups_check_overlap(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_bygroups_check_overlap_descending2(self):
    # `(?:^|xx)(foo)` is expected to produce exactly one overlap report.
    tree = Regex.get_parse_tree(r"(?:^|xx)(foo)")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    bygroups_check_overlap(tree, problems, (Text, ))
    print(problems)
    self.assertEqual(len(problems), 1)
def test_bygroups_check_overlap_lookaround_ok(self):
    # A leading negative lookbehind before the groups should not trigger
    # the overlap checker.
    tree = Regex.get_parse_tree(r"(?<!\.)(Class|Structure|Enum)(\s+)")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Text, Text)
    bygroups_check_overlap(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_toknum_too_few(self):
    # One capture group against two tokens: expect exactly one report whose
    # leading fields are ("107", logging.ERROR, 0).
    tree = Regex.get_parse_tree("(a)")
    problems = []
    tokens = (Text, Text)
    bygroups_check_toknum(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 1)
    expected_head = ("107", logging.ERROR, 0)
    self.assertEqual(expected_head, problems[0][:3])
def test_bygroups_check_overlap_fail2(self):
    # `\b(a)$` with a single token: the overlap checker is expected to
    # report nothing.
    tree = Regex.get_parse_tree(r"\b(a)$")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    bygroups_check_overlap(tree, problems, (Text, ))
    print(problems)
    self.assertEqual(len(problems), 0)
def test_out_of_order_alternation_location(self):
    # `@` precedes `@@` in the alternation; one report is expected.
    tree = Regex.get_parse_tree(r"(foo|bar|@|@@)")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    check_prefix_ordering(tree, problems)
    self.assertEqual(len(problems), 1)
    # location of the second one.
    reported_at = problems[0][2]
    self.assertEqual(reported_at, 11)
def test_bygroups_check_overlap_descending_with_capture_and_gap(self):
    # Two captures inside an optional non-capturing group, separated by an
    # uncaptured `x`; exactly one overlap report is expected.
    pattern = r"(?:([A-Za-z_][A-Za-z0-9_]*)x(\.))?([A-Za-z_][A-Za-z0-9_]*)"
    tree = Regex.get_parse_tree(pattern)
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Text, Text, Text)
    bygroups_check_overlap(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 1)
def test_good_unicode_charclass(self):
    # A range between two non-ASCII code points should not be reported by
    # the homogeneous-ranges checker.
    tree = Regex.get_parse_tree(u"[\u1000-\uffff]")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    charclass = tree.children[0]
    print(charclass.chars)
    problems = []
    check_charclass_homogeneous_ranges(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_bad_charclass3(self):
    # The octal-escape range `[\010-\020]` is expected to produce one
    # report from the homogeneous-ranges checker.
    tree = Regex.get_parse_tree(r"[\010-\020]")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    charclass = tree.children[0]
    print(charclass.chars)
    problems = []
    check_charclass_homogeneous_ranges(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
def test_no_capture_group_in_repetition(self):
    # '?' is special-cased as being an okay repetition.
    tree = Regex.get_parse_tree(r"(a)?(b)")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Text, Text)
    bygroups_check_no_capture_group_in_repetition(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 0)
def test_repetition_curly2(self):
    # `x{2,5}` should parse to a greedy repetition with bounds 2..5.
    r = Regex.get_parse_tree(r'x{2,5}')
    l = list(find_all(r))[1:]  # skip root
    self.assertEqual(2, len(l))
    # l[0] is Repetition, l[1] is Literal(x)
    self.assertEqual(2, l[0].min)
    self.assertEqual(5, l[0].max)
    self.assertEqual(True, l[0].greedy)
def test_bygroups_check_overlap_nested_length2(self):
    # A capture nested inside another capture is expected to yield a
    # single ERROR-level overlap report.
    tree = Regex.get_parse_tree(r"\b(a)((b)c)$")
    rendered = "\n".join(fmttree(tree))
    print(rendered)
    problems = []
    tokens = (Text, Text, Text)
    bygroups_check_overlap(tree, problems, tokens)
    print(problems)
    self.assertEqual(len(problems), 1)
    level = problems[0][1]
    self.assertEqual(level, logging.ERROR)
def test_repetition_curly2(self):
    # `x{2,5}` should parse to a greedy repetition with bounds 2..5.
    tree = Regex.get_parse_tree(r'x{2,5}')
    nodes = list(find_all(tree))[1:]  # skip root
    self.assertEqual(2, len(nodes))
    # nodes[0] is Repetition, nodes[1] is Literal(x)
    repetition = nodes[0]
    self.assertEqual(2, repetition.min)
    self.assertEqual(5, repetition.max)
    self.assertEqual(True, repetition.greedy)
def test_repetition_plus(self):
    # `x+` should parse to a greedy repetition with min 1 and no max.
    tree = Regex.get_parse_tree(r'x+')
    nodes = list(find_all(tree))[1:]  # skip root
    self.assertEqual(2, len(nodes))
    # nodes[0] is Repetition, nodes[1] is Literal(x)
    repetition = nodes[0]
    self.assertEqual(1, repetition.min)
    self.assertEqual(None, repetition.max)
    self.assertEqual(True, repetition.greedy)
def test_single_entry_charclass_ok2(self):
    # Under re.VERBOSE, `[#]` is still expected to be reported once at
    # WARNING level, with a message mentioning "backslash".
    tree = Regex.get_parse_tree(r"[#]", re.VERBOSE)
    problems = []
    check_charclass_len(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    report = problems[0]
    self.assertEqual(logging.WARNING, report[1])
    self.assertTrue("backslash" in report[3])
def test_charclass_simplify_suggest_range(self):
    # Need to use ASCII mode to enable this checker.
    tree = Regex.get_parse_tree(r"[01acb234]", re.A)
    problems = []
    check_charclass_simplify(tree, problems)
    print(problems)
    self.assertEqual(len(problems), 1)
    # The suggestion should collapse the members into ranges.
    message = problems[0][3]
    self.assertTrue("0-4a-c" in message, message)
def test_repetition_curly1(self):
    # `x{5,5}?` should parse to a non-greedy repetition with bounds 5..5.
    r = Regex.get_parse_tree(r'x{5,5}?')
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n'.join(fmttree(r)))
    l = list(find_all(r))[1:]  # skip root
    self.assertEqual(2, len(l))
    # l[0] is Repetition, l[1] is Literal(x)
    self.assertEqual(5, l[0].min)
    self.assertEqual(5, l[0].max)
    self.assertEqual(False, l[0].greedy)
def test_basic_verbose_parsing(self):
    # Verbose-mode parsing: five nodes are expected below the root, and the
    # test compares each node's `parsed_start` with its raw `start` offset
    # (presumably `parsed_start` ignores skipped whitespace and comments).
    #
    # NOTE(review): the literal's line breaks and 24-space continuation
    # indent are load-bearing. They put `a` at raw offset 6 and `d` at raw
    # offset 72, and keep `c`/`d` out of the `# comment`. The exact layout
    # was reconstructed from those asserted offsets; confirm it against the
    # upstream test before relying on it.
    r = Regex.get_parse_tree(r'''(?x)  a   b # comment
                        c
                        d''')
    l = list(find_all(r))[1:]  # skip root
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n'.join(fmttree(r)))
    self.assertEqual(5, len(l))
    self.assertEqual((4, 6), (l[1].parsed_start, l[1].start))
    self.assertEqual('d', l[-1].data)
    self.assertEqual((7, 72), (l[-1].parsed_start, l[-1].start))
def test_end_set_correctly(self):
    # Check start/end offsets of the root and of both alternation branches
    # in `\b(foo|bar)\b`.
    r = Regex.get_parse_tree(r'\b(foo|bar)\b')
    self.assertEqual(0, r.start)
    capture = r.children[1]
    foo = capture.children[0].children[0]
    self.assertEqual(3, foo.start)
    self.assertEqual(6, foo.end)
    bar = capture.children[0].children[1]
    self.assertEqual(7, bar.start)
    self.assertEqual(10, bar.end)
    self.assertEqual(13, r.end)
def test_width(self):
    # For `\s`, an inline comment and `\b`, width() is expected to return
    # True, False and False respectively.
    r = Regex.get_parse_tree(r'\s(?#foo)\b')
    l = list(find_all(r))[1:]  # skip root
    self.assertEqual([True, False, False],
                     [width(i.type) for i in l])
def test_complex_charclass(self):
    # Smoke test: there are no assertions, so this only checks that
    # parsing, traversal and tree formatting of a punctuation-heavy
    # character class complete without raising.
    r = Regex.get_parse_tree(r'[]\[:_@\".{}()|;,]')
    # The traversal result is not inspected; it is still materialised so
    # that find_all() runs over the whole tree.
    list(find_all(r))[1:]  # skip root
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n'.join(fmttree(r)))
def reconstruct_runner(pat):
    # Round-trip check: reconstructing the parse tree of `pat` must give
    # back the original pattern text.
    tree = Regex.get_parse_tree(pat)
    rebuilt = tree.reconstruct()
    assert pat == rebuilt
def do_it(self, s):
    # for debugging: dump the raw token stream for `s`, then return its
    # parse tree.
    for x in Regex().get_tokens_unprocessed(s):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(x)
    r = Regex.get_parse_tree(s)
    return r
def test_find_by_type(self):
    # A combined flag group should come back as a single Directive node
    # equal to the hand-built one.
    golden = [Node(t=Other.Directive, start=0, parsed_start=0,
                   data='(?mi)')]
    r = Regex.get_parse_tree(r'(?mi)')
    self.assertEqual(golden, list(find_all_by_type(r, Other.Directive)))
def test_find_all_by_type(self):
    # Two separate flag groups should be found as two Directive nodes, in
    # pattern order.
    r = Regex.get_parse_tree(r'(?m)(?i)')
    directives = list(find_all_by_type(r, Other.Directive))
    self.assertEqual(2, len(directives))
    self.assertEqual('(?m)', directives[0].data)
    self.assertEqual('(?i)', directives[1].data)
def test_char_range(self):
    # `[a-z]` should give a CharClass whose `chars` has a single entry.
    r = Regex.get_parse_tree(r'[a-z]')
    # The builtin next() works on Python 2.6+ and 3, unlike the `.next()`
    # method.
    charclass = next(find_all_by_type(r, Other.CharClass))
    self.assertEqual(1, len(charclass.chars))
def test_comment(self):
    # An inline `(?#...)` group should be found as exactly one Comment node
    # carrying its full source text.
    r = Regex.get_parse_tree(r'(?#foo)')
    l = list(find_all_by_type(r, Other.Comment))
    self.assertEqual(1, len(l))
    self.assertEqual('(?#foo)', l[0].data)