def test_escaped():
    """Parse markup-like text through ``Escaped`` with a named and an anonymous inner pattern."""
    source = "<p>Some Text</p>"
    markup_letters = pg.Words.letters + "</>"

    # Named inner pattern: its name shows up as a nested node in the result.
    named_inner = pg.NamedPattern(
        "html_text",
        pg.Many(pg.Words(markup_letters)),
    )
    outer = pg.NamedPattern("escaped_text", pg.Escaped(named_inner))
    want = ["escaped_text", ["html_text", "<p>Some Text</p>"]]
    assert want == pg.parse_string(source, outer)

    # Anonymous inner pattern: the text sits directly under the outer name.
    anonymous_inner = pg.Many(pg.Words(markup_letters))
    outer = pg.NamedPattern("escaped_text", pg.Escaped(anonymous_inner))
    want = ["escaped_text", "<p>Some Text</p>"]
    assert want == pg.parse_string(source, outer)
def test_match_insert():
    """Check ``Insert`` on its own and inside an ``AllOf`` sequence.

    ``Insert("a")`` is asserted to yield ``"a"`` from empty input, and an
    ``Insert(" : ")`` placed between two ``Words`` is asserted to appear
    between them in the match while the surrounding newlines are ignored.
    """
    insert_a = pg.Insert("a")
    match, rest = insert_a("", 'insert_a')
    assert match == ['insert_a', "a"]
    assert rest == ""

    joined_lines = pg.AllOf(
        pg.Ignore("\n"),
        pg.Words(),
        pg.Insert(" : "),
        pg.Ignore("\n"),
        pg.Words())
    # The line breaks are spelled as explicit escapes so they cannot be lost
    # to reformatting: each word must be preceded by the "\n" that the
    # corresponding Ignore("\n") step consumes.
    data = "\nflamble\nfloosit"
    expected = [
        "joined_lines",
        'flamble',
        " : ",
        'floosit']
    match, rest = joined_lines(data, 'joined_lines')
    assert match == expected
    assert rest == ""
def test_parse_many():
    """Parse a sentence as ``Many`` phrases, each either plain words or emphasis."""
    starred = pg.AllOf(pg.Ignore("*"), pg.Words(), pg.Ignore("*"))
    emphasis = pg.NamedPattern("emphasis", starred)
    words = pg.NamedPattern("words", pg.Words())
    phrase = pg.NamedPattern("phrase", pg.OneOf(words, emphasis))
    body = pg.NamedPattern("body", pg.Many(phrase))

    result = pg.parse_string("a phrase with *bold words* in it", body)
    assert result == [
        "body",
        ["phrase", ["words", "a phrase with "]],
        ["phrase", ["emphasis", "bold words"]],
        ["phrase", ["words", " in it"]],
    ]

    # Input made only of digits is expected to be rejected.
    with py.test.raises(pg.NoPatternFound):
        result = pg.parse_string("123", body)
def test_parse_one_of():
    """Parse emphasised and plain input through a ``OneOf`` of two named patterns."""
    starred = pg.AllOf(pg.Ignore("*"), pg.Words(), pg.Ignore("*"))
    emphasis = pg.NamedPattern("emphasis", starred)
    words = pg.NamedPattern("words", pg.Words())
    phrase = pg.NamedPattern("phrase", pg.OneOf(words, emphasis))

    # Starred input resolves to the emphasis alternative.
    parsed = pg.parse_string("*bold words*", phrase)
    assert parsed == ["phrase", ["emphasis", "bold words"]]

    # Unstarred input resolves to the plain-words alternative.
    parsed = pg.parse_string("normal words", phrase)
    assert parsed == ["phrase", ["words", "normal words"]]
def test_match_many_specificty():
    """Match ``"abac"`` with ``Many`` over two single-letter patterns and a ``Words`` fallback."""
    letter_a = pg.NamedPattern("letter_a", "a")
    letter_b = pg.NamedPattern("letter_b", "b")
    other_letters = pg.NamedPattern("other_letters", pg.Words())
    match_letters = pg.Many(letter_a, letter_b, other_letters)

    matched, remainder = match_letters("abac", "match_letters")

    # Each character is expected under the pattern listed for it here.
    assert matched == [
        "match_letters",
        ["letter_a", "a"],
        ["letter_b", "b"],
        ["letter_a", "a"],
        ["other_letters", "c"],
    ]
    assert remainder == ""
def test_match_one_of():
    """Call a ``OneOf`` pattern directly with starred, numeric, plain and unnamed input."""
    star = pg.Ignore("*")
    emphasis = pg.AllOf(star, pg.Many(pg.Not("*")), star)
    phrase = pg.OneOf(pg.Words(), emphasis)

    # Starred input: the characters between the stars come back one by one.
    matched, remainder = phrase("*bold*", "phrase")
    assert matched == ["phrase", "b", "o", "l", "d"]
    assert remainder == ""

    # Digits satisfy neither alternative.
    with py.test.raises(pg.NoPatternFound):
        matched, remainder = phrase("123", "phrase")

    matched, remainder = phrase("text", "phrase")
    assert matched == ["phrase", "text"]
    assert remainder == ""

    # Test match with no name
    matched, remainder = phrase("text", "")
    assert matched == ["", "text"]
    assert remainder == ""
def test_parse_words():
    """Parse a whole sentence with a single named ``Words`` pattern."""
    sentence = "The confused dog jumped over the fox"
    body = pg.NamedPattern("body", pg.Words())

    parsed = pg.parse_string(sentence, body)

    assert parsed == ["body", "The confused dog jumped over the fox"]
def test_indented_bullet():
    """Match two paragraphs inside an ``Indented`` block introduced by ``"* "``.

    The first paragraph follows the bullet marker; the second sits on its own
    line. Both are expected as separate ``paragraph`` nodes.
    """
    paragraph = pg.NamedPattern(
        'paragraph',
        pg.AllOf(
            pg.Ignore(pg.Optional("\n")),
            pg.Words()))
    indented_paragraphs = pg.Indented(
        pg.Many(paragraph),
        initial_indent="* ")
    # Line breaks are written as explicit escapes so they survive
    # reformatting; without the "\n" the two paragraphs would be one run of
    # words and could not be matched separately.
    # NOTE(review): the continuation line is indented by two spaces to line
    # up with the two-character "* " marker -- confirm the intended width.
    data = (
        "\n"
        "* Paragraph One\n"
        "  Paragraph Two\n"
    ).strip()
    expected = [
        'indented_paragraphs',
        ['paragraph', "Paragraph One"],
        ['paragraph', "Paragraph Two"]]
    match, rest = indented_paragraphs(data, "indented_paragraphs")
    assert match == expected
def test_match_indented_nested_bullets():
    """Match a bullet followed by a nested, further-indented bullet.

    ``indented_bullets`` refers to itself through ``pg.lazy``, so a bullet may
    be followed by another ``indented_bullets`` block. The trailing newline
    of the input is expected to be left unmatched.
    """
    bullet = pg.NamedPattern(
        'bullet',
        pg.AllOf(
            pg.Ignore(
                pg.Optional(
                    pg.Many("\n"))),
            pg.Ignore("* "),
            pg.Words()))

    @pg.lazy
    def indented_bullets():
        return pg.Indented(
            pg.AllOf(
                bullet,
                pg.Optional(
                    indented_bullets)),
            optional=True)

    # Line breaks are written as explicit escapes so they survive
    # reformatting; the final "\n" is the remainder asserted below.
    # NOTE(review): the nested bullet is indented by two spaces here --
    # confirm the indent width the fixture is meant to use.
    data = (
        "\n"
        "* Line One\n"
        "  * Line Two\n"
    )
    expected = [
        'indented_bullets',
        ['bullet', "Line One"],
        ['indented_bullets',
         ['bullet', "Line Two"]]]
    match, rest = indented_bullets(data, 'indented_bullets')
    assert match == expected
    assert rest == "\n"
def test_with_indent_chars(self):
    """Test that Indented can match with indents other than whitespace.

    Every input line starts with ``"> "``, which is passed as
    ``indent_pattern``; the match is expected to contain the three lines of
    text separated by ``"\\n"`` entries, without the markers.
    """
    lines = pg.Many(
        pg.OneOf(
            pg.Words(),
            pg.Text("\n")))
    indented_text = pg.Indented(
        lines,
        indent_pattern="> ")
    # Line breaks are written as explicit escapes so they survive
    # reformatting; the expected match below contains them as separate items.
    data = (
        "\n"
        "> Some text\n"
        "> indented with\n"
        "> non whitespace\n"
    ).strip()
    expected = [
        'indented_text',
        "Some text",
        "\n",
        "indented with",
        "\n",
        "non whitespace"]
    match, rest = indented_text(data, 'indented_text')
    assert match == expected
    assert rest == ""
def test_get_current_indentation_initial_indent():
    """Check ``_get_current_indentation`` on text that starts with a ``"* "`` marker."""
    bullet_marker = pg.AllOf("* ")
    indented_text = pg.Indented(pg.Words(), initial_indent=bullet_marker)

    found = pg._get_current_indentation("* foo", indented_text)

    # NOTE(review): the expected indent is a single space although the marker
    # "* " is two characters wide -- confirm the intended width.
    assert (" ", " foo") == found
def test_get_current_indentation_initial_indent_with_tabs():
    """Check ``_get_current_indentation`` when the initial indent is ``"*"`` plus a tab."""
    # The star is wrapped in Ignore; only the tab appears in the expected indent.
    star_then_tab = pg.AllOf(pg.Ignore("*"), "\t")
    indented_text = pg.Indented(pg.Words(), initial_indent=star_then_tab)

    found = pg._get_current_indentation("*\tfoo", indented_text)

    assert ("\t", "\tfoo") == found
def title_level_2():
    """Build the pattern for a title line introduced by ``"## "``.

    The sequence is an ignored ``"## "`` prefix, the title ``Words``, then an
    ignored tail made of an optional space, an optional ``"##"`` and a
    required newline.
    """
    prefix = pg.Ignore("## ")
    title = pg.Words()
    tail = pg.Ignore(
        pg.AllOf(
            pg.Optional(" "),
            pg.Optional("##"),
            "\n",
        )
    )
    return pg.AllOf(prefix, title, tail)
def test_match_all_of():
    """Call ``AllOf`` patterns directly and check match, remainder and failure.

    NOTE(review): a second ``test_match_all_of`` is defined later in this
    file; the later definition replaces this one at import time -- confirm
    which is meant to run.
    """
    letter_a = pg.NamedPattern("letter_a", "a")
    letter_b = pg.NamedPattern("letter_b", "b")

    # Two named sub-patterns in sequence.
    word_ab = pg.AllOf(letter_a, letter_b)
    matched, remainder = word_ab("ab", "word_ab")
    assert matched == ["word_ab", ["letter_a", "a"], ["letter_b", "b"]]
    assert remainder == ""

    # A named sub-pattern followed by an anonymous one.
    word_abc = pg.AllOf(letter_a, pg.Words())
    matched, remainder = word_abc("abc", "word_ab")
    assert matched == ["word_ab", ["letter_a", "a"], "bc"]
    assert remainder == ""

    # Empty name; the "!" is expected back as the remainder.
    matched, remainder = word_abc("abc!", "")
    assert matched == ["", ["letter_a", "a"], "bc"]
    assert remainder == "!"

    # Ignored delimiters do not appear in the match.
    emphasis = pg.AllOf(pg.Ignore("*"), pg.Words(), pg.Ignore("*"))
    matched, remainder = emphasis("*abc*", "emphasis")
    assert matched == ["emphasis", "abc"]

    with py.test.raises(pg.NoPatternFound):
        result = word_ab("cab", "word_ab")

    # Everything ignored: an empty string is expected as the only content.
    ignore_ab = pg.AllOf(pg.Ignore("a"), pg.Ignore("b"))
    matched, remainder = ignore_ab("ab", "ignore_ab")
    assert matched == ["ignore_ab", ""]
    assert remainder == ""
def test_parse_ignore():
    """Parse starred text and expect the ignored stars to be absent from the result."""
    starred = pg.AllOf(pg.Ignore("*"), pg.Words(), pg.Ignore("*"))
    emphasis = pg.NamedPattern("emphasis", starred)

    parsed = pg.parse_string("*bold words*", emphasis)

    assert parsed == ["emphasis", "bold words"]
def test_reprs():
    """Compare ``repr()`` of several pattern objects with their expected text."""
    cases = [
        # Pattern matchers
        (pg.Some("a"), "<Some pattern='a'>"),
        (pg.Ignore("#"), "<Ignore pattern=<Text pattern='#'>>"),
        (pg.Not("#"), "<Not pattern=<Text pattern='#'>>"),
        (pg.Optional("#"), "<Optional pattern=<Text pattern='#'>>"),
        # Option matchers
        (
            pg.OneOf("abc", pg.Not("#")),
            "<OneOf options=(<Text pattern='abc'>, <Not pattern=<Text pattern='#'>>)>",
        ),
        (
            pg.Many("abc", pg.Not("#")),
            "<Many options=(<Text pattern='abc'>, <Not pattern=<Text pattern='#'>>)>",
        ),
        (
            pg.Words(),
            "<Words letters='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz .,'>",
        ),
    ]
    for pattern, expected_repr in cases:
        assert repr(pattern) == expected_repr
def test_retaining_linebreaks(self):
    """Match one indented line and expect its trailing newline as the remainder."""
    indented_text = pg.Indented(pg.Words())
    source = " Some text\n"

    matched, remainder = pg.match_indented(source, indented_text, "indented_text")

    assert matched == ["indented_text", "Some text"]
    assert remainder == "\n"
def test_indented_with_anonymous_pattern_and_subpattern(self):
    """Match indented text when ``None`` is passed as the pattern name."""
    indented_text = pg.Indented(pg.Words())
    source = " Some text"

    matched, remainder = pg.match_indented(source, indented_text, None)

    # The name slot of the result is expected to hold the None that was passed.
    assert matched == [None, "Some text"]
    assert remainder == ""
def test_reindenting_indented_rest(self):
    """Match the first indented line and check the indented remainder."""
    indented_text = pg.Indented(pg.Words())
    source = " Some text\n Unmatched text\n More unmatched text"

    matched, remainder = pg.match_indented(source, indented_text, "indented_text")

    assert matched == ["indented_text", "Some text"]
    # The unmatched lines are expected back with their leading indent.
    assert remainder == "\n Unmatched text\n More unmatched text"
def test_match_many_complex():
    """Call ``Many`` over emphasis and plain words on a sentence that mixes both."""
    starred = pg.AllOf(pg.Ignore("*"), pg.Words(), pg.Ignore("*"))
    emphasis = pg.NamedPattern("emphasis", starred)
    words = pg.NamedPattern("words", pg.Words())
    body = pg.Many(emphasis, words)

    matched, remainder = body("a phrase with *bold words* in it", "body")

    assert matched == [
        "body",
        ["words", "a phrase with "],
        ["emphasis", "bold words"],
        ["words", " in it"],
    ]
    assert remainder == ""
def test_match_all_of():
    """Exercise ``pg.match_all_of`` with patterns given as plain functions.

    NOTE(review): an earlier function in this file has the same name; this
    later definition replaces it at import time -- confirm that is intended.
    """
    # The function names are kept as-is: they appear as node names in the
    # expected matches below.
    def letter_a():
        return "a"

    def letter_b():
        return "b"

    word_ab = pg.AllOf(letter_a, letter_b)
    matched, remainder = pg.match_all_of("ab", word_ab, "word_ab")
    assert matched == ["word_ab", ["letter_a", "a"], ["letter_b", "b"]]
    assert remainder == ""

    word_abc = pg.AllOf(letter_a, pg.Words())
    matched, remainder = pg.match_all_of("abc", word_abc, "word_ab")
    assert matched == ["word_ab", ["letter_a", "a"], "bc"]
    assert remainder == ""

    # Empty name; the "!" is expected back as the remainder.
    matched, remainder = pg.match_all_of("abc!", word_abc, "")
    assert matched == ["", ["letter_a", "a"], "bc"]
    assert remainder == "!"

    emphasis = pg.AllOf(
        lambda: pg.Ignore("*"),
        lambda: pg.Words(),
        lambda: pg.Ignore("*"),
    )
    matched, remainder = pg.match_all_of("*abc*", emphasis, "emphasis")
    assert matched == ["emphasis", "abc"]

    with py.test.raises(pg.NoPatternFound):
        result = pg.match_all_of("cab", word_ab, "word_ab")

    # Everything ignored: an empty string is expected as the only content.
    ignore_ab = pg.AllOf(pg.Ignore("a"), pg.Ignore("b"))
    matched, remainder = pg.match_all_of("ab", ignore_ab, "ignore_ab")
    assert matched == ["ignore_ab", ""]
    assert remainder == ""
def test_indented_with_anonymous_subpattern(self):
    """Call an ``Indented`` pattern that wraps an unnamed ``Words`` pattern."""
    indented_text = pg.Indented(pg.Words())
    source = " Some text"

    matched, remainder = indented_text(source, "indented_text")

    assert matched == ["indented_text", "Some text"]
    assert remainder == ""
def test_match_escaped():
    """Call an ``Escaped`` pattern directly on markup-like text."""
    markup_letters = pg.Words.letters + "</>"
    html_text = pg.NamedPattern(
        "html_text",
        pg.Many(pg.Words(markup_letters)),
    )
    escaped_text = pg.Escaped(html_text)

    matched, remainder = escaped_text("<p>Some text</p>", "escaped_text")

    assert matched == ["escaped_text", ["html_text", "<p>Some text</p>"]]
    assert remainder == ""
def test_indented_with_named_subpattern(self):
    """Match space-indented and tab-indented text with a named sub-pattern."""
    paragraph = pg.NamedPattern("paragraph", pg.Words())
    indented_text = pg.Indented(paragraph)
    want = ["indented_text", ["paragraph", "Some text"]]

    # Both indentation styles are expected to give the same match.
    for source in (" Some text", "\tSome text"):
        matched, remainder = indented_text(source, "indented_text")
        assert matched == want
        assert remainder == ""
def test_match_words():
    """Check that ``Words`` matches letters and punctuation."""
    plain = pg.Words()

    matched, remainder = plain("some words", "plain")
    assert matched == ["plain", "some words"]
    assert remainder == ""

    # Match with no name
    matched, remainder = plain("some words", "")
    assert matched == ["", "some words"]
    assert remainder == ""

    # The digits are expected back as the remainder.
    matched, remainder = plain("some words 123", "plain")
    assert matched == ["plain", "some words "]
    assert remainder == "123"

    matched, remainder = plain("Some words, and punctuation.", "plain")
    assert matched == ["plain", "Some words, and punctuation."]
    assert remainder == ""
def test_with_optional(self):
    """Check that ``optional=True`` lets ``Indented`` match text with no indent."""
    bullet_line = pg.AllOf(pg.Ignore("* "), pg.Words())
    list_item = pg.NamedPattern("list_item", bullet_line)
    indented_bullets = pg.Indented(pg.Many(list_item), optional=True)

    matched, remainder = indented_bullets("* A bullet", "indented_bullets")

    assert matched == ["indented_bullets", ["list_item", "A bullet"]]
    assert remainder == ""
def test_optional():
    """Parse with a named ``Optional`` pattern, alone and inside an ``AllOf``."""
    optional_a = pg.NamedPattern("optional_a", pg.Optional("a"))

    # On its own: the expected result carries an empty name.
    parsed = pg.parse_string("a", optional_a)
    assert ["", "a"] == parsed

    # Inside a sequence: the optional match is expected inline, without a
    # node of its own.
    letters = pg.NamedPattern("letters", pg.Words())
    body = pg.NamedPattern("body", pg.AllOf(optional_a, letters))
    parsed = pg.parse_string("abc", body)
    assert ["body", "a", ["letters", "bc"]] == parsed
def html_text():
    """Return a ``Many`` of ``Words`` whose letters also include ``<``, ``/`` and ``>``."""
    markup_letters = pg.Words.letters + "</>"
    return pg.Many(pg.Words(markup_letters))
def list_item():
    """Return the pattern for one bullet line.

    The sequence is: any leading newlines (optional, ignored), an ignored
    ``"* "`` marker, then the item's ``Words``.
    """
    leading_newlines = pg.Ignore(pg.Optional(pg.Many("\n")))
    marker = pg.Ignore("* ")
    return pg.AllOf(leading_newlines, marker, pg.Words())
def letters():
    """Return a ``Words`` pattern built with its default arguments."""
    pattern = pg.Words()
    return pattern