Example #1
def test_glr_recovery_custom_new_position():
    """
    Test that custom recovery that increment position works.
    """
    def custom_recovery(head, error):
        # This recovery simply skips over the erroneous part of the input, '& 89'.
        head.position += 4
        return head.parser.default_error_recovery(head)

    parser = GLRParser(g, actions=actions, error_recovery=custom_recovery)

    results = parser.parse('1 + 5 & 89 - 2')

    assert len(parser.errors) == 1
    assert len(results) == 2
    result_set = {parser.call_actions(tree) for tree in results}
    assert len(result_set) == 1
    # The calculated result should be that of '1 + 5 - 2'
    assert result_set.pop() == 4
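
The recovery tests in Examples #1, #2, #9, #10 and #12 reference module-level g and actions fixtures that are not shown in these excerpts (Token and Error, used in the later variants, are parglare classes as well). A minimal sketch of what the fixtures might look like, assuming a simple plus/minus expression grammar; the productions and actions here are illustrative, not the originals:

from parglare import Grammar, GLRParser

# Hypothetical fixtures; the real test module defines similar objects.
g = Grammar.from_string(r"""
E: E '+' E | E '-' E | '(' E ')' | number;
terminals
number: /\d+/;
""")

actions = {
    'E': [
        lambda _, n: n[0] + n[2],   # E '+' E
        lambda _, n: n[0] - n[2],   # E '-' E
        lambda _, n: n[1],          # '(' E ')'
        lambda _, n: int(n[0]),     # number
    ],
}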
Example #2

def test_glr_recovery_custom_new_position():
    """
    Test that a custom recovery that increments the position works.
    """

    def custom_recovery(context, error):
        # This recovery simply skips over the erroneous part of the input, '& 89'.
        return None, context.position + 4

    parser = GLRParser(g, actions=actions, error_recovery=custom_recovery,
                       debug=True)

    results = parser.parse('1 + 5 & 89 - 2')

    assert len(parser.errors) == 1
    assert len(results) == 2
    assert len(set(results)) == 1
    # The calculated result should be that of '1 + 5 - 2'
    assert results[0] == 4
Example #3
def test_unbounded_ambiguity():
    """
    This grammar has unbounded ambiguity.

    For input "x" + "b"*n + "x", M derives the leading "x" followed by some
    of the b's while N derives the remaining b's and the trailing "x", so
    there are n + 1 parses (5 for "xbbbbx").

    Grammar G6 from: Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammars"
    """
    grammar = """
    S: M N;
    M: A M "b" | "x";
    N: "b" N A | "x";
    A: EMPTY;
    """

    g = Grammar.from_string(grammar)

    p = GLRParser(g)
    results = p.parse("xbbbbx")

    assert len(results) == 5
Example #4
def test_right_nullable():
    """
    Grammar Γ2 (p. 17) from:
    Scott, E. and Johnstone, A., 2006. Right nulled GLR parsers. ACM
    Transactions on Programming Languages and Systems (TOPLAS), 28(4),
    pp.577-618.

    """
    grammar = """
    S: "a" S A | EMPTY;
    A: EMPTY;
    """

    g = Grammar.from_string(grammar)

    p = GLRParser(g)
    results = p.parse("aa")

    assert len(results) == 1
Example #5
def test_reduce_enough_many_empty():
    """
    This is an extension of the previous grammar where the parser must reduce
    enough A B pairs to succeed.

    The language is the same: xb^n, n>=0
    """
    grammar = """
    S: A B S "b";
    S: "x";
    A: EMPTY;
    B: EMPTY;
    """
    g = Grammar.from_string(grammar)

    p = GLRParser(g)
    results = p.parse("xbbb")

    assert len(results) == 1
Example #6
def test_cyclic_grammar_1():
    """
    Grammar G1 from the paper: "GLR Parsing for ε-Grammars" by Rahman Nozohoor-Farshi
    """
    grammar = """
    S: A;
    A: S;
    A: 'x';
    """
    g = Grammar.from_string(grammar)
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    p = GLRParser(g)
    results = p.parse('x')

    # This grammar builds an infinite/looping tree:
    # x -> A -> S -> A -> S...
    with pytest.raises(LoopError):
        len(results)
Example #7
def test_lr2_grammar():

    grammar = r"""
    Model: Prods EOF;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;

    terminals
    ID: /\w+/;
    """

    input_str = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    g = Grammar.from_string(grammar)

    # This grammar is not LR(1) as it requires at least two tokens of
    # lookahead to decide what to do on each ID from the right-hand side:
    # if '=' follows the ID, it should reduce "Prod";
    # otherwise it should reduce the ID as ProdRefs.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    # The prefer_shifts strategy (the default) will remove the conflicts, but
    # the resulting parser will fail to parse this input as it greedily
    # consumes the next rule's ID as a body element of the previous Prod rule.
    parser = Parser(g)
    with pytest.raises(ParseError):
        parser.parse(input_str)

    # But it can be parsed unambiguously by GLR.
    p = GLRParser(g)

    results = p.parse(input_str)
    assert len(results) == 1
Example #8
def todo_test_cyclic_grammar_2():
    """
    From the paper: "GLR Parsing for ε-Grammars" by Rahman Nozohoor-Farshi

    """
    grammar = """
    S: S S;
    S: 'x';
    S: EMPTY;
    """
    g = Grammar.from_string(grammar)

    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    p = GLRParser(g, debug=True)
    results = p.parse('xx')

    # This grammar has infinite ambiguity, but by minimizing empty reductions
    # we get only one result: xx -> xS -> SS -> S
    assert len(results) == 1
Example #9
def test_glr_recovery_custom_new_position():
    """
    Test that a custom recovery that increments the position works.
    """
    error = Error(0, 1, message="Error")

    def custom_recovery(parser, input_str, position, symbols):
        # This recovery simply skips over the erroneous part of the input, '& 89'.
        return error, position + 4, None

    parser = GLRParser(g, actions=actions, error_recovery=custom_recovery,
                       debug=True)

    results = parser.parse('1 + 5 & 89 - 2')

    assert len(parser.errors) == 1
    assert parser.errors[0] is error
    assert len(results) == 2
    assert len(set(results)) == 1
    # The calculated result should be that of '1 + 5 - 2'
    assert results[0] == 4
Example #10
def test_glr_recovery_custom_new_token():
    """
    Test that a custom recovery that introduces a new token works.
    """
    error = Error(0, 1, message="Error")

    def custom_recovery(parser, input_str, position, symbols):
        # Here we will introduce the missing operation token
        return error, None, Token(g.get_terminal('-'), '-', 0)

    parser = GLRParser(g, actions=actions, error_recovery=custom_recovery,
                       debug=True)

    results = parser.parse('1 + 5 8 - 2')

    assert len(parser.errors) == 1
    assert parser.errors[0] is error
    assert len(results) == 5
    assert len(set(results)) == 2
    assert -4 in results
    assert 0 in results
Example #11
def test_bounded_direct_ambiguity():
    """
    This grammar has bounded direct ambiguity of degree 2, despite being
    unboundedly ambiguous: for every k we can find a string that gives at
    least k solutions.

    The language is t^{m}xb^{n}, n>=m>=0

    Grammar G5 from: Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammars"
    """
    grammar = """
    S: A S "b" | "x";
    A: "t" | EMPTY;
    """

    g = Grammar.from_string(grammar)

    p = GLRParser(g)
    results = p.parse("txbbbbb")

    assert len(results) == 5
Example #12
def test_glr_recovery_custom_new_token():
    """
    Test that a custom recovery that introduces a new token works.
    """
    def custom_recovery(head, error):
        # Here we will introduce the missing operation token
        head.token_ahead = Token(g.get_terminal('-'),
                                 '-',
                                 head.position,
                                 length=0)
        return True

    parser = GLRParser(g, actions=actions, error_recovery=custom_recovery)

    results = parser.parse('1 + 5 8 - 2')

    assert len(parser.errors) == 1
    assert len(results) == 5
    result_set = {parser.call_actions(tree) for tree in results}
    assert len(result_set) == 2
    assert -4 in result_set
    assert 0 in result_set
Example #13
def test_cyclic_grammar_2():
    """
    Grammar G2 from the paper: "GLR Parsing for ε-Grammars" by Rahman Nozohoor-Farshi.
    The classic Tomita GLR algorithm doesn't terminate on this grammar.

    parglare will succeed in parsing but will report a LoopError during any
    tree traversal, as the built SPPF is circular.
    """
    grammar = """
    S: S S;
    S: 'x';
    S: EMPTY;
    """
    g = Grammar.from_string(grammar)

    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    p = GLRParser(g)
    results = p.parse('xx')

    with pytest.raises(LoopError):
        len(results)
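
Based on the behaviour shown above, calling code that may receive a cyclic forest can guard the traversal explicitly. A small sketch, assuming LoopError is importable from parglare.exceptions (as SRConflicts is):

from parglare.exceptions import LoopError

try:
    count = len(results)
except LoopError:
    # The SPPF is cyclic, so enumerating all trees would not terminate.
    count = None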
Example #14
def test_bounded_ambiguity():
    """
    This grammar has bounded ambiguity.

    The language is the same: xb^n, n>=0, but each valid sentence will
    always have two derivations (one through M and one through N).

    Grammar G4 from: Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammars"
    """

    grammar = """
    S: M | N;
    M: A M "b" | "x";
    N: A N "b" | "x";
    A: EMPTY;
    """

    g = Grammar.from_string(grammar)

    p = GLRParser(g)
    results = p.parse("xbbb")

    assert len(results) == 2
Example #15
def test_cyclic_grammar_3():
    """
    Grammar with an indirect cycle. The reduction sequence loops:
    r:EMPTY->A; r:A->S; r:EMPTY->A; r:SA->S; r:EMPTY->A; r:SA->S; ...
    """
    grammar = """
    S: S A | A;
    A: "a" | EMPTY;
    """

    g = Grammar.from_string(grammar)

    Parser(g)

    p = GLRParser(g)
    results = p.parse('aa')

    assert len(results) == 2
    expected = [
        ['a', 'a'],
        [[[], 'a'], 'a']
    ]

    assert results == expected
Example #16
def test_issue_112_wrong_error_report():
    """
    Test that the token ahead is not among the expected symbols in the
    error message.
    """
    grammar = r'''
    _input:                  sentence
                        |   standalonePhrase;

    standalonePhrase:       interjection* __phrase;

    sentence:               interjection* sentence1
                        |   sentenceJoiningAdverb? sentence1;

    sentence1:              subordinateClause* _clause sentenceEnd
                        |   subordinateClause* quotationShortForm;

    subordinateClause:      _clause clauseConnector punctuation*;


    _clause:                __phrase* verbPhrase
                        |   __phrase* complement? copulaPhrase;

    // ---- phrases ----

    __phrase:                 topic
                        |   subject
                        |   object
                        |   adjectivalPhrase
                        |   adverbialPhrase
                        |   nounPhrase;

    topic:                  nounPhrase topicMarker;
    subject:                nounPhrase subjectMarker;
    object:                 nounPhrase objectMarker;
    complement:             nounPhrase complementMarker?;
    adjectivalPhrase:       adjective+ nounPhrase;

    // ---- noun-related ----

    nounPhrase:             singleNounPhrase
                        |   combinedNounPhrase;

    combinedNounPhrase:     singleNounPhrase continuedNounPhrase+;
    continuedNounPhrase:    conjunction singleNounPhrase;


    singleNounPhrase:       determiner singleNounPhrase1 auxiliaryParticle* punctuation*
                        |   singleNounPhrase1 auxiliaryParticle* punctuation*;
    singleNounPhrase1:      basicNounPhrase
                        |   modifiedNounPhrase
                        |   countingPhrase;

    basicNounPhrase:        noun+
                        |   possessive;
    possessive:             noun+ possessiveMarker;

    modifiedNounPhrase:     basicNounPhrase nounModifyingSuffix;

    countingPhrase:         basicNounPhrase number
                        |   basicNounPhrase number counter
                        |   number basicNounPhrase
                        |   counter possessiveMarker basicNounPhrase;

    noun:                   simpleNoun
                        |   nominalForm
                        |   nominalizedVerb
                        |   verbModifiedToNoun;

    nominalizedVerb:        _clause nominalizingSuffix;
    verbModifiedToNoun:     _clause verbToNounModifyingForm;

    adjective:              _clause adnominalSuffix
                        |   possessive;

    // ---- verb-related ----

    copulaPhrase:           adverb* copula verbSuffix* predicateEndingSuffix?;

    verbPhrase:             adverb* verbPhrase1 nominalVerbForm? verbSuffix* predicateEndingSuffix?;
    verbPhrase1:            basicVerbPhrase
                        |   negative basicVerbPhrase
                        |   basicVerbPhrase negative;

    basicVerbPhrase:        verbCombination
                        |   honorificVerb
                        |   verbAndAuxiliary
                        |   modifiedVerb
                        |   indirectQuotation
                        |   nominalAsVerb;

    verbCombination:        verb
                        |   verb verbCombiner verbCombination;

    verb:                   simpleVerb
                        |   descriptiveVerb;

    honorificVerb:          verb honorificMarker;

    verbAndAuxiliary:       verb nominalVerbForm? verbSuffix* auxiliaryVerb+;
    modifiedVerb:           verb honorificMarker? verbModifier
                        |   verbAndAuxiliary honorificMarker? verbModifier;
    nominalAsVerb:          verb verbNominal
                        |   verbAndAuxiliary verbNominal;

    auxiliaryVerb:          simpleAuxiliaryVerb honorificMarker?
                        |   auxiliaryVerbForm honorificMarker?;
    simpleAuxiliaryVerb:    auxiliaryVerbConnector verb;

    adverbialPhrase:        nounPhrase adverbialParticle auxiliaryParticle*
                        |   verb adverbialParticle auxiliaryParticle*;

    // ---- quotation forms ----

    indirectQuotation:      verb quotationSuffix;

    quotationShortForm:     basicVerbPhrase shortQuotationSuffix verbSuffix* predicateEndingSuffix?;

    // ------ others -----

    interjection:           interjectionTerminal punctuation*;

    // --- terminal symbols ------------------------------

    terminals
        sentenceEnd:            /[^:]+:(SF);/;
        interjectionTerminal:   /[^:]+:(IC);/;
        punctuation:            /[^:]+:(SP|SS|SE|SO|SW|SWK);/;
        clauseConnector:        /[^:]+:(EC|CCF|CCMOD|CCNOM);/;
        topicMarker:            /[^:]+:(TOP);/;
        objectMarker:           /[^:]+:(JKO);/;
        subjectMarker:          /[^:]+:(JKS);/;
        complementMarker:       /[^:]+:(JKC);/;
        conjunction:            /[^:]+:(JC|CON);/;
        determiner:             /[^:]+:(MM);/;
        auxiliaryParticle:      /[^:]+:(JX);/;
        possessiveMarker:       /[^:]+:(JKG);/;
        nounModifyingSuffix:    /[^:]+:(XSN|JKV);/;
        nominalizingSuffix:     /[^:]+:(ETN);/;
        adnominalSuffix:        /[^:]+:(ETM);/;
        verbSuffix:             /[^:]+:(EP|TNS);/;
        predicateEndingSuffix:  /[^:]+:(SEF|EF);/;
        negative:               /[^:]+:(NEG);/;
        verbCombiner:           /고:(EC|CCF);/;
        honorificMarker:        /(으시|시):EP;/;
        verbModifier:           /[^:]+:(VMOD);/;
        verbNominal:            /[^:]+:(VNOM);/;
        adverbialParticle:      /[^:]+:(JKB);/;
        quotationSuffix:        /[^:]+:(QOT);/;
        shortQuotationSuffix:   /[^:]+:(SQOT);/;
        sentenceJoiningAdverb:  /[^:]+:MAJ;/;
        simpleNoun:             /[^:]+:(NNG|NNP|NNB|NR|SL|NP|SN);/;
        adverb:                 /[^:]+:(MAG);/;
        simpleVerb:             /[^:]+:(VV|VVD|VHV);/;
        descriptiveVerb:        /[^:]+:(VA|VCP|VCN|VAD|VHA);/;
        auxiliaryVerbConnector: /[^:]+:(EC);/;
        auxiliaryVerbForm:      /[^:]+:(EC);/;
        copula:                 /(되:VV)|([^:]+:(VCP|VCN));/;
        number:                 /[^:]+:(SN|NR);/;
        counter:                /[^:]+:(NNB|NNG);/;
        nominalForm:            /[^:]+:(NNOM);/;
        verbToNounModifyingForm: /[^:]+:(NMOD);/;
        nominalVerbForm:        /[^:]+:(VNOM);/;
    '''  # noqa

    g = Grammar.from_string(grammar)
    parser = GLRParser(g)
    with pytest.raises(ParseError) as e:
        parser.parse('공부하:VHV; 는:ETM; 것:NNB; 은:TOP; 아니:VCN; ㅂ니다:SEF; .:SF;')

    assert 'Expected: adnominalSuffix or nominalizingSuffix or '\
        'verbToNounModifyingForm but found <sentenceEnd(.:SF;)>'\
        in str(e.value)
Example #17

def test_glr_forest_disambiguation():
    parser = GLRParser(Grammar.from_string(grammar))

    forest = parser.parse(r'''
    part1
     part2
      part3
     part2
    part1
     part3
      part2
     part3
    part1
     part2
      part3
    part1
     part2
    ''')

    # We have 415 solutions.
    assert len(forest) == 415
    assert forest.ambiguities == 46

    forest.disambiguate(disambiguate)

    # After the disambiguation, only one solution remains.
    assert len(forest) == 1
    assert forest.to_str().strip() == r'''
document[5->147]
  parts[5->147]
    part1_1[5->147]
      part1_1[5->126]
        part1_1[5->93]
          part1_1[5->49]
            part1[5->49]
              title1[5->10, "part1"]
              parts_opt[16->49]
                parts[16->49]
                  part2_1[16->49]
                    part2_1[16->39]
                      part2[16->39]
                        title2[16->21, "part2"]
                        parts_opt[28->39]
                          parts[28->39]
                            part3_1[28->39]
                              part3[28->39]
                                title3[28->33, "part3"]
                                parts_opt[39->39]
                    part2[39->49]
                      title2[39->44, "part2"]
                      parts_opt[49->49]
          part1[49->93]
            title1[49->54, "part1"]
            parts_opt[60->93]
              parts[60->93]
                part3_1[60->93]
                  part3_1[60->83]
                    part3[60->83]
                      title3[60->65, "part3"]
                      parts_opt[72->83]
                        parts[72->83]
                          part2_1[72->83]
                            part2[72->83]
                              title2[72->77, "part2"]
                              parts_opt[83->83]
                  part3[83->93]
                    title3[83->88, "part3"]
                    parts_opt[93->93]
        part1[93->126]
          title1[93->98, "part1"]
          parts_opt[104->126]
            parts[104->126]
              part2_1[104->126]
                part2[104->126]
                  title2[104->109, "part2"]
                  parts_opt[116->126]
                    parts[116->126]
                      part3_1[116->126]
                        part3[116->126]
                          title3[116->121, "part3"]
                          parts_opt[126->126]
      part1[126->147]
        title1[126->131, "part1"]
        parts_opt[137->147]
          parts[137->147]
            part2_1[137->147]
              part2[137->147]
                title2[137->142, "part2"]
                parts_opt[147->147]
    '''.strip()
Example #18
from parglare import GLRParser
from parglare.tables.persist import table_from_serializable

from _table import table
from grammar import grammar

table = table_from_serializable(table, grammar)
parser = GLRParser(grammar, table=table)

print(parser.parse('aaabbb'))
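
The _table module imported above is generated ahead of time. A minimal sketch of how such a module might be produced, assuming parglare's create_table and a symmetric table_to_serializable helper in the same persist module; both are assumptions, as the original generator script is not shown:

from parglare.tables import create_table
from parglare.tables.persist import table_to_serializable

from grammar import grammar

# Compute the LR table (assumed helper) and dump it as a Python literal
# so that the script above can do: from _table import table
table = create_table(grammar)
with open('_table.py', 'w') as f:
    f.write('table = {!r}\n'.format(table_to_serializable(table)))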
Example #19
from parglare import Grammar, GLRParser
from parglare.parser import NodeNonTerm  # parglare.trees in newer versions

INPUT = '1 + 2 * 3 + 4'

grammar = r'''
E: E '+' E
 | E '*' E
 | '(' E ')'
 | number;

terminals
number: /\d+/;
'''

g = Grammar.from_string(grammar)
parser = GLRParser(g, build_tree=True)

result = parser.parse(INPUT)


def tree_str(node, depth=0):
    indent = '  ' * depth
    if isinstance(node, NodeNonTerm):
        s = '\n{}[.{} {}\n{}]'.format(
            indent, node.production.symbol,
            ''.join([tree_str(n, depth + 1) for n in node.children]), indent)
    else:
        s = '\n{}[.{} ]'.format(indent, node.value)
    return s


with open('qtree_out.txt', 'w') as f:
    f.write('\\begin{{tabular}}{{{}}}\n'.format('c' * len(result)))
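
The script stops after writing the tabular header. A plausible continuation (an assumption about the intent, using the LaTeX qtree package's \Tree macro) would emit one column per ambiguous parse and close the environment:

with open('qtree_out.txt', 'a') as f:
    # One qtree per parse tree, separated as tabular columns.
    f.write(' & '.join('\\Tree {}'.format(tree_str(tree)) for tree in result))
    f.write('\n\\end{tabular}\n')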
Example #20
class CParser:
    def __init__(self):
        self._glr = None
        self._setup_parser()

        self.user_defined_types = set()

    def _setup_parser(self):
        """Setup parser."""
        file_path = os.path.realpath(os.path.dirname(__file__))
        root_path = os.path.split(os.path.abspath(os.path.join(file_path)))[0]
        grammar_path = os.path.join(root_path, "cparser", "cgrammar.pg")

        grammar = Grammar.from_file(grammar_path)

        def typedef_filter(context, action, subresults):
            """Filter for dynamic disambiguation

            Solves problems with typedef_name disambiguation. Whenever
            REDUCE is attempted on the typedef_name rule, we first check
            whether the ID being reduced is actually a user-defined type
            (struct, union, typedef). Only then is the REDUCE allowed.

            """
            if action is None:
                return

            production = context.production

            if action is REDUCE and production.symbol.fqn == "typedef_name":

                var_name = subresults[0].value
                if var_name not in self.user_defined_types:
                    return False

            if action is REDUCE and production.symbol.fqn == "primary_exp":
                child = subresults[0]
                if child.symbol.fqn == "id":
                    if child.value in self.user_defined_types:
                        return False

            if action is REDUCE and production.symbol.fqn == "iteration_stat":
                if isrule(subresults[2], "decl_body"):
                    init_declarator_list_opt = subresults[2].children[1]
                    if len(init_declarator_list_opt.children) == 0:
                        return False

            return True

        self._glr = GLRParser(grammar,
                              build_tree=True,
                              call_actions_during_tree_build=True,
                              dynamic_filter=typedef_filter,
                              actions=self._setup_actions(),
                              ws='\n\r\t ')

    def _setup_actions(self):
        """Creates a dict of semantic actions that will be called during
        parsing

        Returns:
            dict
        """
        def decl_body(_, nodes):
            """Semantic action called for every decl_body production

            This semantic action collects every user-defined type in the
            code. This includes structs, unions and typedefs.
            """
            def collect_direct_decl_name(init_dcl):
                """Adds the name of direct declarator into the set of
                user-defined types"""
                declarator = init_dcl.children[0]

                if isrule(declarator.children[0], "direct_declarator"):
                    direct_declarator = declarator.children[0]
                else:
                    # in the case of a pointer, the direct declarator is
                    # the second child
                    direct_declarator = declarator.children[1]

                if isinstance(direct_declarator.children[0], NodeTerm):
                    value = direct_declarator.children[0].value
                    self.user_defined_types.add(value)

            def recurse_init_decl(init_dcl):
                """Recurses through the init declarator rule."""
                if len(init_dcl.children) > 1:

                    # last child is always direct declarator
                    collect_direct_decl_name(init_dcl.children[-1])
                    # first child is always recursive init_declarator_1_comma
                    recurse_init_decl(init_dcl.children[0])
                else:
                    collect_direct_decl_name(init_dcl.children[0])

            decl_specs = nodes[0]

            first_el = decl_specs.children[0]
            if isrule(first_el, "storage_class_spec"):

                if first_el.children[0].value == "typedef":
                    # If the current decl_specs is definition of custom type by
                    # using 'typedef', get the name of the defined type.
                    init_decl_list_opt = nodes[1]
                    if not init_decl_list_opt.children:
                        return

                    init_decl_list = init_decl_list_opt.children[0]
                    for init_decl in init_decl_list.children:
                        recurse_init_decl(init_decl)

            # Productions that start with type_spec
            if isrule(first_el, "type_spec"):
                type_spec_children = first_el.children
                ts_first = type_spec_children[0]

                if isrule(ts_first, "struct_or_union_spec"):
                    struct_name = ts_first.children[1].value
                    self.user_defined_types.add(struct_name)

        return {"decl_body": decl_body}

    def parse(self, code, debug=False):
        """Parses the given code string."""
        self.user_defined_types = set()
        self._glr.debug = debug

        results = self._glr.parse(code)
        return results[0]

    def parse_file(self,
                   file_path,
                   use_cpp=False,
                   cpp_path="cpp",
                   cpp_args=None,
                   debug=False):
        """Parses content from the given file."""
        # self.user_defined_types = set()
        # self._glr.debug = debug

        if use_cpp:
            content = preprocess_file(file_path, cpp_path, cpp_args)
        else:
            with open(file_path) as f:
                content = f.read()

        return self.parse(content, debug)
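
Hypothetical usage of the class above; the input string and the expected set are illustrative:

cparser = CParser()

# After parsing, names introduced via typedef are remembered so that later
# declarations such as 'uint x;' can be disambiguated as uses of a type.
tree = cparser.parse("typedef unsigned int uint; uint x;")
print(cparser.user_defined_types)  # expected to contain 'uint'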
Example #21
def test_expressions():

    actions = {
        "E": [
            lambda _, nodes: nodes[0] + nodes[2],
            lambda _, nodes: nodes[0] * nodes[2],
            lambda _, nodes: nodes[1],
            lambda _, nodes: int(nodes[0])
        ]
    }

    # This grammar is highly ambiguous if priorities and
    # associativities are not defined to disambiguate.
    grammar = """
    E: E "+" E | E "*" E | "(" E ")" | /\d+/;
    """
    g = Grammar.from_string(grammar)
    p = GLRParser(g, actions=actions, debug=True)

    # Even this simple expression has 2 different interpretations
    # (4 + 2) * 3 and
    # 4 + (2 * 3)
    results = p.parse("4 + 2 * 3")
    assert len(results) == 2
    assert 18 in results and 10 in results

    # Adding one more operand raises the number of interpretations to 5
    results = p.parse("4 + 2 * 3 + 8")
    assert len(results) == 5

    # One more and there are 14 interpretations
    results = p.parse("4 + 2 * 3 + 8 * 5")
    assert len(results) == 14

    # The number of interpretations is the Catalan number of n,
    # where n is the number of operations:
    # https://en.wikipedia.org/wiki/Catalan_number
    # This number rises very fast: for 10 operations there are already
    # 16796 interpretations!

    # If we raise the priority of the multiplication operation we reduce the
    # ambiguity. The default production priority is 10. Here we raise it to
    # 15 for multiplication.
    grammar = r"""
    E: E "+" E | E "*" E {15}| "(" E ")" | /\d+/;
    """
    g = Grammar.from_string(grammar)
    p = GLRParser(g, actions=actions)

    # This expression now has 2 interpretations:
    # (4 + (2*3)) + 8
    # 4 + ((2*3) + 8)
    # This is because the associativity of the + operation is not defined.
    results = p.parse("4 + 2 * 3 + 8")
    assert len(results) == 2

    # If we define associativity for both + and * we have resolved all
    # ambiguities in the grammar.
    grammar = """
    E: E "+" E {left}| E "*" E {left, 15}| "(" E ")" | /\d+/;
    """
    g = Grammar.from_string(grammar)
    p = GLRParser(g, actions=actions)

    results = p.parse("4 + 2 * 3 + 8 * 5 * 3")
    assert len(results) == 1
    assert results[0] == 4 + 2 * 3 + 8 * 5 * 3
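
A quick sanity check of the Catalan-number claim above; this standalone snippet is not part of the original test:

from math import comb

def catalan(n):
    # C(n) = C(2n, n) / (n + 1)
    return comb(2 * n, n) // (n + 1)

# 2, 3 and 4 operations give 2, 5 and 14 interpretations, and 10 operations
# give 16796, matching the counts asserted and quoted above.
assert [catalan(n) for n in (2, 3, 4, 10)] == [2, 5, 14, 16796]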
Example #22
def test_issue_114_empty_and_lexical_ambiguity():

    g = Grammar.from_string(grammar)
    parser = GLRParser(g, build_tree=True)

    results = parser.parse("a car is a kind of vehicle.")
    assert len(results) == 2

    expected = r'''
Sentence[0->27]
KindDefinitionSentence[0->27]
  a[0->1, "a"]
  IdentifierWord_0[2->5]
    IdentifierWord_1[2->5]
      IdentifierWord[2->5, "car"]
  is[6->8, "is"]
  a[9->10, "a"]
  kind[11->15, "kind"]
  of[16->18, "of"]
  IdentifierWord_0[19->26]
    IdentifierWord_1[19->26]
      IdentifierWord[19->26, "vehicle"]
  KindWith_opt[26->26]
  DOT[26->27, "."]

Sentence[0->27]
OtherSentence[0->27]
  IdentifierWord_0[0->26]
    IdentifierWord_1[0->26]
      IdentifierWord_1[0->18]
        IdentifierWord_1[0->15]
          IdentifierWord_1[0->10]
            IdentifierWord_1[0->8]
              IdentifierWord_1[0->5]
                IdentifierWord_1[0->1]
                  IdentifierWord[0->1, "a"]
                IdentifierWord[2->5, "car"]
              IdentifierWord[6->8, "is"]
            IdentifierWord[9->10, "a"]
          IdentifierWord[11->15, "kind"]
        IdentifierWord[16->18, "of"]
      IdentifierWord[19->26, "vehicle"]
  DOT[26->27, "."]
    '''

    assert '\n\n'.join([r.tree_str()
                        for r in results]).strip() == expected.strip()

    results = parser.parse("a car is a kind of vehicle with wheels.")
    assert len(results) == 3

    expected = r'''
Sentence[0->39]
KindDefinitionSentence[0->39]
  a[0->1, "a"]
  IdentifierWord_0[2->5]
    IdentifierWord_1[2->5]
      IdentifierWord[2->5, "car"]
  is[6->8, "is"]
  a[9->10, "a"]
  kind[11->15, "kind"]
  of[16->18, "of"]
  IdentifierWord_0[19->38]
    IdentifierWord_1[19->38]
      IdentifierWord_1[19->31]
        IdentifierWord_1[19->26]
          IdentifierWord[19->26, "vehicle"]
        IdentifierWord[27->31, "with"]
      IdentifierWord[32->38, "wheels"]
  KindWith_opt[38->38]
  DOT[38->39, "."]

Sentence[0->39]
KindDefinitionSentence[0->39]
  a[0->1, "a"]
  IdentifierWord_0[2->5]
    IdentifierWord_1[2->5]
      IdentifierWord[2->5, "car"]
  is[6->8, "is"]
  a[9->10, "a"]
  kind[11->15, "kind"]
  of[16->18, "of"]
  IdentifierWord_0[19->26]
    IdentifierWord_1[19->26]
      IdentifierWord[19->26, "vehicle"]
  KindWith_opt[27->38]
    KindWith[27->38]
      with[27->31, "with"]
      IdentifierWord_0[32->38]
        IdentifierWord_1[32->38]
          IdentifierWord[32->38, "wheels"]
  DOT[38->39, "."]

Sentence[0->39]
OtherSentence[0->39]
  IdentifierWord_0[0->38]
    IdentifierWord_1[0->38]
      IdentifierWord_1[0->31]
        IdentifierWord_1[0->26]
          IdentifierWord_1[0->18]
            IdentifierWord_1[0->15]
              IdentifierWord_1[0->10]
                IdentifierWord_1[0->8]
                  IdentifierWord_1[0->5]
                    IdentifierWord_1[0->1]
                      IdentifierWord[0->1, "a"]
                    IdentifierWord[2->5, "car"]
                  IdentifierWord[6->8, "is"]
                IdentifierWord[9->10, "a"]
              IdentifierWord[11->15, "kind"]
            IdentifierWord[16->18, "of"]
          IdentifierWord[19->26, "vehicle"]
        IdentifierWord[27->31, "with"]
      IdentifierWord[32->38, "wheels"]
  DOT[38->39, "."]
    '''

    assert '\n\n'.join([r.tree_str()
                        for r in results]).strip() == expected.strip()
Example #23
def test_nops():
    """
    Test that nops (no prefer shifts) is honored per rule.
    """
    grammar = """
    Program: "begin"
             statements=Statements
             ProgramEnd EOF;
    Statements: Statements1 | EMPTY;
    Statements1: Statements1 Statement | Statement;
    ProgramEnd: End;
    Statement: End "transaction" | "command";

    terminals
    End: "end";
    """

    g = Grammar.from_string(grammar, ignore_case=True)
    parser = GLRParser(g, build_tree=True, prefer_shifts=True)

    # Here we have "end transaction" which is a statement and "end" which
    # finish program. Prefer shift strategy will make parser always choose to
    # shift "end" in anticipation of "end transaction" statement instead of
    # reducing by "Statements" and finishing.
    with pytest.raises(ParseError):
        parser.parse("""
        begin
            command
            end transaction
            command
            end transaction
            command
        end
        """)

    # When {nops} is used, the GLR parser will investigate both possibilities
    # at this place and find the correct interpretation while still using the
    # prefer_shifts strategy globally.
    grammar = """
    Program: "begin"
             statements=Statements
             ProgramEnd EOF;
    Statements: Statements1 {nops} | EMPTY;
    Statements1: Statements1 Statement | Statement;
    ProgramEnd: End;
    Statement: End "transaction" | "command";

    terminals
    End: "end";
    """

    g = Grammar.from_string(grammar, ignore_case=True)
    parser = GLRParser(g, build_tree=True, prefer_shifts=True)
    parser.parse("""
    begin
        command
        end transaction
        command
        end transaction
        command
    end
    """)
Example #24
def test_lexical_ambiguity2():
    g = Grammar.from_string(r'''
    Stuff: Stuff "+" Stuff | Something;
    Something: INT | FLOAT | Object;
    Object: INT DOT INT;

    terminals
    INT: /\d+/;
    FLOAT: /\d+(\.\d+)?/;
    DOT: ".";
    ''')

    parser = GLRParser(g)

    # Lexical ambiguity between FLOAT and INT . INT
    forest = parser.parse('42.12')
    assert len(forest) == 2
    assert forest.ambiguities == 1

    # Here we also have two ambiguities
    forest = parser.parse('42.12 + 3.8')
    assert len(forest) == 4
    assert forest.ambiguities == 2

    # Here we have 3 lexical ambiguities and 1 ambiguity
    # for the + operation
    forest = parser.parse('34.78 + 8 + 3.3')
    assert len(forest) == 16
    assert forest.ambiguities == 4

    # Here we have 4 lexical ambiguities and 3 ambiguities for the +
    # operation, therefore 5 * 2**4 = 80 solutions
    forest = parser.parse('34.78 + 8 + 3.3 + 1.2')
    assert len(forest) == 80
    assert forest.ambiguities == 7

    # When default lexical disambiguation is activated, only the ambiguities
    # that it cannot resolve should remain.
    parser = GLRParser(g, lexical_disambiguation=True)

    # Longest match is used to choose FLOAT, leaving a single tree
    forest = parser.parse('42.12')
    assert len(forest) == 1
    assert forest.ambiguities == 0

    # Also, longest match will choose FLOAT in both cases
    forest = parser.parse('42.12 + 3.8')
    assert len(forest) == 1
    assert forest.ambiguities == 0

    # Here we still have lexical ambiguity on "8"
    forest = parser.parse('34.78 + 8 + 3.3')
    assert len(forest) == 4
    assert forest.ambiguities == 2

    # Lexical ambiguity on "8" and 3 syntactical ambiguities
    # on + operations
    forest = parser.parse('34.78 + 8 + 3.3 + 1.2')
    assert len(forest) == 10
    assert forest.ambiguities == 4
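
A quick cross-check of the 80-solution count asserted earlier; this standalone snippet is not part of the original test:

from math import comb

# 3 '+' operations give comb(6, 3) // 4 == 5 syntactic trees (Catalan(3));
# without lexical disambiguation, 4 ambiguous numbers give 2**4 tokenizations,
# so 5 * 2**4 == 80 solutions in total.
assert (comb(6, 3) // 4) * 2 ** 4 == 80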