示例#1
0
def test_extend_implicit_grammar():
    """Check that extending a grammar carries over an implicit token's pattern."""
    literal = '('
    source = Grammar()
    source.add_implicit(literal)

    result = Grammar()
    result.extend(source)

    # The first pattern of the extended grammar must reference the '(' token,
    # have a priority equal to the negated literal length and be implicit.
    pattern = result.patterns[0]
    assert pattern.token_id == result.tokens[literal]
    assert pattern.priority == -len(literal)
    assert pattern.is_implicit
示例#2
0
def test_add_brackets():
    """Check that add_brackets fills every bracket collection of the grammar."""
    grammar = Grammar()
    opener = grammar.add_implicit('(')
    closer = grammar.add_implicit(')')

    # Adding the implicit tokens alone does not register any brackets.
    assert grammar.brackets == set()
    assert grammar.open_brackets == set()
    assert grammar.close_brackets == set()

    grammar.add_brackets(opener, closer)

    # After registration the pair is visible as a pair, per side, and in the mapping.
    assert grammar.brackets == {(opener, closer)}
    assert grammar.open_brackets == {opener}
    assert grammar.close_brackets == {closer}
    assert grammar.bracket_pairs[opener] == closer
示例#3
0
def test_extend_brackets_grammar():
    """Check that merging two grammars yields the union of their bracket pairs."""
    first = Grammar()
    first.add_brackets(first.add_implicit('('), first.add_implicit(')'))

    # The second grammar repeats the round brackets and adds square ones.
    second = Grammar()
    for opener, closer in (('(', ')'), ('[', ']')):
        second.add_brackets(second.add_implicit(opener), second.add_implicit(closer))

    result = Grammar.merge(first, second)

    tokens = result.tokens
    expected = {
        (tokens['['], tokens[']']),
        (tokens['('], tokens[')']),
    }
    assert result.brackets == expected
示例#4
0
def create_core_grammar() -> Grammar:
    """
    Create the default (core) grammar.

    The grammar contains patterns for the Comment, Whitespace, Name, NewLine, String, Integer and Float
    tokens, implicit tokens for round, square, curly and angle brackets, marks Comment and Whitespace as
    trivia and registers the round, square and curly pairs as brackets.
    """
    grammar = Grammar()

    # Several patterns may feed the same token, so add_token is called once per pattern.
    token_patterns = (
        ('Comment', RE_COMMENT),
        ('Whitespace', RE_WHITESPACE),
        ('Name', RE_NAME),
        ('NewLine', RE_NEWLINE),
        ('String', RE_STRING_SINGLE),
        ('String', RE_STRING_DOUBLE),
        ('Integer', RE_NUMBER_BINARY),
        ('Integer', RE_NUMBER_OCTAL),
        ('Integer', RE_NUMBER_DECIMAL),
        ('Integer', RE_NUMBER_HEXADECIMAL),
        ('Float', RE_FLOAT_POINT),
        ('Float', RE_FLOAT_EXPONENT),
    )
    for token_name, pattern in token_patterns:
        grammar.add_pattern(grammar.add_token(token_name), pattern)

    for literal in ('(', ')', '[', ']', '{', '}', '<', '>'):
        grammar.add_implicit(literal)

    for trivia_name in ('Comment', 'Whitespace'):
        grammar.add_trivia(grammar.tokens[trivia_name])

    # '<' and '>' are implicit tokens only; they are not registered as a bracket pair.
    for opener, closer in (('(', ')'), ('[', ']'), ('{', '}')):
        grammar.add_brackets(grammar.tokens[opener], grammar.tokens[closer])

    return grammar
示例#5
0
def test_add_packrat_parser():
    """Check that a packrat parselet accepts sequence, token and parselet combinators."""
    grammar = Grammar()
    stmt_id = grammar.add_parselet('stmt', kind=ParseletKind.Packrat, result_type=SyntaxToken)
    star_id = grammar.add_implicit('*')

    # '(' stmt ')'
    parenthesized = make_sequence(grammar.add_implicit('('), stmt_id, grammar.add_implicit(')'))
    assert grammar.add_parser(stmt_id, parenthesized)

    # A sequence holding a single implicit token.
    lone_paren = make_sequence(grammar.add_implicit('('))
    assert grammar.add_parser(stmt_id, lone_paren)

    # Bare token and bare parselet identifiers are accepted as combinators too.
    assert grammar.add_parser(stmt_id, star_id)
    assert grammar.add_parser(stmt_id, stmt_id)
示例#6
0
def convert_node(grammar: Grammar, node: CombinatorNode, location: Location) -> Combinator:
    """
    Convert a combinator syntax node into a combinator for the given grammar.

    Sequence, repeat, optional and named nodes are converted recursively. An implicit node registers its
    literal in the grammar via ``add_implicit``. A reference node is resolved first against the grammar's
    tokens and then against its parselets.

    :param grammar: grammar used to resolve names; implicit tokens are added to it
    :param node: combinator node to convert
    :param location: location passed to ``add_implicit`` and attached to raised diagnostics
    :return: combinator built for the node
    :raises DiagnosticError: when a token reference has a priority or the referenced name is unknown
    :raises NotImplementedError: when the node type is not handled
    """
    if isinstance(node, SequenceNode):
        children = [convert_node(grammar, child, location) for child in node.combinators]
        return make_sequence(*children)

    if isinstance(node, RepeatNode):
        inner = convert_node(grammar, node.combinator, location)
        return make_repeat(inner)

    if isinstance(node, OptionalNode):
        inner = convert_node(grammar, node.combinator, location)
        return make_optional(inner)

    if isinstance(node, NamedNode):
        inner = convert_node(grammar, node.combinator, location)
        return make_named(node.name.value, inner)

    if isinstance(node, ImplicitNode):
        # The node value is the source text of a string literal; evaluate it to get the actual text.
        literal = ast.literal_eval(node.value.value)
        return make_token(grammar.add_implicit(literal, location=location))

    if isinstance(node, ReferenceNode):
        symbol = node.name.value

        # Tokens take precedence over parselets when resolving a name.
        if symbol in grammar.tokens:
            if node.priority:
                raise DiagnosticError(location, f'Token combinator can not have priority')
            return make_token(grammar.tokens[symbol])

        if symbol in grammar.parselets:
            # A falsy priority is passed through unchanged instead of being evaluated.
            priority = node.priority and ast.literal_eval(node.priority.value)
            return make_parselet(grammar.parselets[symbol], priority)

        raise DiagnosticError(location, f"Not found symbol {symbol} in grammar")

    raise NotImplementedError(f'Not implemented conversion from node to combinator: {type(node).__name__}')
示例#7
0
def test_add_implicit_token():
    """Check the token and the pattern created by add_implicit for a literal."""
    literal = '+'
    grammar = Grammar()
    plus_id = grammar.add_implicit(literal)

    # The token is named and described by its literal and registered in the grammar.
    assert plus_id.name == literal
    assert plus_id.description == literal
    assert plus_id.is_implicit
    assert literal in grammar.tokens

    # Exactly one pattern exists: the escaped literal, with a negative priority.
    assert len(grammar.patterns) == 1
    pattern = grammar.patterns[0]
    expected_regex = re.compile(re.escape(literal))
    assert pattern.token_id == plus_id
    assert pattern.pattern == expected_regex
    assert pattern.priority < 0
    assert pattern.is_implicit
示例#8
0
def grammar() -> Grammar:
    """
    Build a small grammar with Whitespace, Number and Name tokens and a few implicit tokens.

    Whitespace is registered as trivia; "for", "while", "+" and "-" are added as implicit tokens.
    """
    result = Grammar()

    # The value returned by add_pattern is what gets registered as trivia.
    whitespace_id = result.add_pattern(result.add_token('Whitespace'), r'\s+')
    result.add_trivia(whitespace_id)

    # NOTE(review): the Name pattern requires at least two characters — confirm this is intended.
    for token_name, regex in (
        ('Number', r'[0-9]+'),
        ('Name', r'[a-zA-Z_][a-zA-Z0-9]+'),
    ):
        result.add_pattern(result.add_token(token_name), regex)

    for literal in ("for", "while", "+", "-"):
        result.add_implicit(literal)

    return result
示例#9
0
def test_add_pratt_parser():
    """Check that a Pratt parselet accepts parsers and records their prefix tokens."""
    grammar = Grammar()
    expr_id = grammar.add_parselet('expr', kind=ParseletKind.Pratt, result_type=SyntaxToken)
    integer_id = grammar.add_token('Integer')
    string_id = grammar.add_token('String')
    plus_id = grammar.add_implicit('+')
    star_id = grammar.add_implicit('*')

    table = cast(PrattTable, grammar.tables[expr_id])
    assert table.prefix_tokens == set()

    # A parser that is a bare token registers that token as a prefix token.
    assert grammar.add_parser(expr_id, integer_id)
    assert integer_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # The same holds when the token is wrapped in a named combinator.
    named_string = make_named('value', string_id)
    assert grammar.add_parser(expr_id, named_string)
    assert string_id in table.prefix_tokens, "Cleanup of pratt table prefix tokens is not worked"

    # Sequences that start with the parselet itself, without and with named parts.
    plus_rule = make_sequence(expr_id, plus_id, expr_id)
    assert grammar.add_parser(expr_id, plus_rule)

    star_rule = make_sequence(make_named('lhs', expr_id), make_named('op', star_id), expr_id)
    assert grammar.add_parser(expr_id, star_rule)
示例#10
0
def create_combinator_grammar() -> Grammar:
    """
    Create the grammar used to parse combinator definitions.

    The core grammar is extended with a ':' implicit token and two parselets, 'combinator' and
    'combinator_sequence', together with their parsers.

    P.S. This grammar is used for the bootstrap process of the initial grammar, e.g. the definition of
    combinators in a grammar.
    """
    grammar = Grammar()
    grammar.extend(create_core_grammar())

    # tokens: only ':' is added here, the rest is looked up in the extended grammar
    name_id, string_id, number_id = (grammar.tokens[name] for name in ('Name', 'String', 'Integer'))
    colon_id = grammar.add_implicit(':')
    parent_open_id, parent_close_id = (grammar.tokens[literal] for literal in ('(', ')'))
    square_open_id, square_close_id = (grammar.tokens[literal] for literal in ('[', ']'))
    curly_open_id, curly_close_id = (grammar.tokens[literal] for literal in ('{', '}'))
    less_id, great_id = (grammar.tokens[literal] for literal in ('<', '>'))

    # parselets for the combinator definition
    comb_id = grammar.add_parselet('combinator', result_type=CombinatorNode)
    seq_id = grammar.add_parselet('combinator_sequence', result_type=SequenceNode)

    def enclosed(open_id, close_id):
        # open combinator_sequence close, with the sequence bound to the 'combinator' variable
        return make_sequence(open_id, make_named('combinator', seq_id), close_id)

    # combinator := name: Name ':' combinator: combinator           ; named variable
    named_rule = make_sequence(make_named('name', name_id), colon_id, make_named('combinator', comb_id))
    grammar.add_parser(comb_id, named_rule, make_ctor(NamedNode))

    # combinator := name: Name [ '<' priority: Integer '>' ]        ; reference to parselet or token
    priority_suffix = make_optional(less_id, make_named('priority', number_id), great_id)
    reference_rule = make_sequence(make_named('name', name_id), priority_suffix)
    grammar.add_parser(comb_id, reference_rule, make_ctor(ReferenceNode))

    # combinator := value: String                                   ; reference to implicit token
    grammar.add_parser(comb_id, make_named('value', string_id), make_ctor(ImplicitNode))

    # combinator := '[' combinator: combinator_sequence ']'         ; optional combinator
    grammar.add_parser(comb_id, enclosed(square_open_id, square_close_id), make_ctor(OptionalNode))

    # combinator := '{' combinator: combinator_sequence '}'         ; repeat combinator
    grammar.add_parser(comb_id, enclosed(curly_open_id, curly_close_id), make_ctor(RepeatNode))

    # combinator := '(' combinator: combinator_sequence ')'         ; parenthesis combinator
    grammar.add_parser(comb_id, enclosed(parent_open_id, parent_close_id), make_return_variable('combinator'))

    # combinator_sequence := combinators: combinator combinators: { combinator }    ; sequence combinator
    sequence_rule = make_sequence(
        make_named('combinators', comb_id),
        make_named('combinators', make_repeat(comb_id)),
    )
    grammar.add_parser(seq_id, sequence_rule, make_ctor(SequenceNode))

    return grammar