def add_comments_and_ws(rules):
    # ONE_LINE_COMMENT: '//' .*? '\r'? ( '\n' | EOF ) -> channel(HIDDEN);
    olc = Antlr4Rule("ONE_LINE_COMMENT",
                     Antlr4Sequence([
                         Antlr4Symbol("//", True),
                         Antlr4Symbol(".*?", True, is_regex=True),
                         Antlr4Option(Antlr4Symbol("\r", True)),
                         Antlr4Selection([
                             Antlr4Symbol("\n", True),
                             Antlr4Symbol("EOF", False),
                         ])
                     ]),
                     lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(olc)
    # BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
    bc = Antlr4Rule("BLOCK_COMMENT",
                    Antlr4Sequence([
                        Antlr4Symbol("/*", True),
                        Antlr4Symbol(".*?", True, is_regex=True),
                        Antlr4Symbol("*/", True),
                    ]),
                    lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(bc)
    # WHITE_SPACE: [ \t\n\r]+ -> channel(HIDDEN);
    ws = Antlr4Rule("WHITE_SPACE",
                    Antlr4Sequence([
                        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
                    ]),
                    lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(ws)
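
# Usage sketch (illustrative; assumes Antlr4Rule.toAntlr4() renders a rule to
# its grammar text, as it is used elsewhere in this module):
#   rules = []
#   add_comments_and_ws(rules)
#   print(rules[0].toAntlr4())
#   # ONE_LINE_COMMENT: '//' .*? '\r'? ( '\n' | EOF ) -> channel(HIDDEN);
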
def split_rule(rules, rule_name, symbols_to_extract: List[str],
               subrule_name: str):
    """
    Let only options which are starting with symbols from symbols_to_extract.
    Put the rest to a subrule.
    """
    r = rule_by_name(rules, rule_name)

    assert isinstance(r.body, Antlr4Selection), r

    sub_options = Antlr4Selection([])
    for o in r.body:
        start_symbols = set()
        _direct_left_corner(o, start_symbols, allow_eps_in_sel=True)
        if not start_symbols.intersection(symbols_to_extract):
            sub_options.append(o)
    r.body = Antlr4Selection([o for o in r.body if not (o in sub_options)])
    r.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(r.body) == 1:
        r.body = r.body[0]

    assert len(sub_options) > 0
    if len(sub_options) == 1:
        sub_options = sub_options[0]
    else:
        sub_options = Antlr4Selection(sub_options)

    sub_r = Antlr4Rule(subrule_name, sub_options)
    rules.insert(rules.index(r), sub_r)
    return sub_r
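
# Transformation sketch (hypothetical rule "x"; rule_from_str() is defined at
# the end of this file):
#   rules = [rule_from_str("x: A a | B b | C c")]
#   split_rule(rules, "x", ["A"], "x_no_a")
#   # x is reduced to:      x: x_no_a | A a;
#   # new subrule appears:  x_no_a: B b | C c;
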
def optimise_subroutine_call(rules):
    r = rule_by_name(rules, "subroutine_call")
    Antlr4GenericOptimizer().optimize([
        r,
    ])
    c0 = Antlr4parser().from_str("""
    ( class_qualifier | ( primary | implicit_class_handle ) DOT )?
    ( 
        identifier ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? 
        |  array_method_name ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )?
            ( KW_WITH LPAREN expression RPAREN )? 
        | randomize_call 
    )

    """)
    assert r.body[0].eq_relaxed(c0), r.body[0]
    subroutine_call_args = Antlr4Rule(
        "subroutine_call_args",
        Antlr4parser().from_str("""
        ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? 
        ( KW_WITH LPAREN expression RPAREN )?
    """))
    rules.insert(rules.index(r), subroutine_call_args)
    new_c0 = Antlr4parser().from_str("""
    ( primary_no_cast_no_call
      | cast 
    )
    subroutine_call_args
    ( 
       DOT ( array_method_name | randomize_call | primary_no_cast_no_call | cast )
       subroutine_call_args
    )*
    """)
    r.body[0] = new_c0
    primary = rule_by_name(rules, "primary")
    assert primary.body[0].eq_relaxed(
        Antlr4Symbol("primary_no_cast_no_call", False))
    del primary.body[0]

    c2 = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN ( list_of_arguments 
                                      | data_type ( COMMA expression )? 
                                      | expression ( COMMA ( expression )? )* ( COMMA 
                                      ( clocking_event )? )? 
                                      ) RPAREN )? 
    """)
    assert r.body[2].eq_relaxed(c2)
    r.body[2] = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN (
                                     ( data_type )? list_of_arguments
                                     ( COMMA clocking_event )?
                                 ) RPAREN )?
    """)

    c1 = Antlr4parser().from_str("""
        ps_or_hierarchical_identifier ( attribute_instance )*
        ( LPAREN list_of_arguments RPAREN )?
    """)
    assert r.body[1].eq_relaxed(c1), r.body[1]
    del r.body[1]
def _extract_option_as_rule(r, rules, options_i: List[Tuple[int,
                                                            iAntlr4GramElem]],
                            new_rule_name):
    assert isinstance(r.body, Antlr4Selection)

    new_body = Antlr4Selection([])
    consumed = set()
    for i, ev in options_i:
        assert r.body[i].eq_relaxed(ev), (r.body[i], ev)
        new_body.append(r.body[i])
        consumed.add(i)

    body = [
        Antlr4Symbol(new_rule_name, False),
    ]
    for i, x in enumerate(r.body):
        if i not in consumed:
            body.append(x)

    if len(body) == 1:
        r.body = body[0]
    else:
        r.body = Antlr4Selection(body)

    if len(new_body) == 1:
        new_body = new_body[0]

    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(r), new_r)
    return new_r
def _extract_common_from_sequences(a: Antlr4Sequence, b: Antlr4Sequence,
                                   common, extracted_rule_name: str):
    # try to find the longest common sequence;
    # if the sequence is more complex than a single symbol,
    # extract it into a new rule

    common_rule = Antlr4Sequence([x[0] for x in common])
    if len(common_rule) == 1:
        common_rule = common_rule[0]
    common_rule = Antlr4Rule(extracted_rule_name, common_rule)

    a_prefix, a_suffix = cut_off_subsequence(a, [x[0] for x in common])
    b_prefix, b_suffix = cut_off_subsequence(b, [x[1] for x in common])
    differs_in_suffix = a_suffix or b_suffix
    a_prefix_to_non_optional = not _is_optional(b_prefix) and not (
        differs_in_suffix)
    b_prefix_to_non_optional = not _is_optional(a_prefix) and not (
        differs_in_suffix)

    # if the two sequences differ only in optional items,
    # convert these items to the non-optional variant
    # (because the variant without these items is the extracted rule)
    extract_common_from_sequences_from_part(a, a_prefix, a_suffix,
                                            extracted_rule_name,
                                            a_prefix_to_non_optional)

    extract_common_from_sequences_from_part(b, b_prefix, b_suffix,
                                            extracted_rule_name,
                                            b_prefix_to_non_optional)
    return a, b, common_rule
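
# Grammar-level sketch of the intended transformation (hypothetical rules):
#   a0: x y c;   b0: x y d;   common: x y
#   ->
#   extracted_rule_name: x y;
#   a0: extracted_rule_name c;
#   b0: extracted_rule_name d;
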
def add_file_path_literal_rules(p):
    FILE_PATH_SPEC_CHAR = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol("[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )", True, True),
        is_fragment=True)
    p.rules.append(FILE_PATH_SPEC_CHAR)

    file_spec_path = Antlr4Rule(
        "FILE_PATH_SPEC",
        Antlr4Iteration(Antlr4Sequence([
            Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
            Antlr4Option(
                Antlr4Sequence([
                    Antlr4Symbol('SEMI', False),
                    Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
                ])),
        ]),
                        positive=True))
    p.rules.append(file_spec_path)
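
# Resulting lexer rules (shape only; SEMI is assumed to be the existing ';'
# token rule):
#   fragment FILE_PATH_SPEC_CHAR: [^ !$`&()+] | ( '\\' [ !$`&*()+] );
#   FILE_PATH_SPEC: ( FILE_PATH_SPEC_CHAR ( SEMI FILE_PATH_SPEC_CHAR )? )+;
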
def direct_left_recurse_rm(rules, rule_name):
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [
            r.body,
        ]
    else:
        raise NotImplementedError()

    # find the choices which start with this rule's non-terminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove choices which are causing left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to <rule_name>_item
    r_base_name = r.name + "_item"
    for _r in rules:
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create a new rule which implements the removed choices and also
    # expands to the new "_item" rule
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)

        if not choices_new:
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
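
# Left-recursion removal sketch (hypothetical rule "e"):
#   before:  e: e PLUS t | t;
#   after:   e_item: t;
#            e: e_item ( PLUS t )*;
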
def extract_bin_ops(rules, current_expr_rule, ops_to_extract, new_rule_name,
                    handle_conditional_fn, handle_inside_fn):
    # find the option with the binary operator rule
    # expr = rule_by_name(rules, "expression")
    ops_no_special = [
        o for o in ops_to_extract if o not in [
            "KW_INSIDE",
            "KW_DIST",
            "QUESTIONMARK",
        ]
    ]

    bin_op_choices = []
    if len(ops_no_special) > 0:
        if len(ops_no_special) == 1:
            op = Antlr4Symbol(ops_no_special[0], False)
        else:
            op = Antlr4Selection(
                [Antlr4Symbol(o, False) for o in ops_no_special])

        # expression (binary_operator ( attribute_instance )* expression)*
        bin_op_choice = Antlr4Sequence([
            op,
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol(current_expr_rule.name, False)
        ])
        bin_op_choices.append(bin_op_choice)

    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(bin_op_choices, current_expr_rule)

    if "KW_DIST" in ops_to_extrat:
        # handled differently, only allowed on specified places
        pass

    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(bin_op_choices, current_expr_rule)

    for c in bin_op_choices:
        assert isinstance(c, iAntlr4GramElem), c
    # create a new rule which contains the extracted binary operators
    if len(bin_op_choices) > 1:
        new_body = Antlr4Selection(bin_op_choices)
    else:
        new_body = bin_op_choices[0]
    new_body = Antlr4Sequence([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Iteration(new_body)
    ])
    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(current_expr_rule), new_r)

    return new_r
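
# Shape of the generated rule (sketch, for ops_to_extract = ["PLUS", "MINUS"]):
#   new_rule_name:
#       <current_expr_rule>
#       ( ( PLUS | MINUS ) ( attribute_instance )* <current_expr_rule> )*;
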
def add_string_literal_rules(p):
    string_char = Antlr4Rule(
        "ANY_ASCII_CHARACTERS",
        Antlr4Selection([
            Antlr4Symbol('~["\\\\\\r\\n]', True, True),
            Antlr4Symbol('\\\n', True),
            Antlr4Symbol('\\\r\n', True),
            Antlr4Sequence([
                Antlr4Symbol("\\", True),
                Antlr4Symbol('[nt\\\\"vfa%]', True, is_regex=True),
            ]),
            Antlr4Symbol("'\\\\' [0-9] [0-9]? [0-9]?", True, True),
            Antlr4Symbol("'\\\\' 'x' [0-9A-Fa-f] [0-9A-Fa-f]?", True, True),
        ]),
        is_fragment=True)
    p.rules.append(string_char)

    any_printable_ASCII_character_except_white_space = Antlr4Rule(
        "ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE",
        Antlr4Symbol("'\\u0021'..'\\u007E'", True, True),
        is_fragment=True)
    p.rules.append(any_printable_ASCII_character_except_white_space)
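
# Both rules are lexer fragments: ANY_ASCII_CHARACTERS covers the characters
# and escape sequences allowed inside a string literal, while
# ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE matches '\u0021'..'\u007E'.
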
def numbers_add_whitespace_after_base(rules):
    number_rules = set([
        "DECIMAL_NUMBER_WITH_BASE",
        "DECIMAL_INVALID_NUMBER_WITH_BASE",
        "DECIMAL_TRISTATE_NUMBER_WITH_BASE",
        "BINARY_NUMBER",
        "OCTAL_NUMBER",
        "HEX_NUMBER",
    ])
    number_base_rules = set([
        "DECIMAL_BASE",
        "BINARY_BASE",
        "OCTAL_BASE",
        "HEX_BASE",
    ])
    # used only in integral_number
    inline_rule(rules, "decimal_number")

    def opt_ws():
        return Antlr4Option(Antlr4Symbol("WHITE_SPACE", False))

    for r in rules:
        if r.name in number_rules:
            # ( SIZE )? *_BASE ....
            assert r.body[0].body.symbol == "SIZE", r
            assert r.body[1].symbol.endswith("_BASE"), r
            del r.body[0]
            r.is_fragment = True

        elif r.name in number_base_rules:
            # APOSTROPHE ( [sS] )? [dD];
            r.body.insert(2, opt_ws())
            r.body.insert(1, opt_ws())
            r.body.append(opt_ws())
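            # resulting shape, e.g. for DECIMAL_BASE:
            # APOSTROPHE ( WHITE_SPACE )? ( [sS] )? ( WHITE_SPACE )? [dD] ( WHITE_SPACE )?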
    any_based_number = Antlr4Rule(
        "ANY_BASED_NUMBER",
        Antlr4Selection([Antlr4Symbol(n, False) for n in number_rules]))
    rules.insert(rules.index(rule_by_name(rules, "HEX_NUMBER")),
                 any_based_number)

    integral_number = rule_by_name(rules, "integral_number")
    integral_number.body = Antlr4parser().from_str("""
    ( UNSIGNED_NUMBER )? ANY_BASED_NUMBER
    | UNSIGNED_NUMBER
    """)
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    r = rule_by_name(rules, rule_name)
    assert isinstance(r.body, Antlr4Selection)
    new_body = Antlr4Selection([])
    for i in options_i:
        new_body.append(r.body[i])

    r.body[options_i[0]] = Antlr4Sequence(
        [Antlr4Symbol(new_rule_name, False),
         Antlr4Newline(),
         Antlr4Indent(1)])
    r.body = Antlr4Selection(
        [x for i, x in enumerate(r.body) if i not in options_i[1:]])

    if len(new_body) == 1:
        new_body = new_body[0]

    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(r), new_r)
    return new_r
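
# Usage sketch (hypothetical rule "x"; options_i indexes options of the selection):
#   rules = [rule_from_str("x: a | b | c")]
#   extract_option_as_rule(rules, "x", [0, 2], "x_ac")
#   # x    : x_ac | b;
#   # x_ac : a | c;
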
def fix_SYSTEM_TF_IDENTIFIER(rules):
    kws = collect_keywords(rules)
    SYSTEM_TF_IDENTIFIER = Antlr4Symbol("SYSTEM_TF_IDENTIFIER", False)
    any_system_tf_identifier = Antlr4Symbol("any_system_tf_identifier", False)

    def match_replace_fn(o):
        if o == SYSTEM_TF_IDENTIFIER:
            return deepcopy(any_system_tf_identifier)

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)

    rules.append(
        Antlr4Rule(
            "any_system_tf_identifier",
            Antlr4Selection([
                SYSTEM_TF_IDENTIFIER, *[
                    Antlr4Symbol(kw.replace("$", "KW_DOLAR_").upper(), False)
                    for kw in kws if kw.startswith("$")
                ]
            ])))
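
# Resulting rule shape (sketch; the KW_DOLAR_* names are derived from the
# collected "$..." keywords, e.g. $fatal -> KW_DOLAR_FATAL):
#   any_system_tf_identifier: SYSTEM_TF_IDENTIFIER | KW_DOLAR_FATAL | ...;
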
def extract_keywords_to_specific_rule(p: SvRule2Antlr4Rule):
    keywords = collect_keywords(p.rules)

    def get_kw_name(k):
        return "KW_" + k.replace("$", "DOLAR_").upper()

    def renamer(obj: iAntlr4GramElem):
        if isinstance(obj, Antlr4Symbol) and obj.is_terminal\
                and obj.symbol in keywords:
            obj.is_terminal = False
            obj.symbol = get_kw_name(obj.symbol)

    for r in p.rules:
        if not r.is_lexer_rule():
            r.walk(renamer)

    for k in sorted(keywords):
        kw_name = get_kw_name(k)
        kw_rule = Antlr4Rule(kw_name, Antlr4Symbol(k, True))
        p.rules.append(kw_rule)
        # if not re.match("^[A-Za-z0-9_]*$", k):
        #    print(k)
    return keywords
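
# e.g. the terminal 'module' used in parser rules becomes the non-terminal
# KW_MODULE and a matching lexer rule is appended:
#   KW_MODULE: 'module';
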
def fix_implications(rules):
    """
    :note: variants of implications are as a independent tokens, otherwise lexer parses it as -= > instead of - =>
    """
    any_impl_rule = Antlr4Rule(
        "any_implication",
        Antlr4parser().from_str("IMPLIES | IMPLIES_P | IMPLIES_N"))
    orig = Antlr4parser().from_str("( polarity_operator )? IMPLIES")

    def apply_rewrite(o):
        if isinstance(o, Antlr4Sequence):
            found_i = None
            for i, o2 in enumerate(o):
                if o2.eq_relaxed(orig[0]) and o[i + 1].eq_relaxed(orig[1]):
                    found_i = i
                    break
            if found_i is not None:
                del o[found_i + 1]
                o[found_i] = Antlr4Symbol(any_impl_rule.name, False)

    for r in rules:
        replace_item_by_sequence(r, apply_rewrite)

    rules.append(any_impl_rule)
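
# Rewrite sketch: each "( polarity_operator )? IMPLIES" pair found inside a
# sequence collapses into the single non-terminal any_implication:
#   before:  ... ( polarity_operator )? IMPLIES ...
#   after:   ... any_implication ...
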
def wrap_in_lexer_mode(rules, mode_name, enter_tokens, exit_tokens, tokens,
                       shared_tokens):
    for enter_token in enter_tokens:
        enter_rule = rule_by_name(rules, enter_token)
        enter_rule.lexer_actions.append(Antlr4LexerAction.pushMode(mode_name))

    for t_name in sorted(tokens.union(shared_tokens)):
        t_rule = rule_by_name(rules, t_name)
        if t_name in shared_tokens:
            # copy the rule and translate the mode-specific token
            # back to the original token type
            actions = deepcopy(t_rule.lexer_actions)
            if not Antlr4LexerAction.skip() in actions:
                actions.append(Antlr4LexerAction.type(t_name))
            mode_specific_t_rule = Antlr4Rule(mode_name + "_" + t_name,
                                              deepcopy(t_rule.body),
                                              lexer_mode=mode_name,
                                              lexer_actions=actions)
            rules.append(mode_specific_t_rule)
            t_rule = mode_specific_t_rule

        t_rule.lexer_mode = mode_name
        if t_name in sorted(exit_tokens):
            t_rule.lexer_actions.append(Antlr4LexerAction.popMode())
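
# Sketch of the emitted rule for a token T shared with the default mode
# (shape only; the type() action keeps the original token type):
#   <mode_name>_T: <body of T> -> type(T);
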
def selection_extract_common(rules, rule_name_a, rule_name_b, new_rule_name):
    """
    a0: a b c
    b0: a b d
    -> 
    new_rule_name: a b
    a0: new_rule_name c
    b0: new_rule_name d
    """

    a = rule_by_name(rules, rule_name_a)
    b = rule_by_name(rules, rule_name_b)
    assert isinstance(a.body, Antlr4Sequence), a
    assert isinstance(b.body, Antlr4Sequence), b
    i = 0
    for i, (_a, _b) in enumerate(zip(a.body, b.body)):
        if _a != _b:
            break
    assert i > 1
    body = Antlr4Sequence(a.body[:i])
    c = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(a), c)
    a.body[:i] = [Antlr4Symbol(new_rule_name, False)]
    b.body[:i] = [Antlr4Symbol(new_rule_name, False)]
                                    Antlr4Indent(1)]),
                    Antlr4Sequence([b, Antlr4Newline()]),
                ])

    HEADER = """/*
 * Grammar extracted from the VHDL 1993, 2002, 2008, 2018 standard and then merged together
 * (the standard is selected by parser property)
 */

grammar vhdl;
"""
    with open("vhdl.g4", "w") as f:
        f.write("\n\n")
        f.write(HEADER)
        for kw in keywords:
            r = Antlr4Rule(kw,
                           Antlr4Sequence([Antlr4Symbol(k, True) for k in kw]))
            f.write(r.toAntlr4())
            f.write("\n")

        for k, v in VhdlRule2Antlr4Rule.SPEC_SYMB.items():
            if k == "'":
                k = "'\\''"
            elif k == "\\":
                k = "'\\\\'"
            else:
                k = "'%s'" % k
            r = Antlr4Rule(v, Antlr4Symbol(k, True))
            f.write(r.toAntlr4())
            f.write("\n")

        with open("vhdl_base.g4") as b:
def optimize_class_scope(rules):
    p = Antlr4parser()
    to_replace0 = p.from_str("( package_scope | class_scope )? identifier")
    to_replace1 = p.from_str("( class_scope | package_scope )? identifier")

    package_or_class_scoped_id = Antlr4Rule("package_or_class_scoped_id", p.from_str(
        """( identifier ( parameter_value_assignment )? | KW_DOLAR_UNIT )
           ( DOUBLE_COLON identifier ( parameter_value_assignment )? )*"""))
    rules.append(package_or_class_scoped_id)

    def match_replace_fn_reduce_1_item_sequence(o):
        if isinstance(o, Antlr4Sequence) and len(o) == 1:
            return o[0]

    q0 = Antlr4Query(to_replace0)
    q1 = Antlr4Query(to_replace1)

    for r in rules:
        replace_item_by_sequence(r, match_replace_fn_reduce_1_item_sequence)
        # if r.name == "net_type_declaration":
        #     print(r.toAntlr4())
        m = q0.match(r.body)
        if not m:
            m = q1.match(r.body)
        if m:

            def apply_to_replace0_and_1(o):
                for match in m:
                    v = match.get(id(o), None)
                    if v is not None:
                        del match[id(o)]
                        if (v is to_replace0
                             or v is to_replace1
                             or (isinstance(v, Antlr4Symbol) and v.symbol == "identifier")):
                            return Antlr4Symbol(package_or_class_scoped_id.name, False)
                        else:
                            return Antlr4Sequence([])

            replace_item_by_sequence(r, apply_to_replace0_and_1)
            for _m in m:
                # assert that all matching items were replaced
                assert not _m
        #    print(r.toAntlr4())
        #    print(m)
        # else:
        #     if "package_scope | class_scope" in r.toAntlr4() or "class_scope | package_scope" in r.toAntlr4():
        #         print("not found " + r.toAntlr4())

    # class_qualifier:
    #   ( KW_LOCAL DOUBLE_COLON )? ( implicit_class_handle DOT 
    #                            | class_scope 
    #                            )?;
    # class_scope:
    #     ps_identifier ( parameter_value_assignment )? 
    #     ( DOUBLE_COLON identifier 
    #       ( parameter_value_assignment )?
    #     )* DOUBLE_COLON;
    # implicit_class_handle:
    #     KW_THIS ( DOT KW_SUPER )? 
    #      | KW_SUPER 
    # ;
    # package_scope:
    #  ( KW_DOLAR_UNIT 
    #    | identifier 
    #  ) DOUBLE_COLON;
    # hierarchical_identifier: ( KW_DOLAR_ROOT DOT )? ( identifier constant_bit_select DOT )* identifier;
    to_replace2 = p.from_str("( class_qualifier | package_scope )? hierarchical_identifier")
    package_or_class_scoped_path = Antlr4Rule("package_or_class_scoped_path", p.from_str("""
        ( KW_LOCAL DOUBLE_COLON )?
        ( 
          KW_DOLAR_ROOT
          | implicit_class_handle
          | ( 
              ( 
                  KW_DOLAR_UNIT  
                | identifier ( parameter_value_assignment )? 
              )
              ( DOUBLE_COLON identifier ( parameter_value_assignment )? )*
           )
        )
    """))
    package_or_class_scoped_hier_id_with_const_select = Antlr4Rule(
        "package_or_class_scoped_hier_id_with_const_select",
        p.from_str("""
            package_or_class_scoped_path
            ( constant_bit_select )* ( DOT identifier ( constant_bit_select )* )*
    """))

    # bit_select:
    #  LSQUARE_BR expression RSQUARE_BR;
    # select:
    #  ( DOT identifier 
    #   | bit_select 
    #   )* ( LSQUARE_BR part_select_range RSQUARE_BR )?;
    # part_select_range:
    #  constant_range 
    #   | indexed_range 
    #  ;
    # indexed_range:
    #  expression ( PLUS 
    #               | MINUS 
    #               ) COLON constant_expression;
    # constant_range:
    #  constant_expression COLON constant_expression;

    package_or_class_scoped_hier_id_with_select = Antlr4Rule(
        "package_or_class_scoped_hier_id_with_select",
        p.from_str("""
            package_or_class_scoped_path
            ( bit_select )* ( DOT identifier ( bit_select )* )*
            ( LSQUARE_BR expression ( PLUS | MINUS )? COLON constant_expression RSQUARE_BR )?
    """))
    rules.append(package_or_class_scoped_path)
    rules.append(package_or_class_scoped_hier_id_with_const_select)
    rules.append(package_or_class_scoped_hier_id_with_select)
    primary_no_cast_no_call = rule_by_name(rules, "primary_no_cast_no_call")
    m = Antlr4Query(to_replace2).match(primary_no_cast_no_call.body)

    def apply_to_replace2(o):
        for match in m:
            v = match.get(id(o), None)
            if v is not None:
                if (v is to_replace2
                        or (isinstance(v, Antlr4Symbol)
                            and v.symbol == "hierarchical_identifier")):
                    return Antlr4Symbol(package_or_class_scoped_hier_id_with_const_select.name, False)
                else:
                    return Antlr4Sequence([])

    replace_item_by_sequence(primary_no_cast_no_call, apply_to_replace2)

    _optimize_ps_type_identifier(rules)
    _optimize_ps_parameter_identifier(rules)
    rules.remove(rule_by_name(rules, "class_qualifier"))
def parse_rule(self, rule: Proto_ruleContext):
    name = rule.NAME().getText()
    body = self.parse_element(rule.element())
    return Antlr4Rule(name, body)
    def test_optimize0(self):
        # data_type rule
        r_str = """
            a ( b )? ( c )* 
             | d ( b )? 
             | e 
             | f ( kw0 ( b )? )? kw1 f0 ( f0 )* kw2 ( c )* 
             | kw3 ( a0 )? kw1 a1 ( kw4 a1 )* kw2 ( c )* 
             | kw5 
             | kw6 
             | kw7 ( kw8 )? a2 ( a3 )? ( kw8 a2 )? 
             | ( a4 | a5 )? a2 ( c )* 
             | a6
             | kw9 
             | a7
             | a8
        """
        exp0 = """
             ( a ( b )? 
               | f ( kw0 ( b )? )? kw1 f0 ( f0 )* kw2
               | kw3 ( a0 )? kw1 a1 ( kw4 a1 )* kw2
               | ( a4 | a5 )? a2
             ) ( c )* 
             | d ( b )? 
             | e 
             | kw5 
             | kw6 
             | kw7 ( kw8 )? a2 ( a3 )? ( kw8 a2 )? 
             | a6
             | kw9 
             | a7
             | a8
        """
        r = Antlr4parser().from_str(r_str)
        _selection_options_to_sequnces(r)
        res, _ = _selection_share_suffix(r)
        self.assertTextEq(exp0, res.toAntlr4())

        exp1 = """
            a ( b )?
            | ( f ( kw0 ( b )? )? kw1 f0 ( f0 )*
                | kw3 ( a0 )? kw1 a1 ( kw4 a1 )* 
              ) kw2
            | ( a4 | a5 )? a2
        """
        _selection_options_to_sequnces(r[0][0])
        res, _ = _selection_share_suffix(r[0][0])
        self.assertTextEq(exp1, res.toAntlr4())
        exp2 = """
             ( a ( b )?
              | ( f ( kw0 ( b )? )? kw1 f0 ( f0 )*
                  | kw3 ( a0 )? kw1 a1 ( kw4 a1 )* 
                ) kw2
              | ( a4 | a5 )? a2
             ) ( c )* 
             | d ( b )? 
             | e 
             | kw5 
             | kw6 
             | kw7 ( kw8 )? a2 ( a3 )? ( kw8 a2 )? 
             | a6
             | kw9 
             | a7
             | a8
        """
        r = Antlr4parser().from_str(r_str)
        Antlr4GenericOptimizer().optimize([
            Antlr4Rule("tmp", r),
        ])
        self.assertTextEq(exp2, r.toAntlr4())
def remove_useless_and_normalize_names(p):
    renames = {}
    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        renames[k] = v
    # rm_newline_from_simple_rules(p.rules)
    # nts = get_used_non_terminals(p.rules)
    # def_nts = get_defined_non_terminals(p.rules)

    # overspecified
    # finish_number 0 - 2
    replace_rule("finish_number", "UNSIGNED_NUMBER", renames, p)

    # scalar_constant: a 1-bit number
    replace_rule("scalar_constant", "integral_number", renames, p)

    # init_val: a 1-bit value
    replace_rule("init_val", "integral_number", renames, p)

    # edge_descriptor: 2 tristate digits
    # edge_descriptor: '01' | '10' | Z_OR_X ZERO_OR_ONE | ZERO_OR_ONE Z_OR_X;
    # dpi_spec_string: two concrete strings
    replace_rule("dpi_spec_string", "STRING_LITERAL", renames, p)

    # #0 -> # UNSIGNED_NUMBER
    primitive_delay = Antlr4Rule(
        "primitive_delay",
        Antlr4Sequence([
            Antlr4Symbol("HASH", False),
            Antlr4Symbol("UNSIGNED_NUMBER", False),
        ]))
    p.rules.append(primitive_delay)
    replace_rule("#0", "primitive_delay", renames, p)

    # all same
    ps_identifier_rules = [
        "ps_class_identifier",
        "ps_covergroup_identifier",
        "ps_checker_identifier",
    ]
    for name in ps_identifier_rules:
        replace_rule(name, "ps_identifier", renames, p)

    ps_or_hierarchical_id_rules = [
        "ps_or_hierarchical_net_identifier",
        "ps_or_hierarchical_property_identifier",
        "ps_or_hierarchical_sequence_identifier",
        "ps_or_hierarchical_tf_identifier",
    ]

    ps_or_hierarchical_identifier = Antlr4Rule(
        "ps_or_hierarchical_identifier",
        Antlr4Selection([
            Antlr4Sequence([
                Antlr4Symbol("package_scope", False),
                Antlr4Symbol("identifier", False)
            ]),
            # can be only identifier
            Antlr4Symbol("hierarchical_identifier", False),
        ]))
    p.rules.append(ps_or_hierarchical_identifier)
    for name in ps_or_hierarchical_id_rules:
        replace_rule(name, "ps_or_hierarchical_identifier", renames, p)

    to_lexer = [
        "c_identifier",
        "unsigned_number",
        "simple_identifier",
        "system_tf_identifier",
        "unsigned_number",
        "string_literal",
        "binary_number",
        "octal_number",
        "hex_number",
        "octal_number",
        "hex_number",
        "fixed_point_number",
        "escaped_identifier",
        "unbased_unsized_literal",
        "time_literal",

        # because it is very hard to switch the lexer mode to parse
        # edge_descriptor, and it is easy to just parse a comma-separated list of 2 chars
        "edge_control_specifier",
        "level_symbol",
        "output_symbol",
        "edge_symbol",
        "file_path_spec",
    ]
    for tl in to_lexer:
        renames[tl] = tl.upper()

    fragments = {
        "binary_value", "octal_value", "hex_value", "decimal_base",
        "binary_base", "octal_base", "hex_base", "non_zero_unsigned_number",
        "size", "sign", "edge_descriptor", "non_zero_decimal_digit",
        "decimal_digit", "binary_digit", "octal_digit", "hex_digit", "x_digit",
        "z_digit", "exp", 'white_space', 'zero_or_one', 'z_or_x',
        'Any_ASCII_Characters',
        "any_printable_ASCII_character_except_white_space", "time_unit"
    }

    for r in p.rules:
        if r.name.startswith("$"):
            renames[r.name] = r.name.replace("$", "dolar_")
        for fr in fragments:
            if r.name in fragments:
                r.is_fragment = True
                renames[fr] = fr.upper()

    identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "identifier")
    }
    hierarchical_identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "hierarchical_identifier")
    }

    to_remove = {
        "comment",
        "one_line_comment",
        "block_comment",
        "comment_text",
        "white_space",

        # library rules
        "library_text",
        "library_description",
        "library_declaration",
        "include_statement",
        "file_path_spec",
        "file_path_spec",
    }
    to_remove.update(identifier_rule_equivalents)
    to_remove.update(hierarchical_identifier_rule_equivalents)
    simple_rules_to_remove = [
        "default_clause",  # default kw
        "variable_port_type",
        "limit_value",  # used only in more specific limit values
        "dpi_function_proto",  # used only in dpi block so we already know
        "dpi_task_proto",  # used only in dpi block so we already know
        "property_lvar_port_direction",  # used only in property so we already know
        # "consecutive_repetition",  # useless
        "trans_item",
        "ordered_parameter_assignment",
        "function_statement",
        "case_expression",
        "case_item_expression",
        "open_value_range",  # used only in open_range_list so we already know
        "constant_assignment_pattern_expression",  # parser do not see the difference between const/non const
        "clockvar",  # used only in clockvar_expression
        "path_delay_expression",  # used only in more specific rules
        "constant_function_call",  # parser do not see the difference between const/non const
        "function_subroutine_call",
        "constant_let_expression",  # parser do not see the difference between const/non const
        "attr_name",  # used only in attr_spec
        "array_identifier",  # never used
        "checker_identifier",  # used only in rule with same name
        "class_identifier",
        "class_variable_identifier",
        "clocking_identifier",
        "config_identifier",
        "const_identifier",
        "constraint_identifier",
        "covergroup_identifier",
        "covergroup_variable_identifier",
        "cover_point_identifier",
        "cross_identifier",
        "enum_identifier",
        "formal_identifier",
        "function_identifier",
        "generate_block_identifier",
        "genvar_identifier",
        "hierarchical_array_identifier",
        "hierarchical_block_identifier",
        "hierarchical_event_identifier",
        "hierarchical_net_identifier",
        "hierarchical_parameter_identifier",
        "hierarchical_property_identifier",
        "hierarchical_sequence_identifier",
        "hierarchical_task_identifier",
        "hierarchical_tf_identifier",
        "hierarchical_variable_identifier",
        "index_variable_identifier",
        "interface_identifier",
        "interface_instance_identifier",
        # "inout_port_identifier",
        # "input_port_identifier",
        "instance_identifier",
        "member_identifier",
        "method_identifier",
        "modport_identifier",
        "module_identifier",
        "net_identifier",
        # "output_port_identifier"
        "package_identifier",
        "parameter_identifier",
        "port_identifier",
        "production_identifier",
        "program_identifier",
        "property_identifier",
        "sequence_identifier",
        "signal_identifier",
        "specparam_identifier",
        "task_identifier",
        "tf_identifier",
        "terminal_identifier",
        "topmodule_identifier",
        "udp_identifier",
        "variable_identifier",
        "let_identifier",
        "type_identifier",

        # covergroup_expression
        "with_covergroup_expression",
        "set_covergroup_expression",
        "integer_covergroup_expression",
        "cross_set_expression",
        "data_event",
        "reference_event",
    ]
    for sr in simple_rules_to_remove:
        remove_simple_rule(sr, p)
    p.rules = [r for r in p.rules if r.name not in to_remove]

    for idname in identifier_rule_equivalents:
        renames[idname] = "identifier"

    for idname in hierarchical_identifier_rule_equivalents:
        renames[idname] = "hierarchical_identifier"

    apply_rename = generate_renamer(renames, True)
    for r in p.rules:
        r.walk(apply_rename)
        r.walk(mark_regex)

    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        body = Antlr4Symbol(k, True)
        r = Antlr4Rule(v, body)
        if k in ['"', "_"]:
            r.is_fragment = True
        p.rules.append(r)

    # because C_IDENTIFIER is just a normal identifier without "$", it can match identifiers as well
    identifier = rule_by_name(p.rules, "identifier")
    identifier.body.insert(0, Antlr4Symbol("C_IDENTIFIER", False))

    kws = collect_keywords(p.rules)
    for kw in kws:
        if kw not in IEEE1800_2017_KEYWORDS and kw != "1step" and "$" not in kw:
            identifier.body.append(Antlr4Symbol("KW_" + kw.upper(), False))
def rule_from_str(rule_str):
    name, body = rule_str.split(":")
    return Antlr4Rule(name.strip(), Antlr4parser().from_str(body))
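
# Usage sketch:
#   r = rule_from_str("any_implication: IMPLIES | IMPLIES_P | IMPLIES_N")
#   assert r.name == "any_implication"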