def fix_subroutine_call(rules):
    """Insert '( class_qualifier )? method_call_body' as the first element of subroutine_call's body."""
    rule = rule_by_name(rules, "subroutine_call")
    qualifier = Antlr4Option(Antlr4Symbol("class_qualifier", False))
    call_body = Antlr4Symbol("method_call_body", False)
    rule.body.insert(0, Antlr4Sequence([qualifier, call_body]))
def add_comments_and_ws(rules):
    """Append lexer rules for comments and whitespace; all three are routed to the HIDDEN channel."""
    # ONE_LINE_COMMENT: '//' .*? '\r'? ( '\n' | EOF ) -> channel(HIDDEN);
    olc = Antlr4Rule("ONE_LINE_COMMENT", Antlr4Sequence([
        Antlr4Symbol("//", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Option(Antlr4Symbol("\r", True)),
        # EOF alternative: a comment on the very last line has no newline
        Antlr4Selection([
            Antlr4Symbol("\n", True),
            Antlr4Symbol("EOF", False),
        ])
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(olc)
    # BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
    bc = Antlr4Rule("BLOCK_COMMENT", Antlr4Sequence([
        Antlr4Symbol("/*", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Symbol("*/", True),
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(bc)
    # WHITE_SPACE: [ \t\n\r] + -> channel(HIDDEN);
    # (kept on the HIDDEN channel rather than skipped, same as the comments)
    ws = Antlr4Rule("WHITE_SPACE", Antlr4Sequence([
        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(ws)
def parse_element_text(self, ctx: Element_textContext) -> Antlr4Symbol:
    """
    Convert an element_text parse context into an Antlr4Symbol.

    element_text: NAME | TERMINAL;
    """
    name_node = ctx.NAME()
    if name_node is None:
        # TERMINAL text is wrapped in <b>...</b>; strip the tags.
        raw = ctx.TERMINAL().getText()
        stripped = raw[len("<b>"):-len("</b>")]
        return Antlr4Symbol(stripped, True)
    return Antlr4Symbol(name_node.getText(), False)
def rm_ambiguity(rules):
    """Make the optional 'ASSIGN class_new' part of variable_decl_assignment mandatory to remove an ambiguity."""
    rule = rule_by_name(rules, "variable_decl_assignment")
    target = Antlr4Option(Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ]))

    def unwrap_option(o):
        # replace the Option wrapper by its body
        if o == target:
            return o.body

    replace_item_by_sequence(rule, unwrap_option)
def extract_bin_ops(rules, current_expr_rule, ops_to_extrat, new_rule_name,
                    handle_conditional_fn, handle_inside_fn):
    """
    Create a new precedence rule for the binary operators in *ops_to_extrat*
    and insert it before *current_expr_rule*.

    :param ops_to_extrat: token names of the operators on this precedence
        level; KW_INSIDE, KW_DIST and QUESTIONMARK are handled specially
    :param handle_conditional_fn: callback extending the choices for the
        ternary operator (QUESTIONMARK)
    :param handle_inside_fn: callback extending the choices for KW_INSIDE
    :return: the newly created Antlr4Rule
    """
    # find option with binary op rule
    # expr = rule_by_name(rules, "expression")
    ops_no_special = [
        o for o in ops_to_extrat
        if o not in ["KW_INSIDE", "KW_DIST", "QUESTIONMARK", ]
    ]
    bin_op_choices = []
    if len(ops_no_special) > 0:
        if len(ops_no_special) == 1:
            op = Antlr4Symbol(ops_no_special[0], False)
        else:
            op = Antlr4Selection(
                [Antlr4Symbol(o, False) for o in ops_no_special])
        # expression (binary_operator ( attribute_instance )* expression)*
        bin_op_choice = Antlr4Sequence([
            op,
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol(current_expr_rule.name, False)
        ])
        bin_op_choices.append(bin_op_choice)
    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(bin_op_choices, current_expr_rule)
    if "KW_DIST" in ops_to_extrat:
        # handled differently, only allowed on specified places
        pass
    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(bin_op_choices, current_expr_rule)
    for c in bin_op_choices:
        assert isinstance(c, iAntlr4GramElem), c
    # create a new rule which contains rule for extracted binary operators
    if len(bin_op_choices) > 1:
        new_body = Antlr4Selection(bin_op_choices)
    else:
        new_body = bin_op_choices[0]
    # final shape: current_expr ( <operator tails> )*
    new_body = Antlr4Sequence([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Iteration(new_body)
    ])
    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(current_expr_rule), new_r)
    return new_r
def subroutine_call_rm_lr(rules):
    """Rewrite the head of the third subroutine_call alternative so it no longer refers back into subroutine_call."""
    rule = rule_by_name(rules, "subroutine_call")
    assert isinstance(rule.body, Antlr4Selection)
    alternative = rule.body[2]
    items = list(iter_non_visuals(alternative))
    assert items[-1].symbol == "method_call_body", items[-1].symbol
    head: Antlr4Selection = items[0]
    head.clear()
    head.extend([
        Antlr4Symbol(name, False)
        for name in ("primary_no_cast_no_call", "cast", "implicit_class_handle")
    ])
def add_interface_class_declaration(rules):
    """
    Because interface_class_declaration is not used anywhere (is missing in
    specified rules), allow it wherever class_declaration may appear.
    """
    intf = Antlr4Symbol("interface_class_declaration", False)
    cls = Antlr4Symbol("class_declaration", False)

    def widen_to_selection(o):
        if o != cls:
            return None
        # keep the matched node, add a fresh copy of the interface variant
        return Antlr4Selection([o, deepcopy(intf)])

    for rule in rules:
        replace_item_by_sequence(rule, widen_to_selection)
def handle_inside_fn(bin_op_choices, current_expr_rule):
    """Append the KW_INSIDE operator expansion to the collected binary-operator choices."""
    bin_op_choices[-1].extend([Antlr4Newline(), Antlr4Indent(1)])
    # expression (KW_INSIDE LBRACE open_range_list RBRACE)*;
    inside_tail = Antlr4Sequence([
        Antlr4Symbol(tok, False)
        for tok in ("KW_INSIDE", "LBRACE", "open_range_list", "RBRACE")
    ])
    bin_op_choices.append(Antlr4Sequence([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Iteration(inside_tail),
    ]))
def fix_implicit_data_type(rules):
    """
    Rewrite implicit_data_type so it can not match an empty string:

    : (signing)? (packed_dimension)*
    ->
    : signing (packed_dimension)*
    | (packed_dimension)+
    ;
    """
    rule = rule_by_name(rules, "implicit_data_type")
    with_signing = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(Antlr4Symbol("packed_dimension", False)),
    ])
    dims_only = Antlr4Iteration(
        Antlr4Symbol("packed_dimension", False), positive=True)
    rule.body = Antlr4Selection([with_signing, dims_only])
def fix_priority_of__class_scope__package_scope(rules):
    """Swap the 'class_scope | package_scope' alternatives so package_scope is tried first."""
    class_scope = Antlr4Symbol("class_scope", False)
    package_scope = Antlr4Symbol("package_scope", False)
    searched = Antlr4Selection([deepcopy(class_scope), deepcopy(package_scope)])
    replacement = Antlr4Selection([deepcopy(package_scope), deepcopy(class_scope)])

    def swap_priority(o):
        if o == searched:
            # fresh copy per match site so tree nodes are not shared
            return deepcopy(replacement)

    for rule in rules:
        replace_item_by_sequence(rule, swap_priority)
def fix_lexer_for_table_def(p):
    """
    Adjust the lexer for UDP table definitions: merge OUTPUT_SYMBOL into
    LEVEL_SYMBOL and move the table-related tokens into a separate
    TABLE_MODE lexer mode entered at KW_TABLE and left at KW_ENDTABLE.
    """
    # because OUTPUT_SYMBOL is a special case of LEVEL_SYMBOL
    OUTPUT_SYMBOL = Antlr4Symbol("OUTPUT_SYMBOL", False)

    def OUTPUT_SYMBOL_to_LEVEL_SYMBOL(o):
        if o == OUTPUT_SYMBOL:
            o.symbol = "LEVEL_SYMBOL"

    for r in p.rules:
        r.body.walk(OUTPUT_SYMBOL_to_LEVEL_SYMBOL)
    p.rules.remove(rule_by_name(p.rules, "OUTPUT_SYMBOL"))
    # collect every lexer token reachable from the two table rules
    table_tokens = get_all_used_lexer_tokens(p.rules, "combinational_body")
    table_tokens2 = get_all_used_lexer_tokens(p.rules, "sequential_entry")
    table_tokens = table_tokens.union(table_tokens2)
    # [TODO] += comments, whitespaces
    # KW_TABLE itself is the mode entry token, not part of the mode
    table_tokens.remove("KW_TABLE")
    # tokens which must stay usable in the default mode as well
    table_shared_tokens = {
        'SEMI', 'RPAREN', 'COLON', 'LPAREN', 'MINUS',
        *COMMENT_AND_WS_TOKENS
    }
    wrap_in_lexer_mode(p.rules, "TABLE_MODE", {"KW_TABLE", },
                       {"KW_ENDTABLE", }, table_tokens, table_shared_tokens)
def split_rule(rules, rule_name, symbols_to_extract: List[str],
               subrule_name: str):
    """
    Keep in rule *rule_name* only the options which can start with one of
    *symbols_to_extract*; move the remaining options into a new rule named
    *subrule_name*, which is inserted before the original rule and becomes
    its first alternative.

    :return: the newly created sub rule
    """
    r = rule_by_name(rules, rule_name)
    assert isinstance(r.body, Antlr4Selection), r
    sub_options = Antlr4Selection([])
    for o in r.body:
        start_symbols = set()
        _direct_left_corner(o, start_symbols, allow_eps_in_sel=True)
        # option can not start with any extracted symbol -> goes to subrule
        if not start_symbols.intersection(symbols_to_extract):
            sub_options.append(o)
    r.body = Antlr4Selection([o for o in r.body if not (o in sub_options)])
    r.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(r.body) == 1:
        # only the subrule reference remains, collapse the selection
        r.body = r.body[0]
    assert len(sub_options) > 0
    if len(sub_options) == 1:
        sub_options = sub_options[0]
    else:
        sub_options = Antlr4Selection(sub_options)
    sub_r = Antlr4Rule(subrule_name, sub_options)
    rules.insert(rules.index(r), sub_r)
    return sub_r
def optimise_subroutine_call(rules):
    """
    Restructure subroutine_call: extract the repeated argument tail into a
    new subroutine_call_args rule, rewrite the first alternative as an
    iterative (non left-recursive) method-call chain and simplify the
    system tf-call alternative.

    Before each rewrite the expected current shape of the alternative is
    pinned with an eq_relaxed assert, so grammar changes elsewhere are
    detected early.
    """
    r = rule_by_name(rules, "subroutine_call")
    Antlr4GenericOptimizer().optimize([r, ])
    # expected shape of alternative 0 before the rewrite
    c0 = Antlr4parser().from_str("""
        ( class_qualifier | ( primary | implicit_class_handle ) DOT )?
        ( identifier ( attribute_instance )*
            ( LPAREN list_of_arguments RPAREN )?
          | array_method_name ( attribute_instance )*
            ( LPAREN list_of_arguments RPAREN )?
            ( KW_WITH LPAREN expression RPAREN )?
          | randomize_call )
    """)
    assert r.body[0].eq_relaxed(c0), r.body[0]
    # the argument tail shared by all call forms
    subroutine_call_args = Antlr4Rule(
        "subroutine_call_args", Antlr4parser().from_str("""
        ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )?
        ( KW_WITH LPAREN expression RPAREN )?
    """))
    rules.insert(rules.index(r), subroutine_call_args)
    # iterative method-call chain replacing the left-recursive form
    new_c0 = Antlr4parser().from_str("""
        ( primary_no_cast_no_call | cast ) subroutine_call_args
        ( DOT ( array_method_name | randomize_call
                | primary_no_cast_no_call | cast ) subroutine_call_args )*
    """)
    r.body[0] = new_c0
    # primary_no_cast_no_call is now reachable from subroutine_call,
    # drop it from primary to avoid duplication
    primary = rule_by_name(rules, "primary")
    assert primary.body[0].eq_relaxed(
        Antlr4Symbol("primary_no_cast_no_call", False))
    del primary.body[0]
    # expected shape of the system tf-call alternative
    c2 = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN (
            list_of_arguments
            | data_type ( COMMA expression )?
            | expression ( COMMA ( expression )? )*
              ( COMMA ( clocking_event )? )?
        ) RPAREN )?
    """)
    assert r.body[2].eq_relaxed(c2)
    # simplified equivalent of c2
    r.body[2] = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN (
            ( data_type )? list_of_arguments ( COMMA clocking_event )?
        ) RPAREN )?
    """)
    # alternative 1 is covered by the new alternative 0, remove it
    c1 = Antlr4parser().from_str("""
        ps_or_hierarchical_identifier ( attribute_instance )*
        ( LPAREN list_of_arguments RPAREN )?
    """)
    assert r.body[1].eq_relaxed(c1), r.body[1]
    del r.body[1]
def extract_common_from_sequences_from_part(part, prefix, suffix,
                                            extracted_rule_name,
                                            prefix_to_non_optional):
    """Rebuild *part* in place as: [prefix variants] extracted_rule_name [suffix]."""
    part.clear()
    if prefix:
        variants = []
        had_empty_variant = False
        for variant in _sequence_expand_optionality(prefix):
            if variant:
                variants.append(variant)
            else:
                had_empty_variant = True
        head = Antlr4Selection(variants) if len(variants) > 1 else variants[0]
        if prefix_to_non_optional and had_empty_variant:
            # because if the parts do not have different suffix
            # the prefix is there to distinguish between them
            head = Antlr4Option(head)
        part.append(head)
    part.append(Antlr4Symbol(extracted_rule_name, False))
    if suffix:
        part.extend(suffix)
def match_replace_fn(o):
    # Merge single-character terminal alternatives of a selection into one
    # regex character class, e.g. 'a' | 'b' | 'c' -> [abc].
    if isinstance(o, Antlr4Selection):
        char_symb_to_replace = []
        for orig_c in o:
            c = orig_c
            if isinstance(c, Antlr4Sequence):
                if len(c) > 1:
                    # multi element sequence can not be a single character
                    continue
                else:
                    c = c[0]
            if isinstance(c, Antlr4Symbol) and c.is_terminal and len(
                    c.symbol) == 1:
                char_symb_to_replace.append((orig_c, c))
        if len(char_symb_to_replace) > 1:
            # build an regex out of them
            # and replace them by the regex
            for c, _ in char_symb_to_replace:
                o.remove(c)
            re_str = "[%s]" % ("".join(
                [c._escaped() for _, c in char_symb_to_replace]))
            re = Antlr4Symbol(re_str, True, is_regex=True)
            if len(o):
                # other alternatives remain: append the char class to them
                # NOTE(review): appended at the end, so the relative order
                # of alternatives changes — confirm this is acceptable
                o.append(re)
            else:
                # every alternative was a single char: replace the whole
                # selection by the char-class symbol
                return Antlr4Sequence([re, ])
def _extract_option_as_rule(r, rules, options_i: List[Tuple[int, iAntlr4GramElem]],
                            new_rule_name):
    """
    Move the alternatives of *r* listed in *options_i* (index, expected
    element) into a new rule named *new_rule_name*, which is inserted
    before *r* and referenced as its first alternative.

    :return: the newly created rule
    """
    assert isinstance(r.body, Antlr4Selection)
    extracted = Antlr4Selection([])
    taken = set()
    for idx, expected in options_i:
        assert r.body[idx].eq_relaxed(expected), (r.body[idx], expected)
        extracted.append(r.body[idx])
        taken.add(idx)
    remaining = [Antlr4Symbol(new_rule_name, False)]
    remaining.extend(x for i, x in enumerate(r.body) if i not in taken)
    r.body = remaining[0] if len(remaining) == 1 else Antlr4Selection(remaining)
    new_body = extracted[0] if len(extracted) == 1 else extracted
    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(r), new_r)
    return new_r
def numbers_add_whitespace_after_base(rules):
    """
    Allow whitespace after the base specifier of sized numbers.

    The SIZE prefix is stripped from the based-number lexer rules (which
    become fragments of a new ANY_BASED_NUMBER token), optional WHITE_SPACE
    is injected around the base specifier, and integral_number is rebuilt
    on top of ANY_BASED_NUMBER.
    """
    number_rules = {
        "DECIMAL_NUMBER_WITH_BASE",
        "DECIMAL_INVALID_NUMBER_WITH_BASE",
        "DECIMAL_TRISTATE_NUMBER_WITH_BASE",
        "BINARY_NUMBER",
        "OCTAL_NUMBER",
        "HEX_NUMBER",
    }
    number_base_rules = {
        "DECIMAL_BASE",
        "BINARY_BASE",
        "OCTAL_BASE",
        "HEX_BASE",
    }
    # decimal_number is used only in integral_number
    inline_rule(rules, "decimal_number")

    def opt_ws():
        return Antlr4Option(Antlr4Symbol("WHITE_SPACE", False))

    # NOTE: a stray no-op expression statement creating an unused
    # Antlr4Option("UNSIGNED_NUMBER") was removed here (dead code).
    for r in rules:
        if r.name in number_rules:
            # ( SIZE )? *_BASE ....
            assert r.body[0].body.symbol == "SIZE", r
            assert r.body[1].symbol.endswith("_BASE"), r
            del r.body[0]
            r.is_fragment = True
        elif r.name in number_base_rules:
            # APOSTROPHE ( [sS] )? [dD];
            r.body.insert(2, opt_ws())
            r.body.insert(1, opt_ws())
            r.body.append(opt_ws())
    any_based_number = Antlr4Rule(
        "ANY_BASED_NUMBER",
        Antlr4Selection([Antlr4Symbol(n, False) for n in number_rules]))
    rules.insert(rules.index(rule_by_name(rules, "HEX_NUMBER")),
                 any_based_number)
    integral_number = rule_by_name(rules, "integral_number")
    integral_number.body = Antlr4parser().from_str("""
        ( UNSIGNED_NUMBER )? ANY_BASED_NUMBER
        | UNSIGNED_NUMBER
    """)
def add_predicate(r, std):
    """Append a language-version semantic predicate to lexer rule *r*."""
    # The Definitive ANTLR 4 Reference, p 194: in a lexer rule the
    # predicate should be on the right side
    predicate = Antlr4Symbol(
        "{language_version >= hdlConvertor::Language::%s}?" % std,
        True, is_regex=True)
    r.body = Antlr4Sequence([r.body, predicate])
def match_replace_fn(o: iAntlr4GramElem):
    """When *o* is the searched element, turn it into a selection of its body or a LA(1) != COLON predicate."""
    if o != c_id:
        return None
    guard = Antlr4Sequence([
        Antlr4Symbol("{%s != COLON}?" % la1, True, True),
    ])
    return Antlr4Selection([o.body, guard])
def match_replace_fn(o: iAntlr4GramElem):
    """Turn an optional sequence beginning with KW_ELSE into a selection of the sequence or a LA(1) != KW_ELSE predicate."""
    if not (isinstance(o, Antlr4Option) and isinstance(o.body, Antlr4Sequence)):
        return None
    if o.body[0] != kw_else:
        return None
    guard = Antlr4Sequence([
        Antlr4Symbol("{%s != KW_ELSE}?" % la1, True, True),
    ])
    return Antlr4Selection([o.body, guard])
def apply_rewrite(o):
    """
    In a sequence, collapse the first adjacent pair matching
    (orig[0], orig[1]) into a single reference to any_impl_rule.
    """
    if not isinstance(o, Antlr4Sequence):
        return
    found_i = None
    # stop one before the end: we match the pair (o[i], o[i + 1]);
    # the original scanned the full range and could raise IndexError
    # when orig[0] matched the last element
    for i in range(len(o) - 1):
        if o[i].eq_relaxed(orig[0]) and o[i + 1].eq_relaxed(orig[1]):
            found_i = i
            break
    if found_i is not None:
        del o[found_i + 1]
        o[found_i] = Antlr4Symbol(any_impl_rule.name, False)
def add_file_path_literal_rules(p):
    """Append the lexer rules used for file path literals (FILE_PATH_SPEC_CHAR fragment + FILE_PATH_SPEC)."""
    spec_char_rule = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol("[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )", True, True),
        is_fragment=True)
    p.rules.append(spec_char_rule)
    semi_separated_char = Antlr4Option(
        Antlr4Sequence([
            Antlr4Symbol('SEMI', False),
            Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
        ]))
    spec_body = Antlr4Iteration(
        Antlr4Sequence([
            Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
            semi_separated_char,
        ]),
        positive=True)
    p.rules.append(Antlr4Rule("FILE_PATH_SPEC", spec_body))
def optimize_item_rules(rules):
    """Inline the *_item wrapper rules and patch the alternatives of generate_item."""
    rules_to_inline = (
        "package_or_generate_item_declaration",
        "module_or_generate_item",
        "module_or_generate_item_declaration",
        "module_common_item",
        "interface_or_generate_item",
        "checker_or_generate_item_declaration",
    )
    for name in rules_to_inline:
        inline_rule(rules, name)
    generate_item = rule_by_name(rules, "generate_item")
    assert generate_item.body[-1].eq_relaxed(
        Antlr4Symbol("checker_or_generate_item", False))
    generate_item.body[-1] = Antlr4parser().from_str("KW_RAND data_declaration")
    generate_item.body.append(Antlr4parser().from_str("program_generate_item"))
def apply_to_replace2(o):
    # Replacement callback: if *o* was captured by one of the matches in
    # *m*, replace it by a reference to
    # package_or_class_scoped_hier_id_with_const_select when it is the
    # searched element; erase every other captured item.
    # NOTE(review): unlike apply_to_replace0_and_1 this does not
    # `del match[id(o)]` after consuming the entry — confirm whether the
    # asymmetry is intentional.
    for match in m:
        v = match.get(id(o), None)
        if v is not None:
            if (v is to_replace2
                    or (isinstance(v, Antlr4Symbol)
                        and v.symbol == "hierarchical_identifier")):
                return Antlr4Symbol(
                    package_or_class_scoped_hier_id_with_const_select.name,
                    False)
            else:
                # captured but not the target: remove it from the tree
                return Antlr4Sequence([])
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove direct left recursion from rule *rule_name*.

    The original rule is renamed to <rule_name>_item and keeps the
    non-recursive alternatives; a new rule named *rule_name* is created
    from the formerly left-recursive alternatives, each rewritten to start
    with a reference to the _item rule.
    """
    r = rule_by_name(rules, rule_name)
    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()
    # find choices which start with this rule's own non-terminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()
    # remove the choices which cause the left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)
    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]
    # rename this rule to <rule_name>_item
    r_base_name = r.name + "_item"
    for _r in rules:
        # the new name must not collide with any existing rule
        # (fixed: the original asserted `r.name != r_base_name`,
        # which is trivially true and never checked `_r`)
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name
    # create a new rule which implements the removed choices and also
    # expands to the _item rule
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)
        if not choices_new:
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)
    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
def apply_to_replace0_and_1(o):
    """Replacement callback: substitute captured identifier occurrences with package_or_class_scoped_id; erase other captured items."""
    for match in m:
        v = match.get(id(o), None)
        if v is None:
            continue
        # consume the entry so it is not applied twice
        del match[id(o)]
        is_target = (
            v is to_replace0
            or v is to_replace1
            or (isinstance(v, Antlr4Symbol) and v.symbol == "identifier"))
        if is_target:
            return Antlr4Symbol(package_or_class_scoped_id.name, False)
        return Antlr4Sequence([])
def fix_dpi_import_export(rules):
    """Allow ESCAPED_IDENTIFIER wherever C_IDENTIFIER appears inside dpi_import_export."""
    C_IDENTIFIER = Antlr4Symbol("C_IDENTIFIER", False)

    def match_replace_fn(o):
        if o == C_IDENTIFIER:
            # reuse the matched node *o* instead of the shared module-level
            # template object, so multiple replacement sites do not alias
            # the same tree node (consistent with the other fix_* helpers)
            return Antlr4Selection(
                [o, Antlr4Symbol("ESCAPED_IDENTIFIER", False)])

    r = rule_by_name(rules, "dpi_import_export")
    replace_item_by_sequence(r.body, match_replace_fn)
def fix_SYSTEM_TF_IDENTIFIER(rules):
    """Replace SYSTEM_TF_IDENTIFIER references by any_system_tf_identifier and define that rule from the $-prefixed keywords."""
    kws = collect_keywords(rules)
    sys_tf = Antlr4Symbol("SYSTEM_TF_IDENTIFIER", False)
    any_sys_tf = Antlr4Symbol("any_system_tf_identifier", False)

    def substitute(o):
        if o == sys_tf:
            return deepcopy(any_sys_tf)

    for rule in rules:
        replace_item_by_sequence(rule, substitute)

    dollar_kw_symbols = [
        Antlr4Symbol(kw.replace("$", "KW_DOLAR_").upper(), False)
        for kw in kws
        if kw.startswith("$")
    ]
    rules.append(Antlr4Rule(
        "any_system_tf_identifier",
        Antlr4Selection([sys_tf] + dollar_kw_symbols)))
def rm_semi_from_cross_body_item(rules):
    """
    Because SEMI is already part of cross_body_item, erase it from the
    first alternative of cross_body.
    """
    rule = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def drop_semi(o):
        if o == semi:
            return Antlr4Sequence([])

    replace_item_by_sequence(rule.body[0], drop_semi)
def handle_conditional_fn(bin_op_choices, current_expr_rule):
    """
    Expand the ternary (conditional) operator choices for
    *current_expr_rule*.

    NOTE(review): uses *rules* from the enclosing scope — this is a
    closure, not a standalone function.
    """
    # rm left recursion from cond_predicate/conditional_expression
    replace_symbol_in_rule(rules, "conditional_expression",
                           "cond_predicate", "cond_expr_predicate",
                           only_first=True)
    iterate_everything_except_first(rules, "conditional_expression")
    # create new cond_predicate variant (cond_expr_predicate)
    # without left recursion
    cond_predicate = rule_by_name(rules, "cond_predicate")
    cond_expr_predicate = deepcopy(cond_predicate)
    cond_expr_predicate.name = "cond_expr_predicate"
    rules.insert(rules.index(cond_predicate), cond_expr_predicate)
    replace_symbol_in_rule(rules, "cond_expr_predicate",
                           "expression", current_expr_rule.name,
                           only_first=True)
    bin_op_choices.extend([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Symbol("conditional_expression", False)
    ])