def _extract_option_as_rule(r, rules, options_i: List[Tuple[int, iAntlr4GramElem]], new_rule_name):
    """
    Move selected options of a selection rule into a new standalone rule.

    :param r: rule whose body is an Antlr4Selection
    :param rules: list of all rules; the new rule is inserted just before ``r``
    :param options_i: pairs (index into r.body, expected element); the expected
        element is checked against the actual option before extraction
    :param new_rule_name: name of the newly created rule
    :return: the newly created Antlr4Rule
    """
    assert isinstance(r.body, Antlr4Selection)
    new_body = Antlr4Selection([])
    consumed = set()
    for i, ev in options_i:
        # sanity check that the caller selected the options it expected
        assert r.body[i].eq_relaxed(ev), (r.body[i], ev)
        new_body.append(r.body[i])
        consumed.add(i)
    # the original rule becomes: <new_rule_name> | <options not consumed>
    body = [
        Antlr4Symbol(new_rule_name, False),
    ]
    for i, x in enumerate(r.body):
        if i not in consumed:
            body.append(x)
    if len(body) == 1:
        # only the reference remains, no selection wrapper needed
        r.body = body[0]
    else:
        r.body = Antlr4Selection(body)
    if len(new_body) == 1:
        # single extracted option does not need a selection wrapper
        new_body = new_body[0]
    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(r), new_r)
    return new_r
def split_rule(rules, rule_name, symbols_to_extract: List[str], subrule_name: str):
    """
    Keep only the options which start with a symbol from symbols_to_extract
    in the rule; move every other option into a newly created subrule.

    :return: the newly created subrule (inserted just before the original rule)
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection), rule

    # collect options whose left corner does not intersect symbols_to_extract
    moved = Antlr4Selection([])
    for option in rule.body:
        first_symbols = set()
        _direct_left_corner(option, first_symbols, allow_eps_in_sel=True)
        if not first_symbols.intersection(symbols_to_extract):
            moved.append(option)

    # original rule keeps the rest, prefixed with a reference to the subrule
    kept = [o for o in rule.body if o not in moved]
    rule.body = Antlr4Selection(kept)
    rule.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(rule.body) == 1:
        rule.body = rule.body[0]

    assert len(moved) > 0
    sub_body = moved[0] if len(moved) == 1 else Antlr4Selection(moved)
    subrule = Antlr4Rule(subrule_name, sub_body)
    rules.insert(rules.index(rule), subrule)
    return subrule
def extract_bin_ops(rules, current_expr_rule, ops_to_extrat, new_rule_name,
                    handle_conditional_fn, handle_inside_fn):
    """
    Build a new rule for the binary-operator expressions of one precedence
    level and insert it before ``current_expr_rule``.

    :param ops_to_extrat: token names of the operators of this level;
        KW_INSIDE / KW_DIST / QUESTIONMARK get special handling
    :param handle_conditional_fn: callback adding choices for the ternary op
    :param handle_inside_fn: callback adding choices for the "inside" op
    :return: the newly created Antlr4Rule
    """
    # find option with binary op rule
    # expr = rule_by_name(rules, "expression")
    ops_no_special = [
        o for o in ops_to_extrat
        if o not in ["KW_INSIDE", "KW_DIST", "QUESTIONMARK", ]
    ]
    bin_op_choices = []
    if len(ops_no_special) > 0:
        if len(ops_no_special) == 1:
            op = Antlr4Symbol(ops_no_special[0], False)
        else:
            op = Antlr4Selection(
                [Antlr4Symbol(o, False) for o in ops_no_special])
        # expression (binary_operator ( attribute_instance )* expression)*
        bin_op_choice = Antlr4Sequence([
            op,
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol(current_expr_rule.name, False)
        ])
        bin_op_choices.append(bin_op_choice)
    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(bin_op_choices, current_expr_rule)
    if "KW_DIST" in ops_to_extrat:
        # handled differently, only allowed on specified places
        pass
    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(bin_op_choices, current_expr_rule)
    for c in bin_op_choices:
        assert isinstance(c, iAntlr4GramElem), c
    # create a new rule which contains rule for extracted binary operators
    if len(bin_op_choices) > 1:
        new_body = Antlr4Selection(bin_op_choices)
    else:
        new_body = bin_op_choices[0]
    # <current_expr> ( <operator choices> )*
    new_body = Antlr4Sequence([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Iteration(new_body)
    ])
    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(current_expr_rule), new_r)
    return new_r
def add_comments_and_ws(rules):
    """
    Append lexer rules for comments and whitespace; all of them are routed
    to the HIDDEN channel (none is skipped outright).
    """
    # ONE_LINE_COMMENT: '//' .*? '\r'? ('\n' | EOF) -> channel(HIDDEN);
    olc = Antlr4Rule("ONE_LINE_COMMENT", Antlr4Sequence([
        Antlr4Symbol("//", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Option(Antlr4Symbol("\r", True)),
        Antlr4Selection([
            Antlr4Symbol("\n", True),
            Antlr4Symbol("EOF", False),
        ])
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(olc)
    # BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
    bc = Antlr4Rule("BLOCK_COMMENT", Antlr4Sequence([
        Antlr4Symbol("/*", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Symbol("*/", True),
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(bc)
    # WHITE_SPACE: [ \t\n\r]+ -> channel(HIDDEN);
    # NOTE: sent to the HIDDEN channel, not skipped
    ws = Antlr4Rule("WHITE_SPACE", Antlr4Sequence([
        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
    ]), lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(ws)
def _selection_propagate_optionality(sel: Antlr4Selection):
    """
    If any choice of the selection consists only of optional items, the
    whole selection can match epsilon: expand such choices to their
    non-empty variants and wrap the whole selection in an Antlr4Option.

    :return: tuple (resulting element, True if the selection was changed)
    """
    assert isinstance(sel, Antlr4Selection)
    non_optional_choices = []
    entirely_optional = False
    for c in sel:
        # a choice is entirely optional when every item in it is optional
        if all(_is_optional(o) for o in c):
            entirely_optional = True
            nocs = _sequence_expand_optionality(c)
            # the eps variant is represented by the optionality
            # of the whole selection later
            non_optional_choices.extend([
                n for n in nocs
                # BUG FIX: the filter previously tested the leaked inner
                # loop variable "o" instead of the expanded variant "n"
                # (and raised NameError for an empty first choice)
                if not isinstance(n, (Antlr4Sequence, Antlr4Selection))
                or len(n) > 0
            ])
        else:
            non_optional_choices.append(c)
    if entirely_optional:
        return Antlr4Option(Antlr4Selection(non_optional_choices)), True
    else:
        return sel, False
def fix_priority_of__class_scope__package_scope(rules):
    """
    Swap the order of the options in every ``class_scope | package_scope``
    selection so that package_scope is tried first.
    """
    pattern = Antlr4Selection([
        Antlr4Symbol("class_scope", False),
        Antlr4Symbol("package_scope", False),
    ])
    replacement = Antlr4Selection([
        Antlr4Symbol("package_scope", False),
        Antlr4Symbol("class_scope", False),
    ])

    def match_replace_fn(o):
        # a fresh copy per match so rules do not share the same object
        if o == pattern:
            return deepcopy(replacement)

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)
def extract_common_from_sequences_from_part(part, prefix, suffix, extracted_rule_name, prefix_to_non_optional):
    """
    Rebuild ``part`` in place to the shape:
        [prefix variants] <extracted_rule_name> [suffix]
    """
    part.clear()
    if prefix:
        variants = []
        had_empty_variant = False
        for variant in _sequence_expand_optionality(prefix):
            if variant:
                variants.append(variant)
            else:
                had_empty_variant = True
        head = Antlr4Selection(variants) if len(variants) > 1 else variants[0]
        if prefix_to_non_optional and had_empty_variant:
            # because if the parts do not have different suffix
            # the prefix is there to distinguish between them
            head = Antlr4Option(head)
        part.append(head)
    part.append(Antlr4Symbol(extracted_rule_name, False))
    if suffix:
        part.extend(suffix)
def match_replace_fn(o: iAntlr4GramElem):
    # replace the c_id occurrence with a selection guarded by a semantic
    # predicate checking that the lookahead token is not a COLON
    if o != c_id:
        return None
    predicate = Antlr4Symbol("{%s != COLON}?" % la1, True, True)
    return Antlr4Selection([
        o.body,
        Antlr4Sequence([predicate, ]),
    ])
def match_replace_fn(o: iAntlr4GramElem):
    # guard optional sequences starting with the "else" keyword by a
    # semantic predicate on the lookahead (dangling-else resolution)
    if not (isinstance(o, Antlr4Option) and isinstance(o.body, Antlr4Sequence)):
        return None
    if o.body[0] != kw_else:
        return None
    predicate = Antlr4Symbol("{%s != KW_ELSE}?" % la1, True, True)
    return Antlr4Selection([
        o.body,
        Antlr4Sequence([predicate, ]),
    ])
def parse_element_selection(self, ctx: Element_selectionContext) -> Antlr4Selection:
    """
    element_selection: element_sequence ('|' element_sequence)+;
    """
    sequences = [
        self.parse_element_sequence(c)
        for c in ctx.element_sequence()
    ]
    return Antlr4Selection(sequences)
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove direct left recursion from the specified rule.

    The rule is renamed to "<rule_name>_item" and a new rule with the
    original name is created in front of it; the new rule contains the
    formerly left-recursive choices rewritten over "<rule_name>_item".

    :raises NotImplementedError: for rule shapes this transform
        does not support
    """
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()

    # find choices which start with this rule's own non-terminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove choices which are causing left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)
    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to <rule_name>_item
    r_base_name = r.name + "_item"
    for _r in rules:
        # BUG FIX: originally compared r.name (always != r_base_name)
        # instead of checking each rule for a name collision
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create a new rule which implements the removed choices and
    # expands to <rule_name>_item
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)
        if not choices_new:
            # formatting of the first choice
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    """
    Extract the options at indexes ``options_i`` from a selection rule
    into a new rule which is referenced in place of the first of them.

    :return: the newly created Antlr4Rule (inserted before the original)
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection)

    extracted = Antlr4Selection([rule.body[i] for i in options_i])

    # the first extracted option becomes a reference to the new rule
    rule.body[options_i[0]] = Antlr4Sequence([
        Antlr4Symbol(new_rule_name, False),
        Antlr4Newline(),
        Antlr4Indent(1),
    ])
    # the remaining extracted options are dropped
    remaining = set(options_i[1:])
    rule.body = Antlr4Selection([
        x for i, x in enumerate(rule.body) if i not in remaining
    ])

    body = extracted[0] if len(extracted) == 1 else extracted
    new_rule = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(rule), new_rule)
    return new_rule
def _selection_reduce_optional(o: Antlr4Selection):
    """
    a? | b? -> (a | b)?

    :return: tuple (resulting element, True if a reduction was applied)
    """
    assert isinstance(o, Antlr4Selection)
    unwrapped = []
    for choice in o:
        item = choice
        if isinstance(item, Antlr4Sequence):
            if len(item) != 1:
                # multi item sequence, reduction is not applicable
                return o, False
            item = item[0]
        if not isinstance(item, Antlr4Option):
            # a non optional choice blocks the reduction
            return o, False
        unwrapped.append(item.body)

    # reuse the selection object, now holding the unwrapped option bodies
    o.clear()
    o.extend(unwrapped)
    return Antlr4Sequence([Antlr4Option(o), ]), True
def _selection_only_unique(sel: Antlr4Selection):
    """
    Drop duplicated options from the selection (first occurrence wins).

    :return: tuple (resulting element, True if something was removed)
    """
    assert isinstance(sel, Antlr4Selection)
    unique = []
    for opt in sel:
        if opt not in unique:
            unique.append(opt)
    if len(unique) == len(sel):
        # nothing removed
        return sel, False
    if len(unique) == 1:
        # single option remains, the selection wrapper is useless
        return unique[0], True
    return Antlr4Selection(unique), True
def _selection_empty_option_to_optional(sel: Antlr4Selection):
    """
    a | | c -> ( a | c )?

    :return: tuple (resulting element, True if an empty option was found)
    """
    empty = Antlr4Sequence([])
    kept = [o for o in sel if o != empty]
    if len(kept) == len(sel):
        # no empty option present
        return sel, False
    body = kept[0] if len(kept) == 1 else Antlr4Selection(kept)
    return Antlr4Option(body), True
def fix_implicit_data_type(rules):
    """
    Rewrite the body of implicit_data_type from

        (signing)? (packed_dimension)*

    to the equivalent form which can not match epsilon ambiguously:

        signing (packed_dimension)*
        | (packed_dimension)+
    """
    r = rule_by_name(rules, "implicit_data_type")
    with_signing = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(Antlr4Symbol("packed_dimension", False)),
    ])
    dims_only = Antlr4Iteration(
        Antlr4Symbol("packed_dimension", False), positive=True)
    r.body = Antlr4Selection([with_signing, dims_only])
def numbers_add_whitespace_after_base(rules):
    """
    Allow optional WHITE_SPACE around the base specifier of based number
    literals, turn the number tokens into fragments and merge them into
    a new ANY_BASED_NUMBER token used from integral_number.
    """
    number_rules = set([
        "DECIMAL_NUMBER_WITH_BASE",
        "DECIMAL_INVALID_NUMBER_WITH_BASE",
        "DECIMAL_TRISTATE_NUMBER_WITH_BASE",
        "BINARY_NUMBER",
        "OCTAL_NUMBER",
        "HEX_NUMBER",
    ])
    number_base_rules = set([
        "DECIMAL_BASE",
        "BINARY_BASE",
        "OCTAL_BASE",
        "HEX_BASE",
    ])
    # decimal_number is used only in integral_number
    inline_rule(rules, "decimal_number")

    def opt_ws():
        # a fresh ( WHITE_SPACE )? element
        return Antlr4Option(Antlr4Symbol("WHITE_SPACE", False))

    # NOTE: a stray no-op statement which built an unused
    # Antlr4Option(Antlr4Symbol("UNSIGNED_NUMBER", False)) tuple
    # was removed here (its value was discarded)
    for r in rules:
        if r.name in number_rules:
            # ( SIZE )? *_BASE ....
            assert r.body[0].body.symbol == "SIZE", r
            assert r.body[1].symbol.endswith("_BASE"), r
            # SIZE is matched separately, the number token becomes a fragment
            del r.body[0]
            r.is_fragment = True
        elif r.name in number_base_rules:
            # APOSTROPHE ( [sS] )? [dD];
            r.body.insert(2, opt_ws())
            r.body.insert(1, opt_ws())
            r.body.append(opt_ws())

    any_based_number = Antlr4Rule(
        "ANY_BASED_NUMBER",
        Antlr4Selection([Antlr4Symbol(n, False) for n in number_rules]))
    rules.insert(rules.index(rule_by_name(rules, "HEX_NUMBER")),
                 any_based_number)

    integral_number = rule_by_name(rules, "integral_number")
    integral_number.body = Antlr4parser().from_str("""
    ( UNSIGNED_NUMBER )? ANY_BASED_NUMBER
    | UNSIGNED_NUMBER
    """)
def _selection_flatten(sel: Antlr4Selection):
    """
    Inline nested selections: a | (b | c) -> a | b | c
    (a nested selection may be wrapped in single item sequences)

    :return: tuple (resulting element, True if a nested selection was inlined)
    """
    assert isinstance(sel, Antlr4Selection), sel
    flattened = []
    changed = False
    for choice in sel:
        core = choice
        # unwrap chains of single item sequences
        while isinstance(core, Antlr4Sequence) and len(core) == 1:
            core = core[0]
        if isinstance(core, Antlr4Selection):
            changed = True
            flattened.extend(core)
        else:
            # keep the original (possibly wrapped) choice
            flattened.append(choice)
    if changed:
        return Antlr4Selection(flattened), True
    return sel, False
def _selection_share_common(sel: Antlr4Selection, start_index):
    """
    Extract a shared first (start_index == 0) or last (start_index == -1)
    element from groups of options which have it in common:
        a b | a c -> a (b | c)        (start_index == 0)
        b a | c a -> (b | c) a        (start_index == -1)

    :note: options are grouped by equality of the shared element; the
        grouping below merges equivalence classes in a union-find style
    :return: tuple (resulting element, True if anything was shared)
    """
    assert start_index == 0 or start_index == -1
    assert isinstance(sel, Antlr4Selection), sel.__class__
    # find options which starting with the same element
    # with_item: option index -> set of indexes sharing the same edge element
    with_item = {i: set([
        i,
    ]) for i in range(len(sel))}
    for i0, p_opt in enumerate(sel):
        for i1, opt in enumerate(islice(sel, i0 + 1, None)):
            # "p_opt and opt" skips empty options which have no edge element
            if p_opt and opt and p_opt[start_index] == opt[start_index]:
                # merge the equivalence classes of the two options
                prefix_cls = with_item[i0]
                prefix_cls.update(with_item[i0 + i1 + 1])
                with_item[i0 + i1 + 1] = prefix_cls
    resolved = set()
    new_options = []
    changed = False
    for i, _prefix_eq_cls in sorted(with_item.items()):
        if i in resolved:
            # already emitted as part of an earlier class
            continue
        if len(_prefix_eq_cls) == 1:
            # keep option as it is
            new_opt = sel[i]
        else:
            changed = True
            # extraction of shared prefix
            eq_cls = list(sorted(_prefix_eq_cls))
            extracted_item = sel[i][start_index]
            if start_index == 0:
                new_opt = Antlr4Sequence([
                    extracted_item,
                    Antlr4Selection(
                        [Antlr4Sequence(sel[i0][1:]) for i0 in eq_cls])
                ])
            else:
                assert start_index == -1
                new_opt = Antlr4Sequence([
                    Antlr4Selection(
                        [Antlr4Sequence(sel[i0][:-1]) for i0 in eq_cls]),
                    extracted_item
                ])
            resolved.update(eq_cls)
        new_options.append(new_opt)
    if len(new_options) == 1:
        # a single option left, the selection wrapper is dropped
        return new_options[0], changed
    else:
        # reuse the selection object with the reduced option list
        sel.clear()
        sel.extend(new_options)
        return sel, changed
def fix_SYSTEM_TF_IDENTIFIER(rules):
    """
    Replace every SYSTEM_TF_IDENTIFIER reference by any_system_tf_identifier,
    a new rule which also accepts the KW_DOLAR_* tokens of builtin
    system functions (keywords starting with "$").
    """
    kws = collect_keywords(rules)
    tf_token = Antlr4Symbol("SYSTEM_TF_IDENTIFIER", False)
    replacement = Antlr4Symbol("any_system_tf_identifier", False)

    def match_replace_fn(o):
        if o == tf_token:
            return deepcopy(replacement)

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)

    dollar_kw_tokens = [
        Antlr4Symbol(kw.replace("$", "KW_DOLAR_").upper(), False)
        for kw in kws
        if kw.startswith("$")
    ]
    rules.append(
        Antlr4Rule(
            "any_system_tf_identifier",
            Antlr4Selection([tf_token, ] + dollar_kw_tokens)))
def add_string_literal_rules(p):
    """
    Add lexer fragment rules for the characters of string literals and
    for printable non-whitespace ASCII characters.
    """
    string_char = Antlr4Rule(
        "ANY_ASCII_CHARACTERS",
        Antlr4Selection([
            # any char except '"', '\', '\r', '\n'
            Antlr4Symbol('~["\\\\\\r\\n]', True, True),
            # escaped line continuations
            Antlr4Symbol('\\\n', True),
            Antlr4Symbol('\\\r\n', True),
            # simple escape sequences like \n, \t, \", ...
            Antlr4Sequence([
                Antlr4Symbol("\\", True),
                Antlr4Symbol('[nt\\\\"vfa%]', True, is_regex=True),
            ]),
            # octal escape: \ooo (1-3 digits)
            Antlr4Symbol("'\\\\' [0-9] [0-9]? [0-9]?", True, True),
            # hex escape: \xhh (1-2 digits)
            Antlr4Symbol("'\\\\' 'x' [0-9A-Fa-f] [0-9A-Fa-f]?", True, True),
        ]),
        is_fragment=True)
    p.rules.append(string_char)

    # printable ASCII range without the space character
    any_printable_ASCII_character_except_white_space = Antlr4Rule(
        "ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE",
        Antlr4Symbol("'\\u0021'..'\\u007E'", True, True),
        is_fragment=True)
    p.rules.append(any_printable_ASCII_character_except_white_space)
def _selection(self):
    """
    Parse a '|'-separated list of sequences.

    :return: a single sequence when there is only one choice, otherwise
        an Antlr4Selection of all choices
    :raises StopIteration: when no sequence could be parsed
    """
    choices = []
    while True:
        try:
            choices.append(self._sequence())
        except StopIteration:
            # no more sequences available
            break
        try:
            tok = self.next()
        except StopIteration:
            # input exhausted right after a sequence
            break
        if tok != "|":
            # token belongs to the caller, push it back
            self.back(tok)
            break

    if not choices:
        raise StopIteration()
    if len(choices) == 1:
        return choices[0]
    return Antlr4Selection(choices)
def simplify_select_rule(rules, rule_name):
    """
    ( ( KW0 a0 ( a1 )* )* KW0 a0 )? ( a1 )* ...
    ->
    ( KW0 a0 | a1 )* ...

    :note: assumes r.body[0] is the optional group and r.body[1] the
        iteration shown above — TODO confirm against the callers
    """
    r = rule_by_name(rules, rule_name)
    g0 = r.body[0]  # ( ( KW0 a0 ( a1 )* )* KW0 a0 )?
    g1 = r.body[1]  # ( a1 )*
    # merge the trailing "KW0 a0" of g0 with the body of g1 into
    # a single ( KW0 a0 | a1 )* iteration
    first_part = Antlr4Iteration(Antlr4Selection([Antlr4Sequence(g0.body[-2:]),
                                                  g1.body]),
                                 positive=False)
    if len(r.body) > 2:
        # keep any remaining elements after the two merged groups
        if len(r.body) > 3:
            rest = r.body[2:]
        else:
            rest = [r.body[2], ]
        new_body = Antlr4Sequence([
            first_part,
            *rest
        ])
    else:
        new_body = first_part
    r.body = new_body
def remove_useless_and_normalize_names(p):
    """
    Normalize the grammar: replace overspecified rules by generic
    equivalents, convert selected parser rules to lexer tokens or
    fragments, drop rules the parser does not need and apply all the
    accumulated renames.
    """
    renames = {}
    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        renames[k] = v
    # rm_newline_from_simple_rules(p.rules)
    # nts = get_used_non_terminals(p.rules)
    # def_nts = get_defined_non_terminals(p.rules)

    # overspecified
    # finish_number 0 - 2
    replace_rule("finish_number", "UNSIGNED_NUMBER", renames, p)
    # scalar_constant 1b number
    replace_rule("scalar_constant", "integral_number", renames, p)
    # init_val 1b value
    replace_rule("init_val", "integral_number", renames, p)
    # edge_descriptor 2 tristate digits
    # edge_descriptor: '01' | '10' | Z_OR_X ZERO_OR_ONE | ZERO_OR_ONE Z_OR_X;
    # dpi_spec_string two concrete strings
    replace_rule("dpi_spec_string", "STRING_LITERAL", renames, p)
    # #0 -> # UNSIGNED_NUMBER
    primitive_delay = Antlr4Rule(
        "primitive_delay",
        Antlr4Sequence([
            Antlr4Symbol("HASH", False),
            Antlr4Symbol("UNSIGNED_NUMBER", False),
        ]))
    p.rules.append(primitive_delay)
    replace_rule("#0", "primitive_delay", renames, p)

    # all same
    ps_identifier_rules = [
        "ps_class_identifier",
        "ps_covergroup_identifier",
        "ps_checker_identifier",
    ]
    for name in ps_identifier_rules:
        replace_rule(name, "ps_identifier", renames, p)

    ps_or_hierarchical_id_rules = [
        "ps_or_hierarchical_net_identifier",
        "ps_or_hierarchical_property_identifier",
        "ps_or_hierarchical_sequence_identifier",
        "ps_or_hierarchical_tf_identifier",
    ]
    ps_or_hierarchical_identifier = Antlr4Rule(
        "ps_or_hierarchical_identifier",
        Antlr4Selection([
            Antlr4Sequence([
                Antlr4Symbol("package_scope", False),
                Antlr4Symbol("identifier", False)
            ]),
            # can be only identifier
            Antlr4Symbol("hierarchical_identifier", False),
        ]))
    p.rules.append(ps_or_hierarchical_identifier)
    for name in ps_or_hierarchical_id_rules:
        replace_rule(name, "ps_or_hierarchical_identifier", renames, p)

    # parser rules which become lexer tokens
    # (duplicate entries of the original list were removed; the rename
    # assignment is idempotent so the behavior is unchanged)
    to_lexer = [
        "c_identifier",
        "unsigned_number",
        "simple_identifier",
        "system_tf_identifier",
        "string_literal",
        "binary_number",
        "octal_number",
        "hex_number",
        "fixed_point_number",
        "escaped_identifier",
        "unbased_unsized_literal",
        "time_literal",
        # because it is very hard to switch mode to parse
        # edge_descriptor and it is easy to just parse coma separated list of 2 chars
        "edge_control_specifier",
        "level_symbol",
        "output_symbol",
        "edge_symbol",
        "file_path_spec",
    ]
    for tl in to_lexer:
        renames[tl] = tl.upper()

    fragments = {
        "binary_value", "octal_value", "hex_value",
        "decimal_base", "binary_base", "octal_base", "hex_base",
        "non_zero_unsigned_number", "size", "sign",
        "edge_descriptor",
        "non_zero_decimal_digit", "decimal_digit", "binary_digit",
        "octal_digit", "hex_digit", "x_digit", "z_digit",
        "exp",
        'white_space', 'zero_or_one', 'z_or_x', 'Any_ASCII_Characters',
        "any_printable_ASCII_character_except_white_space",
        "time_unit"
    }
    for r in p.rules:
        if r.name.startswith("$"):
            renames[r.name] = r.name.replace("$", "dolar_")
        # FIX: mark the fragment flag once per rule; originally this test
        # and the rename assignments ran redundantly inside a nested loop
        # over every fragment name for every rule
        if r.name in fragments:
            r.is_fragment = True
    for fr in fragments:
        renames[fr] = fr.upper()

    identifier_rule_equivalents = {
        r.name for r in collect_simple_rules(p.rules, "identifier")
    }
    hierarchical_identifier_rule_equivalents = {
        r.name for r in collect_simple_rules(p.rules, "hierarchical_identifier")
    }

    to_remove = {
        "comment",
        "one_line_comment",
        "block_comment",
        "comment_text",
        "white_space",
        # libary rules
        "library_text",
        "library_description",
        "library_declaration",
        "include_statement",
        "file_path_spec",
    }
    to_remove.update(identifier_rule_equivalents)
    to_remove.update(hierarchical_identifier_rule_equivalents)

    simple_rules_to_remove = [
        "default_clause",  # default kw
        "variable_port_type",
        "limit_value",  # used only in more specific limit values
        "dpi_function_proto",  # used only in dpi block so we already know
        "dpi_task_proto",  # used only in dpi block so we already know
        "property_lvar_port_direction",  # used only in property so we already know
        # "consecutive_repetition",  # useless
        "trans_item",
        "ordered_parameter_assignment",
        "function_statement",
        "case_expression",
        "case_item_expression",
        "open_value_range",  # used only in open_range_list so we already know
        "constant_assignment_pattern_expression",  # parser do not see the difference between const/non const
        "clockvar",  # used only in clockvar_expression
        "path_delay_expression",  # used only in more specific rules
        "constant_function_call",  # parser do not see the difference between const/non const
        "function_subroutine_call",
        "constant_let_expression",  # parser do not see the difference between const/non const
        "attr_name",  # used only in attr_spec
        "array_identifier",  # never used
        "checker_identifier",  # used only in rule with same name
        "class_identifier",
        "class_variable_identifier",
        "clocking_identifier",
        "config_identifier",
        "const_identifier",
        "constraint_identifier",
        "covergroup_identifier",
        "covergroup_variable_identifier",
        "cover_point_identifier",
        "cross_identifier",
        "enum_identifier",
        "formal_identifier",
        "function_identifier",
        "generate_block_identifier",
        "genvar_identifier",
        "hierarchical_array_identifier",
        "hierarchical_block_identifier",
        "hierarchical_event_identifier",
        "hierarchical_net_identifier",
        "hierarchical_parameter_identifier",
        "hierarchical_property_identifier",
        "hierarchical_sequence_identifier",
        "hierarchical_task_identifier",
        "hierarchical_tf_identifier",
        "hierarchical_variable_identifier",
        "index_variable_identifier",
        "interface_identifier",
        "interface_instance_identifier",
        # "inout_port_identifier",
        # "input_port_identifier",
        "instance_identifier",
        "member_identifier",
        "method_identifier",
        "modport_identifier",
        "module_identifier",
        "net_identifier",
        # "output_port_identifier"
        "package_identifier",
        "parameter_identifier",
        "port_identifier",
        "production_identifier",
        "program_identifier",
        "property_identifier",
        "sequence_identifier",
        "signal_identifier",
        "specparam_identifier",
        "task_identifier",
        "tf_identifier",
        "terminal_identifier",
        "topmodule_identifier",
        "udp_identifier",
        "variable_identifier",
        "let_identifier",
        "type_identifier",
        # covergroup_expression
        "with_covergroup_expression",
        "set_covergroup_expression",
        "integer_covergroup_expression",
        "cross_set_expression",
        "data_event",
        "reference_event",
    ]
    for sr in simple_rules_to_remove:
        remove_simple_rule(sr, p)
    p.rules = [r for r in p.rules if r.name not in to_remove]

    for idname in identifier_rule_equivalents:
        renames[idname] = "identifier"
    for idname in hierarchical_identifier_rule_equivalents:
        renames[idname] = "hierarchical_identifier"

    apply_rename = generate_renamer(renames, True)
    for r in p.rules:
        r.walk(apply_rename)
        r.walk(mark_regex)

    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        body = Antlr4Symbol(k, True)
        r = Antlr4Rule(v, body)
        if k in ['"', "_"]:
            r.is_fragment = True
        p.rules.append(r)

    # because C_IDENTIFIER is just normal identifier without $ and can match identifiers
    identifier = rule_by_name(p.rules, "identifier")
    identifier.body.insert(0, Antlr4Symbol("C_IDENTIFIER", False))

    kws = collect_keywords(p.rules)
    for kw in kws:
        if kw not in IEEE1800_2017_KEYWORDS and kw != "1step" and "$" not in kw:
            identifier.body.append(Antlr4Symbol("KW_" + kw.upper(), False))
and not isinstance(s2, Antlr4Indent)): all_to_remove = False if _s and all_to_remove: s.pop() continue break if r.name == "signature": # rm ()? as it is in ()? every where it is used a, b = r.body[0].body a = a.body b = b.body # ( ( type_mark ( COMMA type_mark )* )? ( RETURN type_mark )? )? r.body = Antlr4Selection([ Antlr4Sequence([a, Antlr4Newline(), Antlr4Indent(1)]), Antlr4Sequence([a, b, Antlr4Newline(), Antlr4Indent(1)]), Antlr4Sequence([b, Antlr4Newline()]), ]) HEADER = """/* * Grammar extracted from the VHDL 1993, 2002, 2008, 2018 standard and then merged together * (the standard is selected by parser property) */ grammar vhdl; """ with open("vhdl.g4", "w") as f: f.write("\n\n") f.write(HEADER) for kw in keywords:
def _optimise_selections(elm: iAntlr4GramElem):
    """
    Reduce selection options which differ only in single item
    to a sequence with selection of different items.

    Example: a: b c d | b e d; to a: b (c | e) d;

    :note: ignores visuals
    :note: similar sequences have to be directly after each other because
        if they were not the priority of choices would be changed
    :return: truthy if the element was modified
    """
    if isinstance(elm, Antlr4Sequence):
        modified = False
        for e in elm:
            # BUG FIX: "modified = modified or _optimise_selections(e)"
            # short-circuited and skipped optimisation of the remaining
            # items once any earlier item was modified
            modified = _optimise_selections(e) or modified
        return modified
    elif isinstance(elm, Antlr4Selection):
        # List[Tuple[index of different item,
        #            List[Tuple[index in choices, selection options to replace]]]]
        to_reduce = []
        # tuple (index in choices, value)
        similar_choices = []
        diff_in = None
        for c_i, choice in enumerate(elm):
            if not similar_choices:
                # start a new group of potentially similar choices
                if isinstance(choice, Antlr4Sequence) \
                        and len_without_visuals(choice) > 1:
                    similar_choices.append((c_i, choice))
                continue
            else:
                _, prev = similar_choices[0]
                compatible = True
                if (isinstance(prev, Antlr4Sequence)
                        and isinstance(choice, Antlr4Sequence)
                        and len_without_visuals(prev) == len_without_visuals(choice)):
                    # check if the choice differs in a single item only
                    for i, (prev_item, current_item) in enumerate(
                            zip(iter_non_visuals(prev),
                                iter_non_visuals(choice))):
                        if prev_item != current_item:
                            if diff_in == i or diff_in is None:
                                diff_in = i
                            else:
                                # differs on more than one position
                                compatible = False
                                break
                    if compatible:
                        similar_choices.append((c_i, choice))
                else:
                    compatible = False

                if not compatible:
                    if len(similar_choices) > 1:
                        to_reduce.append((diff_in, similar_choices))
                    # reset the search
                    if isinstance(choice, Antlr4Sequence) \
                            and len_without_visuals(choice) > 1:
                        similar_choices = [(c_i, choice)]
                    else:
                        similar_choices = []
                    diff_in = None

        if len(similar_choices) > 1:
            to_reduce.append((diff_in, similar_choices))

        # offset compensates for choices already deleted from elm
        offset = 0
        for diff_in, _choices in to_reduce:
            choices = [c[1] for c in _choices]
            start_i = _choices[0][0] + offset
            assert len(_choices) > 1
            # (a no-op "try/except AssertionError: raise" wrapper
            # around this check was removed)
            assert elm[start_i] is choices[0]
            diff_item_substitution = Antlr4Selection([
                index_non_visual(c, diff_in) for c in choices
            ])
            part_to_exclude = index_non_visual(choices[0], diff_in)
            # first similar choice with the differing item replaced
            # by the selection of all variants
            new_choice = Antlr4Sequence([
                (e if e is not part_to_exclude else diff_item_substitution)
                for e in choices[0]
            ])
            elm[start_i] = new_choice
            del elm[start_i + 1:start_i + len(choices)]
            offset -= len(choices) - 1
        return len(to_reduce)
    return False
def match_replace_fn(o):
    # a C_IDENTIFIER occurrence may also be an ESCAPED_IDENTIFIER
    if o != C_IDENTIFIER:
        return None
    return Antlr4Selection([
        C_IDENTIFIER,
        Antlr4Symbol("ESCAPED_IDENTIFIER", False),
    ])
def match_replace_fn(o):
    # extend a matching element with a copy of the interface alternative
    if o != cls:
        return None
    return Antlr4Selection([o, deepcopy(intf)])