def _sequence_flatten(seq: Antlr4Sequence) -> Tuple[Antlr4Sequence, bool]:
    """
    Splice the items of every directly nested Antlr4Sequence into ``seq``.

    The nested sequences are collected before any modification, so items
    which are themselves sequences and get spliced in are not expanded
    further by this call. ``seq`` is modified in place.

    :return: tuple (seq, changed), changed is True if ``seq`` contained
        at least one nested Antlr4Sequence
    """
    nested = [
        (pos, item)
        for pos, item in enumerate(seq)
        if isinstance(item, Antlr4Sequence)
    ]
    # how far the originally recorded positions moved due to earlier splices
    shift = 0
    for orig_pos, sub in nested:
        base = shift + orig_pos
        for k, child in enumerate(sub):
            if k == 0:
                # the first child takes over the slot of the nested sequence
                assert seq[base] is sub
                seq[base] = child
            else:
                seq.insert(base + k, child)
        if not sub:
            # an empty nested sequence contributes nothing, just drop it
            del seq[base]
        shift += len(sub) - 1
    return seq, bool(nested)
def add_comments_and_ws(rules):
    """
    Append the ONE_LINE_COMMENT, BLOCK_COMMENT and WHITE_SPACE lexer rules
    (in this order) to ``rules``.

    All three rules get a single lexer action: channel("HIDDEN").
    """

    def hidden_channel():
        # every rule gets its own freshly built action list
        return [Antlr4LexerAction.channel("HIDDEN")]

    # "//" followed by anything (non greedy), an optional CR
    # and then LF or the EOF symbol
    one_line_body = Antlr4Sequence([
        Antlr4Symbol("//", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Option(Antlr4Symbol("\r", True)),
        Antlr4Selection([
            Antlr4Symbol("\n", True),
            Antlr4Symbol("EOF", False),
        ]),
    ])
    rules.append(Antlr4Rule("ONE_LINE_COMMENT", one_line_body,
                            lexer_actions=hidden_channel()))

    # "/*" anything (non greedy) "*/"
    block_body = Antlr4Sequence([
        Antlr4Symbol("/*", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Symbol("*/", True),
    ])
    rules.append(Antlr4Rule("BLOCK_COMMENT", block_body,
                            lexer_actions=hidden_channel()))

    # one or more of space, tab, LF, CR; also routed to the HIDDEN channel
    white_space_body = Antlr4Sequence([
        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
    ])
    rules.append(Antlr4Rule("WHITE_SPACE", white_space_body,
                            lexer_actions=hidden_channel()))
def _sequence_expand_optionality(obj):
    """
    Expand the leading optional items of a sequence to explicit variants.

    For every leading optional item a variant is produced in which this item
    is converted by _to_non_optional() and followed by deep copies of the
    rest of the sequence. The first non optional item ends the expansion and
    adds the remaining sequence (from this item on) as the last variant.
    If the last item of the sequence is optional as well, an empty
    sequence is added as the last variant, e.g.::

        (a0)? (a1)* b -> a0 (a1)* b, (a1)+ b, b
        (a0)?         -> a0, eps

    :attention: only the prefix is expanded, the expansion ends on the first
        non optional item
    :return: list of the variants
    """
    variants = []
    ended_with_optional = False
    for pos, item in enumerate(obj):
        if not _is_optional(item):
            # first mandatory item, keep the rest as it is and stop
            variants.append(Antlr4Sequence([deepcopy(x) for x in obj[pos:]]))
            break
        required = _to_non_optional(item)
        ended_with_optional = pos + 1 == len(obj)
        if ended_with_optional:
            variants.append(required)
        else:
            tail = [deepcopy(x) for x in obj[pos + 1:]]
            variants.append(Antlr4Sequence([required] + tail))
    if ended_with_optional:
        # everything was optional, the sequence may be empty as well
        variants.append(Antlr4Sequence([]))
    return variants
def cut_off_subsequence(a, sub_seq):
    """
    Split ``a`` around the span which is occupied by ``sub_seq``.

    The span starts on the first item of ``a`` which is equal to
    ``sub_seq[0]`` and it is ``len(sub_seq)`` items long, the other items
    of ``sub_seq`` are not compared with the items of ``a``.

    :return: prefix, suffix (both as a new Antlr4Sequence)
    """
    first = a.index(sub_seq[0])
    before = a[:first]
    after = a[first + len(sub_seq):]
    return Antlr4Sequence(before), Antlr4Sequence(after)
def extract_bin_ops(rules, current_expr_rule, ops_to_extrat, new_rule_name,
                    handle_conditional_fn, handle_inside_fn):
    """
    Create a new rule ``new_rule_name`` for the operators from
    ``ops_to_extrat`` and insert it into ``rules`` just before
    ``current_expr_rule``.

    The body of the new rule is::

        <current_expr_rule> ( <choice 0> | <choice 1> ... )*

    The operators other than KW_INSIDE, KW_DIST and QUESTIONMARK share
    a single choice ``op ( attribute_instance )* <current_expr_rule>``.
    The choices for KW_INSIDE and QUESTIONMARK are added by
    ``handle_inside_fn`` and ``handle_conditional_fn``, both are called
    with (list of choices, current_expr_rule). KW_DIST adds nothing.

    :return: the newly created rule
    """
    special_ops = ("KW_INSIDE", "KW_DIST", "QUESTIONMARK")
    plain_ops = [name for name in ops_to_extrat if name not in special_ops]

    choices = []
    if plain_ops:
        if len(plain_ops) == 1:
            operator = Antlr4Symbol(plain_ops[0], False)
        else:
            operator = Antlr4Selection(
                [Antlr4Symbol(name, False) for name in plain_ops])
        # binary_operator ( attribute_instance )* expression
        choices.append(Antlr4Sequence([
            operator,
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol(current_expr_rule.name, False),
        ]))

    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(choices, current_expr_rule)

    if "KW_DIST" in ops_to_extrat:
        # handled differently, it is allowed only on specified places
        pass

    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(choices, current_expr_rule)

    for choice in choices:
        assert isinstance(choice, iAntlr4GramElem), choice

    # the rule which contains the extracted binary operators
    repeated = Antlr4Selection(choices) if len(choices) > 1 else choices[0]
    body = Antlr4Sequence([
        Antlr4Symbol(current_expr_rule.name, False),
        Antlr4Iteration(repeated),
    ])
    new_rule = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(current_expr_rule), new_rule)
    return new_rule
def handle_inside_fn(bin_op_choices, current_expr_rule):
    """
    Add the choice for the KW_INSIDE operator to ``bin_op_choices``::

        <current_expr_rule> (KW_INSIDE LBRACE open_range_list RBRACE)*

    The last choice which is already in the list gets a newline and
    an indent appended, so the list must not be empty.
    """
    # line break after the previous choice
    bin_op_choices[-1].extend([Antlr4Newline(), Antlr4Indent(1)])

    operand = Antlr4Symbol(current_expr_rule.name, False)
    inside_names = ("KW_INSIDE", "LBRACE", "open_range_list", "RBRACE")
    inside_part = Antlr4Sequence(
        [Antlr4Symbol(name, False) for name in inside_names])
    bin_op_choices.append(
        Antlr4Sequence([operand, Antlr4Iteration(inside_part)]))
def _selection_share_common(sel: Antlr4Selection, start_index):
    """
    Merge the options of the selection which begin (start_index == 0)
    or end (start_index == -1) with an equal item::

        a b | a c  ->  a ( b | c )       (start_index == 0)
        b a | c a  ->  ( b | c ) a       (start_index == -1)

    The options are indexed, so they are expected to be sequences.

    :return: tuple (element, changed), the element is the only resulting
        option if there is just one, otherwise it is ``sel`` which was
        updated in place
    """
    assert start_index in (0, -1)
    assert isinstance(sel, Antlr4Selection), sel.__class__

    # option index -> set of indexes of the options with the same edge item
    # (the sets are shared between the members once they are merged)
    groups = {pos: {pos} for pos in range(len(sel))}
    for first_pos, first_opt in enumerate(sel):
        later = enumerate(islice(sel, first_pos + 1, None), first_pos + 1)
        for other_pos, other_opt in later:
            if not (first_opt and other_opt):
                # an empty option has no edge item to compare
                continue
            if first_opt[start_index] == other_opt[start_index]:
                merged = groups[first_pos]
                merged.update(groups[other_pos])
                groups[other_pos] = merged

    handled = set()
    result = []
    changed = False
    for pos, members in sorted(groups.items()):
        if pos in handled:
            continue
        if len(members) == 1:
            # nothing is shared, keep the option as it is
            result.append(sel[pos])
            continue

        changed = True
        ordered = sorted(members)
        shared = sel[pos][start_index]
        if start_index == 0:
            rest = Antlr4Selection(
                [Antlr4Sequence(sel[m][1:]) for m in ordered])
            merged_opt = Antlr4Sequence([shared, rest])
        else:
            assert start_index == -1
            rest = Antlr4Selection(
                [Antlr4Sequence(sel[m][:-1]) for m in ordered])
            merged_opt = Antlr4Sequence([rest, shared])
        handled.update(ordered)
        result.append(merged_opt)

    if len(result) == 1:
        return result[0], changed

    sel.clear()
    sel.extend(result)
    return sel, changed
def match_replace_fn(o):
    """
    Collapse the single character terminal options of a selection
    to a single character class regex, e.g. ``'a' | 'b' | X -> X | [ab]``.

    An option is a candidate if it is a terminal Antlr4Symbol with a symbol
    of length 1, or an Antlr4Sequence which contains exactly one such
    symbol. Nothing happens unless there are at least two candidates.
    The selection is modified in place.

    :return: Antlr4Sequence with the regex if all options of the selection
        were replaced, otherwise None
    """
    if not isinstance(o, Antlr4Selection):
        return None

    # tuples (original option, the character symbol in it)
    char_options = []
    for option in o:
        symbol = option
        if isinstance(symbol, Antlr4Sequence):
            if len(symbol) != 1:
                # an empty option or a longer sequence is not a single
                # character (indexing of an empty sequence would raise)
                continue
            symbol = symbol[0]
        if (isinstance(symbol, Antlr4Symbol)
                and symbol.is_terminal
                and len(symbol.symbol) == 1):
            char_options.append((option, symbol))

    if len(char_options) <= 1:
        return None

    # build the regex out of the characters and replace them by it
    for option, _ in char_options:
        o.remove(option)
    char_class = "[%s]" % "".join(
        symbol._escaped() for _, symbol in char_options)
    regex = Antlr4Symbol(char_class, True, is_regex=True)
    if len(o):
        o.append(regex)
        return None
    return Antlr4Sequence([regex])
def _extract_common_from_sequences(a: Antlr4Sequence, b: Antlr4Sequence,
                                   common, extracted_rule_name: str):
    """
    Build a new rule from the items which are common to ``a`` and ``b``
    and let extract_common_from_sequences_from_part() process both sequences.

    :param common: pairs (item from a, item from b) of the common items
    :param extracted_rule_name: name of the rule built from the common items
    :return: tuple (a, b, the new rule); the body of the rule is the item
        itself if there is only a single common item
    """

    def side(index):
        return [pair[index] for pair in common]

    body = Antlr4Sequence(side(0))
    if len(body) == 1:
        body = body[0]
    common_rule = Antlr4Rule(extracted_rule_name, body)

    a_prefix, a_suffix = cut_off_subsequence(a, side(0))
    b_prefix, b_suffix = cut_off_subsequence(b, side(1))

    # If the two sequences differ only in optional items, these items are
    # converted to the non optional variant (because the variant without
    # these items is the extracted rule).
    suffix_differs = a_suffix or b_suffix
    a_prefix_to_non_optional = not (_is_optional(b_prefix) or suffix_differs)
    b_prefix_to_non_optional = not (_is_optional(a_prefix) or suffix_differs)

    parts = (
        (a, a_prefix, a_suffix, a_prefix_to_non_optional),
        (b, b_prefix, b_suffix, b_prefix_to_non_optional),
    )
    for seq, prefix, suffix, to_non_optional in parts:
        extract_common_from_sequences_from_part(
            seq, prefix, suffix, extracted_rule_name, to_non_optional)
    return a, b, common_rule
def fix_subroutine_call(rules):
    """
    Insert ``( class_qualifier )? method_call_body`` as the first item
    of the body of the "subroutine_call" rule.
    """
    subroutine_call = rule_by_name(rules, "subroutine_call")
    method_call = Antlr4Sequence([
        Antlr4Option(Antlr4Symbol("class_qualifier", False)),
        Antlr4Symbol("method_call_body", False),
    ])
    subroutine_call.body.insert(0, method_call)
def match_replace_fn(o: iAntlr4GramElem):
    """
    Replace an element equal to ``c_id`` by a selection of its body
    and of the ``{<la1> != COLON}?`` symbol.

    :return: the replacement, or None if ``o`` is not equal to ``c_id``
    """
    if not (o == c_id):
        return None
    body = o.body
    not_colon = Antlr4Symbol("{%s != COLON}?" % la1, True, True)
    return Antlr4Selection([body, Antlr4Sequence([not_colon])])
def add_predicate(r, std):
    """
    Append the language version predicate for the standard ``std``
    behind the current body of the rule ``r``.

    The predicate is placed on the right side of the rule body, see
    The Definitive ANTLR 4 Reference, p 194 (predicates in a lexer).
    """
    original_body = r.body
    version_check = Antlr4Symbol(
        "{language_version >= hdlConvertor::Language::%s}?" % std,
        True, is_regex=True)
    r.body = Antlr4Sequence([original_body, version_check])
def _auto_format(self, e: iAntlr4GramElem, x, indent) -> int:
    """
    Recursively insert Antlr4Newline/Antlr4Indent items into the grammar
    element ``e`` (in place) and return the updated width counter.

    :param e: Antlr4Selection, Antlr4Sequence, Antlr4Option,
        Antlr4Iteration, Antlr4Symbol or Antlr4Rule to format
    :param x: running width counter which is compared with ``self.line_len``
        (presumably the current column in characters - TODO confirm)
    :param indent: level passed to Antlr4Indent for the wrapped lines
    :return: the new value of the width counter
    :raises TypeError: if ``e`` is of any other class
    """
    if isinstance(e, Antlr4Selection):
        # round x up to the whole multiple of the indent string length
        item_indent = ceil(x / len(Antlr4Indent.INDENT))
        x = item_indent * len(Antlr4Indent.INDENT)
        # item_indent = indent + 1
        # assert item_indent >= indent
        for ci, c in enumerate(e):
            if not isinstance(c, Antlr4Sequence):
                # wrap the option so the line break can be appended to it
                c = Antlr4Sequence([
                    c,
                ])
                e[ci] = c
            # the value returned for the option is not used
            self._auto_format(c, x + 1, item_indent)
            # NOTE(review): ci is always lower than len(e), so this condition
            # always holds and the line break is appended to the last option
            # as well; `len(e) - 1` may have been intended - confirm
            if ci != len(e):
                c.extend([Antlr4Newline(), Antlr4Indent(item_indent)])
    elif isinstance(e, Antlr4Sequence):
        # indexes of the items which have reached the line length limit
        line_breaks = []
        for i, o in enumerate(e):
            x = self._auto_format(o, x, indent) + 1
            if x >= self.line_len:
                line_breaks.append(i)
                # restart the counter at the width of the indentation
                x = indent * len(Antlr4Indent.INDENT)
        # every break inserts 2 items, the later indexes have to be shifted
        offset = 0
        for i in line_breaks:
            e.insert(offset + i, Antlr4Newline())
            e.insert(offset + i + 1, Antlr4Indent(indent))
            offset += 2
    elif isinstance(e, (Antlr4Option, Antlr4Iteration)):
        # constant 5 is added to the width of the body (presumably for
        # the parentheses and the suffix - TODO confirm)
        return self._auto_format(e.body, x, indent) + 5
    elif isinstance(e, Antlr4Symbol):
        return x + len(e.symbol)
    elif isinstance(e, Antlr4Rule):
        if isinstance(e.body, Antlr4Symbol):
            # body made of a single symbol is measured from
            # the length of the rule name + 2
            return self._auto_format(e.body, len(e.name) + 2, 1)
        else:
            if not isinstance(e.body, Antlr4Sequence):
                e.body = Antlr4Sequence([
                    e.body,
                ])
            x = self._auto_format(e.body, 0, 1)
            # the body of the rule starts on its own line
            e.body.insert(0, Antlr4Newline())
    else:
        raise TypeError(repr(e.__class__))
    return x
def _selection_options_to_sequnces(sel: Antlr4Selection):
    """
    Wrap every option of the selection which is not an Antlr4Sequence
    into a new single item Antlr4Sequence (in place).
    """
    for pos, option in enumerate(sel):
        if isinstance(option, Antlr4Sequence):
            continue
        sel[pos] = Antlr4Sequence([option])
def match_replace_fn(o: iAntlr4GramElem):
    """
    Replace an Antlr4Option whose body is a sequence starting with
    ``kw_else`` by a selection of this body and of the
    ``{<la1> != KW_ELSE}?`` symbol.

    :return: the replacement, or None if ``o`` does not match
    """
    if not isinstance(o, Antlr4Option):
        return None
    if not isinstance(o.body, Antlr4Sequence):
        return None
    if not (o.body[0] == kw_else):
        return None
    body = o.body
    not_else = Antlr4Symbol("{%s != KW_ELSE}?" % la1, True, True)
    return Antlr4Selection([body, Antlr4Sequence([not_else])])
def match_replace_fn(o: iAntlr4GramElem):
    """
    Replace an Antlr4Option whose body yields (through iter_non_visuals())
    only a single Antlr4Symbol named ``rule_name`` by a sequence with
    this symbol, i.e. the symbol is not optional anymore.

    :return: the replacement, or None if ``o`` does not match
    """
    if not isinstance(o, Antlr4Option):
        return None
    content = list(iter_non_visuals(o.body))
    if len(content) != 1:
        return None
    only = content[0]
    if isinstance(only, Antlr4Symbol) and only.symbol == rule_name:
        return Antlr4Sequence([only])
    return None
def handle_conditional_fn(bin_op_choices, current_expr_rule):
    """
    Remove the left recursion from cond_predicate/conditional_expression.

    The "conditional_expression" rule is removed from ``rules``, passed to
    _inline_rule() together with the "cond_predicate" rule and its body
    without the first item is appended to ``bin_op_choices``.
    ``current_expr_rule`` is not used.
    """
    cond_pred_rule = rule_by_name(rules, "cond_predicate")
    cond_expr_rule = rule_by_name(rules, "conditional_expression")
    rules.remove(cond_expr_rule)
    _inline_rule([cond_expr_rule], cond_pred_rule)
    # everything behind the first item of the body becomes the new choice
    tail = cond_expr_rule.body[1:]
    bin_op_choices.append(Antlr4Sequence(tail))
def apply_to_replace2(o):
    """
    Resolve the replacement of ``o`` from the first match in ``m`` which
    has a non None entry for ``id(o)``.

    :return: a reference to package_or_class_scoped_hier_id_with_const_select
        if the entry is ``to_replace2`` or a "hierarchical_identifier"
        symbol, an empty sequence for any other entry,
        None if there is no entry for ``o``
    """
    key = id(o)
    for match in m:
        found = match.get(key, None)
        if found is None:
            continue
        is_hier_id = found is to_replace2 or (
            isinstance(found, Antlr4Symbol)
            and found.symbol == "hierarchical_identifier")
        if is_hier_id:
            return Antlr4Symbol(
                package_or_class_scoped_hier_id_with_const_select.name, False)
        return Antlr4Sequence([])
    return None
def add_file_path_literal_rules(p):
    """
    Append the FILE_PATH_SPEC_CHAR fragment rule and the FILE_PATH_SPEC
    rule to ``p.rules``.

    FILE_PATH_SPEC is a positive iteration of
    ``FILE_PATH_SPEC_CHAR ( SEMI FILE_PATH_SPEC_CHAR )?``.
    """
    path_char_rule = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol("[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )", True, True),
        is_fragment=True)
    p.rules.append(path_char_rule)

    path_item = Antlr4Sequence([
        Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
        Antlr4Option(Antlr4Sequence([
            Antlr4Symbol('SEMI', False),
            Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
        ])),
    ])
    path_rule = Antlr4Rule(
        "FILE_PATH_SPEC", Antlr4Iteration(path_item, positive=True))
    p.rules.append(path_rule)
def apply_to_replace0_and_1(o):
    """
    Resolve the replacement of ``o`` from the first match in ``m`` which
    has a non None entry for ``id(o)``, the entry is deleted from the match.

    :return: a reference to package_or_class_scoped_id if the entry is
        ``to_replace0``, ``to_replace1`` or an "identifier" symbol,
        an empty sequence for any other entry,
        None if there is no entry for ``o``
    """
    key = id(o)
    for match in m:
        found = match.get(key, None)
        if found is None:
            continue
        # consume the entry
        del match[key]
        is_identifier = (
            found is to_replace0
            or found is to_replace1
            or (isinstance(found, Antlr4Symbol)
                and found.symbol == "identifier"))
        if is_identifier:
            return Antlr4Symbol(package_or_class_scoped_id.name, False)
        return Antlr4Sequence([])
    return None
def match_replace_fn(o: iAntlr4GramElem):
    """
    Replace an Antlr4Option whose body is the Antlr4Symbol named
    ``rule_name`` (directly, or as the only item of an Antlr4Sequence)
    by a sequence with this symbol, i.e. the symbol is not optional anymore.

    :return: the replacement, or None if ``o`` does not match
    """
    if not isinstance(o, Antlr4Option):
        return None
    candidate = o.body
    if isinstance(candidate, Antlr4Sequence) and len(candidate) == 1:
        # unwrap the single item sequence
        candidate = candidate[0]
    if isinstance(candidate, Antlr4Symbol) and candidate.symbol == rule_name:
        return Antlr4Sequence([candidate])
    return None
def _iterate_everything_except_first_and_replace_first(seq, repl):
    """
    Rewrite ``seq`` in place to ``repl ( rest )*`` followed by
    a newline and an indent of level 1.

    The rest are the items yielded by iter_non_visuals() for ``seq``
    without the first one.
    """
    tail = list(iter_non_visuals(seq))[1:]
    repeated = tail[0] if len(tail) == 1 else Antlr4Sequence(tail)
    iteration = Antlr4Iteration(repeated)

    seq.clear()
    for item in (repl, iteration, Antlr4Newline(), Antlr4Indent(1)):
        seq.append(item)
def handle_conditional_fn(bin_op_choices, current_expr_rule):
    """
    Extend ``bin_op_choices`` with two items, the reference to
    ``current_expr_rule`` and the iteration::

        ( QUESTIONMARK ( attribute_instance )* constant_expression
          COLON constant_expression )*
    """
    operand = Antlr4Symbol(current_expr_rule.name, False)
    conditional_part = Antlr4Sequence([
        Antlr4Symbol("QUESTIONMARK", False),
        Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
        Antlr4Symbol("constant_expression", False),
        Antlr4Symbol("COLON", False),
        Antlr4Symbol("constant_expression", False),
    ])
    bin_op_choices.extend([operand, Antlr4Iteration(conditional_part)])
def rm_ambiguity(rules):
    """
    Make ``( ASSIGN class_new )?`` mandatory in the
    "variable_decl_assignment" rule.

    Every element of the rule which is equal to this option is handed over
    to replace_item_by_sequence() with the body of the option
    as the replacement.
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    optional_new = Antlr4Option(Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ]))

    def match_replace_fn(o):
        # None means that there is no replacement for this element
        return o.body if o == optional_new else None

    replace_item_by_sequence(target_rule, match_replace_fn)
def match_replace_fn(o):
    """
    Merge the neighbouring options of a selection where the earlier option
    is a prefix of the later one, e.g. ``a | a b -> a ( b )?``.

    The selection is modified in place. After a merge the next option
    starts a new prefix candidate, so an option is merged with one
    following option at most.

    :return: Antlr4Sequence with the only option if just a single option
        is left in the selection, otherwise None
    """
    if isinstance(o, Antlr4Selection):
        # the option which may be a prefix of the next one and its index in o
        potential_prefix = None
        potential_prefix_i = None
        # the options which were merged into their prefix
        to_remove = []
        for i, c in enumerate(o):
            if potential_prefix is None:
                potential_prefix = c
                potential_prefix_i = i
            else:
                # check if the potential_prefix is really a prefix of this rule
                is_prefix, suffix = is_prefix_of_elem(potential_prefix, c)
                if is_prefix:
                    # put suffix as a optional to a prefix
                    # (a suffix without any non visual item adds nothing)
                    if list(iter_non_visuals(suffix)):
                        if not isinstance(potential_prefix, Antlr4Sequence):
                            # wrap the prefix so the suffix can be appended
                            assert o[potential_prefix_i] is potential_prefix
                            potential_prefix = Antlr4Sequence([potential_prefix, ])
                            o[potential_prefix_i] = potential_prefix
                        if len(suffix) == 1:
                            suffix = suffix[0]
                        else:
                            suffix = Antlr4Sequence(suffix)
                        potential_prefix.append(Antlr4Option(suffix))
                    to_remove.append(c)
                    potential_prefix = None
                    potential_prefix_i = None
                    # NOTE(review): this name is only assigned and never read
                    # in this function and there is no nonlocal/global
                    # declaration for it here - confirm if the enclosing
                    # scope was meant to see it
                    modified = True
                else:
                    # not a prefix, this option is the next candidate
                    potential_prefix = c
                    potential_prefix_i = i
        # removed after the iteration so o is not shrunk while it is iterated
        for c in to_remove:
            o.remove(c)
        if len(o) == 1:
            return Antlr4Sequence([o[0], ])
def _sequence(self):
    """
    Collect the results of ``self._any()`` until it raises StopIteration.

    :return: the item itself if exactly one item was collected,
        otherwise an Antlr4Sequence of the collected items
    """
    collected = []
    try:
        while True:
            collected.append(self._any())
    except StopIteration:
        # nothing more to read
        pass
    return collected[0] if len(collected) == 1 else Antlr4Sequence(collected)
def parse_element_sequence(self, ctx: Element_sequenceContext) -> Antlr4Sequence:
    """
    element_sequence: element_block (WS element_block)*;

    The result of parse_element_block() is added as a single item,
    the result of parse_ws() (used for every other child) is added
    item by item.
    """
    items = []
    for child in ctx.children:
        if not isinstance(child, Element_blockContext):
            items.extend(self.parse_ws(child))
            continue
        items.append(self.parse_element_block(child))
    return Antlr4Sequence(items)
def fix_implicit_data_type(rules):
    """
    Replace the body of the "implicit_data_type" rule::

        : (signing)? (packed_dimension)*
        ->
        : signing (packed_dimension)*
        | (packed_dimension)+
        ;
    """
    rule = rule_by_name(rules, "implicit_data_type")
    with_signing = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(Antlr4Symbol("packed_dimension", False)),
    ])
    dimensions_only = Antlr4Iteration(
        Antlr4Symbol("packed_dimension", False), positive=True)
    rule.body = Antlr4Selection([with_signing, dimensions_only])
def _selection_empty_option_to_optional(sel: Antlr4Selection):
    """
    Convert a selection with an empty option to an optional element::

        a | | c -> ( a | c )?

    :return: tuple (element, changed); if there is no option equal to
        an empty sequence it is (sel, False), otherwise it is the
        Antlr4Option of the remaining options (of the option itself
        if exactly one is left) and True
    """
    empty = Antlr4Sequence([])
    kept = [opt for opt in sel if opt != empty]
    if len(kept) == len(sel):
        # nothing was filtered out
        return sel, False
    inner = kept[0] if len(kept) == 1 else Antlr4Selection(kept)
    return Antlr4Option(inner), True
def is_prefix_of_elem(prefix: iAntlr4GramElem, elem: iAntlr4GramElem):
    """
    Check if the prefix is a prefix of the element.

    Both arguments are wrapped to an Antlr4Sequence if they are not one
    already and only the items yielded by iter_non_visuals() are compared.
    The suffix is the part of ``elem`` behind the last matched item.
    The position of this item is searched item by item from the beginning,
    so an item which is repeated in ``elem`` does not cut the suffix too
    early, and a prefix without any non visual item results in the whole
    ``elem`` as the suffix.

    :returns: tuple (is_prefix, suffix), the suffix is None
        if the prefix does not match
    """
    if not isinstance(prefix, Antlr4Sequence):
        prefix = Antlr4Sequence([prefix, ])
    if not isinstance(elem, Antlr4Sequence):
        elem = Antlr4Sequence([elem, ])

    pr_list = list(iter_non_visuals(prefix))
    el_list = list(iter_non_visuals(elem))
    if len(pr_list) > len(el_list):
        return (False, None)

    matched = []
    for el, pr in zip(el_list, pr_list):
        if not (el == pr):
            return (False, None)
        matched.append(el)

    # Walk through elem in the order of the matched items; the search always
    # continues behind the previous hit so the visual items between
    # the matched items are skipped and repeated items are told apart.
    end = 0
    for el in matched:
        end = elem.index(el, end) + 1
    return (True, elem[end:])