def _sequence_flatten(seq: Antlr4Sequence) -> Tuple[Antlr4Sequence, bool]:
    """
    Inline the subsequences which are direct items of the sequence (in place).

    Only the subsequences present in ``seq`` at the time of the call are
    expanded, a sequence which is an item of an inlined subsequence
    stays as it is.

    :return: tuple (seq, changed), changed is True if ``seq`` contained
        at least one directly nested Antlr4Sequence
    """
    nested = [(pos, item) for pos, item in enumerate(seq)
              if isinstance(item, Antlr4Sequence)]

    # how far the original positions moved because of previous inlining
    shift = 0
    for pos, sub in nested:
        target = pos + shift
        if sub:
            assert seq[target] is sub
            members = iter(sub)
            # the first member takes the slot of the subsequence itself
            seq[target] = next(members)
            # the rest is inserted right behind it
            for insert_at, member in enumerate(members, target + 1):
                seq.insert(insert_at, member)
        else:
            # an empty subsequence just disappears
            del seq[target]
        shift += len(sub) - 1

    return seq, bool(nested)
def add_comments_and_ws(rules):
    """
    Append the lexer rules ONE_LINE_COMMENT, BLOCK_COMMENT and WHITE_SPACE
    to the list of rules.

    All three rules get the lexer action channel("HIDDEN").
    """

    def hidden_channel():
        # every rule gets its own list of lexer actions
        return [Antlr4LexerAction.channel("HIDDEN")]

    # ONE_LINE_COMMENT: '//' .*? '\r'? ('\n' | EOF) -> channel(HIDDEN);
    comment_start = Antlr4Symbol("//", True)
    comment_text = Antlr4Symbol(".*?", True, is_regex=True)
    optional_cr = Antlr4Option(Antlr4Symbol("\r", True))
    line_end = Antlr4Selection([
        Antlr4Symbol("\n", True),
        Antlr4Symbol("EOF", False),
    ])
    one_line_comment = Antlr4Rule(
        "ONE_LINE_COMMENT",
        Antlr4Sequence([comment_start, comment_text, optional_cr, line_end]),
        lexer_actions=hidden_channel())
    rules.append(one_line_comment)

    # BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
    block_comment_body = Antlr4Sequence([
        Antlr4Symbol("/*", True),
        Antlr4Symbol(".*?", True, is_regex=True),
        Antlr4Symbol("*/", True),
    ])
    block_comment = Antlr4Rule("BLOCK_COMMENT",
                               block_comment_body,
                               lexer_actions=hidden_channel())
    rules.append(block_comment)

    # WHITE_SPACE: [ \t\n\r] + -> channel(HIDDEN);
    white_space_body = Antlr4Sequence([
        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
    ])
    white_space = Antlr4Rule("WHITE_SPACE",
                             white_space_body,
                             lexer_actions=hidden_channel())
    rules.append(white_space)
def _sequence_expand_optionality(obj):
    """
    Expand the optional items on the beginning of the sequence
    to a list of variants.

    (a0)? (a1)* b -> a0 (a1)* b, (a1)+ b, b
    (a0)? -> a0, eps

    For each leading optional item there is a variant where the item is
    converted by _to_non_optional() and followed by a deep copy of the rest.
    The first non optional item adds a deep copy of the remaining items
    as the last variant. If every item is optional the last variant
    is an empty sequence.

    :attention: only the prefix is expanded, the expansion ends
        on the first non optional item
    :return: list of variants
    """
    variants = []
    only_optional_items = False
    for idx, item in enumerate(obj):
        if _is_optional(item):
            required = _to_non_optional(item)
            only_optional_items = idx == len(obj) - 1
            if only_optional_items:
                # nothing behind this item, the item is the variant
                variant = required
            else:
                rest = [deepcopy(x) for x in obj[idx + 1:]]
                variant = Antlr4Sequence([required] + rest)
            variants.append(variant)
        else:
            remainder = [deepcopy(x) for x in obj[idx:]]
            variants.append(Antlr4Sequence(remainder))
            break

    if only_optional_items:
        # all items can be missing at once
        variants.append(Antlr4Sequence([]))

    return variants
def cut_off_subsequence(a, sub_seq):
    """
    Split the sequence ``a`` to the part before and the part after
    the subsequence ``sub_seq``.

    The cut is placed on the first position where all items of ``sub_seq``
    match ``a`` (compared with ==). If there is no such position, the cut is
    placed on the first occurrence of ``sub_seq[0]`` and spans
    ``len(sub_seq)`` items.

    :raises IndexError: if ``sub_seq`` is empty
    :raises ValueError: if the first item of ``sub_seq`` is not in ``a``
    :return: prefix, suffix
    """
    first_item = sub_seq[0]
    sub_len = len(sub_seq)
    # first occurrence of the first item, used if no full match exists
    start = a.index(first_item)
    # Prefer the position where the whole subsequence really matches,
    # an item equal to sub_seq[0] can appear earlier in "a".
    for candidate in range(start, len(a) - sub_len + 1):
        if all(a[candidate + offset] == item
               for offset, item in enumerate(sub_seq)):
            start = candidate
            break
    end = start + sub_len
    return Antlr4Sequence(a[:start]), Antlr4Sequence(a[end:])
예제 #5
0
def extract_bin_ops(rules, current_expr_rule, ops_to_extrat, new_rule_name,
                    handle_conditional_fn, handle_inside_fn):
    """
    Create a new rule for the specified binary operators and insert it
    in front of the current expression rule.

    The body of the new rule is
    ``current_expr_rule (choice_0 | choice_1 ...)*``.
    The operators KW_INSIDE and QUESTIONMARK add their choices using
    handle_inside_fn and handle_conditional_fn, KW_DIST adds nothing.

    :return: the new rule
    """
    special_ops = ("KW_INSIDE", "KW_DIST", "QUESTIONMARK")
    plain_ops = [name for name in ops_to_extrat if name not in special_ops]

    choices = []
    if plain_ops:
        if len(plain_ops) == 1:
            operator = Antlr4Symbol(plain_ops[0], False)
        else:
            operator = Antlr4Selection(
                [Antlr4Symbol(name, False) for name in plain_ops])

        # expression (binary_operator ( attribute_instance )* expression)*
        choices.append(Antlr4Sequence([
            operator,
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol(current_expr_rule.name, False),
        ]))

    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(choices, current_expr_rule)

    # KW_DIST is handled differently, it is allowed only on specified places

    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(choices, current_expr_rule)

    for choice in choices:
        assert isinstance(choice, iAntlr4GramElem), choice

    # the rule which contains the extracted binary operators
    repeated = Antlr4Selection(choices) if len(choices) > 1 else choices[0]
    new_r = Antlr4Rule(
        new_rule_name,
        Antlr4Sequence([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Iteration(repeated),
        ]))
    rules.insert(rules.index(current_expr_rule), new_r)

    return new_r
예제 #6
0
 def handle_inside_fn(bin_op_choices, current_expr_rule):
     """
     Add the choice for the KW_INSIDE operator to bin_op_choices.

     A newline and an indent are first appended to the last existing choice.
     """
     previous_choice = bin_op_choices[-1]
     previous_choice.extend([Antlr4Newline(), Antlr4Indent(1)])

     # expression (KW_INSIDE LBRACE open_range_list RBRACE)*;
     inside_tokens = ("KW_INSIDE", "LBRACE", "open_range_list", "RBRACE")
     inside_part = Antlr4Sequence(
         [Antlr4Symbol(token, False) for token in inside_tokens])
     inside_choice = Antlr4Sequence([
         Antlr4Symbol(current_expr_rule.name, False),
         Antlr4Iteration(inside_part),
     ])
     bin_op_choices.append(inside_choice)
def _selection_share_common(sel: Antlr4Selection, start_index):
    """
    Merge the options of the selection which have an equal first item
    (start_index == 0) or an equal last item (start_index == -1),
    the shared item is then written only once.

    a b | a c -> a (b | c)   (start_index == 0)
    b a | c a -> (b | c) a   (start_index == -1)

    :return: tuple (element, changed), the element is the only resulting
        option if there is exactly one, otherwise it is ``sel`` updated in place
    """
    assert start_index == 0 or start_index == -1
    assert isinstance(sel, Antlr4Selection), sel.__class__

    # index of option -> indexes of options with the same boundary item
    # (the set object is shared by the members once they are merged)
    groups = {idx: {idx} for idx in range(len(sel))}
    for left_i, left in enumerate(sel):
        following = enumerate(islice(sel, left_i + 1, None), left_i + 1)
        for right_i, right in following:
            if left and right and left[start_index] == right[start_index]:
                group = groups[left_i]
                group.update(groups[right_i])
                groups[right_i] = group

    handled = set()
    merged_options = []
    changed = False
    for idx in sorted(groups):
        if idx in handled:
            continue

        group = groups[idx]
        if len(group) == 1:
            # nothing shared, keep the option as it is
            merged_options.append(sel[idx])
            continue

        changed = True
        members = sorted(group)
        shared_item = sel[idx][start_index]
        if start_index == 0:
            # shared prefix: item (rest_0 | rest_1 ...)
            rest = Antlr4Selection(
                [Antlr4Sequence(sel[m][1:]) for m in members])
            option = Antlr4Sequence([shared_item, rest])
        else:
            assert start_index == -1
            # shared suffix: (rest_0 | rest_1 ...) item
            rest = Antlr4Selection(
                [Antlr4Sequence(sel[m][:-1]) for m in members])
            option = Antlr4Sequence([rest, shared_item])
        handled.update(members)
        merged_options.append(option)

    if len(merged_options) == 1:
        return merged_options[0], changed

    sel.clear()
    sel.extend(merged_options)
    return sel, changed
    def match_replace_fn(o):
        """
        Merge the single character terminal options of a selection
        to a single character class regex.

        The merged options are removed from the selection and the regex
        ``[...]`` built from their escaped characters is appended instead.
        Nothing happens if the selection has less than two such options.

        :return: a sequence with the regex if the selection stays empty
            after the removal, otherwise None (the selection is updated in place)
        """
        if not isinstance(o, Antlr4Selection):
            return None

        # pairs (option as stored in the selection, its single char symbol)
        char_options = []
        for option in o:
            symbol = option
            if isinstance(symbol, Antlr4Sequence):
                if len(symbol) != 1:
                    # an empty option or more items, it is not a single char
                    # (the empty option would fail on symbol[0] otherwise)
                    continue
                symbol = symbol[0]
            if (isinstance(symbol, Antlr4Symbol) and symbol.is_terminal
                    and len(symbol.symbol) == 1):
                char_options.append((option, symbol))

        if len(char_options) <= 1:
            return None

        # build a regex out of the characters and replace them by the regex
        for option, _ in char_options:
            o.remove(option)

        char_class = "[%s]" % "".join(
            symbol._escaped() for _, symbol in char_options)
        regex_symbol = Antlr4Symbol(char_class, True, is_regex=True)
        if len(o):
            o.append(regex_symbol)
            return None

        return Antlr4Sequence([
            regex_symbol,
        ])
def _extract_common_from_sequences(a: Antlr4Sequence, b: Antlr4Sequence,
                                   common, extracted_rule_name: str):
    """
    Extract the common part of two sequences to a new rule.

    :param common: list of pairs (item from a, item from b)
        which describes the common part
    :param extracted_rule_name: the name for the new rule
    :return: tuple (a, b, the new rule with the common part)
    """
    a_common = [pair[0] for pair in common]

    # the body of the new rule, a single item is not wrapped in a sequence
    rule_body = Antlr4Sequence(a_common)
    if len(rule_body) == 1:
        rule_body = rule_body[0]
    common_rule = Antlr4Rule(extracted_rule_name, rule_body)

    a_prefix, a_suffix = cut_off_subsequence(a, a_common)
    b_common = [pair[1] for pair in common]
    b_prefix, b_suffix = cut_off_subsequence(b, b_common)

    # If the sequences differ only in optional items, these items are
    # converted to the non optional variant
    # (the variant without these items is the extracted rule itself).
    same_suffix = not (a_suffix or b_suffix)
    a_prefix_to_non_optional = not _is_optional(b_prefix) and same_suffix
    b_prefix_to_non_optional = not _is_optional(a_prefix) and same_suffix

    extract_common_from_sequences_from_part(
        a, a_prefix, a_suffix, extracted_rule_name, a_prefix_to_non_optional)
    extract_common_from_sequences_from_part(
        b, b_prefix, b_suffix, extracted_rule_name, b_prefix_to_non_optional)

    return a, b, common_rule
예제 #10
0
def fix_subroutine_call(rules):
    """
    Insert the option ``(class_qualifier)? method_call_body``
    as the first item of the body of the subroutine_call rule.
    """
    subroutine_call = rule_by_name(rules, "subroutine_call")
    method_call = Antlr4Sequence([
        Antlr4Option(Antlr4Symbol("class_qualifier", False)),
        Antlr4Symbol("method_call_body", False),
    ])
    subroutine_call.body.insert(0, method_call)
예제 #11
0
 def match_replace_fn(o: iAntlr4GramElem):
     """
     Replace the element equal to c_id by a selection of its body
     and a sequence holding the ``{<la1> != COLON}?`` symbol.

     :return: the replacement or None if the element does not match
     """
     if not (o == c_id):
         return None

     body = o.body
     not_colon = Antlr4Symbol("{%s != COLON}?" % la1, True, True)
     return Antlr4Selection([body, Antlr4Sequence([not_colon])])
def add_predicate(r, std):
    """
    Append the language version predicate for the standard ``std``
    to the end of the body of the rule ``r``.
    """
    # The Definitive ANTLR 4 Reference, p 194:
    # in lexer the predicate should be on the right side
    original_body = r.body
    version_predicate = Antlr4Symbol(
        "{language_version >= hdlConvertor::Language::%s}?" % std,
        True,
        is_regex=True)
    r.body = Antlr4Sequence([original_body, version_predicate])
예제 #13
0
    def _auto_format(self, e: iAntlr4GramElem, x, indent) -> int:
        """
        Recursively insert Antlr4Newline/Antlr4Indent items into the element
        and keep track of the current position on the line.

        :param e: the element to format (it is modified in place)
        :param x: the position on the line where the element starts
        :param indent: the indentation level used for the inserted line breaks
        :return: the position on the line after the element
        :raises TypeError: if the type of the element is not supported
        """
        if isinstance(e, Antlr4Selection):
            # round the position up to a whole number of indentation steps
            item_indent = ceil(x / len(Antlr4Indent.INDENT))
            x = item_indent * len(Antlr4Indent.INDENT)
            # item_indent = indent + 1
            # assert item_indent >= indent
            for ci, c in enumerate(e):
                # every option has to be a sequence so the line break
                # can be appended to it
                if not isinstance(c, Antlr4Sequence):
                    c = Antlr4Sequence([
                        c,
                    ])
                    e[ci] = c
                self._auto_format(c, x + 1, item_indent)
                # NOTE(review): ci is at most len(e) - 1 so this condition is
                # always True and the line break is appended also to the last
                # option; confirm if "len(e) - 1" was intended
                if ci != len(e):
                    c.extend([Antlr4Newline(), Antlr4Indent(item_indent)])
        elif isinstance(e, Antlr4Sequence):
            # indexes of the items after which the line is too long
            line_breaks = []
            for i, o in enumerate(e):
                x = self._auto_format(o, x, indent) + 1
                if x >= self.line_len:
                    line_breaks.append(i)
                    x = indent * len(Antlr4Indent.INDENT)
            # each break adds 2 items, the offset compensates the shift
            # of the indexes collected before the insertion
            offset = 0
            for i in line_breaks:
                e.insert(offset + i, Antlr4Newline())
                e.insert(offset + i + 1, Antlr4Indent(indent))
                offset += 2
        elif isinstance(e, (Antlr4Option, Antlr4Iteration)):
            # the body plus a constant of 5 columns
            # (presumably for the parentheses and the suffix - TODO confirm)
            return self._auto_format(e.body, x, indent) + 5
        elif isinstance(e, Antlr4Symbol):
            return x + len(e.symbol)
        elif isinstance(e, Antlr4Rule):
            if isinstance(e.body, Antlr4Symbol):
                # a single symbol body starts behind the name of the rule
                return self._auto_format(e.body, len(e.name) + 2, 1)
            else:
                # any other body is wrapped in a sequence (if it is not
                # a sequence already) and it starts on a new line
                if not isinstance(e.body, Antlr4Sequence):
                    e.body = Antlr4Sequence([
                        e.body,
                    ])
                x = self._auto_format(e.body, 0, 1)
                e.body.insert(0, Antlr4Newline())
        else:
            raise TypeError(repr(e.__class__))

        return x
def _selection_options_to_sequnces(sel: Antlr4Selection):
    """
    Wrap every option of the selection which is not an Antlr4Sequence
    in a new Antlr4Sequence (the selection is updated in place).
    """
    for idx, option in enumerate(sel):
        if isinstance(option, Antlr4Sequence):
            continue
        sel[idx] = Antlr4Sequence([option])
예제 #15
0
 def match_replace_fn(o: iAntlr4GramElem):
     """
     Replace an optional sequence which starts with kw_else by a selection
     of this sequence and a sequence holding the ``{<la1> != KW_ELSE}?`` symbol.

     :return: the replacement or None if the element does not match
     """
     starts_with_else = (isinstance(o, Antlr4Option)
                         and isinstance(o.body, Antlr4Sequence)
                         and o.body[0] == kw_else)
     if not starts_with_else:
         return None

     body = o.body
     not_else = Antlr4Symbol("{%s != KW_ELSE}?" % la1, True, True)
     return Antlr4Selection([body, Antlr4Sequence([not_else])])
예제 #16
0
 def match_replace_fn(o: iAntlr4GramElem):
     """
     Replace an option whose only non visual item is the symbol rule_name
     by a sequence holding just this symbol.

     :return: the replacement or None if the element does not match
     """
     if not isinstance(o, Antlr4Option):
         return None

     items = list(iter_non_visuals(o.body))
     if len(items) != 1:
         return None

     only_item = items[0]
     if isinstance(only_item, Antlr4Symbol) and only_item.symbol == rule_name:
         return Antlr4Sequence([only_item])
     return None
예제 #17
0
    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        """
        Remove the conditional_expression rule from the rules, inline it
        using _inline_rule() together with cond_predicate and add its body
        without the first item to bin_op_choices.
        """
        # rm left recursion from cond_predicate/conditional_expression
        cond_predicate = rule_by_name(rules, "cond_predicate")
        cond_expr = rule_by_name(rules, "conditional_expression")
        rules.remove(cond_expr)
        _inline_rule([cond_expr], cond_predicate)

        # everything behind the first item of the body
        body_tail = cond_expr.body[1:]
        bin_op_choices.append(Antlr4Sequence(body_tail))
예제 #18
0
 def apply_to_replace2(o):
     """
     Look up the element (by id) in the matches ``m`` and return its replacement.

     :return: the symbol of package_or_class_scoped_hier_id_with_const_select
         if the matched value is to_replace2 or the hierarchical_identifier
         symbol, an empty sequence for any other matched value,
         None if the element is not in any match
     """
     key = id(o)
     for match in m:
         found = match.get(key)
         if found is None:
             continue

         is_hier_id = (found is to_replace2
                       or (isinstance(found, Antlr4Symbol)
                           and found.symbol == "hierarchical_identifier"))
         if is_hier_id:
             return Antlr4Symbol(
                 package_or_class_scoped_hier_id_with_const_select.name,
                 False)
         return Antlr4Sequence([])
     return None
예제 #19
0
def add_file_path_literal_rules(p):
    """
    Append the fragment rule FILE_PATH_SPEC_CHAR and the rule FILE_PATH_SPEC
    to ``p.rules``.

    FILE_PATH_SPEC: ( FILE_PATH_SPEC_CHAR ( SEMI FILE_PATH_SPEC_CHAR )? )+
    """
    spec_char_rule = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol("[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )", True, True),
        is_fragment=True)
    p.rules.append(spec_char_rule)

    def spec_char():
        # a new reference to the fragment rule
        return Antlr4Symbol("FILE_PATH_SPEC_CHAR", False)

    path_part = Antlr4Sequence([
        spec_char(),
        Antlr4Option(
            Antlr4Sequence([
                Antlr4Symbol('SEMI', False),
                spec_char(),
            ])),
    ])
    spec_rule = Antlr4Rule("FILE_PATH_SPEC",
                           Antlr4Iteration(path_part, positive=True))
    p.rules.append(spec_rule)
예제 #20
0
 def apply_to_replace0_and_1(o):
     """
     Look up the element (by id) in the matches ``m``, remove the used
     record from the match and return the replacement of the element.

     :return: the symbol of package_or_class_scoped_id if the matched value
         is to_replace0, to_replace1 or the identifier symbol,
         an empty sequence for any other matched value,
         None if the element is not in any match
     """
     key = id(o)
     for match in m:
         found = match.get(key)
         if found is None:
             continue

         # the record is consumed by this replacement
         del match[key]
         is_identifier = (found is to_replace0
                          or found is to_replace1
                          or (isinstance(found, Antlr4Symbol)
                              and found.symbol == "identifier"))
         if is_identifier:
             return Antlr4Symbol(package_or_class_scoped_id.name, False)
         return Antlr4Sequence([])
     return None
    def match_replace_fn(o: iAntlr4GramElem):
        """
        Replace an option whose body is the symbol rule_name (directly
        or as the only item of a sequence) by a sequence with this symbol.

        :return: the replacement or None if the element does not match
        """
        if not isinstance(o, Antlr4Option):
            return None

        body = o.body
        single_item_seq = isinstance(body, Antlr4Sequence) and len(body) == 1
        candidate = body[0] if single_item_seq else body

        if isinstance(candidate, Antlr4Symbol) and candidate.symbol == rule_name:
            return Antlr4Sequence([candidate])
        return None
예제 #22
0
def _iterate_everything_except_first_and_replace_first(seq, repl):
    """
    Rewrite the sequence in place to ``repl (rest)*`` followed by
    a newline and an indent.

    The rest are the non visual items of the original sequence
    without the first one.
    """
    tail = list(iter_non_visuals(seq))
    del tail[:1]
    # a single item is iterated directly, without a wrapping sequence
    repeated = tail[0] if len(tail) == 1 else Antlr4Sequence(tail)
    iteration = Antlr4Iteration(repeated)

    seq.clear()
    for item in (repl, iteration, Antlr4Newline(), Antlr4Indent(1)):
        seq.append(item)
예제 #23
0
 def handle_conditional_fn(bin_op_choices, current_expr_rule):
     """
     Add the items for the conditional operator to bin_op_choices:
     the symbol of the current expression rule and the iteration
     ``(QUESTIONMARK (attribute_instance)* constant_expression
     COLON constant_expression)*``.
     """
     expr_symbol = Antlr4Symbol(current_expr_rule.name, False)
     conditional_tail = Antlr4Sequence([
         Antlr4Symbol("QUESTIONMARK", False),
         Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
         Antlr4Symbol("constant_expression", False),
         Antlr4Symbol("COLON", False),
         Antlr4Symbol("constant_expression", False),
     ])
     # NOTE(review): the two items are added as two separate entries
     # of bin_op_choices, not as a single sequence - confirm it is intended
     bin_op_choices.extend([expr_symbol, Antlr4Iteration(conditional_tail)])
예제 #24
0
def rm_ambiguity(rules):
    """
    Replace every ``(ASSIGN class_new)?`` in the variable_decl_assignment
    rule by its non optional body ``ASSIGN class_new``.
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    optional_class_new = Antlr4Option(
        Antlr4Sequence([
            Antlr4Symbol("ASSIGN", False),
            Antlr4Symbol("class_new", False),
        ]))

    def match_replace_fn(o):
        # unwrap the matching option, everything else stays untouched
        return o.body if o == optional_class_new else None

    replace_item_by_sequence(target_rule, match_replace_fn)
예제 #25
0
    def match_replace_fn(o):
        """
        Merge the neighbouring options of a selection where the first option
        is a prefix of the second one: the second option is removed and its
        suffix (if it has any non visual item) is appended to the first
        option as an optional item.

        :return: a sequence with the only remaining option if the selection
            ends up with a single option, otherwise None
            (the selection is updated in place)
        """
        if isinstance(o, Antlr4Selection):
            # the option which is tested as a prefix of the next option
            # and its index in the selection
            potential_prefix = None
            potential_prefix_i = None

            to_remove = []
            for i, c in enumerate(o):
                if potential_prefix is None:
                    potential_prefix = c
                    potential_prefix_i = i
                else:
                    # check if the potential_prefix is really a prefix of this rule
                    is_prefix, suffix = is_prefix_of_elem(potential_prefix, c)
                    if is_prefix:
                        # put suffix as a optional to a prefix
                        if list(iter_non_visuals(suffix)):
                            # the prefix has to be a sequence (stored in the
                            # selection) so the suffix can be appended to it
                            if not isinstance(potential_prefix, Antlr4Sequence):
                                assert o[potential_prefix_i] is potential_prefix
                                potential_prefix = Antlr4Sequence([potential_prefix, ])
                                o[potential_prefix_i] = potential_prefix

                            # a single item suffix is not wrapped in a sequence
                            if len(suffix) == 1:
                                suffix = suffix[0]
                            else:
                                suffix = Antlr4Sequence(suffix)

                            potential_prefix.append(Antlr4Option(suffix))
                        # the option is now covered by the prefix; the next
                        # option starts a new search
                        to_remove.append(c)
                        potential_prefix = None
                        potential_prefix_i = None
                        # NOTE(review): no "nonlocal modified" is visible in
                        # this function, so this assignment creates a local
                        # variable and an enclosing "modified" (if any) is
                        # not updated - confirm against the enclosing function
                        modified = True
                    else:
                        potential_prefix = c
                        potential_prefix_i = i

            # removed after the iteration so the selection is not modified
            # while it is iterated
            for c in to_remove:
                o.remove(c)

            if len(o) == 1:
                return Antlr4Sequence([o[0], ])
    def _sequence(self):
        """
        Collect the elements returned by self._any() until it raises
        StopIteration.

        :return: the element itself if exactly one element was collected,
            otherwise an Antlr4Sequence of all collected elements
        """
        collected = []
        while True:
            try:
                element = self._any()
            except StopIteration:
                break
            else:
                collected.append(element)

        if len(collected) != 1:
            return Antlr4Sequence(collected)
        return collected[0]
예제 #27
0
 def parse_element_sequence(self, ctx: Element_sequenceContext) -> Antlr4Sequence:
     """
     element_sequence: element_block (WS element_block)*;

     :return: the sequence of the parsed children
     """
     items = []
     for child in ctx.children:
         if not isinstance(child, Element_blockContext):
             # parse_ws() returns an iterable of items
             items.extend(self.parse_ws(child))
             continue
         items.append(self.parse_element_block(child))
     return Antlr4Sequence(items)
예제 #28
0
def fix_implicit_data_type(rules):
    """
    Replace the body of the implicit_data_type rule.

    : (signing)? (packed_dimension)*
    ->
    : signing (packed_dimension)*
    | (packed_dimension)+
    ;
    """
    implicit_data_type = rule_by_name(rules, "implicit_data_type")

    def packed_dimension():
        # a new symbol for each use
        return Antlr4Symbol("packed_dimension", False)

    with_signing = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(packed_dimension()),
    ])
    dimensions_only = Antlr4Iteration(packed_dimension(), positive=True)
    implicit_data_type.body = Antlr4Selection([with_signing, dimensions_only])
def _selection_empty_option_to_optional(sel: Antlr4Selection):
    """
    Replace the empty options of the selection by making the rest optional.

    a | | c -> ( a | c )?

    :return: tuple (element, changed); if the selection has an empty option
        the element is a new Antlr4Option of the remaining options,
        otherwise it is the unchanged ``sel``
    """
    empty_option = Antlr4Sequence([])
    non_empty = [opt for opt in sel if opt != empty_option]
    if len(non_empty) == len(sel):
        # there is no empty option
        return sel, False

    if len(non_empty) == 1:
        body = non_empty[0]
    else:
        body = Antlr4Selection(non_empty)
    return Antlr4Option(body), True
예제 #30
0
def is_prefix_of_elem(prefix: iAntlr4GramElem, elem: iAntlr4GramElem):
    """
    Check if the prefix is a prefix of the element.

    Only the items returned by iter_non_visuals() are compared.
    An element which is not an Antlr4Sequence is handled
    as a sequence with a single item.

    :returns: tuple (is_prefix, suffix), the suffix is the slice of the
        element behind the last item matched by the prefix (the whole element
        for an empty prefix) or None if the prefix is not a prefix
    """
    if not isinstance(prefix, Antlr4Sequence):
        prefix = Antlr4Sequence([prefix, ])
    if not isinstance(elem, Antlr4Sequence):
        elem = Antlr4Sequence([elem, ])

    pr_list = list(iter_non_visuals(prefix))
    el_list = list(iter_non_visuals(elem))
    if len(pr_list) > len(el_list):
        return (False, None)

    for el, pr in zip(el_list, pr_list):
        if not (el == pr):
            return (False, None)

    # Find the position behind the last matched item of the element.
    # The matched items are searched by identity and in order, because
    # a lookup by equality (elem.index) stops on the first equal item,
    # which is wrong if the prefix contains an item multiple times.
    # (assumes iter_non_visuals yields the items of elem itself, in order)
    matched = el_list[:len(pr_list)]
    matched_cnt = 0
    cut = 0
    for pos, item in enumerate(elem):
        if matched_cnt == len(matched):
            break
        if item is matched[matched_cnt]:
            matched_cnt += 1
            cut = pos + 1

    if matched_cnt != len(matched):
        # the items could not be located by identity,
        # use the lookup by equality as a fallback
        cut = elem.index(pr_list[-1]) + 1

    return (True, elem[cut:])