示例#1
0
def python_ast_to_asdl_ast(py_ast_node, grammar):
    # node should be composite
    py_node_name = type(py_ast_node).__name__
    # assert py_node_name.startswith('_ast.')

    production = grammar.get_prod_by_ctr_name(py_node_name)

    fields = []
    for field in production.fields:
        field_value = getattr(py_ast_node, field.name)
        asdl_field = RealizedField(field)
        if field.cardinality == 'single' or field.cardinality == 'optional':
            if field_value is not None:  # sometimes it could be 0
                if grammar.is_composite_type(field.type):
                    child_node = python_ast_to_asdl_ast(field_value, grammar)
                    asdl_field.add_value(child_node)
                else:
                    asdl_field.add_value(str(field_value))
        # field with multiple cardinality
        elif field_value is not None:
            if grammar.is_composite_type(field.type):
                for val in field_value:
                    child_node = python_ast_to_asdl_ast(val, grammar)
                    asdl_field.add_value(child_node)
            else:
                for val in field_value:
                    asdl_field.add_value(str(val))

        fields.append(asdl_field)

    asdl_node = AbstractSyntaxTree(production, realized_fields=fields)

    return asdl_node
示例#2
0
 def parse_orderby(self, orderby_clause: list, limit: int,
                   orderby_field: RealizedField):
     if limit is None:
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('Asc')) if orderby_clause[0] == 'asc' \
             else AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('Desc'))
     else:
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('AscLimit')) if orderby_clause[0] == 'asc' \
             else AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('DescLimit'))
     col_units_field = ast_node.fields[0]
     for val_unit in orderby_clause[1]:
         col_units_field.add_value(self.parse_col_unit(val_unit[1]))
     orderby_field.add_value(ast_node)
示例#3
0
 def parse_orderby(self, orderby_clause: list, limit: int, orderby_field: RealizedField):
     orderby_num = min(2, len(orderby_clause[1]))
     num_str = 'One' if orderby_num == 1 else 'Two'
     order_str = 'Asc' if orderby_clause[0] == 'asc' else 'Desc'
     limit_str = 'Limit' if limit else '' # e.g. OneAsc, TwoDescLimit
     ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name(num_str + order_str + limit_str))
     for i, val_unit in enumerate(orderby_clause[1]):
         if i >= 2: break
         col_unit = val_unit[1]
         ast_node.fields[i].add_value(self.parse_col_unit(col_unit))
         # ast_node.fields[i].add_value(self.parse_val_unit(val_unit))
     orderby_field.add_value(ast_node)
示例#4
0
 def parse_groupby(self, groupby_clause: list, having_clause: list, groupby_field: RealizedField):
     groupby_ctr = ['OneNoHaving', 'TwoNoHaving', 'OneHaving', 'TwoHaving']
     groupby_num = min(2, len(groupby_clause))
     if having_clause:
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name(groupby_ctr[groupby_num + 1]))
         having_field = ast_node.fields[-1]
         having_field.add_value(self.parse_conds(having_clause))
     else:
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name(groupby_ctr[groupby_num - 1]))
     for i, col_unit in enumerate(groupby_clause):
         if i >= 2: break
         # ast_node.fields[i].add_value(int(col_unit[1]))
         ast_node.fields[i].add_value(self.parse_col_unit(col_unit))
     groupby_field.add_value(ast_node)
示例#5
0
def lisp_node_to_ast(grammar, lisp_tokens, start_idx):
    node_name = lisp_tokens[start_idx]
    i = start_idx
    if node_name in [
            '_eq', 'select', 'filter', '_parts', '_time', '_inspect',
            'between', '_and', '_or', 'renew', 'cancel'
    ]:
        # it's a predicate
        prod = grammar.get_prod_by_ctr_name('apply')
        pred_field = RealizedField(prod['predicate'], value=node_name)

        arg_ast_nodes = []
        while True:
            i += 1
            lisp_token = lisp_tokens[i]
            if lisp_token == "(":
                arg_ast_node, end_idx = lisp_expr_to_ast_helper(
                    grammar, lisp_tokens, i)
            elif lisp_token == ")":
                i += 1
                break
            else:
                prod1 = grammar.get_prod_by_ctr_name('Literal')
                arg_ast_node, end_idx = AbstractSyntaxTree(
                    prod1,
                    [RealizedField(prod1['literal'], value=lisp_tokens[i])]), i

            arg_ast_nodes.append(arg_ast_node)

            i = end_idx
            if i >= len(lisp_tokens): break
            if lisp_tokens[i] == ')':
                i += 1
                break

        arg_field = RealizedField(prod['arguments'], arg_ast_nodes)
        ast_node = AbstractSyntaxTree(prod, [pred_field, arg_field])
    elif node_name.endswith('id0') or node_name.endswith('id1') or node_name.endswith('id2') \
            or node_name in ['periodid0', 'periodid1']:
        # it's a literal
        prod = grammar.get_prod_by_ctr_name('Literal')

        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['literal'], value=node_name)])

        i += 1
    else:
        raise NotImplementedError

    return ast_node, i
def prolog_node_to_ast(grammar, prolog_tokens, start_idx):
    node_name = prolog_tokens[start_idx]
    i = start_idx
    if node_name in [
            'job', 'language', 'loc', 'req_deg', 'application', 'area',
            'company', 'des_deg', 'des_exp', 'platform', 'recruiter',
            'req_exp', 'salary_greater_than', 'salary_less_than', 'title'
    ]:
        # it's a predicate
        prod = grammar.get_prod_by_ctr_name('Apply')
        pred_field = RealizedField(prod['predicate'], value=node_name)

        arg_ast_nodes = []
        i += 1
        assert prolog_tokens[i] == '('
        while True:
            i += 1
            arg_ast_node, end_idx = prolog_node_to_ast(grammar, prolog_tokens,
                                                       i)
            arg_ast_nodes.append(arg_ast_node)

            i = end_idx
            if i >= len(prolog_tokens): break
            if prolog_tokens[i] == ')':
                i += 1
                break

            assert prolog_tokens[i] == ','

        arg_field = RealizedField(prod['arguments'], arg_ast_nodes)
        ast_node = AbstractSyntaxTree(prod, [pred_field, arg_field])
    elif node_name in ['ANS', 'X', 'A', 'B', 'P', 'J']:
        # it's a variable
        prod = grammar.get_prod_by_ctr_name('Variable')
        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['variable'], value=node_name)])

        i += 1
    elif node_name.endswith('id0') or node_name.endswith('id1') or node_name.endswith('id2') \
            or node_name in ['20', 'hour', 'num_salary', 'year', 'year0', 'year1', 'month']:
        # it's a literal
        prod = grammar.get_prod_by_ctr_name('Literal')
        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['literal'], value=node_name)])

        i += 1
    else:
        raise NotImplementedError

    return ast_node, i
示例#7
0
 def parse_select(self, select_clause: list, select_field: RealizedField):
     select_clause = select_clause[1] # list of (agg, val_unit), ignore distinct flag
     select_num = min(5, len(select_clause))
     select_ctr = ['SelectOne', 'SelectTwo', 'SelectThree', 'SelectFour', 'SelectFive']
     ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name(select_ctr[select_num - 1]))
     for i, (agg, val_unit) in enumerate(select_clause):
         if i >= 5: break
         if agg != 0: # MAX/MIN/COUNT/SUM/AVG
             val_unit_ast = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('Unary'))
             col_unit = [agg] + val_unit[1][1:]
             val_unit_ast.fields[0].add_value(self.parse_col_unit(col_unit))
         else:
             val_unit_ast = self.parse_val_unit(val_unit)
         ast_node.fields[i].add_value(val_unit_ast)
     select_field.add_value(ast_node)
示例#8
0
def lisp_expr_to_ast_helper(grammar, lisp_tokens, start_idx=0):
    i = start_idx
    if lisp_tokens[i] == '(':
        i += 1

    parsed_nodes = []
    while True:
        if lisp_tokens[i] == '(':
            ast_node, end_idx = lisp_expr_to_ast_helper(
                grammar, lisp_tokens, i)
            parsed_nodes.append(ast_node)
            i = end_idx
        else:
            ast_node, end_idx = lisp_node_to_ast(grammar, lisp_tokens, i)
            parsed_nodes.append(ast_node)
            i = end_idx

        if i >= len(lisp_tokens): break
        if lisp_tokens[i] == ')':
            # i += 1
            break

        if lisp_tokens[i] == ' ':
            # and
            i += 1

    assert parsed_nodes
    if len(parsed_nodes) > 1:
        prod = grammar.get_prod_by_ctr_name('And')
        return_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['arguments'], parsed_nodes)])
    else:
        return_node = parsed_nodes[0]
    return return_node, i
示例#9
0
 def parse_groupby(self, groupby_clause: list, having_clause: list,
                   groupby_field: RealizedField):
     col_ids = []
     for col_unit in groupby_clause:
         col_ids.append(col_unit[1])  # agg is None and isDistinct False
     if having_clause:
         ast_node = AbstractSyntaxTree(
             self.grammar.get_prod_by_ctr_name('Having'))
         col_units_field, having_fields = ast_node.fields
         having_fields.add_value(self.parse_conds(having_clause))
     else:
         ast_node = AbstractSyntaxTree(
             self.grammar.get_prod_by_ctr_name('NoHaving'))
         col_units_field = ast_node.fields[0]
     for col_unit in groupby_clause:
         col_units_field.add_value(self.parse_col_unit(col_unit))
     groupby_field.add_value(ast_node)
示例#10
0
 def parse_from(self, from_clause: dict, from_field: RealizedField):
     """ Ignore from conditions, since it is not evaluated in evaluation script
     """
     table_units = from_clause['table_units']
     t = table_units[0][0]
     if t == 'table_unit':
         table_num = min(6, len(table_units))
         table_ctr = ['FromOneTable', 'FromTwoTable', 'FromThreeTable', 'FromFourTable', 'FromFiveTable', 'FromSixTable']
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name(table_ctr[table_num - 1]))
         for i, (_, tab_id) in enumerate(table_units):
             if i >= 6: break
             ast_node.fields[i].add_value(int(tab_id))
     else:
         assert t == 'sql'
         v = table_units[0][1]
         ast_node = AbstractSyntaxTree(self.grammar.get_prod_by_ctr_name('FromSQL'))
         ast_node.fields[0].add_value(self.parse_sql(v))
     from_field.add_value(ast_node)
示例#11
0
 def parse_select(self, select_clause: list, select_field: RealizedField):
     """
         ignore cases agg(col_id1 op col_id2) and agg(col_id1) op agg(col_id2)
     """
     select_clause = select_clause[1]  # list of (agg, val_unit)
     unit_op_list = ['Unary', 'Minus', 'Plus', 'Times', 'Divide']
     agg_op_list = ['None', 'Max', 'Min', 'Count', 'Sum', 'Avg']
     for agg, val_unit in select_clause:
         if agg != 0:  # agg col_id
             ast_node = AbstractSyntaxTree(
                 self.grammar.get_prod_by_ctr_name('Unary'))
             col_node = AbstractSyntaxTree(
                 self.grammar.get_prod_by_ctr_name(agg_op_list[agg]))
             col_node.fields[0].add_value(int(val_unit[1][1]))
             ast_node.fields[0].add_value(col_node)
         else:  # binary_op col_id1 col_id2
             ast_node = self.parse_val_unit(val_unit)
         select_field.add_value(ast_node)
示例#12
0
 def parse_from(self, from_clause: dict, from_field: RealizedField):
     """
         Ignore from conditions, since it is not evaluated in evaluation script
     """
     table_units = from_clause['table_units']
     t = table_units[0][0]
     if t == 'table_unit':
         ast_node = AbstractSyntaxTree(
             self.grammar.get_prod_by_ctr_name('FromTable'))
         tables_field = ast_node.fields[0]
         for _, v in table_units:
             tables_field.add_value(int(v))
     else:
         assert t == 'sql'
         v = table_units[0][1]
         ast_node = AbstractSyntaxTree(
             self.grammar.get_prod_by_ctr_name('FromSQL'))
         ast_node.fields[0].add_value(self.parse_sql(v))
     from_field.add_value(ast_node)
示例#13
0
def regex_ast_to_asdl_ast(grammar, reg_ast):
    if reg_ast.children:
        rule = _NODE_CLASS_TO_RULE[reg_ast.node_class]
        prod = grammar.get_prod_by_ctr_name(rule)
        # unary
        if rule in ["Not", "Star", "StartWith", "EndWith", "Contain"]:
            child_ast_node = regex_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[0])
            ast_node = AbstractSyntaxTree(
                prod, [RealizedField(prod['arg'], child_ast_node)])
            return ast_node
        elif rule in ["Concat", "And", "Or"]:
            left_ast_node = regex_ast_to_asdl_ast(grammar, reg_ast.children[0])
            right_ast_node = regex_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[1])
            ast_node = AbstractSyntaxTree(prod, [
                RealizedField(prod['left'], left_ast_node),
                RealizedField(prod['right'], right_ast_node)
            ])
            return ast_node
        elif rule in ["RepeatAtleast"]:
            # primitive node
            # RealizedField(prod['predicate'], value=node_name)
            child_ast_node = regex_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[0])
            int_real_node = RealizedField(prod['k'], str(reg_ast.params[0]))
            ast_node = AbstractSyntaxTree(
                prod,
                [RealizedField(prod['arg'], child_ast_node), int_real_node])
            return ast_node
        else:
            raise ValueError("wrong node class", reg_ast.node_class)
    else:
        if reg_ast.node_class in [
                "<num>", "<let>", "<vow>", "<low>", "<cap>", "<any>"
        ]:
            rule = "CharClass"
        elif reg_ast.node_class in ["<m0>", "<m1>", "<m2>", "<m3>"]:
            rule = "Const"
        else:
            raise ValueError("wrong node class", reg_ast.node_class)
        prod = grammar.get_prod_by_ctr_name(rule)
        return AbstractSyntaxTree(
            prod, [RealizedField(prod['arg'], reg_ast.node_class)])
示例#14
0
def logical_form_to_ast(grammar, lf_node):
    if lf_node.name == 'lambda':
        # expr -> Lambda(var variable, var_type type, expr body)
        prod = grammar.get_prod_by_ctr_name('Lambda')

        var_node = lf_node.children[0]
        var_field = RealizedField(prod['variable'], var_node.name)

        var_type_node = lf_node.children[1]
        var_type_field = RealizedField(prod['type'], var_type_node.name)

        body_node = lf_node.children[2]
        body_ast_node = logical_form_to_ast(grammar, body_node)  # of type expr
        body_field = RealizedField(prod['body'], body_ast_node)

        ast_node = AbstractSyntaxTree(prod,
                                      [var_field, var_type_field, body_field])
    elif lf_node.name == 'argmax' or lf_node.name == 'argmin' or lf_node.name == 'sum':
        # expr -> Argmax|Sum(var variable, expr domain, expr body)

        prod = grammar.get_prod_by_ctr_name(lf_node.name.title())

        var_node = lf_node.children[0]
        var_field = RealizedField(prod['variable'], var_node.name)

        domain_node = lf_node.children[1]
        domain_ast_node = logical_form_to_ast(grammar, domain_node)
        domain_field = RealizedField(prod['domain'], domain_ast_node)

        body_node = lf_node.children[2]
        body_ast_node = logical_form_to_ast(grammar, body_node)
        body_field = RealizedField(prod['body'], body_ast_node)

        ast_node = AbstractSyntaxTree(prod,
                                      [var_field, domain_field, body_field])
    elif lf_node.name == 'and' or lf_node.name == 'or':
        # expr -> And(expr* arguments) | Or(expr* arguments)
        prod = grammar.get_prod_by_ctr_name(lf_node.name.title())

        arg_ast_nodes = []
        for arg_node in lf_node.children:
            arg_ast_node = logical_form_to_ast(grammar, arg_node)
            arg_ast_nodes.append(arg_ast_node)

        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['arguments'], arg_ast_nodes)])
    elif lf_node.name == 'not':
        # expr -> Not(expr argument)
        prod = grammar.get_prod_by_ctr_name('Not')

        arg_ast_node = logical_form_to_ast(grammar, lf_node.children[0])

        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['argument'], arg_ast_node)])
    elif lf_node.name == '>' or lf_node.name == '=' or lf_node.name == '<':
        # expr -> Compare(cmp_op op, expr left, expr right)
        prod = grammar.get_prod_by_ctr_name('Compare')
        op_name = 'GreaterThan' if lf_node.name == '>' else 'Equal' if lf_node.name == '=' else 'LessThan'
        op_field = RealizedField(
            prod['op'],
            AbstractSyntaxTree(grammar.get_prod_by_ctr_name(op_name)))

        left_node = lf_node.children[0]
        left_ast_node = logical_form_to_ast(grammar, left_node)
        left_field = RealizedField(prod['left'], left_ast_node)

        right_node = lf_node.children[1]
        right_ast_node = logical_form_to_ast(grammar, right_node)
        right_field = RealizedField(prod['right'], right_ast_node)

        ast_node = AbstractSyntaxTree(prod,
                                      [op_field, left_field, right_field])
    elif lf_node.name in [
            'jet', 'flight', 'from_airport', 'airport', 'airline',
            'airline_name', 'class_type', 'aircraft_code', 'aircraft_code:t',
            'from', 'to', 'day', 'month', 'year', 'arrival_time', 'limousine',
            'departure_time', 'meal', 'meal:t', 'meal_code', 'during_day',
            'tomorrow', 'daily', 'time_elapsed', 'time_zone_code',
            'booking_class:t', 'booking_class', 'economy', 'ground_fare',
            'class_of_service', 'capacity', 'weekday', 'today', 'turboprop',
            'aircraft', 'air_taxi_operation', 'month_return', 'day_return',
            'day_number_return', 'minimum_connection_time',
            'during_day_arrival', 'connecting', 'minutes_distant', 'named',
            'miles_distant', 'approx_arrival_time', 'approx_return_time',
            'approx_departure_time', 'has_stops', 'day_after_tomorrow',
            'manufacturer', 'discounted', 'overnight', 'nonstop', 'has_meal',
            'round_trip', 'oneway', 'loc:t', 'ground_transport', 'to_city',
            'flight_number', 'equals:t', 'abbrev', 'equals', 'rapid_transit',
            'stop_arrival_time', 'arrival_month', 'cost', 'fare', 'services',
            'fare_basis_code', 'rental_car', 'city', 'stop', 'day_number',
            'days_from_today', 'after_day', 'before_day', 'airline:e', 'stops',
            'month_arrival', 'day_number_arrival', 'day_arrival', 'taxi',
            'next_days', 'restriction_code', 'tomorrow_arrival', 'tonight',
            'population:i', 'state:t', 'next_to:t', 'elevation:i', 'size:i',
            'capital:t', 'len:i', 'city:t', 'named:t', 'river:t', 'place:t',
            'capital:c', 'major:t', 'town:t', 'mountain:t', 'lake:t', 'area:i',
            'density:i', 'high_point:t', 'elevation:t', 'population:t', 'in:t'
    ]:
        # expr -> Apply(pred predicate, expr* arguments)
        prod = grammar.get_prod_by_ctr_name('Apply')

        pred_field = RealizedField(prod['predicate'], value=lf_node.name)

        arg_ast_nodes = []
        for arg_node in lf_node.children:
            arg_ast_node = logical_form_to_ast(grammar, arg_node)
            arg_ast_nodes.append(arg_ast_node)
        arg_field = RealizedField(prod['arguments'], arg_ast_nodes)

        ast_node = AbstractSyntaxTree(prod, [pred_field, arg_field])
    elif lf_node.name.startswith('$'):
        prod = grammar.get_prod_by_ctr_name('Variable')
        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['variable'], value=lf_node.name)])
    elif ':ap' in lf_node.name or ':fb' in lf_node.name or ':mf' in lf_node.name or \
                    ':me' in lf_node.name or ':cl' in lf_node.name or ':pd' in lf_node.name or \
                    ':dc' in lf_node.name or ':al' in lf_node.name or \
                    lf_node.name in ['yr0', 'do0', 'fb1', 'rc0', 'ci0', 'fn0', 'ap0', 'al1', 'al2', 'ap1', 'ci1',
                                     'ci2', 'ci3', 'st0', 'ti0', 'ti1', 'da0', 'da1', 'da2', 'da3', 'da4', 'al0',
                                     'fb0', 'dn0', 'dn1', 'mn0', 'ac0', 'fn1', 'st1', 'st2',
                                     'c0', 'm0', 's0', 'r0', 'n0', 'co0', 'usa:co', 'death_valley:lo', 's1',
                                     'colorado:n']:
        prod = grammar.get_prod_by_ctr_name('Entity')
        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['entity'], value=lf_node.name)])
    elif lf_node.name.endswith(':i') or lf_node.name.endswith(':hr'):
        prod = grammar.get_prod_by_ctr_name('Number')
        ast_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['number'], value=lf_node.name)])
    elif lf_node.name == 'the':
        # expr -> The(var variable, expr body)
        prod = grammar.get_prod_by_ctr_name('The')

        var_node = lf_node.children[0]
        var_field = RealizedField(prod['variable'], var_node.name)

        body_node = lf_node.children[1]
        body_ast_node = logical_form_to_ast(grammar, body_node)
        body_field = RealizedField(prod['body'], body_ast_node)

        ast_node = AbstractSyntaxTree(prod, [var_field, body_field])
    elif lf_node.name == 'exists' or lf_node.name == 'max' or lf_node.name == 'min' or lf_node.name == 'count':
        # expr -> Exists(var variable, expr body)
        prod = grammar.get_prod_by_ctr_name(lf_node.name.title())

        var_node = lf_node.children[0]
        var_field = RealizedField(prod['variable'], var_node.name)

        body_node = lf_node.children[1]
        body_ast_node = logical_form_to_ast(grammar, body_node)
        body_field = RealizedField(prod['body'], body_ast_node)

        ast_node = AbstractSyntaxTree(prod, [var_field, body_field])
    else:
        raise NotImplementedError

    return ast_node
示例#15
0
def pdf_to_ast(grammar, x, tr):
    if len(tr) >= 300:
        raise NotImplementedError
    if isinstance(x, PdfDict):
        prod = grammar.get_prod_by_ctr_name('PdfDict')

        ast_nodes = []
        for y in x:
            prod_ = grammar.get_prod_by_ctr_name('Apply')

            pred_field = RealizedField(prod_['name'], value=str(y))

            if y in ['/Parent', '/P', '/Dest', '/Prev']:
                op_field = RealizedField(prod_['op'])

            else:
                tr.append(y)
                args_ast_node = pdf_to_ast(grammar, x[y], tr)
                del tr[len(tr) - 1]
                op_field = RealizedField(prod_['op'], value=args_ast_node)

            ast_node_ = AbstractSyntaxTree(prod_, [pred_field, op_field])
            ast_nodes.append(ast_node_)

        if x.stream:
            prod_ = grammar.get_prod_by_ctr_name('PdfString')

            var_field = RealizedField(prod_['value'], value=str(x.stream))

            ast_node = AbstractSyntaxTree(prod_, [var_field])

            ast_nodes.append(ast_node)

        arg_field = RealizedField(prod['args'], ast_nodes)

        ast_node = AbstractSyntaxTree(prod, [arg_field]) 

    elif isinstance(x, PdfObject):
        prod = grammar.get_prod_by_ctr_name('PdfObject')

        var_field = RealizedField(prod['value'], value=str(x))

        ast_node = AbstractSyntaxTree(prod, [var_field])

    elif isinstance(x, PdfArray):
        dict_nodes = []
        list_nodes = []
        for y in x:
            if isinstance(y, PdfDict):
                args_ast_node = pdf_to_ast(grammar, y, tr)
                dict_nodes.append(args_ast_node)

            elif isinstance(y, PdfArray):
                raise NotImplementedError

            elif isinstance(y, BasePdfName):
                list_nodes.append(str(y))

            elif isinstance(y, PdfObject):
                list_nodes.append(str(y))


        if dict_nodes:
            prod = grammar.get_prod_by_ctr_name('PdfArray')

            arg_field = RealizedField(prod['args'], dict_nodes)

            ast_node = AbstractSyntaxTree(prod, [arg_field]) 

        else:
            prod = grammar.get_prod_by_ctr_name('PdfList')

            var_field = RealizedField(prod['value'], value=tuple(list_nodes))

            ast_node = AbstractSyntaxTree(prod, [var_field])

    elif isinstance(x, PdfString):
        prod = grammar.get_prod_by_ctr_name('PdfString')

        var_field = RealizedField(prod['value'], value=str(x))

        ast_node = AbstractSyntaxTree(prod, [var_field])

    elif isinstance(x, BasePdfName):
        prod = grammar.get_prod_by_ctr_name('BasePdfName')

        var_field = RealizedField(prod['value'], value=str(x))

        ast_node = AbstractSyntaxTree(prod, [var_field])

    else:
        print(type(x))
        raise NotImplementedError

    return ast_node
示例#16
0
def streg_ast_to_asdl_ast(grammar, reg_ast):
    if reg_ast.children:
        rule = _NODE_CLASS_TO_RULE[reg_ast.node_class]
        prod = grammar.get_prod_by_ctr_name(rule)
        # unary
        if rule in [
                "Not", "Star", "StartWith", "EndWith", "Contain", "NotCC",
                "Optional"
        ]:
            child_ast_node = streg_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[0])
            ast_node = AbstractSyntaxTree(
                prod, [RealizedField(prod['arg'], child_ast_node)])
            return ast_node
        elif rule in ["Concat", "And", "Or"]:
            left_ast_node = streg_ast_to_asdl_ast(grammar, reg_ast.children[0])
            right_ast_node = streg_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[1])
            ast_node = AbstractSyntaxTree(prod, [
                RealizedField(prod['left'], left_ast_node),
                RealizedField(prod['right'], right_ast_node)
            ])
            return ast_node
        elif rule in ["RepeatAtleast", "Repeat"]:
            # primitive node
            # RealizedField(prod['predicate'], value=node_name)
            child_ast_node = streg_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[0])
            int_real_node = RealizedField(prod['k'], str(reg_ast.params[0]))
            ast_node = AbstractSyntaxTree(
                prod,
                [RealizedField(prod['arg'], child_ast_node), int_real_node])
            return ast_node
        elif rule in ["RepeatRange"]:
            child_ast_node = streg_ast_to_asdl_ast(grammar,
                                                   reg_ast.children[0])
            int_real_node1 = RealizedField(prod['k1'], str(reg_ast.params[0]))
            int_real_node2 = RealizedField(prod['k2'], str(reg_ast.params[1]))
            ast_node = AbstractSyntaxTree(prod, [
                RealizedField(prod['arg'], child_ast_node), int_real_node1,
                int_real_node2
            ])
            return ast_node
        elif rule in ["String"]:
            return AbstractSyntaxTree(
                prod,
                [RealizedField(prod['arg'], reg_ast.children[0].node_class)])
        else:
            raise ValueError("wrong node class", reg_ast.node_class)
    else:
        if reg_ast.node_class in [
                "<num>", "<let>", "<spec>", "<low>", "<cap>", "<any>"
        ]:
            rule = "CharClass"
        elif reg_ast.node_class.startswith(
                "const") and reg_ast.node_class[5:].isdigit():
            rule = "ConstSym"
        elif reg_ast.node_class.startswith(
                "<") and reg_ast.node_class.endswith(">"):
            rule = "Token"
        else:
            raise ValueError("wrong node class", reg_ast.node_class)
        prod = grammar.get_prod_by_ctr_name(rule)
        return AbstractSyntaxTree(
            prod, [RealizedField(prod['arg'], reg_ast.node_class)])
示例#17
0
def pdf_to_ast(grammar, lf_node):
    if lf_node.name.startswith('obj'):
        # obj = Objective(id name, expr* hdr)
        prod = grammar.get_prod_by_ctr_name('Objective')

        id_field = RealizedField(prod['name'], value=lf_node.name)

        hdr_ast_nodes = []
        for hdr_node in lf_node.children:
            hdr_ast_node = pdf_to_ast(grammar, hdr_node)
            hdr_ast_nodes.append(hdr_ast_node)
        hdr_field = RealizedField(prod['hdr'], hdr_ast_nodes)

        ast_node = AbstractSyntaxTree(prod, [id_field, hdr_field])

    elif lf_node.name in [
            'Type', 'SubType', 'Size', 'Length', 'Kids', 'Parent', 'Count',
            'Limits', 'Range', 'Filter', 'Domain', 'FuncType', 'Pages',
            'MediaBox', 'Resources'
    ]:
        # expr -> Apply(pred predicate, expr* arguments)
        prod = grammar.get_prod_by_ctr_name('Apply')

        pred_field = RealizedField(prod['predicate'], value=lf_node.name)

        arg_ast_nodes = []
        for arg_node in lf_node.children:
            arg_ast_node = pdf_to_ast(grammar, arg_node)
            arg_ast_nodes.append(arg_ast_node)
        arg_field = RealizedField(prod['arguments'], arg_ast_nodes)

        ast_node = AbstractSyntaxTree(prod, [pred_field, arg_field])

    elif lf_node.name.startswith('S'):
        # expr = Variable(var_type type, var variable)
        prod = grammar.get_prod_by_ctr_name('Variable')

        var_type_field = RealizedField(prod['type'], value='string')
        var_field = RealizedField(prod['variable'], value=lf_node.name[1:])

        ast_node = AbstractSyntaxTree(prod, [var_type_field, var_field])
    elif lf_node.name.startswith('I'):
        prod = grammar.get_prod_by_ctr_name('Variable')

        var_type_field = RealizedField(prod['type'], value='int')
        var_field = RealizedField(prod['variable'], value=lf_node.name[1:])

        ast_node = AbstractSyntaxTree(prod, [var_type_field, var_field])
    elif lf_node.name.startswith('H'):
        prod = grammar.get_prod_by_ctr_name('Variable')

        var_type_field = RealizedField(prod['type'], value='header')
        var_field = RealizedField(prod['variable'], value=lf_node.name[1:])

        ast_node = AbstractSyntaxTree(prod, [var_type_field, var_field])

    elif lf_node.name.startswith('R'):
        # expr = Reference(id ref)
        prod = grammar.get_prod_by_ctr_name('Reference')

        ref_var = 'obj' + lf_node.name[1:]
        ref_field = RealizedField(prod['ref'], value=ref_var)

        ast_node = AbstractSyntaxTree(prod, [ref_field])

    else:
        raise NotImplementedError

    return ast_node
示例#18
0
        def get_subtree(entry, parent_field, next_available_id):
            if entry is None:
                return None, next_available_id

            constructor_name = entry['Constructor']

            # terminal case
            if constructor_name == 'SyntaxToken':
                if entry['Value'] is None:
                    return None, next_available_id  # return None for optional field whose value is null

                token = SyntaxToken(parent_field.type,
                                    entry['Value'],
                                    position=entry['Position'],
                                    id=next_available_id)
                next_available_id += 1

                return token, next_available_id

            field_entries = entry['Fields']
            node_id = next_available_id
            next_available_id += 1
            prod = self.get_prod_by_ctr_name(constructor_name)
            realized_fields = []
            for field in prod.constructor.fields:
                field_value = field_entries[field.name]

                if isinstance(field_value, list):
                    assert 'SyntaxList' in field.type.name

                    sub_ast_id = next_available_id
                    next_available_id += 1

                    sub_ast_prod = self.get_prod_by_ctr_name(field.type.name)
                    sub_ast_constr_field = sub_ast_prod.constructor.fields[0]
                    sub_ast_field_values = []
                    for field_child_entry in field_value:
                        child_sub_ast, next_available_id = get_subtree(
                            field_child_entry,
                            sub_ast_constr_field,
                            next_available_id=next_available_id)
                        sub_ast_field_values.append(child_sub_ast)

                    sub_ast = AbstractSyntaxNode(sub_ast_prod, [
                        RealizedField(sub_ast_constr_field,
                                      sub_ast_field_values)
                    ],
                                                 id=sub_ast_id)

                    # FIXME: have a global mark_finished method!
                    for sub_ast_field in sub_ast.fields:
                        if sub_ast_field.cardinality in ('multiple',
                                                         'optional'):
                            sub_ast_field._not_single_cardinality_finished = True

                    realized_field = RealizedField(field, sub_ast)
                else:
                    # if the child is an AST or terminal SyntaxNode
                    sub_ast, next_available_id = get_subtree(
                        field_value, field, next_available_id)
                    realized_field = RealizedField(field, sub_ast)

                realized_fields.append(realized_field)

            ast_node = AbstractSyntaxNode(prod, realized_fields, id=node_id)
            for field in ast_node.fields:
                if field.cardinality in ('multiple', 'optional'):
                    field._not_single_cardinality_finished = True

            return ast_node, next_available_id
def prolog_expr_to_ast_helper(grammar, prolog_tokens, start_idx=0):
    i = start_idx
    if prolog_tokens[i] == '(':
        i += 1

    parsed_nodes = []
    while True:
        if prolog_tokens[i] == '\\+':
            # expr -> Not(expr argument)
            prod = grammar.get_prod_by_ctr_name('Not')
            i += 1
            if prolog_tokens[i] == '(':
                arg_ast_node, end_idx = prolog_expr_to_ast_helper(
                    grammar, prolog_tokens, i)
            else:
                arg_ast_node, end_idx = prolog_node_to_ast(
                    grammar, prolog_tokens, i)
            i = end_idx

            assert arg_ast_node.production.type.name == 'expr'
            ast_node = AbstractSyntaxTree(
                prod, [RealizedField(prod['argument'], arg_ast_node)])

            parsed_nodes.append(ast_node)
        elif prolog_tokens[i] == '(':
            ast_node, end_idx = prolog_expr_to_ast_helper(
                grammar, prolog_tokens, i)
            parsed_nodes.append(ast_node)
            i = end_idx
        else:
            ast_node, end_idx = prolog_node_to_ast(grammar, prolog_tokens, i)
            parsed_nodes.append(ast_node)
            i = end_idx

        if i >= len(prolog_tokens): break
        if prolog_tokens[i] == ')':
            i += 1
            break

        if prolog_tokens[i] == ',':
            # and
            i += 1
        elif prolog_tokens[i] == ';':
            # Or
            prod = grammar.get_prod_by_ctr_name('Or')

            assert parsed_nodes
            if len(parsed_nodes) == 1:
                left_ast_node = parsed_nodes[0]
            else:
                left_expr_prod = grammar.get_prod_by_ctr_name('And')
                left_ast_node = AbstractSyntaxTree(
                    left_expr_prod,
                    [RealizedField(left_expr_prod['arguments'], parsed_nodes)])
                parsed_nodes = []

            # get the right ast node
            i += 1
            right_ast_node, end_idx = prolog_expr_to_ast_helper(
                grammar, prolog_tokens, i)

            ast_node = AbstractSyntaxTree(prod, [
                RealizedField(prod['left'], left_ast_node),
                RealizedField(prod['right'], right_ast_node)
            ])

            i = end_idx
            parsed_nodes = [ast_node]

            if i >= len(prolog_tokens): break
            if prolog_tokens[i] == ')':
                i += 1
                break

    assert parsed_nodes
    if len(parsed_nodes) > 1:
        prod = grammar.get_prod_by_ctr_name('And')
        return_node = AbstractSyntaxTree(
            prod, [RealizedField(prod['arguments'], parsed_nodes)])
    else:
        return_node = parsed_nodes[0]

    return return_node, i