def ifttt_ast_to_parse_tree_helper(s, offset):
    """Parse one parenthesised node of a serialized tree (adapted from ifttt codebase).

    A node is written as ``(NAME child child ...)``: an open paren, a name,
    then zero or more space-separated child nodes of the same form, then a
    close paren. ``NAME`` is either a bare token (terminated by a space or a
    close paren) or a double-quoted string in which a backslash makes the
    following character literal.

    Args:
        s: the serialized tree.
        offset: index in ``s`` of the node's open paren.

    Returns:
        A ``(node, offset)`` tuple: the ``ASTNode`` built for this node and the
        index just past its close paren.

    Raises:
        RuntimeError: if the node does not start with ``(``, or if the name or
            a child is followed by something other than a space or ``)``.
        IndexError: if ``s`` ends before the node is closed.
    """
    if s[offset] != '(':
        # str() is required here: concatenating the int offset directly raised
        # TypeError and masked the intended error.
        raise RuntimeError(
            'malformed string: node did not start with open paren at position '
            + str(offset))

    offset += 1

    # extract node name(type)
    name_chars = []
    if s[offset] == '\"':
        offset += 1
        while s[offset] != '\"':
            if s[offset] == '\\':
                # take the escaped character verbatim
                offset += 1
            name_chars.append(s[offset])
            offset += 1
        # step over the closing quote
        offset += 1
    else:
        while s[offset] != ' ' and s[offset] != ')':
            name_chars.append(s[offset])
            offset += 1

    node = ASTNode(''.join(name_chars))

    # parse children until this node's close paren
    while True:
        if s[offset] == ')':
            offset += 1
            return node, offset

        if s[offset] != ' ':
            raise RuntimeError(
                'malformed string: node should have either had a '
                'close paren or a space at position ' + str(offset))

        offset += 1
        child_node, offset = ifttt_ast_to_parse_tree_helper(s, offset)
        node.add_child(child_node)
def parse_raw(code):
    """Parse ``code`` as given and collect its statements under a 'root' node.

    The source is passed to ``ast.parse`` without any preprocessing. Every
    statement of the resulting module body is converted with
    ``python_ast_to_parse_tree`` and attached, in order, to a fresh
    ``ASTNode('root')``, which is returned.
    """
    root = ASTNode('root')
    module = ast.parse(code)

    for statement in module.body:
        root.add_child(python_ast_to_parse_tree(statement))

    return root
def unary_link_to_closure(unary_link):
    """Build a two-node closure tree summarising ``unary_link``.

    The returned node has the type of ``unary_link`` and a single child. The
    child has the type of the link's first leaf, and its label encodes the
    link's productions: each rule is stringified, its spaces are replaced by
    '$', and the results are joined with '@'.
    """
    closure_root = ASTNode(unary_link.type)

    first_leaf = unary_link.get_leaves()[0]
    leaf_summary = ASTNode(first_leaf.type)

    # only the first element of the returned pair is used
    rules, _unused = unary_link.get_productions()
    encoded_rules = [str(rule).replace(' ', '$') for rule in rules]
    leaf_summary.label = '@'.join(encoded_rules)

    closure_root.add_child(leaf_summary)
    return closure_root
def sql_ast_to_parse_tree(node):
    """Convert a dict-based SQL AST node into an ``ASTNode`` tree.

    ``node`` must be a mapping with a "type" entry:

    * "literal" nodes become a leaf labelled with ``node["variant"]`` and
      carrying ``node["value"]``;
    * "identifier" nodes become a leaf labelled with ``node["variant"]`` and
      carrying ``node["name"]``;
    * any other node becomes an ``ASTNode`` of that type with one child per
      remaining key: string/bool values become value leaves, dict values are
      converted recursively under a node named after the key, and list values
      are converted item by item under a node named ``key + "*"``.

    Values of any other kind are reported on stdout and skipped.

    Raises:
        TypeError: if ``node`` is a string rather than a mapping.
        KeyError: if a required entry ("type", "variant", "value"/"name") is
            missing.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(node, string_types):
        # Indexing a string with "type" would fail with an unhelpful
        # TypeError; fail with the offending value in the message instead.
        raise TypeError(
            'expected a dict-like SQL AST node, got string: %r' % (node,))

    node_type = node["type"]

    if node_type == "literal":
        return ASTNode(node_type, label=node["variant"], value=node["value"])

    if node_type == "identifier":
        return ASTNode(node_type, label=node["variant"], value=node["name"])

    tree = ASTNode(node_type)

    for key in node:
        if key == "type":
            # the type is already carried by ``tree`` itself
            continue

        value = node[key]
        if isinstance(value, (string_types, bool)):
            child = ASTNode(key, value=value)
        elif isinstance(value, dict):
            child = ASTNode(key)
            child.add_child(sql_ast_to_parse_tree(value))
        elif isinstance(value, list):
            child = ASTNode(key + "*")
            for item in value:
                child.add_child(sql_ast_to_parse_tree(item))
        else:
            # Unsupported value kind: report it and skip the key. Falling
            # through would attach an unbound or stale ``child``.
            print('%s %s' % (key, value))
            continue

        tree.add_child(child)

    return tree
def seq2tree_repr_to_ast_tree_helper(tree_repr, offset):
    """Build the AST node whose token starts at ``offset`` in a seq2tree string.

    The node token runs up to the next space (or the end of the string) and
    is split by ``node_re`` into 'type', 'label' and 'value' groups. If the
    token is followed by '(' the enclosed nodes are parsed recursively and
    added as children.

    Returns a ``(node, offset)`` tuple, where ``offset`` is the position at
    which the caller should continue reading.
    """
    # find the end of the node token
    token_end = offset
    while token_end < len(tree_repr) and tree_repr[token_end] != ' ':
        token_end += 1

    match = node_re.match(tree_repr[offset:token_end])
    n_type = type_str_to_type(match.group('type'))
    n_label = match.group('label')
    n_value = match.group('value')

    # builtin value types get their textual value converted
    if n_type in {int, float, str, bool}:
        n_value = n_type(n_value)

    # empty strings stand for "absent"
    if n_label == '':
        n_label = None
    if n_value == '':
        n_value = None

    node = ASTNode(n_type, label=n_label, value=n_value)

    # the token was the last thing in the string: no children
    if token_end == len(tree_repr):
        return node, token_end

    cursor = token_end + 1
    if tree_repr[cursor] != '(':
        return node, cursor

    # step over the '(' and the character after it
    cursor += 2
    while True:
        child_node, cursor = seq2tree_repr_to_ast_tree_helper(
            tree_repr, offset=cursor)
        node.add_child(child_node)

        if cursor >= len(tree_repr) or tree_repr[cursor] == ')':
            # step over the ')' and the character after it
            cursor += 2
            return node, cursor
def parse(code):
    """
    parse a python code into a tree structure
    code -> canonicalized code -> AST tree -> AST tree to internal tree structure

    every top-level statement of the canonicalized code is converted and
    attached, in order, to a new 'root' node, which is returned
    """
    root = ASTNode('root')

    module = ast.parse(canonicalize_code(code))
    for statement in module.body:
        root.add_child(python_ast_to_parse_tree(statement))

    return root
def ifttt_ast_to_parse_tree_helper(s, offset):
    """Parse one parenthesised node of a serialized tree (adapted from ifttt codebase).

    A node is written as ``(NAME child child ...)``: an open paren, a name,
    then zero or more space-separated child nodes of the same form, then a
    close paren. ``NAME`` is either a bare token (terminated by a space or a
    close paren) or a double-quoted string in which a backslash makes the
    following character literal.

    Args:
        s: the serialized tree.
        offset: index in ``s`` of the node's open paren.

    Returns:
        A ``(node, offset)`` tuple: the ``ASTNode`` built for this node and the
        index just past its close paren.

    Raises:
        RuntimeError: if the node does not start with ``(``, or if the name or
            a child is followed by something other than a space or ``)``.
        IndexError: if ``s`` ends before the node is closed.
    """
    if s[offset] != '(':
        # str() is required here: concatenating the int offset directly raised
        # TypeError and masked the intended error.
        raise RuntimeError(
            'malformed string: node did not start with open paren at position '
            + str(offset))

    offset += 1

    # extract node name(type)
    name_chars = []
    if s[offset] == '\"':
        offset += 1
        while s[offset] != '\"':
            if s[offset] == '\\':
                # take the escaped character verbatim
                offset += 1
            name_chars.append(s[offset])
            offset += 1
        # step over the closing quote
        offset += 1
    else:
        while s[offset] != ' ' and s[offset] != ')':
            name_chars.append(s[offset])
            offset += 1

    node = ASTNode(''.join(name_chars))

    # parse children until this node's close paren
    while True:
        if s[offset] == ')':
            offset += 1
            return node, offset

        if s[offset] != ' ':
            raise RuntimeError(
                'malformed string: node should have either had a '
                'close paren or a space at position ' + str(offset))

        offset += 1
        child_node, offset = ifttt_ast_to_parse_tree_helper(s, offset)
        node.add_child(child_node)
def decode_tree_to_python_ast(decode_tree):
    """Turn a decoded tree into a 'root' node holding one converted child per subtree.

    ``decode_tree`` is first passed to ``compressed_ast_to_normal``. Then, for
    each of its children, the leaves are cleaned up in place (a trailing
    '<eos>' is stripped from string values, and values of leaves typed
    int/float/str/bool are cast to that type) before the child is converted
    with ``parse_tree_to_python_ast`` and added to the returned root.
    """
    from lang.py.unaryclosure import compressed_ast_to_normal

    ast_tree = ASTNode('root')
    compressed_ast_to_normal(decode_tree)

    builtin_value_types = {int, float, str, bool}

    for subtree in decode_tree.children:
        for leaf in subtree.get_leaves():
            raw = leaf.value
            # drop the end-of-sequence marker from string values
            if raw is not None and type(raw) is str and raw.endswith('<eos>'):
                leaf.value = raw[:-5]

            # cast to target data type
            if leaf.type in builtin_value_types:
                leaf.value = leaf.type(leaf.value)

        ast_tree.add_child(parse_tree_to_python_ast(subtree))

    return ast_tree
def add_root(tree):
    """Return a new 'root' node that has ``tree`` as its only child."""
    wrapper = ASTNode('root')
    wrapper.add_child(tree)

    return wrapper
def python_ast_to_parse_tree(node):
    """Recursively convert a Python ``ast`` node into an ``ASTNode`` tree.

    The returned node is typed with the class of ``node``. Nodes without
    fields are returned as-is; nodes for which ``is_compositional_leaf`` holds
    get a single 'epsilon' child. Otherwise one child is created per
    non-empty field that is not in ``NODE_FIELD_BLACK_LIST``:

    * an AST-valued field becomes a node of the declared field type with the
      converted value beneath it;
    * a str/int/float/bool value becomes a value leaf typed by the value;
    * a list field becomes a ``<typename>*`` node with one entry per item.

    Raises ``RuntimeError`` for a field value of any other kind.
    """
    assert isinstance(node, ast.AST)

    node_type = type(node)
    tree = ASTNode(node_type)

    # field-less AST nodes (e.g., Add, Break) are leaves
    if len(node._fields) == 0:
        return tree

    # compositional node whose fields are all empty
    if is_compositional_leaf(node):
        tree.add_child(ASTNode('epsilon'))
        return tree

    fields_info = PY_AST_NODE_FIELDS[node_type.__name__]

    for field_name, field_value in ast.iter_fields(node):
        # remove ctx stuff
        if field_name in NODE_FIELD_BLACK_LIST:
            continue

        # omit empty fields, including empty lists
        if field_value is None:
            continue
        if isinstance(field_value, list) and not field_value:
            continue

        # from here on the field is known to be non-empty
        field_spec = fields_info[field_name]
        field_type = field_spec['type']
        is_list_field = field_spec['is_list']

        value_type = type(field_value)

        if isinstance(field_value, ast.AST):
            child = ASTNode(field_type, field_name)
            child.add_child(python_ast_to_parse_tree(field_value))
        elif value_type in (str, int, float, object, bool):
            child = ASTNode(value_type, field_name, value=field_value)
        elif is_list_field:
            child = ASTNode(typename(field_type) + '*', field_name)

            # items of these types hang directly under the list node; all
            # others get an intermediate node of the field type
            attach_directly = field_type in {
                ast.comprehension, ast.excepthandler, ast.arguments,
                ast.keyword, ast.alias
            }

            for item in field_value:
                if attach_directly:
                    child.add_child(python_ast_to_parse_tree(item))
                    continue

                intermediate_node = ASTNode(field_type)
                if field_type is str:
                    intermediate_node.value = item
                else:
                    intermediate_node.add_child(python_ast_to_parse_tree(item))
                child.add_child(intermediate_node)
        else:
            raise RuntimeError('unknown AST node field!')

        tree.add_child(child)

    return tree
def create_node_with_empty_leaf(node_name):
    """Return a node of type ``node_name`` whose only child is an "empty" node."""
    parent = ASTNode(node_name)
    parent.add_child(ASTNode("empty"))

    return parent