def parse_raw(code):
    """Parse Python source text into the internal tree, without canonicalizing it.

    Each top-level statement of ``code`` is converted with
    ``python_ast_to_parse_tree`` and attached, in source order, under a
    fresh ``'root'`` node.

    :param code: Python source text accepted by ``ast.parse``.
    :return: the ``'root'`` ``ASTNode`` holding one child per statement.
    """
    root = ASTNode('root')
    for statement in ast.parse(code).body:
        root.add_child(python_ast_to_parse_tree(statement))
    return root
def parse_code(code):
    """Canonicalize Python source text and parse it into the internal tree.

    Pipeline: source text -> ``canonicalize_code`` -> ``ast.parse`` ->
    one ``python_ast_to_parse_tree`` conversion per top-level statement,
    all collected under a fresh ``'root'`` node.

    :param code: Python source text.
    :return: the ``'root'`` ``ASTNode`` holding one child per statement.
    """
    root = ASTNode('root')
    canonical_source = canonicalize_code(code)
    for statement in ast.parse(canonical_source).body:
        root.add_child(python_ast_to_parse_tree(statement))
    return root
def decode_tree_to_python_ast(decode_tree):
    """Turn a decoded parse tree into a tree of Python AST conversions.

    The tree is first passed through ``compressed_ast_to_normal`` (in
    place). Then, for every child of ``decode_tree``, each leaf has a
    trailing ``'<eos>'`` marker stripped from string values and, when the
    leaf type is one of ``int``/``float``/``str``/``bool``, its value is
    cast to that type. The child is finally converted with
    ``parse_tree_to_python_ast`` and attached under a new ``'root'`` node.

    :param decode_tree: decoded tree; it and its leaves are mutated.
    :return: a ``'root'`` ``ASTNode`` with one converted child per subtree.
    """
    eos_marker = '<eos>'
    castable_types = {int, float, str, bool}

    python_root = ASTNode('root')
    compressed_ast_to_normal(decode_tree)

    for subtree in decode_tree.children:
        for leaf in subtree.get_leaves():
            raw_value = leaf.value
            if (raw_value is not None and type(raw_value) is str
                    and raw_value.endswith(eos_marker)):
                # Drop the 5-character end-of-sequence marker.
                leaf.value = raw_value[:-5]

            if leaf.type in castable_types:
                # Cast to the target data type.
                # NOTE(review): bool() of any non-empty string is True, so a
                # textual 'False' would become True here -- confirm whether
                # bool leaves can carry string values.
                leaf.value = leaf.type(leaf.value)

        python_root.add_child(parse_tree_to_python_ast(subtree))

    return python_root
def unary_link_to_closure(unary_link):
    """Collapse a unary chain into a two-node closure.

    The result is a node typed like the head of ``unary_link`` with a
    single child typed like the chain's first leaf. The child's label
    encodes every production of the chain: spaces inside each rule's
    string form become ``'$'`` and the rules are joined with ``'@'``.

    :param unary_link: head node of the chain to compress.
    :return: the new closure ``ASTNode``.
    """
    closure_head = ASTNode(unary_link.type)
    chain_end = unary_link.get_leaves()[0]
    closure_tail = ASTNode(chain_end.type)

    productions, _ = unary_link.get_productions()
    encoded_rules = [str(rule).replace(' ', '$') for rule in productions]
    closure_tail.label = '@'.join(encoded_rules)

    closure_head.add_child(closure_tail)
    return closure_head
def extract_unary_closure_helper(parse_tree, unary_link, last_node):
    """Collect the unary chains found below ``parse_tree``.

    While walking single-child nodes, a copy of the chain is grown under
    ``unary_link`` (``last_node`` is its current tail). The chain ends at
    a leaf or at a node with several children; it is reported only when
    ``unary_link.size > 2``. At a branching node a new chain is started
    for every child.

    :param parse_tree: node currently being visited.
    :param unary_link: head of the chain copy being built (may be falsy).
    :param last_node: tail of the chain copy; new links are appended here.
    :return: list of chain heads found in this subtree.
    """
    if parse_tree.is_leaf:
        return [unary_link] if unary_link and unary_link.size > 2 else []

    if len(parse_tree.children) > 1:
        # The current chain stops here; every child starts a chain of its own.
        found = [unary_link] if unary_link and unary_link.size > 2 else []
        for branch in parse_tree.children:
            chain_start = ASTNode(branch.type)
            found += extract_unary_closure_helper(branch, chain_start, chain_start)
        return found

    # Exactly one child: extend the chain copy and keep descending.
    only_child = parse_tree.children[0]
    next_link = ASTNode(only_child.type, label=only_child.label)
    last_node.add_child(next_link)
    return extract_unary_closure_helper(only_child, unary_link, next_link)
def compressed_ast_to_normal(parse_tree):
    """Expand compressed unary-closure nodes below ``parse_tree`` in place.

    A node counts as compressed when its label contains both ``'@'`` and
    ``'$'``: the label is then read as a list of rules joined by ``'@'``
    in which spaces were replaced by ``'$'``. One node is rebuilt per
    rule, chained parent-to-child, and the chain replaces the compressed
    node under its parent. Any other node is left alone and only its
    children are visited.

    :param parse_tree: node to process; the tree is mutated, nothing is
        returned.
    """
    if parse_tree.label and '@' in parse_tree.label and '$' in parse_tree.label:
        label = parse_tree.label
        # Undo the encoding: '$' stood for a space, '@' separates rules.
        label = label.replace('$', ' ')
        rule_reprs = label.split('@')

        # NOTE(review): intermediate_nodes is filled but never read here.
        intermediate_nodes = []
        first_node = last_node = None
        for rule_repr in rule_reprs:
            # rule_regex must expose the named groups 'parent', 'child'
            # and 'clabel'; a non-matching rule makes m None and the
            # .group() calls below fail.
            m = rule_regex.match(rule_repr)
            p = m.group('parent')
            c = m.group('child')
            cl = m.group('clabel')

            # NOTE(review): p_type is computed but not used afterwards.
            p_type = type_str_to_type(p)
            c_type = type_str_to_type(c)

            # Only the child side of each rule becomes a node; consecutive
            # nodes are linked so the chain grows downwards.
            node = ASTNode(c_type, label=cl)
            if last_node:
                last_node.add_child(node)
            if not first_node:
                first_node = node

            last_node = node
            intermediate_nodes.append(node)

        # The bottom of the rebuilt chain takes over the compressed node's
        # value and children; each child is expanded recursively.
        last_node.value = parse_tree.value
        for child in parse_tree.children:
            last_node.add_child(child)
            compressed_ast_to_normal(child)

        # Swap the compressed node for the head of the rebuilt chain. The
        # parent is required to have exactly one child.
        parent_node = parse_tree.parent
        assert len(parent_node.children) == 1
        del parent_node.children[0]
        parent_node.add_child(first_node)
        # return first_node
    else:
        # NOTE(review): new_child_trees is never used.
        new_child_trees = []
        # Iterate over a copy of the child list while recursing.
        for child in parse_tree.children[:]:
            compressed_ast_to_normal(child)
def __getitem__(self, lhs):
    """Return the entry of ``self.rule_index`` stored for the type of ``lhs``.

    The lookup key is a fresh ``ASTNode`` built from ``lhs.type`` alone,
    because rules are indexed by types only.

    :param lhs: node whose ``type`` selects the rules.
    :return: the value stored in ``self.rule_index`` for that type.
    :raises KeyError: if no entry exists for the type of ``lhs``.
    """
    # Rules are indexed by types only.
    key_node = ASTNode(lhs.type, None)
    if key_node in self.rule_index:
        return self.rule_index[key_node]
    # Raise for a missing key instead of falling through and returning None,
    # which is what the mapping protocol expects from __getitem__.
    raise KeyError('key=%s' % key_node)
def add_root(tree):
    """Wrap ``tree`` under a new ``'root'`` node and return that node.

    :param tree: node to attach as the only child of the new root.
    :return: the newly created ``'root'`` ``ASTNode``.
    """
    wrapper = ASTNode('root')
    wrapper.add_child(tree)
    return wrapper
def python_ast_to_parse_tree(node):
    """Recursively convert an ``ast.AST`` node into an ``ASTNode`` tree.

    The returned node is typed with the class of ``node``. Every non-empty
    field that is not listed in ``NODE_FIELD_BLACK_LIST`` becomes a child
    labelled with the field name:

    * an AST-valued field: a child typed by the declared field type, with
      the converted value beneath it;
    * a ``str``/``bytes``/``int``/``float``/``object``/``bool`` value: a
      child typed by the value's own type, carrying the value;
    * a list field: a child typed ``typename(field_type) + '*'`` holding
      one entry per element.

    :param node: the ``ast.AST`` instance to convert.
    :return: the ``ASTNode`` tree for ``node``.
    :raises RuntimeError: for a field value that fits none of the cases.
    """
    assert isinstance(node, ast.AST)

    ast_class = type(node)
    tree = ASTNode(ast_class)

    # AST nodes without any fields (e.g. Add, Break) are leaves.
    if not node._fields:
        return tree

    # A compositional node whose fields are all empty gets an epsilon child.
    if is_compositional_leaf(node):
        tree.add_child(ASTNode('epsilon'))
        return tree

    schema = PY_AST_NODE_FIELDS[ast_class.__name__]
    scalar_types = (str, bytes, int, float, object, bool)

    for name, value in ast.iter_fields(node):
        # Skip blacklisted fields (the ctx stuff).
        if name in NODE_FIELD_BLACK_LIST:
            continue

        # Skip empty fields, which includes empty lists.
        if value is None or (isinstance(value, list) and not value):
            continue

        field_schema = schema[name]
        declared_type = field_schema['type']
        is_list_field = field_schema['is_list']
        value_type = type(value)

        if isinstance(value, ast.AST):
            child = ASTNode(declared_type, name)
            child.add_child(python_ast_to_parse_tree(value))
        elif any(value_type is scalar for scalar in scalar_types):
            # Exact-type match only: the node is typed by the value itself.
            child = ASTNode(value_type, name, value=value)
        elif is_list_field:
            child = ASTNode(typename(declared_type) + '*', name)
            # Elements of these types hang directly off the list node; all
            # others are wrapped in an intermediate node of the field type.
            attach_directly = declared_type in {
                ast.comprehension,
                ast.excepthandler,
                ast.arguments,
                ast.keyword,
                ast.alias,
            }
            for element in value:
                if attach_directly:
                    child.add_child(python_ast_to_parse_tree(element))
                    continue

                wrapper = ASTNode(declared_type)
                if declared_type is str:
                    wrapper.value = element
                else:
                    wrapper.add_child(python_ast_to_parse_tree(element))
                child.add_child(wrapper)
        else:
            raise RuntimeError('unknown AST node field!')

        tree.add_child(child)

    return tree
def extract_unary_closure(parse_tree):
    """Return the unary chains found in ``parse_tree``.

    Starts ``extract_unary_closure_helper`` with a fresh copy of the root
    (same type, no children) serving as both the head and the current
    tail of the first chain.

    :param parse_tree: root of the tree to scan.
    :return: list of chain heads reported by the helper.
    """
    chain_head = ASTNode(parse_tree.type)
    return extract_unary_closure_helper(parse_tree, chain_head, chain_head)