def test_dfs_labeled_edges_sorting(self):
    """Forward edges must follow the order imposed by ``sort_neighbors``."""

    def forward_pairs(labeled_edges):
        # Keep only the "forward" edges and drop the label itself.
        return [
            (tail, head)
            for tail, head, label in labeled_edges
            if label == "forward"
        ]

    ascending = forward_pairs(
        nx.dfs_labeled_edges(self.U, sort_neighbors=sorted)
    )
    descending = forward_pairs(
        nx.dfs_labeled_edges(self.U, sort_neighbors=self.sort_desc)
    )

    assert ascending == [(0, 0), (0, 1), (1, 3), (1, 4), (0, 2), (2, 5)]
    assert descending == [
        (5, 5), (4, 4), (3, 3), (2, 2), (2, 5), (1, 1),
        (1, 4), (1, 3), (0, 0), (0, 2), (0, 1),
    ]
def dfs_preorder_nodes(G, source=None):
    """Return a generator of nodes in depth-first-search pre-order.

    The search starts at ``source``. A node is emitted for every labeled
    edge whose ``'dir'`` entry is ``'forward'``.
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the front of
    # the ordering with itertools.chain([source], ...).
    return (edge[1] for edge in labeled if edge[2]['dir'] == 'forward')
def dfs_preorder_nodes(G, source=None):
    """Return a generator of nodes in depth-first-search pre-order.

    The search starts at ``source``. Only the target node of edges whose
    ``'dir'`` entry is ``'forward'`` is emitted.
    """
    walk = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): prepend `source` to the ordering.
    return (head for _tail, head, info in walk if info['dir'] == 'forward')
def visit_tree(tree):
    """
    Annotate the nodes of ``tree`` with nested-set indexes.

    Walks the tree depth-first and stores ``left``, ``depth`` and
    ``parent`` on a node when it is entered, and ``right`` and ``depth``
    when it is left. The tree is modified in place and returned.
    See https://en.wikipedia.org/wiki/Nested_set_model
    """
    counter = 0
    level = 0
    for src, dst, label in nx.dfs_labeled_edges(tree):
        if label == "forward":
            level += 1
            tree.nodes[dst].update({
                "left": counter,
                "depth": level,
                "parent": src,
            })
        elif label == "reverse":
            level -= 1
            tree.nodes[dst].update({"right": counter, "depth": level})
        # The counter advances on every reported edge, whatever its label.
        counter += 1
    return tree
def find_path(graph, starting_node, ending_node):
    """Find a path through the graph that sews every satin."""
    # Step 1: take the shortest start-to-end path as the trunk and remove
    # its edges from a working copy of the graph.
    #
    # Step 2: walk the trunk node by node. At each node, explore the
    # remaining branches depth-first, recording every edge on the way
    # down and again on the way back up. Travelling each branch twice
    # is what enables "underpathing": running stitch going down, satin
    # stitch covering it coming back.
    trunk = nx.shortest_path(graph, starting_node, ending_node)

    # The copy lets us delete edges as they are visited. Building an
    # undirected nx.Graph also collapses the forward/reverse edge pairs
    # of a directed input ("preserve_order"), so no satin is sewn twice.
    graph = nx.Graph(graph)
    graph.remove_edges_from(zip(trunk[:-1], trunk[1:]))

    stitched = []
    previous = None
    for node in trunk:
        if previous is not None:
            stitched.append((previous, node))
        previous = node

        # Materialise the DFS first: the graph is mutated inside the loop.
        for tail, head, label in list(nx.dfs_labeled_edges(graph, node)):
            if tail == head:
                # dfs_labeled_edges reports (start, start, "forward") for
                # the starting node; that is not a real edge.
                continue

            if label == "forward":
                stitched.append((tail, head))
                graph.remove_edge(tail, head)
            elif label == "reverse":
                stitched.append((head, tail))
            elif label == "nontree" and (tail, head) in graph.edges:
                # A "nontree" edge leads to an already-visited node. The
                # satin on it must still be sewn, so go down and back.
                # The same edge can be reported more than once; the
                # membership test above deduplicates.
                stitched.extend([(tail, head), (head, tail)])
                graph.remove_edge(tail, head)

    return stitched
def format2linearinput(inputdir, dataset, task, option):
    """Write one linearised DFS traversal per example to a text file.

    Reads the ``-src-labels.txt``, ``-src-node1.txt``, ``-src-node2.txt``
    and ``-src-nodes.txt`` files that share the prefix
    ``inputdir + dataset + '-sr11-<task>-' + option`` and writes
    ``...-src-linear.txt`` with one space-joined line per example.

    Parameters are concatenated as-is to build the paths, so ``inputdir``
    must already end with a path separator if one is needed.
    """
    prefix = inputdir + dataset + '-sr11-{}-'.format(task) + option + '-src-'

    # The context manager closes the output file even if reading or
    # linearising an example raises.
    with codecs.open(prefix + 'linear.txt', 'w', encoding='utf8') as outlinear:
        labels = readGraphFile(prefix + 'labels.txt')
        node1 = readGraphFile(prefix + 'node1.txt')
        node2 = readGraphFile(prefix + 'node2.txt')
        nodes = readGraphFile(prefix + 'nodes.txt')
        print("* examples: {}".format(len(labels)))

        if labels and node1 and node2 and nodes:
            for exID, ex_labels in enumerate(labels):
                print("sentID={}".format(exID + 1))
                tokens = nodes[exID]

                G = nx.DiGraph()
                G.add_nodes_from(range(len(tokens)))
                for label, node, head in zip(ex_labels, node1[exID], node2[exID]):
                    # The edge is stored twice: keyed by token text and
                    # keyed by integer position. The traversal below
                    # starts at integer node 0.
                    G.add_edge(tokens[int(head)], tokens[int(node)], l=label)
                    G.add_edge(int(head), int(node), l=label)

                linearised = []
                seen = set()  # only used for membership tests
                for u, v, d in list(nx.dfs_labeled_edges(G, 0)):
                    if u in seen and v in seen and u == v and u == 0:
                        continue
                    if d['dir'] == 'reverse':
                        continue
                    # 'forward' and 'nontree' edges both pass through;
                    # 'nontree' occurs when the target was already visited.
                    if u not in seen:
                        linearised.append(tokens[u])
                        seen.add(u)
                    # The dependent is emitted after the label even if it
                    # was printed before.
                    linearised.append(G[u][v]['l'])
                    linearised.append(tokens[v])
                    seen.add(v)

                outlinear.write(" ".join(linearised) + "\n")
        else:
            print("Some file was not found or did not contained data.")

    print("Linear input saved.")
def dfs_postorder_nodes(G, source=None):
    """Return a generator of nodes in depth-first-search post-order.

    The search starts from ``source``. A node is emitted for every
    labeled edge whose ``"dir"`` entry is ``"reverse"``.
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the end of
    # the ordering.
    return (edge[1] for edge in labeled if edge[2]["dir"] == "reverse")
def get_importance_graph(cls, tree, subscribers, root, make_copy=True):
    """
    Compute the 'link-importance' of every edge of a multicast tree.

    The importance of an edge is the number of root-to-subscriber paths
    that pass through it. It is stored in each edge's attributes under
    ``cls.IMPORTANCE_ATTRIBUTE_NAME``. The values are computed with a
    depth-first search, on the way back up the tree ('reverse' edges).

    :param tree: the multicast tree
    :type tree: nx.Graph
    :param subscribers: collection of subscribers (ideally a set for fast
        membership tests)
    :param root: node the search starts from
    :param make_copy: if True, annotate and return a copy of ``tree``
        instead of writing the attribute into the given graph
    :return: the tree with every edge carrying an importance attribute
    :raises RuntimeError: (only when ``__debug__``) if the graph is not a
        tree or an edge ended up without an importance
    """
    # Work on a copy so the attribute does not leak between calls.
    if make_copy:
        tree = tree.copy()

    # node -> list of the tree edges leaving it (away from the root)
    child_edges = {}

    for u, v, edge_type in nx.dfs_labeled_edges(tree, source=root):
        if u == v or edge_type == 'nontree':
            continue

        if edge_type == 'forward':
            child_edges.setdefault(u, []).append((u, v))
            continue

        # Reverse edge: every edge below `v` already has its importance,
        # so the importance of (u, v) is their sum, plus one if `v` is
        # itself a subscriber (this also covers non-leaf subscribers).
        importance = 1 if v in subscribers else 0
        for _u, _v in child_edges.get(v, []):
            importance += tree[_u][_v][cls.IMPORTANCE_ATTRIBUTE_NAME]
        tree[u][v][cls.IMPORTANCE_ATTRIBUTE_NAME] = importance

    # Sanity checks, skipped under `python -O`.
    if __debug__:
        # A non-tree input would explain edges left without importance.
        if not nx.is_tree(tree):
            raise RuntimeError("MDMT %s is not a tree! Edges: %s" % (tree.name, list(tree.edges())))
        unassigned = [
            (u, v)
            for u, v, imp in tree.edges(data=cls.IMPORTANCE_ATTRIBUTE_NAME)
            if imp is None
        ]
        if unassigned:
            raise RuntimeError("MDMT %s has edges with no importance: %s" % (tree.name, unassigned))

    return tree
def dfs_postorder_nodes(G, source=None):
    """Return a generator of nodes in depth-first-search post-order.

    The search starts from ``source``. Only the target node of edges
    whose ``'dir'`` entry is ``'reverse'`` is emitted.
    """
    walk = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): append `source` to the ordering.
    return (head for _tail, head, info in walk if info['dir'] == 'reverse')
def dfs_preorder_nodes(G, source=None, depth_limit=None, sort_neighbors=None):
    """Generate the nodes of `G` in depth-first-search pre-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    depth_limit : int, optional (default=len(G))
        Maximum depth of the search.

    sort_neighbors : function
        A function that takes the list of neighbors of a given node as
        input and returns an *iterator* over those neighbors in a custom
        order.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search pre-order.

    Examples
    --------
    >>> G = nx.path_graph(5)
    >>> list(nx.dfs_preorder_nodes(G, source=0))
    [0, 1, 2, 3, 4]
    >>> list(nx.dfs_preorder_nodes(G, source=0, depth_limit=2))
    [0, 1, 2]

    Notes
    -----
    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.

    The implementation is adapted from David Eppstein's depth-first
    search function in `PADS`_, with modifications to allow depth limits
    based on the Wikipedia article "`Depth-limited search`_".

    .. _PADS: http://www.ics.uci.edu/~eppstein/PADS
    .. _Depth-limited search: https://en.wikipedia.org/wiki/Depth-limited_search

    See Also
    --------
    dfs_edges
    dfs_postorder_nodes
    dfs_labeled_edges
    bfs_edges
    """
    labeled = nx.dfs_labeled_edges(
        G,
        source=source,
        depth_limit=depth_limit,
        sort_neighbors=sort_neighbors,
    )
    # A node is in pre-order position when its edge is labeled "forward".
    return (head for _tail, head, label in labeled if label == "forward")
def _dfs_cycle_forest(G, root=None):
    """Build a directed graph of cycles from a depth-first search of `G`.

    `G` is an undirected simple graph and `root` is the node at which
    the depth-first search starts.

    Returns a pair ``(H, nodes)``: the depth-first search cycle graph
    (a :class:`~networkx.DiGraph`) and the list of nodes in depth-first
    preorder. In ``H`` every tree edge of the search is oriented toward
    the root and every non-tree edge away from it.

    Without `root`, each connected component of `G` is searched and the
    result is a directed forest. If `root` is not in the graph, this
    raises :exc:`KeyError`.
    """
    # Each node with an incident nontree edge gets a directed cycle that
    # starts with that nontree edge and returns to the node.
    #
    # Node attribute `parent`: the node's parent in the DFS tree.
    # Edge attribute `nontree`: whether the edge is a nontree edge.
    H = nx.DiGraph()
    preorder = []
    for tail, head, label in nx.dfs_labeled_edges(G, source=root):
        if label == 'forward':
            # (root, root, 'forward') marks the start of the search on a
            # new connected component; such a node has no parent and no
            # tree edge.
            starts_component = tail == head
            H.add_node(head, parent=None if starts_component else tail)
            if not starts_component:
                H.add_edge(head, tail, nontree=False)
            preorder.append(head)
        elif label == 'nontree' and head not in H[tail]:
            # Nontree edges are reported in both orientations; skip the
            # edge if its other orientation is already in H.
            H.add_edge(head, tail, nontree=True)
        # 'reverse' edges (and duplicate nontree edges) need no action.
    return H, preorder
def value(self, ast: AST):
    """Walk ``ast`` depth-first and return the collected error lines.

    ``VarMiddle._on_entering_node`` is called when a node is entered and
    ``VarMiddle._on_leaving_node`` when it is left; both share a single
    ``ScopeStatus`` instance.
    """
    status = ScopeStatus()
    error_lines: List[LineNumber] = []

    for _, node, label in dfs_labeled_edges(ast.tree, ast.root):
        if label == 'forward':
            VarMiddle._on_entering_node(node, ast.tree, status, error_lines)
            continue
        if label == 'reverse':
            VarMiddle._on_leaving_node(node, ast.tree, status)

    return error_lines
def value(self, filename):
    """Build the AST of ``filename`` and return the collected error lines.

    The tree is walked depth-first. ``VarMiddle._on_entering_node`` is
    called when a node is entered and ``VarMiddle._on_leaving_node`` when
    it is left; both share a single ``ScopeStatus`` instance.
    """
    ast = AST.build_from_javalang(build_ast(filename))
    status = ScopeStatus()
    error_lines: List[LineNumber] = []

    for _, node, label in dfs_labeled_edges(ast.tree, ast.root):
        if label == 'forward':
            VarMiddle._on_entering_node(node, ast.tree, status, error_lines)
            continue
        if label == 'reverse':
            VarMiddle._on_leaving_node(node, ast.tree, status)

    return error_lines
def mark_unused(G, n_rules):
    """Flag productive nodes and edges of ``G`` and report which rules are used.

    The graph is searched depth-first from node ``'c0'``. On the way back
    up ('reverse' edges), an edge ``(u, v)`` is marked ``'productive'``,
    together with both endpoints, when ``v`` starts with ``'t'`` or was
    already marked productive. ``G`` is modified in place.

    Parameters
    ----------
    G : NetworkX graph
        Every edge must carry a ``'rule'`` attribute whose text after the
        first character is a 1-based rule number.
    n_rules : int
        Total number of rules.

    Returns
    -------
    list of bool
        ``used[i]`` is False when some edge of rule ``i + 1`` was not
        marked productive, True otherwise.
    """
    for u, v, label in nx.dfs_labeled_edges(G, 'c0'):
        # Skip everything but real reverse edges; (root, root) is the
        # pseudo-edge reported for the start node.
        if label != 'reverse' or u == v:
            continue
        # `G.nodes[...]` replaces `G.node[...]`, which NetworkX 2.4 removed.
        if v[0] == 't' or 'productive' in G.nodes[v]:
            G.nodes[u]['productive'] = True
            G.nodes[v]['productive'] = True
            G[u][v]['productive'] = True

    used = [True] * n_rules
    for u, v in G.edges:
        if 'productive' not in G[u][v]:
            used[int(G[u][v]['rule'][1:]) - 1] = False
    return used
def tree_to_balanced_sequence(tree, open_to_close=None, toks=None, mode='tuple'):
    """Encode a directed tree (or forest) as a balanced token sequence.

    Every node with in-degree 0 is used as a DFS source. An opening token
    is emitted when a node is entered and the matching closing token when
    it is left.

    Parameters
    ----------
    tree : NetworkX directed graph
    open_to_close : dict, optional
        Existing opening-token -> closing-token mapping; extended in place.
    toks : dict, optional
        Existing node -> opening-token mapping; extended in place.
    mode : str
        Token encoding for new nodes: ``'tuple'``, ``'number'``,
        ``'paren'`` or ``'chr'``.

    Returns
    -------
    (sequence, open_to_close, toks)
        ``sequence`` is a tuple of tokens.

    Raises
    ------
    KeyError
        If the search reports an edge that is neither 'forward' nor
        'reverse'.
    """
    from collections import namedtuple
    Token = namedtuple('Token', ['action', 'value'])

    open_to_close = {} if open_to_close is None else open_to_close
    toks = {} if toks is None else toks

    roots = [n for n in tree.nodes if tree.in_degree[n] == 0]
    emitted = []
    for root in roots:
        for _, node, label in nx.dfs_labeled_edges(tree, source=root):
            if label == 'forward':
                # First visit of `node`: create its token pair if needed.
                if node not in toks:
                    if mode == 'tuple':
                        # TODO: token encoding scheme where
                        # subdirectories are matchable via a custom
                        # operation.
                        open_tok = Token('open', node)
                        close_tok = Token('close', node)
                    elif mode == 'number':
                        open_tok = len(toks) + 1
                        close_tok = -open_tok
                    elif mode == 'paren':
                        open_tok = '{}('.format(node)
                        close_tok = '){}'.format(node)
                    elif mode == 'chr':
                        open_tok = str(node)
                        close_tok = str(node) + u'\u0301'
                    toks[node] = open_tok
                    open_to_close[open_tok] = close_tok
                open_tok = toks[node]
                emitted.append(open_tok)
            elif label == 'reverse':
                # Leaving `node`: close the token opened on the way down.
                close_tok = open_to_close[toks[node]]
                emitted.append(close_tok)
            else:
                raise KeyError(label)

    return tuple(emitted), open_to_close, toks
def dfs_postorder_nodes(G, source=None, depth_limit=None):
    """Generate the nodes of `G` in depth-first-search post-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts.

    depth_limit : int, optional (default=len(G))
        Maximum depth of the search.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search post-order.

    Examples
    --------
    >>> G = nx.path_graph(5)
    >>> list(nx.dfs_postorder_nodes(G, source=0))
    [4, 3, 2, 1, 0]
    >>> list(nx.dfs_postorder_nodes(G, source=0, depth_limit=2))
    [1, 0]

    Notes
    -----
    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.

    The implementation is adapted from David Eppstein's depth-first
    search function in `PADS`_, with modifications to allow depth limits
    based on the Wikipedia article "`Depth-limited search`_".

    .. _PADS: http://www.ics.uci.edu/~eppstein/PADS
    .. _Depth-limited search: https://en.wikipedia.org/wiki/Depth-limited_search

    See Also
    --------
    dfs_edges
    dfs_preorder_nodes
    dfs_labeled_edges
    edge_dfs
    bfs_tree
    """
    labeled = nx.dfs_labeled_edges(G, source=source, depth_limit=depth_limit)
    # A node is in post-order position when its edge is labeled 'reverse'.
    return (head for _tail, head, label in labeled if label == 'reverse')
def dfs_preorder_nodes(G, source=None, depth_limit=None):
    """Generate the nodes of `G` in depth-first-search pre-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    depth_limit : int, optional (default=len(G))
        Maximum depth of the search.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search pre-order.

    Examples
    --------
    >>> G = nx.path_graph(5)
    >>> list(nx.dfs_preorder_nodes(G, source=0))
    [0, 1, 2, 3, 4]
    >>> list(nx.dfs_preorder_nodes(G, source=0, depth_limit=2))
    [0, 1, 2]

    Notes
    -----
    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.

    The implementation is adapted from David Eppstein's depth-first
    search function in `PADS`_, with modifications to allow depth limits
    based on the Wikipedia article "`Depth-limited search`_".

    .. _PADS: http://www.ics.uci.edu/~eppstein/PADS
    .. _Depth-limited search: https://en.wikipedia.org/wiki/Depth-limited_search

    See Also
    --------
    dfs_edges
    dfs_postorder_nodes
    dfs_labeled_edges
    """
    labeled = nx.dfs_labeled_edges(G, source=source, depth_limit=depth_limit)
    # A node is in pre-order position when its edge is labeled 'forward'.
    return (head for _tail, head, label in labeled if label == 'forward')
def traverse(
    self,
    on_node_entering: TraverseCallback,
    on_node_leaving: TraverseCallback = lambda node: None,
    source_node: Optional[ASTNode] = None,
    undirected=False,
):
    """Walk the tree depth-first, invoking callbacks on enter and leave.

    ``on_node_entering`` is called with an ``ASTNode`` when a node is
    first reached, ``on_node_leaving`` when the search leaves it. The
    walk starts at ``source_node``, or at ``self.get_root()`` when it is
    None. With ``undirected=True`` the walk runs over an undirected view
    of the tree; the ``ASTNode`` wrappers always refer to ``self.tree``.
    """
    if undirected:
        graph = self.tree.to_undirected(as_view=True)
    else:
        graph = self.tree

    start = self.get_root() if source_node is None else source_node

    for _, node_index, label in dfs_labeled_edges(graph, start.node_index):
        if label == "forward":
            on_node_entering(ASTNode(self.tree, node_index))
        elif label == "reverse":
            on_node_leaving(ASTNode(self.tree, node_index))
def _traverse_graph(
    graph: DiGraph,
    start_node_id: NodeId,
    on_node_entering: TraverseCallback,
    on_node_leaving: TraverseCallback = lambda _: None,
) -> None:
    """Walk ``graph`` depth-first from ``start_node_id`` with callbacks.

    A node object is created through
    ``NodesFactory._detect_and_create_node`` for every reported edge,
    then passed to ``on_node_entering`` ('forward') or
    ``on_node_leaving`` ('reverse').

    Raises RuntimeError for any other edge type.
    """
    for _, node_id, label in dfs_labeled_edges(graph, start_node_id):
        # The node is built before the label is checked.
        node = NodesFactory._detect_and_create_node(graph, node_id)
        if label not in ("forward", "reverse"):
            raise RuntimeError(f"Unexpected edge type {label}.")
        callback = on_node_entering if label == "forward" else on_node_leaving
        callback(node)
def dfs_preorder_nodes(G, source=None):
    """Iterate over the nodes of `G` in depth-first-search pre-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search pre-order.

    Examples
    --------
    >>> G = nx.path_graph(3)
    >>> print(list(nx.dfs_preorder_nodes(G, 0)))
    [0, 1, 2]

    Notes
    -----
    Based on http://www.ics.uci.edu/~eppstein/PADS/DFS.py
    by D. Eppstein, July 2004.

    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.

    See Also
    --------
    dfs_edges
    dfs_postorder_nodes
    dfs_labeled_edges
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the front of
    # the ordering.
    return (head for _tail, head, label in labeled if label == 'forward')
def __str__(self) -> str:
    """Render the tree as indented text, one node per line.

    Each line is prefixed with one ``'| '`` per depth level and shows the
    node type, the string value for ``ASTNodeType.STRING`` nodes, the
    node index and, when it is not None, the line number.
    """
    pieces = []
    level = 0
    for _, node, label in dfs_labeled_edges(self.tree, self.root):
        if label == 'reverse':
            level -= 1
            continue
        if label != 'forward':
            continue

        attrs = self.tree.nodes[node]
        node_type = attrs['node_type']
        pieces.append('| ' * level)
        pieces.append(str(node_type) + ': ')
        if node_type == ASTNodeType.STRING:
            pieces.append(attrs['string'] + ', ')
        pieces.append(f'node index = {node}')
        line_number = attrs['line']
        if line_number is not None:
            pieces.append(f', line = {line_number}')
        pieces.append('\n')
        level += 1

    return ''.join(pieces)
def dfs_order(forest, roots):
    '''
    Yield the DFS edge traversal of several trees in lockstep.

    For each root, the tree edges are listed in DFS order: ``(u, v, i, 1)``
    when edge ``(u, v)`` is followed downward and ``(v, u, i, 0)`` when it
    is followed back, where ``i`` is the index of the root in ``roots``.
    Step ``k`` of the generator yields four tuples (sources, destinations,
    tree ids, flags), gathering the ``k``-th entry of every tree that
    still has one.
    '''
    per_tree = []
    for tree_id, root in enumerate(roots):
        steps = []
        for u, v, label in nx.dfs_labeled_edges(forest, root):
            # Drop the (root, root) pseudo-edge and non-tree edges.
            if u == v or label == 'nontree':
                continue
            if label == 'forward':
                steps.append((u, v, tree_id, 1))
            elif label == 'reverse':
                steps.append((v, u, tree_id, 0))
        per_tree.append(steps)

    for column in itertools.zip_longest(*per_tree):
        # Shorter traversals are padded with None; leave those out.
        present = [step for step in column if step is not None]
        srcs, dsts, tree_ids, flags = zip(*present)
        yield srcs, dsts, tree_ids, flags
def get_subtrees(self, *root_type: ASTNodeType) -> Iterator['AST']:
    '''
    Yield the subtrees whose root has one of the given node types.

    When matching subtrees are nested, only the outermost one is
    yielded.
    '''
    collecting = False
    active_root = -1  # all node indexes are positive
    members: List[int] = []

    for _, node, label in dfs_labeled_edges(self.tree, self.root):
        if label == 'forward':
            # A matching node opens a subtree unless one is already open.
            if not collecting and self.tree.nodes[node]['node_type'] in root_type:
                collecting = True
                active_root = node
            if collecting:
                members.append(node)
        elif label == 'reverse' and node == active_root:
            # The search is leaving the root of the open subtree.
            collecting = False
            yield AST(self.tree.subgraph(members), active_root)
            members = []
            active_root = -1
def draw_strokes_from(main_edges):
    """Build one ``Segment`` per node reached from each start in ``main_edges``.

    Each start node is paired with an entry of ``roots`` and ``graph`` is
    searched depth-first from it. A node whose parent already has a
    segment starts at a point interpolated along the parent's segment
    (at the node's ``'positions'`` value, or 0 when that is falsy);
    otherwise it starts at the paired root. The end point comes from
    ``stroke`` applied to the node's ``'lengths'`` and ``'directions'``.

    Returns the list of segments in the order they were created.
    """
    strokes = {}
    segments = []
    for start, cluster_root in zip(main_edges, roots):
        for parent, child, label in nx.dfs_labeled_edges(graph, start):
            if label['dir'] != 'forward':
                continue

            attrs = graph.node[child]
            if parent not in strokes:
                origin = cluster_root
            else:
                prev_start, prev_end = strokes[parent]
                origin = interpolate(prev_start, prev_end, attrs['positions'] or 0)

            tip = stroke(
                to_size(attrs['lengths']),
                to_angle(attrs['directions']),
                origin,
            )
            segment = Segment(np.array(origin), np.array(tip))
            strokes[child] = segment
            segments.append(segment)
    return segments
def dfs_postorder_nodes(G, source=None):
    """Produce the nodes of `G` in depth-first-search post-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search post-order.

    Examples
    --------
    >>> G = nx.Graph()
    >>> G.add_path([0,1,2])
    >>> print(list(nx.dfs_postorder_nodes(G,0)))
    [2, 1, 0]

    Notes
    -----
    Based on http://www.ics.uci.edu/~eppstein/PADS/DFS.py
    by D. Eppstein, July 2004.

    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the end of
    # the ordering.
    return (edge[1] for edge in labeled if edge[2]['dir'] == 'reverse')
def dfs_preorder_nodes(G, source=None):
    """Produce the nodes of `G` in depth-first-search pre-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search pre-order.

    Examples
    --------
    >>> G = nx.Graph()
    >>> G.add_path([0,1,2])
    >>> print(list(nx.dfs_preorder_nodes(G,0)))
    [0, 1, 2]

    Notes
    -----
    Based on http://www.ics.uci.edu/~eppstein/PADS/DFS.py
    by D. Eppstein, July 2004.

    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the front of
    # the ordering.
    return (edge[1] for edge in labeled if edge[2]['dir'] == 'forward')
def dfs_postorder_nodes(G, source=None):
    """Produce the nodes of `G` in depth-first-search post-order.

    Parameters
    ----------
    G : NetworkX graph

    source : node, optional
        Node at which the depth-first search starts; only nodes in the
        component reachable from it are returned.

    Returns
    -------
    nodes: generator
        A generator of nodes in depth-first-search post-order.

    Examples
    --------
    >>> G = nx.path_graph(3)
    >>> print(list(nx.dfs_postorder_nodes(G,0)))
    [2, 1, 0]

    Notes
    -----
    Based on http://www.ics.uci.edu/~eppstein/PADS/DFS.py
    by D. Eppstein, July 2004.

    When no source is given, one is picked arbitrarily, and this is
    repeated until every component of the graph has been searched.
    """
    labeled = nx.dfs_labeled_edges(G, source=source)
    # Possible variation (not applied): chain `source` onto the end of
    # the ordering.
    return (head for _tail, head, label in labeled if label == 'reverse')
def test_dls_labeled_disconnected_edges(self):
    """Depth-limited labeled DFS from node 6 reports the expected forward edges."""
    labeled = nx.dfs_labeled_edges(self.G, source=6, depth_limit=2)
    forward = [
        (tail, head) for tail, head, label in labeled if label == 'forward'
    ]
    assert_equal(forward, [(6, 6), (6, 5), (5, 4)])
def DiGraph(dist_matrix, playlist):
    """Build a pruned transition graph and explore it from every node.

    ``dist_matrix`` is loaded into a DataFrame whose rows are labelled
    with fixed single-letter ids and whose columns are labelled with
    ``playlist``. Every cell becomes a weighted edge
    ``[row_label, column_label, e ** distance]``. Edges whose weight is at
    or above the average weight, or exactly 1, are dropped before the
    directed graph is built.

    Returns
    -------
    (UTlist, orderlist)
        ``UTlist`` holds, for each start node, the connected components
        of its undirected DFS tree. ``orderlist`` holds, for each start
        node, the DFS post-order of the nodes reached from it.
    """
    # Row labels; the order ('k' before 'j') is kept as in the original.
    columns = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
               'k', 'j', 'l', 'm', 'n', 'o', 'p', 'q', 'r']
    df = pd.DataFrame(dist_matrix, index=columns, columns=playlist)

    # [[track1, track2, weight], ...]; exponentiating the distance
    # penalises large distances more heavily. `.iloc` gives the purely
    # positional access that the removed `.ix` was used for.
    tups = [
        [src, dst, math.e ** df.iloc[i, j]]
        for i, src in enumerate(df.index)
        for j, dst in enumerate(df.columns)
    ]

    # Summing in descending order keeps the threshold bit-identical to
    # the previous implementation.
    scores = sorted((tup[2] for tup in tups), reverse=True)
    average_score = sum(scores) / float(len(scores)) if scores else 0.0

    # Prune by building a new list rather than removing while iterating,
    # which skipped elements and could remove the same edge twice.
    tups = [
        tup for tup in tups
        if not (tup[2] >= average_score or tup[2] == 1)
    ]

    DG = nx.DiGraph()
    DG.add_weighted_edges_from(tups)

    # Explore from every possible starting song.
    Tlist = []
    orderlist = []
    for node in DG:
        order = [
            v for u, v, d in nx.dfs_labeled_edges(DG, source=node)
            if d['dir'] == 'reverse'
        ]
        orderlist.append(order)
        print(orderlist[0])
        Tlist.append(nx.dfs_tree(DG, node))

    # Use each tree in turn; previously the last tree built was reused
    # for every entry.
    UTlist = [
        nx.connected_components(tree.to_undirected()) for tree in Tlist
    ]
    return UTlist, orderlist
def test_dfs_labeled_disconnected_edges(self):
    """Labeled DFS without a source reports the expected forward edges."""
    labeled = nx.dfs_labeled_edges(self.D)
    forward = [
        (tail, head)
        for tail, head, info in labeled
        if info['dir'] == 'forward'
    ]
    assert_equal(forward, [(0, 0), (0, 1), (2, 2), (2, 3)])
def test_dfs_labeled_edges(self):
    """Labeled DFS from node 0 reports the expected forward edges."""
    labeled = nx.dfs_labeled_edges(self.G, source=0)
    forward = [
        (tail, head)
        for tail, head, info in labeled
        if info['dir'] == 'forward'
    ]
    assert_equal(forward, [(0, 0), (0, 1), (1, 2), (2, 4), (4, 3)])
def DiGraph(dist_matrix):
    """Build a pruned transition graph and explore it from every node.

    ``dist_matrix`` is loaded into a DataFrame whose rows and columns are
    labelled with fixed single-letter ids. Every cell becomes a weighted
    edge ``[row_label, column_label, e ** distance]``. Edges whose weight
    is at or above the average weight, or exactly 1, are dropped before
    the directed graph is built.

    For each start node the DFS tree is computed and its post-order,
    labeled edges and edge list are printed.

    Returns
    -------
    UTlist : list
        For each start node, the connected components of its undirected
        DFS tree.
    """
    # Row/column labels; the order ('k' before 'j') is kept as in the
    # original.
    columns = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
        'k', 'j', 'l', 'm', 'n', 'o', 'p', 'q', 'r',
    ]
    df = pd.DataFrame(dist_matrix, index=columns, columns=columns)

    # [[track1, track2, weight], ...]; exponentiating the distance
    # penalises large distances more heavily. `.iloc` gives the purely
    # positional access that the removed `.ix` was used for.
    tups = [
        [src, dst, math.e ** df.iloc[i, j]]
        for i, src in enumerate(df.index)
        for j, dst in enumerate(df.columns)
    ]

    # Summing in descending order keeps the threshold bit-identical to
    # the previous implementation.
    scores = sorted((tup[2] for tup in tups), reverse=True)
    average_score = sum(scores) / float(len(scores)) if scores else 0.0

    # Prune by building a new list rather than removing while iterating,
    # which skipped elements and could remove the same edge twice.
    tups = [
        tup for tup in tups
        if not (tup[2] >= average_score or tup[2] == 1)
    ]

    DG = nx.DiGraph()
    DG.add_weighted_edges_from(tups)

    # Explore from every possible starting song.
    Tlist = []
    for node in DG:
        T = nx.dfs_tree(DG, node)
        print(list(nx.dfs_postorder_nodes(T)))
        print(list(nx.dfs_labeled_edges(T, node)))
        Tlist.append(T)
        print(list(T.edges()))

    # Use each tree in turn; previously the last tree built was reused
    # for every entry.
    UTlist = [
        nx.connected_components(tree.to_undirected()) for tree in Tlist
    ]
    return UTlist
def test_dfs_labeled_edges(self):
    """Labeled DFS from node 0 reports the expected forward edges."""
    walk = nx.dfs_labeled_edges(self.G, source=0)
    forward = [(u, v) for u, v, info in walk if info['dir'] == 'forward']
    assert_equal(forward, [(0, 0), (0, 1), (1, 2), (2, 4), (4, 3)])
def update_insertable(self, v, stop_at=None):
    """Update the set of insertable edges incident to ``v``.

    The clique tree is searched depth-first from one clique containing
    ``v``. Edges ``(u, v)`` that qualify are added to ``self.insertable``.
    Afterwards, every node of a clique that the search did not reach
    (a different component) is also paired with ``v``.

    Parameters
    ----------
    v : node
        The node whose insertable edges are updated.
    stop_at : int, optional
        Early-stopping threshold: return as soon as ``self.insertable``
        holds at least this many edges.
    """
    Kx = None
    Kx_nodes = None
    cliques_visited = set()
    nodes_seen = []
    min_weights = []
    v_cliques = self.node_in_cliques[v]

    # Start from an arbitrary clique containing v (0 if there is none).
    K1 = next(iter(v_cliques), 0)
    cliques_visited.add(K1)

    for clq1, clq2, data in nx.dfs_labeled_edges(self.cliquetree, source=K1):
        # Labels are compared with `==`; `is` on strings relies on
        # interning and is not a reliable equality test.
        if data["dir"] == "nontree" or (clq1 == K1 and clq2 == K1):
            continue
        clq_min, clq_max = self._edge(clq1, clq2)
        sep = self.cliquetree[clq_min][clq_max]["nodes"]

        if data["dir"] == "forward":
            cliques_visited.add(clq2)
            if clq1 in v_cliques and clq2 not in v_cliques:
                Kx = clq1
                Kx_nodes = self.nodes_in_clique[clq1]
            # `is not None` mirrors the test in the 'reverse' branch, so
            # that every push below is matched by a pop even when the
            # clique id is 0.
            if Kx is not None:
                w_e = len(sep)
                if not min_weights or w_e <= min_weights[-1]:
                    # w(e) = w(x, y)
                    min_weights.append(w_e)
                    # is it a possible Ky?
                    Ky_nodes = self.nodes_in_clique[clq2]
                    if min_weights[-1] == len(Kx_nodes.intersection(Ky_nodes)):
                        for u in self.nodes_in_clique[clq2]:
                            if (not nodes_seen or u not in nodes_seen[-1]) and u not in self.G[v] and u != v:
                                # Ky for u
                                self.insertable.add(self._edge(u, v))
                                if u == v:
                                    raise ValueError("u is equal to v")
                                if stop_at is not None and len(self.insertable) >= stop_at:
                                    return
                else:
                    min_weights.append(min_weights[-1])

                if nodes_seen:
                    seen_previous = nodes_seen[-1]
                else:
                    seen_previous = set()
                nodes_seen.append(self.nodes_in_clique[clq2].union(seen_previous))

        elif data["dir"] == "reverse":
            first_Kx = False
            if clq1 in v_cliques and clq2 not in v_cliques:
                Kx = None
                Kx_nodes = None
                first_Kx = True
            if Kx is not None or first_Kx:
                min_weights.pop()
                nodes_seen.pop()

    for clq in self.cliquetree:
        # if clique is in another component, edge is insertable
        if clq not in cliques_visited:
            for u in self.nodes_in_clique[clq]:
                self.insertable.add(self._edge(u, v))
                if stop_at is not None and len(self.insertable) >= stop_at:
                    return
def set_text_density(self):
    """
    Augment text statistics from the end node to calculate text density.

    Iterates the labelled DFS edges of the graph and, for every 'reverse'
    edge, records tag counts, link counts and text lengths for the child
    element and adds them to the running totals of its parent. The
    per-element 'text_density' values are finally passed to
    ``set_node_attributes`` under the name 'text_density'.

    Returns a dict keyed by element; each value is a dict with the keys
    'num_of_tags', 'num_of_link_tags', 'text_length', 'link_text_length'
    and 'text_density'.
    """
    stats = {}

    # Add the given values to stats[el], creating the entry on first use.
    def add_stat(el, num_of_tags, num_of_link_tags, text_length,
                 link_text_length, text_density):
        if el in stats:
            stats[el]['num_of_tags'] += num_of_tags
            stats[el]['num_of_link_tags'] += num_of_link_tags
            stats[el]['text_length'] += text_length
            stats[el]['link_text_length'] += link_text_length
            stats[el]['text_density'] += text_density
        else:
            stats[el] = {
                'num_of_tags': num_of_tags,
                'num_of_link_tags': num_of_link_tags,
                'text_length': text_length,
                'link_text_length': link_text_length,
                'text_density': text_density,
            }

    # Ratio of link-text length to total text length for the body; both
    # lengths are raised to at least 1, so the denominator is never zero.
    def get_body_stat():
        text = self.parser.get_all_text(self.parser.body)
        text_length = len(remove_space(text))
        link_els = self.parser._parser.find_all('a', self.parser.body)
        link_text_length = sum([
            len(remove_space(self.parser.get_all_text(_el)))
            for _el in link_els
        ])
        return max(link_text_length, 1) / max(text_length, 1)

    edges = nx.dfs_labeled_edges(self.__G)
    body_stat = get_body_stat()
    for parent_el, el, d in edges:
        # Only edges labelled 'reverse' are processed; all others are
        # skipped.
        if d != 'reverse':
            continue
        # NOTE: when present, `stat` is the very dict stored in `stats`, so
        # the add_stat(el, ...) call further down updates it in place and
        # the later stat[...] reads see the updated totals.
        stat = stats.get(el)
        num_of_tags = 0
        num_of_link_tags = 0
        text_length = 0
        link_text_length = 0
        text_density = 0.0
        # the end node: nothing has been accumulated for `el` yet, so the
        # counts are taken with recursive=True and the text with
        # get_all_text.
        if stat is None:
            __link_els = self.parser.find_all('a', el)
            __num_of_all_tags = self.parser.count_tag(el, recursive=True)
            __num_of_text_tags = self.parser.count_tag(
                el, 'p', recursive=True) + self.parser.count_tag(
                    el, 'br', recursive=True)
            # 'p' and 'br' tags are excluded from the tag count.
            num_of_tags = __num_of_all_tags - __num_of_text_tags
            num_of_link_tags = len(__link_els)
            text_length = len(remove_space(self.parser.get_all_text(el)))
            if self.parser.get_tag(el) == 'a':
                # An 'a' element: all of its text counts as link text, it
                # counts itself as one more link tag, and its density is
                # left at 0.0.
                link_text_length = text_length
                num_of_link_tags += 1
                text_density = 0.0
            else:
                link_text_length = sum([
                    len(remove_space(self.parser.get_all_text(_el)))
                    for _el in __link_els
                ])
                text_density = self.__calculate_text_density(
                    num_of_tags, num_of_link_tags, text_length,
                    link_text_length, body_stat)
        else:
            # `el` already has accumulated totals (entries are created by
            # the parent updates at the end of this loop body): count with
            # the non-recursive calls and combine with those totals for the
            # density.
            __num_of_all_tags = self.parser.count_tag(el)
            __num_of_text_tags = self.parser.count_tag(
                el, 'p') + self.parser.count_tag(el, 'br')
            num_of_tags = __num_of_all_tags - __num_of_text_tags
            text_length = len(remove_space(self.parser.get_text(el)))
            text_density = self.__calculate_text_density(
                num_of_tags=num_of_tags + stat['num_of_tags'],
                num_of_link_tags=stat['num_of_link_tags'],
                text_length=text_length + stat['text_length'],
                link_text_length=stat['link_text_length'],
                body_stat=body_stat,
            )
        add_stat(el, num_of_tags, num_of_link_tags, text_length,
                 link_text_length, text_density)
        # Since lxml `el.text` method does not include text following child
        # node, we need to add tail text length to parent node.
        text_length += len(remove_space(self.parser.get_tail_text(el)))
        # Add this element's values to its parent's totals.
        if stat is None:
            add_stat(parent_el, num_of_tags, num_of_link_tags, text_length,
                     link_text_length, text_density)
        else:
            add_stat(
                el=parent_el,
                num_of_tags=num_of_tags + stat['num_of_tags'],
                num_of_link_tags=stat['num_of_link_tags'],
                text_length=text_length + stat['text_length'],
                link_text_length=stat['link_text_length'],
                text_density=text_density,
            )
    # Publish only the density figure as a node attribute; the full stats
    # dict is returned to the caller.
    stat_text_density = {el: r['text_density'] for el, r in stats.items()}
    self.set_node_attributes('text_density', stat_text_density)
    return stats
def test_dls_labeled_edges(self):
    """Check the 'forward' edges of a labelled DFS of ``self.G`` from node 5 with ``depth_limit=1``."""
    labeled = list(nx.dfs_labeled_edges(self.G, source=5, depth_limit=1))
    tree_edges = []
    for tail, head, label in labeled:
        if label != 'forward':
            continue
        tree_edges.append((tail, head))
    expected = [(5, 5), (5, 4), (5, 6)]
    assert_equal(tree_edges, expected)
def test_dfs_labeled_disconnected_edges(self):
    """Check the 'forward' edges of a labelled DFS over ``self.D`` with no explicit source.

    Each yielded label is indexed with ``'dir'``, i.e. this test expects the
    dict-style edge labels.
    """
    labeled = list(nx.dfs_labeled_edges(self.D))
    forward_only = (
        triple for triple in labeled if triple[2]['dir'] == 'forward'
    )
    tree_edges = [(tail, head) for tail, head, _label in forward_only]
    expected = [(0, 0), (0, 1), (2, 2), (2, 3)]
    assert_equal(tree_edges, expected)
def update_insertable(self, v, stop_at=None):
    """Updates the insertable edges in the graph.

    Runs a labelled depth-first search over ``self.cliquetree`` starting
    from one of the cliques listed in ``self.node_in_cliques[v]`` and adds
    edges ``self._edge(u, v)`` to ``self.insertable``. Afterwards, every
    node of every clique the search did not visit is paired with ``v`` too.

    For early stopping, set stop_at to k. Then, the function will return
    as soon as ``self.insertable`` holds at least k edges after an
    insertion.

    Parameters
    ----------
    v : node
        The node whose insertable edges are collected.
    stop_at : int, optional
        Size of ``self.insertable`` at which the method returns early.
        ``None`` (the default) disables early stopping.

    Raises
    ------
    ValueError
        If an edge from ``v`` to itself is about to be reported.
    """
    v_cliques = self.node_in_cliques[v]
    # Start from any clique containing v; 0 is the fallback when there is
    # none, exactly as before.
    K1 = next(iter(v_cliques), 0)
    Kx = None
    Kx_nodes = None
    cliques_visited = {K1}
    # Stacks that grow on 'forward' edges and shrink on 'reverse' edges
    # while the search is below a Kx clique.
    nodes_seen = []
    min_weights = []

    for clq1, clq2, data in \
            nx.dfs_labeled_edges(self.cliquetree, source=K1):
        # Compare labels by value: ``is`` on string literals only works
        # when both strings happen to be interned.
        direction = data['dir']
        # The (K1, K1) events for the start clique are not real tree edges.
        if direction == 'nontree' or (clq1 == K1 and clq2 == K1):
            continue
        clq_min, clq_max = self._edge(clq1, clq2)
        sep = self.cliquetree[clq_min][clq_max]['nodes']
        if direction == 'forward':
            cliques_visited.add(clq2)
            if clq1 in v_cliques and clq2 not in v_cliques:
                Kx = clq1
                Kx_nodes = self.nodes_in_clique[clq1]
            # ``is not None`` (not truthiness) so that a clique whose id is
            # 0 is handled, and so the pushes here always match the pops in
            # the 'reverse' branch below.
            if Kx is not None:
                w_e = len(sep)
                if not min_weights or w_e <= min_weights[-1]:
                    # w(e) = w(x, y)
                    min_weights.append(w_e)
                    # is it a possible Ky?
                    Ky_nodes = self.nodes_in_clique[clq2]
                    if min_weights[-1] == \
                            len(Kx_nodes.intersection(Ky_nodes)):
                        for u in self.nodes_in_clique[clq2]:
                            if (not nodes_seen or u not in nodes_seen[-1]) \
                                    and u not in self.G[v] and u != v:
                                # Ky for u
                                self.insertable.add(self._edge(u, v))
                                # Defensive guard; the condition above
                                # already excludes u == v.
                                if u == v:
                                    raise ValueError('u is equal to v')
                                if stop_at is not None and \
                                        len(self.insertable) >= stop_at:
                                    return
                else:
                    min_weights.append(min_weights[-1])
                if nodes_seen:
                    seen_previous = nodes_seen[-1]
                else:
                    seen_previous = set()
                nodes_seen.append(self.nodes_in_clique[clq2]
                                  .union(seen_previous))
        elif direction == 'reverse':
            first_Kx = False
            if clq1 in v_cliques and clq2 not in v_cliques:
                # Backing out of the edge that established Kx.
                Kx = None
                Kx_nodes = None
                first_Kx = True
            if Kx is not None or first_Kx:
                min_weights.pop()
                nodes_seen.pop()

    for clq in self.cliquetree:
        # if clique is in another component, edge is insertable
        if clq not in cliques_visited:
            for u in self.nodes_in_clique[clq]:
                self.insertable.add(self._edge(u, v))
                if stop_at is not None and \
                        len(self.insertable) >= stop_at:
                    return
def tree_to_seq(tree, open_to_close=None, node_to_open=None,
                item_type="auto", container_type="auto"):
    r"""
    Encode an ordered forest as a balanced sequence of open/close tokens.

    Every node contributes one opening token when the depth-first search
    enters it and the matching closing token when the search leaves it.
    This is the encoding step used before solving
    :func:`longest_common_balanced_embedding` in
    :func:`maximum_common_ordered_subtree_embedding`.

    Parameters
    ----------
    tree : nx.OrderedDiGraph
        The forest to encode. Traversal starts at every node whose
        in-degree is zero.

    open_to_close : Dict | None
        Mapping from opening tokens to closing tokens. It is updated in
        place, which lets several trees share one encoding. Required (and
        must be non-empty) when ``item_type`` is 'label'.

    node_to_open : Dict | None
        Mapping from tree nodes to their opening tokens. It is updated in
        place, which lets several trees share one encoding.

    item_type : str
        Kind of token to generate: 'auto', 'number', 'chr', or 'label'.
        'auto' picks 'chr' when the graph is small enough and 'number'
        otherwise. With 'label', each node's ``"label"`` attribute is its
        opening token and ``open_to_close`` supplies the closing token.

    container_type : str
        Container of the returned sequence: "auto", "list", "tuple", or
        "str". "auto" picks "str" when the tokens are single characters
        and "tuple" otherwise.

    Returns
    -------
    Tuple[SeqT, Dict, Dict]
        A tuple containing
            sequence - the balanced-sequence representation of the tree
            open_to_close - a mapping between opening and closing tokens
            node_to_open - a mapping between tree nodes and opening tokens

    Raises
    ------
    ValueError
        If ``item_type`` is 'label' and either ``container_type`` is "str"
        but some label is not a single character, or ``open_to_close`` is
        empty.
    TypeError
        If the traversal meets a non-tree edge (the input is not a forest).
    KeyError
        For an unknown ``item_type``, ``container_type`` or edge label.

    Examples
    --------
    >>> from netharn.initializers._nx_ext_v2.tree_embedding import tree_to_seq  # NOQA
    >>> import networkx as nx
    >>> # This function helps us encode this graph as a balance sequence
    >>> tree = nx.path_graph(3, nx.OrderedDiGraph)
    >>> print(forest_str(tree))
    ╙── 0
        └─╼ 1
            └─╼ 2
    >>> # The sequence is represented by opening and closing tokens
    >>> # These are returned a container, which might be a tuple of numbers
    >>> sequence, open_to_close, node_to_open, *_ = tree_to_seq(tree, item_type='number')
    >>> print(('''
    ... sequence = {sequence}
    ... open_to_close = {open_to_close}
    ... node_to_open = {node_to_open}
    ... ''').format(**locals()).strip())
    sequence = (1, 2, 3, -3, -2, -1)
    open_to_close = {1: -1, 2: -2, 3: -3}
    node_to_open = {0: 1, 1: 2, 2: 3}

    >>> # But you might also encode as a sequence of utf8-characters
    >>> # These can often be quicker to use than number encodings
    >>> sequence, open_to_close, node_to_open, *_ = tree_to_seq(tree, item_type='chr')
    >>> print(('''
    ... sequence = {sequence!r}
    ... open_to_close = {open_to_close!r}
    ... node_to_open = {node_to_open!r}
    ... ''').format(**locals()).strip())
    sequence = '\x00\x02\x04\x05\x03\x01'
    open_to_close = {'\x00': '\x01', '\x02': '\x03', '\x04': '\x05'}
    node_to_open = {0: '\x00', 1: '\x02', 2: '\x04'}

    >>> # Here is a more complex example
    >>> tree = nx.balanced_tree(2, 2, nx.DiGraph)
    >>> print(forest_str(tree))
    ╙── 0
        ├─╼ 1
        │   ├─╼ 3
        │   └─╼ 4
        └─╼ 2
            ├─╼ 5
            └─╼ 6
    >>> sequence, *_ = tree_to_seq(tree, item_type='number')
    >>> print('sequence = {!r}'.format(sequence))
    sequence = (1, 2, 3, -3, 4, -4, -2, 5, 6, -6, 7, -7, -5, -1)
    >>> sequence, *_ = tree_to_seq(tree, item_type='chr')
    >>> print('sequence = {!r}'.format(sequence))
    sequence = '\x00\x02\x04\x05\x06\x07\x03\x08\n\x0b\x0c\r\t\x01'

    >>> # Demo custom label encoding: If you have custom labels on your
    >>> # tree nodes, those can be used in the encoding.
    >>> import random
    >>> tree = random_ordered_tree(10, seed=1, directed=True)
    >>> rng = random.Random(0)
    >>> open_to_close = dict(zip("[{(", "]})"))
    >>> for node in tree.nodes:
    ...     tree.nodes[node]["label"] = rng.choice(list(open_to_close.keys()))
    >>> sequence, *_ = tree_to_seq(tree, item_type="label", container_type="str", open_to_close=open_to_close)
    >>> print('sequence = {!r}'.format(sequence))
    sequence = '{[{{{{}({})}{}{}}}]}'
    """
    import networkx as nx

    # utf8 can only encode this many chars; half of them serve as opening
    # tokens and the other half as closing tokens.
    NUM_CHRS = 1112064
    NUM_OPEN_CHRS = NUM_CHRS // 2

    # Fresh token tables unless the caller shares existing ones.
    if open_to_close is None:
        open_to_close = {}
    if node_to_open is None:
        node_to_open = {}

    if item_type != "label":
        # Normal case: this function invents the tokens itself.
        if item_type == "auto":
            # chr mode is fastest but limited to ~half-a-million nodes
            item_type = "chr" if len(tree) < NUM_OPEN_CHRS else "number"
        if container_type == "auto":
            container_type = "str" if item_type == "chr" else "tuple"
    else:
        # Special case: the caller's node labels are the opening tokens.
        all_labels = set()
        for node_attrs in tree.nodes.values():
            all_labels.add(node_attrs["label"])

        if container_type in {"auto", "str"}:
            # A str container only works when every label is one character.
            can_be_str = True
            for label in all_labels:
                if not (isinstance(label, str) and len(label) == 1):
                    can_be_str = False
                    break
            if container_type == "str" and not can_be_str:
                raise ValueError("Labels cannot be contained as a string")
            if container_type == "auto":
                container_type = "str" if can_be_str else "tuple"

        if not open_to_close:
            raise ValueError("must specify open_to_close for custom labeling")

    # Every node without an incoming edge is the root of one tree.
    roots = [n for n in tree.nodes if tree.in_degree[n] == 0]

    tokens = []
    for root in roots:
        for _parent, child, edge_label in nx.dfs_labeled_edges(tree, source=root):
            if edge_label == "reverse":
                # Leaving ``child``: emit the closer paired with its opener.
                tokens.append(open_to_close[node_to_open[child]])
                continue
            if edge_label == "nontree":
                raise TypeError("Input must be a forest")
            if edge_label != "forward":
                raise KeyError(edge_label)

            # Entering ``child``: register its token pair on first sight.
            if child not in node_to_open:
                num_known = len(node_to_open)
                if item_type == "number":
                    # Pos nums are open toks. Neg nums are close toks.
                    opener = num_known + 1
                    closer = -opener
                elif item_type == "chr":
                    # Even chars are open toks. Odd chars are close toks.
                    opener = chr(num_known * 2)
                    closer = chr(num_known * 2 + 1)
                elif item_type == "label":
                    # The user must specify the closing token
                    opener = tree.nodes[child]["label"]
                    closer = open_to_close[opener]
                else:
                    raise KeyError(item_type)
                node_to_open[child] = opener
                open_to_close[opener] = closer
            tokens.append(node_to_open[child])

    if item_type == "chr":
        assert len(node_to_open) < NUM_OPEN_CHRS, "graph is way too big"

    # Pack the tokens into the requested container.
    if container_type == "str":
        sequence = "".join(tokens)
    elif container_type == "tuple":
        sequence = tuple(tokens)
    elif container_type == "list":
        sequence = tokens
    else:
        raise KeyError(container_type)
    return sequence, open_to_close, node_to_open