def get_token_tree(self, token, depth_limit, cooccurrence_threshold):
    """
    Return the tree rooted at `token` in networkx ``tree_data`` form.

    The strategy is to pull the entire matching graph from the repository
    first (which can be memory intensive) and only then reduce it to the
    specific tree with a depth-limited DFS.

    Returns None, after emitting a warning, if the pulled graph has no nodes.
    """
    repository = self.repository
    query = GetTokenTreeQuery(token, depth_limit, cooccurrence_threshold)
    g = rebuild_graph(repository.pull_graph(query))

    print(g.number_of_nodes())
    print(g.number_of_edges())

    if not g.number_of_nodes():
        warn("empty result tree")
        return None

    # By now the occurrence properties should live on the in-memory graph,
    # but they would not show up in the tree export yet; transform() migrates
    # the strength attributes so that they do.
    transformed = transform(g)
    root = get_node_by_attribute(transformed, 'content', token)
    transformed.nodes[root]['strength'] = None

    tree = dfs_tree_with_node_attributes(
        transformed, root, depth_limit=depth_limit
    )
    print("Number of nodes in tree", tree.number_of_nodes())
    print("Number of edges in tree", tree.number_of_edges())
    return networkx.tree_data(tree, root)
def export(self, root=None):
    """
    Serialise ``self.graph`` into a JSON-able structure.

    The output format is selected by ``self.graph_type``:

    * ``'node_link'`` -> ``nx.node_link_data``
    * ``'cytoscape'`` -> ``nx.cytoscape_data``
    * ``'tree'``      -> ``nx.tree_data`` (directed graphs only)

    :param root: root node handed to ``nx.tree_data``; only used for the
        ``'tree'`` graph type.
    :return: the structure produced by the selected exporter, or ``0``
        (after printing a message) when a ``'tree'`` export is requested
        for a graph that is not directed.
    :raises ValueError: if ``self.graph_type`` is not one of the supported
        values.
    """
    graph_type = self.graph_type

    # Compare by value: `is` tests object identity and only ever matched
    # through string interning, which is not guaranteed.
    if graph_type == 'node_link':
        return nx.node_link_data(self.graph)

    if graph_type == 'cytoscape':
        return nx.cytoscape_data(self.graph)

    if graph_type == 'tree':
        if not nx.is_directed(self.graph):
            # Kept as-is for existing callers: report and return 0.
            print('Graph passed into export data is not directed')
            return 0
        return nx.tree_data(self.graph, root)

    # Fail with a clear message instead of an accidental unbound-variable
    # error on the return statement.
    raise ValueError("unsupported graph_type: %r" % (graph_type,))
def export_taxonomy_tree(self, root):
    """
    Return a single taxonomy as a JSON-able tree.

    The result is meant to be consumable by d3-hierarchy, networkx and the
    JavaScript TreeModel library.

    `root` is the 'content' property of a Taxon node with in-degree zero,
    i.e. the string that uniquely names the top Taxon of a taxonomy.  Two
    taxonomies whose top nodes share the same 'content' value are considered
    a corruption of the database; behaviour in that case is undefined.
    """
    g = rebuild_graph(self.repository.pull_graph(SlurpTaxonomiesQuery()))
    root_id = find_root_by_content(g, root)
    return networkx.tree_data(
        dfs_tree_with_node_attributes(g, root_id, depth_limit=None),
        root_id,
    )
def search_with_taxons(self, token, taxon_uris, depth_limit,
                       cooccurrence_threshold):
    """
    Return a tree_data structure with `token` as root and every other node
    of the taxon-filtered query result attached directly beneath it.

    Every node gets a 'strength' of 0, except the root whose 'strength' is
    None.
    """
    repository = self.repository
    query = GetTokenRootWithTaxonFilterQuery(
        token, taxon_uris, depth_limit, cooccurrence_threshold
    )
    g = rebuild_graph(repository.pull_graph(query))

    # An empty graph means nothing matched, so put the root node back.
    if is_null_graph(g):
        g.add_node(0, content=token)

    # Reconnect the graph: hang every non-root node off the root.
    root = get_node_by_attribute(g, 'content', token)
    for node, attrs in g.nodes(data=True):
        attrs['strength'] = 0
        if node == root:
            continue
        g.add_edge(root, node)

    g.nodes[root]['strength'] = None
    return networkx.tree_data(g, root)
import json
import networkx as nx

# Value stored under the 'weight' attribute of each node, keyed by node id.
weights = {0: 4, 1: 7, 2: 8, 3: 4, 4: 5, 5: 6, 6: 1, 7: 2}

# Parent -> child pairs; together they form a tree rooted at node 0.
edges = [
    (0, 1),
    (0, 2),
    (2, 4),
    (4, 6),
    (4, 7),
    (1, 3),
    (1, 5),
]

G = nx.DiGraph()
G.add_nodes_from(range(8))
nx.set_node_attributes(G, weights, 'weight')
G.add_edges_from(edges)

# Serialise the tree (nested-children form, rooted at node 0) to JSON.
tree_json = nx.tree_data(G, root=0)
with open('tree.json', 'w') as out_file:
    json.dump(tree_json, out_file, indent=4, sort_keys=True)
if __name__ == "__main__":
    # Script entry point: load the data, build the node-link graph, write it
    # to graph.json, then derive a breadth-first tree rooted at node 1 and
    # write that to tree.json.
    df = load_data()
    df, child_counter = update_header(df)
    graph, G = create_node_links(df, child_counter)

    # The context manager guarantees the file is flushed and closed.
    with open("graph.json", 'w') as graph_file:
        json.dump(graph, graph_file)

    T = bfs_tree(G, 1)

    # Copy every attribute of each node in G onto the same node in T, so the
    # exported tree carries the node data (the tree returned by bfs_tree
    # presumably has none of it -- verify against bfs_tree's documentation).
    for node_id in T.nodes:
        T.nodes[node_id].update(G.nodes[node_id])

    treeJSON = tree_data(T, 1)
    print(treeJSON)

    with open("tree.json", 'w') as tree_file:
        json.dump(treeJSON, tree_file)
# Skip to the next parent if it doesn't exist as a concrete row # This can happen and it's not an error if get_uri(source) not in g: continue for j in range(i + 1, len(category_sequence)): possible_target = get_concat_id(category_sequence, j) if get_uri(possible_target) in g: g.add_edge(get_uri(source), get_uri(possible_target)) break sources = [v for v, indegree in g.in_degree() if indegree == 0] print("Possible roots:", sources) ARTIFICIAL_ROOT = get_uri('00') # artificially reparent to form a rooted tree g.add_node(ARTIFICIAL_ROOT, content='Theme') for source in sources: g.add_edge(ARTIFICIAL_ROOT, source) tree = dfs_tree_with_node_attributes(g, ARTIFICIAL_ROOT) return tree if __name__ == '__main__': obj = SamuelsLoader() print("Tree is", networkx.tree_data(obj.load(sys.argv[1]), '00'))
sentences = [
    'keep a bar',
    'keep a shop',
    'keep the peace',
    'keep the books',
]

backend = occubrow.backend.OccubrowBackend(Mock(), Mock())
g = networkx.DiGraph()


def consecutive_pairs(sequence):
    """Yield each adjacent pair (sequence[k], sequence[k + 1]) in order."""
    yield from zip(sequence, sequence[1:])


# Link each token to the token that follows it within the same sentence.
for sentence in sentences:
    for left, right in consecutive_pairs(backend.preprocess(sentence)):
        # add_edge inserts any missing endpoint (left first, then right).
        g.add_edge(left, right)

result = networkx.tree_data(g, 'keep')
pprint.pprint(result)
def narrow_graph_to_tree(
    self, graph, chosen_source, depth_limit, coocurrence_threshold
):
    """
    Reduce `graph` to the depth-limited DFS tree rooted at `chosen_source`
    and return it in networkx ``tree_data`` form.

    `coocurrence_threshold` is accepted but not used by this method.
    """
    return networkx.tree_data(
        dfs_tree_with_node_attributes(
            graph, chosen_source, depth_limit=depth_limit
        ),
        root=chosen_source,
    )