def test_two_nodes(self, method):
    """A single unit-weight edge must have algebraic connectivity 2."""
    graph = nx.Graph()
    graph.add_edge(0, 1, weight=1)
    laplacian = nx.laplacian_matrix(graph)

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert almost_equal(connectivity, 2)

    # The returned vector must be an eigenvector of the Laplacian for 2.
    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, 2, vector)
def main():
    """Compare a hand-computed Fiedler pair with networkx's on the sample graph.

    Builds the graph with ``make_suspicious_graph``, plots it, eigen-decomposes
    its dense Laplacian, prints the second-smallest eigenvalue (by magnitude)
    with its eigenvector, prints ``nx.fiedler_vector``'s result, and finally
    partitions and plots the graph using the networkx vector.
    """
    J = make_suspicious_graph()
    utils.plot_graph_random(J)

    LG = nx.laplacian_matrix(J)
    values, vectors = LA.eig(LG.toarray())
    # NOTE(review): rounding to zero decimals is kept from the original; it can
    # collapse distinct small eigenvalues to the same number -- confirm intent.
    values = np.around(values)

    # eig() returns eigenvectors as COLUMNS: value i belongs to vectors[:, i]
    # (the original paired it with row i).
    eigen_pairs = [(abs(value), vectors[:, i]) for i, value in enumerate(values)]
    # Sort on the eigenvalue only, in place; comparing whole tuples would fall
    # through to comparing arrays whenever two eigenvalues tie.
    eigen_pairs.sort(key=lambda pair: pair[0])

    internal_fiedler_vector = nx.fiedler_vector(J)
    print("Fiedler Value: " + str(eigen_pairs[1][0]))
    print("Fiedler Vector: " + str(eigen_pairs[1][1]))
    # The original printed str() here, i.e. an empty string.
    print("Internal Fiedler Vector: " + str(internal_fiedler_vector))

    fiedler_partition = partition_fiedler(internal_fiedler_vector)
    print("J Nodes")
    print(J.nodes)
    print("Fiedler Partition")
    print(fiedler_partition)
    plot_partition(J, fiedler_partition)
def test_path(self, method):
    """Path graph on 8 nodes: connectivity is 2 - sqrt(2 + sqrt(2))."""
    graph = nx.path_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2 + sqrt(2))

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, expected, vector)
def test_seed_argument(self, method):
    """Cycle graph on 8 nodes, solved with an explicit ``seed=1``."""
    graph = nx.cycle_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2)

    connectivity = nx.algebraic_connectivity(
        graph, tol=1e-12, method=method, seed=1
    )
    assert almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method, seed=1)
    check_eigenvector(laplacian, expected, vector)
def test_abbreviation_of_method(self):
    """Path graph on 8 nodes, solved with the method name ``"tracemin"``."""
    graph = nx.path_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2 + sqrt(2))

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method="tracemin")
    assert almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method="tracemin")
    check_eigenvector(laplacian, expected, vector)
def test_abbreviation_of_method(self):
    """Path graph on 8 nodes, solved with the method name ``'tracemin'``."""
    graph = nx.path_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2 + sqrt(2))

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method='tracemin')
    assert_almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method='tracemin')
    check_eigenvector(laplacian, expected, vector)
def test_cycle(self):
    """Cycle graph on 8 nodes: connectivity is 2 - sqrt(2) for each solver."""
    graph = nx.cycle_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2)

    for solver in self._methods:
        connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=solver)
        assert_almost_equal(connectivity, expected)

        vector = nx.fiedler_vector(graph, tol=1e-12, method=solver)
        check_eigenvector(laplacian, expected, vector)
def test_cycle(self, method):
    """Cycle graph on 8 nodes: connectivity is 2 - sqrt(2); needs scipy."""
    pytest.importorskip("scipy")

    graph = nx.cycle_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2)

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, expected, vector)
def test_two_nodes(self, method):
    """A single unit-weight edge must have algebraic connectivity 2."""
    pytest.importorskip("scipy")

    graph = nx.Graph()
    graph.add_edge(0, 1, weight=1)
    laplacian = nx.laplacian_matrix(graph)

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert connectivity == pytest.approx(2, abs=1e-7)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, 2, vector)
def test_abbreviation_of_method(self):
    """Path graph on 8 nodes, solved with the method name ``"tracemin"``."""
    pytest.importorskip("scipy")

    graph = nx.path_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2 + sqrt(2))

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method="tracemin")
    assert connectivity == pytest.approx(expected, abs=1e-7)

    vector = nx.fiedler_vector(graph, tol=1e-12, method="tracemin")
    check_eigenvector(laplacian, expected, vector)
def test_seed_argument(self, method):
    """Cycle graph on 8 nodes, solved with an explicit ``seed=1``."""
    pytest.importorskip("scipy")

    graph = nx.cycle_graph(8)
    laplacian = nx.laplacian_matrix(graph)
    expected = 2 - sqrt(2)

    connectivity = nx.algebraic_connectivity(
        graph, tol=1e-12, method=method, seed=1
    )
    assert connectivity == pytest.approx(expected, abs=1e-7)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method, seed=1)
    check_eigenvector(laplacian, expected, vector)
def test_problematic_graph_issue_2381(self, method):
    """Regression graph: a 4-node path with two extra pendant edges."""
    graph = nx.path_graph(4)
    graph.add_edges_from([(4, 2), (5, 1)])
    laplacian = nx.laplacian_matrix(graph)
    expected = 0.438447187191

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert almost_equal(connectivity, expected)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, expected, vector)
def test_problematic_graph_issue_2381(self):
    """Regression graph: a 4-node path with two extra pendant edges."""
    graph = nx.path_graph(4)
    graph.add_edges_from([(4, 2), (5, 1)])
    laplacian = nx.laplacian_matrix(graph)
    expected = 0.438447187191

    for solver in self._methods:
        connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=solver)
        assert_almost_equal(connectivity, expected)

        vector = nx.fiedler_vector(graph, tol=1e-12, method=solver)
        check_eigenvector(laplacian, expected, vector)
def test_problematic_graph_issue_2381(self, method):
    """Regression graph: a 4-node path with two extra pendant edges."""
    pytest.importorskip("scipy")

    graph = nx.path_graph(4)
    graph.add_edges_from([(4, 2), (5, 1)])
    laplacian = nx.laplacian_matrix(graph)
    expected = 0.438447187191

    connectivity = nx.algebraic_connectivity(graph, tol=1e-12, method=method)
    assert connectivity == pytest.approx(expected, abs=1e-7)

    vector = nx.fiedler_vector(graph, tol=1e-12, method=method)
    check_eigenvector(laplacian, expected, vector)
def test_two_nodes(self):
    """Two-node graphs: a simple edge, then a multigraph weighted by 'spam'."""
    # Case 1: one unit-weight edge, expected connectivity 2.
    simple = nx.Graph()
    simple.add_edge(0, 1, weight=1)
    simple_laplacian = nx.laplacian_matrix(simple)
    for solver in self._methods:
        connectivity = nx.algebraic_connectivity(simple, tol=1e-12, method=solver)
        assert_almost_equal(connectivity, 2)
        vector = nx.fiedler_vector(simple, tol=1e-12, method=solver)
        check_eigenvector(simple_laplacian, 2, vector)

    # Case 2: self-loop plus two parallel edges; the test expects 6 and checks
    # the eigenvector against -3 times the 'spam'-weighted Laplacian.
    multi = nx.MultiGraph()
    multi.add_edge(0, 0, spam=1e8)
    multi.add_edge(0, 1, spam=1)
    multi.add_edge(0, 1, spam=-2)
    scaled_laplacian = -3 * nx.laplacian_matrix(multi, weight='spam')
    for solver in self._methods:
        connectivity = nx.algebraic_connectivity(
            multi, weight='spam', tol=1e-12, method=solver
        )
        assert_almost_equal(connectivity, 6)
        vector = nx.fiedler_vector(multi, weight='spam', tol=1e-12, method=solver)
        check_eigenvector(scaled_laplacian, 6, vector)
def test_two_nodes_multigraph(self, method):
    """Two-node multigraph weighted by 'spam': expected connectivity is 6."""
    graph = nx.MultiGraph()
    graph.add_edge(0, 0, spam=1e8)
    graph.add_edge(0, 1, spam=1)
    graph.add_edge(0, 1, spam=-2)
    # The eigenvector is checked against -3 times the weighted Laplacian.
    scaled_laplacian = -3 * nx.laplacian_matrix(graph, weight="spam")

    connectivity = nx.algebraic_connectivity(
        graph, weight="spam", tol=1e-12, method=method
    )
    assert almost_equal(connectivity, 6)

    vector = nx.fiedler_vector(graph, weight="spam", tol=1e-12, method=method)
    check_eigenvector(scaled_laplacian, 6, vector)
def find_communities(self):
    """Split ``self._nodes`` in two by 2-means clustering of the Fiedler vector.

    Returns:
        ``(positive, negative)``: entries of ``self._nodes`` whose KMeans label
        is 1 and 0 respectively.
    """
    expected_total = len(self._nodes)

    fiedler = nx.fiedler_vector(self._graph)
    # KMeans wants a 2-D array: one sample per node, one feature each.
    samples = np.array(fiedler).reshape((len(fiedler), 1))
    labels = KMeans(n_clusters=2).fit(samples).labels_

    label_zero = [idx for idx, label in enumerate(labels) if label == 0]
    label_one = [idx for idx, label in enumerate(labels) if label == 1]
    assert len(label_zero) + len(label_one) == expected_total

    positive_dolphins = [self._nodes[idx] for idx in label_one]
    negative_dolphins = [self._nodes[idx] for idx in label_zero]
    return positive_dolphins, negative_dolphins
def ComputeFiedlerVector(G):
    """Return the Fiedler vector of a graph.

    Args:
        G: a networkx graph, or a NumPy adjacency matrix. Arrays (including
            ndarray subclasses) are converted with ``nx.to_networkx_graph``
            before the vector is computed.

    Returns:
        Whatever ``nx.fiedler_vector`` returns for the graph.
    """
    # isinstance replaces the original type(...) == type(<dummy array>) check,
    # which allocated a throwaway array and rejected ndarray subclasses.
    if isinstance(G, np.ndarray):
        G = nx.to_networkx_graph(G)
    return nx.fiedler_vector(G)
def spectral_partitioning(G):
    """Label each node -1 or 1 by comparing its Fiedler entry to the median.

    Entries strictly below the median of the Lanczos-computed Fiedler vector
    get -1; all others get 1. Labels are returned as a list in vector order.
    """
    fiedler = nx.fiedler_vector(G, method='lanczos')
    threshold = np.median(fiedler)
    return [-1 if entry < threshold else 1 for entry in fiedler]
def test_two_nodes_multigraph(self, method):
    """Two-node multigraph weighted by 'spam': expected connectivity is 6."""
    pytest.importorskip("scipy")

    graph = nx.MultiGraph()
    graph.add_edge(0, 0, spam=1e8)
    graph.add_edge(0, 1, spam=1)
    graph.add_edge(0, 1, spam=-2)
    # The eigenvector is checked against -3 times the weighted Laplacian.
    scaled_laplacian = -3 * nx.laplacian_matrix(graph, weight="spam")

    connectivity = nx.algebraic_connectivity(
        graph, weight="spam", tol=1e-12, method=method
    )
    assert connectivity == pytest.approx(6, abs=1e-7)

    vector = nx.fiedler_vector(graph, weight="spam", tol=1e-12, method=method)
    check_eigenvector(scaled_laplacian, 6, vector)
def test_buckminsterfullerene(self):
    """Check both Laplacian variants of the 60-node fullerene graph.

    Solvers that report a missing Cholesky or LU backend are tolerated; any
    other ``NetworkXError`` is re-raised.
    """
    edges = [
        (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
        (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
        (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
        (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
        (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
        (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
        (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
        (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
        (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
        (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
        (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
        (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
        (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
        (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
        (47, 54), (48, 55),
    ]
    graph = nx.Graph(edges)
    tolerated = (('Cholesky solver unavailable.', ), ('LU solver unavailable.', ))

    for normalized in (False, True):
        if normalized:
            laplacian = nx.normalized_laplacian_matrix(graph)
            expected = 0.08113391537997749
        else:
            laplacian = nx.laplacian_matrix(graph)
            expected = 0.2434017461399311

        for solver in methods:
            try:
                connectivity = nx.algebraic_connectivity(
                    graph, normalized=normalized, tol=1e-12, method=solver
                )
                assert almost_equal(connectivity, expected)
                vector = nx.fiedler_vector(
                    graph, normalized=normalized, tol=1e-12, method=solver
                )
                check_eigenvector(laplacian, expected, vector)
            except nx.NetworkXError as e:
                if e.args not in tolerated:
                    raise
def normalized_cut(G):
    """Return an indicator vector for a spectral cut of ``G``.

    If ``G`` is connected, its Fiedler vector (LOBPCG, tol 1e-5) is passed to
    ``sweep``; otherwise the largest connected component is handed to
    ``separate_lcc``. The result is returned as a NumPy array.

    Raises:
        IndexError: if ``G`` has no nodes (there is no largest component).
    """
    # networkx.connected_component_subgraphs was removed in networkx 2.4;
    # G.subgraph(c).copy() over connected_components is its documented
    # equivalent and also works on older releases.
    components = sorted(networkx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if networkx.number_of_nodes(G) == networkx.number_of_nodes(G0):
        x = networkx.fiedler_vector(G, method='lobpcg', tol=1e-5)
        x = sweep(x, G)
    else:
        # G is disconnected: delegate to the largest-component splitter.
        x = separate_lcc(G, G0)
    return numpy.array(x)
def ratio_cut(G):
    """
    Computes ratio-cut of G based on second eigenvector of the Laplacian.

    Input:
        * G: Graph
    Output:
        * x: Indicator vector (NumPy array)

    Raises IndexError if G has no nodes (there is no largest component).
    """
    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # G.subgraph(c).copy() over connected_components is its documented
    # equivalent and also works on older releases.
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if nx.number_of_nodes(G) == nx.number_of_nodes(G0):
        # scipy.random was only an alias of numpy.random and no longer exists
        # in current SciPy; seeding numpy's global generator is the same call.
        np.random.seed(1)
        x = nx.fiedler_vector(G, method=_method, tol=1e-5)
        x = sweep(x, G)
    else:
        # In case G is not connected
        x = separate_lcc(G, G0)
    return np.array(x)
def normalized_cut(G):
    """Return an indicator vector for a spectral cut of ``G``.

    If ``G`` is connected, its Fiedler vector (LOBPCG, tol 1e-5) is passed to
    ``sweep``; otherwise the largest connected component is handed to
    ``separate_lcc``. The result is returned as a NumPy array.

    Raises:
        IndexError: if ``G`` has no nodes (there is no largest component).
    """
    # networkx.connected_component_subgraphs was removed in networkx 2.4;
    # G.subgraph(c).copy() over connected_components is its documented
    # equivalent and also works on older releases.
    components = sorted(networkx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if networkx.number_of_nodes(G) == networkx.number_of_nodes(G0):
        x = networkx.fiedler_vector(G, method='lobpcg', tol=1e-5)
        x = sweep(x, G)
    else:
        # G is disconnected: delegate to the largest-component splitter.
        x = separate_lcc(G, G0)
    return numpy.array(x)
def fiedler_vector(g, _weight, _normalized=False, _out_path=None):
    """Compute the Fiedler vector of ``g`` and package it as a result record.

    Args:
        g: graph passed to ``nx.fiedler_vector``.
        _weight: edge attribute name used as weight.
        _normalized: forwarded as ``normalized``.
        _out_path: when not None, the record is also written there through
            ``graph_io.write_json_data``.

    Returns:
        A dict with metadata, timing, and the per-node values after
        ``__normalize_filter`` / ``__rank_filter``; or None when networkx
        raises ``NetworkXError`` (a message is printed in that case).
    """
    try:
        started = time.time()
        vector = nx.fiedler_vector(g, weight=_weight, normalized=_normalized)
        # Map every node to its entry, in g.nodes order.
        data = {node: vector[pos] for pos, node in enumerate(list(g.nodes))}

        ret = {
            'name': 'fiedler',
            'normalized': _normalized,
            'weight': _weight,
            'num_of_nodes': g.number_of_nodes(),
            'num_of_edges': g.number_of_edges(),
            # Measured before the two filters below run.
            'process_time': (time.time() - started),
            'data': __normalize_filter(data),
            'ranked': __rank_filter(data),
        }
        if _out_path is not None:
            graph_io.write_json_data(_out_path, ret)
        return ret
    except nx.exception.NetworkXError:
        print(">>> FAILED (fiedler_vector): processing error. maybe graph not connected?")
        return None
def adj_mat():
    """Partition a fixed 10-node example graph by the sign of its Fiedler vector.

    Builds the graph from a hard-coded adjacency matrix, prints the Laplacian
    eigenvalues and the Fiedler vector, then plots the two-way partition:
    nodes with a strictly positive entry go to ``partition[1]``, the rest to
    ``partition[0]``.
    """
    adjacency = np.array([
        [0, 1, 1, 0, 0, 1, 0, 0, 1, 1],
        [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 1, 1, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
    ])
    # nx.from_numpy_matrix was removed in networkx 3.0; to_networkx_graph
    # accepts an ndarray on old and new releases alike.
    graph = nx.to_networkx_graph(adjacency)

    fiedler_vector = nx.fiedler_vector(graph)
    laplacian = nx.laplacian_matrix(graph)
    print(np.linalg.eigvals(laplacian.toarray()))
    print(fiedler_vector)

    # Node labels are the row indices 0..9, so the position is the node.
    partition = ([], [])
    for node, entry in enumerate(fiedler_vector):
        partition[1 if entry > 0 else 0].append(node)
    plot_partition(graph, partition)
def test_buckminsterfullerene(self):
    """Check both Laplacian variants of the 60-node fullerene graph.

    Solvers that report a missing Cholesky or LU backend are tolerated; any
    other ``NetworkXError`` is re-raised.
    """
    edges = [
        (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
        (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
        (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
        (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
        (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
        (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
        (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
        (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
        (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
        (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
        (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
        (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
        (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
        (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
        (47, 54), (48, 55),
    ]
    graph = nx.Graph(edges)
    tolerated = (('Cholesky solver unavailable.',), ('LU solver unavailable.',))

    for normalized in (False, True):
        if normalized:
            laplacian = nx.normalized_laplacian_matrix(graph)
            expected = 0.08113391537997749
        else:
            laplacian = nx.laplacian_matrix(graph)
            expected = 0.2434017461399311

        for solver in methods:
            try:
                connectivity = nx.algebraic_connectivity(
                    graph, normalized=normalized, tol=1e-12, method=solver
                )
                assert_almost_equal(connectivity, expected)
                vector = nx.fiedler_vector(
                    graph, normalized=normalized, tol=1e-12, method=solver
                )
                check_eigenvector(laplacian, expected, vector)
            except nx.NetworkXError as e:
                if e.args not in tolerated:
                    raise
def test_buckminsterfullerene(self, normalized, sigma, laplacian_fn, method):
    """Check one (Laplacian variant, solver) pair on the 60-node fullerene graph.

    ``laplacian_fn`` builds the reference matrix and ``sigma`` is the expected
    connectivity. A missing Cholesky or LU backend is tolerated; any other
    ``NetworkXError`` is re-raised.
    """
    pytest.importorskip("scipy")

    edges = [
        (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
        (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
        (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
        (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
        (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
        (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
        (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
        (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
        (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
        (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
        (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
        (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
        (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
        (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
        (47, 54), (48, 55),
    ]
    graph = nx.Graph(edges)
    laplacian = laplacian_fn(graph)
    tolerated = (
        ("Cholesky solver unavailable.", ),
        ("LU solver unavailable.", ),
    )

    try:
        connectivity = nx.algebraic_connectivity(
            graph, normalized=normalized, tol=1e-12, method=method
        )
        assert connectivity == pytest.approx(sigma, abs=1e-7)
        vector = nx.fiedler_vector(
            graph, normalized=normalized, tol=1e-12, method=method
        )
        check_eigenvector(laplacian, sigma, vector)
    except nx.NetworkXError as err:
        if err.args not in tolerated:
            raise
def test_fiedler_vector_tracemin_chol():
    """Requesting method "tracemin_chol" must raise NetworkXError."""
    pytest.importorskip("scipy")
    barbell = nx.barbell_graph(5, 4)
    with pytest.raises(nx.NetworkXError):
        nx.fiedler_vector(barbell, method="tracemin_chol")
# completed_graph(inchikey): build, draw and spectrally analyse a cross-reference
# graph for one compound identified by its InChIKey.
#
# What the code below visibly does, in order:
#   1. Fetches https://www.ebi.ac.uk/unichem/rest/inchikey/<inchikey> with pycurl
#      and, for every returned record, fetches .../unichem/rest/sources/<src_id>;
#      each source name becomes a G6 node labelled "<name>/<src_compound_id>".
#   2. For G6 nodes whose source is listed in bio2rdf_dataset, sends a federated
#      SPARQL query through the endpoint at cpctas-lcmb.pmf.kg.ac.rs:2020 and adds
#      a new node for every returned ?o value not already present. It then
#      connects every pair of nodes, removes nodes whose source is not in
#      bio2rdf_dataset, draws the graph and saves a PNG under
#      /var/www/specint.org/public_html/specint/img/.
#   3. Repeats the UniChem fetch into G7, numbering nodes from max(G6.nodes())+1,
#      and queries chem2bio2rdf-style endpoints (separate query text for
#      'kegg_ligand', 'drugbank' and the remaining sources); filters against
#      chem2bio2rdf_dataset, draws and saves a second PNG.
#   4. G10 = nx.union(G6, G7); drawn and saved as EPS and PNG.
#   5. For the single entry 'pubchem' in values_for_connected_nodes: collects the
#      G10 nodes with that source. With at least two such nodes it removes the
#      highest-numbered node among those sharing an identifier, links the
#      lowest-numbered one to every higher-numbered node not in G6, redraws,
#      computes nx.fiedler_vector(G10, method='lobpcg'), calls
#      create_python_file(...) and returns {'fv': ..., 'connection_node': ...}.
#      Otherwise it returns {'fv': 'Undirected graph is not connected! ...'}.
#
# Parameter: inchikey -- string concatenated unescaped into the UniChem URL and
#            into the output file names.
# Returns:   a dict; the 'fv' key holds either the Fiedler vector or an error text.
#
# NOTE(review): the layout of this block has been collapsed onto a few physical
#   lines, so the nesting described above is inferred from statement order --
#   confirm against the properly indented original.
# NOTE(review): appears to target Python 2 and networkx 1.x (G.node[...],
#   calling .index() on G.nodes(), StringIO handed to pycurl) -- confirm before
#   running on a current stack.
# NOTE(review): both node-insertion loops use a bare `except: continue`, which
#   hides every failure, including programming errors.
# NOTE(review): the G7 queries select only ?o, yet the insertion code reads
#   result1["p"]["value"]; that looks like a KeyError swallowed by the bare
#   except, so G7 may never gain second-level nodes -- verify.
# NOTE(review): range(index_for_new_graph+1, index_for_new_graph+num1) stops one
#   short of the last id assigned through j -- possible off-by-one, verify.
# NOTE(review): max(my_array_for_node_connected) raises ValueError when no two
#   'pubchem' nodes share an identifier; the `break` after `return` is
#   unreachable; `pos` from nx.circular_layout is never passed to nx.draw.
def completed_graph(inchikey): #graphs generated using PIBAS dataset, UniChem and Bio2RDF G6 = nx.Graph() G8 = nx.Graph() #we start from PIBAS local ontology and for a given InChiKey we found compound acronym sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql") url = 'https://www.ebi.ac.uk/unichem/rest/inchikey/'+inchikey #print url j=0 storage = StringIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEFUNCTION, storage.write) c.perform() c.close() content = storage.getvalue() unichem_content = json.loads(content) for rs in unichem_content: src_compound_id= rs['src_compound_id'] url = 'https://www.ebi.ac.uk/unichem/rest/sources/'+rs['src_id'] storage = StringIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEFUNCTION, storage.write) c.perform() c.close() content = storage.getvalue() unichem_src_name = json.loads(content) for rs in unichem_src_name: name=rs['name'] #print name G6.add_node(j,name_label=name+'/'+src_compound_id) j=j+1 bio2rdf_dataset=['bindingdb','pubchem','pharmgkb','chebi','kegg_ligand','pdb','drugbank','chembl','pibas','ndc'] num1=G6.number_of_nodes() nodes1=G6.nodes() for x in range(0, num1): if((G6.node[x]['name_label']).split('/')[0] in bio2rdf_dataset): sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql") if((G6.node[x]['name_label']).split('/')[0]=='kegg_ligand'): sparql.setQuery( """ PREFIX bindingdb: <http://bio2rdf.org/bindingdb:> PREFIX pubchem: <http://bio2rdf.org/pubchem:> PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:> PREFIX chebi: <http://bio2rdf.org/chebi:> PREFIX kegg_ligand: <http://bio2rdf.org/kegg:> PREFIX pdb: <http://bio2rdf.org/pdb:> PREFIX drugbank: <http://bio2rdf.org/drugbank:> PREFIX chembl: <http://bio2rdf.org/chembl:> PREFIX ndc: <http://bio2rdf.org/ndc:> select ?p ?o WHERE { SERVICE SILENT<http://kegg.bio2rdf.org/sparql> { OPTIONAL{ %s:%s ?p ?o. 
FILTER(CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-drugbank") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-pubchem.compound") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-kegg") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-chembl") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-chebi") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:same-as")). } } } """ % (((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[1]))) else: sparql.setQuery( """ PREFIX bindingdb: <http://bio2rdf.org/bindingdb:> PREFIX pubchem: <http://bio2rdf.org/pubchem:> PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:> PREFIX chebi: <http://bio2rdf.org/chebi:> PREFIX kegg_ligand: <http://bio2rdf.org/kegg:> PREFIX pdb: <http://bio2rdf.org/pdb:> PREFIX drugbank: <http://bio2rdf.org/drugbank:> PREFIX chembl: <http://bio2rdf.org/chembl:> PREFIX ndc: <http://bio2rdf.org/ndc:> select ?p ?o WHERE { SERVICE SILENT<http://%s.bio2rdf.org/sparql> { OPTIONAL{ %s:%s ?p ?o. FILTER(CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-drugbank") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-pubchem.compound") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-kegg") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-chembl") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-chebi") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-ndc") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:same-as")). 
} } } """ % (((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[1]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]))) sparql.setReturnFormat(JSON) final_results1 = sparql.query().convert() try: v=G6.number_of_nodes() for result1 in final_results1["results"]["bindings"]: if(result1["o"]["value"]!=""): node_for_add=result1["o"]["value"] node_of_second_level=node_for_add.split("/")[-1] if(node_of_second_level.split(':')[0]=='kegg'): node_for_add='kegg_ligand/'+node_of_second_level.split(':')[1] else: node_for_add=node_of_second_level.split(':')[0]+'/'+node_of_second_level.split(':')[1] #print node_for_add compare_node=[] for x in range(0,G6.number_of_nodes()): compare_node.append(str((G6.node[x]['name_label']).split('/')[0])+'/'+str((G6.node[x]['name_label']).split('/')[1])) if(node_for_add not in compare_node): G6.add_node(v,name_label=node_for_add,predicate=result1["p"]["value"]) v=v+1 #print v except: continue nodes=G6.nodes() #print nodes edges = combinations(nodes, 2) G6.add_nodes_from(nodes) G6.add_edges_from(edges) num1=G6.number_of_nodes() nodes1=G6.nodes() left_graph_for_remove=[] for x in range(0, num1): if(((G6.node[x]['name_label']).split('/')[0] not in bio2rdf_dataset)): G6.remove_node(x) nodes1=G6.nodes() #print nodes1 custom_labels1={} for x in nodes1: custom_labels1[x] = str(x)+':'+G6.node[x]['name_label'] #create and draw graph G6 pos=nx.circular_layout(G6,dim=2, scale=100) plt.clf() nx.draw(G6, labels=custom_labels1, with_labels=True) plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_bio2rdf_'+inchikey+'.png') #******************Using 
Chem2Bio2RDF*********************************************************************************** sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql") G7 = nx.Graph() index_for_new_graph=max(G6.nodes()) url = 'https://www.ebi.ac.uk/unichem/rest/inchikey/'+inchikey #print url j=index_for_new_graph+1 storage = StringIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEFUNCTION, storage.write) c.perform() c.close() content = storage.getvalue() unichem_content = json.loads(content) for rs in unichem_content: src_compound_id= rs['src_compound_id'] url = 'https://www.ebi.ac.uk/unichem/rest/sources/'+rs['src_id'] storage = StringIO() c = pycurl.Curl() c.setopt(c.URL, url) c.setopt(c.WRITEFUNCTION, storage.write) c.perform() c.close() content = storage.getvalue() unichem_src_name = json.loads(content) for rs in unichem_src_name: name=rs['name'] G7.add_node(j,name_label=name+'/'+src_compound_id) j=j+1 chem2bio2rdf_dataset=['bindingdb','pubchem','chebi','kegg_ligand','kegg','pdb','drugbank','chembl','uniprot','matador','ctd','dcdb','hgnc','pharmgkb','hprd'] num1=G7.number_of_nodes() nodes1=G7.nodes() for x in range(index_for_new_graph+1,index_for_new_graph+num1): if(((G7.node[x]['name_label']).split('/')[0] in bio2rdf_dataset) and ("chembl" not in (G7.node[x]['name_label']).split('/')[0])): sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql") if((G7.node[x]['name_label']).split('/')[0]=='kegg_ligand'): sparql.setQuery( """ PREFIX bindingdb: <http://chem2bio2rdf.org/bindingdb/resource/> PREFIX pubchem: <http://chem2bio2rdf.org/pubchem/resource/> PREFIX uniprot: <http://chem2bio2rdf.org/uniprot/resource/> PREFIX chebi: <http://chem2bio2rdf.org/chebi/resource/chebi/CHEBI~> PREFIX kegg_ligand: <http://chem2bio2rdf.org/kegg/resource/kegg_ligand/> PREFIX pdb: <http://chem2bio2rdf.org/pdb/resource/pdb_ligand/> PREFIX drugbank: <http://chem2bio2rdf.org/drugbank/resource/> PREFIX matador: <http://chem2bio2rdf.org/matador/resource/> PREFIX 
chembl: <http://chem2bio2rdf.org/chembl/resource/> PREFIX uniprot: <http://chem2bio2rdf.org/uniprot/resource/> PREFIX db: <http://chem2bio2rdf.org/kegg/resource/> select ?o WHERE { SERVICE SILENT<http://cheminfov.informatics.indiana.edu:8080/kegg/sparql> { OPTIONAL{ %s:%s db:CID ?o. } } } """ % (((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[1]))) elif((G7.node[x]['name_label']).split('/')[0]=='drugbank'): sparql.setQuery( """ PREFIX db:<http://chem2bio2rdf.org/drugbank/resource/> PREFIX drugbank:<http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/> select ?o WHERE { SERVICE SILENT<http://147.91.203.161:8890/sparql>{ drugbank:%s db:CID ?o. } } """ % (G7.node[x]['name_label']).split('/')[1]) else: sparql.setQuery( """ PREFIX bindingdb: <http://bio2rdf.org/bindingdb:> PREFIX pubchem: <http://bio2rdf.org/pubchem:> PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:> PREFIX chebi: <http://bio2rdf.org/chebi:> PREFIX kegg_ligand: <http://bio2rdf.org/kegg:> PREFIX pdb: <http://bio2rdf.org/pdb:> PREFIX drugbank: <http://bio2rdf.org/drugbank:> PREFIX chembl: <http://bio2rdf.org/chembl:> PREFIX ndc: <http://bio2rdf.org/ndc:> PREFIX db: <http://chem2bio2rdf.org/%s/resource/> select ?o WHERE { SERVICE SILENT<http://cheminfov.informatics.indiana.edu:8080/%s/sparql> { OPTIONAL{ %s:%s db:CID ?o. 
} } } """ % (((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[1]))) sparql.setReturnFormat(JSON) final_results1 = sparql.query().convert() try: v=G7.number_of_nodes() for result1 in final_results1["results"]["bindings"]: if(result1["o"]["value"]!=""): node_for_add=result1["o"]["value"] node_of_second_level=node_for_add.split("/")[-1] if(node_of_second_level.split(':')[0]=='kegg'): node_for_add='kegg_ligand/'+node_of_second_level.split(':')[1] else: node_for_add=node_of_second_level.split(':')[0]+'/'+node_of_second_level.split(':')[1] #print node_for_add compare_node=[] for x in range(index_for_new_graph+1,index_for_new_graph+num1): compare_node.append(str((G7.node[x]['name_label']).split('/')[0])+'/'+str((G7.node[x]['name_label']).split('/')[1])) if(node_for_add not in compare_node): G7.add_node(v,name_label=node_for_add,predicate=result1["p"]["value"]) v=v+1 #print v except: continue nodes=G7.nodes() #print nodes edges = combinations(nodes, 2) G7.add_nodes_from(nodes) G7.add_edges_from(edges) num1=G7.number_of_nodes() nodes1=G7.nodes() left_graph_for_remove=[] for x in nodes1: if(((G7.node[x]['name_label']).split('/')[0] not in chem2bio2rdf_dataset)): G7.remove_node(x) nodes1=G7.nodes() custom_labels1={} for x in nodes1: custom_labels1[x] = str(x)+':'+G7.node[x]['name_label'] #create and draw graph G7 pos=nx.circular_layout(G7,dim=2, scale=100) plt.clf() nx.draw(G7, labels=custom_labels1, with_labels=True) plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_chem2bio2rdf_'+inchikey+'.png') #*************Join grpahs************************* G10=nx.union(G6,G7) nodes1=G10.nodes() custom_labels1={} for x in nodes1: custom_labels1[x] = str(x)+':'+G10.node[x]['name_label'] #create and draw graph G10 pos=nx.circular_layout(G10,dim=2, scale=300) plt.clf() nx.draw(G10, labels=custom_labels1, with_labels=True) 
plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.eps',format='eps', dpi=300) plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.png') #************************Removing node************************ nodes_for_connection=[] nodes_value_for_connection=[] values_for_connected_nodes=['pubchem'] #for x in values_for_connected_nodes: #values_for_connected_nodes.remove(x) #print values_for_connected_nodes for x in values_for_connected_nodes: #connection_node=random.choice(values_for_connected_nodes) connection_node=x #print connection_node nodes5=G10.nodes() for x in nodes5: if((G10.node[x]['name_label']).split('/')[0]==connection_node): nodes_for_connection.append(x) nodes_value_for_connection.append((G10.node[x]['name_label']).split('/')[1]) #print nodes_for_connection if(len(nodes_for_connection)>=2): my_array_for_node_connected=[] for x in range(0,len(nodes_value_for_connection)-1): for y in range(x+1,len(nodes_value_for_connection)): if(nodes_value_for_connection[x]==nodes_value_for_connection[y]): my_array_for_node_connected.append(nodes_for_connection[x]) my_array_for_node_connected.append(nodes_for_connection[y]) #print my_array_for_node_connected #print max(my_array_for_node_connected) #print min(my_array_for_node_connected) G10.remove_node(max(my_array_for_node_connected)) for_delete=G10.nodes() #print for_delete position=(G10.nodes()).index(min(my_array_for_node_connected)) #print position for x in for_delete: if((x>min(my_array_for_node_connected)) and (x not in G6.nodes())): G10.add_edge(min(my_array_for_node_connected),x) nodes1=G10.nodes() custom_labels1={} for x in nodes1: custom_labels1[x] = str(x)+':'+G10.node[x]['name_label'] #create and draw graph G10 pos=nx.circular_layout(G10,dim=2, scale=300) plt.clf() nx.draw(G10, labels=custom_labels1, with_labels=True) plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.eps',format='eps', dpi=300) 
plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.png') #H = nx.Graph() #H.add_nodes_from(G10.nodes()) #H.add_edges_from(G10.edges()) image='myimage_for_completed_graph.png' fv=nx.fiedler_vector(G10,method='lobpcg') create_python_file(G10.nodes(),G10.edges(), inchikey) return {'fv':fv, 'connection_node':connection_node} break else: #return completed_graph(inchikey) #continue return {'fv':'Undirected graph is not connected! Please, try again!'} #completed_graph("IHUNBGSDBOWDMA-AQFIFDHZSA-N")
def approx_min_conductance_partitioning(g: LightMultiGraph, max_k=1):
    """
    Approximate minimum conductance partitioning. I'm using the median method as referenced here:
    http://www.ieor.berkeley.edu/~goldberg/pubs/krishnan-recsys-final2.pdf

    The nodes are ordered by their Fiedler-vector entry (largest first); the
    first half forms one side and the rest the other. Stray components are
    then moved across until both sides induce connected subgraphs, and each
    side is partitioned recursively.

    :param g: graph to recursively partition
    :param max_k: upper bound of number of nodes allowed in the leaves
    :return: a dendrogram (nested lists; leaves are lists of nodes)
    """
    node_list = list(g.nodes())
    if len(node_list) <= max_k:
        assert len(node_list) > 0
        return node_list

    if not nx.is_connected(g):
        # nx.connected_component_subgraphs was removed in networkx 2.4;
        # g.subgraph(c).copy() over connected_components is its documented
        # equivalent and also works on older releases.
        lvl = [
            approx_min_conductance_partitioning(g.subgraph(component).copy(), max_k)
            for component in nx.connected_components(g)
        ]
        assert len(lvl) > 0
        return lvl

    # g is connected from here on (guaranteed by the branch above).
    fiedler_vector = nx.fiedler_vector(g, method='lanczos')

    # Positions ordered by descending Fiedler entry; sorted() is stable, so
    # ties keep their original relative order.
    order = sorted(
        range(len(fiedler_vector)), key=fiedler_vector.__getitem__, reverse=True
    )
    half_idx = len(order) // 2  # floor division: p2 takes the extra node
    p1 = {node_list[idx] for idx in order[:half_idx]}
    p2 = {node_list[idx] for idx in order[half_idx:]}

    sg1 = g.subgraph(p1)
    sg2 = g.subgraph(p2)

    iter_count = 0
    while not (nx.is_connected(sg1) and nx.is_connected(sg2)):
        sg1 = g.subgraph(p1)
        sg2 = g.subgraph(p2)

        # Hack to check and fix non connected subgraphs: keep the largest
        # component on its side and move every other component across.
        if not nx.is_connected(sg1):
            for stray in sorted(nx.connected_components(sg1), key=len, reverse=True)[1:]:
                p2.update(stray)
                p1.difference_update(stray)
            sg2 = g.subgraph(p2)  # updating sg2 since p2 has changed

        if not nx.is_connected(sg2):
            for stray in sorted(nx.connected_components(sg2), key=len, reverse=True)[1:]:
                p1.update(stray)
                p2.difference_update(stray)

        iter_count += 1

    if iter_count > 2:
        print('it took {} iterations to stabilize'.format(iter_count))

    assert nx.is_connected(sg1) and nx.is_connected(
        sg2), "subgraphs are not connected in cond"

    lvl = [
        approx_min_conductance_partitioning(sg1, max_k),
        approx_min_conductance_partitioning(sg2, max_k),
    ]
    assert (len(lvl) > 0)
    return lvl
def sccf_helper(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
    """
    Splits the network into clusters by repeated Fiedler-vector bisection
    and collects, from every cluster, the nodes with the highest closeness
    centrality together with their neighbors in the full graph.

    Parameters:
        seed_num: Number of seeds wanted; only used to decide how many
            clusters to aim for.
        graph: Already-loaded graph. Takes precedence over the JSON inputs.
        graph_json_filename: Filename where the adjacency list lives as JSON.
        graph_json_str: Graph as an adjacency list string in JSON.

    Return:
        (candidate_nodes, candidate_neighbors): the list of chosen nodes and
        a dict mapping each chosen node to the list of its neighbors in the
        full graph.
    """
    # Local import: a plain FIFO is all that is needed here, and deque is
    # available under the same name on Python 2 and Python 3.
    from collections import deque

    # parse the graph
    if graph is not None:
        G = graph
    elif graph_json_filename is not None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    # try to get about 2 nodes in each cluster
    node_per_cluster = 2
    # NOTE(review): for an int seed_num, `/` floors on Python 2 but not on
    # Python 3, so the cluster target can differ between interpreters.
    max_depth = int(math.ceil(np.log2(seed_num / node_per_cluster))) + 1
    target_clusters = 2 ** max_depth

    clusters = deque([G])
    # Number of clusters in a row that were re-queued without being split.
    # Once it reaches the queue length, every queued cluster is unsplittable
    # and continuing would spin forever.
    stalled = 0

    # divide graph into (at most) 2**max_depth clusters
    while clusters and len(clusters) < target_clusters and stalled < len(clusters):
        G_curr = clusters.popleft()
        # work only on the largest connected component
        largest_component = max(nx.connected_components(G_curr), key=len)
        G_curr_c = G_curr.subgraph(largest_component)
        # NOTE(review): size() counts edges, not nodes - confirm that is the
        # intended measure of "too small".
        if G_curr_c.size() < 2 * node_per_cluster:
            # put it back if cluster is too small
            clusters.append(G_curr_c)
            stalled += 1
            continue
        stalled = 0

        # get fiedler vector
        fiedler_vector = nx.fiedler_vector(G_curr_c, normalized=True, tol=1e-01)
        # list() gives positional access; on NetworkX >= 2 indexing the
        # object returned by nodes() looks a node up by key instead.
        node_list = list(G_curr_c.nodes())

        # split non-negative and negative terms in fiedler vector
        node_list_sub_1 = []
        node_list_sub_2 = []
        for node, value in zip(node_list, fiedler_vector):
            if value >= 0:
                node_list_sub_1.append(node)
            else:
                node_list_sub_2.append(node)

        # separate the graph into two subgraphs, ignoring clusters too small
        for part in (node_list_sub_1, node_list_sub_2):
            if len(part) >= node_per_cluster:
                clusters.append(G_curr_c.subgraph(part))

    # get the node_per_cluster most central nodes from each cluster
    candidate_nodes = []
    candidate_neighbors = {}
    for G_curr in clusters:
        # measure used to pick nodes within clusters
        closeness = nx.closeness_centrality(G_curr)
        best = sorted(closeness, key=closeness.get, reverse=True)[:node_per_cluster]
        for node in best:
            # both containers are filled together, so the dict doubles as a
            # constant-time "already chosen" check
            if node not in candidate_neighbors:
                candidate_nodes.append(node)
                candidate_neighbors[node] = list(nx.all_neighbors(G, node))

    return candidate_nodes, candidate_neighbors
ursi = graph_filename[4:13] ursis.append(ursi) try: y[i, :] = np.hstack( (df1.loc[ursi, 'CCI'], df1.loc[ursi, :].iloc[4:].as_matrix())) graph_data = np.load(root_dir + graphtype + '/' + graph_filename) # print('graph shape:', graph_data.shape) g = nx.Graph(graph_data) laplacian = nx.laplacian_matrix(g) spectrum = nx.laplacian_spectrum(g) connectivity = nx.fiedler_vector(g) communities = nx.k_clique_communities(g, 5) print('Laplacian shape:', laplacian.shape) print('Spectrum shape:', spectrum.shape) print('Connectivity:', connectivity) print('Communities:', communities.shape) # feature extraction rich_coeff_at_degree = rich_club_coefficient(g, normalized=False) rich_keys = list(rich_coeff_at_degree.keys()) rich_vals = list(rich_coeff_at_degree.values()) rich_hist, bin_edges = np.histogram(rich_vals, n_roi // 10)
def calcularVetorFiedlerNetworkx(grafo):
    """Return the Fiedler vector of ``grafo`` as computed by NetworkX.

    Thin wrapper that calls ``nx.fiedler_vector`` with only the graph
    argument, leaving every other option at the library default.
    """
    vetor_fiedler = nx.fiedler_vector(grafo)
    return vetor_fiedler
#!/usr/bin/env python
# Builds a small six-edge graph, prints its nodes and its Fiedler vector
# (LOBPCG solver), then draws the graph to a PNG file.
import networkx as nx
import numpy as np
import matplotlib
# Select the non-interactive backend before pyplot is imported so the script
# can run without a display.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

G = nx.Graph()
G.add_nodes_from([0, 7, 9, 10, 19])
G.add_edges_from([(0, 9), (0, 7), (7, 9), (7, 10), (7, 19), (10, 19)])

# Single-argument print(...) is valid and prints the same text on both
# Python 2 and Python 3; the bare `print x` statement only parses on 2.
print(G.nodes())
print(nx.fiedler_vector(G, method='lobpcg'))

# The previously computed circular layout was never handed to nx.draw, so it
# has been dropped; the drawing uses nx.draw's default layout as before.
plt.clf()
nx.draw(G, with_labels=True)
plt.savefig('C:/Users/Branko/Desktop/ZKZFPRUSWCYSGT-UHFFFAOYSA-N_unoriented.png')
def spectrum_cluster(seed_num, graph_json_filename=None, graph_json_str=None):
    """
    Identifies clusters in the network by repeated Fiedler-vector bisection,
    takes the highest-degree nodes of every cluster as candidates, and
    returns seed_num of those candidates drawn at random without
    replacement.

    Parameters:
        seed_num: Number of nodes to choose.
        graph_json_filename: Filename where the adjacency list lives as JSON.
            Used when graph_json_str is None.
        graph_json_str: Graph as an adjacency list string in JSON.

    Return:
        List of the chosen nodes.

    Raises:
        ValueError: raised by np.random.choice when fewer than seed_num
            candidates were collected.
    """
    # Local import: a plain FIFO is all that is needed here, and deque is
    # available under the same name on Python 2 and Python 3.
    from collections import deque

    # parse the graph
    if graph_json_str is None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    # try to get about 2 nodes in each cluster
    node_per_cluster = 2
    # NOTE(review): for an int seed_num, `/` floors on Python 2 but not on
    # Python 3, so the cluster target can differ between interpreters.
    max_depth = int(math.ceil(np.log2(seed_num / node_per_cluster))) + 1
    target_clusters = 2 ** max_depth

    clusters = deque([G])
    # Number of clusters in a row that were re-queued without being split.
    # Once it reaches the queue length, every queued cluster is unsplittable
    # and continuing would spin forever.
    stalled = 0

    # divide graph into (at most) 2**max_depth clusters
    while clusters and len(clusters) < target_clusters and stalled < len(clusters):
        G_curr = clusters.popleft()
        # work only on the largest connected component
        largest_component = max(nx.connected_components(G_curr), key=len)
        G_curr_c = G_curr.subgraph(largest_component)
        # NOTE(review): size() counts edges, not nodes - confirm that is the
        # intended measure of "too small".
        if G_curr_c.size() < 2 * node_per_cluster:
            # put it back if cluster is too small
            clusters.append(G_curr_c)
            stalled += 1
            continue
        stalled = 0

        # get fiedler vector
        fiedler_vector = nx.fiedler_vector(G_curr_c, normalized=True, tol=1e-04)
        # list() gives positional access; on NetworkX >= 2 indexing the
        # object returned by nodes() looks a node up by key instead.
        node_list = list(G_curr_c.nodes())

        # split non-negative and negative terms in fiedler vector
        node_list_sub_1 = []
        node_list_sub_2 = []
        for node, value in zip(node_list, fiedler_vector):
            if value >= 0:
                node_list_sub_1.append(node)
            else:
                node_list_sub_2.append(node)

        # separate the graph into two subgraphs, ignoring clusters too small
        for part in (node_list_sub_1, node_list_sub_2):
            if len(part) >= node_per_cluster:
                clusters.append(G_curr_c.subgraph(part))

    # get node_per_cluster highest degree nodes from each cluster
    candidate_nodes = []
    for G_curr in clusters:
        # dict() keeps this working whether nx.degree returns a plain dict
        # or a view of (node, degree) pairs
        degree_dict = dict(nx.degree(G_curr))
        ranked = sorted(degree_dict, key=degree_dict.get, reverse=True)
        candidate_nodes.extend(ranked[:node_per_cluster])

    # randomly pick seed_num nodes from candidate_nodes
    rtn = list(np.random.choice(candidate_nodes, replace=False, size=seed_num))
    return rtn