def test_two_nodes(self, method):
     """Single-edge graph: the algebraic connectivity must be 2."""
     graph = nx.Graph()
     graph.add_edge(0, 1, weight=1)
     laplacian = nx.laplacian_matrix(graph)
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, 2)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, 2, fiedler)
Exemplo n.º 2
0
def main():
    """Build the sample graph, print its Fiedler value/vector and plot the partition.

    The spectrum is computed twice: once through a dense eigendecomposition of
    the Laplacian, and once through ``nx.fiedler_vector``. Only the latter is
    used to build the partition that gets plotted.
    """
    J = make_suspicious_graph()

    utils.plot_graph_random(J)

    LG = nx.laplacian_matrix(J)
    raw_values, raw_vectors = LA.eig(LG.toarray())
    rounded_values = np.around(raw_values)

    # ``eig`` returns eigenvectors as the COLUMNS of its second result, so
    # eigenvalue i must be paired with column i, not row i.
    sorted_eigen_values = [
        (abs(rounded_values[i]), raw_vectors[:, i])
        for i in range(len(rounded_values))
    ]

    # NOTE(review): ``sort`` is not defined in this block; presumably a
    # module-level helper that orders the pairs in place by eigenvalue — confirm.
    sort(sorted_eigen_values)
    internal_fiedler_vector = nx.fiedler_vector(J)
    # Index 1 is the second entry after sorting, reported as the Fiedler pair.
    print("Fiedler Value: " + str(sorted_eigen_values[1][0]))
    print("Fiedler Vector: " + str(sorted_eigen_values[1][1]))
    # Previously printed ``str()`` (always empty) instead of the vector.
    print("Internal Fiedler Vector: " + str(internal_fiedler_vector))
    fiedler_partition = partition_fiedler(internal_fiedler_vector)
    print("J Nodes")
    print(J.nodes)
    print("Fiedler Partition")
    print(fiedler_partition)
    plot_partition(J, fiedler_partition)
 def test_path(self, method):
     """Path graph on 8 nodes: connectivity must equal 2 - sqrt(2 + sqrt(2))."""
     graph = nx.path_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2 + sqrt(2))
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
 def test_seed_argument(self, method):
     """Cycle graph on 8 nodes, solved with an explicit ``seed=1``."""
     graph = nx.cycle_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2)
     solver_opts = {"tol": 1e-12, "method": method, "seed": 1}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
 def test_abbreviation_of_method(self):
     """The short method name "tracemin" must be accepted by both entry points."""
     graph = nx.path_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2 + sqrt(2))
     solver_opts = {"tol": 1e-12, "method": "tracemin"}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
 def test_abbreviation_of_method(self):
     """The short method name 'tracemin' must be accepted by both entry points."""
     graph = nx.path_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2 + sqrt(2))
     solver_opts = {'tol': 1e-12, 'method': 'tracemin'}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert_almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 7
0
 def test_cycle(self):
     """Cycle graph on 8 nodes, checked against every solver in ``self._methods``."""
     graph = nx.cycle_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2)
     for solver in self._methods:
         solver_opts = {'tol': 1e-12, 'method': solver}
         connectivity = nx.algebraic_connectivity(graph, **solver_opts)
         assert_almost_equal(connectivity, expected)
         fiedler = nx.fiedler_vector(graph, **solver_opts)
         check_eigenvector(laplacian, expected, fiedler)
 def test_cycle(self, method):
     """Cycle graph on 8 nodes: connectivity must equal 2 - sqrt(2)."""
     pytest.importorskip("scipy")
     graph = nx.cycle_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2)
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 9
0
 def test_two_nodes(self, method):
     """Single-edge graph: connectivity must be 2 within an absolute 1e-7."""
     pytest.importorskip("scipy")
     graph = nx.Graph()
     graph.add_edge(0, 1, weight=1)
     laplacian = nx.laplacian_matrix(graph)
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert connectivity == pytest.approx(2, abs=1e-7)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, 2, fiedler)
Exemplo n.º 10
0
 def test_abbreviation_of_method(self):
     """The short method name "tracemin" must be accepted by both entry points."""
     pytest.importorskip("scipy")
     graph = nx.path_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2 + sqrt(2))
     solver_opts = {"tol": 1e-12, "method": "tracemin"}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert connectivity == pytest.approx(expected, abs=1e-7)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 11
0
 def test_seed_argument(self, method):
     """Cycle graph on 8 nodes, solved with an explicit ``seed=1``."""
     pytest.importorskip("scipy")
     graph = nx.cycle_graph(8)
     laplacian = nx.laplacian_matrix(graph)
     expected = 2 - sqrt(2)
     solver_opts = {"tol": 1e-12, "method": method, "seed": 1}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert connectivity == pytest.approx(expected, abs=1e-7)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
 def test_problematic_graph_issue_2381(self, method):
     """Regression graph: 4-node path with two extra pendant edges."""
     graph = nx.path_graph(4)
     graph.add_edges_from([(4, 2), (5, 1)])
     laplacian = nx.laplacian_matrix(graph)
     expected = 0.438447187191
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert almost_equal(connectivity, expected)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 13
0
 def test_problematic_graph_issue_2381(self):
     """Regression graph, checked against every solver in ``self._methods``."""
     graph = nx.path_graph(4)
     graph.add_edges_from([(4, 2), (5, 1)])
     laplacian = nx.laplacian_matrix(graph)
     expected = 0.438447187191
     for solver in self._methods:
         solver_opts = {'tol': 1e-12, 'method': solver}
         connectivity = nx.algebraic_connectivity(graph, **solver_opts)
         assert_almost_equal(connectivity, expected)
         fiedler = nx.fiedler_vector(graph, **solver_opts)
         check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 14
0
 def test_problematic_graph_issue_2381(self, method):
     """Regression graph: 4-node path with two extra pendant edges."""
     pytest.importorskip("scipy")
     graph = nx.path_graph(4)
     graph.add_edges_from([(4, 2), (5, 1)])
     laplacian = nx.laplacian_matrix(graph)
     expected = 0.438447187191
     solver_opts = {"tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(graph, **solver_opts)
     assert connectivity == pytest.approx(expected, abs=1e-7)
     fiedler = nx.fiedler_vector(graph, **solver_opts)
     check_eigenvector(laplacian, expected, fiedler)
Exemplo n.º 15
0
 def test_two_nodes(self):
     """Two-node graphs (simple, then weighted multigraph) for every solver."""
     # Case 1: a single unit-weight edge; expected connectivity 2.
     simple = nx.Graph()
     simple.add_edge(0, 1, weight=1)
     simple_laplacian = nx.laplacian_matrix(simple)
     for solver in self._methods:
         opts = {'tol': 1e-12, 'method': solver}
         assert_almost_equal(nx.algebraic_connectivity(simple, **opts), 2)
         vec = nx.fiedler_vector(simple, **opts)
         check_eigenvector(simple_laplacian, 2, vec)

     # Case 2: multigraph with a self-loop and two parallel edges weighted
     # through the 'spam' attribute; expected connectivity 6.
     multi = nx.MultiGraph()
     multi.add_edge(0, 0, spam=1e8)
     multi.add_edge(0, 1, spam=1)
     multi.add_edge(0, 1, spam=-2)
     multi_matrix = -3 * nx.laplacian_matrix(multi, weight='spam')
     for solver in self._methods:
         opts = {'weight': 'spam', 'tol': 1e-12, 'method': solver}
         assert_almost_equal(nx.algebraic_connectivity(multi, **opts), 6)
         vec = nx.fiedler_vector(multi, **opts)
         check_eigenvector(multi_matrix, 6, vec)
 def test_two_nodes_multigraph(self, method):
     """Two-node multigraph weighted through 'spam': connectivity must be 6."""
     multi = nx.MultiGraph()
     for u, v, spam in ((0, 0, 1e8), (0, 1, 1), (0, 1, -2)):
         multi.add_edge(u, v, spam=spam)
     reference = -3 * nx.laplacian_matrix(multi, weight="spam")
     solver_opts = {"weight": "spam", "tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(multi, **solver_opts)
     assert almost_equal(connectivity, 6)
     fiedler = nx.fiedler_vector(multi, **solver_opts)
     check_eigenvector(reference, 6, fiedler)
 def find_communities(self):
     """Split the nodes in two by 2-means clustering of the Fiedler vector.

     Returns a pair ``(positive, negative)`` of node lists: "positive" holds
     the members of KMeans cluster 1, "negative" those of cluster 0.
     """
     expected_total = len(self._nodes)
     fiedler = nx.fiedler_vector(self._graph)
     # KMeans expects a 2-D sample matrix, so present the vector as one column.
     column = np.array(fiedler).reshape((len(fiedler), 1))
     labels = KMeans(n_clusters=2).fit(column).labels_
     cluster_zero = [idx for idx in range(len(labels)) if labels[idx] == 0]
     cluster_one = [idx for idx in range(len(labels)) if labels[idx] == 1]
     assert len(cluster_zero) + len(cluster_one) == expected_total
     # assumes self._nodes is ordered like the graph's node iteration — TODO confirm
     positive_dolphins = [self._nodes[idx] for idx in cluster_one]
     negative_dolphins = [self._nodes[idx] for idx in cluster_zero]
     return positive_dolphins, negative_dolphins
Exemplo n.º 18
0
def ComputeFiedlerVector(G):
    """
    Return the Fiedler vector of a graph.

    ``G`` may be a NetworkX graph or a NumPy array; an array (including
    ndarray subclasses) is first converted with ``nx.to_networkx_graph``.
    """
    # isinstance replaces the old ``type(G) == type(np.ndarray(...))`` check,
    # which allocated a throwaway array and missed ndarray subclasses.
    if isinstance(G, np.ndarray):
        G = nx.to_networkx_graph(G)

    return nx.fiedler_vector(G)
def spectral_partitioning(G):
    """Bisect G by thresholding its Fiedler vector (Lanczos solver) at the median.

    Returns a list with one label per Fiedler-vector entry: -1 for entries
    strictly below the median, 1 otherwise.
    """
    fiedler = nx.fiedler_vector(G, method='lanczos')
    threshold = np.median(fiedler)
    return [-1 if fiedler[pos] < threshold else 1 for pos in range(len(fiedler))]
Exemplo n.º 20
0
 def test_two_nodes_multigraph(self, method):
     """Two-node multigraph weighted through 'spam': connectivity must be 6."""
     pytest.importorskip("scipy")
     multi = nx.MultiGraph()
     for u, v, spam in ((0, 0, 1e8), (0, 1, 1), (0, 1, -2)):
         multi.add_edge(u, v, spam=spam)
     reference = -3 * nx.laplacian_matrix(multi, weight="spam")
     solver_opts = {"weight": "spam", "tol": 1e-12, "method": method}
     connectivity = nx.algebraic_connectivity(multi, **solver_opts)
     assert connectivity == pytest.approx(6, abs=1e-7)
     fiedler = nx.fiedler_vector(multi, **solver_opts)
     check_eigenvector(reference, 6, fiedler)
 def test_buckminsterfullerene(self):
     """60-node, 90-edge graph checked with plain and normalized Laplacians."""
     edges = [
         (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
         (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
         (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
         (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
         (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
         (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
         (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
         (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
         (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
         (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
         (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
         (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
         (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
         (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
         (47, 54), (48, 55),
     ]
     graph = nx.Graph(edges)
     # Solver-availability errors that are tolerated rather than failing the test.
     tolerated = (('Cholesky solver unavailable.', ),
                  ('LU solver unavailable.', ))
     for normalized in (False, True):
         if normalized:
             matrix = nx.normalized_laplacian_matrix(graph)
             expected = 0.08113391537997749
         else:
             matrix = nx.laplacian_matrix(graph)
             expected = 0.2434017461399311
         for solver in methods:
             opts = {"normalized": normalized, "tol": 1e-12, "method": solver}
             try:
                 connectivity = nx.algebraic_connectivity(graph, **opts)
                 assert almost_equal(connectivity, expected)
                 vec = nx.fiedler_vector(graph, **opts)
                 check_eigenvector(matrix, expected, vec)
             except nx.NetworkXError as err:
                 if err.args not in tolerated:
                     raise
def normalized_cut(G):
    """Return a cut vector for G as a NumPy array.

    When G is connected (its largest connected component spans every node),
    the Fiedler vector is computed with the LOBPCG solver and handed to
    ``sweep``. Otherwise the result comes from ``separate_lcc`` applied to G
    and its largest component.

    NOTE(review): ``sweep`` and ``separate_lcc`` are defined elsewhere; their
    exact output format is not visible from this block.
    """
    # ``connected_component_subgraphs`` was removed in NetworkX 2.4; build the
    # largest component from ``connected_components`` instead. Sorting (rather
    # than ``max``) keeps the original IndexError on an empty graph.
    components = sorted(networkx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if networkx.number_of_nodes(G) == networkx.number_of_nodes(G0):
        x = networkx.fiedler_vector(G, method='lobpcg', tol=1e-5)
        x = sweep(x, G)
    else:
        x = separate_lcc(G, G0)

    return numpy.array(x)
Exemplo n.º 23
0
def ratio_cut(G):
    """
        Computes ratio-cut of G based on second eigenvector of the Laplacian.
        Input:
            * G: Graph
        Output:
            * x: Indicator vector (NumPy array)
    """
    # ``connected_component_subgraphs`` was removed in NetworkX 2.4; build the
    # largest component from ``connected_components`` instead. Sorting (rather
    # than ``max``) keeps the original IndexError on an empty graph.
    components = sorted(nx.connected_components(G), key=len, reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if nx.number_of_nodes(G) == nx.number_of_nodes(G0):
        # Seed NumPy's global RNG before the solver call. ``scipy.random`` was
        # only a deprecated alias of ``numpy.random``, so this is the same call.
        np.random.seed(1)
        x = nx.fiedler_vector(G, method=_method, tol=1e-5)
        x = sweep(x, G)
    else:
        # In case G is not connected
        x = separate_lcc(G, G0)
    return np.array(x)
Exemplo n.º 24
0
def normalized_cut(G):
    """Return a cut vector for G as a NumPy array.

    When G is connected (its largest connected component spans every node),
    the Fiedler vector is computed with the LOBPCG solver and handed to
    ``sweep``. Otherwise the result comes from ``separate_lcc`` applied to G
    and its largest component.

    NOTE(review): ``sweep`` and ``separate_lcc`` are defined elsewhere; their
    exact output format is not visible from this block.
    """
    # ``connected_component_subgraphs`` was removed in NetworkX 2.4; build the
    # largest component from ``connected_components`` instead. Sorting (rather
    # than ``max``) keeps the original IndexError on an empty graph.
    components = sorted(networkx.connected_components(G),
                        key=len,
                        reverse=True)
    G0 = G.subgraph(components[0]).copy()

    if networkx.number_of_nodes(G) == networkx.number_of_nodes(G0):
        x = networkx.fiedler_vector(G, method='lobpcg', tol=1e-5)
        x = sweep(x, G)
    else:
        x = separate_lcc(G, G0)

    return numpy.array(x)
Exemplo n.º 25
0
def fiedler_vector(g, _weight, _normalized=False, _out_path=None):
    """Compute the Fiedler vector of ``g`` and package it with graph statistics.

    Returns a dict holding the parameters used, node/edge counts, elapsed
    time, and the per-node values passed through ``__normalize_filter`` and
    ``__rank_filter``. When ``_out_path`` is given the dict is also written
    with ``graph_io.write_json_data``. If NetworkX raises ``NetworkXError`` a
    message is printed and ``None`` is returned.
    """
    try:
        started = time.time()

        vector = nx.fiedler_vector(g, weight=_weight, normalized=_normalized)
        # Map each node to the vector entry at the same position.
        data = {}
        for position, node in enumerate(list(g.nodes)):
            data[node] = vector[position]

        node_count = g.number_of_nodes()
        edge_count = g.number_of_edges()
        # Elapsed time is taken before the filters run, as in the original
        # dict literal's evaluation order.
        elapsed = time.time() - started
        ret = {
            'name': 'fiedler',
            'normalized': _normalized,
            'weight': _weight,
            'num_of_nodes': node_count,
            'num_of_edges': edge_count,
            'process_time': elapsed,
            'data': __normalize_filter(data),
            'ranked': __rank_filter(data),
        }

        if _out_path is not None:
            graph_io.write_json_data(_out_path, ret)

        return ret

    except nx.exception.NetworkXError:
        print(">>> FAILED (fiedler_vector): processing error. maybe graph not connected?")

    return None
Exemplo n.º 26
0
def adj_mat():
    """Build a fixed 10-node graph, print its spectrum and plot the sign partition.

    Nodes whose Fiedler-vector entry is strictly positive go to side 1 of the
    partition; all others go to side 0.
    """
    adjacency = np.array([
        [0, 1, 1, 0, 0, 1, 0, 0, 1, 1],
        [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 1, 1, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
    ])
    graph = nx.from_numpy_matrix(adjacency)
    fiedler = nx.fiedler_vector(graph)
    laplacian = nx.laplacian_matrix(graph)
    print(np.linalg.eigvals(laplacian.toarray()))
    print(fiedler)
    non_positive, positive = [], []
    for index in range(len(fiedler)):
        side = positive if fiedler[index] > 0 else non_positive
        side.append(index)
    plot_partition(graph, (non_positive, positive))
Exemplo n.º 27
0
 def test_buckminsterfullerene(self):
     """60-node, 90-edge graph checked with plain and normalized Laplacians."""
     edges = [
         (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
         (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
         (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
         (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
         (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
         (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
         (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
         (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
         (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
         (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
         (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
         (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
         (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
         (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
         (47, 54), (48, 55),
     ]
     graph = nx.Graph(edges)
     # Solver-availability errors that are tolerated rather than failing the test.
     tolerated = (('Cholesky solver unavailable.',),
                  ('LU solver unavailable.',))
     for normalized in (False, True):
         if normalized:
             matrix = nx.normalized_laplacian_matrix(graph)
             expected = 0.08113391537997749
         else:
             matrix = nx.laplacian_matrix(graph)
             expected = 0.2434017461399311
         for solver in methods:
             opts = {'normalized': normalized, 'tol': 1e-12, 'method': solver}
             try:
                 connectivity = nx.algebraic_connectivity(graph, **opts)
                 assert_almost_equal(connectivity, expected)
                 vec = nx.fiedler_vector(graph, **opts)
                 check_eigenvector(matrix, expected, vec)
             except nx.NetworkXError as err:
                 if err.args not in tolerated:
                     raise
Exemplo n.º 28
0
 def test_buckminsterfullerene(self, normalized, sigma, laplacian_fn,
                               method):
     """60-node, 90-edge graph checked against the supplied ``sigma``.

     ``laplacian_fn`` builds the reference matrix and ``normalized`` is
     forwarded to both NetworkX entry points.
     """
     pytest.importorskip("scipy")
     edges = [
         (1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
         (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
         (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
         (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
         (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
         (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
         (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
         (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
         (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
         (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
         (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
         (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
         (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
         (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
         (47, 54), (48, 55),
     ]
     graph = nx.Graph(edges)
     matrix = laplacian_fn(graph)
     # Solver-availability errors that are tolerated rather than failing the test.
     tolerated = (
         ("Cholesky solver unavailable.", ),
         ("LU solver unavailable.", ),
     )
     opts = {"normalized": normalized, "tol": 1e-12, "method": method}
     try:
         connectivity = nx.algebraic_connectivity(graph, **opts)
         assert connectivity == pytest.approx(sigma, abs=1e-7)
         vec = nx.fiedler_vector(graph, **opts)
         check_eigenvector(matrix, sigma, vec)
     except nx.NetworkXError as err:
         if err.args not in tolerated:
             raise
Exemplo n.º 29
0
def test_fiedler_vector_tracemin_chol():
    """Requesting the "tracemin_chol" method must raise ``NetworkXError``."""
    pytest.importorskip("scipy")
    barbell = nx.barbell_graph(5, 4)
    expect_error = pytest.raises(nx.NetworkXError)
    with expect_error:
        nx.fiedler_vector(barbell, method="tracemin_chol")
Exemplo n.º 30
0
def completed_graph(inchikey):
    """Build, draw and analyse the "completed" cross-reference graph for an InChIKey.

    Steps, in order:
      1. Query the UniChem REST service for ``inchikey`` and create one G6 node
         per returned source, labelled ``"<source name>/<compound id>"``.
      2. For G6 nodes whose source is in ``bio2rdf_dataset``, run a federated
         SPARQL query and add previously unseen results as new nodes.
      3. Fully connect G6, drop nodes outside ``bio2rdf_dataset``, and save a PNG.
      4. Repeat the same procedure for G7 against the Chem2Bio2RDF endpoints.
      5. Union G6 and G7 into G10, save EPS/PNG drawings, merge duplicate
         'pubchem' nodes and compute the Fiedler vector of G10 (LOBPCG).

    Returns ``{'fv': <vector>, 'connection_node': 'pubchem'}`` when at least two
    'pubchem' nodes are found, otherwise ``{'fv': <error message string>}``.

    NOTE(review): the ``G.node[...]`` attribute and ``G.nodes().index(...)``
    calls suggest this was written for an old NetworkX release whose
    ``nodes()`` returned a list, and the ``StringIO``/pycurl pairing suggests
    Python 2 — confirm before running on a current stack.
    """


    #graphs generated using PIBAS dataset, UniChem and Bio2RDF
    G6 = nx.Graph()
    # NOTE(review): G8 is never used in this function.
    G8 = nx.Graph()


    #we start from PIBAS local ontology and  for a given InChiKey we found compound acronym 


    sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql")


    url = 'https://www.ebi.ac.uk/unichem/rest/inchikey/'+inchikey
    #print url
    # j is the running integer id for G6 nodes.
    j=0
    # Fetch the UniChem record; the response body is collected in ``storage``.
    storage = StringIO()
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEFUNCTION, storage.write)
    c.perform()
    c.close()
    content = storage.getvalue()
    unichem_content = json.loads(content)
    for rs in unichem_content: 
     src_compound_id= rs['src_compound_id']
     # Resolve the numeric source id to its source name with a second request.
     url = 'https://www.ebi.ac.uk/unichem/rest/sources/'+rs['src_id']
     storage = StringIO()
     c = pycurl.Curl()
     c.setopt(c.URL, url)
     c.setopt(c.WRITEFUNCTION, storage.write)
     c.perform()
     c.close()
     content = storage.getvalue()
     unichem_src_name = json.loads(content)
     # NOTE: this inner loop rebinds ``rs``, shadowing the outer loop variable.
     for rs in unichem_src_name:
      name=rs['name']
      #print name
      G6.add_node(j,name_label=name+'/'+src_compound_id)
      j=j+1


    
    # Source names for which a Bio2RDF lookup is attempted.
    bio2rdf_dataset=['bindingdb','pubchem','pharmgkb','chebi','kegg_ligand','pdb','drugbank','chembl','pibas','ndc']

    num1=G6.number_of_nodes()
    nodes1=G6.nodes()
    # Only the nodes present before this loop are visited (ids 0..num1-1).
    for x in range(0, num1):
     if((G6.node[x]['name_label']).split('/')[0] in bio2rdf_dataset):
      
      sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql")
      # kegg_ligand uses a fixed endpoint/vocabulary; every other source
      # derives both from its own name.
      if((G6.node[x]['name_label']).split('/')[0]=='kegg_ligand'):
       sparql.setQuery(
       """  
        PREFIX bindingdb: <http://bio2rdf.org/bindingdb:>
        PREFIX pubchem: <http://bio2rdf.org/pubchem:>
        PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:>
        PREFIX chebi: <http://bio2rdf.org/chebi:>
        PREFIX kegg_ligand: <http://bio2rdf.org/kegg:>
        PREFIX pdb: <http://bio2rdf.org/pdb:>
        PREFIX drugbank: <http://bio2rdf.org/drugbank:>
        PREFIX chembl: <http://bio2rdf.org/chembl:> 
        PREFIX ndc: <http://bio2rdf.org/ndc:> 
        select ?p ?o
        WHERE
        {

         SERVICE SILENT<http://kegg.bio2rdf.org/sparql> { 
         OPTIONAL{
          %s:%s ?p ?o.
         FILTER(CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-drugbank") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-pubchem.compound") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-kegg")  || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-chembl") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:x-chebi") || CONTAINS(str(?p),"http://bio2rdf.org/kegg_vocabulary:same-as")).
        }
        }

        }

       """ % (((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[1])))

      else:
       
       sparql.setQuery(
       """  
        PREFIX bindingdb: <http://bio2rdf.org/bindingdb:>
        PREFIX pubchem: <http://bio2rdf.org/pubchem:>
        PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:>
        PREFIX chebi: <http://bio2rdf.org/chebi:>
        PREFIX kegg_ligand: <http://bio2rdf.org/kegg:>
        PREFIX pdb: <http://bio2rdf.org/pdb:>
        PREFIX drugbank: <http://bio2rdf.org/drugbank:>
        PREFIX chembl: <http://bio2rdf.org/chembl:>
        PREFIX ndc: <http://bio2rdf.org/ndc:> 
        select ?p ?o
        WHERE
        {

         SERVICE SILENT<http://%s.bio2rdf.org/sparql> { 
         OPTIONAL{
          %s:%s ?p ?o.
         FILTER(CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-drugbank") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-pubchem.compound") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-kegg")  || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-chembl") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-chebi") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:x-ndc") || CONTAINS(str(?p),"http://bio2rdf.org/%s_vocabulary:same-as")).
        }
        }

        }

       """ % (((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[1]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0]),((G6.node[x]['name_label']).split('/')[0])))

      sparql.setReturnFormat(JSON)
      final_results1 = sparql.query().convert()
      # NOTE(review): the bare ``except`` below swallows every error raised
      # while processing the bindings and moves on to the next node.
      try:
       # v is the id that the next newly discovered node will receive.
       v=G6.number_of_nodes()
       for result1 in final_results1["results"]["bindings"]:
        if(result1["o"]["value"]!=""):  
         node_for_add=result1["o"]["value"] 
         # Keep the last URI path segment, expected to look like "<prefix>:<id>".
         node_of_second_level=node_for_add.split("/")[-1]
         if(node_of_second_level.split(':')[0]=='kegg'):
          node_for_add='kegg_ligand/'+node_of_second_level.split(':')[1]
         else:
          node_for_add=node_of_second_level.split(':')[0]+'/'+node_of_second_level.split(':')[1]
          #print node_for_add
         # Rebuild the list of existing labels to detect duplicates.
         # NOTE: this loop rebinds ``x``, the outer loop variable.
         compare_node=[]
         for x in range(0,G6.number_of_nodes()):
           compare_node.append(str((G6.node[x]['name_label']).split('/')[0])+'/'+str((G6.node[x]['name_label']).split('/')[1]))

         if(node_for_add not in compare_node):
          G6.add_node(v,name_label=node_for_add,predicate=result1["p"]["value"])
          v=v+1
          #print v
      except:     
        continue



    nodes=G6.nodes()

    #print nodes

    # Turn G6 into a complete graph over its current nodes.
    edges = combinations(nodes, 2)
    G6.add_nodes_from(nodes)
    G6.add_edges_from(edges)

    num1=G6.number_of_nodes()
    nodes1=G6.nodes()
    # NOTE(review): left_graph_for_remove is assigned but never read.
    left_graph_for_remove=[]
    # Drop nodes whose source is not a Bio2RDF dataset (assumes ids 0..num1-1).
    for x in range(0, num1):
     if(((G6.node[x]['name_label']).split('/')[0] not in bio2rdf_dataset)):
      G6.remove_node(x)

    nodes1=G6.nodes()
    #print nodes1
    custom_labels1={}
    for x in nodes1:
     custom_labels1[x] = str(x)+':'+G6.node[x]['name_label']
   
 
    
    #create and draw graph G6
   
    # NOTE(review): ``pos`` is computed but not passed to ``nx.draw``.
    pos=nx.circular_layout(G6,dim=2, scale=100)
    plt.clf()
    nx.draw(G6, labels=custom_labels1, with_labels=True)
    plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_bio2rdf_'+inchikey+'.png')


    #******************Using Chem2Bio2RDF***********************************************************************************

    sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql")
    G7 = nx.Graph()
    
    # G7 ids start after the largest G6 id so the graphs can be unioned later.
    index_for_new_graph=max(G6.nodes())
    url = 'https://www.ebi.ac.uk/unichem/rest/inchikey/'+inchikey
    #print url
    j=index_for_new_graph+1
    storage = StringIO()
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEFUNCTION, storage.write)
    c.perform()
    c.close()
    content = storage.getvalue()
    unichem_content = json.loads(content)
    for rs in unichem_content: 
     src_compound_id= rs['src_compound_id']
     url = 'https://www.ebi.ac.uk/unichem/rest/sources/'+rs['src_id']
     storage = StringIO()
     c = pycurl.Curl()
     c.setopt(c.URL, url)
     c.setopt(c.WRITEFUNCTION, storage.write)
     c.perform()
     c.close()
     content = storage.getvalue()
     unichem_src_name = json.loads(content)
     for rs in unichem_src_name:
      name=rs['name']
      G7.add_node(j,name_label=name+'/'+src_compound_id)
      j=j+1


    # Source names kept in the Chem2Bio2RDF graph.
    chem2bio2rdf_dataset=['bindingdb','pubchem','chebi','kegg_ligand','kegg','pdb','drugbank','chembl','uniprot','matador','ctd','dcdb','hgnc','pharmgkb','hprd']


    num1=G7.number_of_nodes()
    nodes1=G7.nodes()
    # NOTE(review): G7 ids run from index_for_new_graph+1 to
    # index_for_new_graph+num1 inclusive, so this range appears to skip the
    # last node — confirm whether that is intended.
    for x in range(index_for_new_graph+1,index_for_new_graph+num1):
     # The filter uses bio2rdf_dataset (not chem2bio2rdf_dataset) and excludes chembl.
     if(((G7.node[x]['name_label']).split('/')[0] in bio2rdf_dataset) and ("chembl" not in (G7.node[x]['name_label']).split('/')[0])):
      sparql = SPARQLWrapper("http://cpctas-lcmb.pmf.kg.ac.rs:2020/sparql")
      if((G7.node[x]['name_label']).split('/')[0]=='kegg_ligand'):
       sparql.setQuery(
       """  
        PREFIX bindingdb: <http://chem2bio2rdf.org/bindingdb/resource/>
        PREFIX pubchem: <http://chem2bio2rdf.org/pubchem/resource/>
        PREFIX uniprot: <http://chem2bio2rdf.org/uniprot/resource/>
        PREFIX chebi: <http://chem2bio2rdf.org/chebi/resource/chebi/CHEBI~>
        PREFIX kegg_ligand: <http://chem2bio2rdf.org/kegg/resource/kegg_ligand/>
        PREFIX pdb: <http://chem2bio2rdf.org/pdb/resource/pdb_ligand/>
        PREFIX drugbank: <http://chem2bio2rdf.org/drugbank/resource/>
        PREFIX matador: <http://chem2bio2rdf.org/matador/resource/>
        PREFIX chembl: <http://chem2bio2rdf.org/chembl/resource/> 
        PREFIX uniprot: <http://chem2bio2rdf.org/uniprot/resource/>
        PREFIX db: <http://chem2bio2rdf.org/kegg/resource/>
        select ?o
        WHERE
        {

         SERVICE SILENT<http://cheminfov.informatics.indiana.edu:8080/kegg/sparql> { 
         OPTIONAL{
          %s:%s db:CID ?o.

            }

        }

        }

       """ % (((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[1])))
      elif((G7.node[x]['name_label']).split('/')[0]=='drugbank'):
       sparql.setQuery(
       """
        PREFIX db:<http://chem2bio2rdf.org/drugbank/resource/>
        PREFIX drugbank:<http://chem2bio2rdf.org/drugbank/resource/drugbank_drug/>
        select ?o
        WHERE
        {

         
         SERVICE SILENT<http://147.91.203.161:8890/sparql>{
          drugbank:%s db:CID ?o.

            }

        }
       """ % (G7.node[x]['name_label']).split('/')[1])
      else:
       sparql.setQuery(
       """  
        PREFIX bindingdb: <http://bio2rdf.org/bindingdb:>
        PREFIX pubchem: <http://bio2rdf.org/pubchem:>
        PREFIX pharmgkb: <http://bio2rdf.org/pharmgkb:>
        PREFIX chebi: <http://bio2rdf.org/chebi:>
        PREFIX kegg_ligand: <http://bio2rdf.org/kegg:>
        PREFIX pdb: <http://bio2rdf.org/pdb:>
        PREFIX drugbank: <http://bio2rdf.org/drugbank:>
        PREFIX chembl: <http://bio2rdf.org/chembl:>
        PREFIX ndc: <http://bio2rdf.org/ndc:> 
        PREFIX db: <http://chem2bio2rdf.org/%s/resource/>
        select ?o
        WHERE
        {

         SERVICE SILENT<http://cheminfov.informatics.indiana.edu:8080/%s/sparql> { 
         OPTIONAL{
          %s:%s db:CID ?o.
            }
        }

        }

       """ % (((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[0]),((G7.node[x]['name_label']).split('/')[1])))

      sparql.setReturnFormat(JSON)
      final_results1 = sparql.query().convert()
      # NOTE(review): the three queries above select only ?o, yet the
      # add_node call below reads result1["p"]. That lookup presumably raises
      # KeyError, which the bare ``except`` swallows, so no node would ever
      # be added here — confirm.
      try:
       v=G7.number_of_nodes()
       for result1 in final_results1["results"]["bindings"]:
        if(result1["o"]["value"]!=""):  
         node_for_add=result1["o"]["value"] 
         node_of_second_level=node_for_add.split("/")[-1]
         if(node_of_second_level.split(':')[0]=='kegg'):
          node_for_add='kegg_ligand/'+node_of_second_level.split(':')[1]
         else:
          node_for_add=node_of_second_level.split(':')[0]+'/'+node_of_second_level.split(':')[1]
          #print node_for_add
         compare_node=[]
         for x in range(index_for_new_graph+1,index_for_new_graph+num1):
           compare_node.append(str((G7.node[x]['name_label']).split('/')[0])+'/'+str((G7.node[x]['name_label']).split('/')[1]))
         if(node_for_add not in compare_node):
          G7.add_node(v,name_label=node_for_add,predicate=result1["p"]["value"])
          v=v+1
          #print v
      except:     
        continue



    nodes=G7.nodes()

    #print nodes

    # Turn G7 into a complete graph over its current nodes.
    edges = combinations(nodes, 2)
    G7.add_nodes_from(nodes)
    G7.add_edges_from(edges)

    num1=G7.number_of_nodes()
    nodes1=G7.nodes()
    left_graph_for_remove=[]
    # NOTE(review): nodes are removed from G7 while iterating ``nodes1``; this
    # is only safe if ``G7.nodes()`` returned an independent list — confirm.
    for x in nodes1:
     if(((G7.node[x]['name_label']).split('/')[0] not in chem2bio2rdf_dataset)):
      G7.remove_node(x)


    nodes1=G7.nodes()
    custom_labels1={}
    for x in nodes1:
     custom_labels1[x] = str(x)+':'+G7.node[x]['name_label']


    #create and draw graph G7
 
    pos=nx.circular_layout(G7,dim=2, scale=100)
    plt.clf()
    nx.draw(G7, labels=custom_labels1, with_labels=True)
    plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_chem2bio2rdf_'+inchikey+'.png')



    #*************Join graphs*************************
    G10=nx.union(G6,G7)

    nodes1=G10.nodes()
    custom_labels1={}
    for x in nodes1:
     custom_labels1[x] = str(x)+':'+G10.node[x]['name_label']

    #create and draw graph G10
    pos=nx.circular_layout(G10,dim=2, scale=300)
    plt.clf()
    nx.draw(G10, labels=custom_labels1, with_labels=True)
    plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.eps',format='eps', dpi=300)
    plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.png')
    

    #************************Removing node************************
    # Ids and compound ids of every G10 node whose source is the connection source.
    nodes_for_connection=[]
    nodes_value_for_connection=[]

    values_for_connected_nodes=['pubchem']
    #for x in values_for_connected_nodes:
     #values_for_connected_nodes.remove(x)
   
    #print values_for_connected_nodes
    # Both branches below return, so only the first entry is ever processed.
    for x in values_for_connected_nodes:
        #connection_node=random.choice(values_for_connected_nodes)
        connection_node=x
        #print connection_node

        nodes5=G10.nodes()

        for x in nodes5:
         if((G10.node[x]['name_label']).split('/')[0]==connection_node):
          nodes_for_connection.append(x)
          nodes_value_for_connection.append((G10.node[x]['name_label']).split('/')[1])


        #print nodes_for_connection

        if(len(nodes_for_connection)>=2):
            # Collect the ids of every pair of nodes sharing the same compound id.
            my_array_for_node_connected=[]
            for x in range(0,len(nodes_value_for_connection)-1):
              for y in range(x+1,len(nodes_value_for_connection)):
                if(nodes_value_for_connection[x]==nodes_value_for_connection[y]):
                 my_array_for_node_connected.append(nodes_for_connection[x])
                 my_array_for_node_connected.append(nodes_for_connection[y])

            #print my_array_for_node_connected
            #print max(my_array_for_node_connected)
            #print min(my_array_for_node_connected)

            # NOTE(review): if no pair matched, the list is empty and ``max``
            # raises ValueError here.
            G10.remove_node(max(my_array_for_node_connected))

            for_delete=G10.nodes()
            #print for_delete

            # NOTE(review): ``position`` is never used afterwards.
            position=(G10.nodes()).index(min(my_array_for_node_connected))
            #print position

            # Connect the surviving duplicate (smallest id) to every
            # higher-numbered node that does not belong to G6.
            for x in for_delete:
             if((x>min(my_array_for_node_connected)) and (x not in G6.nodes())):
              G10.add_edge(min(my_array_for_node_connected),x)

            nodes1=G10.nodes()
            custom_labels1={}
            for x in nodes1:
             custom_labels1[x] = str(x)+':'+G10.node[x]['name_label']
            #create and draw graph G10
            pos=nx.circular_layout(G10,dim=2, scale=300)
            plt.clf()
            nx.draw(G10,  labels=custom_labels1, with_labels=True)
            plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.eps',format='eps', dpi=300)
            plt.savefig('/var/www/specint.org/public_html/specint/img/completed_graph_'+inchikey+'.png')
            #H = nx.Graph()
            #H.add_nodes_from(G10.nodes())
            #H.add_edges_from(G10.edges())			
            # NOTE(review): ``image`` is assigned but never read.
            image='myimage_for_completed_graph.png'
            fv=nx.fiedler_vector(G10,method='lobpcg')
            create_python_file(G10.nodes(),G10.edges(), inchikey)
            
            return {'fv':fv, 'connection_node':connection_node}
            # NOTE: unreachable — the ``return`` above always exits first.
            break


        else:
         #return completed_graph(inchikey)
         #continue
         # Fewer than two connection nodes were found; report via the 'fv' key.
         return {'fv':'Undirected graph is not connected! Please, try again!'}


#completed_graph("IHUNBGSDBOWDMA-AQFIFDHZSA-N")
Exemplo n.º 31
0
def approx_min_conductance_partitioning(g: LightMultiGraph, max_k=1):
    """
    Approximate minimum conductance partitioning. I'm using the median method as referenced here:
    http://www.ieor.berkeley.edu/~goldberg/pubs/krishnan-recsys-final2.pdf

    The graph is bisected recursively. Nodes are ranked by their Fiedler
    vector entry (largest first); the top ``len // 2`` nodes form one side and
    the remaining nodes the other. Each side is then repaired so that it
    induces a connected subgraph, and both sides are partitioned in turn.
    A disconnected input is split into its connected components instead.

    :param g: graph to recursively partition
    :param max_k: upper bound of number of nodes allowed in the leaves
    :return: a dendrogram -- a flat list of nodes for a leaf, otherwise a
        list with one child dendrogram per part
    """
    node_list = list(g.nodes())
    if len(node_list) <= max_k:
        assert len(node_list) > 0
        return node_list

    if not nx.is_connected(g):
        # nx.connected_component_subgraphs() was removed in NetworkX 2.4;
        # inducing each component with g.subgraph() works on old and new
        # releases alike.
        lvl = [
            approx_min_conductance_partitioning(g.subgraph(component), max_k)
            for component in nx.connected_components(g)
        ]
        assert len(lvl) > 0
        return lvl

    # NOTE(review): assumes the entries of the Fiedler vector follow the
    # iteration order of g.nodes() -- confirm for LightMultiGraph.
    fiedler_vector = nx.fiedler_vector(g, method='lanczos')

    # Positions ordered by Fiedler value, largest first.
    ranked = sorted(range(len(fiedler_vector)),
                    key=lambda pos: fiedler_vector[pos],
                    reverse=True)
    half_idx = len(ranked) // 2  # floor division

    p1 = {node_list[pos] for pos in ranked[:half_idx]}
    p2 = {node_list[pos] for pos in ranked[half_idx:]}

    sg1 = g.subgraph(p1)
    sg2 = g.subgraph(p2)

    iter_count = 0
    while not (nx.is_connected(sg1) and nx.is_connected(sg2)):
        sg1 = g.subgraph(p1)
        sg2 = g.subgraph(p2)

        # Hack to check and fix non connected subgraphs: each side keeps only
        # its largest component and hands every other component to the
        # opposite side.
        if not nx.is_connected(sg1):
            for component in sorted(nx.connected_components(sg1),
                                    key=len,
                                    reverse=True)[1:]:
                p2.update(component)
                p1.difference_update(component)

            sg2 = g.subgraph(p2)  # updating sg2 since p2 has changed

        if not nx.is_connected(sg2):
            for component in sorted(nx.connected_components(sg2),
                                    key=len,
                                    reverse=True)[1:]:
                p1.update(component)
                p2.difference_update(component)
            # sg1/sg2 are intentionally not rebuilt here: the loop header
            # re-tests them and the next pass rebuilds both from p1/p2.

        iter_count += 1

    if iter_count > 2:
        print('it took {} iterations to stabilize'.format(iter_count))

    assert nx.is_connected(sg1) and nx.is_connected(
        sg2), "subgraphs are not connected in cond"

    lvl = [
        approx_min_conductance_partitioning(sg1, max_k),
        approx_min_conductance_partitioning(sg2, max_k),
    ]

    assert (len(lvl) > 0)
    return lvl
def sccf_helper(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
  """
  Splits the network into clusters using the sign of the normalized Fiedler
  vector and picks, from each cluster, the nodes with the highest closeness
  centrality.

  Parameters:
    seed_num: Number of seeds wanted; only used to decide how many clusters
      to aim for.
    graph: Already-loaded graph. Takes precedence over the JSON arguments.
    graph_json_filename: Passed to util.load_graph when graph is None.
    graph_json_str: Passed to util.load_graph when both graph and
      graph_json_filename are None.

  Return: Tuple (candidate_nodes, candidate_neighbors) where candidate_nodes
    is the list of chosen nodes and candidate_neighbors maps each chosen node
    to the list of its neighbors in the full graph.
  """

  # parse the graph
  if graph is not None:
    G = graph
  elif graph_json_filename is not None:
    G = util.load_graph(graph_json_filename=graph_json_filename)
  else:
    G = util.load_graph(graph_json_str=graph_json_str)

  # initialize queue for subgraphs
  # try to get about 2 nodes in each cluster
  node_per_cluster = 2
  # NOTE(review): `/` floors for int operands on Python 2 (the `Queue` module
  # name suggests Python 2) but not on Python 3 -- confirm which is intended.
  max_depth = int(math.ceil(np.log2(seed_num / node_per_cluster))) + 1
  target_clusters = 2 ** max_depth

  cluster_queue = Queue.Queue()
  cluster_queue.put(G)

  # divide graph into 2**max_depth clusters
  # `stalled` counts consecutive clusters that were put back unsplit. Once it
  # reaches the queue size every queued cluster is too small (or the queue is
  # empty), so no further progress is possible; without this guard the loop
  # would spin forever or block on get().
  stalled = 0
  while cluster_queue.qsize() < target_clusters:
    if stalled >= cluster_queue.qsize():
      break
    G_curr = cluster_queue.get()
    # work only on the largest connected component
    # (nx.connected_component_subgraphs was removed in NetworkX 2.4)
    largest_component = max(nx.connected_components(G_curr), key=len)
    G_curr_c = G_curr.subgraph(largest_component)
    if G_curr_c.size() < 2 * node_per_cluster:
      # put it back if cluster is too small
      cluster_queue.put(G_curr_c)
      stalled += 1
      continue
    stalled = 0
    # get fiedler vector
    fiedler_vector = nx.fiedler_vector(G_curr_c, normalized=True, tol=1e-01)
    node_list_sub_1 = []
    node_list_sub_2 = []
    # split positive and negative terms in fiedler vector; pairing by
    # iteration avoids indexing G.nodes(), which is not a list on NetworkX 2.x
    for node, value in zip(G_curr_c.nodes(), fiedler_vector):
      if value >= 0:
        node_list_sub_1.append(node)
      else:
        node_list_sub_2.append(node)
    # separate the graph into two subgraphs, ignoring clusters too small
    for node_list_sub in (node_list_sub_1, node_list_sub_2):
      if len(node_list_sub) >= node_per_cluster:
        cluster_queue.put(G_curr_c.subgraph(node_list_sub))

  # get node_per_cluster most central nodes from each cluster
  candidate_nodes = []
  candidate_neighbors = {}
  while not cluster_queue.empty():
    G_curr = cluster_queue.get()
    # measure used to pick node with in clusters
    closeness = nx.closeness_centrality(G_curr)
    top_nodes = sorted(closeness, key=closeness.get,
                       reverse=True)[:node_per_cluster]
    for node in top_nodes:
      # candidate_neighbors has exactly the nodes of candidate_nodes as keys,
      # so it doubles as a constant-time "already chosen" check
      if node not in candidate_neighbors:
        candidate_nodes.append(node)
        candidate_neighbors[node] = list(nx.all_neighbors(G, node))

  # return candidate nodes and neighbors
  return candidate_nodes, candidate_neighbors
Exemplo n.º 33
0
        ursi = graph_filename[4:13]
        ursis.append(ursi)

        try:
            y[i, :] = np.hstack(
                (df1.loc[ursi, 'CCI'], df1.loc[ursi, :].iloc[4:].as_matrix()))

            graph_data = np.load(root_dir + graphtype + '/' + graph_filename)
            # print('graph shape:', graph_data.shape)

            g = nx.Graph(graph_data)

            laplacian = nx.laplacian_matrix(g)
            spectrum = nx.laplacian_spectrum(g)
            connectivity = nx.fiedler_vector(g)

            communities = nx.k_clique_communities(g, 5)

            print('Laplacian shape:', laplacian.shape)
            print('Spectrum shape:', spectrum.shape)

            print('Connectivity:', connectivity)
            print('Communities:', communities.shape)

            # feature extraction
            rich_coeff_at_degree = rich_club_coefficient(g, normalized=False)
            rich_keys = list(rich_coeff_at_degree.keys())
            rich_vals = list(rich_coeff_at_degree.values())

            rich_hist, bin_edges = np.histogram(rich_vals, n_roi // 10)
Exemplo n.º 34
0
def calcularVetorFiedlerNetworkx(grafo):
    """Return the Fiedler vector of ``grafo`` computed by NetworkX with its default settings."""
    vetor_fiedler = nx.fiedler_vector(grafo)
    return vetor_fiedler
Exemplo n.º 35
0
#!/usr/bin/env python
import networkx as nx
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Build a small undirected example graph, print its nodes and Fiedler vector,
# and save a drawing of it.
G = nx.Graph()
G.add_nodes_from([0, 7, 9, 10, 19])
G.add_edges_from([
    (0, 9),
    (0, 7),
    (7, 9),
    (7, 10),
    (7, 19),
    (10, 19),
])

# Single-argument print(...) produces the same output on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print(G.nodes())
print(nx.fiedler_vector(G, method='lobpcg'))

# The circular layout that used to be computed here was never passed to
# nx.draw, so the drawing still uses nx.draw's default layout.
plt.clf()
nx.draw(G, with_labels=True)
plt.savefig('C:/Users/Branko/Desktop/ZKZFPRUSWCYSGT-UHFFFAOYSA-N_unoriented.png')
Exemplo n.º 36
0
def spectrum_cluster(seed_num, graph_json_filename=None, graph_json_str=None):
  """
  Identifies clusters in the network using the laplacian spectrum (sign of
  the normalized Fiedler vector), takes the highest-degree nodes of each
  cluster as candidates, and randomly draws seed_num of those candidates.

  Parameters:
    seed_num: Number of nodes to choose.
    graph_json_filename: Filename where the adjacency list lives as JSON.
    graph_json_str: Graph as an adjacency list string in JSON.

  Return: List of the chosen nodes.

  Raises:
    ValueError: from np.random.choice when fewer than seed_num candidate
      nodes were found (sampling is done without replacement).
  """

  # parse the graph
  if graph_json_str is None:
    G = util.load_graph(graph_json_filename=graph_json_filename)
  else:
    G = util.load_graph(graph_json_str=graph_json_str)

  # initialize queue for subgraphs
  # try to get about 2 nodes in each cluster
  node_per_cluster = 2
  # NOTE(review): `/` floors for int operands on Python 2 (the `Queue` module
  # name suggests Python 2) but not on Python 3 -- confirm which is intended.
  max_depth = int(math.ceil(np.log2(seed_num / node_per_cluster))) + 1
  target_clusters = 2 ** max_depth

  cluster_queue = Queue.Queue()
  cluster_queue.put(G)

  # divide graph into 2**max_depth clusters
  # `stalled` counts consecutive clusters that were put back unsplit. Once it
  # reaches the queue size every queued cluster is too small (or the queue is
  # empty), so no further progress is possible; without this guard the loop
  # would spin forever or block on get().
  stalled = 0
  while cluster_queue.qsize() < target_clusters:
    if stalled >= cluster_queue.qsize():
      break
    G_curr = cluster_queue.get()
    # work only on the largest connected component
    # (nx.connected_component_subgraphs was removed in NetworkX 2.4)
    largest_component = max(nx.connected_components(G_curr), key=len)
    G_curr_c = G_curr.subgraph(largest_component)
    if G_curr_c.size() < 2 * node_per_cluster:
      # put it back if cluster is too small
      cluster_queue.put(G_curr_c)
      stalled += 1
      continue
    stalled = 0
    # get fiedler vector
    fiedler_vector = nx.fiedler_vector(G_curr_c, normalized=True, tol=1e-04)
    node_list_sub_1 = []
    node_list_sub_2 = []
    # split positive and negative terms in fiedler vector; pairing by
    # iteration avoids indexing G.nodes(), which is not a list on NetworkX 2.x
    for node, value in zip(G_curr_c.nodes(), fiedler_vector):
      if value >= 0:
        node_list_sub_1.append(node)
      else:
        node_list_sub_2.append(node)
    # separate the graph into two subgraphs, ignoring clusters too small
    for node_list_sub in (node_list_sub_1, node_list_sub_2):
      if len(node_list_sub) >= node_per_cluster:
        cluster_queue.put(G_curr_c.subgraph(node_list_sub))

  # get node_per_cluster highest degree nodes from each cluster
  candidate_nodes = []
  while not cluster_queue.empty():
    G_curr = cluster_queue.get()
    # measure used to pick node with in clusters; dict() accepts both the
    # plain dict of NetworkX 1.x and the (node, degree) view of 2.x
    degree_dict = dict(nx.degree(G_curr))
    candidate_nodes.extend(
        sorted(degree_dict, key=degree_dict.get,
               reverse=True)[:node_per_cluster])
  # randomly pick seed_num nodes from candidate_nodes
  rtn = list(np.random.choice(candidate_nodes, replace=False, size=seed_num))
  return rtn