Example No. 1
def generate_grid_graph():
    """Generates k cuts for grid graphs"""
    k = int(input("k for grid graph:"))
    trials = int(input("number of trials:"))
    gridfname = input("output file:")
    gridfname = "hard_instances/" + gridfname
    gridfile = open(gridfname, "wb", 0)
    n = int(input("Number of dimensions: "))
    d = []
    for i in range(0, n):
        tmp = int(input("Size of dimension " + str(i + 1) + ": "))
        d.append(tmp)
    G = nx.grid_graph(dim=d)
    A = nx.adjacency_matrix(G).toarray()
    L = nx.normalized_laplacian_matrix(G).toarray()
    (tmpw, tmpv) = la.eigh(L, eigvals=(0, 1))
    tmp = 2 * math.sqrt(tmpw[1])
    print("cheeger upperbound:" + str(tmp))
    (w, v) = spectral_projection(L, k)
    lambda_k = w[k - 1]
    k_cuts_list = lrtv(A, v, k, lambda_k, trials, gridfile)
    plotname = gridfname + "plot"
    plot(k_cuts_list, plotname)
    tmp_str = "Grid graph of dimension: " + str(d) + "\n"
    tmp_str += "k = " + str(k) + ", "
    tmp_str += "trials = " + str(trials) + "\n\n\n"
    tmp_str = tmp_str.encode("utf-8")
    gridfile.write(tmp_str)
    for i in range(len(k_cuts_list)):
        k_cuts = k_cuts_list[i]
        tmp_str = list(map(str, k_cuts))
        tmp_str = " ".join(tmp_str)
        tmp_str += "\n\n"
        tmp_str = tmp_str.encode("utf-8")
        gridfile.write(tmp_str)
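
For reference, a minimal standalone sketch of the "cheeger upperbound" computation used above, assuming only networkx and scipy (spectral_projection, lrtv, and plot are project-specific helpers not reproduced here):

import math
import networkx as nx
import scipy.linalg as la

G = nx.grid_graph(dim=[4, 4])
L = nx.normalized_laplacian_matrix(G).toarray()
w = la.eigvalsh(L)  # all eigenvalues, in ascending order
print("cheeger upperbound:" + str(2 * math.sqrt(w[1])))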
Example No. 2
def generate_dyn_heat(G, s, jump, n):
	Fs = []
	L = networkx.normalized_laplacian_matrix(G)
	L = L.todense()
	F0s = []	
	seeds = []

	for i in range(s):
		F0 = numpy.zeros(len(G.nodes()))
		v = random.randint(0, len(G.nodes())-1)
		seeds.append(v)
		F0[v] = len(G.nodes())
		F0s.append(F0)

	Fs.append(numpy.sum(F0s, axis=0))

	for j in range(n):
		FIs = []
		for i in range(s):
			FI = numpy.multiply(linalg.expm(-j*jump*L), F0s[i])[:,seeds[i]]
			FIs.append(FI)
		
		Fs.append(numpy.sum(FIs, axis=0))

	return numpy.array(Fs)[1:]
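
A hedged usage sketch for the function above; the graph and parameters are illustrative, and the imports mirror what the body assumes (networkx, numpy, random, scipy.linalg):

import networkx
import numpy
import random
from scipy import linalg

G = networkx.karate_club_graph()
F = generate_dyn_heat(G, s=3, jump=0.1, n=5)  # 3 random heat sources, 5 snapshots
print(F.shape)  # (5, number of nodes)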
Example No. 3
def spectral_clustering(graph, n_cluster):
	"""
	return the prediction of kmeans model of the spectral clustering
	"""
	Lap_nom = nx.normalized_laplacian_matrix(graph).todense()
	eig_val, eig_vec = np.linalg.eigh(Lap_nom)  # symmetric matrix: eigh gives real, ascending eigenvalues
	k = 10
	selected_vec = np.zeros([len(eig_val), k])
	thr = sorted(eig_val)[k-1]
	ind = 0

	for i in range(len(eig_val)):
		if eig_val[i] <= thr and ind < k:
			selected_vec[:,ind] = np.array(eig_vec)[:,i]
			ind += 1
	
	# X = selected_vec
	cluster_km = KMeans(n_clusters = n_cluster,max_iter = 10000,tol = 0.00001)
	
	features_spectre = selected_vec
	cluster_km.fit(features_spectre)
	pred = cluster_km.predict(selected_vec)

	dict_predict = {}
	for i, node in enumerate(graph.nodes()):
		dict_predict[node] = int(pred[i])
			
	return dict_predict
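
A hedged usage sketch (the function assumes nx, np, and sklearn's KMeans are already imported):

import networkx as nx
import numpy as np
from sklearn.cluster import KMeans

G = nx.karate_club_graph()
pred = spectral_clustering(G, n_cluster=2)  # {node: cluster id}
print(pred)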
Example No. 4
def normalized_laplacian_spectrum(G, weight='weight'):
    """Return eigenvalues of the Laplacian of G

    Parameters
    ----------
    G : graph
       A NetworkX graph

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    evals : NumPy array
      Eigenvalues

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_matrix for other options.

    See Also
    --------
    laplacian_matrix
    """
    from scipy.linalg import eigvalsh
    #return eigvalsh(nx.normalized_laplacian_matrix(G,weight=weight).todense())
    import scipy.sparse as sparse

    w, v = sparse.linalg.eigsh(nx.normalized_laplacian_matrix(G, weight=weight), which='SM')
    return w
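
Note that sparse.linalg.eigsh defaults to k=6, so this variant returns only the six smallest eigenvalues; the commented-out eigvalsh call returns the full spectrum. A hedged usage sketch:

import networkx as nx

G = nx.path_graph(20)
print(normalized_laplacian_spectrum(G))  # six smallest eigenvalues of the normalized Laplacian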
Example No. 5
    def test_normalized_laplacian(self):
        "Generalized Graph Laplacian"
        GL=numpy.array([[ 1.00, -0.408, -0.408, -0.577,  0.00],
                        [-0.408,  1.00, -0.50,  0.00 , 0.00],
                        [-0.408, -0.50,  1.00,  0.00,  0.00],
                        [-0.577,  0.00,  0.00,  1.00,  0.00],
                        [ 0.00,  0.00,  0.00,  0.00,  0.00]])
        Lsl = numpy.array([[ 0.75  , -0.2887, -0.2887, -0.3536,  0.],
                           [-0.2887,  0.6667, -0.3333,  0.    ,  0.],
                           [-0.2887, -0.3333,  0.6667,  0.    ,  0.],
                           [-0.3536,  0.    ,  0.    ,  0.5   ,  0.],
                           [ 0.    ,  0.    ,  0.    ,  0.    ,  0.]])

        assert_almost_equal(nx.normalized_laplacian_matrix(self.G),GL,decimal=3)
        assert_almost_equal(nx.normalized_laplacian_matrix(self.MG),GL,decimal=3)
        assert_almost_equal(nx.normalized_laplacian_matrix(self.WG),GL,decimal=3)
        assert_almost_equal(nx.normalized_laplacian_matrix(self.WG,weight='other'),GL,decimal=3)
        assert_almost_equal(nx.normalized_laplacian_matrix(self.Gsl), Lsl, decimal=3)
Example No. 6
def my_algebraic_connectivity(graph, normalise=False):
    if normalise:
        eigvals, eigvecs = sp.sparse.linalg.eigsh(nx.normalized_laplacian_matrix(graph).asfptype(), 2, which='SA')
        a = eigvals[1]
    else:
        eigvals, eigvecs = sp.sparse.linalg.eigsh(nx.laplacian_matrix(graph).asfptype(), 2, which='SA')
        a = eigvals[1]
    if a < MACHINE_EPSILON: a = 0.0
    return a
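
A hedged sanity check for the helper above, assuming sp/nx are the usual aliases and MACHINE_EPSILON is something like numpy.finfo(float).eps; the result should agree with nx.algebraic_connectivity:

import numpy as np
import networkx as nx
import scipy as sp
import scipy.sparse.linalg  # ensures sp.sparse.linalg is available

MACHINE_EPSILON = np.finfo(float).eps
G = nx.path_graph(10)
print(my_algebraic_connectivity(G))  # second-smallest Laplacian eigenvalue
print(nx.algebraic_connectivity(G))  # reference value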
Example No. 7
def find_nonzero_eigenvalues_magnitudes(g, edge_weight='weight'):
    print('  Calculating normalized laplacian ...')
    L = nx.normalized_laplacian_matrix(g, weight=edge_weight)
    print('  Calculating eigenvalues of laplacian ...')
    ev_i = np.absolute(np.linalg.eigvals(L.A.astype(np.float64)))
    nz_ix = ev_i != 0
    if np.sum(nz_ix) <= 0:
        raise RuntimeError('All eigenvalues zero for the Laplacian of supplied graph')
    return ev_i[nz_ix]
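
A hedged usage sketch (with the Python 3 prints above):

import numpy as np
import networkx as nx

mags = find_nonzero_eigenvalues_magnitudes(nx.karate_club_graph())
print(len(mags), max(mags))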
Example No. 8
	def transform(self, _F):
		"""
		"""
		(self.G, self.F) = build_stacked_graph_dense(self.G_unstacked, _F)
		L = networkx.normalized_laplacian_matrix(self.G)
		L = L.todense()
		self.U, self.lamb_str = compute_eigenvectors_and_eigenvalues(L)

		return graph_fourier(self.F, self.U)
Example No. 9
def generate_product_graph():
    """Generates k cuts for cartesian product of a path and a double tree"""
    k = int(input("k for product of tree & path:"))
    trials = int(input("number of trials:"))
    prodfname = input("output file:")
    prodfname = "hard_instances/" + prodfname
    prodfile = open(prodfname, "wb", 0)
    h = int(input("height of the tree: "))
    H1 = nx.balanced_tree(2, h)
    H2 = nx.balanced_tree(2, h)
    H = nx.disjoint_union(H1, H2)
    n = H.number_of_nodes()
    p = 2 ** (h + 1) - 1  # root of the second tree; must be an int, not the float math.pow returns
    H.add_edge(0, p)
    n = 4 * math.sqrt(n)
    n = math.floor(n)
    print("Length of path graph: " + str(n))
    G = nx.path_graph(n)
    tmpL = nx.normalized_laplacian_matrix(G).toarray()
    T = nx.cartesian_product(G, H)
    A = nx.adjacency_matrix(T).toarray()
    L = nx.normalized_laplacian_matrix(T).toarray()
    (tmpw, tmpv) = la.eigh(L)
    tmp = 2 * math.sqrt(tmpw[1])
    print("cheeger upperbound:" + str(tmp))
    (w, v) = spectral_projection(L, k)
    lambda_k = w[k - 1]
    tmp_str = "Cartesian product of balanced tree of height " + str(h)
    tmp_str += " and path of length " + str(n - 1) + "\n"
    tmp_str += "k = " + str(k) + ", "
    tmp_str += "trials = " + str(trials) + "\n\n\n"
    tmp_str = tmp_str.encode("utf-8")
    prodfile.write(tmp_str)
    k_cuts_list = lrtv(A, v, k, lambda_k, trials, prodfile)
    plotname = prodfname + "plot"
    plot(k_cuts_list, plotname)
    for i in range(len(k_cuts_list)):
        k_cuts = k_cuts_list[i]
        tmp_str = list(map(str, k_cuts))
        tmp_str = " ".join(tmp_str)
        tmp_str += "\n\n"
        tmp_str = tmp_str.encode("utf-8")
        prodfile.write(tmp_str)
Example No. 10
def write_adj_mat(G, fileobj=sys.stdout):
    """Write G to a sparse matrix format that Julia and Matlab can read."""
    lapmatrix = nx.laplacian_matrix(G)
    norm_lapl = nx.normalized_laplacian_matrix(G)
    adjmatrix = nx.adjacency_matrix(G)
    mdict = {'laplacian': lapmatrix,
             'norm_lapl': norm_lapl,
             'adjacency': adjmatrix}
    sio.savemat(fileobj, mdict)
    return mdict
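
A hedged round-trip sketch; the file name is illustrative, and scipy.io.loadmat restores the three matrices in sparse form:

import networkx as nx
import scipy.io as sio

G = nx.karate_club_graph()
with open('karate.mat', 'wb') as fobj:  # illustrative file name
    write_adj_mat(G, fobj)
loaded = sio.loadmat('karate.mat')
print(loaded['norm_lapl'].shape)  # (34, 34)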
Example No. 11
    def do(self):
        logger.info("SPECTRAL Eigenvalues %i" % (self.spectral_eigenvalues, ))
        logger.info("SPECTRAL Dimensions %i" % (self.spectral_dimensions,))
        logger.info("SPECTRAL Laplacian %i" % (self.spectral_laplacian,))
        logger.info("SPECTRAL Eigenvector %i" % (self.spectral_eigenvector,))

        logger.info("SPECTRAL Compute Neuron Graph")
        neuron_graph = self.database.db2graph()

        logger.info("SPECTRAL Compute Laplacian Matrix")
        if self.spectral_laplacian == Config.SPECTRAL_LAPLACIAN_DIRECTED:
            neuron_graph_laplacian = networkx.directed_laplacian_matrix(neuron_graph, weight=None,
                                                                        walk_type='pagerank', alpha=0.95)
        elif self.spectral_laplacian == Config.SPECTRAL_LAPLACIAN_UNDIRECTED:
            neuron_graph = neuron_graph.to_undirected()
            neuron_graph_laplacian = networkx.normalized_laplacian_matrix(neuron_graph, weight=None)
        else:
            logger.critical("SPECTRAL Laplacian Method not supported")
            raise RuntimeError()

        logger.info("SPECTRAL Compute Lapalcian Eigenvectors")
        if self.spectral_eigenvector == Config.EIGENVECTOR_LEFT:
            eigenvalues, eigenvectors = scipy.sparse.linalg.eigen.arpack.eigs(neuron_graph_laplacian,
                                                                              self.spectral_eigenvalues,
                                                                              sigma=0, which='LM')
        elif self.spectral_eigenvector == Config.EIGENVECTOR_RIGHT:
            eigenvalues, eigenvectors = scipy.sparse.linalg.eigen.arpack.eigs(neuron_graph_laplacian,
                                                                              self.spectral_eigenvalues)
        else:
            logger.critical("SPECTRAL Eigenvector direction not supported")
            raise RuntimeError()

        points = eigenvectors.real[:, :self.spectral_dimensions]

        database_session = self.database.new_session()
        db_populations = {p.id: p for p in
                          database_session.query(orm.population.Population).filter_by(shadow=False).all()}

        self.data = dict()
        for neuron, neuron_data in neuron_graph.nodes_iter(data=True):
            pop_id = neuron_data['p']
            
            if pop_id not in self.data:
                self.data[pop_id] = dict()
                self.data[pop_id]['label'] = db_populations[pop_id].label
                self.data[pop_id]['neurons'] = db_populations[pop_id].neurons
                self.data[pop_id]['neurons_core'] = db_populations[pop_id].neurons_core
                self.data[pop_id]['space'] = dict()
                self.data[pop_id]['clusters'] = dict()
                self.data[pop_id]['nodes'] = dict()

            pop_neuron_id = neuron_data['n']
            self.data[pop_id]['space'][pop_neuron_id] = points[neuron_graph.nodes().index(neuron)]

        return self.data
Example No. 12
def eigenvalues(graph):
    try:
        import numpy.linalg as linal
        eigenvalues = linal.eigvals
    except ImportError:
        raise ImportError("numpy can not be imported.")

    L = nx.normalized_laplacian_matrix(graph)
    eigen_values = eigenvalues(L.A)

    return sorted(eigen_values, reverse=True)[:25]
Example No. 13
def largest_eigenvector(G):
    """Return the largest eigenvector of a graph G."""
    L = nx.normalized_laplacian_matrix(G)

    eigenvalues, eigenvectors = np.linalg.eig(L.A)  # densify: np.linalg.eig rejects sparse input

    # highest eigenvalue index and ...
    ind = np.argmax(eigenvalues)
    # ... its corresponding eigenvector.
    largest = eigenvectors[:, ind]

    return dict(zip(G, largest))
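
A hedged usage sketch (with the dense-matrix fix above):

import numpy as np
import networkx as nx

G = nx.path_graph(6)
vec = largest_eigenvector(G)  # {node: eigenvector entry}
print(max(vec, key=lambda u: abs(vec[u])))  # node with the largest-magnitude entry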
Example No. 14
	def set_graph(self, _G):
		self.G = _G
		L = networkx.normalized_laplacian_matrix(self.G)
		L = L.todense()
		self.U, self.lamb_str = compute_eigenvectors_and_eigenvalues(L)
		lamb_max = max(self.lamb_str.real)
		K = 10
		J = 10
		gamma = comp_gamma()
		self.T = comp_scales(lamb_max, K, J)
		self.w = graph_wavelets(self.lamb_str.real, self.U.real, range(len(self.G.nodes())), self.T)
		self.s = graph_low_pass(self.lamb_str.real, self.U.real, range(len(self.G.nodes())), self.T, gamma, lamb_max, K)
Example No. 15
	def transform(self, _F):
		"""
		"""
		(self.G, self.F) = build_stacked_graph_dense(self.G_unstacked, _F)
		L = networkx.normalized_laplacian_matrix(self.G)
		L = L.todense()
		self.U, self.lamb_str = compute_eigenvectors_and_eigenvalues(L)
		lamb_max = max(self.lamb_str)
		gamma = comp_gamma()
		K = 10
		J = 10
		self.T = comp_scales(lamb_max, K, J)
		self.w = graph_wavelets(self.lamb_str, self.U, range(len(self.G.nodes())), self.T)
		self.s = graph_low_pass(self.lamb_str, self.U, range(len(self.G.nodes())), self.T, gamma, lamb_max, K)

		return hammond_wavelet_transform(self.w, self.s, self.T, self.F)
Example No. 16
def calc_laplacian_matrix(g):
    """
    calc_laplacian_matrix(g)
    calculate directed Laplacian matrix of G = (V, E) (directed)
        L = D - A
    where A is the adjacency matrix and D is the diagonal matrix of node degrees..
    :param g:   graph for processing
    :return:    eigen_values
    """
    logging.info(cs_ref, 'laplacian_matrix')
    g2 = cv.convert_to_directed(g)
    laplacian = nx.normalized_laplacian_matrix(g)
    eigen_values= lg.eigvals(laplacian.A)
    eigen_info = "\tLaplacian : \n\tLargest eigen-value :" + str(max(eigen_values)) + "\n\tSmallest eigen-value : " \
                 + str(min(eigen_values))
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n" + eigen_info)
    print(eigen_info)
    return eigen_values
Example No. 17
def generate_noisy_hypercube():
    """Generates n dimensional noisy hypercube graph, with epsilon noise"""
    k = int(input("k for noisy hypercube: "))
    trials = int(input("number of trials: "))
    cubefname = input("output file:")
    cubefname = "hard_instances/" + cubefname
    cubefile = open(cubefname, "wb", 0)
    n = int(input("dimension of hypercube: "))
    nodes = 2 ** n
    epsilon = float(input("noise:"))
    G = nx.empty_graph(nodes)
    for u in G.nodes():
        for v in G.nodes():
            if u == v:
                continue
            else:
                d = hamming_dist(u, v, n)
                w = epsilon ** d
                G.add_edge(u, v, weight=w)
    A = nx.adjacency_matrix(G).toarray()
    L = nx.normalized_laplacian_matrix(G).toarray()
    (tmpw, tmpv) = la.eigh(L, eigvals=(0, 1))
    tmp = 2 * math.sqrt(tmpw[1])
    print("cheeger upperbound:" + str(tmp))
    (w, v) = spectral_projection(L, k)
    lambda_k = w[k - 1]
    tmp_str = "Noisy hypercube of dimension " + str(n)
    tmp_str += " with noise parameter " + str(epsilon) + "\n"
    tmp_str += "k = " + str(k) + ", "
    tmp_str += "trials = " + str(trials) + "\n\n\n"
    tmp_str = tmp_str.encode("utf-8")
    cubefile.write(tmp_str)
    k_cuts_list = lrtv(A, v, k, lambda_k, trials, cubefile)
    plotname = cubefname + "plot"
    plot(k_cuts_list, plotname)
    for i in range(len(k_cuts_list)):
        k_cuts = k_cuts_list[i]
        tmp_str = list(map(str, k_cuts))
        tmp_str = " ".join(tmp_str)
        tmp_str += "\n\n"
        tmp_str = tmp_str.encode("utf-8")
        cubefile.write(tmp_str)
Example No. 18
 def __init__(self, graph, feature_list=[]):
     self.no_feature = 39
     self.G = graph
     self.nodes = nx.number_of_nodes(self.G)
     self.edges = nx.number_of_edges(self.G)
     self.Lap = nx.normalized_laplacian_matrix(self.G)
     # ??? how to check whether comparable, addable?
     self.eigvals = numpy.linalg.eigvals(self.Lap.A).tolist()
     try:
         self.radius = nx.radius(self.G)
     except nx.exception.NetworkXError:
         self.radius = "ND"
     try:
         self.ecc_dic = nx.eccentricity(self.G)
     except nx.exception.NetworkXError:
         self.ecc_dic = {}
     self.degree_dic = nx.average_neighbor_degree(self.G)
     self.pagerank = nx.pagerank(self.G).values()
     if feature_list == []:
         self.feature_list = list(range(1, self.no_feature + 1))
     else:
         self.feature_list = feature_list
     self.feature_vector = []
     self.feature_time = []
Example No. 19
 def test_buckminsterfullerene(self):
     G = nx.Graph(
         [(1, 10), (1, 41), (1, 59), (2, 12), (2, 42), (2, 60), (3, 6),
          (3, 43), (3, 57), (4, 8), (4, 44), (4, 58), (5, 13), (5, 56),
          (5, 57), (6, 10), (6, 31), (7, 14), (7, 56), (7, 58), (8, 12),
          (8, 32), (9, 23), (9, 53), (9, 59), (10, 15), (11, 24), (11, 53),
          (11, 60), (12, 16), (13, 14), (13, 25), (14, 26), (15, 27),
          (15, 49), (16, 28), (16, 50), (17, 18), (17, 19), (17, 54),
          (18, 20), (18, 55), (19, 23), (19, 41), (20, 24), (20, 42),
          (21, 31), (21, 33), (21, 57), (22, 32), (22, 34), (22, 58),
          (23, 24), (25, 35), (25, 43), (26, 36), (26, 44), (27, 51),
          (27, 59), (28, 52), (28, 60), (29, 33), (29, 34), (29, 56),
          (30, 51), (30, 52), (30, 53), (31, 47), (32, 48), (33, 45),
          (34, 46), (35, 36), (35, 37), (36, 38), (37, 39), (37, 49),
          (38, 40), (38, 50), (39, 40), (39, 51), (40, 52), (41, 47),
          (42, 48), (43, 49), (44, 50), (45, 46), (45, 54), (46, 55),
          (47, 54), (48, 55)])
     for normalized in (False, True):
         if not normalized:
             A = nx.laplacian_matrix(G)
             sigma = 0.2434017461399311
         else:
             A = nx.normalized_laplacian_matrix(G)
             sigma = 0.08113391537997749
         for method in methods:
             try:
                 assert_almost_equal(nx.algebraic_connectivity(
                     G, normalized=normalized, tol=1e-12, method=method),
                     sigma)
                 x = nx.fiedler_vector(G, normalized=normalized, tol=1e-12,
                                       method=method)
                 check_eigenvector(A, sigma, x)
             except nx.NetworkXError as e:
                 if e.args not in (('Cholesky solver unavailable.',),
                                   ('LU solver unavailable.',)):
                     raise
Example No. 20
    def test_normalized_laplacian(self):
        "Generalized Graph Laplacian"
        # fmt: off
        G = np.array([[1., -0.408, -0.408, -0.577, 0.],
                      [-0.408, 1., -0.5, 0., 0.], [-0.408, -0.5, 1., 0., 0.],
                      [-0.577, 0., 0., 1., 0.], [0., 0., 0., 0., 0.]])
        GL = np.array([[1., -0.408, -0.408, -0.577, 0.],
                       [-0.408, 1., -0.5, 0., 0.], [-0.408, -0.5, 1., 0., 0.],
                       [-0.577, 0., 0., 1., 0.], [0., 0., 0., 0., 0.]])
        Lsl = np.array([[0.75, -0.2887, -0.2887, -0.3536, 0.],
                        [-0.2887, 0.6667, -0.3333, 0., 0.],
                        [-0.2887, -0.3333, 0.6667, 0., 0.],
                        [-0.3536, 0., 0., 0.5, 0.], [0., 0., 0., 0., 0.]])
        # fmt: on

        np.testing.assert_almost_equal(
            nx.normalized_laplacian_matrix(self.G,
                                           nodelist=range(5)).todense(),
            G,
            decimal=3,
        )
        np.testing.assert_almost_equal(nx.normalized_laplacian_matrix(
            self.G).todense(),
                                       GL,
                                       decimal=3)
        np.testing.assert_almost_equal(nx.normalized_laplacian_matrix(
            self.MG).todense(),
                                       GL,
                                       decimal=3)
        np.testing.assert_almost_equal(nx.normalized_laplacian_matrix(
            self.WG).todense(),
                                       GL,
                                       decimal=3)
        np.testing.assert_almost_equal(
            nx.normalized_laplacian_matrix(self.WG, weight="other").todense(),
            GL,
            decimal=3,
        )
        np.testing.assert_almost_equal(nx.normalized_laplacian_matrix(
            self.Gsl).todense(),
                                       Lsl,
                                       decimal=3)
Example No. 21
def compute_metrics(fs, outdir, atlas, verb=False):
    """
    Given a set of files and a directory to put things, loads graphs and
    performs set of analyses on them, storing derivatives in a pickle format
    in the desired output location.

    Required parameters:
        fs:
            - Dictionary of lists of files in each dataset
        outdir:
            - Path to derivative save location
        atlas:
            - Name of atlas of interest as it appears in the directory titles
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """

    graphs = loadGraphs(fs, verb=verb)
    nodes = nx.number_of_nodes(list(graphs.values())[0])  # Python 3: dict views are not indexable

    #  Number of non-zero edges (i.e. binary edge count)
    print("Computing: NNZ")
    nnz = OrderedDict((subj, len(nx.edges(graphs[subj]))) for subj in graphs)
    write(outdir, 'number_non_zeros', nnz, atlas)
    print("Sample Mean: %.2f" % np.mean(nnz.values()))

    #  Degree sequence
    print("Computing: Degree Seuqence")
    temp_deg = OrderedDict((subj, np.array(nx.degree(graphs[subj]).values()))
                           for subj in graphs)
    deg = density(temp_deg, nbins=nodes)
    write(outdir, 'degree_distribution', deg, atlas)
    show_means(temp_deg)

    #  Edge Weights
    print("Computing: Edge Weight Sequence")
    temp_ew = OrderedDict((s, [graphs[s].get_edge_data(e[0], e[1])['weight']
                           for e in graphs[s].edges()]) for s in graphs)
    ew = density(temp_ew, nbins=2*nodes)
    write(outdir, 'edge_weight_distribution', ew, atlas)
    show_means(temp_ew)

    #   Clustering Coefficients
    print("Computing: Clustering Coefficient Sequence")
    temp_cc = OrderedDict((subj, nx.clustering(graphs[subj]).values())
                          for subj in graphs)
    ccoefs = density(temp_cc, nbins=2*nodes)
    write(outdir, 'clustering_coefficients', ccoefs, atlas)
    show_means(temp_cc)

    # Scan Statistic-1
    print("Computing: Max Local Statistic Sequence")
    temp_ss1 = scan_statistic(graphs, 1)
    ss1 = density(temp_ss1, nbins=2*nodes)
    write(outdir, 'scan_statistic_1', ss1, atlas)
    show_means(temp_ss1)

    # Eigen Values
    print("Computing: Eigen Value Sequence")
    laplac = OrderedDict((subj, nx.normalized_laplacian_matrix(graphs[subj]))
                         for subj in graphs)
    eigs = OrderedDict((subj, np.sort(np.linalg.eigvals(laplac[subj].A))[::-1])
                       for subj in graphs)
    write(outdir, 'eigen_sequence', eigs, atlas)
    print("Subject Maxes: " + ", ".join(["%.2f" % np.max(eigs[key])
                                         for key in eigs.keys()]))

    scree = OrderedDict((subj, np.cumsum(eigs[subj])/np.sum(eigs[subj]))
                        for subj in eigs)
    write(outdir, 'scree_eigen', scree, atlas)

    # Betweenness Centrality
    print("Computing: Betweenness Centrality Sequence")
    nxbc = nx.algorithms.betweenness_centrality  # For PEP8 line length...
    temp_bc = OrderedDict((subj, nxbc(graphs[subj]).values())
                          for subj in graphs)
    centrality = density(temp_bc, nbins=2*nodes)
    write(outdir, 'betweenness_centrality', centrality, atlas)
    show_means(temp_bc)

    outf = outdir + '/' + atlas + '_summary.png'
Example No. 22
def plot_graph(graph, label=None, cache=False, max_id=None):
    val_map = [
        'cyan', 'red', 'blue', 'magenta', 'gray', 'purple', 'orange', 'yellow',
        'green', 'black', 'pink'
    ]

    if label is not None:
        if len(np.shape(label)) > 1:
            values = [val_map[_] for _ in np.where(label == 1)[1].tolist()]
        else:
            values = [val_map[int(_)] for _ in label]
    else:
        values = None
    # if graph.name == 'Zachary\'s Karate Club':
    #     vals = {'Mr. Hi': 0, 'Officer': 1}
    #     values = [vals[__['club']] for _, __ in graph._node.items()]

    if cache:
        pos = pk.load(open('./pos.pk', 'rb'))
    else:
        pos = nx.fruchterman_reingold_layout(graph, k=0.1, iterations=50)
        pk.dump(pos, open('./pos.pk', 'wb'))
    # pos = nx.circular_layout(graph,scale=1)
    # pos = nx.random_layout(graph)
    # pos = nx.shell_layout(graph)
    # pos = nx.spectral_layout(graph)

    if nx.bipartite.is_bipartite(graph):
        l, r = nx.bipartite.sets(graph)
        pos = {}
        pos.update((node, (1, index)) for index, node in enumerate(l))
        pos.update((node, (2, index)) for index, node in enumerate(r))

    print("\nPlotting a graph...")

    # plot graph
    plt.axis('off')
    plt.figure(1, figsize=(10, 10))
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 3])

    ax0 = plt.subplot(gs[0])
    # plot eigenvalues
    norm_lap = nx.normalized_laplacian_matrix(graph)
    eigval, eigvec = LA.eigh(norm_lap.A)
    ax0.plot(eigval, 'ro')

    ax1 = plt.subplot(gs[1])
    # nx.draw(graph, pos=pos, node_color=values, node_size=15, width=0.1)
    nx.draw(graph,
            pos,
            node_color=values,  # use the label-derived palette computed above
            node_size=20,
            width=0.1,
            cmap=plt.cm.rainbow,
            with_labels=False)
    # color bar
    sm = plt.cm.ScalarMappable(cmap=plt.cm.rainbow,
                               norm=plt.Normalize(vmin=np.min(label),
                                                  vmax=np.max(label)))
    sm._A = []
    divider = make_axes_locatable(ax1)
    cax = divider.append_axes("bottom", size="5%")
    cbar = plt.colorbar(sm,
                        cax=cax,
                        ticks=[-1, -.5, -.25, -.1, 0, .1, .25, .5, 1],
                        orientation='horizontal')
    cbar.ax.tick_params(labelsize=8)
    cbar.ax.set_xlabel('node text in graph is index=val', rotation=360)

    plt.title('original networks w/ eigenvalues')
    plt.show()

    # visualize eigen vectors
    # for _ in range(3):  # len(graph._node)):
    for _ in list(range(3)) + max_id.flatten().tolist():  # len(graph._node)):
        plt.figure(1, figsize=(7, 8))
        fig, ax = plt.subplots(2, 1, num=1)

        gs = gridspec.GridSpec(2, 1, height_ratios=[1, 3])

        # plot eigen vector values
        ax0 = plt.subplot(gs[0])
        cur_eigv = eigvec[:, _]
        print("{}:{:.4f}".format(_, eigval[_]))
        print(["{}:{:.4f}".format(k, v) for k, v in enumerate(cur_eigv)])
        ax0.plot(range(cur_eigv.shape[0]), cur_eigv, 'b--')
        for i, txt in enumerate(cur_eigv):
            ax0.annotate(i, (range(cur_eigv.shape[0])[i], cur_eigv[i]))

        # ax0.set_ylabel('scale')
        ax0.set_xlabel('eigenvector of i={}'.format(_))

        # plot eigen vector as labels on graph
        ax1 = plt.subplot(gs[1])
        nx.draw(graph,
                pos,
                node_color=cur_eigv,
                node_size=20,
                width=0.1,
                cmap=plt.cm.rainbow,
                with_labels=False)

        # label draw
        labels = {
            k: "{}={:.3f}".format(k, v)
            for k, v in enumerate(cur_eigv.tolist())
        }
        pos_higher = {}

        for k, v in pos.items():
            pos_higher[k] = (v[0],
                             v[1] + (1 if random.random() < 0.5 else -1) *
                             random.uniform(0.02, 0.03))
        # nx.draw_networkx_labels(graph, pos_higher, labels, font_size=6)

        # color bar
        sm = plt.cm.ScalarMappable(cmap=plt.cm.rainbow,
                                   norm=plt.Normalize(vmin=np.min(cur_eigv),
                                                      vmax=np.max(cur_eigv)))
        sm._A = []
        divider = make_axes_locatable(ax1)
        cax = divider.append_axes("bottom", size="5%")
        cbar = plt.colorbar(sm,
                            cax=cax,
                            ticks=[-1, -.5, -.25, -.1, 0, .1, .25, .5, 1],
                            orientation='horizontal')
        cbar.ax.tick_params(labelsize=8)
        cbar.ax.set_xlabel('node text in graph is index=val', rotation=360)
        plt.show()
Example No. 23
 def create_er_graph(self):
     self.er_prob = 0.2
     self.er_graph = nx.fast_gnp_random_graph(self.num_vertices,
                                              self.er_prob)
     self.er_normL = nx.normalized_laplacian_matrix(self.er_graph)
     return
Example No. 24
File: main.py  Project: Sngunfei/HSD
def f(graph: nx.Graph):
    es1, _ = np.linalg.eigh(nx.laplacian_matrix(graph).todense())
    es2, _ = np.linalg.eigh(nx.normalized_laplacian_matrix(graph).todense())
    print(es1)
    print(es2)
    return
Example No. 25
    def spectral_clustering(self):
        print("\tSpectral Clustering Data Precomputing ...")
        from sklearn.cluster import KMeans
        from scipy.stats import halfnorm

        for graph, prefix in [
            (self.__drugtarget, "drug_target"),
            (self.__drugdrug, "drug_projection"),
            (self.__targettarget, "target_projection"),
        ]:
            maj = graph.subgraph(
                max(list(nx.connected_components(graph)), key=len))
            L = nx.normalized_laplacian_matrix(graph).toarray()
            evals, evects = np.linalg.eigh(L)
            relevant = [
                n for n, dif in enumerate(np.diff(evals))
                if dif > halfnorm.ppf(0.99, *halfnorm.fit(np.diff(evals)))
            ]
            relevant = [
                relevant[n] for n in range(len(relevant) - 1)
                if relevant[n] + 1 != relevant[n + 1]
            ] + [
                relevant[-1]
            ]  # keeps only the highest value if there are consecutive ones
            n_clusters = (relevant[0] + 1 if (
                relevant[0] > 1
                and relevant[0] + 1 != nx.number_connected_components(graph))
                          else relevant[1] + 1)
            km = KMeans(n_clusters=n_clusters, n_init=100)
            clusters = km.fit_predict(evects[:, :n_clusters])
            L_maj = nx.normalized_laplacian_matrix(maj).toarray()
            evals_maj, evects_maj = np.linalg.eigh(L_maj)
            relevant_maj = [
                n for n, dif in enumerate(np.diff(evals_maj))
                if dif > halfnorm.ppf(0.99, *halfnorm.fit(np.diff(evals_maj)))
            ]
            relevant_maj = [
                relevant_maj[n] for n in range(len(relevant_maj) - 1)
                if relevant_maj[n] + 1 != relevant_maj[n + 1]
            ] + [
                relevant_maj[-1]
            ]  # keeps only the highest value if there are consecutive ones
            n_clusters_maj = (relevant_maj[0] + 1 if (
                relevant_maj[0] > 1
                and relevant_maj[0] + 1 != nx.number_connected_components(maj))
                              else relevant_maj[1] + 1)
            km_maj = KMeans(n_clusters=n_clusters_maj, n_init=100)
            clusters_maj = km_maj.fit_predict(evects_maj[:, :n_clusters_maj])
            name = "data/groups/" + prefix + "_spectral.pickle"
            with open(name, "wb") as bkp:
                pickle.dump(
                    [
                        L,
                        evals,
                        evects,
                        n_clusters,
                        clusters,
                        L_maj,
                        evals_maj,
                        evects_maj,
                        n_clusters_maj,
                        clusters_maj,
                    ],
                    bkp,
                )
            if os.path.isfile(name + ".bkp"):
                os.remove(name + ".bkp")
Example No. 26
    # Whether use GAE embedding
    debuginfoStr('Start Graph Autoencoder training')
    if args.useGAEembedding or args.useBothembedding:
        zDiscret = zOut > np.mean(zOut, axis=0)
        zDiscret = 1.0 * zDiscret
        if args.useGAEembedding:
            zOut = GAEembedding(zDiscret, adj, args)
        elif args.useBothembedding:
            zEmbedding = GAEembedding(zDiscret, adj, args)
            zOut = np.concatenate((zOut, zEmbedding), axis=1)
    debuginfoStr('Graph Autoencoder training finished')

    # For iteration studies
    G0 = nx.Graph()
    G0.add_weighted_edges_from(edgeList)
    nlG0 = nx.normalized_laplacian_matrix(G0)
    # set iteration criteria for converge
    adjOld = nlG0
    # set celltype criteria for converge
    listResultOld = [1 for i in range(zOut.shape[0])]

    # Fill the zeros before EM iteration
    # TODO: better implementation later; for now we do not fill zeros
    if args.zerofillFlag:
        for nz_index in range(len(scData.nz_i)):
            # tmp = scipy.sparse.lil_matrix.todense(scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]])
            # tmp = np.asarray(tmp).reshape(-1)[0]
            tmp = scData.features[scData.nz_i[nz_index], scData.nz_j[nz_index]]
            reconOut[scData.nz_i[nz_index], scData.nz_j[nz_index]] = tmp
        recon = reconOut
Example No. 27
for i in nodelist:
    features.append([
        pagerank[i], triangles[i], deg_centrality[i], core_number[i],
        color_number[i]
    ])
    features_dict[i] = np.array([
        pagerank[i], triangles[i], deg_centrality[i], core_number[i],
        color_number[i]
    ])

features = np.array(features)
nx.set_node_attributes(G, name='x', values=features_dict)

print('Normalizing Laplacian')
## Maybe this normalization is not the right one
adj = nx.normalized_laplacian_matrix(G, nodelist=nodelist).tocoo()
values = adj.data
indices = np.vstack((adj.row, adj.col))

# Yields indices to split data into training, validation and test sets
features = torch.FloatTensor(features)
idx = np.random.permutation(n_train_host)
idx_train = idx[:int(0.8 * n_train_host)]
idx_val = idx[int(0.8 * n_train_host):]
idx_test = np.arange(n_train_host, n)

# Transform the numpy matrices/vectors to torch tensors
y = torch.LongTensor(np.argmax(labels, axis=1))
i = torch.LongTensor(indices)
v = torch.FloatTensor(values)
# .todense(?)
Example No. 28
def make_laplacian_matrix(G):
    a = nx.normalized_laplacian_matrix(G)
    # print(a.todense())
    return a.todense()
Example No. 29
degrees = []
for (_, d) in G.degree():
    degrees.append(d)

# plot degree distribution
degree_sequence = sorted([d for n, d in G.degree()], reverse=True)
dmax = max(degree_sequence)
plt.loglog(degree_sequence, "b-", marker="o")
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")
plt.savefig(os.path.join(out_dir, 'degree_distribution.png'))
plt.close()

plt.matshow(nx.normalized_laplacian_matrix(G).A, cmap='prism')
plt.savefig(os.path.join(out_dir, 'laplacian_norm.png'))
plt.close()

# dict for storing results (will be converted to DataFrame)
metrics = {'method': [], 'precision': [], 'recall': [], 'F1': [], 'Sn': [], 'PPV': [], 'Acc': [], 'use_GO': []}
for method_name, method in zip(['TENE', 'TADW', 'FSCNMF', 'ASNE', 'SINE', 'BANE', 'AE', 'MUSAE'],  # the name of the embedding/community detection method
                                [TENE, TADW, FSCNMF, ASNE, SINE, BANE, AE, MUSAE]):  # the model class
    for use_go in [True, False]:
        if use_go:
            attr_m = coo_matrix(A)
            dataset = dset_file + '_GO'
        else:
            attr_m = coo_matrix(np.ones_like(A))
            dataset = dset_file
        out_dir = 'plots/%s' % dataset
Example No. 30
k=16  # length of each node's vector after dimensionality reduction
ft=open("E:\\node2vec\\node2vec\\graph\\Les.edgelist",'r')
edges=ft.readlines()
g=nx.Graph()
for i in edges:
    edge=i.split()
    g.add_edge(int(edge[0]),int(edge[1]),weight=int(edge[2]))
print(g.edges())
#adjacency matrix (the normalized Laplacian is built further below)
L=nx.adjacency_matrix(g)
# print(L.todense())
L1=np.array(L.todense())
#clustering
# spectral=SpectralClustering(n_clusters=k,affinity='nearest_neighbors',n_neighbors=4,eigen_solver='arpack',n_jobs=20)
# pre=spectral.fit(L1)
#normalized Laplacian matrix
L_new=nx.normalized_laplacian_matrix(g)
a,b=np.linalg.eig(np.array(L_new.todense()))
print(a)
print(b)
#pair each eigenvalue with its eigenvector (the columns of b, hence the transpose)
eig_vec_dic=dict(zip(a,b.T))
result=list(sorted(eig_vec_dic.items(),key=lambda x:x[0]))
coll=[]
for i in range(k):
    coll.append(result[i][1])
#each column of coll holds one node's reduced-dimension vector
coll1=np.array(coll)
print(coll1.shape)
A=coll1.transpose()  # samples after dimensionality reduction

#clustering
Example No. 31
def main_operation():

	#Loading gene interactions JSON file into a variable 
	with open('JSON rows/gene_interactions.json') as json_data:
		interactions = json.load(json_data)


	#Information about Graph Connectivity
	graph_info = interactions["results"]

	#Creating NetworkX instance
	graph = nx.Graph()

	i = 0
	#Extracting the edge relationships
	for edge in graph_info:
		#Adding the edge in NetworkX
		graph.add_edge(edge[0],edge[2])

		if i == 1000:
			break
		i += 1


	#Converting into numpy array
	adjacency_matrix = nx.to_numpy_matrix(graph)

	#Normalized Laplacian Matrix
	normalized_laplacian = nx.normalized_laplacian_matrix(graph)

	#Normal Laplacian Matrix
	laplacian = nx.laplacian_matrix(graph)
    
	#Conversion to dense matrix
	dense_laplacian = laplacian.todense()

	#Conversion into Dense Matrix
	normalized_laplacian_dense = normalized_laplacian.todense()

	#The number of eigen vectors to be obtained for K-means clustering
	eigen_number = 100

	#Function Call for Spectral Clustering
	cluster_labels = spectral_clustering(normalized_laplacian_dense,eigen_number)

	#List of Nodes
	nodes = graph.nodes()

	#Cluster consisting of similar nodes
	clusters = {}

	#Initializing the clusters
	for num in range(0,eigen_number):
		clusters[num] = []

	i = 0

	#Populating with the actual node names in the cluster
	for cluster_number in cluster_labels:
		clusters[cluster_number].append(nodes[i])
		i+=1

	#Cluster Mappings for color
	cluster_dict = create_color_mappings(clusters)

	#Drawing the Graph
	values = [cluster_dict.get(node,0.25) for node in graph.nodes()]
	nx.draw(graph,cmap=plt.get_cmap('jet'),node_color = values)
	plt.show()
Example No. 32
def main():
  parser  = get_parser()
  args  = vars(parser.parse_args())
  print(args)

  ''' draw a graph of citing-users and their follower count 
      output: figures/outfig.pdf
  '''
  if args['do_fcount'] == True:
    print('-'*4, 'draw a graph of citing-users and their follower count')
    draw_citing_users_follower_count()
    exit()


  infname = 'Results/procjson.tsv'
  infname = "Results/clustered_relevant_users.tsv"

  with open(infname) as f:
    lines = f.readlines()
  edges = []
  sourc = []
  for j,l in enumerate(lines):
    l = l.rstrip('\r\n')
    lparts = l.split('\t')
    edgesLst= [np.int64(p.lstrip('[').rstrip(']')) for p in lparts]
    edges.append(tuple(edgesLst))
    sourc.append(edgesLst[0])

  # Add the twitter users' follower network
  # processes this file: twtrs_follower_network.tsv
  plusEdgesLst = convert_follower_network_2edgelist()

  fllwrsEdges =[]
  for x,y in plusEdgesLst:
    x = np.int64(x)
    y = np.int64(y)
    fllwrsEdges.append((x,y))
  
  ####
  #### Builds the basic graph
  ####
  g = nx.Graph()
  g.add_edges_from(edges)
  
  print(nx.info(g))
  print('-'*4,'draw basic network')
  draw_basic_network(g,sourc)
  g.add_edges_from(plusEdgesLst)
  print(nx.info(g))


  if args['do_metrics'] == True:
    print('-'*4,'compute network metrics and write to disk')
    ## \  /
    ##  \/ isualization
    # deg distrib
    snm.get_degree_dist([g],"citeplus", 'orig')

    # write to disk clustering coeffs for this graph
    snm.get_clust_coeff([g], 'orig', 'citeplus')

    # write to disk egienvalue
    snm.network_value_distribution([g], [], 'citeplus')

    if 0:
      L = nx.normalized_laplacian_matrix(g)
      e = np.linalg.eigvals(L.A)
      print("Largest eigenvalue:", max(e))
      print("Smallest eigenvalue:", min(e))
Example No. 33
 def create_sparse_matrix(self, network, normalize=False):
     if normalize:
         return csc_matrix(networkx.normalized_laplacian_matrix(network))
     else:
         return csc_matrix(networkx.laplacian_matrix(network))
Example No. 34
def driver(names, fs, outdir, atlas, verb=False):
    """
    Given a set of files and a directory to put things, loads graphs and
    performs set of analyses on them, storing derivatives in a pickle format
    in the desired output location.

    Required parameters:
        names:
            - List of names of the datasets
        fs:
            - Dictionary of lists of files in each dataset
        outdir:
            - Path to derivative save location
        atlas:
            - Name of atlas of interest as it appears in the directory titles
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """

    graphs = constructGraphDict(names, fs, verb=verb)

    #  Number of non-zero edges (i.e. binary edge count)
    print "Computing: NNZ"
    nnz = OrderedDict()
    for idx, name in enumerate(names):
        nnz[name] = OrderedDict((subj, len(nx.edges(graphs[name][subj])))
                                for subj in graphs[name])
    write(outdir, 'nnz', nnz, atlas)

    #  Degree sequence
    print "Computing: Degree Seuqence"
    deg = OrderedDict()
    for idx, name in enumerate(names):
        temp_deg = OrderedDict((subj, np.array(list(nx.degree(graphs[name][subj]).values())))
                               for subj in graphs[name])
        deg[name] = density(temp_deg)
    write(outdir, 'degree', deg, atlas)

    #  Edge Weights
    print "Computing: Edge Weight Sequence"
    ew = OrderedDict()
    for idx, name in enumerate(names):
        temp_ew = OrderedDict((subj, [graphs[name][subj].get_edge_data(e[0], e[1])['weight']
                              for e in graphs[name][subj].edges()])
                              for subj in graphs[name])
        ew[name] = density(temp_ew)
    write(outdir, 'edgeweight', ew, atlas)

    #   Clustering Coefficients
    print "Computing: Clustering Coefficient Sequence"
    ccoefs = OrderedDict()
    nxc = nx.clustering  # For PEP8 line length...
    for idx, name in enumerate(names):
        temp_cc = OrderedDict((subj, nxc(graphs[name][subj]).values())
                              for subj in graphs[name])
        ccoefs[name] = density(temp_cc)
    write(outdir, 'ccoefs', ccoefs, atlas)

    # Scan Statistic-1
    print "Computing: Scan Statistic-1 Sequence"
    ss1 = OrderedDict()
    for idx, name in enumerate(names):
        temp_ss1 = scan_statistic(graphs[name], 1)
        ss1[name] = density(temp_ss1)
    write(outdir, 'ss1', ss1, atlas)

    # Eigen Values
    print "Computing: Eigen Value Sequence"
    laplacian = OrderedDict()
    eigs = OrderedDict()
    for idx, name in enumerate(names):
        laplacian[name] = OrderedDict((subj, nx.normalized_laplacian_matrix(graphs[name][subj]))
                                      for subj in graphs[name])
        eigs[name] = OrderedDict((subj, np.sort(np.linalg.eigvals(laplacian[name][subj].A))[::-1])
                                 for subj in graphs[name])
    write(outdir, 'eigs', eigs, atlas)

    # Betweenness Centrality
    print "Computing: Betweenness Centrality Sequence"
    centrality = OrderedDict()
    nxbc = nx.algorithms.betweenness_centrality  # For PEP8 line length...
    for idx, name in enumerate(names):
        temp_bc = OrderedDict((subj, nxbc(graphs[name][subj]).values())
                              for subj in graphs[name])
        centrality[name] = density(temp_bc)
    write(outdir, 'centrality', centrality, atlas)
Example No. 35
 def create_ba_graph(self):
     self.ba_graph = nx.barabasi_albert_graph(self.num_vertices, 1)
     self.ba_normL = nx.normalized_laplacian_matrix(self.ba_graph)
Example No. 36
        weight_total += G_u[edge[0]][edge[1]]['weight']

    # Adds the number of nodes, edges, total weights and max eigenvalue to their respective lists
    node_count = G_u.number_of_nodes()  # Gets number of nodes in the egonet

    # TODO:
    #   Only computes the eigenvalue when the egonet has more than one node
    #   If the egonet only has one node it does not have an edge and thus no adjacency matrix to use to compute the eigenvalue
    #   selects the max eigenvalue if edges exist, otherwise sets the eigenvalue to 0
    #   I am not sure if this method is correct or not
    if node_count > 1:
        node_counts.append(node_count)
        edge_counts.append(G_u.number_of_edges())
        weight_totals.append(weight_total)

        L = nx.normalized_laplacian_matrix(
            G_u)  # Computes the normalized Laplacian matrix
        e = np.linalg.eigvals(
            L.A)  # Gets the eigenvalues of the Laplacian

        # Sums the eigenvalues of the Laplacian
        # and adds the total to the eigenvalues list
        eigenvalues.append(sum(e))

#####################################################################################
# Plot on a log-log scale:                                                          #
#   (i) E_u versus V_u for every egonet G_u                                         #
#   (ii) the least squares fit on the median values for each bucket of points after #
#   applying logarithmic binning on the x-axis                                      #
#   (iii) two lines of slope 1 and 2, which correspond to the stars and cliques     #
#   respectively.                                                                   #
#   Additionally, plot (on a separate figure) the value of lamda_wu versus W_u for  #
Example No. 37
def eigenvalue_n(g, n=0):
    L = nx.normalized_laplacian_matrix(g)
    e = np.linalg.eigvals(L.A)
    # eigvals returns eigenvalues in no particular order, so sort before indexing
    return np.real(np.sort(e)[n])
Example No. 38
    def perform_split(
        self,
        character_matrix: pd.DataFrame,
        samples: List[int],
        weights: Optional[Dict[int, Dict[int, float]]] = None,
        missing_state_indicator: int = -1,
    ) -> Tuple[List[str], List[str]]:
        """Partitions the samples using the spectral algorithm.

        First, a similarity graph is generated with samples as nodes such that
        edges between a pair of nodes is some provided function on the number
        of character/state mutations shared. Then, Fiedler's algorithm is used
        to generate a partition on this graph that minimizes a modified
        normalized cut: weight of edges across cut/ min(weight of edges within
        each side of cut). It does this efficiently by first calculating the
        2nd eigenvector of the normalized Laplacian of the similarity matrix.
        Then, it orders the nodes in a graph by the eigenvector values and finds
        an index such that partitioning the ordered nodes on that index
        minimizes the normalized cut ratio. As the optimal partition can be
        determined using the 2nd eigenvector, this greatly reduces the space of
        cuts needed to be explored.

        Args:
            character_matrix: Character matrix
            samples: A list of samples to partition
            weights: Weighting of each (character, state) pair. Typically a
                transformation of the priors.
            missing_state_indicator: Character representing missing data.

        Returns:
            A tuple of lists, representing the left and right partition groups
        """
        G = graph_utilities.construct_similarity_graph(
            character_matrix,
            missing_state_indicator,
            samples,
            similarity_function=self.similarity_function,
            threshold=self.threshold,
            weights=weights,
        )

        L = nx.normalized_laplacian_matrix(G).todense()
        diag = sp.linalg.eig(L)
        second_eigenvector = diag[1][:, 1]
        nodes_to_eigenvector = {}
        vertices = list(G.nodes())
        for i in range(len(vertices)):
            nodes_to_eigenvector[vertices[i]] = second_eigenvector[i]
        vertices.sort(key=lambda v: nodes_to_eigenvector[v])
        total_weight = 2 * sum([G[e[0]][e[1]]["weight"] for e in G.edges()])
        # If the similarity graph is empty and there are no meaningful splits,
        # return a polytomy over the remaining samples
        if total_weight == 0:
            return samples, []
        cut = set()
        numerator = 0
        denominator = 0
        prev_numerator = -1
        best_score = np.inf
        best_index = 0
        for i in range(len(vertices) - 1):
            v = vertices[i]
            cut.add(v)
            cut_edges = 0
            neighbor_weight = 0
            for w in G.neighbors(v):
                neighbor_weight += G[v][w]["weight"]
                if w in cut:
                    cut_edges += G[v][w]["weight"]
            denominator += neighbor_weight
            if i > 0:
                prev_numerator = numerator
            numerator += neighbor_weight - 2 * cut_edges
            # Avoids naively taking the first zero-weight cut. If more samples
            # can be added without changing the cut weight, those samples do not
            # share any similarity with the other side of the partition.
            if numerator != 0 and prev_numerator == 0:
                best_index = i - 1
                break
            if min(denominator, total_weight - denominator) != 0:
                if (numerator / min(denominator, total_weight - denominator) <
                        best_score):
                    best_score = numerator / min(denominator,
                                                 total_weight - denominator)
                    best_index = i
            else:
                best_score = 0
                best_index = i

        improved_left_set = graph_utilities.spectral_improve_cut(
            G, vertices[:best_index + 1])

        improved_right_set = []
        for i in samples:
            if i not in improved_left_set:
                improved_right_set.append(i)

        return improved_left_set, improved_right_set
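
The core step of the method above in isolation: a hedged sketch that computes the second eigenvector of the normalized Laplacian on a toy graph and orders the nodes by it; perform_split then scans this ordering for the cut minimizing the modified normalized cut:

import numpy as np
import networkx as nx
import scipy.linalg

G = nx.barbell_graph(5, 0)  # two 5-cliques joined by a single edge
L = nx.normalized_laplacian_matrix(G).todense()
vals, vecs = scipy.linalg.eigh(L)  # symmetric matrix, so eigh is appropriate
second = np.asarray(vecs)[:, 1]  # eigenvector of the 2nd-smallest eigenvalue
print(sorted(G.nodes(), key=lambda v: second[v]))  # the two cliques separate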
Example No. 39
def gen_laplacian(G, _MODE_):
    if _MODE_ == 1:
        return nx.laplacian_matrix(G)
    else:
        return nx.normalized_laplacian_matrix(G)
Example No. 40
'''If original graph is BA graph'''
G_1 = nx.generators.random_graphs.barabasi_albert_graph(N,5,100)
G_2 = nx.generators.random_graphs.barabasi_albert_graph(N,5,100)

'''If original graph is small world graph'''
#G_1 = nx.generators.random_graphs.watts_strogatz_graph(N,4,0.3,100)
#G_2 = nx.generators.random_graphs.watts_strogatz_graph(N,4,0.3,100)

laplacian_mat_original = nx.laplacian_matrix(G_1)
laplacian_mat_original = laplacian_mat_original.todense()

adj_original = nx.adjacency_matrix(G_1)
adj_original = adj_original.todense()

norm_laplacian_original = nx.normalized_laplacian_matrix(G_1)
norm_laplacian_original = norm_laplacian_original.todense()

deg,cnt = deg_count(G_1)

for batch in range(num_batch):

    for i in range(edges_per_batch):
        G1 = preferential_add(G_1)
        G2 = small_world_add(G_2)
        #if i%200 == 0 and i!=0:
         #   print('%d edges added'%i)
    edges_added = (batch+1)*edges_per_batch
    print('%d edges added'%edges_added)
    print('{} edges added'.format(edges_added),file=output)
    laplacian_mat_1 = nx.laplacian_matrix(G_1)
Example No. 41
import operator
from sklearn.cluster import KMeans
from matplotlib.pyplot import figure

#### Extracting Data from gml File

dolphins_data = nx.read_gml("../data/dolphins/dolphins.gml")

#### Nodes

nodes = np.array(dolphins_data.node)

### Normalized Laplacian Matrix

normalized_laplacian = nx.normalized_laplacian_matrix(dolphins_data,
                                                      nodelist=None,
                                                      weight='weight')
normalized_laplacian = np.array(normalized_laplacian.todense())
#print(normalized_laplacian)

#### Eigen Value and eigen Vector
eigen_value, eigen_vector = np.linalg.eigh(normalized_laplacian)
eigen_vector = eigen_vector.T

#### Eigen Value Sorting
eig_dict = {}  # avoid shadowing the built-in dict
for i in range(len(eigen_value)):
    eig_dict[eigen_value[i]] = eigen_vector[i]

sorted_dict = sorted(eig_dict.items(), key=operator.itemgetter(0))
Example No. 42
def spectral_analysis(G, k=None, normalize=True):
    """Given an input graph (G), number of clusters (k), and whether the graph
    Laplacian is to be normalized (True) or not (False) runs spectral clustering
    on the graph Laplacian using hierarchial method. Clusters are returned as a list of sets,
    where the contents of the first set are the nodes that belong to "cluster 1"

    Returns Partitions (list of sets of ints)
    """
    EIGEN_GAP = 0.1

    if normalize:
        # get_mat = lambda G : nx.normalized_laplacian_matrix(G).todense()
        get_mat = lambda G: nx.normalized_laplacian_matrix(G).asfptype()
    else:
        # get_mat = lambda G : nx.laplacian_matrix(G).todense()
        get_mat = lambda G: nx.laplacian_matrix(G).asfptype()

    partitions = [G]
    while True:
        second_least_eigenvalues = []

        min_partition_eigenvalue = None
        best_partition = None
        partition_eigenvector = None

        for i, partition in enumerate(partitions):
            if len(partition.nodes) > 1:
                mat = get_mat(partition)

                # in the case of having 2 nodes, the 2nd least eigenvalue is the largest eigenvalue
                if len(partition.nodes) == 2:
                    U, s, _ = svds(mat,
                                   k=1,
                                   which='LM',
                                   return_singular_vectors="u")
                    cur_eigenvector = U[:, 0]
                    partition_eigenvalue = s[0]

                # else we can just use the smallest two eigenvalues
                else:
                    U, s, _ = svds(mat,
                                   k=2,
                                   which='SM',
                                   return_singular_vectors="u")
                    cur_eigenvector = U[:, 1]
                    partition_eigenvalue = s[1]

                # _, s, _ = np.linalg.svd(get_mat(partition))
                if min_partition_eigenvalue is None or partition_eigenvalue < min_partition_eigenvalue:
                    best_partition = i
                    partition_eigenvector = cur_eigenvector
                    min_partition_eigenvalue = partition_eigenvalue

        _plot_eigenvalues(s,
                          "eigen/eigenvalues_{}.png".format(len(partitions)))
        _plot_eigenvector(partition_eigenvector,
                          "eigen/eigenvector_{}.png".format(len(partitions)))

        if k is None:
            smallest_eigenvalues = np.array(s[::-1][:10])
            eigen_steps = [
                (smallest_eigenvalues[i] - smallest_eigenvalues[i - 1])
                for i in range(1, len(smallest_eigenvalues))
            ]
            _plot_eigenvalues(smallest_eigenvalues, "smallest_eigenvalues.png")
            _plot_eigenvalues(eigen_steps, "eigen_step.png")

            for i, eigen_step in enumerate(eigen_steps):
                if eigen_step > EIGEN_GAP:
                    k = i + 1
            if k is None:
                k = 1
            print("Partitioning into {} clusters".format(k))

        if len(partitions) >= k:
            break

        new_partitions = _partition_graph(partitions[best_partition],
                                          partition_eigenvector)
        del partitions[best_partition]

        if len(partitions + new_partitions) > k:
            new_partitions = [
                nx.compose(new_partitions[0], new_partitions[1]),
                new_partitions[2]
            ]
        partitions += new_partitions
    print("Completed partitioning w/ {} partitions".format(k))
    # return partitions

    partitions = [set(partition.nodes()) for partition in partitions]
    return partitions
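
A hedged standalone illustration of the EIGEN_GAP idea above: two disjoint cliques (an illustrative choice) produce exactly two near-zero eigenvalues of the normalized Laplacian, and the large step after them suggests k = 2:

import numpy as np
import networkx as nx

G = nx.disjoint_union(nx.complete_graph(5), nx.complete_graph(5))
w = np.sort(np.linalg.eigvalsh(nx.normalized_laplacian_matrix(G).todense()))
print(w[:4])  # ~[0, 0, 1.25, 1.25]: the gap after index 1 suggests k = 2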
Example No. 43
    def __init__(self,
                 G,
                 n=2000,
                 portion=0.02,
                 vectors=[13],
                 spectrum_disp=False,
                 cut_disp=False,
                 vectors_disp=False,
                 k_means_disp=False):
        iters = []
        spectrum = []
        accs = []
        number_of_edges = []
        n = G.number_of_nodes()

        laplacian_matrix = nx.normalized_laplacian_matrix(G)
        vals, vecs = sparse.linalg.eigs(laplacian_matrix.asfptype(),
                                        k=int(portion * (G.n_1 + G.n_2)),
                                        which='SM')
        ground_labels = nx.get_node_attributes(G, 'ground_label')
        optimal_val = 2 * G.b / (G.a + G.b)
        step = 2 * (G.a**2 + G.b**2) * np.log(n) / (G.a + G.b) / n

        if len(vectors) <= 0:
            vec_idxs = [
                i for i in range(int(n * portion)) if vals[i] > optimal_val -
                2 * step and vals[i] < optimal_val + 3 * step
            ]
        else:
            vec_idxs = vectors

        if spectrum_disp:
            sns.set()
            plt.rcParams['figure.figsize'] = [14, 7]

            plt.scatter(vals, [1 for i in range(len(vals))],
                        marker='o',
                        facecolors='none',
                        edgecolors='b')
            plt.axvline(x=optimal_val, linewidth=2, color='black')
            plt.xlabel(r"spectrum")
            plt.ylabel(r"iterations")
            plt.show()

        if cut_disp:
            for i in vec_idxs:
                vector = vecs[:, i]
                vector = vector.astype('float64')
                labels_pred_spectral = checkSign(vector)
                accuracy = max(
                    accuracy_score(labels_pred_spectral, G.ground_labels),
                    1 - accuracy_score(labels_pred_spectral, G.ground_labels))
                accs += [accuracy]
                labels_dict = dict(zip(list(G.nodes), labels_pred_spectral))
                nx.set_node_attributes(G, labels_dict, "label")

                sns.distplot(vector, kde=False, bins=50)
                plt.show()

                sns.distplot([
                    G.nodes[node]["coordinate"]
                    for node in G.nodes if G.nodes[node]['label'] == 0
                ],
                             label="Cluster 0",
                             kde=False,
                             bins=50)
                sns.distplot([
                    G.nodes[node]["coordinate"]
                    for node in G.nodes if G.nodes[node]['label'] == 1
                ],
                             label="Cluster 1",
                             kde=False,
                             bins=50)
                plt.title("i = " + str(i) + ", eigenvalue = " + str(vals[i]) +
                          ", accuracy = " + str(accuracy))
                plt.show()

                coordinates0 = [
                    G.nodes[node]["coordinate"] for node in G
                    if G.nodes[node]['ground_label'] == 0
                ]
                coordinates1 = [
                    G.nodes[node]["coordinate"] for node in G
                    if G.nodes[node]['ground_label'] == 1
                ]

                plt.scatter(coordinates0, vector[:int(n / 2)])
                plt.scatter(coordinates1, vector[int(n / 2):])
                plt.title("i = " + str(i) + ", eigenvalue = " + str(vals[i]) +
                          ", accuracy = " + str(accuracy))
                plt.show()

                dist_dict = nx.shortest_path_length(G, source=0)
                dist = [dist_dict[x] for x in sorted(dist_dict)]
                plt.scatter(dist[1:int(n / 2)], vector[1:int(n / 2)])
                plt.scatter(dist[int(n / 2):], vector[int(n / 2):])
                plt.title("i = " + str(i) + ", eigenvalue = " + str(vals[i]) +
                          ", accuracy = " + str(accuracy))
                plt.show()

        if vectors_disp:
            accs = []
            c_norms = []
            spectra = []

            for i in vec_idxs:
                vector = vecs[:, i]
                vector = vector.astype('float64')
                km = k_means([vector], n)
                labels_pred = dict(zip(list(G.nodes), km['labels']))
                accuracy = G.GetAccuracy(labels_pred)
                accs += [accuracy]
                spectra += [vals[i]]
                c_norms += [np.linalg.norm(sum(km['centers']))]

            sns.set()
            plt.plot(vec_idxs, accs, marker='o', label='Iteration ' + str(i))
            plt.xlabel("Order of eigenvector")
            plt.ylabel("Accuracy")
            plt.show()
            # plt.plot(vec_idxs, c_norms, marker='o', label = 'Iteration ' + str(i))
            # plt.show()
            plt.plot(vec_idxs, spectra, marker='o')
            plt.axhline(y=optimal_val, linewidth=2, color='black')
            plt.show()

        if k_means_disp:
            k = len(vec_idxs)
            accs = []
            c_norms = []
            inerts = []
            min_dists = []
            sum_dists = []
            balances = []

            for j in range(1, k + 1):
                # print([vec_idxs[i] for i in range(j)])
                # km_vectors = [vecs[:,vectors[1]], vecs[:,vectors[j]]]
                km_vectors = [vecs[:, vec_idxs[i]] for i in range(j)]
                km = k_means(km_vectors, n)
                accuracy = max(
                    accuracy_score(km['labels'], G.ground_labels),
                    1 - accuracy_score(km['labels'], G.ground_labels))
                accs += [accuracy]
                c_norms += [np.linalg.norm(sum(km['centers']), ord=2)]
                inerts += [km['inertia'] / j]
                min_dists += [min(km['dists'])]
                sum_dists += [sum(km['dists'])]
                balances += [abs(sum(km['labels']) - n / 2)]

            plt.plot(vec_idxs[:k], accs, marker='o')
            plt.show()
            plt.plot(vec_idxs[:k], c_norms, marker='o', color='red')
            plt.show()
            # plt.plot(vec_idxs[:k], min_dists, marker='o', color = 'red')
            # plt.show()
            # plt.plot(vec_idxs[:k], sum_dists, marker='o', color = 'green')
            # plt.show()
            plt.plot(vec_idxs[:k], balances, marker='o', color='orange')
            plt.show()

        # k_means_vectors = [vecs[:,i] for i in vec_idxs]
        # labels_k_means = k_means(k_means_vectors, n)['labels']
        # print(k_means(k_means_vectors, n)['labels'][:100])
        # accuracy = max(accuracy_score(labels_k_means, G.ground_labels), 1 - accuracy_score(labels_k_means, G.ground_labels))
        # print("Total accuracy after k-means = %.3f" % accuracy)

        self.n_edges = number_of_edges
        self.spectrum = vals
        self.accs = accs
        self.accuracy = accuracy
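
Neither checkSign nor k_means is shown here; checkSign presumably labels each node by the sign of its eigenvector entry. A minimal sketch of that convention (the snake_case name is mine):

import numpy as np

def check_sign(vector):
    # Two-way spectral cut: nodes with non-negative entries form one cluster.
    return (np.asarray(vector).ravel() >= 0).astype(int)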
Example #44
def erdos_renyi_lap(G):
    # Build Erdos-Renyi random graphs matched to the protein graph G: same
    # number of nodes, with edge probability taken from G's edge density.

    lap_test = []
    lap_test1 = []
    lap_test2 = []
    N = nx.number_of_nodes(G)
    E = nx.number_of_edges(G)
    prob_edges = E / ((N * (N - 1)) / 2.0)  # edge probability = empirical density |E| / C(N, 2)
    #print prob_edges
    '''start_time = time.time()
	for i in range(500):
		er_graph = nx.gnp_random_graph(N,prob_edges,seed=None) #number of nodes, probability for edge creation
		lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
		lap = np.eye(lap.shape[0])-lap 
		eigenvalues,eigenvectors = scipy.sparse.linalg.eigsh(lap,k=2)
		lap_sum = (eigenvalues[1]-eigenvalues[0]) 
		lap_test1.append(lap_sum)
	print lap_test1
	print ("--- %s seconds ---" % (time.time() - start_time))
	start_time = time.time()
	for i in range(500):
		er_graph = nx.fast_gnp_random_graph(N,prob_edges,seed=None) #number of nodes, probability for edge creation
		lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
		lap = np.eye(lap.shape[0])-lap 
		eigenvalues,eigenvectors = scipy.sparse.linalg.eigsh(lap,k=2)
		lap_sum = (eigenvalues[1]-eigenvalues[0]) 
		lap_test2.append(lap_sum)
	print lap_test2
	print ("--- %s seconds ---" % (time.time() - start_time))'''
    start_time = time.time()
    for i in range(500):
        er_graph = nx.erdos_renyi_graph(
            N, prob_edges,
            seed=None)  #number of nodes, probability for edge creation
        lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
        lap = np.eye(lap.shape[0]) - lap
        eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh(lap, k=2)
        lap_sum = (eigenvalues[1] - eigenvalues[0])
        lap_test.append(lap_sum)
    print(lap_test)
    print("--- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    for i in range(500):
        er_graph = nx.erdos_renyi_graph(
            N, prob_edges,
            seed=None)  #number of nodes, probability for edge creation
        lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
        lap = np.eye(lap.shape[0]) - lap
        eigenvalues, eigenvectors = np.linalg.eigh(lap)
        lap_sum = (eigenvalues[1] - eigenvalues[0])
        lap_test1.append(lap_sum)  # append the gap, as in the sparse runs above
    print(lap_test1)
    print("--- %s seconds ---" % (time.time() - start_time))
    # using IGRAPH
    start_time = time.time()
    for i in range(500):
        er_graph = Graph.Erdos_Renyi(
            n=N, p=prob_edges, directed=False,
            loops=False)  # undirected, matching the networkx samples above
        lap = er_graph.laplacian(normalized=True)
        e = np.linalg.eigvals(lap)
        lap_test2.append(e)
    print(lap_test2)
    print("--- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()

    for i in range(500):
        er_graph = nx.gnp_random_graph(
            N, prob_edges,
            seed=None)  #number of nodes, probability for edge creation
        '''lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
		lap = np.eye(lap.shape[0])-lap 
		eigenvalues,eigenvectors = scipy.sparse.linalg.eigsh(lap,k=2)
		lap_sum = (eigenvalues[1]-eigenvalues[0]) 
		lap_test1.append(lap_sum)'''
    print(er_graph)
    print("--- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    for i in range(500):
        er_graph = nx.fast_gnp_random_graph(
            N, prob_edges,
            seed=None)  #number of nodes, probability for edge creation
        '''lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
		lap = np.eye(lap.shape[0])-lap 
		eigenvalues,eigenvectors = scipy.sparse.linalg.eigsh(lap,k=2)
		lap_sum = (eigenvalues[1]-eigenvalues[0]) 
		lap_test2.append(lap_sum)'''
    print(er_graph)
    print("--- %s seconds ---" % (time.time() - start_time))
    start_time = time.time()
    for i in range(500):
        er_graph = nx.erdos_renyi_graph(
            N, prob_edges,
            seed=None)  #number of nodes, probability for edge creation
        '''lap = nx.normalized_laplacian_matrix(er_graph, weight='weight')
		lap = np.eye(lap.shape[0])-lap 
		eigenvalues,eigenvectors = scipy.sparse.linalg.eigsh(lap,k=2)
		lap_sum = (eigenvalues[1]-eigenvalues[0]) 
		lap_test.append(lap_sum)'''
    print(er_graph)
    print("--- %s seconds ---" % (time.time() - start_time))
Example #45
degree = list()
for i in range(len(non_isolated)):
    degree.append(assembly_graph.degree[non_isolated[i]])
assembly_graph_degree = np.diag(degree)
assembly_graph_adjacent = nx.adjacency_matrix(assembly_graph, nodelist=non_isolated).A
degree = list()
for i in range(len(non_isolated)):
    degree.append(PE_graph.degree[non_isolated[i]])
PE_graph_degree = np.diag(degree)
PE_graph_adjacent = nx.adjacency_matrix(PE_graph, nodelist=non_isolated).A

F = np.zeros([len(non_isolated), n_bins])
for i in range(len(non_isolated)):
    if non_isolated[i] in contigs_bin: F[i, contigs_bin[non_isolated[i]]] = 1
F_l = F[:binned_cnt, :]  # rows for contigs that are already binned (labeled)
assembly_graph_L = nx.normalized_laplacian_matrix(assembly_graph, nodelist=non_isolated).A
PE_graph_L = nx.normalized_laplacian_matrix(PE_graph, nodelist=non_isolated).A

Obj_fun = list()
alpha = np.array([0.5, 0.5], dtype=np.float64)
for i in range(max_iter):
    all_degree = alpha[0]*assembly_graph_degree + alpha[1]*PE_graph_degree
    all_adjacant = alpha[0]*assembly_graph_adjacent + alpha[1]*PE_graph_adjacent
    all_trans = np.dot(np.linalg.inv(all_degree), all_adjacant)
    all_trans_uu = all_trans[binned_cnt:, binned_cnt:]
    all_trans_ul = all_trans[binned_cnt:, :binned_cnt]
    F_u = np.dot(np.dot(np.linalg.inv(np.eye(all_trans_uu.shape[0]) - all_trans_uu), all_trans_ul), F_l)
    F = np.concatenate((F_l, F_u), axis=0)
    alpha[0] = 0.5/math.sqrt(np.trace(np.dot(np.dot(F.T,assembly_graph_L),F)))
    alpha[1] = 0.5/math.sqrt(np.trace(np.dot(np.dot(F.T,PE_graph_L),F)))
    obj = math.sqrt(np.trace(np.dot(np.dot(F.T,assembly_graph_L),F)))
    Obj_fun.append(obj)  # record the per-iteration objective so Obj_fun is used
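
For reference, the loop is an alternating scheme in the style of auto-weighted multi-graph learning (my reading of the updates, not stated in the original): propagate the known labels F_l through the alpha-weighted random walk, then reweight each graph by the smoothness of the resulting F. In LaTeX, with v ranging over the assembly and PE graphs:

F_u = (I - P_{uu})^{-1} P_{ul} F_l, \qquad
P = \Big(\sum_v \alpha_v D_v\Big)^{-1} \sum_v \alpha_v A_v, \qquad
\alpha_v = \frac{1}{2\sqrt{\operatorname{tr}\!\left(F^\top L_v F\right)}}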
Example #46
def compute_metrics(fs, outdir, atlas, verb=False):
    """
    Given a set of files and a directory to put things, loads graphs and
    performs set of analyses on them, storing derivatives in a pickle format
    in the desired output location.

    Required parameters:
        fs:
            - Dictionary of lists of files in each dataset
        outdir:
            - Path to derivative save location
        atlas:
            - Name of atlas of interest as it appears in the directory titles
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """

    graphs = loadGraphs(fs, verb=verb)

    #  Number of non-zero edges (i.e. binary edge count)
    print("Computing: NNZ")
    nnz = OrderedDict((subj, len(nx.edges(graphs[subj]))) for subj in graphs)
    write(outdir, 'number_non_zeros', nnz, atlas)
    print("Sample Mean: %.2f" % np.mean(nnz.values()))

    #  Degree sequence
    print("Computing: Degree Seuqence")
    temp_deg = OrderedDict(
        (subj, np.array(nx.degree(graphs[subj]).values())) for subj in graphs)
    deg = density(temp_deg)
    write(outdir, 'degree_distribution', deg, atlas)
    show_means(temp_deg)

    #  Edge Weights
    print("Computing: Edge Weight Sequence")
    temp_ew = OrderedDict((s, [
        graphs[s].get_edge_data(e[0], e[1])['weight']
        for e in graphs[s].edges()
    ]) for s in graphs)
    ew = density(temp_ew)
    write(outdir, 'edge_weight_distribution', ew, atlas)
    show_means(temp_ew)

    #   Clustering Coefficients
    print("Computing: Clustering Coefficient Sequence")
    temp_cc = OrderedDict(
        (subj, nx.clustering(graphs[subj]).values()) for subj in graphs)
    ccoefs = density(temp_cc)
    write(outdir, 'clustering_coefficients', ccoefs, atlas)
    show_means(temp_cc)

    # Scan Statistic-1
    print("Computing: Max Local Statistic Sequence")
    temp_ss1 = scan_statistic(graphs, 1)
    ss1 = density(temp_ss1)
    write(outdir, 'scan_statistic_1', ss1, atlas)
    show_means(temp_ss1)

    # Eigen Values
    print("Computing: Eigen Value Sequence")
    laplac = OrderedDict((subj, nx.normalized_laplacian_matrix(graphs[subj]))
                         for subj in graphs)
    eigs = OrderedDict((subj, np.sort(np.linalg.eigvals(laplac[subj].A))[::-1])
                       for subj in graphs)
    write(outdir, 'eigen_sequence', eigs, atlas)
    print("Subject Maxes: " +
          ", ".join(["%.2f" % np.max(eigs[key]) for key in eigs.keys()]))

    scree = OrderedDict(
        (subj, np.cumsum(eigs[subj]) / np.sum(eigs[subj])) for subj in eigs)
    write(outdir, 'scree_eigen', scree, atlas)

    # Betweenness Centrality
    print("Computing: Betweenness Centrality Sequence")
    nxbc = nx.algorithms.betweenness_centrality  # For PEP8 line length...
    temp_bc = OrderedDict(
        (subj, nxbc(graphs[subj]).values()) for subj in graphs)
    centrality = density(temp_bc)
    write(outdir, 'betweenness_centrality', centrality, atlas)
    show_means(temp_bc)

    outf = outdir + '/' + atlas + '_summary.png'
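
Since these connectomes are undirected, the normalized Laplacian is symmetric and np.linalg.eigvalsh is the safer call than eigvals: it is faster and returns real eigenvalues already sorted. A sketch of the same eigen-sequence computation under that assumption:

import networkx as nx
import numpy as np

def eigen_sequence(g):
    # Descending spectrum of the normalized Laplacian of an undirected graph.
    lap = nx.normalized_laplacian_matrix(g).toarray()
    return np.linalg.eigvalsh(lap)[::-1]  # eigvalsh sorts ascending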
Example #47
edges=np.arange(0,max(nx.degree(G).values())+2)
axD.hist(sorted(nx.degree(G).values()), edges)
axD.set_xlabel('Node degree')
axD.set_ylabel('Frequency')
axD.title.set_text('Node degree distribution of A')


A=nx.to_numpy_matrix(G)
axA.plot(np.sort(np.linalg.eigvals( A ) ) )
axA.set_xlim(0,np.shape(A)[0])
axA.set_xlabel('eigenvector index')
axA.set_ylabel('eigenvalue')
axA.title.set_text('Eigenvalue spectrum of A')


L=nx.normalized_laplacian_matrix(G)
axL.plot(np.sort(np.linalg.eigvals( L.todense() ) ) )
axL.set_xlim(0,np.shape(L)[0])
axL.set_xlabel('eigenvector index')
axL.set_ylabel('eigenvalue')
axL.title.set_text('Eigenvalue spectrum of L_n')
# plt.show()

C=nx.degree_centrality(G)
axC.plot(np.sort(C.values()) )
axC.set_xlim(0,np.shape(L)[0])
axC.set_xlabel('node index')
axC.set_ylabel('centrality')
axC.title.set_text('Degree centrality spectrum')

plt.tight_layout()
Example #48
 def get_lap_torch(self):
     return torch.from_numpy(
         nx.normalized_laplacian_matrix(
             self.nx_graph).toarray()).unsqueeze(0)
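
A usage sketch, assuming a wrapper object with an nx_graph attribute as above: the result is a float64 tensor of shape (1, n, n), the leading batch axis coming from unsqueeze(0).

import networkx as nx
import torch

g = nx.karate_club_graph()
lap = torch.from_numpy(
    nx.normalized_laplacian_matrix(g).toarray()).unsqueeze(0)
print(lap.shape)  # torch.Size([1, 34, 34])
print(lap.dtype)  # torch.float64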
Example #49
 def _layout_laplacian(self, graph):
     nlm = nx.normalized_laplacian_matrix(graph)
     eigvals, eigvects = eigs(nlm, k=self.n_eigenvectors, which='SR')
     eigvals, eigvects = np.real(eigvals), np.real(eigvects)
     return scale(eigvects)
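
A usage sketch, assuming `layout` is an instance of the enclosing class with n_eigenvectors=2 and that scale comes from sklearn.preprocessing: each row of the returned array is a 2-D coordinate, usable directly as a drawing layout. With which='SR' the first column belongs to the trivial eigenvalue 0, so most of the spread comes from the second.

import networkx as nx

g = nx.karate_club_graph()
coords = layout._layout_laplacian(g)  # shape (n_nodes, 2)
pos = dict(zip(g.nodes(), coords))
nx.draw(g, pos=pos, node_size=30)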
Example #50
def min_eigenvalue(g):
    L = nx.normalized_laplacian_matrix(g)
    e = np.linalg.eigvals(L.A)
    return np.real(min(e))
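
Note that the smallest eigenvalue of any normalized Laplacian is exactly 0 (the eigenvector is D^(1/2) times the all-ones vector, one per connected component), so this function effectively measures numerical round-off; a value far from zero signals an eigensolver problem rather than a graph property. Usage sketch:

import networkx as nx
print(min_eigenvalue(nx.karate_club_graph()))  # ~0.0 up to floating-point error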
Example #51
	def set_graph(self, _G):
		self.G = _G
		L = networkx.normalized_laplacian_matrix(self.G)
		L = L.todense()
		self.U, self.lamb_str = compute_eigenvectors_and_eigenvalues(L)
Example #52
W1 = img_to_graph(f, return_as=np.ndarray) # gradient weighted
W1 = np.exp(-W1 / W1.std())


W2 = im2graph(f) # spatial location and intensity-based weighted

"""## 3. Calculate Laplacian"""

## 1. Using csgraph
Lapl_1 =  csgraph.laplacian(W1, normed=True) 
print(Lapl_1.shape)

## 2. Using networkx
G = nx.from_numpy_matrix(W1)
Lapl_2 = nx.normalized_laplacian_matrix(G)
Lapl_2b = nx.laplacian_matrix(G)
print(Lapl_2.shape)


## 3. Doing the math --> https://en.wikipedia.org/wiki/Laplacian_matrix
D = (np.diag(np.power(W1.sum(axis=1), -0.5)))
k = re_size*re_size
Lapl_3 = np.eye(k) - np.matmul(np.matmul(D, W1), D)  # L_sym = I - D^(-1/2) W D^(-1/2)
Lapl_4 = np.diag(W1.sum(axis=1)) - W1  # combinatorial Laplacian: D - W
print(Lapl_3.shape)



print(np.sum(Lapl_1-Lapl_2)) # both works great!
print(np.sum(Lapl_1-Lapl_3))
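
Summing the entrywise differences can hide sign cancellations; np.allclose is a sharper equivalence check. A sketch over the same variables as above:

import numpy as np

print(np.allclose(Lapl_1, Lapl_2.toarray()))  # csgraph vs. networkx
print(np.allclose(Lapl_1, Lapl_3))            # csgraph vs. explicit formula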
Example #53
def create_celegans(location):
    # load data
    data = scipy.io.loadmat(location + 'celegans.mat')
    A = data['A_combined']
    G = nx.DiGraph(A)

    #set labels
    labels = {}
    for i in G:
        G.node[i]['labels'] = data['Labels'][i][0][0]
        labels[i] = data['Labels'][i][0][0]

    #set neuron types
    neuron_tpe = np.loadtxt(location + 'neuron_type.txt', dtype=str)
    neuron_type = {}
    colors = []
    for i in range(len(neuron_tpe)):
        for il, l in enumerate(labels):
            if neuron_tpe[i][1] == labels[il]:
                if neuron_tpe[i][2] == 'M':
                    G.node[il]['type'] = 0
                    neuron_type[il] = 0
                    colors.append('C0')

                elif neuron_tpe[i][2] == 'I':
                    G.node[il]['type'] = 1
                    neuron_type[il] = 1
                    colors.append('C1')

                elif neuron_tpe[i][2] == 'S':
                    G.node[il]['type'] = 2
                    neuron_type[il] = 2
                    colors.append('C2')

    #set positions of nodes, following SI of Varshney et al 2011
    A = np.array(nx.to_numpy_matrix(G))

    W = 0.5 * (A + A.T)

    D = np.diag(np.array(W.sum(1)).flatten())
    b = np.array((W * np.sign(A - A.T)).sum(0)).flatten()

    L = D - W

    L = (nx.laplacian_matrix(nx.Graph(W))).toarray()

    z = np.array(np.linalg.pinv(L).dot(b)).flatten()

    #z = sc.sparse.linalg.spsolve(L,b)

    L_norm = nx.normalized_laplacian_matrix(nx.Graph(W))

    vs = sc.sparse.linalg.eigs(L_norm, which='SM', k=3)[1][:, 1:]
    D_sqrt_inv = np.diag(1 / np.sqrt(np.diag(D)))

    v2 = D_sqrt_inv.dot(vs[:, 0])
    v3 = D_sqrt_inv.dot(vs[:, 1])
    pos = {}
    for i in G:
        #pos[i] = (-np.real(v2[i]), np.real(v3[i]))
        pos[i] = (-np.real(v2[i]), -z[i])

    return G, pos, labels, neuron_type, colors
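
The D_sqrt_inv rescaling above is the standard change of basis between the symmetric and random-walk normalizations of the Laplacian:

L_{\mathrm{sym}} = D^{-1/2}(D - W)D^{-1/2}, \qquad
L_{\mathrm{sym}} u = \lambda u \;\Longrightarrow\; L_{\mathrm{rw}}\,(D^{-1/2}u) = \lambda\,(D^{-1/2}u),
\qquad L_{\mathrm{rw}} = I - D^{-1}W.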
Example #54
from data_process import create_graph, yield_data_time
from utils import MSE, normalize

import pickle as pk

batch_size = 768
seq_len = 10
nodes_nums = 450
encode_dim = 100

load_name = 'result/rnn_new/encode2_seqlen/finfull_e47'


graph_data = np.load('data/new/result_fin.npy')
G = create_graph(graph_data[:nodes_nums, :nodes_nums])
adj = np.array(nx.normalized_laplacian_matrix(G).todense())
data = np.load('data/new/data2.npy').T[:, :nodes_nums]
data = normalize(data)


device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    
    
with open(load_name+'.model', 'rb') as f:
    trainer = torch.load(f).to(device)
    
encoder = trainer.encoder
decoder = trainer.decoder
Example #55
def gen_nor_laplacian(G):
	lap_nor_matrix = nx.normalized_laplacian_matrix(G)
	return lap_nor_matrix	
#A_full = _A_obs.todense()
A_matrix = np.loadtxt('plots/three_cluster_line_training.txt')
A_full = np.loadtxt('plots/three_cluster_line.txt')
N = A_full.shape[0]

valid_edges = np.loadtxt('plots/three_cluster_line_val_edges.txt').tolist()
valid_nonEdges = np.loadtxt(
    'plots/three_cluster_line_val_non_edges.txt').tolist()
valid = valid_edges + valid_nonEdges
test_edges = np.loadtxt('plots/three_cluster_line_test_edges.txt').tolist()
test_nonEdges = np.loadtxt(
    'plots/three_cluster_line_test_non_edges.txt').tolist()
test = test_edges + test_nonEdges
G = nx.from_numpy_matrix(A_full)

L = nx.normalized_laplacian_matrix(G).todense()
eig_vals, eig_vecs = linalg.eig(L)
eig_list = list(zip(eig_vals, np.transpose(eig_vecs)))
eig_list.sort(key=lambda x: x[0].real)

u = np.asarray(eig_list[-2][1]).ravel().real  # second-largest eigenvector, real part

truth = utils.compute_graph_statistics(np.asarray(A_full))
f = open('plots/truth.txt', "w")
f.write(str(truth))
f.close()

truth_spec = utils.specGap(A_full)

#train_spec = utils.specGap(A_matrix)
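
Because L is symmetric, np.linalg.eigh avoids the complex-sort bookkeeping above: it returns real eigenvalues already in ascending order. A sketch of extracting the second-largest eigenvector (the role eig_list[-2] plays):

import numpy as np

vals, vecs = np.linalg.eigh(np.asarray(L))  # ascending; columns are eigenvectors
u_alt = vecs[:, -2]                         # eigenvector of the 2nd-largest eigenvalue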
Example #57
def compute_metrics(fs, outdir, atlas, verb=False):
    """
    Given a set of files and a directory to put things, loads graphs and
    performs set of analyses on them, storing derivatives in a pickle format
    in the desired output location.

    Required parameters:
        fs:
            - Dictionary of lists of files in each dataset
        outdir:
            - Path to derivative save location
        atlas:
            - Name of atlas of interest as it appears in the directory titles
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """

    graphs = loadGraphs(fs, verb=verb)
    nodes = nx.number_of_nodes(graphs.values()[0])

    #  Number of non-zero edges (i.e. binary edge count)
    print("Computing: NNZ")
    nnz = OrderedDict((subj, len(nx.edges(graphs[subj]))) for subj in graphs)
    write(outdir, 'number_non_zeros', nnz, atlas)
    print("Sample Mean: %.2f" % np.mean(nnz.values()))

    #  Degree sequence
    print("Computing: Degree Sequence")
    total_deg = OrderedDict((subj, np.array(nx.degree(graphs[subj]).values()))
                            for subj in graphs)
    ipso_deg = OrderedDict()
    contra_deg = OrderedDict()
    for subj in graphs:  # TODO GK: remove forloop and use comprehension maybe?
        g = graphs[subj]
        N = len(g.nodes())
        LLnodes = g.nodes()[0:N/2]  # TODO GK: don't assume hemispheres
        LL = g.subgraph(LLnodes)
        LLdegs = [LL.degree()[n] for n in LLnodes]

        RRnodes = g.nodes()[N/2:N]  # TODO GK: don't assume hemispheres
        RR = g.subgraph(RRnodes)
        RRdegs = [RR.degree()[n] for n in RRnodes]

        LRnodes = g.nodes()
        ipso_list = LLdegs + RRdegs
        degs = [g.degree()[n] for n in LRnodes]
        contra_deg[subj] = [a_i - b_i for a_i, b_i in zip(degs, ipso_list)]
        ipso_deg[subj] = ipso_list
        # import pdb; pdb.set_trace()

    deg = {'total_deg': total_deg,
           'ipso_deg': ipso_deg,
           'contra_deg': contra_deg}
    write(outdir, 'degree_distribution', deg, atlas)
    show_means(total_deg)

    #  Edge Weights
    print("Computing: Edge Weight Sequence")
    temp_ew = OrderedDict((s, [graphs[s].get_edge_data(e[0], e[1])['weight']
                           for e in graphs[s].edges()]) for s in graphs)
    ew = temp_ew
    write(outdir, 'edge_weight', ew, atlas)
    show_means(temp_ew)

    #   Clustering Coefficients
    print("Computing: Clustering Coefficient Sequence")
    temp_cc = OrderedDict((subj, nx.clustering(graphs[subj]).values())
                          for subj in graphs)
    ccoefs = temp_cc
    write(outdir, 'clustering_coefficients', ccoefs, atlas)
    show_means(temp_cc)

    # Scan Statistic-1
    print("Computing: Max Local Statistic Sequence")
    temp_ss1 = scan_statistic(graphs, 1)
    ss1 = temp_ss1
    write(outdir, 'locality_statistic', ss1, atlas)
    show_means(temp_ss1)

    # Eigen Values
    print("Computing: Eigen Value Sequence")
    laplac = OrderedDict((subj, nx.normalized_laplacian_matrix(graphs[subj]))
                         for subj in graphs)
    eigs = OrderedDict((subj, np.sort(np.linalg.eigvals(laplac[subj].A))[::-1])
                       for subj in graphs)
    write(outdir, 'eigen_sequence', eigs, atlas)
    print("Subject Maxes: " + ", ".join(["%.2f" % np.max(eigs[key])
                                         for key in eigs.keys()]))

    # Betweenness Centrality
    print("Computing: Betweenness Centrality Sequence")
    nxbc = nx.algorithms.betweenness_centrality  # For PEP8 line length...
    temp_bc = OrderedDict((subj, nxbc(graphs[subj]).values())
                          for subj in graphs)
    centrality = temp_bc
    write(outdir, 'betweenness_centrality', centrality, atlas)
    show_means(temp_bc)

    # Mean connectome
    print("Computing: Mean Connectome")
    adj = OrderedDict((subj, nx.adj_matrix(graphs[subj]).todense())
                      for subj in graphs)
    mat = np.zeros(adj.values()[0].shape)
    for subj in adj:
        mat += adj[subj]
    mat = mat/len(adj.keys())
    write(outdir, 'study_mean_connectome', mat, atlas)
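
The averaging loop collapses to one vectorized call over the same adj dictionary, assuming every graph shares the node set:

import numpy as np

stack = np.stack([np.asarray(a) for a in adj.values()])
mean_connectome = stack.mean(axis=0)  # same result as the accumulation loop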