def graph_fusion(adjacency_matrix_list, weights=None, method="zhou"):
    """Fuse several graph views into one implicit random-walk transition matrix.

    Inputs:  - adjacency_matrix_list: list of (n x n) adjacency matrices in any
               SciPy sparse format.
             - weights: optional per-view weights (only used when method is
               "zhou"); defaults to a uniform convex combination.
             - method: view-weighting scheme, either "zhou" or "entropy".

    Output:  - multiview_implicit_transition_matrix: per-node weighted sum of
               the views' natural random-walk transition matrices (CSR).

    Raises:  - RuntimeError on an empty view list, a weight/view count
               mismatch, or an unknown method.
    """
    # Get number of matrices.
    number_of_views = len(adjacency_matrix_list)
    if number_of_views < 1:
        raise RuntimeError("Empty adjacency matrix list.")

    # Make sure number of weights is equal to number of matrices.
    if method == "zhou":
        if weights is None:
            weights = np.full(number_of_views, 1.0 / number_of_views, dtype=np.float64)
        else:
            if len(weights) != number_of_views:
                raise RuntimeError("Number of adjacency matrices not equal to number of weights.")
            # Normalize to a convex combination.  The original code used
            # npla.norm(weights, "fro"), which raises ValueError for 1-D
            # arrays; sum-normalization matches graph_fusion_directed.
            weights = np.asarray(weights, dtype=np.float64)
            weights /= np.sum(weights)

    # Make sure all matrices are in csr format.  A *list* (not a generator)
    # is required: the original generator was exhausted by the transition
    # matrix construction below, leaving the "entropy" branch an empty input.
    adjacency_matrix_list = [spsp.csr_matrix(adjacency_matrix)
                             for adjacency_matrix in adjacency_matrix_list]

    # Get natural random walk transition matrices.
    transition_tuple_list = [get_natural_random_walk_matrix(adjacency_matrix)
                             for adjacency_matrix in adjacency_matrix_list]
    transition_matrix_list = [t[0] for t in transition_tuple_list]
    out_degree_list = [t[1] for t in transition_tuple_list]

    # Calculate actual (per-node) weights for matrices.
    if method == "zhou":
        actual_weights = safe_convex_weight_calculation(
            transition_matrix_list, out_degree_list, weights)
    elif method == "entropy":
        actual_weights = entropy_view_weight_calculation(
            adjacency_matrix_list, transition_matrix_list, out_degree_list)
    else:
        raise RuntimeError("Invalid view weighting method selected.")

    # Calculate the multiview implicit transition matrix as the sum of each
    # view's transition matrix scaled row-wise by its per-node weight.
    number_of_nodes = transition_matrix_list[0].shape[0]
    multiview_implicit_transition_matrix = None
    for v in range(number_of_views):
        weight_diagonal_matrix = spsp.csr_matrix(
            spsp.spdiags(actual_weights[v], [0], number_of_nodes, number_of_nodes))
        weighted_view = weight_diagonal_matrix.dot(transition_matrix_list[v])
        if multiview_implicit_transition_matrix is None:
            multiview_implicit_transition_matrix = weighted_view
        else:
            multiview_implicit_transition_matrix += weighted_view
    return multiview_implicit_transition_matrix
def graph_fusion(adjacency_matrix_list, weights=None, method="zhou"):
    """Fuse several graph views into one implicit random-walk transition matrix.

    Inputs:  - adjacency_matrix_list: list of (n x n) adjacency matrices in any
               SciPy sparse format.
             - weights: optional per-view weights (only used when method is
               "zhou"); defaults to a uniform convex combination.
             - method: view-weighting scheme, either "zhou" or "entropy".

    Output:  - multiview_implicit_transition_matrix: per-node weighted sum of
               the views' natural random-walk transition matrices (CSR).

    Raises:  - RuntimeError on an empty view list, a weight/view count
               mismatch, or an unknown method.
    """
    # Get number of matrices.
    number_of_views = len(adjacency_matrix_list)
    if number_of_views < 1:
        raise RuntimeError("Empty adjacency matrix list.")

    # Make sure number of weights is equal to number of matrices.
    if method == "zhou":
        if weights is None:
            weights = np.full(number_of_views, 1.0 / number_of_views, dtype=np.float64)
        else:
            if len(weights) != number_of_views:
                raise RuntimeError("Number of adjacency matrices not equal to number of weights.")
            # Normalize to a convex combination.  The original code used
            # npla.norm(weights, "fro"), which raises ValueError for 1-D
            # arrays; sum-normalization matches graph_fusion_directed.
            weights = np.asarray(weights, dtype=np.float64)
            weights /= np.sum(weights)

    # Make sure all matrices are in csr format.  A *list* (not a generator)
    # is required: the original generator was exhausted by the transition
    # matrix construction below, leaving the "entropy" branch an empty input.
    adjacency_matrix_list = [spsp.csr_matrix(adjacency_matrix)
                             for adjacency_matrix in adjacency_matrix_list]

    # Get natural random walk transition matrices.
    transition_tuple_list = [get_natural_random_walk_matrix(adjacency_matrix)
                             for adjacency_matrix in adjacency_matrix_list]
    transition_matrix_list = [t[0] for t in transition_tuple_list]
    out_degree_list = [t[1] for t in transition_tuple_list]

    # Calculate actual (per-node) weights for matrices.
    if method == "zhou":
        actual_weights = safe_convex_weight_calculation(
            transition_matrix_list, out_degree_list, weights)
    elif method == "entropy":
        actual_weights = entropy_view_weight_calculation(
            adjacency_matrix_list, transition_matrix_list, out_degree_list)
    else:
        raise RuntimeError("Invalid view weighting method selected.")

    # Calculate the multiview implicit transition matrix as the sum of each
    # view's transition matrix scaled row-wise by its per-node weight.
    number_of_nodes = transition_matrix_list[0].shape[0]
    multiview_implicit_transition_matrix = None
    for v in range(number_of_views):
        weight_diagonal_matrix = spsp.csr_matrix(
            spsp.spdiags(actual_weights[v], [0], number_of_nodes, number_of_nodes))
        weighted_view = weight_diagonal_matrix.dot(transition_matrix_list[v])
        if multiview_implicit_transition_matrix is None:
            multiview_implicit_transition_matrix = weighted_view
        else:
            multiview_implicit_transition_matrix += weighted_view
    return multiview_implicit_transition_matrix
def get_implicit_adjacency_matrices(adjacency_matrix, rho=0.2):
    """Derive the implicit combinatorial and directed adjacency matrices of a graph.

    Inputs:  - adjacency_matrix: (n x n) SciPy sparse adjacency matrix.
             - rho: restart/teleportation probability for the stationary
               distribution computation.

    Outputs: - implicit combinatorial adjacency matrix and its phi diagonal,
             - implicit directed adjacency matrix and its phi diagonal.
    """
    # Natural random walk transition matrix; the degree vectors returned
    # alongside it are not needed here.
    transition, _, _ = get_natural_random_walk_matrix(adjacency_matrix, make_shared=False)

    # Build both the transition matrix and its transpose in CSR form.
    transition_coo = transition.tocoo()
    transition_t = transition_coo.T.tocsr()
    transition = transition_coo.tocsr()

    # Stationary distribution of the teleporting random walk.
    pi = get_stationary_distribution_directed(adjacency_matrix, rho)

    # Calculate implicit combinatorial adjacency matrix.
    combinatorial_matrix, combinatorial_phi = get_implicit_combinatorial_adjacency_matrix(
        pi, transition, transition_t)

    # Calculate implicit directed adjacency matrix.
    directed_matrix, directed_phi = get_implicit_directed_adjacency_matrix(pi, transition)

    return combinatorial_matrix, combinatorial_phi, directed_matrix, directed_phi
def get_implicit_adjacency_matrices(adjacency_matrix, rho=0.2):
    """Compute implicit adjacency matrices (combinatorial and directed variants).

    Inputs:  - adjacency_matrix: (n x n) SciPy sparse adjacency matrix.
             - rho: restart/teleportation probability.

    Outputs: - (combinatorial matrix, combinatorial phi,
                directed matrix, directed phi).
    """
    # Only the transition matrix from the returned triple is used below.
    walk_tuple = get_natural_random_walk_matrix(adjacency_matrix, make_shared=False)
    coo_form = walk_tuple[0].tocoo()
    transition_transpose_csr = coo_form.T.tocsr()
    transition_csr = coo_form.tocsr()

    # Random walk with restart and teleportation.
    distribution = get_stationary_distribution_directed(adjacency_matrix, rho)

    # Implicit combinatorial variant first, then the directed variant.
    combinatorial, com_phi = get_implicit_combinatorial_adjacency_matrix(
        distribution, transition_csr, transition_transpose_csr)
    directed, dir_phi = get_implicit_directed_adjacency_matrix(
        distribution, transition_csr)

    return combinatorial, com_phi, directed, dir_phi
def graph_fusion_directed(adjacency_matrix_list, weights, fusion_type, laplacian_type):
    """Fuse several directed graph views into one adjacency matrix and Laplacian.

    Inputs:  - adjacency_matrix_list: list of (n x n) sparse adjacency matrices.
             - weights: per-view weights or None (defaults to uniform).
             - fusion_type: "zhou", "addition", or "entropy".
             - laplacian_type: "combinatorial" or "directed" (only consulted
               for the "zhou" fusion type).

    Outputs: - (multiview adjacency matrix, multiview Laplacian), both CSR
               with explicit zeros eliminated.

    Raises:  - RuntimeError on an empty view list, a weight/view count
               mismatch, or unknown fusion/laplacian types.
    """
    number_of_nodes = adjacency_matrix_list[0].shape[0]
    # Get number of views.
    number_of_views = len(adjacency_matrix_list)
    if number_of_views < 1:
        print("Empty adjacency matrix list.")
        raise RuntimeError
    # Make sure number of weights is equal to number of matrices.
    if weights is None:
        # Uniform convex combination over views.
        weights = (1/number_of_views) * np.ones(number_of_views, dtype=np.float64)
    else:
        if len(weights) != number_of_views:
            print("Number of adjacency matrices not equal to number of weights.")
            raise RuntimeError
        else:
            # Sum-normalize so the weights form a convex combination.
            weights /= np.sum(weights)
    # Make sure all matrices are in csr format.
    adjacency_matrix_list = [spsp.csr_matrix(adjacency_matrix) for adjacency_matrix in adjacency_matrix_list]
    # Get natural random walk transition matrices.
    transition_tuple_list = [get_natural_random_walk_matrix(adjacency_matrix) for adjacency_matrix in adjacency_matrix_list]
    transition_matrix_list = [t[0] for t in transition_tuple_list]
    out_degree_list = [t[1] for t in transition_tuple_list]
    # NOTE(review): in_degree_list is computed but never used below.
    in_degree_list = [t[2] for t in transition_tuple_list]
    # Calculate actual weights for matrices.
    if fusion_type == "zhou":
        actual_weights = safe_convex_weight_calculation(transition_matrix_list, out_degree_list, weights)
        # Stationary distribution per view (0.15 is the restart probability).
        stationary_distribution_list = [get_stationary_distribution_directed(spsp.csr_matrix(adjacency_matrix), 0.15) for adjacency_matrix in adjacency_matrix_list]
        multiview_implicit_stationary_distribution = fuse_stationary_distributions(stationary_distribution_list, actual_weights)
        multiview_implicit_transition_matrix = fuse_transition_matrices(transition_matrix_list, stationary_distribution_list, actual_weights, multiview_implicit_stationary_distribution)
        # Calculate the multiview implicit transition matrix.
        if laplacian_type == "combinatorial":
            multiview_implicit_adjacency_matrix,\
                diagonal = get_implicit_combinatorial_adjacency_matrix(multiview_implicit_stationary_distribution, multiview_implicit_transition_matrix, spsp.csr_matrix(multiview_implicit_transition_matrix.transpose()))
        elif laplacian_type == "directed":
            multiview_implicit_adjacency_matrix,\
                diagonal = get_implicit_directed_adjacency_matrix(multiview_implicit_stationary_distribution, multiview_implicit_transition_matrix)
        else:
            print("Invalid laplacian type.")
            raise RuntimeError
        # Laplacian = D - A with D taken from the implicit phi diagonal.
        diagonal_matrix = spsp.spdiags(diagonal, [0], number_of_nodes, number_of_nodes)
        multiview_implicit_laplacian_matrix = diagonal_matrix - multiview_implicit_adjacency_matrix
    elif fusion_type == "addition":
        actual_weights = safe_convex_weight_calculation(transition_matrix_list, out_degree_list, weights)
        # Fused adjacency is a weighted sum of the raw adjacency matrices.
        multiview_implicit_adjacency_matrix = simple_adjacency_matrix_addition(adjacency_matrix_list, actual_weights)
        degree = spsp.dia_matrix((multiview_implicit_adjacency_matrix.sum(axis=0), np.array([0])), shape=multiview_implicit_adjacency_matrix.shape)
        degree = degree.tocsr()
        # Calculate sparse graph Laplacian.
        multiview_implicit_laplacian_matrix = spsp.csr_matrix(-multiview_implicit_adjacency_matrix + degree, dtype=np.float64)
    elif fusion_type == "entropy":
        # NOTE(review): despite the name, this branch uses the same
        # safe_convex_weight_calculation as "zhou", not an entropy-based
        # weighting — confirm whether entropy_view_weight_calculation was
        # intended here.
        actual_weights = safe_convex_weight_calculation(transition_matrix_list, out_degree_list, weights)
        stationary_distribution_list = [get_stationary_distribution_directed(spsp.csr_matrix(adjacency_matrix), 0.15) for adjacency_matrix in adjacency_matrix_list]
        multiview_implicit_stationary_distribution = fuse_stationary_distributions(stationary_distribution_list, actual_weights)
        multiview_implicit_transition_matrix = fuse_transition_matrices(transition_matrix_list, stationary_distribution_list, actual_weights, multiview_implicit_stationary_distribution)
        # NOTE(review): BUG — multiview_implicit_adjacency_matrix is never
        # assigned in this branch, so the next line raises NameError at
        # runtime; the fused transition matrix above is also left unused.
        degree = spsp.dia_matrix((multiview_implicit_adjacency_matrix.sum(axis=0), np.array([0])), shape=multiview_implicit_adjacency_matrix.shape)
        degree = degree.tocsr()
        # Calculate sparse graph Laplacian.
        multiview_implicit_laplacian_matrix = spsp.csr_matrix(-multiview_implicit_adjacency_matrix + degree, dtype=np.float64)
    else:
        print("Invalid fusion type.")
        raise RuntimeError
    # Normalize both outputs to CSR and drop stored zeros.
    multiview_implicit_adjacency_matrix = spsp.csr_matrix(multiview_implicit_adjacency_matrix)
    multiview_implicit_adjacency_matrix.eliminate_zeros()
    multiview_implicit_laplacian_matrix = spsp.csr_matrix(multiview_implicit_laplacian_matrix)
    multiview_implicit_laplacian_matrix.eliminate_zeros()
    return multiview_implicit_adjacency_matrix, multiview_implicit_laplacian_matrix
def _candidate_nodes(adjacency_matrix):
    """Return nodes ordered by decreasing edge count, keeping only nodes with more than one edge."""
    # Binarize a copy so the column sums count edges rather than weights.
    a = adjacency_matrix.copy()
    a.data = np.ones_like(a.data)
    edge_count_vector = np.squeeze(np.asarray(a.sum(axis=0), dtype=np.int64))
    iterate_nodes = np.where(edge_count_vector != 0)[0]
    argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
    iterate_nodes = iterate_nodes[argsort_indices][::-1]
    return iterate_nodes[np.where(edge_count_vector[iterate_nodes] > 1.0)[0]]


def arcte(adjacency_matrix, rho, epsilon, number_of_threads=None):
    """
    Extracts local community features for all graph nodes based on the
    partitioning of node-centric similarity vectors.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network
               represented as a SciPy Sparse COOrdinate matrix.
             - rho: Restart probability.
             - epsilon: Approximation threshold.
             - number_of_threads: Worker count; defaults to the machine's
               available thread count.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a
               SciPy Sparse matrix (identity-plus-neighborhood base features
               stacked with the local community features when stacking
               succeeds).
    """
    adjacency_matrix = sparse.csr_matrix(adjacency_matrix)
    number_of_nodes = adjacency_matrix.shape[0]

    if number_of_threads is None:
        number_of_threads = get_threads_number()

    if number_of_threads == 1:
        # Calculate natural random walk transition probability matrix.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=False)
        iterate_nodes = _candidate_nodes(adjacency_matrix)
        local_features = arcte_worker(iterate_nodes,
                                      rw_transition.indices,
                                      rw_transition.indptr,
                                      rw_transition.data,
                                      out_degree, in_degree,
                                      rho, epsilon)
    else:
        # Calculate natural random walk transition probability matrix backed
        # by shared memory so worker processes can read it without copying.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=True)
        iterate_nodes = _candidate_nodes(adjacency_matrix)

        pool = mp.Pool(number_of_threads)
        node_chunks = list(parallel_chunks(iterate_nodes, number_of_threads))
        node_count = 0
        for chunk in node_chunks:
            node_count += len(list(chunk))
        results = list()
        for chunk_no in range(len(pool._pool)):
            pool.apply_async(arcte_worker,
                             args=(node_chunks[chunk_no],
                                   rw_transition.indices,
                                   rw_transition.indptr,
                                   rw_transition.data,
                                   out_degree, in_degree,
                                   rho, epsilon),
                             callback=results.append)
        pool.close()
        pool.join()

        # Workers may finish in any order; matrix addition is commutative,
        # so summing the partial results is order-independent.
        local_features = results[0]
        for additive_features in results[1:]:
            local_features = local_features + additive_features
    local_features = sparse.csr_matrix(local_features)

    # Form base community feature matrix: each node plus its neighborhood.
    identity_matrix = sparse.csr_matrix(
        sparse.eye(number_of_nodes, number_of_nodes, dtype=np.float64))
    # Copy before overwriting .data: csr_matrix() does not copy an input that
    # is already CSR, so the original in-place assignment mutated the
    # caller's matrix.
    adjacency_matrix_ones = adjacency_matrix.copy()
    adjacency_matrix_ones.data = np.ones_like(adjacency_matrix_ones.data)
    base_community_features = identity_matrix + adjacency_matrix_ones

    # Stack horizontally matrices to form feature matrix.
    try:
        features = sparse.hstack([base_community_features, local_features]).tocsr()
    except ValueError:
        print("Failure with horizontal feature stacking.")
        features = base_community_features
    return features
def _candidate_nodes(adjacency_matrix):
    """Return nodes ordered by decreasing edge count, keeping only nodes with more than one edge."""
    # Binarize a copy so the column sums count edges rather than weights.
    a = adjacency_matrix.copy()
    a.data = np.ones_like(a.data)
    edge_count_vector = np.squeeze(np.asarray(a.sum(axis=0), dtype=np.int64))
    iterate_nodes = np.where(edge_count_vector != 0)[0]
    argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
    iterate_nodes = iterate_nodes[argsort_indices][::-1]
    return iterate_nodes[np.where(edge_count_vector[iterate_nodes] > 1.0)[0]]


def arcte(adjacency_matrix, rho, epsilon, number_of_threads=None):
    """
    Extracts local community features for all graph nodes based on the
    partitioning of node-centric similarity vectors.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network
               represented as a SciPy Sparse COOrdinate matrix.
             - rho: Restart probability.
             - epsilon: Approximation threshold.
             - number_of_threads: Worker count; defaults to the machine's
               available thread count.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a
               SciPy Sparse matrix (identity-plus-neighborhood base features
               stacked with the local community features when stacking
               succeeds).
    """
    adjacency_matrix = sparse.csr_matrix(adjacency_matrix)
    number_of_nodes = adjacency_matrix.shape[0]

    if number_of_threads is None:
        number_of_threads = get_threads_number()

    if number_of_threads == 1:
        # Calculate natural random walk transition probability matrix.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=False)
        iterate_nodes = _candidate_nodes(adjacency_matrix)
        local_features = arcte_worker(iterate_nodes,
                                      rw_transition.indices,
                                      rw_transition.indptr,
                                      rw_transition.data,
                                      out_degree, in_degree,
                                      rho, epsilon)
    else:
        # Calculate natural random walk transition probability matrix backed
        # by shared memory so worker processes can read it without copying.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=True)
        iterate_nodes = _candidate_nodes(adjacency_matrix)

        pool = mp.Pool(number_of_threads)
        node_chunks = list(parallel_chunks(iterate_nodes, number_of_threads))
        node_count = 0
        for chunk in node_chunks:
            node_count += len(list(chunk))
        results = list()
        for chunk_no in range(len(pool._pool)):
            pool.apply_async(arcte_worker,
                             args=(node_chunks[chunk_no],
                                   rw_transition.indices,
                                   rw_transition.indptr,
                                   rw_transition.data,
                                   out_degree, in_degree,
                                   rho, epsilon),
                             callback=results.append)
        pool.close()
        pool.join()

        # Workers may finish in any order; matrix addition is commutative,
        # so summing the partial results is order-independent.
        local_features = results[0]
        for additive_features in results[1:]:
            local_features = local_features + additive_features
    local_features = sparse.csr_matrix(local_features)

    # Form base community feature matrix: each node plus its neighborhood.
    identity_matrix = sparse.csr_matrix(
        sparse.eye(number_of_nodes, number_of_nodes, dtype=np.float64))
    # Copy before overwriting .data: csr_matrix() does not copy an input that
    # is already CSR, so the original in-place assignment mutated the
    # caller's matrix.
    adjacency_matrix_ones = adjacency_matrix.copy()
    adjacency_matrix_ones.data = np.ones_like(adjacency_matrix_ones.data)
    base_community_features = identity_matrix + adjacency_matrix_ones

    # Stack horizontally matrices to form feature matrix.
    try:
        features = sparse.hstack([base_community_features, local_features]).tocsr()
    except ValueError:
        print("Failure with horizontal feature stacking.")
        features = base_community_features
    return features
def graph_fusion_directed(adjacency_matrix_list, weights, fusion_type, laplacian_type):
    """Fuse several directed graph views into one adjacency matrix and Laplacian.

    Inputs:  - adjacency_matrix_list: list of (n x n) sparse adjacency matrices.
             - weights: per-view weights or None (defaults to uniform).
             - fusion_type: "zhou", "addition", or "entropy".
             - laplacian_type: "combinatorial" or "directed" (only consulted
               for the "zhou" fusion type).

    Outputs: - (multiview adjacency matrix, multiview Laplacian), both CSR
               with explicit zeros eliminated.

    Raises:  - RuntimeError on an empty view list, a weight/view count
               mismatch, or unknown fusion/laplacian types.
    """
    number_of_nodes = adjacency_matrix_list[0].shape[0]
    # Get number of views.
    number_of_views = len(adjacency_matrix_list)
    if number_of_views < 1:
        print("Empty adjacency matrix list.")
        raise RuntimeError
    # Make sure number of weights is equal to number of matrices.
    if weights is None:
        # Uniform convex combination over views.
        weights = (1 / number_of_views) * np.ones(number_of_views, dtype=np.float64)
    else:
        if len(weights) != number_of_views:
            print(
                "Number of adjacency matrices not equal to number of weights.")
            raise RuntimeError
        else:
            # Sum-normalize so the weights form a convex combination.
            weights /= np.sum(weights)
    # Make sure all matrices are in csr format.
    adjacency_matrix_list = [
        spsp.csr_matrix(adjacency_matrix)
        for adjacency_matrix in adjacency_matrix_list
    ]
    # Get natural random walk transition matrices.
    transition_tuple_list = [
        get_natural_random_walk_matrix(adjacency_matrix)
        for adjacency_matrix in adjacency_matrix_list
    ]
    transition_matrix_list = [t[0] for t in transition_tuple_list]
    out_degree_list = [t[1] for t in transition_tuple_list]
    # NOTE(review): in_degree_list is computed but never used below.
    in_degree_list = [t[2] for t in transition_tuple_list]
    # Calculate actual weights for matrices.
    if fusion_type == "zhou":
        actual_weights = safe_convex_weight_calculation(
            transition_matrix_list, out_degree_list, weights)
        # Stationary distribution per view (0.15 is the restart probability).
        stationary_distribution_list = [
            get_stationary_distribution_directed(
                spsp.csr_matrix(adjacency_matrix), 0.15)
            for adjacency_matrix in adjacency_matrix_list
        ]
        multiview_implicit_stationary_distribution = fuse_stationary_distributions(
            stationary_distribution_list, actual_weights)
        multiview_implicit_transition_matrix = fuse_transition_matrices(
            transition_matrix_list, stationary_distribution_list,
            actual_weights, multiview_implicit_stationary_distribution)
        # Calculate the multiview implicit transition matrix.
        if laplacian_type == "combinatorial":
            multiview_implicit_adjacency_matrix,\
                diagonal = get_implicit_combinatorial_adjacency_matrix(multiview_implicit_stationary_distribution, multiview_implicit_transition_matrix, spsp.csr_matrix(multiview_implicit_transition_matrix.transpose()))
        elif laplacian_type == "directed":
            multiview_implicit_adjacency_matrix,\
                diagonal = get_implicit_directed_adjacency_matrix(multiview_implicit_stationary_distribution, multiview_implicit_transition_matrix)
        else:
            print("Invalid laplacian type.")
            raise RuntimeError
        # Laplacian = D - A with D taken from the implicit phi diagonal.
        diagonal_matrix = spsp.spdiags(diagonal, [0], number_of_nodes, number_of_nodes)
        multiview_implicit_laplacian_matrix = diagonal_matrix - multiview_implicit_adjacency_matrix
    elif fusion_type == "addition":
        actual_weights = safe_convex_weight_calculation(
            transition_matrix_list, out_degree_list, weights)
        # Fused adjacency is a weighted sum of the raw adjacency matrices.
        multiview_implicit_adjacency_matrix = simple_adjacency_matrix_addition(
            adjacency_matrix_list, actual_weights)
        degree = spsp.dia_matrix(
            (multiview_implicit_adjacency_matrix.sum(axis=0), np.array([0])),
            shape=multiview_implicit_adjacency_matrix.shape)
        degree = degree.tocsr()
        # Calculate sparse graph Laplacian.
        multiview_implicit_laplacian_matrix = spsp.csr_matrix(
            -multiview_implicit_adjacency_matrix + degree, dtype=np.float64)
    elif fusion_type == "entropy":
        # NOTE(review): despite the name, this branch uses the same
        # safe_convex_weight_calculation as "zhou", not an entropy-based
        # weighting — confirm whether entropy_view_weight_calculation was
        # intended here.
        actual_weights = safe_convex_weight_calculation(
            transition_matrix_list, out_degree_list, weights)
        stationary_distribution_list = [
            get_stationary_distribution_directed(
                spsp.csr_matrix(adjacency_matrix), 0.15)
            for adjacency_matrix in adjacency_matrix_list
        ]
        multiview_implicit_stationary_distribution = fuse_stationary_distributions(
            stationary_distribution_list, actual_weights)
        multiview_implicit_transition_matrix = fuse_transition_matrices(
            transition_matrix_list, stationary_distribution_list,
            actual_weights, multiview_implicit_stationary_distribution)
        # NOTE(review): BUG — multiview_implicit_adjacency_matrix is never
        # assigned in this branch, so the next line raises NameError at
        # runtime; the fused transition matrix above is also left unused.
        degree = spsp.dia_matrix(
            (multiview_implicit_adjacency_matrix.sum(axis=0), np.array([0])),
            shape=multiview_implicit_adjacency_matrix.shape)
        degree = degree.tocsr()
        # Calculate sparse graph Laplacian.
        multiview_implicit_laplacian_matrix = spsp.csr_matrix(
            -multiview_implicit_adjacency_matrix + degree, dtype=np.float64)
    else:
        print("Invalid fusion type.")
        raise RuntimeError
    # Normalize both outputs to CSR and drop stored zeros.
    multiview_implicit_adjacency_matrix = spsp.csr_matrix(
        multiview_implicit_adjacency_matrix)
    multiview_implicit_adjacency_matrix.eliminate_zeros()
    multiview_implicit_laplacian_matrix = spsp.csr_matrix(
        multiview_implicit_laplacian_matrix)
    multiview_implicit_laplacian_matrix.eliminate_zeros()
    return multiview_implicit_adjacency_matrix, multiview_implicit_laplacian_matrix