def __init__(self, est_path, prune, norm, out_fmt="gpickle",
             remove_self_loops=True):
    """Load a connectivity matrix from disk and wrap it in a networkx graph.

    Parameters
    ----------
    est_path
        Location passed to ``utils.load_mat`` to read the raw matrix.
    prune
        Stored unchanged as ``self.prune``; not used in the constructor.
    norm
        Stored unchanged as ``self.norm``; not used in the constructor.
    out_fmt : str
        Stored unchanged as ``self.out_fmt``; not used in the constructor.
    remove_self_loops : bool
        Only the literal ``True`` triggers self-loop removal; any other
        value (including truthy non-bools) leaves the diagonal in place.

    Attributes set
    --------------
    in_mat_raw
        The matrix exactly as returned by ``utils.load_mat``.
    in_mat
        Absolute-valued, autofixed, symmetrized copy of ``in_mat_raw`` with
        NaN/inf entries zeroed.
    G
        ``networkx`` graph built from ``in_mat``.
    """
    import graspologic.utils as gu

    self.est_path = est_path
    self.prune = prune
    self.norm = norm
    self.out_fmt = out_fmt
    self.in_mat = None

    # Read the raw matrix from disk.
    raw_matrix = utils.load_mat(self.est_path)
    self.in_mat_raw = raw_matrix

    # Force non-negative edge weights, then let autofix clean the matrix
    # (de-diagonalize, drop NaN/inf values).
    non_negative = np.array(np.abs(raw_matrix))
    self.in_mat = np.array(thresholding.autofix(non_negative))

    # Enforce symmetry and, when requested, strip self-loops.
    symmetric = gu.symmetrize(self.in_mat)
    if remove_self_loops is True:
        symmetric = gu.remove_loops(symmetric)
    self.in_mat = symmetric

    # Zero out anything non-finite that survived the steps above.
    invalid = np.isnan(self.in_mat) | np.isinf(self.in_mat)
    self.in_mat[invalid] = 0

    # Build the networkx graph from the cleaned adjacency matrix.
    self.G = nx.from_numpy_array(self.in_mat)
def sbm_corr_weighted(n, mu1, mu2, Sigma, directed=False, loops=False):
    """
    Build a pair of weighted block-structured graphs, one block at a time.

    Each (i, j) community block of both graphs is filled with the pair of
    matrices returned by ``sample_edges_corr_weighted`` for that block.

    Parameters
    ----------
    n: list of int, shape (n_communities)
        Number of vertices in each community. Communities are assigned
        n[0], n[1], ...
    mu1: array-like, shape (n_communities, n_communities)
        Mean of the edge weights of the first graph, where mu1[i][j] is the
        mean for edges between communities i and j.
    mu2: array-like, shape (n_communities, n_communities)
        Same as mu1, but for the second graph.
    Sigma: list or ndarray (2, 2)
        Covariance matrix encoding the variances of the edge weights of G1
        and G2 and the covariance between them. The same Sigma is used for
        every block.
    directed: bool, default False
        When False, both graphs are passed through
        ``symmetrize(..., method="triu")``.
    loops: bool, default False
        When False, the diagonal of both graphs is subtracted out.

    Returns
    -------
    G1, G2: ndarray, shape (sum(n), sum(n))
        The two adjacency matrices.
    """
    n = np.array(n)
    n_vertices = np.sum(n)
    G1 = np.zeros((n_vertices, n_vertices))
    G2 = np.zeros((n_vertices, n_vertices))

    # Community k occupies rows/columns block_indices[k]:block_indices[k + 1].
    block_indices = np.insert(np.cumsum(n), 0, 0)
    spans = [
        slice(start, stop)
        for start, stop in zip(block_indices[:-1], block_indices[1:])
    ]

    # Row-major traversal of the blocks keeps the sampling order fixed.
    for i, rows in enumerate(spans):
        for j, cols in enumerate(spans):
            block1, block2 = sample_edges_corr_weighted(
                (n[i], n[j]), mu1[i][j], mu2[i][j], Sigma)
            G1[rows, cols] = block1
            G2[rows, cols] = block2

    if not directed:
        G1 = symmetrize(G1, method="triu")
        G2 = symmetrize(G2, method="triu")

    if not loops:
        G1 = G1 - np.diag(np.diag(G1))
        G2 = G2 - np.diag(np.diag(G2))

    return G1, G2
def er_corr_weighted(n, mu1, mu2, Sigma, directed=False, loops=False):
    """
    Generate a pair of correlated weighted graphs on the same vertex set.

    Both G1 and G2 are non-binary matrices. Every pair of corresponding
    edges is distributed as a bivariate normal with mean = [mu1, mu2] and
    covariance matrix Sigma, so the correlation between G1 and G2 is
    Sigma12 / sqrt(Sigma11 * Sigma22).

    Parameters
    ----------
    n: int
        Number of vertices.
    mu1: float
        Mean of the edge weights of G1 (analogous to the marginal
        probability p in a correlated Bernoulli graph).
    mu2: float
        Mean of the edge weights of G2 (analogous to the marginal
        probability q in a correlated Bernoulli graph).
    Sigma: list or ndarray (2, 2)
        Covariance matrix encoding the variances of the edge weights of G1
        and G2 and the covariance between them.
    directed: bool, default False
        When False, both graphs are passed through
        ``symmetrize(..., method="triu")``.
    loops: bool, default False
        When False, the diagonal of both graphs is subtracted out.

    Returns
    -------
    G1: ndarray (n_vertices, n_vertices)
        Adjacency matrix representing a random graph.
    G2: ndarray (n_vertices, n_vertices)
        Adjacency matrix representing a random graph.
    """
    G1, G2 = sample_edges_corr_weighted((n, n), mu1, mu2, Sigma)
    graphs = [G1, G2]

    # The same post-processing is applied to both members of the pair.
    if not directed:
        graphs = [symmetrize(graph, method="triu") for graph in graphs]
    if not loops:
        graphs = [graph - np.diag(np.diag(graph)) for graph in graphs]

    G1, G2 = graphs
    return G1, G2
def _gen_mat_data(n: int = 20, m: int = 20, p: float = 0.50,
                  mat_type: str = 'sb', binary: bool = False,
                  asfile: bool = True, n_graphs: int = 1):
    """Generate random test graphs, optionally saving each to a ``.npy`` file.

    Parameters
    ----------
    n : int
        Number of vertices passed to the graph sampler.
    m : int
        Number of edges; only used when ``mat_type == 'er'``.
    p : float
        Edge probability of the single block; only used when
        ``mat_type == 'sb'``. Must not be ``None`` in that case.
    mat_type : str
        ``'er'`` samples with ``er_nm``; ``'sb'`` samples a one-block ``sbm``.
    binary : bool
        When ``True`` every edge gets the constant weight 1; otherwise
        weights are drawn with ``np.random.uniform(low=0, high=1)``.
    asfile : bool
        When ``True`` each matrix is also saved to a fresh temporary
        ``.npy`` file. The files are not deleted by this function.
    n_graphs : int
        Number of graphs to generate.

    Returns
    -------
    dict
        ``{'mat_list': [...], 'mat_file_list': [...]}``; ``mat_file_list``
        is empty when ``asfile`` is not ``True``.

    Raises
    ------
    ValueError
        If ``mat_type`` is not recognized, or ``p`` is ``None`` for ``'sb'``.
    """
    wt = 1 if binary is True else np.random.uniform

    mat_list = []
    mat_file_list = []
    for _ in range(n_graphs):
        if mat_type == 'er':
            # Honor ``binary`` here too: this branch used to hard-code
            # uniform weights, so binary=True silently had no effect.
            sampled = er_nm(n, m, wt=wt, wtargs=dict(low=0, high=1))
        elif mat_type == 'sb':
            if p is None:
                raise ValueError(
                    f"for mat_type {mat_type}, p cannot be None")
            sampled = sbm(np.array([n]), np.array([[p]]), wt=wt,
                          wtargs=dict(low=0, high=1))
        else:
            raise ValueError(f"mat_type {mat_type} not recognized!")

        mat = largest_connected_component(symmetrize(remove_loops(sampled)))
        mat_list.append(mat)

        if asfile is True:
            # Reserve a unique path, and release the handle *before* numpy
            # writes to it: the handle is then closed even if np.save fails,
            # and the path is writable on platforms that lock open files.
            with tempfile.NamedTemporaryFile(mode='w+', suffix='.npy',
                                             delete=False) as mat_path_tmp:
                mat_path = str(mat_path_tmp.name)
            np.save(mat_path, mat)
            mat_file_list.append(mat_path)

    return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
def test_eigsh(self):
    """Check that the ``eigsh`` solver of ``select_svd`` matches full SVD.

    A two-block latent-position graph is sampled with a fixed seed, embedded
    with both algorithms, and the Procrustes norms of the two embeddings
    against the true latent positions must agree within tolerance.
    """
    np.random.seed(123)
    # 100 vertices in two groups of 50 with constant latent positions.
    X = np.vstack([
        np.repeat([[0.2, 0.2, 0.2]], 50, axis=0),
        np.repeat([[0.5, 0.5, 0.5]], 50, axis=0),
    ])
    P = X @ X.T
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy (AttributeError on 1.24+); use the builtin.
    A = np.random.binomial(1, P).astype(float)
    A = symmetrize(A, method="triu")
    n_components = 3

    # Full SVD
    U_full, D_full, _ = select_svd(A, n_components=n_components,
                                   algorithm="full")
    X_full = U_full @ np.diag(np.sqrt(D_full))
    _, _, norm_full = procrustes(X, X_full)

    # eigsh SVD
    U_square, D_square, _ = select_svd(A, n_components=n_components,
                                       algorithm="eigsh", n_iter=10)
    X_square = U_square @ np.diag(np.sqrt(D_square))
    _, _, norm_square = procrustes(X, X_square)

    rtol = 1e-4
    atol = 1e-4
    np.testing.assert_allclose(norm_full, norm_square, rtol=rtol, atol=atol)
def _gen_mat_data(n: int=20, m: int=20, p: float=0.50, mat_type: str='sb',
                  binary: bool=False, asfile: bool=True, n_graphs: int=1,
                  lcc: bool=False, modality: str='func'):
    """Generate random test graphs, optionally saved under ``~/test_mats``.

    Parameters
    ----------
    n : int
        Number of vertices passed to the graph sampler.
    m : int
        Number of edges; only used when ``mat_type == 'er'``.
    p : float
        Edge probability of the single block; only used when
        ``mat_type == 'sb'``. Must not be ``None`` in that case.
    mat_type : str
        ``'er'`` samples with ``er_nm``; ``'sb'`` samples a one-block ``sbm``.
    binary : bool
        When ``True`` every edge gets the constant weight 1; otherwise
        weights are drawn with ``np.random.uniform(low=0, high=1)``.
    asfile : bool
        When ``True`` each matrix is saved as a ``.npy`` file whose name
        follows the ``modality`` naming template below.
    n_graphs : int
        Number of graphs to generate.
    lcc : bool
        When ``True`` each graph is reduced with
        ``largest_connected_component``.
    modality : str
        ``'func'`` or ``'dwi'``; selects the file-name template. Only
        consulted when ``asfile`` is ``True``.

    Returns
    -------
    dict
        ``{'mat_list': [...], 'mat_file_list': [...]}``. ``mat_list`` holds
        the ``autofix``-ed matrices; the saved files hold the matrices as
        they were before ``autofix``.

    Raises
    ------
    ValueError
        If ``mat_type`` or (when saving) ``modality`` is not recognized, or
        ``p`` is ``None`` for ``'sb'``.
    """
    wt = 1 if binary is True else np.random.uniform

    mat_list = []
    mat_file_list = []
    for _ in range(n_graphs):
        if mat_type == 'er':
            # Honor ``binary`` here too: this branch used to hard-code
            # uniform weights, so binary=True silently had no effect.
            mat = symmetrize(
                remove_loops(er_nm(n, m, wt=wt,
                                   wtargs=dict(low=0, high=1))))
        elif mat_type == 'sb':
            if p is None:
                raise ValueError(
                    f"for mat_type {mat_type}, p cannot be None")
            mat = symmetrize(
                remove_loops(sbm(np.array([n]), np.array([[p]]), wt=wt,
                                 wtargs=dict(low=0, high=1))))
        else:
            raise ValueError(f"mat_type {mat_type} not recognized!")

        if lcc is True:
            mat = largest_connected_component(mat)

        mat_list.append(autofix(mat))

        if asfile is True:
            out_folder = f"{str(Path.home())}/test_mats"
            os.makedirs(out_folder, exist_ok=True)

            # The file name carries a random "thr-" value rounded to two
            # decimals, so names can repeat across graphs.
            if modality == 'func':
                mat_path = f"{out_folder}/graph_sub-999_modality-func_" \
                           f"model-corr_template-" \
                           f"MNI152_2mm_" \
                           f"parc_tol-6fwhm_hpass-" \
                           f"0Hz_" \
                           f"signal-mean_thrtype-prop_thr-" \
                           f"{round(random.uniform(0, 1),2)}.npy"
            elif modality == 'dwi':
                # NOTE(review): this template still says "modality-func";
                # left untouched because consumers may parse the name --
                # confirm whether it should read "modality-dwi".
                mat_path = f"{out_folder}/graph_sub-999_modality-func_" \
                           f"model-csa_template-" \
                           f"MNI152_2mm_tracktype-local_" \
                           f"traversal-det_minlength-30_" \
                           f"tol-5_thrtype-prop_thr-" \
                           f"{round(random.uniform(0, 1),2)}.npy"
            else:
                # Previously fell through with mat_path unbound and died
                # with an UnboundLocalError.
                raise ValueError(f"modality {modality} not recognized!")

            # np.save writes the file directly. The earlier empty temp file
            # (created with delete=False, copied here, then overwritten) only
            # leaked one file per graph in the temp directory.
            np.save(mat_path, mat)
            mat_file_list.append(mat_path)

    return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
# Relative half-width of the jitter applied to each base probability.
alpha = 0.7
# Walk every ordered pair of tree nodes; only leaf/leaf pairs get an entry.
for source_node in anytree.PreOrderIter(root):
    for target_node in anytree.PreOrderIter(root):
        if source_node.is_leaf and target_node.is_leaf:
            # The base probability is keyed by the name of the pair's
            # nearest common ancestor.
            nca = nearest_common_ancestor(source_node, target_node).name
            base_prob = probs[nca]
            # Uniform draw on [base * (1 - alpha), base * (1 + alpha)].
            # NOTE(review): the upper bound exceeds 1 whenever
            # base_prob > 1 / (1 + alpha) -- confirm probs stays below that.
            new_prob = np.random.uniform(base_prob - alpha * base_prob,
                                         base_prob + alpha * base_prob)
            i = source_node.name
            j = target_node.name
            sbm_probs.loc[i, j] = new_prob

from graspologic.utils import symmetrize

# (i, j) and (j, i) were drawn separately above; drop the labelled index and
# hand the raw array to symmetrize (called with its default method).
sbm_probs = sbm_probs.values
sbm_probs = symmetrize(sbm_probs)

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(sbm_probs, ax=ax)

# %%
# Collapse each node's path to a single label: the last non-NaN value among
# the first four entries of its row.
# assumes those four entries are numeric (np.isnan is applied) -- TODO confirm
flat_labels = []
node_data = mt.node_data
for node, row in node_data.iterrows():
    path = row.values[:4]
    path = path[~np.isnan(path)]
    label = path[-1]
    flat_labels.append(label)
flat_labels = np.array(flat_labels)

#%%
U, D, Vt = selectSVD(X, n_components=n_components) return U, Vt.T #%% from pathlib import Path import networkx as nx from graspologic.utils import pass_to_ranks, get_lcc, symmetrize data_dir = Path("sparse_new_basis/data/maggot") g = nx.read_weighted_edgelist( data_dir / "G.edgelist", create_using=nx.DiGraph, nodetype=int ) meta = pd.read_csv(data_dir / "meta_data.csv", index_col=0) adj = nx.to_numpy_array(g, nodelist=meta.index) adj = symmetrize(adj) adj, inds = get_lcc(adj, return_inds=True) meta = meta.iloc[inds] hemisphere = "left" if hemisphere == "left": meta["inds"] = np.arange(len(meta)) meta = meta[meta["left"]] inds = meta["inds"] adj = adj[np.ix_(inds, inds)] # TODO just try with one hemisphere preprocessing = "ptr" if preprocessing == "ptr": adj_to_embed = pass_to_ranks(adj) elif preprocessing == "sqrt":
def motif_matching(
    paths,
    ID,
    atlas,
    namer_dir,
    name_list,
    metadata_list,
    multigraph_list_all,
    graph_path_list_all,
    rsn=None,
):
    """Match a structural and a functional graph and threshold them by motifs.

    The two graphs are restricted to the nodes they share, masked against
    each other's edges and (best-effort) against a matched community, then
    handed to ``compare_motifs``. The resulting per-threshold graphs are
    saved under ``namer_dir``.

    Parameters
    ----------
    paths : sequence of two str
        ``[struct_graph_path, func_graph_path]``, both ``.npy`` files. Node
        metadata is globbed from ``<graph_path>/../../nodes/*.json``.
    ID
        Subject identifier; only used to build the multiplex name.
    atlas
        Atlas label; only used to build names and output file names.
    namer_dir : str
        Directory passed to ``compare_motifs`` and used for the saved files.
    name_list, metadata_list, multigraph_list_all, graph_path_list_all : list
        Accumulators that are appended to IN PLACE and also returned.
    rsn : optional
        Only used in the message printed when the shapes do not match.

    Returns
    -------
    tuple
        ``(name_list, metadata_list, multigraph_list_all,
        graph_path_list_all)``. If ``compare_motifs`` fails, four empty
        lists are returned instead; note that ``metadata_list`` and
        ``name_list`` have already been appended to at that point.

    Raises
    ------
    AssertionError
        If the node attribute lists and the matched matrix disagree in length.
    """
    import networkx as nx
    import numpy as np
    import glob
    from pynets.core import thresholding
    from pynets.stats.netmotifs import compare_motifs
    from sklearn.metrics.pairwise import cosine_similarity
    from pynets.stats.netstats import community_resolution_selection
    from graspologic.utils import remove_loops, symmetrize
    from pynets.core.nodemaker import get_brainnetome_node_attributes

    [struct_graph_path, func_graph_path] = paths
    struct_mat = np.load(struct_graph_path)
    func_mat = np.load(func_graph_path)

    [struct_coords, struct_labels, struct_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(struct_graph_path).parent.parent)}/nodes/*.json"),
            struct_mat.shape[0])

    [func_coords, func_labels, func_label_intensities] = \
        get_brainnetome_node_attributes(glob.glob(
            f"{str(Path(func_graph_path).parent.parent)}/nodes/*.json"),
            func_mat.shape[0])

    # Find intersecting nodes across modalities (i.e. assuming the same
    # parcellation, but accommodating for the possibility of dropped nodes)
    diff1 = list(set(struct_label_intensities) - set(func_label_intensities))
    diff2 = list(set(func_label_intensities) - set(struct_label_intensities))
    G_struct = nx.from_numpy_array(struct_mat)
    G_func = nx.from_numpy_array(func_mat)

    # Nodes present only in the structural graph. Indices are kept in
    # descending order so deleting one never shifts those still to delete.
    bad_idxs = []
    for val in diff1:
        bad_idxs.append(struct_label_intensities.index(val))
        bad_idxs = sorted(list(set(bad_idxs)), reverse=True)
        if type(struct_coords) is np.ndarray:
            # ``del`` below needs a list, not an ndarray.
            struct_coords = list(tuple(x) for x in struct_coords)
    for j in bad_idxs:
        G_struct.remove_node(j)
        print(f"Removing: {(struct_labels[j], struct_coords[j])}...")
        del struct_labels[j], struct_coords[j]

    # Nodes present only in the functional graph.
    bad_idxs = []
    for val in diff2:
        bad_idxs.append(func_label_intensities.index(val))
        bad_idxs = sorted(list(set(bad_idxs)), reverse=True)
        if type(func_coords) is np.ndarray:
            func_coords = list(tuple(x) for x in func_coords)
    for j in bad_idxs:
        G_func.remove_node(j)
        print(f"Removing: {(func_labels[j], func_coords[j])}...")
        del func_labels[j], func_coords[j]

    struct_mat = nx.to_numpy_array(G_struct)
    func_mat = nx.to_numpy_array(G_func)

    struct_mat = thresholding.autofix(symmetrize(remove_loops(struct_mat)))
    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))

    if func_mat.shape == struct_mat.shape:
        # Keep only edges that are non-zero in both modalities.
        func_mat[~struct_mat.astype("bool")] = 0
        struct_mat[~func_mat.astype("bool")] = 0
        print(
            "Edge disagreements after matching: ",
            sum(sum(abs(func_mat - struct_mat))),
        )

        metadata = {}
        assert (len(struct_coords) == len(struct_labels) ==
                len(func_coords) == len(func_labels) == func_mat.shape[0])
        metadata["coords"] = struct_coords
        metadata["labels"] = struct_labels
        metadata_list.append(metadata)

        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)

        struct_mat = thresholding.standardize(struct_mat)
        func_mat = thresholding.standardize(func_mat)

        # ``nx.from_numpy_matrix`` was removed in networkx 3.0;
        # ``from_numpy_array`` (already used above) accepts the same 2-D
        # ndarray on both old and new releases.
        struct_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(struct_mat)))[1]

        func_node_comm_aff_mat = community_resolution_selection(
            nx.from_numpy_array(np.abs(func_mat)))[1]

        # One boolean membership vector per community.
        struct_comms = []
        for i in np.unique(struct_node_comm_aff_mat):
            struct_comms.append(struct_node_comm_aff_mat == i)

        func_comms = []
        for i in np.unique(func_node_comm_aff_mat):
            func_comms.append(func_node_comm_aff_mat == i)

        sims = cosine_similarity(struct_comms, func_comms)

        # Community matching and masking are best-effort: a failure is
        # reported and the step skipped. ``Exception`` (not
        # ``BaseException``) lets Ctrl-C / SystemExit still propagate.
        try:
            struct_comm = struct_comms[np.argmax(sims, axis=0)[0]]
        except Exception:
            print('Matching by structural communities failed...')
            struct_comm = struct_mat

        try:
            func_comm = func_comms[np.argmax(sims, axis=0)[0]]
        except Exception:
            print('Matching by functional communities failed...')
            func_comm = func_mat

        comm_mask = np.equal.outer(struct_comm, func_comm).astype(bool)

        try:
            assert comm_mask.shape == struct_mat.shape == func_mat.shape
        except AssertionError as e:
            # Deliberately not re-raised: a mis-shaped mask makes the
            # masking below fail and be skipped instead.
            e.args += (comm_mask, comm_mask.shape, struct_mat,
                       struct_mat.shape, func_mat, func_mat.shape)

        try:
            struct_mat[~comm_mask] = 0
        except Exception:
            print('Skipping community masking...')
        try:
            func_mat[~comm_mask] = 0
        except Exception:
            print('Skipping community masking...')

        struct_name = struct_graph_path.split("/rawgraph_")[-1].split(
            ".npy")[0]
        func_name = func_graph_path.split("/rawgraph_")[-1].split(".npy")[0]
        name = f"sub-{ID}_{atlas}_mplx_Layer-1_{struct_name}_" \
               f"Layer-2_{func_name}"
        name_list.append(name)
        struct_mat = np.maximum(struct_mat, struct_mat.T)
        func_mat = np.maximum(func_mat, func_mat.T)
        try:
            [mldict, g_dict] = compare_motifs(struct_mat, func_mat, name,
                                              namer_dir)
        except Exception:
            print(f"Adaptive thresholding by motif comparisons failed "
                  f"for {name}. This usually happens when no motifs are found")
            return [], [], [], []

        multigraph_list_all.append(list(mldict.values())[0])
        graph_path_list = []
        for thr in list(g_dict.keys()):
            multigraph_path_list_dict = {}
            [struct, func] = g_dict[thr]
            # NOTE(review): struct_out carries no threshold, so every
            # iteration overwrites the same file; and func_out is prefixed
            # "struct_". Both look unintended -- confirm before renaming,
            # since downstream code may depend on these names.
            struct_out = f"{namer_dir}/struct_{atlas}_{struct_name}.npy"
            func_out = f"{namer_dir}/struct_{atlas}_{func_name}_" \
                       f"motif-{thr}.npy"
            np.save(struct_out, struct)
            np.save(func_out, func)
            multigraph_path_list_dict[f"struct_{atlas}_{thr}"] = struct_out
            multigraph_path_list_dict[f"func_{atlas}_{thr}"] = func_out
            graph_path_list.append(multigraph_path_list_dict)
        graph_path_list_all.append(graph_path_list)
    else:
        print(
            f"Skipping {rsn} rsn, since structural and functional graphs are "
            f"not identical shapes.")

    return name_list, metadata_list, multigraph_list_all, graph_path_list_all
# Convert every entry of the glasbey_light palette with get_rgb, then build
# an sRGB colour object from each result and convert it to Lab.
colors = list(map(get_rgb, cc.glasbey_light))
color_objs = [sRGBColor(*rgb) for rgb in colors]
color_objs = [convert_color(x, LabColor) for x in color_objs]
# NOTE(review): color_pairs is not read anywhere in this section -- confirm
# that a later cell needs it.
color_pairs = cartesian_product(color_objs, color_objs)

# Pairwise CIEDE2000 distance between all Lab colours; both triangles and the
# diagonal are computed explicitly.
color_dist_mat = np.empty((len(colors), len(colors)))
for i, color1 in enumerate(color_objs):
    for j, color2 in enumerate(color_objs):
        dist = delta_e_cie2000(color1, color2)
        color_dist_mat[i, j] = dist
# Report whether the raw matrix is already (almost) symmetric, then pass it
# through symmetrize before it is condensed with squareform below.
print(is_almost_symmetric(color_dist_mat))
color_dist_mat = symmetrize(color_dist_mat)

#%%
# Average-linkage clustering on the condensed distance matrix; the same
# linkage orders both rows and columns of the clustermap.
Z = linkage(squareform(color_dist_mat), method="average")
sns.clustermap(
    color_dist_mat,
    row_colors=colors,
    col_colors=colors,
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("clustermap")

# %%
# 2-component classical MDS configured for a precomputed dissimilarity matrix.
cmds = ClassicalMDS(n_components=2, dissimilarity="precomputed")
def matching(
    paths,
    atlas,
    namer_dir,
):
    """Match a dwi and a functional graph and save them as a multiplex.

    Both matrices are cleaned and standardized, reduced to their shared
    nodes/edges and common largest connected component, thresholded via
    ``optimize_mutual_info``, and written to ``namer_dir`` together with a
    pickled multigraph and one JSON sidecar per input.

    Parameters
    ----------
    paths : sequence of two str
        ``[dwi_graph_path, func_graph_path]``, both ``.npy`` files whose
        names contain a ``modality-`` entity. Node metadata is globbed from
        ``<graph_path>/../../nodes/*.json``.
    atlas
        Atlas label; only used to build the output name.
    namer_dir : str
        Output directory for every file written here.

    Returns
    -------
    tuple
        ``(mG_nx, mG, dwi_file_out, func_file_out)``: path of the pickled
        networkx multigraph, the value returned by ``build_mx_multigraph``,
        and the paths of the two saved matrices.
    """
    import glob
    import pickle
    import networkx as nx
    import numpy as np
    from pynets.core import thresholding
    from pynets.statistics.utils import parse_closest_ixs
    from graspologic.utils import remove_loops, symmetrize, \
        multigraph_lcc_intersection

    [dwi_graph_path, func_graph_path] = paths
    dwi_mat = np.load(dwi_graph_path)
    func_mat = np.load(func_graph_path)
    dwi_mat = thresholding.autofix(symmetrize(remove_loops(dwi_mat)))
    func_mat = thresholding.autofix(symmetrize(remove_loops(func_mat)))
    dwi_mat = thresholding.standardize(dwi_mat)
    func_mat = thresholding.standardize(func_mat)

    node_dict_dwi = parse_closest_ixs(
        glob.glob(f"{str(Path(dwi_graph_path).parent.parent)}"
                  f"/nodes/*.json"), dwi_mat.shape[0])[1]

    node_dict_func = parse_closest_ixs(
        glob.glob(f"{str(Path(func_graph_path).parent.parent)}"
                  f"/nodes/*.json"), func_mat.shape[0])[1]

    # Copy each edge weight into a modality-named attribute. The call order
    # is (G, values, name): the previous (G, name, values) order with a
    # ``dict_values`` argument stored a junk attribute on every edge instead.
    G_dwi = nx.from_numpy_array(dwi_mat)
    nx.set_edge_attributes(G_dwi, nx.get_edge_attributes(G_dwi, 'weight'),
                           name='structural')
    nx.set_node_attributes(G_dwi, dict(node_dict_dwi), name='dwi')

    G_func = nx.from_numpy_array(func_mat)
    nx.set_edge_attributes(G_func, nx.get_edge_attributes(G_func, 'weight'),
                           name='functional')
    nx.set_node_attributes(G_func, dict(node_dict_func), name='func')

    # Restrict each graph to the nodes and edges it shares with the other.
    R = G_dwi.copy()
    R.remove_nodes_from(n for n in G_dwi if n not in G_func)
    R.remove_edges_from(e for e in G_dwi.edges if e not in G_func.edges)
    G_dwi = R.copy()

    R = G_func.copy()
    R.remove_nodes_from(n for n in G_func if n not in G_dwi)
    R.remove_edges_from(e for e in G_func.edges if e not in G_dwi.edges)
    G_func = R.copy()

    [G_dwi, G_func] = multigraph_lcc_intersection([G_dwi, G_func])

    def writeJSON(metadata_str, outputdir):
        """Write the ``key-value`` entities of a file name to a JSON sidecar.

        Returns the random UUID string embedded in the sidecar's file name.
        """
        import json
        import uuid
        entities = metadata_str.split('modality-')[1].split('_')
        modality = entities[0]
        sidecar_id = str(uuid.uuid4())
        filename = f"{outputdir}/sidecar_modality-{modality}_" \
                   f"{sidecar_id}.json"
        metadata_dict = {}
        for meta in entities:
            if '-' not in meta:
                continue
            # Split on the first dash only so a value that itself contains
            # a dash no longer raises on unpacking.
            k, v = meta.split('-', 1)
            metadata_dict[k] = v
        with open(filename, 'w+') as jsonfile:
            json.dump(metadata_dict, jsonfile, indent=4)
        return sidecar_id

    dwi_name = dwi_graph_path.split("/")[-1].split(".npy")[0]
    func_name = func_graph_path.split("/")[-1].split(".npy")[0]

    dwi_hash = writeJSON(dwi_name, namer_dir)
    func_hash = writeJSON(func_name, namer_dir)

    name = f"{atlas}_mplx_layer1-dwi_ensemble-{dwi_hash}_" \
           f"layer2-func_ensemble-{func_hash}"

    dwi_opt, func_opt, best_mi = optimize_mutual_info(
        nx.to_numpy_array(G_dwi), nx.to_numpy_array(G_func), bins=50)

    # Each result maps a threshold to a matrix; the first entry is used.
    func_mat_final = list(func_opt.values())[0]
    dwi_mat_final = list(dwi_opt.values())[0]
    G_dwi_final = nx.from_numpy_array(dwi_mat_final)
    G_func_final = nx.from_numpy_array(func_mat_final)

    # ``OrderedMultiGraph`` was removed in networkx 3.0; fall back to the
    # plain ``MultiGraph`` when it is unavailable.
    multigraph_cls = getattr(nx, "OrderedMultiGraph", nx.MultiGraph)
    G_multi = multigraph_cls(nx.compose(G_dwi_final, G_func_final))

    out_name = f"{name}_matchthr-{list(dwi_opt.keys())[0]}_" \
               f"{list(func_opt.keys())[0]}"
    mG = build_mx_multigraph(nx.to_numpy_array(G_func_final),
                             nx.to_numpy_array(G_dwi_final),
                             out_name, namer_dir)

    # ``nx.write_gpickle`` (removed in networkx 3.0) was a thin wrapper
    # around this exact pickle call.
    mG_nx = f"{namer_dir}/{out_name}.gpickle"
    with open(mG_nx, "wb") as gpickle_file:
        pickle.dump(G_multi, gpickle_file, pickle.HIGHEST_PROTOCOL)

    dwi_file_out = f"{namer_dir}/{dwi_name}.npy"
    func_file_out = f"{namer_dir}/{func_name}.npy"
    np.save(dwi_file_out, dwi_mat_final)
    np.save(func_file_out, func_mat_final)
    return mG_nx, mG, dwi_file_out, func_file_out