import time

import numpy as np
import pandas as pd
from sklearn.utils.validation import check_symmetric

# Note: `_stable_normalized_pd`, `_stable_normalized`, `_find_dominate_set`,
# `_B0_normalized`, and `_check_SNF2_inputs` are module-local helpers assumed
# to be defined elsewhere in this file.


def snf2(args, aff, dicts_common, dicts_unique, original_order):
    """
    Performs Similarity Network Fusion on `aff` matrices.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide `neighbor_size` (KNN threshold), `fusing_iteration`
        (number of diffusion iterations), and `normalization_factor`
        (the alpha passed to `_B0_normalized`).
    aff : list of (N, N) pandas.DataFrame
        Input similarity arrays; all arrays should be square but need not
        be of equal size.
    dicts_common : dict, required
        Dictionaries for getting the common samples between pairs of views.
        Example: dicts_common[(0, 1)] == dicts_common[(1, 0)], the samples
        shared by the first and second views.
    dicts_unique : dict, required
        Dictionaries for getting the samples unique to one view of a pair.
        Example: dicts_unique[(0, 1)] holds the samples in the first view
        that are not in the second; dicts_unique[(1, 0)] holds the samples
        in the second view that are not in the first.
    original_order : list of lists, required
        The original sample order of each view.

    Returns
    -------
    aff : list of (N, N) pandas.DataFrame
        Fused similarity networks of the input arrays.
    """
    print("Start applying diffusion with the new method!")
    start_time = time.time()
    newW = [0] * len(aff)

    # First, normalize the networks to avoid scale problems; this is
    # compatible with pandas DataFrames.
    for n, mat in enumerate(aff):
        # Normalize the affinity matrix based on the strength of its edges.
        # mat = mat / np.nansum(mat, axis=1, keepdims=True)
        mat = _stable_normalized_pd(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

        # Apply the KNN threshold to the normalized affinity matrix.
        # The intersecting samples are cropped from the newW matrices below.
        neighbor_size = min(int(args.neighbor_size), aff[n].shape[0])
        newW[n] = _find_dominate_set(aff[n], neighbor_size)

    for iteration in range(args.fusing_iteration):
        for n, mat in enumerate(aff):
            nzW = newW[n]  # TODO: not sure whether this needs to be a deep copy

            # The goal is to update aff[n] as the average of all the diffused
            # matrices, so start from a zeroed copy of aff[n].
            aff0_copy = aff[n].copy()
            for col in aff0_copy.columns:
                aff0_copy[col].values[:] = 0

            for j, mat_tofuse in enumerate(aff):
                if n == j:
                    continue

                # Reorder mat_tofuse so that the common samples come first.
                mat_tofuse = mat_tofuse.reindex(
                    (sorted(dicts_common[(j, n)]) + sorted(dicts_unique[(j, n)])),
                    axis=1,
                )
                mat_tofuse = mat_tofuse.reindex(
                    (sorted(dicts_common[(j, n)]) + sorted(dicts_unique[(j, n)])),
                    axis=0,
                )

                # Next, crop mat_tofuse down to the common samples.
                num_common = len(dicts_common[(n, j)])
                to_drop_mat = mat_tofuse.columns[
                    num_common:mat_tofuse.shape[1]
                ].values.tolist()
                mat_tofuse_crop = mat_tofuse.drop(to_drop_mat, axis=1)
                mat_tofuse_crop = mat_tofuse_crop.drop(to_drop_mat, axis=0)

                # Add the similarity from the view being fused onto an identity
                # matrix over the current view's samples.
                nzW_identity = pd.DataFrame(
                    data=np.identity(nzW.shape[0]),
                    index=original_order[n],
                    columns=original_order[n],
                )
                mat_tofuse_union = nzW_identity + mat_tofuse_crop
                mat_tofuse_union.fillna(0.0, inplace=True)
                mat_tofuse_union = _stable_normalized_pd(mat_tofuse_union)
                mat_tofuse_union = mat_tofuse_union.reindex(original_order[n], axis=1)
                mat_tofuse_union = mat_tofuse_union.reindex(original_order[n], axis=0)

                # Now we are ready to do the diffusion.
                nzW_T = np.transpose(nzW)
                aff0_temp = nzW.dot(
                    mat_tofuse_union.dot(nzW_T)
                )  # np.matmul does not work on DataFrames here, but .dot() does
                aff0_temp = _B0_normalized(aff0_temp, alpha=args.normalization_factor)
                aff0_copy = np.add(aff0_temp, aff0_copy)

            aff[n] = np.divide(aff0_copy, len(aff) - 1)

    for n, mat in enumerate(aff):
        mat = _stable_normalized_pd(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

    end_time = time.time()
    print("Diffusion ends! Time: {}s".format(end_time - start_time))
    return aff
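# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal, hypothetical example of wiring up snf2's inputs for two views.
# The sample names, toy affinities, and hyperparameter values below are
# assumptions made for illustration; they do not come from this repository.
def _demo_snf2():
    from argparse import Namespace

    rng = np.random.default_rng(0)

    def toy_affinity(names):
        m = rng.random((len(names), len(names)))
        m = (m + m.T) / 2  # symmetrize the toy affinity
        return pd.DataFrame(m, index=names, columns=names)

    # View 0 has samples s1..s4; view 1 shares s1..s3 and adds s5.
    order0 = ["s1", "s2", "s3", "s4"]
    order1 = ["s1", "s2", "s3", "s5"]
    aff_views = [toy_affinity(order0), toy_affinity(order1)]

    # Key (i, j): samples of view i shared with / missing from view j.
    dicts_common = {(0, 1): ["s1", "s2", "s3"], (1, 0): ["s1", "s2", "s3"]}
    dicts_unique = {(0, 1): ["s4"], (1, 0): ["s5"]}

    demo_args = Namespace(
        neighbor_size=2, fusing_iteration=20, normalization_factor=1.0
    )
    return snf2(demo_args, aff_views, dicts_common, dicts_unique, [order0, order1])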
def test_B0_normalized(affinity, alpha):
    out = compute._B0_normalized(affinity[0], alpha=alpha)
    # Amounts to adding alpha to the diagonal (and symmetrizing).
    assert np.allclose(np.diag(out), np.diag(affinity[0]) + alpha)
    # The resulting array IS symmetrical.
    assert np.allclose(out, out.T)
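# The test above pins down what `compute._B0_normalized` must do: add `alpha`
# to the diagonal and return a symmetric matrix. A minimal sketch consistent
# with those assertions follows; `_B0_normalized_sketch` is a hypothetical
# name, and the repository's real helper may differ in details (e.g., it may
# also accept pandas DataFrames).
def _B0_normalized_sketch(W, alpha=1.0):
    W = np.asarray(W, dtype=float)
    W = W + alpha * np.eye(len(W))  # boost self-similarity on the diagonal
    return (W + W.T) / 2            # enforce exact symmetry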
def snf2_np(*aff, numofCom, K=20, t=20, alpha=1.0):
    """
    Performs Similarity Network Fusion on `aff` matrices.

    Parameters
    ----------
    *aff : (N, N) array_like
        Input similarity arrays; all arrays should be square but need not
        be of equal size. Note: the common samples must appear first in
        every matrix.
    numofCom : int, required
        Number of common samples across all the matrices.
    K : (0, N) int, optional
        Number of neighbors to retain when thresholding each affinity
        matrix (KNN). Default: 20
    t : int, optional
        Number of iterations to perform information swapping. Default: 20
    alpha : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 1.0

    Returns
    -------
    aff : list of (N, N) ndarray
        Fused similarity networks of the input arrays.
    """
    print("Start applying diffusion!")
    aff = _check_SNF2_inputs(aff)
    newW = [0] * len(aff)
    aff_com = [0] * len(aff)

    # First, normalize the networks to avoid scale problems.
    for n, mat in enumerate(aff):
        # Normalize the affinity matrix based on the strength of its edges.
        # mat = mat / np.nansum(mat, axis=1, keepdims=True)
        mat = _stable_normalized(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)
        aff_com[n] = aff[n][0:numofCom, :][:, 0:numofCom]

        # Apply the KNN threshold to the normalized affinity matrix, then
        # crop the thresholded matrix to the intersecting (common) columns.
        newW[n] = _find_dominate_set(aff[n], int(K))
        newW[n] = newW[n][:, 0:numofCom]

    # Take the sum of all normalized (not thresholded) affinity matrices
    # over the common part.
    Wsum = np.nansum(aff_com, axis=0)

    # Get the number of modalities informing each subject x subject affinity.
    n_aff = len(aff_com) - np.sum([np.isnan(a) for a in aff_com], axis=0)

    for iteration in range(t):
        for n, mat in enumerate(aff):
            # Temporarily convert NaNs to 0 to avoid propagation errors.
            nzW = np.nan_to_num(newW[n])
            mat = mat[0:numofCom, :][:, 0:numofCom]
            aw = np.nan_to_num(mat)

            # Propagate `Wsum` through the masked affinity matrix (`nzW`).
            # TODO: (n_aff - 1) is zero wherever only one modality informs
            # an entry, which divides by zero here.
            aff0 = np.matmul(np.matmul(nzW, (Wsum - aw) / (n_aff - 1)), nzW.T)

            # Ensure the diagonal retains the highest similarity.
            aff[n] = _B0_normalized(aff0, alpha=alpha)
            aff_com[n] = aff[n][0:numofCom, :][:, 0:numofCom]

        # Compute the updated sum of normalized affinity matrices.
        Wsum = np.nansum(aff_com, axis=0)

    for n, mat in enumerate(aff):
        mat = _stable_normalized(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

    return aff
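# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal, hypothetical example of calling snf2_np on two toy views whose
# first 30 samples are shared. The sizes and hyperparameter values are
# assumptions made for illustration.
def _demo_snf2_np():
    rng = np.random.default_rng(0)

    def toy_affinity(n):
        m = rng.random((n, n))
        return (m + m.T) / 2  # symmetrize the toy affinity

    # Two views of 40 and 50 samples; the common 30 occupy the leading
    # rows/columns, as snf2_np requires.
    view_a = toy_affinity(40)
    view_b = toy_affinity(50)

    fused = snf2_np(view_a, view_b, numofCom=30, K=20, t=20, alpha=1.0)
    # Each fused matrix keeps its view's full size; the top-left 30x30
    # block corresponds to the shared samples.
    return fused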