Example #1
import time

import numpy as np
import pandas as pd

# _stable_normalized_pd, _find_dominate_set, _B0_normalized and check_symmetric
# are module-internal helpers (cf. compute._B0_normalized in Example #2).
def snf2(args, aff, dicts_common, dicts_unique, original_order):
    """
    Performs Similarity Network Fusion on `aff` matrices

    Parameters
    ----------
    aff : list of (N, N) pandas.DataFrame
        Input similarity matrices; each matrix must be square, but they need
        not all be the same size.

    dicts_common : dict, required
        Maps a pair of view indices to the samples common to both views.
        Example: dicts_common[(0, 1)] == dicts_common[(1, 0)], the patients
        shared by views 1 and 2.

    dicts_unique : dict, required
        Maps a pair of view indices to the samples unique to the first view.
        Example: dicts_unique[(0, 1)] gives the samples in view 1 that are
                 not in view 2; dicts_unique[(1, 0)] gives the samples in
                 view 2 that are not in view 1.

    original_order : list of lists, required
        The original sample order of each view.

    K : (0, N) int, optional
        Number of nearest neighbors kept when thresholding each affinity
        matrix (taken from ``args.neighbor_size``). Default: 20

    t : int, optional
        Number of iterations to perform information swapping (taken from
        ``args.fusing_iteration``). Default: 20

    alpha : (0, 1) float, optional
        Hyperparameter normalization factor for scaling (taken from
        ``args.normalization_factor``). Default: 1.0

    Returns
    -------
    W : list of (N, N) pandas.DataFrame
        Fused similarity networks of the input matrices
    """

    print("Start applying diffusion! with new method")

    start_time = time.time()

    newW = [0] * len(aff)

    # First, normalize the networks to avoid scale problems; this version works on pandas DataFrames
    for n, mat in enumerate(aff):
        # normalize affinity matrix based on strength of edges
        # mat = mat / np.nansum(mat, axis=1, keepdims=True)
        mat = _stable_normalized_pd(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

        # apply KNN threshold to normalized affinity matrix
        # We need to crop the intersecting samples from newW matrices
        neighbor_size = min(int(args.neighbor_size), aff[n].shape[0])
        newW[n] = _find_dominate_set(aff[n], neighbor_size)

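    # Diffusion: each iteration rebuilds every view's affinity matrix by
    # propagating the other views' reordered-and-cropped affinities through
    # the view's own KNN-thresholded kernel, aff[n] <- mean_j(S_n @ W_j @ S_n^T),
    # in the spirit of the classic SNF update.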
    for iteration in range(args.fusing_iteration):
        for n, mat in enumerate(aff):
            # nzW is a reference to newW[n], not a copy; it is only read
            # below, so no copy is needed
            nzW = newW[n]

            # The updated aff[n] is the average of the diffused matrices from
            # all other views, so start from an all-zero copy of aff[n]
            aff0_copy = aff[n].copy()
            aff0_copy.loc[:, :] = 0.0

            for j, mat_tofuse in enumerate(aff):
                if n == j:
                    continue

                # reorder mat_tofuse so the samples shared with view n come first
                mat_tofuse = mat_tofuse.reindex(
                    (sorted(dicts_common[(j, n)]) +
                     sorted(dicts_unique[(j, n)])),
                    axis=1,
                )
                mat_tofuse = mat_tofuse.reindex(
                    (sorted(dicts_common[(j, n)]) +
                     sorted(dicts_unique[(j, n)])),
                    axis=0,
                )

                # Next, crop mat_tofuse down to the shared samples
                num_common = len(dicts_common[(n, j)])
                to_drop_mat = mat_tofuse.columns[num_common:].values.tolist()
                mat_tofuse_crop = mat_tofuse.drop(to_drop_mat, axis=1)
                mat_tofuse_crop = mat_tofuse_crop.drop(to_drop_mat, axis=0)

                # Embed the cropped similarity into an identity matrix indexed
                # by view n's samples
                nzW_identity = pd.DataFrame(
                    data=np.identity(nzW.shape[0]),
                    index=original_order[n],
                    columns=original_order[n],
                )
                mat_tofuse_union = nzW_identity + mat_tofuse_crop
                mat_tofuse_union.fillna(0.0, inplace=True)
                mat_tofuse_union = _stable_normalized_pd(mat_tofuse_union)
                mat_tofuse_union = mat_tofuse_union.reindex(original_order[n],
                                                            axis=1)
                mat_tofuse_union = mat_tofuse_union.reindex(original_order[n],
                                                            axis=0)

                # Diffusion step: S_n @ W_union @ S_n^T
                nzW_T = np.transpose(nzW)
                # np.matmul drops the pandas labels here; DataFrame.dot()
                # aligns on index/columns, so use .dot()
                aff0_temp = nzW.dot(mat_tofuse_union.dot(nzW_T))

                aff0_temp = _B0_normalized(aff0_temp,
                                           alpha=args.normalization_factor)

                aff0_copy = np.add(aff0_temp, aff0_copy)

            aff[n] = np.divide(aff0_copy, len(aff) - 1)  # average over the other views

    for n, mat in enumerate(aff):
        mat = _stable_normalized_pd(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

    end_time = time.time()
    print("Diffusion ends! Times: {}s".format(end_time - start_time))
    return aff
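
A hedged usage sketch for snf2, assuming two toy views with a partial sample
overlap; the argparse.Namespace and the dictionaries below are hypothetical and
only mirror the attributes the function actually reads (neighbor_size,
fusing_iteration, normalization_factor):

import argparse

import numpy as np
import pandas as pd

def toy_affinity(names):
    # symmetric random similarity matrix with unit diagonal
    m = np.random.rand(len(names), len(names))
    m = (m + m.T) / 2
    np.fill_diagonal(m, 1.0)
    return pd.DataFrame(m, index=names, columns=names)

samples_a = ["s1", "s2", "s3"]  # view 1
samples_b = ["s2", "s3", "s4"]  # view 2; shares s2 and s3 with view 1

aff = [toy_affinity(samples_a), toy_affinity(samples_b)]
common = sorted(set(samples_a) & set(samples_b))
dicts_common = {(0, 1): common, (1, 0): common}
dicts_unique = {
    (0, 1): sorted(set(samples_a) - set(samples_b)),
    (1, 0): sorted(set(samples_b) - set(samples_a)),
}
original_order = [samples_a, samples_b]

args = argparse.Namespace(neighbor_size=2, fusing_iteration=20,
                          normalization_factor=1.0)
fused = snf2(args, aff, dicts_common, dicts_unique, original_order)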
Example #2
import numpy as np

# `compute` is the module under test (import path assumed); `affinity` and
# `alpha` are assumed to be pytest fixtures supplying the test inputs.
from snf import compute
def test_B0_normalized(affinity, alpha):
    out = compute._B0_normalized(affinity[0], alpha=alpha)
    # amounts to adding alpha to the diagonal (and symmetrizing)
    assert np.allclose(np.diag(out), np.diag(affinity[0]) + alpha)
    # resulting array IS symmetrical
    assert np.allclose(out, out.T)
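
For reference, a minimal sketch of a function consistent with what this test
asserts (alpha added to the diagonal, result symmetrized); the actual
compute._B0_normalized may differ in implementation details:

def _B0_normalized_sketch(W, alpha=1.0):
    # shift the diagonal by alpha so self-similarity stays dominant
    W = W + alpha * np.eye(len(W))
    # enforce symmetry; this leaves the diagonal untouched
    return (W + W.T) / 2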
Example #3
import numpy as np

# _check_SNF2_inputs, _stable_normalized, _find_dominate_set, _B0_normalized
# and check_symmetric are module-internal helpers, as in Example #1.
def snf2_np(*aff, numofCom, K=20, t=20, alpha=1.0):
    """
    Performs Similarity Network Fusion on `aff` matrices

    Parameters
    ----------
    *aff : (N, N) array_like
        Input similarity arrays; each array must be square, but they need not
        all be the same size.
        Note: the common samples must appear first in every matrix.

    numofCom: int, required
        Number of common samples across all the matrices

    K : (0, N) int, optional
        Number of nearest neighbors kept when thresholding each affinity
        matrix. Default: 20

    t : int, optional
        Number of iterations to perform information swapping. Default: 20

    alpha : (0, 1) float, optional
        Hyperparameter normalization factor for scaling. Default: 1.0

    Returns
    -------
    W : list of (N, N) np.ndarray
        Fused similarity networks of the input arrays
    """
    print("Start applying diffusion!")
    aff = _check_SNF2_inputs(aff)
    newW = [0] * len(aff)
    aff_com = [0] * len(aff)

    # First, normalize different networks to avoid scale problems
    for n, mat in enumerate(aff):
        # normalize affinity matrix based on strength of edges
        # mat = mat / np.nansum(mat, axis=1, keepdims=True)
        mat = _stable_normalized(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)
        aff_com[n] = aff[n][0:numofCom, :][:, 0:numofCom]
        # apply KNN threshold to normalized affinity matrix
        # We need to crop the intersecting samples from newW matrices
        newW[n] = _find_dominate_set(aff[n], int(K))
        newW[n] = newW[n][:, 0:numofCom]

    # take the sum of all normalized (not thresholded) affinity matrices over the common-sample block
    Wsum = np.nansum(aff_com, axis=0)

    # get number of modalities informing each subject x subject affinity
    n_aff = len(aff_com) - np.sum([np.isnan(a) for a in aff_com], axis=0)

    for iteration in range(t):
        for n, mat in enumerate(aff):
            # temporarily convert nans to 0 to avoid propagation errors
            nzW = np.nan_to_num(newW[n])
            mat = mat[0:numofCom, :][:, 0:numofCom]
            aw = np.nan_to_num(mat)
            # propagate `Wsum` through the masked affinity matrix (`nzW`)
            aff0 = np.matmul(np.matmul(nzW, (Wsum - aw) / (n_aff - 1)),
                             nzW.T)  # TODO: guard against n_aff == 1 (division by zero)
            # ensure diagonal retains highest similarity
            aff[n] = _B0_normalized(aff0, alpha=alpha)
            aff_com[n] = aff[n][0:numofCom, :][:, 0:numofCom]

        # compute updated sum of normalized affinity matrices
        Wsum = np.nansum(aff_com, axis=0)

    for n, mat in enumerate(aff):
        mat = _stable_normalized(mat)
        aff[n] = check_symmetric(mat, raise_warning=False)

    return aff
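
A hedged usage sketch for snf2_np with toy arrays, assuming the docstring's
ordering requirement that the first numofCom rows/columns of every view are
the shared samples:

rng = np.random.default_rng(0)

def toy_affinity_np(n):
    # symmetric random similarity matrix with unit diagonal
    m = rng.random((n, n))
    m = (m + m.T) / 2
    np.fill_diagonal(m, 1.0)
    return m

# view 1 has 5 samples, view 2 has 6; their first 4 samples are common
a1, a2 = toy_affinity_np(5), toy_affinity_np(6)
fused = snf2_np(a1, a2, numofCom=4, K=3, t=10, alpha=1.0)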