Пример #1
0
def test_degree_matrix():
    """Print and time ``degree_matrix`` on a tiny and on a large sparse graph.

    First builds a 5 x 5 weighted adjacency matrix and prints its dense form
    together with the dense form of ``degree_matrix(W, indegree=True)`` and
    ``degree_matrix(W, indegree=False)``.  Then builds a random
    100000 x 100000 sparse matrix, replaces every stored weight by 1, and
    prints the wall-clock time of a single ``degree_matrix(W, indegree=True)``
    call.

    Every ``print`` receives exactly one pre-formatted string, so the
    function is valid under both Python 2 (print statement) and Python 3
    (print function).
    """
    print("\n-- 'degree_matrix' --")
    row = [0, 0, 0, 1, 2, 3]
    col = [1, 2, 3, 4, 4, 4]
    weight = [2, 3, 4, 1, 2, 3]
    W = sps.csr_matrix((weight, (row, col)), shape=(5, 5))
    print("Dense:\n%s" % (W.todense(),))
    D_in = degree_matrix(W, indegree=True)
    D_out = degree_matrix(W, indegree=False)
    print("D_in (col sum):\n%s" % (D_in.todense(),))
    print("D_out (row sum):\n%s" % (D_out.todense(),))

    print("\nTest with big random matrix")
    n = 100000
    d = 10
    row = np.random.randint(n, size=n*d)
    col = np.random.randint(n, size=n*d)
    weight = np.random.randint(1, 10, size=n*d)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # -- optionally replace all degrees by 1
    row, col = W.nonzero()
    weight = [1]*len(row)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # Only the call itself is timed; its result is not needed afterwards.
    start = time.time()
    degree_matrix(W, indegree=True)
    elapsed = time.time() - start
    print("Time: %s" % (elapsed,))
Пример #2
0
def test_degree_matrix():
    """Exercise ``degree_matrix`` on a small example and time it on a big one.

    Part 1 prints a 5 x 5 weighted adjacency matrix and the dense in-degree
    and out-degree matrices that ``degree_matrix`` returns for it.
    Part 2 creates a random 100000 x 100000 sparse matrix, sets every stored
    entry to 1, and prints how long one ``degree_matrix(W, indegree=True)``
    call takes.

    Each ``print`` is given one already-formatted string so that the code
    parses and prints the same text under Python 2 and Python 3.
    """
    print("\n-- 'degree_matrix' --")
    row = [0, 0, 0, 1, 2, 3]
    col = [1, 2, 3, 4, 4, 4]
    weight = [2, 3, 4, 1, 2, 3]
    W = sps.csr_matrix((weight, (row, col)), shape=(5, 5))
    print("Dense:\n%s" % (W.todense(),))
    D_in = degree_matrix(W, indegree=True)
    D_out = degree_matrix(W, indegree=False)
    print("D_in (col sum):\n%s" % (D_in.todense(),))
    print("D_out (row sum):\n%s" % (D_out.todense(),))

    print("\nTest with big random matrix")
    n = 100000
    d = 10
    num_entries = n * d
    row = np.random.randint(n, size=num_entries)
    col = np.random.randint(n, size=num_entries)
    weight = np.random.randint(1, 10, size=num_entries)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # -- optionally replace all degrees by 1
    row, col = W.nonzero()
    weight = [1] * len(row)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # Time a single call; the returned matrix itself is discarded.
    start = time.time()
    degree_matrix(W, indegree=True)
    elapsed = time.time() - start
    print("Time: %s" % (elapsed,))
Пример #3
0
def linBP_symmetric(X,
                    W,
                    H,
                    echo=True,
                    compensation=False,
                    numMaxIt=10,
                    convergencePercentage=None,
                    convergenceThreshold=0.9961947,
                    similarity='cosine_ratio',
                    debug=1):
    """Linearized belief propagation given one symmetric, doubly-stochastic compatibility matrix H.

    Starting from ``F = X`` the update ``F <- X + W F H`` is repeated, with
    an additional ``- D F H^2`` echo-cancellation term when ``echo`` is truthy.

    Parameters
    ----------
    X : [n x k] np array
        Seed belief matrix. Can be explicit beliefs or centered residuals.
    W : [n x n] sparse.csr_matrix
        Sparse weighted adjacency matrix.
    H : [k x k] np array
        Compatibility matrix (does not have to be centered).
    echo : boolean (Default = True)
        Truthy to include the echo cancellation term. Any falsy value
        (``False``, ``0``, ``np.bool_(False)``) disables it.
    compensation : boolean (Default = False)
        True calculates the exact compensation for echo H* (only used if echo is truthy).
        Only semantically correct if W is unweighted        (TODO: extend with more general formula)
        Only makes sense if H is centered                   (TODO: verify)
    numMaxIt : int (Default = 10)
        Maximal number of iterations to perform.
    convergencePercentage : float (Default = None)
        Percentage of nodes that need to have converged in order to interrupt the iterations.
        If None, then runs until numMaxIt (or until the divergence guard fires, if debug >= 2).
        Notice that a node with undefined beliefs does not count as converged if it does not change anymore
        (in order to avoid counting nodes without explicit beliefs as converged in first few rounds).
    convergenceThreshold : float (Default = 0.9961947)
        Cosine similarity (actually, the "cosine_ratio" similarity) between two belief vectors
        in order to deem them as identical (thus converged).
        In case both vectors have the same length, then: cos(5 deg) = 0.996194698092. cos(1 deg) = 0.999847695156
    similarity : String (Default = 'cosine_ratio')
        Type of similarity that is passed to matrix_convergence_percentage.
        Checked against ('accuracy', 'cosine', 'cosine_ratio', 'l2') if debug >= 1.
    debug : int (Default = 1)
        0 : no input checks, and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of actualPercentageConverged)

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        Final belief matrix, returned as computed (this function applies no row normalization).

    Returns (if debug == 2)
    -----------------------
    F : [n x k] np array
        Final belief matrix, returned as computed.
    actualNumIt : int
        Actual number of iterations performed.
    actualPercentageConverged : float
        Value returned by matrix_convergence_percentage for the last iteration
        (None if no iteration was performed, i.e. numMaxIt <= 0).

    Returns (if debug == 3)
    -----------------------
    List of F : [(actualNumIt+1) x n x k] np array
        Belief matrices for each iteration, represented as 3-dimensional numpy array.
        Also includes the original beliefs as first entry (0th iteration). Thus has (actualNumIt + 1) entries, not actualNumIt.
    actualNumIt : int
        Actual number of iterations performed (not counting the first pass = 0th iteration for initializing).
    List of actualPercentageConverged : list of float (with length actualNumIt)
        Convergence values for each iteration > 0. Thus has actualNumIt entries.

    Notes
    -----
    Uses: degree_matrix(W, undirected=True, squared=True) when echo is truthy,
    and matrix_convergence_percentage when convergence is tracked.
    """

    # -- Input checks and bookkeeping for convergence tracking / debugging
    assert debug in {0, 1, 2, 3}
    if debug >= 1:
        n1, n2 = W.shape
        n3, k1 = X.shape
        k2, k3 = H.shape
        # Logical `and` (the bitwise `&` used previously was correct only by accident of precedence)
        assert n1 == n2 and n2 == n3
        assert k1 == k2 and k2 == k3
        assert similarity in ('accuracy', 'cosine', 'cosine_ratio', 'l2')

    # Convergence is tracked either for the stopping condition or for debug information
    trackConvergence = convergencePercentage is not None or debug >= 2
    actualPercentageConverged = None  # stays None if the loop body never runs
    if trackConvergence:
        F1 = X  # beliefs of the previous iteration
    if debug >= 3:
        listF = [X]  # belief matrices for each iteration (0th entry = seed beliefs)
        listConverged = []  # convergence value for each iteration > 0

    # -- Initialize values
    F = X
    if echo:
        H2 = H.dot(H)
        D = degree_matrix(W, undirected=True, squared=True)
        if compensation:
            # TODO: (I - H^2) can become singular. Then inverting raises an error
            H_star = np.linalg.inv(np.identity(len(H)) - H2).dot(H)
            H_star2 = H.dot(H_star)

    # -- Actual loop including convergence conditions
    converged = False
    actualNumIt = 0

    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # -- Calculate new beliefs.
        # The test is on truthiness, matching the `if echo:` initialization above, so that
        # echo=0 or echo=np.bool_(False) cannot reach a branch whose matrices were never built.
        if not echo:
            F = X + W.dot(F).dot(H)
        elif not compensation:
            F = X + W.dot(F).dot(H) - D.dot(F).dot(H2)  # W.dot(F) is short form for: sparse.csr_matrix.dot(W, F)
        else:
            F = X + W.dot(F).dot(H_star) - D.dot(F).dot(H_star2)

        # -- Check convergence (or too big divergence) and store information if debug
        if trackConvergence:
            actualPercentageConverged = matrix_convergence_percentage(
                F1, F, threshold=convergenceThreshold, similarity=similarity)
            diff = np.linalg.norm(F - F1)  # used to interrupt the loop if it is diverging

            reachedTarget = (convergencePercentage is not None
                             and actualPercentageConverged >= convergencePercentage)
            if reachedTarget or diff > 1e10:
                converged = True
            F1 = F  # save for comparing in *next* iteration

        if debug == 3:
            listF.append(F)  # ends up with (actualNumIt+1) entries
            listConverged.append(actualPercentageConverged)

    # -- Various return formats
    if debug <= 1:
        return F
    elif debug == 2:
        return F, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
Пример #4
0
def linBP_directed(X,
                   W,
                   P,
                   eps=1,
                   echo=True,
                   numMaxIt=10,
                   convergencePercentage=None,
                   convergenceThreshold=0.9961947,
                   debug=1,
                   paperVariant=True):
    """Linearized belief propagation given one directed graph and one (directed) arbitrary potential P.
    Contrast with undirected variant: uses Potential, and thus needs eps as parameter

    Parameters
    ----------
    X : [n x k] np array
        Seed belief matrix.
    W : [n x n] sparse.csr_matrix
        Sparse weighted adjacency matrix for directed graph.
    P : [k x k] np array
        Arbitrary potential.
    eps : float (Default = 1)
        Parameter by which to scale the row- or column-recentered potentials.
    echo : boolean (Default = True)
        Truthy to use the echo cancellation term. Any falsy value disables it.
    numMaxIt : int (Default = 10)
        Maximal number of iterations to perform.
    convergencePercentage : float (Default = None)
        Percentage of nodes that need to have converged in order to interrupt the iterations.
        Notice that a node with undefined beliefs does not count as converged if it does not change anymore
        (in order to avoid counting nodes without explicit beliefs as converged in first few rounds).
        If None, then runs until numMaxIt (or until the divergence guard fires, if debug >= 2).
    convergenceThreshold : float (Default = 0.9961947)
        Cosine similarity (actually, the "cosine_ratio" similarity) between two belief vectors
        in order to deem them as identical (thus converged).
        In case both vectors have the same length, then: cos(5 deg) = 0.996194698092. cos(1 deg) = 0.999847695156
    debug : int (Default = 1)
        0 : no input checks, and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of actualPercentageConverged)
    paperVariant : boolean (Default = True)
        Passed on to row_recentered_residual: whether the row-normalization is done
        according to version proposed in original paper.

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        Final belief matrix, returned as computed (this function applies no row normalization).

    Returns (if debug == 2)
    -----------------------
    F : [n x k] np array
        Final belief matrix, returned as computed.
    actualNumIt : int
        Actual number of iterations performed.
    actualPercentageConverged : float
        Value returned by matrix_convergence_percentage for the last iteration
        (None if no iteration was performed, i.e. numMaxIt <= 0).

    Returns (if debug == 3)
    -----------------------
    List of F : [(actualNumIt+1) x n x k] np array
        Belief matrices for each iteration, represented as 3-dimensional numpy array.
        Also includes the original beliefs as first entry (0th iteration). Thus has (actualNumIt + 1) entries.
    actualNumIt : int
        Actual number of iterations performed (not counting the first pass = 0th iteration for initializing).
    List of actualPercentageConverged : list of float (with length actualNumIt)
        Convergence values for each iteration > 0. Thus has actualNumIt entries.

    Notes
    -----
    Uses: row_recentered_residual, degree_matrix (if echo is truthy),
    and matrix_convergence_percentage (if convergence is tracked).
    """

    # n and k are needed below regardless of the debug level, so read them unconditionally
    n, k = X.shape

    # -- Input checks and bookkeeping for convergence tracking / debugging
    if debug >= 1:
        n1, n2 = W.shape
        k2, k3 = P.shape
        # Logical `and` (the bitwise `&` used previously was correct only by accident of precedence)
        assert n1 == n2 and n2 == n
        assert k == k2 and k2 == k3

    # Same condition as the convergence check inside the loop, so F1 always exists when it is read
    trackConvergence = convergencePercentage is not None or debug >= 2
    actualPercentageConverged = None  # stays None if the loop body never runs
    if trackConvergence:
        F1 = X.copy()  # beliefs of the previous iteration
    if debug >= 3:
        listF = [X]  # belief matrices for each iteration (0th entry = seed beliefs)
        listConverged = []  # convergence value for each iteration > 0

    # -- Initialize values
    Pc1 = row_recentered_residual(P, paperVariant=paperVariant).dot(eps)  # scaled by eps
    Pc2T = row_recentered_residual(P.transpose(), paperVariant=paperVariant).dot(eps)
    WsT = W.transpose()
    # Builtin int as dtype: the alias np.int was removed from NumPy (1.24)
    allOnes = np.ones((n, k), dtype=int)
    Cstar = (WsT.dot(allOnes).dot(Pc1) + W.dot(allOnes).dot(Pc2T)).dot(1. / k)
    F = X
    Const = X + Cstar  # iteration-independent part of the update

    if echo:
        D_in = degree_matrix(W, indegree=True, undirected=False, squared=True)
        D_out = degree_matrix(W,
                              indegree=False,
                              undirected=False,
                              squared=True)
        # NOTE(review): for np arrays `*` is element-wise, which makes Pstar1 and Pstar2 equal;
        # linBP_symmetric uses the matrix product H.dot(H) for its echo term -- confirm which is intended.
        Pstar1 = Pc2T * Pc1
        Pstar2 = Pc1 * Pc2T

    # -- Actual loop including convergence conditions
    converged = False
    actualNumIt = 0

    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # -- Calculate new beliefs.
        # The test is on truthiness, matching the `if echo:` initialization above, so that
        # echo=0 or echo=np.bool_(False) cannot reach a branch whose matrices were never built.
        if not echo:
            F = Const + WsT.dot(F).dot(Pc1) + W.dot(F).dot(Pc2T)
        else:
            F = Const + WsT.dot(F).dot(Pc1) + W.dot(F).dot(Pc2T) - D_in.dot(
                F).dot(Pstar1) - D_out.dot(F).dot(Pstar2)

        # -- Check convergence and store information if debug
        if trackConvergence:
            actualPercentageConverged = matrix_convergence_percentage(
                F1, F,
                threshold=convergenceThreshold)  # TODO: allow similarity
            diff = np.linalg.norm(F - F1)  # used to interrupt the loop if it is diverging

            reachedTarget = (convergencePercentage is not None
                             and actualPercentageConverged >= convergencePercentage)
            if reachedTarget or diff > 1e10:
                converged = True
            F1 = F  # save for comparing in *next* iteration

        if debug == 3:
            listF.append(F)  # ends up with (actualNumIt+1) entries
            listConverged.append(actualPercentageConverged)

    # -- Various return formats
    if debug <= 1:
        return F
    elif debug == 2:
        return F, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
Пример #5
0
def linBP_undirected(X, W, Hc, echo=True, numIt=10, debug=1):
    """Linearized belief propagation for undirected graphs

    Starting from ``F = X`` the update ``F <- X + W F Hc`` is applied
    ``numIt`` times, with an additional ``- D F Hc^2`` echo-cancellation term
    when ``echo`` is truthy (``D = degree_matrix(W)``).

    Parameters
    ----------
    X : [n x k] np array
        seed belief matrix
    W : [n x n] sparse.csr_matrix
        sparse weighted adjacency matrix
    Hc : [k x k] np array
        centered coupling matrix
    echo : boolean (Default = True)
        truthy to use the echo cancellation term; any falsy value
        (``False``, ``0``, ``np.bool_(False)``) disables it
    numIt : int (Default = 10)
        number of iterations to perform
    debug : int (Default = 1)
        0 : no input checks and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns list of F

    Returns (if debug <= 1)
    -----------------------
    F : [n x k] np array
        final belief matrix, returned as computed (this function applies no row normalization)

    Returns (if debug >= 2)
    -----------------------
    List of F : [(numIt+1) x n x k] np array
        belief matrices for each iteration, represented as 3-dimensional numpy array
        Also includes the original beliefs as first entry (0th iteration). Thus has (numIt + 1) entries

    Notes
    -----
    Uses: degree_matrix(W)

    References
    ----------
    .. [1] W. Gatterbauer, S. Guennemann, D. Koutra, and C. Faloutsos, and H. van der Vorst,
        "Linearized and Single-Pass Belief Propagation", PVLDB 8(5): 581-592 (2015).
    """
    # TODO: include convergence condition

    if debug >= 1:
        n1, n2 = W.shape
        n3, k1 = X.shape
        k2, k3 = Hc.shape
        # Logical `and` (the bitwise `&` used previously was correct only by accident of precedence)
        assert n1 == n2 and n2 == n3
        assert k1 == k2 and k2 == k3
        assert issparse(W)

    # Collect per-iteration beliefs for every debug level that returns the list
    keepHistory = debug >= 2
    if keepHistory:
        listF = [X]     # beliefs for each iteration (including 0th iteration = explicit beliefs)

    F = X
    if echo:
        # Matrices for the echo-cancellation term are only built when it is actually used
        H2 = Hc.dot(Hc)
        D = degree_matrix(W)

    for _ in range(numIt):
        if echo:
            F = X + W.dot(F).dot(Hc) - D.dot(F).dot(H2)     # W.dot(F) is short form for: sparse.csr_matrix.dot(W, F)
        else:
            F = X + W.dot(F).dot(Hc)
        if keepHistory:
            listF.append(F)

    if keepHistory:
        return np.array(listF)
    return F