def test_degree_matrix():
    """Print degree matrices for a tiny graph and time one large random case.

    This is a print-based smoke test: it makes no assertions. It first builds
    a 5 x 5 weighted adjacency matrix and prints the results of
    `degree_matrix` for indegree=True and indegree=False, then times a single
    `degree_matrix` call on a 100000 x 100000 random sparse matrix whose
    stored weights have all been replaced by 1.

    NOTE(review): an identical definition of `test_degree_matrix` follows
    directly after this one in the file and rebinds the name, so only one of
    the two is reachable -- confirm which copy should be kept.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("\n-- 'degree_matrix' --")
    row = [0, 0, 0, 1, 2, 3]
    col = [1, 2, 3, 4, 4, 4]
    weight = [2, 3, 4, 1, 2, 3]
    W = sps.csr_matrix((weight, (row, col)), shape=(5, 5))
    print("Dense:\n%s" % (W.todense(),))
    D_in = degree_matrix(W, indegree=True)
    D_out = degree_matrix(W, indegree=False)
    print("D_in (col sum):\n%s" % (D_in.todense(),))
    print("D_out (row sum):\n%s" % (D_out.todense(),))

    print("\nTest with big random matrix")
    n = 100000
    d = 10
    row = np.random.randint(n, size=n * d)
    col = np.random.randint(n, size=n * d)
    weight = np.random.randint(1, 10, size=n * d)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # -- optionally replace all degrees by 1
    row, col = W.nonzero()
    weight = [1] * len(row)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    start = time.time()
    D_in = degree_matrix(W, indegree=True)
    elapsed = time.time() - start       # wall-clock seconds for one call
    print("Time: %s" % (elapsed,))
def test_degree_matrix():
    """Print degree matrices for a tiny graph and time one large random case.

    This is a print-based smoke test: it makes no assertions. It first builds
    a 5 x 5 weighted adjacency matrix and prints the results of
    `degree_matrix` for indegree=True and indegree=False, then times a single
    `degree_matrix` call on a 100000 x 100000 random sparse matrix whose
    stored weights have all been replaced by 1.

    NOTE(review): this duplicates the definition of `test_degree_matrix`
    immediately above it in the file; being the later one, this copy is the
    one the name ends up bound to -- confirm the earlier copy can be removed.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("\n-- 'degree_matrix' --")
    row = [0, 0, 0, 1, 2, 3]
    col = [1, 2, 3, 4, 4, 4]
    weight = [2, 3, 4, 1, 2, 3]
    W = sps.csr_matrix((weight, (row, col)), shape=(5, 5))
    print("Dense:\n%s" % (W.todense(),))
    D_in = degree_matrix(W, indegree=True)
    D_out = degree_matrix(W, indegree=False)
    print("D_in (col sum):\n%s" % (D_in.todense(),))
    print("D_out (row sum):\n%s" % (D_out.todense(),))

    print("\nTest with big random matrix")
    n = 100000
    d = 10
    row = np.random.randint(n, size=n * d)
    col = np.random.randint(n, size=n * d)
    weight = np.random.randint(1, 10, size=n * d)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    # -- optionally replace all degrees by 1
    row, col = W.nonzero()
    weight = [1] * len(row)
    W = sps.csr_matrix((weight, (row, col)), shape=(n, n))

    start = time.time()
    D_in = degree_matrix(W, indegree=True)
    elapsed = time.time() - start       # wall-clock seconds for one call
    print("Time: %s" % (elapsed,))
def linBP_symmetric(X, W, H,
                    echo=True,
                    compensation=False,
                    numMaxIt=10,
                    convergencePercentage=None,
                    convergenceThreshold=0.9961947,
                    similarity='cosine_ratio',
                    debug=1):
    """Linearized belief propagation given one symmetric, doubly-stochastic compatibility matrix H.

    Iterates ``F <- X + W F H`` (minus an echo-cancellation term when `echo`
    is set) for at most `numMaxIt` rounds. Symmetry / double stochasticity of
    the inputs is expected from the caller and is NOT verified here.

    Parameters
    ----------
    X : [n x k] np array
        seed belief matrix. Can be explicit beliefs or centered residuals
    W : [n x n] sparse.csr_matrix
        sparse weighted adjacency matrix
    H : [k x k] np array
        Compatibility matrix (does not have to be centered)
    echo : Boolean (Default = True)
        True to include the echo cancellation term. Any falsy value
        (False, 0, np.False_) disables it.
    compensation : boolean (Default = False)
        True calculates the exact compensation for echo H* (only has an effect if echo=True)
        Only semantically correct if W is unweighted (TODO: extend with more general formula)
        Only makes sense if H is centered (TODO: verify)
    numMaxIt : int (Default = 10)
        number of maximal iterations to perform
    convergencePercentage : float (Default = None)
        percentage of nodes that need to have converged in order to interrupt the iterations.
        If None, then runs until numMaxIt
        Notice that a node with undefined beliefs does not count as converged if it does not change anymore
        (in order to avoid counting nodes without explicit beliefs as converged in first few rounds).
    convergenceThreshold : float (Default = 0.9961947)
        cosine similarity (actually, the "cosine_ratio" similarity) between two belief vectors in order to deem them
        as identical (thus converged). In case both vectors have the same length, then:
        cos(5 deg) = 0.996194698092. cos(1 deg) = 0.999847695156
    similarity : String (Default = 'cosine_ratio')
        Type of similarity that is passed on to matrix_convergence_percentage.
        One of 'accuracy', 'cosine', 'cosine_ratio', 'l2' (checked if debug >= 1)
    debug : int (Default = 1)
        0 : no input checks, and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of convergenceRatios)

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        final belief matrix (this function applies no row normalization)

    Returns (if debug == 2)
    -----------------------
    F : [n x k] np array
        final belief matrix (this function applies no row normalization)
    actualNumIt : int
        actual number of iterations performed
    actualPercentageConverged : float
        percentage of nodes that converged in the last iteration
        (None if no iteration was performed, i.e. numMaxIt <= 0)

    Returns (if debug == 3)
    -----------------------
    List of F : [(actualNumIt+1) x n x k] np array
        list of belief matrices for each iteration, represented as 3-dimensional numpy array
        Also includes the original beliefs as first entry (0th iteration). Thus has (actualNumIt + 1) entries,
        not actualNumIt
    actualNumIt : int
        actual number of iterations performed (not counting the first pass = 0th iteration for initializing)
    List of actualPercentageConverged : list of float (with length actualNumIt)
        list of percentages of nodes that converged in each iteration > 0. Thus has actualNumIt entries
    """
    # -- Create variables for convergence checking and debugging
    assert debug in {0, 1, 2, 3}
    if debug >= 1:
        n1, n2 = W.shape
        n3, k1 = X.shape
        k2, k3 = H.shape
        # chained comparison; the former `a == b & b == c` only worked
        # because `b & b == b` and `&` binds tighter than `==`
        assert n1 == n2 == n3
        assert k1 == k2 == k3
        # -- symmetry / stochasticity checks intentionally left out
        #    (they take almost as long as 10 iterations)
        assert similarity in ('accuracy', 'cosine', 'cosine_ratio', 'l2')

    # Convergence is tracked either for the stopping condition or for debug output
    track_convergence = convergencePercentage is not None or debug >= 2
    if track_convergence:
        F1 = X                  # beliefs of the previous iteration
    if debug >= 3:
        listF = [X]             # store the belief matrices for each iteration
        listConverged = []      # store the percentage of converged nodes for each iteration

    # -- Initialize values
    F = X                       # initialized for iteration
    if echo:
        H2 = H.dot(H)
        D = degree_matrix(W, undirected=True, squared=True)
        # The compensation terms are only consumed by the echo branch of the
        # loop and need H2, so they are only built when echo is enabled.
        if compensation:
            # TODO: can become singular matrix. Then error for inverting
            H_star = np.linalg.inv(np.identity(len(H)) - H2).dot(H)
            H_star2 = H.dot(H_star)

    # -- Actual loop including convergence conditions
    converged = False
    actualNumIt = 0
    actualPercentageConverged = None    # stays None if the loop body never runs
    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # -- Calculate new beliefs
        # `not echo` (rather than `echo is False`) keeps this test consistent
        # with the `if echo:` setup above for flags such as 0 or np.False_.
        if not echo:
            F = X + W.dot(F).dot(H)
        elif not compensation:
            # W.dot(F) is short form for: sparse.csr_matrix.dot(W, F)
            F = X + W.dot(F).dot(H) - D.dot(F).dot(H2)
        else:
            F = X + W.dot(F).dot(H_star) - D.dot(F).dot(H_star2)

        # -- Check convergence (or too big divergence) and store information if debug
        if track_convergence:
            actualPercentageConverged = matrix_convergence_percentage(
                F1, F, threshold=convergenceThreshold, similarity=similarity)
            # interrupt loop if it is diverging
            # (Time 0.1msec per iteration for n = 5000, d = 10)
            diff = np.linalg.norm(F - F1)
            if (convergencePercentage is not None
                    and actualPercentageConverged >= convergencePercentage) \
                    or diff > 1e10:
                converged = True
            F1 = F              # save for comparing in *next* iteration

        if debug == 3:
            listF.append(F)     # stores (actualNumIt+1) values
            listConverged.append(actualPercentageConverged)

    # -- Various return formats
    if debug <= 1:
        return F
    elif debug == 2:
        return F, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
def linBP_directed(X, W, P,
                   eps=1,
                   echo=True,
                   numMaxIt=10,
                   convergencePercentage=None,
                   convergenceThreshold=0.9961947,
                   debug=1,
                   paperVariant=True):
    """Linearized belief propagation given one directed graph and one (directed) arbitrary potential P.

    Contrast with undirected variant: uses Potential, and thus needs eps as parameter

    Parameters
    ----------
    X : [n x k] np array
        seed belief matrix
    W : [n x n] sparse.csr_matrix
        sparse weighted adjacency matrix for directed graph
    P : [k x k] np array
        arbitrary potential
    eps : float (Default = 1)
        parameter by which to scale the row- or column-recentered potentials
    echo : Boolean (Default = True)
        whether or not echo cancellation term is used. Any falsy value
        (False, 0, np.False_) disables it.
    numMaxIt : int (Default = 10)
        number of maximal iterations to perform
    convergencePercentage : float (Default = None)
        percentage of nodes that need to have converged in order to interrupt the iterations.
        Notice that a node with undefined beliefs does not count as converged if it does not change anymore
        (in order to avoid counting nodes without explicit beliefs as converged in first few rounds).
        If None, then runs until numMaxIt
    convergenceThreshold : float (Default = 0.9961947)
        cosine similarity (actually, the "cosine_ratio" similarity) between two belief vectors in order to deem them
        as identical (thus converged). In case both vectors have the same length, then:
        cos(5 deg) = 0.996194698092. cos(1 deg) = 0.999847695156
    debug : int (Default = 1)
        0 : no input checks, and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of convergenceRatios)
    paperVariant : Boolean (Default = True)
        passed on to row_recentered_residual: whether the row-normalization is done according to
        the version proposed in the original paper

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        final belief matrix (this function applies no row normalization)

    Returns (if debug == 2)
    -----------------------
    F : [n x k] np array
        final belief matrix (this function applies no row normalization)
    actualNumIt : int
        actual number of iterations performed
    actualPercentageConverged : float
        percentage of nodes that converged in the last iteration
        (None if no iteration was performed, i.e. numMaxIt <= 0)

    Returns (if debug == 3)
    -----------------------
    List of F : [(actualNumIt+1) x n x k] np array
        list of belief matrices for each iteration, represented as 3-dimensional numpy array
        Also includes the original beliefs as first entry (0th iteration). Thus has (actualNumIt + 1) entries
    actualNumIt : int
        actual number of iterations performed (not counting the first pass = 0th iteration for initializing)
    List of actualPercentageConverged : list of float (with length actualNumIt)
        list of percentages of nodes that converged in each iteration > 0. Thus has actualNumIt entries
    """
    # n and k are needed below regardless of the debug level, so they are
    # always read from X (previously they were only bound when debug >= 1).
    n, k = X.shape

    # -- Create variables for convergence checking and debugging
    if debug >= 1:
        n1, n2 = W.shape
        k2, k3 = P.shape
        assert n == n1 == n2
        assert k == k2 == k3

    # Convergence is tracked either for the stopping condition or for debug
    # output; F1 must exist in both cases (not only when debug >= 2).
    track_convergence = convergencePercentage is not None or debug >= 2
    if track_convergence:
        F1 = X.copy()           # beliefs of the previous iteration
    if debug >= 3:
        listF = [X]             # store the belief matrices for each iteration
        listConverged = []      # store the percentage of converged nodes for each iteration

    # -- Initialize values
    Pc1 = row_recentered_residual(P, paperVariant=paperVariant).dot(eps)    # scaled by eps
    Pc2T = row_recentered_residual(P.transpose(), paperVariant=paperVariant).dot(eps)
    WsT = W.transpose()
    # builtin `int` instead of the alias `np.int`, which newer NumPy no longer provides
    ones = np.ones((n, k), dtype=int)
    Cstar = (WsT.dot(ones).dot(Pc1) + W.dot(ones).dot(Pc2T)).dot(1. / k)
    F = X
    Const = X + Cstar           # iteration-independent part of the update
    if echo:
        D_in = degree_matrix(W, indegree=True, undirected=False, squared=True)
        D_out = degree_matrix(W, indegree=False, undirected=False, squared=True)
        # NOTE(review): `*` is elementwise for ndarrays (which makes
        # Pstar1 == Pstar2) but a matrix product for np.matrix -- confirm
        # which one is intended here.
        Pstar1 = Pc2T * Pc1
        Pstar2 = Pc1 * Pc2T

    # -- Actual loop including convergence conditions
    converged = False
    actualNumIt = 0
    actualPercentageConverged = None    # stays None if the loop body never runs
    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # -- Calculate new beliefs
        # `not echo` (rather than `echo is False`) keeps this test consistent
        # with the `if echo:` setup above for flags such as 0 or np.False_.
        if not echo:
            F = Const + WsT.dot(F).dot(Pc1) + W.dot(F).dot(Pc2T)
        else:
            F = Const + WsT.dot(F).dot(Pc1) + W.dot(F).dot(Pc2T) \
                - D_in.dot(F).dot(Pstar1) - D_out.dot(F).dot(Pstar2)

        # -- Check convergence and store information if debug
        if track_convergence:
            actualPercentageConverged = matrix_convergence_percentage(
                F1, F, threshold=convergenceThreshold)     # TODO: allow similarity
            diff = np.linalg.norm(F - F1)       # interrupt loop if it is diverging
            if (convergencePercentage is not None
                    and actualPercentageConverged >= convergencePercentage) \
                    or diff > 1e10:
                converged = True
            F1 = F              # save for comparing in *next* iteration

        if debug == 3:
            listF.append(F)     # stores (actualNumIt+1) values
            listConverged.append(actualPercentageConverged)

    # -- Various return formats
    if debug <= 1:
        return F
    elif debug == 2:
        return F, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
def linBP_undirected(X, W, Hc, echo=True, numIt=10, debug=1):
    """Linearized belief propagation for undirected graphs.

    Performs exactly `numIt` updates ``F <- X + W F Hc`` (minus the echo
    cancellation term ``D F Hc^2`` when `echo` is set), starting from F = X.

    Parameters
    ----------
    X : [n x k] np array
        seed belief matrix
    W : [n x n] sparse.csr_matrix
        sparse weighted adjacency matrix
    Hc : [k x k] np array
        centered coupling matrix
    echo : Boolean (Default = True)
        whether or not echo cancellation term is used. Any falsy value
        (False, 0, np.False_) disables it.
    numIt : int (Default = 10)
        number of iterations to perform
    debug : int (Default = 1)
        0 : no input checks, and just returns F
        1 : tests for correct input, and just returns F
        2 (or larger) : tests for correct input, and returns list of F

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        final belief matrix (this function applies no row normalization)

    Returns (if debug >= 2)
    -----------------------
    List of F : [(numIt+1) x n x k] np array
        list of belief matrices for each iteration, represented as 3-dimensional numpy array
        Also includes the original beliefs as first entry (0th iteration). Thus has (numIt + 1) entries

    Notes
    -----
    Uses: degree_matrix(W)

    References
    ----------
    .. [1] W. Gatterbauer, S. Guennemann, D. Koutra, and C. Faloutsos, and H. van der Vorst,
        "Linearized and Single-Pass Belief Propagation", PVLDB 8(5): 581-592 (2015).
    """
    # TODO: include convergence condition
    if debug >= 1:
        n1, n2 = W.shape
        n3, k1 = X.shape
        k2, k3 = Hc.shape
        # chained comparison; the former `a == b & b == c` only worked
        # because `b & b == b` and `&` binds tighter than `==`
        assert n1 == n2 == n3
        assert k1 == k2 == k3
        assert issparse(W)

    # The history is returned for every debug >= 2, so it has to be collected
    # for every such level (not only for debug == 2).
    keep_history = debug >= 2
    if keep_history:
        listF = [X]     # beliefs per iteration (including 0th iteration = explicit beliefs)

    F = X
    if echo:
        H2 = Hc.dot(Hc)
        D = degree_matrix(W)

    for _ in range(numIt):
        # W.dot(F) is short form for: sparse.csr_matrix.dot(W, F)
        propagated = X + W.dot(F).dot(Hc)
        if echo:
            propagated = propagated - D.dot(F).dot(H2)
        F = propagated
        if keep_history:
            listF.append(F)

    if keep_history:
        return np.array(listF)
    return F