def test_matrix_convergence_percentage():
    print "\n-- 'matrix_convergence_percentage' --"
    X0 = np.array([[2, 0, 0],
                   [2, 0, 2],
                   [0, 1, 0],
                   [0, 0, 3],
                   [0, 0, 3],
                   [1, 0, 2],
                   [0, 3, 3],
                   [0, 0, 0],
                   [9, 9, 9],
                   [100, 100, 100]])
    X1 = np.array([[1, 1, 2],
                   [2, 1, 2],
                   [3, 4, 0],
                   [1, 1, 2],
                   [2, 1, 1],
                   [1, 2, 2],
                   [1, 2, 3],
                   [0, 0, 0],
                   [8, 9, 9],
                   [100, 100, 101]])
    print "X0:\n", X0
    print "X1:\n", X1
    threshold = 0.5
    percentage = matrix_convergence_percentage(X0, X1, threshold)
    print "percentage converged (original):\n", percentage
    X0z = row_normalize_matrix(X0, norm='zscores')
    X1z = row_normalize_matrix(X1, norm='zscores')
    percentage = matrix_convergence_percentage(X0z, X1z, threshold)
    print "percentage converged (after zscore):\n", percentage
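
# For reference, a minimal sketch of what 'matrix_convergence_percentage'
# presumably computes, based on how the test above uses it: the fraction of
# rows whose (row-wise) cosine similarity between X0 and X1 reaches the given
# threshold. The helper name '_convergence_percentage_sketch' is hypothetical
# and not part of this module.
def _convergence_percentage_sketch(X0, X1, threshold):
    X0 = np.asarray(X0, dtype=float)
    X1 = np.asarray(X1, dtype=float)
    norms = np.linalg.norm(X0, axis=1) * np.linalg.norm(X1, axis=1)
    norms[norms == 0] = 1                       # all-0 rows get similarity 0 and never count as converged
    cos = np.sum(X0 * X1, axis=1) / norms       # row-wise cosine similarity
    return np.mean(cos >= threshold)            # fraction of converged rows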
def beliefPropagation(X, W, P,
                      numMaxIt=10,
                      convergencePercentage=None,
                      convergenceThreshold=0.9961947,
                      debug=1,
                      damping=1,
                      clamping=False):
    """Standard belief propagation on a directed graph, with two variants:
    V1: one directed potential shared by all edges:
        P is a single potential, and W contains the edge weights
    V2: a set of potentials on different edges:
        P is a tensor, and W indexes the potentials
    The number of dimensions of P (2 or 3) determines the variant.
    Uses message-passing with division: see [Koller, Friedman 2009] Section 10.3.1.
    Uses damping: see [Koller, Friedman 2009] Section 11.1.
    Can be run either for a given maximal number of iterations or until a
    specified percentage of nodes have converged. Convergence of a node is
    determined by a variant of cosine similarity between its *centered beliefs*
    from two consecutive iterations. If the convergence criterion is reached,
    the iterations stop before numMaxIt.
    The "debug" parameter allows alternative, more detailed outputs, e.g.,
    to get intermediate belief values.
    Checks that every entry in X and P is >= 0.
    Can model undirected graphs by (1) specifying every edge in only one
    direction, and (2) using symmetric potentials.

    Parameters
    ----------
    X : [n x k] np array
        prior (explicit) belief matrix. Rows do not have to be row-normalized.
        Rows can be all 0, which are later replaced by undefined prior beliefs.
    W : [n x n] sparse.csr_matrix
        directed sparse weighted adjacency matrix (thus a directed graph is assumed).
        Also allows undirected graphs by simply specifying only symmetric potentials.
        V1: weight determines the actual edge weight
        V2: weight determines the index of a potential (from potential tensor P)
    P : V1: [k x k] np array
            any directed potential (no requirement for normalization or
            identical row or column sums)
        V2: [num_pot_P x k x k] np array
            set of potentials (as tensor)
    numMaxIt : int (Default = 10)
        maximal number of iterations to perform
    convergencePercentage : float (Default = None)
        percentage of nodes that need to have converged in order to interrupt
        the iterations. Notice that a node with undefined beliefs does not
        count as converged even if it does not change anymore (in order to
        avoid counting nodes without explicit beliefs as converged in the
        first few rounds). If None, then runs until numMaxIt.
    convergenceThreshold : float (Default = 0.9961947)
        cosine similarity (actually, the "cosine_ratio" similarity) between
        two belief vectors required to deem them identical (thus converged).
        In case both vectors have the same length:
        cos(5 deg) = 0.996194698092,
        cos(1 deg) = 0.999847695156
    debug : int (Default = 1)
        0 : no debugging, and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of actualPercentageConverged)
    damping : float (Default = 1)
        fraction of message values that come from the new iteration
        (if 1, then no re-use of the prior iteration)
    clamping : Boolean (Default = False)
        whether or not the explicit beliefs in X should be clamped to the nodes

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        final belief matrix, each row normalized to form a label distribution

    Returns (if debug == 2)
    -----------------------
    F : [n x k] np array
        final belief matrix, each row normalized to form a label distribution
    actualNumIt : int
        actual number of iterations performed
    actualPercentageConverged : float
        percentage of nodes that converged

    Returns (if debug == 3)
    -----------------------
    List of F : [(actualNumIt+1) x n x k] np array
        list of belief matrices for each iteration, represented as a
        3-dimensional numpy array. Also includes the original beliefs as
        first entry (0th iteration). Thus has (actualNumIt + 1) entries.
    actualNumIt : int
        actual number of iterations performed (not counting the first
        pass = 0th iteration for initializing)
    List of actualPercentageConverged : list of float (with length actualNumIt)
        list of percentages of nodes that converged in each iteration > 0.
        Thus has actualNumIt entries.
    """
    # --- create variables for convergence checking and debugging
    n, k = X.shape
    dim_pot = len(P.shape)      # dimensions 2 or 3: determines V1 or V2
    Pot = P                     # for case of dim_pot == 2
    if debug >= 1:
        assert (X >= 0).all(), "All explicit beliefs need to be >= 0"
        assert issparse(W), "W needs to be sparse"
        n2, n3 = W.shape
        assert type(P).__module__ == "numpy", "P needs to be numpy array (and not a matrix)"
        assert dim_pot in [2, 3], "Input potentials need to be 2-dimensional or 3-dimensional"
        if dim_pot == 2:
            assert (P >= 0).all(), "All entries in the potentials need to be >= 0"
            k2, k3 = P.shape
        else:
            num_pot_P, k2, k3 = P.shape
            for P_entry in P:
                assert (P_entry >= 0).all(), "All entries in each potential need to be >= 0"
            assert W.dtype == int, "Entries of weight matrix need to be integers to reference index of the potential"
            max_pot_W = max(set(W.data))
            assert max_pot_W <= num_pot_P, "Indices in W referring to P cannot exceed the number of potentials"
        assert n == n2 == n3, "X and W need to have compatible dimensions"
        assert k == k2 == k3, "X and P need to have compatible dimensions"
    if debug >= 3:
        listF = []          # store the belief matrices for each iteration
        listConverged = []  # store the convergence percentages relative to the previous iteration

    # --- create edge dictionaries
    row, col = W.nonzero()
    nodes = set(np.concatenate((row, col)))
    dict_edges_out = {}     # dictionary: i to all nodes j with edge (i->j)
    for node in nodes:
        dict_edges_out[node] = set()
    dict_edges_in = deepcopy(dict_edges_out)    # dictionary: i to all nodes j with edge (i<-j)
    for (i, j) in zip(row, col):
        dict_edges_out[i].add(j)
        dict_edges_in[j].add(i)
    if dim_pot == 3:
        weight = W.data     # defined here (not only in the debug block) so it also exists when debug == 0
        dict_edges_pot = {}  # dictionary: for each directed edge (i,j) -> index of the potential in P[index, :, :]
        for (i, j, d) in zip(row, col, weight):
            dict_edges_pot[(i, j)] = d

    # --- X -> X0: replace all-0-rows with all 1s (no need to normalize initial beliefs)
    implicitVector = 1 - 1 * to_explicit_bool_vector(X)         # indicator numpy array with 1s for rows with only 0s
    implicitVectorT = np.array([implicitVector]).transpose()    # vertical 1 vector for implicit nodes
    X0 = X + implicitVectorT    # X0: prior beliefs; addition of [n x k] matrix with [n x 1] vector is ok
    F1 = X0                     # old F: only for checking convergence (either because convergencePercentage not None or debug >= 2)
    F2 = X0.astype(float)       # new F: copy is necessary so as not to change original X0 matrix when F2 is changed

    # --- Actual loop: each loop calculates (a) the new messages (with damping) and (b) the new beliefs
    converged = False
    actualNumIt = -1    # iterations start with 0th iteration
    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # --- (a) calculate messages
        if actualNumIt == 0:
            # --- first pass (counts as 0th iteration): create message dictionaries and initialize messages with ones
            dict_messages_along_1 = {}      # dictionary: messages for each edge (i->j) in direction i->j
            dict_messages_against_1 = {}    # dictionary: messages for each edge (i<-j) in direction i->j
            default = np.ones(k)            # first message vector: all 1s
            for (i, j) in zip(row, col):
                dict_messages_along_1[(i, j)] = default
                dict_messages_against_1[(j, i)] = default
        else:
            # --- other iterations: calculate "messages_new" using message-passing with division (from F and messages)
            dict_messages_along_2 = {}      # new dictionary: messages for each edge (i->j) in direction i->j
            dict_messages_against_2 = {}    # new dictionary: messages for each edge (i<-j) in direction i->j
            for (i, j) in dict_messages_along_1.keys():     # also covers the case "for (j,i) in dict_messages_against_1.keys()"
                if dim_pot == 3:    # need to reference the correct potential in case dim_pot == 3
                    Pot = P[dict_edges_pot[(i, j)] - 1, :, :]
                dict_messages_along_2[(i, j)] = (F2[i] / dict_messages_against_1[(j, i)]).dot(Pot)  # entry-wise division
                dict_messages_against_2[(j, i)] = (F2[j] / dict_messages_along_1[(i, j)]).dot(Pot.transpose())
                # TODO above two lines can contain errors

            # --- assign new to old message dictionaries, and optionally damp messages
            if damping == 1:
                dict_messages_along_1 = dict_messages_along_2.copy()    # requires shallow copy because of later division
                dict_messages_against_1 = dict_messages_against_2.copy()
            else:
                for (i, j) in dict_messages_along_1.keys():
                    dict_messages_along_1[(i, j)] = damping * dict_messages_along_2[(i, j)] + \
                                                    (1 - damping) * dict_messages_along_1[(i, j)]
                for (i, j) in dict_messages_against_1.keys():
                    dict_messages_against_1[(i, j)] = damping * dict_messages_against_2[(i, j)] + \
                                                      (1 - damping) * dict_messages_against_1[(i, j)]

        # --- (b) create new beliefs by multiplying prior beliefs with all incoming messages (pointing in both directions)
        for (i, f) in enumerate(F2):
            if not clamping or implicitVector[i] == 0:  # only update beliefs that are not explicit and clamped
                F2[i] = X0[i]   # need to start multiplying from explicit beliefs; referencing the row with a separate variable did not work out
                for j in dict_edges_out[i]:     # edges pointing away
                    F2[i] *= dict_messages_against_1[(j, i)]
                for j in dict_edges_in[i]:      # edges pointing inwards
                    F2[i] *= dict_messages_along_1[(j, i)]  # TODO line can contain errors

        # --- normalize beliefs [TODO: perhaps remove later to optimize except in last round]
        F2 = row_normalize_matrix(F2, norm='l1')

        # --- check convergence and store information if debug
        if convergencePercentage is not None or debug >= 2:
            F1z = to_centering_beliefs(F1)
            F2z = to_centering_beliefs(F2)
            actualPercentageConverged = matrix_convergence_percentage(F1z, F2z, threshold=convergenceThreshold)
            if convergencePercentage is not None \
                    and actualPercentageConverged >= convergencePercentage \
                    and actualNumIt > 0:    # end the loop early
                converged = True
            F1 = F2.copy()  # save for comparing in *next* iteration; make copy since F entries get changed
        if debug == 3:
            listF.append(F2.copy())     # stores (actualNumIt+1) values (copy is important as F2 is later overwritten)
            if actualNumIt > 0:
                listConverged.append(actualPercentageConverged)     # stores actualNumIt values

    # --- Various return formats
    if debug <= 1:
        return F2
    elif debug == 2:
        return F2, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
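
# A minimal usage sketch for 'beliefPropagation' (V1 variant): a 3-node
# undirected chain 0-1-2, modeled as the docstring suggests by specifying each
# edge in only one direction and using a symmetric (homophily-favoring)
# potential. Node 0 carries an explicit belief for class 0; the other rows are
# all 0 and thus treated as implicit. The function name, graph, and numbers
# are illustrative assumptions, not taken from the tests in this module.
def demo_beliefPropagation():
    from scipy.sparse import csr_matrix    # assumes scipy.sparse is available (issparse is already used above)
    X = np.array([[1., 0.],
                  [0., 0.],
                  [0., 0.]])                # only node 0 has an explicit belief
    W = csr_matrix(([1, 1], ([0, 1], [1, 2])), shape=(3, 3))    # edges 0->1 and 1->2 only
    P = np.array([[2., 1.],
                  [1., 2.]])                # symmetric potential: neighbors prefer the same class
    F, actualNumIt, percentageConverged = beliefPropagation(
        X, W, P,
        numMaxIt=20,
        convergencePercentage=0.99,         # stop early once (almost) all nodes converged
        debug=2)
    print(F)                                # final beliefs, rows l1-normalized
    print(actualNumIt)                      # iterations actually performed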
def test_matrix_difference_with_cosine_similarity():
    print "\n-- 'matrix_difference' (cosine), 'row_normalize_matrix' --"
    print "k=3"
    v1 = np.array([1, 0, 0])
    v2 = np.array([0, 1, 0])
    v3 = np.array([1, 1, 0])
    print "Cosine with original:\n ", \
        matrix_difference(v1, v1, similarity='cosine')
    print "Cosine with original zscore:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v1, norm='zscores'), similarity='cosine')
    print "Cosine with zscore:\n ", \
        matrix_difference(v1, row_normalize_matrix(v1, norm='zscores'), similarity='cosine')
    print "Cosine with normal:\n ", \
        matrix_difference(v1, v2, similarity='cosine')
    print "Cosine with normal after both zscore:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v2, norm='zscores'), similarity='cosine')
    print "! Notice that average guessing leads to expectation of 0!"
    print "Cosine v1, v3:\n ", \
        matrix_difference(v1, v3, similarity='cosine')
    print "Cosine v1, v3 after zscore:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v3, norm='zscores'), similarity='cosine')

    print "\nk=5"
    v1 = np.array([1, 0, 0, 0, 0])
    v2 = np.array([0, 1, 0, 0, 0])
    v3 = np.array([1, 1, 0, 0, 0])
    v4 = np.array([0, 0, 0, 0, 0])
    print "Cosine with normal:\n ", \
        matrix_difference(v1, v2, similarity='cosine')
    print "Cosine with normal after both zscore:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v2, norm='zscores'), similarity='cosine')
    print "! Notice that average guessing leads to expectation of 0!"
    print "Cosine v1, v3:\n ", \
        matrix_difference(v1, v3, similarity='cosine')
    print "Cosine v1, v3 after zscore:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v3, norm='zscores'), similarity='cosine')
    print "Average Cos similarity partly zscore:\n ", \
        matrix_difference(v1, row_normalize_matrix(v3, norm='zscores'), similarity='cosine')
    print "Cosine with 0-vector:\n ", \
        matrix_difference(row_normalize_matrix(v1, norm='zscores'),
                          row_normalize_matrix(v4, norm='zscores'), similarity='cosine')
    print

    X = np.array([[1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0]])
    Y = np.array([[1, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0],
                  [1, 1.1, 0, 0, 0]])
    print "X\n", X
    print "Y\n", Y
    Xs = row_normalize_matrix(X, norm='zscores')
    Ys = row_normalize_matrix(Y, norm='zscores')
    print "Xs\n", Xs
    print "Ys\n", Ys
    print "\nCosine original:\n ", \
        matrix_difference(X, Y, vector=True, similarity='cosine')
    print "Cosine zscore:\n ", \
        matrix_difference(Xs, Ys, vector=True, similarity='cosine')
    print "Average cosine zscore:\n ", \
        matrix_difference(X, Y, similarity='cosine')
def test_row_normalize_matrix():
    print "\n-- 'row_normalize_matrix' (l1, l2, zscores) --"
    v = np.array([1, 1, 0, 0, 0])
    print "original:\n ", v
    print "l2:\n ", row_normalize_matrix(v, norm='l2')
    print "l1:\n ", row_normalize_matrix(v, norm='l1')
    print "zscores:\n ", row_normalize_matrix(v, norm='zscores')

    v = np.array([1, 1, 1, 0, 0])
    print "\noriginal:\n ", v
    print "l2:\n ", row_normalize_matrix(v, norm='l2')
    print "l1:\n ", row_normalize_matrix(v, norm='l1')
    print "zscores:\n ", row_normalize_matrix(v, norm='zscores')

    X = np.array([[1, 0, 0],
                  [0, 0, 0],
                  [1, -1, -1],
                  [1, -1, -1.1],
                  [1, -2, -3]])
    print "\noriginal:\n", X
    print "l2:\n", row_normalize_matrix(X, norm='l2')
    print "!!! Notice that l1 norm with negative values is counterintuitive: !!!"
    print "l1:\n", row_normalize_matrix(X, norm='l1')
    print "zscores:\n", row_normalize_matrix(X, norm='zscores')

    X = np.array([[0, 20, 0],
                  [21, 0, 0],
                  [0, 0, 14]])
    print "\noriginal:\n", X
    print "l2:\n", row_normalize_matrix(X, norm='l2')
    print "l1:\n", row_normalize_matrix(X, norm='l1')
    print "zscores:\n", row_normalize_matrix(X, norm='zscores')

    print "\n -- zscore and normalizing together --"
    v = np.array([1, 1, 0, 0, 0])
    print "original:\n ", v
    print "zscore:\n ", row_normalize_matrix(v, norm='zscores')
    print "normalized zscore:\n ", \
        row_normalize_matrix(row_normalize_matrix(v, norm='zscores'), norm='l2')
    print "normalized zscore normalized:\n ", \
        row_normalize_matrix(
            row_normalize_matrix(
                row_normalize_matrix(v, norm='l2'), norm='zscores'), norm='l2')

    X = np.array([[1, 0, 0],
                  [1, -1, -1],
                  [1, -1, -1.1],
                  [1, -2, -3],
                  [0, 0, 0],
                  [1, 1, -1],
                  [1, 1.1, -1],
                  [1, 1, 1]])
    print "\noriginal:\n", X
    print "zscore:\n", row_normalize_matrix(X, norm='zscores')
    print "normalized:\n", row_normalize_matrix(X, norm='l2')
    print "normalized zscore:\n", \
        row_normalize_matrix(row_normalize_matrix(X, norm='zscores'), norm='l2')
    print "normalized zscore normalized:\n", \
        row_normalize_matrix(
            row_normalize_matrix(
                row_normalize_matrix(X, norm='l2'), norm='zscores'), norm='l2')
    print "zscore normalized zscore normalized:\n", \
        row_normalize_matrix(
            row_normalize_matrix(
                row_normalize_matrix(
                    row_normalize_matrix(X, norm='l2'), norm='zscores'), norm='l2'), norm='zscores')
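
# For reference, a minimal sketch of the row normalizations exercised above,
# assuming 'row_normalize_matrix' behaves as the printed labels and the
# "counterintuitive l1" warning suggest; the name '_row_normalize_sketch' is
# hypothetical and not part of this module. 'l1' divides each row by its
# (signed) sum, 'l2' by its Euclidean length, and 'zscores' centers each row
# to mean 0 and scales it to standard deviation 1.
def _row_normalize_sketch(X, norm='l1'):
    X = np.atleast_2d(np.asarray(X, dtype=float))
    if norm == 'l1':
        s = X.sum(axis=1, keepdims=True)        # signed sum: negative entries make this counterintuitive
    elif norm == 'l2':
        s = np.sqrt((X ** 2).sum(axis=1, keepdims=True))
    elif norm == 'zscores':
        X = X - X.mean(axis=1, keepdims=True)   # center each row
        s = X.std(axis=1, keepdims=True)
    else:
        raise ValueError("unknown norm: %s" % norm)
    s[s == 0] = 1                               # leave all-0 (or constant) rows unchanged
    return X / s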