def test_transform_potentials():
    """Print a walkthrough of 'calculate_potential_from_row_normalized'.

    Builds a 3x3 row-stochastic matrix and a class prior, prints the derived
    potential together with a few broadcasting illustrations, and then repeats
    the calculation with an additional degree vector. Nothing is asserted; the
    output is meant for visual inspection.
    """
    print("\n-- 'calculate_potential_from_row_normalized' --")
    H = np.array([[0.1, 0.8, 0.1],
                  [0.8, 0.1, 0.1],
                  [0.1, 0.1, 0.8]])
    alpha = [0.6, 0.2, 0.2]     # passed as a plain list on purpose (list or np.array both accepted)
    print("H0:\n", H)
    print("alpha:\n", alpha)

    potential = calculate_potential_from_row_normalized(H, alpha)
    print("Pot:\n", potential)

    # Column-vector version of alpha, to contrast row-wise with column-wise broadcasting
    alpha_column = np.array([alpha]).T
    print("\nalphaT:\n", alpha_column)
    print("H0 * alpha:\n", H * alpha)
    print("H0 * alphaT:\n", H * alpha_column)

    potential_array = np.array(potential)
    print("row sum (Pot):\n", potential_array.sum(axis=1, keepdims=True))
    print("H(Pot):\n", row_normalize_matrix(potential))
    normalized_once = row_normalize_matrix(potential)
    print("H(H(Pot)):\n", row_normalize_matrix(normalized_once))

    # -- Same calculation, now also passing outdegrees (per the printed banner:
    #    so that the potential becomes symmetric)
    print("\n--Specify also outdegrees (so that potential becomes symmetric)")
    d_vec = np.array([1, 3, 3])     # np.array instead of list, to exercise that input type as well
    print("d_vec:", d_vec)
    symmetric_potential = calculate_potential_from_row_normalized(H, alpha, d_vec)
    print("Pot (symmetric):\n", symmetric_potential)
def test_graph_statistics_forced_block_model():
    """Generate a small 'CBM' graph and print statistics recovered from it.

    Prints the planted parameters, the generation time, and the quantities
    computed from the generated graph (class counts, total potential, its
    row-normalized form, and degree distributions). Nothing is asserted.
    """
    print("\n--- test_graph_statistics_forced_block_model() ---")
    H0 = np.array([[0.1, 0.8, 0.1],
                   [0.8, 0.1, 0.1],
                   [0.1, 0.1, 0.8]])
    alpha0 = np.array([0.4, 0.3, 0.3])
    print("alpha0: ", alpha0)
    print("H0:\n", H0)
    print("\n")

    n = 40
    b = 2
    t_begin = time.time()
    Ws, X = graphGenerator(n, b, H=H0, alpha=alpha0, model='CBM', seed=None, directed=True)
    elapsed = time.time() - t_begin
    print("Time for graph generation: ", elapsed)
    print("\n")

    # Statistics measured on the generated graph
    Xd = to_dictionary_beliefs(X)
    n_vec = calculate_nVec_from_Xd(Xd)
    P_tot = calculate_Ptot_from_graph(Ws, Xd)
    H = row_normalize_matrix(P_tot)
    print("n_vec: ", n_vec)
    print("alpha: ", 1. * n_vec / sum(n_vec))
    print("P_tot:\n", P_tot)
    print("P:\n", 1. * P_tot / sum(P_tot.flatten()))     # Potential: normalized sum = 1
    print("H:\n", H)

    # NOTE(review): the labels below say "indegree" although the helper is named
    # '...outdegree...' -- confirm which one is intended.
    overall_distribution = calculate_outdegree_distribution_from_graph(Ws, Xd=None)
    print("Indegree distribution:\n", overall_distribution)
    per_class_distributions = calculate_outdegree_distribution_from_graph(Ws, Xd)
    print("List of indegree distributions:")
    for distribution in per_class_distributions:
        print(" ", distribution)
def test_matrix_convergence_percentage():
    """Print 'matrix_convergence_percentage' for two fixed 10x3 matrices.

    The percentage is printed once for the raw matrices and once after both
    have been passed through row_normalize_matrix(..., norm='zscores').
    Nothing is asserted.
    """
    print("\n-- 'matrix_convergence_percentage' --")
    before = np.array([
        [2, 0, 0],
        [2, 0, 2],
        [0, 1, 0],
        [0, 0, 3],
        [0, 0, 3],
        [1, 0, 2],
        [0, 3, 3],
        [0, 0, 0],
        [9, 9, 9],
        [100, 100, 100],
    ])
    after = np.array([
        [1, 1, 2],
        [2, 1, 2],
        [3, 4, 0],
        [1, 1, 2],
        [2, 1, 1],
        [1, 2, 2],
        [1, 2, 3],
        [0, 0, 0],
        [8, 9, 9],
        [100, 100, 101],
    ])
    print("X0:\n", before)
    print("X1:\n", after)

    threshold = 0.5
    converged_raw = matrix_convergence_percentage(before, after, threshold)
    print("percentage converged (original):\n", converged_raw)

    before_z = row_normalize_matrix(before, norm='zscores')
    after_z = row_normalize_matrix(after, norm='zscores')
    converged_z = matrix_convergence_percentage(before_z, after_z, threshold)
    print("percentage converged (after zscore):\n", converged_z)
def maximumMarginals(X, W, P, debug=1):
    """Calculates Maximum Marginals by enumeration over all k^n possible worlds, assuming a directed graph
    with two variants:
        V1: one directed potential across edge direction: P is one potential, and W contains the weights of edges
        V2: a set of potentials on different edges: P is a tensor, and W indexes the potentials
    Dimensions of P (2 or 3) determines variant.
    If debug >= 1, checks that every entry in X and P is >= 0.
    Can model undirected graphs by (1) specifying every edge only for one direction, and (2) using symmetric potentials.
    Creates one n-dimensional array with k^n entries for unnormalized probabilities of all worlds.
    Then sums up appropriate marginals.

    Memory and run time grow as k^n, so this is only usable for very small graphs.

    TODO: future variant with non-constant k and different potential dimensions

    Parameters
    ----------
    X : [n x k] np array
        prior (explicit) belief matrix.
        Rows do not have to be row-normalized.
        Rows can be all 0, which get later replaced by undefined prior belief (all 1s).
    W : [n x n] sparse.csr_matrix
        directed sparse weighted adjacency matrix (thus a directed graph is assumed)
        Also allows undirected graph by simply specifying only symmetric potentials
        V1: weight determines the actual edge weight
            NOTE(review): the code below does not use the weight in V1; every edge contributes P unweighted.
        V2: weight determines the index of a potential (from potential tensor P).
            Indices are 1-based: weight w selects P[w-1, :, :] (0 means "no edge" in a sparse matrix).
    P : V1: [k x k]
        any directed potential (no requirement for normalization or identical row or column sums)
        V2: [num_pot_P x k x k] np array
        set of potentials (as tensor)
    debug : int (Default = 1)
        0 : no debugging
        1 : tests for correct input

    Returns
    -------
    F : [n x k] np array
        final belief matrix, each row normalized to form a label distribution

    Raises
    ------
    AssertionError
        If debug >= 1 and one of the input checks fails.
    """
    n, k = X.shape
    dim_pot = len(P.shape)      # dimensions 2 or 3: determines V1 or V2

    if debug >= 1:
        assert (X >= 0).all(), "All explicit beliefs need to be >=0 "
        assert issparse(W), "W needs to be sparse"
        n2, n3 = W.shape
        assert type(P).__module__ == "numpy", "P needs to be numpy array (and not a matrix)"
        assert dim_pot in [2, 3], "Input Potentials need to be 2-dimensional or 3-dimensional"
        if dim_pot == 2:
            assert (P >= 0).all(), "All entries in the potentials need to be >=0 "
            k2, k3 = P.shape
        else:
            num_pot_P, k2, k3 = P.shape
            assert (P >= 0).all(), "All entries in each potential need to be >=0 "
            # Accept every integer dtype (int32, int64, ...) instead of only the platform default int
            assert np.issubdtype(W.dtype, np.integer), "Given several potentials, the entries of weight matrix need to be integers, in order to reference the index of the potential in the potential tensor"
            pot_indices = W.data
            if pot_indices.size > 0:    # a graph without edges references no potential at all
                # 1-based indices: the largest one may equal (but not exceed) the number of potentials
                assert pot_indices.max() <= num_pot_P, "Indices in W refering to P need to be smaller than the number of potentials"
        assert n == n2 and n2 == n3, "X and W need to have compatible dimensions"
        assert k == k2 and k2 == k3, "X and P need to have compatible dimensions"

    # --- X -> X0: replace all-0-rows with all 1s (no need to normalize initial beliefs)
    implicitVector = 1 - 1 * to_explicit_bool_vector(X)         # indicator numpy array with 1s for rows with only 0s
    implicitVectorT = np.array([implicitVector]).transpose()    # vertical 1 vector for implicit nodes
    X0 = X + implicitVectorT    # X0: prior beliefs: addition of [n x k] matrix with [n x 1] vector is ok

    # --- Edge list, extracted once (it does not depend on the assignment).
    # COO form keeps (row, col, weight) aligned even if W stores explicit zeros,
    # which W.nonzero() drops while W.data retains them.
    W_coo = W.tocoo()
    is_edge = W_coo.data != 0
    edge_rows = W_coo.row[is_edge]
    edge_cols = W_coo.col[is_edge]
    edge_weights = W_coo.data[is_edge]

    # --- Calculate the unnormalized probability of a world (uses implicitly: X0, P, dim_pot and the edge list)
    def unnormalized_probability(assignment):
        # Multiplies all priors and all edge potentials (instead of summing logs)
        prob = 1
        for (prior_row, label) in zip(X0, assignment):      # assignment contains the class of each node
            prob *= prior_row[label]
        for (r, c, w) in zip(edge_rows, edge_cols, edge_weights):
            if dim_pot == 2:
                factor = P[assignment[r], assignment[c]]
            else:       # need to reference the correct potential in case dim_pot == 3
                factor = P[w - 1, assignment[r], assignment[c]]
            prob *= factor
        return prob

    # --- Create the multi-dimensional unnormalized Prob array with k^n entries (one axis per node)
    Prob = np.zeros((k,) * n)
    for index, _ in np.ndenumerate(Prob):       # iterates over all index tuples = all possible worlds
        Prob[index] = unnormalized_probability(index)

    # --- Calculate marginals: for node i, sum over all axes except axis i
    F = []
    for i in range(n):
        other_axes = tuple(axis for axis in range(n) if axis != i)
        F.append(np.sum(Prob, axis=other_axes))     # unnormalized marginals for node i
    return row_normalize_matrix(F, norm='l1')       # not necessary to transform to numpy
def beliefPropagation(X, W, P,
                      numMaxIt=10,
                      convergencePercentage=None,
                      convergenceThreshold=0.9961947,
                      debug=1,
                      damping=1,
                      clamping=False):
    """Standard belief propagation assuming a directed graph with two variants:
        V1: one directed potential across edge direction: P is one potential, and W contains the weights of edges
        V2: a set of potentials on different edges: P is a tensor, and W indexes the potentials
    Dimensions of P (2 or 3) determines variant.
    Uses message-passing with division: see [Koller,Friedman 2009] Section 10.3.1.
    Uses damping: see [Koller,Friedman 2009] Section 11.1.
    Can be run either with given number of maximal iterations or until specified percentage of nodes have converged.
    Convergence of a node is determined by (variant of) cosine similarity between *centered beliefs* from two iterations.
    If convergence criterion is reached, the iterations will stop before maximal iterations.
    Parameter "debug" allows alternative, more detailed outputs, e.g., to get intermediate belief values.
    If debug >= 1, checks that every entry in X and P is >= 0.
    Can model undirected graphs by (1) specifying every edge only for one direction, and (2) using symmetric potentials.

    TODO: also implement version without message passing with division
    TODO: future variant with non-constant k and different potential dimensions
    TODO: future variant without echo cancellation
    TODO: alternative convergence condition: if np.allclose(x, x_new, atol=1e-10): break]
    TODO: clamping not necessary: all depends on relative strength of prior beliefs

    Parameters
    ----------
    X : [n x k] np array
        prior (explicit) belief matrix.
        Rows do not have to be row-normalized.
        Rows can be all 0, which get later replaced by undefined prior belief (all 1s).
    W : [n x n] sparse.csr_matrix
        directed sparse weighted adjacency matrix (thus a directed graph is assumed)
        Also allows undirected graph by simply specifying only symmetric potentials
        V1: weight determines the actual edge weight
            NOTE(review): the code below does not use the weight in V1; every edge contributes P unweighted.
        V2: weight determines the index of a potential (from potential tensor P).
            Indices are 1-based: weight w selects P[w-1, :, :].
    P : V1: [k x k]
        any directed potential (no requirement for normalization or identical row or column sums)
        V2: [num_pot_P x k x k] np array
        set of potentials (as tensor)
    numMaxIt : int (Default = 10)
        number of maximal iterations to perform
    convergencePercentage : float (Default = None)
        percentage of nodes that need to have converged in order to interrupt the iterations.
        Notice that a node with undefined beliefs does not count as converged if it does not change anymore
        (in order to avoid counting nodes without explicit beliefs as converged in first few rounds).
        If None, then runs until numMaxIt
    convergenceThreshold : float (Default = 0.9961947)
        cosine similarity (actually, the "cosine_ratio" similarity) between two belief vectors in order to deem them
        as identical (thus converged). In case both vectors have the same length, then:
        cos(5 deg) = 0.996194698092. cos(1 deg) = 0.999847695156
    debug : int (Default = 1)
        0 : no debugging and just returns F
        1 : tests for correct input, and just returns F
        2 : tests for correct input, and returns (F, actualNumIt, actualPercentageConverged)
        3 : tests for correct input, and returns (list of F, actualNumIt, list of convergenceRatios)
    damping : float (Default = 1)
        fraction of message values that come from new iteration (if 1, then no re-use of prior iteration)
    clamping : Boolean (Default = False)
        whether or not the explicit beliefs in X should be clamped to the nodes or not

    Returns (if debug == 0 or debug == 1)
    -------------------------------------
    F : [n x k] np array
        final belief matrix, each row normalized to form a label distribution

    Returns (if debug == 2 )
    ------------------------
    F : [n x k] np array
        final belief matrix, each row normalized to form a label distribution
    actualNumIt : int
        actual number of iterations performed
    actualPercentageConverged : float
        percentage of nodes that converged

    Returns (if debug == 3 )
    ------------------------
    List of F : [(actualNumIt+1) x n x k] np array
        list of final belief matrices for each iteration, represented as 3-dimensional numpy array
        Also includes the original beliefs as first entry (0th iteration). Thus has (actualNumIt + 1) entries
    actualNumIt : int
        actual number of iterations performed (not counting the first pass = 0th iteration for initializing)
    List of actualPercentageConverged : list of float (with length actualNumIt)
        list of percentages of nodes that converged in each iteration > 0. Thus has actualNumIt entries

    Raises
    ------
    AssertionError
        If debug >= 1 and one of the input checks fails.
    """
    # --- create variables for convergence checking and debugging
    n, k = X.shape
    dim_pot = len(P.shape)      # dimensions 2 or 3: determines V1 or V2
    Pot = P                     # for case of dim_pot = 2

    if debug >= 1:
        assert (X >= 0).all(), "All explicit beliefs need to be >=0 "
        assert issparse(W), "W needs to be sparse"
        n2, n3 = W.shape
        assert type(P).__module__ == "numpy", "P needs to be numpy array (and not a matrix)"
        assert dim_pot in [2, 3], "Input Potentials need to be 2-dimensional or 3-dimensional"
        if dim_pot == 2:
            assert (P >= 0).all(), "All entries in the potentials need to be >=0 "
            k2, k3 = P.shape
        else:
            num_pot_P, k2, k3 = P.shape
            assert (P >= 0).all(), "All entries in each potential need to be >=0 "
            # Accept every integer dtype (int32, int64, ...) instead of only the platform default int
            assert np.issubdtype(W.dtype, np.integer), "Entries of weight matrix need to be integers to reference index of the potential"
            pot_indices = W.data
            if pot_indices.size > 0:    # a graph without edges references no potential at all
                # 1-based indices: the largest one may equal (but not exceed) the number of potentials
                assert pot_indices.max() <= num_pot_P, "Indices in W refering to P need to be smaller than the number of potentials"
        assert n == n2 and n2 == n3, "X and W need to have compatible dimensions"
        assert k == k2 and k2 == k3, "X and P need to have compatible dimensions"
    if debug >= 3:
        listF = []              # store the belief matrices for each iteration
        listConverged = []      # store the convergence percentage of each iteration > 0

    # --- create edge list and edge dictionaries
    # COO form keeps (row, col, weight) aligned even if W stores explicit zeros, and makes
    # 'weight' available independently of the debug level (needed below for dim_pot == 3).
    W_coo = W.tocoo()
    is_edge = W_coo.data != 0
    row = W_coo.row[is_edge]
    col = W_coo.col[is_edge]
    weight = W_coo.data[is_edge]

    # Every node gets an entry (possibly empty), so that isolated nodes can be looked up in the belief update
    dict_edges_out = {node: set() for node in range(n)}     # dictionary: i to all nodes j with edge (i->j)
    dict_edges_in = {node: set() for node in range(n)}      # dictionary: i to all nodes j with edge (i<-j)
    for (i, j) in zip(row, col):
        dict_edges_out[i].add(j)
        dict_edges_in[j].add(i)

    if dim_pot == 3:
        # Dictionary: for each directed edge (i,j) -> (1-based) index of the potential in P
        dict_edges_pot = {(i, j): d for (i, j, d) in zip(row, col, weight)}

    # --- X -> X0: replace all-0-rows with all 1s (no need to normalize initial beliefs)
    implicitVector = 1 - 1 * to_explicit_bool_vector(X)         # indicator numpy array with 1s for rows with only 0s
    implicitVectorT = np.array([implicitVector]).transpose()    # vertical 1 vector for implicit nodes
    X0 = X + implicitVectorT    # X0: prior beliefs: addition of [n x k] matrix with [n x 1] vector is ok
    F1 = X0                     # old F: only for checking convergence (convergencePercentage not None or debug >= 2)
    F2 = X0.astype(float)       # new F: copy is necessary as to not change original X0 matrix when F2 is changed

    # --- Actual loop: each loop calculates (a) the new messages (with damping) and (b) the new beliefs
    converged = False
    actualNumIt = -1    # iterations start with 0th iteration
    while actualNumIt < numMaxIt and not converged:
        actualNumIt += 1

        # --- (a) calculate messages
        if actualNumIt == 0:
            # --- first pass (counts as 0th iteration): create message dictionaries and initialize messages with ones
            dict_messages_along_1 = {}      # dictionary: messages for each edge (i->j) in direction i->j
            dict_messages_against_1 = {}    # dictionary: messages for each edge (i<-j) in direction i->j
            default = np.ones(k)            # first message vector: all 1s (shared, never modified in place)
            for (i, j) in zip(row, col):
                dict_messages_along_1[(i, j)] = default
                dict_messages_against_1[(j, i)] = default
        else:
            # --- other iterations: calculate "messages_new" using message-passing with division (from F and messages)
            # NOTE(review): a zero entry in a message leads to a division by zero here; not guarded.
            dict_messages_along_2 = {}      # new dictionary: messages for each edge (i->j) in direction i->j
            dict_messages_against_2 = {}    # new dictionary: messages for each edge (i<-j) in direction i->j
            for (i, j) in dict_messages_along_1.keys():     # also covers all keys (j,i) of dict_messages_against_1
                if dim_pot == 3:    # need to reference the correct potential in case dim_pot == 3
                    Pot = P[dict_edges_pot[(i, j)] - 1, :, :]
                dict_messages_along_2[(i, j)] = (F2[i] / dict_messages_against_1[(j, i)]).dot(Pot)  # entry-wise division
                dict_messages_against_2[(j, i)] = (F2[j] / dict_messages_along_1[(i, j)]).dot(Pot.transpose())
                # TODO above two lines can contain errors

            # --- assign new to old message dictionaries, and optionally damp messages
            if damping == 1:
                dict_messages_along_1 = dict_messages_along_2.copy()    # requires shallow copy because of later division
                dict_messages_against_1 = dict_messages_against_2.copy()
            else:
                for (i, j) in dict_messages_along_1.keys():
                    dict_messages_along_1[(i, j)] = damping * dict_messages_along_2[(i, j)] + \
                                                    (1 - damping) * dict_messages_along_1[(i, j)]
                for (i, j) in dict_messages_against_1.keys():
                    dict_messages_against_1[(i, j)] = damping * dict_messages_against_2[(i, j)] + \
                                                      (1 - damping) * dict_messages_against_1[(i, j)]

        # --- (b) create new beliefs by multiplying prior beliefs with all incoming messages (pointing in both directions)
        for i in range(n):
            # Skip only nodes that are explicit AND clamped; implicitVector is 1 for implicit (all-0) rows.
            # (The previous test 'implicitVector[i] == 0' froze the implicit nodes instead of the explicit ones.)
            if not clamping or implicitVector[i] == 1:
                F2[i] = X0[i]   # start multiplying from prior beliefs; values are copied into the row of F2
                for j in dict_edges_out[i]:     # edges pointing away
                    F2[i] *= dict_messages_against_1[(j, i)]
                for j in dict_edges_in[i]:      # edges pointing inwards
                    F2[i] *= dict_messages_along_1[(j, i)]
                    # TODO line can contain errors

        # --- normalize beliefs [TODO: perhaps remove later to optimize except in last round]
        F2 = row_normalize_matrix(F2, norm='l1')

        # --- check convergence and store information if debug
        if convergencePercentage is not None or debug >= 2:
            F1z = to_centering_beliefs(F1)
            F2z = to_centering_beliefs(F2)
            actualPercentageConverged = matrix_convergence_percentage(F1z, F2z, threshold=convergenceThreshold)
            if convergencePercentage is not None \
                    and actualPercentageConverged >= convergencePercentage \
                    and actualNumIt > 0:    # end the loop early
                converged = True
            F1 = F2.copy()  # save for comparing in *next* iteration, make copy since F entries get changed

        if debug >= 3:
            listF.append(F2.copy())     # stores (actualNumIt+1) values (copy is important as F2 is later overwritten)
            if actualNumIt > 0:
                listConverged.append(actualPercentageConverged)     # stores actualNumIt values

    # --- Various return formats
    if debug <= 1:
        return F2
    elif debug == 2:
        return F2, actualNumIt, actualPercentageConverged
    else:
        return np.array(listF), actualNumIt, listConverged
def test_matrix_difference_with_cosine_simililarity():
    """Print 'matrix_difference' (similarity='cosine') for hand-picked vectors and matrices.

    Each comparison is shown on the raw inputs and/or on inputs passed through
    row_normalize_matrix(..., norm='zscores'), first for k=3, then for k=5,
    and finally row-wise for two 5x5 matrices. Nothing is asserted.
    """

    def zscores(data):
        # Shorthand for the z-score normalization applied throughout this demo
        return row_normalize_matrix(data, norm='zscores')

    def cosine(left, right, **options):
        # Shorthand for the cosine variant of matrix_difference
        return matrix_difference(left, right, similarity='cosine', **options)

    print("\n-- 'matrix_difference' (cosine), 'row_normalize_matrix' --")

    # --- vectors with 3 classes
    print("k=3")
    v1 = np.array([1, 0, 0])
    v2 = np.array([0, 1, 0])
    v3 = np.array([1, 1, 0])
    print("Cosine with original:\n ", cosine(v1, v1))
    print("Cosine with original zscore:\n ", cosine(zscores(v1), zscores(v1)))
    print("Cosine with zscore :\n ", cosine(v1, zscores(v1)))
    print("Cosine with normal:\n ", cosine(v1, v2))
    print("Cosine with normal after both zscore:\n ", cosine(zscores(v1), zscores(v2)))
    print("! Notice that average guessing leads to expectation of 0!")
    print("Cosine v1, v3:\n ", cosine(v1, v3))
    print("Cosine v1, v3 after zscore:\n ", cosine(zscores(v1), zscores(v3)))

    # --- vectors with 5 classes
    print("\nk=5")
    v1 = np.array([1, 0, 0, 0, 0])
    v2 = np.array([0, 1, 0, 0, 0])
    v3 = np.array([1, 1, 0, 0, 0])
    v4 = np.array([0, 0, 0, 0, 0])
    print("Cosine with normal:\n ", cosine(v1, v2))
    print("Cosine with normal after both zscore:\n ", cosine(zscores(v1), zscores(v2)))
    print("! Notice that average guessing leads to expectation of 0!")
    print("Cosine v1, v3:\n ", cosine(v1, v3))
    print("Cosine v1, v3 after zscore:\n ", cosine(zscores(v1), zscores(v3)))
    # NOTE(review): labelled "partly zscore" but both arguments are z-scored -- confirm intent
    print("Average Cos similarity partly zscore:\n ", cosine(zscores(v1), zscores(v3)))
    print("Cosine with 0-vector:\n ", cosine(zscores(v1), zscores(v4)))
    print()

    # --- matrices: row-wise (vector=True) and aggregated comparison
    X = np.array([[1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0]])
    Y = np.array([[1, 0, 0, 0, 0],
                  [1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0],
                  [1, 1.1, 0, 0, 0]])
    print("X\n", X)
    print("Y\n", Y)
    Xs = zscores(X)
    Ys = zscores(Y)
    print("Xs\n", Xs)
    print("Ys\n", Ys)
    print("\nCosine original:\n ", cosine(X, Y, vector=True))
    print("Cosine zscore:\n ", cosine(Xs, Ys, vector=True))
    # NOTE(review): labelled "zscore" but the raw X, Y are passed -- confirm intent
    print("Average cosine zscore:\n ", cosine(X, Y))
def test_degree_matrix():
    """Print 'degree_matrix' results for small example graphs and time it on a large one.

    Covers a directed 5-node graph, an undirected 4-node graph, the same graph
    after W_row(...), and finally a random matrix with n=100000 rows and
    n*d random entries for a timing measurement. Nothing is asserted.
    """
    print("\n-- 'degree_matrix' --")

    # --- directed example
    print("- Directed case")
    sources = [0, 0, 0, 1, 2, 3]
    targets = [1, 2, 3, 4, 4, 4]
    weights = [2, 3, 4, 1, 2, 3]
    Ws = sps.csr_matrix((weights, (sources, targets)), shape=(5, 5))
    print("Ws:\n{}".format(Ws))
    print("W:\n{}".format(Ws.todense()))

    print("\nSquared=False")
    D_in = degree_matrix(Ws, indegree=True, squared=False)
    D_out = degree_matrix(Ws, indegree=False, squared=False)
    print("D_in (col sum):\n{}".format(D_in))
    print("D_in (col sum):\n{}".format(D_in.todense()))
    print("D_out (row sum):\n{}".format(D_out))

    print("\nSquared=True")
    D_in = degree_matrix(Ws, indegree=True)
    D_out = degree_matrix(Ws, indegree=False)
    print("D_in (col sum):\n{}".format(D_in))
    print("D_out (row sum):\n{}".format(D_out))

    # --- undirected example (both directions are listed explicitly)
    print("\n- Undirected case (undirected=True)")
    sources = [0, 1, 0, 2, 1, 2, 2, 3, 3]
    targets = [1, 0, 2, 0, 2, 1, 3, 2, 1]
    weights = [1, 2, 2, 1, 1, 1, 1, 0.1, 0.5]
    Ws = sps.csr_matrix((weights, (sources, targets)), shape=(4, 4))
    print("Ws:\n{}".format(Ws))
    print("W:\n{}".format(Ws.todense()))

    print("\nSquared=False")
    D_und_in = degree_matrix(Ws, undirected=True, indegree=True, squared=False)
    print("D (undirected, in):\n{}".format(D_und_in.todense()))
    D_und_out = degree_matrix(Ws, undirected=True, indegree=False, squared=False)
    print("D (undirected, out):\n{}".format(D_und_out.todense()))

    print("\nSquared=True")
    D_und = degree_matrix(Ws, undirected=True)
    print("D (undirected):\n{}".format(D_und.todense()))

    # --- undirected example after row normalization
    print("\n- Undirected case with row-normalized matrix (undirected=True)")
    Wrow = W_row(Ws)
    print("Wrow:\n{}".format(Wrow))
    print("Wrow:\n{}".format(Wrow.todense()))
    Wrow2 = row_normalize_matrix(Ws.todense())
    print("Wrow2 (with 'row_normalize_matrix' after '.todense()':\n{}".format(Wrow2))

    print("\nSquared=False")
    D_row_in = degree_matrix(Wrow, undirected=True, indegree=True, squared=False)
    print("D_row (undirected, in):\n{}".format(D_row_in.todense()))
    D_row_out = degree_matrix(Wrow, undirected=True, indegree=False, squared=False)
    print("D_row (undirected, out):\n{}".format(D_row_out.todense()))

    print("\nSquared=True")
    D_row = degree_matrix(Wrow, undirected=True)
    print("D_row (undirected):\n{}".format(D_row.todense()))

    # --- timing on a large random matrix
    print("\nTiming with big random matrix (n=100k, d=10) and random weights")
    n = 100000
    d = 10
    num_entries = n * d
    sources = np.random.randint(n, size=num_entries)
    targets = np.random.randint(n, size=num_entries)
    weights = np.random.randint(1, 10, size=num_entries)
    Ws = sps.csr_matrix((weights, (sources, targets)), shape=(n, n))
    use_unit_weights = False    # manual switch, disabled: original note says unit weights are faster by factor 3
    if use_unit_weights:
        Ws.data[:] = [1] * len(Ws.data)
    t_begin = time.time()
    D_in = degree_matrix(Ws, indegree=True)
    elapsed = time.time() - t_begin
    print("Time to calculate D_in:", elapsed)
def test_row_normalize_matrix():
    """Print 'row_normalize_matrix' results for the norms 'l1', 'l2' and 'zscores'.

    First applies each norm on its own to two vectors and two matrices, then
    prints several compositions of 'zscores' and 'l2'. Nothing is asserted.
    """

    def chained(data, *norms):
        # Apply row_normalize_matrix repeatedly, one call per norm, left to right
        result = data
        for norm_name in norms:
            result = row_normalize_matrix(result, norm=norm_name)
        return result

    print("\n-- 'row_normalize_matrix' (l1, l2, zscores) --")

    # --- single norms on vectors
    v = np.array([1, 1, 0, 0, 0])
    print("original:\n ", v)
    print("l2:\n ", chained(v, 'l2'))
    print("l1:\n ", chained(v, 'l1'))
    print("zscores:\n ", chained(v, 'zscores'))

    v = np.array([1, 1, 1, 0, 0])
    print("\noriginal:\n ", v)
    print("l2:\n ", chained(v, 'l2'))
    print("l1 :\n ", chained(v, 'l1'))
    print("zscores:\n ", chained(v, 'zscores'))

    # --- single norms on matrices (the first one includes an all-0 row and negative entries)
    X = np.array([
        [1, 0, 0],
        [0, 0, 0],
        [1, -1, -1],
        [1, -1, -1.1],
        [1, -2, -3],
    ])
    print("\noriginal:\n", X)
    print("l2:\n", chained(X, 'l2'))
    print("!!! Notice that l1 norm with negative values is counterintuitive: !!!")
    print("l1:\n", chained(X, 'l1'))
    print("zscores:\n", chained(X, 'zscores'))

    X = np.array([[0, 20, 0],
                  [21, 0, 0],
                  [0, 0, 14]])
    print("\noriginal:\n", X)
    print("l2:\n", chained(X, 'l2'))
    print("l1:\n", chained(X, 'l1'))
    print("zscores:\n", chained(X, 'zscores'))

    # --- compositions of 'zscores' and 'l2'
    print("\n -- zscore and normalizing together --")
    v = np.array([1, 1, 0, 0, 0])
    print("original:\n ", v)
    print("zscore:\n ", chained(v, 'zscores'))
    print("normalized zscore:\n ", chained(v, 'zscores', 'l2'))
    print("normalized zscore normalized:\n ", chained(v, 'l2', 'zscores', 'l2'))

    X = np.array([[1, 0, 0],
                  [1, -1, -1],
                  [1, -1, -1.1],
                  [1, -2, -3],
                  [0, 0, 0],
                  [1, 1, -1],
                  [1, 1.1, -1],
                  [1, 1, 1]])
    print("\noriginal:\n", X)
    print("zscore:\n", chained(X, 'zscores'))
    print("normalized:\n", chained(X, 'l2'))
    print("normalized zscore:\n", chained(X, 'zscores', 'l2'))
    print("normalized zscore normalized:\n", chained(X, 'l2', 'zscores', 'l2'))
    print("zscore normalized zscore normalized:\n", chained(X, 'l2', 'zscores', 'l2', 'zscores'))
def test_planted_distribution_model():
    """Generate a planted-distribution graph, print its statistics and save two figures.

    Steps:
      1. Pick one hard-coded parameter set via ``CHOICE`` and build the graph with
         either 'planted_distribution_model' (potential ``P`` plus edge count ``m``)
         or 'planted_distribution_model_H' (row-normalized ``H0`` plus ``d_vec``).
      2. Print the fraction of bidirectional edges, P_tot / H, class sizes,
         average and full in/out/total degree distributions, the number of
         weakly connected components and the LinBP convergence boundaries.
      3. Fig1: log-log plot of the sorted per-class and total degree sequences.
      4. Fig2: scatter plot of the class-blocked adjacency matrix.

    Side effects: writes two PDFs below 'figs/' (the directory is created if it
    is missing) and runs ``open <file>`` through the shell for each of them,
    which relies on an ``open`` command being available (as on macOS).
    Nothing is asserted; the output is meant to be inspected manually.
    """

    def degree_table(dist):
        # 2-row array for printing: keys (degrees) on top, values (counts) below.
        # The views must be materialized with list(): on Python 3,
        # np.array([d.keys(), d.values()]) just wraps the two view objects.
        return np.array([list(dist.keys()), list(dist.values())])

    def sorted_degree_sequence(dist):
        # Repeat every degree `count` times, then sort in descending order.
        degrees = np.repeat(list(dist.keys()), list(dist.values()))
        return -np.sort(-degrees)

    def save_and_open(path):
        # 'papertype' is intentionally not passed: it only ever applied to
        # PostScript output and current Matplotlib rejects it for PDF.
        # 'frameon' is likewise left out (deprecated).
        os.makedirs('figs', exist_ok=True)
        plt.savefig(path, dpi=None, facecolor='w', edgecolor='w',
                    orientation='portrait', format='pdf',
                    transparent=True, bbox_inches='tight', pad_inches=0.1)
        os.system("open " + path)

    print("\n--- 'planted_distribution_model_H', 'planted_distribution_model_P', 'number_of_connectedComponents', 'create_blocked_matrix_from_graph' --")
    CHOICE = 21
    print("CHOICE:", CHOICE)
    debug = 0

    # --- Defaults, overridden by the individual CHOICE branches below
    # `directed` needs a default: the directed choices (1-5, 12-14) never assign
    # it and would otherwise fail with a NameError when the generator is called.
    directed = True                     # !!! TODO: not yet clear what undirected means here, only P accepts directed
    backEdgesAllowed = True             # ??? should be enforced in code
    sameInAsOutDegreeRanking = False
    distribution = 'powerlaw'
    exponent = -0.3
    VERSION_P = True

    # --- AAAI figures ---
    if CHOICE in [1, 2, 3, 4, 5, 6]:
        n = 120
        alpha0 = [1/6, 1/3, 1/2]
        h = 8
        P = np.array([[1, h, 1],
                      [1, 1, h],
                      [h, 1, 1]])
        if CHOICE == 1:                 # P (equivalent to 2), AAAI 2
            m = 1080
        elif CHOICE == 2:               # H (equivalent to 1)
            H0 = row_normalize_matrix(P)
            d_vec = [18, 9, 6]
            VERSION_P = False
        elif CHOICE == 3:               # H (equivalent to 4), AAAI 3
            H0 = row_normalize_matrix(P)
            d_vec = 9
            VERSION_P = False
        elif CHOICE == 4:               # P (equivalent to 3)
            P = np.array([[1, h, 1],
                          [2, 2, 2*h],
                          [3*h, 3, 3]])
            m = 1080
        elif CHOICE == 5:               # H (equivalent to 2), but backedges=False
            H0 = row_normalize_matrix(P)
            d_vec = [18, 9, 6]
            VERSION_P = False
            backEdgesAllowed = False
        elif CHOICE == 6:               # P undirected, AAAI 4
            P = np.array([[1, h, 1],
                          [h, 1, 1],
                          [1, 1, h]])
            directed = False
            backEdgesAllowed = False
            m = 540

    # --- AGAIN DIRECTED ---
    if CHOICE == 12:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        P = np.array([[0.1, 0.8, 0.1],
                      [0.8, 0.1, 0.1],
                      [0.1, 0.1, 0.8]])
        m = 3000
        distribution = 'uniform'        # uniform powerlaw
        exponent = None
        backEdgesAllowed = False        # ??? should be enforced in code

    if CHOICE == 13:                    # Nice for block matrix visualization
        n = 1000
        alpha0 = [0.334, 0.333, 0.333]
        h = 2
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 2000
        distribution = 'uniform'        # uniform powerlaw
        exponent = None
        backEdgesAllowed = False        # ??? should be enforced in code

    if CHOICE == 14:
        n = 1000
        alpha0 = [0.3334, 0.3333, 0.3333]
        h = 10
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 10000
        exponent = -0.55

    # --- UNDIRECTED ---
    if CHOICE == 20:
        n = 100
        alpha0 = [0.6, 0.2, 0.2]
        h = 1.4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 5
        directed = False
        exponent = -0.3
        VERSION_P = False
    elif CHOICE == 21:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 3.4                     # don't specify vector for undirected
        distribution = 'uniform'        # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = True         # ignored in code for undirected
        VERSION_P = False
        sameInAsOutDegreeRanking = True  # ignored in code for undirected
    elif CHOICE == 22:
        n = 1000
        m = 3000
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, 3*h, 1],
                      [2*h, 1, 1],
                      [1, 1, h]])
        distribution = 'uniform'        # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = False        # ignored in code for undirected
        sameInAsOutDegreeRanking = True  # ignored in code for undirected
        debug = 0
        VERSION_P = True
        H0 = row_normalize_matrix(P)

    # --- Create the graph
    start = time.time()
    if VERSION_P:
        W, Xd = planted_distribution_model(n, alpha=alpha0, P=P, m=m,
                                           distribution=distribution, exponent=exponent,
                                           directed=directed,
                                           backEdgesAllowed=backEdgesAllowed,
                                           sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                           debug=debug)
    else:
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d_vec,
                                             distribution=distribution, exponent=exponent,
                                             directed=directed,
                                             backEdgesAllowed=backEdgesAllowed,
                                             sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                             debug=debug)
    time_est = time.time()-start
    print("Time for graph generation: {}".format(time_est))

    # - Undirected degrees: In + Out
    # Element-wise product of W and its transpose is non-zero only where an edge
    # exists in both directions (possible if backEdgesAllowed).
    W_und = W.multiply(W.transpose())
    # W_und.data[:] = np.sign(W_und.data)  # W contains weighted edges -> unweighted before counting edges with Ptot
    print("Fraction of edges that go in both directions: {}".format(np.sum(W_und.data) / np.sum(W.data)))

    # --- Statistics on created graph
    print("\n- 'calculate_Ptot_from_graph':")
    P_tot = calculate_Ptot_from_graph(W, Xd)
    print("P_tot:\n{}".format(P_tot))
    print("sum(P_tot): {}".format(np.sum(P_tot)))
    print("P (normalized to sum=1):\n{}".format(1. * P_tot / np.sum(P_tot)))  # Potential: normalized sum = 1
    H = row_normalize_matrix(P_tot)
    print("H (row-normalized):\n{}".format(H))

    print("\n- 'calculate_nVec_from_Xd':")
    n_vec = calculate_nVec_from_Xd(Xd)
    print("n_vec: {}".format(n_vec))
    print("alpha: {}".format(1.*n_vec / sum(n_vec)))

    print("\n- Average Out/Indegree 'calculate_average_outdegree_from_graph' (assumes directed for total; for undirected the totals are incorrect):")
    print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))
    print("Average indegree: {}".format(calculate_average_outdegree_from_graph(W.transpose())))
    print("Average total degree: {}".format(calculate_average_outdegree_from_graph(W + W.transpose())))
    print("Average outdegree per class: {}".format(calculate_average_outdegree_from_graph(W, Xd)))
    print("Average indegree per class: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd)))
    print("Average total degree per class: {}".format(calculate_average_outdegree_from_graph(W + W.transpose(), Xd)))

    # - Overall degree distribution: In / out
    print("\n- Overall Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("Overall Out and Indegree distribution:")
    d_out_vec_tot = calculate_outdegree_distribution_from_graph(W, Xd=None)
    d_in_vec_tot = calculate_outdegree_distribution_from_graph(W.transpose(), Xd=None)
    print("Outdegree distribution (degree / number):\n{}".format(degree_table(d_out_vec_tot)))
    print("Indegree distribution (degree / number):\n{}".format(degree_table(d_in_vec_tot)))

    # - Overall degree distribution: In + Out
    d_tot_vec_tot = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd=None)
    print("Total degree distribution (degree / number):\n{}".format(degree_table(d_tot_vec_tot)))

    # - Per-class degree distribution: In / out
    print("\n- Per-class Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("\nOutdegree distribution per class:")
    d_out_vec = calculate_outdegree_distribution_from_graph(W, Xd)
    for i, dist in enumerate(d_out_vec):
        print("Class {}:".format(i))
        print(degree_table(dist))
    print("Indegree distribution per class:")
    d_in_vec = calculate_outdegree_distribution_from_graph(W.transpose(), Xd)
    for i, dist in enumerate(d_in_vec):
        print("Class {}:".format(i))
        print(degree_table(dist))

    # - per-class degree distribution: In + out
    print("\nTotal degree distribution per class:")
    d_vec_und = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd)
    for i, dist in enumerate(d_vec_und):
        print("Class {}:".format(i))
        print(degree_table(dist))

    print("\n- number of weakly connected components':")
    print("Number of weakly connected components: {}".format(connected_components(W, directed=True, connection='weak', return_labels=False)))

    # --- convergence boundary
    # The directed variant ('_out_eps_convergence_directed_linbp' on P) is
    # disabled; the centered, row-normalized H is used for every graph.
    Hc = to_centering_beliefs(H)
    eps_noEcho = eps_convergence_linbp(Hc, W, echo=False)
    eps_Echo = eps_convergence_linbp(Hc, W, echo=True)
    print("Eps (w/ echo): {}".format(eps_Echo))
    print("Eps (no echo): {}".format(eps_noEcho))

    # --- Fig1: Draw edge distributions
    print("\n- Fig1: Draw degree distributions")
    params = {'backend': 'pdf',
              'lines.linewidth': 4,
              'font.size': 10,
              'axes.labelsize': 24,     # fontsize for x and y labels (was 10)
              'axes.titlesize': 22,
              'xtick.labelsize': 20,
              'ytick.labelsize': 20,
              'legend.fontsize': 8,
              'figure.figsize': [5, 4],
              'font.family': 'sans-serif'
              }
    mpl.rcdefaults()
    mpl.rcParams.update(params)
    fig = plt.figure(1)
    ax = fig.add_axes([0.15, 0.15, 0.8, 0.8])  # main axes
    ax.xaxis.labelpad = -12
    ax.yaxis.labelpad = -12

    # One (color, linestyle, label) triple per plotted class; only the first
    # three classes are drawn.
    class_styles = [('orange', '-', "A"), ('blue', '--', "B"), ('green', ':', "C")]

    # A: Draw directed degree distribution
    y_vec = [sorted_degree_sequence(dist) for dist in d_out_vec]
    y_tot = sorted_degree_sequence(d_out_vec_tot)  # total outdegree
    for y, (color, linestyle, name) in zip(y_vec, class_styles):
        # x starts at 1: plot's default index would start from 0, which log axes cannot show
        plt.loglog(range(1, len(y)+1), y, lw=4, color=color, label=name + " out", linestyle=linestyle)
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot out", linestyle='-')

    # B: Draw second edge distribution of undirected degree distribution
    y_vec = [sorted_degree_sequence(dist) for dist in d_vec_und]
    y_tot = sorted_degree_sequence(d_tot_vec_tot)  # total degree (in + out)
    for y, (color, linestyle, name) in zip(y_vec, class_styles):
        plt.loglog(range(1, len(y)+1), y, lw=4, color=color, label=name, linestyle=linestyle)
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot", linestyle='-')

    plt.legend(loc='upper right', labelspacing=0)
    filename = 'figs/Fig_test_planted_distribution_model1_{}.pdf'.format(CHOICE)
    save_and_open(filename)

    # --- Fig2: Draw block matrix
    print("\n- Fig2: 'create_blocked_matrix_from_graph'")
    W_new, _ = create_blocked_matrix_from_graph(W, Xd)  # second return value is not needed here

    fig = plt.figure(2)
    row, col = W_new.nonzero()          # transform the sparse W back to row col format
    # Notice (col, row) because first axis is vertical in matrices
    plt.plot(col, row, 'o', color='r', markersize=2, markeredgewidth=2, lw=0, zorder=3)
    # plt.matshow(W_new.todense(), cmap=plt.cm.Greys)  # alternative that does not work as well
    plt.gca().invert_yaxis()            # invert the y-axis to start on top and go down

    # Show quadrants: tick positions at the cumulative class fractions times n
    d1 = alpha0[0] * n
    d2 = (alpha0[0] + alpha0[1]) * n
    plt.grid(which='major', color='0.7', linestyle='-', linewidth=1)
    plt.xticks([0, d1, d2, n])
    plt.yticks([0, d1, d2, n])
    plt.xlabel('to', labelpad=-1)
    plt.ylabel('from', rotation=90, labelpad=0)

    frame = plt.gca()
    # frame.axes.xaxis.set_ticklabels([])  # would hide the labels
    # frame.axes.yaxis.set_ticklabels([])
    frame.tick_params(direction='inout', width=1, length=10)

    filename = 'figs/Fig_test_planted_distribution_model2_{}.pdf'.format(CHOICE)
    save_and_open(filename)