def test_spectral_clustering_on_generative_model(scalar):
    # `sim_type`, `n_classes`, and `also_use_unweighted_adjacency` are
    # expected to be defined at module level by the experiment script.
    params = {'alpha': 0.05,
              'beta': 0.08,
              'mu_diag': 0.00075 * scalar,
              'mu_off_diag': 0.00035 if sim_type == 'b' else 0.00035 * scalar,
              'scale': False,
              'number_of_nodes': 256}

    event_dict, true_class_assignments = utils.simulate_community_hawkes(params)
    num_nodes = len(true_class_assignments)

    # Spectral clustering on the aggregated (weighted) adjacency matrix
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    agg_adj_pred = spectral_cluster(agg_adj, num_classes=n_classes)
    agg_adj_sc_rand = adjusted_rand_score(true_class_assignments, agg_adj_pred)

    if not also_use_unweighted_adjacency:
        return agg_adj_sc_rand

    # Spectral clustering on the unweighted adjacency matrix
    adj = utils.event_dict_to_adjacency(num_nodes, event_dict)
    adj_pred = spectral_cluster(adj, num_classes=n_classes)
    adj_sc_rand = adjusted_rand_score(true_class_assignments, adj_pred)

    # Third return value is the density of the unweighted adjacency matrix
    return agg_adj_sc_rand, adj_sc_rand, np.sum(adj) / (num_nodes ** 2)
def test_spectral_clustering_on_generative_model(n_nodes):
    # `agg_adj_should_fail`, `class_prob`, `n_classes`, and `chip_n_cores`
    # are expected to be defined at module level by the experiment script.
    if agg_adj_should_fail:
        params = {'number_of_nodes': n_nodes,
                  'alpha': 7.0,
                  'beta': 8.0,
                  'mu_off_diag': 0.001,
                  'mu_diag': 0.002,
                  'scale': False,
                  'end_time': 400,
                  'class_probabilities': class_prob,
                  'n_cores': chip_n_cores}
    else:
        params = {'number_of_nodes': n_nodes,
                  'alpha': 0.001,
                  'beta': 0.008,
                  'mu_off_diag': 0.001,
                  'mu_diag': 0.001,
                  # 'mu_diag': 0.002,
                  'alpha_diag': 0.006,
                  'scale': False,
                  'end_time': 400,
                  'class_probabilities': class_prob,
                  'n_cores': chip_n_cores}

    # event_dict, true_class_assignments = utils.simulate_community_hawkes(
    #     params, network_name="10-block-10k-nodes-higher-mu-diff")
    event_dict, true_class_assignments = utils.simulate_community_hawkes(params)

    # Spectral clustering on the unweighted adjacency matrix
    adj = utils.event_dict_to_adjacency(n_nodes, event_dict)
    adj_sc_pred = spectral_cluster(adj, num_classes=n_classes, verbose=False)
    adj_sc_rand = adjusted_rand_score(true_class_assignments, adj_sc_pred)

    # Spectral clustering on the aggregated (weighted) adjacency matrix
    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
    agg_adj_pred = spectral_cluster(agg_adj, num_classes=n_classes, verbose=False)
    agg_adj_sc_rand = adjusted_rand_score(true_class_assignments, agg_adj_pred)

    return adj_sc_rand, agg_adj_sc_rand
def fit_poisson_baseline_model(event_dict, num_nodes, duration, num_classes,
                               verbose=False):
    """
    Fits a Poisson baseline model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) Duration of the network
    :param num_classes: (int) Number of blocks / classes
    :param verbose: Prints fitted Poisson baseline parameters

    :return: node_membership, lambda, count_matrix
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # If there are as many classes as nodes, assign each node to its own class
    if num_classes == num_nodes:
        node_membership = list(range(num_nodes))
    else:
        # Running spectral clustering
        node_membership = spectral_cluster(agg_adj, num_classes=num_classes)

    count_matrix = event_dict_to_block_pair_event_counts(event_dict,
                                                         node_membership,
                                                         num_classes)

    bp_lambda = estimate_poisson_lambda(count_matrix, node_membership, duration,
                                        num_classes,
                                        default_lambda=1e-10 / duration)

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        print("Membership percentage:", class_prob)
        print("Lambda:")
        print(bp_lambda)

    return node_membership, bp_lambda, count_matrix
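# A minimal usage sketch for the Poisson baseline fit above, assuming `utils`
# and the estimators in this module are importable. The simulation parameters
# below are illustrative assumptions, not values from the original experiments.
sim_params = {'number_of_nodes': 64, 'end_time': 100, 'alpha': 0.05,
              'beta': 0.08, 'mu_diag': 0.002, 'mu_off_diag': 0.001,
              'scale': False}
event_dict, true_membership = utils.simulate_community_hawkes(sim_params)

# Fit a 2-block Poisson baseline and inspect the block-pair rate matrix.
node_membership, bp_lambda, count_matrix = fit_poisson_baseline_model(
    event_dict, num_nodes=64, duration=100, num_classes=2, verbose=True)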
def test_spectral_clustering_on_generative_model(n, t, k):
    params = {'number_of_nodes': n,
              'end_time': t,
              'class_probabilities': np.ones(k) / k,
              'alpha': 0.06,
              'beta': 0.08,
              'mu_diag': 0.085,
              'mu_off_diag': 0.065,
              'scale': False,
              'n_cores': 1}

    event_dict, true_class_assignments = utils.simulate_community_hawkes(params)

    # Spectral clustering on the aggregated adjacency matrix
    agg_adj = utils.event_dict_to_aggregated_adjacency(len(true_class_assignments),
                                                       event_dict)
    agg_adj_pred = spectral_cluster(agg_adj, num_classes=k)
    agg_adj_sc_rand = adjusted_rand_score(true_class_assignments, agg_adj_pred)

    return agg_adj_sc_rand
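# Test functions like the one above are typically swept over a grid of network
# sizes and averaged over repeated simulations. A hedged sketch of such a
# sweep; the grid values and repetition count are illustrative assumptions.
for n in [128, 256, 512]:
    rand_scores = [test_spectral_clustering_on_generative_model(n, t=200, k=4)
                   for _ in range(10)]
    print(f"n={n}: mean adjusted Rand index {np.mean(rand_scores):.3f} "
          f"(std {np.std(rand_scores):.3f})")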
def calc_mean_and_error_of_count_estimate(n_nodes):
    # Simulation parameters (`class_probs`, `end_time`, `mu_diag`, etc.) are
    # expected to be defined at module level by the experiment script.
    params = {'number_of_nodes': n_nodes,
              'class_probabilities': class_probs,
              'end_time': end_time,
              'mu_diag': mu_diag,
              'mu_off_diag': mu_off_diag,
              'alpha': alpha_off_diag,
              'alpha_diag': alpha_diag,
              'beta': beta_off_diag,
              'beta_diag': beta_diag,
              'scale': False}

    event_dict, true_node_membership = utils.simulate_community_hawkes(params)

    # Retry spectral clustering until all `n_classes` communities are non-empty
    invalid_cluster = True
    while invalid_cluster:
        # Spectral clustering on the aggregated adjacency matrix
        agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
        node_membership = spectral_cluster(agg_adj, num_classes=n_classes,
                                           verbose=False)
        unique_vals, cnts = np.unique(node_membership, return_counts=True)
        invalid_cluster = len(unique_vals) != n_classes
        if invalid_cluster:
            print(unique_vals, cnts)

    sc_rand = adjusted_rand_score(true_node_membership, node_membership)
    # Broadcast to match the shape of the other parameters for easy retrieval
    sc_rand = np.zeros((n_classes, n_classes)) + sc_rand

    # Parameter estimation with estimated communities
    bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio = \
        model_utils.estimate_bp_hawkes_params(event_dict, node_membership,
                                              end_time, n_classes)

    # Parameter estimation with known communities. `k_` stands for `known_`
    k_bp_mu, k_bp_alpha, k_bp_beta, k_bp_alpha_beta_ratio = \
        model_utils.estimate_bp_hawkes_params(event_dict, true_node_membership,
                                              end_time, n_classes)

    return (bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta, sc_rand,
            k_bp_mu, k_bp_alpha_beta_ratio, k_bp_alpha, k_bp_beta)
tic = time.time()
train_agg_adj = utils.event_dict_to_aggregated_adjacency(train_num_nodes,
                                                         train_event_dict)
if not use_agg_adj:
    train_adj = utils.event_dict_to_adjacency(train_num_nodes, train_event_dict)
toc = time.time()

if verbose:
    print(f"Generated aggregated adj in {toc - tic:.1f}s")

tic_tot = time.time()
tic = time.time()

# Running spectral clustering
if use_agg_adj:
    train_node_membership = spectral_cluster(train_agg_adj,
                                             num_classes=num_classes,
                                             verbose=False,
                                             plot_eigenvalues=False)
else:
    train_node_membership = spectral_cluster(train_adj,
                                             num_classes=num_classes,
                                             verbose=False,
                                             plot_eigenvalues=False)
toc = time.time()

print(f"Spectral clustering done in {toc - tic:.1f}s")

if verbose:
    print("Community assignment prob:",
          np.unique(train_node_membership, return_counts=True)[1] / train_num_nodes)
def main():
    r = 5    # number of random initializations
    d = 2    # reduced dimension
    k = 2    # number of clusters
    N = 200  # sample size

    part = input("Input part A/B \n")
    number = input("Input number 1~6 \n")  # input() returns a string

    if part == 'A':
        data = sio.loadmat('HW3_Data/dataset1.mat')
        Y = data['Y']
        if number == '1':
            plt.close()
            x1 = Y[0, :]
            y1 = Y[1, :]
            plt.scatter(x1, y1)
            plt.show()
            plt.close()
        if number == '2':
            plt.close()
            x1 = Y[0, :]
            y1 = Y[2, :]
            plt.scatter(x1, y1)
            plt.show()
        if number == '3':
            plt.close()
            u, y_reduced = pca.pca(Y, 2)
            x = y_reduced[0, :]
            y = y_reduced[1, :]
            plt.scatter(np.asarray(x), np.asarray(y))
            plt.show()
        if number == '4':
            plt.close()
            result = kmeans.k_means(np.matrix(Y), k, r)
            x1, y1, x2, y2 = [], [], [], []
            U, y_2d = pca.pca(Y, d)
            for i in range(N):
                if result[i] == 0:
                    x1.append(y_2d[0, i])
                    y1.append(y_2d[1, i])
                else:
                    x2.append(y_2d[0, i])
                    y2.append(y_2d[1, i])
            plt.scatter(x1, y1, color='red')
            plt.scatter(x2, y2, color='blue')
            plt.show()
        if number == '5':
            plt.close()
            U, y_2d = pca.pca(Y, d)
            result = kmeans.k_means(y_2d, k, r)
            x1, y1, x2, y2 = [], [], [], []
            for i in range(N):
                if result[i] == 0:
                    x1.append(y_2d[0, i])
                    y1.append(y_2d[1, i])
                else:
                    x2.append(y_2d[0, i])
                    y2.append(y_2d[1, i])
            plt.scatter(x1, y1, color='red')
            plt.scatter(x2, y2, color='blue')
            plt.show()

    if part == 'B':
        data = sio.loadmat('HW3_Data/dataset2.mat')
        Y = data['Y']
        if number == '1':
            x1 = Y[0, :]
            y1 = Y[1, :]
            plt.scatter(x1, y1)
            plt.show()
        if number == '2':
            x1 = Y[0, :]
            y1 = Y[2, :]
            plt.scatter(x1, y1)
            plt.show()
        if number == '3':
            u, y_reduced = pca.pca(Y, 2)
            x = y_reduced[0, :]
            y = y_reduced[1, :]
            plt.scatter(np.asarray(x), np.asarray(y))
            plt.show()
        if number == '4':
            U, y_2d = pca.pca(Y, d)
            result = kmeans.k_means(np.matrix(y_2d), k, r)
            x1, y1, x2, y2 = [], [], [], []
            for i in range(N):
                if result[i] == 0:
                    x1.append(y_2d[0, i])
                    y1.append(y_2d[1, i])
                else:
                    x2.append(y_2d[0, i])
                    y2.append(y_2d[1, i])
            plt.scatter(x1, y1, color='red')
            plt.scatter(x2, y2, color='blue')
            plt.show()
        if number == '5':
            # Kernel PCA followed by k-means in the reduced space
            kernel = pca.get_kernel(Y)
            u = pca.kernel_pca(kernel, d)
            y_reduced = np.matrix(kernel * u)
            result = kmeans.k_means(y_reduced.T, k, r)
            x1, y1, x2, y2 = [], [], [], []
            # U, y_2d = pca.pca(Y, d)
            y_2d = y_reduced.T
            for i in range(N):
                if result[i] == 0:
                    x1.append(y_2d[0, i])
                    y1.append(y_2d[1, i])
                else:
                    x2.append(y_2d[0, i])
                    y2.append(y_2d[1, i])
            plt.scatter(x1, y1, color='red')
            plt.scatter(x2, y2, color='blue')
            plt.show()
        if number == '6':
            # Spectral clustering on a similarity graph built from Y
            W = np.matrix(spectral.get_w_matrix(Y, 5, 1))
            result = spectral.spectral_cluster(W, 2)
            x1, y1, x2, y2 = [], [], [], []
            U, x = pca.pca(Y, 2)
            y_2d = x
            for i in range(N):
                if result[i] == 0:
                    x1.append(y_2d[0, i])
                    y1.append(y_2d[1, i])
                else:
                    x2.append(y_2d[0, i])
                    y2.append(y_2d[1, i])
            plt.scatter(x1, y1, color='red')
            plt.scatter(x2, y2, color='blue')
            plt.show()
def fit_community_model(event_dict, num_nodes, duration, num_classes,
                        local_search_max_iter, local_search_n_cores,
                        verbose=False):
    """
    Fits the CHIP model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) Duration of the network
    :param num_classes: (int) Number of blocks / classes
    :param local_search_max_iter: Maximum number of local search iterations to
                                  perform. If 0, no local search is done.
    :param local_search_n_cores: Number of cores to parallelize local search.
                                 Only applicable if `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    # adj = utils.event_dict_to_adjacency(num_nodes, event_dict)

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj, num_classes, verbose=False)

    if local_search_max_iter > 0 and num_classes > 1:
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict, num_classes, node_membership, duration,
            max_iter=local_search_max_iter, n_cores=local_search_n_cores,
            return_fitted_param=True, verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)
    else:
        bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
            agg_adj, node_membership, duration, 1e-10 / duration)

        bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)
        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

        for b_i in range(num_classes):
            for b_j in range(num_classes):
                # Number of node pairs in block pair (b_i, b_j); diagonal
                # block pairs exclude self-pairs
                bp_size = (len(np.where(node_membership == b_i)[0]) *
                           len(np.where(node_membership == b_j)[0]))
                if b_i == b_j:
                    bp_size -= len(np.where(node_membership == b_i)[0])

                bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                    block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                    bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

        bp_alpha = bp_alpha_beta_ratio * bp_beta

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        print("Membership percentage:", class_prob)
        print("Mu:")
        print(bp_mu)
        print("\nAlpha:")
        print(bp_alpha)
        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events
# Fit Facebook Wall-posts
if fit_chip:
    tic = time.time()
    agg_adj = utils.event_dict_to_aggregated_adjacency(fb_num_node,
                                                       fb_event_dict)
    adj = utils.event_dict_to_adjacency(fb_num_node, fb_event_dict)
    toc = time.time()

    if verbose:
        print(f"Generated aggregated adj in {toc - tic:.1f}s")

    tic_tot = time.time()
    tic = time.time()

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj, num_classes=10, verbose=False,
                                       plot_eigenvalues=True)
    toc = time.time()

    print(f"Spectral clustering done in {toc - tic:.1f}s")

    if verbose:
        print("Community assignment prob:",
              np.unique(node_membership, return_counts=True)[1] / fb_num_node)

    tic = time.time()
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, fb_duration, 1e-10 / fb_duration)
    toc = time.time()
params = {'number_of_nodes': n_nodes,
          'alpha': 0.6,
          'beta': 0.8,
          'mu_off_diag': 0.8,
          'mu_diag': 1.6,
          'end_time': duration,
          'class_probabilities': np.ones(n_classes) / n_classes,
          'n_cores': -1}

event_dict, true_class_assignments = utils.simulate_community_hawkes(
    params, network_name="local_seach_test_networks", load_if_exists=False)

agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
spectral_node_membership = spectral_cluster(agg_adj, num_classes=n_classes)
sc_rand = adjusted_rand_score(true_class_assignments, spectral_node_membership)
print(f"SC Rand index: {sc_rand:.3f}")

print("Parallel")
tic = time.time()
local_search_node_membership = chip_local_search(
    event_dict, n_classes, spectral_node_membership, duration,
    max_iter=10, n_cores=34, verbose=True)
toc = time.time()
print(f"local search took {toc - tic:.2f}s.")
def fit_community_model(event_dict, num_nodes, duration, num_classes,
                        local_search_max_iter, local_search_n_cores,
                        verbose=False):
    """
    Fits the CHIP model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) Duration of the network
    :param num_classes: (int) Number of blocks / classes
    :param local_search_max_iter: Maximum number of local search iterations to
                                  perform. If 0, no local search is done.
    :param local_search_n_cores: Number of cores to parallelize local search.
                                 Only applicable if `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    # adj = utils.event_dict_to_adjacency(num_nodes, event_dict)

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj, num_classes, verbose=False,
                                       plot_eigenvalues=False)

    if local_search_max_iter > 0 and num_classes > 1:
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict, num_classes, node_membership, duration,
            max_iter=local_search_max_iter, n_cores=local_search_n_cores,
            return_fitted_param=True, verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)
    else:
        (bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio,
         block_pair_events) = estimate_bp_hawkes_params(
            event_dict, node_membership, duration, num_classes,
            agg_adj=agg_adj, return_block_pair_events=True)

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        print("Membership percentage:", class_prob)
        print("Mu:")
        print(bp_mu)
        print("\nAlpha:")
        print(bp_alpha)
        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events
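# A hedged usage sketch for `fit_community_model` above, assuming the `utils`
# module from this repository is importable. The network size, duration, and
# fit settings are illustrative assumptions, not values from the original
# experiments.
sim_params = {'number_of_nodes': 128, 'end_time': 200, 'alpha': 0.06,
              'beta': 0.08, 'mu_diag': 0.085, 'mu_off_diag': 0.065,
              'scale': False}
event_dict, true_membership = utils.simulate_community_hawkes(sim_params)

# Fit CHIP with 4 blocks, refining the spectral clustering with local search.
node_membership, mu, alpha, beta, bp_events = fit_community_model(
    event_dict, num_nodes=128, duration=200, num_classes=4,
    local_search_max_iter=10, local_search_n_cores=4, verbose=True)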