Example #1
0
def test_spectral_clustering_on_generative_model(scalar):
    """Simulate a community Hawkes network and score spectral clustering.

    Clusters the aggregated (weighted) adjacency matrix and, optionally,
    the unweighted adjacency matrix, scoring each against the true
    community assignment with the adjusted Rand index.

    :param scalar: multiplier applied to `mu_diag` (and, when `sim_type`
        is not 'b', to `mu_off_diag`) of the simulated network.
    :return: agg_adj_sc_rand, or the tuple
        (agg_adj_sc_rand, adj_sc_rand, edge density of the unweighted
        adjacency) when `also_use_unweighted_adjacency` is truthy.
    """
    # NOTE(review): depends on module-level globals `sim_type`, `n_classes`
    # and `also_use_unweighted_adjacency` — confirm they are set elsewhere.
    params = {
        'alpha': 0.05,
        'beta': 0.08,
        'mu_diag': 0.00075 * scalar,
        'mu_off_diag': 0.00035 if sim_type == 'b' else 0.00035 * scalar,
        'scale': False,
        'number_of_nodes': 256
    }

    event_dict, true_class_assignments = utils.simulate_community_hawkes(
        params)
    num_nodes = len(true_class_assignments)

    # Spectral clustering on aggregated adjacency matrix
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    agg_adj_pred = spectral_cluster(agg_adj, num_classes=n_classes)
    agg_adj_sc_rand = adjusted_rand_score(true_class_assignments, agg_adj_pred)

    if not also_use_unweighted_adjacency:
        return agg_adj_sc_rand

    # Spectral clustering on the UNWEIGHTED adjacency matrix.
    # (The original comment wrongly said "aggregated" here.)
    adj = utils.event_dict_to_adjacency(num_nodes, event_dict)
    adj_pred = spectral_cluster(adj, num_classes=n_classes)
    adj_sc_rand = adjusted_rand_score(true_class_assignments, adj_pred)

    # Third element is the edge density of the unweighted adjacency matrix.
    return agg_adj_sc_rand, adj_sc_rand, np.sum(adj) / (num_nodes**2)
def test_spectral_clustering_on_generative_model(n_nodes):
    """Simulate a community Hawkes network and score spectral clustering.

    Runs spectral clustering on both the unweighted and the aggregated
    (weighted) adjacency matrix and scores each against the ground-truth
    communities with the adjusted Rand index.

    :param n_nodes: number of nodes to simulate.
    :return: (adj_sc_rand, agg_adj_sc_rand) adjusted Rand indices.
    """
    # Parameters shared by both scenarios; the flag `agg_adj_should_fail`
    # (a module-level global) selects the scenario-specific intensities.
    params = {
        'number_of_nodes': n_nodes,
        'mu_off_diag': 0.001,
        'scale': False,
        'end_time': 400,
        'class_probabilities': class_prob,
        'n_cores': chip_n_cores,
    }
    if agg_adj_should_fail:
        params.update({'alpha': 7.0, 'beta': 8.0, 'mu_diag': 0.002})
    else:
        params.update({
            'alpha': 0.001,
            'beta': 0.008,
            'mu_diag': 0.001,
            'alpha_diag': 0.006,
        })

    event_dict, true_class_assignments = utils.simulate_community_hawkes(
        params)

    def rand_index(matrix):
        # Cluster the given matrix and score against the true assignment.
        prediction = spectral_cluster(matrix,
                                      num_classes=n_classes,
                                      verbose=False)
        return adjusted_rand_score(true_class_assignments, prediction)

    # Unweighted adjacency, then aggregated (weighted) adjacency.
    adj_sc_rand = rand_index(
        utils.event_dict_to_adjacency(n_nodes, event_dict))
    agg_adj_sc_rand = rand_index(
        utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict))

    return adj_sc_rand, agg_adj_sc_rand
Example #3
0
def fit_poisson_baseline_model(event_dict,
                               num_nodes,
                               duration,
                               num_classes,
                               verbose=False):
    """
    Fits a Poisson baseline model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param verbose: Prints fitted Poisson baseline parameters

    :return: node_membership, lambda, block_pair_events
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # If there are as many classes as nodes, assign each node to its own
    # class — spectral clustering would be degenerate in that case.
    if num_classes == num_nodes:
        node_membership = list(range(num_nodes))
    else:
        # Running spectral clustering on the aggregated adjacency matrix
        node_membership = spectral_cluster(agg_adj, num_classes=num_classes)

    count_matrix = event_dict_to_block_pair_event_counts(
        event_dict, node_membership, num_classes)

    # default_lambda keeps empty block pairs at a tiny positive rate.
    bp_lambda = estimate_poisson_lambda(count_matrix,
                                        node_membership,
                                        duration,
                                        num_classes,
                                        default_lambda=1e-10 / duration)

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        # BUG FIX: original used an f-string with no placeholders and a
        # trailing space, printing a doubled space before the values.
        print("Membership percentage:", class_prob)

        print("Lambda:")
        print(bp_lambda)

    return node_membership, bp_lambda, count_matrix
Example #4
0
def test_spectral_clustering_on_generative_model(n, t, k):
    """Simulate a k-block community Hawkes network and score clustering.

    :param n: number of nodes.
    :param t: simulation end time.
    :param k: number of (equally probable) communities.
    :return: adjusted Rand index of spectral clustering on the aggregated
        adjacency matrix against the true community assignment.
    """
    simulation_params = {
        'number_of_nodes': n,
        'end_time': t,
        'class_probabilities': np.ones(k) / k,
        'alpha': 0.06,
        'beta': 0.08,
        'mu_diag': 0.085,
        'mu_off_diag': 0.065,
        'scale': False,
        'n_cores': 1,
    }

    event_dict, true_labels = utils.simulate_community_hawkes(
        simulation_params)
    num_nodes = len(true_labels)

    # Spectral clustering on the aggregated (weighted) adjacency matrix.
    weighted_adj = utils.event_dict_to_aggregated_adjacency(
        num_nodes, event_dict)
    predicted_labels = spectral_cluster(weighted_adj, num_classes=k)

    return adjusted_rand_score(true_labels, predicted_labels)
def calc_mean_and_error_of_count_estiamte(n_nodes):
    """Simulate a CHIP network and estimate block-pair Hawkes parameters.

    Parameters are estimated twice: once with communities recovered by
    spectral clustering and once with the known true communities (the
    `k_`-prefixed values). NOTE: the typo in the function name
    ("estiamte") is kept so existing callers are not broken.

    :param n_nodes: number of nodes to simulate.
    :return: (bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta, sc_rand,
              k_bp_mu, k_bp_alpha_beta_ratio, k_bp_alpha, k_bp_beta)
    """
    # NOTE(review): relies on module-level globals (class_probs, end_time,
    # mu_diag, mu_off_diag, alpha_*, beta_*, n_classes) — confirm they exist.
    params = {
        'number_of_nodes': n_nodes,
        'class_probabilities': class_probs,
        'end_time': end_time,
        'mu_diag': mu_diag,
        'mu_off_diag': mu_off_diag,
        'alpha': alpha_off_diag,
        'alpha_diag': alpha_diag,
        'beta': beta_off_diag,
        'beta_diag': beta_diag,
        'scale': False
    }

    event_dict, true_node_membership = utils.simulate_community_hawkes(params)

    # The aggregated adjacency does not change between clustering attempts,
    # so build it once outside the retry loop (hoisted loop-invariant).
    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)

    # Retry spectral clustering until it yields exactly n_classes clusters
    # (presumably its initialization is randomized — confirm; otherwise
    # this loop could never terminate once it fails).
    invalid_cluster = True
    while invalid_cluster:
        node_membership = spectral_cluster(agg_adj,
                                           num_classes=n_classes,
                                           verbose=False)
        unique_vals, cnts = np.unique(node_membership, return_counts=True)
        invalid_cluster = len(unique_vals) != n_classes
        if invalid_cluster:  # reuse the flag instead of re-testing
            print(unique_vals, cnts)

    sc_rand = adjusted_rand_score(true_node_membership, node_membership)
    # Broadcast the scalar Rand index to an (n_classes, n_classes) array so
    # it matches the shape of the other returned params for easy retrieval.
    sc_rand = np.zeros((n_classes, n_classes)) + sc_rand

    # Param estimation with the estimated communities.
    bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(
        event_dict, node_membership, end_time, n_classes)
    # Param estimation with the known communities. k_ is for known_.
    k_bp_mu, k_bp_alpha, k_bp_beta, k_bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(
        event_dict, true_node_membership, end_time, n_classes)
    return bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta, sc_rand, k_bp_mu, k_bp_alpha_beta_ratio, k_bp_alpha, k_bp_beta
        train_num_nodes, train_event_dict)

    # NOTE(review): excerpt from a larger fitting routine — `use_agg_adj`,
    # `train_agg_adj`, `train_num_nodes`, `train_event_dict`, `num_classes`,
    # `tic` and `verbose` are defined before this excerpt; confirm upstream.
    if not use_agg_adj:
        train_adj = utils.event_dict_to_adjacency(train_num_nodes,
                                                  train_event_dict)
    toc = time.time()

    if verbose:
        print(f"Generated aggregated adj in {toc - tic:.1f}s")

    tic_tot = time.time()
    tic = time.time()
    # Running spectral clustering on whichever adjacency was requested
    if use_agg_adj:
        train_node_membership = spectral_cluster(train_agg_adj,
                                                 num_classes=num_classes,
                                                 verbose=False,
                                                 plot_eigenvalues=False)
    else:
        train_node_membership = spectral_cluster(train_adj,
                                                 num_classes=num_classes,
                                                 verbose=False,
                                                 plot_eigenvalues=False)
    toc = time.time()

    print(f"Spectral clustering done in {toc - tic:.1f}s")

    if verbose:
        # Fraction of nodes assigned to each community.
        print(
            "Community assignment prob:",
            np.unique(train_node_membership, return_counts=True)[1] /
            train_num_nodes)
Example #7
0
def _scatter_clusters(y_2d, result, n):
    """Scatter-plot `n` 2-D points: red for cluster label 0, blue otherwise.

    :param y_2d: 2 x n array/matrix of point coordinates (rows = x, y).
    :param result: per-point cluster labels (0 or non-zero).
    :param n: number of points to plot.
    """
    x1, y1, x2, y2 = [], [], [], []
    for i in range(n):
        if result[i] == 0:
            x1.append(y_2d[0, i])
            y1.append(y_2d[1, i])
        else:
            x2.append(y_2d[0, i])
            y2.append(y_2d[1, i])
    plt.scatter(x1, y1, color='red')
    plt.scatter(x2, y2, color='blue')
    plt.show()


def main():
    """Interactive driver for the HW3 PCA / k-means / spectral demos.

    Prompts for a part ('A' or 'B') and a plot number ('1'-'6'), then loads
    the matching dataset and shows the requested scatter plot.
    """
    r = 5  # number of random initializations for k-means
    d = 2  # reduced dimension
    k = 2  # number of clusters
    N = 200  # sample size

    part = input("Input part A/B \n")
    # BUG FIX: input() returns a string, so the original comparisons
    # (`number == 1` ... `number == 6`) were always False and no plot was
    # ever shown. Compare against string literals instead.
    number = input("Input number 1~6 \n")

    if part == 'A':
        data = sio.loadmat('HW3_Data/dataset1.mat')
        Y = data['Y']
        if number == '1':
            plt.close()
            plt.scatter(Y[0, :], Y[1, :])
            plt.show()
            plt.close()
        if number == '2':
            plt.close()
            plt.scatter(Y[0, :], Y[2, :])
            plt.show()
        if number == '3':
            plt.close()
            u, y_reduced = pca.pca(Y, 2)
            plt.scatter(np.asarray(y_reduced[0, :]),
                        np.asarray(y_reduced[1, :]))
            plt.show()
        if number == '4':
            plt.close()
            # Cluster in the original space; plot in the PCA-reduced space.
            result = kmeans.k_means(np.matrix(Y), k, r)
            U, y_2d = pca.pca(Y, d)
            _scatter_clusters(y_2d, result, N)
        if number == '5':
            plt.close()
            # Cluster directly in the PCA-reduced space.
            U, y_2d = pca.pca(Y, d)
            result = kmeans.k_means(y_2d, k, r)
            _scatter_clusters(y_2d, result, N)

    if part == 'B':
        data = sio.loadmat('HW3_Data/dataset2.mat')
        Y = data['Y']
        if number == '1':
            plt.scatter(Y[0, :], Y[1, :])
            plt.show()
        if number == '2':
            plt.scatter(Y[0, :], Y[2, :])
            plt.show()
        if number == '3':
            u, y_reduced = pca.pca(Y, 2)
            plt.scatter(np.asarray(y_reduced[0, :]),
                        np.asarray(y_reduced[1, :]))
            plt.show()
        if number == '4':
            U, y_2d = pca.pca(Y, d)
            result = kmeans.k_means(np.matrix(y_2d), k, r)
            _scatter_clusters(y_2d, result, N)
        if number == '5':
            # Kernel PCA, then k-means on the reduced representation.
            kernel = pca.get_kernel(Y)
            u = pca.kernel_pca(kernel, d)
            y_reduced = np.matrix(kernel * u)
            result = kmeans.k_means(y_reduced.T, k, r)
            _scatter_clusters(y_reduced.T, result, N)
        if number == '6':
            # Spectral clustering on a similarity graph; plot in PCA space.
            W = np.matrix(spectral.get_w_matrix(Y, 5, 1))
            result = spectral.spectral_cluster(W, 2)
            U, y_2d = pca.pca(Y, 2)
            _scatter_clusters(y_2d, result, N)
def fit_community_model(event_dict,
                        num_nodes,
                        duration,
                        num_classes,
                        local_search_max_iter,
                        local_search_n_cores,
                        verbose=False):
    """
    Fits CHIP model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search to be performed. If 0, no local search is done
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # Running spectral clustering for the initial community assignment
    node_membership = spectral_cluster(agg_adj, num_classes, verbose=False)

    if local_search_max_iter > 0 and num_classes > 1:
        # Refine the communities (and obtain fitted params) via local search.
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict,
            num_classes,
            node_membership,
            duration,
            max_iter=local_search_max_iter,
            n_cores=local_search_n_cores,
            return_fitted_param=True,
            verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

    else:
        # Closed-form estimates of mu and alpha/beta ratio from event counts.
        bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
            agg_adj, node_membership, duration, 1e-10 / duration)
        # BUG FIX: `np.float` was removed in NumPy 1.24 (it was an alias for
        # the builtin float), so `dtype=np.float` raises AttributeError on
        # modern NumPy. `dtype=float` is behavior-identical.
        bp_beta = np.zeros((num_classes, num_classes), dtype=float)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

        for b_i in range(num_classes):
            for b_j in range(num_classes):
                # Number of ordered node pairs in block pair (b_i, b_j);
                # exclude self-pairs on the diagonal.
                bp_size = len(np.where(node_membership == b_i)[0]) * len(
                    np.where(node_membership == b_j)[0])
                if b_i == b_j:
                    bp_size -= len(np.where(node_membership == b_i)[0])

                bp_beta[b_i,
                        b_j], _ = estimate_utils.estimate_beta_from_events(
                            block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                            bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

        bp_alpha = bp_alpha_beta_ratio * bp_beta

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        # BUG FIX: original used an f-string with no placeholders and a
        # trailing space, printing a doubled space before the values.
        print("Membership percentage:", class_prob)

        print("Mu:")
        print(bp_mu)

        print("\nAlpha:")
        print(bp_alpha)

        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events
# fit Facebook Wall-posts
# NOTE(review): script fragment — `fit_chip`, `fb_num_node`, `fb_event_dict`,
# `fb_duration` and `verbose` must be defined earlier in the full file.
if fit_chip:
    tic = time.time()
    # Build both the weighted (aggregated) and unweighted adjacency matrices.
    agg_adj = utils.event_dict_to_aggregated_adjacency(fb_num_node,
                                                       fb_event_dict)
    adj = utils.event_dict_to_adjacency(fb_num_node, fb_event_dict)
    toc = time.time()

    if verbose:
        print(f"Generated aggregated adj in {toc - tic:.1f}s")

    tic_tot = time.time()
    tic = time.time()
    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj,
                                       num_classes=10,
                                       verbose=False,
                                       plot_eigenvalues=True)

    toc = time.time()

    print(f"Spectral clustering done in {toc - tic:.1f}s")

    if verbose:
        # Fraction of nodes assigned to each community.
        print("Community assignment prob:",
              np.unique(node_membership, return_counts=True)[1] / fb_num_node)

    tic = time.time()
    # Closed-form estimation of mu and the alpha/beta ratio from counts.
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, fb_duration, 1e-10 / fb_duration)
    toc = time.time()
    # NOTE(review): the lines below appear to come from a DIFFERENT snippet
    # (they reference `n_nodes`, `duration`, `n_classes`, `chip_local_search`,
    # none defined above) glued on by the scrape — verify against originals.
    params = {
        'number_of_nodes': n_nodes,
        'alpha': 0.6,
        'beta': 0.8,
        'mu_off_diag': 0.8,
        'mu_diag': 1.6,
        'end_time': duration,
        'class_probabilities': np.ones(n_classes) / n_classes,
        'n_cores': -1
    }

    event_dict, true_class_assignments = utils.simulate_community_hawkes(
        params, network_name="local_seach_test_networks", load_if_exists=False)

    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
    spectral_node_membership = spectral_cluster(agg_adj, num_classes=n_classes)
    sc_rand = adjusted_rand_score(true_class_assignments,
                                  spectral_node_membership)
    print(f"SC Rand index: {sc_rand:.3f}")

    print("Parallel")
    tic = time.time()
    local_search_node_membership = chip_local_search(event_dict,
                                                     n_classes,
                                                     spectral_node_membership,
                                                     duration,
                                                     max_iter=10,
                                                     n_cores=34,
                                                     verbose=True)
    toc = time.time()
    print(f"local search took {toc - tic:.2f}s.")
Example #11
0
def fit_community_model(event_dict,
                        num_nodes,
                        duration,
                        num_classes,
                        local_search_max_iter,
                        local_search_n_cores,
                        verbose=False):
    """
    Fits CHIP model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search to be performed. If 0, no local search is done
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # Running spectral clustering for the initial community assignment
    node_membership = spectral_cluster(agg_adj,
                                       num_classes,
                                       verbose=False,
                                       plot_eigenvalues=False)

    if local_search_max_iter > 0 and num_classes > 1:
        # Refine the communities (and obtain fitted params) via local search.
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict,
            num_classes,
            node_membership,
            duration,
            max_iter=local_search_max_iter,
            n_cores=local_search_n_cores,
            return_fitted_param=True,
            verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

    else:
        # Estimate all block-pair Hawkes parameters from the spectral
        # clustering assignment in one call.
        (bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio,
         block_pair_events) = estimate_bp_hawkes_params(
             event_dict,
             node_membership,
             duration,
             num_classes,
             agg_adj=agg_adj,
             return_block_pair_events=True)

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        # BUG FIX: original used an f-string with no placeholders and a
        # trailing space, printing a doubled space before the values.
        print("Membership percentage:", class_prob)

        print("Mu:")
        print(bp_mu)

        print("\nAlpha:")
        print(bp_alpha)

        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events