示例#1
0
def calc_full_log_likelihood(count_matrix,
                             node_membership,
                             duration,
                             bp_lambda,
                             num_classes,
                             add_com_assig_log_prob=True):
    """
    Evaluate the full log-likelihood of the Poisson baseline model.

    Every block pair contributes ``count * log(lambda) - lambda * duration * block_pair_size``;
    the contributions are summed over all block pairs. Optionally, the multinomial log-probability
    of the observed block sizes (using the empirical block proportions as class probabilities)
    is added on top.

    :param count_matrix: n_classes x n_classes where entry ij denotes the number of events in block-pair ij
    :param node_membership: (list) membership of every node to one of K classes
    :param duration: (int) duration of the network
    :param bp_lambda: n_classes x n_classes where entry ij is the lambda of the block pair ij
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the log-probability of the community assignment to the
                                   total log-likelihood.

    :return: log-likelihood of the Poisson baseline model
    """
    block_pair_sizes = utils.calc_block_pair_size(node_membership, num_classes)

    # Collected in order and summed at the end, so the total is 0 + events term (+ assignment term).
    ll_terms = [
        np.sum(count_matrix * np.log(bp_lambda) -
               (bp_lambda * duration * block_pair_sizes))
    ]

    if add_com_assig_log_prob:
        # np.unique only reports classes that actually occur in node_membership.
        _, nodes_per_block = np.unique(node_membership, return_counts=True)
        empirical_class_prob = nodes_per_block / sum(nodes_per_block)
        assignment_dist = multinomial(len(node_membership),
                                      empirical_class_prob)
        ll_terms.append(assignment_dist.logpmf(nodes_per_block))

    return sum(ll_terms)
示例#2
0
def compute_prediction_mean_and_variance_for_block_pair_event_count(
        train_bp_mu, train_bp_alpha_beta_ratio, test_block_pair_events,
        train_node_membership, n_classes, train_duration, test_duration):
    """
    Predicts the mean and variance of block pair event counts for the test window and
    gathers the observed test counts for comparison.

    The per-pair train statistics ``mu * T_train / (1 - m)`` (mean) and
    ``mu * T_train / (1 - m)^3`` (variance) are divided by the train duration, multiplied by
    the test duration and finally by the block pair sizes of the train membership.

    :param train_bp_mu: mu of every block pair (presumably n_classes x n_classes, fit on the train dataset)
    :param train_bp_alpha_beta_ratio: alpha to beta ratio (m) of every block pair, same layout as train_bp_mu
    :param test_block_pair_events: (list) n_classes x n_classes where entry ij is a list of event lists between nodes in
                                    block i to nodes in block j of the test dataset.
    :param train_node_membership: (list) membership of every node to one of K classes in the train dataset.
    :param n_classes: (int) number of blocks / classes
    :param train_duration: duration of the train dataset
    :param test_duration: duration of the test dataset

    :return: tuple of (predicted mean, predicted variance, observed test block pair event count)
    """
    block_pair_sizes = utils.calc_block_pair_size(train_node_membership,
                                                  n_classes)

    baseline_train_events = train_bp_mu * train_duration
    one_minus_ratio = 1 - train_bp_alpha_beta_ratio

    def rescale_to_test_window(train_statistic):
        # train window -> per unit time -> test window -> whole block pair
        return (train_statistic /
                train_duration) * test_duration * block_pair_sizes

    sample_mean = rescale_to_test_window(baseline_train_events /
                                         one_minus_ratio)
    sample_var = rescale_to_test_window(baseline_train_events /
                                        (one_minus_ratio**3))

    observed_test_counts = compute_block_pair_total_event_count(
        test_block_pair_events, n_classes)

    return sample_mean, sample_var, observed_test_counts
示例#3
0
def compute_block_pair_event_count_empirical_mean_and_variance(
        block_pair_events, node_membership, n_classes):
    """
    Computes the empirical mean and variance of per-node-pair event counts in every block pair.

    Node pairs that have no event list in ``block_pair_events[i][j]`` are counted as zeros, so that
    each block pair's statistics are taken over as many entries as its block pair size.

    :param block_pair_events: (list) n_classes x n_classes where entry ij is a list of event lists between nodes in
                              block i to nodes in block j.
    :param node_membership: (list) membership of every node to one of K classes.
    :param n_classes: (int) number of blocks / classes

    :return: a tuple of two matrices of KxK for mean and variance of block pair event counts
    """
    pairs_per_block_pair = utils.calc_block_pair_size(
        node_membership, n_classes).astype(int)

    mean_matrix = np.zeros((n_classes, n_classes))
    variance_matrix = np.zeros((n_classes, n_classes))

    # Row-major walk over all (i, j) block pairs.
    for i, j in np.ndindex(n_classes, n_classes):
        observed_counts = [len(events) for events in block_pair_events[i][j]]
        # Node pairs without any event are padded in as zero counts.
        n_silent_pairs = pairs_per_block_pair[i, j] - len(observed_counts)
        all_counts = observed_counts + [0] * n_silent_pairs

        mean_matrix[i, j] = np.mean(all_counts)
        variance_matrix[i, j] = np.std(all_counts)**2

    return mean_matrix, variance_matrix
示例#4
0
def compute_mu_pairwise_difference_confidence_interval(
        event_dict, node_membership, num_classes, mu, duration,
        block_pair_tuple_list, z_alpha):
    """
    Computes the pairwise difference of mu along with its confidence interval

    For every requested tuple (a, b, x, y) the difference ``mu[a, b] - mu[x, y]`` is reported together
    with a half-width derived from the sample means of the aggregated adjacency matrix.

    :param event_dict: Edge dictionary of events between all node pair.
    :param node_membership: (list) membership of every node to one of K classes.
    :param num_classes: (int) number of blocks / classes
    :param mu: KxK matrix of mu values for each block pair
    :param duration: the duration of the network
    :param block_pair_tuple_list: (list) of tuples for pairwise difference [(1, 1, 1, 2), (1, 1, 2, 1)]
    :param z_alpha: significance level (resulting in (1 - z_alpha) * 100 % CI); it is divided by
                    4 * (num_classes - 1) * num_classes before the normal percentile is looked up.

    :return: dict with passed tuples as keys and a tuple of (difference, CI) as value
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(len(node_membership),
                                                       event_dict)

    # Only the sample mean is needed here; the variance is discarded.
    sample_mean, _ = estimate_utils.compute_sample_mean_and_variance(
        agg_adj, node_membership)
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)

    confidence_level = 1 - (z_alpha / (4 * (num_classes - 1) * num_classes))
    ci_percentile = norm.ppf(1 - ((1 - confidence_level) / 2))

    def half_width(a, b, x, y):
        scaled_means = ((sample_mean[a, b] / bp_size[a, b]) +
                        (sample_mean[x, y] / bp_size[x, y]))
        return ci_percentile * (1 / duration) * np.sqrt((9 / 4) * scaled_means)

    return {
        (a, b, x, y): (mu[a, b] - mu[x, y], half_width(a, b, x, y))
        for a, b, x, y in block_pair_tuple_list
    }
示例#5
0
def compute_mu_and_m_confidence_interval(event_dict, node_membership,
                                         num_classes, z_alpha, duration):
    """
    Computes the confidence interval half-widths for mu and m (alpha to beta ratio)

    Both widths are built from the per-block-pair sample mean of the aggregated adjacency matrix
    and the block pair sizes, scaled by a normal percentile.

    :param event_dict: Edge dictionary of events between all node pair.
    :param node_membership: (list) membership of every node to one of K classes.
    :param num_classes: (int) number of blocks / classes
    :param z_alpha: significance level (resulting in (1 - z_alpha) * 100 % CI); it is divided by
                    2 * num_classes^2 before the normal percentile is looked up.
    :param duration: the duration of the network

    :return: matrix of KxK confidence interval for mu and m
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(len(node_membership),
                                                       event_dict)

    # The sample variance is returned by the helper but not used here.
    sample_mean, _ = estimate_utils.compute_sample_mean_and_variance(
        agg_adj, node_membership)
    block_pair_sizes = utils.calc_block_pair_size(node_membership,
                                                  num_classes)

    confidence_level = 1 - (z_alpha / (2 * (num_classes**2)))
    ci_percentile = norm.ppf(1 - ((1 - confidence_level) / 2))

    mu_half_width = ci_percentile * np.sqrt(
        (9 * sample_mean) / (4 * block_pair_sizes))
    # mu is a rate, so its interval is additionally scaled by the network duration.
    mu_half_width /= duration

    m_half_width = ci_percentile * np.sqrt(
        1 / (4 * block_pair_sizes * sample_mean))

    return mu_half_width, m_half_width
示例#6
0
def calc_full_log_likelihood(block_pair_events,
                             node_membership,
                             bp_mu,
                             bp_alpha,
                             bp_beta,
                             duration,
                             num_classes,
                             add_com_assig_log_prob=True):
    """
    Evaluate the full log-likelihood of the CHIP model.

    The Hawkes log-likelihood of each block pair is computed with that pair's (mu, alpha, beta) and
    block pair size, and the results are summed over all block pairs. Optionally, the multinomial
    log-probability of the observed block sizes (using the empirical block proportions as class
    probabilities) is added on top.

    :param block_pair_events: (list) n_classes x n_classes where entry ij is a list of event lists between nodes in
                              block i to nodes in block j.
    :param node_membership: (list) membership of every node to one of K classes.
    :param bp_mu: n_classes x n_classes where entry ij is the mu of the block pair ij
    :param bp_alpha: n_classes x n_classes where entry ij is the alpha of the block pair ij
    :param bp_beta: n_classes x n_classes where entry ij is the beta of the block pair ij
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the log-probability of the community assignment to the
                                   total log-likelihood.

    :return: log-likelihood of the CHIP model
    """
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)

    # Row-major accumulation over every (sender block, receiver block) pair.
    hawkes_ll = sum(
        estimate_utils.block_pair_full_hawkes_log_likelihood(
            block_pair_events[src][dst],
            bp_mu[src, dst],
            bp_alpha[src, dst],
            bp_beta[src, dst],
            duration,
            block_pair_size=bp_size[src, dst])
        for src in range(num_classes)
        for dst in range(num_classes))

    if not add_com_assig_log_prob:
        return hawkes_ll

    # np.unique only reports classes that actually occur in node_membership.
    _, nodes_per_block = np.unique(node_membership, return_counts=True)
    empirical_class_prob = nodes_per_block / sum(nodes_per_block)
    assignment_dist = multinomial(len(node_membership), empirical_class_prob)

    return hawkes_ll + assignment_dist.logpmf(nodes_per_block)
示例#7
0
def estimate_bp_hawkes_params(event_dict,
                              node_membership,
                              duration,
                              num_classes,
                              agg_adj=None,
                              return_block_pair_events=False):
    """
    Estimate CHIP Hawkes parameters.

    mu and the alpha to beta ratio (m) are estimated from the aggregated event counts; beta is then
    estimated separately for every block pair from its events, and alpha is recovered as m * beta.

    :param event_dict: Edge dictionary of events between all node pair.
    :param node_membership: (list) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param agg_adj: (optional) np array (num_nodes x num_nodes) Adjacency matrix where element ij denotes the
                    number of events between nodes i an j. If None, this will be calculated.
    :param return_block_pair_events: (bool) If True, the block pair events are appended to the returned tuple

    :return: parameters of the CHIP model as a tuple (mu, alpha, beta, m); when `return_block_pair_events`
             is True the tuple is (mu, alpha, beta, m, block_pair_events)
    """

    if agg_adj is None:
        num_nodes = len(node_membership)
        agg_adj = utils.event_dict_to_aggregated_adjacency(
            num_nodes, event_dict)

    # NOTE(review): the last argument (1e-10 / duration) is presumably the fallback mu used for
    # block pairs without events -- confirm against estimate_hawkes_from_counts.
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, duration, 1e-10 / duration)

    # The `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24; the builtin `float`
    # selects the same float64 dtype on every NumPy version.
    bp_beta = np.zeros((num_classes, num_classes), dtype=float)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)

    for b_i in range(num_classes):
        for b_j in range(num_classes):
            # Only the first returned value (beta) is kept; the second is ignored.
            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], duration, bp_size[b_i, b_j])

    # m = alpha / beta  =>  alpha = m * beta
    bp_alpha = bp_alpha_beta_ratio * bp_beta

    if return_block_pair_events:
        return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio, block_pair_events

    return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio
示例#8
0
def estimate_poisson_lambda(count_matrix,
                            node_membership,
                            duration,
                            num_classes,
                            default_lambda=1e-10):
    """
    Estimate the Poisson rate (lambda) of every block pair.

    lambda_ij is the event count of block pair ij divided by (duration * block pair size). Entries
    that come out exactly zero are replaced by `default_lambda`.

    :param count_matrix: n_classes x n_classes where entry ij denotes the number of events in block-pair ij
    :param node_membership: (list) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param default_lambda: default value for lambda if there are no events in a block pair to estimate lambda
    :return: n_classes x n_classes where entry ij is the lambda of the block pair ij
    """
    pair_sizes = utils.calc_block_pair_size(node_membership, num_classes)

    # A block holding a single node has a within-block pair size of 0. Its event count is 0 as
    # well, so substituting 1 avoids a division by zero without changing the resulting rate.
    pair_sizes[pair_sizes == 0] = 1

    exposure = duration * pair_sizes
    rates = count_matrix / exposure

    no_event_mask = rates == 0
    rates[no_event_mask] = default_lambda

    return rates