def test_entropy(sigma: float, sample: np.ndarray, base: float = np.e):
    """The sample-based entropy estimate should match the closed-form normal entropy."""
    log_fun = _select_vectorized_log_fun_for_base(base)

    estimated = entropy_from_samples(sample, base=base, discrete=False)
    exact = entropy_of_normal_distribution(sigma, log_fun=log_fun)

    assert np.isclose(estimated, exact, rtol=1e-2, atol=1e-2)
# Example 2
def discrete_joint_entropy(sample_x: np.ndarray,
                           sample_y: np.ndarray,
                           base: float = np.e) -> float:
    r"""
    Approximate the joint entropy of x and y

       H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right]

    from a sample of both distributions.

    Parameters
    ----------
    sample_x: a NumPy array of draws of variable x
    sample_y: a NumPy array of draws of variable y
    base: the base of the logarithm used to control the units of measurement for the result

    Returns
    -------
    The joint entropy of x and y
    """

    log_fun = _select_vectorized_log_fun_for_base(base)
    sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(
        sample_x, sample_y)

    # Only the counts of each distinct (x, y) pair are needed for the
    # plug-in estimate; the unique combinations themselves are not used.
    _, counts_xy = \
        _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y)

    # Empirical probability of each observed (x, y) combination.
    joint_frequency = (1.0 / n) * counts_xy

    # H(X, Y) = - sum_i p_i log(p_i); every frequency is strictly positive
    # because only observed combinations contribute counts.
    return -np.sum(joint_frequency * log_fun(joint_frequency))
def test_relative_entropy(base: float = np.e):
    """Sample-based KL estimate should agree with the closed-form normal result."""
    log_fun = _select_vectorized_log_fun_for_base(base)

    estimated = relative_entropy_from_samples(sample_p,
                                              sample_q,
                                              base=base,
                                              discrete=False)
    exact = relative_entropy_between_two_normal_distributions(
        mu_p, sigma_p, mu_q, sigma_q, log_fun=log_fun)

    assert np.isclose(estimated, exact, rtol=1e-1, atol=1e-1)
# Example 4
def test_compare_slow_and_fast_implementations_of_relative_entropy(
        sample_p: np.ndarray, sample_q: np.ndarray, base: float):
    """The optimized estimator must agree with the reference (slow) implementation."""
    log_fun = _select_vectorized_log_fun_for_base(base)

    slow_result = _discrete_relative_entropy_slow(sample_p=sample_p,
                                                  sample_q=sample_q,
                                                  log_fun=log_fun)

    fast_result = discrete_relative_entropy(sample_p=sample_p,
                                            sample_q=sample_q,
                                            base=base)

    assert np.isclose(slow_result, fast_result)
# Example 5
def discrete_relative_entropy(sample_p: np.ndarray,
                              sample_q: np.ndarray,
                              base: float = np.e):
    """
    Estimate the relative entropy (Kullback-Leibler divergence)

                D_KL(p||q) = E_p [log(p/q)]

    of the discrete distribution q relative to the discrete distribution p,
    using one sample from each distribution.

    Parameters
    ----------
    sample_p: sample from the distribution p
    sample_q: sample from the distribution q
    base: the base of the logarithm used to control the units of measurement for the result

    Returns
    -------
    The relative entropy of the distribution q relative to the distribution p.

    """
    log_fun = _select_vectorized_log_fun_for_base(base)

    # Empirical frequencies over the distinct realizations of each sample.
    unique_p, counts_p = np.unique(sample_p, return_counts=True)
    frequencies_p = counts_p / len(sample_p)

    unique_q, counts_q = np.unique(sample_q, return_counts=True)
    frequencies_q = counts_q / len(sample_q)

    # Sorted, de-duplicated union of realizations seen in either sample.
    unique_combined = np.unique(np.hstack((sample_p, sample_q)))

    # Align both frequency vectors on the common support.
    aligned_frequencies_p, aligned_frequencies_q = \
        _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p,
                                               sorted_q_realizations=unique_q,
                                               sorted_q_frequencies=frequencies_q,
                                               sorted_p_frequencies=frequencies_p,
                                               sorted_combined_realizations=unique_combined)

    likelihood_ratio = aligned_frequencies_p / aligned_frequencies_q
    return np.sum(aligned_frequencies_p * log_fun(likelihood_ratio))
# Example 6
def discrete_entropy(sample: np.ndarray, base: float = np.e) -> float:
    """
    Estimate the entropy of a discrete distribution

                H(p) = - E_p[log(p)]

    from a sample drawn from it.

    Parameters
    ----------
    sample: a sample from the discrete distribution
    base: the base of the logarithm used to control the units of measurement for the result

    Returns
    -------
    An approximation of the entropy of the discrete distribution from which the sample is drawn.

    """
    log_fun = _select_vectorized_log_fun_for_base(base)
    empirical_frequencies = _construct_frequencies_for_one_sample(sample)
    # Plug-in estimator: expected surprisal under the empirical frequencies.
    surprisal = -log_fun(empirical_frequencies)
    return np.sum(empirical_frequencies * surprisal)
# Example 7
def test_entropy(sample: np.ndarray, base: float):
    """Our discrete entropy estimator must agree with the scipy-based reference."""
    log_fun = _select_vectorized_log_fun_for_base(base)
    ours = discrete_entropy(sample=sample, base=base)
    reference = discrete_entropy_scipy(sample=sample, log_fun=log_fun)
    assert np.isclose(ours, reference)