Пример #1
0
def jsd(freqs1, freqs2, validate=False):
    """Calculate Jensen–Shannon divergence between two probability distributions.

    The result is the sum of the ``safe_p_log_p`` terms of the averaged
    distribution minus the mean of the summed ``safe_p_log_p`` terms of
    the two input distributions.

    Parameters
    ----------
    freqs1 : one dimensional array
        row vector frequencies, sum to 1
    freqs2 : one dimensional array
        row vector frequencies, sum to 1
    validate : bool
        if True, check that both inputs have the same shape, are one
        dimensional and sum to 1 before computing

    Returns
    -------
    The Jensen–Shannon divergence between the two probability
    distributions.

    Raises
    ------
    AssertionError
        if ``validate`` is True and any of the checks fails
    """
    # Convert input arrays into numpy arrays
    freqs1 = array(freqs1)
    freqs2 = array(freqs2)

    if validate:
        assert_equal(freqs1.shape,
                     freqs2.shape,
                     err_msg="freqs1/freqs2 mismatched shape")
        # Raise explicitly instead of using `assert` statements: those are
        # removed when Python runs with -O, which would silently disable
        # validation the caller asked for.
        if freqs1.ndim != 1:
            raise AssertionError("freqs1 has incorrect dimension")
        if freqs2.ndim != 1:
            raise AssertionError("freqs2 has incorrect dimension")
        assert_allclose(sum(freqs1), 1, err_msg="invalid freqs1")
        assert_allclose(sum(freqs2), 1, err_msg="invalid freqs2")

    # Summed terms of the mean of the two distributions
    H_mn = safe_p_log_p(freqs1 / 2 + freqs2 / 2).sum()
    # Mean of the summed terms of each distribution
    mn_H = sum([sum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2
    return H_mn - mn_H
Пример #2
0
def jsd(freqs1, freqs2, validate=False):
    """Calculate Jensen–Shannon divergence between two probability distributions.

    The result is the sum of the ``safe_p_log_p`` terms of the averaged
    distribution minus the mean of the summed ``safe_p_log_p`` terms of
    the two input distributions.

    Parameters
    ----------
    freqs1 : one dimensional array
        row vector frequencies, sum to 1
    freqs2 : one dimensional array
        row vector frequencies, sum to 1
    validate : bool
        if True, check that both inputs have the same shape, are one
        dimensional and pass ``validate_freqs_array`` before computing

    Returns
    -------
    The Jensen–Shannon divergence between the two probability
    distributions.

    Raises
    ------
    AssertionError
        if ``validate`` is True and any of the checks fails; a
        ``ValueError`` from ``validate_freqs_array`` is chained as the cause
    """
    # Convert input arrays into numpy arrays
    freqs1 = array(freqs1)
    freqs2 = array(freqs2)

    if validate:
        assert_equal(freqs1.shape,
                     freqs2.shape,
                     err_msg="freqs1/freqs2 mismatched shape")
        # Raise explicitly instead of using `assert` statements: those are
        # removed when Python runs with -O, which would silently disable
        # validation the caller asked for.
        if freqs1.ndim != 1:
            raise AssertionError("freqs1 has incorrect dimension")
        if freqs2.ndim != 1:
            raise AssertionError("freqs2 has incorrect dimension")
        try:
            validate_freqs_array(freqs1)
            validate_freqs_array(freqs2)
        except ValueError as err:
            # Keep a single exception type for every validation failure
            raise AssertionError("freqs not valid") from err

    # Summed terms of the mean of the two distributions
    H_mn = safe_p_log_p(freqs1 / 2 + freqs2 / 2).sum()
    # Mean of the summed terms of each distribution
    mn_H = sum([sum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2
    return H_mn - mn_H
Пример #3
0
 def entropy_terms(self):
     """Apply ``safe_p_log_p`` to ``self.array`` and wrap the result.

     Returns
     -------
     entropies : array
          Has the same dimension as ``self.array``, with the safe log
          operation applied, passed through ``self.template.wrap``.
     """
     return self.template.wrap(safe_p_log_p(self.array))
Пример #4
0
 def test_safe_p_log_p(self):
     """safe_p_log_p: should handle pos/neg/zero/empty arrays"""
     # non-negative input: powers of two give exact results, zeros stay zero
     valid = array([[4, 0, 8], [2, 16, 4]])
     expected_valid = array([[-8, 0, -24], [-2, -64, -8]])
     assert_equal(safe_p_log_p(valid), expected_valid)

     # an all-zero array maps to all zeros
     zeros = array([[0, 0], [0, 0]])
     expected_zeros = array([[0, 0], [0, 0]])
     assert_equal(safe_p_log_p(zeros), expected_zeros)

     # a negative entry must raise
     negative = array([-4])
     with self.assertRaises(FloatingPointError):
         safe_p_log_p(negative)

     # integer input produces a float result
     from_int = safe_p_log_p(array([3]))
     assert_allclose(from_int, array([-4.75488750]))

     # an empty array comes back empty
     from_empty = safe_p_log_p(array([]))
     assert_equal(from_empty, array([]))
Пример #5
0
def jsd(freqs1, freqs2, validate=False):
    """Calculate Jensen–Shannon divergence between two probability distributions.

    The result is the sum of the ``safe_p_log_p`` terms of the averaged
    distribution minus the mean of the summed ``safe_p_log_p`` terms of
    the two input distributions. Sums use ``fsum``.

    Parameters
    ----------
    freqs1 : one dimensional array
        row vector frequencies, sum to 1
    freqs2 : one dimensional array
        row vector frequencies, sum to 1
    validate : bool
        if True, check that both inputs have the same shape, are one
        dimensional and pass ``validate_freqs_array`` before computing

    Returns
    -------
    The Jensen–Shannon divergence as a float. A negative result within
    1e-10 of zero is returned as 0.0.

    Raises
    ------
    AssertionError
        if ``validate`` is True and any of the checks fails; a
        ``ValueError`` from ``validate_freqs_array`` is chained as the cause
    ArithmeticError
        if the computed value is negative by more than 1e-10
    """
    # Convert input arrays into numpy arrays
    freqs1 = array(freqs1)
    freqs2 = array(freqs2)

    if validate:
        assert_equal(freqs1.shape,
                     freqs2.shape,
                     err_msg="freqs1/freqs2 mismatched shape")
        # Raise explicitly instead of using `assert` statements: those are
        # removed when Python runs with -O, which would silently disable
        # validation the caller asked for.
        if freqs1.ndim != 1:
            raise AssertionError("freqs1 has incorrect dimension")
        if freqs2.ndim != 1:
            raise AssertionError("freqs2 has incorrect dimension")
        try:
            validate_freqs_array(freqs1)
            validate_freqs_array(freqs2)
        except ValueError as err:
            # Keep a single exception type for every validation failure
            raise AssertionError("freqs not valid") from err

    # Summed terms of the mean of the two distributions
    H_mn = fsum(safe_p_log_p(freqs1 / 2 + freqs2 / 2))
    # Mean of the summed terms of each distribution
    mn_H = fsum([fsum(i) for i in map(safe_p_log_p, [freqs1, freqs2])]) / 2
    jsd_ = H_mn - mn_H

    if jsd_ < 0:
        # A tiny negative value is treated as rounding error and clamped;
        # anything more negative is reported as an error.
        if not isclose(jsd_, 0, atol=1e-10):
            raise ArithmeticError(
                f"{jsd_} is negative and below defined precision threshold")
        # Use a float zero so the return type matches every other path
        jsd_ = 0.0

    return jsd_
Пример #6
0
 def entropy(self):
     """Shannon entropy per position using log2.

     Applies ``safe_p_log_p`` to ``self.array`` and sums the resulting
     terms along axis 1.
     """
     return safe_p_log_p(self.array).sum(axis=1)