示例#1
0
 def __init__(self, *args, lmbda_1=0., lmbda_2=0.):
     """Set up the instance and resolve the two lambda values.

     Positional arguments are forwarded unchanged to the parent
     initializer. ``self.y`` (presumably set by the parent -- verify) is
     then passed through ``asarray2d`` and stored back.

     A non-positive ``lmbda_1`` / ``lmbda_2`` is replaced by the
     estimated entropy of ``self.y`` divided by ``LAMBDA_1_ADJUSTMENT`` /
     ``LAMBDA_2_ADJUSTMENT`` respectively; any other value is kept as
     given.
     """
     super().__init__(*args)
     self.y = asarray2d(self.y)

     # Resolve each value into a local first so both attributes are only
     # assigned after every entropy estimate has completed.
     resolved_1 = (
         estimate_entropy(self.y) / LAMBDA_1_ADJUSTMENT
         if lmbda_1 <= 0 else lmbda_1
     )
     resolved_2 = (
         estimate_entropy(self.y) / LAMBDA_2_ADJUSTMENT
         if lmbda_2 <= 0 else lmbda_2
     )

     self.lmbda_1 = resolved_1
     self.lmbda_2 = resolved_2
示例#2
0
def test_entropy_multiple_disc():
    """Appending a continuous column must raise the estimated entropy.

    Builds a two-column array of constant columns (ones, zeros), then the
    same array with an extra float-valued column appended, and checks that
    the entropy estimate of the wider array is strictly larger.
    """
    n_rows = 50
    ones_col = np.ones((n_rows, 1))
    zeros_col = np.zeros((n_rows, 1))
    # The 0.5 offset makes the values non-integral floats, so the column
    # is classified as continuous.
    continuous_col = asarray2d(np.arange(n_rows) + 0.5)

    discrete_only = np.hstack((ones_col, zeros_col))
    discrete_plus_cont = np.hstack((discrete_only, continuous_col))

    all_disc_h = estimate_entropy(discrete_only)
    mixed_h = estimate_entropy(discrete_plus_cont)
    assert mixed_h > all_disc_h, \
        'Expected adding continuous column increases entropy'
示例#3
0
文件: gfssf.py 项目: ballet/ballet
 def __init__(self,
              *args,
              lmbda_1: float = 0.0,
              lmbda_2: float = 0.0,
              lambda_1_adjustment: float = LAMBDA_1_ADJUSTMENT,
              lambda_2_adjustment: float = LAMBDA_2_ADJUSTMENT):
     """Set up the instance and resolve the two lambda values.

     Positional arguments are forwarded unchanged to the parent
     initializer. ``self.y_val`` (presumably set by the parent -- verify)
     is then passed through ``asarray2d`` and stored back.

     Args:
         *args: Forwarded to ``super().__init__``.
         lmbda_1: First lambda value. When non-positive, it is replaced
             by the estimated entropy of ``self.y_val`` divided by
             ``lambda_1_adjustment``.
         lmbda_2: Second lambda value. When non-positive, it is replaced
             by the estimated entropy of ``self.y_val`` divided by
             ``lambda_2_adjustment``.
         lambda_1_adjustment: Divisor used for the ``lmbda_1`` fallback.
         lambda_2_adjustment: Divisor used for the ``lmbda_2`` fallback.
     """
     super().__init__(*args)
     self.y_val = asarray2d(self.y_val)

     # Resolve each value into a local first so both attributes are only
     # assigned after every entropy estimate has completed.
     resolved_1 = (
         estimate_entropy(self.y_val) / lambda_1_adjustment
         if lmbda_1 <= 0 else lmbda_1
     )
     resolved_2 = (
         estimate_entropy(self.y_val) / lambda_2_adjustment
         if lmbda_2 <= 0 else lmbda_2
     )

     self.lmbda_1 = resolved_1
     self.lmbda_2 = resolved_2
示例#4
0
def test_mi_informative():
    """Identical columns should share a large amount of information.

    ``x`` and ``y`` are separate but equal column vectors holding 1..100;
    the mutual information estimate must exceed a quarter of the entropy
    estimate of ``y``.
    """
    x = np.arange(1, 101).reshape(-1, 1)
    y = np.arange(1, 101).reshape(-1, 1)

    mi = estimate_mutual_information(x, y)
    h_y = estimate_entropy(y)
    threshold = h_y / 4
    assert mi > threshold, \
        'exact copy columns should have high information'
示例#5
0
def test_mi_uninformative():
    """A constant column should carry almost no information about ``x``.

    ``x`` holds 1..100 as a column vector and ``y`` is a column of ones;
    the mutual information estimate must stay below a quarter of the
    entropy estimate of ``x``.
    """
    n_rows = 100
    x = np.arange(1, n_rows + 1).reshape(-1, 1)
    y = np.ones((n_rows, 1))

    mi = estimate_mutual_information(x, y)
    h_x = estimate_entropy(x)
    threshold = h_x / 4
    assert threshold > mi, \
        'uninformative column should have no information'
示例#6
0
 def test_mi_informative(self):
     """Identical columns should share a large amount of information.

     ``x`` and ``y`` are separate but equal column vectors holding
     1..100; the mutual information estimate must exceed a quarter of
     the entropy estimate of ``y``.
     """
     x = np.arange(1, 101).reshape(-1, 1)
     y = np.arange(1, 101).reshape(-1, 1)

     mi = estimate_mutual_information(x, y)
     h_y = estimate_entropy(y)
     threshold = h_y / 4
     self.assertGreater(
         mi, threshold,
         'exact copy columns should have high information')
示例#7
0
 def test_mi_uninformative(self):
     """A constant column should carry almost no information about ``x``.

     ``x`` holds 1..100 as a column vector and ``y`` is a column of
     ones; the mutual information estimate must stay below a quarter of
     the entropy estimate of ``x``.
     """
     n_rows = 100
     x = np.arange(1, n_rows + 1).reshape(-1, 1)
     y = np.ones((n_rows, 1))

     mi = estimate_mutual_information(x, y)
     h_x = estimate_entropy(x)
     threshold = h_x / 4
     self.assertGreater(
         threshold, mi,
         'uninformative column should have no information')
示例#8
0
def test_cmi_redundant_info():
    """Conditioning on an exact copy should leave little information.

    ``x``, ``y`` and the conditioning variable are three separate but
    equal column vectors holding 1..100; the conditional information
    estimate must stay below a quarter of the entropy estimate of ``y``.
    """
    def make_column():
        # A fresh array per call so the three inputs never share memory.
        return np.arange(1, 101).reshape(-1, 1)

    x = make_column()
    y = make_column()
    exact_z = make_column()

    h_y = estimate_entropy(y)
    cmi = estimate_conditional_information(x, y, exact_z)
    threshold = h_y / 4
    assert threshold > cmi, \
        'redundant copies should have little information'
示例#9
0
def H(a):  # noqa
    """Return the entropy estimate of ``a`` after ``asarray2d`` conversion."""
    as_2d = asarray2d(a)
    return estimate_entropy(as_2d)