def test_kl2(self):
    x_probs = np.array([0.04, 0.16] * 5 + [0])
    xy_probs = np.array([0.02, 0.18] * 5 + [0])

    # Compute the expected base-2 KL divergence by hand, skipping the
    # final entry, where both distributions place zero probability.
    dkl = 0
    for i in range(len(x_probs) - 1):
        div = x_probs[i] / xy_probs[i]
        dkl += x_probs[i] * math.log(div, 2)

    self.assertAlmostEqual(
        entropy.kullback_leiber_divergence(x_probs, xy_probs), dkl)
def main(features_fpath, classes_fpath, use):
    classes = np.loadtxt(classes_fpath)

    if use in {'user', 'tags', 'cat'}:
        prob_class, prob_col, prob_class_col = load_text_file(
            features_fpath, classes, use)
    else:
        prob_class, prob_col, prob_class_col = load_svm_file(
            features_fpath, classes)

    # Mutual information decomposes as the expected KL divergence between
    # the class distribution conditioned on each token and the marginal
    # class distribution; the per-token KL term is that token's
    # information gain.
    info_gains = []
    mutual_info = 0
    for token in prob_class_col:
        dkl = kullback_leiber_divergence(prob_class_col[token], prob_class)
        mutual_info += prob_col[token] * dkl
        info_gains.append((dkl, token))

    print('Mutual info: ', mutual_info)
    for dkl, token in sorted(info_gains, reverse=True):
        print(dkl, token)
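# A minimal sketch of a command-line entry point for main(). The script and
# file names in the comment below are placeholders for illustration, not
# taken from the source; only main()'s (features_fpath, classes_fpath, use)
# signature comes from the code above.
if __name__ == '__main__':
    import sys

    # e.g.: python info_gain.py features.dat classes.txt tags
    main(sys.argv[1], sys.argv[2], sys.argv[3])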
def test_kl3(self):
    # xy_probs assigns zero probability at index 1, where x_probs is
    # positive, so the divergence must be infinite.
    x_probs = np.array([0.25, 0.20, 0, 0.55])
    xy_probs = np.array([0.20, 0, 0.25, 0.55])
    self.assertAlmostEqual(
        entropy.kullback_leiber_divergence(x_probs, xy_probs), float("inf"))
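# For reference, a minimal sketch of what entropy.kullback_leiber_divergence
# is assumed to compute, consistent with the two tests above: the base-2 KL
# divergence D(p || q) = sum_i p[i] * log2(p[i] / q[i]), where terms with
# p[i] == 0 contribute nothing and p[i] > 0 with q[i] == 0 yields infinity.
# The function and module names come from the tests; this body is an
# assumption, not the project's actual implementation.
import numpy as np

def kullback_leiber_divergence(p, q):
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    dkl = 0.0
    for pi, qi in zip(p, q):
        if pi == 0:
            continue  # convention: 0 * log(0 / q) == 0
        if qi == 0:
            return float('inf')  # p places mass where q has none
        dkl += pi * np.log2(pi / qi)
    return dkl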