def test_hjmi(library, madelon):
    """Check greedy HJMI feature selection against a known feature ranking.

    Every column of ``data`` is discretised into ``bins`` histogram bins.
    Features are then added one at a time: each round picks the candidate
    with the highest score and accepts it only if the score improves on the
    last accepted score by more than 3 % (relative); otherwise selection
    stops.  The selected indices must equal ``expected_features``.

    Args:
        library: Value handed to ``pymit._set_library``.
        madelon: ``(data, labels)`` pair.  ``data`` is a 2-D array with one
            column per feature; ``labels`` is binned with 2 bins
            (presumably binary class labels -- TODO confirm).
    """
    pymit._set_library(library)
    data, labels = madelon
    bins = 10
    # Only the first five selected features are checked; the original list
    # continued with 475, 433, 64, 128, 442, 453, 336, 48, 493, 281, 318,
    # 153, 28, 451, 455.
    expected_features = [241, 338, 378, 105, 472]

    num_examples, num_features = data.shape
    data_discrete = np.zeros([num_examples, num_features])
    for col in range(num_features):
        _, bin_edges = pymit._lib.histogram(data[:, col], bins=bins)
        data_discrete[:, col] = pymit._lib.digitize(data[:, col], bin_edges, right=False)

    # I(feature, labels) does not depend on the selection state, so it is
    # computed once per feature instead of once per feature per round.
    relevance = [
        pymit.I(data_discrete[:, X_k], labels, bins=[bins, 2])
        for X_k in range(num_features)
    ]

    max_features = len(expected_features)
    selected_features = []
    j_h = 0  # best score of the previous round, carried into the next one
    hjmi = None  # score of the most recently accepted feature
    for _ in range(max_features):
        # Builtin ``float`` is float64; the ``np.float`` alias was removed
        # in NumPy 1.24.
        jmi = np.zeros([num_features], dtype=float)
        for X_k in range(num_features):
            if X_k in selected_features:
                continue  # already-selected features keep a score of 0
            jmi_1 = relevance[X_k]
            jmi_2 = 0
            for X_j in selected_features:
                tmp1 = pymit.I(data_discrete[:, X_k], data_discrete[:, X_j], bins=[bins, bins])
                tmp2 = pymit.I_cond(data_discrete[:, X_k], data_discrete[:, X_j], labels, bins=[bins, bins, 2])
                jmi_2 += tmp1 - tmp2
            if selected_features:
                jmi[X_k] = j_h + jmi_1 - jmi_2 / len(selected_features)
            else:
                jmi[X_k] = j_h + jmi_1
        f = jmi.argmax()
        j_h = jmi[f]
        # Accept the first feature unconditionally, later ones only when the
        # relative gain over the last accepted score exceeds 3 %.
        if hjmi is None or (j_h - hjmi) / hjmi > 0.03:
            hjmi = j_h
            selected_features.append(f)
        else:
            break

    assert np.array_equal(expected_features, selected_features)
def test_jmi(library, madelon):
    """Check greedy JMI feature selection against a known feature ranking.

    Every column of ``data`` is discretised into ``bins`` histogram bins.
    The first feature is the one with the largest ``pymit.I(feature,
    labels)``.  Each further round scores every unselected candidate
    ``X_k`` as the sum, over all selected ``X_j``, of
    ``pymit.I(X_j, labels) + pymit.I_cond(X_k, labels, X_j)`` and adds the
    best-scoring candidate.  The selected indices must equal
    ``expected_features``.

    Args:
        library: Value handed to ``pymit._set_library``.
        madelon: ``(data, labels)`` pair.  ``data`` is a 2-D array with one
            column per feature; ``labels`` is binned with 2 bins
            (presumably binary class labels -- TODO confirm).
    """
    pymit._set_library(library)
    data, labels = madelon
    bins = 10
    # Only the first five selected features are checked; the original list
    # continued with 475, 433, 64, 128, 442, 453, 336, 48, 493, 281, 318,
    # 153, 28, 451, 455.
    expected_features = [241, 338, 378, 105, 472]

    num_examples, num_features = data.shape
    data_discrete = np.zeros([num_examples, num_features])
    for col in range(num_features):
        _, bin_edges = pymit._lib.histogram(data[:, col], bins=bins)
        data_discrete[:, col] = pymit._lib.digitize(data[:, col], bin_edges, right=False)

    max_features = len(expected_features)
    selected_features = []
    # Values of pymit.I(X_j, labels) for the selected features, keyed by
    # feature index.  The term is the same for every candidate, so it is
    # computed once per selected feature rather than once per candidate.
    relevance = {}

    # Builtin ``float`` is float64; the ``np.float`` alias was removed in
    # NumPy 1.24.
    mi = np.zeros([num_features], dtype=float)
    for col in range(num_features):
        mi[col] = pymit.I(data_discrete[:, col], labels, bins=[bins, 2])
    f = mi.argmax()
    selected_features.append(f)
    relevance[f] = pymit.I(data_discrete[:, f], labels, bins=[bins, 2])

    for _ in range(1, max_features):
        jmi = np.zeros([num_features], dtype=float)
        for X_k in range(num_features):
            if X_k in selected_features:
                continue  # already-selected features keep a score of 0
            for X_j in selected_features:
                sum1 = relevance[X_j]
                sum2 = pymit.I_cond(data_discrete[:, X_k], labels, data_discrete[:, X_j], bins=[bins, 2, bins])
                jmi[X_k] += sum1 + sum2
        f = jmi.argmax()
        selected_features.append(f)
        relevance[f] = pymit.I(data_discrete[:, f], labels, bins=[bins, 2])

    assert np.array_equal(expected_features, selected_features)
def calculate_jmi(X, Y, features, selected_features):
    """Score candidate features against the already selected ones.

    For every ``X_k`` in ``features`` that is not in ``selected_features``
    the score is the sum, over all selected ``X_j``, of
    ``pymit.I(X[:, X_j], Y) + pymit.I_cond(X[:, X_k], Y, X[:, X_j])``.

    ``bins`` is not a parameter; it is read from the enclosing (module)
    scope.

    Args:
        X: 2-D array indexed as ``X[:, feature]``.
        Y: Target values, binned with 2 bins.
        features: Column indices of the candidates to score.
        selected_features: Column indices that are already selected.

    Returns:
        A one-element list holding a float array with one entry per item of
        ``features``.  Entries for candidates that are already selected are
        NaN; with an empty ``selected_features`` every entry is 0.
    """
    # Builtin ``float`` is float64; the ``numpy.float`` alias was removed in
    # NumPy 1.24.
    JMI = numpy.full([len(features)], numpy.nan, dtype=float)
    # The I(X_j, Y) term is identical for every candidate, so compute it
    # once per selected feature instead of once per (candidate, selected).
    relevance = [pymit.I(X[:, X_j], Y, bins=[bins, 2]) for X_j in selected_features]
    for i, X_k in enumerate(features):
        if X_k in selected_features:
            continue
        jmi = 0
        for X_j, sum1 in zip(selected_features, relevance):
            sum2 = pymit.I_cond(X[:, X_k], Y, X[:, X_j], bins=[bins, 2, bins])
            jmi += sum1 + sum2
        JMI[i] = jmi
    return [JMI]
def calculate_mi(X, Y, features):
    """Compute ``pymit.I`` between each listed feature column and ``Y``.

    ``bins`` is not a parameter; it is read from the enclosing (module)
    scope.

    Args:
        X: 2-D array indexed as ``X[:, feature]``.
        Y: Target values, binned with 2 bins.
        features: Column indices of the features to evaluate.

    Returns:
        A one-element list holding a float array with one entry per item of
        ``features``, in the same order.
    """
    # Builtin ``float`` is float64; the ``numpy.float`` alias was removed in
    # NumPy 1.24.
    MI = numpy.full([len(features)], numpy.nan, dtype=float)
    for i, X_i in enumerate(features):
        MI[i] = pymit.I(X[:, X_i], Y, bins=[bins, 2])
    return [MI]
# JMI feature-selection script (fragment).
# Expects ``X`` (2-D array, one column per feature) and ``labels`` to be
# defined before this point.
Y = labels
bins = 10

# Discretise every feature column into ``bins`` histogram bins.
[tmp, features] = X.shape
D = numpy.zeros([tmp, features])
for i in range(features):
    N, E = numpy.histogram(X[:, i], bins=bins)
    D[:, i] = numpy.digitize(X[:, i], E, right=False)

max_features = 20
selected_features = []

# First pick: the feature with the largest pymit.I(feature, Y).
# Builtin ``float`` is float64; the ``numpy.float`` alias was removed in
# NumPy 1.24.
MI = numpy.full([features], numpy.nan, dtype=float)
for i in range(features):
    MI[i] = pymit.I(D[:, i], Y, bins=[bins, 2])
f = MI.argmax()
selected_features.append(f)
print("001 {:0>3d} {}".format(f, MI[f]))

# Remaining picks: evaluate every unselected candidate against the
# features selected so far.
for i in range(1, max_features):
    JMI = numpy.zeros([features], dtype=float)
    for X_k in range(features):
        if X_k in selected_features:
            continue
        for X_j in selected_features:
            sum1 = pymit.I(D[:, X_j], Y, bins=[bins, 2])
            sum2 = pymit.I_cond(D[:, X_k], Y, D[:, X_j], bins=[bins, 2, bins])
            # NOTE(review): the visible fragment ends here -- sum1/sum2 are
            # never accumulated into JMI and no feature is selected in this
            # loop.  Confirm whether the rest of the loop body is missing.
# HJMI feature-selection script (fragment).
# Expects ``X``, ``Y``, ``D``, ``bins`` and ``features`` to be defined
# before this point.

# Discretise every feature column into ``bins`` histogram bins.
for i in range(features):
    N, E = numpy.histogram(X[:, i], bins=bins)
    D[:, i] = numpy.digitize(X[:, i], E, right=False)

max_features = 200
selected_features = []
j_h = 0  # best score of the previous round, carried into the next one
hjmi = None  # score of the most recently accepted feature
for i in range(0, max_features):
    # Builtin ``float`` is float64; the ``numpy.float`` alias was removed in
    # NumPy 1.24.
    JMI = numpy.zeros([features], dtype=float)
    for X_k in range(features):
        if X_k in selected_features:
            continue  # already-selected features keep a score of 0
        jmi_1 = pymit.I(D[:, X_k], Y, bins=[bins, 2])
        jmi_2 = 0
        for X_j in selected_features:
            tmp1 = pymit.I(D[:, X_k], D[:, X_j], bins=[bins, bins])
            tmp2 = pymit.I_cond(D[:, X_k], D[:, X_j], Y, bins=[bins, bins, 2])
            jmi_2 += tmp1 - tmp2
        if len(selected_features) == 0:
            JMI[X_k] += j_h + jmi_1
        else:
            JMI[X_k] += j_h + jmi_1 - jmi_2 / len(selected_features)
    f = JMI.argmax()
    j_h = JMI[f]
    # Accept the first feature unconditionally, later ones only when the
    # relative gain over the last accepted score exceeds 3 %.
    if (hjmi is None) or ((j_h - hjmi) / hjmi > 0.03):
        hjmi = j_h
        selected_features.append(f)
        print("{:0>3d} {:>3d} {}".format(len(selected_features), f, j_h))
    # NOTE(review): no ``else: break`` is visible in this fragment, so a
    # rejected round does not stop the loop and j_h is still updated.
    # Confirm whether a ``break`` follows outside this view.