Exemplo n.º 1
0
def test_hjmi(library, madelon):
    """Check the HJMI greedy feature selection against known picks.

    Every column of the data is discretised with a ``bins``-bin histogram.
    Features are then added one at a time. In each round every unselected
    candidate ``X_k`` is scored as::

        j_h + I(X_k, labels) - mean over selected X_j of
              (I(X_k, X_j) - I_cond(X_k, X_j, labels))

    where ``j_h`` is the best score of the previous round. The best candidate
    is accepted only while the relative gain over the last accepted score
    exceeds 3 %; otherwise the selection stops.

    Args:
        library: Backend handed to ``pymit._set_library`` (presumably a
            pytest fixture -- confirm against the test configuration).
        madelon: Pair ``(data, labels)``; ``data`` is a 2-D array of
            examples x features (presumably a pytest fixture).
    """
    pymit._set_library(library)
    data, labels = madelon
    bins = 10
    expected_features = [
        241, 338, 378, 105, 472
    ]  #, 475, 433, 64, 128, 442, 453, 336, 48, 493, 281, 318, 153, 28, 451, 455]

    # Discretise each feature column using its own histogram bin edges.
    [num_examples, num_features] = data.shape
    data_discrete = np.zeros([num_examples, num_features])
    for i in range(num_features):
        _, bin_edges = pymit._lib.histogram(data[:, i], bins=bins)
        data_discrete[:, i] = pymit._lib.digitize(data[:, i],
                                                  bin_edges,
                                                  right=False)

    max_features = len(expected_features)
    selected_features = []
    j_h = 0  # best score of the previous round
    hjmi = None  # score of the most recently accepted feature

    for _ in range(max_features):
        # Builtin float: the ``np.float`` alias was removed in NumPy 1.24.
        jmi = np.zeros([num_features], dtype=float)
        for X_k in range(num_features):
            if X_k in selected_features:
                continue
            jmi_1 = pymit.I(data_discrete[:, X_k], labels, bins=[bins, 2])
            jmi_2 = 0
            for X_j in selected_features:
                tmp1 = pymit.I(data_discrete[:, X_k],
                               data_discrete[:, X_j],
                               bins=[bins, bins])
                tmp2 = pymit.I_cond(data_discrete[:, X_k],
                                    data_discrete[:, X_j],
                                    labels,
                                    bins=[bins, bins, 2])
                jmi_2 += tmp1 - tmp2
            if not selected_features:
                # First round: nothing selected yet, so no redundancy term.
                jmi[X_k] = j_h + jmi_1
            else:
                jmi[X_k] = j_h + jmi_1 - jmi_2 / len(selected_features)
        f = jmi.argmax()
        j_h = jmi[f]
        # Accept the candidate only while it still improves the score by
        # more than 3 % relative to the last accepted one.
        if hjmi is None or (j_h - hjmi) / hjmi > 0.03:
            hjmi = j_h
            selected_features.append(f)
        else:
            break

    assert np.array_equal(expected_features, selected_features)
Exemplo n.º 2
0
def test_jmi(library, madelon):
    """Check the JMI greedy feature selection against known picks.

    Every column of the data is discretised with a ``bins``-bin histogram.
    The first feature is the one with the largest ``pymit.I`` against the
    labels. Each further round scores every unselected candidate ``X_k`` as
    the sum over already selected ``X_j`` of
    ``I(X_j, labels) + I_cond(X_k, labels, X_j)`` and takes the maximum.

    Args:
        library: Backend handed to ``pymit._set_library`` (presumably a
            pytest fixture -- confirm against the test configuration).
        madelon: Pair ``(data, labels)``; ``data`` is a 2-D array of
            examples x features (presumably a pytest fixture).
    """
    pymit._set_library(library)

    data, labels = madelon
    bins = 10
    expected_features = [
        241, 338, 378, 105, 472
    ]  #, 475, 433, 64, 128, 442, 453, 336, 48, 493, 281, 318, 153, 28, 451, 455]

    # Discretise each feature column using its own histogram bin edges.
    [num_examples, num_features] = data.shape
    data_discrete = np.zeros([num_examples, num_features])
    for i in range(num_features):
        _, bin_edges = pymit._lib.histogram(data[:, i], bins=bins)
        data_discrete[:, i] = pymit._lib.digitize(data[:, i],
                                                  bin_edges,
                                                  right=False)

    max_features = len(expected_features)
    selected_features = []

    # Seed the selection with the single most informative feature.
    # Builtin float: the ``np.float`` alias was removed in NumPy 1.24.
    mi = np.zeros([num_features], dtype=float)
    for i in range(num_features):
        mi[i] = pymit.I(data_discrete[:, i], labels, bins=[bins, 2])
    f = mi.argmax()
    selected_features.append(f)

    # One feature is already chosen, so only max_features - 1 rounds remain.
    for _ in range(1, max_features):
        jmi = np.zeros([num_features], dtype=float)
        for X_k in range(num_features):
            if X_k in selected_features:
                # Score stays 0 for features that were already picked.
                continue
            for X_j in selected_features:
                sum1 = pymit.I(data_discrete[:, X_j], labels, bins=[bins, 2])
                sum2 = pymit.I_cond(data_discrete[:, X_k],
                                    labels,
                                    data_discrete[:, X_j],
                                    bins=[bins, 2, bins])
                jmi[X_k] += sum1 + sum2
        f = jmi.argmax()
        selected_features.append(f)

    assert np.array_equal(expected_features, selected_features)
Exemplo n.º 3
0
def calculate_jmi(X, Y, features, selected_features):
    """Score candidate features against the already selected ones (JMI).

    For each candidate ``X_k`` in ``features`` that is not yet selected, the
    score is the sum over every ``X_j`` in ``selected_features`` of
    ``pymit.I(X[:, X_j], Y) + pymit.I_cond(X[:, X_k], Y, X[:, X_j])``.

    The bin count ``bins`` is read from the enclosing module scope.

    Args:
        X: 2-D array indexed as ``X[:, column]``.
        Y: Label vector passed to ``pymit`` (binned with 2 bins).
        features: Iterable of candidate column indices.
        selected_features: Collection of column indices already chosen.

    Returns:
        A one-element list holding a float array aligned with ``features``:
        the score for each unselected candidate (0 when nothing is selected
        yet) and ``nan`` for candidates that are already selected.
    """
    # Builtin float: the ``numpy.float`` alias was removed in NumPy 1.24.
    JMI = numpy.full([len(features)], numpy.nan, dtype=float)

    for i, X_k in enumerate(features):
        if X_k in selected_features:
            # Leave the nan placeholder for already selected features.
            continue
        jmi = 0
        for X_j in selected_features:
            sum1 = pymit.I(X[:, X_j], Y, bins=[bins, 2])
            sum2 = pymit.I_cond(X[:, X_k], Y, X[:, X_j], bins=[bins, 2, bins])
            jmi += sum1 + sum2
        JMI[i] = jmi

    return [JMI]
Exemplo n.º 4
0
def calculate_mi(X, Y, features):
    """Compute ``pymit.I`` between each listed feature column and ``Y``.

    The bin count ``bins`` is read from the enclosing module scope.

    Args:
        X: 2-D array indexed as ``X[:, column]``.
        Y: Label vector passed to ``pymit`` (binned with 2 bins).
        features: Iterable of column indices to evaluate.

    Returns:
        A one-element list holding a float array aligned with ``features``.
    """
    # Builtin float: the ``numpy.float`` alias was removed in NumPy 1.24.
    MI = numpy.full([len(features)], numpy.nan, dtype=float)
    for i, X_i in enumerate(features):
        MI[i] = pymit.I(X[:, X_i], Y, bins=[bins, 2])
    return [MI]
Exemplo n.º 5
0
# JMI feature ranking script: discretise X, seed with the feature that has
# the highest mutual information with the labels, then score the remaining
# candidates against the features chosen so far.
# ``X`` and ``labels`` are expected to be defined earlier in the script.
Y = labels
bins = 10

[tmp, features] = X.shape
D = numpy.zeros([tmp, features])

# Discretise each feature column using its own histogram bin edges.
for i in range(features):
    N, E = numpy.histogram(X[:, i], bins=bins)
    D[:, i] = numpy.digitize(X[:, i], E, right=False)

max_features = 20
selected_features = []

# Builtin float: the ``numpy.float`` alias was removed in NumPy 1.24.
MI = numpy.full([features], numpy.nan, dtype=float)
for i in range(features):
    MI[i] = pymit.I(D[:, i], Y, bins=[bins, 2])

f = MI.argmax()
selected_features.append(f)

print("001 {:0>3d} {}".format(f, MI[f]))

# One feature is already chosen, so only max_features - 1 rounds remain.
for i in range(1, max_features):
    JMI = numpy.zeros([features], dtype=float)
    for X_k in range(features):
        if X_k in selected_features:
            continue

        for X_j in selected_features:
            sum1 = pymit.I(D[:, X_j], Y, bins=[bins, 2])
            sum2 = pymit.I_cond(D[:, X_k], Y, D[:, X_j], bins=[bins, 2, bins])
            # NOTE(review): the excerpt stops here; the step that folds
            # sum1/sum2 into JMI[X_k] and picks the next feature is not shown.
Exemplo n.º 6
0
# HJMI feature ranking script (excerpt). ``X``, ``Y``, ``D``, ``bins`` and
# ``features`` are expected to be defined earlier in the script.

# Discretise each feature column using its own histogram bin edges.
for i in range(features):
    N, E = numpy.histogram(X[:, i], bins=bins)
    D[:, i] = numpy.digitize(X[:, i], E, right=False)

max_features = 200
selected_features = []
j_h = 0  # best score of the previous round
hjmi = None  # score of the most recently accepted feature

for i in range(0, max_features):
    # Builtin float: the ``numpy.float`` alias was removed in NumPy 1.24.
    JMI = numpy.zeros([features], dtype=float)
    for X_k in range(features):
        if X_k in selected_features:
            continue
        jmi_1 = pymit.I(D[:, X_k], Y, bins=[bins, 2])
        jmi_2 = 0
        for X_j in selected_features:
            tmp1 = pymit.I(D[:, X_k], D[:, X_j], bins=[bins, bins])
            tmp2 = pymit.I_cond(D[:, X_k], D[:, X_j], Y, bins=[bins, bins, 2])
            jmi_2 += tmp1 - tmp2
        if len(selected_features) == 0:
            # First round: nothing selected yet, so no redundancy term.
            JMI[X_k] += j_h + jmi_1
        else:
            JMI[X_k] += j_h + jmi_1 - jmi_2 / len(selected_features)
    f = JMI.argmax()
    j_h = JMI[f]
    # Accept the candidate only while it improves the score by more than 3 %
    # relative to the last accepted one.
    if (hjmi is None) or ((j_h - hjmi) / hjmi > 0.03):
        hjmi = j_h
        selected_features.append(f)
        print("{:0>3d} {:>3d} {}".format(len(selected_features), f, j_h))
        # NOTE(review): the excerpt ends here; any stop condition for the
        # non-improving case (e.g. an else/break) is not visible.