Exemplo n.º 1
0
 def test_multilabel_with_auxiliary_learner_mmr(self):
     """Check the query sequence produced with the 'mmr' criterion.

     Only the first five entries of ``self.y`` are kept as labels; every
     other entry is replaced by ``None``. The sequence returned by
     ``run_qs`` must equal the recorded expected indices.
     """
     # Keep the first five labels and mask the rest with None.
     n_masked = len(self.y) - 5
     pool = Dataset(self.X, self.y[:5] + [None] * n_masked)

     # Learners are built in the same order as they are passed below.
     major = BinaryRelevance(
         LogisticRegression(solver='liblinear', multi_class="ovr"))
     auxiliary = BinaryRelevance(SVM(gamma="auto"))
     strategy = MultilabelWithAuxiliaryLearner(
         pool,
         major_learner=major,
         auxiliary_learner=auxiliary,
         criterion='mmr',
         random_state=1126)

     queried = run_qs(pool, strategy, self.y, self.quota)
     expected = np.array(
         [1258, 1461, 231, 1198, 1498, 1374, 955, 1367, 265, 144])
     assert_array_equal(queried, expected)
 def test_multilabel_with_auxiliary_learner_hlr(self):
     """Check the query sequence produced with the 'hlr' criterion.

     Only the first five entries of ``self.y`` are kept as labels; every
     other entry is replaced by ``None``. Both base estimators are
     created with their default arguments. The sequence returned by
     ``run_qs`` must equal the recorded expected indices.
     """
     # Keep the first five labels and mask the rest with None.
     n_masked = len(self.y) - 5
     pool = Dataset(self.X, self.y[:5] + [None] * n_masked)

     # Learners are built in the same order as they are passed below.
     major = BinaryRelevance(LogisticRegression())
     auxiliary = BinaryRelevance(SVM())
     strategy = MultilabelWithAuxiliaryLearner(
         pool,
         major_learner=major,
         auxiliary_learner=auxiliary,
         criterion='hlr',
         random_state=1126)

     queried = run_qs(pool, strategy, self.y, self.quota)
     expected = np.array(
         [701, 1403, 147, 897, 974, 1266, 870, 703, 292, 1146])
     assert_array_equal(queried, expected)
Exemplo n.º 3
0
 def test_multilabel_with_auxiliary_learner_shlr(self):
     """Check the query sequence produced with the 'shlr' criterion.

     Only the first five entries of ``self.y`` are kept as labels; every
     other entry is replaced by ``None``. The strategy is additionally
     given ``b=1.``. The sequence returned by ``run_qs`` must equal the
     recorded expected indices.
     """
     # Keep the first five labels and mask the rest with None.
     n_masked = len(self.y) - 5
     pool = Dataset(self.X, self.y[:5] + [None] * n_masked)

     # Learners are built in the same order as they are passed below.
     major = BinaryRelevance(
         LogisticRegression(solver='liblinear', multi_class="ovr"))
     auxiliary = BinaryRelevance(SVM(gamma="auto"))
     strategy = MultilabelWithAuxiliaryLearner(
         pool,
         major_learner=major,
         auxiliary_learner=auxiliary,
         criterion='shlr',
         b=1.,
         random_state=1126)

     queried = run_qs(pool, strategy, self.y, self.quota)
     expected = np.array(
         [1258, 805, 459, 550, 783, 964, 736, 1004, 38, 750])
     assert_array_equal(queried, expected)
Exemplo n.º 4
0
def main():
    """Compare six query strategies over repeated random train/test splits.

    For each of ten repetitions a fresh split is obtained from
    ``split_train_test``. Every strategy gets its own copy of the initial
    training pool and is passed to ``run`` together with the shared test
    set, labeler, model and quota. The second value returned by ``run``
    is collected per strategy and averaged element-wise across
    repetitions (``np.mean(..., axis=0)``).

    Every fifth value of each averaged curve is printed, and the full
    curves are plotted against the query numbers ``1..quota``.

    Strategies, in execution order: MMC, RandomSampling,
    MultilabelWithAuxiliaryLearner with the 'hlr', 'shlr' and 'mmr'
    criteria, and BinaryMinimization.
    """
    test_size = 0.25  # the percentage of samples in the dataset that will be
    # randomly selected and assigned to the test set
    quota = 150  # number of samples to query
    n_repeats = 10  # number of times the experiment is repeated

    def auxiliary_learner_strategy(criterion):
        """Return a factory for MultilabelWithAuxiliaryLearner with *criterion*."""
        # A nested function (rather than a lambda in a loop) binds
        # `criterion` per factory and avoids late-binding surprises.
        def build(dataset):
            return MultilabelWithAuxiliaryLearner(
                dataset,
                BinaryRelevance(LogisticRegression()),
                BinaryRelevance(SVM()),
                criterion=criterion)
        return build

    # One row per strategy:
    # (console label, legend label, matplotlib format string, factory).
    # Each factory takes the training pool and returns the query strategy.
    # The row order fixes the order in which strategies are built and run.
    strategies = [
        ("MMC: ", 'MMC', 'g',
         lambda dataset: MMC(dataset, br_base=LogisticRegression())),
        ("Random: ", 'Random', 'k', RandomSampling),
        ("MultilabelWithAuxiliaryLearner_hlr: ", 'AuxiliaryLearner_hlr', 'r',
         auxiliary_learner_strategy('hlr')),
        ("MultilabelWithAuxiliaryLearner_shlr: ", 'AuxiliaryLearner_shlr', 'b',
         auxiliary_learner_strategy('shlr')),
        ("MultilabelWithAuxiliaryLearner_mmr: ", 'AuxiliaryLearner_mmr', 'c',
         auxiliary_learner_strategy('mmr')),
        ("BinaryMinimization: ", 'BinaryMinimization', 'm',
         lambda dataset: BinaryMinimization(dataset, LogisticRegression())),
    ]

    # losses[k] collects one result per repetition for strategies[k].
    losses = [[] for _ in strategies]

    for _ in range(n_repeats):  # repeat experiment
        trn_ds, tst_ds, fully_labeled_trn_ds = split_train_test(test_size)

        # The first strategy works on the split's own pool and the others
        # on deep copies. All copies are taken before any run starts, so
        # every strategy begins from the same initial pool.
        pools = [trn_ds]
        pools.extend(copy.deepcopy(trn_ds) for _ in strategies[1:])

        lbr = IdealLabeler(fully_labeled_trn_ds)
        # The same model object is handed to every run of this repetition.
        model = BinaryRelevance(LogisticRegression())

        for history, pool, strategy in zip(losses, pools, strategies):
            make_qs = strategy[3]
            # The query strategy is created right before its own run.
            _, e_out = run(pool, tst_ds, lbr, model, make_qs(pool), quota)
            history.append(e_out)

    # Average across repetitions, element-wise along the result sequence.
    mean_losses = [np.mean(history, axis=0) for history in losses]

    for strategy, curve in zip(strategies, mean_losses):
        print(strategy[0], curve[::5].tolist())

    query_num = np.arange(1, quota + 1)
    plt.figure(figsize=(9, 6))
    ax = plt.subplot(111)
    for strategy, curve in zip(strategies, mean_losses):
        ax.plot(query_num, curve, strategy[2], label=strategy[1])

    # Shrink the axes to 75% width to leave room for the legend, which is
    # anchored outside the plot area on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
    plt.legend(loc=2, bbox_to_anchor=(1.05, 1), borderaxespad=0.)
    plt.xlabel('Number of Queries')
    plt.ylabel('Loss')
    plt.title('Experiment Result (Hamming Loss)')
    plt.show()
Exemplo n.º 5
0
'''
MAIN FUNCTION
'''

# Empty result lists, one per key (presumably metric names -- confirm
# against the code that fills them).
result = {
    'Hamming': [],
    'F1': [],
}

# Model that is trained on the pool after the querying step below.
model = BinaryRelevance(LogisticRegression())

quota = 20  # number of samples to query


# EXECUTE FROM HERE FOR ITERATIONS

# Query strategy using the 'hlr' criterion with a logistic-regression
# major learner and an SVM auxiliary learner.
qs1 = MultilabelWithAuxiliaryLearner(
    trn_ds,
    BinaryRelevance(LogisticRegression()),
    BinaryRelevance(SVM()),
    criterion='hlr')

run(data_CV_train, trn_ds, qs1, quota)

model.train(trn_ds)

# NOTE(review): assumes get_labeled_entries() yields (features, label)
# pairs, so unzipping gives all features in X and all labels in y.
X, y = zip(*tst_ds.get_labeled_entries())

pred = model.predict(X)

# One output column per predicted position (0, 1, 2) of each row.
output = pd.DataFrame()
output['UE_pred'] = [row[0] for row in pred]
output['BR_pred'] = [row[1] for row in pred]
output['FR_pred'] = [row[2] for row in pred]