Exemplo n.º 1
0
    def test_binary_relevance_lr(self):
        """Check BinaryRelevance against one LogisticRegression per label column.

        A BinaryRelevance model is trained on the full label matrix; then,
        for every column of ``Y_train``, a separate
        ``sklearn.linear_model.LogisticRegression`` is fitted on that column
        alone and its predictions / second ``predict_proba`` column must
        equal the matching column of the BinaryRelevance output exactly.
        Finally the 'hamming' score is compared with a hand-computed value
        and an unknown criterion is expected to raise NotImplementedError.
        """
        seed = 1126
        model = BinaryRelevance(base_clf=LogisticRegression(random_state=seed))
        model.train(Dataset(self.X_train, self.Y_train))

        # Train split first, then test split, for both kinds of output.
        splits = (self.X_train, self.X_test)
        labels_train, labels_test = [
            model.predict(features).astype(int) for features in splits
        ]
        proba_train, proba_test = [
            model.predict_proba(features).astype(float) for features in splits
        ]

        label_checks = ((self.X_train, labels_train),
                        (self.X_test, labels_test))
        proba_checks = ((self.X_train, proba_train),
                        (self.X_test, proba_test))

        n_labels = np.shape(self.Y_train)[1]
        for col in range(n_labels):
            # Reference model fitted on a single label column.
            reference = sklearn.linear_model.LogisticRegression(
                random_state=seed)
            reference.fit(self.X_train, self.Y_train[:, col])

            for features, br_labels in label_checks:
                assert_array_equal(reference.predict(features).astype(int),
                                   br_labels[:, col])

            for features, br_proba in proba_checks:
                assert_array_equal(
                    reference.predict_proba(features)[:, 1].astype(float),
                    br_proba[:, col])

        # Mean over samples of the per-sample mean absolute label difference.
        per_sample_error = np.abs(self.Y_test - labels_test).mean(axis=1)
        self.assertEqual(
            np.mean(per_sample_error),
            model.score(Dataset(self.X_test, self.Y_test), 'hamming'))

        with self.assertRaises(NotImplementedError):
            model.score(Dataset(self.X_test, self.Y_test),
                        criterion='not_exist')
Exemplo n.º 2
0
    def test_binary_relevance_lr(self):
        """Check BinaryRelevance against one LogisticRegression per label column.

        A BinaryRelevance model is trained on the full label matrix; then,
        for every column of ``Y_train``, a separate
        ``sklearn.linear_model.LogisticRegression`` is fitted on that column
        alone and its predictions / second ``predict_proba`` column must
        equal the matching column of the BinaryRelevance output exactly.
        Finally the 'hamming' score is compared with a hand-computed value
        and an unknown criterion is expected to raise NotImplementedError.
        """
        seed = 1126
        model = BinaryRelevance(base_clf=LogisticRegression(random_state=seed))
        model.train(Dataset(self.X_train, self.Y_train))

        # Train split first, then test split, for both kinds of output.
        splits = (self.X_train, self.X_test)
        labels_train, labels_test = [
            model.predict(features).astype(int) for features in splits
        ]
        proba_train, proba_test = [
            model.predict_proba(features).astype(float) for features in splits
        ]

        label_checks = ((self.X_train, labels_train),
                        (self.X_test, labels_test))
        proba_checks = ((self.X_train, proba_train),
                        (self.X_test, proba_test))

        n_labels = np.shape(self.Y_train)[1]
        for col in range(n_labels):
            # Reference model fitted on a single label column.
            reference = sklearn.linear_model.LogisticRegression(
                random_state=seed)
            reference.fit(self.X_train, self.Y_train[:, col])

            for features, br_labels in label_checks:
                assert_array_equal(reference.predict(features).astype(int),
                                   br_labels[:, col])

            for features, br_proba in proba_checks:
                assert_array_equal(
                    reference.predict_proba(features)[:, 1].astype(float),
                    br_proba[:, col])

        # Mean over samples of the per-sample mean absolute label difference.
        per_sample_error = np.abs(self.Y_test - labels_test).mean(axis=1)
        self.assertEqual(
            np.mean(per_sample_error),
            model.score(Dataset(self.X_test, self.Y_test), 'hamming'))

        with self.assertRaises(NotImplementedError):
            model.score(Dataset(self.X_test, self.Y_test),
                        criterion='not_exist')
Exemplo n.º 3
0
# Predict on X and spread the first three entries of every prediction row
# into their own output columns.
pred = model.predict(X)

output = pd.DataFrame()
for col_idx, col_name in enumerate(('UE_pred', 'BR_pred', 'FR_pred')):
    output[col_name] = [row[col_idx] for row in pred]

# Rows of Y selected by test_index, renumbered from 0 so that they line up
# with the freshly built `output` frame in the concat below.
true = Y.iloc[test_index].reset_index(drop=True)

# np.array() drops the original index so the text is assigned by position.
output['reviewText'] = np.array(data_CV_concat.iloc[test_index]['reviewText'])

output = pd.concat([output, true], axis=1)

output.to_csv('output_test.csv')

score_hamming = model.score(tst_ds, criterion='hamming')
score_f1 = model.score(tst_ds, criterion='f1')

result['Hamming'].append(score_hamming)
result['F1'].append(score_f1)

# Write the label columns of data_CV_train back into the train_index rows of
# `data`. The previous form, data.iloc[train_index][cols] = ..., assigned
# into the temporary copy returned by .iloc[...], so `data` was never
# updated before being saved. A single .loc call modifies `data` itself.
# NOTE(review): the assignment aligns on index labels, so this assumes
# data_CV_train still carries the index labels of data.iloc[train_index]
# and that data.index is unique. If data_CV_train was re-indexed, assign
# data_CV_train[label_cols].to_numpy() instead - confirm against the code
# that builds data_CV_train.
label_cols = ['UE', 'BR', 'FR']
data.loc[data.index[train_index], label_cols] = data_CV_train[label_cols]
data.to_csv('data_new.csv')

print('The result of Hamming scores is ',result['Hamming'])
print('The result of F1 scores is ',result['F1'])