def test_save_load_classifier(self):
        """Check that a classifier survives a save_weights/load_weights round trip.

        A classifier is fitted on an 80/20 split of the breast cancer
        dataset, its weights are written to ``classifier.fm`` inside
        ``self.workspace``, and a freshly constructed classifier loads them
        back. The test passes when every weight array matches exactly and
        the accuracy on the held-out split is unchanged.
        """
        features, labels = datasets.load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.2)
        train_features, test_features, train_labels, test_labels = split
        n_features = features.shape[1]

        # Train the reference model and record its state before saving.
        original = pyfms.Classifier(n_features, k=4)
        original.fit(train_features, train_labels, nb_epoch=1000)
        original_weights = original.get_weights()
        original_accuracy = accuracy_score(
            test_labels, original.predict(test_features))

        weights_path = os.path.join(self.workspace, 'classifier.fm')
        original.save_weights(weights_path)

        # The restored model is built without an explicit k; everything it
        # knows must come from the weights file.
        restored = pyfms.Classifier(n_features)
        restored.load_weights(weights_path)
        restored_weights = restored.get_weights()
        restored_accuracy = accuracy_score(
            test_labels, restored.predict(test_features))

        for saved, loaded in zip(original_weights, restored_weights):
            np.testing.assert_array_equal(saved, loaded)
        self.assertEqual(original_accuracy, restored_accuracy)
    def test_weighted_classifier(self):
        """Fit a classifier with per-sample weights and pin its test accuracy.

        Each training instance is weighted by the reciprocal of its class
        size, so both classes carry the same total weight. The NumPy seed is
        fixed up front and the resulting accuracy on the 20% held-out split
        is compared against a recorded value.
        """
        np.random.seed(0)
        features, labels = datasets.load_breast_cancer(return_X_y=True)
        split = train_test_split(features, labels, test_size=0.2)
        train_features, test_features, train_labels, test_labels = split

        # Map each class label to its number of training instances, then
        # weight every instance by 1 / (size of its class).
        classes, counts = np.unique(train_labels, return_counts=True)
        per_class_count = dict(zip(classes, counts))
        instance_weights = np.array(
            [1.0 / per_class_count[label] for label in train_labels])

        model = pyfms.Classifier(features.shape[1])
        model.fit(train_features,
                  train_labels,
                  sample_weight=instance_weights,
                  nb_epoch=10000)

        predictions = model.predict(test_features)
        self.assertAlmostEqual(accuracy_score(test_labels, predictions),
                               0.9649122807017544)
    def test_sparse_classifier(self):
        """Fit a classifier on CSR-format input and pin its test accuracy.

        The Boston housing target is binarized at 30, the feature matrix is
        cut down to its first five columns and converted to a SciPy CSR
        matrix, and a ``k=2`` classifier configured with ``X_format="csr"``
        is trained on an 80/20 split. The NumPy seed is fixed so the
        accuracy can be compared against a recorded value.

        NOTE(review): ``datasets.load_boston`` has been removed from recent
        scikit-learn releases -- confirm the version this suite is pinned to.
        """
        np.random.seed(0)
        features, target = datasets.load_boston(return_X_y=True)
        labels = target > 30  # Binarize target
        # Columns 1 and 3 (0-indexed) are sparse.
        # Keep only the first 5 columns for a higher sparsity ratio.
        sparse_features = sparse.csr_matrix(features[:, :5])
        split = train_test_split(sparse_features, labels, test_size=0.2)
        train_features, test_features, train_labels, test_labels = split
        # Make sure both splits are in CSR form before they reach the model.
        train_features = sparse.csr_matrix(train_features)
        test_features = sparse.csr_matrix(test_features)

        n_features = sparse_features.shape[1]
        model = pyfms.Classifier(n_features, k=2, X_format="csr")
        model.fit(train_features, train_labels, nb_epoch=20000)

        predictions = model.predict(test_features)
        self.assertAlmostEqual(accuracy_score(test_labels, predictions),
                               0.8725490196078431)
# Example no. 4
0
# Train the factorization machine regressor with an L2 regularizer.
# NOTE(review): the three L2 arguments presumably apply to the bias, linear
# and interaction terms respectively -- confirm against pyfms.regularizers.
reg = pyfms.regularizers.L2(0, 0, .01)
fm_regressor.fit(X_train, y_train, nb_epoch=50000, regularizer=reg)
fm_mse = mean_squared_error(y_test, fm_regressor.predict(X_test))
print('  Factorization Machine MSE: {}'.format(fm_mse))

# Baseline: ordinary linear regression on the same train/test split.
linear_regression = LinearRegression()
linear_regression.fit(X_train, y_train)
linear_mse = mean_squared_error(y_test, linear_regression.predict(X_test))
print('  Linear Regression MSE: {}'.format(linear_mse))

print('\n*** Binary Classification Example (with verbose output) ***')

# Hold out 20% of the breast cancer dataset for evaluating both models.
X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Factorization machine with one input dimension per feature column.
# NOTE(review): verbosity=2000 presumably reports progress every 2000
# epochs -- confirm against the pyfms documentation.
fm_classifier = pyfms.Classifier(X.shape[1])
fm_classifier.fit(X_train, y_train, verbosity=2000, nb_epoch=10000)
print('  Factorization Machine Error: {}'.format(
    error_score(y_test, fm_classifier.predict(X_test))))

# Baseline: fit on the training split only. Fitting on the full (X, y) would
# expose the held-out rows to the model and understate its test error,
# making the comparison with the factorization machine unfair.
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)
print('  Logistic Regression Error: {}'.format(
    error_score(y_test, logistic_regression.predict(X_test))))

print('\n*** Saving Model Example ***')

# Save the factorization machine classifier that was trained earlier.
# "weights.fm" is a relative path, so the file is created relative to the
# current working directory.

f = "weights.fm"
fm_classifier.save_weights(f)
# Columns 1 and 3 (0-indexed) are sparse.
# Keep only the first 5 columns for a higher sparsity ratio, stored as CSR.
X = sparse.csr_matrix(X[:, :5])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Re-weight instances so that each class gets equal total weighting:
# every training instance is weighted by 1 / (size of its class).
class_labels, class_counts = np.unique(y_train, return_counts=True)
class_count_lookup = dict(zip(class_labels, class_counts))
sample_weight = np.array(
    [1.0 / class_count_lookup[label] for label in y_train])

# Make sure both splits are in CSR form before they reach the models.
X_train = sparse.csr_matrix(X_train)
X_test = sparse.csr_matrix(X_test)

# Factorization machine configured for CSR input.
classifier_dims = X.shape[1]
fm_classifier = pyfms.Classifier(classifier_dims, k=2, X_format="csr")
fm_classifier.fit(X_train,
                  y_train,
                  sample_weight=sample_weight,
                  nb_epoch=20000)
fm_error = error_score(y_test, fm_classifier.predict(X_test))
print('Factorization Machine Error: {}'.format(fm_error))

# Baseline: logistic regression trained with the same sample weights.
logistic_regression = LogisticRegression(solver='lbfgs')
logistic_regression.fit(X_train, y_train, sample_weight=sample_weight)
logistic_error = error_score(y_test, logistic_regression.predict(X_test))
print('Logistic Regression Error: {}'.format(logistic_error))
print()

# Banner announcing the regression example section.
print('*******************************************')
print('* Regression Example')