def test_performances():
    """Check FraudToRules predictions on an imbalanced two-blob dataset.

    Verifies the shape of ``predict``'s output, that training-set accuracy
    exceeds 0.83, and that thresholding the negated ``decision_function``
    at zero reproduces ``predict``.
    """
    X, y = make_blobs(n_samples=1000, random_state=0, centers=2)

    # Make the labels imbalanced: the first 100 rows are kept whatever their
    # class, and every class-1 row after them is dropped.
    indexes = np.ones(X.shape[0]).astype(bool)
    ind = np.array([False] * 100 + list((y == 1)[100:]))
    indexes[ind] = 0
    X = X[indexes]
    y = y[indexes]
    n_samples, n_features = X.shape

    clf = FraudToRules()
    # Fit on arrays ...
    clf.fit(X, y)
    # ... and again on plain Python lists, which must be accepted as well.
    clf.fit(X.tolist(), y.tolist())

    y_pred = clf.predict(X)
    assert_equal(y_pred.shape, (n_samples, ))

    # Training-set performance.
    assert_greater(accuracy_score(y, y_pred), 0.83)

    # decision_function must agree with predict.
    decision = -clf.decision_function(X)
    assert_equal(decision.shape, (n_samples, ))
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    dec_pred = (decision.ravel() < 0).astype(int)
    assert_array_equal(dec_pred, y_pred)
def test_fraudetorules_works():
    """Check FraudToRules on a toy sample whose last two points are outliers.

    After fitting on eight labelled 2-D points, the test asserts that the
    largest negated decision value among the first six test points exceeds
    the smallest negated decision value among the last two, and that
    ``predict`` labels the first six points 0 and the last two 1.
    """
    # Toy training sample (the last two samples are outliers).
    train_points = [
        [-2, -1], [-1, -1], [-1, -2],
        [1, 1], [1, 2], [2, 1],
        [6, 3], [4, -7],
    ]
    train_labels = [0, 0, 0, 0, 0, 0, 1, 1]

    test_points = [
        [-2, -1], [-1, -1], [-1, -2],
        [1, 1], [1, 2], [2, 1],
        [10, 5], [5, -7],
    ]
    expected_labels = [0, 0, 0, 0, 0, 0, 1, 1]

    # Fit FraudToRules using the whole sample (max_samples=1.).
    model = FraudToRules(random_state=rng, max_samples=1.)
    model.fit(train_points, train_labels)

    negated_scores = -model.decision_function(test_points)
    predicted = model.predict(test_points)

    # Outliers must be detected: compare the first six scores with the last two.
    first_six_max = np.max(negated_scores[:-2])
    last_two_min = np.min(negated_scores[-2:])
    assert_greater(first_six_max, last_two_min)

    assert_array_equal(predicted, expected_labels)