def test_check_consistent_shape(self):
    """Verify ``check_consistent_shape`` on matching and mismatched inputs.

    Matching inputs must come back numerically unchanged; inputs whose
    shapes disagree must raise ``ValueError``.

    NOTE(review): another ``test_check_consistent_shape`` appears later in
    this source. If both live in the same class, the later definition
    replaces this one and the feature-count case below never runs.
    """
    X_train, y_train, X_test, y_test = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
    )

    # The true labels stand in for the predictions, so every argument
    # agrees in shape with its partner.
    given = (X_train, y_train, X_test, y_test, y_train, y_test)
    (X_train_n, y_train_n, X_test_n, y_test_n,
     y_train_pred_n, y_test_pred_n) = check_consistent_shape(*given)

    returned = (X_train_n, y_train_n, X_test_n, y_test_n,
                y_train_pred_n, y_test_pred_n)
    for actual, expected in zip(returned, given):
        assert_allclose(actual, expected)

    # Passing the training labels in the X_test slot must be rejected.
    with assert_raises(ValueError):
        check_consistent_shape(X_train, y_train, y_train, y_test,
                               y_train, y_test)

    # Give X_test one extra (all-zero) feature column so that its width
    # no longer matches X_train.
    extra_feature = np.zeros((X_test.shape[0], 1))
    X_test = np.hstack((X_test, extra_feature))
    with assert_raises(ValueError):
        check_consistent_shape(X_train, y_train, X_test, y_test,
                               y_train_pred_n, y_test_pred_n)
def test_data_generate2(self):
    """Check that ``generate_data`` honours an explicit ``n_features``."""
    n_features = 3
    # Only the feature matrices are inspected; the labels are unpacked
    # solely to keep the four-value return contract exercised.
    X_train, _y_train, X_test, _y_test = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        n_features=n_features,
        contamination=self.contamination,
    )

    expected_train_shape = (self.n_train, n_features)
    expected_test_shape = (self.n_test, n_features)
    assert_allclose(X_train.shape, expected_train_shape)
    assert_allclose(X_test.shape, expected_test_shape)
def test_data_generate3(self):
    """Check that a fixed ``random_state`` makes ``generate_data`` repeatable."""
    settings = dict(
        n_train=self.n_train,
        n_test=self.n_test,
        n_features=2,
        contamination=self.contamination,
        random_state=42,
    )

    # Two independent calls with identical arguments, including the seed.
    X_train, y_train, X_test, y_test = generate_data(**settings)
    X_train2, y_train2, X_test2, y_test2 = generate_data(**settings)

    # Feature matrices are compared first, then the label vectors.
    comparisons = (
        (X_train, X_train2),
        (X_test, X_test2),
        (y_train, y_train2),
        (y_test, y_test2),
    )
    for first_run, second_run in comparisons:
        assert_allclose(first_run, second_run)
def test_get_outliers_inliers(self):
    """Check that ``get_outliers_inliers`` splits the training samples.

    The expectation encoded here is that the leading
    ``int(n_train * (1 - contamination))`` rows of ``X_train`` are the
    inliers and the remaining rows are the outliers.

    NOTE(review): an identical ``test_get_outliers_inliers`` follows in
    this source; if both sit in the same class one of them is redundant.
    """
    X_train, y_train = generate_data(
        n_train=self.n_train,
        train_only=True,
        contamination=self.contamination,
    )

    # Row index at which the outliers are expected to begin.
    boundary = int(self.n_train * (1 - self.contamination))

    X_outliers, X_inliers = get_outliers_inliers(X_train, y_train)

    assert_allclose(X_train[:boundary, :], X_inliers)
    assert_allclose(X_train[boundary:, :], X_outliers)
def test_get_outliers_inliers(self):
    """Check the inlier/outlier partition returned by ``get_outliers_inliers``.

    The leading ``int(n_train * (1 - contamination))`` rows of ``X_train``
    are expected to be returned as inliers, the remaining rows as outliers.

    NOTE(review): an identical ``test_get_outliers_inliers`` precedes this
    one in the source; if both sit in the same class, only this later
    definition is kept.
    """
    X_train, y_train = generate_data(
        n_train=self.n_train,
        train_only=True,
        contamination=self.contamination,
    )
    X_outliers, X_inliers = get_outliers_inliers(X_train, y_train)

    # Number of rows expected to be inliers, truncated toward zero.
    n_inliers = int(self.n_train * (1 - self.contamination))
    expected_inliers = X_train[:n_inliers, :]
    expected_outliers = X_train[n_inliers:, :]

    assert_allclose(expected_inliers, X_inliers)
    assert_allclose(expected_outliers, X_outliers)
def test_data_generate(self):
    """Check sizes, feature count and outlier ratio of ``generate_data``.

    For both the train and the test split this verifies that labels and
    samples have the same length, that at most one requested sample is
    missing, that there are two features, and that the fraction of
    positive labels is within 0.01 of ``self.contamination``.
    """
    X_train, y_train, X_test, y_test = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
    )

    # (features, labels, requested sample count) for each split.
    splits = (
        (X_train, y_train, self.n_train),
        (X_test, y_test, self.n_test),
    )

    # Every sample must have a label.
    for features, labels, _ in splits:
        assert_equal(labels.shape[0], features.shape[0])

    # The split may be short by at most one sample and has two features.
    for features, _, requested in splits:
        assert_less_equal(requested - features.shape[0], 1)
        assert_equal(features.shape[1], 2)

    # The share of positive labels must match the requested contamination.
    for _, labels, requested in splits:
        out_perc = np.sum(labels) / requested
        assert_allclose(self.contamination, out_perc, atol=0.01)
def test_check_consistent_shape(self):
    """Verify ``check_consistent_shape`` on matching and mismatched inputs.

    Matching inputs must come back numerically unchanged; passing a label
    vector where the test feature matrix belongs must raise ``ValueError``.

    NOTE(review): an earlier ``test_check_consistent_shape`` in this source
    also covers a feature-count mismatch. If both live in the same class,
    this later definition replaces it and that case is lost.
    """
    X_train, y_train, X_test, y_test = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
    )

    # The true labels are reused as the predictions so that all six
    # arguments are mutually consistent.
    arguments = (X_train, y_train, X_test, y_test, y_train, y_test)
    (X_train_n, y_train_n, X_test_n, y_test_n,
     y_train_pred_n, y_test_pred_n) = check_consistent_shape(*arguments)

    results = (X_train_n, y_train_n, X_test_n, y_test_n,
               y_train_pred_n, y_test_pred_n)
    for result, original in zip(results, arguments):
        assert_allclose(result, original)

    # y_train in the X_test position gives inconsistent shapes.
    with assert_raises(ValueError):
        check_consistent_shape(X_train, y_train, y_train, y_test,
                               y_train, y_test)
def test_evaluate_print(self):
    """Smoke-test ``evaluate_print``: the call must complete without raising."""
    # Only the training labels are needed; the other outputs are unpacked
    # to keep the four-value return contract exercised.
    _X_train, y_train, _X_test, _y_test = generate_data(
        n_train=self.n_train,
        n_test=self.n_test,
        contamination=self.contamination,
    )

    # Scaled-down labels serve as the scores handed to the evaluator.
    scores = y_train * 0.1
    evaluate_print('dummy', y_train, scores)
# Demo script: fit a LinearRegressor on generated two-feature data, then show
# three figures in turn -- the recorded loss, the recorded accuracy, and a
# scatter plot of the raw samples.
import numpy as np
from sklearn.utils import shuffle
from matplotlib import pyplot as plt
from linear_regression.linear_regression import LinearRegressor
from utils.data import generate_data

x, y = generate_data()
# Shuffle samples and targets together; the fixed random_state makes the
# permutation reproducible between runs.
x_shuffle, y_shuffle = shuffle(x, y, random_state=0)

lr = LinearRegressor(input_size=2)
lr.train(x_shuffle, y_shuffle, iter=10000, alpha=0.001)

# Figure 1: the values collected in lr.historical_error.
plt.title("Loss per Iteration")
plt.xlabel("Iteration(s)")
plt.ylabel("Loss")
plt.plot(lr.historical_error)
plt.show()

# NOTE(review): t and s are not used anywhere below in the visible script --
# confirm whether the boundary was meant to be drawn on one of the figures.
t, s = lr.decision_boundry(start=-0.4, stop=1, step=0.01)

# Figure 2: the values collected in lr.historical_accuracy.
# NOTE(review): the title says "Epoch" while the x-axis label says
# "Iteration(s)" -- confirm which unit historical_accuracy is recorded in.
plt.title("Accuracy per Epoch")
plt.plot(lr.historical_accuracy)
plt.xlabel("Iteration(s)")
plt.ylabel("Accuracy")
plt.show()

# Figure 3: the unshuffled samples, rows 0-499 drawn as one class and rows
# 500-999 as the other.
# NOTE(review): assumes generate_data() returns the rows ordered by class
# with 500 samples each -- TODO confirm against utils.data.
plt.title('Classification Data')
plt.scatter(x[0:500, 0], x[0:500, 1], label='Class 1', marker='x')
plt.scatter(x[500:1000, 0], x[500:1000, 1], label='Class 2', marker='v')
plt.legend(['Class 1', 'Class 2'], loc='lower left')
plt.show()