Example #1
    def test_train_test_split(self):
        X = pd.DataFrame([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        train_X, test_X = train_test_split(X, test_size=0.2, random_state=42)
        expected_train_X = pd.DataFrame([6, 1, 8, 3, 10, 5, 4, 7]).set_index(pd.Series([5, 0, 7, 2, 9, 4, 3, 6]))
        expected_test_X = pd.DataFrame([9, 2]).set_index(pd.Series([8, 1]))
        pd.testing.assert_frame_equal(expected_train_X, train_X)
        pd.testing.assert_frame_equal(expected_test_X, test_X)
Example #2
def plot_learning_curves(model, X, y):
    # Split features and targets in one call so their rows stay aligned.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42)
    train_errors, val_errors = [], []

    # Refit the model on growing prefixes of the training set and record the
    # training and validation errors at each size.
    for m in range(1, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])
        y_val_predict = model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))

    plt.xlabel('Training set size', fontsize=14)
    plt.ylabel('RMSE', fontsize=14)
    plt.plot(np.sqrt(train_errors), 'r-', linewidth=2, label='train')
    plt.plot(np.sqrt(val_errors), 'b-', linewidth=2, label='val')
    plt.legend(loc='upper right', fontsize=14)
    plt.show()
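
A minimal usage sketch for the helper above, assuming scikit-learn's train_test_split and mean_squared_error are in scope and a LinearRegression model; the synthetic quadratic data and the seed are illustrative assumptions, not part of the original snippet:

# Illustrative only: the dataset and the model choice below are assumptions.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(42)
X = 6 * rng.rand(100, 1) - 3                            # 100 points in [-3, 3)
y = 0.5 * X[:, 0] ** 2 + X[:, 0] + 2 + rng.randn(100)   # noisy quadratic target

plot_learning_curves(LinearRegression(), X, y)          # train/val RMSE vs. set size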
Example #3
def main():
    """
    :brief: Trains an AdaBoost classifier on the bank dataset and reports its
            accuracy on the test set.
    """

    filename = os.path.join(datasets_path, 'bank.csv')
    x, y, headers = read_dataset(filename, header=True, x_y=True)

    # Split the data into a 70% training set and a 30% test set.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    adaboost_classifier = AdaBoostClassifier(
        base_estimator=DecisionTree(max_depth=1), n_estimators=5)
    adaboost_classifier.fit(x_train, y_train, headers=headers)
    predictions = adaboost_classifier.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy of AdaBoost Classifier: {:.2f}%".format(accuracy * 100))
Example #4
def main():
    """
    :brief: Trains a decision tree on the bank dataset and reports its
            accuracy on the test set.
    """

    filename = os.path.join(datasets_path, 'bank.csv')
    x, y, headers = read_dataset(filename, header=True, x_y=True)

    # Split the data into a 70% training set and a 30% test set.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

    decision_tree = DecisionTree(max_depth=4,
                                 min_samples_split=5,
                                 min_samples_leaf=2)
    decision_tree.fit(x_train, y_train, headers=headers)
    print(decision_tree)
    predictions = decision_tree.predict(x_test)
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy of Decision Tree: {:.2f}%".format(accuracy * 100))
Example #5
    def test_fit_predict(self):
        data = pd.read_csv('learnml/linear_model/tests/test_data.csv')

        train, test = train_test_split(data, test_size=0.2, random_state=42)

        X_train = train.drop('y', axis=1).values
        y_train = train['y'].values
        X_test = test.drop('y', axis=1).values
        y_test = test['y'].values

        # Standardize using statistics computed on the training set only.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        per_clf = Perceptron()
        per_clf.fit(X_train, y_train)
        y_pred = per_clf.predict(X_test)

        accuracy = np.mean(y_pred == y_test)
        self.assertTrue(accuracy >= 0.8)
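
The manual accuracy check, np.mean(y_pred == y_test), is equivalent to sklearn.metrics.accuracy_score for 1-D label arrays; a quick sketch with made-up labels:

# Both expressions compute the fraction of matching labels; the arrays are
# illustrative only.
import numpy as np
from sklearn.metrics import accuracy_score

y_test = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
assert np.mean(y_pred == y_test) == accuracy_score(y_test, y_pred) == 0.8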
Example #6
    def test_fit_predict(self):
        data = pd.read_csv('learnml/neural_network/tests/test_data.csv')

        # Pass test_size by keyword, matching the other tests.
        train, test = train_test_split(data, test_size=0.2, random_state=42)

        X_train = train.drop('y', axis=1).values
        y_train = train['y'].values
        X_test = test.drop('y', axis=1).values
        y_test = test['y'].values

        # Standardize using statistics computed on the training set only.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        nn_clf = NeuralNetwork(layer_dims=np.array([2, 1]),
                               learning_rate=1,
                               num_iterations=100)
        nn_clf.fit(X_train, y_train)
        y_pred = nn_clf.predict(X_test)

        accuracy = np.mean(y_pred == y_test)
        self.assertTrue(accuracy >= 0.8)
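
NeuralNetwork is the repository's own class, with layer_dims presumably giving the layer sizes. Purely for comparison, a rough analogue with scikit-learn's MLPClassifier; the moons dataset and every hyperparameter below are assumptions:

# Rough analogue using scikit-learn's MLPClassifier; the toy moons data and
# the hyperparameters are illustrative assumptions, not taken from NeuralNetwork.
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

X, y = make_moons(n_samples=200, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

clf = MLPClassifier(hidden_layer_sizes=(2,), max_iter=2000, random_state=42)
clf.fit(X_train, y_train)
print("Test accuracy: {:.2f}".format(clf.score(X_test, y_test)))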
Example #7
    def test_fit_predict(self):
        data = pd.read_csv('learnml/linear_model/tests/test_data.csv')

        train, test = train_test_split(data, test_size=0.2, random_state=42)

        X_train = train.drop('y', axis=1).values
        y_train = train['y'].values
        X_test = test.drop('y', axis=1).values
        y_test = test['y'].values

        # Standardize using statistics computed on the training set only.
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Check both regularization penalties as separate subtests.
        for penalty in ['l2', 'l1']:
            with self.subTest(penalty=penalty):
                log_clf = SGDClassifier(penalty=penalty, max_iter=5000, eta0=1)
                log_clf.fit(X_train, y_train)
                y_pred = log_clf.predict(X_test)

                accuracy = np.mean(y_pred == y_test)
                self.assertTrue(accuracy >= 0.8)
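
SGDClassifier here is presumably the repository's own class; scikit-learn's estimator of the same name takes very similar arguments. A comparison sketch under that assumption: the toy data, loss='log_loss', and the constant learning-rate schedule are mine, and note that in scikit-learn eta0 only takes effect for schedules other than the default 'optimal':

# Logistic regression trained by SGD with l2 and l1 penalties, using
# scikit-learn's SGDClassifier on illustrative toy data.
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=300, n_features=4, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

for penalty in ('l2', 'l1'):
    clf = SGDClassifier(loss='log_loss',            # 'log' in scikit-learn < 1.1
                        penalty=penalty, max_iter=5000,
                        learning_rate='constant', eta0=1)
    clf.fit(X_train, y_train)
    print(penalty, 'accuracy: {:.2f}'.format(clf.score(X_test, y_test)))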