def main(): print("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names) print("-- Gradient Boosting Regression --") X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5) clf = GradientBoostingRegressor(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print("Mean Squared Error:", mse) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.title("Gradient Boosting Regression (%.2f MSE)" % mse) plt.show()
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=4000, learning_rate=0.01, plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend=True)

def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Optimization method for finding the weights that minimize the loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))

def main(): print("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main(): # Load temperature data data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].as_matrix()).T temp = np.atleast_2d(data["temp"].as_matrix()).T X = time y = temp X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = PolynomialRegression(degree=2, n_iterations=3000) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) # Print the mean squared error print("Mean Squared Error:", mse) # Plot the results m = plt.scatter(X_test[:, 0], y_test, color='gray', s=10) p = plt.scatter(X_test[:, 0], y_pred, color='black', s=15) plt.suptitle( "Linear Regression of temperature data in Linkoping, Sweden 2016") plt.title("(%.2f MSE)" % mse) plt.xlabel('Fraction of year') plt.ylabel('Temperature in Celcius') plt.legend((m, p), ("Measurements", "Prediction"), scatterpoints=1, loc='lower right') plt.show()
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
                               n_iterations=5000,
                               learning_rate=0.01,
                               early_stopping=True,
                               plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))

def main():
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)

    clf = RidgeRegression()
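    # --- Assumed continuation (not part of the original snippet) ---
    # The demo above ends at the classifier construction. A minimal sketch of
    # how it would likely finish, mirroring the fit / predict / MSE pattern of
    # the other regression demos in this collection.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)
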
def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] # Change class labels from strings to numbers # df = df.replace(to_replace="setosa", value="-1") # df = df.replace(to_replace="virginica", value="1") # df = df.replace(to_replace="versicolor", value="2") # Only select data for two classes #X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix() #y = df.loc[df['species'] != "2"]["species"].as_matrix() X = X[y != 2] y = y[y != 2] y[y == 0] = -1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Adaboost classification clf = Adaboost(n_clf=8) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimensions to 2d using pca and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)

def main(): print("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print("Accuracy:", accuracy_score(y_test, y_pred)) pca = PCA() pca.plot_in_2d(X_test, y_pred) print("-- Regression Tree --") X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3) clf = RegressionTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print("Mean Squared Error:", mean_squared_error(y_test, y_pred)) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.show()
def main(): print ("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names) print ("-- Gradient Boosting Regression --") X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5) clf = GradientBoostingRegressor(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.title("Gradient Boosting Regression (%.2f MSE)" % mse) plt.show()
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print(accuracy_score(y_pred, y_test))

def main():
    iris = load_iris()
    X = normalize(iris.data)
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)

    knn = KNN(3)
    y_pred = knn.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_pred, y_test)
    print("accuracy is ", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="knn", accuracy=accuracy)

def main():
    iris = datasets.load_iris()
    X = normalize(iris.data)
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = RandomForest(n_estimators=50, debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main(): # Load the diabetes dataset X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) # Finding regularization constant using cross validation lowest_error = float("inf") best_reg_factor = None print("Finding regularization constant using cross validation:") k = 10 for regularization_factor in np.arange(0, 0.3, 0.001): cross_validation_sets = k_fold_cross_validation_sets(X_train, y_train, k=k) mse = 0 for _X_train, _X_test, _y_train, _y_test in cross_validation_sets: clf = RidgeRegression(delta=regularization_factor) clf.fit(_X_train, _y_train) y_pred = clf.predict(_X_test) _mse = mean_squared_error(_y_test, y_pred) mse += _mse mse /= k # Print the mean squared error print("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor)) # Save reg. constant that gave lowest error if mse < lowest_error: best_reg_factor = regularization_factor lowest_error = mse # Make final prediction clf = RidgeRegression(delta=best_reg_factor) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor)) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3) plt.title("Ridge Regression (%.2f MSE)" % mse) plt.show()
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = PolynomialRidgeRegression(reg_factor=0.1,
                                      degree=3,
                                      n_iterations=100,
                                      learning_rate=0.001)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % mse)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(X_train, y_train, n_iterations=4000, learning_rate=0.01)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)

def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Perceptron
    clf = Perceptron()
    clf.fit(X_train, y_train, plot_errors=True)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = DecisionTree() clf.fit(X_train, y_train) # clf.print_tree() y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main():
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Print the mean squared error
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()

def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X = normalize(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Perceptron clf = Perceptron() clf.fit(X_train, y_train, plot_errors=True) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimension to two using PCA and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main(): # Load temperature data data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].as_matrix()).T temp = np.atleast_2d(data["temp"].as_matrix()).T X = time # fraction of the year [0, 1] y = temp X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = PolynomialRegression(degree=6, n_iterations=100000) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) # Generate data for prediction line X_pred_ = np.arange(0, 1, 0.001).reshape((1000, 1)) y_pred_ = clf.predict(X=X_pred_) # Print the mean squared error print("Mean Squared Error:", mse) # Color map cmap = plt.get_cmap('viridis') # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) p = plt.plot(366 * X_pred_, y_pred_, color="black", linewidth=2, label="Prediction") plt.suptitle("Polynomial Regression") plt.title("MSE: %.2f" % mse) plt.xlabel('Days') plt.ylabel('Temperature in Celcius') plt.legend(loc='lower right') plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right') plt.show()
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)

def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)

def main(): # Load the diabetes dataset X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) # Finding regularization constant using cross validation lowest_error = float("inf") best_reg_factor = None print ("Finding regularization constant using cross validation:") k = 10 for regularization_factor in np.arange(0, 0.3, 0.001): cross_validation_sets = k_fold_cross_validation_sets( X_train, y_train, k=k) mse = 0 for _X_train, _X_test, _y_train, _y_test in cross_validation_sets: clf = RidgeRegression(delta=regularization_factor) clf.fit(_X_train, _y_train) y_pred = clf.predict(_X_test) _mse = mean_squared_error(_y_test, y_pred) mse += _mse mse /= k # Print the mean squared error print ("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor)) # Save reg. constant that gave lowest error if mse < lowest_error: best_reg_factor = regularization_factor lowest_error = mse # Make final prediction clf = RidgeRegression(delta=best_reg_factor) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor)) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3) plt.title("Ridge Regression (%.2f MSE)" % mse) plt.show()
def main():
    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = LinearRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()

def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.01,
                     early_stopping=True,
                     plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))

def main():
    df = pd.read_csv(dir_path + "/../data/iris.csv")

    # Change class labels from strings to numbers
    df = df.replace(to_replace="setosa", value="-1")
    df = df.replace(to_replace="virginica", value="1")
    df = df.replace(to_replace="versicolor", value="2")

    # Only select data for two classes
    X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix()
    y = df.loc[df['species'] != "2"]["species"].as_matrix()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Adaboost classification
    clf = Adaboost(n_clf=8)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

def main(): print ("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)

def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    y[y == 1] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)

def fit(self, X, y):
    X_train = X
    y_train = y

    if self.early_stopping:
        # Split the data into training and validation sets
        X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.1)
        y_validate = categorical_to_binary(y_validate)

    # Convert the nominal y values to binary
    y_train = categorical_to_binary(y_train)

    n_samples, n_features = np.shape(X_train)
    n_outputs = np.shape(y_train)[1]

    # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
    a = -1 / math.sqrt(n_features)
    b = -a
    self.W = (b - a) * np.random.random((n_features, n_outputs)) + a
    self.biasW = (b - a) * np.random.random((1, n_outputs)) + a

    training_errors = []
    validation_errors = []
    iter_with_rising_val_error = 0

    for i in range(self.n_iterations):
        # Calculate outputs
        neuron_input = np.dot(X_train, self.W) + self.biasW
        neuron_output = sigmoid(neuron_input)

        # Training error
        error = y_train - neuron_output
        mse = np.mean(np.power(error, 2))
        training_errors.append(mse)

        # Calculate the loss gradient
        w_gradient = -2 * (y_train - neuron_output) * sigmoid_gradient(neuron_input)
        bias_gradient = w_gradient

        # Update weights
        self.W -= self.learning_rate * X_train.T.dot(w_gradient)
        self.biasW -= self.learning_rate * np.ones((1, n_samples)).dot(bias_gradient)

        if self.early_stopping:
            # Calculate the validation error
            error = y_validate - self._calculate_output(X_validate)
            mse = np.mean(np.power(error, 2))
            validation_errors.append(mse)

            # If the validation error is larger than in the previous iteration,
            # increase the counter
            if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                iter_with_rising_val_error += 1
                # If the validation error has been rising for more than 50 iterations,
                # stop training to avoid overfitting
                if iter_with_rising_val_error > 50:
                    break
            else:
                iter_with_rising_val_error = 0

    # Plot the training error
    if self.plot_errors:
        if self.early_stopping:
            # Training and validation error plot
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            validation, = plt.plot(range(i + 1), validation_errors, label="Validation Error")
            plt.legend(handles=[training, validation])
        else:
            training, = plt.plot(range(i + 1), training_errors, label="Training Error")
            plt.legend(handles=[training])
        plt.title("Error Plot")
        plt.ylabel('Error')
        plt.xlabel('Iterations')
        plt.show()

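# --- Assumed helpers (not part of the original excerpt) ---
# fit() above relies on self._calculate_output(), which is not shown. A minimal
# sketch of that helper and of a matching predict() is given below, assuming the
# same sigmoid activation and one-hot target encoding used during training.
def _calculate_output(self, X):
    # Forward pass: weighted sum of the inputs plus the bias, squashed by the sigmoid
    return sigmoid(np.dot(X, self.W) + self.biasW)

def predict(self, X):
    # Predict the class whose output neuron has the largest activation
    return np.argmax(self._calculate_output(X), axis=1)
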
y[y == digit1] = 0
y[y == digit2] = 1
X = data.data[idx]
X = normalize(X)

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)

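# --- Assumed continuation (not part of the original excerpt) ---
# A minimal sketch of how the classifiers set up above might be trained and
# compared, assuming the fit/predict conventions used in the individual demos
# (Adaboost trained on the rescaled {-1, 1} labels, KNN predicting directly
# from the training data, and the remaining models via fit() then predict()).
adaboost.fit(X_train, rescaled_y_train)
print("Adaboost accuracy:", accuracy_score(rescaled_y_test, adaboost.predict(X_test)))

print("KNN accuracy:", accuracy_score(y_test, knn.predict(X_test, X_train, y_train)))

for name, clf in [("Naive Bayes", naive_bayes),
                  ("Logistic Regression", logistic_regression),
                  ("Multilayer Perceptron", mlp),
                  ("Perceptron", perceptron),
                  ("Decision Tree", decision_tree),
                  ("Random Forest", random_forest)]:
    clf.fit(X_train, y_train)
    print("%s accuracy:" % name, accuracy_score(y_test, clf.predict(X_test)))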