def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    # (y is one-hot encoded at this point, so take the labels from data.target)
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron",
                      accuracy=accuracy, legend_labels=np.unique(data.target))
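# These examples rely on a few helpers from the library (to_categorical,
# normalize, train_test_split, ...). For reference, a minimal sketch of the
# one-hot encoder assumed above; this is an assumption about its behavior,
# and the library's own to_categorical may differ.
import numpy as np

def to_categorical_sketch(x, n_col=None):
    """One-hot encode a 1D array of integer class labels."""
    x = np.asarray(x, dtype=int)
    if n_col is None:
        n_col = np.amax(x) + 1
    one_hot = np.zeros((x.shape[0], n_col))
    one_hot[np.arange(x.shape[0]), x] = 1
    return one_hot

# Example: to_categorical_sketch(np.array([0, 2, 1]), n_col=3)
# -> [[1, 0, 0], [0, 0, 1], [0, 1, 0]]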
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
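# A hedged sketch of the accuracy metric used throughout these examples;
# the library's accuracy_score may be implemented differently.
import numpy as np

def accuracy_score_sketch(y_true, y_pred):
    """Fraction of predictions that exactly match the true labels."""
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.sum(y_true == y_pred, axis=0) / len(y_true)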
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
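# Several examples pass a `seed` argument to train_test_split, which
# scikit-learn's version does not accept, so the helper is assumed to come
# from the library itself. A minimal sketch under that assumption:
import numpy as np

def train_test_split_sketch(X, y, test_size=0.5, shuffle=True, seed=None):
    """Split the data into train and test sets, optionally shuffled."""
    if shuffle:
        if seed is not None:
            np.random.seed(seed)
        idx = np.random.permutation(X.shape[0])
        X, y = X[idx], y[idx]
    split_i = len(y) - int(len(y) * test_size)
    return X[:split_i], X[split_i:], y[:split_i], y[split_i:]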
def main(): print("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main():
    optimizer = Adam()

    #-----
    # MLP
    #-----

    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    n_samples, n_features = X.shape
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Dense(n_hidden, input_shape=(n_features,)))
    clf.add(Activation('leaky_relu'))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="MLP")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    _, accuracy = clf.test_on_batch(X_test, y_test)
    print("Accuracy:", accuracy)

    # Reduce dimension to 2D using PCA and plot the results
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=range(10))
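# The loss passed to NeuralNetwork above is a class, not an instance. A hedged
# sketch of a cross-entropy loss with that interface (loss value and gradient
# with respect to the predictions); the library's CrossEntropy may differ.
import numpy as np

class CrossEntropySketch:
    def loss(self, y, p):
        # Clip predictions to avoid log(0)
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return -y * np.log(p) - (1 - y) * np.log(1 - p)

    def gradient(self, y, p):
        p = np.clip(p, 1e-15, 1 - 1e-15)
        return -(y / p) + (1 - y) / (1 - p)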
def main():
    # Alternative synthetic data (unused; overwritten by the digits data below):
    # X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4,
    #                                     n_clusters_per_class=1, n_informative=2)
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    y = to_categorical(y.astype("int"))

    # Model builder
    def model_builder(n_inputs, n_outputs):
        model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
        model.add(Dense(16, input_shape=(n_inputs,)))
        model.add(Activation('relu'))
        model.add(Dense(n_outputs))
        model.add(Activation('softmax'))
        return model

    # Print the model summary of an individual in the population
    print("")
    model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary()

    population_size = 100
    n_generations = 3000
    mutation_rate = 0.01

    print("Population Size: %d" % population_size)
    print("Generations: %d" % n_generations)
    print("Mutation Rate: %.2f" % mutation_rate)
    print("")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    model = Neuroevolution(population_size=population_size,
                           mutation_rate=mutation_rate,
                           model_builder=model_builder)
    model = model.evolve(X_train, y_train, n_generations=n_generations)

    loss, accuracy = model.test_on_batch(X_test, y_test)
    print("Accuracy:", accuracy)

    # Reduce dimension to 2D using PCA and plot the results
    y_pred = np.argmax(model.predict(X_test), axis=1)
    Plot().plot_in_2d(X_test, y_pred, title="Evolutionary Evolved Neural Network",
                      accuracy=accuracy, legend_labels=range(y.shape[1]))
def main():
    # Load temperature data
    data = pd.read_csv('sratchml/data/buffalotemp.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 13
    reg_factor = 0.01

    model = ElasticNet(degree=poly_degree,
                       reg_factor=reg_factor,
                       l1_ratio=0.7,
                       learning_rate=0.001,
                       n_iterations=4000)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (mse, reg_factor))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Elastic Net")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
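# For reference, the elastic net penalty blends L1 and L2 regularization,
# weighted by l1_ratio. A hypothetical sketch of how reg_factor and l1_ratio
# could enter the loss; the library's ElasticNet may structure this differently.
import numpy as np

class L1L2RegularizationSketch:
    """reg_factor * (l1_ratio * |w|_1 + (1 - l1_ratio) * 0.5 * |w|_2^2)"""
    def __init__(self, reg_factor, l1_ratio=0.5):
        self.reg_factor = reg_factor
        self.l1_ratio = l1_ratio

    def __call__(self, w):
        l1 = self.l1_ratio * np.linalg.norm(w, ord=1)
        l2 = (1 - self.l1_ratio) * 0.5 * w.T.dot(w)
        return self.reg_factor * (l1 + l2)

    def grad(self, w):
        l1_grad = self.l1_ratio * np.sign(w)
        l2_grad = (1 - self.l1_ratio) * w
        return self.reg_factor * (l1_grad + l2_grad)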
def main():
    # Alternative synthetic data (unused; overwritten by the iris data below):
    # X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=4,
    #                                     n_clusters_per_class=1, n_informative=2)
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    y = to_categorical(y.astype("int"))

    # Model builder
    def model_builder(n_inputs, n_outputs):
        model = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
        model.add(Dense(16, input_shape=(n_inputs,)))
        model.add(Activation('relu'))
        model.add(Dense(n_outputs))
        model.add(Activation('softmax'))
        return model

    # Print the model summary of an individual in the population
    print("")
    model_builder(n_inputs=X.shape[1], n_outputs=y.shape[1]).summary()

    population_size = 100
    n_generations = 10

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    inertia_weight = 0.8
    cognitive_weight = 0.8
    social_weight = 0.8

    print("Population Size: %d" % population_size)
    print("Generations: %d" % n_generations)
    print("")
    print("Inertia Weight: %.2f" % inertia_weight)
    print("Cognitive Weight: %.2f" % cognitive_weight)
    print("Social Weight: %.2f" % social_weight)
    print("")

    model = ParticleSwarmOptimizedNN(population_size=population_size,
                                     inertia_weight=inertia_weight,
                                     cognitive_weight=cognitive_weight,
                                     social_weight=social_weight,
                                     max_velocity=5,
                                     model_builder=model_builder)
    model = model.evolve(X_train, y_train, n_generations=n_generations)

    loss, accuracy = model.test_on_batch(X_test, y_test)
    print("Accuracy: %.1f%%" % float(100 * accuracy))

    # Reduce dimension to 2D using PCA and plot the results
    y_pred = np.argmax(model.predict(X_test), axis=1)
    Plot().plot_in_2d(X_test, y_pred, title="Particle Swarm Optimized Neural Network",
                      accuracy=accuracy, legend_labels=range(y.shape[1]))
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = LinearRegression(n_iterations=100)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % mse)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (the data is synthetic, so the axes are unitless)
    m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('X')
    plt.ylabel('y')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
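# A minimal sketch of the MSE metric assumed by the regression examples;
# the library's mean_squared_error may differ in detail.
import numpy as np

def mean_squared_error_sketch(y_true, y_pred):
    """Average of the squared differences between targets and predictions."""
    return np.mean(np.power(y_true - y_pred, 2))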
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
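# The SVM above is constructed with kernel=polynomial_kernel plus `power` and
# `coef` parameters, which suggests a kernel factory. A hedged sketch of that
# interface; the library's polynomial_kernel may be parameterized differently.
import numpy as np

def polynomial_kernel_sketch(power, coef, **kwargs):
    def f(x1, x2):
        # K(x1, x2) = (<x1, x2> + coef)^power
        return (np.inner(x1, x2) + coef) ** power
    return f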
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=5)
    # KNN is a lazy learner: there is no fit step, and prediction
    # takes the training data directly
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="K Nearest Neighbors",
                      accuracy=accuracy, legend_labels=data.target_names)
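# KNN classifies a sample by majority vote among its k nearest training
# samples. A minimal sketch of the distance measure presumably used;
# the library's implementation may differ.
import numpy as np

def euclidean_distance_sketch(x1, x2):
    """Straight-line distance between two feature vectors."""
    return np.sqrt(np.sum((np.asarray(x1) - np.asarray(x2)) ** 2))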
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes",
                      accuracy=accuracy, legend_labels=data.target_names)
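# Several examples call normalize(data.data) before fitting. A hedged sketch
# of sample-wise L2 normalization with that effect; the library's normalize
# may support other norms or axes.
import numpy as np

def normalize_sketch(X, axis=-1, order=2):
    """Scale each sample of X to unit norm."""
    l2 = np.atleast_1d(np.linalg.norm(X, order, axis))
    l2[l2 == 0] = 1  # Avoid division by zero for all-zero samples
    return X / np.expand_dims(l2, axis)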
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main(): print("-- Gradient Boosting Regression --") # Load temperature data data = pd.read_csv('sratchml/data/TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = np.atleast_2d(data["temp"].values).T X = time.reshape((-1, 1)) # Time. Fraction of the year [0, 1] X = np.insert(X, 0, values=1, axis=1) # Insert bias term y = temp[:, 0] # Temperature. Reduce to one-dim X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5) model = GradientBoostingRegressor() model.fit(X_train, y_train) y_pred = model.predict(X_test) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') mse = mean_squared_error(y_test, y_pred) print("Mean Squared Error:", mse) # Plot the results m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10) m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10) plt.suptitle("Regression Tree") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') plt.show()
def main(): print ("-- Regression Tree --") # Load temperature data data = pd.read_csv('sratchml/data/buffalotemp.txt', sep="\t") time = np.atleast_2d(data["time"].values).T temp = np.atleast_2d(data["temp"].values).T X = standardize(time) # Time. Fraction of the year [0, 1] y = temp[:, 0] # Temperature. Reduce to one-dim X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) model = RegressionTree() model.fit(X_train, y_train) y_pred = model.predict(X_test) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10) plt.suptitle("Regression Tree") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') plt.show()
def main(): print("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Decision Tree", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest",
                      accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of the nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16, n_iterations=1000, learning_rate=0.01)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    # (y is one-hot encoded at this point, so take the labels from data.target)
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron",
                      accuracy=accuracy, legend_labels=np.unique(data.target))
def main():
    optimizer = Adam()

    def gen_mult_ser(nums):
        """ Method which generates multiplication series """
        X = np.zeros([nums, 10, 61], dtype=float)
        y = np.zeros([nums, 10, 61], dtype=float)
        for i in range(nums):
            start = np.random.randint(2, 7)
            mult_ser = np.linspace(start, start * 10, num=10, dtype=int)
            X[i] = to_categorical(mult_ser, n_col=61)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    def gen_num_seq(nums):
        """ Method which generates sequence of numbers (not used below) """
        X = np.zeros([nums, 10, 20], dtype=float)
        y = np.zeros([nums, 10, 20], dtype=float)
        for i in range(nums):
            start = np.random.randint(0, 10)
            num_seq = np.arange(start, start + 10)
            X[i] = to_categorical(num_seq, n_col=20)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    X, y = gen_mult_ser(3000)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Model definition
    clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy)
    clf.add(RNN(10, activation="tanh", bptt_trunc=5, input_shape=(10, 61)))
    clf.add(Activation('softmax'))
    clf.summary("RNN")

    # Print a problem instance and the correct solution
    tmp_X = np.argmax(X_train[0], axis=1)
    tmp_y = np.argmax(y_train[0], axis=1)
    print("Number Series Problem:")
    print("X = [" + " ".join(tmp_X.astype("str")) + "]")
    print("y = [" + " ".join(tmp_y.astype("str")) + "]")
    print()

    train_err, _ = clf.fit(X_train, y_train, n_epochs=500, batch_size=512)

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=2)
    y_test = np.argmax(y_test, axis=2)

    print()
    print("Results:")
    for i in range(5):
        # Print a problem instance and the correct solution
        tmp_X = np.argmax(X_test[i], axis=1)
        tmp_y1 = y_test[i]
        tmp_y2 = y_pred[i]
        print("X = [" + " ".join(tmp_X.astype("str")) + "]")
        print("y_true = [" + " ".join(tmp_y1.astype("str")) + "]")
        print("y_pred = [" + " ".join(tmp_y2.astype("str")) + "]")
        print()

    accuracy = np.mean(accuracy_score(y_test, y_pred))
    print("Accuracy:", accuracy)

    training = plt.plot(range(len(train_err)), train_err, label="Training Error")
    plt.title("Error Plot")
    plt.ylabel('Training Error')
    plt.xlabel('Iterations')
    plt.show()
def main():
    # Load temperature data
    data = pd.read_csv('sratchml/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 15

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for reg_factor in np.arange(0, 0.1, 0.01):
        cross_validation_sets = k_fold_cross_validation_sets(X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            model = PolynomialRidgeRegression(degree=poly_degree,
                                              reg_factor=reg_factor,
                                              learning_rate=0.001,
                                              n_iterations=10000)
            model.fit(_X_train, _y_train)
            y_pred = model.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = reg_factor
            lowest_error = mse

    # Make final prediction using the best regularization constant
    model = PolynomialRidgeRegression(degree=poly_degree,
                                      reg_factor=best_reg_factor,
                                      learning_rate=0.001,
                                      n_iterations=10000)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (mse, best_reg_factor))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
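# A hedged sketch of the k-fold helper assumed above: it should yield k
# (train, test) splits as (_X_train, _X_test, _y_train, _y_test) tuples.
# The library's k_fold_cross_validation_sets may shuffle or pad differently.
import numpy as np

def k_fold_cross_validation_sets_sketch(X, y, k):
    """Return k folds; each fold is held out once as the validation set."""
    n = len(y)
    fold_size = n // k
    sets = []
    for i in range(k):
        test_idx = np.arange(i * fold_size, (i + 1) * fold_size)
        train_idx = np.setdiff1d(np.arange(n), test_idx)
        sets.append((X[train_idx], X[test_idx], y[train_idx], y[test_idx]))
    return sets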
def main():
    #----------
    # Conv Net
    #----------

    optimizer = Adam()

    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # Reshape X to (n_samples, channels, height, width)
    X_train = X_train.reshape((-1, 1, 8, 8))
    X_test = X_test.reshape((-1, 1, 8, 8))

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Conv2D(n_filters=16, filter_shape=(3, 3), stride=1,
                   input_shape=(1, 8, 8), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), stride=1, padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Flatten())
    clf.add(Dense(256))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.4))
    clf.add(BatchNormalization())
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="ConvNet")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    _, accuracy = clf.test_on_batch(X_test, y_test)
    print("Accuracy:", accuracy)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    X_test = X_test.reshape(-1, 8 * 8)

    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network",
                      accuracy=accuracy, legend_labels=range(10))
X = normalize(X)

print("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

n_samples, n_features = np.shape(X)

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
mlp.add(Dense(input_shape=(n_features,), n_units=64))
mlp.add(Activation('relu'))
mlp.add(Dense(n_units=64))