def main(): print("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary (one-hot)
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(
        n_hidden=16,
        n_iterations=1,
        # n_iterations=1000,
        learning_rate=0.01)

    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
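# `to_categorical` one-hot encodes integer class labels. A minimal sketch,
# assuming 1-D integer input; `n_col` fixes the encoding width when the label
# range is known in advance (as in the RNN example further down).
import numpy as np

def to_categorical(x, n_col=None):
    # One column per class; row i gets a 1 in column x[i]
    if n_col is None:
        n_col = np.amax(x) + 1
    one_hot = np.zeros((x.shape[0], n_col))
    one_hot[np.arange(x.shape[0]), x] = 1
    return one_hot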
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001,
        loss=CrossEntropy,
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
def calcPreceptronOrg(self, X_train, X_test, y_train, y_test, file_nm):
    y = np.concatenate((y_train, y_test), axis=0)
    y = to_categorical(y)
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001,
        loss=SquareLoss,
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    # print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title=file_nm, accuracy=accuracy, legend_labels=np.unique(y))
    return accuracy
def main(): print("-- XGBoost --") from sklearn import preprocessing from sklearn.preprocessing import LabelEncoder with pyRAPL.Measurement('Read_data', output=csv_output): df = pd.read_csv('/home/gabi/Teste/BaseSintetica/1k_5att.csv') X = df.iloc[:, :-1].values y = df.iloc[:, -1].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) csv_output.save() clf = XGBoost() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001,
        loss=CrossEntropy,
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary (one-hot)
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
        n_iterations=1000,
        learning_rate=0.01)

    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
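# `normalize` scales each sample to unit norm, which keeps feature magnitudes
# comparable for distance- and gradient-based models. A minimal sketch of an
# L2 row normalizer, assuming a 2-D array; the project's helper may expose
# additional axis/order behavior.
import numpy as np

def normalize(X, axis=-1, order=2):
    # Norm of each row; zero rows are left unchanged to avoid division by zero
    l2 = np.atleast_1d(np.linalg.norm(X, order, axis))
    l2[l2 == 0] = 1
    return X / np.expand_dims(l2, axis)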
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    # Load dataset
    data = datasets.load_iris()

    # Data cleaning: keep only the two non-zero classes
    X = normalize(data.data[data.target != 0])  # select samples of classes 1 and 2
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)  # logistic regression implementation
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = NB()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print('Accuracy : {}'.format(accuracy))

    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=5)
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)
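# The KNN above is lazy: there is no fit step, so predict() receives the
# training data directly. A minimal sketch of that majority-vote interface,
# assuming Euclidean distance; this is a hypothetical illustration, not the
# project's exact implementation.
import numpy as np

class KNN:
    def __init__(self, k=5):
        self.k = k

    def predict(self, X_test, X_train, y_train):
        y_pred = np.empty(X_test.shape[0], dtype=y_train.dtype)
        for i, sample in enumerate(X_test):
            # Indices of the k training samples closest to this test sample
            idx = np.argsort(np.linalg.norm(X_train - sample, axis=1))[:self.k]
            # Majority vote among the k nearest labels
            labels, counts = np.unique(y_train[idx], return_counts=True)
            y_pred[i] = labels[np.argmax(counts)]
        return y_pred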
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Reduce to two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy : {}".format(accuracy))

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main(): print("-- Classification Tree --") with pyRAPL.Measurement('Read_data', output=csv_output): dataset = pd.read_csv('/home/gabi/Teste/BaseSintetica/1k_5att.csv') X = dataset.iloc[:, 0:5].values y = dataset.iloc[:, 5].values X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) csv_output.save() clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy)
def main(): print("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main(): print ("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Decision Tree", accuracy=accuracy, legend_labels=data.target_names)
def main(): print ("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
y_pred["Logistic Regression"] = logistic_regression.predict(X_test) y_pred["LDA"] = lda.predict(X_test) y_pred["Multilayer Perceptron"] = np.argmax(mlp.predict(X_test), axis=1) y_pred["Perceptron"] = np.argmax(perceptron.predict(X_test), axis=1) y_pred["Decision Tree"] = decision_tree.predict(X_test) y_pred["Random Forest"] = random_forest.predict(X_test) y_pred["Support Vector Machine"] = support_vector_machine.predict(X_test) y_pred["XGBoost"] = xgboost.predict(X_test) # .......... # ACCURACY # .......... print("Accuracy:") for clf in y_pred: # Rescaled {-1 1} if clf == "Adaboost" or clf == "Support Vector Machine": print("\t%-23s: %.5f" % (clf, accuracy_score(rescaled_y_test, y_pred[clf]))) # Categorical else: print("\t%-23s: %.5f" % (clf, accuracy_score(y_test, y_pred[clf]))) # ....... # PLOT # ....... plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test) plt.ylabel("Principal Component 2") plt.xlabel("Principal Component 1") plt.title("The Digit Dataset (digits %s and %s)" % (digit1, digit2)) plt.show()
def main():
    optimizer = Adadelta()

    #-----
    # MLP
    #-----
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    n_samples, n_features = np.shape(X)
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Dense(n_hidden, input_shape=(8 * 8,)))
    clf.add(Activation('leaky_relu'))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="MLP")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=range(10))
def acc(self, y, p):
    # Accuracy for one-hot encoded targets: compare per-row argmax class indices
    return accuracy_score(np.argmax(y, axis=1), np.argmax(p, axis=1))
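# A quick sanity check of acc()'s semantics with one-hot rows (hypothetical
# values): argmax recovers each row's class index before the labels are compared.
y = np.array([[0, 1, 0], [1, 0, 0]])              # true classes:      1, 0
p = np.array([[0.1, 0.7, 0.2], [0.3, 0.4, 0.3]])  # predicted classes: 1, 1
# accuracy_score([1, 0], [1, 1]) == 0.5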
def main():
    optimizer = Adam()

    def gen_mult_ser(nums):
        """ Method which generates multiplication series """
        X = np.zeros([nums, 10, 61], dtype=float)
        y = np.zeros([nums, 10, 61], dtype=float)
        for i in range(nums):
            start = np.random.randint(2, 7)
            mult_ser = np.linspace(start, start * 10, num=10, dtype=int)
            X[i] = to_categorical(mult_ser, n_col=61)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    def gen_num_seq(nums):
        """ Method which generates sequence of numbers """
        X = np.zeros([nums, 10, 20], dtype=float)
        y = np.zeros([nums, 10, 20], dtype=float)
        for i in range(nums):
            start = np.random.randint(0, 10)
            num_seq = np.arange(start, start + 10)
            X[i] = to_categorical(num_seq, n_col=20)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    X, y = gen_mult_ser(3000)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Model definition
    clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy)
    clf.add(RNN(10, activation="tanh", bptt_trunc=5, input_shape=(10, 61)))
    clf.add(Activation('softmax'))
    clf.summary("RNN")

    # Print a problem instance and the correct solution
    tmp_X = np.argmax(X_train[0], axis=1)
    tmp_y = np.argmax(y_train[0], axis=1)
    print("Number Series Problem:")
    print("X = [" + " ".join(tmp_X.astype("str")) + "]")
    print("y = [" + " ".join(tmp_y.astype("str")) + "]")
    print()

    train_err, _ = clf.fit(X_train, y_train, n_epochs=500, batch_size=512)

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=2)
    y_test = np.argmax(y_test, axis=2)

    print()
    print("Results:")
    for i in range(5):
        # Print a problem instance and the correct solution
        tmp_X = np.argmax(X_test[i], axis=1)
        tmp_y1 = y_test[i]
        tmp_y2 = y_pred[i]
        print("X = [" + " ".join(tmp_X.astype("str")) + "]")
        print("y_true = [" + " ".join(tmp_y1.astype("str")) + "]")
        print("y_pred = [" + " ".join(tmp_y2.astype("str")) + "]")
        print()

    accuracy = np.mean(accuracy_score(y_test, y_pred))
    print("Accuracy:", accuracy)

    training = plt.plot(range(500), train_err, label="Training Error")
    plt.title("Error Plot")
    plt.ylabel('Training Error')
    plt.xlabel('Iterations')
    plt.show()
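# The targets in gen_mult_ser/gen_num_seq come from np.roll, which shifts each
# sequence one step so that every position's target is the next element. A tiny
# illustration with assumed values:
import numpy as np

seq = np.array([3, 6, 9, 12])
print(np.roll(seq, -1))  # [ 6  9 12  3] -- the wrapped-around last entry is then
                         # overwritten by the endpoint marker y[:, -1, 1] = 1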
y_pred["LDA"] = lda.predict(X_test) y_pred["Multilayer Perceptron"] = np.argmax(mlp.predict(X_test), axis=1) y_pred["Perceptron"] = np.argmax(perceptron.predict(X_test), axis=1) y_pred["Decision Tree"] = decision_tree.predict(X_test) y_pred["Random Forest"] = random_forest.predict(X_test) y_pred["Support Vector Machine"] = support_vector_machine.predict(X_test) y_pred["XGBoost"] = xgboost.predict(X_test) # .......... # ACCURACY # .......... print ("Accuracy:") for clf in y_pred: # Rescaled {-1 1} if clf == "Adaboost" or clf == "Support Vector Machine": print ("\t%-23s: %.5f" %(clf, accuracy_score(rescaled_y_test, y_pred[clf]))) # Categorical else: print ("\t%-23s: %.5f" %(clf, accuracy_score(y_test, y_pred[clf]))) # ....... # PLOT # ....... plt.scatter(X_test[:,0], X_test[:,1], c=y_test) plt.ylabel("Principal Component 2") plt.xlabel("Principal Component 1") plt.title("The Digit Dataset (digits %s and %s)" % (digit1, digit2)) plt.show()
def acc(self, y_true, y_pred):
    # return self.logprob
    return accuracy_score(y_true, y_pred)