def ex_2_2(input1, target1, input2, target2):
    """
    Train 10 MLPs (20 hidden units, adam, 1000 iters) with different seeds on
    dataset1, evaluate on dataset2, plot the accuracy histograms, print the
    confusion matrix of the best network and plot its misclassified images.

    :param input1: training inputs (dataset1)
    :param target1: training targets; column 0 holds the person id
    :param input2: test inputs (dataset2)
    :param target2: test targets; column 0 holds the person id
    """
    scores = []
    scores_train = []
    classifiers = []
    for i in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ), solver="adam",
                                   max_iter=1000, activation="tanh",
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        scores.append(classifier.score(input2, target2[:, 0]))
        classifiers.append(classifier)
        scores_train.append(classifier.score(input1, target1[:, 0]))

    # Select the network with the best test accuracy.
    best = classifiers[np.argmax(scores)]
    conf_mat = confusion_matrix(target2[:, 0], best.predict(input2))
    print(conf_mat)

    plot_histogram_of_acc(scores_train, scores)

    # BUG FIX: the original predicted with `classifier` -- the model from the
    # FINAL loop iteration -- so the misclassified images did not come from
    # the best network chosen above.
    predicted_target = best.predict(input2)
    misclassified_images = [input2[i] for i in range(len(target2[:, 0]))
                            if target2[:, 0][i] != predicted_target[i]]
    for image in misclassified_images:
        plot_image(image)
def ex_2_1(input2, target2):
    """Train a feed-forward net (1 hidden layer, 6 units, adam, 200 iters)
    for pose recognition on dataset2, plot the first-layer weights and print
    the confusion matrix on the training data.

    :param input2: input matrix of dataset2 (already normalized by main)
    :param target2: target matrix; column 1 holds the pose label
    """
    # dataset2 = normalize(input2) already done by main
    features = input2
    poses = target2[:, 1]
    # print(poses)
    model = MLPClassifier(solver='adam', activation='tanh', max_iter=200,
                          hidden_layer_sizes=(6, ))
    model.fit(features, poses)
    cm = confusion_matrix(poses, model.predict(features))
    plot_hidden_layer_weights(model.coefs_[0])
    print(cm)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # Collect the pose label (2nd column) of every target row.
    pose2 = [row[1] for row in target2]

    mlp = MLPClassifier(activation='tanh', hidden_layer_sizes=6)
    print("===========fit started===========")
    mlp.fit(input2, pose2)
    print("===========fit finished===========")
    print("classes_: ", mlp.classes_)
    print("n_layers_: ", mlp.n_layers_)

    # Visualise the learnt first-layer weights.
    plot_hidden_layer_weights(mlp.coefs_[0])

    print("===========predict started===========")
    prediction = mlp.predict(input2)
    print("===========predict finished===========")
    print(confusion_matrix(pose2, prediction))
    return
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO - done
    pose = target2[:, 1]
    nn = MLPClassifier(activation=ACTIVATION, solver='adam',
                       hidden_layer_sizes=(6, ), max_iter=200)
    nn.fit(input2, pose)
    predicted = nn.predict(input2)
    matrix = confusion_matrix(pose, predicted)
    print("The Confusion Matrix we obtained: \n" + str(matrix))
    # coefs_[0]: the single hidden layer's weight matrix.
    plot_hidden_layer_weights(nn.coefs_[0])
def main():
    """Incrementally train a tiny MLP on two samples and predict two more."""
    X = [[0., 1., 0., 1., 0., 0.], [1., 0., 0., 1., 1., 0.]]
    y = [0, 15]
    clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    # BUG FIX: the FIRST call to partial_fit on a classifier must declare the
    # complete set of target classes, otherwise sklearn raises a ValueError.
    clf.partial_fit(X, y, classes=[0, 15])
    res = clf.predict([[1., 1., 0., 1., 1., 1.], [0, 0, 1, 1, 1., 0]])
    print("res", res)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # Pose classifier: 1 hidden layer with 6 units, adam, tanh, 200 iters.
    cf = MLPClassifier(hidden_layer_sizes=(6, ), solver='adam',
                       activation='tanh', max_iter=200)
    # Train on dataset2 using the pose label (column 1).
    cf.fit(input2, target2[:, 1])
    # Confusion matrix of the training-set predictions.
    print(confusion_matrix(target2[:, 1], cf.predict(input2)))
    # Visualise what the hidden layer learnt.
    plot_hidden_layer_weights(cf.coefs_[0])
def train(self, labeledDoc):
    """
    Train the final classification model.

    :param labeledDoc: labeledDoc object (iterable of tagged documents)
    :return: True if everything went well; raises an exception on failure
    """
    if self.save_loc == None:
        raise UnboundLocalError("Should have set the save path <setSaveLocation>")
    if self.dependenceModel == None:
        raise UnboundLocalError("Should have set the TextProcessing.Doc2Vec model <setDependenceModel>")
    # First pass: assign a dense integer id to every tag, skipping tags[0]
    # (presumably the document id -- TODO confirm against labeledDoc).
    tags_id = {}
    Y = []
    X = []
    for doc in labeledDoc:
        for tag in doc.tags[1:]:
            if tag not in tags_id:
                tags_id[tag] = len(tags_id)
    # The iterable was consumed by the pass above; rewind for the second pass.
    labeledDoc.reloadDoc()
    # Second pass: build the multi-label target matrix Y and the document
    # vectors X (inferred via the dependence Doc2Vec model).
    for doc in labeledDoc:
        tags = doc.tags
        text = doc.words
        auxY = np.zeros(len(tags_id))
        for tag in tags[1:]:
            auxY[tags_id[tag]] = 1.
        Y.append(auxY)
        vecX = self.dependenceModel.predict(text)[0]
        X.append(vecX)
    Y = np.array(Y)
    X = np.array(X)
    # NOTE(review): 'algorithm' is the pre-0.18 sklearn parameter name (later
    # renamed to 'solver'); this code targets an old sklearn under Python 2.
    clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)
    clf.fit(X, Y)
    print clf.predict(X)
    # Persist the trained model plus the tag->id mapping next to it.
    joblib.dump(clf, self.save_loc)
    with open(self.save_loc+"_tags_id", "w") as fout:
        fout.write(json.dumps(tags_id))
def ex_2_1(input2, target2):
    """Train a 6-unit MLP on the pose labels (target column 1), compute its
    confusion matrix and plot the learnt hidden-layer weights."""
    pose_labels = target2[:, 1]
    net = MLPClassifier(hidden_layer_sizes=(6, ), solver="adam",
                        max_iter=200, activation="tanh")
    net.fit(input2, pose_labels)
    con_mat = confusion_matrix(pose_labels, net.predict(input2))
    plot_hidden_layer_weights(net.coefs_[0])
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    """Train an MLP on standardised features and report MSE/accuracy on the
    test and validation splits.

    :param BigSigma_inv: accepted for interface compatibility; unused here.
    """
    clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(100, 1),
                        activation='logistic', batch_size=BATCH_HUMAN,
                        shuffle=True, max_iter=5000)
    scaler = StandardScaler()
    scaler.fit(train)
    train1 = scaler.transform(train)
    # apply same transformation to test data
    test = scaler.transform(test)
    # NOTE(review): `val` is predicted below WITHOUT being scaled, unlike
    # train/test -- confirm whether val arrives pre-scaled from the caller.
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    # BUG FIX: validation accuracy must be computed from the validation
    # predictions; the original passed predict_test to acc_manual here.
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # MLPClassifier configuration.
    hidden_layers = 20
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 1000
    max_accuracy = 0.0
    train_accuracy = []
    test_accuracy = []
    cfn = []
    for m in range(10):
        cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ),
                           activation=activation_mode, solver=solver_mode,
                           random_state=m, max_iter=max_iter)
        cf.fit(input1, target1[:, 0])
        train_accuracy.append(cf.score(input1, target1[:, 0]))
        current_test_accuracy = cf.score(input2, target2[:, 0])
        test_accuracy.append(current_test_accuracy)
        # Keep the confusion matrix of the best network seen so far.
        if current_test_accuracy > max_accuracy:
            cfn = confusion_matrix(target2[:, 0], cf.predict(input2))
            max_accuracy = current_test_accuracy
    print(cfn)
    # BUG FIX: the histogram must be drawn once over the accuracy lists of
    # all 10 runs; the original called plot_histogram_of_acc inside the loop
    # with two scalar values per iteration.
    plot_histogram_of_acc(train_accuracy, test_accuracy)
    #plot_random_images(input2)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return: the confusion matrix on the training data
    """
    ## TODO
    pose = target2[:, 1]
    model = MLPClassifier(hidden_layer_sizes=(6,), activation='tanh',
                          max_iter=200)
    model.fit(input2, pose)
    predicted = model.predict(input2)
    result = confusion_matrix(pose, predicted, labels=None, sample_weight=None)
    plot_hidden_layer_weights(model.coefs_[0])
    return result
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    n = 10
    train_acc = np.zeros(n)
    test_acc = np.zeros(n)
    # GENERALISATION: size follows the test set instead of hard-coded 564.
    pred_test = np.zeros((n, input2.shape[0]))
    #print(min(target1[:,0]), max(target1[:,0])) # we have 20 person
    for i in range(n):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   activation='tanh', solver='adam',
                                   max_iter=5000, random_state=i)
        classifier.fit(input1, target1[:, 0])
        pred_test[i] = classifier.predict(input2)
        train_acc[i] = classifier.score(input1, target1[:, 0])
        test_acc[i] = classifier.score(input2, target2[:, 0])

    # BUG FIX: the original hard-coded seed 1 as the "best" network and
    # hard-coded the indices of the misclassified images; both are now
    # derived from the actual results.
    best = int(np.argmax(test_acc))
    error = pred_test[best] - target2[:, 0]
    mis_idx = [j for j in range(len(error)) if error[j] != 0]
    for j in mis_idx:
        print(j)
    # Show the misclassified images in pairs.
    for k in range(0, len(mis_idx) - 1, 2):
        plot_random_images(np.row_stack((input2[mis_idx[k], :],
                                         input2[mis_idx[k + 1], :])))
    plot_histogram_of_acc(train_acc, test_acc)
    confmat = confusion_matrix(target2[:, 0], pred_test[best])
    print(confmat)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    train = input1
    test = input2
    target_train = target1[:, 1]
    target_test = target2[:, 1]

    ## TODO
    n_hidden_neurons = 20
    accu_list_train = np.zeros((10, 1))
    accu_list_test = np.zeros((10, 1))

    # Find the best seed
    for seed in range(10):
        nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                           hidden_layer_sizes=(n_hidden_neurons,),
                           random_state=seed)
        nn.fit(train, target_train)
        accu_list_train[seed] = nn.score(train, target_train)
        accu_list_test[seed] = nn.score(test, target_test)
    print(accu_list_train)
    print(accu_list_test)

    # BUG FIX: "best" is defined as maximal accuracy on the TEST set; the
    # original selected the seed from the training accuracies.
    best_seed = np.argmax(accu_list_test)
    best_nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                            hidden_layer_sizes=(n_hidden_neurons,),
                            random_state=best_seed)
    best_nn.fit(train, target_train)

    # Evaluate the confusion matrix with best NN
    predictions = best_nn.predict(test)
    C = confusion_matrix(target_test, predictions)
    print(C)

    # Plot results
    plot_histogram_of_acc(accu_list_train, accu_list_test)
    print(accu_list_test)

    # Indices of the misclassified test images.
    comp_array = target_test - predictions
    comp_vector2 = np.nonzero(comp_array)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    ## TODO
    n_hidden_neurons = 6
    nn = MLPClassifier(activation='tanh', solver='adam', max_iter=200,
                       hidden_layer_sizes=(n_hidden_neurons,))
    # BUG FIX: the pose label lives in column 1 of the target matrix (as in
    # the sibling ex_2_1 implementations); column 2 was read by mistake.
    target = target2[:, 1]
    ## Train the network
    nn.fit(input2, target)
    predictions = nn.predict(input2)
    C = confusion_matrix(target, predictions)
    hidden_layer_weights = nn.coefs_[0]
    plot_hidden_layer_weights(hidden_layer_weights)
    print(C)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    pose = target2[:, 1]
    net = MLPClassifier(hidden_layer_sizes=(6, ), activation='tanh',
                        solver='adam', max_iter=200)
    net.fit(input2, pose)
    # Confusion matrix of the training-set predictions.
    confmat = confusion_matrix(pose, net.predict(input2))
    print(confmat)
    # First-layer weights visualise what each hidden unit responds to.
    plot_hidden_layer_weights(net.coefs_[0])
    ## TODO
class MLPClassifierImpl():
    """Thin wrapper exposing sklearn's MLPClassifier through a fixed
    fit/predict/predict_proba interface."""

    def __init__(self, hidden_layer_sizes=(100,), activation='relu',
                 solver='adam', alpha=0.0001, batch_size='auto',
                 learning_rate='constant', learning_rate_init=0.001,
                 power_t=0.5, max_iter=200, shuffle=True, random_state=None,
                 tol=0.0001, verbose=False, warm_start=False, momentum=0.9,
                 nesterovs_momentum=True, early_stopping=False,
                 validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-08, n_iter_no_change=10):
        # Capture every constructor argument as the hyperparameter dict that
        # is forwarded verbatim to the underlying sklearn model.
        hyperparams = dict(locals())
        del hyperparams['self']
        self._hyperparams = hyperparams

    def fit(self, X, y=None):
        """Instantiate the sklearn model and fit it; returns self."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        """Delegate prediction to the fitted sklearn model."""
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        """Delegate probability prediction to the fitted sklearn model."""
        return self._sklearn_model.predict_proba(X)
def compute(self):
    """Run leave-one-out cross-validation for every parameter set in
    self._params_list, storing each fold's prediction in the matching
    result object and committing per-parameter-set results."""
    # Iterate Leave-One-Out Index over all vectors
    actual_matrix = self.get_actual_data_matrix()
    for params_list_index in range(len(self._params_list)):
        params = self._params_list[params_list_index]
        current_params_result = self._params_result_list[params_list_index]
        for loo_index in range(self.get_vector_count()):
            # Build the train/test split for this leave-one-out fold.
            # IDIOM FIX: comprehensions replace the original pre-sized zero
            # matrices filled element by element (same rows, same order).
            train_data = [
                [actual_matrix[y][x]
                 for x in range(self.get_actual_feature_count())]
                for y in range(self.get_vector_count()) if y != loo_index
            ]
            train_labels = [
                self._labels[y]
                for y in range(self.get_vector_count()) if y != loo_index
            ]
            test_data = [[actual_matrix[loo_index][x]
                          for x in range(self.get_actual_feature_count())]]
            test_labels = [self._labels[loo_index]]

            #clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
            clf = MLPClassifier(**params)
            clf.fit(train_data, train_labels)
            res = clf.predict(test_data)
            current_params_result.predicted_labels[loo_index] = res[0]
            #print(repr(self.get_labels()[loo_index])+"\t"+repr(res[0]))
        self._commit_params_computation(params_list_index)
    self._complete_computation()
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
data = iris.data
labels = iris.target

data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.5, random_state=1)

scaler = StandardScaler()
# BUG FIX: fit the scaler on the training split only; fitting on the full
# dataset leaks test-set statistics into the preprocessing step.
scaler.fit(data_train)
data_train_std = scaler.transform(data_train)
data_test_std = scaler.transform(data_test)
data_train = data_train_std
data_test = data_test_std

# We add max_iter=1000 because the default is max_iter=200 and
# it is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
# BUG FIX: removed the redundant mlp.fit(data, labels) -- it trained on the
# raw, unscaled full dataset and was immediately overwritten by the fit below.
mlp.fit(data_train, labels_train)
pred = mlp.predict(data_test)
print()
print('Misclassified samples: %d' % (labels_test != pred).sum())
print('Accuracy: %.2f' % accuracy_score(labels_test, pred))
# BUG FIX: sklearn.neural_network.multilayer_perceptron is a private module
# path that was deprecated and later removed; import from the public package.
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
data = iris.data
labels = iris.target

# We add max_iter=1000 because the default is max_iter=200 and
# it is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data, labels)
pred = mlp.predict(data)
print()
print('Accuracy: %.2f' % accuracy_score(labels, pred))
# --- Training pipeline ---------------------------------------------------
# Encode categorical columns, drop unused ones, split off the label and
# scale the features (helpers defined elsewhere in this project).
df = encode_data(df)
df = delete_columns(df)
df, label = seperate_label(df)
df, scaler = scale_columns(df)
# Persist the fitted scaler so evaluation can reuse the same transform.
# NOTE(review): the file handle from open() is never closed -- consider
# a `with` block.
pickle.dump(scaler, open('./scaler.model', 'wb'))
x_train, x_test, y_train, y_test = train_test_split(df, label, test_size=.5)
# classifier=tree.DecisionTreeClassifier()
# classifier.fit(x_train,y_train)
# predictions=classifier.predict(x_test)
classifier = MLPClassifier()
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)
print("Accuracy:", accuracy_score(y_test, predictions))
# Persist the trained classifier alongside the scaler.
pickle.dump(classifier, open("model.model", 'wb'))
print(
    "Training completed. \nModel dumped succesfully..\n -----------------------"
)

###############Evaluating#################
# Load the unlabeled evaluation data and apply the same encoding as training.
data = pd.read_csv("ITData_eval-unlabeled.csv")
data.columns = header
df2 = data.drop(['Satisfaction'], axis=1)
df2 = encode_data(df2)
def classify_mlp(data_path):
    """Train and evaluate an MLP on the embeddings stored under *data_path*,
    append accuracy/F1 results to a shared results file and plot the
    normalized confusion matrix for the test split.

    Returns True early when results already exist or training data is missing.
    """
    # The shared results file lives two directories above data_path.
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))
    # Skip paths that were already evaluated in a previous run.
    if os.path.exists(result_path):
        if data_path in open(result_path).read():
            return True
    print(data_path)
    fname = "{}/train_labels.csv".format(data_path)
    if not os.path.exists(fname):
        return True
    # Load the three splits (labels + embeddings) from csv.
    tr_labels = np.loadtxt(fname)
    fname = "{}/train_embeddings.csv".format(data_path)
    tr_embeddings = np.loadtxt(fname)
    fname = "{}/val_labels.csv".format(data_path)
    val_labels = np.loadtxt(fname)
    fname = "{}/val_embeddings.csv".format(data_path)
    val_embeddings = np.loadtxt(fname)
    fname = "{}/test_labels.csv".format(data_path)
    te_labels = np.loadtxt(fname)
    fname = "{}/test_embeddings.csv".format(data_path)
    te_embeddings = np.loadtxt(fname)
    # Effectively unlimited iterations: train until sklearn's tolerance stops.
    clf = MLPClassifier(random_state=2, max_iter=200000000,
                        hidden_layer_sizes=(64, ))
    clf.fit(tr_embeddings, tr_labels)
    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)
    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)
    # NOTE(review): predictions are passed as the FIRST argument of f1_score
    # (the y_true slot) -- confirm this argument order is intentional.
    tr_fscore = f1_score(tr_predictions, tr_labels, average="weighted")
    val_fscore = f1_score(val_predictions, val_labels, average="weighted")
    te_fscore = f1_score(te_predictions, te_labels, average="weighted")
    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)
    # Append one tab-separated summary line per data_path.
    with open(result_path, mode='a') as f:
        f.write(
            'Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\tTest Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
            % (data_path, tr_score, val_score, te_score, tr_fscore,
               val_fscore, te_fscore))
    conf_mat = confusion_matrix(te_labels, te_predictions)
    # Sorted unique test labels give the class order for the plot axes.
    labels = sorted(list(set(list(te_labels))))
    plot_confusion_matrix(conf_mat, classes=labels, normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path,
                          path_name='mlp_confusion_matrix',
                          alg='mlp')
if __name__ == '__main__':
    np.random.seed(100)
    # Train the hand-rolled network on XOR (2 inputs, 2 hidden, 1 output).
    nn = NeuralNetwork([2, 2, 1])
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])
    nn.fit(X, y, learning_rate=0.1, epochs=1000)
    print("Final prediction")
    for s in X:
        print(s, nn.predict(s))
    # Reference model: sklearn's MLP trained on the same XOR data.
    mlp = MLPClassifier(random_state=1)
    mlp.fit(X, y)
    data = X
    markers = ('s', '*', '^')
    colors = ('blue', 'green', 'red')
    cmap = ListedColormap(colors)
    # Dense grid over the input space (with a 1-unit margin) for the
    # decision-surface plot.
    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    resolution = 0.01
    x, y = np.meshgrid(np.arange(x_min, x_max, resolution),
                       np.arange(y_min, y_max, resolution))
    # Classify every grid point and colour the plane by predicted class.
    Z = mlp.predict(np.array([x.ravel(), y.ravel()]).T)
    Z = Z.reshape(x.shape)
    plt.pcolormesh(x, y, Z, cmap=cmap)
    plt.xlim(x.min(), x.max())
#fit only to the training data
# NOTE(review): despite the comment above, the scaler is fitted on X --
# confirm X is indeed only the training matrix here.
scaler.fit(X)
# BUG FIX: removed the bare `StandardScaler(copy=True, with_mean=True,
# with_std=True)` expression -- it constructed a new scaler object and
# immediately discarded it (a no-op).
#now apply the transformations to the data:
x_train_nn = scaler.transform(X)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                   random_state=1)
print(nn.fit(x_train_nn, y))
print('Neural network model:')
nn_pred_test = nn.predict(x_test_nn)

#compute confusion matrix
from sklearn import metrics
#pred_obj = np.where(predictions==predictions[0],'N','Y')
#print(pred_obj)
cnf_matrix = metrics.confusion_matrix(y_test, nn_pred_test)
print(cnf_matrix)

#compute roc curve
import matplotlib.pyplot as plt
# NOTE(review): probabilities are computed on the raw X_test although the
# model was trained on scaled data, and y_binary is built from the training
# labels y -- both look suspect; confirm x_test_nn / y_test were intended.
y_pred_proba = nn.predict_proba(X_test)[::, 1]
y_binary = np.where(y == 'N', 0, 1)
fpr, tpr, _ = metrics.roc_curve(y_binary, y_pred_proba)
auc = metrics.roc_auc_score(y_binary, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
# Reduce dimensionality with truncated SVD before training.
# BUG FIX: removed the redundant svd.fit(fea_data_set) call --
# fit_transform below refits the decomposition anyway.
x_new = svd.fit_transform(fea_data_set)
# pca=PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new=pca.transform(fea_data_set)
xtrain, xtest, ytrain, ytest = train_test_split(x_new, label, test_size=0.2)

# Fit all five models on the same split.
lg.fit(xtrain, ytrain)
nb.fit(xtrain, ytrain)
forest.fit(xtrain, ytrain)
SVM.fit(xtrain, ytrain)
mlp.fit(xtrain, ytrain)

print("------------")
print(lg.score(xtest, ytest))
# BUG FIX: MSE must square the residuals BEFORE averaging. The original
# computed np.mean(diff)**2 for the lg and nb models while forest/SVM/mlp
# below already used the correct np.mean((diff)**2).
print(np.mean((lg.predict(xtest) - ytest)**2))
print(lg.score(xtrain, ytrain))
print(np.mean((lg.predict(xtrain) - ytrain)**2))
print("------------")
print(nb.score(xtest, ytest))
print(np.mean((nb.predict(xtest) - ytest)**2))
print(forest.score(xtest, ytest))
print(np.mean((forest.predict(xtest) - ytest)**2))
print(SVM.score(xtest, ytest))
print(np.mean((SVM.predict(xtest) - ytest)**2))
print(mlp.score(xtest, ytest))
print(np.mean((mlp.predict(xtest) - ytest)**2))

# Persist all fitted models. (Original note: four model sets were trained,
# with test splits of 80%, 70%, 50% and 30% in separate runs.)
joblib.dump(lg, "lg3.m")
joblib.dump(nb, "nb3.m")
joblib.dump(forest, "rf3.m")
joblib.dump(SVM, "svm3.m")
joblib.dump(mlp, "mlp3.m")
def classify(data_path, path=None, counter=None, alg='svm'):
    """Run 10-fold cross-validation of the selected classifier over the
    embeddings in *data_path* and append accuracy/F1 summaries to log files.

    :param alg: one of 'knn', 'svm', 'nn', 'nnd', 'poly', 'rf'
    :return: True (early) if the confusion plot for this combination exists

    NOTE(review): Python 2 code (print statements, builtin reduce/map).
    """
    # Skip work that has already produced a confusion-matrix plot.
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True
    # Class labels: derive a name from each file path (basename, no extension).
    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    paths = map(os.path.basename, paths)  # Get the filename.
    # Remove the extension.
    paths = map(lambda x: x.split(".")[0], paths)
    paths = np.array(map(lambda path: os.path.splitext(path)[0], paths))
    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).as_matrix()
    # print(rawEmbeddings.shape, paths.shape)
    folds = cross_validation.KFold(n=len(rawEmbeddings), random_state=1,
                                   n_folds=10, shuffle=True)
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds):
        print idx, alg
        # A fresh classifier per fold, selected by `alg`.
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
            # clf = svm.LinearSVC()
            # clf = svm.SVC(kernel="poly", degree=5, C=1, verbose=10)
        elif alg == 'nn':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2, max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()
        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        score = clf.score(rawEmbeddings[test], paths[test])
        # print score, alg
        scores.append(score)
        prediction = clf.predict(rawEmbeddings[test])
        # Weighted/macro/micro F1 for this fold.
        fscore_weighted = f1_score(paths[test], prediction, average="weighted")
        fscores_weighted.append(fscore_weighted)
        fscore_macro = f1_score(paths[test], prediction, average="macro")
        fscores_macro.append(fscore_macro)
        fscore_micro = f1_score(paths[test], prediction, average="micro")
        fscores_micro.append(fscore_micro)
    # Dump the per-fold accuracies for this algorithm.
    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))
    with open(accuracy_dir, "wb") as file:
        for i in scores:
            file.writelines("%s,%s\n" % (str(i), str(counter)))
    # print "KNN Avg. score %s" % (reduce(operator.add, scores) / len(folds))
    # print "MLP Avg. score %s" % (reduce(operator.add, scores3) / len(folds))
    print "Avg. score %s" % (reduce(operator.add, scores) / len(folds)), data_path
    # Append the averaged accuracy and F1 variants to per-metric log files
    # two directories above data_path.
    result_path = "{}/{}_{}.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, scores) / len(folds))), str(counter), alg))
    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_weighted_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_weighted) / len(folds))),
            str(counter), alg))
    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_macro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_macro) / len(folds))),
            str(counter), alg))
    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_micro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, fscores_micro) / len(folds))),
            str(counter), alg))
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
#fit only to the training data
scaler.fit(X_train)
# BUG FIX: removed the bare `StandardScaler(copy=True, with_mean=True,
# with_std=True)` expression -- it built a second scaler and threw it away.
#now apply the transformations to the data:
x_train_nn = scaler.transform(X_train)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2),
                   random_state=1)
print(nn.fit(x_train_nn, y_train))
print('Neural network model:')
nn_pred_train = nn.predict(x_train_nn)
print('train data model estimation')
# Mean absolute error
print('MAE: ')
print(mean_absolute_error(y_train, nn_pred_train))
# Root mean squared error
print('RMS: ')
print(sqrt(mean_squared_error(y_train, nn_pred_train)))
# R-squared score of this model
print('R2: ')
print(r2_score(y_train, nn_pred_train))
#MAPE
def ex_2_2(input1, target1, input2, target2):
    '''
    • Write code to train a feed-forward neural network with 1 hidden layer containing 20
    hidden units for recognising the individuals. Use dataset1 for training, ‘adam’ as the
    training solver and train for 1000 iterations. Use dataset2 as the test set.
    • Repeat the process 10 times starting from a different initial weight vector and plot
    the histogram for the resulting accuracy on the training and on the test set (the
    accuracy is proportion of correctly classified samples and it is computed with the
    method score of the classifier).
    • Use the best network (with maximal accuracy on the test set) to calculate the
    confusion matrix for the test set.
    • Plot a few misclassified images.
    '''
    x_train = input1
    y_train = target1[:, 0]  # column 0: person/individual id
    x_test = input2
    y_test = target2[:, 0]
    seeds = np.array(range(1, 11))
    train_accs = []
    test_accs = []
    max_acc = -1
    # Train one network per seed; remember the one with best test accuracy.
    for index_seed, seed in np.ndenumerate(seeds):
        nn = MLPClassifier(solver='adam', activation='tanh', max_iter=1000,
                           hidden_layer_sizes=(20, ), random_state=seed)
        nn.fit(x_train, y_train)
        train_acc = accuracy_score(y_train, nn.predict(x_train))
        train_accs.append(train_acc)
        test_acc = accuracy_score(y_test, nn.predict(x_test))
        test_accs.append(test_acc)
        if test_acc > max_acc:
            max_acc = test_acc
            best_nn = nn
    plot_histogram_of_acc(train_accs, test_accs)
    cm = confusion_matrix(y_test, best_nn.predict(x_test))
    prediction = best_nn.predict(x_test)
    misclassified = np.where(y_test != prediction)
    print(cm)
    # Show at most `limit` misclassified cases side by side.
    limit = 8
    i = 0
    for mc_index in misclassified[0]:
        if i < limit:
            fig, plts = plt.subplots(1, 2)
            # NOTE(review): input2 is indexed by the LABEL VALUE here
            # (prediction[mc_index] / y_test[mc_index]), not by mc_index, so
            # each panel shows row #label rather than the misclassified image
            # itself -- confirm this is intended.
            plts[0].set_title("Predicted Person " + str(prediction[mc_index]))
            plts[0].imshow(input2[prediction[mc_index]].reshape(*IMAGE_DIM).T,
                           cmap=plt.cm.gray)
            plts[0].set_xticks(())
            plts[0].set_yticks(())
            plts[1].set_title("Should be Person " + str(y_test[mc_index]))
            plts[1].imshow(input2[y_test[mc_index]].reshape(*IMAGE_DIM).T,
                           cmap=plt.cm.gray)
            plts[1].set_xticks(())
            plts[1].set_yticks(())
            plt.show()
            i = i + 1
    pass
kf = KFold(10, shuffle=True, random_state=None)
print(" 10-fold Cross-Validation training and testing \n")
i = 1
# BUG FIX: tableResults was initialised twice in a row; once is enough.
tableResults = []
NN = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10),
                   random_state=1)
for trainIndex, testIndex in kf.split(x):
    print(" ============== Fold ", i, "============\n")
    trainDocs, testDocs = x[trainIndex], x[testIndex]
    trainCats, testCats = y[trainIndex], y[testIndex]
    # Refit the network on this fold's training split and evaluate it.
    NN.fit(trainDocs, trainCats)
    pred = NN.predict(testDocs)
    accuracy = accuracy_score(testCats, pred)
    recall = recall_score(testCats, pred, average='weighted')
    precision = precision_score(testCats, pred, average='weighted')
    f1 = f1_score(testCats, pred, average='weighted')
    tableResults.append({'model': 'NN', 'accuracy': accuracy,
                         'recall': recall, 'precision': precision, 'f1': f1})
    i += 1

# Persist the last fold's model and reload it.
joblib.dump(NN, 'NN_saved/' + pathname.split('/')[-1] + '.model')
NN = joblib.load('NN_saved/' + pathname.split('/')[-1] + '.model')
measures = ['precision', 'recall', 'accuracy', 'f1']
with open('neuralNN' + pathname.replace('/', '_') + '.csv', 'w') as f:
    writer = csv.writer(f, delimiter=',')
    df = pd.DataFrame(tableResults)
    filt = pd.pivot_table(df, values=['precision', 'recall', 'accuracy', 'f1'],
                          index=['model'])
    # BUG FIX: the original opened the csv file and built a writer but never
    # wrote anything, leaving an empty file; write the aggregated table.
    filt.to_csv(f)
print(" Results")
print(filt)
# from warnings import warn
import numpy as np
from data_utils import *
# BUG FIX: import from the public sklearn.neural_network package; the private
# multilayer_perceptron module path was deprecated and later removed.
from sklearn.neural_network import MLPClassifier

data = gather_and_clean_data()
X = data[:, 0:-1]  # all columns but the last are features
y = data[:, -1]    # last column is the label

MClass = MLPClassifier()
MClass.fit(X, y)
pred = MClass.predict(X)
# NOTE(review): score is computed on the training data itself, so this is a
# training accuracy, not a generalisation estimate.
score = MClass.score(X, y)
print(f"Pred: {pred}")
print(f"Score: {score}")
# BUG FIX: import from the public sklearn.neural_network package; the private
# multilayer_perceptron module path was deprecated and later removed.
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Iris
iris = datasets.load_iris()
X = iris.data
Y = iris.target
X_train, X_test, Y_train, Y_test = \
    train_test_split(X, Y, test_size=0.3, random_state=1)

sc = StandardScaler()
# BUG FIX: fit the scaler on the training split only; fitting on the full
# dataset leaks test-set statistics into the preprocessing step.
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

mlp = MLPClassifier(max_iter=200, random_state=1, hidden_layer_sizes=(200, 100))
mlp.fit(X_train_std, Y_train)
Y_pred = mlp.predict(X_test_std)
print('Misclassified samples: %d' % (Y_test != Y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(Y_test, Y_pred))

# pred_prob = mlp.predict_proba(X_test_std)
# print('Probility:{0}'.format(pred_prob))
# Hold out 20% of the examples for testing.
X_train, X_test, y_train, y_test = train_test_split(examplesMatrix, Y_vector,
                                                    test_size=0.2)
print("Training...")
# Commented code for several models:
model = MLPClassifier(hidden_layer_sizes=(128, 64, 32, 16, 8), max_iter=2500)
# model = SVC(gamma='scale', probability = True)
# model = KNeighborsClassifier()
# model = LinearDiscriminantAnalysis()
# model = GaussianNB()
# model = DecisionTreeClassifier()
# model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Test to see if the model(s) is seeing and producing reasonable values
print("Max/min of predictions: ")
ymax = max(predictions)
ymin = min(predictions)
print(str(ymax) + "/" + str(ymin))
print("Max/Min of Y_test")
ymax = max(y_test)
ymin = min(y_test)
print(str(ymax) + "/" + str(ymin))
print("Max/Min of Y_train")
ymax = max(y_train)
ymin = min(y_train)