def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    scores = []
    scores_train = []
    classifiers = []
    # Train 10 networks that differ only in their random seed.
    for i in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20,), solver="adam",
                                   max_iter=1000, activation="tanh",
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        scores.append(classifier.score(input2, target2[:, 0]))
        scores_train.append(classifier.score(input1, target1[:, 0]))
        classifiers.append(classifier)

    # Confusion matrix of the network with the best test accuracy.
    best_classifier = classifiers[np.argmax(scores)]
    conf_mat = confusion_matrix(target2[:, 0], best_classifier.predict(input2))
    print(conf_mat)

    plot_histogram_of_acc(scores_train, scores)

    # Plot the images that the best network misclassifies.
    predicted_target = best_classifier.predict(input2)
    misclassified_images = [input2[i] for i in range(len(target2[:, 0]))
                            if target2[:, 0][i] != predicted_target[i]]
    for image in misclassified_images:
        plot_image(image)

def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    networks = []
    train_acc = np.zeros(10)
    test_acc = np.zeros(10)
    for i in range(10):
        # Use a different seed for each of the 10 networks.
        nn = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh',
                           max_iter=1000, random_state=i)
        networks.append(nn)
        nn.fit(input1, target1[:, 0])
        train_acc[i] = nn.score(input1, target1[:, 0])
        test_acc[i] = nn.score(input2, target2[:, 0])

    # The best network is the one with the highest test accuracy.
    i_best = np.argmax(test_acc)
    y_pred = networks[i_best].predict(input2)
    C = confusion_matrix(target2[:, 0], y_pred)
    return train_acc, test_acc, y_pred, C

def ex_2_2(input1, target1, input2, target2): """ Solution for exercise 2.2 :param input1: The input from dataset1 :param target1: The target from dataset1 :param input2: The input from dataset2 :param target2: The target from dataset2 :return: """ ## TODO hidden_units = 20 test_face = target2[:, 0] train_face = target1[:, 0] test_accuracy = np.zeros(10) train_accuracy = np.zeros(10) best_network = 0 max_accuracy = 0 nn = MLPClassifier(activation=ACTIVATION, solver="adam", hidden_layer_sizes=(hidden_units, ), max_iter=1000) for i in range(0, 10): nn.random_state = i nn.fit(input1, train_face) train_accuracy[i] = nn.score(input1, train_face) test_accuracy[i] = nn.score(input2, test_face) if test_accuracy[i] > max_accuracy: best_network = nn max_accuracy = test_accuracy[i] plot_histogram_of_acc(train_accuracy, test_accuracy) # Use the best network to calculate the confusion matrix for the test set. y_pred = best_network.predict(input2) matrix = confusion_matrix(test_face, y_pred) print("The Confusion Matrix we obtained: \n" + str(matrix)) # Plot a few misclassified images. annas_favorit_number = 177 marcos_favorit_numer = 490 strugers_favorit_number_aka_best_mirp = 13 manfreds_favorit_number_is_a_emirp_a_lucky_fortunate_sexy_and_happy_prime = 79 best_numbers_ever = [ annas_favorit_number, strugers_favorit_number_aka_best_mirp, marcos_favorit_numer, manfreds_favorit_number_is_a_emirp_a_lucky_fortunate_sexy_and_happy_prime ] for _ in best_numbers_ever: misclassified = np.where(test_face != best_network.predict(input2)) plot_random_images(input2[misclassified])
def ex_2_2(input1, target1, input2, target2): """ Solution for exercise 2.2 :param input1: The input from dataset1 :param target1: The target from dataset1 :param input2: The input from dataset2 :param target2: The target from dataset2 :return: """ #declaring variables used for MLPClassifier hidden_layers = 20 solver_mode = 'adam' activation_mode = 'tanh' max_iter = 1000 max_accuracy = 0.0 train_accuracy = [] test_accuracy = [] cfn = [] m = 0 for m in range(10): cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ), activation=activation_mode, solver=solver_mode, random_state=m, max_iter=max_iter) cf.fit(input1, target1[:, 0]) train_accuracy.append(cf.score(input1, target1[:, 0])) current_test_accuracy = cf.score(input2, target2[:, 0]) test_accuracy.append(current_test_accuracy) plot_histogram_of_acc(train_accuracy[m], test_accuracy[m]) if current_test_accuracy > max_accuracy: cfn = confusion_matrix(target2[:, 0], cf.predict(input2)) max_accuracy = current_test_accuracy print(cfn) #plot_histogram_of_acc(train_accuracy, test_accuracy) #plot_random_images(input2) pass
def ex_2_2(input1, target1, input2, target2): """ Solution for exercise 2.2 :param input1: The input from dataset1 :param target1: The target from dataset1 :param input2: The input from dataset2 :param target2: The target from dataset2 :return: """ n = 10 train_acc = np.zeros(n) test_acc = np.zeros(n) pred_test = np.zeros((n, 564)) coefs = np.zeros((n, 960, 20)) #print(min(target1[:,0]), max(target1[:,0])) # we have 20 person for i in range(n): classifier = MLPClassifier(hidden_layer_sizes=(20, ), activation='tanh', solver='adam', max_iter=5000, random_state=i) classifier.fit(input1, target1[:, 0]) pred_test[i] = classifier.predict(input2) coefs[i] = classifier.coefs_[0] train_acc[i] = classifier.score(input1, target1[:, 0]) test_acc[i] = classifier.score(input2, target2[:, 0]) error = pred_test[1] - target2[:, 0] for j in range(len(error)): if (error[j] != 0): print(j) plot_random_images(np.row_stack((input2[175, :], input2[184, :]))) plot_random_images(np.row_stack((input2[210, :], input2[134, :]))) plot_random_images(np.row_stack((input2[223, :], input2[177, :]))) plot_random_images(np.row_stack((input2[179, :], input2[186, :]))) plot_histogram_of_acc(train_acc, test_acc) # best network with seed i=1 confmat = confusion_matrix(target2[:, 0], pred_test[1]) print(confmat) pass
def ex_2_2(input1, target1, input2, target2): """ Solution for exercise 2.2 :param input1: The input from dataset1 :param target1: The target from dataset1 :param input2: The input from dataset2 :param target2: The target from dataset2 :return: """ train = input1 test = input2 target_train = target1[:, 1] target_test = target2[:, 1] ## TODO n_hidden_neurons = 20 accu_list_train = np.zeros((10,1)) accu_list_test = np.zeros((10, 1)) # Find the best seed for seed in range(10): nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000, hidden_layer_sizes=(n_hidden_neurons,), random_state=seed) nn.fit(train, target_train) accu_list_train[seed] = nn.score(train, target_train) accu_list_test[seed] = nn.score(test, target_test) print(accu_list_train) print(accu_list_test) # Compute NN weights with the best seed best_seed = np.argmax(accu_list_train) best_nn = nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000, hidden_layer_sizes=(n_hidden_neurons,),random_state=best_seed) best_nn.fit(train, target_train) # Evaluate the confusion matrix with best NN predictions = nn.predict(test) C = confusion_matrix(target_test, predictions) print(C) # Plot results plot_histogram_of_acc(accu_list_train, accu_list_test) print(accu_list_test) # Find misclassified images comp_array = target_test - predictions comp_vector2 = np.nonzero(comp_array)
# Getting MNIST, a dataset of 70k images.
# (fetch_mldata was removed in scikit-learn 0.20; fetch_openml("mnist_784")
# is the current replacement.)
import numpy as np
import joblib
from sklearn.datasets import fetch_mldata
from sklearn.neural_network import MLPClassifier

dataset = fetch_mldata("MNIST original")
X = np.array(dataset.data)    # our features
y = np.array(dataset.target)  # our labels
X = X.astype('float32')

# Splitting the dataset into training and test sets:
# the first 60k instances are for training, the last 10k for testing.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# Normalizing our features to the range [0, 1].
X_train = X_train / 255
X_test = X_test / 255

# Creating the neural network: one hidden layer with 512 units,
# i.e. a 784-512-10 network.
mlp = MLPClassifier(hidden_layer_sizes=(512,), max_iter=500, verbose=True)

# Fitting our model (MLPClassifier.fit takes no epoch argument;
# the number of passes over the data is controlled by max_iter).
mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))  # output: 0.99
print("Test set score: %f" % mlp.score(X_test, y_test))        # output: 0.98

# Saving our model.
joblib.dump(mlp, "model.pkl")
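# Usage sketch (not part of the original script): the model pickled above with
# joblib.dump can be loaded again in a later session and used for prediction.
loaded_mlp = joblib.load("model.pkl")
print(loaded_mlp.predict(X_test[:10]))  # predicted digits for the first 10 test images
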
def classify_mlp(data_path):
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))
    if os.path.exists(result_path):
        if data_path in open(result_path).read():
            return True
    print(data_path)

    # Load the precomputed embeddings and labels for each split.
    fname = "{}/train_labels.csv".format(data_path)
    if not os.path.exists(fname):
        return True
    tr_labels = np.loadtxt(fname)
    fname = "{}/train_embeddings.csv".format(data_path)
    tr_embeddings = np.loadtxt(fname)
    fname = "{}/val_labels.csv".format(data_path)
    val_labels = np.loadtxt(fname)
    fname = "{}/val_embeddings.csv".format(data_path)
    val_embeddings = np.loadtxt(fname)
    fname = "{}/test_labels.csv".format(data_path)
    te_labels = np.loadtxt(fname)
    fname = "{}/test_embeddings.csv".format(data_path)
    te_embeddings = np.loadtxt(fname)

    clf = MLPClassifier(random_state=2, max_iter=200000000,
                        hidden_layer_sizes=(64,))
    clf.fit(tr_embeddings, tr_labels)

    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)

    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)

    tr_fscore = f1_score(tr_predictions, tr_labels, average="weighted")
    val_fscore = f1_score(val_predictions, val_labels, average="weighted")
    te_fscore = f1_score(te_predictions, te_labels, average="weighted")

    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)

    with open(result_path, mode='a') as f:
        f.write('Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\t'
                'Test Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
                % (data_path, tr_score, val_score, te_score,
                   tr_fscore, val_fscore, te_fscore))

    conf_mat = confusion_matrix(te_labels, te_predictions)
    labels = sorted(list(set(list(te_labels))))
    plot_confusion_matrix(conf_mat, classes=labels, normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path, path_name='mlp_confusion_matrix',
                          alg='mlp')

# Reduce dimensionality with truncated SVD (the commented-out PCA is an alternative).
x_new = svd.fit_transform(fea_data_set)
# pca = PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new = pca.transform(fea_data_set)

xtrain, xtest, ytrain, ytest = train_test_split(x_new, label, test_size=0.2)

# Fit the five models (lg, nb, forest, SVM, mlp) on the training split.
lg.fit(xtrain, ytrain)
nb.fit(xtrain, ytrain)
forest.fit(xtrain, ytrain)
SVM.fit(xtrain, ytrain)
mlp.fit(xtrain, ytrain)

# Print the accuracy and the mean squared error of each model.
print("------------")
print(lg.score(xtest, ytest))
print(np.mean((lg.predict(xtest) - ytest) ** 2))
print(lg.score(xtrain, ytrain))
print(np.mean((lg.predict(xtrain) - ytrain) ** 2))
print("------------")
print(nb.score(xtest, ytest))
print(np.mean((nb.predict(xtest) - ytest) ** 2))
print(forest.score(xtest, ytest))
print(np.mean((forest.predict(xtest) - ytest) ** 2))
print(SVM.score(xtest, ytest))
print(np.mean((SVM.predict(xtest) - ytest) ** 2))
print(mlp.score(xtest, ytest))
print(np.mean((mlp.predict(xtest) - ytest) ** 2))

# Trained 4 models; these are the results with test-set fractions of 80%, 70%, 50% and 30%.
joblib.dump(lg, "lg3.m")
joblib.dump(nb, "nb3.m")
joblib.dump(forest, "rf3.m")
joblib.dump(SVM, "svm3.m")
joblib.dump(mlp, "mlp3.m")

def classify(data_path, path=None, counter=None, alg='svm'):
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True

    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).as_matrix()[:, 1]
    paths = map(os.path.basename, paths)  # Get the filename.
    # Remove the extension.
    paths = map(lambda x: x.split(".")[0], paths)
    paths = np.array(map(lambda path: os.path.splitext(path)[0], paths))

    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).as_matrix()

    folds = cross_validation.KFold(n=len(rawEmbeddings), random_state=1,
                                   n_folds=10, shuffle=True)
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds):
        print idx, alg
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
        elif alg == 'nn':
            clf = MLPClassifier(random_state=2, max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()

        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        score = clf.score(rawEmbeddings[test], paths[test])
        scores.append(score)

        prediction = clf.predict(rawEmbeddings[test])
        fscores_weighted.append(f1_score(paths[test], prediction, average="weighted"))
        fscores_macro.append(f1_score(paths[test], prediction, average="macro"))
        fscores_micro.append(f1_score(paths[test], prediction, average="micro"))

    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))
    with open(accuracy_dir, "wb") as file:
        for i in scores:
            file.writelines("%s,%s\n" % (str(i), str(counter)))

    print "Avg. score %s" % (reduce(operator.add, scores) / len(folds)), data_path

    # Append the averaged accuracy and F-scores to the per-algorithm log files.
    result_path = "{}/{}_{}.log".format(
        os.path.abspath(os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (
            str(reduce(operator.add, scores) / len(folds)), str(counter), alg))

    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        os.path.abspath(os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_weighted_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (
            str(reduce(operator.add, fscores_weighted) / len(folds)), str(counter), alg))

    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        os.path.abspath(os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_macro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (
            str(reduce(operator.add, fscores_macro) / len(folds)), str(counter), alg))

    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        os.path.abspath(os.path.join(os.path.join(data_path, os.pardir), os.pardir)),
        path, alg)
    with open(fscores_micro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (
            str(reduce(operator.add, fscores_micro) / len(folds)), str(counter), alg))
import numpy as np
from data_utils import *
from sklearn.neural_network import MLPClassifier

# Load the cleaned data; the last column holds the labels.
data = gather_and_clean_data()
X = data[:, 0:-1]
y = data[:, -1]

MClass = MLPClassifier()
MClass.fit(X, y)

# Note: these predictions and this score are computed on the training data itself.
pred = MClass.predict(X)
score = MClass.score(X, y)
print(f"Pred: {pred}")
print(f"Score: {score}")
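# Minimal sketch (not part of the original snippet): evaluate the same kind of
# classifier on a held-out split instead of the training data, assuming the
# same gather_and_clean_data() helper from data_utils.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                     random_state=0)
clf = MLPClassifier(max_iter=1000, random_state=0)
clf.fit(X_train, y_train)
print(f"Train score: {clf.score(X_train, y_train):.3f}")
print(f"Test score: {clf.score(X_test, y_test):.3f}")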