def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    scores = []
    scores_train = []
    classifiers = []
    # Train 10 networks that differ only in their random initialization.
    for i in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   solver="adam",
                                   max_iter=1000,
                                   activation="tanh",
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        scores.append(classifier.score(input2, target2[:, 0]))
        scores_train.append(classifier.score(input1, target1[:, 0]))
        classifiers.append(classifier)

    # Confusion matrix of the network with the best test accuracy.
    best_classifier = classifiers[np.argmax(scores)]
    conf_mat = confusion_matrix(target2[:, 0], best_classifier.predict(input2))
    print(conf_mat)

    plot_histogram_of_acc(scores_train, scores)

    # Collect and plot the images misclassified by the best network
    # (not by whichever classifier the loop happened to end on).
    predicted_target = best_classifier.predict(input2)
    misclassified_images = []
    for i in range(len(target2[:, 0])):
        if target2[:, 0][i] != predicted_target[i]:
            misclassified_images.append(input2[i])

    for image in misclassified_images:
        plot_image(image)
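The helpers plot_histogram_of_acc and plot_image are assumed by these exercise solutions but never shown. Below is a minimal matplotlib sketch of what they might look like; the (30, 32) image shape is an assumption inferred from the 960-dimensional inputs in a later example, not something the original code confirms.

# Hypothetical helper sketches -- not the course's actual plotting code.
import matplotlib.pyplot as plt
import numpy as np

def plot_histogram_of_acc(train_acc, test_acc):
    """Plot histograms of training and test accuracies side by side."""
    plt.hist([np.ravel(train_acc), np.ravel(test_acc)], label=["train", "test"])
    plt.xlabel("accuracy")
    plt.ylabel("count")
    plt.legend()
    plt.show()

def plot_image(image, shape=(30, 32)):
    """Show one flat image vector in grayscale; the (30, 32) shape is an
    assumption inferred from the 960-dimensional inputs used elsewhere."""
    plt.imshow(np.asarray(image).reshape(shape), cmap="gray")
    plt.axis("off")
    plt.show()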
Example No. 2
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    classifiers = []
    train_acc = np.zeros(10)
    test_acc = np.zeros(10)
    for i in range(10):
        nn = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh',
                           max_iter=1000, random_state=i)
        classifiers.append(nn)
        nn.fit(input1, target1[:, 0])
        train_acc[i] = nn.score(input1, target1[:, 0])
        test_acc[i] = nn.score(input2, target2[:, 0])

    # Select the network with the highest (not lowest) test accuracy,
    # once, after all ten runs.
    i_best = np.argmax(test_acc)
    y_pred = classifiers[i_best].predict(input2)
    C = confusion_matrix(target2[:, 0], y_pred)
    return train_acc, test_acc, y_pred, C
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    hidden_units = 20

    test_face = target2[:, 0]
    train_face = target1[:, 0]

    test_accuracy = np.zeros(10)
    train_accuracy = np.zeros(10)

    best_network = None
    max_accuracy = 0

    for i in range(10):
        # A fresh classifier per seed: reusing a single instance would make
        # best_network point at an object that later iterations refit.
        nn = MLPClassifier(activation="tanh",
                           solver="adam",
                           hidden_layer_sizes=(hidden_units, ),
                           max_iter=1000,
                           random_state=i)
        nn.fit(input1, train_face)
        train_accuracy[i] = nn.score(input1, train_face)
        test_accuracy[i] = nn.score(input2, test_face)

        if test_accuracy[i] > max_accuracy:
            best_network = nn
            max_accuracy = test_accuracy[i]

    plot_histogram_of_acc(train_accuracy, test_accuracy)

    # Use the best network to calculate the confusion matrix for the test set.
    y_pred = best_network.predict(input2)
    matrix = confusion_matrix(test_face, y_pred)

    print("The confusion matrix we obtained:\n" + str(matrix))

    # Plot a few misclassified images.
    misclassified = np.where(test_face != y_pred)
    for _ in range(4):
        plot_random_images(input2[misclassified])
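plot_random_images is likewise assumed rather than defined. A plausible sketch, under the same flat-grayscale-vector assumption as plot_image above:

# Hypothetical sketch of plot_random_images -- assumes flat grayscale image
# vectors with the same assumed (30, 32) shape as plot_image above.
import matplotlib.pyplot as plt
import numpy as np

def plot_random_images(images, n=2, shape=(30, 32)):
    idx = np.random.choice(len(images), size=min(n, len(images)), replace=False)
    fig, axes = plt.subplots(1, len(idx))
    for ax, k in zip(np.atleast_1d(axes), idx):
        ax.imshow(np.asarray(images[k]).reshape(shape), cmap="gray")
        ax.axis("off")
    plt.show()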
Example No. 4
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # Parameters used for the MLPClassifier.
    hidden_layers = 20
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 1000

    max_accuracy = 0.0

    train_accuracy = []
    test_accuracy = []
    cfn = None

    for m in range(10):
        cf = MLPClassifier(hidden_layer_sizes=(hidden_layers, ),
                           activation=activation_mode,
                           solver=solver_mode,
                           random_state=m,
                           max_iter=max_iter)
        cf.fit(input1, target1[:, 0])

        train_accuracy.append(cf.score(input1, target1[:, 0]))

        current_test_accuracy = cf.score(input2, target2[:, 0])
        test_accuracy.append(current_test_accuracy)

        # Keep the confusion matrix of the best network seen so far.
        if current_test_accuracy > max_accuracy:
            cfn = confusion_matrix(target2[:, 0], cf.predict(input2))
            max_accuracy = current_test_accuracy

    print(cfn)

    # Plot the accuracy histograms once, over all ten runs
    # (plotting scalars inside the loop produced no usable histogram).
    plot_histogram_of_acc(train_accuracy, test_accuracy)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    n = 10

    train_acc = np.zeros(n)
    test_acc = np.zeros(n)
    pred_test = np.zeros((n, 564))  # 564 test samples
    coefs = np.zeros((n, 960, 20))  # 960 input features, 20 hidden units

    # The targets cover 20 persons (classes).

    for i in range(n):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   activation='tanh',
                                   solver='adam',
                                   max_iter=5000,
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        pred_test[i] = classifier.predict(input2)
        coefs[i] = classifier.coefs_[0]
        train_acc[i] = classifier.score(input1, target1[:, 0])
        test_acc[i] = classifier.score(input2, target2[:, 0])

    # Print the indices of the test images misclassified by the network
    # with seed i=1.
    error = pred_test[1] - target2[:, 0]
    for j in range(len(error)):
        if error[j] != 0:
            print(j)
    # Plot pairs of misclassified images (indices found above).
    plot_random_images(np.vstack((input2[175, :], input2[184, :])))
    plot_random_images(np.vstack((input2[210, :], input2[134, :])))
    plot_random_images(np.vstack((input2[223, :], input2[177, :])))
    plot_random_images(np.vstack((input2[179, :], input2[186, :])))

    plot_histogram_of_acc(train_acc, test_acc)

    # Confusion matrix of the best network, the one with seed i=1.
    confmat = confusion_matrix(target2[:, 0], pred_test[1])
    print(confmat)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    train = input1
    test = input2
    target_train = target1[:, 1]
    target_test = target2[:, 1]

    n_hidden_neurons = 20

    accu_list_train = np.zeros(10)
    accu_list_test = np.zeros(10)

    # Find the best seed.
    for seed in range(10):
        nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                           hidden_layer_sizes=(n_hidden_neurons,),
                           random_state=seed)
        nn.fit(train, target_train)
        accu_list_train[seed] = nn.score(train, target_train)
        accu_list_test[seed] = nn.score(test, target_test)

    print(accu_list_train)
    print(accu_list_test)

    # Retrain with the best seed, judged by test accuracy
    # (training accuracy saturates and cannot discriminate between seeds).
    best_seed = np.argmax(accu_list_test)
    best_nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                            hidden_layer_sizes=(n_hidden_neurons,),
                            random_state=best_seed)
    best_nn.fit(train, target_train)

    # Evaluate the confusion matrix with the best network.
    predictions = best_nn.predict(test)
    C = confusion_matrix(target_test, predictions)
    print(C)

    # Plot results.
    plot_histogram_of_acc(accu_list_train, accu_list_test)

    # Find the indices of misclassified test images.
    misclassified_idx = np.nonzero(target_test - predictions)[0]
    print(misclassified_idx)
Example No. 7
#getting MNIST (70k images); fetch_mldata was removed from scikit-learn,
#so the dataset is loaded from OpenML instead
dataset = fetch_openml("mnist_784", version=1, as_frame=False)
X = np.array(dataset.data)  #our features
y = np.array(dataset.target)  #our labels

X = X.astype('float32')

#splitting the dataset into training and test sets:
#the first 60k instances are for training, the last 10k for testing
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

#normalizing our features to the range [0, 1]
X_train = X_train / 255
X_test = X_test / 255

#creating the neural network:
#one hidden layer with 512 units, i.e. a 784-512-10 architecture
mlp = MLPClassifier(hidden_layer_sizes=(512,), max_iter=500, verbose=True)

#fitting our model; fit() takes no epoch argument -- the number of
#passes over the data is controlled by max_iter
mlp.fit(X_train, y_train)

print("Training set score: %f" % mlp.score(X_train, y_train))  #output: 0.99
print("Test set score: %f" % mlp.score(X_test, y_test))  #output: 0.98

#saving our model
joblib.dump(mlp, "model.pkl")
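The saved model can be restored later with joblib; a minimal usage sketch, reusing the file name from the dump above:

import joblib

mlp = joblib.load("model.pkl")
print(mlp.predict(X_test[:5]))  #predicted digits for the first five test images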
Example No. 8
def classify_mlp(data_path):
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))
    if os.path.exists(result_path):
        with open(result_path) as f:
            if data_path in f.read():
                return True
    print(data_path)

    fname = "{}/train_labels.csv".format(data_path)
    if not os.path.exists(fname):
        return True
    tr_labels = np.loadtxt(fname)

    fname = "{}/train_embeddings.csv".format(data_path)
    tr_embeddings = np.loadtxt(fname)

    fname = "{}/val_labels.csv".format(data_path)
    val_labels = np.loadtxt(fname)

    fname = "{}/val_embeddings.csv".format(data_path)
    val_embeddings = np.loadtxt(fname)

    fname = "{}/test_labels.csv".format(data_path)
    te_labels = np.loadtxt(fname)

    fname = "{}/test_embeddings.csv".format(data_path)
    te_embeddings = np.loadtxt(fname)

    clf = MLPClassifier(random_state=2,
                        max_iter=200000000,
                        hidden_layer_sizes=(64, ))
    clf.fit(tr_embeddings, tr_labels)

    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)

    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)

    # f1_score expects (y_true, y_pred) in that order.
    tr_fscore = f1_score(tr_labels, tr_predictions, average="weighted")
    val_fscore = f1_score(val_labels, val_predictions, average="weighted")
    te_fscore = f1_score(te_labels, te_predictions, average="weighted")
    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)
    with open(result_path, mode='a') as f:
        f.write(
            'Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\tTest Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
            % (data_path, tr_score, val_score, te_score, tr_fscore, val_fscore,
               te_fscore))

    conf_mat = confusion_matrix(te_labels, te_predictions)
    labels = sorted(set(te_labels))
    plot_confusion_matrix(conf_mat,
                          classes=labels,
                          normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path,
                          path_name='mlp_confusion_matrix',
                          alg='mlp')
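plot_confusion_matrix is not defined in this snippet either. Below is a minimal matplotlib sketch consistent with the call above; treating the output, path_name, and alg keywords as controlling where the figure is saved is an assumption:

# Hypothetical sketch of plot_confusion_matrix, matching the call above.
import os
import matplotlib.pyplot as plt
import numpy as np

def plot_confusion_matrix(cm, classes, normalize=False, title="",
                          output=".", path_name="confusion", alg=""):
    if normalize:
        cm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
    plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.savefig(os.path.join(output, "%s_%s.png" % (alg, path_name)))
    plt.close()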
Example No. 9
# a single fit_transform is enough; a separate fit call would be redundant
x_new=svd.fit_transform(fea_data_set)
# pca=PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new=pca.transform(fea_data_set)
xtrain,xtest,ytrain,ytest=train_test_split(x_new,label,test_size=0.2)
lg.fit(xtrain,ytrain)
nb.fit(xtrain,ytrain)
forest.fit(xtrain,ytrain)
SVM.fit(xtrain,ytrain)
mlp.fit(xtrain,ytrain)
print("------------")
print(lg.score(xtest,ytest))
print(np.mean((lg.predict(xtest)-ytest)**2))  #MSE: square before averaging
print(lg.score(xtrain,ytrain))
print(np.mean((lg.predict(xtrain)-ytrain)**2))
print("------------")
print(nb.score(xtest,ytest))
print(np.mean((nb.predict(xtest)-ytest)**2))
print(forest.score(xtest,ytest))
print(np.mean((forest.predict(xtest)-ytest)**2))
print(SVM.score(xtest,ytest))
print(np.mean((SVM.predict(xtest)-ytest)**2))
print(mlp.score(xtest,ytest))
print(np.mean((mlp.predict(xtest)-ytest)**2))
#trained 4 models; the results correspond to test-set sizes of 80%, 70%, 50% and 30%
joblib.dump(lg,"lg3.m")
joblib.dump(nb,"nb3.m")
joblib.dump(forest,"rf3.m")
joblib.dump(SVM,"svm3.m")
joblib.dump(mlp,"mlp3.m")
Example No. 10
def classify(data_path, path=None, counter=None, alg='svm'):
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True
    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).values[:, 1]
    # Keep only the file name, without directory or extension.
    paths = np.array([os.path.splitext(os.path.basename(p))[0] for p in paths])

    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).values
    # KFold now lives in sklearn.model_selection.
    folds = KFold(n_splits=10, shuffle=True, random_state=1)
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds.split(rawEmbeddings)):
        print(idx, alg)
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
        elif alg == 'nn':
            clf = MLPClassifier(random_state=2,
                                max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()
        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        scores.append(clf.score(rawEmbeddings[test], paths[test]))
        prediction = clf.predict(rawEmbeddings[test])
        fscores_weighted.append(
            f1_score(paths[test], prediction, average="weighted"))
        fscores_macro.append(f1_score(paths[test], prediction, average="macro"))
        fscores_micro.append(f1_score(paths[test], prediction, average="micro"))

    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))
    with open(accuracy_dir, "w") as f:
        for score in scores:
            f.write("%s,%s\n" % (str(score), str(counter)))

    print("Avg. score %s" % np.mean(scores), data_path)

    parent_dir = os.path.abspath(
        os.path.join(os.path.join(data_path, os.pardir), os.pardir))

    result_path = "{}/{}_{}.log".format(parent_dir, path, alg)
    with open(result_path, "a") as f:
        f.write("%s,\t%s\t%s\n" % (str(np.mean(scores)), str(counter), alg))

    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        parent_dir, path, alg)
    with open(fscores_weighted_result_path, "a") as f:
        f.write("%s,\t%s\t%s\n" % (str(np.mean(fscores_weighted)),
                                   str(counter), alg))

    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        parent_dir, path, alg)
    with open(fscores_macro_result_path, "a") as f:
        f.write("%s,\t%s\t%s\n" % (str(np.mean(fscores_macro)),
                                   str(counter), alg))

    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        parent_dir, path, alg)
    with open(fscores_micro_result_path, "a") as f:
        f.write("%s,\t%s\t%s\n" % (str(np.mean(fscores_micro)),
                                   str(counter), alg))
Example No. 11
import numpy as np
from data_utils import *
# The deep module path sklearn.neural_network.multilayer_perceptron is
# private; use the public import instead.
from sklearn.neural_network import MLPClassifier

data = gather_and_clean_data()

X = data[:, 0:-1]
y = data[:, -1]

MClass = MLPClassifier()
MClass.fit(X, y)
pred = MClass.predict(X)
score = MClass.score(X, y)
print(f"Pred: {pred}")
print(f"Score: {score}")