示例#1
0
def runner():
    """Build the model, train it in two stages and save the last history.

    Stage one trains the freshly built model: a single-epoch run with
    ``batch_size=9`` followed by a 99-epoch run.  Stage two marks every
    layer as trainable, recompiles the model with SGD and trains for one
    and then 33 epochs.  Only the history returned by the final run is
    passed to ``save_histogram``.
    """
    # Pre-trained backbone specification: VGG16 "block5_conv3" translates to
    # layer number 17.
    train_gen, valid_gen = pre_process.load_data(
        path_data_train='./images/splitted/train',
        path_data_valid='./images/splitted/valid',
        size_mini_batch=11,
    )
    model = build_model(
        no_class=NO_CLASS,
        no_last_layer_backbone=17,
        rate_learning=1.0,
        rate_decay_weight=1e-8,
        flg_debug=True,
    )

    # Plotting is best effort: the first OSError is printed and the
    # remaining plot files are skipped.
    try:
        for target in ('model_plot.png', 'model_plot.gv', 'model_plot.svg'):
            plot_model(model,
                       to_file=target,
                       show_layer_names=True,
                       show_shapes=True)
    except OSError as plot_error:
        print(plot_error)

    # Arguments shared by every training call below.
    fit_args = {
        'model': model,
        'gen_dir_train': train_gen,
        'gen_dir_valid': valid_gen,
    }

    train_model(max_epoch=1, batch_size=9, **fit_args)

    # Fine-tune only the FC layer.
    print('Finetuning FC Layer')
    train_model(max_epoch=99, **fit_args)

    # Fine-tune all layers.
    print('Finetuning all layers')
    for layer in model.layers:
        layer.trainable = True

    # Now that all layers are trainable, change the learning rate.
    sgd_optimizer = keras.optimizers.sgd(lr=1e-3,
                                         decay=1e-9,
                                         momentum=0.9,
                                         nesterov=False)
    model.compile(loss="categorical_crossentropy",
                  optimizer=sgd_optimizer,
                  metrics=["categorical_accuracy"])

    print('Begin final approach')
    train_model(max_epoch=1, **fit_args)

    print('Approaching final training')
    final_history = train_model(max_epoch=33, **fit_args)

    print('Done!, save histogram')
    save_histogram(final_history, './model/BCNN_Keras/histograms/')
def run():
    """Second step: reload the saved model and continue training with SGD.

    Loads ``SAVE_PATH + 'model.h5'``, recompiles it with SGD, trains for one
    epoch and then for 33 epochs, and finally saves the model together with
    the history returned by the 33-epoch run.
    """
    train_gen, valid_gen = pre_process.load_data(
        path_data_train='./images/splitted/train',
        path_data_valid='./images/splitted/valid',
        size_mini_batch=10,
    )
    model = load_existing_model(SAVE_PATH + 'model.h5')

    sgd_optimizer = keras.optimizers.sgd(
        lr=1e-3, decay=1e-9, momentum=0.9, nesterov=False)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=sgd_optimizer,
        metrics=["categorical_accuracy"],
    )

    # Arguments shared by both training calls.
    fit_args = {
        'model': model,
        'gen_dir_train': train_gen,
        'gen_dir_valid': valid_gen,
    }

    print('Begin final approach')
    train_model(max_epoch=1, **fit_args)

    print('Approaching final training')
    final_history = train_model(max_epoch=33, **fit_args)

    print('Done!, save histogram')
    save_model(model)
    save_histogram(final_history, './model/save/histograms/')
示例#3
0
def train_and_test():
    """Train the LSTM parser and write test-set parses after every epoch.

    Reads vocabularies and token data through ``loader.load_data``, builds a
    ``model.LSTMParser`` from the module-level ``options`` and, when
    ``options.model_dir`` exists, loads a previously saved model from it.

    For each of ``options.epochs`` epochs the function trains on every
    'train' example, prints the running loss per sentence every
    ``options.print_every`` examples, saves the model to
    ``<result_dir>/epochNNN.model`` and writes one formatted parse per 'test'
    example to a result file.

    The result file is now opened with a context manager so that it is
    closed even when parsing or post-processing raises.
    """
    (word_vocab, nt_vocab, ter_vocab, act_vocab,
     word_tokens, tree_tokens, tran_actions) = loader.load_data(
         options.data_dir, options.order)

    parser = model.LSTMParser(word_vocab,
                              nt_vocab,
                              ter_vocab,
                              act_vocab,
                              options.word_dim,
                              options.nt_dim,
                              options.ter_dim,
                              options.lstm_dim,
                              options.nlayers,
                              options.order,
                              options.embedding_file,
                              options.attention,
                              options.train_selection,
                              options.test_selection,
                              options.beam_search,
                              options.beam_size)

    if os.path.exists(options.model_dir):
        parser.load_model(options.model_dir)

    trainer = optimizers[options.optimizer](parser.model)

    # `i` counts training examples across all epochs; it is also used to
    # name the result file written after each epoch.
    i = 0
    for epoch in range(options.epochs):
        sents = 0
        total_loss = 0.0
        train_size = len(word_tokens['train'])
        for x, y, z in loader.iter_data(word_tokens, tran_actions,
                                        tree_tokens, 'train'):
            loss = parser.train(x, y, z, options)
            sents += 1
            # `parser.train` may return None; such examples are counted but
            # contribute no loss and trigger no update.
            if loss is not None:
                total_loss += loss.scalar_value()
                loss.backward()
                trainer.update()

            if i % options.print_every == 0:
                # Fractional epoch reached so far.
                e = float(i) / train_size
                print('epoch {}: loss per sentence: {}'.format(
                    e, total_loss / sents))
                sents = 0
                total_loss = 0.0

            i += 1

        print('testing...')
        save_as = '%s/epoch%03d.model' % (options.result_dir, epoch)
        parser.save_model(save_as)

        # NOTE(review): the result path is `result_dir` directly followed by
        # the example counter, without a path separator - kept as in the
        # original; confirm this is intended.
        with open(options.result_dir + str(i), 'w') as rf:
            for x, y, z in loader.iter_data(word_tokens, tran_actions,
                                            tree_tokens, 'test'):
                output_actions, output_tokens = parser.parse(x, y, z)
                output = post_process.recover(output_actions, output_tokens,
                                              options.order)
                output = post_process.format_output(output)
                rf.write(output + '\n')
示例#4
0
def run():
    """First steps: build the model, train it, then save it recompiled.

    Builds the model, trains it for one epoch with ``batch_size=9`` and then
    for 99 epochs.  Afterwards every layer is marked trainable and the model
    is recompiled with SGD before being saved; no further training happens
    here.  The history returned by the 99-epoch run is saved as well.
    """
    train_gen, valid_gen = pre_process.load_data(
        path_data_train='./images/splitted/train',
        path_data_valid='./images/splitted/valid',
        size_mini_batch=10,
    )
    model = build_model(
        no_class=NO_CLASS,
        no_last_layer_backbone=17,
        rate_learning=1.0,
        rate_decay_weight=1e-8,
        flg_debug=True,
    )

    # Arguments shared by both training calls.
    fit_args = {
        'model': model,
        'gen_dir_train': train_gen,
        'gen_dir_valid': valid_gen,
    }

    train_model(max_epoch=1, batch_size=9, **fit_args)

    # Fine-tune only the FC layer.
    print('Finetuning FC Layer')
    fc_history = train_model(max_epoch=99, **fit_args)

    # Prepare fine-tuning of all layers.
    print('Finetuning all layers')
    for layer in model.layers:
        layer.trainable = True

    # Now that all layers are trainable, change the learning rate.
    sgd_optimizer = keras.optimizers.sgd(
        lr=1e-3, decay=1e-9, momentum=0.9, nesterov=False)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=sgd_optimizer,
        metrics=["categorical_accuracy"],
    )

    save_model(model)
    save_histogram(fc_history, './model/save/histograms/')
示例#5
0
def train():
    """Run the ranking agent over the OHSUMED splits and log the metrics.

    Loads the training, test and validation sets for ``FLAGS.file_name``,
    builds a ``QRL_L2R`` agent and, for ``FLAGS.num_epochs`` epochs:

    1. walks every training batch document by document, storing each
       selected document and label in the agent;
    2. evaluates on the training, validation and test sets, printing each
       result line and writing it to the log file;
    3. saves the agent's parameters whenever the first validation reward
       exceeds the best value seen so far (initially 1).

    After the last epoch the test set is evaluated once more.

    The log file (module-level path ``precision``) is opened with a context
    manager so that it is flushed and closed even if an epoch raises.
    """
    file_name = FLAGS.file_name
    data_root = "../pre_prosess/OHSUMED/" + file_name
    train_set = load_data(data_root + "/trainingset.txt")
    test_set = load_data(data_root + "/testset.txt")
    valid_set = load_data(data_root + "/validationset.txt")
    # The result was never used; the call is kept in case it has side
    # effects.
    get_each_query_length()

    with open(precision, "w") as log:
        log.write(str(FLAGS.__flags) + '\n')

        RL_L2R = QRL_L2R(feature_dim=FLAGS.feature_dim,
                         learn_rate=FLAGS.learning_rate,
                         reward_decay=FLAGS.reward_decay,
                         FLAGS=FLAGS)

        # Best validation reward so far; parameters are saved only when it
        # is beaten.
        max_reward = 1

        for i in range(FLAGS.num_epochs):
            print("\nepoch " + str(i) + "\n")

            # training process
            for data in get_batch(train_set, FLAGS.feature_dim):
                doc_feature = data[0]
                doc_label = data[1]
                doc_len = data[2]
                qid = data[3]

                for step in range(doc_len):
                    immediate_rewards = calcu_immediate_reward(
                        step, doc_label)
                    selected_doc_index = RL_L2R.choose_doc(
                        step, doc_feature, doc_label, immediate_rewards, True)
                    current_doc = doc_feature[selected_doc_index]
                    current_label = doc_label[selected_doc_index]
                    # Replace the candidates with what the helper returns
                    # for the document that was just selected.
                    doc_feature, doc_label = get_candidata_feature_label(
                        selected_doc_index, doc_feature, doc_label)
                    RL_L2R.store_transition(current_doc, current_label)

                reward = calcu_reward(RL_L2R.ep_label)
                # NOTE(review): every `RL_L2R.learn(...)` call was commented
                # out in the original, so no parameter update is visible in
                # this loop - confirm that this is intended.
                RL_L2R.reset_network()
                print("training, qid :{} with_length : {}, reward : {}".format(
                    qid, doc_len, reward))

            # train evaluation
            train_predict_label_collection, train_reward = predict(
                RL_L2R, train_set)
            train_MAP, train_NDCG_at_1, train_NDCG_at_3, train_NDCG_at_5, train_NDCG_at_10, train_NDCG_at_20, train_MRR, train_P = evaluation_ranklists(
                train_predict_label_collection)
            train_result_line = "## epoch {}, train MAP : {}, train_NDCG_at_1 : {}, train_NDCG_at_3 : {}, train_NDCG_at_5 : {}, train_NDCG_at_10 : {}, train_NDCG_at_20 : {}, train_MRR@20 : {}, train_P@20 : {}, \ntrain_reward : {}".format(
                i, train_MAP, train_NDCG_at_1, train_NDCG_at_3,
                train_NDCG_at_5, train_NDCG_at_10, train_NDCG_at_20,
                train_MRR, train_P, train_reward[0])
            print(train_result_line)
            log.write(train_result_line + "\n")

            # valid evaluation
            valid_predict_label_collection, valid_reward = predict(
                RL_L2R, valid_set)
            valid_MAP, valid_NDCG_at_1, valid_NDCG_at_3, valid_NDCG_at_5, valid_NDCG_at_10, valid_NDCG_at_20, valid_MRR, valid_P = evaluation_ranklists(
                valid_predict_label_collection)
            valid_result_line = "## epoch {}, valid_MAP : {}, valid_NDCG_at_1 : {}, valid_NDCG_at_3 : {}, valid_NDCG_at_5 : {}, valid_NDCG_at_10 : {}, valid_NDCG_at_20 : {}, valid_MRR@20 : {}, valid_P@20 : {}, \nvalid_reward : {}".format(
                i, valid_MAP, valid_NDCG_at_1, valid_NDCG_at_3,
                valid_NDCG_at_5, valid_NDCG_at_10, valid_NDCG_at_20,
                valid_MRR, valid_P, valid_reward[0])
            print(valid_result_line)
            log.write(valid_result_line + "\n")

            # save param
            # Compare the same scalar that is logged and stored
            # (`valid_reward[0]`); the original compared the whole
            # `valid_reward` container with the scalar threshold.
            if valid_reward[0] > max_reward:
                max_reward = valid_reward[0]
                write_str = str(max_reward) + "_" + str(
                    valid_NDCG_at_1) + "_" + str(valid_NDCG_at_10)
                RL_L2R.save_param(write_str, timeDay)

            # test evaluation
            test_predict_label_collection, test_reward = predict(
                RL_L2R, test_set)
            test_MAP, test_NDCG_at_1, test_NDCG_at_3, test_NDCG_at_5, test_NDCG_at_10, test_NDCG_at_20, test_MRR, test_P = evaluation_ranklists(
                test_predict_label_collection)
            test_result_line = "## test_MAP : {}, test_NDCG_at_1 : {}, test_NDCG_at_3 : {}, test_NDCG_at_5 : {}, test_NDCG_at_10 : {}, test_NDCG_at_20 : {}, test_MRR@20 : {}, test_P@20 : {}, \ntest_reward : {}".format(
                test_MAP, test_NDCG_at_1, test_NDCG_at_3, test_NDCG_at_5,
                test_NDCG_at_10, test_NDCG_at_20, test_MRR, test_P,
                test_reward[0])
            print(test_result_line)
            log.write(test_result_line + "\n\n")

        # test process (final evaluation after the last epoch)
        test_predict_label_collection, test_reward = predict(RL_L2R, test_set)
        test_MAP, test_NDCG_at_1, test_NDCG_at_3, test_NDCG_at_5, test_NDCG_at_10, test_NDCG_at_20, test_MRR, test_P = evaluation_ranklists(
            test_predict_label_collection)
        test_result_line = "## test_MAP : {}, test_NDCG_at_1 : {}, test_NDCG_at_3 : {}, test_NDCG_at_5 : {}, test_NDCG_at_10 : {}, test_NDCG_at_20 : {}, test_MRR@20 : {}, test_P@20 : {}, \ntest_reward : {}".format(
            test_MAP, test_NDCG_at_1, test_NDCG_at_3, test_NDCG_at_5,
            test_NDCG_at_10, test_NDCG_at_20, test_MRR, test_P,
            test_reward[0])
        print(test_result_line)
        log.write(test_result_line + "\n")

        log.write("\n")
def analyse_KNN_euclidean(k=10):
    """
    Evaluate unweighted majority-vote kNN for every neighbourhood size
    from 1 to k.

    For each query a brute-force ``KNeighborsClassifier`` (``p=2``, uniform
    weights) is fitted on the gallery subset returned by ``select_features``
    and its k nearest neighbours are retrieved.  For every prefix of that
    neighbour list the most common label is taken as the prediction.

    Parameters
    ----------
    k: int
        How many neighbours should we consider

    Returns
    -------
    labels: list of lists
        ``labels[j]`` holds the predicted label of every query when only the
        first ``j + 1`` neighbours vote
    errors: list of float
        ``errors[j]`` is the fraction of queries whose prediction with
        ``j + 1`` neighbours differs from the true label
    tops: list of float
        ``tops[j]`` is the fraction of queries whose true label is absent
        from the first ``j + 1`` neighbours
    query_labels:
        True query labels, as returned by ``load_data``
    """

    all_data = load_data()

    query_data = all_data[1]
    gallery_data = all_data[2]

    query_features = query_data[0]
    query_labels = query_data[1]
    query_camIds = query_data[2]

    gallery_features = gallery_data[0]
    gallery_labels = gallery_data[1]
    gallery_camIds = gallery_data[2]

    errors = [0] * k
    labels = [[] for _ in range(k)]
    tops = [0] * k

    for i in tqdm(range(len(query_features))):
        query = query_features[i, :]
        query_label = query_labels[i]
        query_camId = query_camIds[i]

        selected_gallery_features, selected_gallery_labels = select_features(
            gallery_camIds, query_camId, gallery_labels, query_label,
            gallery_features)

        # Initialise the classifier
        clf = neighbors.KNeighborsClassifier(k,
                                             algorithm="brute",
                                             p=2,
                                             weights="uniform",
                                             n_jobs=-1)
        clf.fit(selected_gallery_features, selected_gallery_labels)

        # Predict the neighbors but do not return the distances
        predicted_neighbors = clf.kneighbors(query.reshape(1, -1),
                                             return_distance=False)

        # Implement only majority voting without weighting on distances.
        # `predicted_neighbors` has one row (a single query), hence the [0].
        predicted_labels = [
            selected_gallery_labels[l] for l in predicted_neighbors
        ][0]

        # Count the majority votes and add up the scores for respective k.
        # A separate index `j` is used so the query index `i` is not
        # shadowed.
        for j in range(len(predicted_labels)):
            rank = predicted_labels[:j + 1]
            label = Counter(rank).most_common(1)[0][0]

            labels[j].append(label)

            if query_label not in rank:
                tops[j] += 1

            if label != query_label:
                errors[j] += 1

    # Turn the counts into rates over all queries.
    query_count = len(query_features)
    errors = [count / query_count for count in errors]
    tops = [count / query_count for count in tops]

    return labels, errors, tops, query_labels
示例#7
0
def analyse_KMeans():
    """
    Classify every query through KMeans clusters fitted on the gallery.

    KMeans is fitted on all gallery features with one cluster per distinct
    gallery label.  Each query is assigned to a cluster and receives the
    most common label among the gallery points of that cluster; ties are
    resolved in favour of the label met first when the points are ordered
    by distance to the cluster centre.

    The winning label depends only on the cluster, so it is computed once
    per cluster and cached instead of rescanning the gallery for every
    query.

    Returns
    -------
    list
        ``[labels, error]`` where ``labels`` holds the predicted label of
        every query and ``error`` is the fraction of queries whose
        prediction differs from the true label
    """
    # Split and load the data
    all_data = load_data()

    query_data = all_data[1]
    gallery_data = all_data[2]

    query_features = query_data[0]
    query_labels = query_data[1]

    gallery_features = gallery_data[0]
    gallery_labels = gallery_data[1]

    print("Training classifier...")
    clf = KMeans(max_iter=100,
                 random_state=1,
                 n_clusters=len(set(gallery_labels)),
                 verbose=True)
    # `fit_predict` returns the cluster index of every gallery sample.
    gallery_assignments = clf.fit_predict(gallery_features, gallery_labels)

    error = 0
    labels = []
    top_label_by_cluster = {}

    print("Testing classifier...")
    for i in tqdm(range(len(query_features))):
        query = query_features[i, :]
        query_label = query_labels[i]

        cluster_id = int(clf.predict(query.reshape(1, -1))[0])

        if cluster_id not in top_label_by_cluster:
            top_label_by_cluster[cluster_id] = _kmeans_cluster_top_label(
                clf.cluster_centers_[cluster_id], cluster_id,
                gallery_assignments, gallery_features, gallery_labels)

        top_label = top_label_by_cluster[cluster_id]
        labels.append(top_label)

        if top_label != query_label:
            error += 1

    error /= len(query_labels)
    print("Error {}".format(error))
    return [labels, error]


def _kmeans_cluster_top_label(centre, cluster_id, assignments, features,
                              labels):
    """Return the most common label among the members of one cluster."""
    members = [
        index for index in range(len(assignments))
        if assignments[index] == cluster_id
    ]
    if not members:
        raise ValueError(
            "cluster {} contains no gallery points".format(cluster_id))

    # Order the members by distance to the centre so that, on a tie in the
    # vote, the label of the closest member is the one reported.
    ranked = sorted(
        (np.linalg.norm(centre - features[index]), labels[index])
        for index in members)

    votes = Counter(label for _, label in ranked)
    return votes.most_common(1)[0][0]
def analyse_KNN_PCA(k=10):
    """
    Evaluate distance-weighted kNN on kernel-PCA features for every
    neighbourhood size from 1 to k.

    Query, training and gallery features are normalised row-wise; an RBF
    ``KernelPCA`` with 500 components is fitted on the training features and
    applied to the query and gallery features.  For each query a Euclidean
    ``KNeighborsClassifier`` is fitted on the gallery subset returned by
    ``select_features``, and every prefix of its k nearest neighbours is
    voted on with ``vote`` using the weights produced by ``weight``.

    Parameters
    ----------
    k: int
        How many neighbours should we consider

    Returns
    -------
    labels: list of lists
        ``labels[j]`` holds the predicted label of every query when only the
        first ``j + 1`` neighbours vote
    errors: list of float
        ``errors[j]`` is the fraction of queries whose prediction with
        ``j + 1`` neighbours differs from the true label
    tops: list of float
        ``tops[j]`` is the fraction of queries whose true label is absent
        from the first ``j + 1`` neighbours
    query_labels:
        True query labels, as returned by ``load_data``
    """

    all_data = load_data(False)

    training_data = all_data[0]
    query_data = all_data[1]
    gallery_data = all_data[2]

    training_features = training_data[0]

    query_features = query_data[0]
    query_labels = query_data[1]
    query_camIds = query_data[2]

    gallery_features = gallery_data[0]
    gallery_labels = gallery_data[1]
    gallery_camIds = gallery_data[2]

    errors = [0] * k
    labels = [[] for _ in range(k)]
    tops = [0] * k

    query_features = normalize(query_features, axis=1)
    training_features = normalize(training_features, axis=1)
    gallery_features = normalize(gallery_features, axis=1)

    pca = KernelPCA(n_components=500, kernel="rbf", n_jobs=-1)
    pca.fit(training_features)

    # Only the query and gallery projections are used below; the projected
    # training set was never read, so that transform is no longer computed.
    query_features = pca.transform(query_features)
    gallery_features = pca.transform(gallery_features)

    for i in tqdm(range(len(query_features))):
        query = query_features[i, :]
        query_label = query_labels[i]
        query_camId = query_camIds[i]

        selected_gallery_features, selected_gallery_labels = select_features(
            gallery_camIds, query_camId, gallery_labels, query_label,
            gallery_features)

        clf = neighbors.KNeighborsClassifier(k, metric="euclidean")
        clf.fit(selected_gallery_features, selected_gallery_labels)

        distances, predicted_neighbors = clf.kneighbors(query.reshape(1, -1),
                                                        return_distance=True)
        predicted_labels = np.array([
            selected_gallery_labels[l] for l in predicted_neighbors
        ]).flatten()

        weighted_distances = weight(distances).flatten()

        # Vote on every prefix of the neighbour list.
        for j in range(len(predicted_labels)):
            rank = predicted_labels[:j + 1]
            rank_weights = weighted_distances[:j + 1]
            label = vote(rank, rank_weights)

            labels[j].append(label)

            if query_label not in rank:
                tops[j] += 1

            if label != query_label:
                errors[j] += 1

    # Turn the counts into rates over all queries.
    query_count = len(query_features)
    errors = [count / query_count for count in errors]
    tops = [count / query_count for count in tops]

    return labels, errors, tops, query_labels
def analyse_KNN_NN(k=10):
    """
    Evaluate distance-weighted kNN with a learned metric for every
    neighbourhood size from 1 to k.

    Query and gallery features are normalised row-wise.  The distance
    between two feature vectors is the output of the model returned by
    ``load_model`` for that pair.  For each query a brute-force
    ``KNeighborsClassifier`` using this metric is fitted on the gallery
    subset returned by ``select_features``, and every prefix of its k
    nearest neighbours is voted on with ``vote`` using the weights produced
    by ``weight``.

    Parameters
    ----------
    k: int
        How many neighbours should we consider

    Returns
    -------
    labels: list of lists
        ``labels[j]`` holds the predicted label of every query when only the
        first ``j + 1`` neighbours vote
    errors: list of float
        ``errors[j]`` is the fraction of queries whose prediction with
        ``j + 1`` neighbours differs from the true label
    tops: list of float
        ``tops[j]`` is the fraction of queries whose true label is absent
        from the first ``j + 1`` neighbours
    query_labels:
        True query labels, as returned by ``load_data``
    """

    all_data = load_data(False)

    query_data = all_data[1]
    gallery_data = all_data[2]

    query_features = query_data[0]
    query_labels = query_data[1]
    query_camIds = query_data[2]

    gallery_features = gallery_data[0]
    gallery_labels = gallery_data[1]
    gallery_camIds = gallery_data[2]

    errors = [0] * k
    labels = [[] for _ in range(k)]
    tops = [0] * k

    query_features = normalize(query_features, axis=1)
    gallery_features = normalize(gallery_features, axis=1)

    distance_model = load_model()

    def metric(x, y):
        # A callable metric must return one value per pair, so the model
        # output is collapsed to a Python scalar.
        # NOTE(review): assumes the model emits exactly one value per pair -
        # `.item()` raises ValueError otherwise; confirm.
        prediction = distance_model.predict(
            [x.reshape(1, -1), y.reshape(1, -1)], verbose=0)
        return np.asarray(prediction).item()

    for i in tqdm(range(len(query_features))):
        query = query_features[i, :]
        query_label = query_labels[i]
        query_camId = query_camIds[i]

        selected_gallery_features, selected_gallery_labels = select_features(
            gallery_camIds, query_camId, gallery_labels, query_label,
            gallery_features)

        clf = neighbors.KNeighborsClassifier(k,
                                             metric=metric,
                                             algorithm="brute")
        clf.fit(selected_gallery_features, selected_gallery_labels)

        distances, predicted_neighbors = clf.kneighbors(query.reshape(1, -1),
                                                        return_distance=True)
        predicted_labels = np.array([
            selected_gallery_labels[l] for l in predicted_neighbors
        ]).flatten()

        weighted_distances = weight(distances).flatten()

        # Vote on every prefix of the neighbour list.
        for j in range(len(predicted_labels)):
            rank = predicted_labels[:j + 1]
            rank_weights = weighted_distances[:j + 1]
            label = vote(rank, rank_weights)

            labels[j].append(label)

            if query_label not in rank:
                tops[j] += 1

            if label != query_label:
                errors[j] += 1

    # Turn the counts into rates over all queries.
    query_count = len(query_features)
    errors = [count / query_count for count in errors]
    tops = [count / query_count for count in tops]

    return labels, errors, tops, query_labels
def main():

    # load data
    mean, eigenvectors, eigenvalues, dataset = load_data()

    # Initialise EigenFace Class
    eigenface = EigenFace(copy.deepcopy(dataset),copy.deepcopy(eigenvectors[0]),mean)

    M = np.arange(0,401,5)

    '''

    ########################
    # RECONSTRUCTION ERROR #
    ########################

    # Obtain reconstruction error as function of M
    err = []
    run_time = []
    mem_consumption = []
    for m in M:
        eigenface.M = m
        start = time.time()
        err.append(eigenface.run_reconstruction())
        end = time.time()
        mem= get_process_memory()
        mem_consumption.append(mem)
        run_time.append(end-start)

    # Run Time
    fig, ax1 = plt.subplots()
    h1, = ax1.plot(M,run_time, label="Run Time")
    ax1.set_ylabel('Run Time (s)')
    ax1.set_xlabel('Number of Eigenvectors')
    # Memory Consumption
    ax2 = ax1.twinx()
    h2, = ax2.plot(M,mem_consumption,'r',label="Memory Consumption")
    ax2.set_ylabel('Memory Consumption (%)')
    plt.legend(handles=[h1, h2])
    fig.tight_layout()
    plt.title('Reconstruction Run Time & Memory Consumption')
    plt.savefig("results/q1-2/reconstruction_run_time_and_mem.png", format="png", transparent=True)
    plt.close()

    # Error
    plt.figure()
    plt.plot(M,err,label="Error")
    plt.ylabel('Error')
    plt.xlabel('Number of Eigenvectors')
    plt.title('Reconstruction Error')
    plt.legend()
    plt.savefig("results/q1-2/reconstruction_error.png", format="png", transparent=True)
    plt.close()

    # Run Time
    fig, ax1 = plt.subplots()
    h1, = ax1.plot(M,err, label="Reconstruction Error")
    ax1.set_ylabel('Error')
    ax1.set_xlabel('Number of Eigenvectors')

    # Memory Consumption
    ax2 = ax1.twinx()
    h2, = ax2.plot(M,eigenvalues[0][M],'r',label="Eigenvalue magnitude")
    ax2.set_ylabel('Eigenvalues')

    plt.legend(handles=[h1,h2])
    fig.tight_layout()
    plt.title('Reconstruction Error')
    plt.savefig("results/q1-2/reconstruction_err_eigenvalues.png", format="png", transparent=True)
    plt.close()
    """
    #############################
    # RECONSTRUCTION COMPARISON #
    #############################

    # Compair Image for different M
    plt.figure()
    f, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, sharey=True)
    f.suptitle('Comparison of Reconstructed Faces')
    face = copy.deepcopy(eigenface.train_faces[:,[20]])

    # Original Face
    face_tmp = face + mean
    img = face_tmp.reshape((46,56))
    img = np.rot90(img,3)
    ax1.imshow(img, cmap="gray")
    ax1.axis('off')
    ax1.set_title('Original')


    # Bad reconstruction
    eigenface.select_M_eigenvectors(5, plot=False)
    projected_face = copy.deepcopy(eigenface.project_to_face_space(face))
    reconstructed_face = copy.deepcopy(eigenface.reconstruction(projected_face))

    img = reconstructed_face.reshape((46,56))
    img = np.rot90(img,3)
    ax2.imshow(img, cmap="gray")
    ax2.axis('off')
    ax2.set_title('M=5')


    # Good reconstruction
    eigenface.select_M_eigenvectors(120, plot=False)
    projected_face = copy.deepcopy(eigenface.project_to_face_space(face))
    reconstructed_face = copy.deepcopy(eigenface.reconstruction(projected_face))

    img = reconstructed_face.reshape((46,56))
    img = np.rot90(img,3)
    ax3.imshow(img, cmap="gray")
    ax3.axis('off')
    ax3.set_title('M=120')

    # Good reconstruction
    eigenface.select_M_eigenvectors(400, plot=False)
    projected_face = copy.deepcopy(eigenface.project_to_face_space(face))
    reconstructed_face = copy.deepcopy(eigenface.reconstruction(projected_face))

    img = reconstructed_face.reshape((46,56))
    img = np.rot90(img,3)
    ax4.imshow(img, cmap="gray")
    ax4.axis('off')
    ax4.set_title('M=400')

    plt.savefig("results/q1-2/reconstruction_comparison.png", format="png", transparent=True)
    plt.close()

    ############
    # NN ERROR #
    ############

    err = []
    run_time = []
    mem_consumption = []

    for m in M:
        eigenface.M = m
        start = time.time()
        err.append(eigenface.run_nn_classifier()[0])
        end = time.time()
        mem= get_process_memory()
        mem_consumption.append(mem)
        run_time.append(end-start)

    # Run Time
    fig, ax1 = plt.subplots()
    h1, = ax1.plot(M,run_time, label="Run Time")
    ax1.set_ylabel('Run Time (s)')
    ax1.set_xlabel('Number of Eigenvectors')

    # Memory Consumption
    ax2 = ax1.twinx()
    h2, = ax2.plot(M,mem_consumption,'r', label="Memory Consumption")
    ax2.set_ylabel('Memory Consumption (%)')
    plt.legend(handles=[h1,h2])
    fig.tight_layout()
    plt.title('Nearest Neighbour Classifer Run Time & Memory Consumption')
    plt.savefig("results/q1-2/nn_run_time_and_mem.png", format="png", transparent=True)
    plt.close()
    # Error
    plt.figure()
    plt.plot(M,err, label="Error")
    plt.ylabel('Error (MSE)')
    plt.xlabel('Number of Eigenvectors')
    plt.title('Nearest Neighbour Classifer Error')
    plt.legend()
    plt.savefig("results/q1-2/nn_error.png", format="png", transparent=True)
    plt.close()
    #############################################
    # RECONSTRUCTION CLASSIFIER ERROR (FIXED M) #
    #############################################

    err = []
    run_time = []
    M = np.arange(1,8)
    for m in M:
        print(m)
        eigenface.M = m
        start = time.time()
        err.append(eigenface.run_reconstruction_classifier(FIXED_M=True)[0])
        end = time.time()
        run_time.append(end-start)

    # Run Time
    plt.figure()
    plt.plot(M,run_time,label="Run Time")
    plt.ylabel('Run Time (s)')
    plt.xlabel('Number of Eigenvectors')
    plt.title('Reconstruction Classifer Run Time')
    plt.legend()
    plt.savefig("results/q1-2/reconstruction_classifier_run_time.png", format="png", transparent=True)
    plt.close()

    # Error
    plt.figure()
    plt.plot(M,err,label="Error")
    plt.ylabel('Error (MSE)')
    plt.xlabel('Number of Eigenvectors')
    plt.title('Reconstruction Classifer Error')
    plt.legend()
    plt.savefig("results/q1-2/reconstruction_classifier_error.png", format="png", transparent=True)
    plt.close()


    ############################################
    # RECONSTRUCTION CLASSIFIER ERROR (CUTOFF) #
    ############################################

    err = []
    run_time = []
    mem_consumption = []

    err_cutoff = np.arange(1,500,20)
    for e in err_cutoff:
        start = time.time()
        err.append(eigenface.run_reconstruction_classifier(err_min=e)[0])
        end = time.time()
        mem= get_process_memory()
        mem_consumption.append(mem)
        run_time.append(end-start)

    # Run Time
    fig, ax1 = plt.subplots()
    h1, = ax1.plot(err_cutoff,run_time, label="Run Time")
    ax1.set_ylabel('Run Time (s)')
    ax1.set_xlabel('Number of Eigenvectors')

    # Memory Consumption
    ax2 = ax1.twinx()
    h2, = ax2.plot(err_cutoff,mem_consumption,'r', label="Memory Consumption")
    ax2.set_ylabel('Memory Consumption (%)')

    fig.tight_layout()
    plt.legend(handles=[h1,h2])
    plt.title('Reconstruction Classifier Run Time & Memory Consumption')
    plt.savefig("results/q1-2/reconstruction_classifier_run_time_and_mem.png", format="png", transparent=True)
    plt.close()
    # Error
    plt.figure()
    plt.plot(err_cutoff,err, label="Error")
    plt.ylabel('Error (MSE)')
    plt.xlabel('Cutoff for Class-wise Reconstruction Error')
    plt.title('Reconstruction Classifer Error')
    plt.legend()
    plt.savefig("results/q1-2/reconstruction_classifier_error.png", format="png", transparent=True)
    plt.close()

    #########################
    # CLASSIFIER COMPARISON #
    #########################

    """
    # Best, reconstruction classifier
    eigenface.M = 2
    err, y_pred = eigenface.run_reconstruction_classifier(err_min=20)
    plot_confusion_matrix(dataset[1][1], y_pred, "results/q1-2/reconstruction_classifier_cm",normalize=True)

    # Best, NN-PCA classifier
    eigenface.M = 100
    err, y_pred = eigenface.run_nn_classifier()
    plot_confusion_matrix(dataset[1][1], y_pred, "results/q1-2/nn_pca_classifier_cm",normalize=True)

    # find wrong classification
    err_index = 0

    for i in range(20,len(y_pred)):
        if not y_pred[i] == dataset[1][1][i]:
            err_index = i
            break

    for i in range(1,len(y_pred)):
        if y_pred[i] == dataset[1][1][i]:
            corr_index = i
            break

    correct_face = copy.deepcopy(dataset[1][0][:,[err_index]])
    index = eigenface.nn_classifier_index(eigenface.project_to_face_space(correct_face))
    wrong_face   = copy.deepcopy(dataset[0][0][:,[index]])

    correct_face_2 = copy.deepcopy(dataset[1][0][:,[corr_index]])
    index = eigenface.nn_classifier_index(eigenface.project_to_face_space(correct_face_2))
    corr_face   = copy.deepcopy(dataset[0][0][:,[index]])

    # plot both faces to compare
    plt.figure()
    f, ax = plt.subplots(2, 2, sharey=True)
    f.suptitle('PCA-NN wrong classification comparison')

    img = (correct_face).reshape((46,56))
    img = np.rot90(img,3)
    ax[0,0].imshow(img, cmap="gray")
    ax[0,0].axis('off')
    ax[0,0].set_title('Input Face')

    img = (wrong_face).reshape((46,56))
    img = np.rot90(img,3)
    ax[0,1].imshow(img, cmap="gray")
    ax[0,1].axis('off')
    ax[0,1].set_title('Wrong Prediction')

    img = (correct_face_2).reshape((46,56))
    img = np.rot90(img,3)
    ax[1,0].imshow(img, cmap="gray")
    ax[1,0].axis('off')
    ax[1,0].set_title('Input Face')

    img = (corr_face).reshape((46,56))
    img = np.rot90(img,3)
    ax[1,1].imshow(img, cmap="gray")
    ax[1,1].axis('off')
    ax[1,1].set_title('Correct Prediction')

    plt.savefig("results/q1-2/wrong_nn_classifier.png", format="png", transparent=True)
    plt.close()