Example #1
def test_knn():
    # file_path = 'trail_forest_results/results/trained_models/deepnn_subset_full_exc_001.h5'
    model = create_trail_model()
    # model = load_model(file_path)
    reset_weights(model)

    rep_layer = Model(inputs=model.input,
                      outputs=model.get_layer(index=7).output)
    rep_layer.summary()

    data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                       train_subsets=[],
                       test_subsets=['/001/'],
                       )
    data2 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                    train_subsets=[],
                    test_subsets=['/002/'],
                    )

    # Shuffle Data
    train_df = data_001.test_set[2]
    random.shuffle(train_df)
    train_df, test_df = split_sequence(train_df, 90, 30)

    baseline = data2.test_set[2]

    # note: the /002/ baseline frames are prepared above but not passed to KModel here
    knn_model = KModel(train_df, test_df, model=rep_layer, baseline=None)
    knn_model.train_full_model()

    return knn_model
Example #2
def save_cvs():
    model_exc_001 = load_model(
        '/Users/jesusnavarro/Desktop/trail_project/results/trained_models/deepnn_subset_001.h5.h5')

    model_exc_001_rep = Model(inputs=model_exc_001.input,
                              outputs=model_exc_001.get_layer(index=7).output)

    dataset_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                          train_subsets=[],
                          test_subsets=['/001/'])

    dataset_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                          train_subsets=[],
                          test_subsets=['/002/'])

    key = {str(np.array([1, 0, 0])): 'right',
           str(np.array([0, 1, 0])): 'center',
           str(np.array([0, 0, 1])): 'left'}

    csv_001_key = {'left': 0, 'center': 1, 'right': 2}
    csv_002_key = {'left': 3, 'center': 4, 'right': 5}

    x_001, y_001, paths_001 = dataset_001.test_set
    x_002, y_002, paths_002 = dataset_002.test_set

    int_001_output = model_exc_001_rep.predict(x_001)
    int_001_output = int_001_output.reshape(x_001.shape[0], -1)

    int_002_output = model_exc_001_rep.predict(x_002)
    int_002_output = int_002_output.reshape(x_002.shape[0], -1)

    csvData = []

    for int_out, path, y in zip(int_001_output, paths_001, y_001):
        dir_class = key[str(y)]
        label = csv_001_key[dir_class]
        features = ','.join(str(val) for val in int_out)
        csvData.append(path + '\t' + str(label) + '\t[' + features + ']\n')

    for int_out, path, y in zip(int_002_output, paths_002, y_002):
        dir_class = key[str(y)]
        label = csv_002_key[dir_class]
        features = ','.join(str(val) for val in int_out)
        csvData.append(path + '\t' + str(label) + '\t[' + features + ']\n')

    with open('/Users/jesusnavarro/Desktop/trail_project/Pickledata/visualize_001_002_nn_exc_001.txt', 'w+') as f:
        for line in csvData:
            print(line)
            f.write(line)
    return None
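
# The rows written above are tab-separated: image path, integer label, and a
# bracketed comma-separated feature vector. A minimal sketch of reading them
# back into numpy arrays (the reader function name is my own, not from the
# original project):
import numpy as np

def load_feature_rows(txt_path):
    paths, labels, features = [], [], []
    with open(txt_path) as fh:
        for row in fh:
            img_path, label, vec = row.rstrip('\n').split('\t')
            paths.append(img_path)
            labels.append(int(label))
            features.append([float(v) for v in vec.strip('[]').split(',')])
    return paths, np.array(labels), np.array(features)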
Example #3
    def train_model(self, model):
        """
        Train model and set data attributes

        The model and datasets (test, train, calibration) are saved to 'workspace.pickle'.
        Training data is randomized and training is done using .flow() where inputs are
        numpy arrays.

        """

        # Get dataframe of paths
        dataset = DataSet(self.root,
                          self.train_set,
                          self.test_set,
                          type='list')
        paths_dataframe = dataset.train_set[2]
        train_df, validation_df, calibration_df = split_data(paths_dataframe)

        # set new parameters for train and calibration_set
        test_df = dataset.test_set[2]
        random.shuffle(test_df)
        self.__test_set = test_df
        self.__train_set = train_df
        self.__validation_set = validation_df
        self.__calibration_set = calibration_df

        train_images, train_labels = read_img_io(train_df)

        # randomize the data
        train_images, train_labels = shuffle(train_images, train_labels)

        validation_images, validation_labels = read_img_io(validation_df)
        train_datagen = ImageDataGenerator(rescale=1,
                                           shear_range=0.2,
                                           zoom_range=0.2,
                                           rotation_range=18)

        valid_datagen = ImageDataGenerator()

        train_set = train_datagen.flow(train_images,
                                       train_labels,
                                       batch_size=32,
                                       shuffle=True)
        valid_set = valid_datagen.flow(validation_images,
                                       validation_labels,
                                       batch_size=32,
                                       shuffle=True)

        model.fit_generator(train_set,
                            steps_per_epoch=train_images.shape[0] // 32,
                            epochs=5,
                            validation_data=valid_set,
                            validation_steps=validation_images.shape[0] // 32)

        # save to current workspace
        with open('workspace.pickle', 'wb+') as handle:
            pickle.dump(
                [model, train_df, test_df, calibration_df, validation_df],
                handle)
        return model
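
# The pickled workspace written by train_model() can be restored later; a
# minimal sketch, assuming the same five objects are unpickled in the order
# they were dumped above:
import pickle

with open('workspace.pickle', 'rb') as handle:
    model, train_df, test_df, calibration_df, validation_df = pickle.load(handle)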
Example #4
def knn_increase_data():
    model = load_from_url()

    rep_layer = Model(inputs=model.input,
                      outputs=model.get_layer(index=7).output)
    data_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                       train_subsets=[],
                       test_subsets=['/002/'],
                       type='nl',
                       loc='gs')

    datasets = ['/001/', '/002/', '/003/', '/004/', '/005/', '/006/', '/007/', '/008/', '/009/', '/010/', '/011/']
    accuracy_list = {}

    for ds in datasets:
        print(ds)
        data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                           train_subsets=[],
                           test_subsets=[ds],
                           type='nl',
                           loc='gs')

        accuracy_list[ds] = []

        data_array = data_001.test_set
        d2 = data_002.test_set

        list_to_randomize = []
        list_test = []

        for (x, y) in zip(data_array[0], data_array[1]):
            list_to_randomize.append([x, y])

        random.shuffle(list_to_randomize)
        n = 10
        batch_size = len(list_to_randomize) // n
        remainder = len(list_to_randomize) - batch_size * n
        print(batch_size)

        for (x, y) in zip(d2[0], d2[1]):
            list_to_randomize.append([x, y])

        # extract data to test (001 dataset, up to batch_size * n + remainder)
        x_001_randarr = np.array([item[0] for item in list_to_randomize[0: n * batch_size + remainder - 1]])
        y_001_randarr = np.array([item[1] for item in list_to_randomize[0: n * batch_size + remainder - 1]])

        x_002_list = [item[0] for item in list_to_randomize[n * batch_size + remainder:]]  # used for ref. point
        y_002_list = [item[1] for item in list_to_randomize[n * batch_size + remainder:]]

        clf = KNeighborsClassifier()  # create KNN object

        # train with dataset 2
        x_002_arr = np.array(x_002_list)
        int_output = rep_layer.predict(x_002_arr)
        int_output = int_output.reshape(x_002_arr.shape[0], -1)
        clf.fit(int_output, np.array(y_002_list))

        init_accuracy = knn_accuracy(clf, x_001_randarr, y_001_randarr, rep_layer)  # test on 001
        accuracy_list[ds].append(init_accuracy)
        z = 1

        for i in range(10):
            print("Fitting on batch number:", z)

            # fit on one more batch of the current dataset each iteration,
            # always together with all of the /002/ reference samples
            x_test_list = [item[0] for item in list_to_randomize[0:(i + 1) * batch_size - 1 + remainder * (i // 9)]] + [
                item[0] for item in list_to_randomize[n * batch_size + remainder:]]
            y_test_list = [item[1] for item in list_to_randomize[0:(i + 1) * batch_size - 1 + remainder * (i // 9)]] + [
                item[1] for item in list_to_randomize[n * batch_size + remainder:]]

            x = np.array(x_test_list)
            y = np.array(y_test_list)

            print(x.shape, y.shape)

            int_output = rep_layer.predict(x)
            int_output = int_output.reshape(x.shape[0], -1)

            clf.fit(int_output, y)

            accuracy = knn_accuracy(clf, x_001_randarr, y_001_randarr, rep_layer)
            print(accuracy)
            accuracy_list[ds].append(accuracy)

            z += 1

    # note: the built-in open() cannot write to a gs:// URI directly; this assumes a
    # GCS-aware file layer (e.g. tf.io.gfile.GFile or gcsfs) stands in for open() here
    with open('gs://data-daisy/knn_test.pickle', 'wb+') as handle:
        pickle.dump(accuracy_list, handle)
    return accuracy_list
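
# knn_accuracy() is used above but not defined in this snippet. A plausible
# sketch of it, based only on how it is called (map images through the
# representation layer, flatten, then score the fitted classifier) -- an
# assumption, not the original implementation:
def knn_accuracy(clf, x, y, rep_layer):
    features = rep_layer.predict(x)
    features = features.reshape(x.shape[0], -1)
    return clf.score(features, y)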
Example #5
        csvData.append(tmp_str)

    with open('/Users/jesusnavarro/Desktop/trail_project/Pickledata/visualize_001_002_nn_exc_001.txt', 'w+') as f:
        for line in csvData:
            print(line)
            f.write(line)
    return None


model_exc_001 = load_model(
    '/Users/jesusnavarro/Desktop/trail_project/results/trained_models/deepnn_subset_full_exc_001.h5')

dataset_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                      train_subsets=[],
                      test_subsets=['/001/'])

dataset_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                      train_subsets=[],
                      test_subsets=['/002/'])

key = {str(np.array([1, 0, 0])): 'right',
       str(np.array([0, 1, 0])): 'center',
       str(np.array([0, 0, 1])): 'left'}

csv_001_key = {'left': 0, 'center': 1, 'right': 2}
csv_002_key = {'left': 3, 'center': 4, 'right': 5}

x_001, y_001, paths_001 = dataset_001.test_set
x_002, y_002, paths_002 = dataset_002.test_set
Example #6
                continue
            variables_to_restore.append(var)
        print(variables_to_restore)
        train_op = tf.train.AdamOptimizer(1e-4).minimize(
            loss, var_list=trainable_var_list)
        load_fn = slim.assign_from_checkpoint_fn("./ckpt/vgg_16.ckpt",
                                                 variables_to_restore)
        saver = tf.train.Saver(tf.global_variables())

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        load_fn(sess)
        batch_size = 20
        train_steps = 1000
        epochs = 10
        trainset = DataSet("./data/train_data.txt")
        testset = DataSet("./data/test_data.txt")
        for epoch in range(epochs):
            for i in range(train_steps):
                batch_data, batch_labels = trainset.next_batch(batch_size)
                _, train_step_loss, train_step_acc = sess.run(
                    [train_op, loss, accuracy],
                    feed_dict={
                        x: batch_data,
                        y: batch_labels
                    })
                print("Loss: ", train_step_loss)
                print("Acc: ", train_step_acc)
            batch_test_data, batch_test_labels = testset.next_batch(100)
            test_step_loss, test_step_acc = sess.run([loss, accuracy],
                                                     feed_dict={
                                                         x: batch_test_data,
                                                         y: batch_test_labels
                                                     })
Example #7
import numpy as np
import tensorflow as tf
from core.distillation_model2 import Distillation_Model2
from core.dataset import DataSet
import os

os.environ["CODA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CODA_VISIBLE_DEVICES"] = "3"

learn_rate = 1e-7
ckpt_file = "./ckpt/train_model.ckpt"
trainset = DataSet('./data/train_data.txt')
testset = DataSet('./data/test_data.txt')

input_data = tf.placeholder(shape=[None, 224, 224, 3],
                            dtype=tf.float32,
                            name='input_data')
label = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="label")

with tf.device("/gpu:0"):
    model = Distillation_Model2(input_data)
    y_ = model.fc10
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=y_))
    correct_prediction = tf.equal(tf.argmax(label, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    train_op = tf.train.AdamOptimizer(learn_rate).minimize(loss)
    saver = tf.train.Saver(tf.global_variables())

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    init_op = tf.global_variables_initializer()
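    # the snippet ends here; a plausible continuation, modeled on the training
    # loop in the earlier VGG fine-tuning example (an assumption, not the
    # original code):
    sess.run(init_op)
    batch_size = 20
    train_steps = 1000
    epochs = 10
    for epoch in range(epochs):
        for i in range(train_steps):
            batch_data, batch_labels = trainset.next_batch(batch_size)
            _, step_loss, step_acc = sess.run(
                [train_op, loss, accuracy],
                feed_dict={input_data: batch_data, label: batch_labels})
        # checkpoint after every epoch
        saver.save(sess, ckpt_file)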
Example #8
import numpy as np
from keras.applications.vgg16 import VGG16

from core.dataset import DataSet
from core.knn import reset_weights
from utils.data_processing import rep_layer_ouptut

vgg16_rep_layer = VGG16(weights=None, include_top=False, pooling='max', input_shape=(101, 101, 3))
reset_weights(vgg16_rep_layer)

data_001 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                   train_subsets=[],
                   test_subsets=['/001/'],
                   location='local')

data_002 = DataSet(root_dir='/Users/jesusnavarro/Desktop/DataSet/',
                   train_subsets=[],
                   test_subsets=['/002/'],
                   location='local')

vgg16_rep_layer.summary()

d1_df = data_001.test_set[2]
d2_df = data_002.test_set[2]

x1, y1 = rep_layer_ouptut(d1_df, vgg16_rep_layer)
x2, y2 = rep_layer_ouptut(d2_df, vgg16_rep_layer)

path = '/Users/jesusnavarro/Desktop/vgg16_tests/npz_data/vgg16_output_top_off_pooling_resized.npz'
np.savez(path, x1=x1, y1=y1, x2=x2, y2=y2)
vgg16_rep_layer.summary()
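
# A minimal sketch of consuming the saved features, e.g. fitting a KNN on the
# /002/ representation output and scoring it against /001/ (the train/test
# roles here are an illustrative assumption):
from sklearn.neighbors import KNeighborsClassifier

saved = np.load(path)
clf = KNeighborsClassifier()
clf.fit(saved['x2'].reshape(saved['x2'].shape[0], -1), saved['y2'])
print(clf.score(saved['x1'].reshape(saved['x1'].shape[0], -1), saved['y1']))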