Example #1
def load_data(dataset='cifar10',
              num_training=50000,
              num_test=10000,
              normalize=True):
    print("Attempting to load dataset {} ...".format(dataset))
    X, Y, X_test, Y_test = None, None, None, None
    n_classes = 0
    if dataset == 'cifar10':
        X, Y, X_val, Y_val, X_test, Y_test = load_cifar(
            num_training=num_training,
            num_validation=0,
            num_test=num_test,
            dataset='cifar10',
            normalize=normalize)
    elif dataset == 'cifar100_coarse':
        X, Y, X_val, Y_val, X_test, Y_test = load_cifar(
            num_training=num_training,
            num_validation=0,
            num_test=num_test,
            dataset='cifar100',
            normalize=normalize)
        Y = Y[:, 1]
        Y_test = Y_test[:, 1]

    elif dataset == 'cifar100_fine':
        X, Y, X_val, Y_val, X_test, Y_test = load_cifar(
            num_training=num_training,
            num_validation=0,
            num_test=num_test,
            dataset='cifar100',
            normalize=normalize)
        Y = Y[:, 0]
        Y_test = Y_test[:, 0]
    elif dataset == 'cifar100_joint_fine_only':
        X_train_joint, y_train_joint = load_data_pyramid(
            dataset="cifar100_joint",
            return_subset='joint_only',
            normalize=normalize)
        all_X = X_train_joint
        all_Y = y_train_joint[:, 0]  # keep only the fine labels; drop the coarse labels
        all_X, all_Y = shuffle(all_X, all_Y)

        testSplitIndex = int(len(all_X) * 0.85)
        X = all_X[:testSplitIndex]
        Y = all_Y[:testSplitIndex]
        X_test = all_X[testSplitIndex:]
        Y_test = all_Y[testSplitIndex:]
    else:
        print("Dataset {} not found. ".format(dataset))
        sys.exit()
    n_classes = DATASET_TO_N_CLASSES[dataset]
    X, Y = shuffle(X, Y)
    Y = to_categorical(Y, n_classes)
    X_test, Y_test = shuffle(X_test, Y_test)
    Y_test = to_categorical(Y_test, n_classes)
    return X, Y, X_test, Y_test
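
Nearly every example on this page uses the same two helpers from tflearn.data_utils: shuffle, which permutes several arrays with one shared random order, and to_categorical, which one-hot encodes integer labels. A minimal self-contained sketch of that pattern on toy data (the array sizes and class count are made up for illustration; it assumes only that numpy and tflearn are installed):

import numpy as np
from tflearn.data_utils import shuffle, to_categorical

# Toy data: six 32x32 RGB "images" with integer class labels 0..2.
X = np.random.rand(6, 32, 32, 3)
Y = np.array([0, 1, 2, 0, 1, 2])

# shuffle() permutes both arrays with the same random order,
# so each image stays paired with its label.
X, Y = shuffle(X, Y)

# to_categorical() turns the integer labels into one-hot rows,
# the format expected by a softmax/categorical_crossentropy head.
Y = to_categorical(Y, 3)

print(X.shape, Y.shape)  # (6, 32, 32, 3) (6, 3)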
Example #2
def get_data():
    x, y, test_x, test_y = mnist.load_data(one_hot=True)
    x, y = shuffle(x, y)
    test_x, test_y = shuffle(test_x, test_y)
    train_x = x[0:50000]
    train_y = y[0:50000]
    valid_x = x[50000:]
    valid_y = y[50000:]
    # make sure you reshape the training and testing
    # data as follows.
    train_x = train_x.reshape([-1, 28, 28, 1])
    test_x = test_x.reshape([-1, 28, 28, 1])
    return train_x, train_y, test_x, test_y, valid_x, valid_y
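
A usage sketch, not part of the original example: it shows why get_data() reshapes the images to [-1, 28, 28, 1] — tflearn's input_data layer for a convolutional network expects image tensors in NHWC layout. The tiny network below is a made-up illustration, not the model the author trained:

import tflearn
from tflearn.layers.core import input_data, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression

train_x, train_y, test_x, test_y, valid_x, valid_y = get_data()

# The [None, 28, 28, 1] placeholder shape matches the reshape in get_data().
net = input_data(shape=[None, 28, 28, 1])
net = conv_2d(net, 32, 3, activation='relu')
net = max_pool_2d(net, 2)
net = fully_connected(net, 10, activation='softmax')
net = regression(net, optimizer='adam', loss='categorical_crossentropy')

model = tflearn.DNN(net)
model.fit(train_x, train_y, n_epoch=1, validation_set=(valid_x, valid_y),
          show_metric=True)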
Example #3
    def tf_learn(self, modelName, folders, example_count, test_count):
        if (self.modelTrained):
            print("Model already trained!")
            return

        # (X, Y), (X_test, Y_test) = cifar10.load_data()
        (X, Y), (X_test, Y_test) = self.loadImages(folders, example_count,
                                                   test_count)

        X, Y = shuffle(X, Y)
        Y = to_categorical(Y, len(folders))
        Y_test = to_categorical(Y_test, len(folders))

        self.model.fit(X,
                       Y,
                       n_epoch=50,
                       shuffle=True,
                       validation_set=(X_test, Y_test),
                       show_metric=True,
                       batch_size=96,
                       run_id=modelName)

        self.model.save("model/" + modelName + ".tfl")

        self.modelTrained = True
        return None
Example #4
def get_cifar_10():
    label_name = unpickle('../data/CIFAR-10/batches.meta')['label_names']
    X, Y = [], []
    for i in range(1, 6):
        batch = unpickle('../data/CIFAR-10/data_batch_' + str(i))
        if i == 1:
            X = batch['data']
            Y = batch['labels']
        else:
            X = np.append(X, batch['data'], axis=0)
            Y = np.append(Y, batch['labels'], axis=0)
            #Y = Y + batch['labels']
    X, Y = shuffle(X, Y)
    test_batch = unpickle('../data/CIFAR-10/test_batch')
    X_test, Y_test = test_batch['data'], test_batch['labels']
    # Reshape X: (50000, 3072) -> (50000, 32, 32, 3)
    X = np.dstack((X[:, :1024], X[:, 1024:2048], X[:, 2048:]))  # (50000, 1024, 3)
    X = np.reshape(X, [-1, 32, 32, 3])  # (50000, 32, 32, 3)
    X_test = np.dstack((X_test[:, :1024], X_test[:, 1024:2048], X_test[:, 2048:]))
    X_test = np.reshape(X_test, [-1, 32, 32, 3])
    # one-hot
    Y = to_categorical(Y, 10)
    Y_test = to_categorical(Y_test, 10)
    print('X:', X.shape)
    print('Y:', Y.shape)
    return label_name, X, Y, X_test, Y_test
Example #5
def get_images(path):

    images = np.ndarray(shape=(len(os.listdir(path)), 48, 48, 3),
                        dtype=np.float32)
    labels = list()
    count = -1
    if len(os.listdir(path)) == 0:
        print("Empty Dataset.......aborting Training")
        exit()
    for img in os.listdir(path):
        regex = re.compile(r'(\d+|\s+)')
        labl = regex.split(img)
        labl = labl[0]
        count = count + 1
        Make_Changes(labl)
        image_path = os.path.join(path, img)
        image = cv2.imread(image_path)
        try:
            image = cv2.resize(image, (48, 48), interpolation=cv2.INTER_CUBIC)
        except Exception as e:
            print("ye lo error", e)
            exit()

        if images[count, :, :].shape != image.shape:
            print(image.shape)
            count -= 1
            continue
        images[count, :, :] = image
        labels.append(Data_list.index(labl))  # store the class index for this label
    images, labels = shuffle(images, labels)
    return images, labels, count
Example #6
def neuralNetwork():
    from tensorflow.python.framework import ops
    ops.reset_default_graph()

    # Load the data set
    with open("dataset.pkl", "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        X, Y, X_test, Y_test = u.load()
        X = X.astype('float32')
        X_test = X_test.astype('float32')

    # Shuffle the data
    X, Y = shuffle(X, Y)

    model = re.getReseau()

    # Train for settings.nb_epoch passes and monitor metrics as training goes.
    model.fit(X,
              Y,
              n_epoch=settings.nb_epoch,
              shuffle=True,
              validation_set=(X_test, Y_test),
              show_metric=True,
              batch_size=settings.batch_size,
              snapshot_epoch=True)
    #run_id='dataviz-classifier')
    # Save model when training is complete to a file
    model.save("dataviz-classifier.tfl")
    print(model.evaluate(X, Y))
    print("Network trained and saved as dataviz-classifier.tfl!")
Example #7
def process_form_data(filename):
    data = h5py.File(filename, 'r')
    output = h5py.File('forms_out.h5', 'w')

    test_image = output.create_dataset('test_image', (330, 3, 256, 256), dtype=np.uint8)
    train_image = output.create_dataset('train_image', (770, 3, 256, 256), dtype=np.uint8)
    test_label = output.create_dataset('test_label', (330, 11), dtype=np.int8)
    train_label = output.create_dataset('train_label', (770, 11), dtype=np.int8)

    image, labels = shuffle(data['image'], data['form'])

    onehot_labels = to_categorical(labels, 11)


    count = {}
    train_count = 0
    test_count = 0
    for i, l in enumerate(labels):

        if l not in count:
            count[l] = 0

        if count[l] > 29:
            train_image[train_count] = image[i]
            train_label[train_count] = onehot_labels[i]
            train_count += 1

        else:
            test_image[test_count] = image[i]
            test_label[test_count] = onehot_labels[i]
            test_count += 1

        count[l] += 1

    output.close()
Example #8
def train_pyramid_model(model_id='pyramid_cifar100',
                        dataset='cifar100_joint',
                        checkpoint_model_id=None):
    coarse_dim = 20
    fine_dim = 100
    X_train_joint, y_train_joint = load_data_pyramid(
        dataset=dataset, return_subset='joint_only')

    X_train_joint, y_train_joint = shuffle(X_train_joint, y_train_joint)
    y_train_fine, y_train_coarse = y_train_joint[:, 0], y_train_joint[:, 1]
    y_train_fine, y_train_coarse = to_categorical(
        y_train_fine, fine_dim), to_categorical(y_train_coarse, coarse_dim)

    y_train_joint = np.concatenate((y_train_coarse, y_train_fine), axis=1)

    model = load_model(model_id,
                       pyramid_output_dims=[coarse_dim, fine_dim],
                       is_training=True,
                       checkpoint_model_id=checkpoint_model_id)

    date_time_string = datetime.datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
    run_id = "{}_{}".format(model_id, date_time_string)

    model.fit(X_train_joint,
              y_train_joint,
              n_epoch=50,
              shuffle=True,
              validation_set=0.1,
              show_metric=True,
              batch_size=128,
              run_id=run_id,
              snapshot_step=100)
Example #9
def get_data():
    # Data loading and preprocessing
    from tflearn.datasets import cifar10
    (X, Y), (X_test, Y_test) = cifar10.load_data()
    X, Y = shuffle(X, Y)
    Y = to_categorical(Y, 10)
    Y_test = to_categorical(Y_test, 10)
    return (X, Y), (X_test, Y_test)
Example #10
def main(_):
    print(FLAGS.buckets)
    print(FLAGS.checkpointDir)
    print(FLAGS.test_para)

    if tf.gfile.Exists(FLAGS.checkpointDir):
        tf.gfile.DeleteRecursively(FLAGS.checkpointDir)
    tf.gfile.MakeDirs(FLAGS.checkpointDir)

    dirname = os.path.join(FLAGS.buckets, "")
    (X, Y), (X_test, Y_test) = load_data(dirname)
    print("load data done")

    X, Y = shuffle(X, Y)
    Y = to_categorical(Y, 10)
    Y_test = to_categorical(Y_test, 10)

    # Real-time data preprocessing
    img_prep = ImagePreprocessing()
    img_prep.add_featurewise_zero_center()
    img_prep.add_featurewise_stdnorm()

    # Real-time data augmentation
    img_aug = ImageAugmentation()
    img_aug.add_random_flip_leftright()
    img_aug.add_random_rotation(max_angle=25.)

    # Convolutional network building
    network = input_data(shape=[None, 32, 32, 3],
                         data_preprocessing=img_prep,
                         data_augmentation=img_aug)
    network = conv_2d(network, 32, 3, activation='relu')
    network = max_pool_2d(network, 2)
    network = conv_2d(network, 64, 3, activation='relu')
    network = conv_2d(network, 64, 3, activation='relu')
    network = max_pool_2d(network, 2)
    network = fully_connected(network, 512, activation='relu')
    network = dropout(network, 0.5)
    network = fully_connected(network, 10, activation='softmax')
    network = regression(network,
                         optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)

    # Train using classifier
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(X,
              Y,
              n_epoch=50,
              shuffle=True,
              validation_set=(X_test, Y_test),
              show_metric=True,
              batch_size=96,
              run_id='cifar10_cnn')
    model_path = os.path.join(FLAGS.checkpointDir, "model.tfl")
    print(model_path)
    model.save(model_path)
Example #11
def get_data(data_dir, hdf5):
    """This function loads in the data, either by loading images on the fly or by creating and
    loading from a hdf5 database.

    Args:
        data_dir: Root directory of the dataset.
        hdf5: Boolean. If true, (create and) load data from a hdf5 database.

    Returns:
        X: training images.
        Y: training labels.
        X_test: validation images.
        Y_test: validation labels."""

    # Get the filenames of the lists containing image paths and labels.
    train_file, val_file = build_dataset_index(data_dir)

    # Check if (creating and) loading from hdf5 database is desired.
    if hdf5:
        # Create folder to store dataset.
        if not os.path.exists('hdf5'):
            os.makedirs('hdf5')
        # Check if hdf5 databases already exist and create them if not.
        if not os.path.exists('hdf5/tiny-imagenet_train.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            print(' Creating hdf5 train dataset.')
            build_hdf5_image_dataset(train_file, image_shape=(64, 64), mode='file',
                                     output_path='hdf5/tiny-imagenet_train.h5', categorical_labels=True, normalize=True)

        if not os.path.exists('hdf5/tiny-imagenet_val.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            print(' Creating hdf5 val dataset.')
            build_hdf5_image_dataset(val_file, image_shape=(64, 64), mode='file',
                                     output_path='hdf5/tiny-imagenet_val.h5', categorical_labels=True, normalize=True)

        # Load training data from hdf5 dataset.
        h5f = h5py.File('hdf5/tiny-imagenet_train.h5', 'r')
        X = h5f['X']
        Y = h5f['Y']

        # Load validation data.
        h5f = h5py.File('hdf5/tiny-imagenet_val.h5', 'r')
        X_test = h5f['X']
        Y_test = h5f['Y']

        # Load images directly from disk when they are required.
    else:
        from tflearn.data_utils import image_preloader
        X, Y = image_preloader(train_file, image_shape=(64, 64), mode='file', categorical_labels=True, normalize=True,
                               filter_channel=True)
        X_test, Y_test = image_preloader(val_file, image_shape=(64, 64), mode='file', categorical_labels=True,
                                         normalize=True, filter_channel=True)

    # Randomly shuffle the dataset.
    X, Y = shuffle(X, Y)

    return X, Y, X_test, Y_test
Example #12
    def sentiment_analysis(self, sentencedata):

        file_path = 'Cleaned-Masita corpus 2.csv'
        data, labels = load_csv(file_path,
                                target_column=0,
                                categorical_labels=True,
                                n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        neurons = len(data[0])

        # shuffle the dataset
        data, labels = shuffle(data, labels)

        reset_default_graph()
        network = input_data(shape=[None, neurons])
        network = fully_connected(network, 8, activation='relu')
        network = fully_connected(network, 8 * 2, activation='relu')
        network = fully_connected(network, 8, activation='relu')
        network = dropout(network, 0.5)

        network = fully_connected(network, 2, activation='softmax')
        network = regression(network,
                             optimizer='adam',
                             learning_rate=0.01,
                             loss='categorical_crossentropy')

        model = tflearn.DNN(network)
        #model.fit(data, labels, n_epoch=40, shuffle=True, validation_set=None , show_metric=True, batch_size=None, snapshot_epoch=True, run_id='task-classifier')
        #model.save("./model/thaitext-classifier-mashita.tfl")
        #print("Network trained and saved as thaitext-classifier-mashita.tfl")

        model.load("./model/thaitext-classifier-mashita.tfl")
        #file_path3 = 'Cleaned-Masita-traindataset-2.csv'

        input_sentencedata = self.preprocess_server(sentencedata)

        vector_one = []
        for word in unique_words:
            if word in input_sentencedata:
                vector_one.append(1)
            else:
                vector_one.append(0)

        vector_one = np.array(vector_one, dtype=np.float32)

        label = model.predict_label([vector_one])
        #print (label)

        pred = model.predict([vector_one])
        #print(pred)
        return pred
Example #13
def load_cifar10_dataset(data_dir=None):
    from tflearn.datasets import cifar10
    from tflearn.data_utils import to_categorical

    HEIGHT = 32
    WIDTH = 32
    CHANNELS = 3
    CLASSES = 10

    (X, Y), (Xv, Yv) = cifar10.load_data(dirname=data_dir, one_hot=True)
    X, Y = shuffle(X, Y)
    Xv, Yv = shuffle(Xv, Yv)

    Xt = Xv[2000:]
    Yt = Yv[2000:]

    Xv = Xv[:2000]
    Yv = Yv[:2000]

    return CLASSES, X, Y, HEIGHT, WIDTH, CHANNELS, Xv, Yv, Xt, Yt
Example #14
def main(data_dir, hdf5, name):
    batch_size = 256
    num_epochs = 10
    learning_rate = 0.001
    X, Y, X_test, Y_test = get_data(data_dir, hdf5)
    X, Y = shuffle(X, Y)
    img_prep = ImagePreprocessing()
    img_prep.add_featurewise_zero_center()
    img_prep.add_featurewise_stdnorm()
    img_aug = ImageAugmentation()
    img_aug.add_random_flip_leftright()
    img_aug.add_random_rotation(max_angle=25.)
    img_aug.add_random_blur(sigma_max=3.)
    network = input_data(shape=[None, 32, 32, 3],
                         data_preprocessing=img_prep,
                         data_augmentation=img_aug)
    # Step 1: Convolution
    network = conv_2d(network, 32, 3, activation='relu')
    # Step 2: Max pooling
    network = max_pool_2d(network, 2)
    # Step 3: Convolution
    network = conv_2d(network, 64, 3, activation='relu')
    # Step 4: Convolution
    network = conv_2d(network, 64, 3, activation='relu')
    # Step 5: Max pooling
    network = max_pool_2d(network, 2)
    # Step 6: Fully-connected 512 node neural network
    network = fully_connected(network, 512, activation='relu')
    # Step 7: Dropout - throw away some data randomly during training to prevent over-fitting
    network = dropout(network, 0.5)
    # Step 8: Fully-connected neural network with two outputs (0=isn't a bird, 1=is a bird) to make the final prediction
    network = fully_connected(network, 2, activation='softmax')
    # Tell tflearn how we want to train the network
    network = regression(network,
                         optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)
    # Wrap the network in a model object
    model = tflearn.DNN(network,
                        tensorboard_verbose=0,
                        checkpoint_path='bird-classifier.tfl.ckpt')
    # Train it! We'll do 100 training passes and monitor it as it goes.
    model.fit(X,
              Y,
              n_epoch=100,
              shuffle=True,
              validation_set=(X_test, Y_test),
              show_metric=True,
              batch_size=96,
              snapshot_epoch=True,
              run_id='bird-classifier')
    # Save model when training is complete to a file
    model.save("bird-classifier.tfl")
    print("Network trained and saved as bird-classifier.tfl!")
Example #15
def load_mnist_dataset(data_dir=None):
    import tflearn.datasets.mnist as mnist

    HEIGHT = 28
    WIDTH = 28
    CHANNELS = 1
    CLASSES = 10

    X, Y, Xv, Yv = mnist.load_data(data_dir=data_dir, one_hot=True)
    X, Y = shuffle(X, Y)
    Xv, Yv = shuffle(Xv, Yv)
    X = X.reshape([-1, 28, 28, 1])
    Xv = Xv.reshape([-1, 28, 28, 1])

    Xt = Xv[2000:]
    Yt = Yv[2000:]

    Xv = Xv[:2000]
    Yv = Yv[:2000]

    return CLASSES, X, Y, HEIGHT, WIDTH, CHANNELS, Xv, Yv, Xt, Yt
Example #16
def train_val(sess, x_train, y_train, x_val, y_val, epochs):
    x_train, y_train = shuffle(x_train, y_train)
    train_batch_size = 256
    val_batch_size = 128
    train_total_batch = int(len(x_train) / train_batch_size)
    total_val_batch = int(len(x_val) / val_batch_size)
    for epoch in range(epochs):
        print('epoche:{0} training start'.format(epoch))
        epoch_time = time.time()
        train(sess, x_train, y_train, train_total_batch, train_batch_size)
        val(sess, x_val, y_val, total_val_batch, val_batch_size)
        print('Epoch:{0} , time:{1} seconds'.format(epoch,
                                                    time.time() - epoch_time))
Example #17
 def prepare_learn_data(self):
     X = []
     Y = []
     for image, manual, mask in self.zip_data():
         possible_points = LearnData.get_possible_points(mask)
         max_index = len(possible_points) - 1
         for i in range(0, PHOTO_SAMPLES):
             result, sample = self.get_sample(image, manual, max_index,
                                              possible_points)
             X.append(sample)
             Y.append(result)
     X = LearnData.normalize(X)
     Y = LearnData.normalize(Y)
     return shuffle(X, Y)
Example #18
    def sentiment_analysis(self, sentencedata):

        file_path = './corpus/Combined_inhousedata_UTF8-2.csv'
        data, labels = load_csv(file_path,
                                target_column=0,
                                categorical_labels=True,
                                n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        neurons = len(unique_words)

        # shuffle the dataset
        data, labels = shuffle(data, labels)

        reset_default_graph()
        network = input_data(shape=[None, neurons])
        network = fully_connected(network, 8, activation='relu')
        network = fully_connected(network, 8 * 2, activation='relu')
        network = fully_connected(network, 8, activation='relu')
        network = dropout(network, 0.5)

        network = fully_connected(network, 2, activation='softmax')
        network = regression(network,
                             optimizer='adam',
                             learning_rate=0.01,
                             loss='categorical_crossentropy')

        model = tflearn.DNN(network)
        model.load("./model/thaitext-classifier-CID_UTF8-burgerking-2.tfl")

        input_sentencedata = self.preprocess_server_2(sentencedata)
        #input_uniquewords = self.get_uniquewords(input_sentencedata)

        vector_one = []
        for word in unique_words:
            if word in input_sentencedata:
                vector_one.append(1)
            else:
                vector_one.append(0)
        vector_one = np.array(vector_one, dtype=np.float32)
        #print(vector_one)

        label = model.predict_label([vector_one])
        pred = model.predict([vector_one])

        return pred
Example #19
def load_data():
    # Load the data set
    hotdog = pickle.load(open("hotdog.pickle", "rb"))
    X = hotdog['X']
    #Y = [ 0 if x != 1 else 1 for x in hotdog['Y'] ]
    Y = hotdog['Y']
    X_test = hotdog['X_test']
    #Y_test = [ 0 if x != 1 else 1 for x in hotdog['Y_test'] ]
    Y_test = hotdog['Y_test']

    # Shuffle the data
    X, Y = shuffle(X, Y)
    Y = tflearn.data_utils.to_categorical(Y, nb_classes=101)
    Y_test = tflearn.data_utils.to_categorical(Y_test, nb_classes=101)
    return X, Y, X_test, Y_test
Example #20
    def sentiment_analysis(self, sentencedata):
        
        file_path = './corpus/BurgerKing_UTF8.csv'
        data, labels = load_csv(file_path, target_column=0, categorical_labels=True, n_classes=2)

        pdata = self.preprocess_server(data)
        unique_words = self.get_uniquewords(pdata)
        data = self.preprocess_vector(pdata, unique_words)

        

        neurons = len(data[0])

        # shuffle the dataset
        data, labels = shuffle(data, labels)
Example #21
def tflearn_cifar():
    """
    Image classification
    :return:
    """

    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
    X_train, Y_train = shuffle(X_train, Y_train)
    Y_train = to_categorical(Y_train, nb_classes=10)
    Y_test = to_categorical(Y_test, nb_classes=10)

    # Zero-center the dataset (compute the mean over the whole dataset) and apply STD normalization (compute the standard deviation over the whole dataset)
    img_prep = ImagePreprocessing()
    img_prep.add_featurewise_zero_center()
    img_prep.add_featurewise_stdnorm()

    # Augment the dataset with random left-right flips and random rotations
    img_aug = ImageAugmentation()
    img_aug.add_random_flip_leftright()
    img_aug.add_random_rotation(max_angle=25.)

    # Define the model
    network = input_data(shape=(None, 32, 32, 3),
                         data_preprocessing=img_prep,
                         data_augmentation=img_aug)
    network = conv_2d(network, 32, 3, activation="relu")
    network = max_pool_2d(network, 2)
    network = conv_2d(network, 64, 3, activation="relu")
    network = conv_2d(network, 64, 3, activation="relu")
    network = max_pool_2d(network, 2)
    network = fully_connected(network, 512, activation="relu")
    network = dropout(network, 0.5)
    network = fully_connected(network, 10, activation="softmax")
    network = regression(network,
                         optimizer="adam",
                         loss="categorical_crossentropy",
                         learning_rate=0.001)

    # Train the model
    model = DNN(network, tensorboard_verbose=0)
    model.fit(X_train,
              Y_train,
              n_epoch=50,
              shuffle=True,
              validation_set=(X_test, Y_test),
              show_metric=True,
              batch_size=96,
              run_id="cifar10_cnn")
Example #22
def get_data(data_dir, hdf5):
    train_file, val_file = build_dataset_index(data_dir)

    if hdf5:
        if not os.path.exists('hdf5'):
            os.makedirs('hdf5')
        if not os.path.exists('hdf5/tiny-imagenet_train.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            build_hdf5_image_dataset(train_file,
                                     image_shape=(64, 64),
                                     mode='file',
                                     output_path='hdf5/tiny-imagenet_train.h5',
                                     categorical_labels=True,
                                     normalize=True)
        if not os.path.exists('hdf5/tiny-imagenet_val.h5'):
            from tflearn.data_utils import build_hdf5_image_dataset
            build_hdf5_image_dataset(val_file,
                                     image_shape=(64, 64),
                                     mode='file',
                                     output_path='hdf5/tiny-imagenet_val.h5',
                                     categorical_labels=True,
                                     normalize=True)

        h5f = h5py.File('hdf5/tiny-imagenet_train.h5', 'r')
        X = h5f['X']
        Y = h5f['Y']

        h5f = h5py.File('hdf5/tiny-imagenet_val.h5', 'r')
        X_test = h5f['X']
        Y_test = h5f['Y']
    else:
        from tflearn.data_utils import image_preloader
        X, Y = image_preloader(train_file,
                               image_shape=(64, 64),
                               mode='file',
                               categorical_labels=True,
                               normalize=True,
                               filter_channel=True)
        X_test, Y_test = image_preloader(val_file,
                                         image_shape=(64, 64),
                                         mode='file',
                                         categorical_labels=True,
                                         normalize=True,
                                         filter_channel=True)

    # Randomly shuffle the dataset.
    X, Y = shuffle(X, Y)
    return X, Y, X_test, Y_test
Example #23
def train():
	# Data loading and preprocessing
	(X, Y), (X_test, Y_test) = cifar10.load_data()
	X, Y = shuffle(X, Y)
	Y = to_categorical(Y, 10)
	Y_test = to_categorical(Y_test, 10)

	# Train using classifier
	model = tflearn.DNN(initialise_model.create_network('adam'), tensorboard_verbose=0, checkpoint_path='cifar10.tfl.ckpt')

	#train the algorithm and take checkpoints every epoch
	model.fit(X, Y, n_epoch=50, shuffle=True, validation_set=(X_test, Y_test), snapshot_epoch=True,
	          show_metric=True, batch_size=122, run_id='cifar10_cnn')

	#export the model
	model.save('cifar.tflearn')
Example #24
def load_data_internal(dirname, dtype):
    print('')

    all_data = []
    all_labels = []

    if dtype != 'train' and dtype != 'test':
        print('ERROR: data type can only be train | test, got', dtype)
        exit(-1)

    data_files = []
    for root, dirs, files in os.walk(dirname):
        if files:
            for ff in files:
                if dtype == 'train':
                    if re.search(r'train_MRI', ff) is not None:
                        data_files.append(os.path.join(root, ff))
                else:
                    if re.search(r'test_MRI', ff) is not None:
                        data_files.append(os.path.join(root, ff))

    is_first_file = True
    for item in data_files:
        print('Reading ', item)
        data, labels = load_batch(item)
        if is_first_file:
            all_data = data
            all_labels = labels
            is_first_file = False
        else:
            all_data = np.concatenate((all_data, data), axis=0)
            all_labels = np.concatenate((all_labels, labels), axis=0)

    if len(all_data) != len(all_labels):
        print('ERROR: data and label length mismatch, exit.')
        exit(-1)
    print('Done read. Total number of elements: ', len(all_data))

    all_data = all_data / 255.0

    # shuffle before return
    all_data, all_labels = shuffle(all_data, all_labels)

    print('Number of data [%s]: %d' % (dtype, len(all_data)))
    print('')

    return all_data, all_labels
Example #25
 def nextfile(self):
     if self.now_read_file_pos + 1 <= self.file_num:
         self.data = h5py.File(self.dataset[self.now_read_file_pos], 'r')
         self.X_data = self.data['X']
         self.Y_data = self.data['Y']
         self.datanum = self.X_data.shape[0]
         self.total_datanum = self.total_datanum + self.datanum
         self.Y_data = move_zero_label(self.Y_data, self.Y_data.shape[0],
                                       self.Y_data.shape[1])
         if self.shuffle:
             self.X_data, self.Y_data = shuffle(self.X_data, self.Y_data)
         self.batch_num = int(self.datanum / self.batch_size)
         self.tem_batch_pos = 0
         print('Read data file: ' + self.dataset[self.now_read_file_pos])
         self.now_read_file_pos = self.now_read_file_pos + 1
         return True
     else:
         return False
Example #26
    def fit(cls, n_epoch=50, batch_size=96, image_size=(64, 64)):
        """ Learn face images from databases.
        use images at reception_robot.person.face_imgs
        Returns:
        """
        (X, Y), (X_test, Y_test) = Person.train_test_images_ids(
            image_size=image_size)
        print(X.shape, Y.shape, X_test.shape, Y_test.shape)
        person_ids = np.sort(list(set(Y))).tolist()
        print("person_ids: {}".format(person_ids))
        person_num = len(person_ids)
        print("label_num: {}".format(person_num))
        X, Y = shuffle(X, Y)
        Y = cls.to_categorical(Y, person_ids)
        Y_test = cls.to_categorical(Y_test, person_ids)

        face_recognizer_search_query = {
            'n_epoch': n_epoch,
            'batch_size': batch_size,
            'image_size': image_size,
            'person_ids': person_ids,
        }
        # face recognizer
        face_recognizer = cls.objects(**face_recognizer_search_query).first()
        if face_recognizer is None:
            face_recognizer = cls(**face_recognizer_search_query,
                                  person_num=person_num)

        network = face_recognizer.generate_network()
        # Train using classifier
        run_id = 'face_cnn'
        model = tflearn.DNN(network, tensorboard_verbose=3,
                            tensorboard_dir='tensorboard_log',
                            checkpoint_path='checkpoints/{}'.format(run_id))
        model.fit(X, Y, n_epoch=n_epoch, shuffle=True,
                      validation_set=(X_test, Y_test),
                      show_metric=True, batch_size=batch_size, run_id=run_id)
        import os
        try:
            os.makedirs('models')
        except Exception as e:
            print(e)
        model.save(face_recognizer.generate_filename())
        face_recognizer.save()
Example #27
def trainArtToPrimaryTypeModel(artPath, jsonPath, testProp, numEpochs=50):
  '''
  Trains a convolutional network to categorize card art by primary type
  Inputs:
    artPath: path to card art
    jsonPath: path to card data json file
    testProp: proportion of samples to be used for test/validation
    numEpochs: number of epochs to train for (50)
  '''
  (X, Y), (X_Test, Y_Test), numCategories = turnPicsToSimpleInputs(artPath,
                                                                    jsonPath,
                                                                    testProp=testProp)
  X, Y = shuffle(X, Y)
  Y = to_categorical(Y, numCategories)
  Y_Test = to_categorical(Y_Test, numCategories)

  # Train model as classifier
  model = artToMainTypeModel(numCategories)
  model.fit(X, Y, n_epoch=numEpochs, shuffle=True, validation_set=(X_Test, Y_Test),
              show_metric=True, batch_size=100, run_id='mtg_classifier')
Example #28
def build_corpus():
    v2i, _ = build_vocab()
    vocab_size = len(v2i)
    questions = df['question'].values.tolist()
    questions = [q.split() for q in questions]
    questions = [[v2i[vocab] for vocab in ques if vocab in v2i] for ques in questions]
    sentence_size = max([len(ques) for ques in questions])
    corpus = pad_sequences(questions, maxlen=sentence_size, value=0)

    l2i, _ = build_label()
    labels = df['is_business'].values.tolist()
    labels = [l2i[label] for label in labels if label in l2i]

    corpus, labels = shuffle(corpus, labels)
    corpus_num = len(corpus)
    valid_portion = 0.1
    train = (corpus[0:int((1 - valid_portion) * corpus_num)], labels[0:int((1 - valid_portion) * corpus_num)])
    test = (corpus[int((1 - valid_portion) * corpus_num):], labels[int((1 - valid_portion) * corpus_num):])
    valid = test
    return train, test, valid, sentence_size, vocab_size
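
A small illustrative sketch (toy data, not from the original source) of what pad_sequences does inside build_corpus above: it pads every tokenized question with the given value so the result is one rectangular integer array, sized by the longest question, that can be fed to an embedding layer:

from tflearn.data_utils import pad_sequences

questions = [[4, 7, 2], [9, 1], [3, 5, 8, 6]]

# maxlen is the length of the longest question; shorter ones are padded with 0.
corpus = pad_sequences(questions, maxlen=4, value=0)
print(corpus.shape)  # (3, 4)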
Example #29
    def load_data(self, ds):
        _ds = None
        if ds['name'] == 'mnist':
            from tflearn.datasets import mnist as _ds
            self._X, self._Y, self._test_X, self._test_Y = _ds.load_data(
                one_hot=ds.get('one_hot', False))

        if ds['name'] == 'cifar10':
            from tflearn.datasets import cifar10 as _ds
            (self._X, self._Y), (self._test_X, self._test_Y) = _ds.load_data(
                one_hot=ds.get('one_hot', False))
        from tflearn.data_utils import shuffle, to_categorical
        del _ds  # discard
        if 'reshape' in ds: self.reshape(ds['reshape'])
        if ds.get('shuffle', False):
            self._X, self._Y = shuffle(self._X, self._Y)

        if ds.get('to_categorical', False):
            self._Y = to_categorical(self._Y, None)
            self._test_Y = to_categorical(self._test_Y, None)
        return self
Example #30
def train_val(sess, x_train, y_train, x_val, y_val, epochs):
    x_train, y_train = shuffle(x_train, y_train)
    train_batch_size = 32
    val_batch_size = 64
    train_total_batch = int(len(x_train) / train_batch_size)
    total_val_batch = int(len(x_val) / val_batch_size)
    validation_accuracy_buffer = []
    for epoch in range(epochs):
        print('epoche:{0} training start'.format(epoch))
        epoch_time = time.time()
        train(sess, x_train, y_train, train_total_batch, train_batch_size)
        val_accuracy = val(sess, x_val, y_val, total_val_batch, val_batch_size)
        print('Epoch:{0} , time:{1} seconds'.format(epoch, time.time() - epoch_time))
        # Simple early stopping: after epoch 10, keep a sliding window of the last
        # 10 validation accuracies; if the best value in the window is the oldest
        # entry (i.e. no improvement for 10 epochs), stop training.
        if epoch > 10:
            validation_accuracy_buffer.append(val_accuracy)
            if len(validation_accuracy_buffer) > 10:
                index_of_max_val_acc = np.argmax(validation_accuracy_buffer)
                if index_of_max_val_acc == 0:
                    break
                else:
                    del validation_accuracy_buffer[0]
Example #31
def shuffleAndSplitData(images, labels):
    images, labels = shuffle(images, labels)
    numImages = len(images)
    numTrain = math.floor(numImages * 0.7)
    numTest = math.floor(numImages * 0.2)
    trainImages = images[:numTrain]
    trainLabels = labels[:numTrain]
    testImages = images[numTrain:numTrain + numTest]
    testLabels = labels[numTrain:numTrain + numTest]
    validImages = images[numTrain + numTest:]
    validLabels = labels[numTrain + numTest:]

    test = cv2.imread('./test.jpg')
    test = format_image(test)
    print(test)
    trainImages = np.concatenate((trainImages, [test]))
    expression_arr = [0 for _ in range(8)]
    expression_arr[6] += 1
    trainLabels = np.concatenate((trainLabels, [expression_arr]))

    test2 = cv2.imread('./test2.jpg')
    test2 = format_image(test2)
    trainImages = np.concatenate((trainImages, [test2]))
    expression_arr = [0 for _ in range(8)]
    expression_arr[5] += 1
    trainLabels = np.concatenate((trainLabels, [expression_arr]))

    test3 = cv2.imread('./test3.jpg')
    test3 = format_image(test3)
    trainImages = np.concatenate((trainImages, [test3]))
    expression_arr = [0 for _ in range(8)]
    expression_arr[1] += 1
    trainLabels = np.concatenate((trainLabels, [expression_arr]))

    trainImages = np.array(trainImages)
    testImages = np.array(testImages)
    validImages = np.array(validImages)

    return (trainImages, trainLabels, testImages, testLabels, validImages,
            validLabels)
Example #32
def extract_data(filename):
    """Extract the images into a 4D tensor [image index, y, x, channels].


        """
    print('Extracting', filename)
    # get data from h5py
    file = h5py.File(filename, 'r')
    train_data = file['train_data'].value
    train_label = file['train_label']
    test_data = file['test_data'].value
    test_label = file['test_label']
    train_label = np.int64(train_label)
    test_label = np.int64(test_label)
    train_num = train_data.shape[0]
    test_num = test_data.shape[0]

    max, min = train_data.max(), train_data.min()
    train_data_new = (train_data - min) / (max - min)
    train_data_out = np.zeros([
        train_data.shape[0], train_data.shape[3], train_data.shape[1],
        train_data.shape[2], 1
    ])
    for i in range(train_data.shape[3]):
        train_data_out[:, i, :, :, :] = train_data_new[:, :, :, i]

    max, min = test_data.max(), test_data.min()
    test_data_new = (test_data - min) / (max - min)
    test_data_out = np.zeros([
        test_data.shape[0], test_data.shape[3], test_data.shape[1],
        test_data.shape[2], 1
    ])
    for i in range(test_data.shape[3]):
        test_data_out[:, i, :, :, :] = test_data_new[:, :, :, i]

    train_data_out, train_label = shuffle(train_data_out, train_label)
    train_label = to_categorical(train_label, 20)
    test_label = to_categorical(test_label, 20)

    return train_data_out, train_label, test_data_out, test_label
Example #33
    def fit(self):
        """ Learn face images from databases.

        use images at reception_robot.person.face_imgs
        Returns:

        """
        (X, Y), (X_test, Y_test) = self.generate_learning_data(
            image_size, is_test=bool(self.is_test))
        X, Y = shuffle(X, Y)

        self.load_model()
        print('Start learning.')
        self.model.fit(
            X, Y,
            n_epoch=self.n_epoch, shuffle=True,
            validation_set=(X_test, Y_test), show_metric=True,
            batch_size=self.batch_size, run_id=self.run_id)
        try:
            os.makedirs('models')
        except Exception as e:
            print(e)
        self.model.save(detector.generate_filename())
Example #34
        rowY[0] = 1
    elif(row[2] == 'Y'):
        rowY[1] = 1
    elif(row[3] == 'Y'):
        rowY[2] = 1
    elif(row[4] == 'Y'):
        rowY[3] = 1
    Y.append(rowY)


# Read the images
X = []
for filename in filenames:
    image = misc.imread(constants.IMAGE_64_PATH + '/' + filename, mode='L')
    X.append(image)

X = (numpy.array(X) / 256.0)
X = remove_zero(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

X_train, y_train = shuffle(X_train, y_train)

model = build_model_specific()
model.fit(X_train, y_train, n_epoch=100, shuffle=True, validation_set=(X_test, y_test),
          show_metric=True, batch_size=25, run_id='specific_cnn')
model.save(constants.TFLEARN_SPECIFIC_FILENAME)

print_results(X, Y, model)
Example #35
from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation

# Data loading and preprocessing
from tflearn.datasets import cifar10

(X,Y), (X_test, Y_test) = cifar10.load_data()
X, Y = shuffle(X,Y)
Y = to_categorical(Y, 10)
Y_test = to_categorical(Y_test, 10)

# Data preprocessing
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()

# Data augmentation
img_aug = ImageAugmentation()
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation()

# Building the CNN
network = input_data(shape=[None, 32, 32, 3], data_preprocessing=img_prep, data_augmentation=img_aug, name='first_layer')
network = max_pool_2d(network, 2) # Max pooling layer
Example #36
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, global_avg_pool
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
import numpy as np
from load_input import load_train_data


X_train, Y_train = load_train_data()
X_train, Y_train = shuffle(X_train, Y_train)
print('shuffle done')

X_val = X_train[2000:4000]
Y_val = Y_train[2000:4000]
network = input_data(shape=[None, 32, 32, 3])

network = conv_2d(network, 16, 3, activation='relu', weights_init='xavier')
network = batch_normalization(network)

network = conv_2d(network, 16, 3, activation='relu', weights_init='xavier')      
network = max_pool_2d(network, 2)
network = batch_normalization(network)

network = conv_2d(network, 32, 3, activation='relu', weights_init='xavier')    
network = max_pool_2d(network, 2)
network = batch_normalization(network)
Example #37
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, global_avg_pool
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
import numpy as np
from load_input import load_test_data


X_test, Y_test = load_test_data()
X_test, Y_test = shuffle(X_test, Y_test)

network = input_data(shape=[None, 32, 32, 3])

network = conv_2d(network, 16, 3, activation='relu', weights_init='xavier')
network = batch_normalization(network)

network = conv_2d(network, 16, 3, activation='relu', weights_init='xavier')      
network = max_pool_2d(network, 2)
network = batch_normalization(network)

network = conv_2d(network, 32, 3, activation='relu', weights_init='xavier')    
network = max_pool_2d(network, 2)
network = batch_normalization(network)

network = conv_2d(network, 32, 3, activation='relu', weights_init='xavier')
network = max_pool_2d(network, 2)
network = batch_normalization(network)