Example no. 1
def RNN_data_generator(corpus, seq_size, batch_size, dict_len, skip_step):
    '''
    Generate input x and target y from the whole corpus.
    corpus = whole corpus mapped to integer indices, in order
    seq_size = number of consecutive characters from the corpus that form one sequence
    batch_size = number of sequences to train over
    dict_len = length of the dictionary built from the corpus
    skip_step = gap between consecutive sequences
    '''

    x = np.zeros((batch_size, seq_size, dict_len))
    y = np.zeros((batch_size, dict_len))
    corpus_len = len(corpus)
    idx_start = 0
    idx_end = 0
    for idx in range(batch_size):
        idx_start = idx_start % corpus_len
        idx_end = idx_start + seq_size
        if idx_end+1 >= corpus_len:
            print('redundant sequences, choose smaller batch_size')
            idx_start = int(round(skip_step/2.0))
            idx_end = idx_start + seq_size
        tmp_x = corpus[idx_start:idx_end]
        x[idx, :, :] = to_categorical(tmp_x, num_classes=dict_len)
        tmp_y = corpus[idx_end]
        y[idx, :] = to_categorical(tmp_y, num_classes=dict_len)
        idx_start += skip_step

    return x, y
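A minimal usage sketch for the generator above (assuming numpy as np and keras.utils.to_categorical are imported as in the snippet; the toy corpus of 20 characters drawn from a 4-entry dictionary is hypothetical):

toy_corpus = [0, 1, 2, 3] * 5   # toy corpus already mapped to integer indices
x, y = RNN_data_generator(toy_corpus, seq_size=5, batch_size=3, dict_len=4, skip_step=2)
print(x.shape, y.shape)         # (3, 5, 4) (3, 4): one-hot input sequences and one-hot next characters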
Example no. 2
    def next_batch(self, batch_size, shuffle = True):
        """Return the next `batch_size` examples from this data set."""

        # shuffle for the first epoch
        start = self._index_in_epoch
        if self._epochs_completed == 0 and start == 0 and shuffle:
            self._shuffle_images_and_labels()

        if start + batch_size > self._num_examples:
            # retrieve the remaining examples that do not add up to a full batch
            self._epochs_completed += 1
            rest_num_examples = self._num_examples - start
            rest_images = self._images[start:self._num_examples]
            rest_labels = self._labels[start:self._num_examples]
            if shuffle:
                self._shuffle_images_and_labels()

            # complete the batch size from the next epoch
            start = 0
            self._index_in_epoch = batch_size - rest_num_examples
            end = self._index_in_epoch
            new_images = self._images[start:end]
            new_labels = self._labels[start:end]
            images = np.concatenate((rest_images, new_images), axis = 0)
            labels = np.concatenate((rest_labels, new_labels), axis = 0)
            return images, to_categorical(labels, self.num_classes)
        else:
            self._index_in_epoch += batch_size
            end = self._index_in_epoch
            return (self._images[start:end],
                    to_categorical(self._labels[start:end], self.num_classes))
Example no. 3
def load_data2(path):
    print("[INFO] loading images...")
    data = []
    labels = []
    # grab the image paths and randomly shuffle them
    imagePaths = sorted(list(paths.list_images(path)))
    random.seed(42)
    random.shuffle(imagePaths)
    # loop over the input images
    for imagePath in imagePaths:
        # load the image, pre-process it, and store it in the data list
        image = cv2.imread(imagePath)
        image = cv2.resize(image, (norm_size, norm_size))
        image = img_to_array(image)
        data.append(image)

        # extract the class label from the image path and update the
        # labels list
        label = int(imagePath.split(os.path.sep)[-2])       
        labels.append(label)  
        
    # scale the raw pixel intensities to the range [0, 1]
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)


    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data,
            labels, test_size=0.25, random_state=42)

    # convert the labels from integers to vectors
    trainY = to_categorical(trainY, num_classes=CLASS_NUM)
    testY = to_categorical(testY, num_classes=CLASS_NUM)   
    return trainX,trainY,testX,testY
Example no. 4
    def train(self, epochs, batch_size=128, sample_interval=50):

        # Load the dataset
        (X_train, y_train), (_, _) = mnist.load_data()

        # Rescale MNIST to 32x32
        X_train = np.array([scipy.misc.imresize(x, [self.img_rows, self.img_cols]) for x in X_train])

        # Rescale -1 to 1
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        X_train = np.expand_dims(X_train, axis=3)
        y_train = y_train.reshape(-1, 1)

        # Adversarial ground truths
        valid = np.ones((batch_size, 4, 4, 1))
        fake = np.zeros((batch_size, 4, 4, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]
            labels = y_train[idx]

            masked_imgs = self.mask_randomly(imgs)

            # Generate a batch of new images from the masked inputs
            gen_imgs = self.generator.predict(masked_imgs)

            # One-hot encoding of labels
            labels = to_categorical(labels, num_classes=self.num_classes+1)
            fake_labels = to_categorical(np.full((batch_size, 1), self.num_classes), num_classes=self.num_classes+1)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, [valid, labels])
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, [fake, fake_labels])
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Train the generator
            g_loss = self.combined.train_on_batch(masked_imgs, valid)

            # Plot the progress
            print ("%d [D loss: %f, op_acc: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[4], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                # Select a random half batch of images
                idx = np.random.randint(0, X_train.shape[0], 6)
                imgs = X_train[idx]
                self.sample_images(epoch, imgs)
                self.save_model()
Example no. 5
def load_miniplaces_batch():
    x_train, y_train = loader_train.next_batch(10000)
    x_test,y_test = loader_val.next_batch(10000)
    print("\n OG shapes:",x_train.shape,y_train.shape,x_test.shape,y_test.shape,len(y_test),y_test[0])
    y_train = to_categorical(y_train.astype('float32'),100)
    y_test = to_categorical(y_test.astype('float32'),100)
    print(len(y_test[0]))
    return (x_train,y_train), (x_test,y_test)
Example no. 6
    def multi_label(self):

        '''For many labels in a single column'''

        self.y_pred = to_categorical(self.y_pred, num_classes=self.classes)
        self.y_val = to_categorical(self.y_val, num_classes=self.classes)

        self.multi_class()
Example no. 7
    def train(self, epochs, batch_size=128, sample_interval=50):

        # Load the dataset
        (X_train, y_train), (_, _) = mnist.load_data()

        # Rescale -1 to 1
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        X_train = np.expand_dims(X_train, axis=3)
        y_train = y_train.reshape(-1, 1)

        # Class weights:
        # To balance the difference in occurrences of digit class labels.
        # 50% of labels that the discriminator trains on are 'fake'.
        # Weight = 1 / frequency
        cw1 = {0: 1, 1: 1}
        cw2 = {i: self.num_classes / half_batch for i in range(self.num_classes)}
        cw2[self.num_classes] = 1 / half_batch

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            # Sample noise and generate a batch of new images
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)

            # One-hot encoding of labels
            labels = to_categorical(y_train[idx], num_classes=self.num_classes+1)
            fake_labels = to_categorical(np.full((batch_size, 1), self.num_classes), num_classes=self.num_classes+1)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, [valid, labels], class_weight=[cw1, cw2])
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2])
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)


            # ---------------------
            #  Train Generator
            # ---------------------

            g_loss = self.combined.train_on_batch(noise, valid, class_weight=[cw1, cw2])

            # Plot the progress
            print ("%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[3], 100*d_loss[4], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)
Example no. 8
 def load_data(self):
     x_train, y_train = _.load_train_set()
     x_test, y_test = _.load_test_set()
     # Some simple preprocess.
     self.x_train = x_train.astype('float32') / 255
     self.x_test = x_test.astype('float32') / 255
     # Convert class vectors to binary class matrices.
     self.y_train = to_categorical(y_train, self.num_classes)
     self.y_test = to_categorical(y_test, self.num_classes)
     print('Data load complete.')
Example no. 9
def load_mnist():
    # the data, shuffled and split between train and test sets
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))
    return (x_train, y_train), (x_test, y_test)
Example no. 10
def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
    
    X, Y = zip(*dataset)
    
    X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
    
    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    return X, np.array(Y), Xoh, Yoh
Example no. 11
    def fit(self, x_train, y_train, x_val, y_val):
        self.model.summary()
        y_train = to_categorical(y_train, num_classes=2)
        y_val = to_categorical(y_val, num_classes=2)

        weight_file = 'hybrid_{val_acc:.3f}_loss_{loss:.3f}.hdf5'
        model_checkpoint = ModelCheckpoint(WEIGHT_DIR + weight_file, monitor='val_acc', verbose=1, save_best_only=True)
        tensorboard = TensorBoard(log_dir=LOG_DIR + 'train_log', histogram_freq=0, write_graph=True, write_images=False)
        self.model.fit(x_train, y_train, validation_data=(x_val, y_val),
                       batch_size=self.batch_size, epochs=self.epochs, verbose=1, shuffle=True,
                       callbacks=[model_checkpoint, tensorboard])
Example no. 12
def iris():

    df = pd.read_csv(base + 'iris.csv')
    df['species'] = df['species'].factorize()[0]
    df = df.sample(len(df))
    y = to_categorical(df['species'])
    x = df.iloc[:, :-1].values

    return x, y
Example no. 13
def load_cifar_10():
    from keras.datasets import cifar10
    num_classes = 10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0    
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train,y_train),(x_test,y_test)
Example no. 14
def load_mnist(normalize=False, one_hot_label=True):
  (x_train, y_train), (x_test, y_test) = mnist.load_data()

  if normalize:
    x_train = x_train.reshape(x_train.shape[0], 28*28).astype(np.float32)/255.0
    x_test = x_test.reshape(x_test.shape[0], 28*28).astype(np.float32)/255.0
 
  if one_hot_label:
    y_train = to_categorical(y_train, num_classes=10)
    y_test = to_categorical(y_test, num_classes=10)
  
  return (x_train, y_train), (x_test, y_test)
Example no. 15
def cifar10(*args, **kwargs):
    dataset = cx.Dataset()
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    inputs = np.concatenate((x_train, x_test))
    x_train, x_test = None, None
    inputs = inputs.astype('float32')
    inputs /= 255
    labels = np.concatenate((y_train, y_test))
    y_train, y_test = None, None
    targets = to_categorical(labels, 10)
    labels = np.array([str(label[0]) for label in labels], dtype=str)
    dataset.name = "CIFAR-10"
    dataset.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10
classes, with 6000 images per class.

The classes are completely mutually exclusive. There is no overlap
between automobiles and trucks. "Automobile" includes sedans, SUVs,
things of that sort. "Truck" includes only big trucks. Neither
includes pickup trucks.
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
Example no. 16
    def __init__(self, folder, transforms=None, shuffle=True, batch_size=32,
                 seed=None):

        if transforms is None:
            transforms = []

        paths, labels, label_names = get_paths_with_labels(folder)

        self.n = len(paths)
        self.paths = np.asarray(paths)
        self.labels = to_categorical(labels, num_classes=len(label_names))
        self.label_names = label_names

        self.shuffle = shuffle
        self.seed = seed

        self.transform = get_transform(*transforms)

        self.batch_size = batch_size
        self.batch_idx = 0
        self.num_batches_so_far = -1
        self.indices = np.arange(self.n)

        # calculate output shape by loading an image and
        # passing it through the functions
        img = imread(paths[0])
        img = np.asarray(img, dtype=K.floatx())
        self.output_shape = self.transform(img).shape

        if K.image_data_format() == 'channels_first':
            self.output_shape = (self.output_shape[2],
                                 self.output_shape[0],
                                 self.output_shape[1])

        self.reset()
Example no. 17
    def sample_generator_input(self, batch_size):
        # Generator inputs
        sampled_noise = np.random.normal(0, 1, (batch_size, 62))
        sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
        sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)

        return sampled_noise, sampled_labels
Example no. 18
def process_images(image_path, shape_path):
    shape_y = None
    if shape_path == "crop_images/valid_circle/":
        shape_y = 0
    if shape_path == "crop_images/valid_rectangle/": 
        shape_y = 1
    if shape_path == "crop_images/valid_triangle/":
        shape_y = 2
    if shape_path == "crop_images/valid_square/":
        shape_y = 3
        
    ylabel = to_categorical(shape_y, num_classes = 4) 
    ylabel = np.reshape(ylabel, (4))
	
    childs_lock.acquire()
    img = Image.open(image_path)
    childs_lock.release()   
    np_img = np.array(img, dtype = [('img_info', np.float16)]) 
    #img = img.resize((200, 200), Image.ANTIALIAS) #ANTIALIAS preserves quality
    # to check that all image input are the same shape
    '''width, height = np_img.shape[0], np_img.shape[1]
    if width == 300 or height == 300:
		print ("rm ", image_path)'''
    img.close()
    return np_img['img_info'], ylabel
Example no. 19
 def get_ori_data(self):
     print('...Load original data begin')
     data = self.load_pkl_data(ORIGINAL)
     data['data'] = data['data'].astype('float32')
     data['label'] = to_categorical(data['label'], n_classes)
     print('......original data shape : {0}'.format(data['data'].shape))
     print('...Load original data done')
     return data
Example no. 20
def sentence_to_id(seed, dict):
    seed_id = []
    dict_len = len(dict)
    for char in seed:
        id = dict[char]
        tmp = to_categorical(id, num_classes = dict_len)
        seed_id.append(tmp)
    return seed_id
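A quick sketch of calling the helper above (assuming to_categorical is imported; the three-character dictionary is hypothetical):

char_to_id = {'a': 0, 'b': 1, 'c': 2}       # hypothetical character dictionary
encoded = sentence_to_id('cab', char_to_id)
print(encoded[0])                           # [0. 0. 1.] -- one-hot vector for 'c'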
Example no. 21
def load_cifar100() :
    (train_data, train_labels), (test_data, test_labels) = cifar100.load_data()
    # train_data = train_data / 255.0
    # test_data = test_data / 255.0
    train_data, test_data = normalize(train_data, test_data)

    train_labels = to_categorical(train_labels, 100)
    test_labels = to_categorical(test_labels, 100)

    seed = 777
    np.random.seed(seed)
    np.random.shuffle(train_data)
    np.random.seed(seed)
    np.random.shuffle(train_labels)


    return train_data, train_labels, test_data, test_labels
Example no. 22
def load_fashion() :
    (train_data, train_labels), (test_data, test_labels) = fashion_mnist.load_data()
    train_data = np.expand_dims(train_data, axis=-1)
    test_data = np.expand_dims(test_data, axis=-1)

    train_data, test_data = normalize(train_data, test_data)

    train_labels = to_categorical(train_labels, 10)
    test_labels = to_categorical(test_labels, 10)

    seed = 777
    np.random.seed(seed)
    np.random.shuffle(train_data)
    np.random.seed(seed)
    np.random.shuffle(train_labels)


    return train_data, train_labels, test_data, test_labels
Example no. 23
 def get_ori_data(self):
     print('...Load original data begin')
     path = self.root_dir + 'fold{0}/'.format(self.k) + 'original_{0}.pkl'.format(self.train_or_valid)
     data = self.load_pkl_data(path)
     data['data'] = data['data'].astype('float32')
     data['label'] = to_categorical(data['label'], n_classes)
     print('......original data shape : {0}'.format(data['data'].shape))
     print('...Load original data done')
     return data
Example no. 24
  def load_testing_data(self):
    #testing_dataframe = pandas.read_csv(self.commandline_args.test)
    #values = testing_dataframe.values
    
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    shaped_labels = to_categorical(y_test, self.num_classes+1)
    scaled_values = self.scale_values(X_test)
    shaped_values = self.reshape_values(scaled_values)

    return shaped_values, shaped_labels
Example no. 25
def trainBatch(model, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param args: arguments
    :return: The trained model
    """

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size, histogram_freq=args.debug)
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-resnet-{epoch:02d}.h5',
                                           save_best_only=True, save_weights_only=True, verbose=1)
    lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: args.lr * (0.9 ** epoch))

    # compile the model
    # model.compile(optimizer=optimizers.Adam(lr=args.lr),
    #               loss=[margin_loss, 'mse'],
    #               loss_weights=[1., args.lam_recon],
    #               metrics={'out_caps': 'accuracy'})
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['mae', 'acc','top_k_categorical_accuracy'])

    groups = args.groups
    for i in range(groups):
        print("Training Group: ",i)
        (x_test, y_test, x_train, y_train) = loadMiniplacesBatch(train_data_list, val_data_list, images_root,group=i,groups=groups,size=[100,100])
        x_train = x_train.reshape(-1, 100, 100, 3).astype('float32') / 255.
        x_test = x_test.reshape(-1, 100, 100, 3).astype('float32') / 255.
        y_train = to_categorical(y_train.astype('float32'),num_classes=100)
        y_test = to_categorical(y_test.astype('float32'),num_classes=100)
        print(x_train.shape,y_train.shape,x_test.shape,y_test.shape)

        # Training without data augmentation:
        model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs, callbacks=[log, tb, checkpoint, lr_decay],validation_data=(x_test,y_test))

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    return model
Example no. 26
 def get_ori_data(self):
     print('...Load original data begin')
     path = self.root_dir + 'fold{0}/'.format(self.k) + 'original_{0}.pkl'.format(self.train_or_valid)
     data = self.load_pkl_data(path)
     if self.train_or_valid=='train':
         data['data'] = np.array(data['data'])
     if self.train_or_valid=='valid':
         data['data'] = conduct_fe(data['data'], SPEC)
     data['label'] = to_categorical(data['label'], n_classes)
     print('...Load original data done')
     return data
Example no. 27
def train_fold(data):
    i_fold, train, test, nv_cls, X, y, s_info, cachedir = data
    window_qtd = int(sys.argv[1])
    window_qtd_stride = 5
    print(window_qtd)
    print(i_fold)
    print(nv_cls)
    X_train, y_train = lofar2image(X, y, train, window_qtd, window_qtd_stride, s_info)
    X_test, y_test = lofar2image(X, y, test, window_qtd, window_qtd, s_info)
    if verbose:
        print('\t\t Fold %i' % i_fold)

    novelty_cls = nv_cls
    X_train = X_train[y_train != novelty_cls]
    X_test = X_test[y_test != novelty_cls]
    y_train = y_train[y_train != novelty_cls]
    y_test = y_test[y_test != novelty_cls]
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    mask = np.ones(4, dtype=bool)
    mask[novelty_cls] = False
    if y_train.shape[1] == 4:
        y_train = y_train[:, mask]
        y_test = y_test[:, mask]
    elif y_train.shape[1] != 3:
        raise NotImplementedError

    class_mapping = {0: 'ClassA',
                     1: 'ClassB',
                     2: 'ClassC',
                     3: 'ClassD'}
    inner_cachedir = os.path.join(cachedir, class_mapping[nv_cls], '%i_fold' % i_fold)
    estimator.fit(X_train, y_train,
                  validation_split=0.1,
                  # validation_data=(X_test, y_test),
                  n_inits=1,
                  verbose=verbose,
                  cachedir=inner_cachedir)
    scores = estimator.score(X_test, y_test, return_eff=True)
    return i_fold, scores, nv_cls
Example no. 28
def prepare_image_dataset(x_data, y_data, img_dim, num_classes, reshape=True):
    """
        Normalizes the images in the datasets and converts the labels to categoricals.
    """
    img_rows, img_cols = img_dim
    if reshape:
        x_data = x_data.reshape(x_data.shape[0], img_rows, img_cols)
    x_data = x_data.astype('float32')/255
    # convert class vectors to binary class matrices
    y_data = utils.to_categorical(y_data, num_classes)
    
    return (x_data, y_data)
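A possible call of the helper above, sketched with MNIST as input (this assumes `utils` refers to keras.utils, as the call inside the function suggests):

from keras.datasets import mnist
(x_raw, y_raw), _ = mnist.load_data()
x_data, y_data = prepare_image_dataset(x_raw, y_raw, img_dim=(28, 28), num_classes=10)
print(x_data.shape, y_data.shape)   # (60000, 28, 28) (60000, 10)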
Example no. 29
    def get_class_one_hot(self, class_str):
        """Given a class as a string, return its number in the classes
        list. This lets us encode and one-hot it for training."""
        # Encode it first.
        label_encoded = self.classes.index(class_str)

        # Now one-hot it.
        label_hot = to_categorical(label_encoded, len(self.classes))

        assert len(label_hot) == len(self.classes)

        return label_hot
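The same encode-then-one-hot step, sketched outside the class with a hypothetical list of class names:

classes = ['run', 'walk', 'jump']                        # hypothetical class list
label_encoded = classes.index('walk')                    # 1
label_hot = to_categorical(label_encoded, len(classes))  # [0. 1. 0.]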
Example no. 30
  def load_training_data(self):
    #training_dataframe = pandas.read_csv(self.commandline_args.train)
    #values = training_dataframe.values[:,1:]
    #labels = training_dataframe.values[:,0]
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    
    #shaped_labels = to_categorical(y_train, self.num_classes+1)
    shaped_labels = to_categorical(np.full((y_train.shape[0], 1), 0), self.num_classes+1)
    scaled_values = self.scale_values(X_train)
    shaped_values = self.reshape_values(scaled_values)

    return shaped_values, shaped_labels
Example no. 31
	for x in test_datasets:
		ax = pickle.load(open(x, 'rb'))
		test_sizes.append(len(ax))

	_, _, test_dataset, test_labels = merge_datasets(test_datasets, test_sizes, 1)

	print('Testing:', test_dataset.shape, test_labels.shape)
	pickle_file = os.path.join(local_path, 'FER_traj.pickle')

	translate_labels = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']

	print('Test set', test_dataset.shape, test_labels.shape)

	test_dataset = test_dataset.reshape(
		(-1, settings['image_size'][0], settings['image_size'][1], settings['num_channels'])).astype(np.float32)
	test_labels_oh = to_categorical(test_labels)
	print('Test set', test_dataset.shape, test_labels_oh.shape)

	input_img = Input(shape = (settings['image_size'][0], settings['image_size'][1], settings['num_channels']))

	encode = encoder(input_img)
	flat_level = (Model(input_img,fcfl(encode)))
	full_model = Model(input_img,fc(encode))
	full_model.load_weights('classification_complete.h5')

	for l1,l2 in zip(flat_level.layers[:],full_model.layers[:14]):
		l1.set_weights(l2.get_weights())

	flat_level.compile(
		loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(),metrics=['accuracy'])
Example no. 32
    label = 1 if label == "positive" else 0
    labels.append(label)

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels,
                                                  test_size=0.25,
                                                  random_state=42)

# convert the labels from integers to vectors
trainY = to_categorical(trainY, num_classes=2)
testY = to_categorical(testY, num_classes=2)

# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30,
                         width_shift_range=0.1,
                         height_shift_range=0.1,
                         shear_range=0.2,
                         zoom_range=0.2,
                         horizontal_flip=True,
                         fill_mode="nearest")

# initialize the model
print("[INFO] compiling model...")
model = LeNet.build(width=28, height=28, depth=3, classes=2)
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
Example no. 33
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Data Settings
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

x_train = x_train / 255
x_test = x_test / 255

y_cat_train = to_categorical(y_train, 10)
y_cat_test = to_categorical(y_test, 10)

# Training the Model
model = Sequential()

model.add(
    Conv2D(
        filters=32,
        kernel_size=(4, 4),
        input_shape=(32, 32, 3),
        activation='relu',
    ))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(
    Conv2D(
        filters=32,
Example no. 34
def encode_categoricals(df, col_names):
    for col in col_names:
        onehots = to_categorical(df[col].values)
        onehot_df = pd.DataFrame(onehots, columns=[col + "_" + str(i) for i in range(onehots.shape[1])])
        df = pd.concat([df.drop(col, axis=1), onehot_df], axis=1)
    return df
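A small illustration of the helper above (assuming pandas as pd and to_categorical are imported; the frame with one integer-coded categorical column is hypothetical):

toy_df = pd.DataFrame({'color': [0, 2, 1, 2], 'price': [9.5, 7.0, 8.2, 7.9]})
encoded_df = encode_categoricals(toy_df, ['color'])
print(list(encoded_df.columns))   # ['price', 'color_0', 'color_1', 'color_2']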
Example no. 35
 def remember(self, game, action):
     self.memory_data.append(self.collect_memory_fragment(game, action))
     self.memory_label.append(
         to_categorical(list(Direction).index(action),
                        len(Direction)).reshape((1, len(Direction))))
Example no. 36
model_path = 'train_test_verified'

if not os.path.exists(model_path):
    os.mkdir(model_path)

# refine_path = 'cnn2d_verified_refine'

# models = [join(model_path, f) for f in listdir(model_path) if isfile(join(model_path, f))]

# if not os.path.exists(refine_path):
#     os.mkdir(refine_path)

df = pd.read_csv('data/train_label.csv')
df['trans'] = df['label'].map(map_dict)
df['onehot'] = df['trans'].apply(lambda x: to_categorical(x, num_classes=41))
Y = df_manu['onehot'].tolist()
Y = np.array(Y)
Y = Y.reshape(-1, 41)

X = np.load('data/mfcc/X_train.npy')

X, Y = shuffle(X, Y, random_state=5)

split = int(X.shape[0] * (9 / 10))
X_train, X_test = X[:split], X[split:]
Y_train, Y_test = Y[:split], Y[split:]

print(X_train.shape)

checkpoint = ModelCheckpoint(os.path.join(model_path,
Example no. 37
y_test = y_test.values
"""### Let's take a look at our data"""

show_data(X_train, y_train)
"""### We notice that the digits themselves are always black, and the backgrounds are of a different shade. This implies we could remove the background. Let's do that"""

X_train = preprocess(X_train)
X_test = preprocess(X_test)

display(
    X_train.shape
)  # Now our inputs have been reshaped, so we have a 4D array, with each item representing an image.
display(X_test.shape)
"""### Categorical labels should always be one-hot encoded"""

y_train_enc = to_categorical(y_train, NUM_CLASSES)
y_test_enc = to_categorical(y_test, NUM_CLASSES)

show_data(X_train, y_train)

show_data(X_test, y_test)
"""### Now we define our base model

This is a very simple CNN architecture similar to what you learned in the lecture. We have cascading blocks of convolutional layers followed by pooling layers.
"""


def create_base_model():
    model = Sequential()

    model.add(
Example no. 38
def get_state(env):
    state = env.get_feature_vec_observation()
    res = state[0:-1]
    res.extend(to_categorical(state[-1], n_object))
    return res
Example no. 39
sequences = tokenizer.texts_to_sequences(contents)
sequences_words = tokenizer.texts_to_sequences(word)
data_x = pad_sequences(sequences,maxlen=50) # average length is 20, max is 200, so cap at 50

def getDataY(data_x):
    # get the position of the sentiment keyword within the review content
    data_y = []
    for i in range(data_x.shape[0]):
        try:
            data_y.append(list(data_x[i]).index(sequences_words[i][0]))
        except:
            data_y.append(-1) # -1 if the sentiment keyword is not among the tokens
    return  np.array(data_y)

data_y = getDataY(data_x)
onehot_y = to_categorical(data_y[data_y >= 0],num_classes=50) # convert the position information to one-hot
train_x, test_x, train_y, test_y = train_test_split(data_x[data_y >= 0],onehot_y)

def trainModel(train_x, test_x, train_y, test_y):
    # train the model
    model = Sequential()
    model.add(Embedding(len(tokenizer.word_index) + 1, 128))
    model.add(LSTM(128,dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(50, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(train_x, train_y,
              batch_size=32,
Example no. 40
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

#Next, we need to reshape the data into an array the CNN can accept as input
#For fitting the CNN, we reshape the original data (e.g. train_images.shape=(60000,28,28)->(60000,28,28,1))
#(60000 -> number of images, 28 -> height, 28 -> width, 1 -> number of channels, since the images are black and white)

train_images = train_images.reshape((60000, 28, 28, 1))

train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((10000, 28, 28, 1))

test_images = test_images.astype('float32') / 255

train_labels = to_categorical(train_labels)

test_labels = to_categorical(test_labels)
##Create the depthwise separable convolution##

from keras.models import Sequential, Model
from keras import layers, models

#Use Sequential method to create the model

model = models.Sequential()

model.add(
    layers.SeparableConv2D(32, (3, 3),
                           activation='relu',
                           input_shape=(28, 28, 1)))
Example no. 41
def loadData(iData):
    lColumns = getColumns()
    #h5File = h5py.File(iFile)
    #treeArray = h5File['test'][()]
    df = pd.DataFrame(iData, columns=lColumns)
    print "A"
    idconv = {
        11.: 1,
        12.: 2,
        13.: 3,
        22.: 4,
        130.: 5,
        211.: 6,
        310.: 7,
        321.: 8,
        2112.: 9,
        2212.: 10,
        3112.: 11,
        3122.: 12,
        3222.: 13,
        3312.: 14,
        3322.: 15,
        3334.: 16,
        -11.: 17,
        -12.: 18,
        -13.: 19,
        -22.: 20,
        -130.: 21,
        -211.: 22,
        -310.: 23,
        -321.: 24,
        -2112.: 25,
        -2212.: 26,
        -3112.: 27,
        -3122.: 28,
        -3222.: 29,
        -3312.: 30,
        -3322.: 31,
        -3334.: 32,
        0.: 0
    }
    nIDs = 33
    print "B"
    for i0 in range(nparts):
        df['j_part_pt_' + str(i0)] = df['j_part_pt_' + str(i0)] / df['j_pt']
        df['j_part_id_' + str(i0)] = df['j_part_id_' + str(i0)].map(idconv)
    features_val = df[lPartfeatures]
    for p in lPartfeatures:
        if (df[p].isna().sum() > 0): print(p, "found nan!!")

    features_2df = np.zeros(
        (len(df['procid']), nparts, len(lPartvars) + nIDs - 1))
    for ir, row in features_val.iterrows():
        features_row = np.array(
            np.transpose(row.values.reshape(len(lPartvars), nparts)))
        features_row = np.concatenate(
            (features_row[:, :-1],
             to_categorical(features_row[:, -1], num_classes=nIDs)),
            axis=1)
        features_2df[ir, :, :] = features_row
    features_val = features_2df
    return features_val
Example no. 42
num_classes = 10
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

import numpy as np 
import matplotlib.pyplot as plt 

from keras.utils import to_categorical 

x_train = x_train / 255 
x_test = x_test / 255 

y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes) 
print(y_train.shape) 
print(y_train[0]) 

from keras.models import Sequential 
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout 

model = Sequential() 

model.add(Conv2D(32, (3, 3), padding = "same", input_shape = x_train.shape[1:], activation = "relu"))
model.add(MaxPooling2D(pool_size = 2)) 
model.add(Dropout(0.25)) 

model.add(Flatten()) 
model.add(Dense(512, activation = "relu")) 
Example no. 43
    model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
    model.summary()
    model.compile(optimizer=Adam(lr=0.005,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 decay=0.01),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    s0 = np.zeros((m, n_s))
    c0 = np.zeros((m, n_s))
    outputs = list(Yoh.swapaxes(0, 1))
    model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

    model.load_weights('models/model.h5')
    EXAMPLES = [
        '3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007',
        'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001'
    ]
    for example in EXAMPLES:
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(
            list(
                map(lambda x: to_categorical(x, num_classes=len(human_vocab)),
                    source))).swapaxes(0, 1)
        prediction = model.predict([source, s0, c0])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]

        print("source:", example)
        print("output:", ''.join(output))
Example no. 44
# develop an mlp for blobs dataset
from sklearn.datasets.samples_generator import make_blobs
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from matplotlib import pyplot
# generate 2d classification dataset
X, y = make_blobs(n_samples=1100,
                  centers=3,
                  n_features=2,
                  cluster_std=2,
                  random_state=2)
# one hot encode output variable
y = to_categorical(y)
# split into train and test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
# define model
model = Sequential()
model.add(Dense(25, input_dim=2, activation='relu'))
model.add(Dense(3, activation='softmax'))
opt = SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
# fit model
history = model.fit(trainX,
                    trainy,
                    validation_data=(testX, testy),
Example no. 45
def train_model():

    if not os.path.exists('ModelCheckpoint'):
        os.makedirs('ModelCheckpoint')

    MAX_SEQUENCE_LENGTH = 90018
    MAX_NB_WORDS = 170000
    EMBEDDING_DIM = 300
    VALIDATION_SPLIT = 0.1
    BATCH_SIZE = 32

    print('Indexing word vectors.')

    embeddings_index = {}
    f = file_io.FileIO('GoogleNews-vectors-negative300.txt', mode='r')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    print('Found %s word vectors.' % len(embeddings_index))

    print('Processing text dataset')

    sc = SupremeCourt()
    print(sc.info)

    texts = []  # list of text samples
    labels_index = {}  # dictionary mapping label name to numeric id
    labels = []  # list of label ids

    issue_codes = list(sc.issue_area_codes.keys())  # 15 labels
    issue_codes.sort()
    issue_codes = [str(ic) for ic in issue_codes]

    labels_index = dict(zip(issue_codes, np.arange(len(issue_codes))))

    for record in sc.records():
        if record['issue'] == None:  # some cases have None as an issue
            labels.append(labels_index['-1'])
        else:
            labels.append(labels_index[record['issue'][:-4]])
        texts.append(record['text'])

    print('Found %s texts.' % len(texts))
    print('Found %s labels.' % len(labels_index))

    # finally, vectorize the text samples into a 2D integer tensor
    tokenizer = Tokenizer(
        num_words=MAX_NB_WORDS,
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    )
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)

    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))

    data = pad_sequences(sequences)

    MAX_SEQUENCE_LENGTH = data.shape[1]

    labels = to_categorical(np.asarray(labels))
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)

    # split the data into a training set and a validation set
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=VALIDATION_SPLIT, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=VALIDATION_SPLIT, random_state=42)

    def generator():
        while True:
            indices = list(range(len(x_train)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_train[list_IDs_temp], y_train[list_IDs_temp]

    def test_generator():
        while True:
            indices = list(range(len(x_test)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_test[list_IDs_temp], y_test[list_IDs_temp]

    def val_generator():
        while True:
            indices = list(range(len(x_val)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_val[list_IDs_temp], y_val[list_IDs_temp]

    print('Preparing embedding matrix.')

    # prepare embedding matrix
    num_words = min(MAX_NB_WORDS, len(word_index))
    embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    print('Training model.')

    model = Sequential()
    model.add(
        Embedding(num_words,
                  EMBEDDING_DIM,
                  weights=[embedding_matrix],
                  input_length=MAX_SEQUENCE_LENGTH,
                  trainable=False))
    model.add(CuDNNLSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(len(labels_index), activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])

    checkpointer = ModelCheckpoint(filepath="ModelCheckpoint/" +
                                   os.path.basename(__file__)[:-3] +
                                   "-{epoch:02d}-{val_acc:.2f}.hdf5",
                                   monitor='val_acc',
                                   verbose=2,
                                   save_best_only=True,
                                   mode='max')

    earlystopper = EarlyStopping(monitor='val_loss',
                                 min_delta=0,
                                 patience=0,
                                 verbose=2,
                                 mode='auto')

    model.summary()

    model.fit_generator(generator=generator(),
                        steps_per_epoch=len(x_train) // BATCH_SIZE,
                        epochs=50,
                        verbose=2,
                        validation_data=test_generator(),
                        validation_steps=len(x_test) // BATCH_SIZE,
                        callbacks=[checkpointer, earlystopper],
                        shuffle=True)

    score = model.evaluate_generator(val_generator(),
                                     steps=len(x_val) // BATCH_SIZE)

    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save Keras ModelCheckpoints locally
    model.save('model.hdf5')

Example no. 46
def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))


label = to_categorical(label)
print(label)
print(label.shape)
X_train, X_test, y_train, y_test = train_test_split(
    data,
    label,
    test_size=0.2,
    random_state=1)

# fit only on the training data
tokenizer.fit_on_texts(X_train)
# convert the training texts
seq_x_train = tokenizer.texts_to_sequences(X_train)
X_enc_train = tokenizer.sequences_to_matrix(seq_x_train, mode="tfidf")
# convert the test texts
seq_x_test = tokenizer.texts_to_sequences(X_test)
Example no. 47
print(df.shape)
df=df[['Presence_of_Smoke','Area','ROG','Color','Severity1']]



##create input and output vectors
X = df.iloc[:,0:4]
Y = df.iloc[:,4]
print(X.shape)
X = X.values.reshape(1467, 4, 1)
Y = Y.values.reshape(1467, 1)


Z = Y
from keras.utils import to_categorical
Y = to_categorical(Y)
total_rows = 1467 * 4
max_len=4


## splitting of training and testing data
X_train,X_test,Z_train,Z_test = train_test_split(X,Z,test_size=0.20)
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.20)

X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))




#create model (GRU)
Example no. 48
# Train the Model
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    verbose=1,
    callbacks=[metrics_epoch])

# Save the Model
model.save('Dogs_Cats_Finetuning_MobileNet.h5')

predictions = model.predict_generator(validation_generator)
# val_preds = np.argmax(predictions, axis=-1)
val_preds = [1 if x >= 0.5 else 0 for x in predictions]
val_trues = validation_generator.classes
classes_one_hot_encoded = to_categorical(val_trues)

cm = metrics.confusion_matrix(val_trues, val_preds)
print(cm)

precisions, recall, fscore, support = metrics.precision_recall_fscore_support(val_trues, val_preds, average=None)

# Plot the accuracy and loss curves
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# metrics calculated by using sklearn after validating
print('Precision')
print(precisions)
Example no. 49
    for num_key in range(label_mapping.get(label)):
        labels.append(i)
    i += 1

print("Found %s words in our csv file" % len(texts))

# Vectorize the words
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print("Found %s unique words" % len(word_index))
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Create one-hot vectors for each of the labels
labels = to_categorical(np.asarray(labels))

print("Shape of data: ", data.shape)
print("Shape of label: ", labels.shape)

# Split the data into a training set and a validation set
# Will later perform 10 fold crossvalidation to find best split

# Shuffling the len(data) items of data
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

# Data (x) and labels (y) are actually split here
Example no. 50
    def train_models(self):
        if not self.feature_sets:
            print("Error: no input feature sets found!")
            sys.exit(1)
        self.df = pd.concat(self.df_dict.values())

        only_train = self.df[self.df["for_train"] & ~self.df["for_test"]]
        x_otrain = only_train.loc[:, only_train.columns != 'category']
        y_otrain = only_train["category"]

        only_test = self.df[~self.df["for_train"] & self.df["for_test"]]
        x_otest = only_test.loc[:, only_test.columns != 'category']
        y_otest = only_test["category"]

        both = self.df[self.df["for_train"] & self.df["for_test"]]
        x_both = both.loc[:, both.columns != 'category']
        y_both = both["category"]

        train_frac = 0.6
        # if sample used for both training and testing, split it
        x_train, x_test, y_train, y_test = train_test_split(
            x_both,
            y_both,
            train_size=train_frac,
            test_size=(1 - train_frac),
            shuffle=True)
        # if only for testing - assign a weight so roc curves don't mess up
        only_test["wgt"] = only_test["wgt"] * (1 - train_frac)

        x_train["category"] = y_train
        x_otrain["category"] = y_otrain
        train = pd.concat([x_train, x_otrain])
        train = train.sample(frac=1)  # shuffle

        x_test["category"] = y_test
        x_otest["category"] = y_otest
        test = pd.concat([x_test, x_otest])

        tr_filter = False
        if tr_filter:
            train = train_filter(train)

        self.x_train = train.loc[:, train.columns != 'category']
        self.y_train = train['category']

        self.x_test = test.loc[:, test.columns != 'category']
        self.y_test = test['category']

        for feature_set_name, feature_set in self.feature_sets.items():

            for model in self.mva_models:
                training_data, testing_data = self.prepare_data(
                    feature_set_name, feature_set)

                if model.binary:
                    if len(self.categories) != 2:
                        print(
                            "Can't perform binary classification with {0} categories!"
                            .format(len(self.categories)))
                        sys.exit(1)
                elif not self.converted_to_cat:
                    self.y_train = to_categorical(self.y_train,
                                                  len(self.categories))
                    self.y_test = to_categorical(self.y_test,
                                                 len(self.categories))
                    # need this to convert only once (for the case when several models are trained)
                    self.converted_to_cat = True

                if "resweights" in model.name:
                    self.y_train = train[['category', 'resweight']]
                    self.y_test = test[['category', 'resweight']]
                else:
                    training_data = training_data.loc[:, training_data.
                                                      columns != 'category']
                    testing_data = testing_data.loc[:, testing_data.
                                                    columns != 'category']

                model.train(training_data, self.y_train, feature_set_name,
                            self.model_dir, self.name)
                print(f"Test shape: {testing_data.shape}")
                prediction = model.predict(testing_data, self.y_test,
                                           feature_set_name)

                if model.binary:
                    if "resweights" in model.name:
                        roc = roc_curve(self.y_test.iloc[:, 0],
                                        prediction,
                                        sample_weight=self.x_test['wgt'] *
                                        self.x_test["genweight"])
                        testing_data["category"] = self.y_test.iloc[:, 0]
                    else:
                        roc = roc_curve(self.y_test,
                                        prediction,
                                        sample_weight=self.x_test['wgt'] *
                                        self.x_test["genweight"])
                        testing_data["category"] = self.y_test

                    self.print_yields(roc, prediction, 0.01)

                    self.plot_hist("dnn_score_{0}_{1}_{2}".format(
                        self.name, model.name, feature_set_name),
                                   df=testing_data,
                                   values=prediction)
                    np.save(
                        "{0}/{1}_{2}_{3}_roc".format(self.out_dir, self.name,
                                                     model.name,
                                                     feature_set_name), roc)
                    self.roc_curves[model.name + "_" + feature_set_name] = roc

                else:
                    vbf_pred = prediction[0]
                    ggh_pred = prediction[1]
                    dy_pred = prediction[2]
                    ewk_pred = prediction[3]

                    #                    cuts = (ewk_pred<0.7)
                    cuts = None
                    if cuts:
                        self.x_test = self.x_test[cuts]
                        self.y_test = self.y_test[cuts]
                        vbf_pred = vbf_pred[cuts]
                        ggh_pred = ggh_pred[cuts]
                        dy_pred = dy_pred[cuts]
                        ewk_pred = ewk_pred[cuts]
                    pred = vbf_pred
                    #                    pred = np.sum([vbf_pred,ggh_pred], axis=0)
                    #                    pred = np.sum([vbf_pred, (-1)*ewk_pred], axis=0)
                    roc = roc_curve(np.logical_or(self.y_test[:, 0],
                                                  self.y_test[:, 1]),
                                    pred,
                                    sample_weight=self.x_test['wgt'] *
                                    self.x_test["genweight"])

                    self.print_yields(roc, pred, 0.01)

                    #                    np.save("{0}/{1}_{2}_{3}_roc".format(self.out_dir, self.name,  model.name, feature_set_name), roc)
                    #                    np.save("{0}/{1}_{2}_{3}_vbf-ewk_roc".format(self.out_dir, self.name,  model.name, feature_set_name), roc)
                    np.save(
                        "{0}/{1}_{2}_{3}_ewk<07_roc".format(
                            self.out_dir, self.name, model.name,
                            feature_set_name), roc)
Example no. 51
    def generator(self):

        while True:
            batches = _make_batches(size=self.total_images,
                                    batch_size=self.batch_size)
            for start, end in batches:
                arr = []
                labels = []
                cur_batch = self.image_paths[start:end]

                for image_path in cur_batch:
                    # print image_path
                    img = imread(
                        fname=os.path.join(self.data_path, image_path))

                    # if channels are not 3
                    ndim = len(img.shape)

                    if ndim == 2:
                        img = img[..., np.newaxis]
                        img = np.tile(A=img, reps=(1, 1, 3))

                    # drop the alpha channel of RGBA images
                    if ndim == 3 and img.shape[-1] == 4:
                        img = img[..., :3]

                    # resizing image maintaining aspect ratio
                    img = resize_image(img=img, size=self.input_size)

                    if self.training:
                        # random cropping while training
                        img = random_crop_image(img=img, size=self.input_size)
                        img = augment(img=img,
                                      horizontal_flip=True,
                                      vertical_flip=True,
                                      brightness=True,
                                      contrast=True,
                                      rotation=True,
                                      translation=True,
                                      blur=True,
                                      noise=True)
                    else:
                        # center cropping
                        h, w, c = img.shape
                        center_h = h // 2
                        center_w = w // 2
                        center_new_img = self.input_size // 2
                        new_x1 = center_w - center_new_img
                        new_y1 = center_h - center_new_img
                        new_x2 = center_w + center_new_img
                        new_y2 = center_h + center_new_img
                        if self.input_size % 2 == 1:
                            new_x2 += 1
                            new_y2 += 1
                        img = img[new_y1:new_y2, new_x1:new_x2]

                    arr.append(img)
                    cls = image_path.split('/')[0]
                    id_for_cls = self.cls2id[cls]
                    labels.append(id_for_cls)

                arr = np.array(arr)
                arr = arr.astype('float32')

                # scale pixel values from [0, 255] to the range [-1, 1]
                arr /= 255.
                arr -= 0.5
                arr *= 2.

                # one hot encoding
                labels = to_categorical(y=labels,
                                        num_classes=self.total_classes)
                yield (arr, labels)
Example no. 52
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
import code
import imageResize as ir
import numpy as np

#download mnist data and split into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#reshape data to fit model
X_train = X_train.reshape(60000, 28, 28, 1)
X_test = X_test.reshape(10000, 28, 28, 1)

#one-hot encode target column
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

#create model
model = Sequential()
#add model layers
model.add(Conv2D(64, kernel_size=3, activation="relu",
                 input_shape=(28, 28, 1)))
model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(Flatten())
model.add(Dense(10, activation="softmax"))

#compile model using accuracy to measure model performance
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras import utils
import numpy as np

np.random.seed(1671)

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000, 784)
X_train = X_train.astype('float32')
X_test = X_test.reshape(10000, 784)
X_test = X_test.astype('float32')

# normalize data
X_train /= 255
X_test /= 255

y_train = utils.to_categorical(y_train, 10)
y_test = utils.to_categorical(y_test, 10)

model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
model.summary()

history = model.fit(X_train, y_train, batch_size=128, epochs=20, validation_split=0.2)
score = model.evaluate(X_test, y_test)
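# Follow-up sketch (not in the original): with metrics=['accuracy'],
# model.evaluate returns [loss, accuracy].
print('Test loss: %.4f, test accuracy: %.4f' % (score[0], score[1]))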
Exemplo n.º 54
0
data = []
labels = []
for imgPath in tqdm(imgPaths):
    image = load_img(imgPath, target_size=(28, 28), grayscale=True)
    image = img_to_array(image)
    data.append(image)

    # the class label is the name of the image's parent directory
    label = imgPath.split(os.path.sep)[-2]
    label = int(label)
    labels.append(label)

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.
labels = np.array(labels)

train_input, valid_input, train_target, valid_target = train_test_split(
    data, labels, test_size=0.25, random_state=123)

train_target = to_categorical(train_target, num_classes)
valid_target = to_categorical(valid_target, num_classes)

aug = ImageDataGenerator(
    rotation_range=10,       # randomly rotate images by up to 10 degrees
    zoom_range=0.1,          # randomly zoom images
    width_shift_range=0.1,   # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1)  # randomly shift images vertically (fraction of total height)

input_shape = (28, 28, 1)
model = build_model(input_shape)

opt = Adam()
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["acc"])
def train_on_VeRi(model, out_name, fix_layer=-1, xml_file='train_label.xml',
                  image_folder='image_train', ismodelfile=True):
    nb_epoch = 70
    num_classes = 776
    batch_size = 32
    labels = []
    train_names = []
    xmlp = ET.XMLParser(encoding="utf-8")

    # train label file - xml
    f = ET.parse(xml_file, parser=xmlp)
    root = f.getroot()
    for child in root.iter('Item'):
        labels.append(child.attrib['vehicleID'])
        train_names.append(os.path.join(image_folder, child.attrib['imageName']))

    labels = utils.to_categorical(labels, num_classes=776)
    X_train = []

    for name in train_names:
        x = load_img(name, target_size=(img_rows, img_cols))
        x = img_to_array(x)
        X_train.append(x)

    X_train = np.array(X_train, ndmin=4)

    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

    train_generator = train_datagen.flow(
        x=X_train, y=labels,
        batch_size=batch_size)

    if ismodelfile:
        base_model = load_model(model)
    else:
        base_model = model

    x = base_model.get_layer(name='block5_pool').output
    x = Flatten()(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    if fix_layer > -1:
        for layer in model.layers[:fix_layer]:
            layer.trainable = False
    sgd = SGD(lr=1e-3)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit_generator(
        train_generator,
        epochs=nb_epoch,
        steps_per_epoch=1184,
    )
    model.save(out_name)
    return model
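# Hypothetical usage sketch (the model file, output name, and layer index below
# are placeholders, not taken from the source):
# trained = train_on_VeRi('vgg16_base.h5', out_name='veri_finetuned.h5',
#                         fix_layer=15, xml_file='train_label.xml',
#                         image_folder='image_train', ismodelfile=True)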
Exemplo n.º 56
0
def train_model(name, regression_train_data_path, classification_train_data_path,
                regression_dev_data_path, classification_train_dev_path,
                word_index, use_neural_features=True, use_lexicon_features=True, feature_extraction_model=None):
    early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
    lexicon_feature_extractor = LexiconFeatureExtractor()
    print("Preparing Data for {} model ...".format(name))
    train_x, train_y_labels = data_utils.load_affect_data(classification_train_data_path, is_label_numeric=False)
    _, train_y_scores = data_utils.load_affect_data(regression_train_data_path)
    train_y_labels = to_categorical(train_y_labels)
    train_y_scores = np.array(train_y_scores)
    dev_x, dev_y_labels = data_utils.load_affect_data(classification_train_dev_path, is_label_numeric=False)
    _, dev_y_scores = data_utils.load_affect_data(regression_dev_data_path)
    dev_y_scores = np.array(dev_y_scores)
    dev_y_labels = to_categorical(dev_y_labels)
    if use_neural_features and use_lexicon_features:
        neural_fetures_train = feature_extraction_model.predict(encode(train_x, word_index))
        lexicon_features_train = [extract_lexicon_features(instance, lexicon_feature_extractor) for instance in train_x]
        lexicon_features_train = np.array(lexicon_features_train)
        neural_fetures_dev = feature_extraction_model.predict(encode(dev_x, word_index))
        lexicon_features_dev = [extract_lexicon_features(instance, lexicon_feature_extractor) for instance in dev_x]
        lexicon_features_dev = np.array(lexicon_features_dev)
        model = create_prediction_model(len(neural_fetures_train[0]), len(lexicon_features_train[0]),
                                        len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=[neural_fetures_train, lexicon_features_train],
                  y={"regression_output": train_y_scores, "classification_output": train_y_labels},
                  validation_data=(
                      [neural_fetures_dev, lexicon_features_dev],
                      {"regression_output": dev_y_scores, "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    elif use_lexicon_features and not use_neural_features:
        lexicon_features_train = [extract_lexicon_features(instance, lexicon_feature_extractor) for instance in train_x]
        lexicon_features_train = np.array(lexicon_features_train)
        lexicon_features_dev = [extract_lexicon_features(instance, lexicon_feature_extractor) for instance in dev_x]
        lexicon_features_dev = np.array(lexicon_features_dev)
        model = create_prediction_model(0, len(lexicon_features_train[0]), len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=lexicon_features_train,
                  y={"regression_output": train_y_scores, "classification_output": train_y_labels},
                  validation_data=(
                      lexicon_features_dev,
                      {"regression_output": dev_y_scores, "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    elif not use_lexicon_features and use_neural_features:
        neural_fetures_train = feature_extraction_model.predict(encode(train_x, word_index))
        neural_fetures_dev = feature_extraction_model.predict(encode(dev_x, word_index))
        model = create_prediction_model(len(neural_fetures_train[0]), 0, len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=neural_fetures_train,
                  y={"regression_output": train_y_scores, "classification_output": train_y_labels},
                  validation_data=(
                      neural_fetures_dev,
                      {"regression_output": dev_y_scores, "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    else:
        raise Exception("You must use one of the feature sets at least!")
    # predict with the same inputs that were used for validation in the chosen branch
    if use_neural_features and use_lexicon_features:
        predictions = model.predict([neural_fetures_dev, lexicon_features_dev])
    elif use_lexicon_features:
        predictions = model.predict(lexicon_features_dev)
    else:
        predictions = model.predict(neural_fetures_dev)
    print("pearson correlation={}".format(pearson_correlation(dev_y_scores, predictions[0])))
    print("Saving {} model ...".format(name))
    model.save("resources/saved_models/{}.model".format(name))
    X = Dense(120, activation='tanh', name='fc1')(X)
    X = Dense(84, activation='tanh', name='fc2')(X)
    X = Dense(10, activation='softmax')(X)
    model = Model(inputs=X_input, outputs=X, name='lenet_5')
    return model
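# The definition above is truncated: only the dense classifier head survives in
# this snippet. Assuming it follows the classic LeNet-5 layout, the missing top of
# lenet_5 would look roughly like this (a sketch, not the original code):
# def lenet_5(input_shape=(28, 28, 1), classes=10):
#     X_input = Input(input_shape)
#     X = Conv2D(6, (5, 5), padding='same', activation='tanh', name='conv1')(X_input)
#     X = AveragePooling2D((2, 2), name='pool1')(X)
#     X = Conv2D(16, (5, 5), activation='tanh', name='conv2')(X)
#     X = AveragePooling2D((2, 2), name='pool2')(X)
#     X = Flatten()(X)
#     # ...followed by the fc1 / fc2 / softmax layers shown above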


model = lenet_5(input_shape=(28, 28, 1), classes=10)
model.summary()

model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

X_train = data_train[:, 1:].reshape((data_train.shape[0], 28, 28, 1))
Y_train = to_categorical(data_train[:, 0])
model.fit(X_train, Y_train, epochs=10, batch_size=16)

X_val = data_val[:, 1:].reshape((data_val.shape[0], 28, 28, 1))
Y_val = to_categorical(data_val[:, 0])
preds = model.evaluate(X_val, Y_val)
print("Validation loss=" + str(preds[0]))
print("Validation accuracy=" + str(preds[1]))

X_test = data_test.reshape((data_test.shape[0], 28, 28, 1))
predicted = np.argmax(model.predict(X_test), axis=1)
with open('data/submission.csv', 'w') as f:
    f.write('ImageId,Label\n')
    for i in range(len(predicted)):
        f.write(str(i + 1) + ',' + str(predicted[i]) + '\n')
print(predicted)
Exemplo n.º 58
0
sess.run(tf.global_variables_initializer())

print("Testing started")

scale = 3

BATCH_SIZE = 128
cfar10 = CFAR10.KittiData(path=["KITTI/train_y.npy", "KITTI/label_y.npy"])

accuracy = []
while True:
    xs, ys = cfar10.next_batch(batch_size=BATCH_SIZE, shuffle=False)
    xs = scale * xs
    lo = sess.run(layerout7, {input_real: xs, output_real: ys})
    layerout = np.argmin(lo, axis=1)
    layerout = to_categorical(layerout, 10)
    accurate = 0
    for i in range(len(ys)):
        if (layerout[i] == ys[i]).all():
            accurate = accurate + 1
    # divide by the actual batch length so a final partial batch is handled correctly
    accurate = accurate / float(len(ys))
    accuracy.append(accurate)
    step = sess.run(step_inc_op)
    if step % 10 == 0:
        # use NumPy here instead of adding a new tf.reduce_mean op to the graph
        # on every report step
        print('Step: ' + repr(step) + ', ' + 'Accurate: ' +
              repr(float(np.mean(accuracy))))

    if step == 78:
        break
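# Follow-up sketch (not in the original): once the loop exits after 78 batches,
# the overall test accuracy is simply the mean of the per-batch scores.
print('Final accuracy: %.4f' % float(np.mean(accuracy)))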
Exemplo n.º 59
0
def label2vec(labels):
    # map string labels to integer ids with the already fitted encoder
    labels1 = encoder.transform(labels)
    one_hot_labels1 = to_categorical(labels1, num_classes=102)
    return labels1, one_hot_labels1
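# Hypothetical usage sketch (assumes `encoder` is a scikit-learn LabelEncoder that
# was fitted beforehand on all 102 classes; the label strings are placeholders):
# int_ids, one_hot = label2vec(['class_a', 'class_b', 'class_c'])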
Exemplo n.º 60
0
    else:
        test_images.append(image)
        test_labels.append(label)
        to_train = 0

# ----------------- Keras: build the model -----------------

#flatten data
dataDim = np.prod(images[0].shape)
train_data = flatten(dataDim, train_images)
test_data = flatten(dataDim, test_images)
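# `flatten` is not defined in this snippet; presumably it reshapes every image
# into a 1-D vector of length dataDim, roughly:
# def flatten(dim, images):
#     return np.array([img.reshape(dim) for img in images])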

#change labels to categorical
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

#determine the number of classes
classes = np.unique(train_labels)
nClasses = len(classes)


#three layers
#activation function: both
#neurons: 256
model = Sequential()
model.add(Dense(256, activation='tanh', input_shape=(dataDim,)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='tanh'))
model.add(Dropout(0.2))