Example #1
    def get_generator(self, batch_size):
        board_tensors = self.board_tensors
        policy_tensors = self.policy_tensors
        value_tensors = self.value_tensors
        func_number = len(roting_fliping_functions)
        size = board_tensors.shape[0] * func_number  # one augmented sample per symmetry function
        batches = _make_batches(size, batch_size)
        dimshuffle = K.image_data_format() == 'channels_last'
        def generator():
            indice = list(range(size))
            while True:
                np.random.shuffle(indice)
                for batch in batches:
                    idxs = indice[batch[0]:batch[1]]
                    cache_board_tensors = []
                    cache_policy_tensors = []
                    cache_value_tensors = []
                    for idx in idxs:
                        sample_idx = idx // func_number
                        func = roting_fliping_functions[idx % func_number]
                        cache_board_tensors.append(func(board_tensors[sample_idx:sample_idx+1, ...]))
                        cache_policy_tensors.append(func(policy_tensors[sample_idx:sample_idx+1, ...]))
                        cache_value_tensors.append(value_tensors[sample_idx:sample_idx+1, ...])

                    cache_board_tensors = np.concatenate(cache_board_tensors, axis=0)
                    cache_policy_tensors = np.concatenate(cache_policy_tensors, axis=0).reshape((-1, SIZE**2))
                    cache_value_tensors = np.concatenate(cache_value_tensors, axis=0)

                    if dimshuffle:
                        cache_board_tensors = np.transpose(cache_board_tensors, (0, 2, 3, 1))

                    yield (cache_board_tensors, [cache_policy_tensors, cache_value_tensors])

        return generator(), len(batches)
Example #2
def data_generator(x, y, batch_size=50):
    index_array = np.arange(len(x))
    batches = _make_batches(len(x), batch_size)  # fixed: `x_test` was an undefined name
    while 1:
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            x_batch = x[batch_ids]
            y_batch = y[batch_ids]
            yield (x_batch, y_batch)
Example #3
def time_delay_generator(x, y, delays, batch_size, weights=None, shuffle=True):
    '''A generator to make it easy to fit time-delay regression models,
    i.e. a model where the value of y depends on past values of x

    # Arguments
    x: input data, as a Numpy array or a list of Numpy arrays
    y: targets, as a Numpy array, or None for prediction generation
    delays: number of time-steps to include in the model, or an iterable of delays
    batch_size: number of samples per yielded batch
    weights: Numpy array of per-sample weights
    shuffle: whether to shuffle the data (set True for training)

    # Example
    if X_train is (1000,200), Y_train is (1000,1)
    train_gen = time_delay_generator(X_train, Y_train, delays=10, batch_size=100)

    train_gen is a generator that gives:
    x_batch as size (100,10,200) since each of the 100 samples includes the input
    data at the current and nine previous time steps
    y_batch as size (100,1)
    w_batch as size (100,)

    '''

    if isinstance(delays, int):
        delays = range(delays)

    if not isinstance(x, list):
        x = [x]
    index_array = np.arange(x[0].shape[0])

    tlists = [[1, 0] + list(range(2, np.ndim(xx) + 1)) for xx in x]
    batches = _make_batches(x[0].shape[0], batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            batch_ids_delay = [
                np.minimum(np.maximum(0, batch_ids - d), x[0].shape[0] - 1)
                for d in delays
            ]
            x_batch = _standardize_input_data(
                [xx[batch_ids_delay, :].transpose(tt) for xx, tt in zip(x, tlists)],
                ['x_batch' + str(i) for i in range(1, len(x) + 1)])
            if y is None:
                yield x_batch
            else:
                y_batch = _standardize_input_data(y[batch_ids, :], ['y_batch'])
                if weights is not None:
                    w_batch = weights[batch_ids, :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                w_batch[batch_ids < delays[-1]] = 0.  # zero weight where the delay window was clamped at t=0
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
Example #4
def data_generator(x, y, batch_size=50):
    index_array = np.arange(len(x))
    batches = _make_batches(len(x), batch_size)  # fixed: `x_test` was an undefined name
    while 1:
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            x_batch = x[batch_ids]
            y_batch = y[batch_ids]
            yield (x_batch, y_batch)
Example #5
    def train_neural_network(self, words, vocab_dict, batch_size):
        num_word = len(vocab_dict)
        logits, last_state, _, _, _ = self.def_model(num_word, batch_size)
        targets = tf.reshape(self.output_targets, [-1])
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
            len(words))
        cost = tf.reduce_mean(loss)
        learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        Session_config = tf.ConfigProto(allow_soft_placement=True)
        Session_config.gpu_options.allow_growth = True

        batches = _make_batches(len(words), batch_size)

        with tf.Session(config=Session_config) as sess:
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver(tf.global_variables())
            last_epoch = self.load_model(sess, saver, 'model/')

            for epoch in range(last_epoch + 1, 100):
                sess.run(tf.assign(learning_rate, 0.002 * (0.97**epoch)))
                # sess.run(tf.assign(learning_rate, 0.01))
                index_array = np.arange(len(words))
                np.random.shuffle(index_array)

                all_loss = 0.0
                for batch_index, (batch_start,
                                  batch_end) in enumerate(batches):
                    if batch_end - batch_start != batch_size:
                        # print('skip batch {} {}'.format(batch_start, batch_end))
                        continue
                    batch_ids = index_array[batch_start:batch_end]
                    xdata = words[batch_ids]
                    ydata = np.copy(xdata)
                    ydata[:, :-1] = xdata[:, 1:]
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                                feed_dict={
                                                    self.input_data: xdata,
                                                    self.output_targets: ydata
                                                })
                    all_loss = all_loss + train_loss

                    if batch_index % 50 == 1:
                        print(epoch, batch_index, 0.002 * (0.97**epoch),
                              train_loss)

                saver.save(sess, 'model/poetry.module', global_step=epoch)
                print(epoch, ' Loss: ', all_loss * 1.0 / len(batches))
Example #6
    def next_batch(self):
        self.batches = _make_batches(len(self.labels), self.batch_size)
        index_array = np.arange(len(self.labels))
        np.random.shuffle(index_array)
        for batch_index, (batch_start, batch_end) in enumerate(self.batches):
            if batch_end - batch_start != self.batch_size:
                # print('skip batch {} {}'.format(batch_start, batch_end))
                continue
            batch_ids = index_array[batch_start:batch_end]
            xdata = self.sentences[batch_ids]
            y = self.labels[batch_ids]
            yield xdata, y
Example #7
def time_delay_generator_jitter(x, y, delays, batch_size, weights=None, shuffle=True, conv3d=False, jitter=True, jitter_axes=[3,4], max_jitter=1):
    '''A generator to make it easy to fit time-delay regression models,
    i.e. a model where the value of y depends on past values of x

    # Arguments
    x: input data, as a Numpy array
    y: targets, as a Numpy array, or None for prediction generation
    delays: number of time-steps to include in the model
    batch_size: number of samples per yielded batch
    weights: Numpy array of per-sample weights
    shuffle: whether to shuffle the data (set True for training)
    conv3d: whether to order the batch axes for 3D-convolution input
    jitter: whether to randomly roll each batch along jitter_axes
    jitter_axes: axes along which the random roll is applied
    max_jitter: maximum magnitude of the roll, in steps

    # Example
    if X_train is (1000,200), Y_train is (1000,1)
    train_gen = time_delay_generator_jitter(X_train, Y_train, delays=10, batch_size=100)

    train_gen is a generator that gives:
    x_batch as size (100,10,200) since each of the 100 samples includes the input
    data at the current and nine previous time steps
    y_batch as size (100,1)
    w_batch as size (100,)

    '''
    index_array = np.arange(x.shape[0])
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(x.shape[0], batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            batch_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = x[batch_ids, :].transpose(tlist)
            if jitter:
                for j in jitter_axes:
                    x_batch = np.roll(x_batch, np.random.randint(-max_jitter, max_jitter + 1), axis=j)
            x_batch = _standardize_input_data(x_batch, ['x_batch'])
            if y is None:
                yield x_batch
            else:
                y_batch = _standardize_input_data(y[batch_ids[0], :], ['y_batch'])
                if weights is not None:
                    w_batch = weights[batch_ids[0], :][:, 0]
                else:
                    w_batch = np.ones(x_batch[0].shape[0])
                w_batch[batch_ids[0] < delays] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
Example #8
    def next_batch(self):
        assert len(self.data) == len(self.keywords)

        self.batches = _make_batches(len(self.data), self.batch_size)
        index_array = np.arange(len(self.data))
        np.random.shuffle(index_array)
        for batch_index, (batch_start, batch_end) in enumerate(self.batches):
            if batch_end - batch_start != self.batch_size:
                # print('skip batch {} {}'.format(batch_start, batch_end))
                continue
            batch_ids = index_array[batch_start:batch_end]
            xdata = self.data[batch_ids]
            xkeywords = self.keywords[batch_ids]
            yield xdata, xkeywords
Example #9
def train_gen(pairs_train, dist_train, batch_size):
    '''
    Generator used for training the siamese net with keras

    pairs_train:    training pairs
    dist_train:     training labels (target distances)
    batch_size:     number of pairs per yielded batch

    returns:        generator instance
    '''
    batches = _make_batches(len(pairs_train), batch_size)
    while 1:
        random_idx = np.random.permutation(len(pairs_train))
        for batch_start, batch_end in batches:
            p_ = random_idx[batch_start:batch_end]
            x1, x2 = pairs_train[p_, 0], pairs_train[p_, 1]
            y = dist_train[p_]
            yield ([x1, x2], y)
Example #10
def batch_generator(X, y, batch_size, samples_per_epoch):
    while 1:
        index_array = np.arange(X.shape[0])
        np.random.shuffle(index_array)
        batches = _make_batches(samples_per_epoch, batch_size)
        # print("\n",index_array[0:2])
        # print("=====================")
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            # print("\n", batch_start, batch_end, batch_index, len(batch_ids))
            X_batch = _slice_X(X, batch_ids)
            y_batch = _slice_X(y, batch_ids)
            if sps.issparse(X):
                X_batch = X_batch.toarray()  # densify only this batch
            else:
                X_batch = np.array(X_batch)
            y_batch = np.array(y_batch)
            yield (X_batch, y_batch)
Example #11
def time_delay_generator_AE(x, delays, batch_size, shuffle=True, conv3d=False):
    '''A generator to make it easy to fit time-delay autoencoder models,
    i.e. a model trained to reconstruct a window of past values of x

    # Arguments
    x: input data, as a Numpy array
    delays: number of time-steps to include in the model
    batch_size: number of samples per yielded batch
    shuffle: whether to shuffle the data (set True for training)
    conv3d: whether to order the batch axes for 3D-convolution input

    # Example
    if X_train is (1000,200)
    train_gen = time_delay_generator_AE(X_train, delays=10, batch_size=100)

    train_gen is a generator that gives:
    x_batch as size (100,10,200) since each of the 100 samples includes the input
    data at the current and nine previous time steps
    y_batch as the flattened copy of x_batch, with size (100,2000)

    '''
    index_array = np.arange(x.shape[0])
    if conv3d:
        tlist = [1, 2, 0] + list(range(3, np.ndim(x) + 1))
    else:
        tlist = [1, 0] + list(range(2, np.ndim(x) + 1))
    batches = _make_batches(x.shape[0], batch_size)
    while 1:
        if shuffle:
            np.random.shuffle(index_array)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            batch_ids = [np.maximum(0, batch_ids - d) for d in range(delays)]
            x_batch = _standardize_input_data(x[batch_ids, :].transpose(tlist),
                                              ['x_batch'])
            y_batch = _standardize_input_data(
                np.copy(x_batch[0]).reshape((x_batch[0].shape[0], -1)),
                ['y_batch'])
            yield (x_batch, y_batch)
Example #12
def _fit_loop(self,
              f,
              ins,
              out_labels=None,
              batch_size=32,
              epochs=100,
              verbose=1,
              callbacks=None,
              val_f=None,
              val_ins=None,
              shuffle=True,
              callback_metrics=None,
              initial_epoch=0,
              steps_per_epoch=None,
              validation_steps=None):
    """Abstract fit function for f(ins).
    Assume that f returns a list, labeled by out_labels.

    # Arguments
        f: Keras function returning a list of tensors
        ins: List of tensors to be fed to `f`
        out_labels: List of strings, display names of
            the outputs of `f`
        batch_size: Integer batch size or None if unknown.
        epochs: Number of times to iterate over the data
        verbose: Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        val_f: Keras function to call for validation
        val_ins: List of tensors to be fed to `val_f`
        shuffle: Whether to shuffle the data at the beginning of each epoch
        callback_metrics: List of strings, the display names of the metrics
            passed to the callbacks. They should be the concatenation of the
            list of display names of the outputs of `f` and the list of
            display names of the outputs of `val_f`.
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Total number of steps (batches of samples)
            before declaring one epoch finished and starting the
            next epoch. Ignored with the default value of `None`.
        validation_steps: Number of steps to run validation for
            (only if doing validation from data tensors).
            Ignored with the default value of `None`.

    # Returns
        `History` object.

    [A tweaked version.]
    """
    do_validation = False
    if val_f and val_ins:
        do_validation = True
        if verbose and ins and hasattr(ins[0], 'shape') and hasattr(
                val_ins[0], 'shape'):
            print('Train on %d samples, validate on %d samples' %
                  (ins[0].shape[0], val_ins[0].shape[0]))
    if validation_steps:
        do_validation = True
        if steps_per_epoch is None:
            raise ValueError('Can only use `validation_steps` '
                             'when doing step-wise '
                             'training, i.e. `steps_per_epoch` '
                             'must be set.')

    num_train_samples = self._check_num_samples(ins, batch_size,
                                                steps_per_epoch,
                                                'steps_per_epoch')
    if num_train_samples is not None:
        index_array = np.arange(num_train_samples)

    self.history = cbks.History()
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
    if verbose:
        if steps_per_epoch is not None:
            count_mode = 'steps'
        else:
            count_mode = 'samples'
        callbacks += [cbks.ProgbarLogger(count_mode)]
    callbacks = cbks.CallbackList(callbacks)
    out_labels = out_labels or []

    # it's possible to callback a different model than self
    # (used by Sequential models)
    if hasattr(self, 'callback_model') and self.callback_model:
        callback_model = self.callback_model
    else:
        callback_model = self

    callbacks.set_model(callback_model)
    callbacks.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps_per_epoch,
        'samples': num_train_samples,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callbacks.on_train_begin()
    callback_model.stop_training = False
    # for cbk in callbacks:
    #     cbk.validation_data = val_ins

    for epoch in range(initial_epoch, epochs):
        callbacks.on_epoch_begin(epoch)
        epoch_logs = {}
        if steps_per_epoch is not None:
            for step_index in range(steps_per_epoch):
                batch_logs = {}
                batch_logs['batch'] = step_index
                batch_logs['size'] = 1
                callbacks.on_batch_begin(step_index, batch_logs)
                outs = f(ins)

                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(step_index, batch_logs)
                if callback_model.stop_training:
                    break

            if do_validation:
                val_outs = self._test_loop(val_f,
                                           val_ins,
                                           batch_size=batch_size,
                                           steps=validation_steps,
                                           verbose=0)
                if not isinstance(val_outs, list):
                    val_outs = [val_outs]
                # Same labels assumed.
                for l, o in zip(out_labels, val_outs):
                    epoch_logs['val_' + l] = o
        else:
            if shuffle == 'batch':
                index_array = _batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = _make_batches(num_train_samples, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if isinstance(ins[-1], float):
                        # do not slice the training phase flag
                        ins_batch = _slice_arrays(ins[:-1],
                                                  batch_ids) + [ins[-1]]
                    else:
                        ins_batch = _slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                batch_logs['ids'] = batch_ids
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if callback_model.stop_training:
                    break

                if batch_index == len(batches) - 1:  # last batch.
                    if do_validation:
                        val_outs = self._test_loop(val_f,
                                                   val_ins,
                                                   batch_size=batch_size,
                                                   verbose=0)
                        if not isinstance(val_outs, list):
                            val_outs = [val_outs]
                        # same labels assumed
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o
        callbacks.on_epoch_end(epoch, epoch_logs)
        if callback_model.stop_training:
            break
    callbacks.on_train_end()
    return self.history
Example #13
    def _fit_loop(self,
                  f,
                  ins,
                  out_labels=None,
                  batch_size=32,
                  epochs=100,
                  verbose=1,
                  callbacks=None,
                  val_f=None,
                  val_ins=None,
                  shuffle=True,
                  callback_metrics=None,
                  initial_epoch=0,
                  steps_per_epoch=None):
        """Abstract fit function for `f(ins)`.

        Assume that f returns a list, labeled by out_labels.

        # Arguments
            f: Keras function returning a list of tensors
            ins: list of tensors to be fed to `f`
            out_labels: list of strings, display names of
                the outputs of `f`
            batch_size: integer batch size
            epochs: number of times to iterate over the data
            verbose: verbosity mode, 0, 1 or 2
            callbacks: list of callbacks to be called during training
            val_f: Keras function to call for validation
            val_ins: list of tensors to be fed to `val_f`
            shuffle: whether to shuffle the data at the beginning of each epoch
            callback_metrics: list of strings, the display names of the metrics
                passed to the callbacks. They should be the concatenation of the
                list of display names of the outputs of `f` and the list of
                display names of the outputs of `val_f`.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)
            steps_per_epoch: Total number of steps (batches of samples)
                before declaring one epoch finished and starting the
                next epoch. The default `None` is equal to the number
                of unique samples in your dataset divided by the batch
                size, or 1 if that cannot be determined.

        # Returns
            `History` object.
        """
        do_validation = False
        if val_f and val_ins:
            do_validation = True
            if verbose and ins and hasattr(ins[0], 'shape'):
                print('Train on %d samples, validate on %d samples' %
                      (ins[0].shape[0], val_ins[0].shape[0]))

        if steps_per_epoch is not None:
            num_train_samples = steps_per_epoch
        else:
            if ins and hasattr(ins[0], 'shape'):
                num_train_samples = ins[0].shape[0]
            else:
                # May happen if we are running `fit` without Numpy input data,
                # i.e. if all inputs to the models are data tensors
                # instead of placeholders.
                # In that case we will run `fit` over a single batch.
                num_train_samples = batch_size
                verbose = 2
        index_array = np.arange(num_train_samples)

        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
        if verbose:
            # callbacks += [cbks.ProgbarLogger()]
            callbacks += [ProgbarLogger_TFRecord()]
        callbacks = cbks.CallbackList(callbacks)
        out_labels = out_labels or []

        # it's possible to callback a different model than self
        # (used by Sequential models)
        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self

        callbacks.set_model(callback_model)
        callbacks.set_params({
            'batch_size': batch_size,
            'epochs': epochs,
            'samples': num_train_samples,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics or [],
        })
        callbacks.on_train_begin()
        callback_model.stop_training = False
        for cbk in callbacks:
            cbk.validation_data = val_ins

        for epoch in range(initial_epoch, epochs):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = _batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = _make_batches(num_train_samples, batch_size)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if isinstance(ins[-1], float):
                        # Do not slice the training phase flag.
                        ins_batch = \
                            _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = _slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if callback_model.stop_training:
                    break

                if batch_index == len(batches) - 1:  # Last batch.
                    if do_validation:
                        val_outs = self._test_loop(val_f,
                                                   val_ins,
                                                   batch_size=batch_size,
                                                   verbose=0)
                        if not isinstance(val_outs, list):
                            val_outs = [val_outs]
                        # Same labels assumed.
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o
            callbacks.on_epoch_end(epoch, epoch_logs)
            if callback_model.stop_training:
                break
        callbacks.on_train_end()
        return self.history
Example #14
    def generator(self):

        while True:
            batches = _make_batches(size=self.total_images,
                                    batch_size=self.batch_size)
            for start, end in batches:
                arr = []
                labels = []
                cur_batch = self.image_paths[start:end]

                for image_path in cur_batch:
                    # print image_path
                    img = imread(
                        fname=os.path.join(self.data_path, image_path))

                    # ensure the image has exactly 3 channels
                    ndim = len(img.shape)

                    if ndim == 2:
                        # grayscale: replicate the single channel 3 times
                        img = img[..., np.newaxis]
                        img = np.tile(A=img, reps=(1, 1, 3))

                    if ndim == 3 and img.shape[-1] == 4:
                        # RGBA: drop the alpha channel
                        img = img[..., :3]

                    # resizing image maintaining aspect ratio
                    img = resize_image(img=img, size=self.input_size)

                    if self.training:
                        # random cropping while training
                        img = random_crop_image(img=img, size=self.input_size)
                        img = augment(img=img,
                                      horizontal_flip=True,
                                      vertical_flip=True,
                                      brightness=True,
                                      contrast=True,
                                      rotation=True,
                                      translation=True,
                                      blur=True,
                                      noise=True)
                    else:
                        # center cropping
                        h, w, c = img.shape
                        center_h = h // 2  # integer centres so slice bounds stay ints
                        center_w = w // 2
                        center_new_img = self.input_size // 2
                        new_x1 = center_w - center_new_img
                        new_y1 = center_h - center_new_img
                        new_x2 = center_w + center_new_img
                        new_y2 = center_h + center_new_img
                        if self.input_size % 2 == 1:
                            new_x2 += 1
                            new_y2 += 1
                        img = img[new_y1:new_y2, new_x1:new_x2]

                    arr.append(img)
                    cls = image_path.split('/')[0]
                    id_for_cls = self.cls2id[cls]
                    labels.append(id_for_cls)

                arr = np.array(arr)
                arr = arr.astype('float32')  # astype returns a copy; it must be reassigned

                # scale pixel values from [0, 255] to [-1, 1]
                arr /= 255.
                arr -= 0.5
                arr *= 2.

                # one hot encoding
                labels = to_categorical(y=labels,
                                        num_classes=self.total_classes)
                yield (arr, labels)
Example #15
def predict(predict_var,
            x_unlabeled,
            inputs,
            y_true,
            batch_sizes,
            x_labeled=None,
            y_labeled=None):
    '''
    Evaluates predict_var, batchwise, over all points in x_unlabeled
    and x_labeled.

    predict_var:        list of tensors to evaluate and return
    x_unlabeled:        unlabeled input data
    inputs:             dictionary containing input_types and
                        input_placeholders as key, value pairs, respectively
    y_true:             true labels tensorflow placeholder
    batch_sizes:        dictionary containing input_types and batch_sizes as
                        key, value pairs, respectively
    x_labeled:          labeled input data
    y_labeled:          labeled input labels

    returns:    a list of length n containing the result of all tensors
                in return_var, where n = len(x_unlabeled) + len(x_labeled)
    '''
    x_unlabeled, x_labeled, y_labeled = check_inputs(x_unlabeled, x_labeled,
                                                     y_labeled, y_true)

    # combined data
    x = np.concatenate((x_unlabeled, x_labeled), 0)
    # get shape of y_true
    y_shape = y_true.get_shape()[1:K.ndim(y_true)].as_list()

    # calculate batches for predict loop
    unlabeled_batch_size = batch_sizes.get('Unlabeled', 0)
    labeled_batch_size = batch_sizes.get('Labeled', 0)
    if 'Labeled' in batch_sizes and 'Unlabeled' in batch_sizes:
        assert unlabeled_batch_size == labeled_batch_size
    batch_size = min(len(x), max(unlabeled_batch_size, labeled_batch_size))
    batches = _make_batches(len(x), batch_size)

    y_preds = []
    # predict over all points
    for i, (batch_start, batch_end) in enumerate(batches):
        feed_dict = {K.learning_phase(): 0}

        # feed corresponding input for each input_type
        for input_type, input_placeholder in inputs.items():
            if input_type == 'Unlabeled':
                feed_dict[input_placeholder] = x[batch_start:batch_end]
            elif input_type == 'Orthonorm':
                batch_ids = np.random.choice(len(x),
                                             size=min(len(x),
                                                      batch_sizes[input_type]),
                                             replace=False)
                feed_dict[input_placeholder] = x[batch_ids]
            elif input_type == 'Labeled':
                if len(x_labeled):
                    batch_ids = np.random.choice(len(x_labeled),
                                                 size=min(
                                                     batch_sizes[input_type],
                                                     len(x_labeled)),
                                                 replace=False)
                    feed_dict[input_placeholder] = x_labeled[batch_ids]
                    feed_dict[y_true] = y_labeled[batch_ids]
                else:
                    # we have no labeled points, so feed an empty array
                    feed_dict[input_placeholder] = x[0:0]
                    feed_dict[y_true] = np.empty([0] + y_shape)
            else:
                raise Exception(
                    "Unrecognized feed name ['{}']".format(input_type))

        # evaluate the batch
        y_pred_batch = np.asarray(K.get_session().run(predict_var,
                                                      feed_dict=feed_dict))
        y_preds.append(y_pred_batch)

    if len(y_preds[0].shape):
        return np.concatenate(y_preds)
    else:
        return np.sum(y_preds)
Example #16
    def keras_generator(self, delays=7, batch_size=400, cell=0, scale=5,
                        flatten=True, center=None, crop_size=None, shuffle=True,
                        color_chan=False, log_transform_events=True,
                        correct_eye_pos=False, gaussian_filter=0):
        from keras.engine.training import _standardize_input_data, _make_batches, _standardize_sample_weights
        
        if isinstance(cell, int):
            cell = [cell]

        if isinstance(delays, int):
            delays = range(delays)

        (stim, events, frame_numbers, weights, shifts) = self.vectorize_data(delays)


        evidx = np.where(events)[0]
        print(str(len(frame_numbers)) + ' Samples')
        print(str(len(evidx)) + ' Events')

        if correct_eye_pos:
            sh = stim.shape
            shift_stim_shape = (len(shifts),
                                sh[1] + 2*np.maximum(self.min_max_shift[1][0], -self.min_max_shift[0][0]) + 3,
                                sh[2] + 2*np.maximum(self.min_max_shift[1][1], -self.min_max_shift[0][1]) + 3)


            out_stim = np.zeros(shift_stim_shape, dtype='float32')


            shifts = shifts + [shift_stim_shape[1] // 2, shift_stim_shape[2] // 2]
            good_shift_locations = ~np.isnan(shifts[:, 0])
            for dd in delays:
                weights[np.minimum(np.where(np.isnan(shifts[:,0]))[0] + dd, len(weights)-1)] = 0

            for i in range(len(shifts)):
                if good_shift_locations[i]:
                    # print(-sh[1]/2 + np.int32(shifts[i, 0]))
                    # print(np.int32(shifts[i, 0]) + sh[1]/2)
                    out_stim[i, np.int32(shifts[i, 0]) - sh[1] // 2:np.int32(shifts[i, 0]) + sh[1] // 2,
                                np.int32(shifts[i, 1]) - sh[2] // 2:np.int32(shifts[i, 1]) + sh[2] // 2] = stim[frame_numbers[i]]

            stim = out_stim
            frame_numbers_i = np.arange(len(frame_numbers))
        else:
            frame_numbers_i = frame_numbers

        if color_chan:
            stim = stim[:, None, :, :]

        if crop_size is not None and center is not None:
            half = crop_size // 2  # integer half-size keeps the slice bounds ints
            stim = stim[:, (center[0] - half):(center[0] + half),
                        (center[1] - half):(center[1] + half)]

        if flatten:
            stim = stim.reshape(stim.shape[0], -1)

        events = np.asarray(events)
        events = events[cell].T * scale


        if log_transform_events:
            events = np.log(1 + events)


        if gaussian_filter > 0:
            events = gaussian_filter1d(events, gaussian_filter)

        index_array = np.arange(events.shape[0])

        tlist = [1, 0] + list(range(2, np.ndim(stim) + 1))
        batches = _make_batches(events.shape[0], batch_size)
        while 1:
            if shuffle:
                np.random.shuffle(index_array)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                frame_numbers_b = frame_numbers[batch_ids]
                batch_ids_stim = [frame_numbers_i[np.maximum(0, batch_ids - d)] for d in delays]
                x_batch = _standardize_input_data(stim[batch_ids_stim, :].transpose(tlist), ['x_batch'])

                y_batch = _standardize_input_data(events[batch_ids, :], ['y_batch'])

                w_batch = weights[batch_ids]

                w_batch[frame_numbers_b < delays[-1]] = 0.
                w_batch = _standardize_sample_weights(w_batch, ['w_batch'])
                yield (x_batch, y_batch, w_batch)
Example #17
    def _fit_loop(self,
                  f: callable,
                  ins: List[numpy.array],
                  out_labels: List[str] = None,
                  batch_size: int = 32,
                  epochs: int = 100,
                  verbose: int = 1,
                  callbacks: List[Callback] = None,
                  val_f: callable = None,
                  val_ins: List[numpy.array] = None,
                  shuffle: bool = True,
                  callback_metrics: List[str] = None,
                  initial_epoch: int = 0):
        """
        Abstract fit function which preprocesses and batches
        data before training a model. We override this keras backend
        function to support multi-gpu training via splitting a large
        batch size across multiple gpus. This function is broadly the
        same as the Keras backend version aside from this - changed elements
        have corresponding comments attached.

        Note that this should not be called directly - it is used by calling
        model.fit().

        Assume that step_function returns a list, labeled by out_labels.

        Parameters
        ----------
        f: A callable ``Step`` or a Keras ``Function``, required.
            A DeepQA Step or Keras Function returning a list of tensors.
        ins: List[numpy.array], required.
            The list of tensors to be fed to ``step_function``.
        out_labels: List[str], optional (default = None).
            The display names of the outputs of ``step_function``.
        batch_size: int, optional (default = 32).
            The integer batch size.
        epochs: int, optional (default = 100).
            Number of times to iterate over the data.
        verbose: int, optional, (default = 1)
            Verbosity mode, 0, 1 or 2.
        callbacks: List[Callback], optional (default = None).
            A list of Keras callbacks to be called during training.
        val_f: A callable ``Step`` or a Keras ``Function``, optional (default = None).
            The Keras function to call for validation.
        val_ins: List[numpy.array], optional (default = None).
            A list of tensors to be fed to ``val_f``.
        shuffle: bool, optional (default = True).
            Whether to shuffle the data at the beginning of each epoch.
        callback_metrics: List[str], optional (default = None).
            A list of strings, the display names of the metrics passed to the
            callbacks. They should be the concatenation of the list of display
            names of the outputs of ``f`` and the list of display names of the
            outputs of ``val_f``.
        initial_epoch: int, optional (default = 0).
            The epoch at which to start training (useful for resuming a previous training run).

        Returns
        -------
        A Keras ``History`` object.

        """
        do_validation = False
        if val_f and val_ins:
            do_validation = True
            if verbose:
                print('Train on %d samples, validate on %d samples' %
                      (ins[0].shape[0], val_ins[0].shape[0]))

        if ins and hasattr(ins[0], 'shape'):
            num_train_samples = ins[0].shape[0]
        else:
            # May happen if we are running `fit` without Numpy input data,
            # i.e. if all inputs to the models are data tensors
            # instead of placeholders.
            # In that case we will run `fit` over a single batch.
            num_train_samples = batch_size
            verbose = 2
        index_array = numpy.arange(num_train_samples)
        out_labels = out_labels or []
        callbacks, callback_model = self._prepare_callbacks(
            callbacks, val_ins, epochs, batch_size, num_train_samples,
            callback_metrics, do_validation, verbose)

        for epoch in range(initial_epoch, epochs):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = _batch_shuffle(index_array, batch_size)
            elif shuffle:
                numpy.random.shuffle(index_array)

            batches = _make_batches(num_train_samples, batch_size)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if isinstance(ins[-1], float):
                        # Do not slice the training phase flag.
                        ins_batch = _slice_arrays(ins[:-1],
                                                  batch_ids) + [ins[-1]]
                    else:
                        ins_batch = _slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                # Here is the main difference between a single gpu model and one split
                # across multiple gpus. In our multiple gpu model, all of the inputs
                # are replicated num_gpus times, so we need to split our large batch
                # into the corresponding sets of smaller batches for each model.
                if self.num_gpus > 1:

                    # The Keras learning phase is a global variable used across model towers.
                    # If it is present, we remove it before splitting up the inputs
                    # and add it back on afterwards.
                    if isinstance(ins_batch[-1], float):
                        model_inputs = self._multi_gpu_batch(ins_batch[:-1])
                        model_inputs.append(ins_batch[-1])
                    else:
                        model_inputs = self._multi_gpu_batch(ins_batch)
                    ins_batch = model_inputs

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                for label, output in zip(out_labels, outs):
                    batch_logs[label] = output

                callbacks.on_batch_end(batch_index, batch_logs)

                if batch_index == len(batches) - 1:  # Last batch.
                    if do_validation:
                        # If we are using multiple gpus, our batch size will be
                        # scaled up accordingly. However, validation will run
                        # on a single gpu, so we divide by the number of gpus
                        # to avoid OOM errors.
                        if self.num_gpus > 1:
                            val_batch_size = int(batch_size / self.num_gpus)  # pylint: disable=no-member
                        else:
                            val_batch_size = batch_size

                        val_outs = self._test_loop(val_f,
                                                   val_ins,
                                                   batch_size=val_batch_size,
                                                   verbose=0)
                        if not isinstance(val_outs, list):
                            val_outs = [val_outs]
                        # Same labels assumed.
                        for label, output in zip(out_labels, val_outs):
                            epoch_logs['val_' + label] = output
            callbacks.on_epoch_end(epoch, epoch_logs)
            if callback_model.stop_training:  # pylint: disable=no-member
                break
        callbacks.on_train_end()
        return self.history