def predict(self, x, batch_size=None, steps=None, max_queue_size=10,
            workers=1):
    """Generates output predictions for the input samples.

    Computation is done in batches.

    # Arguments
        x: Input data. It could be:
            - A Numpy array (or array-like).
            - A generator or `keras.utils.Sequence` returning
              `(inputs, targets)` or
              `(inputs, targets, sample weights)`.
        batch_size: Integer or `None`.
            Number of samples per batch of computation.
            If unspecified, `batch_size` will default to 32.
            Do not specify the `batch_size` if your data is in the
            form of generators, or `keras.utils.Sequence` instances
            (since they generate batches).
        steps: Total number of steps (batches of samples)
            before declaring the prediction round finished.
            Only forwarded to `predict_generator` for generator or
            `keras.utils.Sequence` input; array-like input is always
            consumed in full.
            Ignored with the default value of `None`.
        max_queue_size: Integer. Used for generator or
            `keras.utils.Sequence` input only. Maximum size for the
            generator queue. If unspecified, `max_queue_size` will
            default to 10.
        workers: Integer. Used for generator or `keras.utils.Sequence`
            input only. Maximum number of processes to spin up when
            using process-based threading. If unspecified, `workers`
            will default to 1.

    # Returns
        Numpy array(s) of predictions. A single array when
        `predict_on_batch` returns an array; a list with one array per
        output when it returns a list or tuple.

    # Raises
        ValueError: In case of mismatch between the provided
            input data and the model's expectations, if `batch_size`
            is given together with a generator or Sequence, or if
            array-like input produces no batches.
    """
    is_generator_like = data_utils.is_generator_or_sequence(x)
    if batch_size is not None and is_generator_like:
        raise ValueError(
            'The `batch_size` argument must not be specified when'
            ' using a generator or Sequence as an input.')
    if batch_size is None:
        # Backwards compatibility
        batch_size = 32

    # Case 1: generator-like. Input is Python generator, or Sequence object.
    if is_generator_like:
        return self.predict_generator(x,
                                      steps=steps,
                                      max_queue_size=max_queue_size,
                                      workers=workers)

    # Case 2: Numpy array-like.
    outputs = [
        self.predict_on_batch(x[start:stop])
        for start, stop in generic_utils.make_batches(len(x), batch_size)
    ]
    if not outputs:
        # Without at least one batch there is nothing to concatenate and
        # the output shape is unknown, so fail with an explicit message.
        raise ValueError('Cannot run `predict` on empty input data.')

    # Batches are joined along the sample axis. `np.concatenate` is used
    # rather than `np.vstack` because `vstack` promotes 1-D batch outputs to
    # rows, which stacks batches side by side instead of end to end.
    if isinstance(outputs[0], (list, tuple)):
        # Multi-output case: regroup per output, then join each separately.
        return [np.concatenate(per_output, axis=0)
                for per_output in zip(*outputs)]
    return np.concatenate(outputs, axis=0)
def _validate_arguments(is_sequence, is_dataset, use_multiprocessing, workers,
                        steps_per_epoch, validation_data, validation_steps,
                        mode, kwargs):
    """Checks the generator-loop arguments and raises on the first bad one.

    The checks run in a fixed order: a possible duplicate-data warning is
    logged (never raised), then the step count is checked, then the
    validation step count, and finally the leftover keyword arguments.

    Args:
      is_sequence: Boolean, whether data is a
        `keras.utils.data_utils.Sequence` instance.
      is_dataset: Boolean, whether data is a dataset instance. Datasets are
        exempt from the `steps_per_epoch` requirement.
      use_multiprocessing: Boolean, whether process-based workers were
        requested. Only consulted for the duplicate-data warning.
      workers: Integer number of workers. Only consulted for the
        duplicate-data warning, which needs more than one worker.
      steps_per_epoch: Total number of steps (batches of samples) per epoch,
        or `None`. Must be set unless `is_dataset` is true.
      validation_data: Validation input. When it is generator-like or a
        `tf.data.Iterator`, and not a `keras.utils.data_utils.Sequence`,
        `validation_steps` becomes mandatory.
      validation_steps: Total number of steps (batches of samples) before
        declaring validation finished. Any falsy value counts as missing.
      mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT. Selects
        which argument name is reported when the step count is missing.
      kwargs: Additional arguments kept for backwards compatibility; the
        only accepted key is `'steps'`.

    Raises:
      ValueError: If `steps_per_epoch` or `validation_steps` are not passed
        for data types that require them, or if unrecognized keyword
        arguments are passed.
    """
    # Plain generators may be replicated across worker processes, so warn.
    if not is_sequence:
        if use_multiprocessing and workers > 1:
            logging.warning(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the `keras.utils.Sequence`'
                            ' class.'))

    steps_required = not is_dataset
    if steps_required and steps_per_epoch is None:
        if mode == ModeKeys.TRAIN:
            arg_name = 'steps_per_epoch'
        else:
            arg_name = 'steps'
        raise ValueError('Please specify the number of steps via the '
                         '`{}` argument.'.format(arg_name))

    validation_is_iterable = (
        data_utils.is_generator_or_sequence(validation_data) or
        isinstance(validation_data, tf.data.Iterator))
    if validation_is_iterable:
        # A Sequence is exempt from the explicit validation step count.
        is_validation_sequence = isinstance(validation_data,
                                            data_utils.Sequence)
        if not is_validation_sequence and not validation_steps:
            raise ValueError('Please specify the `validation_steps` argument.')

    unrecognized = [key for key in kwargs if key != 'steps']
    if unrecognized:
        raise ValueError('Invalid arguments passed: {}'.format(unrecognized))
def convert_to_generator_like(data, batch_size=None, steps_per_epoch=None,
                              epochs=1, shuffle=False):
    """Wraps NumPy or EagerTensor inputs so they can be iterated in batches.

    Generators, `keras.utils.data_utils.Sequence` objects and iterators are
    passed through untouched, a `Dataset` is turned into a one-shot iterator,
    and anything else is treated as a structure of arrays and wrapped in a
    batch-yielding generator.

    Arguments:
      data: Either a generator or `keras.utils.data_utils.Sequence` object or
        `Dataset`, `Iterator`, or a {1,2,3}-tuple of NumPy arrays or
        EagerTensors. If a tuple, the elements represent
        `(x, y, sample_weights)`; elements that flatten to nothing but `None`
        are dropped before further processing.
      batch_size: Batch size used when building a generator from arrays.
        Required for that case only.
      steps_per_epoch: Steps of the generator to run each epoch. For a
        `keras.utils.data_utils.Sequence`, `None` is replaced by `len(data)`.
        For array inputs the passed value is discarded and recomputed as
        `ceil(num_samples / batch_size)`.
      epochs: Total number of epochs the array generator runs through.
      shuffle: Whether the array generator reshuffles the sample indices at
        the start of every epoch.

    Returns:
      A `(generator_like, steps_per_epoch)` tuple, where `generator_like` is
      a generator, `keras.utils.data_utils.Sequence`, or iterator.

    Raises:
      ValueError: If `batch_size` is not provided for NumPy or EagerTensor
        inputs.
    """
    if isinstance(data, tuple):
        # Keep only the tuple entries that hold at least one non-`None` leaf
        # (drops placeholder `targets` / `sample_weights`).
        data = tuple(
            part for part in data
            if any(leaf is not None for leaf in tf.nest.flatten(part)))

    already_iterable = (
        data_utils.is_generator_or_sequence(data) or
        isinstance(data, tf.data.Iterator))
    if already_iterable:
        if steps_per_epoch is None and isinstance(data, data_utils.Sequence):
            steps_per_epoch = len(data)
        return data, steps_per_epoch

    if isinstance(data, tf.data.Dataset):
        iterator = tf.compat.v1.data.make_one_shot_iterator(data)
        return iterator, steps_per_epoch

    # From here on `data` is a structure of NumPy arrays or EagerTensors.
    # The sample count is the leading dimension of its first flattened leaf.
    first_leaf = tf.nest.flatten(data)[0]
    num_samples = int(first_leaf.shape[0])
    if batch_size is None:
        raise ValueError(
            'When passing input data as arrays, do not specify '
            '`steps_per_epoch`/`steps` argument. Please use `batch_size` instead.'
        )
    steps_per_epoch = int(math.ceil(num_samples / batch_size))

    def _batch_generator(structure):
        """Yields batches sliced from a structure of NumPy/EagerTensors."""
        sample_order = np.arange(num_samples)
        for _ in range(epochs):
            if shuffle:
                np.random.shuffle(sample_order)
            for lo, hi in generic_utils.make_batches(num_samples, batch_size):
                chosen = sample_order[lo:hi]
                # Indices are a contiguous run only while they are unshuffled.
                sliced = training_utils.slice_arrays(
                    tf.nest.flatten(structure), chosen,
                    contiguous=(not shuffle))
                yield tf.nest.pack_sequence_as(structure, sliced)

    return _batch_generator(data), steps_per_epoch