Example No. 1
def input_fn(is_training,
             data_dir,
             batch_size,
             num_epochs=1,
             num_parallel_calls=1):
    """Input function which provides batches for train or eval.
  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
  Returns:
    A dataset that can be used for iteration.
  """
    filenames = get_filenames(is_training, data_dir)
    dataset = tf.data.Dataset.from_tensor_slices(filenames)

    if is_training:
        # Shuffle the input files
        dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

    # Convert to individual records
    dataset = dataset.flat_map(tf.data.TFRecordDataset)

    return resnet.process_record_dataset(dataset, is_training, batch_size,
                                         _SHUFFLE_BUFFER, parse_record,
                                         num_epochs, num_parallel_calls)
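
All nine examples delegate the shuffle/repeat/parse/batch work to
resnet.process_record_dataset, which is not shown on this page. As a rough
guide only, here is a minimal sketch of what such a helper typically does,
assuming the signature used by Examples 5, 6 and 9 (Example 7's variant omits
the shuffle-buffer argument); this is an illustration, not the actual library
code:

def process_record_dataset(dataset, is_training, batch_size, shuffle_buffer,
                           parse_record_fn, num_epochs=1, num_parallel_calls=1,
                           examples_per_epoch=0, multi_gpu=False):
    """Sketch of the shared tf.data pipeline the examples above rely on."""
    # Prefetch raw records to overlap I/O with preprocessing.
    dataset = dataset.prefetch(buffer_size=batch_size)

    if is_training:
        # Shuffle before repeating so every epoch sees a fresh order.
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)

    dataset = dataset.repeat(num_epochs)

    if is_training and multi_gpu:
        # Drop the leftover examples that would form a final partial batch;
        # a batch must divide evenly across GPUs.
        total_examples = num_epochs * examples_per_epoch
        dataset = dataset.take(batch_size * (total_examples // batch_size))

    # Decode each raw record into (image, label), then batch.
    dataset = dataset.map(lambda value: parse_record_fn(value, is_training),
                          num_parallel_calls=num_parallel_calls)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(1)
    return dataset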
Example No. 2
def input_fn(is_training,
             data,
             labels,
             batch_size,
             num_epochs=1,
             num_parallel_calls=1,
             multi_gpu=False):
    """Input function which provides batches for train or eval.
  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed
      when that is handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))

    if is_training:
        # Shuffle the in-memory examples (records, not files)
        dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

    num_images = len(labels)
    return resnet.process_record_dataset(dataset, is_training, batch_size,
                                         num_images, parse_record, num_epochs,
                                         num_parallel_calls)
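
Unlike the file-based variants, Example 2 builds the dataset directly from
in-memory arrays with tf.data.Dataset.from_tensor_slices((data, labels)), so
each element is already one (example, label) pair. Note that its shuffle
buffer reuses _NUM_TRAIN_FILES, a file count; for a full shuffle of an
in-memory dataset the buffer would normally be sized to the number of
examples (here, len(labels)).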
Example No. 3
def input_fn(is_training,
             data_dir,
             batch_size,
             num_epochs=1,
             num_parallel_calls=1):
    """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
    filenames = get_filenames(is_training, data_dir)
    dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

    return resnet.process_record_dataset(dataset, is_training, batch_size,
                                         _NUM_IMAGES['train'], parse_record,
                                         num_epochs, num_parallel_calls)
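
Examples 3, 6, 8 and 9 read the CIFAR-10 binary format, where every record has
the same fixed length: 1 label byte followed by a 32x32x3 image stored
depth-major, so _RECORD_BYTES is presumably 1 + 3 * 32 * 32 = 3073 (the
constant itself is not shown). A minimal sketch of the parse_record such a
pipeline assumes, with preprocessing and augmentation omitted:

def parse_record(raw_record, is_training):
    """Sketch: decode one fixed-length CIFAR-10 record into (image, label)."""
    record_vector = tf.decode_raw(raw_record, tf.uint8)
    # The first byte is the label.
    label = tf.cast(record_vector[0], tf.int32)
    # The remaining 3072 bytes are the image in (channel, height, width) order.
    depth_major = tf.reshape(record_vector[1:_RECORD_BYTES], [3, 32, 32])
    # Transpose to the conventional (height, width, channel) layout.
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
    return image, label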
Example No. 4
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Input function which provides batches for train or eval.
  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(filenames)

  if is_training:
    # Shuffle the input files
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  # Convert to individual records
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(dataset, is_training, batch_size,
      _SHUFFLE_BUFFER, parse_record, num_epochs, num_parallel_calls)
Example No. 5
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input function which provides batches for train or eval.
  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed
      when that is handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.Dataset.from_tensor_slices(filenames)

  if is_training:
    # Shuffle the input files
    dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)

  num_images = (_NUM_IMAGES['train'] if is_training
                else _NUM_IMAGES['validation'])

  # Convert to individual records
  dataset = dataset.flat_map(tf.data.TFRecordDataset)

  return resnet.process_record_dataset(
      dataset, is_training, batch_size, _SHUFFLE_BUFFER, parse_record,
      num_epochs, num_parallel_calls, examples_per_epoch=num_images,
      multi_gpu=multi_gpu)
Example No. 6
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1, multi_gpu=False):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed
      when that is handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  num_images = (_NUM_IMAGES['train'] if is_training
                else _NUM_IMAGES['validation'])

  # The fourth positional argument is presumably the shuffle buffer (compare
  # Examples 1 and 4, which pass _SHUFFLE_BUFFER there); using the full
  # training-set size allows a complete shuffle.
  return resnet.process_record_dataset(dataset, is_training, batch_size,
      _NUM_IMAGES['train'], parse_record, num_epochs, num_parallel_calls,
      examples_per_epoch=num_images, multi_gpu=multi_gpu)
Example No. 7
def input_fn(is_training,
             data_dir,
             channel_name,
             batch_size,
             num_epochs=1,
             num_parallel_calls=1,
             multi_gpu=False,
             mode='File'):
    """Input function which provides batches for train or eval.
  Args:
    is_training: A boolean denoting whether the input is for training.
    channel_name: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed
      when that is handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
    num_images = (_NUM_IMAGES['train'] if is_training
                  else _NUM_IMAGES['validation'])

    if mode == 'File':
        filenames = get_filenames(data_dir)
        dataset = tf.data.Dataset.from_tensor_slices(filenames)

        # Convert to individual records
        dataset = dataset.flat_map(tf.data.TFRecordDataset)
    else:
        generator = TFRecordDatasetGenerator(channel_name)
        dataset = tf.data.Dataset.from_generator(generator, tf.string)

    return resnet.process_record_dataset(dataset,
                                         is_training,
                                         batch_size,
                                         parse_record,
                                         num_epochs,
                                         num_parallel_calls,
                                         examples_per_epoch=num_images,
                                         multi_gpu=multi_gpu)
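
Example 7's non-'File' branch streams records instead of reading them from
disk: tf.data.Dataset.from_generator expects a callable that returns an
iterator whose elements match the declared dtype, which is why a
TFRecordDatasetGenerator object (not shown here, and presumably implementing
__call__) is constructed first and passed in. A minimal, self-contained
illustration of the same pattern with placeholder records:

def record_stream():
    # Placeholder records standing in for serialized examples from a channel.
    for payload in [b'record-1', b'record-2']:
        yield payload

dataset = tf.data.Dataset.from_generator(record_stream, tf.string)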
Example No. 8
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
             num_parallel_calls=1):
  """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.

  Returns:
    A dataset that can be used for iteration.
  """
  filenames = get_filenames(is_training, data_dir)
  dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

  return resnet.process_record_dataset(dataset, is_training, batch_size,
      _NUM_IMAGES['train'], parse_record, num_epochs, num_parallel_calls)
Example No. 9
def input_fn(is_training,
             data_dir,
             batch_size,
             num_epochs=1,
             num_parallel_calls=1,
             multi_gpu=False):
    """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.

  Args:
    is_training: A boolean denoting whether the input is for training.
    data_dir: The directory containing the input data.
    batch_size: The number of samples per batch.
    num_epochs: The number of epochs to repeat the dataset.
    num_parallel_calls: The number of records that are processed in parallel.
      This can be optimized per data set but for generally homogeneous data
      sets, should be approximately the number of available CPU cores.
    multi_gpu: Whether this is run multi-GPU. Note that this is only required
      currently to handle the batch leftovers, and can be removed
      when that is handled directly by Estimator.

  Returns:
    A dataset that can be used for iteration.
  """
    filenames = get_filenames(is_training, data_dir)
    dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)

    num_images = (_NUM_IMAGES['train'] if is_training
                  else _NUM_IMAGES['validation'])

    return resnet.process_record_dataset(dataset,
                                         is_training,
                                         batch_size,
                                         _NUM_IMAGES['train'],
                                         parse_record,
                                         num_epochs,
                                         num_parallel_calls,
                                         examples_per_epoch=num_images,
                                         multi_gpu=multi_gpu)
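
All of these input_fn variants are meant to be handed to a
tf.estimator.Estimator rather than called once up front, so that the input
pipeline is rebuilt inside the Estimator's graph. A typical hookup, where
model_fn, the directories and the batch size are hypothetical stand-ins:

classifier = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/model')

classifier.train(
    input_fn=lambda: input_fn(is_training=True, data_dir='/tmp/data',
                              batch_size=128, num_epochs=1))

classifier.evaluate(
    input_fn=lambda: input_fn(is_training=False, data_dir='/tmp/data',
                              batch_size=128))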