Example #1
def test_batch_size(self):
    self.assertEqual(
        distribution_utils.per_device_batch_size(147, num_gpus=0), 147)
    self.assertEqual(
        distribution_utils.per_device_batch_size(147, num_gpus=1), 147)
    self.assertEqual(
        distribution_utils.per_device_batch_size(147, num_gpus=7), 21)
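These tests pin down the contract: num_gpus of 0 or 1 leaves the global batch size untouched, an even split returns batch_size / num_gpus, and an uneven split raises ValueError (Example #14 below). A minimal sketch of such a helper, written only from what the tests show rather than from the library's actual implementation:

def per_device_batch_size(batch_size, num_gpus):
    # With zero or one GPU, the global batch size is the per-device batch size.
    if num_gpus <= 1:
        return batch_size
    # An uneven split would silently change the effective batch size,
    # so refuse it instead; Example #14 tests exactly this case.
    if batch_size % num_gpus:
        raise ValueError(
            "Global batch size {} is not divisible by {} GPUs.".format(
                batch_size, num_gpus))
    return batch_size // num_gpus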
Example #2
def get_3d_multi_records_dataset_for_eval(image_filenames, hist_filenames, mode, args):
    flat_fn = (partial(_flat_map_fn, mode=mode, args=args)
               if args.input_group == 1 else
               partial(_flat_map_fn_multi_channels, mode=mode, args=args))
    augment_fn = (partial(data_processing_eval_while_train, args=args)
                  if mode == "eval_while_train" else
                  partial(data_processing_eval, args=args))

    def parse_image_hist(*examples):
        features, labels = parse_3d_example_proto(examples[0], args=args)
        hists = parse_hist_example_proto(examples[1], nd=3, args=args, pad=features["pads"])
        features["density_hists"] = hists["hists"]
        return ((features, labels) if mode == "eval_while_train" else
                _before_flat_fn_eval(features, labels, args=args))

    if len(image_filenames) != len(hist_filenames):
        raise ValueError("Image and bbox shape mismatch: {} vs {}"
                         .format(len(image_filenames), len(hist_filenames)))

    dataset = Dataset.zip((tf.data.TFRecordDataset(image_filenames[0]),
                           tf.data.TFRecordDataset(hist_filenames[0])))
    for image_filename, hist_filename in zip(image_filenames[1:], hist_filenames[1:]):
        dataset = dataset.concatenate(Dataset.zip((tf.data.TFRecordDataset(image_filename),
                                                   tf.data.TFRecordDataset(hist_filename))))
    if args.eval_skip_num:
        dataset = dataset.skip(args.eval_skip_num)

    bs = distribution_utils.per_device_batch_size(args.batch_size, args.num_gpus)
    dataset = (dataset.map(parse_image_hist, num_parallel_calls=2)
               .flat_map(flat_fn)
               .apply(tf.data.experimental.map_and_batch(augment_fn, bs,
                                                         num_parallel_batches=1))
               .prefetch(buffer_size=200))

    return dataset
Example #3
def get_2d_multi_records_dataset_for_train(image_filenames, hist_filenames, args):
    filter_fn = partial(filter_slices, args=args, strategy="area", size=args.filter_size)

    def parse_image_hist(*examples):
        features, labels = parse_2d_example_proto(examples[0], mode=ModeKeys.TRAIN, args=args)
        hists = parse_hist_example_proto(examples[1], nd=2, args=args)
        features["density_hists"] = hists["hists"]
        return data_augmentation(features, labels, args=args)

    if len(image_filenames) != len(hist_filenames):
        raise ValueError("Image and bbox shape mismatch: {} vs {}"
                         .format(len(image_filenames), len(hist_filenames)))
    if len(image_filenames) > 1:
        dataset = (Dataset.from_tensor_slices(list(zip(image_filenames, hist_filenames)))
                   .shuffle(buffer_size=len(image_filenames), seed=SEED_FILE)
                   .interleave(lambda x: (Dataset.zip((tf.data.TFRecordDataset(x[0]),   # For image/label
                                                       tf.data.TFRecordDataset(x[1])))),    # For histogram
                               cycle_length=len(image_filenames),
                               block_length=BLOCK_LENGTH))
    else:
        dataset = Dataset.zip((tf.data.TFRecordDataset(image_filenames[0]),
                               tf.data.TFRecordDataset(hist_filenames[0])))

    bs = distribution_utils.per_device_batch_size(args.batch_size, args.num_gpus)
    dataset = (dataset.filter(filter_fn)
               .prefetch(buffer_size=bs)
               .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE, seed=SEED_BATCH)
               .repeat(count=None)
               .apply(tf.data.experimental.map_and_batch(parse_image_hist, bs,
                                                         num_parallel_batches=1))
               .prefetch(buffer_size=contrib_data.AUTOTUNE))

    return dataset
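A hedged usage sketch for the function above. Only the args fields this function reads directly (batch_size, num_gpus, filter_size) are shown; the parse and augmentation helpers it calls will read more, and every value and file name here is hypothetical:

from types import SimpleNamespace

args = SimpleNamespace(batch_size=8, num_gpus=2, filter_size=100)
train_ds = get_2d_multi_records_dataset_for_train(
    ["images-0.tfrecord", "images-1.tfrecord"],  # hypothetical paths
    ["hists-0.tfrecord", "hists-1.tfrecord"],    # must match the image list length
    args)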
Example #4
def test(_):
    tf.enable_eager_execution()
    flag_obj = define_coco_flags()
    cocodataset = coco_dataset.CocoDataset()

    cocodataset.load_coco('/home/hume/Deep-learning/dataset/coco', 'train',
                          DEFAULT_DATASET_YEAR)
    cocodataset.prepare()

    augmentation = imgaug.augmenters.Fliplr(0.5)

    input_iter = input_fn(
        cocodataset,
        is_training=True,
        batch_size=distribution_utils.per_device_batch_size(
            flag_obj.batch_size, flags_core.get_num_gpus(flag_obj)),
        anchors_path=flag_obj.anchors_path,
        num_epochs=flag_obj.train_epochs,
        dtype=tf.float32,
        max_num_boxes_per_image=flag_obj.max_num_boxes_per_image,
        image_size=flag_obj.image_size,
        augmentation=augmentation,
        num_parallel_batches=flag_obj.datasets_num_parallel_batches,
        datasets_num_private_threads=multiprocessing.cpu_count() - 3)
    coco_iter = input_iter.make_one_shot_iterator()
    starttime = time()
    imgs, y_gt = coco_iter.get_next()
    print('cost {}ms\n'.format((time() - starttime) * 1000))

    print(imgs.shape)
    print(y_gt.shape)
Example #5
def input_fn_eval():
    return input_function(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        num_epochs=1,
        dtype=flags_core.get_tf_dtype(flags_obj))
Example #6
    def __init__(self, args, name=None):
        """ Don't create ops/tensors in __init__() """
        super(UNet, self).__init__(args)
        self.name = name or "UNet"
        self.classes.extend(self.args.classes)

        self.bs = distribution_utils.per_device_batch_size(
            args.batch_size, args.num_gpus)
        self.height = args.im_height
        self.width = args.im_width
        self.channel = args.im_channel
Example #7
def input_fn_train(num_epochs):
    return input_function(
        data_set=dataset,
        is_training=True,
        batch_size=distribution_utils.per_device_batch_size(
            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
        anchors_path=flags_obj.anchors_path,
        num_epochs=num_epochs,
        augmentation=augmentation,
        dtype=tf.float32,
        max_num_boxes_per_image=flags_obj.max_num_boxes_per_image,
        image_size=flags_obj.image_size,
        datasets_num_private_threads=flags_obj.datasets_num_private_threads,
        num_parallel_batches=flags_obj.datasets_num_parallel_batches)
Example #8
def get_2d_multi_records_dataset_for_eval(file_names, mode, args):
    dataset = tf.data.TFRecordDataset(file_names[0])
    for file_name in file_names[1:]:
        dataset = dataset.concatenate(tf.data.TFRecordDataset(file_name))

    def parse_fn(example_proto):
        features, labels = parse_2d_example_proto(example_proto,
                                                  mode=mode,
                                                  args=args)
        return data_processing_eval_while_train(features, labels, args=args)

    bs = distribution_utils.per_device_batch_size(args.batch_size,
                                                  args.num_gpus)
    dataset = (dataset.apply(tf.data.experimental.map_and_batch(parse_fn, bs,
                                                                num_parallel_batches=1,
                                                                drop_remainder=True))
               .prefetch(buffer_size=contrib_data.AUTOTUNE))

    return dataset
Example #9
def get_2d_multi_records_dataset_for_train(file_names, args):
    """
    Generate tf.data.Dataset from tf-record file for training

    Parameters
    ----------
    file_names: list or tuple
        A list of tf-record file names
    args: ArgumentParser
        Used arguments: batch_size, zoom, zoom_scale, noise, noise_scale, w_width, w_level

    Returns
    -------
    A tf.data.Dataset instance

    """
    parse_fn = partial(data_augmentation, args=args)
    filter_fn = partial(filter_slices,
                        args=args,
                        strategy="area",
                        size=args.filter_size)

    if len(file_names) > 1:
        dataset = (Dataset.from_tensor_slices(file_names)
                   .shuffle(buffer_size=len(file_names), seed=SEED_FILE)
                   .apply(tf.data.experimental.parallel_interleave(
                       tf.data.TFRecordDataset, cycle_length=len(file_names))))
    else:
        dataset = tf.data.TFRecordDataset(file_names[0])

    bs = distribution_utils.per_device_batch_size(args.batch_size,
                                                  args.num_gpus)
    dataset = (dataset.filter(filter_fn)
               .prefetch(buffer_size=bs)
               .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE, seed=SEED_BATCH)
               .repeat(count=None)
               .apply(tf.data.experimental.map_and_batch(parse_fn, bs,
                                                         num_parallel_batches=1))
               .prefetch(buffer_size=contrib_data.AUTOTUNE))

    return dataset
Example #10
    def __init__(self):

        anchors = utils.get_anchors(flags.FLAGS.anchors_path)
        num_anchors = len(anchors)
        anchors = np.array(anchors, dtype=np.float32)

        super(CocoModel, self).__init__(
            image_size=flags.FLAGS.image_size,
            image_channels=flags.FLAGS.image_channels,
            num_classes=flags.FLAGS.num_classes,
            anchors=anchors,
            batch_size=distribution_utils.per_device_batch_size(
                flags.FLAGS.batch_size, flags_core.get_num_gpus(flags.FLAGS)),
            num_anchors=num_anchors,
            learning_rate=flags.FLAGS.learning_rate,
            backbone=flags.FLAGS.backbone,
            norm=flags.FLAGS.norm,
            threshold=flags.FLAGS.threshold,
            max_num_boxes_per_image=flags.FLAGS.max_num_boxes_per_image,
            confidence_score=flags.FLAGS.confidence_score,
            data_format=flags.FLAGS.data_format,
            dtype=flags_core.get_tf_dtype(flags.FLAGS))
Example #11
def get_3d_multi_records_dataset_for_eval(file_names, mode, args):
    parse_fn = partial(parse_3d_example_proto, args=args)
    flat_fn = partial(_flat_map_fn_multi_channels, mode=mode, args=args)
    augment_fn = (partial(data_processing_eval_while_train, args=args)
                  if mode == "eval_while_train" else
                  partial(data_processing_eval, args=args))

    dataset = tf.data.TFRecordDataset(file_names[0])
    for file_name in file_names[1:]:
        dataset = dataset.concatenate(tf.data.TFRecordDataset(file_name))
    if args.eval_skip_num:
        dataset = dataset.skip(args.eval_skip_num)

    bs = distribution_utils.per_device_batch_size(args.batch_size,
                                                  args.num_gpus)
    dataset = (dataset.map(parse_fn, num_parallel_calls=2)
               .flat_map(flat_fn)
               .apply(tf.data.experimental.map_and_batch(augment_fn, bs,
                                                         num_parallel_batches=1))
               .prefetch(buffer_size=contrib_data.AUTOTUNE))

    return dataset
Example #12
def get_2d_multi_records_dataset_for_eval(image_filenames, hist_filenames, mode, args):
    def parse_image_hist(*examples):
        features, labels = parse_2d_example_proto(examples[0], mode=mode, args=args)
        hists = parse_hist_example_proto(examples[1], nd=2, args=args)
        features["density_hists"] = hists["hists"]
        return data_processing_eval_while_train(features, labels, args=args)

    if len(image_filenames) != len(hist_filenames):
        raise ValueError("Image and bbox shape mismatch: {} vs {}"
                         .format(len(image_filenames), len(hist_filenames)))

    dataset = Dataset.zip((tf.data.TFRecordDataset(image_filenames[0]),
                           tf.data.TFRecordDataset(hist_filenames[0])))
    for image_filename, hist_filename in zip(image_filenames[1:], hist_filenames[1:]):
        dataset = dataset.concatenate(Dataset.zip((tf.data.TFRecordDataset(image_filename),
                                                   tf.data.TFRecordDataset(hist_filename))))

    bs = distribution_utils.per_device_batch_size(args.batch_size, args.num_gpus)
    dataset = (dataset.apply(tf.data.experimental.map_and_batch(parse_image_hist, bs,
                                                                num_parallel_batches=1,
                                                                drop_remainder=True))
               .prefetch(buffer_size=contrib_data.AUTOTUNE))

    return dataset
Example #13
def run_transformer(flags_obj):
  """Create tf.Estimator to train and evaluate transformer model.

  Args:
    flags_obj: Object containing parsed flag values.
  """
  num_gpus = flags_core.get_num_gpus(flags_obj)

  # Add flag-defined parameters to params object
  params = PARAMS_MAP[flags_obj.param_set]
  if num_gpus > 1:
    if flags_obj.param_set == "big":
      params = model_params.BIG_MULTI_GPU_PARAMS
    elif flags_obj.param_set == "base":
      params = model_params.BASE_MULTI_GPU_PARAMS

  params["data_dir"] = flags_obj.data_dir
  params["model_dir"] = flags_obj.model_dir
  params["num_parallel_calls"] = flags_obj.num_parallel_calls

  params["tpu"] = flags_obj.tpu
  params["use_tpu"] = bool(flags_obj.tpu)  # was a tpu specified.
  params["static_batch"] = flags_obj.static_batch or params["use_tpu"]
  params["allow_ffn_pad"] = not params["use_tpu"]

  params["use_synthetic_data"] = flags_obj.use_synthetic_data

  # Set batch size parameter, which depends on the availability of
  # TPU and GPU, and distribution settings.
  params["batch_size"] = (flags_obj.batch_size or (
      params["default_batch_size_tpu"] if params["use_tpu"]
      else params["default_batch_size"]))

  if not params["use_tpu"]:
    params["batch_size"] = distribution_utils.per_device_batch_size(
        params["batch_size"], num_gpus)

  schedule_manager = schedule.Manager(
      train_steps=flags_obj.train_steps,
      steps_between_evals=flags_obj.steps_between_evals,
      train_epochs=flags_obj.train_epochs,
      epochs_between_evals=flags_obj.epochs_between_evals,
      default_train_epochs=DEFAULT_TRAIN_EPOCHS,
      batch_size=params["batch_size"],
      max_length=params["max_length"],
      use_tpu=params["use_tpu"],
      num_tpu_shards=flags_obj.num_tpu_shards
  )

  params["repeat_dataset"] = schedule_manager.repeat_dataset

  model_helpers.apply_clean(flags.FLAGS)

  # Create hooks that log information about the training and metric values
  train_hooks = hooks_helper.get_train_hooks(
      flags_obj.hooks,
      model_dir=flags_obj.model_dir,
      tensors_to_log=TENSORS_TO_LOG,  # used for logging hooks
      batch_size=schedule_manager.batch_size,  # for ExamplesPerSecondHook
      use_tpu=params["use_tpu"]  # Not all hooks can run with TPUs
  )
  benchmark_logger = logger.get_benchmark_logger()
  benchmark_logger.log_run_info(
      model_name="transformer",
      dataset_name="wmt_translate_ende",
      run_params=params,
      test_id=flags_obj.benchmark_test_id)

  # Train and evaluate transformer model
  estimator = construct_estimator(flags_obj, params, schedule_manager)
  run_loop(
      estimator=estimator,
      # Training arguments
      schedule_manager=schedule_manager,
      train_hooks=train_hooks,
      benchmark_logger=benchmark_logger,
      # BLEU calculation arguments
      bleu_source=flags_obj.bleu_source,
      bleu_ref=flags_obj.bleu_ref,
      bleu_threshold=flags_obj.stop_threshold,
      vocab_file=flags_obj.vocab_file)

  if flags_obj.export_dir and not params["use_tpu"]:
    serving_input_fn = export.build_tensor_serving_input_receiver_fn(
        shape=[None], dtype=tf.int64, batch_size=None)
    # Export saved model, and save the vocab file as an extra asset. The vocab
    # file is saved to allow consistent input encoding and output decoding.
    # (See the "Export trained model" section in the README for an example of
    # how to use the vocab file.)
    # Since the model itself does not use the vocab file, this file is saved as
    # an extra asset rather than a core asset.
    estimator.export_savedmodel(
        flags_obj.export_dir, serving_input_fn,
        assets_extra={"vocab.txt": flags_obj.vocab_file})
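The batch-size selection in run_transformer reduces to a small piece of arithmetic; a standalone sketch with hypothetical numbers (the real defaults live in model_params):

flag_batch_size = None      # user did not pass --batch_size
default_batch_size = 4096   # hypothetical non-TPU default
num_gpus = 4

batch_size = flag_batch_size or default_batch_size  # 4096, the global batch size
# On GPU, per_device_batch_size splits this evenly or raises on a remainder.
per_device = batch_size // num_gpus                 # 1024 per device
assert per_device * num_gpus == batch_size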
Example #14
def test_batch_size_with_remainder(self):
    with self.assertRaises(ValueError):
        distribution_utils.per_device_batch_size(147, num_gpus=5)
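Under the sketch in Example #1, this is the expected failure mode: 147 % 5 leaves a remainder of 2, so the split is uneven and the helper raises rather than silently changing the effective batch size.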
Example #15
def yolo_main(flags_obj, model_function, input_function, dataset,
              augmentation):
    """Shared main loop for yolo Models.

    Args:
        flags_obj: An object containing parsed flags. See define_yolo_flags()
            for details.
        model_function: the function that instantiates the Model and builds the
            ops for train/eval. This will be passed directly into the estimator.
        input_function: the function that processes the dataset and returns a
            dataset that the estimator can train on. This will be wrapped with
            all the relevant flags for running and passed to estimator.
        dataset: A dataset for training and evaluation.
        augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation.
            For example, passing imgaug.augmenters.Fliplr(0.5) flips images
            right/left 50% of the time.
    """

    model_helpers.apply_clean(flags_obj)

    # Ensures flag override logic is only executed if explicitly triggered.
    if flags_obj.tf_gpu_thread_mode:
        override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)

    # Creates session config. allow_soft_placement=True is required for
    # multi-GPU and is not harmful for other modes.
    session_config = tf.ConfigProto(
        log_device_placement=True,
        inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
        intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
        allow_soft_placement=True)

    session_config.gpu_options.allow_growth = True

    distribution_strategy = distribution_utils.get_distribution_strategy(
        flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)

    run_config = tf.estimator.RunConfig(train_distribute=distribution_strategy,
                                        session_config=session_config,
                                        save_checkpoints_secs=60 * 60 * 24)

    # Initializes model with all but the dense layer from pretrained ResNet.
    if flags_obj.pretrained_model_checkpoint_path is not None:
        warm_start_settings = tf.estimator.WarmStartSettings(
            flags_obj.pretrained_model_checkpoint_path,
            vars_to_warm_start='^(?!.*dense)')
    else:
        warm_start_settings = None

    anchors = np.array(utils.get_anchors(flags_obj.anchors_path))

    detector = tf.estimator.Estimator(
        model_fn=model_function,
        model_dir=flags_obj.model_dir,
        config=run_config,
        warm_start_from=warm_start_settings,
        params={
            'num_classes': flags_obj.num_classes,
            'data_format': flags_obj.data_format,
            'batch_size': distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            'image_size': int(flags_obj.image_size),
            'loss_scale': flags_core.get_loss_scale(flags_obj),
            'dtype': flags_core.get_tf_dtype(flags_obj),
            'fine_tune': flags_obj.fine_tune,
            'anchors': anchors,
            'num_anchors': len(anchors),
            'max_num_boxes_per_image': flags_obj.max_num_boxes_per_image,
            'threshold': flags_obj.threshold,
            'train': dataset.num_images,
            'learning_rate': flags_obj.learning_rate
        })

    # if flags_obj.use_synthetic_data:
    #     dataset_name = dataset_name + '-synthetic'

    def input_fn_train(num_epochs):
        return input_function(
            data_set=dataset,
            is_training=True,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            anchors_path=flags_obj.anchors_path,
            num_epochs=num_epochs,
            augmentation=augmentation,
            dtype=tf.float32,
            max_num_boxes_per_image=flags_obj.max_num_boxes_per_image,
            image_size=flags_obj.image_size,
            datasets_num_private_threads=flags_obj.datasets_num_private_threads,
            num_parallel_batches=flags_obj.datasets_num_parallel_batches)

    '''
    def input_fn_eval():
        return input_function(
            is_training=False,
            data_dir=flags_obj.data_dir,
            batch_size=distribution_utils.per_device_batch_size(
                flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
            num_epochs=1,
            dtype=flags_core.get_tf_dtype(flags_obj))
            '''

    if flags_obj.eval_only or not flags_obj.train_epochs:
        # If --eval_only is set, perform a single loop with zero train epochs.
        schedule, n_loops = [0], 1
    else:
        # Compute the number of times to loop while training. All but the last
        # pass will train for `epochs_between_evals` epochs, while the last will
        # train for the number needed to reach `training_epochs`. For instance if
        #   train_epochs = 25 and epochs_between_evals = 10
        # schedule will be set to [10, 10, 5]. That is to say, the loop will:
        #   Train for 10 epochs and then evaluate.
        #   Train for another 10 epochs and then evaluate.
        #   Train for a final 5 epochs (to reach 25 epochs) and then evaluate.
        n_loops = math.ceil(flags_obj.train_epochs /
                            flags_obj.epochs_between_evals)
        schedule = [
            flags_obj.epochs_between_evals for _ in range(int(n_loops))
        ]
        schedule[-1] = flags_obj.train_epochs - sum(
            schedule[:-1])  # over counting.

    for cycle_index, num_train_epochs in enumerate(schedule):
        tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))

        if num_train_epochs:
            detector.train(input_fn=lambda: input_fn_train(num_train_epochs),
                           max_steps=flags_obj.max_train_steps)
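The schedule arithmetic above can be checked in isolation; a plain-Python sketch using the numbers from the comment (train_epochs=25, epochs_between_evals=10):

import math

train_epochs, epochs_between_evals = 25, 10
n_loops = math.ceil(train_epochs / epochs_between_evals)  # 3
schedule = [epochs_between_evals] * n_loops               # [10, 10, 10]
schedule[-1] = train_epochs - sum(schedule[:-1])          # [10, 10, 5]
print(schedule)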