Exemplo n.º 1
0
def main(_):
    """Evaluate a TF-Hub module or export one, as selected by `FLAGS.tf_hub_mode`.

    Supported modes:
      * 'eval_from_hub': calls `eval_from_hub` on the export path using the
        evaluation input pipeline.
      * 'export_to_hub': calls `export_to_hub` with the latest checkpoint in
        the model directory (or with no checkpoint for a dry run).

    Args:
      _: Unused positional argument (presumably argv from the app runner).

    Raises:
      IOError: In export mode, when no checkpoint is found in the model
        directory and a dry run was not requested.
      ValueError: When `FLAGS.tf_hub_mode` is not one of the supported modes.
    """
    hub_mode = FLAGS.tf_hub_mode
    data_dir = amoeba_net.FLAGS.data_dir
    model_dir = amoeba_net.FLAGS.model_dir

    hparams = amoeba_net.build_hparams()
    hparams.add_hparam('drop_path_burn_in_steps', 0)
    hparams.set_hparam('use_tpu', False)
    # There is not yet a standardized way to expose aux heads for fine-tuning
    # Hub image modules, so they are switched off to keep unused variables
    # and ops out of the module.
    hparams.set_hparam('use_aux_head', False)

    eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

    # Fall back to a sub-directory of the model directory when no explicit
    # export path was given.
    export_path = FLAGS.export_path
    if not export_path:
        export_path = model_dir + '/export'

    # The pipeline is built for every mode, before the mode is validated.
    input_pipeline = model_lib.InputPipeline(
        is_training=False,
        data_dir=data_dir,
        hparams=hparams,
        eval_from_hub=True)

    if hub_mode == 'eval_from_hub':
        eval_from_hub(export_path,
                      input_pipeline.input_fn,
                      eval_steps=eval_steps)
        return

    if hub_mode != 'export_to_hub':
        raise ValueError('Unsupported tf_hub_mode = {}'.format(hub_mode))

    # A feature-vector export is requested by passing no class count.
    if FLAGS.export_feature_vector:
        num_classes = None
    else:
        num_classes = input_pipeline.num_classes

    # A dry run exports without restoring from any checkpoint.
    checkpoint_path = None
    if not FLAGS.dryrun_with_untrained_weights:
        checkpoint_path = tf.train.latest_checkpoint(model_dir)
        if not checkpoint_path:
            raise IOError('No checkpoint found.')

    export_to_hub(checkpoint_path, export_path, num_classes, hparams)
Exemplo n.º 2
0
def main(_):
    """Train and/or evaluate the AmoebaNet classifier, as chosen by `FLAGS.mode`.

    Modes:
      * 'eval': evaluates each checkpoint yielded by `_get_next_checkpoint()`,
        skipping checkpoints that disappear before they can be evaluated.
      * 'train_and_eval': alternates training for `num_epochs_per_eval`
        epochs' worth of steps with an evaluation until `num_epochs` epochs
        have been trained; if training is already finished, runs a single
        final evaluation.
      * any other value: trains for the remaining steps up to `num_epochs`
        epochs.

    Args:
      _: Unused positional argument (presumably argv from the app runner).
    """
    mode = FLAGS.mode
    data_dir = FLAGS.data_dir
    model_dir = FLAGS.model_dir
    hparams = build_hparams()

    estimator_params = {}

    train_steps_per_epoch = int(
        math.ceil(model_lib.NUM_TRAIN_IMAGES /
                  float(hparams.train_batch_size)))
    eval_steps = model_lib.NUM_EVAL_IMAGES // hparams.eval_batch_size
    # No eval batch size is handed to the estimator in pure training mode.
    eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

    run_config = build_run_config()

    model = model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

    if hparams.use_tpu:
        image_classifier = tpu_estimator.TPUEstimator(
            model_fn=model.model_fn,
            use_tpu=True,
            config=run_config,
            params=estimator_params,
            train_batch_size=hparams.train_batch_size,
            eval_batch_size=eval_batch_size)
    else:
        image_classifier = tf.estimator.Estimator(model_fn=model.model_fn,
                                                  config=run_config,
                                                  params=estimator_params)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    imagenet_train = model_lib.InputPipeline(is_training=True,
                                             data_dir=data_dir,
                                             hparams=hparams)
    imagenet_eval = model_lib.InputPipeline(is_training=False,
                                            data_dir=data_dir,
                                            hparams=hparams)

    # The EMA-loading hook is only attached when the decay is below 1.
    if hparams.moving_average_decay < 1:
        eval_hooks = [
            model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
        ]
    else:
        eval_hooks = []

    if mode == 'eval':
        for checkpoint in _get_next_checkpoint():
            tf.logging.info('Starting to evaluate.')
            try:
                eval_results = image_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=checkpoint)
            except tf.errors.NotFoundError:
                # Skip the checkpoint if it gets deleted prior to evaluation.
                # The checkpoint path is interpolated so the message names
                # the checkpoint instead of printing a literal '%s'.
                tf.logging.info(
                    'Checkpoint %s no longer exists ... skipping' % checkpoint)
            else:
                tf.logging.info('Evaluation results: %s' % eval_results)
    elif mode == 'train_and_eval':
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        tf.logging.info('Starting training at step=%d.' % current_step)
        train_steps_per_eval = hparams.num_epochs_per_eval * train_steps_per_epoch
        total_steps = hparams.num_epochs * train_steps_per_epoch
        # Final evaluation if training is already finished.
        if current_step >= total_steps:
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
        while current_step < total_steps:
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=train_steps_per_eval)
            # The step counter is tracked locally rather than re-read from
            # the checkpoint directory.
            current_step += train_steps_per_eval
            tf.logging.info('Starting evaluation at step=%d.' % current_step)
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
    else:
        # Every other mode value is treated as plain training.
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        total_step = hparams.num_epochs * train_steps_per_epoch
        if current_step < total_step:
            tf.logging.info('Starting training ...')
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=total_step - current_step)
Exemplo n.º 3
0
def main(_):
    """Run AmoebaNet training, evaluation or prediction, then optionally export.

    Modes (selected by `FLAGS.mode`):
      * 'eval': evaluates each checkpoint yielded by `_get_next_checkpoint()`,
        skipping checkpoints that disappear before they can be evaluated.
      * 'train_and_eval': alternates training for `num_epochs_per_eval`
        epochs' worth of steps with an evaluation until `num_epochs` epochs
        have been trained; if training is already finished, runs a single
        final evaluation.
      * 'predict': for each checkpoint, runs up to `eval_steps` prediction
        batches and logs the measured inference speed.
      * 'train': trains for the remaining steps up to `num_epochs` epochs.
      * anything else: logs 'Mode not found.'.

    After the mode-specific work (including the unrecognised-mode case), a
    saved model is exported when `FLAGS.export_dir` is not None, optionally
    followed by writing warmup requests.

    Args:
      _: Unused positional argument (presumably argv from the app runner).
    """
    mode = FLAGS.mode
    data_dir = FLAGS.data_dir
    model_dir = FLAGS.model_dir
    hparams = build_hparams()

    estimator_params = {}

    train_steps_per_epoch = int(
        math.ceil(hparams.num_train_images / float(hparams.train_batch_size)))
    eval_steps = hparams.num_eval_images // hparams.eval_batch_size
    # No eval/predict batch size is handed to the estimator in pure training
    # mode.
    eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

    model = model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

    if hparams.use_tpu:
        run_config = build_run_config()
        # Temporary treatment until flags are released.
        image_classifier = contrib_tpu.TPUEstimator(
            model_fn=model.model_fn,
            use_tpu=True,
            config=run_config,
            params=estimator_params,
            predict_batch_size=eval_batch_size,
            train_batch_size=hparams.train_batch_size,
            eval_batch_size=eval_batch_size,
            export_to_tpu=FLAGS.export_to_tpu)
    else:
        # Fall back to the iterations-per-loop value when no explicit
        # checkpoint interval was given.
        save_checkpoints_steps = (FLAGS.save_checkpoints_steps
                                  or FLAGS.iterations_per_loop)
        run_config = tf.estimator.RunConfig(
            model_dir=FLAGS.model_dir,
            save_checkpoints_steps=save_checkpoints_steps)
        image_classifier = tf.estimator.Estimator(model_fn=model.model_fn,
                                                  config=run_config,
                                                  params=estimator_params)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    imagenet_train = model_lib.InputPipeline(is_training=True,
                                             data_dir=data_dir,
                                             hparams=hparams)
    imagenet_eval = model_lib.InputPipeline(is_training=False,
                                            data_dir=data_dir,
                                            hparams=hparams)

    # The EMA-loading hook is only attached when the decay is below 1.
    if hparams.moving_average_decay < 1:
        eval_hooks = [
            model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
        ]
    else:
        eval_hooks = []

    if mode == 'eval':
        for checkpoint in _get_next_checkpoint():
            tf.logging.info('Starting to evaluate.')
            try:
                eval_results = image_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=checkpoint)
            except tf.errors.NotFoundError:
                # Skip the checkpoint if it gets deleted prior to evaluation.
                # The checkpoint path is interpolated so the message names
                # the checkpoint instead of printing a literal '%s'.
                tf.logging.info(
                    'Checkpoint %s no longer exists ... skipping' % checkpoint)
            else:
                tf.logging.info('Evaluation results: %s' % eval_results)
    elif mode == 'train_and_eval':
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        tf.logging.info('Starting training at step=%d.' % current_step)
        train_steps_per_eval = int(hparams.num_epochs_per_eval *
                                   train_steps_per_epoch)
        total_steps = hparams.num_epochs * train_steps_per_epoch
        # Final evaluation if training is already finished.
        if current_step >= total_steps:
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
        while current_step < total_steps:
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=train_steps_per_eval)
            # The step counter is tracked locally rather than re-read from
            # the checkpoint directory.
            current_step += train_steps_per_eval
            tf.logging.info('Starting evaluation at step=%d.' % current_step)
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
    elif mode == 'predict':
        for checkpoint in _get_next_checkpoint():
            tf.logging.info('Starting prediction ...')
            time_hook = model_lib.SessionTimingHook()
            # Build a fresh hook list per checkpoint. Appending to the shared
            # `eval_hooks` list would accumulate one stale timing hook per
            # iteration and re-attach all of them to later predict calls.
            predict_hooks = eval_hooks + [time_hook]
            result_iter = image_classifier.predict(
                input_fn=imagenet_eval.input_fn,
                hooks=predict_hooks,
                checkpoint_path=checkpoint,
                yield_single_examples=False)
            # With yield_single_examples=False each item is one batch, so at
            # most `eval_steps` batches are consumed.
            results = list(itertools.islice(result_iter, eval_steps))
            tf.logging.info('Inference speed = {} images per second.'.format(
                time_hook.compute_speed(len(results) * eval_batch_size)))
    elif mode == 'train':
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        total_step = int(hparams.num_epochs * train_steps_per_epoch)
        if current_step < total_step:
            tf.logging.info('Starting training ...')
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=total_step - current_step)
    else:
        tf.logging.info('Mode not found.')

    if FLAGS.export_dir is not None:
        tf.logging.info('Starting exporting saved model ...')
        # Serving input shape: square image with 3 channels.
        serving_shape = [hparams.image_size, hparams.image_size, 3]
        export_path = image_classifier.export_saved_model(
            export_dir_base=FLAGS.export_dir,
            serving_input_receiver_fn=build_image_serving_input_receiver_fn(
                serving_shape),
            as_text=True)
        if FLAGS.add_warmup_requests:
            inference_warmup.write_warmup_requests(
                export_path,
                FLAGS.model_name,
                hparams.image_size,
                batch_sizes=FLAGS.inference_batch_sizes)
Exemplo n.º 4
0
def main(_):
    """Run the sliced AmoebaNet model in the mode selected by `FLAGS.mode`.

    Modes:
      * 'eval': evaluates each checkpoint yielded by `_get_next_checkpoint()`,
        skipping checkpoints that disappear before they can be evaluated.
      * 'train_and_eval': alternates training for `num_epochs_per_eval`
        epochs' worth of steps with an evaluation until `num_epochs` epochs
        have been trained; if training is already finished, runs a single
        final evaluation.
      * 'predict': for each checkpoint, runs up to `eval_steps` prediction
        batches and logs the measured inference speed.
      * 'train': trains for the remaining steps up to `num_epochs` epochs,
        capped at `FLAGS.max_steps`.
      * anything else: logs 'Mode not found.'.

    Args:
      _: Unused positional argument (presumably argv from the app runner).
    """
    mode = FLAGS.mode
    data_dir = FLAGS.data_dir
    model_dir = FLAGS.model_dir
    hparams = build_hparams()

    estimator_params = {}

    train_steps_per_epoch = int(
        math.ceil(hparams.num_train_images / float(hparams.train_batch_size)))
    eval_steps = hparams.num_eval_images // hparams.eval_batch_size
    # No eval batch size is used in pure training mode.
    eval_batch_size = (None if mode == 'train' else hparams.eval_batch_size)

    model = slice_model_lib.AmoebaNetEstimatorModel(hparams, model_dir)

    # Fall back to the iterations-per-loop value when no explicit checkpoint
    # interval was given.
    save_checkpoints_steps = (FLAGS.save_checkpoints_steps
                              or FLAGS.iterations_per_loop)
    prepare_tf_config()
    # NOTE: a RewriterConfig with the layout optimizer turned OFF used to be
    # passed to the session config through `graph_options`; it is currently
    # disabled.
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))
    if FLAGS.cross_pipeline:
        # The returned manager is not used again in this function; the call
        # and the binding are kept in case they matter for cluster setup or
        # object lifetime. NOTE(review): confirm whether it can be dropped.
        cluster_manager = cluster_utils.get_cluster_manager(
            config_proto=session_config)
    run_config = tf.estimator.RunConfig(
        log_step_count_steps=100,
        session_config=session_config,
        save_checkpoints_steps=save_checkpoints_steps)
    image_classifier = tf.estimator.Estimator(model_fn=model.model_fn,
                                              config=run_config,
                                              params=estimator_params)

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    imagenet_train = model_lib.InputPipeline(is_training=True,
                                             data_dir=data_dir,
                                             hparams=hparams)
    imagenet_eval = model_lib.InputPipeline(is_training=False,
                                            data_dir=data_dir,
                                            hparams=hparams)

    # The EMA-loading hook is only attached when the decay is below 1.
    if hparams.moving_average_decay < 1:
        eval_hooks = [
            model_lib.LoadEMAHook(model_dir, hparams.moving_average_decay)
        ]
    else:
        eval_hooks = []

    if mode == 'eval':
        for checkpoint in _get_next_checkpoint():
            tf.logging.info('Starting to evaluate.')
            try:
                eval_results = image_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=checkpoint)
            except tf.errors.NotFoundError:
                # Skip the checkpoint if it gets deleted prior to evaluation.
                # The checkpoint path is interpolated so the message names
                # the checkpoint instead of printing a literal '%s'.
                tf.logging.info(
                    'Checkpoint %s no longer exists ... skipping' % checkpoint)
            else:
                tf.logging.info('Evaluation results: %s' % eval_results)
    elif mode == 'train_and_eval':
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        tf.logging.info('Starting training at step=%d.' % current_step)
        train_steps_per_eval = int(hparams.num_epochs_per_eval *
                                   train_steps_per_epoch)
        total_steps = hparams.num_epochs * train_steps_per_epoch
        # Final evaluation if training is already finished.
        if current_step >= total_steps:
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
        while current_step < total_steps:
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=train_steps_per_eval)
            # The step counter is tracked locally rather than re-read from
            # the checkpoint directory.
            current_step += train_steps_per_eval
            tf.logging.info('Starting evaluation at step=%d.' % current_step)
            eval_results = image_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)
    elif mode == 'predict':
        for checkpoint in _get_next_checkpoint():
            tf.logging.info('Starting prediction ...')
            time_hook = model_lib.SessionTimingHook()
            # Build a fresh hook list per checkpoint. Appending to the shared
            # `eval_hooks` list would accumulate one stale timing hook per
            # iteration and re-attach all of them to later predict calls.
            predict_hooks = eval_hooks + [time_hook]
            result_iter = image_classifier.predict(
                input_fn=imagenet_eval.input_fn,
                hooks=predict_hooks,
                checkpoint_path=checkpoint,
                yield_single_examples=False)
            # With yield_single_examples=False each item is one batch, so at
            # most `eval_steps` batches are consumed.
            results = list(itertools.islice(result_iter, eval_steps))
            tf.logging.info('Inference speed = {} images per second.'.format(
                time_hook.compute_speed(len(results) * eval_batch_size)))
    elif mode == 'train':
        current_step = _load_global_step_from_checkpoint_dir(model_dir)
        total_step = int(hparams.num_epochs * train_steps_per_epoch)
        if current_step < total_step:
            tf.logging.info('Starting training ...')
            # Train for the remaining steps, capped at FLAGS.max_steps.
            # NOTE(review): assumes FLAGS.max_steps is always a number.
            image_classifier.train(input_fn=imagenet_train.input_fn,
                                   steps=min(total_step - current_step,
                                             FLAGS.max_steps))
    else:
        tf.logging.info('Mode not found.')