Example #1
def define_mnist_flags():
    flags.DEFINE_integer('eval_secs', 10,
                         'How frequently to run evaluation step')
    flags.DEFINE_integer('ckpt_steps', 100,
                         'How frequently to save a model checkpoint')
    flags.DEFINE_integer('max_ckpts', 2,
                         'Maximum number of checkpoints to keep')
    flags.DEFINE_integer('max_steps', os.environ.get('MAX_STEPS', 100),
                         'Max steps')
    flags.DEFINE_integer('save_summary_steps', 10,
                         'How frequently to save TensorBoard summaries')
    flags.DEFINE_integer('log_step_count_steps', 10,
                         'How frequently to log loss & global steps/s')
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    data_dir = os.path.abspath(
        os.environ.get('PS_JOBSPACE', os.getcwd()) + '/data')
    model_dir = os.path.abspath(
        os.environ.get('PS_MODEL_PATH',
                       os.getcwd() + '/models') + '/mnist')
    export_dir = os.path.abspath(
        os.environ.get('PS_MODEL_PATH',
                       os.getcwd() + '/models'))
    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(
        data_dir=data_dir,
        model_dir=model_dir,
        export_dir=export_dir,
        train_epochs=int(os.environ.get('TRAIN_EPOCHS', 3)),
        epochs_between_evals=int(os.environ.get('EPOCHS_EVAL', 5)),
        batch_size=int(os.environ.get('BATCH_SIZE', 100)),
    )
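A minimal sketch of how a definition function like this is typically wired into an absl-py entry point (assumes `flags_core` from the TensorFlow official-models utilities is importable; `main` is a hypothetical name):

from absl import app
from absl import flags

def main(_):
    # Parsed values are available on flags.FLAGS once app.run handles argv.
    print(flags.FLAGS.max_steps, flags.FLAGS.batch_size)

if __name__ == '__main__':
    define_mnist_flags()
    app.run(main)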
Example #2
def define_mnist_flags():
    flags_core.define_base()
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(data_dir='/tmp/mnist_data',
                            model_dir='/tmp/mnist_model',
                            batch_size=100,
                            train_epochs=40)
Example #3
def define_mnist_flags():
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(data_dir='/tmp/mnist_data',
                            model_dir='/tmp/mnist_model',
                            batch_size=100,
                            train_epochs=40)
Example #4
def define_mnist_flags():
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(
        data_dir="/workspace/zigangzhao/TensoFlowBDD/models/official/mnist/mnist_data",
        model_dir="/workspace/zigangzhao/TensoFlowBDD/models/official/mnist/mnist_model/",
        batch_size=100,
        train_epochs=40)
Example #5
def define_mnist_flags():
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    data_dir = os.path.abspath(
        os.environ.get('PS_JOBSPACE', os.getcwd()) + '/data')
    model_dir = os.path.abspath(
        os.environ.get('PS_MODEL_PATH', os.getcwd() + '/models') + '/mnist')
    flags.adopt_module_key_flags(flags_core)
    flags_core.set_defaults(
        data_dir=data_dir,
        model_dir=model_dir,
        export_dir=os.environ.get('PS_MODEL_PATH', os.getcwd() + '/models'),
        batch_size=int(os.environ.get('batch_size', 100)),
        epochs_between_evals=20,
        train_epochs=int(os.environ.get('train_epochs', 40)))
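Because the defaults above are computed when `define_mnist_flags()` runs, the environment variables must already be set at that point. A hedged illustration (paths and values hypothetical):

import os
os.environ['PS_JOBSPACE'] = '/srv/job'    # data_dir default becomes /srv/job/data
os.environ['train_epochs'] = '5'          # train_epochs default becomes 5
define_mnist_flags()                      # reads the environment at definition time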
Example #6
def define_flags():
    flags_core.define_base(clean=True,
                           num_gpu=False,
                           stop_threshold=True,
                           hooks=True,
                           train_epochs=True,
                           epochs_between_evals=True)
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=True,
                                  intra_op=True,
                                  dynamic_loss_scale=True,
                                  loss_scale=True,
                                  synthetic_data=True,
                                  dtype=True)
    flags_core.define_image()
    flags_core.define_benchmark()
Example #7
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='1',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
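The `resnet_size_choices` parameter switches the size flag between a free-form string and a validated enum. A sketch of the two call sites (only one may run per process, since redefining a flag raises `absl.flags.DuplicateFlagError`):

define_resnet_flags()  # free-form: --resnet_size accepts any string

# Validated variant: --resnet_size must then be one of the listed values.
# define_resnet_flags(resnet_size_choices=['18', '34', '50'])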
Example #8
def define_mnist_eager_flags():
    """Defined flags and defaults for MNIST in eager mode."""
    flags_core.define_base_eager()
    flags_core.define_image()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_integer(
        name='log_interval',
        short_name='li',
        default=10,
        help=flags_core.help_wrap('batches between logging training status'))

    flags.DEFINE_string(
        name='output_dir',
        short_name='od',
        default='/tmp/tensorflow/mnist/',
        help=flags_core.help_wrap('Directory to write TensorBoard summaries'))

    flags.DEFINE_float(name='learning_rate',
                       short_name='lr',
                       default=0.01,
                       help=flags_core.help_wrap('Learning rate.'))

    flags.DEFINE_float(name='momentum',
                       short_name='m',
                       default=0.5,
                       help=flags_core.help_wrap('SGD momentum.'))

    flags.DEFINE_bool(name='no_gpu',
                      short_name='nogpu',
                      default=False,
                      help=flags_core.help_wrap(
                          'disables GPU usage even if a GPU is available'))

    flags_core.set_defaults(
        data_dir='/tmp/tensorflow/mnist/input_data',
        model_dir='/tmp/tensorflow/mnist/checkpoints/',
        batch_size=100,
        train_epochs=10,
    )
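The short names registered above act as command-line aliases. A small sketch that parses an argv list directly by calling the FLAGS object (standard absl-py behavior; the values are hypothetical):

from absl import flags

define_mnist_eager_flags()
flags.FLAGS(['prog', '--lr=0.05', '--momentum=0.9'])  # --lr aliases --learning_rate
print(flags.FLAGS.learning_rate, flags.FLAGS.momentum)  # 0.05 0.9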
Example #9
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)

    # The current implementation of ResNet v1 is numerically unstable when run
    # with fp16 and will produce NaN errors soon after training begins.
    msg = ('ResNet version 1 is not currently supported with fp16. '
           'Please use version 2 instead.')

    @flags.multi_flags_validator(['dtype', 'resnet_version'], message=msg)
    def _forbid_v1_fp16(flag_values):  # pylint: disable=unused-variable
        return (flags_core.DTYPE_MAP[flag_values['dtype']][0] != tf.float16
                or flag_values['resnet_version'] != '1')
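Validators like `_forbid_v1_fp16` run when flags are parsed; returning False aborts parsing with the given message. A self-contained sketch of the same mechanism, independent of `flags_core` (flag names here are illustrative):

from absl import flags

flags.DEFINE_string('precision', 'fp32', 'Data type.')
flags.DEFINE_enum('version', '2', ['1', '2'], 'Model version.')

@flags.multi_flags_validator(['precision', 'version'],
                             message='Version 1 does not support fp16.')
def _check(flag_values):
    return not (flag_values['precision'] == 'fp16'
                and flag_values['version'] == '1')

# A violating combination raises flags.IllegalFlagValueError at parse time:
# flags.FLAGS(['prog', '--precision=fp16', '--version=1'])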
Example #10
def define_flags():
    flags_core.define_base(num_gpu=False)
    flags_core.define_performance(dynamic_loss_scale=True, loss_scale=True)
    flags_core.define_image()
    flags_core.define_benchmark()
Example #11
File: common.py  Project: zeta1999/autodist
def define_keras_flags(dynamic_loss_scale=True):
    """Define flags for Keras models."""
    flags_core.define_base(clean=True,
                           num_gpu=True,
                           run_eagerly=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           distribution_strategy=True)
    flags_core.define_performance(num_parallel_calls=False,
                                  synthetic_data=True,
                                  dtype=True,
                                  all_reduce_alg=True,
                                  num_packs=True,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  loss_scale=True,
                                  fp16_implementation=True,
                                  tf_data_experimental_slack=True,
                                  enable_xla=True,
                                  force_v2_in_keras_compile=True,
                                  training_dataset_cache=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags_core.define_distribution()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_boolean(name='enable_eager',
                         default=False,
                         help='Enable eager?')
    flags.DEFINE_boolean(name='skip_eval',
                         default=False,
                         help='Skip evaluation?')
    # TODO(b/135607288): Remove this flag once we understand the root cause of
    # slowdown when setting the learning phase in Keras backend.
    flags.DEFINE_boolean(
        name='set_learning_phase_to_train',
        default=True,
        help='If skip eval, also set Keras learning phase to 1 (training).')
    flags.DEFINE_boolean(
        name='explicit_gpu_placement',
        default=False,
        help='If not using distribution strategy, explicitly set device scope '
        'for the Keras training loop.')
    flags.DEFINE_boolean(name='use_trivial_model',
                         default=False,
                         help='Whether to use a trivial Keras model.')
    flags.DEFINE_boolean(name='report_accuracy_metrics',
                         default=True,
                         help='Report metrics during training and evaluation.')
    flags.DEFINE_boolean(
        name='use_tensor_lr',
        default=False,
        help='Use learning rate tensor instead of a callback.')
    flags.DEFINE_boolean(name='enable_tensorboard',
                         default=False,
                         help='Whether to enable Tensorboard callback.')
    flags.DEFINE_integer(
        name='train_steps',
        default=None,
        help='The number of steps to run for training. If it is larger than '
        '# batches per epoch, then use # batches per epoch. This flag will be '
        'ignored if train_epochs is set to be larger than 1. ')
    flags.DEFINE_string(
        name='profile_steps',
        default=None,
        help=
        'Save profiling data to model dir at given range of global steps. The '
        'value must be a comma separated pair of positive integers, specifying '
        'the first and last step to profile. For example, "--profile_steps=2,4" '
        'triggers the profiler to process 3 steps, starting from the 2nd step. '
        'Note that profiler has a non-trivial performance overhead, and the '
        'output file can be gigantic if profiling many steps.')
    flags.DEFINE_boolean(
        name='batchnorm_spatial_persistent',
        default=True,
        help='Enable the spatial persistent mode for CuDNN batch norm kernel.')
    flags.DEFINE_boolean(
        name='enable_get_next_as_optional',
        default=False,
        help='Enable get_next_as_optional behavior in DistributedIterator.')
    flags.DEFINE_boolean(
        name='enable_checkpoint_and_export',
        default=False,
        help=
        'Whether to enable a checkpoint callback and export the savedmodel.')
    flags.DEFINE_string(name='tpu',
                        default='',
                        help='TPU address to connect to.')
    flags.DEFINE_integer(
        name='steps_per_loop',
        default=1,
        help='Number of steps per graph-mode loop. Only training step happens '
        'inside the loop. Callbacks will not be called inside. Will be capped at '
        'steps per epoch.')
Example #12
def define_resnet_flags(resnet_size_choices=None):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  datasets_num_parallel_batches=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    flags.DEFINE_float(name='reconst_loss_scale',
                       default=10.0,
                       help=flags_core.help_wrap('scale the reconst_loss'))
    flags.DEFINE_boolean(
        name='use_ce',
        default=False,
        help=flags_core.help_wrap(
            'use cross entropy loss for compressive sensing training'))
    flags.DEFINE_string(
        name='optimizer',
        short_name='opt',
        # default='sgd',
        default='adam',
        help=flags_core.help_wrap('Choose optimizer for training'))
    flags.DEFINE_boolean(
        name='clip_grad',
        default=False,
        help=flags_core.help_wrap('whether to clip weights during training'))
    flags.DEFINE_boolean(name='spectral_norm',
                         short_name='sn',
                         default=True,
                         help=flags_core.help_wrap(
                             'whether to use spectral norm in the cs part'))
    flags.DEFINE_float(name='ce_scale',
                       default=1.0,
                       help=flags_core.help_wrap('scale the cross_entropy'))
    flags.DEFINE_boolean(
        name='sep_grad_nrom',
        default=False,
        help=flags_core.help_wrap(
            'separate the gradients from reconstruction and ce, and norm the ce grad'
        ))
    flags.DEFINE_boolean(
        name='norm_teach_feature',
        default=False,
        help=flags_core.help_wrap(
            'norm each channel of teaching feature with BN params'))
    flags.DEFINE_boolean(name='no_dense_init',
                         default=False,
                         help=flags_core.help_wrap(
                             'do not init resnet/dense during fine tuning'))
    flags.DEFINE_float(name='compress_ratio',
                       default=0.1,
                       help=flags_core.help_wrap(
                           'the compress ratio of the offloading layer'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
Example #13
def define_keras_benchmark_flags():
    """Add flags for keras built-in application models."""
    flags_core.define_base(hooks=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(data_format="channels_last",
                            use_synthetic_data=True,
                            batch_size=32,
                            train_epochs=2)

    flags.DEFINE_enum(name="model",
                      default=None,
                      enum_values=MODELS.keys(),
                      case_sensitive=False,
                      help=flags_core.help_wrap("Model to be benchmarked."))

    flags.DEFINE_integer(
        name="num_train_images",
        default=1000,
        help=flags_core.help_wrap(
            "The number of synthetic images for training. The default value is "
            "1000."))

    flags.DEFINE_integer(
        name="num_eval_images",
        default=50,
        help=flags_core.help_wrap(
            "The number of synthetic images for evaluation. The default value is "
            "50."))

    flags.DEFINE_boolean(
        name="eager",
        default=False,
        help=flags_core.help_wrap(
            "To enable eager execution. Note that if eager execution is enabled, "
            "only one GPU is utilized even if multiple GPUs are provided and "
            "multi_gpu_model is used."))

    flags.DEFINE_boolean(
        name="dist_strat",
        default=False,
        help=flags_core.help_wrap(
            "To enable distribution strategy for model training and evaluation. "
            "Number of GPUs used for distribution strategy can be set by the "
            "argument --num_gpus."))

    flags.DEFINE_list(
        name="callbacks",
        default=["ExamplesPerSecondCallback", "LoggingMetricCallback"],
        help=flags_core.help_wrap(
            "A list of (case insensitive) strings to specify the names of "
            "callbacks. For example: `--callbacks ExamplesPerSecondCallback,"
            "LoggingMetricCallback`"))

    @flags.multi_flags_validator(
        ["eager", "dist_strat"],
        message="Both --eager and --dist_strat were set. Only one can be "
        "defined, as DistributionStrategy is not supported in Eager "
        "execution currently.")
    # pylint: disable=unused-variable
    def _check_eager_dist_strat(flag_dict):
        return not (flag_dict["eager"] and flag_dict["dist_strat"])
Example #14
def define_flags():
    flags_core.define_base(num_gpu=False)
    flags_core.define_performance()
    flags_core.define_image()
    flags_core.define_benchmark()
Example #15
def define_resnet_flags(resnet_size_choices=None,
                        dynamic_loss_scale=False,
                        fp16_implementation=False):
    """Add flags and validators for ResNet."""
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=False,
                                  tf_gpu_thread_mode=True,
                                  datasets_num_private_threads=True,
                                  dynamic_loss_scale=dynamic_loss_scale,
                                  fp16_implementation=fp16_implementation,
                                  loss_scale=True,
                                  tf_data_experimental_slack=True)
    flags_core.define_image()
    flags_core.define_benchmark()
    flags.adopt_module_key_flags(flags_core)

    flags.DEFINE_enum(
        name='resnet_version',
        short_name='rv',
        default='2',
        enum_values=['1', '2'],
        help=flags_core.help_wrap(
            'Version of ResNet. (1 or 2) See README.md for details.'))
    flags.DEFINE_bool(
        name='fine_tune',
        short_name='ft',
        default=False,
        help=flags_core.help_wrap(
            'If True do not train any parameters except for the final layer.'))
    flags.DEFINE_string(  # "/home/zxc/Liu/models-master-new/official/r1/resnet/model/"
        name='pretrained_model_checkpoint_path',
        short_name='pmcp',
        default="/home/zxc/Liu/models-master-new/official/r1/resnet/model/",
        # default=None,
        help=flags_core.help_wrap(
            'If not None initialize all the network except the final layer with '
            'these values'))
    flags.DEFINE_boolean(name='eval_only',
                         default=False,
                         help=flags_core.help_wrap(
                             'Skip training and only perform evaluation on '
                             'the latest checkpoint.'))
    flags.DEFINE_boolean(
        name='image_bytes_as_serving_input',
        default=False,
        help=flags_core.help_wrap(
            'If True exports savedmodel with serving signature that accepts '
            'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
            'represents the image. The former is easier to use for serving at '
            'the expense of image resize/cropping being done as part of model '
            'inference. Note, this flag only applies to ImageNet and cannot '
            'be used for CIFAR.'))
    flags.DEFINE_boolean(
        name='use_train_and_evaluate',
        default=False,
        help=flags_core.help_wrap(
            'If True, uses `tf.estimator.train_and_evaluate` for the training '
            'and evaluation loop, instead of separate calls to `classifier.train` '
            'and `classifier.evaluate`, which is the default behavior.'))
    flags.DEFINE_string(
        name='worker_hosts',
        default=None,
        help=flags_core.help_wrap(
            'Comma-separated list of worker ip:port pairs for running '
            'multi-worker models with DistributionStrategy.  The user would '
            'start the program on each host with identical value for this flag.'
        ))
    flags.DEFINE_integer(name='task_index',
                         default=-1,
                         help=flags_core.help_wrap(
                             'If multi-worker training, the task_index of '
                             'this worker.'))
    flags.DEFINE_bool(name='enable_lars',
                      default=False,
                      help=flags_core.help_wrap(
                          'Enable LARS optimizer for large batch training.'))
    flags.DEFINE_float(
        name='label_smoothing',
        default=0.0,
        help=flags_core.help_wrap(
            'Label smoothing parameter used in the softmax_cross_entropy'))
    flags.DEFINE_float(name='weight_decay',
                       default=1e-4,
                       help=flags_core.help_wrap(
                           'Weight decay coefficient for l2 regularization.'))

    choice_kwargs = dict(
        name='resnet_size',
        short_name='rs',
        default='50',
        help=flags_core.help_wrap('The size of the ResNet model to use.'))

    if resnet_size_choices is None:
        flags.DEFINE_string(**choice_kwargs)
    else:
        flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
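Across these variants, the keyword arguments forwarded to `flags_core.define_performance` determine which performance flags get defined at all, so each model exposes only the knobs it supports. A hedged call-site sketch for the definition above (the choice values are illustrative):

# Expose the loss-scaling knobs and restrict the size flag to two choices.
define_resnet_flags(resnet_size_choices=['18', '50'],
                    dynamic_loss_scale=True,
                    fp16_implementation=True)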