def define_resnet_flags(resnet_size_choices=None,
                        dynamic_loss_scale=False,
                        fp16_implementation=False):
  """Add flags and validators for ResNet."""
  flags_core.define_base(clean=True, train_epochs=True,
                         epochs_between_evals=True, stop_threshold=True,
                         num_gpu=True, hooks=True, export_dir=True,
                         distribution_strategy=True)
  flags_core.define_performance(num_parallel_calls=False,
                                inter_op=True,
                                intra_op=True,
                                synthetic_data=True,
                                dtype=True,
                                all_reduce_alg=True,
                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                fp16_implementation=fp16_implementation,
                                loss_scale=True,
                                tf_data_experimental_slack=True,
                                max_train_steps=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_enum(
      name='resnet_version', short_name='rv', default='1',
      enum_values=['1', '2'],
      help=flags_core.help_wrap(
          'Version of ResNet. (1 or 2) See README.md for details.'))
  flags.DEFINE_bool(
      name='fine_tune', short_name='ft', default=False,
      help=flags_core.help_wrap(
          'If True do not train any parameters except for the final layer.'))
  flags.DEFINE_string(
      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
      help=flags_core.help_wrap(
          'If not None initialize all the network except the final layer with '
          'these values.'))
  flags.DEFINE_boolean(
      name='eval_only', default=False,
      help=flags_core.help_wrap(
          'Skip training and only perform evaluation on '
          'the latest checkpoint.'))
  flags.DEFINE_boolean(
      name='image_bytes_as_serving_input', default=False,
      help=flags_core.help_wrap(
          'If True exports savedmodel with serving signature that accepts '
          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
          'represents the image. The former is easier to use for serving at '
          'the expense of image resize/cropping being done as part of model '
          'inference. Note, this flag only applies to ImageNet and cannot '
          'be used for CIFAR.'))
  flags.DEFINE_boolean(
      name='use_train_and_evaluate', default=False,
      help=flags_core.help_wrap(
          'If True, uses `tf.estimator.train_and_evaluate` for the training '
          'and evaluation loop, instead of separate calls to '
          '`classifier.train` and `classifier.evaluate`, which is the default '
          'behavior.'))
  flags.DEFINE_bool(
      name='enable_lars', default=False,
      help=flags_core.help_wrap(
          'Enable LARS optimizer for large batch training.'))
  flags.DEFINE_float(
      name='label_smoothing', default=0.0,
      help=flags_core.help_wrap(
          'Label smoothing parameter used in the softmax_cross_entropy.'))
  flags.DEFINE_float(
      name='weight_decay', default=1e-4,
      help=flags_core.help_wrap(
          'Weight decay coefficient for l2 regularization.'))
  flags.DEFINE_float(
      name='percent', default=0,
      help=flags_core.help_wrap('Percent of the training data to poison.'))
  flags.DEFINE_bool(
      name='adv_train', default=False,
      help=flags_core.help_wrap('Whether to use adversarial training.'))

  choice_kwargs = dict(
      name='resnet_size', short_name='rs', default='50',
      help=flags_core.help_wrap('The size of the ResNet model to use.'))

  if resnet_size_choices is None:
    flags.DEFINE_string(**choice_kwargs)
  else:
    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
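# --- Usage sketch (not part of the original module) --------------------------
# A minimal, assumed wiring of the flag definitions above into an absl program.
# The training entry point is not shown here, so the example main only reads a
# few parsed flags back; the size choices passed in are illustrative.
from absl import app
from absl import flags


def _example_resnet_main(_):
  flags_obj = flags.FLAGS
  print('resnet_size=%s, resnet_version=%s, fine_tune=%s' %
        (flags_obj.resnet_size, flags_obj.resnet_version, flags_obj.fine_tune))

# Assumed invocation:
#   define_resnet_flags(resnet_size_choices=['18', '34', '50', '101', '152'])
#   app.run(_example_resnet_main)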
def define_keras_flags(dynamic_loss_scale=True):
  """Define flags for Keras models."""
  flags_core.define_base(run_eagerly=True)
  flags_core.define_performance(num_parallel_calls=False,
                                synthetic_data=True,
                                dtype=True,
                                all_reduce_alg=True,
                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                loss_scale=True,
                                tf_data_experimental_slack=True,
                                enable_xla=True,
                                force_v2_in_keras_compile=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # slowdown when setting the learning phase in Keras backend.
  flags.DEFINE_boolean(
      name='set_learning_phase_to_train', default=True,
      help='If skip eval, also set Keras learning phase to 1 (training).')
  flags.DEFINE_boolean(
      name='explicit_gpu_placement', default=False,
      help='If not using distribution strategy, explicitly set device scope '
      'for the Keras training loop.')
  flags.DEFINE_boolean(name='use_trivial_model', default=False,
                       help='Whether to use a trivial Keras model.')
  flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
                       help='Report metrics during training and evaluation.')
  flags.DEFINE_boolean(
      name='use_tensor_lr', default=False,
      help='Use learning rate tensor instead of a callback.')
  flags.DEFINE_boolean(name='enable_tensorboard', default=False,
                       help='Whether to enable TensorBoard callback.')
  flags.DEFINE_integer(
      name='train_steps', default=None,
      help='The number of steps to run for training. If it is larger than '
      '# batches per epoch, then use # batches per epoch. When this flag is '
      'set, only one epoch is going to run for training.')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of steps. The '
      'value must be a comma separated pair of positive integers, specifying '
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      'triggers the profiler to process 3 steps, starting from the 2nd step. '
      'Note that profiler has a non-trivial performance overhead, and the '
      'output file can be gigantic if profiling many steps.')
  flags.DEFINE_boolean(
      name='data_delay_prefetch', default=False,
      help='Add a small delay in tf.data prefetch to prioritize memory copy '
      'of other tensors over the data minibatch for the (T+1)th step. It '
      'should help improve performance using EagerIterator and function. The '
      'codepath when enabling this feature is experimental and will be '
      'removed once the corresponding performance features are fully '
      'supported in TensorFlow.')
  flags.DEFINE_boolean(
      name='batchnorm_spatial_persistent', default=True,
      help='Enable the spatial persistent mode for CuDNN batch norm kernel.')
  flags.DEFINE_boolean(
      name='enable_get_next_as_optional', default=False,
      help='Enable get_next_as_optional behavior in DistributedIterator.')
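# Hypothetical helper (not from the original code) showing how the
# "--profile_steps=first,last" string documented above could be parsed before
# handing the range to a profiler callback.
def parse_profile_steps(profile_steps):
  """Returns (first_step, last_step) parsed from the --profile_steps value."""
  if not profile_steps:
    return None
  parts = [int(p) for p in profile_steps.split(',')]
  if len(parts) != 2 or parts[0] < 1 or parts[1] < parts[0]:
    raise ValueError(
        'profile_steps must be a comma separated pair of increasing positive '
        'integers, got %r' % profile_steps)
  return parts[0], parts[1]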
def define_keras_flags(dynamic_loss_scale=True,
                       model=False,
                       optimizer=False,
                       pretrained_filepath=False):
  """Define flags for Keras models."""
  flags_core.define_base(clean=True, num_gpu=True, run_eagerly=True,
                         train_epochs=True, epochs_between_evals=True,
                         distribution_strategy=True)
  flags_core.define_performance(num_parallel_calls=False,
                                synthetic_data=True,
                                dtype=True,
                                all_reduce_alg=True,
                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                loss_scale=True,
                                fp16_implementation=True,
                                tf_data_experimental_slack=True,
                                enable_xla=True,
                                force_v2_in_keras_compile=True,
                                training_dataset_cache=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # slowdown when setting the learning phase in Keras backend.
  flags.DEFINE_boolean(
      name='set_learning_phase_to_train', default=True,
      help='If skip eval, also set Keras learning phase to 1 (training).')
  flags.DEFINE_boolean(
      name='explicit_gpu_placement', default=False,
      help='If not using distribution strategy, explicitly set device scope '
      'for the Keras training loop.')
  flags.DEFINE_boolean(name='use_trivial_model', default=False,
                       help='Whether to use a trivial Keras model.')
  flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
                       help='Report metrics during training and evaluation.')
  flags.DEFINE_boolean(
      name='use_tensor_lr', default=False,
      help='Use learning rate tensor instead of a callback.')
  flags.DEFINE_boolean(name='enable_tensorboard', default=False,
                       help='Whether to enable TensorBoard callback.')
  flags.DEFINE_integer(
      name='train_steps', default=None,
      help='The number of steps to run for training. If it is larger than '
      '# batches per epoch, then use # batches per epoch. This flag will be '
      'ignored if train_epochs is set to be larger than 1.')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of global steps. '
      'The value must be a comma separated pair of positive integers, '
      'specifying the first and last step to profile. For example, '
      '"--profile_steps=2,4" triggers the profiler to process 3 steps, '
      'starting from the 2nd step. Note that profiler has a non-trivial '
      'performance overhead, and the output file can be gigantic if profiling '
      'many steps.')
  flags.DEFINE_boolean(
      name='batchnorm_spatial_persistent', default=True,
      help='Enable the spatial persistent mode for CuDNN batch norm kernel.')
  flags.DEFINE_boolean(
      name='enable_get_next_as_optional', default=False,
      help='Enable get_next_as_optional behavior in DistributedIterator.')
  flags.DEFINE_boolean(
      name='enable_checkpoint_and_export', default=False,
      help='Whether to enable a checkpoint callback and export the '
      'SavedModel.')
  flags.DEFINE_string(name='tpu', default='', help='TPU address to connect to.')
  flags.DEFINE_integer(
      name='steps_per_loop', default=1,
      help='Number of steps per graph-mode loop. Only training step happens '
      'inside the loop. Callbacks will not be called inside. Will be capped '
      'at steps per epoch.')
  flags.DEFINE_boolean(
      name='use_tf_keras_layers', default=False,
      help='Whether to use tf.keras.layers instead of tf.python.keras.layers. '
      'It only changes imagenet resnet model layers for now. This flag is a '
      'temporary flag during the transition to tf.keras.layers. Do not use '
      'this flag for external usage; it will be removed shortly.')

  if model:
    flags.DEFINE_string('model', 'resnet50_v1.5',
                        'Name of model preset. (mobilenet, resnet50_v1.5)')
  if optimizer:
    flags.DEFINE_string('optimizer', 'resnet50_default',
                        'Name of optimizer preset. '
                        '(mobilenet_default, resnet50_default)')
  if pretrained_filepath:
    flags.DEFINE_string('pretrained_filepath', '', 'Pretrained file path.')
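# Illustrative sketch only: one way the optional 'model' preset flag above
# could be mapped to a Keras model builder. The mapping to
# tf.keras.applications is an assumption for demonstration; the repository's
# own builders (e.g. its ResNet50 v1.5 implementation) are not shown here.
import tensorflow as tf

_MODEL_PRESETS = {
    'resnet50_v1.5': lambda num_classes: tf.keras.applications.ResNet50(
        weights=None, classes=num_classes),
    'mobilenet': lambda num_classes: tf.keras.applications.MobileNet(
        weights=None, classes=num_classes),
}


def build_model_from_preset(name, num_classes=1000):
  """Builds a Keras model for the given preset name (illustrative only)."""
  return _MODEL_PRESETS[name](num_classes)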
def define_common_bert_flags():
  """Define common flags for BERT tasks."""
  flags_core.define_base(
      data_dir=False,
      model_dir=True,
      clean=False,
      train_epochs=False,
      epochs_between_evals=False,
      stop_threshold=False,
      batch_size=False,
      num_gpu=True,
      export_dir=False,
      distribution_strategy=True,
      run_eagerly=True)
  flags_core.define_distribution()
  flags.DEFINE_string('bert_config_file', None,
                      'Bert configuration file to define core bert layers.')
  flags.DEFINE_string(
      'model_export_path', None,
      'Path to the directory where the trained model will be exported.')
  flags.DEFINE_string('tpu', '', 'TPU address to connect to.')
  flags.DEFINE_string(
      'init_checkpoint', None,
      'Initial checkpoint (usually from a pre-trained BERT model).')
  flags.DEFINE_integer('num_train_epochs', 3,
                       'Total number of training epochs to perform.')
  flags.DEFINE_integer(
      'steps_per_loop', None,
      'Number of steps per graph-mode loop. Only training step '
      'happens inside the loop. Callbacks will not be called '
      'inside. If not set, the value will be configured depending on the '
      'devices available.')
  flags.DEFINE_float('learning_rate', 5e-5,
                     'The initial learning rate for Adam.')
  flags.DEFINE_float('end_lr', 0.0,
                     'The end learning rate for learning rate decay.')
  flags.DEFINE_string('optimizer_type', 'adamw',
                      'The type of optimizer to use for training (adamw|lamb)')
  flags.DEFINE_boolean(
      'scale_loss', False,
      'Whether to divide the loss by the number of replicas inside the '
      'per-replica loss function.')
  flags.DEFINE_boolean(
      'use_keras_compile_fit', False,
      'If True, uses Keras compile/fit() API for training logic. Otherwise '
      'use custom training loop.')
  flags.DEFINE_string(
      'hub_module_url', None, 'TF-Hub path/url to Bert module. '
      'If specified, init_checkpoint flag should not be used.')
  flags.DEFINE_bool('hub_module_trainable', True,
                    'True to make keras layers in the hub module trainable.')

  flags_core.define_log_steps()

  # Adds flags for mixed precision and multi-worker training.
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=True,
      dynamic_loss_scale=True,
      loss_scale=True,
      all_reduce_alg=True,
      num_packs=False,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
      enable_xla=True,
      fp16_implementation=True,
  )

  # Adds gin configuration flags.
  hyperparams_flags.define_gin_flags()
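# Hedged sketch: a plausible resolution of the None default for
# --steps_per_loop described above. The real device-dependent heuristic lives
# in the training library and may differ from this.
def resolve_steps_per_loop(flags_obj, steps_per_epoch, use_tpu):
  """Picks a steps_per_loop value when the flag is unset (illustrative)."""
  if flags_obj.steps_per_loop is not None:
    return min(flags_obj.steps_per_loop, steps_per_epoch)
  # Longer loops amortize launch overhead on TPU; keep the loop short elsewhere
  # so callbacks and logging stay responsive.
  return min(200 if use_tpu else 1, steps_per_epoch)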
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, etc.).
  flags_core.define_base(num_gpu=True, distribution_strategy=True)
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      loss_scale=True,
      all_reduce_alg=True,
      num_packs=True,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
      enable_xla=True,
      force_v2_in_keras_compile=True,
      fp16_implementation=True
  )

  # Additional performance flags
  # TODO(b/76028325): Remove when generic layout optimizer is ready.
  flags.DEFINE_boolean(
      name='enable_grappler_layout_optimizer',
      default=True,
      help='Enable Grappler layout optimizer. Currently Grappler can '
           'de-optimize fp16 graphs by forcing NCHW layout for all '
           'convolutions and batch normalizations, and this flag allows to '
           'disable it.'
  )

  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)
  flags_core.define_distribution()

  flags.DEFINE_integer(
      name='train_steps', short_name='ts', default=300000,
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
      name='steps_between_evals', short_name='sbe', default=1000,
      help=flags_core.help_wrap(
          'The number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
  flags.DEFINE_boolean(
      name='enable_time_history', default=True,
      help='Whether to enable TimeHistory callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard', default=False,
      help='Whether to enable TensorBoard callback.')
  flags.DEFINE_boolean(
      name='enable_metrics_in_training', default=False,
      help='Whether to enable metrics during training.')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of steps. The '
      'value must be a comma separated pair of positive integers, specifying '
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      'triggers the profiler to process 3 steps, starting from the 2nd step. '
      'Note that profiler has a non-trivial performance overhead, and the '
      'output file can be gigantic if profiling many steps.')
  # Set flags from the flags_core module as 'key flags' so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name='param_set', short_name='mp', default='big',
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))
  flags.DEFINE_bool(
      name='static_batch', short_name='sb', default=False,
      help=flags_core.help_wrap(
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input '
          'shape must be static (e.g. running on TPU), this setting will be '
          'ignored and static batching will always be used.'))
  flags.DEFINE_integer(
      name='max_length', short_name='ml', default=256,
      help=flags_core.help_wrap(
          'Max sentence length for Transformer. Default is 256. Note: Usually '
          'it is more effective to use a smaller max length if static_batch '
          'is enabled, e.g. 64.'))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name='validation_steps', short_name='vs', default=64,
      help=flags_core.help_wrap('The number of steps used in validation.'))

  # BLEU score computation
  flags.DEFINE_string(
      name='bleu_source', short_name='bls', default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text to translate when calculating '
          'the official BLEU score. Both --bleu_source and --bleu_ref must be '
          'set.'))
  flags.DEFINE_string(
      name='bleu_ref', short_name='blr', default=None,
      help=flags_core.help_wrap(
          'Path to reference file containing the expected translations when '
          'calculating the official BLEU score. Both --bleu_source and '
          '--bleu_ref must be set.'))
  flags.DEFINE_string(
      name='vocab_file', short_name='vf', default=None,
      help=flags_core.help_wrap(
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to '
          'find the vocab file.'))
  flags.DEFINE_string(
      name='mode', default='train',
      help=flags_core.help_wrap('mode: train, eval, or predict'))
  flags.DEFINE_bool(
      name='use_ctl',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs with custom training loop.'))
  flags.DEFINE_integer(
      name='decode_batch_size',
      default=32,
      help=flags_core.help_wrap(
          'Global batch size used for Transformer autoregressive decoding on '
          'TPU.'))
  flags.DEFINE_integer(
      name='decode_max_length',
      default=97,
      help=flags_core.help_wrap(
          'Max sequence length of the decode/eval data. This is used by '
          'Transformer autoregressive decoding on TPU to have minimum '
          'paddings.'))
  flags.DEFINE_bool(
      name='padded_decode',
      default=False,
      help=flags_core.help_wrap(
          'Whether the autoregressive decoding runs with input data padded to '
          'the decode_max_length. For TPU/XLA-GPU runs, this flag has to be '
          'set due to the static shape requirement. Although CPU/GPU could '
          'also use padded_decode, it has not been tested. In addition, this '
          'method will introduce unnecessary overheads which grow '
          'quadratically with the max sequence length.'))

  flags_core.set_defaults(data_dir='/tmp/translate_ende',
                          model_dir='/tmp/transformer_model',
                          batch_size=None)

  # pylint: disable=unused-variable
  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref'],
      message='Both or neither --bleu_source and --bleu_ref must be defined.')
  def _check_bleu_files(flags_dict):
    return (flags_dict['bleu_source'] is None) == (
        flags_dict['bleu_ref'] is None)

  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref', 'vocab_file'],
      message='--vocab_file must be defined if --bleu_source and --bleu_ref '
              'are defined.')
  def _check_bleu_vocab_file(flags_dict):
    if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
      return flags_dict['vocab_file'] is not None
    return True
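# Demonstration (assumed absl validator semantics) of the multi-flag
# validators above: parsing argv with --bleu_source but no --bleu_ref is
# rejected. Requires define_transformer_flags() to have been called first; the
# file path below is a placeholder.
from absl import flags


def _demo_bleu_flag_validation():
  try:
    flags.FLAGS(['transformer_main', '--bleu_source=/tmp/newstest2014.en'])
  except flags.IllegalFlagValueError as e:
    print('Flag validation failed as expected:', e)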
def define_keras_flags(dynamic_loss_scale=True):
  """Define flags for Keras models."""
  flags_core.define_base(
      clean=True,
      num_gpu=True,
      run_eagerly=True,
      train_epochs=True,
      epochs_between_evals=True,
      distribution_strategy=True,
  )
  flags_core.define_performance(
      num_parallel_calls=False,
      synthetic_data=True,
      dtype=True,
      all_reduce_alg=True,
      num_packs=True,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
      dynamic_loss_scale=dynamic_loss_scale,
      loss_scale=True,
      fp16_implementation=True,
      tf_data_experimental_slack=True,
      enable_xla=True,
      force_v2_in_keras_compile=True,
      training_dataset_cache=True,
  )
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_boolean(name="enable_eager", default=False, help="Enable eager?")
  flags.DEFINE_boolean(name="skip_eval", default=False, help="Skip evaluation?")
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # slowdown when setting the learning phase in Keras backend.
  flags.DEFINE_boolean(
      name="set_learning_phase_to_train",
      default=True,
      help="If skip eval, also set Keras learning phase to 1 (training).",
  )
  flags.DEFINE_boolean(
      name="explicit_gpu_placement",
      default=False,
      help="If not using distribution strategy, explicitly set device scope "
      "for the Keras training loop.",
  )
  flags.DEFINE_boolean(name="use_trivial_model", default=False,
                       help="Whether to use a trivial Keras model.")
  flags.DEFINE_boolean(
      name="report_accuracy_metrics",
      default=True,
      help="Report metrics during training and evaluation.",
  )
  flags.DEFINE_boolean(
      name="use_tensor_lr",
      default=False,
      help="Use learning rate tensor instead of a callback.")
  flags.DEFINE_boolean(name="enable_tensorboard", default=False,
                       help="Whether to enable TensorBoard callback.")
  flags.DEFINE_integer(
      name="train_steps",
      default=None,
      help="The number of steps to run for training. If it is larger than "
      "# batches per epoch, then use # batches per epoch. This flag will be "
      "ignored if train_epochs is set to be larger than 1.",
  )
  flags.DEFINE_string(
      name="profile_steps",
      default=None,
      help="Save profiling data to model dir at given range of steps. The "
      "value must be a comma separated pair of positive integers, specifying "
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      "triggers the profiler to process 3 steps, starting from the 2nd step. "
      "Note that profiler has a non-trivial performance overhead, and the "
      "output file can be gigantic if profiling many steps.",
  )
  flags.DEFINE_boolean(
      name="data_delay_prefetch",
      default=False,
      help="Add a small delay in tf.data prefetch to prioritize memory copy "
      "of other tensors over the data minibatch for the (T+1)th step. It "
      "should help improve performance using EagerIterator and function. The "
      "codepath when enabling this feature is experimental and will be "
      "removed once the corresponding performance features are fully "
      "supported in TensorFlow.",
  )
  flags.DEFINE_boolean(
      name="batchnorm_spatial_persistent",
      default=True,
      help="Enable the spatial persistent mode for CuDNN batch norm kernel.",
  )
  flags.DEFINE_boolean(
      name="enable_get_next_as_optional",
      default=False,
      help="Enable get_next_as_optional behavior in DistributedIterator.",
  )
  flags.DEFINE_boolean(
      name="enable_checkpoint_and_export",
      default=False,
      help="Whether to enable a checkpoint callback and export the "
      "SavedModel.",
  )
  flags.DEFINE_string(name="tpu", default="", help="TPU address to connect to.")
  flags.DEFINE_integer(
      name="steps_per_loop",
      default=1,
      help="Number of steps per graph-mode loop. Only training step happens "
      "inside the loop. Callbacks will not be called inside. Will be capped "
      "at steps per epoch.",
  )
def define_common_bert_flags():
  """Define common flags for BERT tasks."""
  flags_core.define_base(
      data_dir=False,
      model_dir=True,
      clean=False,
      train_epochs=False,
      epochs_between_evals=False,
      stop_threshold=False,
      batch_size=False,
      num_gpu=True,
      hooks=False,
      export_dir=False,
      distribution_strategy=True,
      run_eagerly=True)
  flags_core.define_distribution()
  flags.DEFINE_string('bert_config_file', None,
                      'Bert configuration file to define core bert layers.')
  flags.DEFINE_string(
      'model_export_path', None,
      'Path to the directory where the trained model will be exported.')
  flags.DEFINE_string('tpu', '', 'TPU address to connect to.')
  flags.DEFINE_string(
      'init_checkpoint', None,
      'Initial checkpoint (usually from a pre-trained BERT model).')
  flags.DEFINE_integer('num_train_epochs', 3,
                       'Total number of training epochs to perform.')
  flags.DEFINE_integer(
      'steps_per_loop', 200,
      'Number of steps per graph-mode loop. Only training step '
      'happens inside the loop. Callbacks will not be called '
      'inside.')
  flags.DEFINE_float('learning_rate', 5e-5,
                     'The initial learning rate for Adam.')
  flags.DEFINE_boolean(
      'scale_loss', False,
      'Whether to divide the loss by the number of replicas inside the '
      'per-replica loss function.')
  flags.DEFINE_boolean(
      'use_keras_compile_fit', False,
      'If True, uses Keras compile/fit() API for training logic. Otherwise '
      'use custom training loop.')
  flags.DEFINE_string(
      'hub_module_url', None, 'TF-Hub path/url to Bert module. '
      'If specified, init_checkpoint flag should not be used.')
  flags.DEFINE_enum(
      'model_type', 'bert', ['bert', 'albert'],
      'Specifies the type of the model. '
      'If "bert", will use canonical BERT; if "albert", will use ALBERT '
      'model.')

  # Adds flags for mixed precision training.
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=True,
      dynamic_loss_scale=True,
      loss_scale=True,
      all_reduce_alg=False,
      num_packs=False,
      enable_xla=True,
      fp16_implementation=True,
  )
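# Hedged sketch of consuming --bert_config_file and --model_type. The concrete
# config/model classes used by the original trainer are not shown here, so
# this only loads the raw JSON and returns it alongside the selected type.
import json

import tensorflow as tf


def load_model_config(flags_obj):
  """Reads the BERT/ALBERT config JSON named by the flags (illustrative)."""
  with tf.io.gfile.GFile(flags_obj.bert_config_file, 'r') as reader:
    config = json.loads(reader.read())
  # Downstream code would pick the BERT or ALBERT encoder based on model_type.
  return flags_obj.model_type, config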
def define_keras_flags(dynamic_loss_scale=True,
                       model=False,
                       optimizer=False,
                       pretrained_filepath=False):
  """Define flags for Keras models."""
  flags_core.define_base(clean=True, num_gpu=True, run_eagerly=True,
                         train_epochs=True, epochs_between_evals=True,
                         distribution_strategy=True)
  flags_core.define_performance(num_parallel_calls=False,
                                synthetic_data=True,
                                dtype=True,
                                all_reduce_alg=True,
                                num_packs=True,
                                tf_gpu_thread_mode=True,
                                datasets_num_private_threads=True,
                                dynamic_loss_scale=dynamic_loss_scale,
                                loss_scale=True,
                                fp16_implementation=True,
                                tf_data_experimental_slack=True,
                                enable_xla=True,
                                training_dataset_cache=True)
  flags_core.define_image()
  flags_core.define_benchmark()
  flags_core.define_distribution()
  flags.adopt_module_key_flags(flags_core)

  flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
  # TODO(b/135607288): Remove this flag once we understand the root cause of
  # slowdown when setting the learning phase in Keras backend.
  flags.DEFINE_boolean(
      name='set_learning_phase_to_train', default=True,
      help='If skip eval, also set Keras learning phase to 1 (training).')
  flags.DEFINE_boolean(
      name='explicit_gpu_placement', default=False,
      help='If not using distribution strategy, explicitly set device scope '
      'for the Keras training loop.')
  flags.DEFINE_boolean(name='use_trivial_model', default=False,
                       help='Whether to use a trivial Keras model.')
  flags.DEFINE_boolean(name='report_accuracy_metrics', default=True,
                       help='Report metrics during training and evaluation.')
  flags.DEFINE_boolean(
      name='use_tensor_lr', default=True,
      help='Use learning rate tensor instead of a callback.')
  flags.DEFINE_boolean(name='enable_tensorboard', default=False,
                       help='Whether to enable TensorBoard callback.')
  flags.DEFINE_integer(
      name='train_steps', default=None,
      help='The number of steps to run for training. If it is larger than '
      '# batches per epoch, then use # batches per epoch. This flag will be '
      'ignored if train_epochs is set to be larger than 1.')
  flags.DEFINE_boolean(
      name='batchnorm_spatial_persistent', default=True,
      help='Enable the spatial persistent mode for CuDNN batch norm kernel.')
  flags.DEFINE_boolean(
      name='enable_get_next_as_optional', default=False,
      help='Enable get_next_as_optional behavior in DistributedIterator.')
  flags.DEFINE_boolean(
      name='enable_checkpoint_and_export', default=False,
      help='Whether to enable a checkpoint callback and export the '
      'SavedModel.')
  flags.DEFINE_string(name='tpu', default='', help='TPU address to connect to.')
  flags.DEFINE_integer(
      name='steps_per_loop', default=500,
      help='Number of steps per training loop. Only training step happens '
      'inside the loop. Callbacks will not be called inside. Will be capped '
      'at steps per epoch.')
  flags.DEFINE_boolean(
      name='use_tf_while_loop', default=True,
      help='Whether to build a tf.while_loop inside the training loop on the '
      'host. Setting it to True is critical to have peak performance on '
      'TPU.')

  if model:
    flags.DEFINE_string('model', 'resnet50_v1.5',
                        'Name of model preset. (mobilenet, resnet50_v1.5)')
  if optimizer:
    flags.DEFINE_string('optimizer', 'resnet50_default',
                        'Name of optimizer preset. '
                        '(mobilenet_default, resnet50_default)')
    # TODO(kimjaehong): Replace as general hyper-params not only for mobilenet.
    flags.DEFINE_float('initial_learning_rate_per_sample', 0.00007,
                       'Initial value of learning rate per sample for '
                       'mobilenet_default.')
    flags.DEFINE_float('lr_decay_factor', 0.94,
                       'Learning rate decay factor for mobilenet_default.')
    flags.DEFINE_float('num_epochs_per_decay', 2.5,
                       'Number of epochs per decay for mobilenet_default.')
  if pretrained_filepath:
    flags.DEFINE_string('pretrained_filepath', '', 'Pretrained file path.')
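# Hedged sketch: how the three mobilenet_default flags above could be combined
# into a per-step exponential decay schedule. The scaling by global batch size
# and the staircase choice are assumptions, not the repository's exact code.
import tensorflow as tf


def mobilenet_default_lr_schedule(flags_obj, batch_size, steps_per_epoch):
  """Builds an exponential-decay LR schedule from the mobilenet flags."""
  initial_lr = flags_obj.initial_learning_rate_per_sample * batch_size
  decay_steps = int(flags_obj.num_epochs_per_decay * steps_per_epoch)
  return tf.keras.optimizers.schedules.ExponentialDecay(
      initial_learning_rate=initial_lr,
      decay_steps=decay_steps,
      decay_rate=flags_obj.lr_decay_factor,
      staircase=True)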