Example #1
def define_gtsrb_flags():
    flags_core.define_base(clean=True,
                           num_gpu=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           distribution_strategy=True)
    flags_core.define_device()
    flags_core.define_distribution()
    absl_flags.DEFINE_string('config', None, 'config file path')
    absl_flags.DEFINE_bool('download', False,
                           'Whether to download data to `--data_dir`.')
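A minimal sketch of how a flag-definition function like this is typically wired into an absl entry point; the main body below is hypothetical and only the absl calls are standard:

from absl import app
from absl import flags

FLAGS = flags.FLAGS


def main(_):
    # Every flag registered in define_gtsrb_flags() is available on FLAGS here.
    print('config file:', FLAGS.config)
    print('download requested:', FLAGS.download)


if __name__ == '__main__':
    define_gtsrb_flags()  # flags must be registered before app.run() parses argv
    app.run(main)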
Example #2
def define_mnist_flags():
    """Define command line flags for MNIST model."""
    flags_core.define_base(clean=True,
                           num_gpu=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           distribution_strategy=True)
    flags_core.define_device()
    flags_core.define_distribution()
    flags.DEFINE_bool('download', False,
                      'Whether to download data to `--data_dir`.')
    FLAGS.set_default('batch_size', 1024)
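FLAGS.set_default only changes the default value; an explicit command-line value still wins. A small standalone sketch of that behavior (the demo_batch_size flag is hypothetical):

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_integer('demo_batch_size', 32, 'Demo flag.')
FLAGS.set_default('demo_batch_size', 1024)

FLAGS(['prog'])                           # FLAGS.demo_batch_size == 1024
FLAGS.unparse_flags()                     # reset parse state
FLAGS(['prog', '--demo_batch_size=512'])  # FLAGS.demo_batch_size == 512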
Example #3
def initialize_common_flags():
    """Define the common flags across models."""
    define_common_hparams_flags()

    flags_core.define_device(tpu=True)
    flags_core.define_base(num_gpu=True,
                           model_dir=False,
                           data_dir=False,
                           batch_size=False)
    flags_core.define_distribution(worker_hosts=True, task_index=True)
    flags_core.define_performance(all_reduce_alg=True, num_packs=True)

    # Reset the default value of num_gpus to zero.
    FLAGS.num_gpus = 0

    flags.DEFINE_string(
        'strategy_type', 'mirrored', 'Type of distribute strategy. '
        'One of mirrored, tpu and multiworker.')
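A hedged sketch of how a --strategy_type value like this might be resolved into a tf.distribute strategy; the real models code uses its own distribution utilities, so the mapping below is only illustrative:

import tensorflow as tf


def resolve_strategy(strategy_type, tpu_address=None):
    """Illustrative mapping from --strategy_type to a tf.distribute strategy."""
    if strategy_type == 'mirrored':
        return tf.distribute.MirroredStrategy()
    if strategy_type == 'multiworker':
        return tf.distribute.experimental.MultiWorkerMirroredStrategy()
    if strategy_type == 'tpu':
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address)
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        # tf.distribute.experimental.TPUStrategy on older TF 2.x releases
        return tf.distribute.TPUStrategy(resolver)
    raise ValueError('Unknown strategy_type: %s' % strategy_type)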
Example #4
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_string(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_bool(
      name="hash_pipeline", default=False, help=flags_core.help_wrap(
          "This flag will perform a separate run of the pipeline and hash "
          "batches as they are produced. \nNOTE: this will significantly slow "
          "training. However it is useful to confirm that a random seed is "
          "does indeed make the data pipeline deterministic."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_subprocess", default=True, help=flags_core.help_wrap(
          "By default, ncf_main.py starts async data generation process as a "
          "subprocess. If set to False, ncf_main.py will assume the async data "
          "generation process has already been started by the user."))

  flags.DEFINE_integer(name="cache_id", default=None, help=flags_core.help_wrap(
      "Use a specified cache_id rather than using a timestamp. This is only "
      "needed to synchronize across multiple workers. Generally this flag will "
      "not need to be set."
  ))

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="use_estimator", default=True, help=flags_core.help_wrap(
          "If True, use Estimator to train. Setting to False is slightly "
          "faster, but when False, the following are currently unsupported:\n"
          "  * Using TPUs\n"
          "  * Using more than 1 GPU\n"
          "  * Reloading from checkpoints\n"
          "  * Any hooks specified with --hooks\n"))

  flags.DEFINE_bool(
      name="use_while_loop", default=None, help=flags_core.help_wrap(
          "If set, run an entire epoch in a session.run() call using a "
          "TensorFlow while loop. This can improve performance, but will not "
          "print out losses throughout the epoch. Requires "
          "--use_estimator=false"
      ))

  xla_message = "--use_while_loop requires --use_estimator=false"
  @flags.multi_flags_validator(["use_while_loop", "use_estimator"],
                               message=xla_message)
  def while_loop_validator(flag_dict):
    return (not flag_dict["use_while_loop"] or
            not flag_dict["use_estimator"])
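The validators above run when the flags are parsed: an offending value makes FLAGS(argv) raise flags.IllegalFlagValueError. A standalone sketch of the same pattern, using hypothetical demo_* flag names:

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_integer('demo_eval_batch_size', None, 'Demo flag.')
flags.DEFINE_bool('demo_use_xla_for_gpu', False, 'Demo flag.')
flags.DEFINE_string('demo_tpu', None, 'Demo flag.')


@flags.validator('demo_eval_batch_size', 'demo_eval_batch_size must be positive.')
def _check_eval_size(value):
  return value is None or value > 0


@flags.multi_flags_validator(
    ['demo_use_xla_for_gpu', 'demo_tpu'],
    message='--demo_use_xla_for_gpu is incompatible with --demo_tpu')
def _check_xla(flag_dict):
  return not flag_dict['demo_use_xla_for_gpu'] or not flag_dict['demo_tpu']


FLAGS(['prog', '--demo_eval_batch_size=8'])    # parses cleanly
# FLAGS(['prog', '--demo_eval_batch_size=-1']) would raise flags.IllegalFlagValueError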
Example #5
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, etc.).
  flags_core.define_base(num_gpu=True, distribution_strategy=True)
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      loss_scale=True,
      all_reduce_alg=True,
      num_packs=True,
      tf_gpu_thread_mode=True,
      datasets_num_private_threads=True,
      enable_xla=True,
      fp16_implementation=True)

  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  flags.DEFINE_integer(
      name='train_steps',
      short_name='ts',
      default=300000,
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
      name='steps_between_evals',
      short_name='sbe',
      default=5000,
      help=flags_core.help_wrap(
          'The number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
  flags.DEFINE_boolean(
      name='enable_time_history',
      default=True,
      help='Whether to enable TimeHistory callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard',
      default=False,
      help='Whether to enable Tensorboard callback.')
  flags.DEFINE_boolean(
      name='enable_metrics_in_training',
      default=False,
      help='Whether to enable metrics during training.')
  flags.DEFINE_boolean(
      name='enable_mlir_bridge',
      default=False,
      help='Whether to enable the TF to XLA bridge.')
  # Set flags from the flags_core module as 'key flags' so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpfull` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name='param_set',
      short_name='mp',
      default='big',
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))

  flags.DEFINE_bool(
      name='static_batch',
      short_name='sb',
      default=False,
      help=flags_core.help_wrap(
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input shape '
          'must be static (e.g. running on TPU), this setting will be ignored '
          'and static batching will always be used.'))
  flags.DEFINE_integer(
      name='max_length',
      short_name='ml',
      default=256,
      help=flags_core.help_wrap(
          'Max sentence length for Transformer. Default is 256. Note: Usually '
          'it is more effective to use a smaller max length if static_batch is '
          'enabled, e.g. 64.'))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name='validation_steps',
      short_name='vs',
      default=64,
      help=flags_core.help_wrap('The number of steps used in validation.'))

  # BLEU score computation
  flags.DEFINE_string(
      name='bleu_source',
      short_name='bls',
      default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text to translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
      ))
  flags.DEFINE_string(
      name='bleu_ref',
      short_name='blr',
      default=None,
      help=flags_core.help_wrap(
          'Path to reference file containing the reference translations used '
          'when calculating the official BLEU score. Both --bleu_source and '
          '--bleu_ref must be set. '
      ))
  flags.DEFINE_string(
      name='vocab_file',
      short_name='vf',
      default=None,
      help=flags_core.help_wrap(
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to find '
          'the vocab file.'))
  flags.DEFINE_string(
      name='mode',
      default='train',
      help=flags_core.help_wrap('mode: train, eval, or predict'))
  flags.DEFINE_bool(
      name='use_ctl',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs with custom training loop.'))
  flags.DEFINE_integer(
      name='decode_batch_size',
      default=32,
      help=flags_core.help_wrap(
          'Global batch size used for Transformer autoregressive decoding on '
          'TPU.'))
  flags.DEFINE_integer(
      name='decode_max_length',
      default=97,
      help=flags_core.help_wrap(
          'Max sequence length of the decode/eval data. This is used by '
          'Transformer autoregressive decoding on TPU to have minimum '
          'paddings.'))
  flags.DEFINE_bool(
      name='padded_decode',
      default=False,
      help=flags_core.help_wrap(
          'Whether the autoregressive decoding runs with input data padded to '
          'the decode_max_length. For TPU/XLA-GPU runs, this flag has to be '
          'set due to the static shape requirement. Although CPU/GPU could also '
          'use padded_decode, it has not been tested. In addition, this method '
          'will introduce unnecessary overheads which grow quadratically with '
          'the max sequence length.'))
  flags.DEFINE_bool(
      name='enable_checkpointing',
      default=True,
      help=flags_core.help_wrap(
          'Whether to do checkpointing during training. When running under '
          'benchmark harness, we will avoid checkpointing.'))
  flags.DEFINE_bool(
      name='save_weights_only',
      default=True,
      help=flags_core.help_wrap(
          'Only used when above `enable_checkpointing` is True. '
          'If True, then only the model\'s weights will be saved '
          '(`model.save_weights(filepath)`), else the full model is saved '
          '(`model.save(filepath)`)'))

  flags_core.set_defaults(
      data_dir='/tmp/translate_ende',
      model_dir='/tmp/transformer_model',
      batch_size=None)

  # pylint: disable=unused-variable
  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref'],
      message='Both or neither --bleu_source and --bleu_ref must be defined.')
  def _check_bleu_files(flags_dict):
    return (flags_dict['bleu_source'] is None) == (
        flags_dict['bleu_ref'] is None)

  @flags.multi_flags_validator(
      ['bleu_source', 'bleu_ref', 'vocab_file'],
      message='--vocab_file must be defined if --bleu_source and --bleu_ref '
      'are defined.')
  def _check_bleu_vocab_file(flags_dict):
    if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
      return flags_dict['vocab_file'] is not None
    return True
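A hedged sketch of how flags like --enable_checkpointing and --save_weights_only typically feed a Keras ModelCheckpoint callback; the build_callbacks helper and checkpoint path are illustrative, not the actual transformer_main wiring:

import os

import tensorflow as tf
from absl import flags

FLAGS = flags.FLAGS


def build_callbacks():
  """Illustrative use of --enable_checkpointing and --save_weights_only."""
  callbacks = []
  if FLAGS.enable_checkpointing:
    ckpt_path = os.path.join(FLAGS.model_dir, 'cp-{epoch:04d}.ckpt')
    callbacks.append(
        tf.keras.callbacks.ModelCheckpoint(
            filepath=ckpt_path,
            save_weights_only=FLAGS.save_weights_only))
  return callbacks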
Example #6
def define_transformer_flags():
    """Add flags and flag validators for running transformer_main."""
    # Add common flags (data_dir, model_dir, train_epochs, etc.).
    flags.DEFINE_integer(name="max_length",
                         short_name="ml",
                         default=None,
                         help=flags_core.help_wrap("Max length."))

    flags_core.define_base(clean=True,
                           train_epochs=True,
                           epochs_between_evals=True,
                           stop_threshold=True,
                           num_gpu=True,
                           hooks=True,
                           export_dir=True,
                           distribution_strategy=True)
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=True,
                                  max_train_steps=False,
                                  dtype=True,
                                  all_reduce_alg=True)
    flags_core.define_benchmark()
    flags_core.define_device(tpu=True)

    # Set flags from the flags_core module as "key flags" so they're listed when
    # the '-h' flag is used. Without this line, the flags defined above are
    # only shown in the full `--helpfull` help text.
    flags.adopt_module_key_flags(flags_core)

    # Add transformer-specific flags
    flags.DEFINE_enum(
        name="param_set",
        short_name="mp",
        default="big",
        enum_values=PARAMS_MAP.keys(),
        help=flags_core.help_wrap(
            "Parameter set to use when creating and training the model. The "
            "parameters define the input shape (batch size and max length), "
            "model configuration (size of embedding, # of hidden layers, etc.), "
            "and various other settings. The big parameter set increases the "
            "default batch size, embedding/hidden size, and filter size. For a "
            "complete list of parameters, please see model/model_params.py."))

    flags.DEFINE_bool(
        name="static_batch",
        default=False,
        help=flags_core.help_wrap(
            "Whether the batches in the dataset should have static shapes. In "
            "general, this setting should be False. Dynamic shapes allow the "
            "inputs to be grouped so that the number of padding tokens is "
            "minimized, and helps model training. In cases where the input shape "
            "must be static (e.g. running on TPU), this setting will be ignored "
            "and static batching will always be used."))

    # Flags for training with steps (may be used for debugging)
    flags.DEFINE_integer(
        name="train_steps",
        short_name="ts",
        default=None,
        help=flags_core.help_wrap("The number of steps used to train."))
    flags.DEFINE_integer(
        name="steps_between_evals",
        short_name="sbe",
        default=1000,
        help=flags_core.help_wrap(
            "The Number of training steps to run between evaluations. This is "
            "used if --train_steps is defined."))

    # BLEU score computation
    flags.DEFINE_string(
        name="bleu_source",
        short_name="bls",
        default=None,
        help=flags_core.help_wrap(
            "Path to source file containing text translate when calculating the "
            "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
            "Use the flag --stop_threshold to stop the script based on the "
            "uncased BLEU score."))
    flags.DEFINE_string(
        name="bleu_ref",
        short_name="blr",
        default=None,
        help=flags_core.help_wrap(
            "Path to source file containing text translate when calculating the "
            "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
            "Use the flag --stop_threshold to stop the script based on the "
            "uncased BLEU score."))
    flags.DEFINE_string(
        name="vocab_file",
        short_name="vf",
        default=None,
        help=flags_core.help_wrap(
            "Path to subtoken vocabulary file. If data_download.py was used to "
            "download and encode the training data, look in the data_dir to find "
            "the vocab file."))
    flags.DEFINE_integer(name="save_checkpoints_steps",
                         short_name="scs",
                         default=50000,
                         help=flags_core.help_wrap("the vocab file."))

    flags_core.set_defaults(data_dir="/tmp/translate_ende",
                            model_dir="/tmp/transformer_model",
                            batch_size=None,
                            train_epochs=None)

    @flags.multi_flags_validator(
        ["train_epochs", "train_steps"],
        message=
        "Both --train_steps and --train_epochs were set. Only one may be "
        "defined.")
    def _check_train_limits(flag_dict):
        return flag_dict["train_epochs"] is None or flag_dict[
            "train_steps"] is None

    @flags.multi_flags_validator(
        ["bleu_source", "bleu_ref"],
        message="Both or neither --bleu_source and --bleu_ref must be defined."
    )
    def _check_bleu_files(flags_dict):
        return (flags_dict["bleu_source"] is None) == (flags_dict["bleu_ref"]
                                                       is None)

    @flags.multi_flags_validator(
        ["bleu_source", "bleu_ref", "vocab_file"],
        message="--vocab_file must be defined if --bleu_source and --bleu_ref "
        "are defined.")
    def _check_bleu_vocab_file(flags_dict):
        if flags_dict["bleu_source"] and flags_dict["bleu_ref"]:
            return flags_dict["vocab_file"] is not None
        return True

    @flags.multi_flags_validator(
        ["export_dir", "vocab_file"],
        message="--vocab_file must be defined if --export_dir is set.")
    def _check_export_vocab_file(flags_dict):
        if flags_dict["export_dir"]:
            return flags_dict["vocab_file"] is not None
        return True

    flags_core.require_cloud_storage(["data_dir", "model_dir", "export_dir"])
Example #7
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_integer(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_enum(
      name="constructor_type", default="bisection",
      enum_values=["bisection", "materialized"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Strategy to use for generating false negatives. materialized has a"
          "precompute that scales badly, but a faster per-epoch construction"
          "time and can be faster on very large systems."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_boolean(
      name="turn_off_distribution_strategy",
      default=False,
      help=flags_core.help_wrap(
          "If set, do not use any distribution strategy."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="clone_model_in_keras_dist_strat",
      default=True,
      help=flags_core.help_wrap(
          'If False, then the experimental code path is used that doesn\'t '
          "clone models for distribution."))
Example #8
def define_ncf_flags():
    """Add flags for running ncf_main."""
    # Add common flags
    flags_core.define_base(export_dir=False)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=True,
                                  max_train_steps=False,
                                  dtype=False,
                                  all_reduce_alg=False)
    flags_core.define_device(tpu=True)
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(model_dir="/tmp/ncf/",
                            data_dir="/tmp/movielens-data/",
                            train_epochs=2,
                            batch_size=256,
                            hooks="ProfilerHook",
                            tpu=None)

    # Add ncf-specific flags
    flags.DEFINE_enum(
        name="dataset",
        default="ml-1m",
        enum_values=["ml-1m", "ml-20m"],
        case_sensitive=False,
        help=flags_core.help_wrap("Dataset to be trained and evaluated."))

    flags.DEFINE_boolean(
        name="download_if_missing",
        default=True,
        help=flags_core.help_wrap(
            "Download data to data_dir if it is not already present."))

    flags.DEFINE_string(
        name="eval_batch_size",
        default=None,
        help=flags_core.help_wrap(
            "The batch size used for evaluation. This should generally be larger"
            "than the training batch size as the lack of back propagation during"
            "evaluation can allow for larger batch sizes to fit in memory. If not"
            "specified, the training batch size (--batch_size) will be used."))

    flags.DEFINE_integer(
        name="num_factors",
        default=8,
        help=flags_core.help_wrap("The Embedding size of MF model."))

    # Set the default as a list of strings to be consistent with input arguments
    flags.DEFINE_list(
        name="layers",
        default=["64", "32", "16", "8"],
        help=flags_core.help_wrap(
            "The sizes of hidden layers for MLP. Example "
            "to specify different sizes of MLP layers: --layers=32,16,8,4"))

    flags.DEFINE_float(
        name="mf_regularization",
        default=0.,
        help=flags_core.help_wrap(
            "The regularization factor for MF embeddings. The factor is used by "
            "regularizer which allows to apply penalties on layer parameters or "
            "layer activity during optimization."))

    flags.DEFINE_list(
        name="mlp_regularization",
        default=["0.", "0.", "0.", "0."],
        help=flags_core.help_wrap(
            "The regularization factor for each MLP layer. See mf_regularization "
            "help for more info about regularization factor."))

    flags.DEFINE_integer(
        name="num_neg",
        default=4,
        help=flags_core.help_wrap(
            "The Number of negative instances to pair with a positive instance."
        ))

    flags.DEFINE_float(name="learning_rate",
                       default=0.001,
                       help=flags_core.help_wrap("The learning rate."))

    flags.DEFINE_float(name="beta1",
                       default=0.9,
                       help=flags_core.help_wrap(
                           "beta1 hyperparameter for the Adam optimizer."))

    flags.DEFINE_float(name="beta2",
                       default=0.999,
                       help=flags_core.help_wrap(
                           "beta2 hyperparameter for the Adam optimizer."))

    flags.DEFINE_float(name="epsilon",
                       default=1e-8,
                       help=flags_core.help_wrap(
                           "epsilon hyperparameter for the Adam "
                           "optimizer."))

    flags.DEFINE_float(
        name="hr_threshold",
        default=None,
        help=flags_core.help_wrap(
            "If passed, training will stop when the evaluation metric HR is "
            "greater than or equal to hr_threshold. For dataset ml-1m, the "
            "desired hr_threshold is 0.68 which is the result from the paper; "
            "For dataset ml-20m, the threshold can be set as 0.95 which is "
            "achieved by MLPerf implementation."))

    flags.DEFINE_bool(
        name="ml_perf",
        default=False,
        help=flags_core.help_wrap(
            "If set, changes the behavior of the model slightly to match the "
            "MLPerf reference implementations here: \n"
            "https://github.com/mlperf/reference/tree/master/recommendation/"
            "pytorch\n"
            "The two changes are:\n"
            "1. When computing the HR and NDCG during evaluation, remove "
            "duplicate user-item pairs before the computation. This results in "
            "better HRs and NDCGs.\n"
            "2. Use a different soring algorithm when sorting the input data, "
            "which performs better due to the fact the sorting algorithms are "
            "not stable."))

    flags.DEFINE_integer(
        name="seed",
        default=None,
        help=flags_core.help_wrap(
            "This value will be used to seed both NumPy and TensorFlow."))

    flags.DEFINE_bool(
        name="hash_pipeline",
        default=False,
        help=flags_core.help_wrap(
            "This flag will perform a separate run of the pipeline and hash "
            "batches as they are produced. \nNOTE: this will significantly slow "
            "training. However it is useful to confirm that a random seed is "
            "does indeed make the data pipeline deterministic."))

    @flags.validator("eval_batch_size",
                     "eval_batch_size must be at least {}".format(
                         rconst.NUM_EVAL_NEGATIVES + 1))
    def eval_size_check(eval_batch_size):
        return (eval_batch_size is None
                or int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

    flags.DEFINE_bool(
        name="use_subprocess",
        default=True,
        help=flags_core.help_wrap(
            "By default, ncf_main.py starts async data generation process as a "
            "subprocess. If set to False, ncf_main.py will assume the async data "
            "generation process has already been started by the user."))

    flags.DEFINE_integer(
        name="cache_id",
        default=None,
        help=flags_core.help_wrap(
            "Use a specified cache_id rather than using a timestamp. This is only "
            "needed to synchronize across multiple workers. Generally this flag will "
            "not need to be set."))

    flags.DEFINE_bool(
        name="use_xla_for_gpu",
        default=False,
        help=flags_core.help_wrap(
            "If True, use XLA for the model function. Only works when using a "
            "GPU. On TPUs, XLA is always used"))

    xla_message = "--use_xla_for_gpu is incompatible with --tpu"

    @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"],
                                 message=xla_message)
    def xla_validator(flag_dict):
        return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]
Example #9
def define_transformer_flags():
    """Add flags and flag validators for running transformer_main."""
    # Add common flags (data_dir, model_dir, train_epochs, etc.).
    flags_core.define_base(multi_gpu=False, num_gpu=False, export_dir=False)
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=False,
                                  max_train_steps=False,
                                  dtype=False)
    flags_core.define_benchmark()
    flags_core.define_device(tpu=True)

    # Set flags from the flags_core module as "key flags" so they're listed when
    # the '-h' flag is used. Without this line, the flags defined above are
    # only shown in the full `--helpfull` help text.
    flags.adopt_module_key_flags(flags_core)

    # Add transformer-specific flags
    flags.DEFINE_enum(
        name="param_set",
        short_name="mp",
        default="big",
        enum_values=["base", "big", "tiny"],
        help=flags_core.help_wrap(
            "Parameter set to use when creating and training the model. The "
            "parameters define the input shape (batch size and max length), "
            "model configuration (size of embedding, # of hidden layers, etc.), "
            "and various other settings. The big parameter set increases the "
            "default batch size, embedding/hidden size, and filter size. For a "
            "complete list of parameters, please see model/model_params.py."))

    flags.DEFINE_bool(
        name="static_batch",
        default=False,
        help=flags_core.help_wrap(
            "Whether the batches in the dataset should have static shapes. In "
            "general, this setting should be False. Dynamic shapes allow the "
            "inputs to be grouped so that the number of padding tokens is "
            "minimized, and helps model training. In cases where the input shape "
            "must be static (e.g. running on TPU), this setting will be ignored "
            "and static batching will always be used."))

    # Flags for training with steps (may be used for debugging)
    flags.DEFINE_integer(
        name="train_steps",
        short_name="ts",
        default=None,
        help=flags_core.help_wrap("The number of steps used to train."))
    flags.DEFINE_integer(
        name="steps_between_evals",
        short_name="sbe",
        default=1000,
        help=flags_core.help_wrap(
            "The Number of training steps to run between evaluations. This is "
            "used if --train_steps is defined."))

    # BLEU score computation
    flags.DEFINE_string(
        name="bleu_source",
        short_name="bls",
        default=None,
        help=flags_core.help_wrap(
            "Path to source file containing text translate when calculating the "
            "official BLEU score. --bleu_source, --bleu_ref, and --vocab_file "
            "must be set. Use the flag --stop_threshold to stop the script based "
            "on the uncased BLEU score."))
    flags.DEFINE_string(
        name="bleu_ref",
        short_name="blr",
        default=None,
        help=flags_core.help_wrap(
            "Path to source file containing text translate when calculating the "
            "official BLEU score. --bleu_source, --bleu_ref, and --vocab_file "
            "must be set. Use the flag --stop_threshold to stop the script based "
            "on the uncased BLEU score."))
    flags.DEFINE_string(
        name="vocab_file",
        short_name="vf",
        default=VOCAB_FILE,
        help=flags_core.help_wrap(
            "Name of vocabulary file containing subtokens for subtokenizing the "
            "bleu_source file. This file is expected to be in the directory "
            "defined by --data_dir."))

    flags_core.set_defaults(data_dir="/tmp/translate_ende",
                            model_dir="/tmp/transformer_model",
                            batch_size=None,
                            train_epochs=None)

    @flags.multi_flags_validator(
        ["train_epochs", "train_steps"],
        message=
        "Both --train_steps and --train_epochs were set. Only one may be "
        "defined.")
    def _check_train_limits(flag_dict):
        return flag_dict["train_epochs"] is None or flag_dict[
            "train_steps"] is None

    @flags.multi_flags_validator(
        ["data_dir", "bleu_source", "bleu_ref", "vocab_file"],
        message="--bleu_source, --bleu_ref, and/or --vocab_file don't exist. "
        "Please ensure that the file paths are correct.")
    def _check_bleu_files(flags_dict):
        """Validate files when bleu_source and bleu_ref are defined."""
        if flags_dict["bleu_source"] is None or flags_dict["bleu_ref"] is None:
            return True
        # Ensure that bleu_source, bleu_ref, and vocab files exist.
        vocab_file_path = os.path.join(flags_dict["data_dir"],
                                       flags_dict["vocab_file"])
        return all([
            tf.gfile.Exists(flags_dict["bleu_source"]),
            tf.gfile.Exists(flags_dict["bleu_ref"]),
            tf.gfile.Exists(vocab_file_path)
        ])

    flags_core.require_cloud_storage(["data_dir", "model_dir"])
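This example validates the BLEU inputs with the TF1-style tf.gfile API; under TF2 the equivalent existence checks go through tf.io.gfile, as in this minimal sketch:

import os

import tensorflow as tf


def bleu_inputs_exist(bleu_source, bleu_ref, data_dir, vocab_file):
  """TF2-style equivalent of the tf.gfile.Exists checks above."""
  vocab_file_path = os.path.join(data_dir, vocab_file)
  return all([
      tf.io.gfile.exists(bleu_source),
      tf.io.gfile.exists(bleu_ref),
      tf.io.gfile.exists(vocab_file_path),
  ])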
Example #10
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=False,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_string(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="hr_threshold", default=None,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_bool(
      name="ml_perf", default=None,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))
Example #11
File: misc.py  Project: gang4gh/dl
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, etc.).
  flags_core.define_base(num_gpu=True, distribution_strategy=True)
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=True,
      loss_scale=True,
      all_reduce_alg=True,
      enable_xla=True,
      force_v2_in_keras_compile=True,
      fp16_implementation=True
  )

  # Additional performance flags
  # TODO(b/76028325): Remove when generic layout optimizer is ready.
  flags.DEFINE_boolean(
      name='enable_grappler_layout_optimizer',
      default=True,
      help='Enable Grappler layout optimizer. Currently Grappler can '
           'de-optimize fp16 graphs by forcing NCHW layout for all '
           'convolutions and batch normalizations, and this flag allows '
           'disabling it.'
  )

  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  flags.DEFINE_integer(
      name='train_steps', short_name='ts', default=300000,
      help=flags_core.help_wrap('The number of steps used to train.'))
  flags.DEFINE_integer(
      name='steps_between_evals', short_name='sbe', default=1000,
      help=flags_core.help_wrap(
          'The number of training steps to run between evaluations. This is '
          'used if --train_steps is defined.'))
  flags.DEFINE_boolean(
      name='enable_time_history', default=True,
      help='Whether to enable TimeHistory callback.')
  flags.DEFINE_boolean(
      name='enable_tensorboard', default=False,
      help='Whether to enable Tensorboard callback.')
  flags.DEFINE_integer(
      name='batches_between_tensorboard_log', default=100,
      help=flags_core.help_wrap('Number of batches between TensorBoard log writes.'))
  flags.DEFINE_boolean(
      name='enable_metrics_in_training', default=False,
      help='Whether to enable metrics during training.')
  flags.DEFINE_string(
      name='profile_steps', default=None,
      help='Save profiling data to model dir at given range of steps. The '
      'value must be a comma separated pair of positive integers, specifying '
      'the first and last step to profile. For example, "--profile_steps=2,4" '
      'triggers the profiler to process 3 steps, starting from the 2nd step. '
      'Note that profiler has a non-trivial performance overhead, and the '
      'output file can be gigantic if profiling many steps.')
  # Set flags from the flags_core module as 'key flags' so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpfull` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name='param_set', short_name='mp', default='big',
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          'Parameter set to use when creating and training the model. The '
          'parameters define the input shape (batch size and max length), '
          'model configuration (size of embedding, # of hidden layers, etc.), '
          'and various other settings. The big parameter set increases the '
          'default batch size, embedding/hidden size, and filter size. For a '
          'complete list of parameters, please see model/model_params.py.'))

  flags.DEFINE_bool(
      name='static_batch', short_name='sb', default=False,
      help=flags_core.help_wrap(
          'Whether the batches in the dataset should have static shapes. In '
          'general, this setting should be False. Dynamic shapes allow the '
          'inputs to be grouped so that the number of padding tokens is '
          'minimized, and helps model training. In cases where the input shape '
          'must be static (e.g. running on TPU), this setting will be ignored '
          'and static batching will always be used.'))
  flags.DEFINE_integer(
      name='max_input_length', short_name='mil', default=1024,
      help=flags_core.help_wrap('Max input sequence length (token count) for Transformer'))
  flags.DEFINE_integer(
      name='max_target_length', short_name='mtl', default=48,
      help=flags_core.help_wrap('Max target sequence length (token count) for Transformer'))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name='validation_example_count', short_name='vec', default=1024,
      help=flags_core.help_wrap('The number of examples used in validation.'))

  # BLEU score computation
  flags.DEFINE_string(
      name='bleu_source', short_name='bls', default=None,
      help=flags_core.help_wrap(
          'Path to source file containing text to translate when calculating the '
          'official BLEU score. Both --bleu_source and --bleu_ref must be set. '
          ))
  flags.DEFINE_string(
      name='bleu_ref', short_name='blr', default=None,
      help=flags_core.help_wrap(
          'Path to reference file containing the reference translations used '
          'when calculating the official BLEU score. Both --bleu_source and '
          '--bleu_ref must be set. '
          ))
  flags.DEFINE_string(
      name='vocab_file', short_name='vf', default=None,
      help=flags_core.help_wrap(
          'Path to subtoken vocabulary file. If data_download.py was used to '
          'download and encode the training data, look in the data_dir to find '
          'the vocab file.'))
  flags.DEFINE_string(
      name='mode', default='train',
      help=flags_core.help_wrap('mode: train, eval, or predict'))
  flags.DEFINE_bool(
      name='use_ctl',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs with custom training loop.'))
  flags.DEFINE_bool(
      name='use_tpu_2vm_config',
      default=False,
      help=flags_core.help_wrap(
          'Whether the model runs in 2VM mode, Headless server and unit test '
          'all use 1VM config.'))
  flags.DEFINE_integer(
      name='decode_batch_size',
      default=32,
      help=flags_core.help_wrap(
          'Global batch size used for Transformer autoregressive decoding on '
          'TPU.'))
  flags.DEFINE_integer(
      name='decode_max_length',
      default=97,
      help=flags_core.help_wrap(
          'Max sequence length of the decode/eval data. This is used by '
          'Transformer autoregressive decoding on TPU to have minimum '
          'paddings.'))
  flags.DEFINE_bool(
      name='padded_decode',
      default=False,
      help=flags_core.help_wrap(
          'Whether the autoregressive decoding runs with input data padded to '
          'the decode_max_length. For TPU/XLA-GPU runs, this flag has to be '
          'set due to the static shape requirement. Although CPU/GPU could also '
          'use padded_decode, it has not been tested. In addition, this method '
          'will introduce unnecessary overheads which grow quadratically with '
          'the max sequence length.'))

  flags.DEFINE_string(
      name='loss_fn', default='smoothed_corss_entropy',
      help=flags_core.help_wrap('loss_fn: corss_entropy, smoothed_corss_entropy'))

  flags.DEFINE_string(
      name='input_concat_schema', default='v2',
      help=flags_core.help_wrap(
          'input_concat_schema: [v0, v1, v2, v3]. v0: html only; '
          'v1: concatenated (url, hostname, html); '
          'v2: concatenated and padded (url, hostname, html); '
          'v3: padded (url, hostname, html)'))

  flags.DEFINE_bool(
      name='compact_predict_result', default=False,
      help=flags_core.help_wrap('Whether to dump the predict result as a TSV.'))

  flags.DEFINE_integer(
      name='max_predict_count',
      default=None,
      help=flags_core.help_wrap('max example count to predict'))

  flags.DEFINE_string(
      name='prediction_details_file', default=None,
      help=flags_core.help_wrap(
          'output prediction details to the specified file. '
          'disabled when None; output to the model folder when #model_dir.'))

  flags.DEFINE_string(
      name='prediction_reference_file', default=None,
      help=flags_core.help_wrap('reference file for prediction details'))

  flags.DEFINE_string(
      name='prediction_compact_file', default='#model_dir',
      help=flags_core.help_wrap(
          'output prediction compact result to the specified file, '
          'disabled when None; output to the model folder when #model_dir.'))

  flags.DEFINE_bool(
      name='calc_rouge_scores', default=True,
      help=flags_core.help_wrap('Whether to calculate ROUGE scores or not'))

  flags.DEFINE_bool(
      name='use_reformer', default=False,
      help=flags_core.help_wrap('use Reformer model instead of Transformer'))

  flags.DEFINE_bool(
      name='use_full_attention_in_reformer', default=False,
      help=flags_core.help_wrap('use full attention in reformer, instead of LSH attention, for eval purpose'))

  flags.DEFINE_integer(
      name='num_hashes',
      default=4,
      help=flags_core.help_wrap('number of hashes used in LSH attention for training'))

  flags.DEFINE_integer(
      name='test_num_hashes',
      default=None,
      help=flags_core.help_wrap('number of hashes used in LSH attention for test'))

  flags.DEFINE_integer(
      name='bucket_size',
      default=64,
      help=flags_core.help_wrap('bucket size for LSH attention'))

  flags.DEFINE_string(
      name='val_data_dir', default=None,
      help=flags_core.help_wrap('validation data file used in training. If None, then try to find matching test file based on data_dir'))

  flags.DEFINE_float(
      name='one_dropout', default=None,
      help=flags_core.help_wrap('one dropout rate for all layers'))

  flags.DEFINE_float(
      name='attention_dropout', default=None,
      help=flags_core.help_wrap('dropout rate for attention layers'))

  flags.DEFINE_float(
      name='lsh_attention_dropout', default=0.0,
      help=flags_core.help_wrap('dropout rate for lsh_attention layers'))

  flags.DEFINE_bool(
      name='dev_mode', default=False,
      help=flags_core.help_wrap('if dev_mode is True, output more details'))

  flags.DEFINE_string(
      name='training_schema', default=None,
      help=flags_core.help_wrap('format: input1:limit1,input2:limit2...=>target'))

  flags.DEFINE_string(
      name='dtitle_data_schema', default='Url,DocumentUrl,Language,LanguageAnchor,DocumentType,AHtmlTitle,AMetaDesc,AOGTitle,AOGDesc,InjHdr_CDG_H,InjHdr_CDG_E,Wiki_Name,ODPTitle,CaptionAnchorText,TargetTitle',
      help=flags_core.help_wrap('format: field1,field2,field3...'))

  flags.DEFINE_bool(
      name='must_fuzzy_match', default=False,
      help=flags_core.help_wrap(
          'The prediction must fuzzy-match the input; otherwise the output '
          'is discarded.'))

  flags.DEFINE_bool(
      name='restore_case_info', default=False,
      help=flags_core.help_wrap('Restore case information in the prediction output'))

  flags.DEFINE_bool(
      name='dedup_predict_input', default=False,
      help=flags_core.help_wrap('Remove duplicate inputs during prediction'))

  flags_core.set_defaults(data_dir='/tmp/translate_ende',
                          model_dir='/tmp/transformer_model',
                          batch_size=16)
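
The training_schema flag defined above uses the compact format 'input1:limit1,input2:limit2...=>target'. As a rough, hypothetical sketch (not the parser used by the code that defines these flags), such a schema string could be split as follows; the field names are taken from the dtitle_data_schema default and the limits are illustrative:

# Hypothetical sketch of parsing the training_schema format described above;
# illustrative only, not the actual parser behind these flags.
from typing import List, Tuple


def parse_training_schema(schema: str) -> Tuple[List[Tuple[str, int]], str]:
    """Splits 'Url:64,AHtmlTitle:128=>TargetTitle' into (inputs, target)."""
    inputs_part, target = schema.split('=>')
    inputs = []
    for item in inputs_part.split(','):
        name, limit = item.split(':')
        inputs.append((name.strip(), int(limit)))
    return inputs, target.strip()


print(parse_training_schema('Url:64,AHtmlTitle:128=>TargetTitle'))
# ([('Url', 64), ('AHtmlTitle', 128)], 'TargetTitle')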
Example #12
0
def define_ncf_flags():
    """Add flags for running ncf_main."""
    # Add common flags
    flags_core.define_base(export_dir=False)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=True,
                                  intra_op=True,
                                  synthetic_data=False,
                                  max_train_steps=False,
                                  dtype=False,
                                  all_reduce_alg=False)
    flags_core.define_device(tpu=True)
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(model_dir="/tmp/ncf/",
                            data_dir="/tmp/movielens-data/",
                            train_epochs=2,
                            batch_size=256,
                            hooks=None,
                            tpu=None)

    # Add ncf-specific flags
    flags.DEFINE_enum(
        name="dataset",
        default="ml-1m",
        enum_values=["ml-1m", "ml-20m"],
        case_sensitive=False,
        help=flags_core.help_wrap("Dataset to be trained and evaluated."))

    flags.DEFINE_boolean(
        name="download_if_missing",
        default=True,
        help=flags_core.help_wrap(
            "Download data to data_dir if it is not already present."))

    flags.DEFINE_string(
        name="eval_batch_size",
        default=None,
        help=flags_core.help_wrap(
            "The batch size used for evaluation. This should generally be larger"
            "than the training batch size as the lack of back propagation during"
            "evaluation can allow for larger batch sizes to fit in memory. If not"
            "specified, the training batch size (--batch_size) will be used."))

    flags.DEFINE_integer(
        name="num_factors",
        default=8,
        help=flags_core.help_wrap("The Embedding size of MF model."))

    # Set the default as a list of strings to be consistent with input arguments
    flags.DEFINE_list(
        name="layers",
        default=["64", "32", "16", "8"],
        help=flags_core.help_wrap(
            "The sizes of hidden layers for MLP. Example "
            "to specify different sizes of MLP layers: --layers=32,16,8,4"))

    flags.DEFINE_float(
        name="mf_regularization",
        default=0.,
        help=flags_core.help_wrap(
            "The regularization factor for MF embeddings. The factor is used by "
            "regularizer which allows to apply penalties on layer parameters or "
            "layer activity during optimization."))

    flags.DEFINE_list(
        name="mlp_regularization",
        default=["0.", "0.", "0.", "0."],
        help=flags_core.help_wrap(
            "The regularization factor for each MLP layer. See mf_regularization "
            "help for more info about regularization factor."))

    flags.DEFINE_integer(
        name="num_neg",
        default=4,
        help=flags_core.help_wrap(
            "The Number of negative instances to pair with a positive instance."
        ))

    flags.DEFINE_float(name="learning_rate",
                       default=0.001,
                       help=flags_core.help_wrap("The learning rate."))

    flags.DEFINE_float(
        name="hr_threshold",
        default=None,
        help=flags_core.help_wrap(
            "If passed, training will stop when the evaluation metric HR is "
            "greater than or equal to hr_threshold. For dataset ml-1m, the "
            "desired hr_threshold is 0.68 which is the result from the paper; "
            "For dataset ml-20m, the threshold can be set as 0.95 which is "
            "achieved by MLPerf implementation."))

    flags.DEFINE_bool(
        name="ml_perf",
        default=None,
        help=flags_core.help_wrap(
            "If set, changes the behavior of the model slightly to match the "
            "MLPerf reference implementations here: \n"
            "https://github.com/mlperf/reference/tree/master/recommendation/"
            "pytorch\n"
            "The two changes are:\n"
            "1. When computing the HR and NDCG during evaluation, remove "
            "duplicate user-item pairs before the computation. This results in "
            "better HRs and NDCGs.\n"
            "2. Use a different soring algorithm when sorting the input data, "
            "which performs better due to the fact the sorting algorithms are "
            "not stable."))

    flags.DEFINE_bool(
        name="inference_only",
        default=False,
        help=flags_core.help_wrap("If set, runs only the forward pass."))

    flags.DEFINE_bool(
        name="accuracy_only",
        default=False,
        help=flags_core.help_wrap(
            "If set, only accuracy (i.e. no performance benchmarking) "
            "metrics are computed."))

    flags.DEFINE_bool(
        name="benchmark_only",
        default=True,
        help=flags_core.help_wrap(
            "If set, only performance benchmarking (i.e. no accuracy) "
            "metrics are computed."))

    flags.DEFINE_bool(
        name="export_savedmodel",
        default=False,
        help=flags_core.help_wrap(
            "If set, the model is exported in serving-compatible format to "
            "the model_dir."))
Example #13
0
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, train_epochs, etc.).
  flags_core.define_base()
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=True,
      intra_op=True,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  # Set flags from the flags_core module as "key flags" so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name="param_set", short_name="mp", default="big",
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          "Parameter set to use when creating and training the model. The "
          "parameters define the input shape (batch size and max length), "
          "model configuration (size of embedding, # of hidden layers, etc.), "
          "and various other settings. The big parameter set increases the "
          "default batch size, embedding/hidden size, and filter size. For a "
          "complete list of parameters, please see model/model_params.py."))

  flags.DEFINE_bool(
      name="static_batch", default=False,
      help=flags_core.help_wrap(
          "Whether the batches in the dataset should have static shapes. In "
          "general, this setting should be False. Dynamic shapes allow the "
          "inputs to be grouped so that the number of padding tokens is "
          "minimized, and helps model training. In cases where the input shape "
          "must be static (e.g. running on TPU), this setting will be ignored "
          "and static batching will always be used."))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name="train_steps", short_name="ts", default=None,
      help=flags_core.help_wrap("The number of steps used to train."))
  flags.DEFINE_integer(
      name="steps_between_evals", short_name="sbe", default=1000,
      help=flags_core.help_wrap(
          "The Number of training steps to run between evaluations. This is "
          "used if --train_steps is defined."))

  # Add intra_op and inter_op flags as arguments.

  flags.DEFINE_integer(
      name="intra_op", default=None,
      help=flags_core.help_wrap("The number of intra_op_parallelism threads"))
  flags.DEFINE_integer(
      name="inter_op", default=None,
      help=flags_core.help_wrap("The number of inter_op_parallelism threads"))

  # Flags added to override the learning rate, decay, warmup, and max_length
  # values from the params file.

  flags.DEFINE_float(
      name="learning_rate", default=2.0,
      help=flags_core.help_wrap("Learning rate"))

  # learning_rate_decay_rate is not used anywhere. Added just to stay in sync
  # with the params file.
  flags.DEFINE_float(
      name="learning_rate_decay_rate", default=1.0,
      help=flags_core.help_wrap("Learning rate decay rate"))

  flags.DEFINE_integer(
      name="learning_rate_warmup_steps", default=16000,
      help=flags_core.help_wrap("Learning rate warmup steps"))

  flags.DEFINE_integer(
      name="max_length", default=256,
      help=flags_core.help_wrap("Maximum number of tokens per example"))

  flags.DEFINE_integer(
      name="vocab_size", default=33708,
      help=flags_core.help_wrap("Number of tokens defined in the vocabulary file"))

  flags.DEFINE_integer(
      name="save_checkpoints_secs", default=3600,
      help=flags_core.help_wrap("Save a checkpoint every given number of seconds"))
  flags.DEFINE_integer(
      name="log_step_count_steps", default=100,
      help=flags_core.help_wrap(
          "Frequency, in steps, at which loss and global step/sec are logged"))

  # Added for the learning rate decay scheme.

  flags.DEFINE_integer(
      name="lr_scheme", default=1,
      help=flags_core.help_wrap(
          "Type of learning rate decay scheme. Can be 0, 1, or 2: "
          "0 - constant learning rate; "
          "1 - Noam decay; "
          "2 - linear learning rate growth followed by inverse sqrt decay"))

  flags.DEFINE_float(
      name="warmup_init_lr", default=1e-07,
      help=flags_core.help_wrap("Initial learning rate for the warmup phase"))

  flags.DEFINE_float(
      name="layer_postprocess_dropout", default=0.1,
      help=flags_core.help_wrap("Layer postprocess dropout rate"))

  # Added for the optimizers and their parameters.

  flags.DEFINE_string(
      name="opt_alg", short_name="opt", default="lazyadam",
      help=flags_core.help_wrap("Optimizer algorithm to be used"))
      
  flags.DEFINE_float(
      name="optimizer_sgd_momentum", short_name="sgdm", default=None,
      help=flags_core.help_wrap("Value for SGD's momentum param"))
      
  flags.DEFINE_float(
      name="optimizer_rms_decay", short_name="rmsd", default=0.9,
      help=flags_core.help_wrap("RMSProp Decay value"))
      
  flags.DEFINE_float(
      name="optimizer_rms_momentum", short_name="rmsm", default=0.0,
      help=flags_core.help_wrap("RMSProp momentum value"))
      
  flags.DEFINE_float(
      name="optimizer_rms_epsilon", short_name="rmse", default=1e-10,
      help=flags_core.help_wrap("RMSProp epsilon value"))


  # BLEU score computation
  flags.DEFINE_string(
      name="bleu_source", short_name="bls", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="bleu_ref", short_name="blr", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="vocab_file", short_name="vf", default=None,
      help=flags_core.help_wrap(
          "Path to subtoken vocabulary file. If data_download.py was used to "
          "download and encode the training data, look in the data_dir to find "
          "the vocab file."))

  flags_core.set_defaults(data_dir="/tmp/translate_ende",
                          model_dir="/tmp/transformer_model",
                          batch_size=None,
                          train_epochs=None)

  @flags.multi_flags_validator(
      ["train_epochs", "train_steps"],
      message="Both --train_steps and --train_epochs were set. Only one may be "
              "defined.")
  def _check_train_limits(flag_dict):
    return flag_dict["train_epochs"] is None or flag_dict["train_steps"] is None

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref"],
      message="Both or neither --bleu_source and --bleu_ref must be defined.")
  def _check_bleu_files(flags_dict):
    return (flags_dict["bleu_source"] is None) == (
        flags_dict["bleu_ref"] is None)

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref", "vocab_file"],
      message="--vocab_file must be defined if --bleu_source and --bleu_ref "
              "are defined.")
  def _check_bleu_vocab_file(flags_dict):
    if flags_dict["bleu_source"] and flags_dict["bleu_ref"]:
      return flags_dict["vocab_file"] is not None
    return True

  @flags.multi_flags_validator(
      ["export_dir", "vocab_file"],
      message="--vocab_file must be defined if --export_dir is set.")
  def _check_export_vocab_file(flags_dict):
    if flags_dict["export_dir"]:
      return flags_dict["vocab_file"] is not None
    return True

  flags_core.require_cloud_storage(["data_dir", "model_dir", "export_dir"])
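
The @flags.multi_flags_validator decorators above run at parse time: the decorated function receives a dict of the listed flag values, and returning False makes absl reject the command line with the given message. Below is a self-contained sketch of the same pattern; the flags 'alpha' and 'beta' are invented purely for illustration.

# Stand-alone illustration of the multi-flag validator pattern used above.
from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_integer('alpha', None, 'First of two mutually exclusive flags.')
flags.DEFINE_integer('beta', None, 'Second of two mutually exclusive flags.')


@flags.multi_flags_validator(
    ['alpha', 'beta'],
    message='Only one of --alpha and --beta may be set.')
def _check_exclusive(flag_dict):
  # Valid as long as at least one of the two flags is left unset.
  return flag_dict['alpha'] is None or flag_dict['beta'] is None


def main(_):
  print(FLAGS.alpha, FLAGS.beta)


if __name__ == '__main__':
  # e.g. passing both --alpha=1 and --beta=2 fails with the message above;
  # passing only one of the two flags succeeds.
  app.run(main)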
Example #14
0
def define_ncf_flags():
  """Add flags for running ncf_main."""
  # Add common flags
  flags_core.define_base(export_dir=False)
  flags_core.define_performance(
      num_parallel_calls=False,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=False
  )
  flags_core.define_device(tpu=True)
  flags_core.define_benchmark()

  flags.adopt_module_key_flags(flags_core)

  flags_core.set_defaults(
      model_dir="/tmp/ncf/",
      data_dir="/tmp/movielens-data/",
      train_epochs=2,
      batch_size=256,
      hooks="ProfilerHook",
      tpu=None
  )

  # Add ncf-specific flags
  flags.DEFINE_enum(
      name="dataset", default="ml-1m",
      enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Dataset to be trained and evaluated."))

  flags.DEFINE_boolean(
      name="download_if_missing", default=True, help=flags_core.help_wrap(
          "Download data to data_dir if it is not already present."))

  flags.DEFINE_integer(
      name="eval_batch_size", default=None, help=flags_core.help_wrap(
          "The batch size used for evaluation. This should generally be larger"
          "than the training batch size as the lack of back propagation during"
          "evaluation can allow for larger batch sizes to fit in memory. If not"
          "specified, the training batch size (--batch_size) will be used."))

  flags.DEFINE_integer(
      name="num_factors", default=8,
      help=flags_core.help_wrap("The Embedding size of MF model."))

  # Set the default as a list of strings to be consistent with input arguments
  flags.DEFINE_list(
      name="layers", default=["64", "32", "16", "8"],
      help=flags_core.help_wrap(
          "The sizes of hidden layers for MLP. Example "
          "to specify different sizes of MLP layers: --layers=32,16,8,4"))

  flags.DEFINE_float(
      name="mf_regularization", default=0.,
      help=flags_core.help_wrap(
          "The regularization factor for MF embeddings. The factor is used by "
          "regularizer which allows to apply penalties on layer parameters or "
          "layer activity during optimization."))

  flags.DEFINE_list(
      name="mlp_regularization", default=["0.", "0.", "0.", "0."],
      help=flags_core.help_wrap(
          "The regularization factor for each MLP layer. See mf_regularization "
          "help for more info about regularization factor."))

  flags.DEFINE_integer(
      name="num_neg", default=4,
      help=flags_core.help_wrap(
          "The Number of negative instances to pair with a positive instance."))

  flags.DEFINE_float(
      name="learning_rate", default=0.001,
      help=flags_core.help_wrap("The learning rate."))

  flags.DEFINE_float(
      name="beta1", default=0.9,
      help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="beta2", default=0.999,
      help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))

  flags.DEFINE_float(
      name="epsilon", default=1e-8,
      help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
                                "optimizer."))

  flags.DEFINE_float(
      name="hr_threshold", default=1.0,
      help=flags_core.help_wrap(
          "If passed, training will stop when the evaluation metric HR is "
          "greater than or equal to hr_threshold. For dataset ml-1m, the "
          "desired hr_threshold is 0.68 which is the result from the paper; "
          "For dataset ml-20m, the threshold can be set as 0.95 which is "
          "achieved by MLPerf implementation."))

  flags.DEFINE_enum(
      name="constructor_type", default="bisection",
      enum_values=["bisection", "materialized"], case_sensitive=False,
      help=flags_core.help_wrap(
          "Strategy to use for generating false negatives. materialized has a"
          "precompute that scales badly, but a faster per-epoch construction"
          "time and can be faster on very large systems."))

  flags.DEFINE_bool(
      name="ml_perf", default=False,
      help=flags_core.help_wrap(
          "If set, changes the behavior of the model slightly to match the "
          "MLPerf reference implementations here: \n"
          "https://github.com/mlperf/reference/tree/master/recommendation/"
          "pytorch\n"
          "The two changes are:\n"
          "1. When computing the HR and NDCG during evaluation, remove "
          "duplicate user-item pairs before the computation. This results in "
          "better HRs and NDCGs.\n"
          "2. Use a different soring algorithm when sorting the input data, "
          "which performs better due to the fact the sorting algorithms are "
          "not stable."))

  flags.DEFINE_bool(
      name="output_ml_perf_compliance_logging", default=False,
      help=flags_core.help_wrap(
          "If set, output the MLPerf compliance logging. This is only useful "
          "if one is running the model for MLPerf. See "
          "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
          "#submission-compliance-logs for details. This uses sudo and so may "
          "ask for your password, as root access is needed to clear the system "
          "caches, which is required for MLPerf compliance."
      )
  )

  flags.DEFINE_integer(
      name="seed", default=None, help=flags_core.help_wrap(
          "This value will be used to seed both NumPy and TensorFlow."))

  flags.DEFINE_boolean(
      name="turn_off_distribution_strategy",
      default=False,
      help=flags_core.help_wrap(
          "If set, do not use any distribution strategy."))

  @flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
                   .format(rconst.NUM_EVAL_NEGATIVES + 1))
  def eval_size_check(eval_batch_size):
    return (eval_batch_size is None or
            int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)

  flags.DEFINE_bool(
      name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
          "If True, use XLA for the model function. Only works when using a "
          "GPU. On TPUs, XLA is always used"))

  xla_message = "--use_xla_for_gpu is incompatible with --tpu"
  @flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
  def xla_validator(flag_dict):
    return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]

  flags.DEFINE_bool(
      name="clone_model_in_keras_dist_strat",
      default=True,
      help=flags_core.help_wrap(
          'If False, then the experimental code path is used that doesn\'t '
          "clone models for distribution."))

  flags.DEFINE_bool(
      name="early_stopping",
      default=False,
      help=flags_core.help_wrap(
          'If True, stop training when the HR metric reaches hr_threshold.'))
Example #15
0
def define_transformer_flags():
  """Add flags and flag validators for running transformer_main."""
  # Add common flags (data_dir, model_dir, train_epochs, etc.).
  flags_core.define_base()
  flags_core.define_performance(
      num_parallel_calls=True,
      inter_op=False,
      intra_op=False,
      synthetic_data=True,
      max_train_steps=False,
      dtype=False,
      all_reduce_alg=True
  )
  flags_core.define_benchmark()
  flags_core.define_device(tpu=True)

  # Set flags from the flags_core module as "key flags" so they're listed when
  # the '-h' flag is used. Without this line, the flags defined above are
  # only shown in the full `--helpful` help text.
  flags.adopt_module_key_flags(flags_core)

  # Add transformer-specific flags
  flags.DEFINE_enum(
      name="param_set", short_name="mp", default="big",
      enum_values=PARAMS_MAP.keys(),
      help=flags_core.help_wrap(
          "Parameter set to use when creating and training the model. The "
          "parameters define the input shape (batch size and max length), "
          "model configuration (size of embedding, # of hidden layers, etc.), "
          "and various other settings. The big parameter set increases the "
          "default batch size, embedding/hidden size, and filter size. For a "
          "complete list of parameters, please see model/model_params.py."))

  flags.DEFINE_bool(
      name="static_batch", default=False,
      help=flags_core.help_wrap(
          "Whether the batches in the dataset should have static shapes. In "
          "general, this setting should be False. Dynamic shapes allow the "
          "inputs to be grouped so that the number of padding tokens is "
          "minimized, and helps model training. In cases where the input shape "
          "must be static (e.g. running on TPU), this setting will be ignored "
          "and static batching will always be used."))

  # Flags for training with steps (may be used for debugging)
  flags.DEFINE_integer(
      name="train_steps", short_name="ts", default=None,
      help=flags_core.help_wrap("The number of steps used to train."))
  flags.DEFINE_integer(
      name="steps_between_evals", short_name="sbe", default=1000,
      help=flags_core.help_wrap(
          "The Number of training steps to run between evaluations. This is "
          "used if --train_steps is defined."))

  # BLEU score computation
  flags.DEFINE_string(
      name="bleu_source", short_name="bls", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="bleu_ref", short_name="blr", default=None,
      help=flags_core.help_wrap(
          "Path to source file containing text translate when calculating the "
          "official BLEU score. Both --bleu_source and --bleu_ref must be set. "
          "Use the flag --stop_threshold to stop the script based on the "
          "uncased BLEU score."))
  flags.DEFINE_string(
      name="vocab_file", short_name="vf", default=None,
      help=flags_core.help_wrap(
          "Path to subtoken vocabulary file. If data_download.py was used to "
          "download and encode the training data, look in the data_dir to find "
          "the vocab file."))

  flags_core.set_defaults(data_dir="/tmp/translate_ende",
                          model_dir="/tmp/transformer_model",
                          batch_size=None,
                          train_epochs=None)

  @flags.multi_flags_validator(
      ["train_epochs", "train_steps"],
      message="Both --train_steps and --train_epochs were set. Only one may be "
              "defined.")
  def _check_train_limits(flag_dict):
    return flag_dict["train_epochs"] is None or flag_dict["train_steps"] is None

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref"],
      message="Both or neither --bleu_source and --bleu_ref must be defined.")
  def _check_bleu_files(flags_dict):
    return (flags_dict["bleu_source"] is None) == (
        flags_dict["bleu_ref"] is None)

  @flags.multi_flags_validator(
      ["bleu_source", "bleu_ref", "vocab_file"],
      message="--vocab_file must be defined if --bleu_source and --bleu_ref "
              "are defined.")
  def _check_bleu_vocab_file(flags_dict):
    if flags_dict["bleu_source"] and flags_dict["bleu_ref"]:
      return flags_dict["vocab_file"] is not None
    return True

  @flags.multi_flags_validator(
      ["export_dir", "vocab_file"],
      message="--vocab_file must be defined if --export_dir is set.")
  def _check_export_vocab_file(flags_dict):
    if flags_dict["export_dir"]:
      return flags_dict["vocab_file"] is not None
    return True

  flags_core.require_cloud_storage(["data_dir", "model_dir", "export_dir"])
Example #16
0
def define_transformer_flags():
    """Add flags and flag validators for running transformer_main."""
    # Add common flags (data_dir, model_dir, train_epochs, etc.).
    flags_core.define_base()
    flags_core.define_performance(num_parallel_calls=True,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=True,
                                  max_train_steps=False,
                                  dtype=True,
                                  loss_scale=True,
                                  all_reduce_alg=True,
                                  enable_xla=True)

    # Additional performance flags
    # TODO(b/76028325): Remove when generic layout optimizer is ready.
    flags.DEFINE_boolean(
        name='enable_grappler_layout_optimizer',
        default=True,
        help='Enable Grappler layout optimizer. Currently Grappler can '
        'de-optimize fp16 graphs by forcing NCHW layout for all '
        'convolutions and batch normalizations, and this flag allows '
        'disabling it.')

    flags_core.define_benchmark()
    flags_core.define_device(tpu=True)

    flags.DEFINE_integer(
        name='train_steps',
        short_name='ts',
        default=300000,
        help=flags_core.help_wrap('The number of steps used to train.'))
    flags.DEFINE_integer(
        name='steps_between_evals',
        short_name='sbe',
        default=1000,
        help=flags_core.help_wrap(
            'The number of training steps to run between evaluations. This is '
            'used if --train_steps is defined.'))
    flags.DEFINE_boolean(name='enable_time_history',
                         default=True,
                         help='Whether to enable TimeHistory callback.')
    flags.DEFINE_boolean(name='enable_tensorboard',
                         default=False,
                         help='Whether to enable Tensorboard callback.')
    flags.DEFINE_boolean(name='enable_metrics_in_training',
                         default=False,
                         help='Whether to enable metrics during training.')
    flags.DEFINE_string(
        name='profile_steps',
        default=None,
        help='Save profiling data to model dir at given range of steps. The '
        'value must be a comma separated pair of positive integers, specifying '
        'the first and last step to profile. For example, "--profile_steps=2,4" '
        'triggers the profiler to process 3 steps, starting from the 2nd step. '
        'Note that profiler has a non-trivial performance overhead, and the '
        'output file can be gigantic if profiling many steps.')
    # Set flags from the flags_core module as 'key flags' so they're listed when
    # the '-h' flag is used. Without this line, the flags defined above are
    # only shown in the full `--helpful` help text.
    flags.adopt_module_key_flags(flags_core)

    # Add transformer-specific flags
    flags.DEFINE_enum(
        name='param_set',
        short_name='mp',
        default='big',
        enum_values=PARAMS_MAP.keys(),
        help=flags_core.help_wrap(
            'Parameter set to use when creating and training the model. The '
            'parameters define the input shape (batch size and max length), '
            'model configuration (size of embedding, # of hidden layers, etc.), '
            'and various other settings. The big parameter set increases the '
            'default batch size, embedding/hidden size, and filter size. For a '
            'complete list of parameters, please see model/model_params.py.'))

    flags.DEFINE_bool(
        name='static_batch',
        short_name='sb',
        default=False,
        help=flags_core.help_wrap(
            'Whether the batches in the dataset should have static shapes. In '
            'general, this setting should be False. Dynamic shapes allow the '
            'inputs to be grouped so that the number of padding tokens is '
            'minimized, and helps model training. In cases where the input shape '
            'must be static (e.g. running on TPU), this setting will be ignored '
            'and static batching will always be used.'))
    flags.DEFINE_integer(
        name='max_length',
        short_name='ml',
        default=256,
        help=flags_core.help_wrap(
            'Max sentence length for Transformer. Default is 256. Note: Usually '
            'it is more effective to use a smaller max length if static_batch is '
            'enabled, e.g. 64.'))

    # Flags for training with steps (may be used for debugging)
    flags.DEFINE_integer(
        name='validation_steps',
        short_name='vs',
        default=64,
        help=flags_core.help_wrap('The number of steps used in validation.'))

    # BLEU score computation
    flags.DEFINE_string(
        name='bleu_source',
        short_name='bls',
        default=None,
        help=flags_core.help_wrap(
            'Path to source file containing text to translate when calculating '
            'the official BLEU score. Both --bleu_source and --bleu_ref must '
            'be set. Use the flag --stop_threshold to stop the script based on '
            'the uncased BLEU score.'))
    flags.DEFINE_string(
        name='bleu_ref',
        short_name='blr',
        default=None,
        help=flags_core.help_wrap(
            'Path to the reference file containing the correct translations, '
            'used when calculating the official BLEU score. Both --bleu_source '
            'and --bleu_ref must be set. Use the flag --stop_threshold to stop '
            'the script based on the uncased BLEU score.'))
    flags.DEFINE_string(
        name='vocab_file',
        short_name='vf',
        default=None,
        help=flags_core.help_wrap(
            'Path to subtoken vocabulary file. If data_download.py was used to '
            'download and encode the training data, look in the data_dir to find '
            'the vocab file.'))
    flags.DEFINE_string(
        name='mode',
        default='train',
        help=flags_core.help_wrap('mode: train, eval, or predict'))

    flags_core.set_defaults(data_dir='/tmp/translate_ende',
                            model_dir='/tmp/transformer_model',
                            batch_size=None,
                            train_epochs=10)

    # pylint: disable=unused-variable
    @flags.multi_flags_validator(
        ['mode', 'train_epochs'],
        message='--train_epochs must be defined in train mode')
    def _check_train_limits(flag_dict):
        if flag_dict['mode'] == 'train':
            return flag_dict['train_epochs'] is not None
        return True

    @flags.multi_flags_validator(
        ['bleu_source', 'bleu_ref'],
        message='Both or neither --bleu_source and --bleu_ref must be defined.'
    )
    def _check_bleu_files(flags_dict):
        return (flags_dict['bleu_source'] is None) == (flags_dict['bleu_ref']
                                                       is None)

    @flags.multi_flags_validator(
        ['bleu_source', 'bleu_ref', 'vocab_file'],
        message='--vocab_file must be defined if --bleu_source and --bleu_ref '
        'are defined.')
    def _check_bleu_vocab_file(flags_dict):
        if flags_dict['bleu_source'] and flags_dict['bleu_ref']:
            return flags_dict['vocab_file'] is not None
        return True

    @flags.multi_flags_validator(
        ['export_dir', 'vocab_file'],
        message='--vocab_file must be defined if --export_dir is set.')
    def _check_export_vocab_file(flags_dict):
        if flags_dict['export_dir']:
            return flags_dict['vocab_file'] is not None
        return True

    # pylint: enable=unused-variable

    flags_core.require_cloud_storage(['data_dir', 'model_dir', 'export_dir'])
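
The profile_steps flag in this example expects a comma-separated pair of positive integers giving the first and last step to profile. A hedged sketch of how such a value might be split and checked follows; this helper is an assumption for illustration, not the code that actually consumes the flag.

# Hypothetical helper for the '--profile_steps=START,END' format described in
# the help text above; not the actual implementation that reads this flag.
from typing import Tuple


def parse_profile_steps(value: str) -> Tuple[int, int]:
    """Parses 'START,END' into two positive integers with START <= END."""
    parts = value.split(',')
    if len(parts) != 2:
        raise ValueError('profile_steps must be "START,END", got %r' % value)
    start, end = int(parts[0]), int(parts[1])
    if start <= 0 or end < start:
        raise ValueError('profile_steps requires 0 < START <= END')
    return start, end


print(parse_profile_steps('2,4'))  # (2, 4): profiles steps 2 through 4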
Example #17
0
def define_ncf_flags():
    """Add flags for running ncf_main."""
    # Add common flags
    flags_core.define_base(export_dir=False)
    flags_core.define_performance(num_parallel_calls=False,
                                  inter_op=False,
                                  intra_op=False,
                                  synthetic_data=False,
                                  max_train_steps=False,
                                  dtype=False,
                                  all_reduce_alg=False)
    flags_core.define_device(tpu=True)
    flags_core.define_benchmark()

    flags.adopt_module_key_flags(flags_core)

    flags_core.set_defaults(model_dir="/tmp/ncf/",
                            data_dir="/tmp/movielens-data/",
                            train_epochs=2,
                            batch_size=256,
                            hooks="ProfilerHook",
                            tpu=None)

    # Add ncf-specific flags
    flags.DEFINE_enum(
        name="dataset",
        default="ml-1m",
        enum_values=["ml-1m", "ml-20m"],
        case_sensitive=False,
        help=flags_core.help_wrap("Dataset to be trained and evaluated."))

    flags.DEFINE_boolean(
        name="download_if_missing",
        default=True,
        help=flags_core.help_wrap(
            "Download data to data_dir if it is not already present."))

    flags.DEFINE_string(
        name="eval_batch_size",
        default=None,
        help=flags_core.help_wrap(
            "The batch size used for evaluation. This should generally be larger"
            "than the training batch size as the lack of back propagation during"
            "evaluation can allow for larger batch sizes to fit in memory. If not"
            "specified, the training batch size (--batch_size) will be used."))

    flags.DEFINE_integer(
        name="num_factors",
        default=8,
        help=flags_core.help_wrap("The Embedding size of MF model."))

    # Set the default as a list of strings to be consistent with input arguments
    flags.DEFINE_list(
        name="layers",
        default=["64", "32", "16", "8"],
        help=flags_core.help_wrap(
            "The sizes of hidden layers for MLP. Example "
            "to specify different sizes of MLP layers: --layers=32,16,8,4"))

    flags.DEFINE_float(
        name="mf_regularization",
        default=0.,
        help=flags_core.help_wrap(
            "The regularization factor for MF embeddings. The factor is used by "
            "regularizer which allows to apply penalties on layer parameters or "
            "layer activity during optimization."))

    flags.DEFINE_list(
        name="mlp_regularization",
        default=["0.", "0.", "0.", "0."],
        help=flags_core.help_wrap(
            "The regularization factor for each MLP layer. See mf_regularization "
            "help for more info about regularization factor."))

    flags.DEFINE_integer(
        name="num_neg",
        default=4,
        help=flags_core.help_wrap(
            "The Number of negative instances to pair with a positive instance."
        ))

    flags.DEFINE_float(name="learning_rate",
                       default=0.001,
                       help=flags_core.help_wrap("The learning rate."))

    flags.DEFINE_float(
        name="hr_threshold",
        default=None,
        help=flags_core.help_wrap(
            "If passed, training will stop when the evaluation metric HR is "
            "greater than or equal to hr_threshold. For dataset ml-1m, the "
            "desired hr_threshold is 0.68 which is the result from the paper; "
            "For dataset ml-20m, the threshold can be set as 0.95 which is "
            "achieved by MLPerf implementation."))