Example #1
  def Params(cls):
    """Params for a MLPerfProgramSchedule.

    Returns:
      An `InstantiableParams` object for `cls` with schedule-level knobs plus
      a nested `ml_perf` sub-Params holding MLPerf compliance settings.
    """
    p = hyperparams.InstantiableParams(cls)

    # Schedule-level configuration.
    p.Define('task_dict', None, 'dataset_name -> task params')
    p.Define('task_name', None, 'High level task name')
    p.Define('logdir', None, 'Log directory')
    p.Define('train_program', None, 'Train program params')
    p.Define('train_executions_per_eval', 1, '')
    p.Define('dataset_names', [], 'List of all dataset names.')
    p.Define('num_splits_per_client', None, '')

    # MLPerf-specific settings live in a nested Params object.
    p.Define('ml_perf', hyperparams.Params(), 'MlPerf configuration.')

    mlp = p.ml_perf
    mlp.Define('benchmark_name', None, 'Benchmark name for compliance log.')
    mlp.Define('decoder_metric_name', None,
               'Name of the decoder metric to report for compliance log.')
    # NOTE: fixed typo in help text ('succeeed' -> 'succeed').
    mlp.Define('decoder_metric_success_threshold', None,
               'Benchmark run must exceed this value to succeed.')
    mlp.Define('steps_per_epoch', None, 'Number of training steps per epoch.')
    mlp.Define('global_batch_size', None, 'Global batch size.')
    mlp.Define('max_sequence_length', None, 'Maximum sequence length.')
    mlp.Define('optimizer_name', None, 'Optimizer used.')
    mlp.Define('opt_adam_beta_1', None, 'beta_1 used by Adam optimizer.')
    mlp.Define('opt_adam_beta_2', None, 'beta_2 used by Adam optimizer.')
    mlp.Define('opt_adam_epsilon', None, 'epsilon used by Adam optimizer.')
    mlp.Define('base_learning_rate', None, 'Base learning rate.')
    mlp.Define('warmup_steps', None, 'Number of warm-up steps.')
    mlp.Define('train_samples', None, 'Number of train samples.')
    mlp.Define('eval_samples', None, 'Number of eval samples.')

    return p
Example #2
 def Params(cls):
   """The params of this layer."""
   p = hyperparams.InstantiableParams(cls)
   # Whether dropout layers should use the deterministic (seed-driven)
   # variant instead of the stateful one.
   p.Define('deterministic_dropout', False,
            'Used deterministic dropout or not.')
   # Long help text hoisted into a local so the Define call stays compact.
   fprop_dtype_help = (
       'Activations datatype to use. To enable bfloat16 activations for '
       'layers built using model builder, set fprop_dtype to '
       'tf.bfloat16, which will be propagated to layers that support '
       'bfloat16 activations. Default is None, which will use float32 '
       'activations.')
   p.Define('fprop_dtype', None, fprop_dtype_help)
   return p
Example #3
 def Params(cls):
   """Default parameters for Programs."""
   p = hyperparams.InstantiableParams(cls)
   p.Define('task', None, 'Underlying task')
   p.Define('logdir', None, 'Log directory')
   p.Define('num_splits_per_client', None, '')
   p.Define('steps_per_loop', None, 'Number of steps to run.')
   p.Define('dataset_name', None,
            'Dataset the program is operating on, eg: "Test"')
   p.Define('name', 'base_program', 'Program name.')
   p.Define('task_name', None,
            'If multi-task, what the high-level task name is')
   # Worker threads for the program's multiprocessing pool.
   p.Define('num_threads', 1, 'Number of threads in multiprocessing pool.')
   return p
Example #4
  def Params(cls):
    """Params for a SimpleProgramSchedule."""
    p = hyperparams.InstantiableParams(cls)
    # Schedule-level knobs, defined data-driven as (name, default, help).
    schedule_defs = (
        ('task_dict', None, 'dataset_name -> task params'),
        ('task_name', None, 'High level task name'),
        ('logdir', None, 'Log directory'),
        ('train_program', None, 'Train program params'),
        ('train_executions_per_eval', 1, ''),
        ('eval_programs', [], 'List of eval program params.'),
        ('num_splits_per_client', None, ''),
        ('dataset_names', [], 'List of all dataset names.'),
    )
    for key, default, helptext in schedule_defs:
      p.Define(key, default, helptext)

    # TODO(blee): Clean these up.
    p.Define('ml_perf', hyperparams.Params(), 'MlPerf configuration.')
    p.ml_perf.Define('benchmark_name', None,
                     'Benchmark name for compliance log.')
    return p
Example #5
    def Params(cls):
        """Default parameters for a cluster."""
        params = hyperparams.InstantiableParams(cls)
        params.Define(
            'mode', 'async', 'A string noting the overall training method. '
            'Valid values: sync, async.')
        params.Define(
            'job', 'trainer', 'The role of this job in the training cluster. '
            'E.g., trainer_client, trainer, controller,  etc.')
        params.Define('task', 0, 'This process is the task-th task in the job.')
        params.Define('logdir', '', 'The log directory.')

        # How the cluster is composed.
        #
        # A typical training cluster has a few jobs (controller, worker, ps,
        # etc). One can potentially place computation on any device of these
        # jobs; each job spec below configures one of them (e.g., number of
        # GPUs per task, the number of replicas, etc.).
        #
        # A trainer client may dispatch operations on only a subset of jobs:
        # the controller places computations onto the controller and ps
        # devices, while the evaler only uses evaler devices.
        #
        # cluster.job is the role this client process performs ('controller',
        # 'trainer', 'trainer_client', 'evaler', 'decoder', ...). A client can
        # be the same process as one of the compute devices (e.g., controller)
        # or a separate standalone process (e.g., trainer_client, which places
        # computations on worker and ps devices while hosting none itself).
        #
        # The first three jobs default to one replica; the rest to zero.
        for job_name, replicas in (('controller', 1), ('worker', 1),
                                   ('ps', 1), ('input', 0), ('evaler', 0),
                                   ('decoder', 0)):
            params.Define(job_name, cls._JobSpec(replicas),
                          'The %s job.' % job_name)

        # A few 'global' knobs.
        params.Define(
            'add_summary', None, 'Whether to add summaries. If None, '
            'decides based on the job type.')
        params.Define('do_eval', None, 'Whether to do eval.')
        params.Define('split_id', 0, 'Split id for the model.')
        return params
Example #6
 def Params(cls):
   """Default params: a mapping from task name to its program schedule."""
   p = hyperparams.InstantiableParams(cls)
   # Per-task program schedules, keyed by the high-level task name.
   p.Define('program_schedule_dict', None,
            'task_name -> ProgramScheduleParams')
   return p