def Params(cls): """Params for a MLPerfProgramSchedule.""" p = hyperparams.InstantiableParams(cls) p.Define('task_dict', None, 'dataset_name -> task params') p.Define('task_name', None, 'High level task name') p.Define('logdir', None, 'Log directory') p.Define('train_program', None, 'Train program params') p.Define('train_executions_per_eval', 1, '') p.Define('dataset_names', [], 'List of all dataset names.') p.Define('num_splits_per_client', None, '') p.Define('ml_perf', hyperparams.Params(), 'MlPerf configuration.') mlp = p.ml_perf mlp.Define('benchmark_name', None, 'Benchmark name for compliance log.') mlp.Define('decoder_metric_name', None, 'Name of the decoder metric to report for compliance log.') mlp.Define('decoder_metric_success_threshold', None, 'Benchmark run must exceed this value to succeeed.') mlp.Define('steps_per_epoch', None, 'Number of training steps per epoch.') mlp.Define('global_batch_size', None, 'Global batch size.') mlp.Define('max_sequence_length', None, 'Maximum sequence length.') mlp.Define('optimizer_name', None, 'Optimizer used.') mlp.Define('opt_adam_beta_1', None, 'beta_1 used by Adam optimizer.') mlp.Define('opt_adam_beta_2', None, 'beta_2 used by Adam optimizer.') mlp.Define('opt_adam_epsilon', None, 'epsilon used by Adam optimizer.') mlp.Define('base_learning_rate', None, 'Base learning rate.') mlp.Define('warmup_steps', None, 'Number of warm-up steps.') mlp.Define('train_samples', None, 'Number of train samples.') mlp.Define('eval_samples', None, 'Number of eval samples.') return p
def _JobSpec(cls, replicas):
  """Construct a job spec param with the given number of replicas."""
  p = hyperparams.Params()
  # By default, we use /job:localhost so that most tests can just
  # work out of the box. trainer.py will then set job names accordingly.
  p.Define('name', '/job:localhost',
           'TensorFlow job spec, e.g., /job:trainer, /job:ps')
  p.Define('replicas', replicas, 'The number of tasks of a job.')
  p.Define(
      'targets', '', 'The target network address(es) to which we can '
      'create tf sessions. E.g., a single ip:port, or a list of '
      'comma-separated grpc://ip:port, etc.')
  p.Define('cpus_per_replica', 1,
           'The number of CPU devices to use per replica.')
  p.Define('gpus_per_replica', 0,
           'The number of GPU devices to use per replica.')
  p.Define(
      'devices_per_split', 1, 'Devices of a replica are grouped into '
      'splits. Each split contains this many devices. One split is a '
      'group of devices on which the computation nodes of a graph are '
      'placed. E.g., one can place the forward lstm on device 0 of '
      'a split and place the backward lstm on device 1, etc.')
  p.Define('tpus_per_replica', 0,
           'The number of tpu cores to use per replica.')
  p.Define('num_tpu_hosts', 0, 'The number of tpu hosts.')
  return p
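# A minimal usage sketch (assumption): overriding a _JobSpec-style params
# object for a small GPU trainer job. The values are illustrative only;
# trainer.py normally rewrites the job name.
def _ExampleTrainerJob(cls):
  worker = cls._JobSpec(replicas=4)  # four trainer tasks
  worker.name = '/job:trainer'       # overrides the /job:localhost default
  worker.gpus_per_replica = 8        # eight GPUs per task
  worker.devices_per_split = 2       # e.g. forward/backward lstm on 0 and 1
  return worker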
def Params(cls):
  p = hyperparams.Params()
  p.Define('name', 'EarlyStop', '')
  p.Define('metric_history', MetricHistory.Params(), 'Metric history params.')
  p.Define(
      'tolerance', 0.0, 'Minimum significant difference in metric; '
      'useful if progress is asymptotic.')
  p.Define('window', 0, 'Maximum number of steps between best and current.')
  p.Define('verbose', True, 'Log early-stop checks.')
  p.Define('min_steps', 0, 'Minimum number of steps before stopping.')
  return p
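# A hedged sketch (assumption, not the library's implementation): how the
# 'min_steps', 'window', and 'tolerance' params above could combine into a
# stopping decision. `best_step` and `current_step` would come from the
# MetricHistory referenced by 'metric_history'.
def _ShouldStop(p, best_step, current_step):
  """Returns True if there was no significant improvement within p.window."""
  if current_step < p.min_steps:
    return False
  # MetricHistory is assumed to ignore improvements smaller than p.tolerance,
  # so best_step already reflects the last significant improvement.
  return p.window > 0 and (current_step - best_step) > p.window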
def Params(cls): """Default parameters for a trial.""" p = hyperparams.Params() p.Define( 'report_interval_seconds', 600, 'Interval between reporting trial results and checking for early ' 'stopping.') p.Define( 'vizier_objective_metric_key', 'loss', 'Which eval metric to use as the "objective value" for tuning.') p.Define( 'report_during_training', False, 'Whether to report objective metrics during the training process.') return p
def Params(cls): """Params for a SimpleProgramSchedule.""" p = hyperparams.InstantiableParams(cls) p.Define('task_dict', None, 'dataset_name -> task params') p.Define('task_name', None, 'High level task name') p.Define('logdir', None, 'Log directory') p.Define('train_program', None, 'Train program params') p.Define('train_executions_per_eval', 1, '') p.Define('eval_programs', [], 'List of eval program params.') p.Define('num_splits_per_client', None, '') p.Define('dataset_names', [], 'List of all dataset names.') # TODO(blee): Clean these up. p.Define('ml_perf', hyperparams.Params(), 'MlPerf configuration.') mlp = p.ml_perf mlp.Define('benchmark_name', None, 'Benchmark name for compliance log.') return p
def BuildData():
  """Returns a hyperparam recording build information of this py binary."""
  p = hyperparams.Params()
  p.Define('timestamp', build_data.TimestampAscii(),
           'Build timestamp as a string.')
  p.Define('info', build_data.BuildInfo(),
           'User, host, and directory of builder.')
  p.Define('target', build_data.Target(), 'Build target.')
  p.Define('id', build_data.BuildID(), 'Build id.')
  p.Define('changelist', build_data.Changelist(), 'Build CL.')
  p.Define('client_info', build_data.ClientInfo(),
           'Perforce client changelist and status as descriptive string.')
  p.Define('label', build_data.BuildLabel(),
           'Build label (passed to make-{opt,dbg} -l).')
  p.Define('platform', build_data.Platform(), 'Google platform.')
  p.Define('tool', build_data.BuildTool(), 'Build tool.')
  p.Define('paropts', build_data.ParOptions(), 'Par options.')
  return p
def Params(cls):
  p = hyperparams.Params()
  p.Define('name', 'MetricHistory', 'Used by SetLogdirInMetricHistories.')
  p.Define('jobname', 'eval_dev', 'Job and dataset to which metric applies.')
  p.Define('metric', 'log_pplx', 'Metric to record.')
  p.Define(
      'minimize', True,
      'If True, training minimizes the metric. If False, training '
      'maximizes the metric.')
  p.Define('logdir', '', 'Root dir for BF logs.')
  p.Define(
      'tfevent_file', False, 'If True, read the metric from '
      'events.out.tfevents.* files in the job dir instead of '
      'maintaining a history file.')
  p.Define('local_filesystem', False,
           'Logdir is on local filesystem (needed for unit test).')
  return p
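# A hedged sketch (assumption): how the 'minimize' param above decides whether
# a newly recorded metric value improves on the best value seen so far.
def _IsImprovement(p, best_value, new_value, tolerance=0.0):
  if p.minimize:
    return new_value < best_value - tolerance
  return new_value > best_value + tolerance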
def Params(cls): """Defaults params for input generators.""" p = super(BaseInputGenerator, cls).Params() p.name = 'input' p.Define( 'batch_size', 0, 'Batch size for a device split. This will be ' 'scaled to match the accelarator hardware topology.') p.Define( 'num_samples', 0, 'If non-zero, the dataset contains these many samples. ' 'For test/eval dataset, if we want the test/evel job evaluate ' 'the whole dataset, this param must be set precisely. Otherwise, ' 'this param is optional.') # TPU related infeed tuning. p.Define('use_per_host_infeed', False, 'Whether run infeed op on each host.') p.Define( 'tpu_infeed_parallelism', 1, 'Uses these many python threads to drive infeed concurrently.') p.Define('use_partitioned_infeed_queue', False, 'Use partitioned infeed') p.Define('num_partitions', None, 'Num partitions') p.Define('remote', hyperparams.Params(), 'Params to configure remote input policy.') pp = p.remote pp.Define( 'shardable_batch', True, 'True if and only if this input generates simple batches whose 1st ' 'dimension of every tensor in a batch is the batch dimension, and ' 'other dimensions are always the same.') pp.Define( 'max_inflights_per_target', 32, 'The maximum number of ' 'concurrent inflight remote input fetches per remote target.') return p
def Test(self):
  """Returns Params for the testing dataset."""
  return hyperparams.Params()

def Dev(self):
  """Returns Params for the development dataset."""
  return hyperparams.Params()

def Train(self):
  """Returns Params for the training dataset."""
  return hyperparams.Params()
def Params(cls):
  p = super(QuantizableLayer, cls).Params()
  p.Define('qdomain', hyperparams.Params(),
           'Container for quantization domains.')
  p.qdomain.Define('default', None, 'Default quantization domain.')
  return p
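# A hedged sketch (not from the source): adding an extra named quantization
# domain to the 'qdomain' container defined above. `qdomain_params` stands in
# for whatever QDomain params class the model uses.
def _ExampleAddQDomain(p, qdomain_params):
  """Defines a named quantization domain on a QuantizableLayer's params."""
  # Additional domains are added with Define(), just like 'default' above.
  p.qdomain.Define('fullyconnected', qdomain_params,
                   'Quantization domain for fully connected ops.')
  return p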