Example #1
  def Params(cls):
    """Defaults params."""
    extractors = hyperparams.Params()
    extractors.Define('lasers', WaymoLaserExtractor.Params(), '')
    extractors.Define('labels', WaymoLabelExtractor.Params(), '')
    extractors.Define('metadata', WaymoFrameMetadataExtractor.Params(), '')

    preprocessors = py_utils.NestedMap(
        count_points=input_preprocessors.CountNumberOfPointsInBoxes3D.Params(),
        viz_copy=input_preprocessors.CreateDecoderCopy.Params(),
        keep_xyz_range=input_preprocessors.DropLaserPointsOutOfRange.Params(),
        filter_nlz_points=FilterNLZPoints.Params(),
        select_centers=input_preprocessors.SparseCenterSelector.Params(),
        gather_features=input_preprocessors.SparseCellGatherFeatures.Params(),
        tile_anchors=input_preprocessors.TileAnchorBBoxes.Params(),
        assign_anchors=input_preprocessors.AnchorAssignment.Params(),
        pad_lasers=input_preprocessors.PadLaserFeatures.Params(),
    )
    p = super(WaymoSparseLaser, cls).Params(extractors).Set(
        preprocessors=_NestedMapToParams(preprocessors),
        preprocessors_order=[
            'viz_copy',
            'keep_xyz_range',
            'filter_nlz_points',
            'count_points',
            'select_centers',
            'gather_features',
            'tile_anchors',
            'assign_anchors',
            'pad_lasers',
        ],
    )

    p.file_datasource = datasource.PrefixedDataSourceWrapper.Params()
    p.file_datasource.base_datasource = datasource.SimpleDataSource.Params()
    p.file_datasource.base_datasource.file_type = 'tfrecord'

    return p
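The Params above can be overridden after construction; a minimal usage sketch follows (the override values and the file pattern are illustrative assumptions, not recommended settings):

# Usage sketch, assuming the class and imports from the example above.
p = WaymoSparseLaser.Params()
p.preprocessors.select_centers.num_cell_centers = 1024   # tune one preprocessor knob
p.preprocessors_order.remove('viz_copy')                 # e.g., skip decoder copies
p.file_datasource.base_datasource.file_pattern = 'path/to/*.tfrecord'  # illustrative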
Example #2
def EncoderConfig() -> hyperparams.Params:
  """Returns Params for configuring one `DualEncoder` modality."""
  p = hyperparams.Params()
  p.Define(
      'input_features', '',
      'Feature(s) from the input batch to feed to the encoder. The structure '
      'of this field determines the number and structure of the encoder '
      'arguments. Examples: If set to "feature_name", the encoder is called '
      'with a single argument `input_batch["feature_name"]`; if set to an '
      'N-element tuple, it is called with N arguments. See `Selector` class '
      'for more details.')
  p.Define('id_feature', '', 'Name of id feature to use for loss masking.')
  p.Define(
      'encoder', None,
      'Params of a layer that encodes input_features. The layer should '
      'accept the output of Selector(input_features) as arguments.')
  p.Define('output_dim', None,
           'Dimension of the embeddings produced by `encoder`.')
  p.Define('encoder_scope', '',
           'Optional variable scope name to create the encoder in.')
  p.Define('projection_scope', '',
           'Optional variable scope in which to create the projection layer.')
  return p
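A minimal configuration sketch of one modality follows; the feature names, scope, and dimension are illustrative assumptions:

# Hypothetical configuration of one DualEncoder modality.
img = EncoderConfig()
img.input_features = 'image'         # encoder is called with input_batch['image']
img.id_feature = 'image_id'          # id feature used for loss masking
img.output_dim = 512                 # embedding dimension produced by the encoder
img.encoder_scope = 'image_encoder'  # optional variable scope for the encoder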
Example #3
    def Params(cls):
        """Defaults params for input generators."""
        p = super(BaseInputGenerator, cls).Params()
        p.name = 'input'
        p.Define(
            'batch_size', 0, 'Batch size for a device split. This will be '
            'scaled to match the accelerator hardware topology.')
        p.Define(
            'num_samples', 0,
            'If non-zero, the dataset contains this many samples. '
            'For a test/eval dataset, if we want the test/eval job to '
            'evaluate the whole dataset, this param must be set precisely. '
            'Otherwise, this param is optional.')

        # TPU related infeed tuning.
        p.Define('use_per_host_infeed', False,
                 'Whether to run the infeed op on each host.')
        p.Define(
            'tpu_infeed_parallelism', 1,
            'Use this many Python threads to drive infeed concurrently.')
        p.Define('use_partitioned_infeed_queue', False,
                 'Whether to use a partitioned infeed queue.')
        p.Define('num_partitions', None, 'Number of infeed partitions.')

        p.Define('remote', hyperparams.Params(),
                 'Params to configure remote input policy.')
        pp = p.remote
        pp.Define(
            'shardable_batch', True,
            'True if and only if this input generates simple batches whose 1st '
            'dimension of every tensor in a batch is the batch dimension, and '
            'other dimensions are always the same.')
        pp.Define(
            'max_inflights_per_target', 32, 'The maximum number of '
            'concurrent inflight remote input fetches per remote target.')
        return p
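A subclass or experiment config would typically adjust these defaults after calling Params(); the values in this sketch are illustrative assumptions:

# Usage sketch: tune the defaults returned above.
p = BaseInputGenerator.Params()
p.batch_size = 16                      # batch size per device split
p.num_samples = 5000                   # set precisely to eval the whole dataset
p.use_per_host_infeed = True           # run the infeed op on each TPU host
p.remote.max_inflights_per_target = 8  # throttle concurrent remote fetches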
Example #4
 def Dev(cls):
     """Returns Params for the development dataset."""
     return hyperparams.Params()
Example #5
 def Test(self) -> InputParams:
     """Returns Params for the testing dataset."""
     return hyperparams.Params()
Example #6
  def Params(cls):
    p = super(BaseTask, cls).Params()
    p.Define('input', None, 'Input generator Params.')
    p.Define('encoder', None, 'Encoder Params.')
    p.Define('online_encoder', None, 'Online Encoder Params.')
    p.Define('decoder', None, 'Decoder Params.')
    p.Define('train', hyperparams.Params(),
             'Params to control how this task should be trained.')

    tp = p.train
    tp.Define(
        'task_global_step', False,
        'Whether or not to create a task-specific global step. '
        'When a task specific global step exists, learning rate schedule '
        'depends on the task specific global step, instead of the shared '
        'global step.')
    tp.Define(
        'start_up_delay_steps', 200, 'i-th replica starts training after '
        'i*(i+1)/2*start_up_delay_steps steps')
    tp.Define('max_steps', 4 * 10**6, 'Maximum number of training steps.')
    tp.Define('tpu_steps_per_loop', 100, 'The number of training steps per '
              'training loop for TPUs.')
    tp.Define(
        'vn_start_step', 200000000,
        'Step starting from which variational noise is added to '
        'params values during training.')
    tp.Define('vn_std', 0.0, 'Std of the variational noise.')
    tp.Define(
        'l2_regularizer_weight', None,
        'If not None, L2 regularization to apply to the weights. '
        'Otherwise, disable L2 regularization.')
    tp.Define(
        'l1_regularizer_weight', None,
        'If not None, L1 regularization to apply to the weights. '
        'Otherwise, disable L1 regularization.')
    tp.Define('learning_rate', 0.0, 'Learning rate to use.')
    tp.Define('clip_gradient_norm_to_value', 0.0,
              'Clip gradient norm to this value.')
    tp.Define('grad_norm_to_clip_to_zero', 0.0,
              'Clip gradient to 0 if its norm exceeds this value.')
    tp.Define('grad_norm_tracker', None, 'Params for GradNormTracker.')
    tp.Define('optimizer', optimizer.Adam.Params(), 'Params for the optimizer.')
    tp.Define('lr_schedule',
              lr_schedule.ContinuousLearningRateSchedule.Params(),
              'Learning rate decay schedule.')
    tp.Define('early_stop', early_stop.EarlyStop.Params(),
              'Early stopping based on dev-set performance.')
    tp.Define(
        'ema_decay', 0.0,
        'If > 0, enable ExponentialMovingAverage during training '
        'with the given decay. '
        'Must be < 1. Disabled if <= 0.')
    tp.Define(
        'bprop_variable_filter', None,
        'If set, only backprop variables whose names partially match '
        'this regexp (re.search).')
    tp.Define(
        'init_from_checkpoint_rules', {},
        'If not None, a dictionary with keys corresponding to a checkpoint '
        'path and values corresponding to variable loading rules is expected. '
        'Each key is expected to be a path to a checkpoint from which to '
        'initialize part of the model. Variables are only loaded from this '
        'path during initialization and will override values provided by '
        'initialization. '
        'The corresponding values (loading_rules) are expected to be a tuple '
        'consisting of two lists: loading rules and ignore rules, '
        'respectively. '
        'The first list (loading rules) contains the list of variables '
        'which should be initialized from the checkpoint: each element in the '
        'list is a pair of strings. The first element is a regex and the '
        'second is a python format string. If a variable in the model matches '
        'a regex, we rename using the format string to determine the '
        'corresponding var in the checkpoint. Note that it is an error if a '
        'model variable matches multiple loading rules, for the same '
        'checkpoint or across checkpoints. '
        'The second list (ignore rules) is a list of regexes which specify '
        'variables in the model which should not be initialized using the '
        'loading rules. Thus, if a variable in the model to be trained matches '
        'one of the rules in the loading rules, as well as one of the regular '
        'expressions in the ignore rules, the variable will not be initialized '
        'from the checkpoint, but will instead be initialized from the '
        'variable initializer defined in the graph. '
        'Example: '
        '{"checkpoint_path": ([("(.*)", "%s")], [])} will initialize all the '
        'model parameters from the checkpoint_path.')
    tp.Define(
        'pruning_hparams_dict', None, 'Pruning related hyperparameters. A dict '
        'with hyperparameter: value pairs. See tf.contrib.model_pruning.')
    tp.Define('save_interval_seconds', 60 * 10,
              'Generates a checkpoint roughly once every this many seconds.')
    tp.Define('summary_interval_steps', 100,
              'Generates a summary roughly once every this many steps.')

    p.Define('eval', hyperparams.Params(),
             'Params to control how this task should be evaled.')
    ep = p.eval
    ep.Define(
        'samples_per_summary', 1000,
        'If > 0, generates one summary after this many samples, at most. '
        'If == 0 or the dataset has fewer examples, evaluate the whole set.')
    ep.Define(
        'decoder_samples_per_summary', 0,
        'If > 0, each decoder summary will contain at most this many samples. '
        'If == 0, defaults to `samples_per_summary` for '
        'backwards compatibility.')
    return p
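The init_from_checkpoint_rules format described above is easier to see in code; a hedged sketch of one entry, with a hypothetical checkpoint path and variable names:

# Initialize all encoder/* variables from a checkpoint (renamed identically),
# while leaving anything matching global_step untouched.
p = BaseTask.Params()
tp = p.train
tp.init_from_checkpoint_rules = {
    '/tmp/pretrain/ckpt-100000': (          # hypothetical checkpoint path
        [('encoder/(.*)', 'encoder/%s')],   # loading rules: (regex, format string)
        ['.*global_step.*'],                # ignore rules: regexes to skip
    ),
}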
Example #7
 def Train(self) -> InputParams:
     """Returns Params for the training dataset."""
     return hyperparams.Params()
Example #8
 def Dev(self) -> InputParams:
     """Returns Params for the development dataset."""
     return hyperparams.Params()
Example #9
def BuildData():
    """Returns a hyperparam recording build information of this py binary."""
    p = hyperparams.Params()
    return p
Example #10
 def Params(cls):
     """Returns the optimizer params."""
     p = hyperparams.Params()
     p.Define('cls', cls, 'Cls that this param object is associated with.')
     p.Define('add_summary', True, 'Adds summary iff true.')
     return p
Example #11
 def testDefineExisting(self):
   p = hyperparams.Params()
   p.Define('foo', 1, '')
   self.assertRaisesRegex(AttributeError, 'already defined',
                          lambda: p.Define('foo', 1, ''))
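The test above shows that re-Defining a name raises; changing an existing value is done by plain attribute assignment instead. A minimal sketch:

from lingvo.core import hyperparams

# Define once, then mutate by assignment; a second Define('foo', ...) raises.
p = hyperparams.Params()
p.Define('foo', 1, '')
p.foo = 2
assert p.foo == 2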
Example #12
 def loss_params():
   p = hyperparams.Params()
   p.Define('beta', 0.5, '')
   return p
Example #13
def _NestedMapToParams(nmap):
    p = hyperparams.Params()
    for k, v in nmap.FlattenItems():
        p.Define(k, v, '')
    return p
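For clarity, a small usage sketch of _NestedMapToParams; the map contents are illustrative, and FlattenItems yields (key, value) pairs that become Define'd params:

from lingvo.core import py_utils

# Illustrative flat NestedMap; each key becomes a param with an empty docstring.
nmap = py_utils.NestedMap(learning_rate=0.1, use_bias=True)
p = _NestedMapToParams(nmap)
assert p.learning_rate == 0.1
assert p.use_bias is True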
Example #14
 def Test(self):
     p = base_input_generator.BaseInputGenerator.Params()
     inputs = hyperparams.Params()
     for task_name in ['a', 'b']:
         inputs.Define(task_name, p.Copy(), '')
     return inputs
Example #15
    def Params(cls):
        r"""Defaults params for TextInput.

    Returns:
      A Params object for TextPackedInput.

    Notes about usage:

    * Input files contain UTF8 encoded texts. p.input_file_type controls
      what format to extract these texts from. The default is 'tsv', in which
      case p.file_pattern should be prefixed by file type 'text:', and
      every line in the input file should have text columns separated by
      a tab '\t'. Otherwise p.input_file_type can be Sentence or SentencePair
      protos.

      For tsv input files, in the default case, the file should contain 2
      columns, for the source and the target sentence. Special cases:

      - When quality scores are present (see p.quality_score_filter_fn below),
        it should contain 3 columns, the last being a quality score.
      - When MASS is enabled, it should contain a single column.

    * p.tokenizer or p.tokenizer_dict is used to perform string to id
      conversions. If key `src` or `tgt` is present in p.tokenizer_dict,
      it will be used for generating the ids for src or tgt, respectively.
      Otherwise the default tokenizer will be used.

    * p.packing_factor depends on the training data and max lengths used.

      If this value is too small, we generate packed batches that contain
      too much padding that could have been used to pack more examples.
      If this value is too large, we use more host memory and randomly discard
      examples that could not fit.

      One can look at the 'examples/src_packed_token_ratio' (or
      'examples/tgt_packed_token_ratio') graph to determine if
      its value is too small. For example, with p.packing_factor=3.5, if we
      observe that 'examples/src_packed_token_ratio' is saturated at 1.0, this
      means 3.5 is likely too small. If we instead observe that
      'examples/src_packed_token_ratio' fluctuates around 0.5, this means 3.5 is
      larger than needed.

      We believe that there can be a slight bias against longer sequences
      (meaning longer sequences have a slightly higher probability of being
      dropped) when the packing factor is large enough that data must be
      dropped. The remedy is either to use a larger effective batch size, or
      to use a conservative packing factor so that no data is dropped.

      Note that the metric 'num_samples_in_batch' is a static value for
      the max global number of samples, while 'examples/num_packed_examples'
      is the actual number of samples per batch.

    * p.source_max_length and p.target_max_length control both the shape of
      the generated input batch (how long each row is) and the filtering
      (max allowed lengths for source and target, respectively).

      p.bucket_upper_bound also controls the filtering of examples. Inputs
      with either source or target sequence lengths exceeding it will be
      filtered out.

      It's not meaningful to set p.bucket_upper_bound higher than both
      p.source_max_length and p.target_max_length.

      When packing is enabled, however, a smaller p.bucket_upper_bound means
      that individual sequences have a smaller max length, but the packed
      batch may have a larger total length.

    * p.file_pattern_task_ids, p.task_to_{src,tgt}_lang_map are all used
      to manipulate batch.{src,tgt}.task_ids.

      For each example, its task is obtained from the source id, which is
      the index of the example's origin file in p.file_pattern. The task
      id populated in the input batch is determined by:
      p.task_to_{src,tgt}_lang_map[ p.file_pattern_task_ids[source_id] ],
      for src and tgt, respectively, where if a list is empty it falls
      back to an identity map.

      In the future we may define a separate lang_ids field to the input
      batch to disambiguate.

    * p.quality_score_filter_fn can be used when a column of quality scores
      is present in the input .tsv file. The quality score must be the last
      column. This filter function returns True for examples that should be
      filtered out, e.g. use p.quality_score_filter_fn = lambda x: x <= 0.3
      for scores where higher means better.

      p.quality_score_filter_fn typically should only contain a simple
      comparison (<, >, <=, or >=), as it relies on tf.Tensor's overloading
      of __le__() etc. to work. For example: lambda x: ( 0.3 < x and x < 0.9)
      won't work. But tf.math.logical_and(0.3 < x, x < 0.9) is okay.

      Also note that 'p.quality_score_filter_fn = lambda _: False' is
      equivalent to 'p.quality_score_filter_fn = None', in which case
      no quality score column is needed (or evaluated).

    * Consider enabling multithreading for the trainer job (in the Train()
      method). For example: p.num_batcher_threads = 128.
    """
        p = super().Params()

        p.Define('file_pattern_task_ids', [],
                 'task_id corresponding to list of file_patterns.')
        p.Define('task_to_src_lang_map', [],
                 'Map of task id to src language id.')
        p.Define('task_to_tgt_lang_map', [],
                 'Map of task id to tgt language id.')

        p.Define(
            'packing_factor', None,
            'A multiplicative factor for packing. This is the ratio between '
            'pre-packing batch size and after-packing batch size. If None, '
            'packing is disabled; otherwise the packing factor should be a '
            'float >= 1.')

        p.Define(
            'quality_score_filter_fn', None,
            'A user defined boolean function on a float (quality score). '
            'When present, the input .tsv file has an additional column '
            'of floats representing a quality score, and each line is '
            'filtered out when this function returns True on that score.')

        p.Define(
            'input_file_type', 'tsv', 'The type of input file contents.'
            ' Must be one of ["tsv", "sentence_proto"], for tab-separated'
            ' values, or Sentence/SentencePair protos, respectively.')
        p.Define(
            'single_column_input', False,
            'Indicates input is single-column rather'
            ' than double-column. When input_file_type is sentence_proto, this'
            ' means Sentence proto rather than SentencePair proto.')

        p.Define('natural_order_model', True, 'Only True is supported now.')
        p.Define('target_language', '', 'Language on target side.')
        p.Define('mass_layer', None,
                 'If not None, use the specified layer to do '
                 'MASS masking.')
        p.Define(
            'mass_task_ids', None, 'List of task IDs for MASS. If None and '
            'single_column_input=True, apply MASS to all tasks, otherwise '
            'only apply to the specified tasks.')
        p.Define('enable_mass_for_eval', False, 'Enables masking during eval.')
        # Back translation
        p.Define('bt_task_ids', [], 'List of task ids for back-translation.')
        # Denoising (https://arxiv.org/pdf/1711.00043)
        p.Define('denoise', hyperparams.Params(), 'Params for denoising tasks.')
        p.denoise.Define('task_ids', [], 'List of task IDs for denoising.')
        p.denoise.Define('noise_sent_prob', 1,
                         'Probability of noising an input sentence.')
        p.denoise.Define(
            'shuffle_tok_range', 3,
            'Range of noise for shuffling tokens, following'
            ' https://arxiv.org/pdf/1711.00043. Note that shuffle_tok_range of 3 '
            'implies tokens may be permuted by at most 3 positions.')
        p.denoise.Define('drop_tok_prob', 0.1,
                         'Probability of dropping tokens.')
        p.denoise.Define('blank_tok_prob', 0.1,
                         'Probability of blanking tokens.')
        p.denoise.Define('blank_id', 3, 'ID of blank token.')
        return p
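Tying the docstring's guidance together, a hedged configuration sketch; the class name follows the docstring's Returns section and the values are illustrative:

# Illustrative overrides following the docstring above.
p = TextPackedInput.Params()                    # class name assumed from the docstring
p.packing_factor = 3.5                          # monitor examples/src_packed_token_ratio
p.quality_score_filter_fn = lambda x: x <= 0.3  # drop rows scoring <= 0.3
p.denoise.task_ids = [1]                        # enable denoising for task 1 only
p.denoise.drop_tok_prob = 0.1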
Example #16
    def testToText(self):
        outer = _params.Params()
        outer.Define('foo', 1, '')
        inner = _params.Params()
        inner.Define('bar', 2.71, '')
        inner.Define('baz', 'hello', '')
        outer.Define('inner', inner, '')
        outer.Define('tau', False, '')
        outer.Define('dtype', tf.float32, '')
        outer.Define('dtype2', tf.int32, '')
        outer.Define('seqlen', [10, inner, 30], '')
        outer.Define('tuple', (1, None), '')
        outer.Define('list_of_params', [inner.Copy()], '')
        outer.Define('class', TestClass1, '')
        outer.Define('plain_dict', {'a': 10}, '')
        outer.Define('complex_dict', {'a': 10, 'b': inner}, '')
        outer.Define('complex_dict_escape', {'a': 'abc"\'\ndef'}, '')
        outer.Define('some_class', complex(0, 1), '')
        outer.Define('optional_bool', None, '')
        outer.Define('enum', TestEnum.B, '')
        # Arbitrarily use HyperparameterValue as some example proto.
        outer.Define('proto', hyperparams_pb2.HyperparamValue(int_val=42), '')

        self.assertEqual(
            '\n' + outer.ToText(), r"""
class : type/__main__/TestClass1
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'hello'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dtype : float32
dtype2 : int32
enum : TestEnum.B
foo : 1
inner.bar : 2.71
inner.baz : 'hello'
list_of_params[0].bar : 2.71
list_of_params[0].baz : 'hello'
optional_bool : NoneType
plain_dict : {'a': 10}
proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/int_val: 42
seqlen : [10, {'bar': 2.71, 'baz': 'hello'}, 30]
some_class : complex
tau : False
tuple : (1, 'NoneType')
""")

        outer.FromText("""
        dtype2 : float32
        inner.baz : 'world'
        # foo : 123
        optional_bool : true
        list_of_params[0].bar : 2.72
        seqlen : [1, 2.0, '3', [4]]
        plain_dict : {'x': 0.3}
        class : type/__main__/TestClass2
        tau : true
        tuple : (2, 3)
        enum : TestEnum.A
        proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/string_val: "a/b"
        """)

        # Note that the 'hello' has turned into 'world'!
        self.assertEqual(
            '\n' + outer.ToText(), r"""
class : type/__main__/TestClass2
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'world'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dtype : float32
dtype2 : float32
enum : TestEnum.A
foo : 1
inner.bar : 2.71
inner.baz : 'world'
list_of_params[0].bar : 2.72
list_of_params[0].baz : 'hello'
optional_bool : True
plain_dict : {'x': 0.3}
proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/string_val: "a/b"
seqlen : [1, 2.0, '3', [4]]
some_class : complex
tau : True
tuple : (2, 3)
""")
Example #17
    def _configure_input(self, p, split):
        p.file_pattern_prefix = _WAYMO_BASE

        job_type = cluster_factory.Current().job

        max_num_points = int(64 * 2650 * 1.5)
        p.preprocessors = hyperparams.Params()
        p.preprocessors.Define(
            'filter_nlz_points',
            waymo_open_input_generator.FilterNLZPoints.Params(), '')
        # TODO(bencaine): Change this to filter based on difficulty instead
        p.preprocessors.Define(
            'filter_groundtruth',
            input_preprocessors.FilterGroundTruthByNumPoints.Params(), '')
        p.preprocessors.Define('viz_copy',
                               input_preprocessors.CreateDecoderCopy.Params(),
                               '')
        p.preprocessors.Define(
            'select_centers',
            input_preprocessors.SparseCenterSelector.Params(), '')
        p.preprocessors.Define(
            'gather_features',
            input_preprocessors.SparseCellGatherFeatures.Params(), '')
        p.preprocessors.Define('tile_anchors',
                               input_preprocessors.TileAnchorBBoxes.Params(),
                               '')
        p.preprocessors.Define('assign_anchors',
                               input_preprocessors.AnchorAssignment.Params(),
                               '')
        p.preprocessors.Define(
            'pad_lasers',
            input_preprocessors.PadLaserFeatures.Params().Set(
                max_num_points=max_num_points), '')

        p.preprocessors.viz_copy.pad_lasers.max_num_points = max_num_points
        p.preprocessors.filter_groundtruth.min_num_points = self.GT_MIN_NUM_POINTS

        p.preprocessors.select_centers.num_cell_centers = 1024
        p.preprocessors.gather_features.num_points_per_cell = self.NUM_POINTS_PER_CELL
        p.preprocessors.gather_features.sample_neighbors_uniformly = True
        p.preprocessors.gather_features.max_distance = 2.75

        p.preprocessors.assign_anchors.foreground_assignment_threshold = 0.6
        p.preprocessors.assign_anchors.background_assignment_threshold = 0.45

        p.preprocessors_order = [
            'filter_nlz_points',
            'filter_groundtruth',
            'viz_copy',
            'select_centers',
            'gather_features',
            'tile_anchors',
            'assign_anchors',
            'pad_lasers',
        ]

        # Apply car anchor box settings.
        tile_anchors_p = p.preprocessors.tile_anchors
        self.AnchorBoxSettings.Update(p.preprocessors.tile_anchors)
        num_anchor_configs = self.AnchorBoxSettings.NumAnchors()

        assert len(tile_anchors_p.anchor_box_dimensions) == num_anchor_configs
        assert len(tile_anchors_p.anchor_box_rotations) == num_anchor_configs
        assert len(tile_anchors_p.anchor_box_offsets) == num_anchor_configs

        # If this is not the decoder job (e.g., this is the trainer), do not
        # make visualization copies and do not send laser points over.
        if job_type != 'decoder':
            p.preprocessors_order.remove('viz_copy')
            # Laser points are not needed during training for the current V2
            # model; this reduces the amount of data sent over during
            # training.
            p.preprocessors.pad_lasers.max_num_points = 0

        p.file_buffer_size = 32
        p.file_parallelism = 8
        p.num_batcher_threads = 8
        if self.RUN_LOCALLY:
            p.num_batcher_threads = 1
            p.file_buffer_size = 1
            p.file_parallelism = 1

        if job_type.startswith('trainer'):
            p.batch_size = 2
        else:
            p.batch_size = 4
            p.file_buffer_size = 64
            p.file_parallelism = 16
            p.num_batcher_threads = 16
        return p
Example #18
  def Params(cls):
    p = super(BaseTask, cls).Params()
    p.Define('input', None, 'Input generator Params.')
    p.Define('encoder', None, 'Encoder Params.')
    p.Define('online_encoder', None, 'Online Encoder Params.')
    p.Define('decoder', None, 'Decoder Params.')
    p.Define('train', hyperparams.Params(),
             'Params to control how this task should be trained.')

    tp = p.train
    tp.Define(
        'start_up_delay_steps', 200, 'i-th replica starts training after '
        'i*(i+1)/2*start_up_delay_steps steps')
    tp.Define('max_steps', 4 * 10**6, 'Maximum number of training steps.')
    tp.Define('tpu_steps_per_loop', 100, 'The number of training steps per '
              'training loop for TPUs.')
    tp.Define(
        'vn_start_step', 200000000,
        'Step starting from which variational noise is added to '
        'params values during training.')
    tp.Define('vn_std', 0.0, 'Std of the variational noise.')
    tp.Define('early_stop', early_stop.EarlyStop.Params(),
              'Early stopping based on dev-set performance.')
    tp.Define(
        'ema_decay', 0.0,
        'If > 0, enable ExponentialMovingAverage during training '
        'with the given decay. '
        'Must be < 1. Disabled if <= 0.')
    tp.Define(
        'init_from_checkpoint_rules', {},
        'If not None, a dictionary with keys corresponding to a checkpoint '
        'path and values corresponding to variable loading rules is expected. '
        'Each key is expected to be a path to a checkpoint from which to '
        'initialize part of the model. Variables are only loaded from this '
        'path during initialization and will override values provided by '
        'initialization. '
        'The corresponding values (loading_rules) are expected to be a tuple '
        'consisting of two lists: loading rules and ignore rules, '
        'respectively. '
        'The first list (loading rules) contains the list of variables '
        'which should be initialized from the checkpoint: each element in the '
        'list is a pair of strings. The first element is a regex and the '
        'second is a python format string. If a variable in the model matches '
        'a regex, we rename using the format string to determine the '
        'corresponding var in the checkpoint. Note that it is an error if a '
        'model variable matches multiple loading rules, for the same '
        'checkpoint or across checkpoints. '
        'The second list (ignore rules) is a list of regexes which specify '
        'variables in the model which should not be initialized using the '
        'loading rules. Thus, if a variable in the model to be trained matches '
        'one of the rules in the loading rules, as well as one of the regular '
        'expressions in the ignore rules, the variable will not be initialized '
        'from the checkpoint, but will instead be initialized from the '
        'variable initializer defined in the graph. '
        'Example: '
        '{"checkpoint_path": ([("(.*)", "%s")], [])} will initialize all the '
        'model parameters from the checkpoint_path.')
    tp.Define(
        'pruning_hparams_dict', None, 'Pruning related hyperparameters. A dict '
        'with hyperparameter: value pairs. See google-research.model_pruning.')
    tp.Define(
        'enqueue_max_steps', -1, 'Max enqueue steps. -1 means no limit.'
        ' This flag should be set for unit tests only.')
    tp.Define('save_interval_seconds', 60 * 10,
              'Generates a checkpoint roughly once every this many seconds.')
    tp.Define('save_max_to_keep', 100,
              'Maximum number of recent checkpoints to keep.')
    tp.Define('save_keep_checkpoint_every_n_hours', 0.5,
              'How often to keep a checkpoint.')

    tp.Define('summary_interval_steps', 100,
              'Generates a summary roughly once every this many steps.')
    # The following params must mirror those in Learner.Params().
    # TODO(rpang): migrate existing params to use learner and
    # delete legacy params.
    # LINT.IfChange
    tp.Define(
        'learner', None, 'One or a list of optimization programs. '
        'If None, uses a Learner created from the legacy params '
        'defined below: learning_rate, lr_schedule, optimizer, etc.')
    tp.Define(
        'l2_regularizer_weight', None,
        'If not None, L2 regularization to apply to the weights. '
        'Otherwise, disable L2 regularization.')
    tp.Define(
        'l1_regularizer_weight', None,
        'If not None, L1 regularization to apply to the weights. '
        'Otherwise, disable L1 regularization.')
    tp.Define('learning_rate', 0.0, 'Learning rate to use.')
    tp.Define(
        'clip_gradient_norm_to_value', 0.0,
        'Clip gradient by global norm to this value. This is similar to '
        'the behaviour of tf.clip_by_global_norm; if you are looking for '
        'tf.clip_by_norm, refer to clip_gradient_single_norm_to_value. Note '
        'that these are mutually exclusive.')
    tp.Define(
        'clip_gradient_single_norm_to_value', 0.0,
        'Clip gradient by single tensor norm to this value. This is '
        'similar to the behaviour of tf.clip_by_norm. Note this is mutually '
        'exclusive with clip_gradient_norm_to_value.')
    tp.Define('grad_norm_to_clip_to_zero', 0.0,
              'Clip gradient to 0 if its norm exceeds this value.')
    tp.Define('grad_norm_tracker', None, 'Params for GradNormTracker.')
    tp.Define('optimizer', optimizer.Adam.Params(), 'Params for the optimizer.')
    tp.Define('lr_schedule', schedule.ContinuousLearningRateSchedule.Params(),
              'Learning rate decay schedule.')
    tp.Define(
        'bprop_variable_filter', None,
        'If set, only backprop variables whose names partially match '
        'this regexp (re.search).')
    tp.Define(
        'bprop_variable_exclusion', None,
        'If set, do not backprop variables whose names partially match '
        'this regexp (re.search).')
    tp.Define(
        'grad_aggregation_method', tf.AggregationMethod.EXPERIMENTAL_TREE,
        'Specifies the method used to combine gradient terms. Accepted '
        'values are constants defined in the class AggregationMethod.')
    tp.Define(
        'gate_gradients', False,
        'If True, add a tuple around the gradients returned for an '
        'operation. This avoids some race conditions.')
    tp.Define('colocate_gradients_with_ops', True,
              'If True, try colocating gradients with the corresponding op.')
    # LINT.ThenChange(learner.py)
    p.Define('eval', hyperparams.Params(),
             'Params to control how this task should be evaled.')
    ep = p.eval
    ep.Define(
        'samples_per_summary', 1000,
        'If > 0, generates one summary after this many samples, at most. '
        'If == 0 or the dataset has fewer examples, evaluate the whole set.')
    ep.Define(
        'decoder_samples_per_summary', 0,
        'If > 0, each decoder summary will contain at most this many samples. '
        'If == 0, defaults to `samples_per_summary` for '
        'backwards compatibility.')
    ep.Define(
        'load_checkpoint_from', None,
        'If not None, specifies a location for the checkpoint that '
        'should be used for eval. One example format is a '
        'checkpoint directory of a training run.')
    ep.Define('start_eval_after', 0,
              'Start evaluation after specified number of steps.')
    ep.Define('start_decoder_after', 0,
              'Only decode checkpoints after this step.')
    return p
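As the two clipping docstrings note, the global-norm and per-tensor-norm knobs are mutually exclusive; a hedged sketch of picking one (values illustrative):

# Choose exactly one gradient clipping mode.
p = BaseTask.Params()
tp = p.train
tp.clip_gradient_norm_to_value = 1.0         # enable global-norm clipping...
tp.clip_gradient_single_norm_to_value = 0.0  # ...so per-tensor clipping stays off
tp.bprop_variable_filter = r'decoder/'       # optionally backprop only matching vars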
Example #19
    def testToText(self):
        outer = _params.Params()
        outer.Define('foo', 1, '')
        inner = _params.Params()
        inner.Define('bar', 2.71, '')
        inner.Define('baz', 'hello', '')
        outer.Define('inner', inner, '')
        outer.Define('tau', False, '')
        outer.Define('dtype', tf.float32, '')
        outer.Define('dtype2', tf.int32, '')
        outer.Define('seqlen', [10, inner, 30], '')
        outer.Define('tuple', (1, None), '')
        outer.Define('list_of_params', [inner.Copy()], '')
        outer.Define('class', TestClass1, '')
        outer.Define('plain_dict', {'a': 10}, '')
        outer.Define('complex_dict', {'a': 10, 'b': inner}, '')
        outer.Define('complex_dict_escape', {'a': 'abc"\'\ndef'}, '')
        outer.Define('some_class', complex(0, 1), '')
        outer.Define('optional_bool', None, '')

        self.assertEqual(
            '\n' + outer.ToText(), r"""
class : type/__main__/TestClass1
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'hello'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dtype : float32
dtype2 : int32
foo : 1
inner.bar : 2.71
inner.baz : 'hello'
list_of_params[0].bar : 2.71
list_of_params[0].baz : 'hello'
optional_bool : NoneType
plain_dict : {'a': 10}
seqlen : [10, {'bar': 2.71, 'baz': 'hello'}, 30]
some_class : complex
tau : False
tuple : (1, 'NoneType')
""")

        outer.FromText("""
        dtype2 : float32
        inner.baz : 'world'
        # foo : 123
        optional_bool : true
        list_of_params[0].bar : 2.72
        seqlen : [1, 2.0, '3', [4]]
        plain_dict : {'x': 0.3}
        class : type/__main__/TestClass2
        tau : true
        tuple : (2, 3)
        """)

        # Note that the 'hello' has turned into 'world'!
        self.assertEqual(
            '\n' + outer.ToText(), r"""
class : type/__main__/TestClass2
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'world'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dtype : float32
dtype2 : float32
foo : 1
inner.bar : 2.71
inner.baz : 'world'
list_of_params[0].bar : 2.72
list_of_params[0].baz : 'hello'
optional_bool : True
plain_dict : {'x': 0.3}
seqlen : [1, 2.0, '3', [4]]
some_class : complex
tau : True
tuple : (2, 3)
""")
Example #20
 def Params(cls):
     p = super(QuantizableLayer, cls).Params()
     p.Define('qdomain', hyperparams.Params(),
              'Container for quantization domains.')
     p.qdomain.Define('default', None, 'Default quantization domain.')
     return p
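Since qdomain is a plain nested Params container, subclasses can register additional domains alongside 'default'; a hedged sketch with a hypothetical 'fc' domain:

# Subclass sketch: add a second, hypothetical quantization domain.
class MyQuantLayer(QuantizableLayer):

  @classmethod
  def Params(cls):
    p = super().Params()
    p.qdomain.Define('fc', None, 'QDomain for fully connected ops.')
    return p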
Example #21
 def Test(cls):
     """Returns Params for the testing dataset."""
     return hyperparams.Params()
Example #22
  def testToText(self):
    outer = hyperparams.Params()
    outer.Define('foo', 1, '')
    inner = hyperparams.Params()
    inner.Define('bar', 2.71, '')
    inner.Define('baz', 'hello', '')
    outer.Define('inner', inner, '')
    outer.Define('tau', False, '')
    outer.Define('dtype', tf.float32, '')
    outer.Define('dtype2', tf.int32, '')
    outer.Define('seqlen', [10, inner, 30], '')
    outer.Define('tuple', (1, None), '')
    outer.Define('list_of_params', [inner.Copy()], '')
    outer.Define('class', TestClass1, '')
    outer.Define('plain_dict', {'a': 10}, '')
    outer.Define('complex_dict', {'a': 10, 'b': inner}, '')
    outer.Define('complex_dict_escape', {'a': 'abc"\'\ndef'}, '')
    outer.Define('some_class', complex(0, 1), '')
    outer.Define('optional_bool', None, '')
    outer.Define('enum', TestEnum.B, '')
    outer.Define('dataclass', TestDataClass(a=[42], b=tf.float32), '')
    outer.Define('namedtuple', TestNamedTuple([42], tf.float32), '')
    outer.Define('namedtuple2', tf.io.FixedLenSequenceFeature([42], tf.float32),
                 '')
    # Arbitrarily use HyperparameterValue as some example proto.
    outer.Define('proto', hyperparams_pb2.HyperparamValue(int_val=42), '')

    self.assertEqual(
        '\n' + outer.ToText(), r"""
class : type/__main__/TestClass1
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'hello'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dataclass : {'a': [42], 'b': 'float32'}
dtype : float32
dtype2 : int32
enum : TestEnum.B
foo : 1
inner.bar : 2.71
inner.baz : 'hello'
list_of_params[0].bar : 2.71
list_of_params[0].baz : 'hello'
namedtuple : {'a': [42], 'b': 'float32'}
namedtuple2 : {'allow_missing': False, 'default_value': 'NoneType', 'dtype': 'float32', 'shape': [42]}
optional_bool : NoneType
plain_dict : {'a': 10}
proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/int_val: 42
seqlen : [10, {'bar': 2.71, 'baz': 'hello'}, 30]
some_class : complex
tau : False
tuple : (1, 'NoneType')
""")

    outer.FromText("""
        dataclass : {'a': 27, 'b': 'int32'}
        dtype2 : float32
        inner.baz : 'world'
        # foo : 123
        optional_bool : true
        list_of_params[0].bar : 2.72
        seqlen : [1, 2.0, '3', [4]]
        plain_dict : {'x': 0.3}
        class : type/__main__/TestClass2
        tau : true
        tuple : (2, 3)
        enum : TestEnum.A
        # Note dtypes and other non-POD are represented as strings.
        namedtuple : {'a': 27, 'b': 'int32'}
        namedtuple2 : {'allow_missing': True, 'default_value': 'NoneType', 'dtype': 'int32', 'shape': [43]}
        proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/string_val: "a/b"
        """)

    # Note that the 'hello' has turned into 'world'!
    self.assertEqual(
        '\n' + outer.ToText(), r"""
class : type/__main__/TestClass2
complex_dict : {'a': 10, 'b': {'bar': 2.71, 'baz': 'world'}}
complex_dict_escape : {'a': 'abc"\'\ndef'}
dataclass : {'a': 27, 'b': 'int32'}
dtype : float32
dtype2 : float32
enum : TestEnum.A
foo : 1
inner.bar : 2.71
inner.baz : 'world'
list_of_params[0].bar : 2.72
list_of_params[0].baz : 'hello'
namedtuple : {'a': 27, 'b': 'int32'}
namedtuple2 : {'allow_missing': True, 'default_value': 'NoneType', 'dtype': 'int32', 'shape': [43]}
optional_bool : True
plain_dict : {'x': 0.3}
proto : proto/lingvo.core.hyperparams_pb2/HyperparamValue/string_val: "a/b"
seqlen : [1, 2.0, '3', [4]]
some_class : complex
tau : True
tuple : (2, 3)
""")
    self.assertEqual(outer.dataclass.b, tf.int32)
    self.assertEqual(outer.namedtuple.b, tf.int32)
    self.assertEqual(outer.namedtuple2.dtype, tf.int32)
    self.assertIsNone(outer.namedtuple2.default_value)
Example #23
 def Train(cls):
     """Returns Params for the training dataset."""
     return hyperparams.Params()
Example #24
 def Params(cls):
     extractors = hyperparams.Params()
     extractors.Define('e1', E1WithCheck.Params(), '')
     extractors.Define('e2', E2WithCheck.Params(), '')
     return super().Params(extractors).Set(
         preprocessors=hyperparams.Params(), preprocessors_order=[])
Example #25
  def Params(cls):
    p = super().Params()
    p.Define('input_dim', None, 'Input (and, in fact, output) dimension.')
    p.Define('kernel_size', None, 'Kernel size of the 1D depthwise conv.')
    p.Define('conv_activation', 'SWISH', 'Activation after normalization.')
    p.Define(
        'is_causal', False, 'Whether this is a causal layer. '
        'If set to True, use '
        'conv_layers_with_time_padding.CausalDepthwiseConv2DLayer for '
        '`depthwise_conv_tpl`.')
    p.Define(
        'glu_activation', 'NONE',
        'Activation in GLU. Check lingvo.core.activations._ACTIVATIONS for '
        'other options.')
    p.Define('dropout_prob', 0., 'Dropout probability.')

    p.Define('ln_tpl', layers.LayerNorm.Params(), 'Input layer norm template.')
    p.Define('linear_start_tpl', layers.FCLayer.Params(), 'Linear start layer.')
    p.Define(
        'depthwise_conv_tpl',
        conv_layers_with_time_padding.DepthwiseConv2DLayer.Params(),
        'Depthwise conv template. For causal layer, use '
        'conv_layers_with_time_padding.CausalDepthwiseConv2DLayer.')
    p.Define('conv_norm_layer_tpl', bn_layers.BatchNormLayer.Params(),
             'Normalization layer after conv.')
    p.Define('linear_end_tpl', layers.FCLayer.Params(), 'Linear end layer.')
    p.Define('dropout_tpl', layers.DropoutLayer.Params(),
             'Residual dropout layer.')
    p.Define(
        'split_act_gated_linear_start', False,
        'Separate act and gated linear start to remove data formatting '
        'overheads.')
    p.linear_start_tpl.Set(activation='NONE', has_bias=True)
    p.linear_end_tpl.Set(activation='NONE', has_bias=True)
    # SPMD partition related params.
    #
    # d - model_dim
    # f - ff_hidden_dim (here ff_hidden_dim has the same size as model_dim)
    # h - height
    # w - width
    # i - in_channels
    # m - channel_multiplier
    # b - batch_size
    # l - seq_len
    p.weight_split_dims_mapping = hparams_lib.Params()
    wp = p.weight_split_dims_mapping
    wp.Define(
        'df', None,
        'Mesh split for lconv linear start weight with the shape of '
        '[model_dim, ff_hidden_dim], the default hidden_dim is the same as '
        'the model_dim.')
    wp.Define(
        'hwim', None,
        'Mesh split for lconv depthwise conv weight with the shape of '
        '[height, width, in_channels, channel_multiplier]. Width and '
        'channel_multiplier are both 1 for the common use case.')
    wp.Define(
        'fd', None, 'Mesh split for lconv linear end weight with the shape of '
        '[ff_hidden_dim, model_dim], the default hidden_dim is the same as '
        'the model_dim.')
    p.activation_split_dims_mapping = hparams_lib.Params()
    ap = p.activation_split_dims_mapping
    ap.Define(
        'blf', None, 'Mesh split for lconv linear start activation and lconv '
        'depthwise conv after normalization with the shape of '
        '[batch_size, seq_len, ff_hidden_dim], the default hidden_dim is the '
        'same as model_dim.')
    ap.Define(
        'bld', None,
        'Mesh split for lconv linear end activation with the shape of '
        '[batch_size, seq_len, model_dim].')
    return p
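A hedged sketch of filling the mesh-split params above, assuming the GShard-style convention of one mesh axis per tensor dimension with -1 meaning replicated; the axis assignments are illustrative:

# Illustrative sharding over a 2D device mesh; -1 leaves a dimension unsplit.
wp = p.weight_split_dims_mapping
wp.df = [0, 1]              # [model_dim, ff_hidden_dim]
wp.hwim = [-1, -1, 1, -1]   # [height, width, in_channels, channel_multiplier]
wp.fd = [1, 0]              # [ff_hidden_dim, model_dim]
ap = p.activation_split_dims_mapping
ap.blf = [0, -1, 1]         # [batch_size, seq_len, ff_hidden_dim]
ap.bld = [0, -1, 1]         # [batch_size, seq_len, model_dim]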