def __init__(self, inputs, sequence_length, embedding, sampling_probability,
             time_major=False, seed=None, scheduling_seed=None, name=None):
    """Initializer.

    Args:
        inputs: A (structure of) input tensors.
        sequence_length: An int32 vector tensor.
        embedding: A callable or the `params` argument for
            `embedding_lookup`. If a callable, it can take a vector tensor
            of token `ids`, or take two arguments (`ids`, `times`), where
            `ids` is a vector tensor of token ids, and `times` is a vector
            tensor of current time steps (i.e., position ids). The latter
            case can be used when :attr:`embedding` is a combination of
            word embedding and position embedding.
        sampling_probability: A 0D `float32` tensor: the probability of
            sampling categorically from the output ids instead of reading
            directly from the inputs.
        time_major: Python bool. Whether the tensors in `inputs` are time
            major. If `False` (default), they are assumed to be batch
            major.
        seed: The sampling seed.
        scheduling_seed: The schedule decision rule sampling seed.
        name: Name scope for any created operations.

    Raises:
        ValueError: if `sampling_probability` is not a scalar or vector.
    """
    with ops.name_scope(name, "ScheduledEmbeddingSamplingWrapper",
                        [embedding, sampling_probability]):
        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = (
                lambda ids: embedding_ops.embedding_lookup(embedding, ids))

        self._embedding_args_cnt = len(get_args(self._embedding_fn))
        if self._embedding_args_cnt != 1 and self._embedding_args_cnt != 2:
            raise ValueError('`embedding` should expect 1 or 2 arguments.')

        self._sampling_probability = ops.convert_to_tensor(
            sampling_probability, name="sampling_probability")
        if self._sampling_probability.get_shape().ndims not in (0, 1):
            raise ValueError(
                "sampling_probability must be either a scalar or a vector. "
                "saw shape: %s" % (self._sampling_probability.get_shape()))
        self._seed = seed
        self._scheduling_seed = scheduling_seed

        super(ScheduledEmbeddingTrainingHelper, self).__init__(
            inputs=inputs,
            sequence_length=sequence_length,
            time_major=time_major,
            name=name)
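# Usage sketch (illustrative only; `inputs_embedded`, `seq_lengths`,
# `embedding_params`, `decoder`, and `initial_state` are assumed to be
# defined by the caller, not part of this module). With probability
# `sampling_probability`, the helper feeds back a categorically sampled
# token instead of the ground-truth input at each step (scheduled sampling):
#
#     helper = ScheduledEmbeddingTrainingHelper(
#         inputs=inputs_embedded,        # [batch_size, max_time, dim]
#         sequence_length=seq_lengths,   # [batch_size]
#         embedding=embedding_params,    # [vocab_size, dim]
#         sampling_probability=0.25)
#     outputs, final_state, _ = decoder(
#         helper=helper, initial_state=initial_state)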
def get_gradient_clip_fn(hparams=None):
    """Creates a gradient clipping function based on the hyperparameters.

    See the :attr:`gradient_clip` field in
    :meth:`~texar.tf.core.default_optimization_hparams` for all
    hyperparameters and default values.

    The gradient clipping function takes a list of `(gradients, variables)`
    tuples and returns a list of `(clipped_gradients, variables)` tuples.
    Typical examples include
    :tf_main:`tf.clip_by_global_norm <clip_by_global_norm>`,
    :tf_main:`tf.clip_by_value <clip_by_value>`,
    :tf_main:`tf.clip_by_norm <clip_by_norm>`,
    :tf_main:`tf.clip_by_average_norm <clip_by_average_norm>`, etc.

    Args:
        hparams (dict or HParams, optional): hyperparameters. Missing
            hyperparameters are set to default values automatically.

    Returns:
        function or `None`: If hparams["type"] is specified, returns
        the respective function. If hparams["type"] is empty,
        returns `None`.
    """
    if hparams is None or isinstance(hparams, dict):
        hparams = HParams(
            hparams, default_optimization_hparams()["gradient_clip"])
    fn_type = hparams["type"]
    if fn_type is None or fn_type == "":
        return None

    fn_modules = ["tensorflow", "texar.tf.custom"]
    clip_fn = utils.get_function(fn_type, fn_modules)
    clip_fn_args = utils.get_args(clip_fn)
    fn_kwargs = hparams["kwargs"]
    if isinstance(fn_kwargs, HParams):
        fn_kwargs = fn_kwargs.todict()

    def grad_clip_fn(grads_and_vars):
        """Gradient clipping function.

        Args:
            grads_and_vars (list): A list of `(gradients, variables)`
                tuples.

        Returns:
            list: A list of `(clipped_gradients, variables)` tuples.
        """
        grads, vars_ = zip(*grads_and_vars)
        if clip_fn == tf.clip_by_global_norm:
            # clip_by_global_norm returns (clipped_list, global_norm).
            clipped_grads, _ = clip_fn(t_list=grads, **fn_kwargs)
        elif 't_list' in clip_fn_args:
            clipped_grads = clip_fn(t_list=grads, **fn_kwargs)
        elif 't' in clip_fn_args:  # e.g., tf.clip_by_value
            clipped_grads = [clip_fn(t=grad, **fn_kwargs) for grad in grads]
        else:
            raise ValueError(
                "Unsupported gradient clipping function: %s" % fn_type)
        return list(zip(clipped_grads, vars_))

    return grad_clip_fn
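# Usage sketch (illustrative only; `loss` and `optimizer` are assumed to be
# defined elsewhere). With `"type": "clip_by_global_norm"`, the returned
# function clips the whole gradient list by its global norm:
#
#     hparams = {"type": "clip_by_global_norm", "kwargs": {"clip_norm": 5.0}}
#     clip_fn = get_gradient_clip_fn(hparams)
#     grads_and_vars = optimizer.compute_gradients(loss)
#     train_op = optimizer.apply_gradients(clip_fn(grads_and_vars))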
def __init__(self, embedding, start_tokens, end_token, tau,
             embedding_size=None, stop_gradient=False, use_finish=True):
    if callable(embedding):
        self._embedding_fn = embedding
        if embedding_size is None:
            raise ValueError('`embedding_size` must be provided if '
                             '`embedding` is a callable.')
        self._embedding_size = tf.convert_to_tensor(
            embedding_size, dtype=tf.int32, name="embedding_size")
    else:
        self._embedding_fn = (
            lambda soft_ids: soft_embedding_lookup(embedding, soft_ids))
        # For an embedding table of shape `[vocab_size, dim]`, the one-hot
        # depth below is the vocabulary size.
        self._embedding_size = tf.shape(embedding)[0]

    self._start_tokens = tf.convert_to_tensor(
        start_tokens, dtype=tf.int32, name="start_tokens")
    self._end_token = tf.convert_to_tensor(
        end_token, dtype=tf.int32, name="end_token")
    if self._start_tokens.get_shape().ndims != 1:
        raise ValueError("start_tokens must be a vector")
    self._batch_size = tf.size(self._start_tokens)
    if self._end_token.get_shape().ndims != 0:
        raise ValueError("end_token must be a scalar")

    # Start tokens are fed as one-hot (i.e., "hard") soft distributions.
    soft_start_tokens = tf.one_hot(
        self._start_tokens, self._embedding_size, dtype=tf.float32)
    self._embedding_args_cnt = len(utils.get_args(self._embedding_fn))
    if self._embedding_args_cnt == 1:
        self._start_inputs = self._embedding_fn(soft_ids=soft_start_tokens)
    elif self._embedding_args_cnt == 2:
        # Position index is 0 in the beginning.
        times = tf.zeros([self._batch_size], dtype=tf.int32)
        self._start_inputs = self._embedding_fn(
            soft_ids=soft_start_tokens, times=times)
    else:
        raise ValueError('`embedding` should expect 1 or 2 arguments.')

    self._tau = tau
    self._stop_gradient = stop_gradient
    self._use_finish = use_finish
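# Usage sketch (illustrative only; the enclosing class name
# `SoftmaxEmbeddingHelper`, plus `decoder`, `bos_id`, and `eos_id`, are
# assumptions based on context). This helper feeds back *soft* token
# distributions, embedded via `soft_embedding_lookup` (a matmul of the
# distribution with the embedding table), so gradients flow through the
# feedback loop; `tau` is the softmax temperature:
#
#     helper = SoftmaxEmbeddingHelper(
#         embedding=embedding_params,               # [vocab_size, dim]
#         start_tokens=tf.fill([batch_size], bos_id),
#         end_token=eos_id,
#         tau=0.5)
#     outputs, final_state, _ = decoder(helper=helper)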
def __init__(self, embedding, start_tokens, end_token):
    """Initializer.

    Args:
        embedding: A callable or the `params` argument for
            `embedding_lookup`. If a callable, it can take a vector tensor
            of `ids` (argmax ids), or take two arguments (`ids`, `times`),
            where `ids` is a vector tensor of argmax ids, and `times` is a
            vector tensor of current time steps (i.e., position ids). The
            latter case can be used when :attr:`embedding` is a combination
            of word embedding and position embedding. The returned tensor
            will be returned by :meth:`next_inputs`.
        start_tokens: `int32` vector shaped `[batch_size]`, the start
            tokens.
        end_token: `int32` scalar, the token that marks end of decoding.

    Raises:
        ValueError: if `start_tokens` is not a 1D tensor or `end_token`
            is not a scalar.
    """
    if callable(embedding):
        self._embedding_fn = embedding
    else:
        self._embedding_fn = (
            lambda ids: embedding_ops.embedding_lookup(embedding, ids))

    self._start_tokens = ops.convert_to_tensor(
        start_tokens, dtype=dtypes.int32, name="start_tokens")
    self._end_token = ops.convert_to_tensor(
        end_token, dtype=dtypes.int32, name="end_token")
    if self._start_tokens.get_shape().ndims != 1:
        raise ValueError("start_tokens must be a vector")
    self._batch_size = shape_list(start_tokens)[0]
    if self._end_token.get_shape().ndims != 0:
        raise ValueError("end_token must be a scalar")

    self._embedding_args_cnt = len(get_args(self._embedding_fn))
    if self._embedding_args_cnt == 1:
        self._start_inputs = self._embedding_fn(self._start_tokens)
    elif self._embedding_args_cnt == 2:
        # Position index is 0 in the beginning.
        times = tf.zeros([self._batch_size], dtype=tf.int32)
        self._start_inputs = self._embedding_fn(self._start_tokens, times)
    else:
        raise ValueError('`embedding` should expect 1 or 2 arguments.')
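# Usage sketch (illustrative only). A 2-argument `embedding` callable lets
# this helper combine word and position embeddings at decoding time;
# `word_emb`, `pos_emb`, `bos_id`, `eos_id`, and the enclosing helper class
# name are assumptions, not part of this module:
#
#     def embedding_fn(ids, times):
#         return (tf.nn.embedding_lookup(word_emb, ids) +
#                 tf.nn.embedding_lookup(pos_emb, times))
#
#     helper = GreedyEmbeddingHelper(
#         embedding=embedding_fn,
#         start_tokens=tf.fill([batch_size], bos_id),
#         end_token=eos_id)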
def _get_opt(learning_rate=None):
    """Creates the optimizer instance, forwarding `learning_rate` only
    when the optimizer's constructor accepts such an argument.
    """
    opt_kwargs = hparams["kwargs"].todict()
    fn_args = set(utils.get_args(opt_class.__init__))
    if 'learning_rate' in fn_args and learning_rate is not None:
        opt_kwargs["learning_rate"] = learning_rate
    return opt_class(**opt_kwargs)
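# Usage sketch (illustrative only; `hparams` and `opt_class` are closed over
# by the enclosing function, and `loss` is assumed to be defined by the
# caller). For an optimizer class whose constructor takes `learning_rate`,
# e.g. `tf.train.AdamOptimizer`, the rate is injected into its kwargs:
#
#     opt = _get_opt(learning_rate=1e-3)
#     train_op = opt.minimize(loss)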