Пример #1
0
    def __init__(self,
                 hparams,
                 mode,
                 problem_hparams=None,
                 problem_idx=0,
                 data_parallelism=None,
                 ps_devices=None,
                 decode_hparams=None):
        """Set up a T2TModel layer.

        Args:
          hparams: model hyperparameters object. A shallow copy is stored,
            and shared_embedding_and_softmax_weights may be unset on it.
          mode: the execution mode, as defined in tf.estimator.ModeKeys. The
            layer is trainable only when this equals ModeKeys.TRAIN.
          problem_hparams: problem hyperparameters object; when None,
            hparams.problems[0] is used.
          problem_idx: an integer, stored as-is.
          data_parallelism: an expert_utils Parallelism object; when None,
            eu.Parallelism([""]) is used.
          ps_devices: a list of devices; when None, [""] is used.
          decode_hparams: decoding hyperparameters; a shallow copy is stored.
        """
        # Prefer the registered name; otherwise use the class-derived default.
        fallback_name = registry.default_name(type(self))
        layer_name = self.REGISTERED_NAME or fallback_name
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        super(T2TModel, self).__init__(trainable=is_training, name=layer_name)

        # Fill in defaults for the optional arguments.
        data_parallelism = (eu.Parallelism([""])
                            if data_parallelism is None else data_parallelism)
        ps_devices = [""] if ps_devices is None else ps_devices
        problem_hparams = (hparams.problems[0]
                           if problem_hparams is None else problem_hparams)

        # Work on a shallow copy; the sharing flag is dropped on the copy when
        # any problem's "inputs" modality differs from its target modality.
        hparams = copy.copy(hparams)
        if hparams.shared_embedding_and_softmax_weights:
            mismatched = [
                prob for prob in hparams.problems
                if "inputs" in prob.input_modality
                and prob.input_modality["inputs"] != prob.target_modality
            ]
            if mismatched:
                tf.logging.info(
                    "Unsetting shared_embedding_and_softmax_weights.")
                hparams.shared_embedding_and_softmax_weights = 0

        self._original_hparams = hparams
        self.set_mode(mode)
        self._decode_hparams = copy.copy(decode_hparams)
        self._data_parallelism = data_parallelism
        self._num_datashards = data_parallelism.n
        self._ps_devices = ps_devices
        self._problem_hparams = problem_hparams
        self._problem_idx = problem_idx
        self._create_modalities(problem_hparams, self._hparams)
        self._var_store = create_eager_var_store()
Пример #2
0
    def __init__(self,
                 hparams,
                 mode=tf.estimator.ModeKeys.TRAIN,
                 problem_hparams=None,
                 data_parallelism=None,
                 decode_hparams=None):
        """Set up a T2TModel layer.

        Args:
          hparams: model hyperparameters. A shallow copy is stored, and
            shared_embedding_and_softmax_weights may be unset on it.
          mode: tf.estimator.ModeKeys, the execution mode. The layer is
            trainable only when this equals ModeKeys.TRAIN.
          problem_hparams: hyperparameters for the Problem. When falsy and
            hparams has a "problems" attribute, hparams.problems[0] is used.
            Modalities are created only when problem hparams are available.
          data_parallelism: an expert_utils Parallelism object; when falsy,
            eu.Parallelism([""]) is used.
          decode_hparams: decoding hyperparameters; when falsy,
            decoding.decode_hparams() is used. A shallow copy is stored.
        """
        # Prefer the registered name; otherwise use the class-derived default.
        fallback_name = registry.default_name(type(self))
        layer_name = self.REGISTERED_NAME or fallback_name
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        super(T2TModel, self).__init__(trainable=is_training, name=layer_name)

        # Fall back to the first problem listed in hparams, when there is one.
        if not problem_hparams and hasattr(hparams, "problems"):
            problem_hparams = hparams.problems[0]
        self._problem_hparams = problem_hparams

        # Work on a shallow copy; the sharing flag is dropped on the copy when
        # the "inputs" modality differs from the target modality.
        hparams = copy.copy(hparams)
        p_hparams = self._problem_hparams
        if p_hparams and hparams.shared_embedding_and_softmax_weights:
            input_modalities = p_hparams.input_modality
            vocab_mismatch = (
                "inputs" in input_modalities and
                input_modalities["inputs"] != p_hparams.target_modality)
            if vocab_mismatch:
                tf.logging.info(
                    "Unsetting shared_embedding_and_softmax_weights.")
                hparams.shared_embedding_and_softmax_weights = 0
        self._original_hparams = hparams
        self.set_mode(mode)

        if not decode_hparams:
            decode_hparams = decoding.decode_hparams()
        self._decode_hparams = copy.copy(decode_hparams)

        if not data_parallelism:
            data_parallelism = eu.Parallelism([""])
        self._data_parallelism = data_parallelism
        self._num_datashards = self._data_parallelism.n
        self._ps_devices = self._data_parallelism.ps_devices

        self._eager_var_store = create_eager_var_store()
        if self._problem_hparams:
            self._create_modalities(self._problem_hparams, self._hparams)
Пример #3
0
  def __init__(self,
               hparams,
               mode=tf.estimator.ModeKeys.TRAIN,
               problem_hparams=None,
               data_parallelism=None,
               decode_hparams=None):
    """Initialize the T2TModel layer state.

    Args:
      hparams: model hyperparameters. Stored as a shallow copy on which
        shared_embedding_and_softmax_weights may be reset to 0.
      mode: tf.estimator.ModeKeys, the execution mode; the layer is marked
        trainable only for ModeKeys.TRAIN.
      problem_hparams: hyperparameters for the Problem. If falsy and hparams
        has a "problems" attribute, its first entry is used. Without problem
        hparams no modalities are created.
      data_parallelism: an expert_utils Parallelism object; if falsy,
        eu.Parallelism([""]) is used.
      decode_hparams: decoding hyperparameters; if falsy,
        decoding.decode_hparams() is used. Stored as a shallow copy.
    """
    # Registered name wins; the class-derived default is the fallback.
    class_default = registry.default_name(type(self))
    model_name = self.REGISTERED_NAME or class_default
    trainable = mode == tf.estimator.ModeKeys.TRAIN
    super(T2TModel, self).__init__(trainable=trainable, name=model_name)

    if problem_hparams:
      self._problem_hparams = problem_hparams
    elif hasattr(hparams, "problems"):
      self._problem_hparams = hparams.problems[0]
    else:
      self._problem_hparams = problem_hparams

    # Setup hparams on a shallow copy. Weight sharing is switched off when the
    # "inputs" modality and the target modality differ.
    hparams = copy.copy(hparams)
    if self._problem_hparams and hparams.shared_embedding_and_softmax_weights:
      prob = self._problem_hparams
      modalities_differ = (
          "inputs" in prob.input_modality and
          prob.input_modality["inputs"] != prob.target_modality)
      if modalities_differ:
        log_info("Unsetting shared_embedding_and_softmax_weights.")
        hparams.shared_embedding_and_softmax_weights = 0
    self._original_hparams = hparams
    self.set_mode(mode)

    effective_decode_hparams = (
        decode_hparams if decode_hparams else decoding.decode_hparams())
    self._decode_hparams = copy.copy(effective_decode_hparams)
    self._data_parallelism = (
        data_parallelism if data_parallelism else eu.Parallelism([""]))
    self._num_datashards = self._data_parallelism.n
    self._ps_devices = self._data_parallelism.ps_devices
    self._eager_var_store = create_eager_var_store()
    if self._problem_hparams:
      self._create_modalities(self._problem_hparams, self._hparams)
Пример #4
0
  def __init__(self,
               hparams,
               mode,
               problem_hparams=None,
               problem_idx=0,
               data_parallelism=None,
               ps_devices=None,
               decode_hparams=None):
    """Initialize the T2TModel layer state.

    Args:
      hparams: a hyperparameters object. Stored as a shallow copy on which
        shared_embedding_and_softmax_weights may be reset to 0.
      mode: the execution mode, as defined in tf.estimator.ModeKeys; the
        layer is marked trainable only for ModeKeys.TRAIN.
      problem_hparams: a hyperparameters object; if None, the first entry of
        hparams.problems is used.
      problem_idx: an integer, stored as-is.
      data_parallelism: an expert_utils Parallelism object; if None,
        eu.Parallelism([""]) is used.
      ps_devices: a list of devices; if None, [""] is used.
      decode_hparams: decoding hyperparameters; stored as a shallow copy.
    """
    # Registered name wins; the class-derived default is the fallback.
    class_default = registry.default_name(type(self))
    model_name = self.REGISTERED_NAME or class_default
    trainable = mode == tf.estimator.ModeKeys.TRAIN
    super(T2TModel, self).__init__(trainable=trainable, name=model_name)

    # Defaults for the optional arguments.
    if data_parallelism is None:
      data_parallelism = eu.Parallelism([""])
    if ps_devices is None:
      ps_devices = [""]
    if problem_hparams is None:
      problem_hparams = hparams.problems[0]

    # On a shallow copy, switch off weight sharing when any problem's
    # "inputs" modality differs from its target modality.
    hparams = copy.copy(hparams)
    if hparams.shared_embedding_and_softmax_weights:
      vocab_mismatch = False
      for prob in hparams.problems:
        if "inputs" not in prob.input_modality:
          continue
        if prob.input_modality["inputs"] != prob.target_modality:
          vocab_mismatch = True
      if vocab_mismatch:
        tf.logging.info("Unsetting shared_embedding_and_softmax_weights.")
        hparams.shared_embedding_and_softmax_weights = 0

    self._original_hparams = hparams
    self.set_mode(mode)
    self._decode_hparams = copy.copy(decode_hparams)
    self._data_parallelism = data_parallelism
    self._num_datashards = data_parallelism.n
    self._ps_devices = ps_devices
    self._problem_hparams = problem_hparams
    self._problem_idx = problem_idx
    self._create_modalities(problem_hparams, self._hparams)
    self._var_store = create_eager_var_store()
Пример #5
0
    def __init__(self,
                 hparams,
                 mode,
                 problem_hparams=None,
                 problem_idx=0,
                 data_parallelism=None,
                 decode_hparams=None):
        """Set up the model layer under the fixed name "transformer".

        Args:
          hparams: model hyperparameters object. A shallow copy is stored,
            and shared_embedding_and_softmax_weights may be unset on it.
          mode: the execution mode, as defined in tf.estimator.ModeKeys. The
            layer is trainable only when this equals ModeKeys.TRAIN.
          problem_hparams: problem hyperparameters object; when None,
            hparams.problems[0] is used.
          problem_idx: an integer, stored as-is.
          data_parallelism: an expert_utils Parallelism object; when None,
            eu.Parallelism([""]) is used. Its ps_devices are stored as well.
          decode_hparams: decoding hyperparameters; a shallow copy is stored.
        """
        # The base Layer initializer is called directly with a hard-coded
        # name rather than a registry-derived one.
        base.Layer.__init__(self,
                            trainable=mode == tf.estimator.ModeKeys.TRAIN,
                            name="transformer")
        if data_parallelism is None:
            data_parallelism = eu.Parallelism([""])
        if problem_hparams is None:
            problem_hparams = hparams.problems[0]

        # If vocabularies differ, unset shared_embedding_and_softmax_weights.
        # The flag is changed on a shallow copy of hparams.
        hparams = copy.copy(hparams)
        if hparams.shared_embedding_and_softmax_weights:
            same_vocab_sizes = True
            for problem in hparams.problems:
                if "inputs" in problem.input_modality:
                    if problem.input_modality[
                            "inputs"] != problem.target_modality:
                        same_vocab_sizes = False
            if not same_vocab_sizes:
                tf.logging.info(
                    "Unsetting shared_embedding_and_softmax_weights.")
                hparams.shared_embedding_and_softmax_weights = 0
        self._original_hparams = hparams
        self.set_mode(mode)
        self._decode_hparams = copy.copy(decode_hparams)
        self._data_parallelism = data_parallelism
        self._num_datashards = data_parallelism.n
        self._ps_devices = data_parallelism.ps_devices
        self._problem_hparams = problem_hparams
        self._problem_idx = problem_idx
        self._create_modalities(problem_hparams, self._hparams)
        self._var_store = t2t_model.create_eager_var_store()
        self.attention_weights = {}  # For visualizing attention heads.
 def name(self):
     """Return the registry default name derived from this object's class."""
     return t2t_registry.default_name(self.__class__)