Пример #1
0
 def feature_encoders(self, data_dir):
     """Return encoders: identity text for inputs, subword for targets."""
     # The charset vocab file is expected to already exist in data_dir.
     target_vocab_path = os.path.join(data_dir, "charset_size134.txt")
     encoders = {
         "inputs": text_encoder.TextEncoder(),
         "targets": text_encoder.SubwordTextEncoder(target_vocab_path),
     }
     return encoders
Пример #2
0
 def feature_encoders(self, data_dir):
   """Return encoders: chunked DNA for inputs, identity text for targets."""
   del data_dir  # Unused: no vocab file is read from disk here.
   encoders = {}
   encoders["inputs"] = dna_encoder.DNAEncoder(chunk_size=self.chunk_size)
   # TODO(rsepassi): RealEncoder?
   encoders["targets"] = text_encoder.TextEncoder()
   return encoders
Пример #3
0
 def feature_encoders(self, data_dir):
     """Return encoders: subword for inputs, identity text for targets."""
     # self.vocab_file names the subword vocab expected under data_dir.
     source_encoder = text_encoder.SubwordTextEncoder(
         os.path.join(data_dir, self.vocab_file))
     return {
         "inputs": source_encoder,
         "targets": text_encoder.TextEncoder(),
     }
Пример #4
0
def audio_wsj_tokens(model_hparams, wrong_vocab_size):
    """English audio transcription benchmark with subword-token targets.

  Args:
    model_hparams: a tf.contrib.training.HParams
    wrong_vocab_size: a number used in the filename indicating the approximate
      vocabulary size.  This is not to be confused with the actual vocabulary
      size.
  Returns:
    a tf.contrib.training.HParams
  """
    hp = default_problem_hparams()
    # The subword vocab file must already exist within the data directory.
    vocab_path = os.path.join(model_hparams.data_dir,
                              "vocab.endefr.%d" % wrong_vocab_size)
    target_encoder = text_encoder.SubwordTextEncoder(vocab_path)
    hp.input_modality = {"inputs": (registry.Modalities.AUDIO, None)}
    hp.target_modality = (registry.Modalities.SYMBOL,
                          target_encoder.vocab_size)
    hp.vocabulary = {
        "inputs": text_encoder.TextEncoder(),
        "targets": target_encoder,
    }
    hp.loss_multiplier = 2.0
    hp.batch_size_multiplier = 512
    hp.input_space_id = 12  # Audio waveform domain.
    hp.target_space_id = 3  # English tokens.
    return hp
Пример #5
0
 def feature_encoders(self, data_dir):
     """Return encoders: identity text for inputs, subword for targets."""
     # Filename embeds the (approximate) target vocabulary size.
     target_encoder = text_encoder.SubwordTextEncoder(
         os.path.join(data_dir, "vocab.endefr.%d" % self.target_vocab_size))
     return {
         "inputs": text_encoder.TextEncoder(),
         "targets": target_encoder,
     }
Пример #6
0
def image_mscoco_characters(unused_model_hparams):
    """COCO image captioning with captions as characters."""
    hp = default_problem_hparams()
    # Byte-level targets: 256 symbols, one per possible byte value.
    hp.vocabulary = {
        "inputs": text_encoder.TextEncoder(),
        "targets": text_encoder.ByteTextEncoder(),
    }
    hp.input_modality = {"inputs": (registry.Modalities.IMAGE, None)}
    hp.target_modality = (registry.Modalities.SYMBOL, 256)
    hp.loss_multiplier = 2.0
    hp.batch_size_multiplier = 128
    hp.max_expected_batch_size_per_shard = 2
    hp.input_space_id = 1  # Image labels.
    hp.target_space_id = 2  # English characters.
    return hp
Пример #7
0
def image_mscoco_tokens(model_hparams, vocab_count):
    """COCO image captioning with captions as tokens.

    Args:
      model_hparams: hparams carrying data_dir, where the vocab file lives.
      vocab_count: approximate vocabulary size embedded in the vocab filename.

    Returns:
      a problem-hparams object describing the problem.
    """
    p = default_problem_hparams()
    p.input_modality = {"inputs": (registry.Modalities.IMAGE, None)}
    # This vocab file must be present within the data directory.
    vocab_filename = os.path.join(model_hparams.data_dir,
                                  "vocab.endefr.%d" % vocab_count)
    subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename)
    p.target_modality = (registry.Modalities.SYMBOL, subtokenizer.vocab_size)
    p.vocabulary = {
        "inputs": text_encoder.TextEncoder(),
        "targets": subtokenizer,
    }
    p.batch_size_multiplier = 256
    p.max_expected_batch_size_per_shard = 2
    # Fix: the original fell off the end and implicitly returned None.
    # Restore the tail present in the sibling image_mscoco_tokens variant.
    p.input_space_id = 1  # Image labels.
    p.target_space_id = 3  # English tokens.
    return p
Пример #8
0
def image_mscoco_characters(model_hparams):
  """COCO image captioning with captions as characters."""
  hp = default_problem_hparams()
  # Byte-level targets: 256 symbols, one per possible byte value.
  hp.vocabulary = {
      "inputs": text_encoder.TextEncoder(),
      "targets": text_encoder.ByteTextEncoder(),
  }
  hp.input_modality = {"inputs": modality.ImageModality(model_hparams)}
  hp.target_modality = modality.SymbolModality(model_hparams, 256)
  hp.loss_multiplier = 2.0
  hp.batch_size_multiplier = 128
  hp.max_expected_batch_size_per_shard = 2
  hp.input_space_id = 1  # Image labels.
  hp.target_space_id = 2  # English characters.
  return hp
Пример #9
0
def audio_wsj_characters(model_hparams):
  """English audio transcription benchmark."""
  hp = default_problem_hparams()
  # Spectral audio in, byte-level (256-symbol) characters out.
  hp.input_modality = {"inputs": modality.AudioSpectralModality(model_hparams)}
  hp.target_modality = modality.SymbolModality(model_hparams, 256)
  hp.vocabulary = {
      "inputs": text_encoder.TextEncoder(),
      "targets": text_encoder.ByteTextEncoder(),
  }
  hp.loss_multiplier = 2.0
  hp.batch_size_multiplier = 512
  hp.input_space_id = 13  # Audio spectral domain.
  hp.target_space_id = 2  # English characters.
  return hp
Пример #10
0
def audio_timit_characters(unused_model_hparams):
    """English audio transcription benchmark."""
    hp = default_problem_hparams()
    # Waveform audio in, byte-level (256-symbol) characters out.
    hp.input_modality = {"inputs": (registry.Modalities.AUDIO, None)}
    hp.target_modality = (registry.Modalities.SYMBOL, 256)
    hp.vocabulary = {
        "inputs": text_encoder.TextEncoder(),
        "targets": text_encoder.ByteTextEncoder(),
    }
    hp.loss_multiplier = 2.0
    hp.batch_size_multiplier = 256
    hp.input_space_id = 12  # Audio waveform domain.
    hp.target_space_id = 2  # English characters.
    return hp
Пример #11
0
def image_mscoco_tokens(model_hparams, vocab_count):
  """COCO image captioning with captions as tokens."""
  hp = default_problem_hparams()
  hp.input_modality = {"inputs": modality.ImageModality(model_hparams)}
  # The token vocab file must already exist within the data directory.
  vocab_path = os.path.join(model_hparams.data_dir,
                            "tokens.vocab.%d" % vocab_count)
  caption_encoder = text_encoder.SubwordTextEncoder(vocab_path)
  hp.target_modality = modality.SymbolModality(model_hparams,
                                               caption_encoder.vocab_size)
  hp.vocabulary = {
      "inputs": text_encoder.TextEncoder(),
      "targets": caption_encoder,
  }
  hp.batch_size_multiplier = 256
  hp.max_expected_batch_size_per_shard = 2
  hp.input_space_id = 1  # Image labels.
  hp.target_space_id = 3  # English tokens.
  return hp
Пример #12
0
 def hparams(self, defaults, model_hparams):
     """Populate `defaults` with this problem's hparams (mutated in place).

     Args:
       defaults: problem-hparams object to fill in.
       model_hparams: hparams carrying data_dir for the vocab file.
     """
     p = defaults
     # This vocab file must be present within the data directory.
     vocab_filename = os.path.join(model_hparams.data_dir,
                                   "charset_size134.txt")
     subtokenizer = text_encoder.SubwordTextEncoder(vocab_filename)
     p.vocabulary = {
         "inputs": text_encoder.TextEncoder(),
         "targets": subtokenizer,
     }
     p.batch_size_multiplier = 256
     p.max_expected_batch_size_per_shard = 2
     # Fix: the original first assigned IMAGE input / subword-sized SYMBOL
     # target modalities and then unconditionally overwrote both below —
     # those dead stores are removed; the final state is unchanged.
     vocab_size = 144
     p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)}
     p.target_modality = (registry.Modalities.SYMBOL, vocab_size)
     p.input_space_id = problem.SpaceID.DIGIT_0
     p.target_space_id = problem.SpaceID.DIGIT_1
Пример #13
0
def default_problem_hparams():
    """A set of basic model hyperparameters.

    Returns:
      a tf.contrib.training.HParams holding the default problem settings
      (loss/batch multipliers, modalities, space ids, vocabularies, and the
      was_reversed/was_copy markers) that problem-specific functions then
      override field by field.
    """
    return tf.contrib.training.HParams(
        # Use this parameter to get comparable perplexity numbers with different
        # tokenizations.  This value should be set to the ratio of the number of
        # tokens in the test set according to the tokenization used to the number
        # of tokens in the test set in the "official" tokenization.  For example,
        # if we are using a word-piece based model and we want to compute
        # per-word perplexity, then we set loss_multiplier to the number of
        # wordpieces per word in the test set.
        loss_multiplier=1.0,

        # Use this parameter to allow for larger sequences in the batch. Without
        # the use of this parameter, the size of the inner two dimensions will be
        # used to judge the sequence length.
        batch_size_multiplier=1,

        # To make queues of the right capacity, it's good to know the maximal
        # expected batch size, as it can vary a lot. It only affects performance
        # of input readers and memory use. The defaults should be safe and fast,
        # but decrease if your reader uses a lot of memory and increase if slow.
        max_expected_batch_size_per_shard=64,

        # Modalities used to map from input features to a space compatible with
        # chosen model architecture.  One modality spec (which is a 2-tuple,
        # (modality_full_name, vocab_size)) per feature key. modality_full_name is
        # a string type:name, e.g. class_label:2d. Leaving off the name uses the
        # default modality for that type (e.g. class_label ==
        # class_label:default).
        input_modality={},

        # Modality used to map from hidden representation to the target space.
        # Specified as a modality spec, a 2-tuple described above.
        target_modality=None,

        # Identifiers used to tell the model which input/target space will be
        # expected. For example, it can tell that we expect French as characters
        # as output, or Spanish as sound. An integer with the following semantics:
        #   0: Generic / unknown output space (default)
        #   1: Image labels
        #   2: English characters
        #   3: English tokens
        #   4: English bpe tokens
        #   5: French characters
        #   6: French tokens
        #   7: German characters
        #   8: German tokens
        #   9: German bpe tokens
        #   10: Digit cipher lexicon 0
        #   11: Digit cipher lexicon 1
        #   12: Audio waveform domain
        #   13: Audio spectral domain
        #   14: Parse characters
        #   15: Parse tokens
        #   16: Chinese tokens
        #   17: Icelandic characters
        #   18: Icelandic tokens
        #   19: Icelandic parse tokens
        #   20: Macedonian tokens
        #   21: Czech tokens
        #   22: Czech characters
        # Add more above if needed.
        input_space_id=0,
        target_space_id=0,

        # Vocabulary per feature key.
        #   a vocabulary converts to/from human-readable strings.
        # E.g. {"inputs": text_encoder.ByteTextEncoder(),
        #       "targets": text_encoder.SubwordTextEncoder("vocab_filename.txt")}
        vocabulary={
            "inputs": text_encoder.TextEncoder(),
            "targets": text_encoder.TextEncoder()
        },

        # This is a marker to keep track if the problem was reversed or copied.
        # Only set automatically, do not override the default.
        #
        # These tags can be combined in order to perform copies of the input or
        # the targets. For instance `problem_copy` will copy the inputs, but
        # `problem_rev_copy` will copy the targets.
        was_reversed=False,
        was_copy=False,
    )
Пример #14
0
 def feature_encoders(self, _):
     """Return encoders: identity text for inputs, byte-level for targets."""
     encoders = {}
     encoders["inputs"] = text_encoder.TextEncoder()
     encoders["targets"] = text_encoder.ByteTextEncoder()
     return encoders
Пример #15
0
 def feature_encoders(self, data_dir):
     """Return encoders: identity text inputs, class-label targets."""
     del data_dir  # Unused: labels come from self.class_labels.
     return {
         "inputs": text_encoder.TextEncoder(),
         "targets": text_encoder.ClassLabelEncoder(self.class_labels),
     }
Пример #16
0
 def feature_encoders(self, _):
     """Return encoders: identity text inputs, Librispeech text targets."""
     encoders = {}
     encoders["inputs"] = text_encoder.TextEncoder()
     encoders["targets"] = LibrispeechTextEncoder()
     return encoders
Пример #17
0
 def feature_encoders(self, data_dir):
   """Return identity text encoders for both inputs and targets."""
   del data_dir  # Unused: no vocab file is read.
   make_encoder = text_encoder.TextEncoder
   return {"inputs": make_encoder(), "targets": make_encoder()}
Пример #18
0
 def feature_encoders(self, data_dir):
     """Return encoders: identity text inputs, class-label targets."""
     # Note: class labels come from the self.class_labels() call, so
     # data_dir is accepted for interface parity but not consulted.
     return {
         "inputs": text_encoder.TextEncoder(),
         "targets": text_encoder.ClassLabelEncoder(self.class_labels()),
     }