Example #1
 def hparams(self, defaults, model_hparams):
     hp = defaults
     hp.input_modality = {
         "inputs": modalities.SymbolModality(model_hparams, 2)
     }
     hp.target_modality = modalities.SymbolModality(
         model_hparams, 3)
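Example #1 assigns the modalities through the older input_modality/target_modality attributes on the problem hparams, while the later examples (#2, #3 and #9) use a single modality dict keyed by feature name. Below is a minimal sketch of the same 2-symbol-input / 3-symbol-target setup written against that dict-based layout; whether the defaults object exposes a modality dict depends on the tensor2tensor version, so treat this as an assumption rather than the original snippet:

def hparams(self, defaults, model_hparams):
    hp = defaults
    # Assumed dict-based layout, cf. Examples #2 and #9.
    hp.modality = {
        "inputs": modalities.SymbolModality(model_hparams, 2),
        "targets": modalities.SymbolModality(model_hparams, 3),
    }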
Example #2
def action_modalities(hparams):
    hparams.problem_hparams.modality = {
        "inputs": modalities.VideoModalityL2Raw(hparams, 256),
        "input_action": modalities.SymbolModality(hparams, 5),
        "targets": modalities.VideoModalityL2Raw(hparams, 256),
        "target_action": modalities.SymbolModality(hparams, 5),
    }
    return hparams
Example #3
def full_modalities(hparams):
    """Full modalities with actions and rewards."""
    hparams.problem_hparams.modality = {
        "inputs": modalities.VideoModalityL2Raw(hparams, 256),
        "input_reward": modalities.SymbolModality(hparams, 3),
        "input_action": modalities.SymbolModality(hparams, 5),
        "targets": modalities.VideoModalityL2Raw(hparams, 256),
        "target_reward": modalities.SymbolModality(hparams, 3),
        "target_action": modalities.SymbolModality(hparams, 5),
    }
    hparams.force_full_predict = True
    return hparams
Example #4
 def testSymbolModalityTargetsFactored(self):
   batch_size = 10
   num_datashards = 5
   length = 6
   height = 7
   hidden_size = 9
   vocab_size = 11
   model_hparams = common_hparams.basic_params1()
   model_hparams.factored_logits = True
   model_hparams.hidden_size = hidden_size
   model_hparams.mode = tf.estimator.ModeKeys.TRAIN
   body_output = -1 + np.random.random_integers(
       100, size=(batch_size, length, height, hidden_size))
   targets = -1 + np.random.random_integers(
       vocab_size, size=(batch_size, length, height, 1))
   m = modalities.SymbolModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ["/device:CPU:0"] * num_datashards)
   with self.test_session() as session:
     sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
     sharded_targets = tf.split(targets, num_datashards)
     sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                    data_parallelism)
     train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                 data_parallelism)
     logits = tf.concat(sharded_logits, 0)
     session.run(tf.global_variables_initializer())
     res1, res2 = session.run((logits, train_loss))
   self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
   self.assertEqual(res2.shape, ())
Example #5
 def testSymbolModalityInputs(self):
     batch_size = 10
     num_datashards = 5
     length = 5
     vocab_size = 5000
     hidden_size = 9
     model_hparams = tf.contrib.training.HParams(
         symbol_modality_num_shards=4,
         hidden_size=hidden_size,
         multiply_embedding_mode="sqrt_depth",
         symbol_modality_skip_top=0,
         shared_embedding_and_softmax_weights=0,
         prepend_mode="none",
         use_eager_mode=False,
         use_tpu=False)
     x = -1 + np.random.random_integers(vocab_size,
                                        size=(batch_size, length, 1, 1))
     m = modalities.SymbolModality(model_hparams, vocab_size)
     data_parallelism = expert_utils.Parallelism(["/device:CPU:0"] *
                                                 num_datashards,
                                                 reuse=True)
     with self.test_session() as session:
         xs = tf.split(x, num_datashards)
         sharded_output = m.bottom_sharded(xs, data_parallelism)
         output = tf.concat(sharded_output, 0)
         session.run(tf.global_variables_initializer())
         res = session.run(output)
     self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
Example #6
 def testSymbolModalityTargets(self):
   batch_size = 10
   num_datashards = 5
   length = 6
   height = 7
   hidden_size = 9
   vocab_size = 11
   model_hparams = common_hparams.basic_params1()
   model_hparams.hidden_size = hidden_size
   model_hparams.mode = tf.estimator.ModeKeys.TRAIN
   body_output = np.random.randint(
       100, size=(batch_size, length, height, hidden_size))
   targets = np.random.randint(
       vocab_size, size=(batch_size, length, height, 1))
   m = modalities.SymbolModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ["/device:CPU:0"] * num_datashards)
   sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
   sharded_targets = tf.split(targets, num_datashards)
   sharded_logits = data_parallelism(m.top,
                                     sharded_body_output,
                                     sharded_targets)
   sharded_loss_num, sharded_loss_den = data_parallelism(m.loss,
                                                         sharded_logits,
                                                         sharded_targets)
   train_loss = (tf.add_n(sharded_loss_num) /
                 tf.maximum(1.0, tf.add_n(sharded_loss_den)))
   logits = tf.concat(sharded_logits, 0)
   self.evaluate(tf.global_variables_initializer())
   res1, res2 = self.evaluate((logits, train_loss))
   self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
   self.assertEqual(res2.shape, ())
Example #7
 def testSymbolModalityTargets(self):
     batch_size = 10
     num_datashards = 5
     length = 6
     height = 7
     hidden_size = 9
     vocab_size = 11
     model_hparams = tf.contrib.training.HParams(
         symbol_modality_num_shards=4,
         hidden_size=hidden_size,
         label_smoothing=0.2,
         shared_embedding_and_softmax_weights=0)
     body_output = -1 + np.random.random_integers(
         100, size=(batch_size, length, height, hidden_size))
     targets = -1 + np.random.random_integers(
         vocab_size, size=(batch_size, length, height, 1))
     m = modalities.SymbolModality(model_hparams, vocab_size)
     data_parallelism = expert_utils.Parallelism(["/device:CPU:0"] *
                                                 num_datashards,
                                                 reuse=True)
     with self.test_session() as session:
         sharded_body_output = tf.split(tf.to_float(body_output),
                                        num_datashards)
         sharded_targets = tf.split(targets, num_datashards)
         sharded_logits = m.top_sharded(sharded_body_output,
                                        sharded_targets, data_parallelism)
         train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                     data_parallelism)
         logits = tf.concat(sharded_logits, 0)
         session.run(tf.global_variables_initializer())
         res1, res2 = session.run((logits, train_loss))
     self.assertEqual(res1.shape,
                      (batch_size, length, height, 1, vocab_size))
     self.assertEqual(res2.shape, ())
Example #8
    def get_hparams(self, model_hparams=None):
        if self._hparams is not None:
            return self._hparams

        self._hparams = self.task_list[0].get_hparams(model_hparams)
        # increase the vocab size in order to account for task ids
        vocab_size_inc = len(self.task_list)
        vocab_size_inc += self.get_max_num_classes()
        vocab_size = self._hparams.vocabulary["targets"].vocab_size
        tf.logging.info("Old vocabulary size: %d" % vocab_size)
        tf.logging.info("New vocabulary size: %d" %
                        (vocab_size + vocab_size_inc))
        self._hparams.target_modality = modalities.SymbolModality(
            model_hparams, vocab_size + vocab_size_inc)

        return self._hparams
Example #9
 def get_hparams(self, model_hparams=None):
     if self._hparams is not None:
         return self._hparams
     self._hparams = self.task_list[0].get_hparams(model_hparams)
     # Increase the vocab size to account for task ids and modify the modality.
     vocab_size_inc = len(self.task_list)
     vocab_size_inc += self.get_max_num_classes()
     vocab_size = self._hparams.vocabulary["targets"].vocab_size
     new_vocab_size = vocab_size + vocab_size_inc
     if model_hparams.multiproblem_vocab_size > new_vocab_size:
         new_vocab_size = model_hparams.multiproblem_vocab_size
     tf.logging.info("Old vocabulary size: %d" % vocab_size)
     self.update_task_ids(vocab_size)
     tf.logging.info("New vocabulary size: %d" % new_vocab_size)
     self._hparams.vocab_size["targets"] = new_vocab_size
     self._hparams.modality["targets"] = modalities.SymbolModality(
         model_hparams, self._hparams.vocab_size["targets"])
     return self._hparams
Example #10
 def testSymbolModalityInputs(self):
     batch_size = 10
     num_datashards = 5
     length = 5
     vocab_size = 5000
     hidden_size = 9
     model_hparams = common_hparams.basic_params1()
     model_hparams.hidden_size = hidden_size
     model_hparams.mode = tf.estimator.ModeKeys.TRAIN
     x = np.random.randint(vocab_size, size=(batch_size, length, 1, 1))
     m = modalities.SymbolModality(model_hparams, vocab_size)
     data_parallelism = expert_utils.Parallelism(["/device:CPU:0"] *
                                                 num_datashards)
     xs = tf.split(x, num_datashards)
     sharded_output = data_parallelism(m.bottom, xs)
     output = tf.concat(sharded_output, 0)
     self.evaluate(tf.global_variables_initializer())
     res = self.evaluate(output)
     self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
Example #11
    def testVqaAttentionBaseline(self):

        batch_size = 3
        image_size = 448
        vocab_size = 100
        num_classes = 10
        question_length = 5
        answer_length = 10
        x = 2 * np.random.rand(batch_size, image_size, image_size, 3) - 1
        q = np.random.random_integers(1,
                                      high=vocab_size - 1,
                                      size=(batch_size, question_length, 1, 1))
        a = np.random.random_integers(0,
                                      high=num_classes,
                                      size=(batch_size, answer_length, 1, 1))
        hparams = vqa_attention.vqa_attention_base()
        p_hparams = problem_hparams.test_problem_hparams(
            vocab_size, vocab_size, hparams)
        p_hparams.modality["inputs"] = modalities.ImageModality(hparams)
        p_hparams.modality["question"] = modalities.SymbolModality(
            hparams, vocab_size)
        p_hparams.modality["targets"] = modalities.MultiLabelModality(
            hparams, num_classes + 1)
        with self.test_session() as session:
            features = {
                "inputs": tf.constant(x, dtype=tf.float32),
                "question": tf.constant(q, dtype=tf.int32),
                "targets": tf.constant(a, dtype=tf.int32),
            }
            model = vqa_attention.VqaAttentionBaseline(
                hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
            logits, losses = model(features)
            session.run(tf.global_variables_initializer())
            logits_, losses_ = session.run([logits, losses])

        self.assertEqual(logits_.shape, (batch_size, 1, 1, 1, num_classes + 1))
        self.assertEqual(losses_["training"].shape, ())
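Taken together, the tests above exercise the three stages of SymbolModality: bottom embeds integer token ids, top projects the body output to logits, and loss returns a (numerator, denominator) pair that is combined as in Example #6. The following is a minimal single-device sketch of the top/loss half of that round trip, assuming the TF1-era tensor2tensor API shown in these examples; the import paths and hparams values are assumptions, not part of the original snippets:

import numpy as np
import tensorflow as tf
from tensor2tensor.layers import common_hparams
from tensor2tensor.layers import modalities

batch_size, length, height, hidden_size, vocab_size = 4, 6, 1, 9, 11

model_hparams = common_hparams.basic_params1()
model_hparams.hidden_size = hidden_size
model_hparams.mode = tf.estimator.ModeKeys.TRAIN

m = modalities.SymbolModality(model_hparams, vocab_size)

# Fake decoder output and integer targets, shaped as in the tests above.
body_output = tf.random_normal([batch_size, length, height, hidden_size])
targets = tf.constant(
    np.random.randint(vocab_size, size=(batch_size, length, height, 1)),
    dtype=tf.int32)

logits = m.top(body_output, targets)          # (batch, length, height, 1, vocab_size)
loss_num, loss_den = m.loss(logits, targets)  # weighted loss sum and weight total
loss = loss_num / tf.maximum(1.0, loss_den)

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    print(session.run(loss))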