def hparams(self, defaults, model_hparams):
  """Configure symbol modalities on the problem hparams.

  Args:
    defaults: problem hparams object to configure (mutated in place).
    model_hparams: model hparams forwarded to each modality constructor.
  """
  # Inputs use a 2-symbol vocabulary; targets use a 3-symbol vocabulary.
  defaults.input_modality = {
      "inputs": modalities.SymbolModality(model_hparams, 2),
  }
  defaults.target_modality = modalities.SymbolModality(model_hparams, 3)
def action_modalities(hparams):
  """Attach video and action modalities to the problem hparams.

  Args:
    hparams: model hparams whose `problem_hparams.modality` is replaced.

  Returns:
    The same `hparams` object, for chaining.
  """
  video = lambda: modalities.VideoModalityL2Raw(hparams, 256)
  action = lambda: modalities.SymbolModality(hparams, 5)
  hparams.problem_hparams.modality = {
      "inputs": video(),
      "input_action": action(),
      "targets": video(),
      "target_action": action(),
  }
  return hparams
def full_modalities(hparams):
  """Full modalities with actions and rewards.

  Args:
    hparams: model hparams whose `problem_hparams.modality` is replaced.

  Returns:
    The same `hparams` object, with `force_full_predict` enabled.
  """
  problem = hparams.problem_hparams
  problem.modality = {
      "inputs": modalities.VideoModalityL2Raw(hparams, 256),
      "input_reward": modalities.SymbolModality(hparams, 3),
      "input_action": modalities.SymbolModality(hparams, 5),
      "targets": modalities.VideoModalityL2Raw(hparams, 256),
      "target_reward": modalities.SymbolModality(hparams, 3),
      "target_action": modalities.SymbolModality(hparams, 5),
  }
  hparams.force_full_predict = True
  return hparams
def testSymbolModalityTargetsFactored(self):
  """Checks factored-logits top/loss shapes for SymbolModality targets."""
  batch_size = 10
  num_datashards = 5
  length = 6
  height = 7
  hidden_size = 9
  vocab_size = 11
  model_hparams = common_hparams.basic_params1()
  model_hparams.factored_logits = True
  model_hparams.hidden_size = hidden_size
  model_hparams.mode = tf.estimator.ModeKeys.TRAIN
  # np.random.random_integers is deprecated (removed in NumPy >= 1.25).
  # randint(k) draws from [0, k), which matches -1 + random_integers(k).
  body_output = np.random.randint(
      100, size=(batch_size, length, height, hidden_size))
  targets = np.random.randint(
      vocab_size, size=(batch_size, length, height, 1))
  m = modalities.SymbolModality(model_hparams, vocab_size)
  data_parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards)
  with self.test_session() as session:
    sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
    sharded_targets = tf.split(targets, num_datashards)
    sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                   data_parallelism)
    train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                data_parallelism)
    logits = tf.concat(sharded_logits, 0)
    session.run(tf.global_variables_initializer())
    res1, res2 = session.run((logits, train_loss))
  self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
  self.assertEqual(res2.shape, ())
def testSymbolModalityInputs(self):
  """Checks sharded bottom() output shape for SymbolModality inputs."""
  batch_size = 10
  num_datashards = 5
  length = 5
  vocab_size = 5000
  hidden_size = 9
  model_hparams = tf.contrib.training.HParams(
      symbol_modality_num_shards=4,
      hidden_size=hidden_size,
      multiply_embedding_mode="sqrt_depth",
      symbol_modality_skip_top=0,
      shared_embedding_and_softmax_weights=0,
      prepend_mode="none",
      use_eager_mode=False,
      use_tpu=False)
  # np.random.random_integers is deprecated (removed in NumPy >= 1.25).
  # randint(k) draws from [0, k), which matches -1 + random_integers(k).
  x = np.random.randint(vocab_size, size=(batch_size, length, 1, 1))
  m = modalities.SymbolModality(model_hparams, vocab_size)
  data_parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards, reuse=True)
  with self.test_session() as session:
    xs = tf.split(x, num_datashards)
    sharded_output = m.bottom_sharded(xs, data_parallelism)
    output = tf.concat(sharded_output, 0)
    session.run(tf.global_variables_initializer())
    res = session.run(output)
  self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
def testSymbolModalityTargets(self):
  """Checks top/loss shapes for SymbolModality targets via parallelism."""
  batch_size, num_datashards = 10, 5
  length, height = 6, 7
  hidden_size, vocab_size = 9, 11
  model_hparams = common_hparams.basic_params1()
  model_hparams.hidden_size = hidden_size
  model_hparams.mode = tf.estimator.ModeKeys.TRAIN
  body_output = np.random.randint(
      100, size=(batch_size, length, height, hidden_size))
  targets = np.random.randint(
      vocab_size, size=(batch_size, length, height, 1))
  modality = modalities.SymbolModality(model_hparams, vocab_size)
  parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards)
  body_shards = tf.split(tf.to_float(body_output), num_datashards)
  target_shards = tf.split(targets, num_datashards)
  logit_shards = parallelism(modality.top, body_shards, target_shards)
  loss_num, loss_den = parallelism(modality.loss, logit_shards,
                                   target_shards)
  # Aggregate the per-shard loss fractions into a single scalar.
  train_loss = tf.add_n(loss_num) / tf.maximum(1.0, tf.add_n(loss_den))
  logits = tf.concat(logit_shards, 0)
  self.evaluate(tf.global_variables_initializer())
  res1, res2 = self.evaluate((logits, train_loss))
  self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
  self.assertEqual(res2.shape, ())
def testSymbolModalityTargets(self):
  """Checks sharded top/loss shapes for SymbolModality targets."""
  batch_size = 10
  num_datashards = 5
  length = 6
  height = 7
  hidden_size = 9
  vocab_size = 11
  model_hparams = tf.contrib.training.HParams(
      symbol_modality_num_shards=4,
      hidden_size=hidden_size,
      label_smoothing=0.2,
      shared_embedding_and_softmax_weights=0)
  # np.random.random_integers is deprecated (removed in NumPy >= 1.25).
  # randint(k) draws from [0, k), which matches -1 + random_integers(k).
  body_output = np.random.randint(
      100, size=(batch_size, length, height, hidden_size))
  targets = np.random.randint(
      vocab_size, size=(batch_size, length, height, 1))
  m = modalities.SymbolModality(model_hparams, vocab_size)
  data_parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards, reuse=True)
  with self.test_session() as session:
    sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
    sharded_targets = tf.split(targets, num_datashards)
    sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                   data_parallelism)
    train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                data_parallelism)
    logits = tf.concat(sharded_logits, 0)
    session.run(tf.global_variables_initializer())
    res1, res2 = session.run((logits, train_loss))
  self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
  self.assertEqual(res2.shape, ())
def get_hparams(self, model_hparams=None):
  """Return cached problem hparams, building them on first call.

  Args:
    model_hparams: model hparams forwarded to the first task and to the
      target modality constructor.

  Returns:
    The (cached) problem hparams with an enlarged target modality.
  """
  if self._hparams is not None:
    return self._hparams
  self._hparams = self.task_list[0].get_hparams(model_hparams)
  # Grow the vocabulary to fit one id per task plus the largest class count.
  vocab_size_inc = len(self.task_list) + self.get_max_num_classes()
  vocab_size = self._hparams.vocabulary["targets"].vocab_size
  tf.logging.info("Old vocabulary size: %d" % vocab_size)
  tf.logging.info("New vocabulary size: %d" % (vocab_size + vocab_size_inc))
  self._hparams.target_modality = modalities.SymbolModality(
      model_hparams, vocab_size + vocab_size_inc)
  return self._hparams
def get_hparams(self, model_hparams=None):
  """Return cached problem hparams, building them on first call.

  Args:
    model_hparams: model hparams; `multiproblem_vocab_size` acts as a floor
      on the enlarged vocabulary, and the hparams are forwarded to the
      target modality constructor.

  Returns:
    The (cached) problem hparams with updated vocab size and modality.
  """
  if self._hparams is not None:
    return self._hparams
  self._hparams = self.task_list[0].get_hparams(model_hparams)
  # Grow the vocabulary to fit one id per task plus the largest class
  # count, then clamp it up to the configured multiproblem floor.
  old_size = self._hparams.vocabulary["targets"].vocab_size
  grown = old_size + len(self.task_list) + self.get_max_num_classes()
  new_size = max(grown, model_hparams.multiproblem_vocab_size)
  tf.logging.info("Old vocabulary size: %d" % old_size)
  self.update_task_ids(old_size)
  tf.logging.info("New vocabulary size: %d" % new_size)
  self._hparams.vocab_size["targets"] = new_size
  self._hparams.modality["targets"] = modalities.SymbolModality(
      model_hparams, new_size)
  return self._hparams
def testSymbolModalityInputs(self):
  """Checks bottom() output shape for SymbolModality inputs."""
  batch_size, num_datashards = 10, 5
  length = 5
  vocab_size, hidden_size = 5000, 9
  model_hparams = common_hparams.basic_params1()
  model_hparams.hidden_size = hidden_size
  model_hparams.mode = tf.estimator.ModeKeys.TRAIN
  ids = np.random.randint(vocab_size, size=(batch_size, length, 1, 1))
  modality = modalities.SymbolModality(model_hparams, vocab_size)
  parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards)
  id_shards = tf.split(ids, num_datashards)
  embedded_shards = parallelism(modality.bottom, id_shards)
  output = tf.concat(embedded_shards, 0)
  self.evaluate(tf.global_variables_initializer())
  res = self.evaluate(output)
  self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
def testVqaAttentionBaseline(self):
  """Checks logits/loss shapes of the VQA attention baseline model."""
  batch_size = 3
  image_size = 448
  vocab_size = 100
  num_classes = 10
  question_length = 5
  answer_length = 10
  x = 2 * np.random.rand(batch_size, image_size, image_size, 3) - 1
  # np.random.random_integers is deprecated (removed in NumPy >= 1.25);
  # randint's high bound is exclusive, so add 1 to the old inclusive high.
  q = np.random.randint(
      1, vocab_size, size=(batch_size, question_length, 1, 1))
  a = np.random.randint(
      0, num_classes + 1, size=(batch_size, answer_length, 1, 1))
  hparams = vqa_attention.vqa_attention_base()
  p_hparams = problem_hparams.test_problem_hparams(
      vocab_size, vocab_size, hparams)
  p_hparams.modality["inputs"] = modalities.ImageModality(hparams)
  p_hparams.modality["question"] = modalities.SymbolModality(
      hparams, vocab_size)
  p_hparams.modality["targets"] = modalities.MultiLabelModality(
      hparams, num_classes + 1)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.float32),
        "question": tf.constant(q, dtype=tf.int32),
        "targets": tf.constant(a, dtype=tf.int32),
    }
    model = vqa_attention.VqaAttentionBaseline(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, losses = model(features)
    session.run(tf.global_variables_initializer())
    logits_, losses_ = session.run([logits, losses])
  self.assertEqual(logits_.shape, (batch_size, 1, 1, 1, num_classes + 1))
  self.assertEqual(losses_["training"].shape, ())