def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth): batch = 1 height = 8 width = 8 channels = 3 rows = height if likelihood == common_image_attention.DistributionType.CAT: cols = channels * width else: cols = width hparams = hparam.HParams( hidden_size=2, likelihood=likelihood, num_channels=channels, mode=tf_estimator.ModeKeys.TRAIN, num_mixtures=num_mixtures, ) decoder_output = tf.random_normal( [batch, rows, cols, hparams.hidden_size]) targets = tf.random_uniform([batch, height, width, channels], minval=-1., maxval=1.) output = common_image_attention.create_output(decoder_output, rows, cols, targets, hparams) if hparams.likelihood == common_image_attention.DistributionType.CAT: self.assertEqual(output.shape, (batch, height, width, channels, depth)) else: self.assertEqual(output.shape, (batch, height, width, depth))
def body(self, features): hparams = copy.copy(self._hparams) inputs = features["inputs"] targets = features["targets"] targets_shape = common_layers.shape_list(targets) if not (tf.get_variable_scope().reuse or hparams.mode == tf.estimator.ModeKeys.PREDICT): tf.summary.image("targets", targets, max_outputs=1) decoder_input, rows, cols = cia.prepare_decoder(targets, hparams) # Add class label to decoder input. if not hparams.unconditional: decoder_input += tf.reshape( inputs, [targets_shape[0], 1, 1, hparams.hidden_size]) decoder_output = cia.transformer_decoder_layers( decoder_input, None, hparams.num_decoder_layers, hparams, attention_type=hparams.dec_attention_type, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) return output
def body(self, features): hparams = copy.copy(self._hparams) targets = features["targets"] inputs = features["inputs"] if not (tf.get_variable_scope().reuse or hparams.mode == tf.estimator.ModeKeys.PREDICT): tf.summary.image("inputs", inputs, max_outputs=1) tf.summary.image("targets", targets, max_outputs=1) encoder_input = cia.prepare_encoder(inputs, hparams) encoder_output = cia.transformer_encoder_layers( encoder_input, hparams.num_encoder_layers, hparams, attention_type=hparams.enc_attention_type, name="encoder") decoder_input, rows, cols = cia.prepare_decoder(targets, hparams) decoder_output = cia.transformer_decoder_layers( decoder_input, encoder_output, hparams.num_decoder_layers, hparams, attention_type=hparams.dec_attention_type, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) return output
def body(self, features): hparams = copy.copy(self._hparams) targets = features["targets"] inputs = features["inputs"] if not (tf.get_variable_scope().reuse or hparams.mode == tf.contrib.learn.ModeKeys.INFER): tf.summary.image("inputs", inputs, max_outputs=1) tf.summary.image("targets", targets, max_outputs=1) encoder_input = cia.prepare_encoder(inputs, hparams) encoder_output = cia.transformer_encoder_layers( encoder_input, hparams.num_encoder_layers, hparams, attention_type=hparams.enc_attention_type, name="encoder") decoder_input, rows, cols = cia.prepare_decoder( targets, hparams) decoder_output = cia.transformer_decoder_layers( decoder_input, encoder_output, hparams.num_decoder_layers, hparams, attention_type=hparams.dec_attention_type, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) return output
def body(self, features): hparams = copy.copy(self._hparams) inputs = features["inputs"] targets = features["targets"] if not (tf.get_variable_scope().reuse or hparams.mode == tf.contrib.learn.ModeKeys.INFER): tf.summary.image("targets", tf.to_float(targets), max_outputs=1) # Extra losses list if we want to use moe. losses = [] # Prepare decoder inputs and bias. decoder_input, rows, cols = cia.prepare_decoder(targets, hparams) # Add class label to decoder input. if not hparams.unconditional: decoder_input += tf.reshape(inputs, [ common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size ]) decoder_output = cia.transformer_decoder_layers( decoder_input, None, hparams.num_decoder_layers or hparams.num_hidden_layers, hparams, attention_type=hparams.dec_attention_type, losses=losses, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) if losses: return output, {"extra_loss": tf.add_n(losses)} else: return output
def body(self, features): hparams = copy.copy(self._hparams) inputs = features["inputs"] targets = features["targets"] targets_shape = common_layers.shape_list(targets) if not (tf.get_variable_scope().reuse or hparams.mode == tf.contrib.learn.ModeKeys.INFER): tf.summary.image("targets", targets, max_outputs=1) decoder_input, rows, cols = cia.prepare_decoder( targets, hparams) # Add class label to decoder input. if not hparams.unconditional: decoder_input += tf.reshape(inputs, [targets_shape[0], 1, 1, hparams.hidden_size]) decoder_output = cia.transformer_decoder_layers( decoder_input, None, hparams.num_decoder_layers, hparams, attention_type=hparams.dec_attention_type, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) return output
def generator(self, inputs, targets): """From tensor2tensor.models.img2img_transformer_2d.""" hparams = copy.copy(self._hparams) encoder_input = cia.prepare_encoder(inputs, hparams) encoder_output = cia.transformer_encoder_layers( encoder_input, hparams.num_encoder_layers, hparams, attention_type=hparams.enc_attention_type, name="encoder") decoder_input, rows, cols = cia.prepare_decoder( targets, hparams) decoder_output = cia.transformer_decoder_layers( decoder_input, encoder_output, hparams.num_decoder_layers, hparams, attention_type=hparams.dec_attention_type, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) return output
def body(self, features): hparams = copy.copy(self._hparams) targets = features["targets"] if (hparams.likelihood == cia.DistributionType.DMOL and hparams.num_channels != 1): raise ValueError("When using DMOL for the likelihood, bottom function " " must be identity and num_channels must be 1.") if (not tf.get_variable_scope().reuse and hparams.mode != tf.estimator.ModeKeys.PREDICT): tf.summary.image("targets", tf.to_float(targets), max_outputs=1) # Extra losses list if we want to use moe. losses = [] # Prepare decoder inputs and bias. decoder_input, rows, cols = cia.prepare_decoder(targets, hparams) # Add class label to decoder input. if not hparams.unconditional: inputs = features["inputs"] decoder_input += tf.reshape( inputs, [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size]) decoder_output = cia.transformer_decoder_layers( decoder_input, None, hparams.num_decoder_layers or hparams.num_hidden_layers, hparams, attention_type=hparams.dec_attention_type, losses=losses, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) if losses: return output, {"extra_loss": tf.add_n(losses)} else: return output
def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth): batch = 1 height = 8 width = 8 channels = 3 rows = height if likelihood == common_image_attention.DistributionType.CAT: cols = channels * width else: cols = width hparams = tf.contrib.training.HParams( hidden_size=2, likelihood=likelihood, mode=tf.estimator.ModeKeys.TRAIN, num_mixtures=num_mixtures, ) decoder_output = tf.random_normal([batch, rows, cols, hparams.hidden_size]) targets = tf.random_uniform([batch, height, width, channels], minval=-1., maxval=1.) output = common_image_attention.create_output( decoder_output, rows, cols, targets, hparams) if hparams.likelihood == common_image_attention.DistributionType.CAT: self.assertEqual(output.shape, (batch, height, width, channels, depth)) else: self.assertEqual(output.shape, (batch, height, width, depth))
def body(self, features): hparams = copy.copy(self._hparams) targets = features["targets"] if (hparams.likelihood == cia.DistributionType.DMOL and (hparams.modality["targets"] != modalities.ImageChannelBottomIdentityModality or hparams.num_channels != 1)): raise ValueError("When using DMOL for the likelihood,modality['targets'] " "must be ImageChannelBottomIdentityModality and " "num_channels must be 1.") if (not tf.get_variable_scope().reuse and hparams.mode != tf.contrib.learn.ModeKeys.INFER and hparams.modality["targets"] != modalities.ImageChannelBottomIdentityModality): tf.summary.image("targets", tf.to_float(targets), max_outputs=1) # Extra losses list if we want to use moe. losses = [] # Prepare decoder inputs and bias. decoder_input, rows, cols = cia.prepare_decoder(targets, hparams) # Add class label to decoder input. if not hparams.unconditional: inputs = features["inputs"] decoder_input += tf.reshape( inputs, [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size]) decoder_output = cia.transformer_decoder_layers( decoder_input, None, hparams.num_decoder_layers or hparams.num_hidden_layers, hparams, attention_type=hparams.dec_attention_type, losses=losses, name="decoder") output = cia.create_output(decoder_output, rows, cols, targets, hparams) if losses: return output, {"extra_loss": tf.add_n(losses)} else: return output