Example #1
  def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    batch = 1
    height = 8
    width = 8
    channels = 3
    rows = height
    if likelihood == common_image_attention.DistributionType.CAT:
      cols = channels * width
    else:
      cols = width
    hparams = hparam.HParams(
        hidden_size=2,
        likelihood=likelihood,
        num_channels=channels,
        mode=tf_estimator.ModeKeys.TRAIN,
        num_mixtures=num_mixtures,
    )
    decoder_output = tf.random_normal(
        [batch, rows, cols, hparams.hidden_size])
    targets = tf.random_uniform([batch, height, width, channels],
                                minval=-1., maxval=1.)
    output = common_image_attention.create_output(decoder_output, rows,
                                                  cols, targets, hparams)
    if hparams.likelihood == common_image_attention.DistributionType.CAT:
      self.assertEqual(output.shape,
                       (batch, height, width, channels, depth))
    else:
      self.assertEqual(output.shape, (batch, height, width, depth))
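This test is parameterized over (likelihood, num_mixtures, depth): for CAT the decoder emits one position per sub-pixel, so cols = channels * width and the output carries a separate depth-way distribution per channel, while for DMOL the channels stay fused and depth covers the mixture parameters. A minimal sketch of how such a test is typically driven with absl's parameterized decorator; the parameter tuples and class scaffolding below are illustrative assumptions, not the original file:

# Hypothetical test scaffolding; the (likelihood, num_mixtures, depth)
# tuples are assumptions chosen to match the shape logic above.
from absl.testing import parameterized
import tensorflow.compat.v1 as tf

from tensor2tensor.layers import common_image_attention


class CreateOutputTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      # CAT: a 256-way softmax per sub-pixel; num_mixtures is unused.
      (common_image_attention.DistributionType.CAT, None, 256),
      # DMOL: depth scales with the number of mixture components.
      (common_image_attention.DistributionType.DMOL, 5, 50),
  )
  def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    ...  # body as in Example #1 above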
Example #2
    def body(self, features):
        hparams = copy.copy(self._hparams)
        inputs = features["inputs"]
        targets = features["targets"]
        targets_shape = common_layers.shape_list(targets)
        if not (tf.get_variable_scope().reuse
                or hparams.mode == tf.estimator.ModeKeys.PREDICT):
            tf.summary.image("targets", targets, max_outputs=1)

        decoder_input, rows, cols = cia.prepare_decoder(targets, hparams)
        # Add class label to decoder input.
        if not hparams.unconditional:
            decoder_input += tf.reshape(
                inputs, [targets_shape[0], 1, 1, hparams.hidden_size])

        decoder_output = cia.transformer_decoder_layers(
            decoder_input,
            None,
            hparams.num_decoder_layers,
            hparams,
            attention_type=hparams.dec_attention_type,
            name="decoder")

        output = cia.create_output(decoder_output, rows, cols, targets,
                                   hparams)
        return output
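The class-label conditioning here relies on broadcasting: inputs holds one embedding per image, and reshaping it to [batch, 1, 1, hidden_size] lets the addition replicate that embedding across every decoder position. A self-contained numpy sketch of the same broadcast (shapes are illustrative):

import numpy as np

batch, rows, cols, hidden_size = 2, 4, 4, 8
decoder_input = np.zeros((batch, rows, cols, hidden_size), np.float32)
class_embedding = np.random.randn(batch, hidden_size).astype(np.float32)

# The singleton row/col axes broadcast, so every position in an image
# receives that image's class embedding.
conditioned = decoder_input + class_embedding.reshape(batch, 1, 1, hidden_size)
assert conditioned.shape == (batch, rows, cols, hidden_size)
assert np.allclose(conditioned[0, 0, 0], conditioned[0, -1, -1])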
Example #3
    def body(self, features):
        hparams = copy.copy(self._hparams)
        targets = features["targets"]
        inputs = features["inputs"]
        if not (tf.get_variable_scope().reuse
                or hparams.mode == tf.estimator.ModeKeys.PREDICT):
            tf.summary.image("inputs", inputs, max_outputs=1)
            tf.summary.image("targets", targets, max_outputs=1)

        encoder_input = cia.prepare_encoder(inputs, hparams)
        encoder_output = cia.transformer_encoder_layers(
            encoder_input,
            hparams.num_encoder_layers,
            hparams,
            attention_type=hparams.enc_attention_type,
            name="encoder")
        decoder_input, rows, cols = cia.prepare_decoder(targets, hparams)
        decoder_output = cia.transformer_decoder_layers(
            decoder_input,
            encoder_output,
            hparams.num_decoder_layers,
            hparams,
            attention_type=hparams.dec_attention_type,
            name="decoder")
        output = cia.create_output(decoder_output, rows, cols, targets,
                                   hparams)
        return output
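This variant runs a full encoder-decoder stack over (inputs, targets) pairs, as in image-to-image translation. A hedged sketch of the hparams fields this body reads directly; the values are illustrative, and prepare_encoder/prepare_decoder plus the layer stacks consume additional fields not shown:

# Illustrative hparams covering only the fields referenced in body();
# the attention-type constants come from cia.AttentionType.
from tensor2tensor.layers import common_image_attention as cia
from tensor2tensor.utils import hparam
import tensorflow.compat.v1 as tf

hparams = hparam.HParams(
    hidden_size=512,
    num_encoder_layers=4,
    num_decoder_layers=8,
    enc_attention_type=cia.AttentionType.GLOBAL,
    dec_attention_type=cia.AttentionType.LOCAL_1D,
    mode=tf.estimator.ModeKeys.TRAIN,
)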
Example #4
  def body(self, features):
    hparams = copy.copy(self._hparams)
    targets = features["targets"]
    inputs = features["inputs"]
    if not (tf.get_variable_scope().reuse or
            hparams.mode == tf.contrib.learn.ModeKeys.INFER):
      tf.summary.image("inputs", inputs, max_outputs=1)
      tf.summary.image("targets", targets, max_outputs=1)

    encoder_input = cia.prepare_encoder(inputs, hparams)
    encoder_output = cia.transformer_encoder_layers(
        encoder_input,
        hparams.num_encoder_layers,
        hparams,
        attention_type=hparams.enc_attention_type,
        name="encoder")
    decoder_input, rows, cols = cia.prepare_decoder(
        targets, hparams)
    decoder_output = cia.transformer_decoder_layers(
        decoder_input,
        encoder_output,
        hparams.num_decoder_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        name="decoder")
    output = cia.create_output(decoder_output, rows, cols, targets, hparams)
    return output
Example #5
    def body(self, features):
        hparams = copy.copy(self._hparams)
        inputs = features["inputs"]
        targets = features["targets"]
        if not (tf.get_variable_scope().reuse
                or hparams.mode == tf.contrib.learn.ModeKeys.INFER):
            tf.summary.image("targets", tf.to_float(targets), max_outputs=1)

        # Extra losses list if we want to use moe.
        losses = []
        # Prepare decoder inputs and bias.
        decoder_input, rows, cols = cia.prepare_decoder(targets, hparams)
        # Add class label to decoder input.
        if not hparams.unconditional:
            decoder_input += tf.reshape(inputs, [
                common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size
            ])
        decoder_output = cia.transformer_decoder_layers(
            decoder_input,
            None,
            hparams.num_decoder_layers or hparams.num_hidden_layers,
            hparams,
            attention_type=hparams.dec_attention_type,
            losses=losses,
            name="decoder")
        output = cia.create_output(decoder_output, rows, cols, targets,
                                   hparams)

        if losses:
            return output, {"extra_loss": tf.add_n(losses)}
        else:
            return output
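When MoE decoder layers are in play, transformer_decoder_layers appends their load-balancing losses to the losses list, and body() returns them under an "extra_loss" key. A hedged sketch of how a caller could fold that optional dict into a scalar objective; the helper below is an assumption, not tensor2tensor's actual trainer:

import tensorflow.compat.v1 as tf

def fold_extra_losses(body_result, training_loss):
  # body() returns either `output` or `(output, {"extra_loss": ...})`.
  if isinstance(body_result, tuple):
    output, extra = body_result
    return output, training_loss + tf.add_n(list(extra.values()))
  return body_result, training_loss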
Example #6
  def body(self, features):
    hparams = copy.copy(self._hparams)
    inputs = features["inputs"]
    targets = features["targets"]
    targets_shape = common_layers.shape_list(targets)
    if not (tf.get_variable_scope().reuse or
            hparams.mode == tf.contrib.learn.ModeKeys.INFER):
      tf.summary.image("targets", targets, max_outputs=1)

    decoder_input, rows, cols = cia.prepare_decoder(
        targets, hparams)
    # Add class label to decoder input.
    if not hparams.unconditional:
      decoder_input += tf.reshape(inputs,
                                  [targets_shape[0], 1, 1, hparams.hidden_size])

    decoder_output = cia.transformer_decoder_layers(
        decoder_input, None,
        hparams.num_decoder_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        name="decoder")

    output = cia.create_output(decoder_output, rows, cols, targets, hparams)
    return output
Example #7
  def generator(self, inputs, targets):
    """From tensor2tensor.models.img2img_transformer_2d."""

    hparams = copy.copy(self._hparams)

    encoder_input = cia.prepare_encoder(inputs, hparams)

    encoder_output = cia.transformer_encoder_layers(
        encoder_input,
        hparams.num_encoder_layers,
        hparams,
        attention_type=hparams.enc_attention_type,
        name="encoder")

    decoder_input, rows, cols = cia.prepare_decoder(
        targets, hparams)

    decoder_output = cia.transformer_decoder_layers(
        decoder_input,
        encoder_output,
        hparams.num_decoder_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        name="decoder")

    output = cia.create_output(decoder_output, rows, cols, targets, hparams)

    return output
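Since this generator is a plain function of (inputs, targets), it can be called from a model body or reused inside an adversarial setup. A hedged usage sketch; the surrounding class and feature layout are assumptions:

# Hypothetical call site; `features` follows the body() examples above,
# where "inputs" and "targets" are already embedded to hidden_size.
def body(self, features):
  inputs = features["inputs"]
  targets = features["targets"]
  return self.generator(inputs, targets)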
Example #8
  def body(self, features):
    hparams = copy.copy(self._hparams)
    targets = features["targets"]
    if (hparams.likelihood == cia.DistributionType.DMOL and
        hparams.num_channels != 1):
      raise ValueError("When using DMOL for the likelihood, bottom function "
                       " must be identity and num_channels must be 1.")
    if (not tf.get_variable_scope().reuse and
        hparams.mode != tf.estimator.ModeKeys.PREDICT):
      tf.summary.image("targets", tf.to_float(targets), max_outputs=1)

    # Extra losses list if we want to use moe.
    losses = []
    # Prepare decoder inputs and bias.
    decoder_input, rows, cols = cia.prepare_decoder(targets, hparams)
    # Add class label to decoder input.
    if not hparams.unconditional:
      inputs = features["inputs"]
      decoder_input += tf.reshape(
          inputs,
          [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size])
    decoder_output = cia.transformer_decoder_layers(
        decoder_input,
        None,
        hparams.num_decoder_layers or hparams.num_hidden_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        losses=losses,
        name="decoder")
    output = cia.create_output(decoder_output, rows, cols, targets, hparams)

    if losses:
      return output, {"extra_loss": tf.add_n(losses)}
    else:
      return output
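The DMOL guard above exists because the discretized mixture of logistics predicts raw pixel values itself, so the targets must reach it through an identity bottom rather than an embedding. A hedged sketch of the usual PixelCNN++-style arithmetic behind the `depth` parameter in the tests; the 10-parameters-per-component layout is an assumption about cia's DMOL head:

def dmol_output_depth(num_mixtures):
  # Per mixture component: 1 mixture logit, 3 means, 3 log-scales and
  # 3 channel-coupling coefficients, as in PixelCNN++.
  params_per_component = 1 + 3 + 3 + 3
  return num_mixtures * params_per_component

assert dmol_output_depth(5) == 50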
Example #9
  def testCreateOutputTrainMode(self, likelihood, num_mixtures, depth):
    batch = 1
    height = 8
    width = 8
    channels = 3
    rows = height
    if likelihood == common_image_attention.DistributionType.CAT:
      cols = channels * width
    else:
      cols = width
    hparams = tf.contrib.training.HParams(
        hidden_size=2,
        likelihood=likelihood,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_mixtures=num_mixtures,
    )
    decoder_output = tf.random_normal([batch, rows, cols, hparams.hidden_size])
    targets = tf.random_uniform([batch, height, width, channels],
                                minval=-1., maxval=1.)
    output = common_image_attention.create_output(
        decoder_output, rows, cols, targets, hparams)
    if hparams.likelihood == common_image_attention.DistributionType.CAT:
      self.assertEqual(output.shape, (batch, height, width, channels, depth))
    else:
      self.assertEqual(output.shape, (batch, height, width, depth))
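Unlike Example #1, this older variant builds its hparams with tf.contrib.training.HParams, which was removed along with tf.contrib in TensorFlow 2.x. A hedged equivalent using the HParams class tensor2tensor ships itself; the field values mirror the test above, with an illustrative likelihood:

from tensor2tensor.layers import common_image_attention
from tensor2tensor.utils import hparam
import tensorflow.compat.v1 as tf

hparams = hparam.HParams(
    hidden_size=2,
    likelihood=common_image_attention.DistributionType.CAT,  # illustrative
    mode=tf.estimator.ModeKeys.TRAIN,
    num_mixtures=None,
)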
Example #10
  def body(self, features):
    hparams = copy.copy(self._hparams)
    targets = features["targets"]
    if (hparams.likelihood == cia.DistributionType.DMOL and
        (hparams.modality["targets"] !=
         modalities.ImageChannelBottomIdentityModality or
         hparams.num_channels != 1)):
      raise ValueError("When using DMOL for the likelihood,modality['targets'] "
                       "must be ImageChannelBottomIdentityModality and "
                       "num_channels must be 1.")
    if (not tf.get_variable_scope().reuse and
        hparams.mode != tf.contrib.learn.ModeKeys.INFER and
        hparams.modality["targets"] !=
        modalities.ImageChannelBottomIdentityModality):
      tf.summary.image("targets", tf.to_float(targets), max_outputs=1)

    # Extra losses list if we want to use moe.
    losses = []
    # Prepare decoder inputs and bias.
    decoder_input, rows, cols = cia.prepare_decoder(targets, hparams)
    # Add class label to decoder input.
    if not hparams.unconditional:
      inputs = features["inputs"]
      decoder_input += tf.reshape(
          inputs,
          [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size])
    decoder_output = cia.transformer_decoder_layers(
        decoder_input,
        None,
        hparams.num_decoder_layers or hparams.num_hidden_layers,
        hparams,
        attention_type=hparams.dec_attention_type,
        losses=losses,
        name="decoder")
    output = cia.create_output(decoder_output, rows, cols, targets, hparams)

    if losses:
      return output, {"extra_loss": tf.add_n(losses)}
    else:
      return output