Example #1
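Verifies that packed-input mode is equivalent to plain batching: the same sequences are reshaped into single-example packed tensors with explicit segment ids, and the packed stack's output must match the unpacked stack's.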
  def _testGPipeTransformerFPropPackedInput(self, splits=1):
    batch = 4
    tf.flags.FLAGS.tpu_compatible = True
    with self.session() as sess:
      with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
        params = self._TransformerParams(splits=splits)
        params.dtype = tf.float32
        params.fprop_dtype = tf.float32
        packed_params = params.Copy()
        packed_params.packed_input = True
        xformer = GPipeTransformerStack(params)
        packed_xformer = GPipeTransformerStack(packed_params)
        # Prepare inputs
        inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(batch)
        packed_inputs = tf.reshape(inputs, [-1, 1, 2])
        packed_tgt_inputs = tf.reshape(tgt_inputs, [-1, 1, 2])
        packed_paddings = tf.reshape(paddings, [-1, 1])
        packed_tg_paddings = tf.reshape(tgt_paddings, [-1, 1])
        segment_ids = tf.transpose(
            tf.constant([[0, 1, 2, 3, 0, 1, 2, 3]], dtype=tf.float32))
        tgt_segment_id = tf.transpose(
            tf.constant([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]],
                        dtype=tf.float32))

        output = xformer.FProp(xformer.theta, inputs, paddings, tgt_inputs,
                               tgt_paddings)
        packed_output = packed_xformer.FProp(
            packed_xformer.theta, packed_inputs, packed_paddings,
            packed_tgt_inputs, packed_tg_paddings, segment_ids, tgt_segment_id)
        packed_output = tf.reshape(packed_output, output.shape)

        tf.global_variables_initializer().run()
        output, packed_output = sess.run([output, packed_output])
        self.assertAllClose(output, packed_output)
Example #2
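Runs a transparent forward pass through a stack built with embedding layers (1 encoder layer, 3 decoder layers) and checks the encoder and decoder outputs against golden values.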
    def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
            self, splits=1, num_micro_batches=1):
        # time = 2.
        batch = 4
        with self.session() as sess:
            params = _TransformerParamsWithEmbeddings(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            params.is_transparent = True
            params.transparent_merger_dropout_prob = 0.0
            xformer = GPipeTransformerStack(params)

            input_ids, id_paddings, tgt_inputs, tgt_paddings, _, _ = _TransformerRandomInputsIds(
                batch=batch)
            inputs, paddings, _, _ = _TransformerRandomInputsVecs(batch=batch)
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
            dec_output = xformer.FProp(xformer.theta, input_ids, id_paddings,
                                       tgt_inputs, tgt_paddings)[2]
            enc_out_1 = sess.run(enc_outputs)
            dec_out = sess.run(dec_output)
            self.assertAllClose(
                [[[0.017581, 0.802863, 0.975554, -1.164572]] * batch,
                 [[-0.549953, 1.196884, 4.910457, -0.102137]] * batch],
                enc_out_1)
            self.assertAllClose(
                [[[-1.122128, 1.111972, 4.642949, -2.14831]] * batch,
                 [[-1.336919, 1.182709, 4.785938, -2.039246]] * batch,
                 [[-1.335168, 1.297679, 4.720459, -2.111006]] * batch],
                dec_out)
Example #3
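Exercises a decoder-only stack (4 decoder layers, no encoder) in TPU-compatible mode and checks the forward-pass output against golden values.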
    def _testGPipeTransformerDecoderStackFProp(self,
                                               splits=1,
                                               num_micro_batches=1):
        batch = 4
        tf.flags.FLAGS.tpu_compatible = True
        with self.session() as sess:
            params = self._TransformerParams(
                num_decoder_layers=4,
                num_encoder_layers=0,
                splits=splits,
                num_micro_batches=num_micro_batches)
            params.dtype = tf.float32
            params.fprop_dtype = tf.float32
            xformer = GPipeTransformerStack(params)

            inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(
                batch)

            output = xformer.FProp(xformer.theta, inputs, paddings, tgt_inputs,
                                   tgt_paddings)

            tf.global_variables_initializer().run()
            output_val = sess.run(output)
            self.assertAllCloseAccordingToType(
                [[[1.03550637, -1.3199079]] * batch,
                 [[-3.36382699, -0.74492991]] * batch,
                 [[-3.36382723, -0.74492997]] * batch], output_val)
Example #4
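Tests transparent encoder mode with a single transparent output: with merger dropout disabled, both the encoder output and the decoder forward pass must match golden values.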
    def _testGPipeTransformerStackTrainEncoderTransparentFProp(
            self, splits=1, num_micro_batches=1):
        # time = 2.
        batch = 4
        with self.session() as sess:
            params = self._TransformerParams(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=2,
                num_encoder_layers=2)
            params.is_transparent = True
            params.num_transparent_outputs = 1
            params.transparent_merger_dropout_prob = 0.0
            xformer = GPipeTransformerStack(params)

            inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(
                batch=batch)
            py_utils.GetOrCreateGlobalStep()
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_output = xformer.EncoderFPropDefaultTheta(inputs, paddings)
            dec_output = xformer.FProp(xformer.theta, inputs, paddings,
                                       tgt_inputs, tgt_paddings)
            enc_out = sess.run(enc_output)
            dec_out = sess.run(dec_output)
            self.assertAllClose(enc_out, [[[-0.118476, 1.031626]] * batch,
                                          [[0.643884, -1.02581167]] * batch])
            self.assertAllClose(dec_out, [[[-2.8764534, 1.00808454]] * batch,
                                          [[1.02129495, -0.78406084]] * batch,
                                          [[1.02129495, -0.78406084]] * batch])
Example #5
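Tests transparent mode with num_transparent_outputs=3: with merger dropout disabled, the three transparent encoder outputs must be identical to one another and, together with the decoder output, match golden values.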
    def _testGPipeTransformerStackTrainTransparentFProp(
            self, splits=1, num_micro_batches=1):
        # time = 2.
        batch = 4
        with self.session() as sess:
            params = self._TransformerParams(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            params.is_transparent = True
            params.num_transparent_outputs = 3
            params.transparent_merger_dropout_prob = 0.0
            xformer = GPipeTransformerStack(params)

            inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(
                batch=batch)
            py_utils.GetOrCreateGlobalStep()
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
            dec_output = xformer.FProp(xformer.theta, inputs, paddings,
                                       tgt_inputs, tgt_paddings)
            enc_out_1, enc_out_2, enc_out_3 = sess.run(enc_outputs)
            dec_out = sess.run(dec_output)
            self.assertAllClose(enc_out_1, enc_out_2)
            self.assertAllClose(enc_out_2, enc_out_3)
            self.assertAllClose(enc_out_1,
                                [[[-0.27896273, 1.46589136]] * batch,
                                 [[1.03141928, -0.847896]] * batch])
            self.assertAllClose(dec_out, [[[2.926736, -4.090812]] * batch,
                                          [[-1.69508219, 1.75891459]] * batch,
                                          [[-1.6950829, 1.75891507]] * batch])
Example #6
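A second copy of the transparent-FProp-with-embeddings test from Example #2, checked against a different set of golden values.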
    def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
            self, splits=1, num_micro_batches=1):
        # time = 2.
        batch = 4
        with self.session() as sess:
            params = _TransformerParamsWithEmbeddings(
                splits=splits,
                num_micro_batches=num_micro_batches,
                num_decoder_layers=3,
                num_encoder_layers=1)
            params.is_transparent = True
            params.transparent_merger_dropout_prob = 0.0
            xformer = GPipeTransformerStack(params)

            input_ids, id_paddings, tgt_inputs, tgt_paddings, _, _ = _TransformerRandomInputsIds(
                batch=batch)
            inputs, paddings, _, _ = _TransformerRandomInputsVecs(batch=batch)
            tf.set_random_seed(1234)
            tf.global_variables_initializer().run()
            enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
            dec_output = xformer.FProp(xformer.theta, input_ids, id_paddings,
                                       tgt_inputs, tgt_paddings)[2]
            enc_out_1 = sess.run(enc_outputs)
            dec_out = sess.run(dec_output)
            self.assertAllClose(
                [[[0.68660116, 0.947429, 0.78953624, -1.20142817]] * batch,
                 [[0.57919669, 1.12979364, 4.29336643, 0.45106331]] * batch],
                enc_out_1)
            self.assertAllClose(
                [[[-0.46651918, -1.62957835, 1.15657926, 1.08397353]] * batch,
                 [[-0.34674695, -1.65999401, 1.08431196, 1.07384491]] * batch,
                 [[-0.41073492, -1.60431314, 1.04607999, 1.08858371]] * batch],
                dec_out)
Example #7
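Decoder-only stack (4 decoder layers) built with embedding layers; the forward pass over integer input ids is checked against golden values.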
  def testGPipeTransformerDecoderStackFPropWithEmbeddings(
      self, splits=1, num_micro_batches=1):
    batch = 4
    tf.flags.FLAGS.tpu_compatible = True
    with self.session() as sess:
      params = self._TransformerParamsWithEmbeddings(
          num_decoder_layers=4,
          num_encoder_layers=0,
          splits=splits,
          num_micro_batches=num_micro_batches)
      params.dtype = tf.float32
      xformer = GPipeTransformerStack(params)

      inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs_ids(
          batch)

      output = xformer.FProp(xformer.theta, inputs, paddings, tgt_inputs,
                             tgt_paddings)

      tf.global_variables_initializer().run()
      output_val = sess.run(output)
      self.assertAllCloseAccordingToType(
          [[[-2.29650807, 0.25992393, 1.81951356, 1.52897644]] * batch,
           [[-2.14101386, 0.32607365, 1.73413348, 1.51806736]] * batch,
           [[-2.18863297, 0.34420109, 1.65913653, 1.58703828]] * batch],
          output_val)
Example #8
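Builds a full MT model with a softmax layer (has_softmax=True); FProp then returns cross-entropy and logits for the given labels and label weights, which the test prints rather than asserts on.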
  def testGPipeTransformerMtModel(self, splits=1, num_micro_batches=1):
    batch = 4
    tf.flags.FLAGS.tpu_compatible = True
    with self.session() as sess:
      with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
        params = self._TransformerParamsWithEmbeddings(
            splits=splits,
            num_micro_batches=num_micro_batches,
            num_decoder_layers=2,
            has_softmax=True)
        params.state_dtype = tf.float32
        xformer = GPipeTransformerStack(params)

        input_ids, id_paddings, tgt_inputs, tgt_paddings = (
            self._random_inputs_ids(batch=batch))
        labels = tf.ones([3, batch])
        label_weights = tf.ones([3, batch])
        tf.set_random_seed(1234)
        tf.global_variables_initializer().run()
        xent, logits = xformer.FProp(xformer.theta, input_ids, id_paddings,
                                     tgt_inputs, tgt_paddings, None, None,
                                     labels, label_weights)
        xent_out, logits_out = sess.run([xent, logits])
        print('xent_out={}'.format(xent_out))
        print('logits_out={}'.format(logits_out))
Example #9
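Packed-input equivalence test for a stack with embeddings: besides segment ids, packed mode here also takes explicit segment position ids, and the packed output must match the unpacked output within the given tolerances.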
    def testGPipeTransformerFPropPackedInputWithEmbeddings(self, splits=1):
        batch = 4
        tf.flags.FLAGS.tpu_compatible = True
        with self.session():
            with tf.variable_scope('transformer_test', reuse=tf.AUTO_REUSE):
                params = _TransformerParamsWithEmbeddings(splits=splits,
                                                          num_decoder_layers=2)
                params.dtype = tf.float32
                params.fprop_dtype = tf.float32
                packed_params = params.Copy()
                packed_params.packed_input = True
                xformer = GPipeTransformerStack(params)
                packed_xformer = GPipeTransformerStack(packed_params)
                # Prepare inputs
                inputs, paddings, tgt_inputs, tgt_paddings, _, _ = _TransformerRandomInputsIds(
                    batch)
                packed_inputs = tf.reshape(inputs, [-1, 1])
                packed_tgt_inputs = tf.reshape(tgt_inputs, [-1, 1])
                packed_paddings = tf.reshape(paddings, [-1, 1])
                packed_tg_paddings = tf.reshape(tgt_paddings, [-1, 1])
                segment_ids = tf.transpose(
                    tf.constant([[0, 1, 2, 3, 0, 1, 2, 3]], dtype=tf.float32))
                tgt_segment_id = tf.transpose(
                    tf.constant([[0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]],
                                dtype=tf.float32))
                segment_pos_id = tf.transpose(
                    tf.constant([[0, 0, 0, 0, 1, 1, 1, 1]], dtype=tf.int32))
                tgt_segment_pos_id = tf.transpose(
                    tf.constant([[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]],
                                dtype=tf.int32))

                output = xformer.FProp(xformer.theta, inputs, paddings,
                                       tgt_inputs, tgt_paddings)[2]
                packed_output = packed_xformer.FProp(
                    packed_xformer.theta, packed_inputs, packed_paddings,
                    packed_tgt_inputs, packed_tg_paddings, segment_ids,
                    tgt_segment_id, None, None, segment_pos_id,
                    tgt_segment_pos_id)[2]
                packed_output = tf.reshape(packed_output, output.shape)

                self.evaluate(tf.global_variables_initializer())
                output, packed_output = self.evaluate([output, packed_output])
                self.assertAllClose(output,
                                    packed_output,
                                    rtol=1e-05,
                                    atol=1e-05)
Example #10
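Encoder-only forward pass: FProp is called with just inputs and paddings, and the output is checked against golden values.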
    def _testGPipeTransformerStackFProp(self, splits=1, num_micro_batches=1):
        batch = 4
        tf.flags.FLAGS.tpu_compatible = True
        with self.session() as sess:
            params = self._TransformerParams(
                splits=splits, num_micro_batches=num_micro_batches)
            params.dtype = tf.float32
            params.fprop_dtype = tf.float32
            xformer = GPipeTransformerStack(params)

            inputs, paddings, _, _ = self._random_inputs(batch)

            output = xformer.FProp(xformer.theta, inputs, paddings)

            tf.global_variables_initializer().run()
            output = sess.run(output)

            self.assertAllCloseAccordingToType(
                [[[0.21085747, 0.60925347]] * batch,
                 [[0.21085747, 0.60925347]] * batch], output)
Example #11
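Encoder-only forward pass for a stack with embedding layers, driven by integer input ids and checked against golden values.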
  def testGPipeTransformerStackFPropWithEmbeddings(self,
                                                   splits=1,
                                                   num_micro_batches=1):
    batch = 4
    tf.flags.FLAGS.tpu_compatible = True
    with self.session() as sess:
      params = self._TransformerParamsWithEmbeddings(
          splits=splits, num_micro_batches=num_micro_batches)
      params.dtype = tf.float32
      params.fprop_dtype = tf.float32
      xformer = GPipeTransformerStack(params)

      inputs, paddings, _, _ = self._random_inputs_ids(batch)

      output = xformer.FProp(xformer.theta, inputs, paddings)

      tf.global_variables_initializer().run()
      output = sess.run(output)

      self.assertAllCloseAccordingToType(
          [[[-1.67121327, -1.24759686, 1.41572773, 2.42515182]] * batch,
           [[-1.71240354, -1.1253252, 0.23407015, 3.40547156]] * batch], output)