def _testGPipeTransformerStackTrainEncoderTransparentFProp(
    self, splits=1, num_micro_batches=1):
  """Checks encoder and full FProp values with one transparent encoder output.

  Builds a 2-encoder/2-decoder transparent stack, runs the encoder-only
  forward pass and the full forward pass, and compares both against golden
  values. Expected to give identical results for any `splits` /
  `num_micro_batches` partitioning.

  Args:
    splits: Number of pipeline splits for GPipe.
    num_micro_batches: Number of micro-batches per pipeline step.
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined, which raises
  # NameError (the comment and sibling tests establish the intended value).
  batch = 4
  with self.session() as sess:
    params = self._TransformerParams(
        splits=splits,
        num_micro_batches=num_micro_batches,
        num_decoder_layers=2,
        num_encoder_layers=2)
    params.is_transparent = True
    params.num_transparent_outputs = 1
    params.transparent_merger_dropout_prob = 0.0
    xformer = GPipeTransformerStack(params)

    inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(
        batch=batch)
    py_utils.GetOrCreateGlobalStep()
    # Fixed seed so the golden values below are reproducible.
    tf.set_random_seed(1234)
    tf.global_variables_initializer().run()
    enc_output = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    dec_output = xformer.FProp(xformer.theta, inputs, paddings, tgt_inputs,
                               tgt_paddings)
    enc_out = sess.run(enc_output)
    dec_out = sess.run(dec_output)
    self.assertAllClose(enc_out,
                        [[[-0.118476, 1.031626]] * batch,
                         [[0.643884, -1.02581167]] * batch])
    self.assertAllClose(dec_out,
                        [[[-2.8764534, 1.00808454]] * batch,
                         [[1.02129495, -0.78406084]] * batch,
                         [[1.02129495, -0.78406084]] * batch])
def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
    self, splits=1, num_micro_batches=1):
  """Checks transparent-stack FProp values when embeddings are enabled.

  Uses a 1-encoder/3-decoder transparent stack with embedding layers; the
  encoder is fed pre-embedded vectors while the full FProp consumes ids.

  NOTE(review): a method with this exact name is defined again later in this
  file (with a different seed call and golden values); Python keeps only the
  last definition, so one of the two never runs — confirm which is intended.

  Args:
    splits: Number of pipeline splits for GPipe.
    num_micro_batches: Number of micro-batches per pipeline step.
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined (NameError);
  # the comment above documents the intended value.
  batch = 4
  with self.session() as sess:
    params = _TransformerParamsWithEmbeddings(
        splits=splits,
        num_micro_batches=num_micro_batches,
        num_decoder_layers=3,
        num_encoder_layers=1)
    params.is_transparent = True
    params.transparent_merger_dropout_prob = 0.0
    xformer = GPipeTransformerStack(params)

    input_ids, id_paddings, tgt_inputs, tgt_paddings, _, _ = (
        _TransformerRandomInputsIds(batch=batch))
    inputs, paddings, _, _ = _TransformerRandomInputsVecs(batch=batch)
    # Fixed seed so the golden values below are reproducible.
    tf.random.set_seed(1234)
    tf.global_variables_initializer().run()
    enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    # FProp returns a tuple; index 2 holds the decoder output checked here.
    dec_output = xformer.FProp(xformer.theta, input_ids, id_paddings,
                               tgt_inputs, tgt_paddings)[2]
    enc_out_1 = sess.run(enc_outputs)
    dec_out = sess.run(dec_output)
    self.assertAllClose(
        [[[0.017581, 0.802863, 0.975554, -1.164572]] * batch,
         [[-0.549953, 1.196884, 4.910457, -0.102137]] * batch], enc_out_1)
    self.assertAllClose(
        [[[-1.122128, 1.111972, 4.642949, -2.14831]] * batch,
         [[-1.336919, 1.182709, 4.785938, -2.039246]] * batch,
         [[-1.335168, 1.297679, 4.720459, -2.111006]] * batch], dec_out)
def _testGPipeTransformerStackTrainTransparentFProp(
    self, splits=1, num_micro_batches=1):
  """Checks FProp values with three transparent encoder outputs.

  Builds a 1-encoder/3-decoder stack with `num_transparent_outputs = 3`,
  asserts all three transparent encoder outputs are identical, and compares
  encoder and decoder outputs against golden values.

  Args:
    splits: Number of pipeline splits for GPipe.
    num_micro_batches: Number of micro-batches per pipeline step.
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined (NameError);
  # the comment above documents the intended value.
  batch = 4
  with self.session() as sess:
    params = self._TransformerParams(
        splits=splits,
        num_micro_batches=num_micro_batches,
        num_decoder_layers=3,
        num_encoder_layers=1)
    params.is_transparent = True
    params.num_transparent_outputs = 3
    params.transparent_merger_dropout_prob = 0.0
    xformer = GPipeTransformerStack(params)

    inputs, paddings, tgt_inputs, tgt_paddings = self._random_inputs(
        batch=batch)
    py_utils.GetOrCreateGlobalStep()
    # Fixed seed so the golden values below are reproducible.
    tf.set_random_seed(1234)
    tf.global_variables_initializer().run()
    enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    dec_output = xformer.FProp(xformer.theta, inputs, paddings, tgt_inputs,
                               tgt_paddings)
    enc_out_1, enc_out_2, enc_out_3 = sess.run(enc_outputs)
    dec_out = sess.run(dec_output)
    # All transparent outputs of the last encoder layer should agree.
    self.assertAllClose(enc_out_1, enc_out_2)
    self.assertAllClose(enc_out_2, enc_out_3)
    self.assertAllClose(enc_out_1,
                        [[[-0.27896273, 1.46589136]] * batch,
                         [[1.03141928, -0.847896]] * batch])
    self.assertAllClose(dec_out,
                        [[[2.926736, -4.090812]] * batch,
                         [[-1.69508219, 1.75891459]] * batch,
                         [[-1.6950829, 1.75891507]] * batch])
def _testGPipeTransformerEncoderFPropDefaultTheta(self,
                                                  splits=1,
                                                  num_micro_batches=1):
  """Checks encoder-only FProp golden values on a 4+4 layer float32 stack.

  Args:
    splits: Number of pipeline splits for GPipe.
    num_micro_batches: Number of micro-batches per pipeline step.
  """
  batch = 4
  tf.flags.FLAGS.tpu_compatible = True
  with self.session() as sess:
    params = self._TransformerParams(
        num_decoder_layers=4,
        num_encoder_layers=4,
        splits=splits,
        num_micro_batches=num_micro_batches)
    # Force both variable and fprop dtypes to float32.
    params.dtype = tf.float32
    params.fprop_dtype = tf.float32
    stack = GPipeTransformerStack(params)

    src_vecs, src_paddings, _, _ = self._random_inputs(batch)
    enc_tensor = stack.EncoderFPropDefaultTheta(src_vecs, src_paddings)
    tf.global_variables_initializer().run()
    enc_vals = sess.run(enc_tensor)
    # Both time steps are expected to produce the same per-example row.
    golden_step = [[0.21085747, 0.60925347]] * batch
    self.assertAllCloseAccordingToType([golden_step, golden_step], enc_vals)
def testGPipeTransformerStackTrainTransparentFPropWithEmbeddings(
    self, splits=1, num_micro_batches=1):
  """Checks transparent-stack FProp values when embeddings are enabled.

  Uses a 1-encoder/3-decoder transparent stack with embedding layers; the
  encoder is fed pre-embedded vectors while the full FProp consumes ids.

  NOTE(review): a method with this exact name is defined earlier in this
  file (with a different seed call and golden values); Python keeps only the
  last definition, so the earlier one never runs — confirm which is intended.

  Args:
    splits: Number of pipeline splits for GPipe.
    num_micro_batches: Number of micro-batches per pipeline step.
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined (NameError);
  # the comment above documents the intended value.
  batch = 4
  with self.session() as sess:
    params = _TransformerParamsWithEmbeddings(
        splits=splits,
        num_micro_batches=num_micro_batches,
        num_decoder_layers=3,
        num_encoder_layers=1)
    params.is_transparent = True
    params.transparent_merger_dropout_prob = 0.0
    xformer = GPipeTransformerStack(params)

    input_ids, id_paddings, tgt_inputs, tgt_paddings, _, _ = (
        _TransformerRandomInputsIds(batch=batch))
    inputs, paddings, _, _ = _TransformerRandomInputsVecs(batch=batch)
    # Fixed seed so the golden values below are reproducible.
    tf.set_random_seed(1234)
    tf.global_variables_initializer().run()
    enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    # FProp returns a tuple; index 2 holds the decoder output checked here.
    dec_output = xformer.FProp(xformer.theta, input_ids, id_paddings,
                               tgt_inputs, tgt_paddings)[2]
    enc_out_1 = sess.run(enc_outputs)
    dec_out = sess.run(dec_output)
    self.assertAllClose(
        [[[0.68660116, 0.947429, 0.78953624, -1.20142817]] * batch,
         [[0.57919669, 1.12979364, 4.29336643, 0.45106331]] * batch],
        enc_out_1)
    self.assertAllClose(
        [[[-0.46651918, -1.62957835, 1.15657926, 1.08397353]] * batch,
         [[-0.34674695, -1.65999401, 1.08431196, 1.07384491]] * batch,
         [[-0.41073492, -1.60431314, 1.04607999, 1.08858371]] * batch],
        dec_out)
def testGPipeTransformerStackTrainEncoderTransparentFPropEval(self):
  """Checks encoder FProp golden values in eval mode with one transparent output.

  Builds a 3-encoder/3-decoder transparent stack with `is_eval = True`
  (dropout disabled in eval, so no seed is needed for determinism —
  presumably; confirm against the params implementation).
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined (NameError);
  # the comment above documents the intended value.
  batch = 4
  with self.session() as sess:
    params = self._TransformerParams(
        num_decoder_layers=3, num_encoder_layers=3)
    params.is_transparent = True
    params.num_transparent_outputs = 1
    params.is_eval = True

    xformer = GPipeTransformerStack(params)

    inputs, paddings, _, _ = self._random_inputs(batch=batch)

    tf.global_variables_initializer().run()
    enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    enc_out = sess.run(enc_outputs)
    self.assertAllClose(enc_out,
                        [[[0.18823329, 0.71548849]] * batch,
                         [[0.76032472, -0.82791042]] * batch])
def testGPipeTransformerStackTrainTransparentFPropEval(self):
  """Checks encoder FProp golden values in eval mode with transparent outputs.

  Builds a 1-encoder/3-decoder transparent stack with `is_eval = True`; in
  eval mode the transparent encoder output carries an extra trailing axis
  (one slice per decoder layer), matching the nested golden values below.
  """
  # time = 2, batch = 4.
  # Fix: `batch` was previously read without being defined (NameError);
  # the comment above documents the intended value.
  batch = 4
  with self.session() as sess:
    params = self._TransformerParams(
        num_decoder_layers=3, num_encoder_layers=1)
    params.is_transparent = True
    params.is_eval = True

    xformer = GPipeTransformerStack(params)

    inputs, paddings, _, _ = self._random_inputs(batch=batch)

    tf.global_variables_initializer().run()
    enc_outputs = xformer.EncoderFPropDefaultTheta(inputs, paddings)
    enc_out = sess.run(enc_outputs)
    self.assertAllClose(
        enc_out,
        [[[[-0.27896273] * 3, [1.46589136] * 3]] * batch,
         [[[1.03141928] * 3, [-0.847896] * 3]] * batch])