def testEmbedding(self):
  builder = gshard_builder.DenseBuilder.Params().Set(
      model_dim=4, model_dim_reshape_segments=2).Instantiate()
  ids = [[1, 2, 3], [3, 2, 1]]
  graph = tf.Graph()
  with graph.as_default():
    tf.random.set_seed(24332)
    py_utils.GetOrCreateGlobalStepVar()
    emb_layer_p = builder.Embedding('emb', vocab_dim=4)
    emb_layer = emb_layer_p.Instantiate()
    enc_out = emb_layer.FPropDefaultTheta(
        tf.convert_to_tensor(ids, dtype=tf.int32))

  expected_val = [[[[-0.67452705, -2.6386688], [1.1666715, 0.04592554]],
                   [[-1.0561675, -0.48270327], [0.7765603, 0.6768117]],
                   [[0.8349989, 0.67100984], [-0.15557083, 1.275625]]],
                  [[[0.8349989, 0.67100984], [-0.15557083, 1.275625]],
                   [[-1.0561675, -0.48270327], [0.7765603, 0.6768117]],
                   [[-0.67452705, -2.6386688], [1.1666715, 0.04592554]]]]
  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    enc_out_vals = sess.run(enc_out)
    self.assertAllClose(expected_val, enc_out_vals)
def testBProp(self):
  with self.session():
    tf.random.set_seed(_TF_RANDOM_SEED)
    p = self._testParams()
    mdl = p.Instantiate()
    mdl.FPropDefaultTheta()
    mdl.BProp()
    loss = mdl.loss
    logp = mdl.eval_metrics['log_pplx'][0]

    self.evaluate(tf.global_variables_initializer())
    vals = []
    for _ in range(5):
      vals += [self.evaluate((loss, logp, mdl.train_op))[:2]]

    print('bprop actual vals = %s' % np.array_repr(np.array(vals)))
    expected_vals = [
        [226.92014, 10.373492],
        [225.25146, 9.585169],
        [248.49757, 9.8904505],
        [212.02884, 10.943424],
        [314.57098, 11.983657],
    ]
    self.assertAllClose(vals, expected_vals, atol=1e-3)
def testDecoderFPropWithMeanSeqLoss(self):
  """Creates and fprops a decoder with token-normalized per-sequence loss."""
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = _DecoderParams(
        vn_config=py_utils.VariationalNoiseParams(
            None, True, False, seed=12345))
    p.token_normalized_per_seq_loss = True
    p.per_token_avg_loss = False

    metrics, per_sequence_loss = self._getDecoderFPropMetrics(params=p)
    self.evaluate(tf.global_variables_initializer())
    metrics_val, per_sequence_loss_val = self.evaluate(
        [metrics, per_sequence_loss])
    tf.logging.info('metrics=%s, per_sequence_loss=%s', metrics_val,
                    per_sequence_loss_val)

    self.assertNotEqual(metrics_val['loss'][0], metrics_val['log_pplx'][0])
    self.assertAllClose(metrics_val['loss'], (3.484608, 4.0))
    self.assertAllClose(metrics_val['log_pplx'], (3.496482, 15.0))
    # Target batch size is 4. Therefore, we should expect 4 here.
    self.assertEqual(per_sequence_loss_val.shape, (4,))
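# A note on the (value, weight) pairs asserted above (a reading sketch, not
# part of the original test): each eval metric is a (value, weight) tuple.
# With token_normalized_per_seq_loss=True the loss is weighted by the 4
# target sequences, while log_pplx stays weighted by the 15 non-padded target
# tokens, which is why the two values need not match:
#   metrics_val['loss']     == (3.484608, 4.0)   # weight = batch size
#   metrics_val['log_pplx'] == (3.496482, 15.0)  # weight = token count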
def testForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    mt_enc = encoder.MTEncoderV1(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()

    expected_enc_out = [
        [[1.5309354e-06, -1.7816075e-07, 3.8047763e-06, -5.6422067e-07],
         [1.9017770e-06, -2.9778969e-06, -4.5083775e-06, -1.7054812e-06]],
        [[-2.1852782e-06, -1.8208171e-06, -1.4747930e-06, -5.8206351e-06],
         [6.7667429e-07, -3.6828042e-06, -1.0916860e-05, -3.2522742e-06]],
        [[-3.2333378e-07, 3.2147584e-06, 5.0556650e-07, -7.0188378e-07],
         [-6.5340635e-07, 1.9502845e-06, -9.2459632e-06, 5.1955390e-06]],
        [[2.0232728e-06, 4.9331529e-06, 1.1346837e-06, 7.5571520e-06],
         [-5.8475212e-07, 3.5547487e-06, -3.9037773e-06, 8.9575424e-06]]
    ]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testScaleGradientsCheckNumerics(self):
  """ScaleGradients when enable_check_numerics=True."""
  FLAGS.enable_check_numerics = True
  p = self.TestParams()
  p.input = base_input_generator.BaseSequenceInputGenerator.Params()
  task = p.Instantiate()
  task.CreateVariable(
      'a',
      py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Constant(0)))
  var_a = task.theta.a
  # Make a NaN gradient.
  var_grads = py_utils.NestedMap(
      a=py_utils.VarGrad(var_a, 0. * tf.math.log(0.)))
  scaled_grads_map = task.learners[0].ScaleGradients(var_grads)

  with self.session():
    self.evaluate(tf.global_variables_initializer())
    self.assertEqual(0., scaled_grads_map.grad_scale.eval())
    # Fetching the gradient raises an exception with enable_check_numerics.
    with self.assertRaisesRegex(tf.errors.InvalidArgumentError,
                                'is not finite'):
      _ = scaled_grads_map.final_var_grads.a[1].eval()
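# Standalone sketch (not part of the original test; assumes the same `tf`
# import as the tests above) of why the gradient built above is NaN:
# tf.math.log(0.) evaluates to -inf, and IEEE-754 defines 0 * inf == NaN,
# which is exactly what enable_check_numerics traps on.
def _NanGradientSketch():
  neg_inf = tf.math.log(0.)   # -> -inf
  nan = 0. * neg_inf          # 0 * -inf -> NaN per IEEE-754
  return tf.math.is_nan(nan)  # -> True once evaluated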
def _testLearner(self, layer, learner_p):
  tf.train.get_or_create_global_step()  # needed for lr_schedule
  lrnr = learner_p.Instantiate()
  if isinstance(learner_p.loss_name, (list, tuple)):
    main_loss = layer.MainLoss(layer.theta)
    aux_loss = layer.AuxLoss(layer.theta)
    metrics = {'main_loss': (main_loss, 1.), 'aux_loss': (aux_loss, 1.)}
    expected_losses = [main_loss, aux_loss]
  else:
    loss = layer.Loss(layer.theta)
    metrics = {learner_p.name: (loss, 1.)}
    expected_losses = [loss]
  losses, update_op, eval_metrics = lrnr.Apply(metrics, layer.vars)
  self.assertAllEqual(losses, expected_losses)
  with self.session():
    self.evaluate(tf.global_variables_initializer())
    var_grads = self.evaluate(lrnr.GetVarGrads().Transform(tuple))
    update_op.run()
    updated_vars = self.evaluate(layer.vars)
    return var_grads, updated_vars, eval_metrics
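# Hypothetical call site for the helper above (the layer and learner configs
# are invented for illustration): a single-loss learner sets a string
# loss_name, a multi-loss learner a tuple, matching the isinstance() branch
# in the helper.
#   single_p = learner.Learner.Params().Set(name='loss', learning_rate=0.1)
#   multi_p = single_p.Copy().Set(loss_name=('main_loss', 'aux_loss'))
#   var_grads, updated_vars, eval_metrics = self._testLearner(layer, multi_p)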
def __init__(self, train_dir, model):
  """Initialize Checkpointer.

  Args:
    train_dir: Training directory for saving checkpoints.
    model: A BaseModel instance.
  """
  self._train_dir = train_dir
  self._model = model
  self._params = model.params
  self._vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  self._uninitialized_vars = tf.report_uninitialized_variables(self._vars)
  self._initialize_vars = tf.global_variables_initializer()
  self._save_path = os.path.join(self._train_dir, 'ckpt')
  self._model_tasks = model.tasks

  tp = self._params.train
  self._save_interval_seconds = tp.save_interval_seconds
  self._next_checkpoint_seconds = 0
  self._saver = self._GetSaver()
def testBiEncoderForwardPassWithDropout(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._BiEncoderParams()
    p.dropout_prob = 0.5
    mt_enc = encoder.MTEncoderBiRNN(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()
    print('bi_enc_actual_enc_out_with_dropout',
          np.array_repr(actual_enc_out))
    expected_enc_out = [[[1.60383240e-06, 1.22550023e-06],
                         [-7.21660126e-06, 1.05704457e-05]],
                        [[1.42539475e-05, -2.06075638e-05],
                         [-4.98754298e-06, 1.51066461e-05]],
                        [[-7.15192800e-06, -6.44075908e-06],
                         [5.02962678e-07, -3.40795486e-06]],
                        [[-6.54424548e-06, 9.88359807e-06],
                         [1.42836643e-06, -1.68607176e-06]]]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testBiEncoderForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._BiEncoderParams()
    mt_enc = encoder.MTEncoderBiRNN(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()
    tf.logging.info('testBiEncoderForwardPass actual_enc_out %r' %
                    actual_enc_out)
    expected_enc_out = [[[-2.47998378e-06, 7.36457878e-06],
                         [7.89248020e-07, -2.67464316e-06]],
                        [[-2.98803275e-06, 8.20233890e-06],
                         [1.00139073e-06, -2.24554151e-06]],
                        [[-5.06675951e-06, 1.15983785e-05],
                         [-4.58391014e-07, -2.99553108e-07]],
                        [[-4.34937465e-06, 8.58816838e-06],
                         [-1.74859031e-06, 3.99598093e-06]]]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testDummyPipelineCnnNestedMapInput(self):
  batch_size = 16
  num_layers = 4
  cells = []
  with self.session(graph=tf.Graph()) as sess:
    for i in range(num_layers):
      cells.append(_SimpyLayerWithNestedMapInput.Params().Set(
          name='layer_{}'.format(i)))
    p = PipeliningLayer.Params().Set(
        name='pipeline',
        num_micro_batches=8,
        micro_batch_size=2,
        nested_map_fprop=True,
        cell_tpl=cells,
        before_tpl=[])
    layer = p.Instantiate()
    tf.set_random_seed(1245)
    inputs = tf.random_uniform([batch_size, 8, 8, 1], seed=12345)
    outputs = layer.FPropDefaultTheta(
        py_utils.NestedMap(vec=inputs, paddings=None))
    sess.run(tf.global_variables_initializer())
    sess.run(outputs.vec)
    self.assertEqual(outputs.vec.shape, (batch_size, 8, 8, 1))
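# Worked check of the pipeline arithmetic implied by the params above (not
# from the original test): batch_size 16 == num_micro_batches 8 *
# micro_batch_size 2, so each of the 8 FProp waves pushes a [2, 8, 8, 1]
# slice through the 4 pipelined cells.
#   assert p.num_micro_batches * p.micro_batch_size == batch_size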
def testEncNotVisible(self):

  def _Notvisible(x):
    a, b = tf.expand_dims(x, -1), tf.expand_dims(x, -2)
    return tf.cast(
        tf.math.logical_or(
            tf.not_equal(a, b),
            # also ignoring segment_id=0
            tf.math.logical_not(
                tf.math.logical_or(
                    tf.cast(a, tf.bool), tf.cast(b, tf.bool)))),
        tf.float32)

  builder = gshard_builder.DenseBuilder.Params().Set(
      dtype=tf.float32).Instantiate()
  graph = tf.Graph()
  with graph.as_default():
    segment_ids = tf.convert_to_tensor([[1, 1, 1, 1]], dtype=tf.int32)
    y = builder._EncNotVisible(segment_ids, segment_ids)
    y2 = _Notvisible(segment_ids)

  with self.session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    y_val, y2_val = sess.run([y, y2])
    self.assertAllEqual(y_val, y2_val)
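# Standalone numpy rendering of the same visibility rule (a sketch, not part
# of the original test; assumes the `np` import used above): position j is
# "not visible" from position i iff the two segment ids differ, or both are
# 0 (i.e. padding).
def _NotvisibleNumpy(segment_ids):
  seg = np.asarray(segment_ids)
  a, b = seg[:, :, None], seg[:, None, :]
  return ((a != b) | ((a == 0) & (b == 0))).astype(np.float32)

# For [[1, 1, 1, 1]] every pair shares segment 1, so the mask is all zeros.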
def testUniEncoderForwardPass(self):
  with self.session(use_gpu=False):
    tf.random.set_seed(8372749040)
    p = self._UniEncoderParams()
    mt_enc = encoder.MTEncoderUniRNN(p)
    batch = py_utils.NestedMap()
    batch.ids = tf.transpose(tf.reshape(tf.range(0, 8, 1), [4, 2]))
    batch.paddings = tf.zeros([2, 4])
    enc_out = mt_enc.FPropDefaultTheta(batch).encoded

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out.eval()
    tf.logging.info('testUniEncoderForwardPass actual_enc_out %r' %
                    actual_enc_out)
    expected_enc_out = [[[-4.3304257e-07, 5.4100457e-07],
                         [-4.0170832e-07, -2.6441572e-07]],
                        [[-1.7024040e-07, -1.8555815e-07],
                         [-6.4563977e-07, -3.7835261e-07]],
                        [[-2.4001852e-07, 5.1114228e-07],
                         [-3.4349023e-07, -1.0049351e-06]],
                        [[1.8068013e-07, -6.8982729e-08],
                         [3.3005003e-07, -8.8834116e-07]]]
    self.assertAllClose(expected_enc_out, actual_enc_out)
def testScaleGradients(self):
  p = self.TestParams()
  p.input = base_input_generator.BaseSequenceInputGenerator.Params()
  task = p.Instantiate()
  task.CreateVariable(
      'a',
      py_utils.WeightParams(shape=[], init=py_utils.WeightInit.Constant(0)))
  var_a = task.theta.a
  var_grads = py_utils.NestedMap(
      a=py_utils.VarGrad(var_a, tf.ones_like(var_a)))
  scaled_grads_map = task.learners[0].ScaleGradients(var_grads)

  FLAGS.enable_check_numerics = False
  with self.session():
    self.evaluate(tf.global_variables_initializer())
    self.assertEqual(1.0, scaled_grads_map.grad_scale.eval())
    # The final gradient must be finite.
    self.assertFalse(
        tf.math.is_nan(scaled_grads_map.final_var_grads.a[1]).eval())
    self.assertTrue(
        tf.math.is_finite(scaled_grads_map.final_var_grads.a[1]).eval())
def testFPropNoPostGatingRNN(self):
  vocab, time, batch = 7, 13, 3
  p = self._MoeLmParams(vocab, False, False)

  with self.session(graph=tf.Graph()) as sess:
    np.random.seed(54321)
    tf.random.set_seed(123456)
    lm = p.Instantiate()
    inputs, paddings, labels = self._GetData(vocab, time, batch)
    sess.run(tf.global_variables_initializer())
    xent_output, state1 = lm.FPropDefaultTheta(
        inputs=inputs,
        paddings=paddings,
        state0=lm.zero_state(lm.theta, batch),
        labels=labels)
    xent_output_val, state1_val = sess.run([xent_output, state1])

    print('xent_output_val', xent_output_val)
    print('state1', state1_val)
    test_utils.CompareToGoldenSingleFloat(self, 1.9443978,
                                          xent_output_val.avg_xent)
    # pyformat: disable
    # pylint: disable=line-too-long
    self.assertAllEqual(xent_output_val.per_example_argmax,
                        np.argmax(xent_output_val.logits, axis=-1))
def testBProp(self):
  with self.session() as sess:
    tf.random.set_seed(_TF_RANDOM_SEED)
    p = self._testParams()
    mdl = p.Instantiate()
    mdl.FPropDefaultTheta()
    mdl.BProp()
    loss = mdl.loss
    logp = mdl.eval_metrics['log_pplx'][0]

    self.evaluate(tf.global_variables_initializer())
    vals = []
    for _ in range(5):
      vals += [sess.run((loss, logp, mdl.train_op))[:2]]

    print('BProp actual vals = ', vals)
    expected_vals = [
        [233.57518, 10.381119],
        [236.05138, 10.375884],
        [217.9087, 10.376605],
        [217.77725, 10.370345],
        [159.43497, 10.369753],
    ]
    self.assertAllClose(vals, expected_vals)
def testRnnStackStepNoContext(self):
  with self.session(use_gpu=False):
    p = rnn_steps.RnnStackStep.Params()
    p.name = 'rnn_stack_step'
    p.rnn_cell_tpl.params_init = py_utils.WeightInit.Uniform(1.24, 429891685)
    p.rnn_cell_tpl.bias_init = py_utils.WeightInit.Uniform(1.24, 429891685)
    p.rnn_cell_tpl.vn.global_vn = False
    p.rnn_cell_tpl.vn.per_step_vn = False
    p.step_input_dim = 1
    p.rnn_cell_dim = 3
    p.rnn_cell_hidden_dim = 3
    p.rnn_layers = 2
    p.residual_start = 0
    rnn_stack = p.Instantiate()

    packed = rnn_stack.PrepareExternalInputs(rnn_stack.theta,
                                             py_utils.NestedMap())
    state0 = rnn_stack.ZeroState(rnn_stack.theta, packed, 1)
    output1, state1 = rnn_stack.FProp(
        rnn_stack.theta, packed,
        py_utils.NestedMap(inputs=[tf.constant([[4]], tf.float32)]),
        tf.constant([0.0], dtype=tf.float32), state0)

    self.evaluate(tf.global_variables_initializer())
    output1, state1 = self.evaluate([output1, state1])

    self.assertAllClose(output1.output, [[5.900284, 3.0231729, 3.0207822]])
    self.assertAllClose(
        state1, {
            'sub': [{
                'm': [[1.1416901, -0.32166323, -0.5909376]],
                'c': [[-0.98086286, 0.9052862, 0.10041453]]
            }, {
                'm': [[0.7585938, -0.655164, -0.3882802]],
                'c': [[-8.3011830e-01, 1.8685710e-01, 1.0723456e-04]]
            }]
        })
def _testGradDrop(self, graddrop_params):
  batch_size, dims = 4, 5
  gd_layer = graddrop_params.Set(name='test_gd_layer').Instantiate()
  linear_layer = builder_layers.LinearLayer.Params().Set(
      name='test_linear_layer', input_dims=dims,
      output_dims=dims).Instantiate()
  x = tf.random.uniform((batch_size, dims))
  x = linear_layer.FPropDefaultTheta(x)

  # Make a copy of x after graddrop.
  x_gd = gd_layer.FPropDefaultTheta(x)

  # Compute a loss based on graddrop's version of x.
  gd_loss_0 = tf.reduce_sum(x_gd**2)
  gd_loss_1 = tf.reduce_sum(-tf.abs(x_gd))
  gd_layer.SetLosses([
      (gd_loss_0, 0.1),
      (gd_loss_1, 0.2),
  ])
  gd_total_loss = gd_loss_0 + gd_loss_1
  gd_grad = tf.gradients(gd_total_loss, x)

  # Compute the same loss based on the regular version of x.
  loss_0 = tf.reduce_sum(x**2)
  loss_1 = tf.reduce_sum(-tf.abs(x))
  total_loss = loss_0 + loss_1
  grad = tf.gradients(total_loss, x)

  with self.session() as sess:
    sess.run(tf.global_variables_initializer())
    actual_total_loss, actual_grad, actual_gd_total_loss, actual_gd_grad = (
        sess.run([total_loss, grad, gd_total_loss, gd_grad]))

    # Verify that losses are similar, but the gradients are different.
    self.assertAllClose(actual_total_loss, actual_gd_total_loss)
    self.assertNotAllClose(actual_grad, actual_gd_grad)
def testTransformerStackAlternateLayers(self):
  batch = 3
  tf.flags.FLAGS.tpu_compatible = True
  with self.session(use_gpu=False) as sess:
    model_dim = 2
    num_transformer_layers = 2
    transformer_tpl = layers_with_attention.TransformerLayer.Params()
    transformer_tpl.tr_atten_tpl.num_attention_heads = 1
    transformer_tpl.tr_fflayer_tpl.hidden_dim = 2

    params = mt_layers.TransformerStack.Params().Set(
        name='transformer',
        model_dim=model_dim,
        num_transformer_layers=num_transformer_layers,
        transformer_tpl=[
            transformer_tpl.Copy() for _ in range(num_transformer_layers)
        ],
        random_seed=123456)

    xformer = mt_layers.TransformerStack(params)
    input_arr = np.array(
        [
            [[0, 1]] * batch,
            [[1, -1]] * batch,
        ], dtype=int)
    paddings_arr = np.array([[0] * batch, [0] * batch], dtype=int)
    inputs = tf.constant(
        input_arr.tolist(), dtype=py_utils.FPropDtype(params))
    paddings = tf.constant(
        paddings_arr.tolist(), dtype=py_utils.FPropDtype(params))

    output, _, _ = xformer.FProp(xformer.theta, inputs, paddings)
    self.evaluate(tf.global_variables_initializer())
    output = sess.run(output)

    print(repr(output))
    self.assertAllCloseAccordingToType(
        np.array([[[-0.940543, 1.479253]] * batch,
                  [[-0.413938, -2.550903]] * batch]), output)
def testParalellMultiOutputsLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)

    def Merge(xs):
      rets = []
      for x in zip(*xs):
        if x[0] is None:
          rets.append(None)
        else:
          rets.append(tf.add_n(list(x)))
      return tuple(rets)

    p = layers.ParallelLayer.Params().Set(
        name='parallel',
        merge=Merge,
        sub=[
            lingvo_layers.ConvLayer.Params().Set(
                name='p%d' % i,
                filter_shape=(3, 3, 3, 5),
                filter_stride=(1, 1),
                batch_norm=False) for i in range(3)
        ])
    l = p.Instantiate()
    x = tf.zeros(shape=[2, 32, 32, 3])
    y0, y1 = l.FPropDefaultTheta(x)
    y_sum = tf.reduce_sum(y0)
    # Ensures the 2nd return value (None) is handled properly.
    self.assertEqual(None, y1)

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    y_sum_val = sess.run(y_sum)
    self.assertEqual(y_sum_val, 0.)
def testParallelRepeatLayerLayer(self):
  repeat = 100
  body_p = layers.SequentialLayer.Params().Set(
      name='body',
      sub=[
          layers.LinearLayer.Params().Set(
              name='ln1', input_dims=2, output_dims=4),
          layers.FnLayer.Params().Set(
              name='relu',
              fn=tf.nn.relu,
              fn_meta=lambda x: py_utils.NestedMap(flops=1, out_shapes=(x,))),
          layers.LinearLayer.Params().Set(
              name='ln2', input_dims=4, output_dims=2)
      ])
  with self.session(use_gpu=False, graph=tf.Graph()):
    tf.random.set_seed(24332)
    p = layers.ParallelRepeatLayer.Params().Set(
        name='moe', repeat=repeat, body=body_p)
    l = p.Instantiate()
    x = tf.random.normal(shape=[repeat, 2, 2])
    y = l.FPropDefaultTheta(x)
    self.evaluate(tf.global_variables_initializer())
    x_val, y_val, w = self.evaluate([x, y, l.vars])

    np_val = []
    for i in range(repeat):
      # relu(act \dot w_1) \dot w_2
      np_val.append(
          np.dot(np.maximum(0, np.dot(x_val[i], w.body.ln1.w[i])),
                 w.body.ln2.w[i]))
    np_val = np.stack(np_val)
    self.assertAllClose(np_val, y_val)
def _TestRightContextStackingLayersHelper(self, **kwargs):
  """Applicable only if the layer implements StreamStep() with right context."""
  batch_size, max_seqlen, input_dim = 2, 32, kwargs['input_dim']
  stride = kwargs['stride']
  num_layers = kwargs['num_layers']
  right_context = kwargs.get('right_context', 0)
  assert max_seqlen % stride == 0

  # Prepares inputs.
  inputs, paddings = self._GetInputs(batch_size, max_seqlen, input_dim)

  # Gets params.
  p = self._GetParams(**kwargs)
  ps = [p.Copy().Set(name=f'base{i}') for i in range(num_layers)]

  # Builds graphs.
  layers = [x.Instantiate() for x in ps]
  base_outputs = self._BuildStackingBaseGraph(layers, num_layers, inputs,
                                              paddings)
  outputs = self._BuildStackingStreamGraph(layers, num_layers, inputs,
                                           paddings, stride, right_context)

  init_op = tf.global_variables_initializer()
  with self.session(use_gpu=False) as sess:
    sess.run(init_op)
    expected, actual = sess.run([base_outputs, outputs])
    print(f'expected: {repr(expected)}, {expected.shape}')
    print(f'actual: {repr(actual)}, {actual.shape}')
    print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
    print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
    self.assertAllClose(expected, actual, atol=5e-5)
    self.assertEqual(
        tuple(expected.shape), (batch_size, max_seqlen, input_dim))
def _TestHelperWithState(self, params, list_of_batches):
  """Returns the expected outputs for the tests.

  Args:
    params: Babelfish configuration parameters for setting up the
      cumulative_statistics_layer.
    list_of_batches: A list of padded batches of examples. The structure is
      a list of the following:
      {
        'features': tf.tensor(float32) of shape(len, batch, dim)
        'paddings': tf.tensor(float32) of shape(len, batch)
      }

  Returns:
    A dictionary containing numpy arrays of the expected test outputs.
    The structure is as follows:
    {
      'features': np.array(float32) of shape(len, batch, dim)
      'paddings': np.array(float32) of shape(len, batch)
    }
  """
  with self.session() as sess:
    tf.random.set_seed(_TF_RANDOM_SEED)
    network = params.Instantiate()

    batch_size = list_of_batches[0].features.shape[1]
    state = network.zero_state(network.theta, batch_size)
    for batch_t in list_of_batches:
      output = network.FProp(network.theta, batch_t, state)
      # Pass the output state over to the next batch as input state.
      state = output.state

    sess.run(
        tf.group(tf.global_variables_initializer(),
                 tf.tables_initializer()))
    return sess.run(output)
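# Hypothetical call site for the helper above (batch names invented): stream
# a padded sequence in two chunks with carried state and compare against a
# single full-sequence FProp.
#   full = self._TestHelperWithState(params, [whole_batch])
#   chunked = self._TestHelperWithState(params, [first_half, second_half])
#   self.assertAllClose(full.features, chunked.features)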
def testForwardPassWithTaskEmb(self):
  with self.session(use_gpu=False):
    bs = 2
    sl = 21
    tf.random.set_seed(8372749040)
    p = self._EncoderParams()
    p.task_emb = p.token_emb.Copy()
    p.task_emb.vocab_size = 4
    mt_enc = encoder.TransformerEncoder(p)

    batch = py_utils.NestedMap()
    batch.ids = tf.constant(
        np.random.randint(low=0, high=63, size=[bs, sl], dtype=np.int32))
    batch.task_ids = tf.constant(
        np.random.randint(low=0, high=3, size=[bs, sl], dtype=np.int32))
    batch.paddings = tf.zeros([bs, sl])

    enc_out = mt_enc.FPropDefaultTheta(batch)
    enc_out_sum = tf.reduce_sum(enc_out.encoded, 0)

    self.evaluate(tf.global_variables_initializer())
    actual_enc_out = enc_out_sum.eval()

    # pyformat: disable
    # pylint: disable=bad-whitespace
    expected_enc_out = [
        [  1.2796677, -31.786997, -0.4054339, -32.61311,
          42.41403,    11.020338, 54.115948, -61.322887,
          39.593548,   15.315696, -20.373957,  1.8548622,
         -17.743631,    3.140956, 30.730812,  41.4348],
        [ -1.0373995, -31.306532, -2.6323462, -32.078648,
          45.80049,    16.409424, 55.00114,   -63.102333,
          40.4261,     14.198621, -23.027012,   1.0839912,
         -20.739473,    0.7242553, 32.49956,   41.592197]]
    # pylint: enable=bad-whitespace
    # pyformat: enable
    self.assertAllClose(
        expected_enc_out, actual_enc_out, rtol=1e-05, atol=1e-05)
def testSequentialLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.SequentialLayer.Params().Set(
        name='seq',
        repeat=2,
        sub=[
            lingvo_layers.FCLayer.Params().Set(
                name='foo', input_dim=32, output_dim=8),
            lingvo_layers.FCLayer.Params().Set(
                name='bar', input_dim=8, output_dim=8),
            lingvo_layers.FCLayer.Params().Set(
                name='baz', input_dim=8, output_dim=32),
            lingvo_layers.DropoutLayer.Params().Set(
                name='dropout', keep_prob=0.5)
        ])
    p.is_eval = True
    l = p.Instantiate()
    x = tf.random_normal(shape=[2, 32])
    y = l.FPropDefaultTheta(x)
    l.vars.Transform(lambda x: x.shape).VLog(0, 'vars: ')

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    x_val, y_val, w = sess.run([x, y, l.vars])

    act = x_val
    # relu(act \dot w + b)
    for i in range(2):
      act = np.maximum(0, np.dot(act, w.rep[i].foo.w) + w.rep[i].foo.b)
      act = np.maximum(0, np.dot(act, w.rep[i].bar.w) + w.rep[i].bar.b)
      act = np.maximum(0, np.dot(act, w.rep[i].baz.w) + w.rep[i].baz.b)
    self.assertAllClose(act, y_val)
def testEagerMultiLearnerCheckpointCompatibility(self):
  self.assertTrue(tf.executing_eagerly())
  cfg = model_registry.GetParams('test.LinearModelParams', 'Train')
  mdl = cfg.Instantiate()
  # Disable async checkpointing.
  cfg.task.train.async_checkpointing = False
  cfg.train.async_checkpointing = False

  with py_utils.GradientTape(persistent=True):
    mdl.ConstructFPropBPropGraph()

  eager_v1_logdir = os.path.join(self.get_temp_dir(), 'eager_v1')
  eager_v2_logdir = os.path.join(self.get_temp_dir(), 'eager_v2')
  checkpointer.EagerCheckpointerV1(eager_v1_logdir, mdl).Save(gsteps=0)
  checkpointer.EagerCheckpointerV2(eager_v2_logdir, mdl).Save(gsteps=0)
  eager_v1_keys = _GetCheckpointKeys(
      os.path.join(eager_v1_logdir, 'ckpt_V1', 'ckpt-00000000'))
  eager_v2_keys = _GetCheckpointKeys(
      os.path.join(eager_v2_logdir, 'ckpt_V2', 'ckpt-0'))
  # Expecting two more variables in V2 checkpoints:
  # _CHECKPOINTABLE_OBJECT_GRAPH
  # save_counter
  self.assertEqual(len(eager_v1_keys) + 2, len(eager_v2_keys))  # pylint:disable=g-generic-assert

  py_utils.SetEagerMode(False)
  self.assertFalse(tf.executing_eagerly())
  graph_logdir = os.path.join(self.get_temp_dir(), 'graph')
  os.mkdir(graph_logdir)
  with self.session(graph=tf.Graph()) as sess:
    mdl = cfg.Instantiate()
    for lrn in mdl.GetTask().learners:
      lrn.optimizer.params.clear_variable_scope = False
    mdl.ConstructFPropBPropGraph()
    sess.run(tf.global_variables_initializer())
    checkpointer.Checkpointer(graph_logdir, mdl).Save(sess, gsteps=0)
  graph_keys = _GetCheckpointKeys(os.path.join(graph_logdir, 'ckpt-00000000'))
  self.assertEqual(eager_v1_keys, graph_keys)
def testInference(self):
  with self.session() as sess:
    tf.random.set_seed(1618)
    p = model_registry.GetParams('test.MnistV2', 'Test')
    p.random_seed = 73234288
    p.input.ckpt = self.data_path
    p.task.params_init = py_utils.WeightInit.Uniform(0.1, seed=73234288)
    model = p.Instantiate()
    subgraphs = model.GetTask().Inference()
    self.assertCountEqual(['default'], list(subgraphs.keys()))
    fetches, feeds = subgraphs['default']
    self.assertCountEqual(['normalized_image'], list(feeds.keys()))
    self.assertCountEqual(['logits', 'probs', 'prediction'],
                          list(fetches.keys()))
    self.evaluate(tf.global_variables_initializer())
    fetch_results = sess.run(
        fetches, {feeds['normalized_image']: np.zeros(p.input.data_shape)})
    self.assertAllEqual([p.task.softmax.num_classes],
                        fetch_results['logits'].shape)
    self.assertAllEqual([p.task.softmax.num_classes],
                        fetch_results['probs'].shape)
    self.assertAllEqual([], fetch_results['prediction'].shape)
def testGraphLayer(self):
  g = tf.Graph()
  with g.as_default(), self.SetEval(True):
    tf.random.set_seed(24332)

    def _FnMeta(*shapes):
      return py_utils.NestedMap(flops=1, out_shapes=shapes)

    p = layers.GraphLayer.Params().Set(
        name='graph',
        input_endpoints=['x'],
        output_endpoints=['y'],
        sub=[
            ('x.a->y.c',
             layers.FnLayer.Params().Set(fn=lambda x: 2 * x,
                                         fn_meta=_FnMeta)),
            ('x.b->y.d',
             layers.FnLayer.Params().Set(
                 name='bar', fn=lambda x: x + 2, fn_meta=_FnMeta)),
            ('y.c,y.d->y.e,y.f',
             layers.FnLayer.Params().Set(
                 name='baz', fn=lambda x, y: (x + y, x - y),
                 fn_meta=_FnMeta)),
        ])
    l = p.Instantiate()
    x = py_utils.NestedMap(a=tf.constant(1.0), b=tf.constant(2.0))
    y = l.FProp(l.theta, x)
    y_shape = l.FPropMeta(
        p, py_utils.Transform(lambda t: tshape.Shape(t.shape),
                              x)).out_shapes[0]
    self.assertDictEqual(
        py_utils.Transform(lambda t: t.shape.as_list(), y),
        py_utils.Transform(lambda t: t.ToTensorShape().as_list(), y_shape))

  with self.session(graph=g):
    self.evaluate(tf.global_variables_initializer())
    y_val = self.evaluate(y)
    print(y_val)
    self.assertEqual(py_utils.NestedMap(c=2.0, d=4.0, e=6.0, f=-2.0), y_val)
def testRepeatLayerNestedMapFPropInputSignature(self):
  """Tests RepeatLayer having body layer with NestedMap in FProp signature."""
  repeat = 100
  input_dim, output_dim = 2, 2
  # Reference RepeatLayer.
  ref_p = layers.RepeatLayer.Params().Set(
      name='ref_recurrent',
      repeat=repeat,
      body=lingvo_layers.FCLayer.Params().Set(
          input_dim=input_dim, output_dim=output_dim))
  # RepeatLayer with NestedMap in `body` FProp input signature.
  new_p = layers.RepeatLayer.Params().Set(
      name='nested_map_recurrent',
      repeat=repeat,
      body=FCLayerTestNestedMapFPropInput.Params().Set(
          input_dim=input_dim, output_dim=output_dim))
  # Verify FProp output equality for both layers.
  ref_layer = ref_p.Instantiate()
  new_layer = new_p.Instantiate()
  assign_op = [
      tf.assign(dst, src) for (src, dst) in zip(ref_layer.vars.Flatten(),
                                                new_layer.vars.Flatten())
  ]
  with self.session() as sess:
    tf.random.set_seed(24332)
    sess.run(tf.global_variables_initializer())
    sess.run(assign_op)
    inputs = tf.random.normal(shape=[2, 2])
    paddings = tf.zeros((2, 1))
    ref_outputs = ref_layer.FPropDefaultTheta(inputs)
    new_out_nmap = new_layer.FPropDefaultTheta(
        py_utils.NestedMap(features=inputs, paddings=paddings))
    ref_out_vals = sess.run(ref_outputs)
    new_out_vals = sess.run(new_out_nmap.features)
    self.assertAllClose(ref_out_vals, new_out_vals)
def __init__(self, train_dir, model, train_params=None, save_only=False):
  """Initialize Checkpointer.

  Args:
    train_dir: Training directory for saving checkpoints.
    model: A BaseModel instance or None.
    train_params: If specified, use these training params instead of those
      in the `model`.
    save_only: This checkpointer is only intended for saving checkpoints.
  """
  self._train_dir = train_dir
  self._save_only = save_only

  self._vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
  self._uninitialized_vars = tf.report_uninitialized_variables(self._vars)
  self._initialize_vars = tf.global_variables_initializer()

  self._save_path = os.path.join(self._train_dir, 'ckpt')

  if train_params:
    self._train_params = train_params
    self._model = None
  else:
    assert model
    self._train_params = model.params.train
    self._model = model

  if not self._save_only:
    self._params = model.params
    self._model_tasks = model.tasks
    self._model = model

  self._next_checkpoint_seconds = 0
  self._save_interval_seconds = self._train_params.save_interval_seconds
  self._saver = self._GetSaver()
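# Usage sketch mirroring testEagerMultiLearnerCheckpointCompatibility above
# (graph mode; `cfg` and `logdir` stand in for caller-provided values):
#   with self.session(graph=tf.Graph()) as sess:
#     mdl = cfg.Instantiate()
#     mdl.ConstructFPropBPropGraph()
#     sess.run(tf.global_variables_initializer())
#     checkpointer.Checkpointer(logdir, mdl).Save(sess, gsteps=0)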
def testLinearLayer(self):
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(24332)
    p = layers.LinearLayer.Params().Set(
        name='test', input_dims=10, output_dims=5)
    l = p.Instantiate()
    xs = []
    ys = []
    for shape in ([2, 10], [2, 3, 10], [2, 3, 5, 10], [2, 3, 5, 7, 10]):
      x = tf.random_normal(shape=shape)
      y = l.FPropDefaultTheta(x)
      xs += [x]
      ys += [y]

  with self.session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    xs_val, ys_val, w_val = sess.run([xs, ys, l.vars])
    self.assertEqual(w_val.w.shape, (10, 5))
    for (xv, yv) in zip(xs_val, ys_val):
      self.assertAllClose(np.matmul(xv, w_val.w), yv)