def test_lookup(self):
    """Verify `_lookup_embeddings_and_slots` reads embedding vectors and
    Adam slot values ("m", "v") from the kv store, and remaps the
    gradients' sparse indices to positions in the per-layer unique-id list.
    """
    opt = Adam()
    opt_wrapper = OptimizerWrapper(opt, None, {})
    embedding_dim = 4
    layers = ["embedding_0", "embedding_1"]
    grads = [
        tf.IndexedSlices(None, tf.constant([2, 0, 2])),
        tf.IndexedSlices(None, tf.constant([1, 2, 0, 2])),
    ]
    ids_list = [[2, 0], [1, 2, 0]]
    grads_and_vars = list(zip(grads, layers))
    mock_kv_store = MockKvStore({})
    for layer in layers:
        for embed_id in range(3):
            # Seed the kv store with a random embedding vector for this id.
            mock_kv_store.update(
                keys=[Embedding.get_key([layer, embed_id])],
                values=[np.random.rand(embedding_dim).astype(np.float32)],
            )
            # Seed the Adam slot values for this id.
            for slot in ["m", "v"]:
                mock_kv_store.update(
                    keys=[Embedding.get_key([layer, slot, embed_id])],
                    values=[
                        np.random.rand(embedding_dim).astype(np.float32)
                    ],
                )

    with mock.patch.object(
        EmbeddingService, "lookup_embedding", mock_kv_store.lookup
    ):
        embeddings, slot_values = opt_wrapper._lookup_embeddings_and_slots(
            grads_and_vars
        )

    # The sparse indices should now index into the unique-id list.
    grad0 = grads_and_vars[0][0]
    self.assertTrue((grad0.indices.numpy() == [0, 1, 0]).all())
    grad1 = grads_and_vars[1][0]
    self.assertTrue((grad1.indices.numpy() == [0, 1, 2, 1]).all())

    for ids, layer in zip(ids_list, layers):
        self.assertTrue(
            (opt_wrapper._unique_ids_all_layers[layer] == ids).all()
        )
        # Looked-up embeddings should match the kv store contents.
        values, _ = mock_kv_store.lookup(
            keys=[Embedding.get_key([layer, embed_id]) for embed_id in ids]
        )
        values = np.concatenate(values).reshape(-1, embedding_dim)
        self.assertTrue((embeddings[layer] - values < 0.0001).all())
        # Looked-up slot values should match the kv store contents.
        for slot in ["m", "v"]:
            values, _ = mock_kv_store.lookup(
                keys=[
                    Embedding.get_key([layer, slot, embed_id])
                    for embed_id in ids
                ]
            )
            values = np.concatenate(values).reshape(-1, embedding_dim)
            self.assertTrue(
                (slot_values[layer][slot] - values < 0.0001).all()
            )
def test_report_to_kv_store(self):
    """Verify `_report_to_kv_store` writes the wrapper's embedding and
    slot variables back to the kv store under the expected keys.
    """
    opt = SGD(momentum=0.1)
    opt_wrapper = OptimizerWrapper(opt, None, {})
    ids_list = [[1, 5], [10]]
    opt_wrapper._unique_ids_all_layers = {
        "test_1": np.array(ids_list[0]),
        "test_2": np.array(ids_list[1]),
    }
    t = np.array([1.0, 1.0, 1.0])
    opt_wrapper._embed_variables = {
        "test_1": tf.Variable([t, t * 5]),
        "test_2": tf.Variable([t * 10]),
    }
    opt_wrapper._slot_variables = {
        "test_1": {"momentum": tf.Variable([t / 10.0, t / 2.0])},
        "test_2": {"momentum": tf.Variable([t])},
    }

    mock_kv_store = MockKvStore({})
    with mock.patch.object(
        EmbeddingService, "update_embedding", mock_kv_store.update
    ):
        opt_wrapper._report_to_kv_store()

    # Build a kv store holding the expected contents, then compare the
    # reported values entry by entry.
    expected_mock_kv_store = MockKvStore({})
    expected_mock_kv_store.update(
        keys=["test_1-1", "test_1-5", "test_2-10"],
        values=[t, t * 5.0, t * 10.0],
    )
    expected_mock_kv_store.update(
        keys=[
            "test_1-momentum-1",
            "test_1-momentum-5",
            "test_2-momentum-10",
        ],
        values=[t / 10.0, t / 2.0, t],
    )
    for layer, ids in zip(["test_1", "test_2"], ids_list):
        for embed_id in ids:
            key = Embedding.get_key([layer, embed_id])
            v, _ = mock_kv_store.lookup([key])
            expected_v, _ = expected_mock_kv_store.lookup([key])
            self.assertTrue((v[0] == expected_v[0]).all())
def _test_correctness(self, optimizer_class, X, Y, seed, **kwargs):
    """Test the correctness of a specific TensorFlow optimizer.

    Trains a reference Keras model with the native optimizer and the EDL
    embedding model through `OptimizerWrapper`, then asserts the two end
    up with (numerically close) identical parameters.

    Args:
        optimizer_class: TensorFlow optimizer class to test.
        X: training inputs.
        Y: training labels.
        seed: random seed so both training runs are reproducible.
        **kwargs: forwarded to the optimizer constructor.
    """
    _model_file = get_module_file_path(
        os.path.dirname(os.path.realpath(__file__)),
        "embedding_test_module.KerasEmbeddingModel",
    )
    model_module = load_module(_model_file).__dict__

    # Train the reference model with the native TensorFlow optimizer.
    weights = self._random_init_model_weight(
        [(4, 4), (4, 4), (72, 1), (1,)], seed
    )
    loss_fn = model_module["loss"]
    model1 = model_module["KerasEmbeddingModel"](4, 4, weights)
    opt1 = optimizer_class(**kwargs)
    _train(model1, opt1, X, Y, loss_fn, random_seed=seed)

    model2 = model_module["EdlEmbeddingModel"](4, weights[2:])
    opt2 = optimizer_class(**kwargs)

    layer_names = [layer.name for layer in find_layer(model2, Embedding)]
    embed_dims = {layer_name: 4 for layer_name in layer_names}

    # Initialize embedding vectors in the mock kv store.
    mock_kv_store = MockKvStore({})
    for layer, embed_table in zip(layer_names, weights[:2]):
        for i, embed_vector in enumerate(embed_table):
            mock_kv_store.update(["%s-%d" % (layer, i)], [embed_vector])

    # Train the same model through the optimizer wrapper.
    with mock.patch.object(
        EmbeddingService, "lookup_embedding", mock_kv_store.lookup
    ), mock.patch.object(
        EmbeddingService, "update_embedding", mock_kv_store.update
    ):
        _train_edl_embedding_with_optimizer_wrapper(
            model2, opt2, X, Y, loss_fn, embed_dims, random_seed=seed
        )

    # Compare trained parameters of the two models.
    wrong_msg = (
        "The updated parameters of Optimizer Wrapper and TensorFlow "
        "optimizer %s differ." % opt1.get_config()["name"]
    )
    for layer1, layer2 in zip(model1.layers, model2.layers):
        if "embedding" in layer2.name:
            # Embedding weights of model2 live in the kv store.
            w1 = layer1.weights[0].numpy()
            keys = [Embedding.get_key([layer2.name, i]) for i in range(4)]
            w2 = np.concatenate(mock_kv_store.lookup(keys)[0]).reshape(
                4, -1
            )
            self.assertTrue((w1 - w2 < 0.0001).all(), msg=wrong_msg)
        else:
            for w1, w2 in zip(layer1.weights, layer2.weights):
                self.assertTrue(
                    (w1 - w2 < 0.0001).numpy().all(), msg=wrong_msg
                )
def _test_async_correctness(
    self,
    grads_and_vars_batches,
    embed_values,
    expected_non_embed_values,
    expected_embed_values=None,
):
    """Checks the correctness of async OptimizerWrapper.

    This function creates many threads and these threads call
    `OptimizerWrapper.apply_gradients` simultaneously.

    Args:
        grads_and_vars_batches: A python list of `grads_and_vars`. Every
            thread takes a `grads_and_vars` and calls `apply_gradients`.
        embed_values: A python dictionary of
            `(layer_name, embedding table)`.
        expected_non_embed_values: A python list of expected non-embedding
            values after applying gradients.
        expected_embed_values: A python dictionary of expected embedding
            values after applying gradients. None means no need to check
            embedding values.
    """
    thread_num = len(grads_and_vars_batches)
    embed_dims = {}
    embed_var_n = len(embed_values)
    mock_kv_store = MockKvStore()
    for layer, values in embed_values.items():
        embed_dims[layer] = values.shape[1]
        # NOTE(review): assumes every embedding table has the same number
        # of rows; `input_dim` from the last layer is reused below.
        input_dim = values.shape[0]
        keys = [
            Embedding.get_key([layer, idx]) for idx in range(input_dim)
        ]
        mock_kv_store.update(keys, values)

    opt = SGD(0.1)
    # `True` enables the wrapper's asynchronous (lock-free) mode.
    opt_wrapper = OptimizerWrapper(opt, None, embed_dims, True)

    with mock.patch.object(
        EmbeddingService, "lookup_embedding", mock_kv_store.lookup
    ), mock.patch.object(
        EmbeddingService, "update_embedding", mock_kv_store.update
    ):
        # call optimizer_wrapper.apply_gradients asynchronously
        def _apply_gradients(opt_wrapper, grads_and_vars):
            # sleep 1s to wait that all threads are in this method call
            time.sleep(1)
            opt_wrapper.apply_gradients(grads_and_vars)

        # Context manager joins all workers on exit.
        with ThreadPoolExecutor(max_workers=thread_num) as executor:
            tasks = [
                executor.submit(
                    _apply_gradients, opt_wrapper, grads_and_vars
                )
                for grads_and_vars in grads_and_vars_batches
            ]
            # Propagate any exception raised inside a worker thread.
            _ = [task.result() for task in tasks]

        # check updated results of non-embedding variables; the last
        # `embed_var_n` entries of each batch are embedding gradients
        non_embed_vars = [
            var for _, var in grads_and_vars_batches[0][:-embed_var_n]
        ]
        for var, expected_value in zip(
            non_embed_vars, expected_non_embed_values
        ):
            self.assertTrue(np.isclose(var.numpy(), expected_value).all())

        # `expected_embed_values=None` means that no need to check
        # embedding table
        if not expected_embed_values:
            return

        # check updated results of embedding table; in async mode the
        # result may match any one of several interleaving outcomes
        for layer, expected_values in expected_embed_values.items():
            keys = [
                Embedding.get_key([layer, idx]) for idx in range(input_dim)
            ]
            raw_value, _ = mock_kv_store.lookup(keys)
            value = np.concatenate(raw_value).reshape(input_dim, -1)
            self.assertTrue(
                any(
                    np.isclose(value, expected).all()
                    for expected in expected_values
                )
            )