Example #1
    def test_report_to_kv_store(self):
        opt = SGD(momentum=0.1)
        opt_wrapper = OptimizerWrapper(opt, None, {})

        ids_list = [[1, 5], [10]]
        opt_wrapper._unique_ids_all_layers = {
            "test_1": np.array(ids_list[0]),
            "test_2": np.array(ids_list[1]),
        }
        t = np.array([1.0, 1.0, 1.0])
        opt_wrapper._embed_variables = {
            "test_1": tf.Variable([t, t * 5]),
            "test_2": tf.Variable([t * 10]),
        }
        opt_wrapper._slot_variables = {
            "test_1": {"momentum": tf.Variable([t / 10.0, t / 2.0])},
            "test_2": {"momentum": tf.Variable([t])},
        }

        mock_kv_store = MockKvStore({})
        with mock.patch.object(
            EmbeddingService, "update_embedding", mock_kv_store.update
        ):
            opt_wrapper._report_to_kv_store()

        expected_mock_kv_store = MockKvStore({})
        expected_mock_kv_store.update(
            keys=["test_1-1", "test_1-5", "test_2-10"],
            values=[t, t * 5.0, t * 10.0],
        )
        expected_mock_kv_store.update(
            keys=[
                "test_1-momentum-1",
                "test_1-momentum-5",
                "test_2-momentum-10",
            ],
            values=[t / 10.0, t / 2.0, t],
        )
        for k, ids in zip(["test_1", "test_2"], ids_list):
            for id in ids:
                key = Embedding.get_key([k, id])
                v, _ = mock_kv_store.lookup([key])
                expected_v, _ = expected_mock_kv_store.lookup([key])
                self.assertTrue((v[0] == expected_v[0]).all())
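
The test above patches `EmbeddingService.update_embedding` with a `MockKvStore` and then compares the reported values key by key; keys come from `Embedding.get_key([layer_name, id])` and look like "test_1-5". Below is a minimal, hypothetical sketch of a dict-backed store with the same `update`/`lookup` interface; the real `MockKvStore` used by these tests is not shown on this page and may differ.

import numpy as np


class SimpleKvStore(object):
    """Hypothetical stand-in for MockKvStore: string keys to 1-D arrays."""

    def __init__(self, store=None):
        self._store = store if store is not None else {}

    def update(self, keys, values):
        # store a copy of each embedding vector under its key
        for key, value in zip(keys, values):
            self._store[key] = np.array(value)

    def lookup(self, keys):
        # return the found values and the list of missing keys
        values, unknown_keys = [], []
        for key in keys:
            if key in self._store:
                values.append(self._store[key])
            else:
                unknown_keys.append(key)
        return values, unknown_keys


kv = SimpleKvStore()
kv.update(keys=["test_1-1", "test_1-5"], values=[np.ones(3), np.ones(3) * 5.0])
print(kv.lookup(["test_1-5"]))  # ([array([5., 5., 5.])], [])
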
Example #2
    def test_set_embedding_values_to_variables(self):
        layers = ["test-1", "test-2"]
        id_num = 3
        embedding_dims = {layer: 4 for layer in layers}
        all_values = np.arange(24).reshape(6, 4).astype(np.float32)

        embedding_values = {}
        offset = 0
        for layer in layers:
            start = offset
            end = offset + id_num
            embedding_values.setdefault(layer, all_values[start:end])
            offset = end

        opt = SGD()
        opt_wrapper = OptimizerWrapper(opt, None, embedding_dims)
        grads_and_vars = [
            ("test-1-grads", "test-1"),
            ("test-2-grads", "test-2"),
        ]
        opt_wrapper._set_embedding_values_to_variables(
            grads_and_vars, embedding_values
        )
        for i, layer in enumerate(layers):
            self.assertTrue(
                (
                    opt_wrapper._embed_variables[layer].numpy()
                    == embedding_values[layer]
                ).all()
            )
            self.assertTrue(
                (
                    grads_and_vars[i][1].numpy()
                    == opt_wrapper._embed_variables[layer].numpy()
                ).all()
            )

        embedding_values_new = {"test-1": np.zeros((3, 4), np.float32)}
        grads_and_vars = [("test-1-grads", "test-1")]
        opt_wrapper._set_embedding_values_to_variables(
            grads_and_vars, embedding_values_new
        )
        self.assertTrue(
            (opt_wrapper._embed_variables["test-1"].numpy() < 0.0001).all()
        )
Example #3
    def _compare_initialize_values(self, opt, dim, slot, expected_init):
        tmp = OptimizerWrapper(opt, None, {"test": dim})
        self.assertTrue(
            (
                tmp._initialize_unknown_slot("test", slot)
                - expected_init(dim)
                < 0.0001
            ).all()
        )
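
A hedged usage sketch of the helper above, written as it might appear in the same test class. The expected initial values are assumptions: Adam's "m" and "v" slots are assumed to initialize to zeros of length dim, so np.zeros serves as expected_init.

    def test_initialize_unknown_slot(self):
        # hypothetical expectations: zero-initialized Adam slots of length dim
        self._compare_initialize_values(Adam(), 4, "m", np.zeros)
        self._compare_initialize_values(Adam(), 8, "v", np.zeros)
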
Example #4
    def _compare_slot_names(self, opt, expected):
        tmp = OptimizerWrapper(opt, None, {})
        self.assertTrue(sorted(tmp.allowed_slot_names) == sorted(expected))
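
A hedged usage sketch of `_compare_slot_names`, assuming the slot names that appear elsewhere on this page ("momentum" for SGD with momentum in Example #1, "m" and "v" for Adam in Example #8) and that plain SGD exposes no slots.

    def test_allowed_slot_names(self):
        # expectations based on the standard Keras slot names
        self._compare_slot_names(SGD(), [])
        self._compare_slot_names(SGD(momentum=0.2), ["momentum"])
        self._compare_slot_names(Adam(), ["m", "v"])
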
Example #5
def _train_edl_embedding_with_optimizer_wrapper(model, opt_keras, X, Y,
                                                loss_fn, params, random_seed):
    """Train model with optimizer wrapper."""
    tf.random.set_seed(random_seed)
    opt_wrapper = OptimizerWrapper(
        opt_keras,
        lookup_embedding_func=params.get_embedding_param,
        update_embedding_func=params.set_embedding_param,
    )

    embed_layers = find_layer(model, Embedding)

    # initialize slot params
    params.create_slot_params(opt_wrapper.allowed_slot_names,
                              opt_wrapper.slot_initial_value)

    # initialize ElasticDL embedding layer
    for layer in embed_layers:
        layer.set_lookup_embedding_func(params.get_embedding_param)

    # training process
    for train_iter, (features, labels) in enumerate(zip(X, Y)):
        with tf.GradientTape() as tape:
            for layer in embed_layers:
                layer.set_tape(tape)
            outputs = model.call(features)
            loss = loss_fn(outputs, labels)

        # Need to fetch non-embedding variables inside the loop because the
        # model only creates its variables during the first `model.call`
        if not train_iter:
            non_embed_vars = get_non_embedding_trainable_vars(
                model, embed_layers)
        embed_items = []
        for layer in embed_layers:
            embed_items.extend([(bet, layer.name, ids)
                                for bet, ids in layer.embedding_and_ids])

        grads = tape.gradient(
            loss, non_embed_vars + [var for var, _, _ in embed_items])

        # TODO: merging gradients from the same embedding layer will be
        # unnecessary once `optimizer_wrapper` supports grads_and_vars with
        # duplicated layer names
        non_embed_vars_n = len(non_embed_vars)
        non_embed_grads = grads[:non_embed_vars_n]
        embed_grads_dict = {}
        for (_, layer_name, ids), grad in zip(embed_items,
                                              grads[non_embed_vars_n:]):
            if layer_name in embed_grads_dict:
                merged_grads = embed_grads_dict[layer_name]
                embed_grads_dict[layer_name] = tf.IndexedSlices(
                    tf.concat([merged_grads.values, grad.values], axis=0),
                    tf.concat([merged_grads.indices, ids], axis=0),
                )
            else:
                embed_grads_dict[layer_name] = tf.IndexedSlices(
                    grad.values, ids)

        opt_wrapper.apply_gradients(
            list(zip(non_embed_grads, non_embed_vars)) +
            [(grad, layer_name)
             for layer_name, grad in embed_grads_dict.items()])

        for layer in embed_layers:
            layer.reset()
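
The loop above merges sparse gradients that target the same embedding layer by concatenating their values and indices into one `tf.IndexedSlices` before `apply_gradients` is called. A standalone sketch of that merge step with made-up tensors:

import tensorflow as tf

# two sparse gradients for the same 4-dim embedding layer
grad_a = tf.IndexedSlices(values=tf.ones((2, 4)), indices=tf.constant([1, 5]))
grad_b = tf.IndexedSlices(
    values=tf.ones((1, 4)) * 2.0, indices=tf.constant([5])
)

# merge by concatenating values and indices, as in the training loop above
merged = tf.IndexedSlices(
    tf.concat([grad_a.values, grad_b.values], axis=0),
    tf.concat([grad_a.indices, grad_b.indices], axis=0),
)
print(merged.indices.numpy())  # [1 5 5]
print(merged.values.shape)     # (3, 4)
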
Example #6
    def _test_async_correctness(
        self,
        grads_and_vars_batches,
        embed_values,
        expected_non_embed_values,
        expected_embed_values=None,
    ):
        """Checks the correctness of async OptimizerWrapper. This function
        creates multiple threads that call
        `OptimizerWrapper.apply_gradients` simultaneously.

        Args:
            grads_and_vars_batches: A python list of `grads_and_vars`. Every
                thread takes a `grads_and_vars` and calls `apply_gradients`.
            embed_values: A python dictionary of
                `(layer_name, embedding table)`.
            expected_non_embed_values: A python list of expected non-embedding
                values after applying gradients.
            expected_embed_values: A python dictionary of expected embedding
                values after applying gradients. None means no need to check
                embedding values.
        """
        thread_num = len(grads_and_vars_batches)
        input_dims = {}
        embed_var_n = len(embed_values)
        params = Parameters()
        for layer, values in embed_values.items():
            embed_dim = values.shape[1]
            input_dims[layer] = values.shape[0]
            embed_table = EmbeddingTable(layer, embed_dim)
            embed_table.set(range(input_dims[layer]), values)
            params.embedding_params[layer] = embed_table

        opt = SGD(0.1)
        opt_wrapper = OptimizerWrapper(
            opt,
            True,
            lookup_embedding_func=params.get_embedding_param,
            update_embedding_func=params.set_embedding_param,
        )

        # call optimizer_wrapper.apply_gradients asynchronously
        def _apply_gradients(opt_wrapper, grads_and_vars):
            # sleep 1s so that all threads reach this call concurrently
            time.sleep(1)
            opt_wrapper.apply_gradients(grads_and_vars)

        executor = ThreadPoolExecutor(max_workers=thread_num)
        tasks = [
            executor.submit(_apply_gradients, opt_wrapper, grads_and_vars)
            for grads_and_vars in grads_and_vars_batches
        ]
        _ = [task.result() for task in tasks]

        # check updated results of non-embedding variables
        non_embed_vars = [
            var for grad, var in grads_and_vars_batches[0][:-embed_var_n]
        ]
        for var, expected_value in zip(non_embed_vars,
                                       expected_non_embed_values):
            self.assertTrue(np.isclose(var.numpy(), expected_value).all())

        # `expected_embed_values=None` means there is no need to check the
        # embedding table
        if not expected_embed_values:
            return
        # check updated results of embedding table
        for layer, expected_values in expected_embed_values.items():
            value = params.get_embedding_param(layer, range(input_dims[layer]))

            self.assertTrue(
                any([
                    np.isclose(value, expected).all()
                    for expected in expected_values
                ]))
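
A hedged sketch of how the helper above might be called; every name and number below is made up. Each of the two threads applies one dense gradient and one sparse gradient: non-embedding pairs come first in grads_and_vars, and embedding gradients are tf.IndexedSlices paired with the layer name, matching the training functions on this page. The expected value assumes both SGD(0.1) updates are applied.

    def test_async_correctness_two_threads(self):
        # hypothetical setup: one non-embedding variable and one 2x4
        # embedding table named "embed_test"
        var = tf.Variable(np.zeros((2,), dtype=np.float32))
        embed_values = {"embed_test": np.ones((2, 4), dtype=np.float32)}

        def make_grads_and_vars():
            dense_grad = tf.constant([1.0, 1.0])
            sparse_grad = tf.IndexedSlices(
                values=tf.ones((2, 4)), indices=tf.constant([0, 1])
            )
            # non-embedding pairs first, then (gradient, layer_name) pairs
            return [(dense_grad, var), (sparse_grad, "embed_test")]

        grads_and_vars_batches = [make_grads_and_vars() for _ in range(2)]
        # with SGD(0.1), two applied updates of 1.0 move `var` from 0.0 to -0.2
        expected_non_embed_values = [np.array([-0.2, -0.2], dtype=np.float32)]
        self._test_async_correctness(
            grads_and_vars_batches, embed_values, expected_non_embed_values
        )
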
Example #7
def _train_edl_embedding_with_optimizer_wrapper(
    model, opt_keras, X, Y, loss_fn, embed_dims, random_seed
):
    """Train model with optimizer wrapper."""
    tf.random.set_seed(random_seed)
    optimizer = OptimizerWrapper(opt_keras, None, embed_dims)

    # initialization process related to embedding layer and optimizer wrapper
    embed_layers = find_layer(model, Embedding)

    def lookup_func(ids, layer_name, initializer, output_dim):
        values, unknown = EmbeddingService.lookup_embedding(
            [Embedding.get_key([layer_name, i]) for i in ids]
        )
        return np.concatenate(values).reshape(len(ids), -1)

    for layer in embed_layers:
        layer.set_lookup_func(lookup_func)

    # training process
    for features, labels in zip(X, Y):
        with tf.GradientTape() as tape:
            for layer in embed_layers:
                layer.set_tape(tape)
            outputs = model.call(features)
            loss = loss_fn(outputs, labels)

        # TODO: collecting train_vars_embed and train_vars_other could be
        # factored into a reusable function
        train_vars_embed = []
        train_vars_other = []
        for layer in model.layers:
            if isinstance(layer, Embedding):
                for bet, ids in layer.bet_ids_pair:
                    train_vars_embed.append((bet, layer.name, ids))
            else:
                train_vars_other.extend(layer.trainable_variables)

        grads = tape.gradient(
            loss, train_vars_other + [var for var, _, _ in train_vars_embed]
        )

        # TODO: merging gradients from the same embedding layer will be
        # unnecessary once `optimizer_wrapper` supports grads_and_vars with
        # duplicated layer names
        train_vars_other_len = len(train_vars_other)
        grads_new = grads[:train_vars_other_len]
        grads_embed_dict = {}
        for (_, layer_name, ids), grad in zip(
            train_vars_embed, grads[train_vars_other_len:]
        ):
            if layer_name in grads_embed_dict:
                grads_merged = grads_embed_dict[layer_name]
                grads_embed_dict[layer_name] = tf.IndexedSlices(
                    tf.concat([grads_merged.values, grad.values], axis=0),
                    tf.concat([grads_merged.indices, ids], axis=0),
                )
            else:
                grads_embed_dict[layer_name] = tf.IndexedSlices(
                    grad.values, ids
                )

        optimizer.apply_gradients(
            list(zip(grads_new, train_vars_other))
            + [
                (grad, layer_name)
                for layer_name, grad in grads_embed_dict.items()
            ]
        )

        for layer in embed_layers:
            layer.reset()
Example #8
    def test_set_slot_values_to_variables(self):
        layers = ["test-1", "test-2"]
        slots = ["m", "v"]
        id_num = 3
        embedding_dims = {layer: 4 for layer in layers}
        all_values = np.arange(48).reshape(12, 4).astype(np.float32)

        slot_values = {}
        offset = 0
        for layer in layers:
            for slot in slots:
                start = offset
                end = offset + id_num
                slot_values.setdefault(layer, {}).setdefault(
                    slot, all_values[start:end]
                )
                offset = end

        opt = Adam()
        opt_wrapper = OptimizerWrapper(opt, None, embedding_dims)
        for layer in layers:
            opt_wrapper._create_embedding_variable(layer, tf.zeros((1, 4)))
        opt_wrapper._set_slot_values_to_variables(slot_values)
        self.assertTrue(len(opt.weights) == 4)
        for layer in layers:
            slots_dict = None
            for k, v in opt._slots.items():
                if k.startswith(layer):
                    slots_dict = v
                    break

            for slot in slots:
                self.assertTrue(
                    (
                        slots_dict[slot].numpy() == slot_values[layer][slot]
                    ).all()
                )
                self.assertTrue(
                    (
                        slots_dict[slot].numpy()
                        == opt_wrapper._slot_variables[layer][slot].numpy()
                    ).all()
                )

                slots_dict[slot].assign(tf.ones((10, 4)))
                self.assertTrue(
                    (
                        opt_wrapper._slot_variables[layer][slot].numpy() - 1.0
                        < 0.0001
                    ).all()
                )
                opt_wrapper._slot_variables[layer][slot].assign(
                    -tf.ones((10, 4))
                )
                self.assertTrue(
                    (slots_dict[slot].numpy() + 1.0 < 0.0001).all()
                )

        slot_values_new = {"test-1": {"m": np.zeros((3, 4), np.float32)}}
        opt_wrapper._set_slot_values_to_variables(slot_values_new)
        self.assertTrue(
            (opt_wrapper._slot_variables["test-1"]["m"].numpy() < 0.0001).all()
        )
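
The last assertions above work because the optimizer's slot dict and `OptimizerWrapper._slot_variables` hold references to the same `tf.Variable` objects, so an assignment through either handle is visible through the other. A standalone sketch of that aliasing:

import tensorflow as tf

v = tf.Variable(tf.zeros((2, 4)))
alias = v  # a second handle to the same variable, not a copy
alias.assign(tf.ones((2, 4)))
print(v.numpy().sum())  # 8.0 -- the assignment is visible through `v` as well
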
Example #9
    def _init_optimizer(self, opt, use_async):
        self._modulate_lr_if_needed(opt)
        if opt:
            return OptimizerWrapper(opt, use_async)
        return opt
Example #10
    def _test_async_correctness(
        self,
        grads_and_vars_batches,
        embed_values,
        expected_non_embed_values,
        expected_embed_values=None,
    ):
        """Checks the correctness of async OptimizerWrapper. This function
        creates multiple threads that call
        `OptimizerWrapper.apply_gradients` simultaneously.

        Args:
            grads_and_vars_batches: A python list of `grads_and_vars`. Every
                thread takes a `grads_and_vars` and calls `apply_gradients`.
            embed_values: A python dictionary of
                `(layer_name, embedding table)`.
            expected_non_embed_values: A python list of expected non-embedding
                values after applying gradients.
            expected_embed_values: A python dictionary of expected embedding
                values after applying gradients. None means no need to check
                embedding values.
        """
        thread_num = len(grads_and_vars_batches)
        embed_dims = {}
        embed_var_n = len(embed_values)
        mock_kv_store = MockKvStore()
        for layer, values in embed_values.items():
            embed_dims[layer] = values.shape[1]
            input_dim = values.shape[0]

            keys = [
                Embedding.get_key([layer, idx]) for idx in range(input_dim)
            ]
            mock_kv_store.update(keys, values)

        opt = SGD(0.1)
        opt_wrapper = OptimizerWrapper(opt, None, embed_dims, True)

        with mock.patch.object(EmbeddingService, "lookup_embedding",
                               mock_kv_store.lookup), mock.patch.object(
                                   EmbeddingService, "update_embedding",
                                   mock_kv_store.update):
            # call optimizer_wrapper.apply_gradients asynchronously
            def _apply_gradients(opt_wrapper, grads_and_vars):
                # sleep 1s so that all threads reach this call concurrently
                time.sleep(1)
                opt_wrapper.apply_gradients(grads_and_vars)

            executor = ThreadPoolExecutor(max_workers=thread_num)
            tasks = [
                executor.submit(_apply_gradients, opt_wrapper, grads_and_vars)
                for grads_and_vars in grads_and_vars_batches
            ]
            _ = [task.result() for task in tasks]

            # check updated results of non-embedding variables
            non_embed_vars = [
                var for grad, var in grads_and_vars_batches[0][:-embed_var_n]
            ]
            for var, expected_value in zip(non_embed_vars,
                                           expected_non_embed_values):
                self.assertTrue(np.isclose(var.numpy(), expected_value).all())

            # `expected_embed_values=None` means there is no need to check
            # the embedding table
            if not expected_embed_values:
                return
            # check updated results of embedding table
            for layer, expected_values in expected_embed_values.items():
                keys = [
                    Embedding.get_key([layer, idx]) for idx in range(input_dim)
                ]
                raw_value, _ = mock_kv_store.lookup(keys)
                value = np.concatenate(raw_value).reshape(input_dim, -1)

                self.assertTrue(
                    any([
                        np.isclose(value, expected).all()
                        for expected in expected_values
                    ]))
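
Presumably because the asynchronous updates may interleave in different orders, expected_embed_values maps each layer to a list of acceptable outcomes, and the assertion above passes if any of them matches the looked-up table. A tiny illustration of that pattern with made-up numbers:

import numpy as np

value = np.array([[0.8, 0.8]])  # observed table after the race
acceptable = [
    np.array([[0.9, 0.9]]),     # hypothetical: one update lost
    np.array([[0.8, 0.8]]),     # hypothetical: both updates applied
]
print(any(np.isclose(value, expected).all() for expected in acceptable))
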