Example #1
    def report_embedding_info(self):
        model = elasticdl_pb2.Model()
        if self._embedding_layers:
            embedding_infos = model.embedding_table_infos
            for layer in self._embedding_layers:
                embedding_info = embedding_infos.add()
                embedding_info.name = layer.embedding_weight_name
                embedding_info.dim = layer.output_dim
                embedding_info.initializer = layer.embeddings_initializer
                # set to float32
                embedding_info.dtype = dtype_numpy_to_tensor(
                    np.dtype("float32")
                )

        if self._embedding_columns:
            embedding_infos = model.embedding_table_infos
            for column in self._embedding_columns:
                embedding_info = embedding_infos.add()
                embedding_info.name = column.embedding_weight_name
                embedding_info.dim = column.dimension
                # TODO(brightcoder01): The initializer in embedding column is
                # a variable initializer function. For embedding layer, it's a
                # tf.keras.initializers. Keep aligned between these two.
                embedding_info.initializer = "uniform"
                # set to float32
                embedding_info.dtype = dtype_numpy_to_tensor(
                    np.dtype("float32")
                )

        for ps_id in range(self._ps_num):
            self._ps_stubs[ps_id].push_embedding_table_infos(model)
Example #2
    def GetModel(self, request, _):
        if not self._use_async:
            self._validate_model_version(request.version)

        if (
            request.method == elasticdl_pb2.MINIMUM
            or request.version == self._version
        ):
            if self._use_async:
                res = self._get_model_no_lock()
            else:
                with self._lock:
                    res = self._get_model_no_lock()
            return res

        # Read the model at the requested fixed version from a checkpoint
        pb_model = elasticdl_pb2.Model()
        try:
            pb_model = self._checkpoint_service.get_checkpoint_model(
                request.version
            )
        except Exception:
            logger.error(
                "Failed to fetch checkpoint model for "
                "model version {}".format(request.version)
            )
        return pb_model
Example #3
def load_from_checkpoint_file(file_name):
    from elasticdl.proto import elasticdl_pb2

    pb_model = elasticdl_pb2.Model()
    with open(file_name, "rb") as f:
        pb_model.ParseFromString(f.read())
    return pb_model
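
A minimal usage sketch for the loader above; the path is hypothetical and assumes the file holds a Model protobuf written with SerializeToString():

# Hypothetical checkpoint file produced by an earlier save.
pb_model = load_from_checkpoint_file("/tmp/ckpt/model_v100.chkpt")
print(pb_model.version)  # the model version stored in the checkpoint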
Example #4
    def test_pull_variable(self):
        self.create_default_server_and_stub()
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        pull_req = empty_pb2.Empty()
        # try to pull variable
        res = self._stub.pull_variable(pull_req)
        # not initialized
        self.assertFalse(res.model_init_status)

        # init variable
        req = elasticdl_pb2.Model()
        req.version = 1
        for name, var in param0.items():
            emplace_tensor_pb_from_ndarray(req.param, var, name=name)
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        # pull variable back
        res = self._stub.pull_variable(pull_req)
        self.assertTrue(res.model_init_status)
        self.assertEqual(res.model.version, req.version)
        for param in res.model.param:
            name = param.name
            tensor = tensor_pb_to_ndarray(param)
            self.assertTrue(np.allclose(param0[name], tensor))
Example #5
    def test_pull_dense_parameters(self):
        self.create_default_server_and_stub()
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        pull_req = elasticdl_pb2.PullDenseParametersRequest()
        pull_req.version = -1
        # try to pull variable
        res = self._stub.pull_dense_parameters(pull_req)
        # not initialized
        self.assertFalse(res.initialized)

        # init variable
        req = elasticdl_pb2.Model()
        req.version = 1
        for name, var in param0.items():
            serialize_ndarray(var, req.dense_parameters[name])
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        # pull variable back
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, req.version)
        for name, pb in res.dense_parameters.items():
            tensor = pb_to_ndarray(pb)
            self.assertTrue(np.allclose(param0[name], tensor))

        # pull variables again; nothing is returned because the version is unchanged
        pull_req.version = res.version
        res = self._stub.pull_dense_parameters(pull_req)
        self.assertTrue(res.initialized)
        self.assertEqual(res.version, pull_req.version)
        self.assertTrue(not res.dense_parameters)
Example #6
    def report_variable_to_ps(self, ps_id):
        model = elasticdl_pb2.Model()
        model.version = self._model_versions_from_ps[ps_id]
        if ps_id in self._ps_vars:
            for var in self._ps_vars[ps_id]:
                serialize_ndarray(
                    var.numpy(), model.dense_parameters[var.name]
                )
        self._ps_stubs[ps_id].push_model(model)
Example #7
    def report_variable_to_ps(self, ps_id):
        model = elasticdl_pb2.Model()
        if ps_id in self._ps_vars:
            for var in self._ps_vars[ps_id]:
                emplace_tensor_pb_from_ndarray(
                    model.param, var.numpy(), name=var.name
                )
        self._ps_stubs[ps_id].push_model(model)
Example #8
    def get_version_from_checkpoint(checkpoint_dir):
        """Get the model version from a checkpoint. There may be several
        shard files in the checkpoint directory. The model versions of the
        shard files are the same, so we only need to read one shard file
        to get the model version.
        """
        variable_shard_files = os.listdir(checkpoint_dir)
        shard_file_path = os.path.join(
            checkpoint_dir, variable_shard_files[0]
        )
        model_pb = elasticdl_pb2.Model()
        model_pb = load_pb_from_file(model_pb, shard_file_path)
        return model_pb.version
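
A hedged usage sketch for the helper above; the checkpoint directory name is hypothetical:

# Hypothetical directory holding one or more model shard files.
version = get_version_from_checkpoint("/tmp/ckpt/version-100")
print("model version:", version)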
Example #9
    def report_embedding_info(self):
        model = elasticdl_pb2.Model()
        if self._embedding_layers:
            embedding_infos = model.embedding_table_info
            for layer in self._embedding_layers:
                embedding_info = embedding_infos.add()
                embedding_info.name = layer.name
                embedding_info.dim = layer.output_dim
                embedding_info.initializer = layer.embeddings_initializer

        for ps_id in range(len(self._ps_stubs)):
            self._ps_stubs[ps_id].push_embedding_info(model)
Example #10
    def restore_params_from_checkpoint(checkpoint_dir, shard_index, shard_num):
        """Restore a shard parameters from the checkpoint directory.
        If shard_num=1, a entire model parameters will be restored.

        Args:
            checkpoint_dir: a directory with checkpoint files.
            shard_index: Model shard index, e.g. the PS instance index
                using ParameterServerStrategy with multiple PS instances.
            shard_num: The total number of model shards, e.g. the total PS
                instancecount using ParameterServerStrategy with multiple
                PS instances.

        Return:
            parameters: A Parameter object which contains model version,
                non-embedding parameters and embedding tables for the
                PS instance with ps_id.
        """

        variable_shard_files = os.listdir(checkpoint_dir)
        non_embedding_vars = {}
        embedding_tables = {}
        version = None
        for shard_file in variable_shard_files:
            shard_file_path = os.path.join(checkpoint_dir, shard_file)
            model_pb = elasticdl_pb2.Model()
            model_pb = load_pb_from_file(model_pb, shard_file_path)
            if version is None:
                version = model_pb.version
            elif version != model_pb.version:
                raise ValueError(
                    "The versions in model shards are not consistent"
                )

            for embedding_info_pb in model_pb.embedding_table_infos:
                embedding_table = create_embedding_table(embedding_info_pb)
                embedding_tables.setdefault(
                    embedding_table.name, embedding_table
                )

            (
                shard_non_embedding_vars,
                shard_embedding_table_values,
            ) = _get_params_shard_from_pb(model_pb, shard_index, shard_num)

            non_embedding_vars.update(shard_non_embedding_vars)
            for name, pair in shard_embedding_table_values.items():
                embedding_tables[name].set(pair[0], pair[1])

        parameters = Parameters()
        parameters.non_embedding_params.update(non_embedding_vars)
        parameters.embedding_params.update(embedding_tables)
        parameters.version = version
        return parameters
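
A hedged sketch of restoring a single shard, assuming a hypothetical checkpoint directory and two PS instances:

# Restore the parameters owned by shard 0 out of 2 model shards.
parameters = restore_params_from_checkpoint("/tmp/ckpt/version-100", 0, 2)
print(parameters.version)
print(list(parameters.non_embedding_params.keys()))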
Example #11
    def push_embedding_table_infos(self, infos):
        model = elasticdl_pb2.Model()
        embedding_infos = model.embedding_table_infos

        for info in infos:
            embedding_info = embedding_infos.add()
            embedding_info.name = info.name
            embedding_info.dim = info.dim
            embedding_info.initializer = info.initializer
            embedding_info.dtype = info.dtype

        for ps_id in range(self.ps_num):
            self.ps_stubs[ps_id].push_embedding_table_infos(model)
Example #12
    def push_dense_parameters(self, parameters, ps_id, version):
        """Push dense parameters to a PS instance.

        Args:
            parameters: a list of Tensors
            ps_id: PS id
            version: model version
        """
        model = elasticdl_pb2.Model()
        model.version = version
        for p in parameters:
            if self.parameter_to_ps[p.name] == ps_id:
                serialize_ndarray(p.values, model.dense_parameters[p.name])
        self.ps_stubs[ps_id].push_model(model)
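
The parameter_to_ps mapping above decides which PS instance owns each named tensor. A minimal sketch of building such a mapping, assuming a simple round-robin placement and hypothetical parameter names (the placement policy is an assumption, not necessarily the one ElasticDL uses):

import itertools

# Assumed round-robin placement of parameter names onto PS ids.
ps_num = 2
ps_ids = itertools.cycle(range(ps_num))
parameter_to_ps = {
    name: next(ps_ids) for name in ["dense/kernel", "dense/bias"]
}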
Example #13
    def test_emplace_tensor_pb_from_ndarray(self):
        values = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], np.float32)
        indices = np.array([0, 2])
        name = "test"
        model = elasticdl_pb2.Model()
        emplace_tensor_pb_from_ndarray(model.param, values, indices, name)
        pb = model.param[-1]
        print("pb", pb)

        expected_pb = Tensor(values, indices, name).to_tensor_pb()
        self.assertEqual(pb.name, expected_pb.name)
        self.assertEqual(pb.dim, expected_pb.dim)
        self.assertEqual(pb.content, expected_pb.content)
        self.assertEqual(pb.indices, expected_pb.indices)
        self.assertEqual(pb.dtype, expected_pb.dtype)
Example #14
    def push_gradient_test_setup(self):
        self.var_names = ["test_1", "test_2"]
        self.var_values = [
            np.array([10.0, 20.0, 30.0], np.float32),
            np.array([20.0, 40.0, 60.0], np.float32),
        ]
        self.grad_values0 = [
            np.array([1.0, 2.0, 3.0], np.float32),
            np.array([2.0, 4.0, 6.0], np.float32),
        ]
        self.grad_values1 = [
            np.array([0.0, 0.0, 7.0], np.float32),
            np.array([9.0, 9.0, 6.0], np.float32),
        ]

        dim = self._embedding_info.dim
        self.embedding_table = (
            np.random.rand(4 * dim).reshape((4, dim)).astype(np.float32)
        )
        self.embedding_grads0 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(3, 1, 3),
        )
        self.embedding_grads1 = tf.IndexedSlices(
            values=np.random.rand(3 * dim)
            .reshape((3, dim))
            .astype(np.float32),
            indices=(2, 2, 3),
        )
        push_model_req = elasticdl_pb2.Model()
        push_model_req.version = self._parameters.version
        for name, value in zip(self.var_names, self.var_values):
            emplace_tensor_pb_from_ndarray(
                push_model_req.param, value, name=name
            )
        push_model_req.embedding_table_info.append(self._embedding_info)
        self._stub.push_model(push_model_req)

        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)

        self._parameters.embedding_params[self._embedding_info.name].set(
            range(len(self.embedding_table)), self.embedding_table
        )
Example #15
    def test_pull_embedding_vectors(self):
        self.create_default_server_and_stub()

        id_list_0 = [1, 3, 9, 6]
        id_list_1 = [8, 9, 1, 0, 6]

        req = elasticdl_pb2.Model()
        req.version = 1
        req.embedding_table_infos.append(self._embedding_info)
        another_embedding_info = elasticdl_pb2.EmbeddingTableInfo()
        another_embedding_info.name = "layer_b"
        another_embedding_info.dim = 16
        another_embedding_info.initializer = "normal"
        req.embedding_table_infos.append(another_embedding_info)
        res = self._stub.push_model(req)
        self.assertEqual(res, empty_pb2.Empty())

        vectors_a_0 = self.get_embedding_vectors("layer_a", id_list_0)
        self.assertEqual(vectors_a_0.shape[0], len(id_list_0))
        self.assertEqual(vectors_a_0.shape[1], 32)

        vectors_a_1 = self.get_embedding_vectors("layer_a", id_list_1)
        self.assertEqual(vectors_a_1.shape[0], len(id_list_1))
        self.assertEqual(vectors_a_1.shape[1], 32)

        vectors_b_1 = self.get_embedding_vectors("layer_b", id_list_1)
        self.assertEqual(vectors_b_1.shape[0], len(id_list_1))
        self.assertEqual(vectors_b_1.shape[1], 16)

        vectors_b_0 = self.get_embedding_vectors("layer_b", id_list_0)
        self.assertEqual(vectors_b_0.shape[0], len(id_list_0))
        self.assertEqual(vectors_b_0.shape[1], 16)

        for idx0, id0 in enumerate(id_list_0):
            for idx1, id1 in enumerate(id_list_1):
                if id0 == id1:
                    self.assertTrue(
                        np.array_equal(vectors_a_0[idx0], vectors_a_1[idx1])
                    )
                    self.assertTrue(
                        np.array_equal(vectors_b_0[idx0], vectors_b_1[idx1])
                    )

        vectors = self.get_embedding_vectors("layer_a", [])
        self.assertIsNone(vectors)
Example #16
    def push_gradient_test_setup(self):
        self.var_names = ["test_1", "test_2"]
        self.var_values = [
            np.array([10.0, 20.0, 30.0], np.float32),
            np.array([20.0, 40.0, 60.0], np.float32),
        ]
        self.grad_values0 = [
            np.array([1.0, 2.0, 3.0], np.float32),
            np.array([2.0, 4.0, 6.0], np.float32),
        ]
        self.grad_values1 = [
            np.array([0.0, 0.0, 7.0], np.float32),
            np.array([9.0, 9.0, 6.0], np.float32),
        ]

        dim = self._embedding_info.dim
        self.embedding_table = (
            np.random.rand(4 * dim).reshape((4, dim)).astype(np.float32)
        )
        self.embedding_grads0 = Tensor(
            None,
            np.random.rand(3 * dim).reshape((3, dim)).astype(np.float32),
            np.asarray([3, 1, 3]),
        )
        self.embedding_grads1 = Tensor(
            None,
            np.random.rand(3 * dim).reshape((3, dim)).astype(np.float32),
            np.asarray([2, 2, 3]),
        )
        push_model_req = elasticdl_pb2.Model()
        push_model_req.version = self._parameters.version
        for name, value in zip(self.var_names, self.var_values):
            serialize_ndarray(value, push_model_req.dense_parameters[name])
        push_model_req.embedding_table_infos.append(self._embedding_info)
        self._stub.push_model(push_model_req)

        for name, var in zip(self.var_names, self.var_values):
            self._parameters.non_embedding_params[name] = tf.Variable(var)

        self._parameters.embedding_params[self._embedding_info.name].set(
            range(len(self.embedding_table)), self.embedding_table
        )
Example #17
    def to_model_pb(self):
        """ Convert all parameters including embedding and non-embedding
        parameters to `elasticdl_pb2.Model` which can be serialized.
        """
        model_pb = elasticdl_pb2.Model()
        model_pb.version = self.version
        for name, var in self.non_embedding_params.items():
            serialize_ndarray(var.numpy(), model_pb.dense_parameters[name])

        for name, embedding_table in self.embedding_params.items():
            # A slot embedding table is not part of the model weights,
            # so we don't save it to the checkpoint.
            if not embedding_table.is_slot:
                serialize_indexed_slices(
                    embedding_table.to_indexed_slices(),
                    model_pb.embedding_tables[name],
                )
                embedding_info = embedding_table.to_embedding_table_info_pb()
                model_pb.embedding_table_infos.append(embedding_info)
        return model_pb
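
to_model_pb pairs naturally with load_from_checkpoint_file in Example #3. A hedged sketch of persisting the result, assuming parameters is a populated Parameters instance and a hypothetical path; SerializeToString() is the standard protobuf API:

# Hypothetical usage: write the converted model out as a checkpoint file.
model_pb = parameters.to_model_pb()
with open("/tmp/ckpt/model_v%d.chkpt" % model_pb.version, "wb") as f:
    f.write(model_pb.SerializeToString())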
Example #18
    def to_model_pb(self):
        """ Convert all parameters including embedding and non-embedding
        parameters to `elasticdl_pb2.Model` which can be serialized.
        """
        model_pb = elasticdl_pb2.Model()
        model_pb.version = self.version
        for name, var in self.non_embedding_params.items():
            emplace_tensor_pb_from_ndarray(
                model_pb.param, var.numpy(), name=name
            )

        for name, embedding_table in self.embedding_params.items():
            embedding_table_tensor = embedding_table.to_tensor()
            tensor_pb = model_pb.param.add()
            serialize_tensor(embedding_table_tensor, tensor_pb)

            embedding_info = embedding_table.to_embedding_table_info_pb()
            model_pb.embedding_table_info.append(embedding_info)

        return model_pb
Example #19
    def report_embedding_info(self):
        model = elasticdl_pb2.Model()
        if self._embedding_layers:
            embedding_infos = model.embedding_table_info
            for layer in self._embedding_layers:
                embedding_info = embedding_infos.add()
                embedding_info.name = layer.name
                embedding_info.dim = layer.output_dim
                embedding_info.initializer = layer.embeddings_initializer

        if self._embedding_columns:
            embedding_infos = model.embedding_table_info
            for column in self._embedding_columns:
                embedding_info = embedding_infos.add()
                embedding_info.name = column.name
                embedding_info.dim = column.dimension
                # TODO(brightcoder01): The initializer in embedding column is
                # a variable initializer function. For embedding layer, it's a
                # tf.keras.initializers. Keep aligned between these two.
                embedding_info.initializer = "uniform"

        for ps_id in range(len(self._ps_stubs)):
            self._ps_stubs[ps_id].push_embedding_info(model)
Example #20
    def _get_model_no_lock(self):
        pb_model = elasticdl_pb2.Model()
        pb_model.version = self._version
        for k, v in self._model.items():
            pb_model.param[k].CopyFrom(ndarray_to_tensor(v.numpy()))
        return pb_model
Example #21
    def _get_model_no_lock(self):
        pb_model = elasticdl_pb2.Model()
        pb_model.version = self._version
        for k, v in self._model.items():
            emplace_tensor_pb_from_ndarray(pb_model.param, v.numpy(), name=k)
        return pb_model
Example #22
    def test_push_model(self):
        opt_func_name = "ftrl_optimizer"
        opt = load_module(_module_file).__dict__[opt_func_name]()
        opt_config = opt.get_config()
        slot_names = ["accumulator", "linear"]
        slot_init_value = {
            "accumulator": opt_config["initial_accumulator_value"],
            "linear": 0.0,
        }

        self.create_default_server_and_stub(optimizer=opt_func_name)
        param0 = {
            "v0": np.random.rand(3, 2).astype(np.float32),
            "v1": np.random.rand(10, 32).astype(np.float32),
        }
        param1 = {
            "v0": np.ones([3, 2], dtype=np.float32),
            "v1": np.ones([10, 32], dtype=np.float32),
        }

        models = [param0, param1]

        for idx, model in enumerate(models):
            req = elasticdl_pb2.Model()
            req.version = idx + 1
            for name in model:
                serialize_ndarray(model[name], req.dense_parameters[name])
            req.embedding_table_infos.append(self._embedding_info)
            res = self._stub.push_model(req)
            self.assertEqual(res, empty_pb2.Empty())
            # self._parameters is initialized with the first push_model call
            # and the second push_model has no effect
            self.assertEqual(self._parameters.version, 1)
            for name in param0:
                self.assertTrue(
                    np.allclose(
                        param0[name],
                        self._parameters.non_embedding_params[name].numpy(),
                    )
                )
            self.assertEqual(
                self._embedding_info.name,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].name,
            )
            self.assertEqual(
                self._embedding_info.dim,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].dim,
            )
            self.assertEqual(
                tf.keras.initializers.get(
                    self._embedding_info.initializer
                ).__class__,
                self._parameters.embedding_params[
                    self._embedding_info.name
                ].initializer.__class__,
            )

            for slot_name in slot_names:
                name = get_slot_table_name(
                    self._embedding_info.name, slot_name
                )
                table = self._parameters.embedding_params[name]
                self.assertEqual(name, table.name)
                self.assertEqual(self._embedding_info.dim, table.dim)
                embedding = table.get([2])
                self.assertTrue(
                    (abs(embedding - slot_init_value[slot_name]) < 0.0001).all()
                )