    def test_encode_context_feature_values_with_prefix(self, mock_module_cls):
        def mock_fn(input, signature=None):
            self.assertIn(signature,
                          {"encode_context", "encode_response", None})
            if signature == "encode_context":
                self.assertIsInstance(input, dict)
                self.assertEqual(2, len(input))
                for input_t in input.values():
                    self.assertEqual(input_t.dtype, tf.string)

        mock_module_cls.return_value = mock_fn
        with mock.patch("encoder_client._batch_session_run") as f:
            client = encoder_client.EncoderClient(
                "test_uri",
                use_extra_context=True,
                max_extra_contexts=3,
                use_extra_context_prefixes=True,
                cache_size=0,
            )
            mock_module_cls.assert_called_with("test_uri")
            encodings = client.encode_contexts(
                ["hello", "hi", "yo"],
                extra_contexts=[
                    ["a", "b", "c", "d d"],
                    ["A", "B", "C", "D", "E", "F"],
                    [],
                ],
            )
            f.assert_called_once()
            self.assertEqual(
                ["0: d d 1: c 2: b", "0: F 1: E 2: D", ""],
                list(f.call_args[0][1][client._fed_extra_contexts]),
            )
            self.assertEqual(f.return_value, encodings)
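
    # A minimal sketch (an assumption, not the library's internal code) of how
    # the prefixed extra-context feature asserted above could be produced: the
    # most recent max_extra_contexts entries are taken newest-first and each is
    # prefixed with its distance from the current turn. The helper name is
    # hypothetical.
    @staticmethod
    def _prefixed_extra_contexts(extra_contexts, max_extra_contexts=3):
        recent = list(reversed(extra_contexts))[:max_extra_contexts]
        return " ".join(
            "{}: {}".format(i, text) for i, text in enumerate(recent))
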
    def test_encode_context(self, mock_module_cls):
        def mock_fn(input, signature=None):
            self.assertIn(signature,
                          {"encode_context", "encode_response", None})
            if signature == "encode_context":
                self.assertIsInstance(input, dict)
                self.assertEqual(2, len(input))
                for input_t in input.values():
                    self.assertEqual(input_t.dtype, tf.string)

                # Use the last feature tensor seen above to infer batch size.
                return tf.ones([tf.shape(input_t)[0], 3])

        mock_module_cls.return_value = mock_fn

        client = encoder_client.EncoderClient("test_uri",
                                              use_extra_context=True)
        mock_module_cls.assert_called_with("test_uri")

        encodings = client.encode_contexts(
            ["hello", "hi", "yo"],
            extra_contexts=[
                ["a", "b", "c", "d"],
                ["A", "B", "C", "D", "E", "F"],
                [],
            ],
        )
        np.testing.assert_allclose([[1., 1., 1.]] * 3, encodings)

    def test_encode_sentences_batching_caching(self, mock_module_cls):
        def mock_fn(input, signature=None):
            self.assertIn(signature,
                          {"encode_context", "encode_response", None})
            self.assertIsInstance(input, tf.Tensor)
            self.assertEqual(input.dtype, tf.string)
            if signature is None:
                return tf.random_normal([tf.shape(input)[0], 3])

        mock_module_cls.return_value = mock_fn

        client = encoder_client.EncoderClient(
            # force batching by setting batch size to 3
            "test_uri",
            internal_batch_size=3,
            cache_size=100,
        )
        mock_module_cls.assert_called_with("test_uri")

        encodings = client.encode_sentences(
            ["a", "a", "b", "c", "d", "e", "f", "g"])
        # Test de-duplication:
        np.testing.assert_allclose(encodings[0], encodings[1])

        encodings_2 = client.encode_sentences(["a", "b", "c", "z"])
        # Test caching
        np.testing.assert_allclose(encodings[0], encodings_2[0])
        np.testing.assert_allclose(encodings[2], encodings_2[1])
        np.testing.assert_allclose(encodings[3], encodings_2[2])
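
    # A minimal sketch (an assumption, not the library's implementation) of the
    # de-duplicate / cache / batch pattern that the test above exercises: each
    # distinct sentence is encoded at most once, results are reused across
    # calls, and the backing encoder is invoked in fixed-size batches. The
    # helper name and signature are hypothetical.
    @staticmethod
    def _encode_with_cache(encode_fn, cache, sentences, batch_size=3):
        todo = [s for s in dict.fromkeys(sentences) if s not in cache]
        for start in range(0, len(todo), batch_size):
            batch = todo[start:start + batch_size]
            for sentence, vector in zip(batch, encode_fn(batch)):
                cache[sentence] = vector
        return [cache[sentence] for sentence in sentences]
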
    def __init__(self, train_x, train_y, valid_x, valid_y):
        self.train_x = train_x
        self.train_y = train_y
        self.valid_x = valid_x
        self.valid_y = valid_y
        self.n_labels = self.valid_y.shape[1]
        self.module = encoder_client.EncoderClient(
            "http://models.poly-ai.com/convert/v1/model.tar.gz")
    def test_encode_response(self, mock_module_cls):
        def mock_fn(input, signature=None):
            self.assertIn(signature,
                          {"encode_context", "encode_response", None})
            self.assertIsInstance(input, tf.Tensor)
            self.assertEqual(input.dtype, tf.string)
            if signature == "encode_response":
                return tf.ones([tf.shape(input)[0], 3])

        mock_module_cls.return_value = mock_fn

        client = encoder_client.EncoderClient("test_uri")
        mock_module_cls.assert_called_with("test_uri")

        encodings = client.encode_responses(["hello"])
        np.testing.assert_allclose([[1, 1, 1]], encodings)

Example #6

        # Find the position of 0 in the argsort, as index 0 is the correct
        # response.
        ranks.append((-scores).argsort().argmin())
        if (i + 1) % 100 == 0:
            glog.info(f"Scored {i + 1} / {len(examples)} examples.")

    ranks = numpy.asarray(ranks)

    for k in [1, 10, 50]:
        recall_at_k = (ranks < k).mean()
        glog.info(f"Recall@{k} = {recall_at_k:.3f}")

    mrr = (1 / (1.0 + ranks)).mean()
    glog.info(f"MRR = {mrr:.3f}")


if __name__ == "__main__":
    flags = _parse_args()
    client = encoder_client.EncoderClient(
        flags.encoder,
        use_extra_context=True,
        use_extra_context_prefixes=True,
        max_extra_contexts=10,
    )
    reader = test_reader.TestReader(
        examples_json=flags.examples_json,
        labels_tsv=flags.labels_tsv,
    )
    _evaluate(client, reader.examples)

Example #7

print(sequence_encoding)
print(sequence_encoding.shape)
tokens = encoding['tokens']
print(tokens)
tokenized_encoding = sess.run(encoding_tensor2,
                              feed_dict={text_placeholder: text})
# print(encoding.shape)
print(tokenized_encoding)
if sess is not None:
    sess.close()

# --------------------------- experiment convert encode client
# encode_sentences
import encoder_client
import pdb
# Internally it implements caching, deduplication, and batching, to help speed
# up encoding. Note that because it does batching internally, you can pass
# very large lists of sentences to encode without going out of memory.
client = encoder_client.EncoderClient(
    "http://models.poly-ai.com/convert/v1/model.tar.gz")
# Find good responses to the following context
context_encodings = client.encode_contexts(["What's your name?"])
# Rank the following responses as candidates
candidate_responses = ["No, thanks.", "I'm Matt", "Hey", "I have a dog"]
response_encodings = client.encode_responses(candidate_responses)
# Compute scores using the dot product
scores = response_encodings.dot(context_encodings.T).flatten()
# Print the top-scoring response
top_idx = scores.argmax()
pdb.set_trace()
print('Best response: {}, score: {:.3f}'.format(
    candidate_responses[top_idx], scores[top_idx]))
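
# A possible extension (a minimal sketch reusing the scores and
# candidate_responses defined above): print every candidate ranked by score
# rather than only the single best response.
ranked = scores.argsort()[::-1]
for rank, idx in enumerate(ranked, start=1):
    print('{}. {} (score: {:.3f})'.format(
        rank, candidate_responses[idx], scores[idx]))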