def test_preprocess_example():
    """Check ``serving.preprocess_example`` against three stub preprocessors.

    Each stub exposes ``process_input`` and returns a
    ``(source tokens, target tokens, metadata)`` triple. The cases are: a
    source-only input, an input carrying a target prefix and an explicit mode,
    and a stub that returns the source already split into two parts.
    """

    class SourceOnlyProcessor:
        """Stub returning whitespace-split source tokens and nothing else."""

        def process_input(self, source, **kwargs):
            tokens = source.split()
            return tokens, None, None

    result = serving.preprocess_example(
        SourceOnlyProcessor(), 1, {"text": "a b c"}
    )
    assert result.index == 1
    # A flat token list comes back wrapped in a one-element list.
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.metadata == [None]
    assert result.mode == "default"

    class TargetPrefixProcessor:
        """Stub that also splits the target; metadata is the source length."""

        def process_input(self, source, target=None, **kwargs):
            source_tokens = source.split()
            target_tokens = target.split()
            return source_tokens, target_tokens, len(source)

    request = dict(text="a b c", target_prefix="d e", mode="alternatives")
    result = serving.preprocess_example(TargetPrefixProcessor(), 2, request)
    assert result.index == 2
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.target_tokens == [["d", "e"]]
    # len("a b c") == 5: the stub reports the character length of the source.
    assert result.metadata == [5]
    assert result.mode == "alternatives"

    class TwoPartProcessor:
        """Stub returning the source as two parts with per-part metadata."""

        def process_input(self, source, **kwargs):
            tokens = source.split()
            head, tail = tokens[:2], tokens[2:]
            return [head, tail], None, [1, 2]

    result = serving.preprocess_example(
        TwoPartProcessor(), 3, {"text": "a b c d"}
    )
    assert result.index == 3
    # An already multi-part result is expected to be kept as is.
    assert result.source_tokens == [["a", "b"], ["c", "d"]]
    assert result.metadata == [1, 2]
    assert result.mode == "default"
# Example no. 2
# 0
def test_preprocess_example():
    """Check ``serving.preprocess_example`` with plain preprocessing callables.

    Three stub callables are exercised in turn: one returning only source
    tokens, one that also tokenizes its second positional argument and reports
    the source length as metadata, and one returning a two-part source.

    The stubs take ``*args`` and read only ``args[0]`` (the source text) and,
    in the second case, ``args[1]`` (presumably the target prefix, judging by
    the ``target_tokens`` assertion -- confirm against ``serving``).
    """

    # Named nested functions replace the lambdas that were assigned to and
    # rebound on a single name; each stub now has a descriptive name.
    def source_only(*args):
        # Deliberately a 2-tuple: this stub returns no metadata element.
        return args[0].split(), None

    example = serving.preprocess_example(source_only, 1, {"text": "a b c"})
    assert example.index == 1
    assert example.source_tokens == [["a", "b", "c"]]
    assert example.metadata == [None]
    assert example.mode == "default"

    def source_and_target(*args):
        # Metadata is the character length of the raw source text.
        return args[0].split(), args[1].split(), len(args[0])

    raw_example = {
        "text": "a b c",
        "target_prefix": "d e",
        "mode": "alternatives",
    }
    example = serving.preprocess_example(source_and_target, 2, raw_example)
    assert example.index == 2
    assert example.source_tokens == [["a", "b", "c"]]
    assert example.target_tokens == [["d", "e"]]
    assert example.metadata == [5]
    assert example.mode == "alternatives"

    def two_parts(*args):
        # Split once and slice, instead of splitting the source twice.
        tokens = args[0].split()
        return [tokens[:2], tokens[2:]], None, [1, 2]

    example = serving.preprocess_example(two_parts, 3, {"text": "a b c d"})
    assert example.index == 3
    assert example.source_tokens == [["a", "b"], ["c", "d"]]
    assert example.metadata == [1, 2]
    assert example.mode == "default"
# Example no. 3
# 0
def test_preprocess_example_with_v2_options():
    """Check option forwarding when ``preprocess`` is a list of operators.

    The ``politeness`` inference option is mapped onto two operator paths.
    The stub preprocessor asserts that it receives no ``config`` and an
    ``options`` dict keyed by operator name, with the requested value under
    each mapped field.
    """
    politeness_operator = {
        "op": "_add_marker",
        "name": "politeness-op",
        "default_value": "neutral",
    }
    emotion_operator = {
        "op": "_add_emotion",
        "name": "emotion-op",
        "default_mood": "neutral",
    }
    options_schema = {
        "type": "object",
        "properties": {
            "politeness": {
                "type": "string",
                "default": "neutral",
                "enum": ["formal", "informal", "neutral"],
            }
        },
    }
    # One request option fans out to a field in each of the two operators.
    politeness_mapping = {
        "option_path": "politeness",
        "config_path": [
            "preprocess/politeness-op/value",
            "preprocess/emotion-op/mood",
        ],
    }
    config = {
        "source": "en",
        "target": "fr",
        "preprocess": [politeness_operator, emotion_operator],
        "inference_options": {
            "json_schema": options_schema,
            "options": [politeness_mapping],
        },
    }

    expected_options = {
        "politeness-op": {"value": "informal"},
        "emotion-op": {"mood": "informal"},
    }

    class OptionCheckingProcessor:
        """Stub that verifies the arguments it is called with."""

        def process_input(
            self, source, target=None, config=None, options=None, **kwargs
        ):
            assert config is None
            assert options == expected_options
            return source.split(), None, None

    request = {"text": "a b c d", "options": {"politeness": "informal"}}
    serving.preprocess_example(
        OptionCheckingProcessor(), 0, request, config=config
    )
def test_preprocess_example_with_fuzzy():
    """Check how the ``fuzzy`` field of a request reaches the preprocessor.

    The same request is sent twice. Without a config, the stub asserts that
    it gets neither ``target`` nor ``target_name``. With
    ``supported_features`` setting ``"NFA"`` to ``True``, the stub asserts
    that it gets a target and that ``target_name`` is ``"fuzzy"``.
    """

    class FuzzyCheckingProcessor:
        """Stub whose expectations depend on the flag given at construction."""

        def __init__(self, support_fuzzy):
            self._expects_fuzzy = support_fuzzy

        def process_input(self, source, target=None, target_name=None, **kwargs):
            if not self._expects_fuzzy:
                # The fuzzy text must not be forwarded in this mode.
                assert target is None
                assert target_name is None
                return source.split(), None, None

            assert target is not None
            assert target_name == "fuzzy"
            return source.split(), target.split(), None

    request = {"text": "Hello world", "fuzzy": "Bonjour monde"}
    fuzzy_config = {"supported_features": {"NFA": True}}
    serving.preprocess_example(FuzzyCheckingProcessor(False), 0, request)
    serving.preprocess_example(
        FuzzyCheckingProcessor(True), 0, request, config=fuzzy_config
    )
# Example no. 5
# 0
def test_preprocess_example_with_v1_options():
    """Check option forwarding when ``preprocess`` is a plain mapping.

    The ``politeness`` inference option is mapped onto a single config path.
    The stub preprocessor asserts that the requested value shows up inside
    the ``config`` it receives and that no separate ``options`` are passed.
    """
    options_schema = {
        "type": "object",
        "properties": {
            "politeness": {
                "type": "string",
                "default": "neutral",
                "enum": ["formal", "informal", "neutral"],
            }
        },
    }
    # Here the mapping target is a single path string, not a list of paths.
    politeness_mapping = {
        "option_path": "politeness",
        "config_path": "preprocess/politeness/value",
    }
    config = {
        "source": "en",
        "target": "fr",
        "preprocess": {
            "politeness": {
                "default_value": "neutral",
            },
        },
        "inference_options": {
            "json_schema": options_schema,
            "options": [politeness_mapping],
        },
    }

    class ConfigCheckingProcessor:
        """Stub that verifies the arguments it is called with."""

        def process_input(
            self, source, target=None, config=None, options=None, **kwargs
        ):
            politeness = config["preprocess"]["politeness"]
            assert politeness["value"] == "informal"
            assert options is None
            return source.split(), None, None

    request = {"text": "a b c d", "options": {"politeness": "informal"}}
    serving.preprocess_example(
        ConfigCheckingProcessor(), 0, request, config=config
    )