def test_remove_low_scoring_entities_doesnt_remove_unscored_entities():
    duckling_plugin = DucklingPlugin(
        locale="en_IN", dimensions=["time"], timezone="Asia/Kolkata", threshold=0.2
    )
    body = "12th december"
    entity_A = BaseEntity(
        range={"from": 0, "to": len(body)},
        body=body,
        type="basic",
        dim="default",
        values=[],
        score=0.0,
    )
    entity_B = BaseEntity(
        range={"from": 0, "to": len(body)},
        body=body,
        type="basic",
        dim="default",
        values=[],
        score=0.5,
    )
    assert duckling_plugin.remove_low_scoring_entities([entity_A, entity_B]) == [
        entity_B,
    ]

def test_plugin_cases(payload) -> None:
    """
    Test cases where the plugin should work.
    """
    entities = payload.get("inputs", {}).get("entities", [])
    tracker = payload.get("inputs", {}).get("tracker", [])
    expected = payload.get("expected", {})
    duckling_plugin = DucklingPlugin(
        dimensions=["date", "time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
    )

    for i, entity in enumerate(entities):
        current_turn_entities = duckling_plugin._reshape(entity, i)

    combine_date_time_plugin = CombineDateTimeOverSlots(
        trigger_intents=["_callback_"],
        dest="output.entities",
    )
    workflow = Workflow(plugins=[combine_date_time_plugin])
    workflow.output = Output(entities=current_turn_entities)
    _, output = workflow.run(Input(utterances=[""], slot_tracker=tracker))
    entity_values = [entity["value"] for entity in output[const.ENTITIES]]

    if len(entity_values) != len(expected):
        pytest.fail(
            "Expected {} entities but got {}".format(len(expected), len(entity_values))
        )

    for entity_value, expected_value in zip(entity_values, expected):
        try:
            # Compare as datetimes when both sides parse as ISO timestamps;
            # otherwise fall back to plain equality. Use distinct names so the
            # `expected` list above is not shadowed inside the loop.
            expected_dt = datetime.fromisoformat(expected_value)
            generated_dt = datetime.fromisoformat(entity_value)
            assert generated_dt == expected_dt, f"Expected {expected_dt} but got {generated_dt}"
        except (ValueError, TypeError):
            assert entity_value == expected_value

def test_duckling_get_operator_happy_case():
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        threshold=0.2,
        datetime_filters="future",
    )
    assert duckling_plugin.get_operator("lt") == operator.lt

def test_duckling_reftime():
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        threshold=0.2,
        datetime_filters="future",
    )
    with pytest.raises(TypeError):
        duckling_plugin.validate("test", None)

def test_duckling_get_operator_exception():
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        threshold=0.2,
        datetime_filters="future",
    )
    with pytest.raises(ValueError):
        duckling_plugin.get_operator("invalid")

@httpretty.activate  # intercept calls to the mocked /parse endpoint
def test_plugin_no_transform():
    mock_entity_json = [{
        "body": "today",
        "start": 0,
        "value": {
            "values": [{
                "value": "2021-09-14T00:00:00.000+05:30",
                "grain": "day",
                "type": "value",
            }],
            "value": "2021-09-14T00:00:00.000+05:30",
            "grain": "day",
            "type": "value",
        },
        "end": 5,
        "dim": "time",
        "latent": False,
    }]
    request_callback = request_builder(mock_entity_json, response_code=200)
    httpretty.register_uri(
        httpretty.POST, "http://0.0.0.0:8000/parse", body=request_callback
    )
    df = pd.DataFrame(
        [
            {
                "data": ["today"],
                "reftime": "2021-09-14T00:00:00.000+05:30",
            },
            {
                "data": ["no"],
                "reftime": "2021-09-14T00:00:00.000+05:30",
            },
        ],
        columns=["data", "reftime"],
    )
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
        threshold=0.2,
        timeout=0.01,
        use_transform=False,
        input_column="data",
        output_column="entities",
    )
    df_ = duckling_plugin.transform(df)
    assert "entities" not in df_.columns

def make_entity_object(entity_items):
    duckling_plugin = DucklingPlugin(
        dest="output.entities",
        dimensions=["date", "time", "duration", "number", "people"],
        timezone="Asia/Kolkata",
        debug=False,
        locale="en_IN",
    )
    return py_.flatten(
        [
            duckling_plugin._reshape(entities, i)
            for i, entities in enumerate(entity_items)
        ]
    )

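# A minimal usage sketch for the make_entity_object helper above, assuming
# DucklingPlugin._reshape turns each duckling-style match into one entity
# object. This test is illustrative and not part of the original suite; the
# mock payload mirrors the one used in test_plugin_no_transform.
def test_make_entity_object_flattens_turns():
    duckling_match = {
        "body": "today",
        "start": 0,
        "value": {
            "values": [{
                "value": "2021-09-14T00:00:00.000+05:30",
                "grain": "day",
                "type": "value",
            }],
            "value": "2021-09-14T00:00:00.000+05:30",
            "grain": "day",
            "type": "value",
        },
        "end": 5,
        "dim": "time",
        "latent": False,
    }
    # Two alternatives with one match each should flatten into two entities.
    entities = make_entity_object([[duckling_match], [duckling_match]])
    assert len(entities) == 2
    assert [entity.value for entity in entities] == [
        "2021-09-14T00:00:00.000+05:30",
        "2021-09-14T00:00:00.000+05:30",
    ]
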
def test_max_workers_greater_than_zero() -> None:
    """Check that "ValueError: max_workers must be greater than 0" is not raised when there are no transcriptions.

    When ASR returns an empty transcription in a production setup, the FSM does not send it to
    the SLU service. In a development setup, however, running `slu test` with at least one data
    point that has no transcriptions (`[]`) used to raise
    `ValueError: max_workers must be greater than 0`. The corresponding fix has been made, and
    this test ensures the exception is not raised for empty transcriptions in a development
    setup either.

    :return: None
    :rtype: None
    """
    locale = "en_IN"
    duckling_plugin = DucklingPlugin(
        dest="output.entities",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        url="https://duckling/parse",
    )
    workflow = Workflow([duckling_plugin])
    alternatives = []  # When ASR returns empty transcriptions.
    try:
        workflow.run(Input(utterances=alternatives, locale=locale))
    except ValueError as exc:
        pytest.fail(f"{exc}")

@httpretty.activate  # intercept calls to the mocked /parse endpoint
def test_duckling_timeout() -> None:
    """
    Return no entities when the Duckling service takes longer than the plugin's timeout.

    :return: None
    :rtype: None
    """
    locale = "en_IN"
    wait_time = 0.1

    def raise_timeout(_, __, headers):
        time.sleep(wait_time)
        return 200, headers, "received"

    httpretty.register_uri(
        httpretty.POST, "http://0.0.0.0:8000/parse", body=raise_timeout
    )

    duckling_plugin = DucklingPlugin(
        locale=locale,
        dimensions=["time"],
        timezone="Asia/Kolkata",
        threshold=0.2,
        timeout=0.01,
        dest="output.entities",
    )
    workflow = Workflow([duckling_plugin])
    _, output = workflow.run(Input(utterances="test"))
    assert output["entities"] == []

@httpretty.activate  # intercept calls to the mocked /parse endpoint
def test_entity_type(payload) -> None:
    """
    Evaluate a set of cases from a file.
    """
    body = payload["input"]
    mock_entity_json = payload["mock_entity_json"]
    expected = payload.get("expected")
    exception = payload.get("exception")

    duckling_plugin = DucklingPlugin(
        dest="output.entities",
        dimensions=["people", "time", "date", "duration"],
        locale="en_IN",
        timezone="Asia/Kolkata",
    )

    request_callback = request_builder(mock_entity_json)
    httpretty.register_uri(
        httpretty.POST, "http://0.0.0.0:8000/parse", body=request_callback
    )
    workflow = Workflow([duckling_plugin])

    if expected:
        _, output = workflow.run(Input(utterances=body))
        entities = output["entities"]
        for i, entity in enumerate(entities):
            assert entity["entity_type"] == expected[i]["entity_type"]
    elif exception:
        with pytest.raises(EXCEPTIONS[exception]):
            workflow.run(Input(utterances=body))

def test_remove_low_scoring_entities_works_only_if_threshold_is_not_none():
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
    )
    body = "12th december"
    entity = BaseEntity(
        range={"from": 0, "to": len(body)},
        body=body,
        type="basic",
        dim="default",
        values=[],
        score=0.2,
    )
    assert duckling_plugin.remove_low_scoring_entities([entity]) == [entity]

@httpretty.activate  # intercept calls to the mocked /parse endpoint
def test_plugin_working_cases(payload) -> None:
    """
    An end-to-end example showing how to use `DucklingPlugin` with a `Workflow`.
    """
    body = payload["input"]
    mock_entity_json = payload["mock_entity_json"]
    expected_types = payload.get("expected")
    exception = payload.get("exception")
    duckling_args = payload.get("duckling")
    response_code = payload.get("response_code", 200)
    locale = payload.get("locale")
    reference_time = payload.get("reference_time")
    use_latent = payload.get("use_latent")

    duckling_plugin = DucklingPlugin(dest="output.entities", **duckling_args)

    request_callback = request_builder(mock_entity_json, response_code=response_code)
    httpretty.register_uri(
        httpretty.POST, "http://0.0.0.0:8000/parse", body=request_callback
    )
    workflow = Workflow([duckling_plugin])

    if isinstance(reference_time, str):
        reference_time = make_unix_ts("Asia/Kolkata")(reference_time)

    if expected_types is not None:
        input_ = Input(
            utterances=body,
            locale=locale,
            reference_time=reference_time,
            latent_entities=use_latent,
        )
        _, output = workflow.run(input_)

        if not output["entities"]:
            assert output["entities"] == []

        for i, entity in enumerate(output["entities"]):
            expected_entity_type = expected_types[i]["entity_type"]
            assert entity["entity_type"] == expected_entity_type
    else:
        with pytest.raises(EXCEPTIONS[exception]):
            input_ = Input(
                utterances=body,
                locale=locale,
                reference_time=reference_time,
                latent_entities=use_latent,
            )
            workflow.run(input_)

def test_plugin_with_custom_entity_map() -> None:
    """
    Check that a custom `entity_map` passed to `DucklingPlugin` is reflected in its
    `dimension_entity_map`, so the "number" dimension resolves to `BaseEntity` here.
    """
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        timezone="Asia/Kolkata",
        dimensions=["time"],
        entity_map={"number": {"value": BaseEntity}},
    )
    assert duckling_plugin.dimension_entity_map["number"]["value"] == BaseEntity

def test_duckling_connection_error() -> None:
    """
    Return no entities when the Duckling service cannot be reached.

    :return: None
    :rtype: None
    """
    locale = "en_IN"
    duckling_plugin = DucklingPlugin(
        locale=locale,
        dimensions=["time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
        threshold=0.2,
        timeout=0.01,
        url="https://duckling/parse",
    )
    workflow = Workflow([duckling_plugin])
    _, output = workflow.run(Input(utterances="test", locale=locale))
    assert output["entities"] == []

@httpretty.activate  # intercept calls to the mocked /parse endpoint
def test_plugin_transform_existing_entity():
    mock_entity_json = [{
        "body": "today",
        "start": 0,
        "value": {
            "values": [{
                "value": "2021-09-14T00:00:00.000+05:30",
                "grain": "day",
                "type": "value",
            }],
            "value": "2021-09-14T00:00:00.000+05:30",
            "grain": "day",
            "type": "value",
        },
        "end": 5,
        "dim": "time",
        "latent": False,
    }]
    request_callback = request_builder(mock_entity_json, response_code=200)
    httpretty.register_uri(
        httpretty.POST, "http://0.0.0.0:8000/parse", body=request_callback
    )
    df = pd.DataFrame(
        [
            {
                "data": '["today"]',
                "reftime": "2021-09-14T00:00:00.000+05:30",
            },
            {
                "data": '["no"]',
                "reftime": "2021-09-14T00:00:00.000+05:30",
                "entities": [
                    KeywordEntity(
                        range={"start": 0, "end": 0},
                        value="apple",
                        entity_type="fruits",
                        body="apple",
                    )
                ],
            },
            {
                "data": '["no"]',
                "reftime": pd.NA,
                "entities": [
                    KeywordEntity(
                        range={"start": 0, "end": 0},
                        value="apple",
                        entity_type="fruits",
                        body="apple",
                    )
                ],
            },
        ],
        columns=["data", "reftime", "entities"],
    )
    duckling_plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
        threshold=0.2,
        timeout=0.01,
        use_transform=True,
        input_column="data",
        output_column="entities",
    )
    df_ = duckling_plugin.transform(df)

    today = TimeEntity(
        entity_type="date",
        body="today",
        parsers=["DucklingPlugin"],
        range={"start": 0, "end": 5},
        score=1.0,
        alternative_index=0,
        latent=False,
        value="2021-09-14T00:00:00.000+05:30",
        origin="value",
        grain="day",
    )
    parsed_entity = df_["entities"].iloc[0][0]
    assert parsed_entity.value == today.value
    assert parsed_entity.type == today.type
    assert df_["entities"].iloc[1][0].value == "apple"

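
# The request_builder helper used throughout these tests lives in the shared
# test utilities. The sketch below is only a hypothetical illustration of the
# httpretty callback contract it satisfies (status, headers, body), not the
# actual implementation.
import json


def request_builder_sketch(mock_entity_json, response_code=200):
    def request_callback(request, uri, response_headers):
        # Serialise the canned duckling response for httpretty to return.
        return response_code, response_headers, json.dumps(mock_entity_json)

    return request_callback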