Example #1
def test_cached_component_replace_schema_node():
    schema_node = SchemaNode(
        needs={
            "i1": "first_input",
            "i2": "second_input"
        },
        uses=FingerprintComponent,
        fn="add",
        constructor_name="load",
        config={"a": 1},
        eager=False,
        is_input=False,
        resource=Resource("hello"),
    )

    PrecomputedValueProvider.replace_schema_node(schema_node, 2)

    assert schema_node == SchemaNode(
        needs={
            "i1": "first_input",
            "i2": "second_input"
        },
        uses=PrecomputedValueProvider,
        fn="get_value",
        constructor_name="create",
        config={"output": 2},
        eager=False,
        is_input=False,
        resource=Resource("hello"),
    )
Example #2
    def replace_schema_node(cls, node: SchemaNode,
                            cache: TrainingCache) -> None:
        """Updates a `SchemaNode` to use a `FingerprintComponent`.

        This is for when we want to do a fingerprint run. During the fingerprint run we
        replace all non-input nodes with `FingerprintComponent`s so we can determine
        whether they are able to be pruned or cached before the next graph run without
        running the actual components.

        Args:
            node: The node to update.
            cache: The cache is needed to determine if there is a cache hit for the
                fingerprint key.
        """
        graph_component_class = node.uses
        node.uses = cls
        # We update the node to be "eager" so that `FingerprintComponent.run` sees
        # ALL the inputs to the node. If it was not eager, we would miss any args used
        # by the constructor.
        node.eager = True
        node.constructor_name = cls.create.__name__
        node.fn = cls.run.__name__
        node.config = {
            "config_of_replaced_component": node.config,
            "cache": cache,
            "graph_component_class": graph_component_class,
        }
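
# A minimal usage sketch (not from the original source) mirroring Example #1: it
# exercises the method above. `AddInputs` is borrowed from Example #3 and the
# `temp_cache` fixture from the other tests; both are illustrative assumptions.
# The asserted fields follow directly from the method body above.
def test_fingerprint_component_replace_schema_node(temp_cache: TrainingCache):
    schema_node = SchemaNode(
        needs={"i1": "first_input", "i2": "second_input"},
        uses=AddInputs,
        fn="add",
        constructor_name="create",
        config={"a": 1},
        eager=False,
    )

    FingerprintComponent.replace_schema_node(schema_node, temp_cache)

    assert schema_node.uses is FingerprintComponent
    # Eagerness is forced so `FingerprintComponent.run` sees all of the node's inputs.
    assert schema_node.eager is True
    assert schema_node.constructor_name == "create"
    assert schema_node.fn == "run"
    assert schema_node.config == {
        "config_of_replaced_component": {"a": 1},
        "cache": temp_cache,
        "graph_component_class": AddInputs,
    }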
Example #3
def test_target_override(eager: bool, default_model_storage: ModelStorage):
    graph_schema = GraphSchema(
        {
            "add": SchemaNode(
                needs={"i1": "first_input", "i2": "second_input"},
                uses=AddInputs,
                fn="add",
                constructor_name="create",
                config={},
                eager=eager,
            ),
            "subtract_2": SchemaNode(
                needs={"i": "add"},
                uses=SubtractByX,
                fn="subtract_x",
                constructor_name="create",
                config={"x": 3},
                eager=eager,
                is_target=True,
            ),
        }
    )

    execution_context = ExecutionContext(graph_schema=graph_schema, model_id="1")

    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=execution_context,
    )
    results = runner.run(inputs={"first_input": 3, "second_input": 4}, targets=["add"])
    assert results == {"add": 7}
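
    # Hedged follow-up sketch (not in the original test): omitting `targets`
    # should fall back to the schema's `is_target` nodes (cf. Example #7),
    # so only "subtract_2" would be returned.
    results = runner.run(inputs={"first_input": 3, "second_input": 4})
    assert results == {"subtract_2": 4}  # (3 + 4) - 3 = 4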
Example #4
    def _add_end_to_end_features_for_training(
            self, preprocessors: List[Text],
            train_nodes: Dict[Text, SchemaNode]) -> None:
        train_nodes["story_to_nlu_training_data_converter"] = SchemaNode(
            needs={
                "story_graph": "story_graph_provider",
                "domain": "domain_for_core_training_provider",
            },
            uses=CoreFeaturizationInputConverter,
            constructor_name="create",
            fn="convert_for_training",
            config={},
            is_input=True,
        )

        last_node_name = "story_to_nlu_training_data_converter"
        for preprocessor in preprocessors:
            node = copy.deepcopy(train_nodes[preprocessor])
            node.needs["training_data"] = last_node_name

            node_name = f"e2e_{preprocessor}"
            train_nodes[node_name] = node
            last_node_name = node_name

        node_with_e2e_features = "end_to_end_features_provider"
        train_nodes[node_with_e2e_features] = SchemaNode(
            needs={"messages": last_node_name},
            uses=CoreFeaturizationCollector,
            constructor_name="create",
            fn="collect",
            config={},
        )
Example #5
def test_serialize_graph_schema(tmp_path: Path):
    graph_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"some_config": 123455, "some more config": [{"nested": "hi"}]},
            ),
            "load": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
                resource=Resource("test resource"),
            ),
        }
    )

    serialized = graph_schema.as_dict()

    # Dump it to make sure it's actually serializable
    file_path = tmp_path / "my_graph.yml"
    rasa.shared.utils.io.write_yaml(serialized, file_path)

    serialized_graph_schema_from_file = rasa.shared.utils.io.read_yaml_file(file_path)
    graph_schema_from_file = GraphSchema.from_dict(serialized_graph_schema_from_file)

    assert graph_schema_from_file == graph_schema
Example #6
    def _add_end_to_end_features_for_inference(
            self, predict_nodes: Dict[Text, SchemaNode],
            preprocessors: List[Text]) -> Text:
        predict_nodes["tracker_to_message_converter"] = SchemaNode(
            **DEFAULT_PREDICT_KWARGS,
            needs={"tracker": PLACEHOLDER_TRACKER},
            uses=CoreFeaturizationInputConverter,
            fn="convert_for_inference",
            config={},
        )

        last_node_name = "tracker_to_message_converter"
        for preprocessor in preprocessors:
            node = dataclasses.replace(predict_nodes[preprocessor],
                                       needs={"messages": last_node_name})

            node_name = f"e2e_{preprocessor}"
            predict_nodes[node_name] = node
            last_node_name = node_name

        node_with_e2e_features = "end_to_end_features_provider"
        predict_nodes[node_with_e2e_features] = SchemaNode(
            **DEFAULT_PREDICT_KWARGS,
            needs={"messages": last_node_name},
            uses=CoreFeaturizationCollector,
            fn="collect",
            config={},
        )
        return node_with_e2e_features
Example #7
def test_unused_node(default_model_storage: ModelStorage):
    graph_schema = GraphSchema({
        "provide":
        SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
            is_target=True,
        ),
        "provide_2":
        SchemaNode(  # This node's output will not be returned
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
        ),
    })
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=ExecutionContext(graph_schema=graph_schema,
                                           model_id="1"),
    )
    results = runner.run()
    assert results == {"provide": 1}
Example #8
def test_loop(default_model_storage: ModelStorage):
    graph_schema = GraphSchema({
        "subtract_a":
        SchemaNode(
            needs={"i": "subtract_b"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={},
            is_target=False,
        ),
        "subtract_b":
        SchemaNode(
            needs={"i": "subtract_a"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={},
            is_target=True,
        ),
    })
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=ExecutionContext(graph_schema=graph_schema,
                                           model_id="1"),
    )
    with pytest.raises(GraphRunError):
        runner.run()
Example #9
def test_unused_node(default_model_storage: ModelStorage):
    graph_schema = GraphSchema({
        "provide":
        SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
            is_target=True,
        ),
        # This node will not fail as it will be pruned because it is not a target
        # or a target's ancestor.
        "assert_false":
        SchemaNode(
            needs={"i": "input"},
            uses=AssertComponent,
            fn="run_assert",
            constructor_name="create",
            config={"value_to_assert": "some_value"},
        ),
    })
    runner = DaskGraphRunner(
        graph_schema=graph_schema,
        model_storage=default_model_storage,
        execution_context=ExecutionContext(graph_schema=graph_schema,
                                           model_id="1"),
    )
    results = runner.run(inputs={"input": "some_other_value"})
    assert results == {"provide": 1}
Example #10
def create_test_schema(
    uses: Type,  # The unspecified type is on purpose to enable testing of invalid cases
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
) -> GraphSchema:
    parent_node = {}
    if parent:
        parent_node = {
            "parent": SchemaNode(
                needs={}, uses=parent, constructor_name="create", fn="run", config={}
            )
        }
    # noinspection PyTypeChecker
    return GraphSchema(
        {
            "my_node": SchemaNode(
                needs=needs or {},
                uses=uses,
                eager=eager,
                constructor_name=constructor_name,
                fn=run_fn,
                config={},
            ),
            **parent_node,
        }
    )
Example #11
def test_resources_fingerprints_remain_after_being_cached(
    temp_cache: LocalTrainingCache, train_with_schema: Callable
):
    train_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"test_value": "4"},
                is_target=True,
            ),
            "process": SchemaNode(
                needs={"resource": "train"},
                uses=PersistableTestComponent,
                fn="run_inference",
                constructor_name="load",
                config={},
                is_target=True,
            ),
        }
    )

    # Train and cache.
    train_with_schema(train_schema, temp_cache)

    # We can determine if a cached `Resource` has a static fingerprint by comparing two
    # subsequent cache entries of a child node.
    import sqlalchemy as sa

    with temp_cache._sessionmaker.begin() as session:
        # This will get the cache entry for the "process" node.
        query_for_most_recently_used_entry = sa.select(temp_cache.CacheEntry).order_by(
            temp_cache.CacheEntry.last_used.desc()
        )
        entry = session.execute(query_for_most_recently_used_entry).scalars().first()
        # The fingerprint key will incorporate the fingerprint of the `Resource`
        # provided by the "train" node. We save this key to compare after the next run.
        fingerprint_key = entry.fingerprint_key
        # Deleting the entry will force it to be recreated next train.
        delete_query = sa.delete(temp_cache.CacheEntry).where(
            temp_cache.CacheEntry.fingerprint_key == fingerprint_key
        )
        session.execute(delete_query)

    # In this second train, the Resource output of "train" will be retrieved from the
    # cache.
    train_with_schema(train_schema, temp_cache)

    with temp_cache._sessionmaker.begin() as session:
        # This will get the new cache entry for the "process" node.
        query_for_most_recently_used_entry = sa.select(temp_cache.CacheEntry).order_by(
            temp_cache.CacheEntry.last_used.desc()
        )
        entry = session.execute(query_for_most_recently_used_entry).scalars().first()
        # Assert the fingerprint key of the new entry is the same. This confirms that
        # the Resource from the cache has the same fingerprint.
        assert entry.fingerprint_key == fingerprint_key
Example #12
def test_graph_trainer_returns_prediction_runner(
    default_model_storage: ModelStorage,
    temp_cache: TrainingCache,
    tmp_path: Path,
    domain_path: Path,
):
    graph_trainer = GraphTrainer(
        model_storage=default_model_storage,
        cache=temp_cache,
        graph_runner_class=DaskGraphRunner,
    )

    test_value = "test_value"

    train_schema = GraphSchema({
        "train":
        SchemaNode(
            needs={},
            uses=PersistableTestComponent,
            fn="train",
            constructor_name="create",
            config={
                "test_value": test_value,
            },
            is_target=True,
        ),
        "load":
        SchemaNode(
            needs={"resource": "train"},
            uses=PersistableTestComponent,
            fn="run_inference",
            constructor_name="load",
            config={},
        ),
    })
    predict_schema = GraphSchema({
        "load":
        SchemaNode(
            needs={},
            uses=PersistableTestComponent,
            fn="run_inference",
            constructor_name="load",
            config={},
            is_target=True,
            resource=Resource("train"),
        ),
    })

    output_filename = tmp_path / "model.tar.gz"
    predict_graph_runner = graph_trainer.train(
        train_schema=train_schema,
        predict_schema=predict_schema,
        domain_path=domain_path,
        output_filename=output_filename,
    )
    assert isinstance(predict_graph_runner, DaskGraphRunner)
    assert output_filename.is_file()
    assert predict_graph_runner.run() == {"load": test_value}
Example #13
    def _add_nlu_predict_nodes(
        self,
        last_run_node: Text,
        predict_config: Dict[Text, Any],
        predict_nodes: Dict[Text, SchemaNode],
        train_nodes: Dict[Text, SchemaNode],
    ) -> Text:
        for idx, config in enumerate(predict_config["pipeline"]):
            component_name = config.pop("name")
            component = self._from_registry(component_name)
            component_name = f"{component_name}{idx}"
            if self.ComponentType.MODEL_LOADER in component.types:
                predict_nodes[f"provide_{component_name}"] = SchemaNode(
                    **DEFAULT_PREDICT_KWARGS,
                    needs={},
                    uses=component.clazz,
                    fn="provide",
                    config=config,
                )

            if component.types.intersection({
                    self.ComponentType.MESSAGE_TOKENIZER,
                    self.ComponentType.MESSAGE_FEATURIZER,
            }):
                last_run_node = self._add_nlu_predict_node_from_train(
                    predict_nodes,
                    component_name,
                    train_nodes,
                    last_run_node,
                    config,
                    from_resource=component.is_trainable,
                )
            elif component.types.intersection({
                    self.ComponentType.INTENT_CLASSIFIER,
                    self.ComponentType.ENTITY_EXTRACTOR,
            }):
                if component.is_trainable:
                    last_run_node = self._add_nlu_predict_node_from_train(
                        predict_nodes,
                        component_name,
                        train_nodes,
                        last_run_node,
                        config,
                        from_resource=component.is_trainable,
                    )
                else:
                    new_node = SchemaNode(
                        needs={"messages": last_run_node},
                        uses=component.clazz,
                        constructor_name="create",
                        fn="process",
                        config=config,
                    )

                    last_run_node = self._add_nlu_predict_node(
                        predict_nodes, new_node, component_name, last_run_node)

        return last_run_node
Example #14
def test_cycle(is_train_graph: bool):
    class MyTestComponent(TestComponentWithoutRun):
        def run(self, training_data: TrainingData) -> TrainingData:
            pass

    train_schema = GraphSchema({})
    predict_schema = DEFAULT_PREDICT_SCHEMA

    schema = GraphSchema({
        "A":
        SchemaNode(
            needs={"training_data": "B"},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            is_target=True,
            config={},
        ),
        "B":
        SchemaNode(
            needs={"training_data": "C"},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
        ),
        "C":
        SchemaNode(
            needs={"training_data": "A"},
            uses=MyTestComponent,
            eager=True,
            constructor_name="create",
            fn="run",
            config={},
        ),
    })

    if is_train_graph:
        train_schema = schema
    else:
        predict_schema = schema

    with pytest.raises(GraphSchemaValidationException, match="Cycles"):
        validation.validate(
            GraphModelConfiguration(
                train_schema=train_schema,
                predict_schema=predict_schema,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target="nlu_target",
            ))
Example #15
def create_test_schema(
    uses: Type,  # The unspecified type is on purpose to enable testing of invalid cases
    constructor_name: Text = "create",
    run_fn: Text = "run",
    needs: Optional[Dict[Text, Text]] = None,
    eager: bool = True,
    parent: Optional[Type[GraphComponent]] = None,
    language: Optional[Text] = None,
    is_train_graph: bool = True,
) -> GraphModelConfiguration:

    parent_node = {}
    if parent:
        parent_node = {
            "parent":
            SchemaNode(needs={},
                       uses=parent,
                       constructor_name="create",
                       fn="run",
                       config={})
        }

    train_schema = GraphSchema({})
    predict_schema = DEFAULT_PREDICT_SCHEMA
    # noinspection PyTypeChecker
    schema = GraphSchema(
        {
            "my_node":
            SchemaNode(
                needs=needs or {},
                uses=uses,
                eager=eager,
                constructor_name=constructor_name,
                fn=run_fn,
                config={},
            ),
            **DEFAULT_PREDICT_SCHEMA.nodes,
            **parent_node,
        }
    )

    if is_train_graph:
        train_schema = schema
    else:
        predict_schema = schema

    return GraphModelConfiguration(
        train_schema=train_schema,
        predict_schema=predict_schema,
        training_type=TrainingType.BOTH,
        core_target=None,
        nlu_target="nlu_target",
        language=language,
    )
Example #16
def test_graph_trainer_always_reads_input(
    temp_cache: TrainingCache,
    tmp_path: Path,
    train_with_schema: Callable,
    spy_on_all_components: Callable,
):

    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    train_schema = GraphSchema(
        {
            "read_file": SchemaNode(
                needs={},
                uses=FileReader,
                fn="read",
                constructor_name="create",
                config={"file_path": str(input_file)},
                is_input=True,
            ),
            "subtract": SchemaNode(
                needs={"i": "read_file"},
                uses=SubtractByX,
                fn="subtract_x",
                constructor_name="create",
                config={"x": 1},
            ),
            "assert_node": SchemaNode(
                needs={"i": "subtract"},
                uses=AssertComponent,
                fn="run_assert",
                constructor_name="create",
                config={"value_to_assert": 2},
                is_target=True,
            ),
        }
    )

    # The first train should call all the components and cache their outputs.
    mocks = spy_on_all_components(train_schema)
    train_with_schema(train_schema, temp_cache)
    assert node_call_counts(mocks) == {"read_file": 1, "subtract": 1, "assert_node": 1}

    # Nothing has changed this time, so no components will run
    # (just input nodes during fingerprint run).
    mocks = spy_on_all_components(train_schema)
    train_with_schema(train_schema, temp_cache)
    assert node_call_counts(mocks) == {"read_file": 1, "subtract": 0, "assert_node": 0}

    # When we update the input file, all the nodes will run again and the assert_node
    # will fail.
    input_file.write_text("5")
    with pytest.raises(GraphComponentException):
        train_with_schema(train_schema, temp_cache)
Example #17
def test_graph_trainer_train_logging_with_cached_components(
    tmp_path: Path,
    temp_cache: TrainingCache,
    train_with_schema: Callable,
    caplog: LogCaptureFixture,
):
    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    train_schema = GraphSchema({
        "input":
        SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
        ),
        "subtract":
        SchemaNode(
            needs={"i": "input"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={"x": 1},
            is_target=True,
            is_input=False,
        ),
        "cache_able_node":
        SchemaNode(
            needs={"suffix": "input"},
            uses=CacheableComponent,
            fn="run",
            constructor_name="create",
            config={},
            is_target=True,
            is_input=False,
        ),
    })

    # Train to cache
    train_with_schema(train_schema, temp_cache)

    # Train a second time
    with caplog.at_level(logging.INFO, logger="rasa.engine.training.hooks"):
        train_with_schema(train_schema, temp_cache)

        assert set(caplog.messages) == {
            "Starting to train component 'SubtractByX'.",
            "Finished training component 'SubtractByX'.",
            "Restored component 'CacheableComponent' from cache.",
        }
Example #18
    def _create_predict_nodes(
        self,
        config: Dict[Text, SchemaNode],
        preprocessors: List[Text],
        train_nodes: Dict[Text, SchemaNode],
    ) -> Dict[Text, SchemaNode]:

        predict_config = copy.deepcopy(config)
        predict_nodes = {}

        from rasa.nlu.classifiers.regex_message_handler import RegexMessageHandler

        predict_nodes["nlu_message_converter"] = SchemaNode(
            **DEFAULT_PREDICT_KWARGS,
            needs={"messages": PLACEHOLDER_MESSAGE},
            uses=NLUMessageConverter,
            fn="convert_user_message",
            config={},
        )

        last_run_nlu_node = "nlu_message_converter"

        if self._use_nlu:
            last_run_nlu_node = self._add_nlu_predict_nodes(
                last_run_nlu_node, predict_config, predict_nodes, train_nodes)

        domain_needs = {}
        if self._use_core:
            domain_needs["domain"] = "domain_provider"

        regex_handler_node_name = f"run_{RegexMessageHandler.__name__}"
        predict_nodes[regex_handler_node_name] = SchemaNode(
            **DEFAULT_PREDICT_KWARGS,
            needs={
                "messages": last_run_nlu_node,
                **domain_needs
            },
            uses=RegexMessageHandler,
            fn="process",
            config={},
        )

        if self._use_core:
            self._add_core_predict_nodes(
                predict_config,
                predict_nodes,
                train_nodes,
                preprocessors,
            )

        return predict_nodes
Example #19
def test_validate_after_adding_default_parameter(
    get_validation_method: Callable[..., ValidationMethodType],
    nlu: bool,
    core: bool,
):
    # create a schema and rely on rasa to fill in defaults later
    schema1 = _get_example_schema()
    schema1.nodes["nlu-node"] = SchemaNode(needs={},
                                           uses=WhitespaceTokenizer,
                                           constructor_name="",
                                           fn="",
                                           config={})
    schema1.nodes["core-node"] = SchemaNode(needs={},
                                            uses=RulePolicy,
                                            constructor_name="",
                                            fn="",
                                            config={})

    # training
    validate = get_validation_method(finetuning=False,
                                     load=False,
                                     nlu=nlu,
                                     core=core,
                                     graph_schema=schema1)
    validate(importer=EmptyDataImporter())

    # same schema -- we just explicitly pass default values
    schema2 = copy.deepcopy(schema1)
    schema2.nodes["nlu-node"] = SchemaNode(
        needs={},
        uses=WhitespaceTokenizer,
        constructor_name="",
        fn="",
        config=WhitespaceTokenizer.get_default_config(),
    )
    schema2.nodes["core-node"] = SchemaNode(
        needs={},
        uses=RulePolicy,
        constructor_name="",
        fn="",
        config=RulePolicy.get_default_config(),
    )

    # finetuning *does not raise*
    loaded_validate = get_validation_method(finetuning=True,
                                            load=True,
                                            nlu=nlu,
                                            core=core,
                                            graph_schema=schema2)
    loaded_validate(importer=EmptyDataImporter())
Example #20
def test_core_warn_if_data_but_no_policy(monkeypatch: MonkeyPatch,
                                         policy_type: Optional[Type[Policy]]):

    importer = TrainingDataImporter.load_from_dict(
        domain_path="data/test_e2ebot/domain.yml",
        training_data_paths=[
            "data/test_e2ebot/data/nlu.yml",
            "data/test_e2ebot/data/stories.yml",
        ],
    )

    nodes = {
        "tokenizer": SchemaNode({}, WhitespaceTokenizer, "", "", {}),
        "nlu-component": SchemaNode({}, DIETClassifier, "", "", {}),
    }
    if policy_type is not None:
        nodes["some-policy"] = SchemaNode({}, policy_type, "", "", {})
    graph_schema = GraphSchema(nodes)

    validator = DefaultV1RecipeValidator(graph_schema)
    monkeypatch.setattr(
        validator,
        "_raise_if_a_rule_policy_is_incompatible_with_domain",
        lambda *args, **kwargs: None,
    )
    monkeypatch.setattr(validator, "_warn_if_no_rule_policy_is_contained",
                        lambda: None)
    monkeypatch.setattr(
        validator,
        "_warn_if_rule_based_data_is_unused_or_missing",
        lambda *args, **kwargs: None,
    )

    if policy_type is None:
        with pytest.warns(
                UserWarning,
                match="Found data for training policies but no policy"
        ) as records:
            validator.validate(importer)
        assert len(records) == 1
    else:
        with pytest.warns(
                UserWarning,
                match="Slot auto-fill has been removed in 3.0") as records:
            validator.validate(importer)
        assert all([
            warn.message.args[0].startswith("Slot auto-fill has been removed")
            for warn in records.list
        ])
Example #21
def test_validation_with_core_target_used_by_other_node():
    class CoreTargetConsumer(TestComponentWithoutRun):
        def run(self,
                core_target_output: PolicyPrediction) -> PolicyPrediction:
            pass

    graph_config = GraphSchema(
        {
            "A":
            SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
            "B":
            SchemaNode(
                needs={},
                uses=TestCoreTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
            "C":
            SchemaNode(
                needs={"core_target_output": "B"},
                uses=CoreTargetConsumer,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
        }
    )

    with pytest.raises(GraphSchemaValidationException,
                       match="uses the Core target 'B' as input"):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=graph_config,
                training_type=TrainingType.BOTH,
                language=None,
                core_target="B",
                nlu_target="A",
            ))
Example #22
def test_graph_trainer_train_logging(
    tmp_path: Path,
    temp_cache: TrainingCache,
    train_with_schema: Callable,
    caplog: LogCaptureFixture,
):

    input_file = tmp_path / "input_file.txt"
    input_file.write_text("3")

    train_schema = GraphSchema({
        "input":
        SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
        ),
        "subtract 2":
        SchemaNode(
            needs={},
            uses=ProvideX,
            fn="provide",
            constructor_name="create",
            config={},
            is_target=True,
            is_input=True,
        ),
        "subtract":
        SchemaNode(
            needs={"i": "input"},
            uses=SubtractByX,
            fn="subtract_x",
            constructor_name="create",
            config={"x": 1},
            is_target=True,
            is_input=False,
        ),
    })

    with caplog.at_level(logging.INFO, logger="rasa.engine.training.hooks"):
        train_with_schema(train_schema, temp_cache)

    assert caplog.messages == [
        "Starting to train component 'SubtractByX'.",
        "Finished training component 'SubtractByX'.",
    ]
Example #23
def test_validation_with_core_target_wrong_type():
    graph_config = GraphSchema(
        {
            "A":
            SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            ),
        }
    )

    with pytest.raises(
            GraphSchemaValidationException,
            match="Core model's .* invalid return type",
    ):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=graph_config,
                training_type=TrainingType.BOTH,
                language=None,
                core_target="A",
                nlu_target="A",
            ))
Example #24
def test_core_raise_if_a_rule_policy_is_incompatible_with_domain(
    monkeypatch: MonkeyPatch,
):

    domain = Domain.empty()

    num_instances = 2
    nodes = {}
    configs_for_rule_policies = []
    for feature_type in POLICY_CLASSSES:
        for idx in range(num_instances):
            unique_name = f"{feature_type.__name__}-{idx}"
            unique_config = {unique_name: None}
            nodes[unique_name] = SchemaNode({}, feature_type, "", "",
                                            unique_config)
            if feature_type == RulePolicy:
                configs_for_rule_policies.append(unique_config)

    mock = Mock()
    monkeypatch.setattr(RulePolicy, "raise_if_incompatible_with_domain", mock)

    validator = DefaultV1RecipeValidator(graph_schema=GraphSchema(nodes))
    monkeypatch.setattr(
        validator,
        "_warn_if_rule_based_data_is_unused_or_missing",
        lambda *args, **kwargs: None,
    )
    importer = DummyImporter()
    validator.validate(importer)

    # Note: this works because we validate nodes in insertion order.
    # (`call` is `unittest.mock.call`; this assumes the validator passes
    # `config` and `domain` as keyword arguments, as the original dicts suggest.)
    assert mock.call_args_list == [
        call(config=config, domain=domain)
        for config in configs_for_rule_policies
    ]
Example #25
def _test_validation_warnings_with_default_configs(
    training_data: TrainingData,
    component_types: List[Type],
    warnings: Optional[List[Text]] = None,
):
    dummy_importer = DummyImporter(training_data=training_data)
    graph_schema = GraphSchema({
        f"{idx}": SchemaNode(
            needs={},
            uses=component_type,
            constructor_name="",
            fn="",
            config=component_type.get_default_config(),
        )
        for idx, component_type in enumerate(component_types)
    })
    validator = DefaultV1RecipeValidator(graph_schema)
    if not warnings:
        with pytest.warns(None) as records:
            validator.validate(dummy_importer)
            assert len(records) == 0, [
                warning.message for warning in records.list
            ]
    else:
        with pytest.warns(None) as records:
            validator.validate(dummy_importer)
        assert len(records) == len(warnings), ", ".join(warning.message.args[0]
                                                        for warning in records)
        assert all(
            re.match(expected_warning, warning.message.args[0])
            for warning, expected_warning in zip(records, warnings)
        )
Example #26
def test_validation_with_missing_nlu_target():
    graph_config = GraphSchema(
        {
            "A": SchemaNode(
                needs={},
                uses=TestNLUTarget,
                eager=True,
                constructor_name="create",
                fn="run",
                config={},
            )
        }
    )

    with pytest.raises(
        GraphSchemaValidationException, match="no target for the 'nlu_target'"
    ):
        validation.validate(
            GraphModelConfiguration(
                train_schema=GraphSchema({}),
                predict_schema=graph_config,
                training_type=TrainingType.BOTH,
                language=None,
                core_target=None,
                nlu_target=None,
            )
        )
Example #27
def _get_example_schema(num_epochs: int = 5,
                        other_parameter: int = 10) -> GraphSchema:
    example_configs = [
        {
            "epochs": num_epochs,
            "other-parameter": other_parameter,
            "some-parameter": "bla",
        },
        {
            "epochs": num_epochs,
            "yet-other-parameter": 344
        },
        {
            "no-epochs-defined-here": None
        },
    ]
    return GraphSchema(
        nodes={
            f"node-{idx}": SchemaNode(needs={},
                                      uses=GraphComponent,
                                      constructor_name="",
                                      fn="",
                                      config=config)
            for idx, config in enumerate(example_configs)
        })
Example #28
    def _add_nlu_process_node(
        self,
        train_nodes: Dict[Text, SchemaNode],
        component_class: Type[GraphComponent],
        component_name: Text,
        last_run_node: Text,
        component_config: Dict[Text, Any],
        from_resource: Optional[Text] = None,
    ) -> Text:
        resource_needs = {}
        if from_resource:
            resource_needs = {"resource": from_resource}

        model_provider_needs = self._get_model_provider_needs(
            train_nodes, component_class)

        node_name = f"run_{component_name}"
        train_nodes[node_name] = SchemaNode(
            needs={
                "training_data": last_run_node,
                **resource_needs,
                **model_provider_needs,
            },
            uses=component_class,
            constructor_name="load",
            fn="process_training_data",
            config=component_config,
        )
        return node_name
Example #29
def test_invalid_module_error_when_deserializing_schemas(tmp_path: Path):
    graph_schema = GraphSchema(
        {
            "train": SchemaNode(
                needs={},
                uses=PersistableTestComponent,
                fn="train",
                constructor_name="create",
                config={"some_config": 123455, "some more config": [{"nested": "hi"}]},
            )
        }
    )

    serialized = graph_schema.as_dict()

    # Pretend the module is invalid for some reason
    serialized["nodes"]["train"]["uses"] = "invalid.class"

    # Dump it to make sure it's actually serializable
    file_path = tmp_path / "my_graph.yml"
    rasa.shared.utils.io.write_yaml(serialized, file_path)

    serialized_graph_schema_from_file = rasa.shared.utils.io.read_yaml_file(file_path)

    with pytest.raises(GraphSchemaException):
        _ = GraphSchema.from_dict(serialized_graph_schema_from_file)
Example #30
    def _add_nlu_train_node(
        self,
        train_nodes: Dict[Text, SchemaNode],
        component: Type[GraphComponent],
        component_name: Text,
        last_run_node: Text,
        config: Dict[Text, Any],
        cli_parameters: Dict[Text, Any],
    ) -> Text:
        config_from_cli = self._extra_config_from_cli(cli_parameters,
                                                      component, config)
        model_provider_needs = self._get_model_provider_needs(
            train_nodes, component)

        train_node_name = f"train_{component_name}"
        train_nodes[train_node_name] = SchemaNode(
            needs={
                "training_data": last_run_node,
                **model_provider_needs
            },
            uses=component,
            constructor_name="load" if self._is_finetuning else "create",
            fn="train",
            config={
                **config,
                **config_from_cli
            },
            is_target=True,
        )
        return train_node_name