Пример #1
0
def test_spacy_intent_featurizer(spacy_nlp_component: SpacyNLP,
                                 spacy_model: SpacyModel):
    """SpacyFeaturizer must not attach "intent_features" to intent examples.

    Loads the demo training data, runs the SpacyNLP preprocessor and a
    default-config SpacyFeaturizer over it, then checks that no intent
    example gained an "intent_features" entry.
    """
    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    spacy_nlp_component.process_training_data(td, spacy_model)
    spacy_featurizer = create_spacy_featurizer({})
    spacy_featurizer.process_training_data(td)

    # `is not None` already evaluates to a bool; the previous
    # `True if ... else False` ternary was redundant.
    intent_features_exist = np.array([
        example.get("intent_features") is not None
        for example in td.intent_examples
    ])

    # no intent features should have been set
    assert not any(intent_features_exist)
Пример #2
0
def test_spacy_preprocessor_process_training_data(
        spacy_nlp_component: SpacyNLP, spacy_model: SpacyModel):
    """SpacyNLP should attach a lower-cased spaCy Doc for each dense-featurizable attribute."""
    nlu_data = TrainingDataImporter.load_from_dict(training_data_paths=[
        "data/test_e2ebot/data/nlu.yml",
        "data/test_e2ebot/data/stories.yml",
    ]).get_nlu_data()

    spacy_nlp_component.process_training_data(nlu_data, spacy_model)

    for example in nlu_data.training_examples:
        for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
            text = example.data.get(attribute)
            if not text:
                continue
            # The component stores the parsed Doc under the SPACY_DOCS key
            # and parses the lower-cased attribute text.
            spacy_doc = example.data[SPACY_DOCS[attribute]]
            assert isinstance(spacy_doc, spacy.tokens.doc.Doc)
            assert spacy_doc.text == text.lower()
Пример #3
0
def test_persist_and_load(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """A persisted SklearnIntentClassifier, once re-loaded, predicts the same intents as the trained instance."""
    # Preprocess: spaCy docs, tokens and dense features are prerequisites
    # for the sklearn classifier.
    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model
    )
    training_data, loaded_pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=training_data,
    )

    default_sklearn_intent_classifier.train(training_data)

    restored_classifier = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )

    # Run both the restored and the in-memory classifier on independent
    # copies of the same examples.
    from_restored = restored_classifier.process(
        copy.deepcopy(training_data).training_examples
    )
    from_trained = default_sklearn_intent_classifier.process(
        copy.deepcopy(training_data).training_examples
    )

    for restored_msg, trained_msg in zip(from_restored, from_trained):
        assert restored_msg.get("intent") == trained_msg.get("intent")
Пример #4
0
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]], CRFEntityExtractor],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizer,
    spacy_featurizer: SpacyFeaturizer,
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """A CRFEntityExtractor restored from storage behaves identically to the trained one for any config."""
    extractor = crf_entity_extractor(config_params)

    # Train on the demo data, running the full spaCy preprocessing chain first.
    training_data = RasaFileImporter(
        training_data_paths=["data/examples/rasa"]
    ).get_nlu_data()
    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model)
    training_data = spacy_tokenizer.process_training_data(training_data)
    training_data = spacy_featurizer.process_training_data(training_data)
    extractor.train(training_data)

    # Build one fully preprocessed message and keep an untouched copy
    # for the re-loaded extractor.
    msg = Message(data={TEXT: "I am looking for an italian restaurant"})
    batch = spacy_nlp_component.process([msg], spacy_model)
    batch = spacy_tokenizer.process(batch)
    prepared = spacy_featurizer.process(batch)[0]
    prepared_copy = copy.deepcopy(prepared)

    result_trained = extractor.process([prepared])[0]

    restored = CRFEntityExtractor.load(
        {**CRFEntityExtractor.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    result_restored = restored.process([prepared_copy])[0]

    # Both extractors must produce byte-identical messages ...
    assert result_restored.fingerprint() == result_trained.fingerprint()

    # ... and find exactly the one expected entity.
    entities = result_restored.get(ENTITIES)
    assert len(entities) == 1
    assert entities[0]["entity"] == "cuisine"
    assert entities[0]["value"] == "italian"
Пример #5
0
def test_process_unfeaturized_input(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData,
                                              List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """Processing a message without features leaves its text intact and sets no intent."""
    # Train a classifier on fully preprocessed data.
    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model)
    training_data, loaded_pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=training_data,
    )
    default_sklearn_intent_classifier.train(training_data)

    classifier = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )

    # Feed a raw, unfeaturized message straight to the loaded classifier.
    raw_text = "message text"
    result = classifier.process([Message(data={TEXT: raw_text})])[0]

    assert result.get(TEXT) == raw_text
    assert not result.get(INTENT)