Example #1
def inner(config: Optional[Dict[Text, Any]] = None) -> CountVectorsFeaturizer:
    # Build a CountVectorsFeaturizer from the default config merged with any overrides.
    config = config or {}
    return CountVectorsFeaturizer.create(
        {**CountVectorsFeaturizer.get_default_config(), **config},
        default_model_storage,
        Resource("count_vectors_featurizer"),
        default_execution_context,
    )
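This factory reads like the body of a pytest fixture; below is a minimal hedged usage sketch of calling it from a test. The fixture name create_featurizer and the concrete config overrides are assumptions, not taken from the source.

def test_create_with_char_ngrams(create_featurizer):
    # Assumed usage: the fixture returns the inner factory above; "analyzer",
    # "min_ngram" and "max_ngram" are standard CountVectorsFeaturizer options,
    # but the values here are purely illustrative.
    featurizer = create_featurizer({"analyzer": "char_wb", "min_ngram": 1, "max_ngram": 3})
    assert isinstance(featurizer, CountVectorsFeaturizer)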
Example #2
def inner(
    config: Optional[Dict[Text, Any]] = None, is_finetuning: bool = False
) -> CountVectorsFeaturizer:
    # Load a previously persisted CountVectorsFeaturizer, optionally in fine-tuning mode.
    config = config or {}
    return CountVectorsFeaturizer.load(
        {**CountVectorsFeaturizer.get_default_config(), **config},
        default_model_storage,
        Resource("count_vectors_featurizer"),
        dataclasses.replace(default_execution_context, is_finetuning=is_finetuning),
    )
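A similar hedged sketch for the load path, exercising the is_finetuning flag. The fixture name load_featurizer and the OOV_token override are assumptions.

def test_load_for_finetuning(load_featurizer):
    # Assumed usage: the fixture returns the inner factory above, which loads the
    # persisted featurizer with is_finetuning propagated into the execution context.
    featurizer = load_featurizer({"OOV_token": "oov"}, is_finetuning=True)
    assert isinstance(featurizer, CountVectorsFeaturizer)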
Example #3
def featurizer_sparse(tmpdir):
    """Generate a featurizer for tests."""
    node_storage = LocalModelStorage(pathlib.Path(tmpdir))
    node_resource = Resource("sparse_feat")
    context = ExecutionContext(node_storage, node_resource)
    return CountVectorsFeaturizer(
        config=CountVectorsFeaturizer.get_default_config(),
        resource=node_resource,
        model_storage=node_storage,
        execution_context=context,
    )
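The docstring suggests this helper is registered as a pytest fixture; the sketch below assumes that registration and shows a test consuming it. The test body is illustrative, not from the source.

def test_featurizer_sparse_is_count_vectors(featurizer_sparse):
    # Assumed usage: pytest injects the fixture built from a temporary directory.
    assert isinstance(featurizer_sparse, CountVectorsFeaturizer)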
Example #4
def test_convert_training_examples(
    spacy_nlp: Language,
    text: Text,
    intent: Optional[Text],
    entities: Optional[List[Dict[Text, Any]]],
    attributes: List[Text],
    real_sparse_feature_sizes: Dict[Text, Dict[Text, List[int]]],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
):
    message = Message(data={TEXT: text, INTENT: intent, ENTITIES: entities})

    tokenizer = SpacyTokenizer.create(
        SpacyTokenizer.get_default_config(),
        default_model_storage,
        Resource("tokenizer"),
        default_execution_context,
    )
    count_vectors_featurizer = CountVectorsFeaturizer.create(
        CountVectorsFeaturizer.get_default_config(),
        default_model_storage,
        Resource("count_featurizer"),
        default_execution_context,
    )
    spacy_featurizer = SpacyFeaturizer.create(
        SpacyFeaturizer.get_default_config(),
        default_model_storage,
        Resource("spacy_featurizer"),
        default_execution_context,
    )

    message.set(SPACY_DOCS[TEXT], spacy_nlp(text))

    training_data = TrainingData([message])
    tokenizer.process_training_data(training_data)
    count_vectors_featurizer.train(training_data)
    count_vectors_featurizer.process_training_data(training_data)
    spacy_featurizer.process_training_data(training_data)

    entity_tag_spec = [
        EntityTagSpec(
            "entity",                            # tag name
            {0: "O", 1: "name", 2: "location"},  # id -> tag label
            {"O": 0, "name": 1, "location": 2},  # tag label -> id
            3,                                   # number of distinct tags
        )
    ]
    output, sparse_feature_sizes = model_data_utils.featurize_training_examples(
        [message], attributes=attributes, entity_tag_specs=entity_tag_spec)

    assert len(output) == 1
    for attribute in attributes:
        assert attribute in output[0]
    for attribute in {INTENT, TEXT, ENTITIES} - set(attributes):
        assert attribute not in output[0]
    # we have sparse sentence, sparse sequence, dense sentence, and dense sequence
    # features in the list
    assert len(output[0][TEXT]) == 4
    if INTENT in attributes:
        # we will just have sparse sentence features
        assert len(output[0][INTENT]) == 1
    if ENTITIES in attributes:
        # one entry per entity tag spec
        assert len(output[0][ENTITIES]) == len(entity_tag_spec)
    # check that it calculates sparse_feature_sizes correctly
    assert sparse_feature_sizes == real_sparse_feature_sizes
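For orientation, real_sparse_feature_sizes (typed Dict[Text, Dict[Text, List[int]]] above) maps each attribute to its per-feature-type sparse sizes. The literal below shows an illustrative shape only; the keys and values are assumptions, not taken from the source.

example_sparse_feature_sizes = {
    # Assumed, illustrative values: one size per sparse featurizer in the pipeline;
    # the actual numbers depend on the training vocabulary.
    "text": {"sequence": [12], "sentence": [12]},
}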