示例#1
0
def test_mitie_featurizer(
    create: Callable[[Dict[Text, Any]], MitieFeaturizer],
    mitie_model: MitieModel,
    mitie_tokenizer: MitieTokenizer,
):
    """Check the features MITIE produces for a short, tokenized utterance.

    The message is tokenized with ``mitie_tokenizer`` and its tokens are fed
    to ``features_for_tokens`` together with the model's word feature
    extractor. The combined row count and the first five values of two
    reference rows are then compared against fixed numbers.
    """
    featurizer = create({"alias": "mitie_featurizer"})

    message = Message(data={TEXT: "Hey how are you today"})
    mitie_tokenizer.process([message])

    sequence_features, sentence_features = featurizer.features_for_tokens(
        message.get(TOKENS_NAMES[TEXT]),
        mitie_model.word_feature_extractor,
    )

    # Reference values: leading five entries of the first sequence row and
    # of the last sentence row.
    first_token_head = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    sentence_head = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    # Presumably five token rows plus one sentence row -- confirm against
    # the featurizer implementation.
    assert len(sequence_features) + len(sentence_features) == 6
    assert np.allclose(sequence_features[0][:5], first_token_head, atol=1e-5)
    assert np.allclose(sentence_features[-1][:5], sentence_head, atol=1e-5)
def test_train_load_predict_loop(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    mitie_model: MitieModel,
    mitie_tokenizer: MitieTokenizer,
):
    """Train a ``MitieIntentClassifier``, load it again and classify "hi".

    The classifier is created with its default config, trained on the demo
    training data, loaded via ``MitieIntentClassifier.load`` using the same
    storage and resource, and finally asked to process a tokenized "hi"
    message. The predicted intent must be "greet" with confidence above zero.
    """
    classifier_resource = Resource("mitie_classifier")
    classifier = MitieIntentClassifier.create(
        MitieIntentClassifier.get_default_config(),
        default_model_storage,
        classifier_resource,
        default_execution_context,
    )

    examples = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.yml"
    )
    # The classifier needs tokens on the training messages.
    mitie_tokenizer.process_training_data(examples)
    classifier.train(examples, mitie_model)

    # Rebind to a freshly loaded instance so the prediction below goes
    # through the load path rather than the in-memory trained object.
    classifier = MitieIntentClassifier.load(
        MitieIntentClassifier.get_default_config(),
        default_model_storage,
        classifier_resource,
        default_execution_context,
    )

    greeting = Message({TEXT: "hi"})
    mitie_tokenizer.process([greeting])
    classifier.process([greeting], mitie_model)

    prediction = greeting.data[INTENT]
    assert prediction[INTENT_NAME_KEY] == "greet"
    assert prediction[PREDICTED_CONFIDENCE_KEY] > 0
示例#3
0
def test_mitie(text, expected_tokens, expected_indices):
    """Tokenize ``text`` with MITIE and compare token texts and spans.

    ``expected_indices`` is indexed per token: element 0 is the expected
    start and element 1 the expected end.
    """
    produced = MitieTokenizer().tokenize(Message(text), attribute=TEXT)

    actual_texts = [token.text for token in produced]
    assert actual_texts == expected_tokens

    actual_starts = [token.start for token in produced]
    assert actual_starts == [span[0] for span in expected_indices]

    actual_ends = [token.end for token in produced]
    assert actual_ends == [span[1] for span in expected_indices]
示例#4
0
 def inner(config: Dict[Text, Any]) -> MitieTokenizer:
     """Create a ``MitieTokenizer`` from its defaults overlaid with ``config``.

     Keys present in ``config`` take precedence over the default values.
     """
     merged_config = dict(MitieTokenizer.get_default_config())
     merged_config.update(config)
     tokenizer_resource = Resource("mitie_tokenizer")
     return MitieTokenizer.create(
         merged_config,
         default_model_storage,
         tokenizer_resource,
         default_execution_context,
     )
示例#5
0
def test_mitie_featurizer_train(
    create: Callable[[Dict[Text, Any]], MitieFeaturizer],
    mitie_model: MitieModel,
    mitie_tokenizer: MitieTokenizer,
):
    """Check dense features set on a training message for each attribute.

    The same sentence is stored as text and response, and "intent" as the
    intent. After tokenizing and featurizing the training data, the text and
    response attributes must carry the reference values, while the intent
    attribute must have no dense features at all.
    """

    def _dense_arrays(attribute):
        # Fetch the (sequence, sentence) pair and unwrap ``.features`` on
        # each member that is truthy; falsy members are returned unchanged.
        sequence, sentence = message.get_dense_features(attribute, [])
        sequence = sequence.features if sequence else sequence
        sentence = sentence.features if sentence else sentence
        return sequence, sentence

    featurizer = create({"alias": "mitie_featurizer"})

    utterance = "Hey how are you today"
    message = Message(data={TEXT: utterance})
    message.set(RESPONSE, utterance)
    message.set(INTENT, "intent")
    mitie_tokenizer.process_training_data(TrainingData([message]))

    featurizer.process_training_data(TrainingData([message]), mitie_model)

    first_token_head = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    sentence_head = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    # Text and response hold the same sentence, so both are checked against
    # the same reference values.
    for attribute in (TEXT, RESPONSE):
        sequence_rows, sentence_rows = _dense_arrays(attribute)

        assert len(message.get(TOKENS_NAMES[attribute])) == len(sequence_rows)
        assert np.allclose(sequence_rows[0][:5], first_token_head, atol=1e-5)
        assert np.allclose(sentence_rows[-1][:5], sentence_head, atol=1e-5)

    sequence_rows, sentence_rows = _dense_arrays(INTENT)

    assert sequence_rows is None
    assert sentence_rows is None
示例#6
0
def test_mitie_add_cls_token():
    """Check token texts and offsets when ``use_cls_token`` is enabled.

    The expected token list ends with ``CLS_TOKEN`` and the expected offsets
    include a fourth entry (19) for it.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer({"use_cls_token": True})
    utterance = "Forecast for lunch"

    token_texts = [token.text for token in tokenizer.tokenize(utterance)]
    assert token_texts == ["Forecast", "for", "lunch", CLS_TOKEN]

    token_offsets = [token.offset for token in tokenizer.tokenize(utterance)]
    assert token_offsets == [0, 9, 13, 19]
示例#7
0
def test_custom_intent_symbol(text, expected_tokens):
    """Check intent tokens when intent tokenization uses "+" as split symbol.

    ``text`` is used both as the message text and as its intent; after
    training the tokenizer on that single message, the intent token texts
    must equal ``expected_tokens``.
    """
    tokenizer = MitieTokenizer(
        {"intent_tokenization_flag": True, "intent_split_symbol": "+"}
    )

    example = Message(text)
    example.set(INTENT, text)

    tokenizer.train(TrainingData([example]))

    intent_tokens = example.get(TOKENS_NAMES[INTENT])
    assert [token.text for token in intent_tokens] == expected_tokens
示例#8
0
def test_mitie_featurizer(mitie_feature_extractor, default_config):
    """Compare the first five MITIE feature values against reference numbers.

    A featurizer is created from a minimal component config, the sentence is
    tokenized with a default ``MitieTokenizer`` and the resulting features
    are sliced to their first five entries for the comparison.
    """
    from rasa.nlu.featurizers.mitie_featurizer import MitieFeaturizer

    featurizer = MitieFeaturizer.create(
        {"name": "MitieFeaturizer"}, RasaNLUModelConfig()
    )

    tokens = MitieTokenizer().tokenize("Hey how are you today")
    features = featurizer.features_for_tokens(tokens, mitie_feature_extractor)

    reference = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(features[:5], reference, atol=1e-5)
def test_load_from_untrained(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    mitie_model: MitieModel,
    mitie_tokenizer: MitieTokenizer,
):
    """Load a classifier for a resource nothing was trained into.

    ``MitieIntentClassifier.load`` is called directly, without any prior
    ``create``/``train`` step in this test. Processing a tokenized "hi"
    message must then yield an intent with no name and zero confidence.
    """
    untrained_resource = Resource("some_resource")

    classifier = MitieIntentClassifier.load(
        MitieIntentClassifier.get_default_config(),
        default_model_storage,
        untrained_resource,
        default_execution_context,
    )

    probe = Message({TEXT: "hi"})
    mitie_tokenizer.process([probe])
    classifier.process([probe], mitie_model)

    empty_prediction = {"name": None, "confidence": 0.0}
    assert probe.data[INTENT] == empty_prediction
示例#10
0
def test_mitie_featurizer_train(mitie_feature_extractor):
    """Check dense features after training the featurizer on one message.

    The sentence is stored as both text and response, with "intent" as the
    intent. Text and response must match the reference values; the intent
    attribute must end up with no dense features.
    """

    def _dense_arrays(attribute):
        # Unwrap ``.features`` from each member of the (sequence, sentence)
        # pair when it is truthy; otherwise hand the member back untouched.
        sequence, sentence = example.get_dense_features(attribute, [])
        sequence = sequence.features if sequence else sequence
        sentence = sentence.features if sentence else sentence
        return sequence, sentence

    featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())

    utterance = "Hey how are you today"
    example = Message(data={TEXT: utterance})
    example.set(RESPONSE, utterance)
    example.set(INTENT, "intent")
    MitieTokenizer().train(TrainingData([example]))

    # The feature extractor is handed to ``train`` as a keyword argument.
    extractor_kwargs = {"mitie_feature_extractor": mitie_feature_extractor}
    featurizer.train(
        TrainingData([example]),
        RasaNLUModelConfig(),
        **extractor_kwargs,
    )

    first_token_head = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    cls_head = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    for attribute in (TEXT, RESPONSE):
        sequence_rows, sentence_rows = _dense_arrays(attribute)

        assert len(example.get(TOKENS_NAMES[attribute])) == len(sequence_rows)
        assert np.allclose(sequence_rows[0][:5], first_token_head, atol=1e-5)
        assert np.allclose(sentence_rows[-1][:5], cls_head, atol=1e-5)

    sequence_rows, sentence_rows = _dense_arrays(INTENT)

    assert sequence_rows is None
    assert sentence_rows is None
示例#11
0
def test_mitie_featurizer_no_sequence(mitie_feature_extractor, default_config):
    """Check MITIE features with ``return_sequence`` set to ``False``.

    The sentence has ``CLS_TOKEN`` appended before tokenizing. Element 0 of
    the returned features is taken and its first five values are compared
    with the reference numbers.
    """
    from rasa.nlu.featurizers.dense_featurizer.mitie_featurizer import MitieFeaturizer

    featurizer = MitieFeaturizer.create(
        {"name": "MitieFeaturizer", "return_sequence": False},
        RasaNLUModelConfig(),
    )

    tokens = MitieTokenizer().tokenize(f"Hey how are you today {CLS_TOKEN}")

    all_features = featurizer.features_for_tokens(tokens, mitie_feature_extractor)
    first_row = all_features[0]

    reference = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(first_row[:5], reference, atol=1e-5)
示例#12
0
def test_load_from_untrained_but_with_resource_existing(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    mitie_model: MitieModel,
    mitie_tokenizer: MitieTokenizer,
):
    """Load a classifier whose resource was opened for writing but left empty.

    The storage's write context for the resource is entered and exited
    without writing anything. Loading and processing a tokenized "hi"
    message must then yield an intent with no name and zero confidence.
    """
    empty_resource = Resource("some_resource")

    # Nothing is written inside the context: this makes sure the directory
    # exists but the model file itself doesn't.
    with default_model_storage.write_to(empty_resource):
        pass

    classifier = MitieIntentClassifier.load(
        MitieIntentClassifier.get_default_config(),
        default_model_storage,
        empty_resource,
        default_execution_context,
    )

    probe = Message({TEXT: "hi"})
    mitie_tokenizer.process([probe])
    classifier.process([probe], mitie_model)

    empty_prediction = {"name": None, "confidence": 0.0}
    assert probe.data[INTENT] == empty_prediction
示例#13
0
def test_mitie_featurizer(mitie_feature_extractor, default_config):
    """Check MITIE features with ``return_sequence`` set to ``True``.

    The sentence has ``CLS_TOKEN`` appended before tokenizing. Element 0 of
    the returned features is taken and its first five values are compared
    with the reference numbers.
    """
    from rasa.nlu.featurizers.dense_featurizer.mitie_featurizer import MitieFeaturizer

    featurizer = MitieFeaturizer.create(
        {"name": "MitieFeaturizer", "return_sequence": True},
        RasaNLUModelConfig(),
    )

    tokens = MitieTokenizer().tokenize(f"Hey how are you today {CLS_TOKEN}")

    all_features = featurizer.features_for_tokens(tokens, mitie_feature_extractor)
    first_row = all_features[0]

    reference = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    assert np.allclose(first_row[:5], reference, atol=1e-5)
示例#14
0
def test_mitie_featurizer(mitie_feature_extractor):
    """Check MITIE features computed from a message's tokens.

    The message is processed by a default ``MitieTokenizer``; the last token
    is dropped before featurizing. The result must have six rows, with the
    first and last rows matching the reference values in their first five
    entries.
    """
    featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())

    message = Message("Hey how are you today")
    MitieTokenizer().process(message)

    all_tokens = message.get(TOKENS_NAMES[TEXT])
    tokens_without_cls = all_tokens[:-1]  # remove CLS token

    features = featurizer.features_for_tokens(
        tokens_without_cls, mitie_feature_extractor
    )

    first_token_head = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    cls_head = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    assert len(features) == 6
    assert np.allclose(features[0][:5], first_token_head, atol=1e-5)
    assert np.allclose(features[-1][:5], cls_head, atol=1e-5)
示例#15
0
def test_mitie():
    """Check MITIE token texts and offsets for two sample sentences.

    The second sentence contains non-ASCII characters and a contraction.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer
    tokenizer = MitieTokenizer()

    # (input text, expected token texts, expected token offsets)
    cases = (
        (
            "Forecast for lunch",
            ["Forecast", "for", "lunch"],
            [0, 9, 13],
        ),
        (
            "hey ńöñàśçií how're you?",
            ["hey", "ńöñàśçií", "how", "'re", "you", "?"],
            [0, 4, 13, 16, 20, 23],
        ),
    )

    for sentence, expected_texts, expected_offsets in cases:
        assert [tok.text for tok in tokenizer.tokenize(sentence)] == expected_texts
        assert [tok.offset for tok in tokenizer.tokenize(sentence)] == expected_offsets
示例#16
0
def test_mitie_featurizer_train(mitie_feature_extractor):
    """Check the dense features stored on a message after training.

    The sentence is used as both text and response, with "intent" as the
    intent. The dense features for text and response must have one row per
    token and match the reference values; the intent must have none.
    """
    featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())

    utterance = "Hey how are you today"
    example = Message(utterance)
    example.set(RESPONSE, utterance)
    example.set(INTENT, "intent")
    MitieTokenizer().train(TrainingData([example]))

    # The feature extractor is handed to ``train`` as a keyword argument.
    extractor_kwargs = {"mitie_feature_extractor": mitie_feature_extractor}
    featurizer.train(
        TrainingData([example]),
        RasaNLUModelConfig(),
        **extractor_kwargs,
    )

    first_token_head = np.array(
        [0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00, -8.26445103e00]
    )
    cls_head = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    # Text and response hold the same sentence, so the same reference values
    # apply to both.
    for attribute in (TEXT, RESPONSE):
        dense = example.get(DENSE_FEATURE_NAMES[attribute])

        assert len(example.get(TOKENS_NAMES[attribute])) == len(dense)
        assert np.allclose(dense[0][:5], first_token_head, atol=1e-5)
        assert np.allclose(dense[-1][:5], cls_head, atol=1e-5)

    intent_dense = example.get(DENSE_FEATURE_NAMES[INTENT])

    assert intent_dense is None
示例#17
0
def test_mitie():
    """Check MITIE token texts and offsets for two sample sentences.

    The second sentence contains non-ASCII characters and a contraction.
    """
    from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer

    tokenizer = MitieTokenizer()

    def _check(sentence, expected_texts, expected_offsets):
        # Tokenize once per assertion, texts first and offsets second.
        assert [tok.text for tok in tokenizer.tokenize(sentence)] == expected_texts
        assert [tok.offset for tok in tokenizer.tokenize(sentence)] == expected_offsets

    _check("Forecast for lunch", ["Forecast", "for", "lunch"], [0, 9, 13])
    _check(
        "hey ńöñàśçií how're you?",
        ["hey", "ńöñàśçií", "how", "'re", "you", "?"],
        [0, 4, 13, 16, 20, 23],
    )
示例#18
0
from rasa.nlu.constants import (TEXT, SPACY_DOCS)

# Demo script: run one sentence through several tokenizers and log the token
# texts each one produces. The same ``message`` object is reused throughout,
# so the steps below depend on their order.
logger = logging_setup()

test_input = "Okay, pick up this yellow banana for me."
message = Message(test_input)

# Whitespace tokenizer: needs nothing set on the message beforehand here.
tk = WhitespaceTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Whitespace: {}'.format([t.text for t in tokens]))

tk = SpacyTokenizer()

# Store the spaCy result for the input on the message before tokenizing.
# NOTE(review): ``spacy_nlp`` is not defined in this snippet; presumably the
# SpacyTokenizer reads the stored doc from the message -- confirm.
message.set(SPACY_DOCS[TEXT], spacy_nlp(test_input))
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('SpaCy: {}'.format([t.text for t in tokens]))

tk = MitieTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Mitie: {}'.format([t.text for t in tokens]))

tk = ConveRTTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('ConveRT: {}'.format([t.text for t in tokens]))

# The HFTransformersNLP component processes the message before the language
# model tokenizer runs.
# NOTE(review): presumably LanguageModelTokenizer depends on what that
# ``process`` call stores on the message -- confirm.
tk = LanguageModelTokenizer()
transformers_nlp = HFTransformersNLP({"model_name": "bert"})
transformers_nlp.process(message)
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('BERT: {}'.format([t.text for t in tokens]))
示例#19
0
def mitie_tokenizer() -> MitieTokenizer:
    """Return a ``MitieTokenizer`` built from its default configuration."""
    default_config = MitieTokenizer.get_default_config()
    return MitieTokenizer(default_config)
示例#20
0
from rasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer import CountVectorsFeaturizer

from rasa.nlu.constants import SPACY_DOCS, TEXT, DENSE_FEATURE_NAMES, TOKENS_NAMES, SPARSE_FEATURE_NAMES

# Demo script: compute dense features for one sentence with several
# featurizers and log the shape of each result.
logger = logging_setup()

featurizer = SpacyFeaturizer.create({}, RasaNLUModelConfig())

test_input = "Okay, pick up this yellow banana for me."
message = Message(test_input)

# spaCy: store the processed input on the message, let the featurizer set
# its features, then read the dense text features back from the message.
# NOTE(review): ``spacy_nlp`` is not defined in this snippet -- confirm
# where it comes from.
message.set(SPACY_DOCS[TEXT], spacy_nlp(test_input))
featurizer._set_spacy_features(message)
vecs = message.get(DENSE_FEATURE_NAMES[TEXT])
logger.info("SpaCy: {}".format(vecs.shape))

# MITIE: start from a fresh message, tokenize it, and featurize the tokens
# directly instead of going through the message.
# NOTE(review): ``mitie_feature_extractor`` is not defined in this snippet.
message = Message(test_input)
featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())
MitieTokenizer().process(message)
tokens = message.get(TOKENS_NAMES[TEXT])
vecs = featurizer.features_for_tokens(tokens, mitie_feature_extractor)
logger.info("Mitie: {}".format(vecs.shape))

# ConveRT: tokenize, append the CLS token, overwrite the message's text
# tokens with the result, then featurize via the message.
featurizer = ConveRTFeaturizer.create({}, RasaNLUModelConfig())
tokens = ConveRTTokenizer().tokenize(message, attribute=TEXT)
tokens = Tokenizer.add_cls_token(tokens, attribute=TEXT)
message.set(TOKENS_NAMES[TEXT], tokens)
featurizer.process(message)
vecs = message.get(DENSE_FEATURE_NAMES[TEXT])
logger.info("ConveRT: {}".format(vecs.shape))