Пример #1
0
def test_xlmr_init_mock():
    """Check that ``init_model`` leaves a non-``None`` model on the plugin.

    ``const.XLMR_MODULE`` and ``const.XLMR_MULTI_CLASS_MODEL`` are pointed at
    the ``MockClassifier`` test double for the duration of the test.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    try:
        xlmr_clf = XLMRMultiClass(model_dir=".",
                                  dest="output.intents",
                                  debug=False)
        xlmr_clf.init_model(5)
        assert xlmr_clf.model is not None
    finally:
        # Restore the patched constants even when the test fails, so the
        # overrides cannot leak into tests that run afterwards.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
Пример #2
0
def test_xlmr_plugin_no_module_error():
    """Check that constructing the plugin raises ``ModuleNotFoundError``
    when ``const.XLMR_MODULE`` names a module that cannot be imported.
    """
    save_val = const.XLMR_MODULE
    const.XLMR_MODULE = "this-module-doesn't-exist"

    try:
        with pytest.raises(ModuleNotFoundError):
            XLMRMultiClass(model_dir=".", dest="output.intents", debug=False)
    finally:
        # Always undo the override; otherwise a failure here would leave the
        # bogus module name in place for every later test.
        const.XLMR_MODULE = save_val
Пример #3
0
def test_xlmr_plugin_when_no_labelencoder_saved():
    """Check that a plugin built with ``model_dir="."`` has no model.

    The test expects construction to succeed and ``model`` to be ``None``;
    presumably no label encoder file exists in the working directory
    (NOTE(review): confirm against the test environment).
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    try:
        xlmr_clf = XLMRMultiClass(model_dir=".",
                                  dest="output.intents",
                                  debug=False)
        assert isinstance(xlmr_clf, XLMRMultiClass)
        assert xlmr_clf.model is None
    finally:
        # Restore the patched constants regardless of the test outcome.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
Пример #4
0
def test_xlmr_init_mock():
    """Check that an ``args_map`` of ``{"invalid": "value"}`` is rejected.

    Constructing ``XLMRMultiClass`` with that mapping is expected to raise
    ``ValueError``.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    try:
        with pytest.raises(ValueError):
            XLMRMultiClass(
                model_dir=".",
                dest="output.intents",
                debug=False,
                args_map={"invalid": "value"},
            )
    finally:
        # Restore the patched constants regardless of the test outcome.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
Пример #5
0
def test_xlmr_plugin_when_labelencoder_EOFError(capsys):
    """Check that the plugin has no model when the label encoder file is empty.

    An empty ``.pkl`` file is created with ``tempfile.mkstemp`` and
    ``const.LABELENCODER_FILE`` is pointed at it, so the plugin finds that
    file in ``model_dir``. Judging by the test name, loading it is expected
    to hit an ``EOFError`` that the plugin handles (NOTE(review): confirm
    against the plugin implementation).

    Args:
        capsys: pytest capture fixture; capturing is disabled while the
            plugin is constructed.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    save_label_encoder_file = const.LABELENCODER_FILE

    # mkstemp returns an *open* OS-level descriptor; close it right away so
    # it is not leaked (and so the file can be removed on every platform).
    file_descriptor, file_path = tempfile.mkstemp(suffix=".pkl")
    os.close(file_descriptor)
    directory, file_name = os.path.split(file_path)

    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    const.LABELENCODER_FILE = file_name
    try:
        with capsys.disabled():
            xlmr_plugin = XLMRMultiClass(
                model_dir=directory,
                dest="output.intents",
                debug=False,
            )
            assert xlmr_plugin.model is None
    finally:
        # Undo the global overrides first, then drop the temp file, so both
        # happen even when the assertion above fails.
        const.LABELENCODER_FILE = save_label_encoder_file
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
        os.remove(file_path)
Пример #6
0
def test_inference(payload):
    """Train the mock-backed plugin inside a workflow and check its top intent.

    A workflow of ``MergeASROutputPlugin`` followed by ``XLMRMultiClass`` is
    trained on four labelled transcripts and then run on the transcripts from
    ``payload``. The first predicted intent must match the expected label
    with a score above 0.9.

    Args:
        payload: test case mapping; ``payload["input"]`` is iterated as the
            transcripts to classify and ``payload["expected"]["label"]`` is
            the expected intent name.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)

    try:
        # Start from a clean slate: drop any label encoder file that is
        # already present in the model directory.
        if os.path.exists(file_path):
            os.remove(file_path)

        transcripts = payload.get("input")
        intent = payload["expected"]["label"]

        xlmr_clf = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
        )

        merge_asr_output_plugin = MergeASROutputPlugin(
            dest="input.clf_feature", debug=False)

        workflow = Workflow([merge_asr_output_plugin, xlmr_clf])

        train_df = pd.DataFrame([
            {
                "data": json.dumps([[{
                    "transcript": "yes"
                }]]),
                "labels": "_confirm_",
            },
            {
                "data": json.dumps([[{
                    "transcript": "yea"
                }]]),
                "labels": "_confirm_",
            },
            {
                "data": json.dumps([[{
                    "transcript": "no"
                }]]),
                "labels": "_cancel_",
            },
            {
                "data": json.dumps([[{
                    "transcript": "nope"
                }]]),
                "labels": "_cancel_",
            },
        ])

        workflow.train(train_df)
        assert isinstance(
            xlmr_clf.model,
            MockClassifier), "model should be a MockClassifier after training."

        _, output = workflow.run(input_=Input(utterances=[[{
            "transcript": transcript
        } for transcript in transcripts]]))
        assert output[const.INTENTS][0]["name"] == intent
        assert output[const.INTENTS][0]["score"] > 0.9
    finally:
        # Clean up the file and the global overrides even when an assertion
        # fails, so later tests do not see leftovers.
        if os.path.exists(file_path):
            os.remove(file_path)
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
Пример #7
0
def test_invalid_operations():
    """Exercise the failure paths of ``XLMRMultiClass``.

    Asserted behavior:
      * ``init_model(None)`` raises ``ValueError``.
      * ``validate`` returns ``False`` for an empty frame and for a frame
        with ``apples``/``fruit`` columns (with and without ``use_state``).
      * After ``train`` on either frame, loading the label encoder file
        yields ``None``.
      * ``inference`` yields ``_error_`` as the first intent name, both for
        the plain plugin and for the ``use_state`` plugin once it has been
        given a model.
      * ``save`` raises ``ValueError``.
      * ``inference`` raises ``AttributeError`` on the plain plugin once a
        ``MockClassifier`` is assigned to it directly.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)

    try:
        # Start from a clean slate: drop any label encoder file that is
        # already present in the model directory.
        if os.path.exists(file_path):
            os.remove(file_path)

        xlmr_clf = XLMRMultiClass(model_dir=directory,
                                  dest="output.intents",
                                  debug=False)
        xlmr_clf_state = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
            use_state=True,
        )

        with pytest.raises(ValueError):
            xlmr_clf.init_model(None)

        train_df_empty = pd.DataFrame()
        train_df_invalid = pd.DataFrame([
            {
                "apples": "yes",
                "fruit": "fruit"
            },
            {
                "apples": "yea",
                "fruit": "fruit"
            },
            {
                "apples": "no",
                "fruit": "fruit"
            },
            {
                "apples": "nope",
                "fruit": "fruit"
            },
        ])
        assert xlmr_clf.validate(train_df_empty) is False

        xlmr_clf.train(train_df_empty)
        assert load_file(file_path, mode="rb", loader=pickle.load) is None
        assert xlmr_clf.validate(train_df_invalid) is False
        assert xlmr_clf_state.validate(train_df_invalid) is False

        xlmr_clf.train(train_df_invalid)
        assert load_file(file_path, mode="rb", loader=pickle.load) is None
        assert xlmr_clf.inference(["text"])[0].name == "_error_"

        xlmr_clf_state.model = MockClassifier(const.XLMR_MODEL,
                                              const.XLMR_MODEL_TIER)
        assert xlmr_clf_state.inference(["text"])[0].name == "_error_"

        with pytest.raises(ValueError):
            xlmr_clf.save()

        xlmr_clf.model = MockClassifier(const.XLMR_MODEL,
                                        const.XLMR_MODEL_TIER)
        with pytest.raises(AttributeError):
            xlmr_clf.inference(["text"])
    finally:
        # Clean up the file and the global overrides even when an assertion
        # fails, so later tests do not see leftovers.
        if os.path.exists(file_path):
            os.remove(file_path)
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
Пример #8
0
def test_train_xlmr_mock():
    """Train two mock-backed plugins and check the reloaded label encoder.

    One plugin is trained on ``data``/``labels`` rows and a ``use_state``
    plugin on rows that also carry a ``state`` column. A third plugin built
    afterwards on the same ``model_dir`` must expose a label encoder with
    exactly two classes.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)

    try:
        xlmr_clf = XLMRMultiClass(model_dir=directory,
                                  dest="output.intents",
                                  debug=False)

        xlmr_clf_state = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
            use_state=True,
        )

        train_df = pd.DataFrame([
            {
                "data": "yes",
                "labels": "_confirm_"
            },
            {
                "data": "yea",
                "labels": "_confirm_"
            },
            {
                "data": "no",
                "labels": "_cancel_"
            },
            {
                "data": "nope",
                "labels": "_cancel_"
            },
        ])

        train_df_state = pd.DataFrame([
            {
                "data": "yes",
                "labels": "_confirm_",
                "state": "state1"
            },
            {
                "data": "yea",
                "labels": "_confirm_",
                "state": "state2"
            },
            {
                "data": "no",
                "labels": "_cancel_",
                "state": "state3"
            },
            {
                "data": "nope",
                "labels": "_cancel_",
                "state": "state4"
            },
        ])

        xlmr_clf.train(train_df)
        xlmr_clf_state.train(train_df_state)

        # This copy loads from the same directory that was trained previously.
        # So this instance would have read the labelencoder saved.
        xlmr_clf_copy = XLMRMultiClass(model_dir=directory,
                                       dest="output.intents",
                                       debug=False)

        assert len(xlmr_clf_copy.labelencoder.classes_) == 2
    finally:
        # Guard the removal: if training failed before the file was written,
        # an unconditional os.remove would raise FileNotFoundError here and
        # hide the real failure.
        if os.path.exists(file_path):
            os.remove(file_path)
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name