def test_xlmr_init_mock():
    """Check that ``init_model`` leaves a model on the plugin when mocked.

    ``const.XLMR_MODULE`` and ``const.XLMR_MULTI_CLASS_MODEL`` are temporarily
    pointed at the ``MockClassifier`` name in
    ``tests.plugin.text.classification.test_xlmr``. The original values are
    restored in a ``finally`` clause so that a failing assertion cannot leak
    the patched configuration into other tests.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    try:
        xlmr_clf = XLMRMultiClass(model_dir=".", dest="output.intents", debug=False)
        xlmr_clf.init_model(5)
        assert xlmr_clf.model is not None
    finally:
        # Always undo the module-level patching, even when the test fails.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
def test_xlmr_plugin_no_module_error():
    """Constructing the plugin with an unimportable module name must raise.

    ``const.XLMR_MODULE`` is replaced with a name that cannot be imported and
    the constructor is expected to raise ``ModuleNotFoundError``. The original
    value is restored in a ``finally`` clause so the bogus module name never
    outlives this test, even if the expectation is not met.
    """
    save_val = const.XLMR_MODULE
    const.XLMR_MODULE = "this-module-doesn't-exist"
    try:
        with pytest.raises(ModuleNotFoundError):
            XLMRMultiClass(model_dir=".", dest="output.intents", debug=False)
    finally:
        const.XLMR_MODULE = save_val
def test_xlmr_plugin_when_no_labelencoder_saved():
    """A freshly constructed plugin should exist but carry no model.

    The plugin is built against ``model_dir="."`` with the mock classifier
    patched in; the test asserts the instance type and that ``model`` is
    ``None``. (Per the test name, this presumably covers the case where no
    label encoder file is present in the model directory.)

    The patched ``const`` values are restored in a ``finally`` clause so a
    failure here does not affect other tests.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    try:
        xlmr_clf = XLMRMultiClass(model_dir=".", dest="output.intents", debug=False)
        assert isinstance(xlmr_clf, XLMRMultiClass)
        assert xlmr_clf.model is None
    finally:
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
def test_xlmr_invalid_args_map():
    """An unsupported ``args_map`` must make the constructor raise ``ValueError``.

    This test was previously defined under a name already used by another
    test in this module; the later definition rebinds the module attribute,
    so only one of the two would be collected. It now has a unique name that
    describes what it checks.

    The patched ``const`` values are restored in a ``finally`` clause so a
    failure here does not affect other tests.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    try:
        with pytest.raises(ValueError):
            XLMRMultiClass(
                model_dir=".",
                dest="output.intents",
                debug=False,
                args_map={"invalid": "value"},
            )
    finally:
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
def test_xlmr_plugin_when_labelencoder_EOFError(capsys):
    """The plugin should come up without a model when the encoder file is empty.

    An empty ``.pkl`` file is created with ``tempfile.mkstemp`` and
    ``const.LABELENCODER_FILE`` is pointed at it, with the file's directory
    used as ``model_dir``. The test asserts that ``model`` is ``None`` after
    construction. (Per the test name, loading the empty file is presumably
    expected to hit ``EOFError`` inside the plugin.)

    Args:
        capsys: pytest capture fixture; capturing is disabled while the
            plugin is constructed.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    save_label_encoder_file = const.LABELENCODER_FILE

    fd, file_path = tempfile.mkstemp(suffix=".pkl")
    # mkstemp hands back an open OS-level descriptor; close it right away so
    # it is not leaked -- only the (empty) file on disk is needed.
    os.close(fd)
    directory, file_name = os.path.split(file_path)

    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"
    const.LABELENCODER_FILE = file_name
    try:
        with capsys.disabled():
            xlmr_plugin = XLMRMultiClass(
                model_dir=directory,
                dest="output.intents",
                debug=False,
            )
            assert xlmr_plugin.model is None
    finally:
        # Restore configuration first so that a failure while deleting the
        # temp file cannot leave the constants patched.
        const.LABELENCODER_FILE = save_label_encoder_file
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
        if os.path.exists(file_path):
            os.remove(file_path)
def test_inference(payload):
    """Train the mocked classifier in a workflow and check the predicted intent.

    A workflow of ``MergeASROutputPlugin`` followed by ``XLMRMultiClass`` is
    trained on four rows labelled ``_confirm_`` / ``_cancel_`` and then run on
    the transcripts from ``payload``. The first intent in the output must match
    the expected label with a score above 0.9.

    Any label encoder file already present in ``/tmp`` is removed before the
    plugin is created. The file and the patched ``const`` values are cleaned
    up in a ``finally`` clause, so a failing assertion does not leave stale
    state for other tests.

    Args:
        payload: mapping with an ``"input"`` entry (the transcripts) and an
            ``"expected"`` entry containing the expected ``"label"``.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)
    try:
        # Start from a clean slate: drop any leftover label encoder file.
        if os.path.exists(file_path):
            os.remove(file_path)

        transcripts = payload.get("input")
        intent = payload["expected"]["label"]

        xlmr_clf = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
        )
        merge_asr_output_plugin = MergeASROutputPlugin(
            dest="input.clf_feature", debug=False
        )
        workflow = Workflow([merge_asr_output_plugin, xlmr_clf])

        train_df = pd.DataFrame(
            [
                {
                    "data": json.dumps([[{"transcript": "yes"}]]),
                    "labels": "_confirm_",
                },
                {
                    "data": json.dumps([[{"transcript": "yea"}]]),
                    "labels": "_confirm_",
                },
                {
                    "data": json.dumps([[{"transcript": "no"}]]),
                    "labels": "_cancel_",
                },
                {
                    "data": json.dumps([[{"transcript": "nope"}]]),
                    "labels": "_cancel_",
                },
            ]
        )

        workflow.train(train_df)
        assert isinstance(
            xlmr_clf.model, MockClassifier
        ), "model should be a MockClassifier after training."

        utterances = [[{"transcript": transcript} for transcript in transcripts]]
        _, output = workflow.run(input_=Input(utterances=utterances))
        assert output[const.INTENTS][0]["name"] == intent
        assert output[const.INTENTS][0]["score"] > 0.9
    finally:
        # Restore configuration before touching the filesystem so a cleanup
        # error cannot leave the constants patched.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
        if os.path.exists(file_path):
            os.remove(file_path)
def test_invalid_operations():
    """Exercise the plugin's handling of invalid inputs and call sequences.

    With the mock classifier patched in, the test checks that:

    * ``init_model(None)`` raises ``ValueError``;
    * ``validate`` returns ``False`` for an empty frame and for a frame that
      lacks the expected columns (with and without ``use_state=True``);
    * ``train`` on such frames leaves no loadable label encoder file;
    * ``inference`` yields an ``_error_`` intent when no model is set, and
      for the ``use_state`` plugin even after a model is assigned;
    * ``save`` raises ``ValueError`` when there is no model;
    * ``inference`` raises ``AttributeError`` once a model is assigned to the
      plugin that was never successfully trained.

    Any label encoder file already present in ``/tmp`` is removed first. The
    file and the patched ``const`` values are cleaned up in a ``finally``
    clause so a failing assertion does not leak state into other tests.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)
    try:
        # Start from a clean slate: drop any leftover label encoder file.
        if os.path.exists(file_path):
            os.remove(file_path)

        xlmr_clf = XLMRMultiClass(
            model_dir=directory, dest="output.intents", debug=False
        )
        xlmr_clf_state = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
            use_state=True,
        )

        with pytest.raises(ValueError):
            xlmr_clf.init_model(None)

        train_df_empty = pd.DataFrame()
        train_df_invalid = pd.DataFrame(
            [
                {"apples": "yes", "fruit": "fruit"},
                {"apples": "yea", "fruit": "fruit"},
                {"apples": "no", "fruit": "fruit"},
                {"apples": "nope", "fruit": "fruit"},
            ]
        )

        assert xlmr_clf.validate(train_df_empty) is False

        xlmr_clf.train(train_df_empty)
        assert load_file(file_path, mode="rb", loader=pickle.load) is None
        assert xlmr_clf.validate(train_df_invalid) is False
        assert xlmr_clf_state.validate(train_df_invalid) is False

        xlmr_clf.train(train_df_invalid)
        assert load_file(file_path, mode="rb", loader=pickle.load) is None

        assert xlmr_clf.inference(["text"])[0].name == "_error_"

        xlmr_clf_state.model = MockClassifier(const.XLMR_MODEL, const.XLMR_MODEL_TIER)
        assert xlmr_clf_state.inference(["text"])[0].name == "_error_"

        # save() is called while xlmr_clf still has no model assigned.
        with pytest.raises(ValueError):
            xlmr_clf.save()

        xlmr_clf.model = MockClassifier(const.XLMR_MODEL, const.XLMR_MODEL_TIER)
        with pytest.raises(AttributeError):
            xlmr_clf.inference(["text"])
    finally:
        # Restore configuration before touching the filesystem so a cleanup
        # error cannot leave the constants patched.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
        if os.path.exists(file_path):
            os.remove(file_path)
def test_train_xlmr_mock():
    """Train the mocked plugin and confirm a new instance sees two classes.

    Two plugins sharing ``/tmp`` as ``model_dir`` are trained: one on a plain
    frame, one (``use_state=True``) on a frame with an extra ``state`` column.
    A third instance is then created on the same directory and its
    ``labelencoder`` is expected to expose exactly two classes.

    The label encoder file and the patched ``const`` values are cleaned up in
    a ``finally`` clause. The removal is guarded by an existence check so that
    a failure before the file is written surfaces as the original error
    rather than as ``FileNotFoundError`` from the cleanup.
    """
    save_module_name = const.XLMR_MODULE
    save_model_name = const.XLMR_MULTI_CLASS_MODEL
    const.XLMR_MODULE = "tests.plugin.text.classification.test_xlmr"
    const.XLMR_MULTI_CLASS_MODEL = "MockClassifier"

    directory = "/tmp"
    file_path = os.path.join(directory, const.LABELENCODER_FILE)
    try:
        xlmr_clf = XLMRMultiClass(
            model_dir=directory, dest="output.intents", debug=False
        )
        xlmr_clf_state = XLMRMultiClass(
            model_dir=directory,
            dest="output.intents",
            debug=False,
            use_state=True,
        )

        train_df = pd.DataFrame(
            [
                {"data": "yes", "labels": "_confirm_"},
                {"data": "yea", "labels": "_confirm_"},
                {"data": "no", "labels": "_cancel_"},
                {"data": "nope", "labels": "_cancel_"},
            ]
        )
        train_df_state = pd.DataFrame(
            [
                {"data": "yes", "labels": "_confirm_", "state": "state1"},
                {"data": "yea", "labels": "_confirm_", "state": "state2"},
                {"data": "no", "labels": "_cancel_", "state": "state3"},
                {"data": "nope", "labels": "_cancel_", "state": "state4"},
            ]
        )

        xlmr_clf.train(train_df)
        xlmr_clf_state.train(train_df_state)

        # This copy loads from the same directory that was trained previously.
        # So this instance would have read the labelencoder saved.
        xlmr_clf_copy = XLMRMultiClass(
            model_dir=directory, dest="output.intents", debug=False
        )
        assert len(xlmr_clf_copy.labelencoder.classes_) == 2
    finally:
        # Restore configuration before touching the filesystem so a cleanup
        # error cannot leave the constants patched.
        const.XLMR_MODULE = save_module_name
        const.XLMR_MULTI_CLASS_MODEL = save_model_name
        if os.path.exists(file_path):
            os.remove(file_path)