示例#1
0
    def ignore_test_trainer_persist(self):
        """
        Check that persisting a trained pipeline also writes its metadata.

        Builds a ``Trainer`` from the test config, trains it on a temporary
        JSON data set, persists the result and asserts that the persisted
        ``metadata.json`` contains a ``trained_at`` entry.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0
        # the first pipeline component (expected to be char_tokenizer)
        # should have been created
        assert trainer.pipeline[0] is not None

        # create the tmp train set and remove it again even if loading fails
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        trainer.train(train_data)
        persisted_path = trainer.persist(config['path'], config['project'],
                                         config['fixed_model_name'])
        try:
            # load persisted metadata; decode explicitly as UTF-8 so the
            # result does not depend on the locale's default encoding
            metadata_path = os.path.join(persisted_path, 'metadata.json')
            with io.open(metadata_path, encoding='utf-8') as f:
                metadata = json.load(f)
            assert 'trained_at' in metadata
        finally:
            # rm tmp files and dirs, also when a check above fails, so a
            # failing run does not leave a persisted model behind
            shutil.rmtree(config['path'], ignore_errors=False)
示例#2
0
    def ignore_test_pipeline_flow(self):
        """
        Check the train -> persist -> load round trip of a pipeline.

        Trains an interpreter on a temporary JSON data set, parses a sample
        sentence, persists the trainer, loads the persisted model and asserts
        that the loaded interpreter yields the same ``classifylabel`` name
        for the same sentence.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0

        # create the tmp train set and remove it again even if loading fails
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        interpreter = trainer.train(train_data)
        assert interpreter is not None
        # the same sample text is parsed before and after the round trip
        sample_text = "点连接拿红包啦"
        out1 = interpreter.parse(sample_text)

        # test persist and load
        persisted_path = trainer.persist(config['path'], config['project'],
                                         config['fixed_model_name'])
        try:
            interpreter_loaded = Interpreter.load(persisted_path, config)
            out2 = interpreter_loaded.parse(sample_text)
            label_before = out1.get("classifylabel").get("name")
            label_after = out2.get("classifylabel").get("name")
            assert label_before == label_after
        finally:
            # remove tmp models, also when a check above fails
            shutil.rmtree(config['path'], ignore_errors=True)
示例#3
0
    def ignore_test_load_and_persist_without_train(self):
        """
        Check that an untrained pipeline can be persisted and loaded.

        Persists a freshly built ``Trainer`` without calling ``train`` and
        asserts that the loaded interpreter has a pipeline and returns a
        non-``None`` result when parsing plain sentences.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0

        # The train set is created and loaded as in the sibling tests, but it
        # is deliberately not passed to trainer.train(): this test covers
        # persist/load without training. Remove the file even on failure.
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        # test persist and load
        persisted_path = trainer.persist(config['path'], config['project'],
                                         config['fixed_model_name'])
        try:
            interpreter_loaded = Interpreter.load(persisted_path, config)
            assert interpreter_loaded.pipeline
            assert interpreter_loaded.parse("hello") is not None
            assert interpreter_loaded.parse(
                "Hello today is Monday, again!") is not None
        finally:
            # remove tmp models, also when a check above fails
            shutil.rmtree(config['path'], ignore_errors=False)
    def ignore_test_pipeline_flow(self):
        """
        Verify that a persisted model classifies like the in-memory one.

        The trainer is trained on a temporary JSON data set, a sample
        sentence is parsed with the resulting interpreter, the trainer is
        persisted and reloaded, and the ``classifylabel`` name produced by
        the reloaded interpreter must equal the one produced before.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0

        # build the tmp train set; always delete it, even if loading raises
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        interpreter = trainer.train(train_data)
        assert interpreter is not None
        # one shared sample so both parse calls see identical input
        sample_text = "点连接拿红包啦"
        out1 = interpreter.parse(sample_text)

        # test persist and load
        persisted_path = trainer.persist(config['path'],
                                         config['project'],
                                         config['fixed_model_name'])
        try:
            interpreter_loaded = Interpreter.load(persisted_path, config)
            out2 = interpreter_loaded.parse(sample_text)
            name_before = out1.get("classifylabel").get("name")
            name_after = out2.get("classifylabel").get("name")
            assert name_before == name_after
        finally:
            # remove tmp models even if loading or the comparison fails
            shutil.rmtree(config['path'], ignore_errors=True)
    def ignore_test_load_and_persist_without_train(self):
        """
        Verify persist and load for a pipeline that was never trained.

        A ``Trainer`` built from the test config is persisted without a
        prior ``train`` call; the loaded interpreter must expose a pipeline
        and return a non-``None`` result for the parsed sentences.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0

        # The train set is still created and loaded, but on purpose it is
        # never given to trainer.train() because this test skips training.
        # The tmp file is deleted even when loading raises.
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        # test persist and load
        persisted_path = trainer.persist(config['path'],
                                         config['project'],
                                         config['fixed_model_name'])
        try:
            interpreter_loaded = Interpreter.load(persisted_path, config)
            assert interpreter_loaded.pipeline
            assert interpreter_loaded.parse("hello") is not None
            assert interpreter_loaded.parse(
                "Hello today is Monday, again!") is not None
        finally:
            # remove tmp models even if one of the checks above fails
            shutil.rmtree(config['path'], ignore_errors=False)
    def ignore_test_trainer_persist(self):
        """
        Verify that ``Trainer.persist`` stores metadata next to the model.

        After training on a temporary JSON data set the trainer is
        persisted, and the ``metadata.json`` file inside the persisted path
        must contain a ``trained_at`` key.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)

        trainer = Trainer(config)
        assert len(trainer.pipeline) > 0
        # the first pipeline component (expected to be char_tokenizer)
        # should have been created
        assert trainer.pipeline[0] is not None

        # build the tmp train set; always delete it, even if loading raises
        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            rm_tmp_file("tmp.json")

        trainer.train(train_data)
        persisted_path = trainer.persist(config['path'],
                                         config['project'],
                                         config['fixed_model_name'])
        try:
            # load persisted metadata with an explicit UTF-8 decoding rather
            # than whatever the platform locale happens to select
            metadata_path = os.path.join(persisted_path, 'metadata.json')
            with io.open(metadata_path, encoding='utf-8') as f:
                metadata = json.load(f)
            assert 'trained_at' in metadata
        finally:
            # rm tmp files and dirs even if reading or the check fails
            shutil.rmtree(config['path'], ignore_errors=False)
示例#7
0
 def ignore_test_load_local_data(self):
     """
     Check loading of local JSON-format training data.

     Writes a temporary JSON data file, loads it and asserts that 1000
     training examples were read and that the first example's ``data``
     holds a ``label`` entry but no ``text`` entry.

     :return: None
     """
     tmp_path = create_tmp_test_jsonfile("tmp.json")
     try:
         train_data = load_local_data(tmp_path)
     finally:
         # remove the tmp file even if loading raises
         rm_tmp_file("tmp.json")

     assert train_data is not None
     assert len(train_data.training_examples) == 1000
     first_example = train_data.training_examples[0]
     assert "text" not in first_example.data
     assert "label" in first_example.data
 def ignore_test_load_local_data(self):
     """
     Verify that a local JSON data file is loaded into training examples.

     The loaded data must not be ``None``, must contain 1000 training
     examples, and the ``data`` of the first example must include ``label``
     and must not include ``text``.

     :return: None
     """
     tmp_path = create_tmp_test_jsonfile("tmp.json")
     try:
         train_data = load_local_data(tmp_path)
     finally:
         # always delete the tmp file, including when loading fails
         rm_tmp_file("tmp.json")

     assert train_data is not None
     assert len(train_data.training_examples) == 1000
     first_data = train_data.training_examples[0].data
     assert "text" not in first_data
     assert "label" in first_data
示例#9
0
    def ignore_test_train_model_empty_pipeline(self):
        """
        Check that training with an empty pipeline raises ``ValueError``.

        The test config's ``pipeline`` entry is replaced by an empty list
        before the ``Trainer`` is built and trained.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)
        config['pipeline'] = []

        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            # remove the tmp file even if loading raises
            rm_tmp_file("tmp.json")

        # Both calls stay inside the context: either constructing the
        # trainer or training it may be the one that raises.
        with pytest.raises(ValueError):
            trainer = Trainer(config)
            trainer.train(train_data)
示例#10
0
    def ignore_test_train_model_empty_pipeline(self):
        """
        Verify that a pipeline without components cannot be trained.

        With ``config['pipeline']`` set to an empty list, building and
        training a ``Trainer`` is expected to raise ``ValueError``.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)
        config['pipeline'] = []

        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            # always delete the tmp file, including when loading fails
            rm_tmp_file("tmp.json")

        # Construction and training are both covered, since the error may
        # come from either call.
        with pytest.raises(ValueError):
            trainer = Trainer(config)
            trainer.train(train_data)
示例#11
0
    def ignore_test_handles_pipeline_with_non_existing_component(self):
        """
        Check the error raised for an unknown pipeline component.

        Appends ``"unknown_component"`` to the configured pipeline and
        asserts that building/training the ``Trainer`` raises an exception
        whose message contains ``"Failed to find component"``.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)
        config['pipeline'].append("unknown_component")

        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            # remove the tmp file even if loading raises
            rm_tmp_file("tmp.json")

        # Both calls stay inside the context: either constructing the
        # trainer or training it may be the one that raises.
        with pytest.raises(Exception) as execinfo:
            trainer = Trainer(config)
            trainer.train(train_data)
        assert "Failed to find component" in str(execinfo.value)
示例#12
0
    def ignore_test_handles_pipeline_with_non_existing_component(self):
        """
        Verify that a component name that does not exist is reported.

        ``"unknown_component"`` is appended to the configured pipeline; the
        exception raised while building/training the ``Trainer`` must
        mention ``"Failed to find component"``.

        :return: None
        """
        test_config = "tests/data/test_config/test_config.json"
        config = AnnotatorConfig(test_config)
        config['pipeline'].append("unknown_component")

        tmp_path = create_tmp_test_jsonfile("tmp.json")
        try:
            train_data = load_local_data(tmp_path)
        finally:
            # always delete the tmp file, including when loading fails
            rm_tmp_file("tmp.json")

        # Construction and training are both covered, since the error may
        # come from either call.
        with pytest.raises(Exception) as execinfo:
            trainer = Trainer(config)
            trainer.train(train_data)
        assert "Failed to find component" in str(execinfo.value)
 def setup_class(cls):
     """
     Class-level setup hook: create the JSON data file used by the test
     cases of this class.
     """
     data_file_name = "test_data.json"
     create_tmp_test_jsonfile(data_file_name)
 def setup_class(cls):
     """
     Prepare shared state for the tests in this class by writing the
     ``test_data.json`` test data file.
     """
     test_data_file = "test_data.json"
     create_tmp_test_jsonfile(test_data_file)