示例#1
0
def foo():
    """Evaluate a freshly constructed sequence-labeling model on the PKU fixture.

    The function loads the tokenized PKU data, fills a config section from
    the "POS" entry of the test configuration, preprocesses the data, builds
    a ``SeqLabeling`` model from that configuration (no training happens
    here) and runs a ``SeqLabelTester`` over it, printing the metrics.
    """
    # Raw samples straight from the fixture file.
    raw_data = TokenizeDatasetLoader(
        "./data_for_tests/cws_pku_utf_8").load_pku()

    # Populate the "POS" section from the test configuration.
    pos_config = ConfigSection()
    config_loader = ConfigLoader("config.cfg")
    config_loader.load_config("./data_for_tests/config", {"POS": pos_config})

    # Preprocess, then copy the sizes read off the preprocessor into the
    # model configuration.
    preprocessor = SeqLabelPreprocess()
    dataset = preprocessor.run(raw_data)
    pos_config["vocab_size"] = preprocessor.vocab_size
    pos_config["num_classes"] = preprocessor.num_classes

    network = SeqLabeling(pos_config)

    # Tester options are passed directly as keyword arguments.
    validator = SeqLabelTester(
        save_output=True,
        validate_in_training=True,
        save_dev_input=True,
        save_loss=True,
        batch_size=8,
        pickle_path="./data_for_tests/",
        use_cuda=True,
    )

    print("start validation.")
    validator.test(network, dataset)
    print(validator.show_metrics())
示例#2
0
def train_test():
    """Train a ``SeqLabeling`` model, save it, reload it and test it.

    Steps, in order: load the "POS" config section, load and preprocess the
    PKU data, train, save the model parameters, rebuild an identical model
    and load the saved parameters into it, then run a tester configured from
    the "POS_test" section on the same preprocessed data.

    Relies on ``cws_data_path`` and ``pickle_path`` being defined outside
    this function.
    """
    config_dir = "./data_for_tests/config"
    saved_model_path = "./data_for_tests/saved_model.pkl"

    # --- configuration -----------------------------------------------------
    pos_config = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_dir, {"POS": pos_config})

    # --- data --------------------------------------------------------------
    dataset_loader = TokenizeDatasetLoader(cws_data_path)
    raw_data = dataset_loader.load_pku()

    # --- preprocessing -----------------------------------------------------
    preprocessor = SeqLabelPreprocess()
    prepared = preprocessor.run(raw_data, pickle_path=pickle_path)
    pos_config["vocab_size"] = preprocessor.vocab_size
    pos_config["num_classes"] = preprocessor.num_classes

    # --- training ----------------------------------------------------------
    # The trainer is built before the model, as in the original flow.
    seq_trainer = SeqLabelTrainer(**pos_config.data)
    network = SeqLabeling(pos_config)
    seq_trainer.train(network, prepared)
    print("Training finished!")

    # --- persist parameters ------------------------------------------------
    ModelSaver(saved_model_path).save_pytorch(network)
    print("Model saved!")

    # Drop the trained objects so the reload below starts from a new model.
    del network, seq_trainer, dataset_loader

    # --- rebuild and reload ------------------------------------------------
    network = SeqLabeling(pos_config)
    ModelLoader.load_pytorch(network, saved_model_path)
    print("model loaded!")

    # --- testing -----------------------------------------------------------
    pos_test_config = ConfigSection()
    ConfigLoader("config.cfg").load_config(config_dir,
                                           {"POS_test": pos_test_config})
    seq_tester = SeqLabelTester(**pos_test_config.data)
    seq_tester.test(network, prepared)

    print(seq_tester.show_metrics())
    print("model tested!")
示例#3
0
    def test_case_1(self):
        """Smoke-test ``SeqLabelPreprocess.run`` in two configurations.

        Each configuration is run twice in a row against the ``./save``
        pickle directory, after emptying that directory. There are no
        assertions: the test passes as long as no call raises.
        """

        def empty_save_dir():
            # Walk bottom-up so files are removed before the directories
            # that contain them. The top-level "./save" directory itself is
            # left in place.
            if not os.path.exists("./save"):
                return
            for root, dirs, files in os.walk("./save", topdown=False):
                for entry in files:
                    os.remove(os.path.join(root, entry))
                for entry in dirs:
                    os.rmdir(os.path.join(root, entry))

        # Ten samples alternating between two sentences. The inner tuple is
        # re-evaluated on every outer iteration, so each sample is its own
        # list object.
        data = [
            sample
            for _ in range(5)
            for sample in (
                [['Tom', 'and', 'Jerry', '.'], ['n', '&', 'n', '.']],
                [['Hello', 'world', '!'], ['a', 'n', '.']],
            )
        ]

        # Plain train/dev split, run twice.
        # NOTE(review): the second run presumably finds the pickles written
        # by the first one - confirm against SeqLabelPreprocess.
        empty_save_dir()
        for _ in range(2):
            result = SeqLabelPreprocess().run(train_dev_data=data,
                                              train_dev_split=0.4,
                                              pickle_path="./save")

        # Cross-validation with test data, run twice.
        empty_save_dir()
        for _ in range(2):
            result = SeqLabelPreprocess().run(test_data=data,
                                              train_dev_data=data,
                                              pickle_path="./save",
                                              train_dev_split=0.4,
                                              cross_val=True)
示例#4
0
def train():
    """Train an ``AdvSeqLabel`` model, warm-starting from a saved file if possible.

    Reads the "train" and "test" config sections, loads and preprocesses the
    PKU data with a 0.3 train/dev split, tries to load previously saved
    parameters, trains, and saves the parameters back to the same file.

    Relies on ``cfgfile``, ``cws_data_path`` and ``pickle_path`` being
    defined outside this function.
    """
    model_file = "./save/saved_model.pkl"

    # Configuration: both sections are filled by a single load call.
    train_args, test_args = ConfigSection(), ConfigSection()
    sections = {"train": train_args, "test": test_args}
    ConfigLoader("good_path").load_config(cfgfile, sections)

    # Data
    raw_data = TokenizeDatasetLoader(cws_data_path).load_pku()

    # Preprocessing yields the train and dev sets.
    prep = SeqLabelPreprocess()
    data_train, data_dev = prep.run(raw_data,
                                    pickle_path=pickle_path,
                                    train_dev_split=0.3)
    train_args["vocab_size"] = prep.vocab_size
    train_args["num_classes"] = prep.num_classes

    # Trainer first, then the model, as in the original flow.
    seq_trainer = SeqLabelTrainer(**train_args.data)
    network = AdvSeqLabel(train_args)

    # Best-effort warm start: any failure to load is reported and training
    # continues from the freshly constructed model.
    try:
        ModelLoader.load_pytorch(network, model_file)
        print('model parameter loaded!')
    except Exception:
        print("No saved model. Continue.")

    seq_trainer.train(network, data_train, data_dev)
    print("Training finished!")

    ModelSaver(model_file).save_pytorch(network)
    print("Model saved!")
示例#5
0
    def test(self):
        """Check the structure of the cross-validation output of the preprocessor.

        Runs ``SeqLabelPreprocess.run`` with ``cross_val=True`` and
        ``n_fold=2`` on ``data`` (defined outside this method) and asserts
        that the result has two parts, each holding ``num_folds`` items of
        exact type ``DataSet``.

        The ``save`` pickle directory is removed afterwards even when an
        assertion fails, so a failing run does not leave stale pickles
        behind for later tests.
        """
        # Local import keeps this method self-contained; used only for cleanup.
        import shutil

        num_folds = 2
        try:
            result = SeqLabelPreprocess().run(test_data=None,
                                              train_dev_data=data,
                                              pickle_path="./save",
                                              train_dev_split=0.4,
                                              cross_val=True,
                                              n_fold=num_folds)
            self.assertEqual(len(result), 2)
            self.assertEqual(len(result[0]), num_folds)
            self.assertEqual(len(result[1]), num_folds)
            for data_set in result[0] + result[1]:
                self.assertEqual(type(data_set), DataSet)
        finally:
            # Portable replacement for `rm -rf save`: no shell involved, and
            # a missing directory is not an error.
            shutil.rmtree("save", ignore_errors=True)
            print("pickle path deleted")
示例#6
0
    def test(self):
        """Check that a train/dev split run returns two ``DataSet`` objects.

        Empties the ``./save`` directory if it exists, runs
        ``SeqLabelPreprocess.run`` with ``train_dev_split=0.4`` on ``data``
        (defined outside this method) and asserts that the result has two
        elements of exact type ``DataSet``.

        The ``save`` pickle directory is removed afterwards even when an
        assertion fails, so a failing run does not leave stale pickles
        behind for later tests.
        """
        # Local import keeps this method self-contained; used only for cleanup.
        import shutil

        # Empty the pickle directory bottom-up (files before their parent
        # directories). The "./save" directory itself is kept.
        if os.path.exists("./save"):
            for root, dirs, files in os.walk("./save", topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))

        try:
            result = SeqLabelPreprocess().run(train_dev_data=data,
                                              train_dev_split=0.4,
                                              pickle_path="./save")
            self.assertEqual(len(result), 2)
            self.assertEqual(type(result[0]), DataSet)
            self.assertEqual(type(result[1]), DataSet)
        finally:
            # Portable replacement for `rm -rf save`: no shell involved, and
            # a missing directory is not an error.
            shutil.rmtree("save", ignore_errors=True)
            print("pickle path deleted")
示例#7
0
def train_and_test():
    """Train a POS ``SeqLabeling`` model, save/reload it and test on the dev set.

    Steps, in order: load trainer and model config sections, load and
    preprocess the POS data with a 0.5 train/dev split, train with an
    explicitly configured trainer, save the parameters, rebuild the model
    and reload the parameters, then run a tester over the dev data.

    Relies on ``config_dir``, ``data_path``, ``pickle_path`` and
    ``model_name`` being defined outside this function.
    """
    # --- configuration -----------------------------------------------------
    trainer_args = ConfigSection()
    model_args = ConfigSection()
    wanted_sections = {
        "test_seq_label_trainer": trainer_args,
        "test_seq_label_model": model_args,
    }
    ConfigLoader("config.cfg").load_config(config_dir, wanted_sections)

    # --- data --------------------------------------------------------------
    pos_loader = POSDatasetLoader(data_path)
    raw_data = pos_loader.load_lines()

    # --- preprocessing -----------------------------------------------------
    preprocessor = SeqLabelPreprocess()
    data_train, data_dev = preprocessor.run(raw_data,
                                            pickle_path=pickle_path,
                                            train_dev_split=0.5)
    model_args["vocab_size"] = preprocessor.vocab_size
    model_args["num_classes"] = preprocessor.num_classes

    # --- trainer -----------------------------------------------------------
    # Two definition styles exist. Style 1 hands over the config section's
    # data directly:
    #     trainer = SeqLabelTrainer(trainer_args.data)
    # Style 2, used here, spells out each option. The dict preserves the
    # evaluation order of the options.
    trainer_options = {
        "epochs": trainer_args["epochs"],
        "batch_size": trainer_args["batch_size"],
        "validate": trainer_args["validate"],
        "use_cuda": trainer_args["use_cuda"],
        "pickle_path": pickle_path,
        "save_best_dev": trainer_args["save_best_dev"],
        "model_name": model_name,
        "optimizer": Optimizer("SGD", lr=0.01, momentum=0.9),
    }
    seq_trainer = SeqLabelTrainer(**trainer_options)

    # --- model and training ------------------------------------------------
    network = SeqLabeling(model_args)
    seq_trainer.train(network, data_train, data_dev)
    print("Training finished!")

    # --- persist parameters ------------------------------------------------
    weights_path = os.path.join(pickle_path, model_name)
    ModelSaver(weights_path).save_pytorch(network)
    print("Model saved!")

    # Drop the trained objects so the reload below starts from a new model.
    del network, seq_trainer, pos_loader

    # --- rebuild and reload ------------------------------------------------
    network = SeqLabeling(model_args)
    ModelLoader.load_pytorch(network, weights_path)
    print("model loaded!")

    # --- testing -----------------------------------------------------------
    # The tester section is loaded from the config, but the tester below is
    # configured with literal values rather than from this section.
    tester_args = ConfigSection()
    ConfigLoader("config.cfg").load_config(
        config_dir, {"test_seq_label_tester": tester_args})

    seq_tester = SeqLabelTester(
        save_output=False,
        save_loss=False,
        save_best_dev=False,
        batch_size=4,
        use_cuda=False,
        pickle_path=pickle_path,
        model_name="seq_label_in_test.pkl",
        print_every_step=1,
    )

    # Evaluate on the held-out dev split.
    seq_tester.test(network, data_dev)

    print(seq_tester.show_metrics())
    print("model tested!")