Exemplo n.º 1
0
def batch_dataset(args):
    """Dump the configured training corpus as tab-separated sentence pairs.

    Reads the data/model/training configuration from ``args.path``, loads the
    full training set, and writes one ``source<TAB>target`` line per sentence
    pair to ``args.output``.

    Args:
        args: parsed CLI namespace providing ``path`` (config file location)
            and ``output`` (destination text file).
    """
    # read data from configuration file
    config = Parameters.from_config(args.path)

    # load entire dataset, pre-batched to the configured batch size
    train_data = DataLoader.from_files(
        config.data.src_train,
        config.data.tgt_train,
        config.model.max_length,
        config.training.batch_size
    )

    outputfile = Path(args.output)
    with open(outputfile, "w", encoding="utf-8") as ofile:
        # enumerate from 1 so the progress counter reads 1/N .. N/N
        # (previously it showed 0/N on the first batch and never reached N/N)
        for i, batch in enumerate(train_data, start=1):
            for src, tgt in zip(*batch):
                # join token lists back into whitespace-separated sentences
                s_sen = " ".join(src)
                t_sen = " ".join(tgt)
                ofile.write(f"{s_sen}\t{t_sen}\n")

            # single self-overwriting progress line (carriage return, no newline)
            print(f"Batching dataset: {i}/{len(train_data)}", end="\r")

    # overwrite the progress line with spaces before the final message
    print(" " * 50, end="\r")
    print("Batching dataset: complete")
Exemplo n.º 2
0
def train(args):
    """Run the complete training pipeline described by the CLI arguments.

    Args:
        args: parsed CLI namespace providing ``resume``, ``batched`` and
            ``path`` (configuration file location).
    """
    # build the trainer straight from the CLI flags and parsed configuration
    trainer = Trainer(
        args.resume,
        args.batched,
        Parameters.from_config(args.path),
    )

    # standard pipeline: load data -> build model -> optimize -> persist
    trainer.read_data()
    trainer.create_model()
    trainer.train_loop()
    trainer.save_model()