Example #1
import pickle

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

# Transformer, Optim, Dataload, collate_fn, get_args, train and eval are
# project-local helpers assumed to be importable from the surrounding repo.


def main():
    args = get_args()
    # Vocabulary sizes come from the pickled source/target dictionaries.
    src_vocab_size = len(
        pickle.load(open(args.data_bin + '/dict' + "." + args.src_lang,
                         'rb')).keys())
    tgt_vocab_size = len(
        pickle.load(open(args.data_bin + '/dict' + '.' + args.tgt_lang,
                         'rb')).keys())
    device = 'cuda'
    model = Transformer(src_vocab_size=src_vocab_size,
                        tgt_vocab_size=tgt_vocab_size,
                        encoder_layer_num=args.encoder_layer_num,
                        decoder_layer_num=args.decoder_layer_num,
                        hidden_size=args.hidden_size,
                        feedback_size=args.feedback,
                        num_head=args.num_head,
                        dropout=args.dropout,
                        device=device)
    # Adam wrapped in a learning-rate schedule keyed to warmup_step and d_model.
    optim = Optim(Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-9),
                  warmup_step=4000,
                  d_model=args.hidden_size)
    train_loader = DataLoader(Dataload(args.data_bin + '/' + 'train',
                                       args.src_lang, args.tgt_lang),
                              batch_size=args.batch_size,
                              collate_fn=collate_fn,
                              shuffle=True)
    # optim = Adam(model.parameters(), lr=5e-6)
    test_loader = DataLoader(Dataload(args.data_bin + '/' + 'test',
                                      args.src_lang, args.tgt_lang),
                             batch_size=args.batch_size,
                             collate_fn=collate_fn)
    valid_loader = DataLoader(Dataload(args.data_bin + '/' + 'valid',
                                       args.src_lang, args.tgt_lang),
                              batch_size=args.batch_size,
                              collate_fn=collate_fn)
    best_loss = 1e4
    model = model.to(device)
    # model.load_state_dict(torch.load('best_model.pkl'))
    # Train for the requested number of epochs, validating after each one
    # and tracking the best validation loss.
    for i in range(args.epoch):
        train(i, model, data_loader=train_loader, optim=optim, device=device)
        with torch.no_grad():
            best_loss = eval(i, model, valid_loader, best_loss, device)
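
The Optim wrapper used above is project code that is not shown in this snippet. A minimal sketch of what such a wrapper could look like, assuming the standard Transformer warm-up schedule (lr = d_model**-0.5 * min(step**-0.5, step * warmup_step**-1.5)); the method names and update logic here are illustrative, not the repository's exact implementation:

# Hypothetical sketch of the Optim wrapper (assumption: Noam-style warm-up).
class Optim:
    def __init__(self, optimizer, warmup_step, d_model):
        self.optimizer = optimizer
        self.warmup_step = warmup_step
        self.d_model = d_model
        self.step_num = 0

    def rate(self):
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        return (self.d_model ** -0.5 *
                min(self.step_num ** -0.5,
                    self.step_num * self.warmup_step ** -1.5))

    def step(self):
        # Update the learning rate of every parameter group, then step Adam.
        self.step_num += 1
        lr = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()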
Example #2
import pickle

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# Transformer, Dataload, collate_fn, beamsearch and add_ are project-local
# helpers assumed to be importable; `parser` and `device` are defined earlier
# in the original script.

args = parser.parse_args()

src_dict = pickle.load(
    open(add_(args.bin_path) + 'dict.' + args.src_lang, 'rb'))
trg_dict = pickle.load(
    open(add_(args.bin_path) + 'dict.' + args.tgt_lang, 'rb'))
model = Transformer(src_vocab_size=len(src_dict.keys()),
                    tgt_vocab_size=len(trg_dict.keys()),
                    encoder_layer_num=6,
                    decoder_layer_num=6,
                    hidden_size=512,
                    feedback_size=2048,
                    num_head=8,
                    dropout=0.1,
                    device=device)
model = model.to(device)
# Restore the trained weights before evaluation.
model.load_state_dict(torch.load(args.model_path))
dataload = DataLoader(Dataload(add_(args.bin_path) + 'test',
                               src=args.src_lang,
                               trg=args.tgt_lang),
                      batch_size=32,
                      collate_fn=collate_fn)
# Reference and hypothesis containers (filled later in the original script).
real = []
predict = []
pbtr = tqdm(total=len(dataload))
with torch.no_grad():
    model.eval()
    for src, trg in dataload:
        src = src.to(device)
        # Decode the source batch with the project's beamsearch helper.
        predicts = beamsearch(model, src, 1, 100, device=device)
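
collate_fn, passed to every DataLoader call in both examples, is also project code that is not shown. A minimal sketch of a padding collate function, assuming each Dataload item is a (source_ids, target_ids) pair of Python int lists and that 0 is the padding index; both assumptions are illustrative, not taken from the repository:

import torch

# Hypothetical padding collate function (assumes (src_ids, tgt_ids) items
# and pad index 0).
def collate_fn(batch):
    srcs, tgts = zip(*batch)
    # Allocate zero-filled (i.e. pad-filled) tensors sized to the longest
    # sequence on each side of the batch.
    src_batch = torch.zeros(len(batch), max(len(s) for s in srcs),
                            dtype=torch.long)
    tgt_batch = torch.zeros(len(batch), max(len(t) for t in tgts),
                            dtype=torch.long)
    for i, (s, t) in enumerate(zip(srcs, tgts)):
        src_batch[i, :len(s)] = torch.tensor(s, dtype=torch.long)
        tgt_batch[i, :len(t)] = torch.tensor(t, dtype=torch.long)
    return src_batch, tgt_batch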