Example #1
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from time import time

# AttentionModel, RolloutBaseline and Generator are project-local classes
# provided by the surrounding repository.


def train(cfg, log_path=None):
    torch.backends.cudnn.benchmark = True

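    # REINFORCE loss: the advantage (L - b) is weighted by the log-likelihood ll
    # of the sampled solution; b comes from the pre-computed rollout values bs
    # when available, otherwise from baseline.eval() on this batch.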
    def rein_loss(model, inputs, bs, t, device):
        inputs = list(map(lambda x: x.to(device), inputs))
        L, ll = model(inputs, decode_type='sampling')
        b = bs[t] if bs is not None else baseline.eval(inputs, L)
        return ((L - b.to(device)) * ll).mean(), L.mean()

    model = AttentionModel(cfg.embed_dim, cfg.n_encode_layers, cfg.n_heads,
                           cfg.tanh_clipping)
    model.train()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    baseline = RolloutBaseline(model, cfg.task, cfg.weight_dir,
                               cfg.n_rollout_samples, cfg.embed_dim,
                               cfg.n_customer, cfg.warmup_beta, cfg.wp_epochs,
                               device)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)

    t1 = time()
    for epoch in range(cfg.epochs):
        ave_loss, ave_L = 0., 0.
        dataset = Generator(cfg.batch * cfg.batch_steps, cfg.n_customer)

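        # evaluate the baseline once on the whole epoch's dataset; eval_all may
        # return None, in which case rein_loss falls back to per-batch baseline.eval()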
        bs = baseline.eval_all(dataset)
        bs = bs.view(
            -1, cfg.batch
        ) if bs is not None else None  # bs: (cfg.batch_steps, cfg.batch) or None

        dataloader = DataLoader(dataset, batch_size=cfg.batch, shuffle=True)
        for t, inputs in enumerate(dataloader):

            loss, L_mean = rein_loss(model, inputs, bs, t, device)
            optimizer.zero_grad()
            loss.backward()
            # print('grad: ', model.Decoder.Wk1.weight.grad[0][0])
            # https://github.com/wouterkool/attention-learn-to-route/blob/master/train.py
            nn.utils.clip_grad_norm_(model.parameters(),
                                     max_norm=1.0,
                                     norm_type=2)
            optimizer.step()

            ave_loss += loss.item()
            ave_L += L_mean.item()

            if t % (cfg.batch_verbose) == 0:
                t2 = time()
                print(
                    'Epoch %d (batch = %d): Loss: %1.3f L: %1.3f, %dmin%dsec' %
                    (epoch, t, ave_loss / (t + 1), ave_L / (t + 1),
                     (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = '%s%s_%s.csv' % (
                            cfg.log_dir, cfg.task, cfg.dump_date
                        )  #cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write('time,epoch,batch,loss,cost\n')
                    with open(log_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%1.3f,%1.3f\n' %
                                ((t2 - t1) // 60,
                                 (t2 - t1) % 60, epoch, t, ave_loss /
                                 (t + 1), ave_L / (t + 1)))
                t1 = time()

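        # end of epoch: epoch_callback may replace the baseline policy with the
        # current model if it has improved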
        baseline.epoch_callback(model, epoch)
        torch.save(model.state_dict(),
                   '%s%s_epoch%s.pt' % (cfg.weight_dir, cfg.task, epoch))
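
A minimal invocation sketch for the function above, assuming a simple namespace-style config that supplies the fields train() reads; every value below is illustrative, not a recommended setting:

from argparse import Namespace

cfg = Namespace(
    embed_dim=128, n_encode_layers=3, n_heads=8, tanh_clipping=10.,
    task='VRP20', weight_dir='./Weights/', n_rollout_samples=10000,
    n_customer=20, warmup_beta=0.8, wp_epochs=1,
    lr=1e-4, epochs=20, batch=512, batch_steps=2500, batch_verbose=10,
    islogger=True, log_dir='./Csv/', dump_date='0101_0000')

train(cfg)
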
Example #2
    # add padding (`max` is presumably the maximum sequence length computed
    # earlier in the script; note that the name shadows the built-in max)
    train_tokens = data.add_padding(train_tokens, max)
    test_tokens = data.add_padding(test_tokens, max)
    # convert2vec
    train_tokens, train_tags = data.convert2vec(train_tokens, train_tags, word2idx=word2idx, tag2idx=tag2idx)
    test_tokens, test_tags = data.convert2vec(test_tokens, test_tags, word2idx=word2idx, tag2idx=tag2idx)
    # dataset
    train_dataset = myDataSet(train_tokens, train_tags, train_seqlen)
    test_dataset = myDataSet(test_tokens, test_tags, test_seqlen)
    # dataloader
    train_data = DataLoader(train_dataset, batch_size=args.batch_size)
    test_data = DataLoader(test_dataset, batch_size=args.batch_size)

    # model
    baseModel = baseModel(vocab_size=vocab_size, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                          batch_size=args.batch_size, use_gpu=use_gpu, idx2word=idx2word, emb_path=emb_path)
    attentionModel = AttentionModel(vocab_size=vocab_size, embedding_dim=args.embedding_dim, hidden_dim=args.hidden_dim, tag2idx=tag2idx,
                                    batch_size=args.batch_size, use_gpu=use_gpu, idx2word=idx2word, emb_path=emb_path)
    # trainer: select the model according to args.mode and build the optimizer
    # for the model that is actually trained
    if args.mode == 'base':
        model = baseModel
    elif args.mode == 'attention':
        model = attentionModel
    else:
        raise ValueError("unknown mode: %s (expected 'base' or 'attention')" % args.mode)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    myTrainer = trainer(model=model, train_dataloader=train_data, test_dataloader=test_data, optimizer=optimizer,
                        epochs=args.epochs, word2idx=word2idx, tag2idx=tag2idx, idx2word=idx2word, idx2tag=idx2tag, use_gpu=use_gpu)
    myTrainer.train()
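
For context, a minimal sketch of the command-line arguments this snippet reads from args (attribute names are taken from the code above; types and defaults are assumptions):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['base', 'attention'], default='attention')
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--embedding_dim', type=int, default=100)
parser.add_argument('--hidden_dim', type=int, default=256)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--epochs', type=int, default=10)
args = parser.parse_args()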