Пример #1
0
def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch, teacher_forcing_ratio):
    """Run one training epoch over ``train_loader``.

    For each batch the encoder is run once, the decoder is unrolled one step
    at a time starting from ``target[0]``, the stacked step outputs are scored
    against ``target[1:]``, and a single clipped optimizer step is taken.

    Args:
        train_loader: Sized iterable yielding ``(data, data_mask, target)``
            batches. Per the original author's notes each tensor is laid out
            as ``(seq_len, N)``.
        model: Model exposing ``train()``, ``encoder``, ``transformHidden``,
            ``decoder`` and ``parameters()``.
        criterion: Called as ``criterion(outputs, target[1:], batch_size)``;
            its result must support ``backward()`` and ``item()``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        epoch: Epoch index; only used in the progress message.
        teacher_forcing_ratio: Probability, drawn once per batch, that the
            decoder is fed the ground-truth token at every step instead of
            its own previous prediction.

    Note:
        Reads the module-level ``args`` (``args.clip``, ``args.print_freq``)
        and ``AverageMeter``, and moves every batch to the CUDA device.
    """
    losses = AverageMeter()

    # Switch to train mode
    model.train()

    for i, batch in enumerate(train_loader):
        # data: seq_len, N
        # data_mask: seq_len, N
        # target: seq_len, N
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        # Same transfer mode as the other two tensors of the batch.
        data = data.cuda(non_blocking=True)

        batch_size = data.size(1)
        target_len = target.size(0)

        # Forward
        # Encoder
        source_hs, hidden = model.encoder(data)
        # Decoder
        ctx = None
        hidden = model.transformHidden(hidden)

        outputs = []
        # The teacher-forcing decision is made once per batch, not per step.
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
            outputs.append(output)

            # Choosing the next input token must not be tracked by autograd.
            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                    # Squeeze only the top-k dimension: a bare squeeze()
                    # would also drop the batch dimension when N == 1.
                    x = topi.squeeze(1)  # N

        outputs = torch.stack(outputs)  # seq_len, N, n_tokens
        loss = criterion(outputs, target[1:], batch_size)

        # Backward
        optimizer.zero_grad()
        loss.backward()

        # Update
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # Measure loss
        losses.update(loss.item(), batch_size)

        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
Пример #2
0
import os

# Ask TensorFlow's native logger to hide INFO and WARNING output. This is set
# before TensorFlow is imported (directly, or presumably through seq2seq) so
# that the setting is already in place when the logger first reads it.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

from seq2seq import Seq2seq as seq

# Small demo: count the non-zero ids in each column of a toy batch.
# NOTE(review): 0 looks like the padding id here -- confirm.
char_inputs = [[2, 1], [1, 2], [2, 3], [3, 4], [4, 0]]

# 1 where an id is non-zero, 0 elsewhere.
used = tf.sign(tf.abs(char_inputs))
# Sum down the first axis to get one count per column.
length = tf.reduce_sum(used, axis=0)
lengths = tf.cast(length, tf.int32)

# The context manager closes the session once the value has been printed.
with tf.Session() as sess:
    print(sess.run(lengths))

# Train
seq.train()
# Predict
seq.predict("天气")
# Retrain
seq.retrain()