def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch, teacher_forcing_ratio):
    """Run one training epoch.

    Args:
        train_loader: iterable of (data, data_mask, target) batches, each
            tensor laid out time-major as (seq_len, N).
        model: seq2seq model exposing .encoder, .decoder and .transformHidden.
        criterion: loss callable taking (outputs, targets, batch_size).
        optimizer: optimizer over model.parameters().
        epoch: current epoch index (used for logging only).
        teacher_forcing_ratio: probability in [0, 1] of feeding ground-truth
            tokens to the decoder instead of its own predictions.
    """
    losses = AverageMeter()

    # Switch to train mode
    model.train()

    for i, batch in enumerate(train_loader):
        # data: seq_len, N
        # data_mask: seq_len, N
        # target: seq_len, N
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        # non_blocking=True for consistency with the transfers above
        data = data.cuda(non_blocking=True)

        batch_size = data.size(1)
        target_len = target.size(0)

        # Forward
        # Encoder
        source_hs, hidden = model.encoder(data)

        # Decoder: start from the BOS token (target[0]) and unroll for
        # target_len - 1 steps, attending over the encoder states.
        ctx = None
        hidden = model.transformHidden(hidden)
        outputs = []
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
            outputs.append(output)
            # Next-input selection must not be part of the autograd graph.
            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1]  # N, 1
                    # BUG FIX: squeeze only dim 1. A bare .squeeze() removes
                    # every size-1 dim, so a batch of size 1 collapsed to a
                    # 0-dim tensor and broke the next decoder step.
                    x = topi.squeeze(1)  # N
        outputs = torch.stack(outputs)  # seq_len - 1, N, n_tokens
        # Skip target[0] (BOS): outputs[t] predicts target[t + 1].
        loss = criterion(outputs, target[1:], batch_size)

        # Backward
        optimizer.zero_grad()
        loss.backward()

        # Update (clip to guard against exploding gradients in the RNN)
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # Measure loss
        losses.update(loss.item(), batch_size)

        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
from seq2seq import Seq2seq as seq

import os

import tensorflow as tf

# Silence TensorFlow INFO/WARNING log spam.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Sanity check: sequence length = number of non-zero tokens, summed over
# the time axis (axis 0) of a (seq_len, N) batch. Padding is token 0, so
# sign(abs(x)) marks real tokens with 1 and padding with 0.
char_inputs = [[2, 1], [1, 2], [2, 3], [3, 4], [4, 0]]
used = tf.sign(tf.abs(char_inputs))
# FIX: `reduction_indices` is the deprecated TF1 alias; use `axis`.
length = tf.reduce_sum(used, axis=0)
lengths = tf.cast(length, tf.int32)
sess = tf.Session()
print(sess.run(lengths))

# Train
seq.train()
# Predict
seq.predict("天气")
# Retrain
seq.retrain()