def train(model_path, train_batch_size, validate_batch_size, validate_batch_num,
          resize, train_gpu, validate_gpu=-1):
    # train_gpu = 0
    # validate_gpu = 1
    # model_path = '../amazon2/alexnet'
    # train_batch_size = 256
    # validate_batch_size = 128
    # validate_batch_num = 8

    # parameters
    k = 5
    epochs = 1
    lr = 1e-4
    weight_decay = 0
    momentum = 0.9

    criteria2metric = {
        'train loss': 'loss',
        'valid loss': 'loss'
    }
    hyperparameters_train = {
        'name': 'train',
        'learning rate': lr,
        'batch size': train_batch_size,
        'optimizer': 'Adam',
        'momentum': 0,
        'net': model_path.split('/')[-1],
        'epoch': 'No.1',
    }
    hyperparameters_validate = {
        'name': 'validate',
        'learning rate': lr,
        'batch size': train_batch_size,
        'optimizer': 'Adam',
        'momentum': 0,
        'net': model_path.split('/')[-1],
        'epoch': 'No.1',
    }
    agent = Agent(username='******', password='******')
    train_loss_show = agent.register(hyperparameters_train,
                                     criteria2metric['train loss'])
    validate_loss_show = agent.register(hyperparameters_validate,
                                        criteria2metric['valid loss'])

    global_step = 0
    with open('kdf.pkl', 'rb') as f:
        kfold = pickle.load(f, encoding='latin1')

    loss_info = []  # loss_info[i] records fold i's minimum (train_loss, validate_loss)
    for fold in range(k):
        train_index = kfold[fold][0]
        validate_index = kfold[fold][1]

        model = AM_alex()
        if model.getname() != model_path.split('/')[-1]:
            print('Wrong Model!')
            return
        model.cuda(device_id=train_gpu)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=weight_decay)

        dset_train = AmazonDateset_train(train_index, IMG_TRAIN_PATH, IMG_EXT,
                                         LABEL_PATH, resize=resize)
        train_loader = DataLoader(dset_train, batch_size=train_batch_size,
                                  shuffle=True, num_workers=6)

        min_loss = [0.9, 0.9]
        for epoch in range(epochs):
            print('--------------Epoch %d: train-----------' % epoch)
            model.train()
            for step, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                data = data.cuda(device_id=train_gpu)
                target = target.cuda(device_id=train_gpu)

                optimizer.zero_grad()
                output = model(data)
                # print(output.size())
                loss = F.binary_cross_entropy(output, target)
                loss.backward()
                optimizer.step()

                agent.append(train_loss_show, global_step, loss.data[0])
                global_step += 1

                if step % 10 == 0:
                    model.eval()
                    if validate_gpu != -1:
                        model.cuda(validate_gpu)
                    dset_validate = AmazonDateset_validate(
                        validate_index, IMG_TRAIN_PATH, IMG_EXT, LABEL_PATH,
                        random_transform=True, resize=resize)
                    validate_loader = DataLoader(dset_validate,
                                                 batch_size=validate_batch_size,
                                                 shuffle=True, num_workers=6)
                    total_vloss = 0
                    for vstep, (vdata, vtarget) in enumerate(validate_loader):
                        vdata, vtarget = Variable(vdata), Variable(vtarget)
                        if validate_gpu != -1:
                            vdata = vdata.cuda(validate_gpu)
                            vtarget = vtarget.cuda(validate_gpu)
                        else:
                            vdata = vdata.cuda(train_gpu)
                            vtarget = vtarget.cuda(train_gpu)
                        voutput = model(vdata)
                        vloss = F.binary_cross_entropy(voutput, vtarget)
                        total_vloss += vloss.data[0]
                        if vstep == (validate_batch_num - 1):
                            break
                    vloss = total_vloss / validate_batch_num

                    model.train()
                    if validate_gpu != -1:
                        model.cuda(train_gpu)

                    agent.append(validate_loss_show, global_step, vloss)
                    print('{} Fold{} Epoch{} Step{}: [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}\tValidate Loss: {:.6f}'.format(
                        model_path.split('/')[-1], fold, epoch, global_step,
                        step * train_batch_size, len(train_loader.dataset),
                        100. * step / len(train_loader), loss.data[0], vloss))

                    if vloss < min_loss[1]:
                        min_loss[1] = vloss
                        min_loss[0] = loss.data[0]
                        model_save = copy.deepcopy(model)
                        torch.save(model_save.cpu(),
                                   os.path.join(model_path, 'fold%d.mod' % fold))

        loss_info.append(min_loss)
        print('-----------------------------------------')

    print(model_path.split('/')[-1] + ':')
    for i, l in enumerate(loss_info):
        print('Fold%d: Train loss:%f\tValidate loss:%f' % (i, l[0], l[1]))
    with open(os.path.join(model_path, 'train_loss_info.pkl'), 'wb') as f:
        pickle.dump(loss_info, f)

def train(lr, net, epoch, train_loader, valid_loader, transform,
          hyperparameters, batch_size):
    # register hypercurve
    agent = Agent(port=5001)
    hyperparameters['criteria'] = 'train loss'
    train_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid loss'
    valid_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid bleu'
    valid_bleu = agent.register(hyperparameters, 'bleu')
    hyperparameters['criteria'] = 'train bleu'
    train_bleu = agent.register(hyperparameters, 'bleu')
    hyperparameters['criteria'] = 'scheduled sampling probability'
    hyper_ssprob = agent.register(hyperparameters, 'probability')

    if torch.cuda.is_available():
        net.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        net.parameters()), lr=lr)
    net.train()

    best_score = -1
    global_steps = 0
    best_valid_loss = 10000
    for iepoch in range(epoch):
        new_epoch = False
        batchid = 0
        for (_, data) in enumerate(train_loader, 0):
            entext = data['entext']
            enlen = data['enlen']
            zhlabel = data['zhlabel']
            zhgtruth = data['zhgtruth']
            zhlen = data['zhlen']

            ssprob = max(math.exp(-(global_steps - 100000) / 500000), 0.8)
            print('scheduled sampling pro: ', ssprob)
            logits, predic = net(entext, zhgtruth, enlen, ssprob, True)
            loss = net.get_loss(logits, zhlabel)

            optimizer.zero_grad()
            loss.backward()
            utils.clip_grad_norm(net.parameters(), 5)
            optimizer.step()

            batchid += 1
            global_steps += 1
            print(global_steps, iepoch, batchid, sum(loss.data.cpu().numpy()))
            agent.append(train_loss, global_steps, sum(loss.data.cpu().numpy()))
            agent.append(hyper_ssprob, global_steps, ssprob)

            if batchid % 50 == 0:
                net.eval()
                logits, predic = net(entext, zhgtruth, enlen, ssprob, True)
                tmppre = [0 for i in range(len(entext))]
                tmplabel = [0 for i in range(len(entext))]
                for i in range(len(entext)):
                    tmppre[i] = transform.clip(predic[i], language='zh')
                    tmplabel[i] = zhlabel[i][:zhlen[i]]

                tmpscore = bleuscore.score(tmppre, tmplabel)
                for i in range(25):
                    ans_ = transform.i2t(tmplabel[i], language='zh')
                    pre_ = transform.i2t(tmppre[i], language='zh')
                    print(ans_)
                    print(pre_)
                    print('-------------------\n')
                agent.append(train_bleu, global_steps, tmpscore)
                del logits, predic
                net.train()  # leave eval mode before continuing training

            if batchid % 400 == 0:
                print('\n------------------------\n')
                net.eval()
                all_pre = []
                all_label = []
                all_len = []
                all_loss = 0
                bats = 0
                for (_, data) in enumerate(valid_loader, 0):
                    entext = data['entext']
                    enlen = data['enlen']
                    zhlabel = data['zhlabel']
                    zhgtruth = data['zhgtruth']
                    zhlen = data['zhlen']

                    logits, predic = net(entext, zhgtruth, enlen, 0, False)
                    loss = net.get_loss(logits, zhlabel)
                    all_pre.extend(predic)
                    all_label.extend(zhlabel)
                    all_len.extend(zhlen)
                    all_loss += sum(loss.data.cpu().numpy())
                    del loss, logits, predic
                    bats += 1

                for i in range(len(all_pre)):
                    all_pre[i] = transform.clip(all_pre[i], language='zh')
                    all_label[i] = all_label[i][:all_len[i]]

                score = bleuscore.score(all_pre, all_label)
                for i in range(0, 600, 6):
                    ans_ = transform.i2t(all_label[i], language='zh')
                    pre_ = transform.i2t(all_pre[i], language='zh')
                    print(ans_)
                    print(pre_)
                    print('-------------------\n')

                all_loss /= bats
                print(global_steps, iepoch, batchid, all_loss, score,
                      '\n********************\n')
                agent.append(valid_loss, global_steps, all_loss)
                agent.append(valid_bleu, global_steps, score)

                if best_valid_loss > all_loss or best_score < score:
                    best_valid_loss = all_loss
                    best_score = score
                    torch.save(
                        net.state_dict(),
                        model_dir + "ssprob-{:3f}-loss-{:3f}-steps-{:d}-model.pkl".format(
                            ssprob, all_loss, global_steps))

                del all_label, all_len, all_loss, all_pre
                net.train()

from data import ArchDataset
from model import MultimodalModule, AttentionModule
from configure import GlobalVariable
import numpy as np
import math
import torch
import time
from hyperboard import Agent

agent = Agent(address='127.0.0.1', port=5001)

from zutil.config import Config
config = Config(source='parameters.json')

print('building model')
assert config.model_type in {'rnn', 'cnn', 'simple', 'attention'}
if config.model_type == 'attention':
    model = AttentionModule(config)
    criterion = torch.nn.BCELoss()
else:
    model = MultimodalModule(config)
    criterion = torch.nn.CosineEmbeddingLoss()
print(model)
print(criterion)

if config.cuda:
    model = model.cuda()
    criterion = criterion.cuda()

params = model.parameters()
#optimizer = torch.optim.Adam(

episode_num = 300

with open(os.path.join(saved_folder, 'config.pkl'), 'rb') as f:
    config = pickle.load(f)
config.EPSILON = 0.2

np.random.seed(config.RANDOM_SEED)
torch.manual_seed(config.RANDOM_SEED)
if config.GPU >= 0:
    torch.cuda.manual_seed(config.RANDOM_SEED)

# for debug
from hyperboard import Agent
HBagent = Agent(username='******', password='******',
                address='127.0.0.1', port=5002)
hp = deepcopy(config.todict())
hp['mode'] = 'test_reward'
test_record = HBagent.register(hp, 'reward', overwrite=True)
hp['mode'] = 'train_reward'
train_r = HBagent.register(hp, 'reward', overwrite=True)

env = SimpleBattleEnv(
    config.ip, config.port, config.MYSELF_NUM, config.ENEMY_NUM,
    config.ACTION_DIM, config.DISTANCE_FACTOR, config.POSITION_RANGE,

from torch.utils.data import DataLoader
import pickle
import numpy as np
from data import dataset
from models import model
from torch import optim
from config import DefaultConfig
import torch  # needed for torch.nn.MSELoss below
import torch.nn.functional as F
import copy
# from tqdm import tqdm

config = DefaultConfig()
if config.use_hyperboard:
    from hyperboard import Agent
    agent = Agent(username='******', password='******', port=5005)
    parameter = config.todict()
    validate_loss_record = agent.register(parameter, 'loss', overwrite=True)

train_dataset = dataset.MyDataset()
validate_dataset = dataset.MyDataset()

criticer = torch.nn.MSELoss()
model = model.Model()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
if config.gpu >= 0:
    model.cuda(config.gpu)

max_loss = 0
no_gain = 0
global_step = 0

optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    net.parameters()), lr=lr)
net.train()

hyperparameters = defaultdict(lambda: 0)
hyperparameters['criteria'] = None
hyperparameters['dropout'] = dropout
hyperparameters['lr'] = lr
hyperparameters['batch_size'] = batch_size
hyperparameters['hidden_size'] = hidden_size
hyperparameters['lambda_m'] = lambda_m
hyperparameters['lambda_g'] = lambda_g
hyperparameters['lambda_c'] = lambda_c

agent = Agent(port=args.port)
hyperparameters['criteria'] = 'train match loss'
train_match_loss = agent.register(hyperparameters, 'loss')
hyperparameters['criteria'] = 'valid match loss'
valid_match_loss = agent.register(hyperparameters, 'loss')
hyperparameters['criteria'] = 'valid match em'
valid_match_em = agent.register(hyperparameters, 'em')
hyperparameters['criteria'] = 'valid match f1'
valid_match_f1 = agent.register(hyperparameters, 'f1')
hyperparameters['criteria'] = 'train generation loss'
train_generation_loss = agent.register(hyperparameters, 'loss')

argparser.add_argument('-em', '--embedding_size', type=int, default=100)
argparser.add_argument('-nb', '--num_batches', type=int, default=100)
argparser.add_argument('-n', '--train_times', type=int, default=1000)
argparser.add_argument('-m', '--margin', type=float, default=1.0)
argparser.add_argument('-f', '--filter', type=int, default=1)
argparser.add_argument('-mo', '--momentum', type=float, default=0.9)
argparser.add_argument('-s', '--seed', type=int, default=0)
argparser.add_argument('-op', '--optimizer', type=int, default=1)
argparser.add_argument('-lo', '--loss_type', type=int, default=0)
argparser.add_argument('-p', '--port', type=int, default=5000)
argparser.add_argument('-np', '--num_processes', type=int, default=4)

args = argparser.parse_args()

# Start the hyperboard agent
agent = Agent(address='127.0.0.1', port=args.port)

if args.seed != 0:
    torch.manual_seed(args.seed)

trainTotal, trainList, trainDict = loadTriple('./data/' + args.dataset,
                                              'train2id.txt')
validTotal, validList, validDict = loadTriple('./data/' + args.dataset,
                                              'valid2id.txt')
tripleTotal, tripleList, tripleDict = loadTriple('./data/' + args.dataset,
                                                 'triple2id.txt')

with open(os.path.join('./data/', args.dataset, 'head_tail_proportion.pkl'),
          'rb') as fr:
    head_per_tail = pickle.load(fr)
    tail_per_head = pickle.load(fr)

import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn as nn
import sys, time, os
sys.path.append('./utils')
from nets import *
from data import *
from scipy.misc import imsave
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from hyperboard import Agent

agent = Agent(username='', password='', address='127.0.0.1', port=5000)
d_a_loss = {'CycleGAN': 'adv loss of D_A'}
d_b_loss = {'CycleGAN': 'adv loss of D_B'}
g_ab_loss = {'CycleGAN': 'adv loss of G_AB'}
g_ba_loss = {'CycleGAN': 'adv loss of G_BA'}
a_recon_loss = {'CycleGAN': 'reconstruction loss of A (A -> B -> A)'}
b_recon_loss = {'CycleGAN': 'reconstruction loss of B (B -> A -> B)'}

da_loss = agent.register(d_a_loss, 'loss', overwrite=True)
db_loss = agent.register(d_b_loss, 'loss', overwrite=True)
g_loss_ab = agent.register(g_ab_loss, 'loss', overwrite=True)
g_loss_ba = agent.register(g_ba_loss, 'loss', overwrite=True)
g_recon_loss_a = agent.register(a_recon_loss, 'loss', overwrite=True)
g_recon_loss_b = agent.register(b_recon_loss, 'loss', overwrite=True)

# def sample_z(batch_size, z_dim):
#     return np.random.uniform(-1., 1., size=[batch_size, z_dim])

    'note': 'convertRGB+ReduceLROnPlateau-0.5-20',  # ReduceLROnPlateau-0.5-20, MultiStepLR-5,20-0.025, MultiStepLR-10,14,16,18,20,22,24-0.2
    'train_time': time_str,
}
locals().update(config)

assert str(RESIZE[0]) in data_path and str(RESIZE[1]) in data_path

dir_name = '{}_{}'.format(ENCODER, time_str)
save_dir = os.path.join(SAVE_PATH, 'segment', dir_name)
if not os.path.exists(save_dir):
    os.mkdir(save_dir)
with open(os.path.join(save_dir, 'config'), 'w') as f:
    f.write('{}\n\n{}'.format(time_str, utils.config2str(config)))

agent = Agent(username='******', password='******', port=5005)
train_config = config.copy()
train_config['phase'] = 'train'
train_loss_record = agent.register(train_config, 'loss', overwrite=True)
validate_config = config.copy()
validate_config['phase'] = 'validate'
validate_loss_record = agent.register(validate_config, 'loss', overwrite=True)
lr_config = config.copy()
lr_config['phase'] = 'learning rate'
lr_record = agent.register(lr_config, 'lr', overwrite=True)

train_csv = os.path.join(DATA_PATH, 'train.csv')
img_path = os.path.join(DATA_PATH, 'train_images')
kfold_path = 'kfold.pkl'

import time
import math
import json
import random
from hyperboard import Agent

agent = Agent()

metric2scale = {'cross entropy': 10, 'accuracy': 1, 'BLEU': 100}
criteria2metric = {
    'train loss': 'cross entropy',
    'valid loss': 'cross entropy',
    'train accu': 'accuracy',
    'valid accu': 'accuracy',
    'test BLEU': 'BLEU'
}

name_list = []
criteria_list = []
offset_list = []
for learning_rate in [0.1, 0.01, 0.001]:
    for batch_size in [128, 256]:
        for optimizer in ['SGD', 'Adam']:
            for criteria in criteria2metric.keys():
                for corpus in ['wikipedia', 'PennTreeBank']:
                    hyperparameters = {
                        'learning rate': learning_rate,
                        'batch size': batch_size,
                        'criteria': criteria,
                        'corpus': corpus,
                        'optimizer': optimizer,
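
All of the snippets in this collection follow the same hyperboard pattern: construct an Agent, register one curve per (hyperparameter dict, metric name) pair, then stream points to each curve with append. Below is a minimal sketch of that pattern, assuming a hyperboard server is already listening locally on port 5000; the address, port, hyperparameter names, and the placeholder loss values are illustrative, not taken from any snippet above.

# Minimal hyperboard usage sketch (assumes a local server on port 5000).
from hyperboard import Agent

agent = Agent(address='127.0.0.1', port=5000)

# A curve is identified by a hyperparameter dict plus a metric name;
# register() returns a handle used for later appends.
hyperparameters = {'lr': 1e-3, 'batch size': 128, 'criteria': 'train loss'}
train_loss = agent.register(hyperparameters, 'loss', overwrite=True)

for step in range(100):
    fake_loss = 1.0 / (step + 1)  # placeholder; a real loop would log its training loss
    agent.append(train_loss, step, fake_loss)
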
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from data.dataset import VOCBboxDataset
from torch.utils.data import DataLoader
from config import opt

# used for debugging, see https://github.com/WarBean/hyperboard
if opt.use_hyperboard:
    from hyperboard import Agent
    agent = Agent(username='******', password='******', port=5005)
    loss_record = agent.register({'loss': 'total'}, 'loss', overwrite=True)
    rpn_loc_loss = agent.register({'loss': 'rpn_loc'}, 'loss', overwrite=True)
    rpn_cls_loss = agent.register({'loss': 'rpn_cls'}, 'loss', overwrite=True)
    roi_loc_loss = agent.register({'loss': 'roi_loc'}, 'loss', overwrite=True)
    roi_cls_loss = agent.register({'loss': 'roi_cls'}, 'loss', overwrite=True)

model = FasterRCNNVGG16(opt)

import numpy as np
import pickle

global_step = 0
record_step = 10
ls = np.zeros(5)
ls_record = {}

for epoch in range(opt.epoch):
    train_dataset = VOCBboxDataset(opt)
    train_num = len(train_dataset)

#train_set = np.array(train_set+valid_set, dtype=np.float32)
train_set = np.array(train_set, dtype=np.float32)
valid_set = np.array(valid_set, dtype=np.float32)
print(train_set.shape, valid_set.shape)

# hyperparameters
epochs = 100
lr = 0.0005
in_dim = 1109
hidden1_dim = 600
hidden2_dim = 300
criterion_weight = [1, 340]

# hyperboard
agent = Agent(port=5100)
valid_auc_para = {
    'name': 'valid_auc',
    'in_dim': in_dim,
    'hidden1_dim': hidden1_dim,
    'hidden2_dim': hidden2_dim,
    'lr': lr
}
train_loss_para = {
    'name': 'train_loss',
    'in_dim': in_dim,
    'hidden1_dim': hidden1_dim,
    'hidden2_dim': hidden2_dim,
    'lr': lr
}
valid_auc = agent.register(valid_auc_para, 'y1', overwrite=True)

class AIFL_Digits(object):
    def __init__(self, E, D, M, data_A, data_B, exp, cuda=True, port=5000):
        self.E = E
        self.D = D
        self.M = M
        self.data_A = data_A
        self.data_B = data_B
        self.exp = exp
        self.cuda = cuda
        self.port = port
        assert self.data_A.channel == self.data_B.channel
        assert self.data_A.size == self.data_B.size
        assert self.data_A.n_class == self.data_B.n_class
        self.channel = self.data_A.channel
        self.size = self.data_A.size
        self.register_curves()
        if self.cuda:
            self.E.cuda()
            self.D.cuda()
            self.M.cuda()

    def register_curves(self):
        self.agent = Agent(username='', password='', address='127.0.0.1',
                           port=self.port)
        loss_D_exp = {self.exp: "D loss: D predicts samples' attributes"}
        loss_E_exp = {self.exp: 'E loss: E encodes samples'}
        loss_M_exp = {self.exp: 'M loss: M classifies samples'}
        acc_A_exp = {self.exp: 'Categorization accuracy on data A'}
        acc_B_exp = {self.exp: 'Categorization accuracy on data B'}
        pre_loss_E_exp = {self.exp: 'Pretrain E loss: E encodes samples'}
        pre_loss_M_exp = {self.exp: 'Pretrain M loss: M classifies samples'}
        pre_acc_A_exp = {self.exp: 'Pretrain categorization accuracy on data A'}
        pre_acc_B_exp = {self.exp: 'Pretrain categorization accuracy on data B'}
        lr_exp = {self.exp: 'Learning rate at training phase (log scale)'}
        pre_lr_exp = {self.exp: 'Learning rate at pretraining phase (log scale)'}
        self.d_loss = self.agent.register(loss_D_exp, 'D loss', overwrite=True)
        self.e_loss = self.agent.register(loss_E_exp, 'E loss', overwrite=True)
        self.m_loss = self.agent.register(loss_M_exp, 'M loss', overwrite=True)
        self.acc_A = self.agent.register(acc_A_exp, 'acc', overwrite=True)
        self.acc_B = self.agent.register(acc_B_exp, 'acc', overwrite=True)
        self.pre_e_loss = self.agent.register(pre_loss_E_exp, 'E loss', overwrite=True)
        self.pre_m_loss = self.agent.register(pre_loss_M_exp, 'M loss', overwrite=True)
        self.pre_acc_A = self.agent.register(pre_acc_A_exp, 'acc', overwrite=True)
        self.pre_acc_B = self.agent.register(pre_acc_B_exp, 'acc', overwrite=True)
        self.tlr = self.agent.register(lr_exp, 'lr', overwrite=True)
        self.plr = self.agent.register(pre_lr_exp, 'lr', overwrite=True)

    def train(self, ckpt_dir, test_A, test_B, init_lr_E=1e-3, init_lr_D=1e-3,
              init_lr_M=1e-3, batch_size=64, training_epochs=50000):
        x = Variable(torch.FloatTensor(batch_size, self.channel, self.size, self.size))
        y = Variable(torch.LongTensor(batch_size))
        s = Variable(torch.FloatTensor(batch_size))
        att_pred_criterion = nn.BCELoss()
        cat_criterion = nn.CrossEntropyLoss()
        if self.cuda:
            x = x.cuda()
            y = y.cuda()
            s = s.cuda()
            att_pred_criterion = att_pred_criterion.cuda()
            cat_criterion = cat_criterion.cuda()

        optimizer_D = optim.Adam(self.D.parameters(), lr=init_lr_D, betas=(0.5, 0.999))
        optimizer_E = optim.Adam(self.E.parameters(), lr=init_lr_E, betas=(0.5, 0.999))
        optimizer_M = optim.Adam(self.M.parameters(), lr=init_lr_M, betas=(0.5, 0.999))
        # scheduler_D = lr_scheduler.StepLR(optimizer_D, step_size=1000, gamma=0.9)
        # scheduler_E = lr_scheduler.StepLR(optimizer_E, step_size=1000, gamma=0.9)
        # scheduler_M = lr_scheduler.StepLR(optimizer_M, step_size=1000, gamma=0.9)
        scheduler_D = lr_scheduler.ReduceLROnPlateau(optimizer_D, mode='max', min_lr=1e-7,
                                                     patience=5, factor=0.65, verbose=True)
        scheduler_E = lr_scheduler.ReduceLROnPlateau(optimizer_E, mode='max', min_lr=1e-7,
                                                     patience=5, factor=0.65, verbose=True)
        scheduler_M = lr_scheduler.ReduceLROnPlateau(optimizer_M, mode='max', min_lr=1e-7,
                                                     patience=5, factor=0.65, verbose=True)

        for epoch in range(training_epochs):
            # scheduler_D.step()
            # scheduler_E.step()
            # scheduler_M.step()
            begin_time = time.time()

            # fetch data
            batch_x_A, batch_y_A = self.data_A(batch_size // 2)
            batch_x_B, batch_y_B = self.data_B(batch_size - batch_x_A.shape[0])
            x.data.copy_(torch.from_numpy(np.concatenate([batch_x_A, batch_x_B])))
            y.data.copy_(torch.from_numpy(np.concatenate([batch_y_A, batch_y_B])))
            s.data.copy_(torch.from_numpy(
                np.array([0] * batch_x_A.shape[0] + [1] * batch_x_B.shape[0])))

            # update D
            self.D.zero_grad()
            h = self.E(x)
            pred_s = self.D(h.detach())
            D_loss = att_pred_criterion(pred_s, s)
            D_loss.backward()
            optimizer_D.step()

            # update E and M
            self.E.zero_grad()
            self.M.zero_grad()
            pred_s = self.D(h)
            pred_y = self.M(h)
            M_loss = cat_criterion(pred_y, y)
            E_loss = -att_pred_criterion(pred_s, s) + M_loss
            E_loss.backward()
            optimizer_E.step()
            optimizer_M.step()

            # record data on the curves
            self.agent.append(self.d_loss, epoch, float(D_loss.data[0]))
            self.agent.append(self.e_loss, epoch, float(E_loss.data[0]))
            self.agent.append(self.m_loss, epoch, float(M_loss.data[0]))

            elapsed_time = time.time() - begin_time
            print('Epoch[%06d], D_loss: %.4f, E_loss: %.4f, M_loss: %.4f, elapsed_time: %.4ssecs.' %
                  (epoch + 1, D_loss.data[0], E_loss.data[0], M_loss.data[0], elapsed_time))

            if epoch % 500 == 0:
                acc = {'A': 0, 'B': 0}
                val_data = {'A': test_A, 'B': test_B}
                for domain in val_data:
                    while val_data[domain].has_next():
                        batch_x, batch_y = val_data[domain](batch_size)
                        x.data.copy_(torch.from_numpy(batch_x))
                        n = int(np.sum(batch_y != -1))
                        acc[domain] += np.sum(
                            np.argmax(self.M(self.E(x)).cpu().data.numpy(), 1)[:n] == batch_y[:n])
                    acc[domain] /= float(val_data[domain].N)
                    val_data[domain].reset()  # reset so that the next evaluation starts from 0
                print('Epoch[%06d], acc_A: %.4f, acc_B: %.4f' %
                      (epoch + 1, acc['A'], acc['B']))
                self.agent.append(self.acc_A, epoch, acc['A'])
                self.agent.append(self.acc_B, epoch, acc['B'])
                scheduler_D.step((acc['A'] + acc['B']) / 2)
                scheduler_E.step((acc['A'] + acc['B']) / 2)
                scheduler_M.step((acc['A'] + acc['B']) / 2)
                self.agent.append(self.tlr, epoch,
                                  float(np.log(optimizer_E.param_groups[0]['lr'])))

            if epoch % 10000 == 9999 or epoch == training_epochs - 1:
                torch.save(self.E.state_dict(),
                           os.path.join(ckpt_dir, 'E_epoch-%s.pth' % str(epoch + 1).zfill(6)))
                torch.save(self.M.state_dict(),
                           os.path.join(ckpt_dir, 'M_epoch-%s.pth' % str(epoch + 1).zfill(6)))
                torch.save(self.D.state_dict(),
                           os.path.join(ckpt_dir, 'D_epoch-%s.pth' % str(epoch + 1).zfill(6)))

    def pretrain(self, ckpt_dir, test_A, test_B, init_lr_E=1e-3, init_lr_M=1e-3,
                 batch_size=64, pretrain_epochs=5000):
        x = Variable(torch.FloatTensor(batch_size, self.channel, self.size, self.size))
        y = Variable(torch.LongTensor(batch_size))
        cat_criterion = nn.CrossEntropyLoss()
        if self.cuda:
            x = x.cuda()
            y = y.cuda()
            cat_criterion = cat_criterion.cuda()

        optimizer_E = optim.Adam(self.E.parameters(), lr=init_lr_E, betas=(0.5, 0.999))
        optimizer_M = optim.Adam(self.M.parameters(), lr=init_lr_M, betas=(0.5, 0.999))
        # scheduler_E = lr_scheduler.StepLR(optimizer_E, step_size=1000, gamma=0.3)
        # scheduler_M = lr_scheduler.StepLR(optimizer_M, step_size=1000, gamma=0.3)
        scheduler_E = lr_scheduler.ReduceLROnPlateau(optimizer_E, mode='max', min_lr=1e-7,
                                                     patience=5, factor=0.65, verbose=True)
        scheduler_M = lr_scheduler.ReduceLROnPlateau(optimizer_M, mode='max', min_lr=1e-7,
                                                     patience=5, factor=0.65, verbose=True)

        for epoch in range(pretrain_epochs):
            # scheduler_E.step()
            # scheduler_M.step()
            begin_time = time.time()

            # fetch data
            batch_x_A, batch_y_A = self.data_A(batch_size // 2)
            batch_x_B, batch_y_B = self.data_B(batch_size - batch_x_A.shape[0])
            x.data.copy_(torch.from_numpy(np.concatenate([batch_x_A, batch_x_B])))
            y.data.copy_(torch.from_numpy(np.concatenate([batch_y_A, batch_y_B])))

            # update E and M
            self.E.zero_grad()
            self.M.zero_grad()
            h = self.E(x)
            pred_y = self.M(h)
            M_loss = cat_criterion(pred_y, y)
            E_loss = M_loss
            E_loss.backward()
            optimizer_E.step()
            optimizer_M.step()

            # record data on the curves
            self.agent.append(self.pre_e_loss, epoch, float(E_loss.data[0]))
            self.agent.append(self.pre_m_loss, epoch, float(M_loss.data[0]))

            elapsed_time = time.time() - begin_time
            print('Pretrain epoch[%06d], E_loss(= M_loss): %.4f, elapsed_time: %.4ssecs.' %
                  (epoch + 1, E_loss.data[0], elapsed_time))

            if epoch % 500 == 0:
                acc = {'A': 0, 'B': 0}
                val_data = {'A': test_A, 'B': test_B}
                for domain in val_data:
                    while val_data[domain].has_next():
                        batch_x, batch_y = val_data[domain](batch_size)
                        x.data.copy_(torch.from_numpy(batch_x))
                        n = int(np.sum(batch_y != -1))
                        acc[domain] += np.sum(
                            np.argmax(self.M(self.E(x)).cpu().data.numpy(), 1)[:n] == batch_y[:n])
                    acc[domain] /= float(val_data[domain].N)
                    val_data[domain].reset()  # reset so that the next evaluation starts from 0
                print('Pretrain epoch[%06d], acc_A: %.4f, acc_B: %.4f' %
                      (epoch + 1, acc['A'], acc['B']))
                self.agent.append(self.pre_acc_A, epoch, acc['A'])
                self.agent.append(self.pre_acc_B, epoch, acc['B'])
                scheduler_E.step((acc['A'] + acc['B']) / 2)
                scheduler_M.step((acc['A'] + acc['B']) / 2)
                self.agent.append(self.plr, epoch,
                                  float(np.log(optimizer_E.param_groups[0]['lr'])))

            if epoch % 10000 == 9999 or epoch == pretrain_epochs - 1:
                torch.save(self.E.state_dict(),
                           os.path.join(ckpt_dir, 'pretrain_E_epoch-%s.pth' % str(epoch + 1).zfill(6)))
                torch.save(self.M.state_dict(),
                           os.path.join(ckpt_dir, 'pretrain_M_epoch-%s.pth' % str(epoch + 1).zfill(6)))
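
For reference, a hypothetical driver for the class above could look like the following sketch. The EncoderNet, DiscriminatorNet, and ClassifierNet constructors and the data objects are placeholders, not names defined here; only the AIFL_Digits call order (pretrain, then adversarial training) comes from the class itself.

# Hypothetical usage sketch; E, D, M and the data objects are placeholders.
E = EncoderNet()        # feature encoder
D = DiscriminatorNet()  # predicts which domain a sample came from
M = ClassifierNet()     # class label predictor
aifl = AIFL_Digits(E, D, M, data_A, data_B, exp='digits_A_vs_B', port=5000)
aifl.pretrain('checkpoints/', test_A, test_B)  # warm up E and M on both domains
aifl.train('checkpoints/', test_A, test_B)     # adversarial phase with D
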
def trainer(data='coco', margin=0.2, dim=1024, dim_image=4096, dim_word=300,
            encoder='gru', max_epochs=15, dispFreq=10, decay_c=0.0,
            grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/coco',
            validFreq=100, lrate=0.0002, concat=True, reload_=False):
    hyper_params = {
        'data': data,
        'encoder': encoder,
        'batch_size': batch_size,
        'time': cur_time,
        'lrate': lrate,
        'concat': concat,
    }
    i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items())
    i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items())
    i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items())
    t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items())
    t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items())
    t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items())
    i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items())
    t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items())

    agent = Agent(port=5020)
    i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True)
    i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True)
    i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True)
    t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True)
    t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True)
    t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True)
    i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True)
    t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True)

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['concat'] = concat
    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch has the same length (for the encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    for eidx in xrange(max_epochs):
        print 'Epoch ', eidx
        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1
            x_id, im = homogeneous_data.prepare_data(x, im, worddict,
                                                     maxlen=maxlen_w,
                                                     n_words=n_words)
            if x_id is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x_id = Variable(torch.from_numpy(x_id).cuda())
            im = Variable(torch.from_numpy(im).cuda())

            # Update
            ud_start = time.time()
            x, im = img_sen_model(x_id, im, x)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()
            ud = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu().numpy()[0], 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:
                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                r1, r5, r10, medr = 0.0, 0.0, 0.0, 0
                r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0
                r_time = time.time()
                if data == 'arch' or data == 'arch_small':
                    (r1, r5, r10, medr) = i2t_arch(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                    (r1i, r5i, r10i, medri) = t2i_arch(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
                else:
                    (r1, r5, r10, medr) = i2t(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                    (r1i, r5i, r10i, medri) = t2i(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
                print "Cal Recall@K using %ss" % (time.time() - r_time)

                record_num = uidx / validFreq
                agent.append(i2t_r1_agent, record_num, r1)
                agent.append(i2t_r5_agent, record_num, r5)
                agent.append(i2t_r10_agent, record_num, r10)
                agent.append(t2i_r1_agent, record_num, r1i)
                agent.append(t2i_r5_agent, record_num, r5i)
                agent.append(t2i_r10_agent, record_num, r10i)
                agent.append(i2t_med_agent, record_num, medr)
                agent.append(t2i_med_agent, record_num, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore
                    # Save model
                    print 'Saving model...',
                    pkl.dump(model_options,
                             open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

        print 'Seen %d samples' % n_samples

def train(lr, net, epoch, train_loader, valid_loader, transform,
          hyperparameters, batch_size):
    # register hypercurve
    agent = Agent(port=5000)
    hyperparameters['criteria'] = 'train loss'
    train_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid loss'
    valid_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid bleu'
    valid_bleu = agent.register(hyperparameters, 'bleu')
    #hyperparameters['criteria'] = 'train bleu'
    #train_bleu = agent.register(hyperparameters, 'bleu')
    hyperparameters['criteria'] = 'teacher_forcing_ratio'
    hyper_tfr = agent.register(hyperparameters, 'ratio')
    hyperparameters['criteria'] = 'teacher_forcing_loss'
    valid_tf_loss = agent.register(hyperparameters, 'loss')

    if torch.cuda.is_available():
        net.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        net.parameters()), lr=lr)
    net.train()

    best_score = -1
    global_steps = 0
    best_valid_loss = 10000
    for iepoch in range(epoch):
        new_epoch = False
        batchid = 0
        for (_, data) in enumerate(train_loader, 0):
            entext = data['entext']
            enlen = data['enlen']
            zhlabel = data['zhlabel']
            zhgtruth = data['zhgtruth']
            zhlen = data['zhlen']
            enstr = data['enstr']
            zhstr = data['zhstr']

            teacher_forcing_ratio = math.exp(-global_steps / 10000000)
            print('teacher_forcing_ratio: ', teacher_forcing_ratio)
            decoder_outputs, ret_dict = net(entext, zhgtruth, enlen, True,
                                            teacher_forcing_ratio)
            loss = net.get_loss(decoder_outputs, zhlabel)

            optimizer.zero_grad()
            loss.backward()
            utils.clip_grad_norm(net.parameters(), 5)
            optimizer.step()

            batchid += 1
            global_steps += 1
            print(global_steps, iepoch, batchid, max(enlen),
                  sum(loss.data.cpu().numpy()))
            agent.append(train_loss, global_steps, sum(loss.data.cpu().numpy()))
            agent.append(hyper_tfr, global_steps, teacher_forcing_ratio)

            if global_steps % 50 == 0:
                net.eval()
                decoder_outputs, ret_dict = net(entext, zhgtruth, enlen, True,
                                                teacher_forcing_ratio)
                length = ret_dict['length']
                prediction = [0 for i in range(len(length))]
                tmppre = [_.squeeze().cpu().data.tolist()
                          for _ in ret_dict['sequence']]
                tmppre = np.array(tmppre).transpose(1, 0)
                for i in range(len(tmppre)):
                    prediction[i] = tmppre[i][:length[i]]
                    prediction[i] = transform.i2t(prediction[i], language='zh')
                    prediction[i] = re.sub(r'nuk#', '', prediction[i])
                    prediction[i] = re.sub(r'eos#', '', prediction[i])

                tmpscore = bleuscore.score(prediction, zhstr)
                for i in range(5):
                    print(prediction[i])
                    print(zhstr[i])
                    print('-------------------\n')
                del decoder_outputs, ret_dict
                #agent.append(train_bleu, global_steps, tmpscore)
                net.train()

            if global_steps % 200 == 0:
                print('\n------------------------\n')
                net.eval()
                all_pre = []
                all_label = []
                all_loss = 0
                all_en = []
                bats = 0
                teacher_forcing_loss = 0
                for (_, data) in enumerate(valid_loader, 0):
                    entext = data['entext']
                    enlen = data['enlen']
                    zhlabel = data['zhlabel']
                    zhgtruth = data['zhgtruth']
                    zhlen = data['zhlen']
                    enstr = data['enstr']
                    zhstr = data['zhstr']

                    decoder_outputs, ret_dict = net(entext, None, enlen, True, 0)
                    length = ret_dict['length']
                    prediction = [0 for i in range(len(length))]
                    tmppre = [_.squeeze().cpu().data.tolist()
                              for _ in ret_dict['sequence']]
                    tmppre = np.array(tmppre).transpose(1, 0)
                    for i in range(len(tmppre)):
                        prediction[i] = tmppre[i][:length[i]]
                        prediction[i] = transform.i2t(prediction[i], language='zh')
                        prediction[i] = re.sub(r'nuk#', '', prediction[i])
                        prediction[i] = re.sub(r'eos#', '', prediction[i])

                    loss = net.get_loss(decoder_outputs, zhlabel)
                    all_pre.extend(prediction)
                    all_label.extend(zhstr)
                    all_en.extend(enstr)
                    all_loss += sum(loss.data.cpu().numpy())
                    del loss, decoder_outputs, ret_dict

                    # teacher forcing loss, to judge whether the model overfits
                    decoder_outputs, _ = net(entext, zhgtruth, enlen, True, 1)
                    loss = net.get_loss(decoder_outputs, zhlabel)
                    teacher_forcing_loss += sum(loss.data.cpu().numpy())
                    bats += 1

                score = bleuscore.score(all_pre, all_label)
                for i in range(0, 400):
                    print(all_en[i])
                    print(all_pre[i])
                    print(all_label[i])
                    print('-------------------\n')

                all_loss /= bats
                teacher_forcing_loss /= bats
                print(global_steps, iepoch, batchid, all_loss,
                      teacher_forcing_loss, score,
                      '\n********************\n')
                agent.append(valid_loss, global_steps, all_loss)
                agent.append(valid_bleu, global_steps, score)
                agent.append(valid_tf_loss, global_steps, teacher_forcing_loss)

                if best_valid_loss > all_loss:
                    best_valid_loss = all_loss
                    #bestscore = score
                    _ = model_dir + "ratio-{:3f}-loss-{:3f}-score-{:3f}-steps-{:d}-model.pkl".format(
                        teacher_forcing_ratio, all_loss, score, global_steps)
                    torch.save(net.state_dict(), _)
                elif global_steps % 1000 == 0:
                    _ = model_dir + "ratio-{:3f}-loss-{:3f}-score-{:3f}-steps-{:d}-model.pkl".format(
                        teacher_forcing_ratio, all_loss, score, global_steps)
                    torch.save(net.state_dict(), _)

                del all_label, all_loss, all_pre
                net.train()

    'eps': eps,
    'input_size': input_size,
    'train_scale': train_scale,
    'test_scale': test_scale,
    'train_transform': train_transform,
    'lr_decay': lr_decay,
    'monitoring': None
}

monitoring = [
    'train_loss', 'train_accu1', 'train_accu3',
    'valid_loss', 'valid_accu1', 'valid_accu3'
]
names = {}
agent = Agent()
for m in monitoring:
    hyperparameters['result'] = m
    metric = m.split('_')[-1]
    name = agent.register(hyperparameters, metric)
    names[m] = name

latest_check = 'checkpoint/' + checkpoint_filename + '_latest.pth.tar'
best_check = 'checkpoint/' + checkpoint_filename + '_best.pth.tar'


def run():
    model = load_model(arch, pretrained, use_gpu=use_gpu,
                       num_classes=num_classes,

def train(lr, net, epoch, train_loader, valid_loader, transform,
          hyperparameters, batch_size):
    # register hypercurve
    agent = Agent(port=5005)
    hyperparameters['criteria'] = 'train loss'
    train_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid loss'
    valid_loss = agent.register(hyperparameters, 'loss')
    hyperparameters['criteria'] = 'valid bleu'
    valid_bleu = agent.register(hyperparameters, 'bleu')
    hyperparameters['criteria'] = 'train bleu'
    train_bleu = agent.register(hyperparameters, 'bleu')
    hyperparameters['criteria'] = 'teacher_forcing_ratio'
    hyper_tfr = agent.register(hyperparameters, 'ratio')
    hyperparameters['criteria'] = 'teacher_forcing_loss'
    valid_tf_loss = agent.register(hyperparameters, 'loss')

    if torch.cuda.is_available():
        net.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        net.parameters()), lr=lr)
    net.train()

    best_score = -1
    global_steps = 578800
    best_valid_loss = 10000
    for iepoch in range(epoch):
        batchid = 0
        for (_, tdata) in enumerate(train_loader, 0):
            entext = tdata['entext']
            enlen = tdata['enlen']
            zhlabel = tdata['zhlabel']
            zhgtruth = tdata['zhgtruth']
            zhlen = tdata['zhlen']
            enstr = tdata['enstr']
            zhstr = tdata['zhstr']

            teacher_forcing_ratio = 1
            print('teacher_forcing_ratio: ', teacher_forcing_ratio)
            decoder_outputs, ret_dict = net(entext, zhgtruth, True,
                                            teacher_forcing_ratio)
            loss = net.get_loss(decoder_outputs, zhlabel)

            optimizer.zero_grad()
            loss.backward()
            utils.clip_grad_norm(net.parameters(), 5)
            optimizer.step()

            batchid += 1
            global_steps += 1
            print(global_steps, iepoch, batchid, max(enlen),
                  sum(loss.data.cpu().numpy()))
            agent.append(train_loss, global_steps, sum(loss.data.cpu().numpy()))
            agent.append(hyper_tfr, global_steps, teacher_forcing_ratio)

            if global_steps % 50 == 0:
                net.eval()
                decoder_outputs, ret_dict = net(entext, zhgtruth, True,
                                                teacher_forcing_ratio)
                length = ret_dict['length']
                prediction = [0 for i in range(len(length))]
                tmppre = [_.squeeze().cpu().data.tolist()
                          for _ in ret_dict['sequence']]
                tmppre = np.array(tmppre).transpose(1, 0)
                for i in range(len(tmppre)):
                    prediction[i] = tmppre[i][:length[i]]
                    prediction[i] = transform.i2t(prediction[i], language='zh')
                    prediction[i] = re.sub(r'nuk#', '', prediction[i])
                    prediction[i] = re.sub(r'eos#', '', prediction[i])

                tmpscore = bleuscore.score(prediction, zhstr)
                for i in range(5):
                    print(prediction[i])
                    print(zhstr[i])
                    print('-------------------\n')
                del decoder_outputs, ret_dict
                agent.append(train_bleu, global_steps, tmpscore)
                net.train()

            if global_steps % 200 == 0:
                print('\n------------------------\n')
                net.eval()
                all_pre = []
                all_label = []
                all_loss = 0
                all_en = []
                bats = 0
                teacher_forcing_loss = 0
                for (_, vdata) in enumerate(valid_loader, 0):
                    entext = vdata['entext']
                    enlen = vdata['enlen']
                    zhlabel = vdata['zhlabel']
                    zhgtruth = vdata['zhgtruth']
                    zhlen = vdata['zhlen']
                    enstr = vdata['enstr']
                    zhstr = vdata['zhstr']

                    decoder_outputs, ret_dict = net(entext, None, True, 0)
                    length = ret_dict['length']
                    prediction = [0 for i in range(len(length))]
                    tmppre = [_.squeeze().cpu().data.tolist()
                              for _ in ret_dict['sequence']]
                    tmppre = np.array(tmppre).transpose(1, 0)
                    for i in range(len(tmppre)):
                        prediction[i] = tmppre[i][:length[i]]
                        prediction[i] = transform.i2t(prediction[i], language='zh')
                        prediction[i] = re.sub(r'nuk#', '', prediction[i])
                        prediction[i] = re.sub(r'eos#', '', prediction[i])

                    loss = net.get_loss(decoder_outputs, zhlabel)
                    all_pre.extend(prediction)
                    all_label.extend(zhstr)
                    all_en.extend(enstr)
                    all_loss += sum(loss.data.cpu().numpy())
                    del loss, decoder_outputs, ret_dict

                    # teacher forcing loss, to judge whether the model overfits
                    decoder_outputs, _ = net(entext, zhgtruth, True, 1)
                    loss = net.get_loss(decoder_outputs, zhlabel)
                    teacher_forcing_loss += sum(loss.data.cpu().numpy())
                    bats += 1

                score = bleuscore.score(all_pre, all_label)
                for i in range(0, 400):
                    print(all_en[i])
                    print(all_pre[i])
                    print(all_label[i])
                    print('-------------------\n')

                all_loss /= bats
                teacher_forcing_loss /= bats
                print(global_steps, iepoch, batchid, all_loss,
                      teacher_forcing_loss, score,
                      '\n********************\n')
                agent.append(valid_loss, global_steps, all_loss)
                agent.append(valid_bleu, global_steps, score)
                agent.append(valid_tf_loss, global_steps, teacher_forcing_loss)

                if best_valid_loss > all_loss:
                    best_valid_loss = all_loss
                    #bestscore = score
                    _ = model_dir + "ratio-{:3f}-loss-{:3f}-score-{:3f}-steps-{:d}-model.pkl".format(
                        teacher_forcing_ratio, all_loss, score, global_steps)
                    torch.save(net.state_dict(), _)
                elif global_steps % 400 == 0:
                    _ = model_dir + "ratio-{:3f}-loss-{:3f}-score-{:3f}-steps-{:d}-model.pkl".format(
                        teacher_forcing_ratio, all_loss, score, global_steps)
                    torch.save(net.state_dict(), _)

                del all_label, all_loss, all_pre
                net.train()