def train(opt):
    data, word2ix, ix2word = get_data(opt)
    data = torch.from_numpy(data)
    dataloader = td.DataLoader(data,
                               batch_size=opt.batch_size,
                               shuffle=True,
                               num_workers=1)
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    for epoch in range(opt.epoch):
        for step, data_ in enumerate(dataloader):
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Shift by one position: at every step the model predicts the next character.
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            if (1 + step) % 10 == 0:
                print("current loss", loss.data)
        torch.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
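# Note: the snippets in this section all assume a PoetryModel roughly like the
# sketch below: an embedding layer feeding an LSTM whose per-step outputs are
# projected back onto the vocabulary and flattened to (seq_len * batch, vocab),
# so they can go straight into nn.CrossEntropyLoss. The exact constructor
# signature differs between snippets (some pass num_layers, a config object or
# a device); this is a minimal sketch under those assumptions, not the
# definitive implementation used by any one snippet.
import torch
import torch.nn as nn


class PoetryModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=1):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input_, hidden=None):
        # input_: (seq_len, batch_size) tensor of word indices
        seq_len, batch_size = input_.size()
        embeds = self.embeddings(input_)            # (seq_len, batch, embedding_dim)
        output, hidden = self.lstm(embeds, hidden)  # (seq_len, batch, hidden_dim)
        output = self.linear(output.view(seq_len * batch_size, -1))  # (seq_len*batch, vocab)
        return output, hidden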
def run(self):
    # 1. Load the data
    data, char_to_ix, ix_to_chars = get_data(self.config)
    vocab_size = len(char_to_ix)
    print('Number of samples: %d' % len(data))
    print('Vocabulary size: %d' % vocab_size)
    # 2. Set up the dataloader
    data = torch.from_numpy(data)
    data_loader = Data.DataLoader(data,
                                  batch_size=self.config.batch_size,
                                  shuffle=True,
                                  num_workers=1)
    # 3. Build the model
    model = PoetryModel(vocab_size=vocab_size,
                        embedding_dim=self.config.embedding_dim,
                        hidden_dim=self.config.hidden_dim,
                        device=self.device,
                        layer_num=self.config.layer_num)
    model.to(self.device)
    # 4. Build the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=self.config.lr,
                           weight_decay=self.config.weight_decay)
    # 5. Loss function; CrossEntropyLoss applies log-softmax internally
    criterion = nn.CrossEntropyLoss()
    # 6. Train
    self.train(data_loader, model, optimizer, criterion, char_to_ix, ix_to_chars)
def train():
    datas = np.load("tang.npz")
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data[:5000],
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=2)
    model = PoetryModel(len(word2ix),
                        embedding_dim=config.embedding_dim,
                        hidden_dim=config.hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)
    f = open('result.txt', 'w')
    loss_history = []
    for epoch in range(config.epoch):
        start_time = time.time()
        temp_loss = 0
        for step, batch_data in enumerate(dataloader):
            batch_data = batch_data.long().transpose(1, 0).contiguous()
            # Move the batch to the same device as the model
            batch_data = batch_data.to(config.device)
            optimizer.zero_grad()
            trn, target = batch_data[:-1, :], batch_data[1:, :]
            output, _ = model(trn)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            temp_loss += loss.item()
            if step % config.print_freq == 0 or step == len(dataloader) - 1:
                print("Train: [{:2d}/{}] Step: {:03d}/{:03d} Loss: {}".format(
                    epoch + 1, config.epoch, step, len(dataloader) - 1, loss.item()))
        loss_history.append(temp_loss / len(dataloader))
        elapsed_time = time.time() - start_time
        print("Epoch: %d" % epoch + " " + "Loss: %.4f" % loss_history[-1] +
              " Epoch time: " + time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
    torch.save(model.state_dict(), config.model_path)
def train():
    # Load the data
    data, word2ix, ix2word = get_data()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=1)
    # Define the model
    model = PoetryModel(len(word2ix), config.embedding_dim,
                        config.hidden_dim, config.num_layers)
    optimizer = optim.Adam(model.parameters(), lr=config.lr,
                           weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)
    for epoch in range(config.epoch):
        total_loss = 0
        for data_ in tqdm(dataloader):
            # Train
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(config.device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()
        print("epoch: ", epoch, "loss: ", total_loss / len(dataloader))
        torch.save(model.state_dict(), '%s_%s.pth' % (config.model_prefix, epoch))
def train(**kwargs: dict) -> None:
    for k, v in kwargs.items():
        setattr(opt, k, v)
    # vis = Visdom(env=opt.env)
    # get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(
        data,
        batch_size=opt.batch_size,
        shuffle=True,
    )
    model = PoetryModel(len(word2ix), 2, 2)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.user_gpu:
        model.cuda()
        criterion.cuda()
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.user_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = V(data_[:-1, :]), V(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
for i in range(len(data)):
    data[i] = toList(data[i])
    data[i].append("<EOP>")

# save the word dict for the sampling method
p.dump(word_to_ix, file('wordDic', 'w'))

# save all available words
# wordList = open('wordList', 'w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

model = PoetryModel(len(word_to_ix), 256, 256)
model.cuda()  # running on GPU; if you want to run it on CPU, delete all .cuda() usage.
optimizer = optim.RMSprop(model.parameters(), lr=0.01, weight_decay=0.0001)
criterion = nn.NLLLoss()
one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))
epochNum = 10
TRAINSIZE = len(data)
batch = 100


def test():
    v = int(TRAINSIZE / batch)
    loss = 0
    counts = 0
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # Train
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data[0])

            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original poems in this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
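# generate() is called by several snippets here but not defined in these
# excerpts. Below is a minimal greedy-decoding sketch, assuming the corpus uses
# '<START>' and '<EOP>' as boundary tokens and capping output at 200 characters
# (both details are assumptions, not taken from the code above).
import torch


def generate(model, start_word, ix2word, word2ix, max_len=200):
    results = list(start_word)
    device = next(model.parameters()).device
    # Prime the network with the <START> token, then feed the opening characters.
    input_ = torch.tensor([[word2ix['<START>']]], device=device).long()
    hidden = None
    with torch.no_grad():
        for i in range(max_len):
            output, hidden = model(input_, hidden)
            if i < len(start_word):
                # Teacher-force the user-supplied opening characters.
                w = start_word[i]
            else:
                # Greedy decoding: take the highest-scoring next character.
                top_ix = int(output[0].argmax())
                w = ix2word[top_ix]
                results.append(w)
            if w == '<EOP>':
                results.pop()
                break
            input_ = torch.tensor([[word2ix[w]]], device=device).long()
    return results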
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(config, k, v)

    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    # Load the data and hold out roughly a fifth of it as a dev set
    data, vocab = get_data(config.filepath)
    np.random.shuffle(data)
    l = len(data)
    dev_data = data[:l // 5 - 1]
    data = data[l // 5:]
    data = torch.from_numpy(data)
    dev_data = torch.from_numpy(dev_data)
    dataloader = D.DataLoader(data,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=4)
    dev_dataloader = D.DataLoader(dev_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=4)

    # Define the model
    model = PoetryModel(len(vocab.word2idx), 128, 256)
    # if config.model_path:
    #     model.load_state_dict(torch.load(config.model_path))
    model.to(device)

    # SGD, SGD with momentum, Nesterov, Adagrad, Adadelta, Adam
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, nesterov=True)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.Adadelta(model.parameters())
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()

    pre_pp = 0
    cnt = -1
    loss_his = []
    pp_his = []
    for epoch in range(config.epoch):
        for ii, data_ in enumerate(dataloader):
            # Train
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            print("epoch", epoch, "step", ii, "loss", loss.item())
            loss_his.append(loss.item())

            # Periodically sample a poem
            if (1 + ii) % config.gen_every == 0:
                # "'春江花月夜凉如水'"
                word = "春"
                gen_poetry = ''.join(generate(model, word, vocab))
                print(gen_poetry)

            # Periodically check dev-set perplexity and stop early if it stalls
            if (1 + ii) % config.pp_every == 0:
                pp = check_perplexity(model, dev_dataloader)
                if pre_pp < pp:
                    cnt += 1
                pre_pp = pp
                print(pp.cpu().numpy())
                pp_his.append(pp.cpu().numpy())
                if cnt >= config.tolerance:
                    torch.save(model.state_dict(), '%s_final.pth' % str(int(time.time())))
                    print("epoch", epoch, "step", ii, "final loss", loss.item())
                    for word in ["日", "红", "山", "夜", "湖", "海", "月"]:
                        gen_poetry = ''.join(generate(model, word, vocab))
                        print(gen_poetry)
                    return loss_his, pp_his

        if (epoch + 1) % config.save_every == 0 or epoch + 1 == config.epoch:
            torch.save(model.state_dict(),
                       '%s_%s.pth' % (str(int(time.time())), str(epoch)))
    return loss_his, pp_his
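# check_perplexity() is referenced in the snippet above but not defined in
# these excerpts. By the usual convention, perplexity is exp of the average
# per-token cross-entropy on the held-out set; the sketch below assumes that
# convention and that dev batches have the same (batch, seq_len) layout as the
# training batches. It returns a tensor so the caller's pp.cpu().numpy() keeps
# working. The helper itself is an assumption, not code from the source.
import torch
import torch.nn as nn


def check_perplexity(model, dev_dataloader):
    # Infer the device from the model so dev batches land on the right one.
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    total_loss, batches = 0.0, 0
    with torch.no_grad():
        for data_ in dev_dataloader:
            data_ = data_.long().transpose(1, 0).contiguous().to(device)
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            total_loss += criterion(output, target.view(-1)).item()
            batches += 1
    # Perplexity = exp(mean cross-entropy per batch).
    return torch.exp(torch.tensor(total_loss / max(batches, 1)))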
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # Train
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()
            # Shift input and target by one position
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data[0])

            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original poems in this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # Load the data
    data_all = np.load(opt.pickle_path)
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    data = t.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path, map_location=t.device('cpu')))
    model.to(device)

    loss_avg = 0
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm(enumerate(dataloader)):
            data_ = data_.long()
            data_ = data_.to(device)
            optimizer.zero_grad()
            # Here the batch stays batch-first, so the shift is along dim 1
            input_, target = data_[:, :-1], data_[:, 1:]
            output, _ = model(input_)
            loss = loss_func(output, target.reshape(-1))
            loss.backward()
            optimizer.step()
            loss_avg += loss.item()

            # Visualization
            if (ii + 1) % opt.plot_every == 0:
                vis.plot('loss', loss_avg / opt.plot_every)
                loss_avg = 0
                poetrys = [[ix2word[_word] for _word in data_[i].tolist()]
                           for i in range(data_.shape[0])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')
                gen_poetries = []
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win='gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  # convert the numpy array to a tensor
    dataloader = t.utils.data.DataLoader(data,  # build the DataLoader instance
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)  # (vocab_size, embedding_dim, hidden_dim)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()  # the loss is cross-entropy

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):  # tqdm progress bar; one batch per step
            # Train
            # data_.size(): (batch_size, maxlen)
            # transpose, then return a contiguous tensor holding the same data
            data_ = data_.long().transpose(1, 0).contiguous()
            # if epoch == 0 and ii == 0:
            #     print('size of data_ after transpose: \n', data_.size())
            if opt.use_gpu:
                data_ = data_.cuda()
            optimizer.zero_grad()  # clear the gradients
            # input_ is the first maxlen-1 items of every poem and target is the
            # last maxlen-1 items: with "床前明月光", the input is "床前明月"
            # and the model must predict "前明月光"
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            # Tensor.view(-1) flattens the target element by element along dim 0
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.data[0])

            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):  # if the debug file exists,
                    ipdb.set_trace()                # drop into the debugger
                vis.plot('loss', loss_meter.value()[0])

                # Original poems: map every index of every poem back to text
                poetrys = [[ix2word[_word] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]  # _iii ranges over the batch
                # show the first 16 poems of the batch in visdom
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=2)

    # Define the model
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # Optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Resume from a pretrained model so training can be continued
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))

    # GPU related
    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)

    # Loss meter
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        # Iterate over batches
        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            # Train
            # data_ has size [128, 125]: 128 rows per batch, one poem per row, length 125
            # its dtype is torch.int32 and must be converted to long
            # The next line does a lot:
            #   1. cast int32 to long
            #   2. swap rows and columns, as the parallel computation expects
            #   3. lay the data out contiguously in memory so later ops do not complain
            data_ = data_.long().transpose(0, 1).contiguous()
            # GPU related
            if opt.use_gpu:
                data_ = data_.to(device)
            # at this point data_.dtype is torch.int64
            # print(data_.dtype)

            # Clear the gradients
            optimizer.zero_grad()

            # Shifted training: the first n-1 rows are the input and the last
            # n-1 rows are the target, again for the sake of parallel computation.
            # input_ has a trailing underscore to avoid shadowing the built-in input()
            input_, target = data_[:-1, :], data_[1:, :]

            # The model returns output and hidden; hidden is not needed here
            output, _ = model(input_)

            # Compute the loss
            target = target.view(-1)
            # target.size() is now [15872]  (124 * 128 = 15872)
            # output.size() is [15872, 8293], where 8293 is the vocabulary size
            loss = criterion(output, target)

            # Backpropagate
            loss.backward()
            # Let the optimizer take a gradient step
            optimizer.step()

            loss_meter.add(loss.data[0])

            # Visualization
            if (1 + i) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original poems in this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        # Save the model once per epoch
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
def train_torch_lstm(conf, args=None):
    pdata = PoemData()
    pdata.read_data(conf)
    pdata.get_vocab()
    if conf.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = PoetryModel(pdata.vocab_size, conf, device)

    train_data = pdata.train_data
    test_data = pdata.test_data
    train_data = torch.from_numpy(np.array(train_data['pad_words']))
    dev_data = torch.from_numpy(np.array(test_data['pad_words']))
    dataloader = DataLoader(train_data,
                            batch_size=conf.batch_size,
                            shuffle=True,
                            num_workers=conf.num_workers)
    devloader = DataLoader(dev_data,
                           batch_size=conf.batch_size,
                           shuffle=True,
                           num_workers=conf.num_workers)

    optimizer = Adam(model.parameters(), lr=conf.learning_rate)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    if conf.load_best_model:
        model.load_state_dict(torch.load(conf.best_model_path))
    if conf.use_gpu:
        model.cuda()
        criterion.cuda()

    step = 0
    bestppl = 1e9
    early_stop_controller = 0
    for epoch in range(conf.n_epochs):
        losses = []
        loss_meter.reset()

        # Training pass
        model.train()
        for i, data in enumerate(dataloader):
            data = data.long().transpose(1, 0).contiguous()
            if conf.use_gpu:
                data = data.cuda()
            input, target = data[:-1, :], data[1:, :]
            optimizer.zero_grad()
            output, _ = model(input)
            loss = criterion(output, target.contiguous().view(-1))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            loss_meter.add(loss.item())
            step += 1
            if step % 100 == 0:
                print("epoch_%d_step_%d_loss:%0.4f" % (epoch + 1, step, loss.item()))
        train_loss = float(loss_meter.value()[0])

        # Evaluation pass on the dev set; perplexity is exp of the mean loss
        model.eval()
        for i, data in enumerate(devloader):
            data = data.long().transpose(1, 0).contiguous()
            if conf.use_gpu:
                data = data.cuda()
            input, target = data[:-1, :], data[1:, :]
            output, _ = model(input)
            loss = criterion(output, target.view(-1))
            loss_meter.add(loss.item())
        ppl = math.exp(loss_meter.value()[0])
        print("epoch_%d_loss:%0.4f , ppl:%0.4f" % (epoch + 1, train_loss, ppl))

        if epoch % conf.save_every == 0:
            torch.save(model.state_dict(), "{0}_{1}".format(conf.model_prefix, epoch))
            fout = open("{0}out_{1}".format(conf.out_path, epoch), 'w', encoding='utf-8')
            for word in list('日红山夜湖海月'):
                gen_poetry = generate_poet(model, word, pdata.vocab, conf)
                fout.write("".join(gen_poetry) + '\n\n')
            fout.close()

        # Early stopping on dev perplexity
        if ppl < bestppl:
            bestppl = ppl
            early_stop_controller = 0
            torch.save(model.state_dict(),
                       "{0}_{1}".format(conf.best_model_path, "best_model"))
        else:
            early_stop_controller += 1
        if early_stop_controller > conf.patience:
            print("early stop.")
            break
def train(Config):
    torch.multiprocessing.set_sharing_strategy('file_system')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    datas = np.load("data/chinese-poetry-master/tang.npz", allow_pickle=True)
    data = datas["data"]
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    print(data.shape)
    # # Strip the space/padding token (index 8292) and re-slice into chunks of 48
    # t_data = data.view(-1)
    # flat_data = t_data.numpy()
    # no_space_data = []
    # for i in flat_data:
    #     if (i != 8292):
    #         no_space_data.append(i)
    # slice_size = 48
    # txt = [no_space_data[i:i+slice_size] for i in range(0, len(no_space_data), slice_size)]
    # txt = np.array(txt[:-1])  # drop the last chunk, which is shorter than 48
    # txt = torch.from_numpy(txt).long()
    # print(txt.shape)
    # datas = PoemDataSet(Config.data_path, 48)
    # data = datas.no_space_data      # datas['data']
    # ix2word = datas.ix2word         # datas['ix2word'].item()
    # word2ix = datas.word2ix         # datas['word2ix'].item()
    dataLoader = DataLoader(data,
                            batch_size=Config.batch_size,
                            shuffle=Config.shuffle,
                            num_workers=Config.num_workers)
    model = PoetryModel(len(word2ix),
                        embedding_dim=Config.embedding_dim,
                        hidden_dim=Config.hidden_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=Config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # learning-rate schedule
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()
    top1 = meter.AverageValueMeter()
    # top1 = utils.AverageMeter()
    # if Config.model_path:
    #     model.load_state_dict(torch.load(Config.model_path))
    train_loss_list = []
    train_accuracy_list = []
    for epoch in range(Config.epoch):
        loss_meter.reset()
        top1.reset()
        for ii, data_ in enumerate(dataLoader):  # tqdm.tqdm(enumerate(dataLoader)):
            # inputs, labels = Variable(data_[0]), Variable(data_[1])  # .to(device)
            data_ = data_.long().transpose(1, 0).contiguous()
            # Shift the poems by one character to form the input and the target
            inputs, labels = Variable(data_[:-1, :]), Variable(data_[1:, :])
            print(inputs.size(1))
            optimizer.zero_grad()
            output, _ = model(inputs)
            loss = criterion(output, labels.view(-1))
            loss.backward()
            optimizer.step()
            loss_meter.add(loss.item())
            _, pred = output.topk(1)
            prec1, prec2 = accuracy(output, labels, topk=(1, 2))
            n = inputs.size(0)
            top1.add(prec1.item())
            # data = data.long().transpose(1, 0).contiguous()
            if (1 + ii) % Config.plot_every == 0:
                if os.path.exists(Config.debug_file):
                    ipdb.set_trace()
                # Quick check of the current model: original poems and a generated one
                # print(inputs.size(1))
                # print(inputs.numpy()[:1].shape)
                # poetrys = [[ix2word[_word] for _word in inputs.numpy()[:, _iii]]
                #            for _iii in range(inputs.size(1))][0]
                # poetrys = ["".join(poetry) for poetry in poetrys]
                # print("origin")
                # print(poetrys)
                # (the nested loops above turn the index sequences back into text)
                gen_poetries = []
                start = u"春江花月夜凉如水"
                gen_poetry = "".join(generate(model, start, ix2word, word2ix, Config))
                # for word in list(u"春江花月夜凉如水"):
                #     gen_poetry = "".join(generate(model, word, ix2word, word2ix, Config))
                #     gen_poetries.append(gen_poetry)
                # gen_poetries = "</br>".join(["".join(poetry) for poetry in gen_poetries])
                print("generate")
                print(gen_poetry)
                # if os.path.exists(Config.tensorboard_path) == False:
                #     os.mkdir(Config.tensorboard_path)
                # writer = SummaryWriter(Config.tensorboard_path)
                # writer.add_scalar('Train/Loss', loss.item(), epoch)
                # writer.add_scalar('Train/Accuracy', 100 * prec1.item() / output.size(0), epoch)
                # writer.flush()
        train_loss_list.append(loss.item())
        train_accuracy_list.append(100 * prec1.item() / output.size(0))
        print('train %d epoch loss: %.3f acc: %.3f ' %
              (epoch + 1, loss_meter.mean, 100 * top1.mean / output.size(0)))
        scheduler.step()

    # Plot the loss and accuracy curves over the epochs
    x1 = range(0, Config.epoch)
    y1 = train_loss_list
    y3 = train_accuracy_list
    plt.subplot(2, 1, 1)
    plt.plot(x1, y1, 'o-')
    plt.legend(["train_loss"])
    plt.title('Loss vs. epochs')
    plt.ylabel('Loss')
    plt.subplot(2, 1, 2)
    plt.plot(x1, y3, '.-')
    plt.legend(["train_accuracy"])
    plt.xlabel('Accuracy vs. epochs')
    plt.ylabel('Accuracy')
    plt.show()
    plt.savefig("pw_LSTM" + "_accuracy_loss.jpg")
    torch.save(model.state_dict(), "%s_%s.pth" % (Config.model_prefix, epoch))
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    device = opt.device
    # vis = Visualizer(env=opt.env)

    # Load the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  # [57580, 125]
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Define the model
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    # AverageValueMeter keeps track of the running loss
    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()  # reset at the start of every epoch
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # Train
            # contiguous: view() can only be used on a contiguous tensor. After
            # transpose, permute and the like, the tensor may be made of separate
            # memory blocks, while view() relies on one contiguous block, so
            # contiguous() returns a copy laid out contiguously in memory;
            # in other words, contiguous() is what makes the later view() possible.
            data_ = data_.long().transpose(1, 0).contiguous()  # data_ shape: [seq_len, batch_size]
            data_ = data_.to(device)
            optimizer.zero_grad()
            # input_ shape: [124, 128], target shape: [124, 128]
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)  # output shape: [seq_len * batch_size, vocab_size], seq_len is 124 here
            loss = criterion(output, target.view(-1))  # target must be reshaped to [seq_len * batch_size]
            loss.backward()
            optimizer.step()
            # Update the loss meter
            loss_meter.add(loss.item())
            '''
            # Visualization
            if (1 + ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                vis.plot('loss', loss_meter.value()[0])

                # Original poems in this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')
                gen_poetries = []
                # Generate 8 poems, each starting with one of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')
            '''
        # Print the loss once per epoch
        print('epoch:%d, loss:%.3f' % (epoch, loss_meter.value()[0]))
        # Possible improvement: evaluate on a validation set and keep only the best model
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
    return ix2word, word2ix
for i in range(len(data)):
    data[i] = toList(data[i])
    data[i].append("<EOP>")

# save the word dict for the sampling method
p.dump(word_to_ix, file('wordDic', 'w'))

# save all available words
# wordList = open('wordList', 'w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

model = PoetryModel(len(word_to_ix), 256, 256)
model.cuda()  # running on GPU; if you want to run it on CPU, delete all .cuda() usage.
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.NLLLoss()
one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))
epochNum = 100
TRAINSIZE = len(data)
batch = 200


def test():
    model.eval()
    v = int(TRAINSIZE / batch)
    loss = 0