Example #1
def train(opt):
    data, word2ix, ix2word = get_data(opt)
    data = torch.from_numpy(data)
    dataloader = td.DataLoader(data,
                               batch_size=opt.batch_size,
                               shuffle=True,
                               num_workers=1)

    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    for epoch in range(opt.epoch):
        for step, data_ in enumerate(dataloader):
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            if (1 + step) % 10 == 0:
                print("current loss", loss.data)

    torch.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
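
All of these examples assume a PoetryModel along the same lines: an embedding layer feeding an LSTM whose outputs are projected to vocabulary logits and flattened to [seq_len * batch_size, vocab_size], which is what lets the loops call criterion(output, target.view(-1)). The layer sizes and extra constructor arguments vary between examples; the following is a minimal sketch, not any one example's actual definition:

import torch.nn as nn

class PoetryModel(nn.Module):
    # Sketch of the model the training loops assume: embedding -> LSTM -> vocab logits.
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

    def forward(self, input_, hidden=None):
        seq_len, batch_size = input_.size()
        embeds = self.embeddings(input_)            # [seq_len, batch, embedding_dim]
        output, hidden = self.lstm(embeds, hidden)  # [seq_len, batch, hidden_dim]
        output = self.linear(output.view(seq_len * batch_size, -1))
        return output, hidden                       # logits: [seq_len * batch, vocab_size]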
Example #2
    def run(self):
        # 1. Get the data
        data, char_to_ix, ix_to_chars = get_data(self.config)
        vocab_size = len(char_to_ix)
        print('Number of samples: %d' % len(data))
        print('Vocabulary size: %d' % vocab_size)

        # 2. Set up the DataLoader
        data = torch.from_numpy(data)
        data_loader = Data.DataLoader(data,
                                      batch_size=self.config.batch_size,
                                      shuffle=True,
                                      num_workers=1)

        # 3. Build the model
        model = PoetryModel(vocab_size=vocab_size,
                            embedding_dim=self.config.embedding_dim,
                            hidden_dim=self.config.hidden_dim,
                            device=self.device,
                            layer_num=self.config.layer_num)
        model.to(self.device)

        # 4. Create the optimizer
        optimizer = optim.Adam(model.parameters(),
                               lr=self.config.lr,
                               weight_decay=self.config.weight_decay)

        # 5. Create the loss function (CrossEntropyLoss applies LogSoftmax internally)
        criterion = nn.CrossEntropyLoss()

        # 6. Train
        self.train(data_loader, model, optimizer, criterion, char_to_ix,
                   ix_to_chars)
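
Example #2 reads everything from self.config; the attribute names it touches suggest a config object roughly like the following sketch (the values here are illustrative placeholders, not taken from the source):

class Config:
    batch_size = 128      # placeholder values only
    embedding_dim = 128
    hidden_dim = 256
    layer_num = 2
    lr = 1e-3
    weight_decay = 1e-4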
Example #3
def train():
    datas = np.load("tang.npz")
    data = datas['data']
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data[:5000],
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=2)

    model = PoetryModel(len(word2ix),
                        embedding_dim=config.embedding_dim,
                        hidden_dim=config.hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=config.lr)
    criterion = nn.CrossEntropyLoss()

    model.to(config.device)

    f = open('result.txt', 'w')
    loss_history = []
    for epoch in range(config.epoch):
        start_time = time.time()
        temp_loss = 0

        for step, batch_data in enumerate(dataloader):
            batch_data = batch_data.long().transpose(1, 0).contiguous()
            batch_data = batch_data.to(config.device)  # the model was moved to config.device above
            optimizer.zero_grad()
            trn, target = batch_data[:-1, :], batch_data[1:, :]
            output, _ = model(trn)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            temp_loss += loss.item()
            if step % config.print_freq == 0 or step == len(dataloader) - 1:
                print("Train: [{:2d}/{}] Step: {:03d}/{:03d} Loss: {} ".format(
                    epoch + 1, config.epoch, step,
                    len(dataloader) - 1, loss.item()))

        loss_history.append(temp_loss / len(dataloader))
        elapsed_time = time.time() - start_time
        print("Epoch: %d" % epoch + " " + "Loss: %d" % loss_history[-1] +
              " Epoch time: " +
              time.strftime("%H: %M: %S", time.gmtime(elapsed_time)))
        torch.save(model.state_dict(), config.model_path)
Example #4
def train():
    # Get the data
    data, word2ix, ix2word = get_data()
    data = torch.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), config.embedding_dim, config.hidden_dim,
                        config.num_layers)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=config.lr_step,
                       gamma=config.lr_gamma)
    criterion = nn.CrossEntropyLoss()
    model.to(config.device)

    for epoch in range(config.epoch):
        total_loss = 0
        for data_ in tqdm(dataloader):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(config.device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()
        print("epoch: ", epoch, "loss: ", total_loss / len(dataloader))
        torch.save(model.state_dict(),
                   '%s_%s.pth' % (config.model_prefix, epoch))
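
Example #4, like most of the others, writes one state_dict checkpoint per epoch. Loading one back for inference is the standard counterpart; a sketch assuming the same constructor arguments used at training time (the epoch index 9 is only an example):

model = PoetryModel(len(word2ix), config.embedding_dim, config.hidden_dim,
                    config.num_layers)
state = torch.load('%s_%s.pth' % (config.model_prefix, 9), map_location='cpu')
model.load_state_dict(state)
model.eval()  # switch to inference mode before generating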
Example #5
def train(**kwargs) -> None:

    for k, v in kwargs.items():
        setattr(opt, k, v)
    
    #vis = Visdom(env=opt.env)

    # get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(
        data,
        batch_size=opt.batch_size,
        shuffle=True,
    )
    
    model = PoetryModel(len(word2ix), 2, 2)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    
    for epoch in range(opt.epoch):
        
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = V(data_[:-1, :]), V(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
Example #6
for i in range(len(data)):
    data[i] = toList(data[i])
    data[i].append("<EOP>")
# save the word dic for sample method
p.dump(word_to_ix, open('wordDic', 'wb'))  # file() is Python 2 only; pickle needs binary mode

# save all available words
# wordList = open('wordList','w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

model = PoetryModel(len(word_to_ix), 256, 256)
model.cuda()  # running on GPU; to run on CPU, delete all .cuda() calls
optimizer = optim.RMSprop(model.parameters(), lr=0.01, weight_decay=0.0001)
criterion = nn.NLLLoss()

one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))

epochNum = 10
TRAINSIZE = len(data)
batch = 100


def test():
    v = int(TRAINSIZE / batch)
    loss = 0
    counts = 0
Example #7
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # Visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original poems from this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
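
The generate(model, word, ix2word, word2ix) helper that Example #7 and several later examples call is not shown anywhere in this collection. A plausible minimal version, consistent with how it is called: feed one character at a time and reuse the hidden state. The <START>/<EOP> marker tokens, the 200-character cap, and the greedy argmax decoding are all assumptions, and device handling is omitted:

def generate(model, start_word, ix2word, word2ix, max_len=200):
    # Greedy decoding sketch: returns a list of characters beginning with start_word.
    results = list(start_word)
    input_ = t.tensor([[word2ix['<START>']]]).long()  # shape [1, 1] = [seq_len, batch]
    hidden = None
    for i in range(max_len):
        output, hidden = model(input_, hidden)
        if i < len(start_word):            # teacher-force the given opening characters
            w = results[i]
        else:
            w = ix2word[output.argmax(dim=1)[-1].item()]
            if w == '<EOP>':               # stop at the assumed end-of-poem marker
                break
            results.append(w)
        input_ = t.tensor([[word2ix[w]]]).long()
    return results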
Example #8
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(config, k, v)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Get the data
    data, vocab = get_data(config.filepath)
    np.random.shuffle(data)
    l = len(data)
    dev_data = data[:l // 5 - 1]
    data = data[l // 5:]
    data = torch.from_numpy(data)
    dev_data = torch.from_numpy(dev_data)
    dataloader = D.DataLoader(data,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=4)
    dev_dataloader = D.DataLoader(dev_data,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=4)

    # Model definition
    model = PoetryModel(len(vocab.word2idx), 128, 256)

    # if config.model_path:
    #     model.load_state_dict(torch.load(config.model_path))
    model.to(device)

    # SGD, SGD with momentum, Nesterov, Adagrad, Adadelta, Adam
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, nesterov=True)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=config.lr)
    # optimizer = torch.optim.Adadelta(model.parameters())
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    criterion = nn.CrossEntropyLoss()

    pre_pp = 0
    cnt = -1
    loss_his = []
    pp_his = []
    for epoch in range(config.epoch):
        for ii, data_ in enumerate(dataloader):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            data_ = data_.to(device)
            optimizer.zero_grad()
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            print("epoch", epoch, "step", ii, "loss", loss.item())
            loss_his.append(loss.item())

            # Test: generate a sample poem
            if (1 + ii) % config.gen_every == 0:
                # seed character, e.g. one of '春江花月夜凉如水'
                word = "春"
                gen_poetry = ''.join(generate(model, word, vocab))
                print(gen_poetry)

            if (1 + ii) % config.pp_every == 0:
                pp = check_perplexity(model, dev_dataloader)
                if pre_pp < pp:
                    cnt += 1
                pre_pp = pp
                print(pp.cpu().numpy())
                pp_his.append(pp.cpu().numpy())

                if cnt >= config.tolerance:
                    torch.save(model.state_dict(),
                               '%s_final.pth' % str(int(time.time())))
                    print("epoch", epoch, "step", ii, "final loss",
                          loss.item())
                    for word in ["日", "红", "山", "夜", "湖", "海", "月"]:
                        gen_poetry = ''.join(generate(model, word, vocab))
                        print(gen_poetry)
                    return loss_his, pp_his
        if (epoch + 1) % config.save_every == 0 or epoch + 1 == config.epoch:
            torch.save(model.state_dict(),
                       '%s_%s.pth' % (str(int(time.time())), str(epoch)))
    return loss_his, pp_his
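
check_perplexity is also not defined in this snippet. Since the caller treats its return value as a tensor (pp.cpu().numpy()), a minimal sketch would average cross-entropy over the dev set and exponentiate, mirroring the training loop's token layout; this is an assumption, not the original helper:

def check_perplexity(model, dev_dataloader):
    # Sketch: perplexity = exp(mean cross-entropy over the dev set).
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    total, batches = 0.0, 0
    model.eval()
    with torch.no_grad():
        for data_ in dev_dataloader:
            data_ = data_.long().transpose(1, 0).contiguous().to(device)
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(input_)
            total = total + criterion(output, target.view(-1))
            batches += 1
    model.train()
    return torch.exp(total / batches)  # returned as a tensor, as the caller expects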
Example #9
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):
            # Training step
            data_ = data_.long().transpose(1, 0).contiguous()
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()
            # input and target are offset from each other by one position
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # Visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])
                # Original poems from this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                         win=u'gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #10
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda' if t.cuda.is_available() else 'cpu')
    device = opt.device
    vis = Visualizer(env=opt.env)

    # Get the data
    data_all = np.load(opt.pickle_path, allow_pickle=True)  # needed to load the dict entries
    data = data_all['data']
    word2ix = data_all['word2ix'].item()
    ix2word = data_all['ix2word'].item()
    data = t.from_numpy(data)
    dataloader = DataLoader(data,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(
            t.load(opt.model_path, map_location=t.device('cpu')))
    model.to(device)

    loss_avg = 0
    for epoch in range(opt.epoch):
        for ii, data_ in tqdm(enumerate(dataloader)):
            data_ = data_.long()
            data_ = data_.to(device)
            optimizer.zero_grad()
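            # note: unlike the other examples, this one keeps the batch-first
            # layout and shifts along dim 1 instead of transposing to [seq_len, batch]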
            input_, target = data_[:, :-1], data_[:, 1:]
            output, _ = model(input_)
            loss = loss_func(output, target.reshape(-1))
            loss.backward()
            optimizer.step()

            loss_avg += loss.item()

            # Visualization
            if (ii + 1) % opt.plot_every == 0:
                vis.plot('loss', loss_avg / opt.plot_every)
                loss_avg = 0
                poetrys = [[ix2word[_word] for _word in data_[i].tolist()]
                           for i in range(data_.shape[0])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]),
                         win='origin_poem')

                gen_poetries = []
                for word in list('春江花月夜凉如水'):
                    gen_poetry = ''.join(
                        generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join(
                    [''.join(poetry) for poetry in gen_poetries]),
                         win='gen_poem')

        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
Example #11
def train(**kwargs):

    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)

    # Get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  # convert the numpy array to a tensor
    dataloader = t.utils.data.DataLoader(data,  # build the DataLoader instance
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), 128, 256)  # (vocab_size, embedding_dim, hidden_dim)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))

    if opt.use_gpu:
        model.cuda()
        criterion.cuda()
    loss_meter = meter.AverageValueMeter()

    for epoch in range(opt.epoch):
        loss_meter.reset()
        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):  # tqdm progress bar
            # take one batch of data
            # Training

            # data_.size: (batch_size, maxlen)
            data_ = data_.long().transpose(1, 0).contiguous()  # transpose, then return a contiguous tensor with the same data
            # if epoch == 0 and ii == 0:
            #     print('size of data_ after transpose: \n', data_.size())
            if opt.use_gpu: data_ = data_.cuda()
            optimizer.zero_grad()  # zero the gradients

            # input_ is the first maxlen-1 items of every sequence and
            # target the last maxlen-1 items: taking "床前明月光" as an example,
            # the input is "床前明月" and the model must predict "前明月光"
            input_, target = Variable(data_[:-1, :]), Variable(data_[1:, :])
            output, _ = model(input_)
            # Tensor.view(-1) reads the tensor element by element along dim 0 and flattens it

            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # Visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):  # if the debug file exists,
                    # drop into the debugger
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original poems: every item (id) of each poem is mapped back to text
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                # show the first 16 poems of the batch in visdom
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')
        
    t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
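
The transpose/contiguous/view chain that Example #11's comments describe can be checked in isolation; a tiny demonstration of why contiguous() must precede view():

import torch

x = torch.arange(6).reshape(2, 3).transpose(1, 0)  # transpose makes x non-contiguous
# x.view(-1) would raise a RuntimeError here, because view() needs a single memory block
y = x.contiguous().view(-1)  # contiguous() first copies the data into one block
print(y)  # tensor([0, 3, 1, 4, 2, 5])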
Example #12
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)
    
    vis = Visualizer(env=opt.env)
    
    # Get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)
    dataloader = t.utils.data.DataLoader(data, batch_size=opt.batch_size, shuffle=True, num_workers=2)
    
    # Define the model
    model = PoetryModel(len(word2ix), opt.embedding_dim, opt.hidden_dim)
    # Optimizer
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    # Loss Function
    criterion = nn.CrossEntropyLoss()
    
    
    
    # Load a pretrained model so that training can resume
    if opt.model_path and os.path.exists(opt.model_path):
        model.load_state_dict(t.load(opt.model_path))
    
    # GPU related
    # note: device was not defined in the original snippet; derive it from opt.use_gpu
    device = t.device('cuda' if opt.use_gpu else 'cpu')

    if opt.use_gpu:
        model = model.to(device)
        criterion = criterion.to(device)
    
    # loss meter
    loss_meter = meter.AverageValueMeter()
    
    # for loop
    for epoch in range(opt.epoch):
        loss_meter.reset()
        
        # iterate over the dataset batch by batch
        for i, data_ in tqdm.tqdm(enumerate(dataloader)):
            
            # Training
            # data_
            # size: [128, 125], each batch takes 128 rows, one poem per row, length 125
            # type: Tensor
            # dtype: torch.int32, which should be converted to long

            # The next line does a lot:
            # step 1: cast int32 to long
            # step 2: swap rows and columns, for the sake of parallel computation
            # step 3: lay the data out in contiguous memory to avoid errors in later ops
            data_ = data_.long().transpose(0, 1).contiguous()
            
            # GPU related
            if opt.use_gpu:
                data_ = data_.to(device)
            
            # by this point data_.dtype has become torch.int64
            # print(data_.dtype)
            
            # zero the gradients
            optimizer.zero_grad()
            
            # Offset training, which is easy to picture:
            # take the first n-1 rows as input and the last n-1 rows as target
            # (this, too, is for the sake of parallel computation)
            # input_ has a trailing underscore to avoid shadowing the built-in input
            input_, target = data_[:-1, :], data_[1:, :]
            
            # the model returns output and hidden;
            # hidden is of no use here
            output, _ = model(input_)
            
            # compute the loss
            target = target.view(-1)
            
            # the new target.size() is [15872], since 124 * 128 = 15872
            # output.size() is [15872, 8293], where 8293 is the vocabulary size
            
            loss = criterion(output, target)
            
            # backpropagation
            loss.backward()
            
            # the optimizer updates the parameters by gradient descent
            optimizer.step()
            
            loss_meter.add(loss.item())

            # Visualization
            if (1 + i) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original poems from this batch
                poetrys = [[ix2word[_word.item()] for _word in data_[:, _iii]]
                           for _iii in range(data_.size(1))][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')
        # save the model after every epoch
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))
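
The one-position offset that Example #12's comments describe can be seen on a toy batch; once seq_len is on dim 0, input and target are the same tensor shifted by one row:

import torch

batch = torch.tensor([[1, 2, 3, 4],   # [batch_size, seq_len]: one poem per row
                      [5, 6, 7, 8]])
data_ = batch.long().transpose(1, 0).contiguous()  # -> [seq_len, batch_size]
input_, target = data_[:-1, :], data_[1:, :]
# column 0: input [1, 2, 3] must predict target [2, 3, 4]
print(input_[:, 0].tolist(), '->', target[:, 0].tolist())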
Example #13
def train_torch_lstm(conf, args=None):
    pdata = PoemData()
    pdata.read_data(conf)
    pdata.get_vocab()
    if conf.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = PoetryModel(pdata.vocab_size, conf, device)

    train_data = pdata.train_data
    test_data = pdata.test_data

    train_data = torch.from_numpy(np.array(train_data['pad_words']))
    dev_data = torch.from_numpy(np.array(test_data['pad_words']))

    dataloader = DataLoader(train_data,
                            batch_size=conf.batch_size,
                            shuffle=True,
                            num_workers=conf.num_workers)
    devloader = DataLoader(dev_data,
                           batch_size=conf.batch_size,
                           shuffle=True,
                           num_workers=conf.num_workers)

    optimizer = Adam(model.parameters(), lr=conf.learning_rate)
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()

    if conf.load_best_model:
        model.load_state_dict(torch.load(conf.best_model_path))
    if conf.use_gpu:
        model.cuda()
        criterion.cuda()
    step = 0
    bestppl = 1e9
    early_stop_controller = 0
    for epoch in range(conf.n_epochs):
        losses = []
        loss_meter.reset()
        model.train()
        for i, data in enumerate(dataloader):
            data = data.long().transpose(1, 0).contiguous()
            if conf.use_gpu:
                data = data.cuda()
            input, target = data[:-1, :], data[1:, :]
            optimizer.zero_grad()
            output, _ = model(input)
            loss = criterion(output, target.contiguous().view(-1))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            loss_meter.add(loss.item())
            step += 1
            if step % 100 == 0:
                print("epoch_%d_step_%d_loss:%0.4f" %
                      (epoch + 1, step, loss.item()))
        train_loss = float(loss_meter.value()[0])

        model.eval()
        loss_meter.reset()  # otherwise the perplexity below would mix in the training losses
        for i, data in enumerate(devloader):
            data = data.long().transpose(1, 0).contiguous()
            if conf.use_gpu:
                data = data.cuda()
            input, target = data[:-1, :], data[1:, :]
            output, _ = model(input)
            loss = criterion(output, target.view(-1))
            loss_meter.add(loss.item())
        ppl = math.exp(loss_meter.value()[0])
        print("epoch_%d_loss:%0.4f , ppl:%0.4f" % (epoch + 1, train_loss, ppl))

        if epoch % conf.save_every == 0:
            torch.save(model.state_dict(),
                       "{0}_{1}".format(conf.model_prefix, epoch))

            fout = open("{0}out_{1}".format(conf.out_path, epoch),
                        'w',
                        encoding='utf-8')
            for word in list('日红山夜湖海月'):
                gen_poetry = generate_poet(model, word, pdata.vocab, conf)
                fout.write("".join(gen_poetry) + '\n\n')
            fout.close()
        if ppl < bestppl:
            bestppl = ppl
            early_stop_controller = 0
            torch.save(model.state_dict(),
                       "{0}_{1}".format(conf.best_model_path, "best_model"))
        else:
            early_stop_controller += 1
        if early_stop_controller > conf.patience:
            print("early stop.")
            break
Example #14
def train(Config):
    torch.multiprocessing.set_sharing_strategy('file_system')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    datas = np.load("data/chinese-poetry-master/tang .npz", allow_pickle=True)

    data = datas["data"]
    ix2word = datas['ix2word'].item()
    word2ix = datas['word2ix'].item()
    data = torch.from_numpy(data)
    print(data.shape)
    # # strip the space tokens
    # t_data = data.view(-1)
    # flat_data = t_data.numpy()
    # no_space_data = []
    # for i in flat_data:
    #     if (i != 8292):
    #         no_space_data.append(i)
    # slice_size = 48
    # txt = [no_space_data[i:i+slice_size] for i in range(0,len(no_space_data),slice_size)]
    # txt = np.array(txt[:-1])  # drop the last chunk, which is shorter than 48
    # txt = torch.from_numpy(txt).long()
    # print(txt.shape)
    #datas = PoemDataSet(Config.data_path, 48)
    #data = datas.no_space_data#datas['data']
    #ix2word = datas.ix2word#datas['ix2word'].item()
    #word2ix = datas.word2ix#datas['word2ix'].item()

    dataLoader = DataLoader(data,
                            batch_size=Config.batch_size,
                            shuffle=Config.shuffle,
                            num_workers=Config.num_workers)

    model = PoetryModel(len(word2ix),
                        embedding_dim=Config.embedding_dim,
                        hidden_dim=Config.hidden_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=Config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.1)  # learning-rate adjustment
    criterion = nn.CrossEntropyLoss()
    loss_meter = meter.AverageValueMeter()
    top1 = meter.AverageValueMeter()
    #top1 = utils.AverageMeter()
    # if Config.model_path:
    #     model.load_state_dict(torch.load(Config.model_path))
    train_loss_list = []
    train_accuracy_list = []
    for epoch in range(Config.epoch):

        loss_meter.reset()
        top1.reset()
        for ii, data_ in enumerate(
                dataLoader):  #tqdm.tqdm(enumerate(dataLoader)):
            #inputs, labels =Variable(data_[0]), Variable(data_[1])#.to(device)
            data_ = data_.long().transpose(1, 0).contiguous()
            inputs, labels = Variable(data_[:-1, :]), Variable(data_[1:, :])
            print(inputs.size(1))
            optimizer.zero_grad()

            # the assignment above offsets each poem by one character to form input and target
            output, _ = model(inputs)
            loss = criterion(output, labels.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            _, pred = output.topk(1)
            prec1, prec2 = accuracy(output, labels, topk=(1, 2))
            n = inputs.size(0)
            top1.add(prec1.item())
            #data = data.long().transpose(1,0).contiguous()

            if (1 + ii) % Config.plot_every == 0:

                if os.path.exists(Config.debug_file):
                    ipdb.set_trace()

                # Below: a check on the current model, original poems and generated poems
                #print(inputs.size(1))
                #print(inputs.numpy()[:1].shape)
                # poetrys = [[ix2word[_word] for _word in inputs.numpy()[:, _iii]]
                #            for _iii in range(inputs.size(1))][0]
                # poetrys =["".join(poetry) for poetry in poetrys]
                # print("origen")
                #print(poetrys)
                # the two nested loops above turn the first sixteen poems' indices back into text

                gen_poetries = []
                start = u"春江花月夜凉如水"
                gen_poetry = "".join(
                    generate(model, start, ix2word, word2ix, Config))
                # for word in list(u"春江花月夜凉如水"):
                #     gen_poetry = "".join(generate(model, word, ix2word, word2ix,Config))
                #     gen_poetries.append(gen_poetry)
                # gen_poetries="</br>".join(["".join(poetry) for poetry in gen_poetries])
                print("genetate")
                print(gen_poetry)

            # if os.path.exists(Config.tensorboard_path) == False:
            #     os.mkdir(Config.tensorboard_path)
            # writer = SummaryWriter(Config.tensorboard_path)
            # writer.add_scalar('Train/Loss', loss.item(), epoch)
            # writer.add_scalar('Train/Accuracy', 100*prec1.item()/output.size(0), epoch)
            #
            # writer.flush()
        train_loss_list.append(loss.item())
        train_accuracy_list.append(100 * prec1.item() / output.size(0))
        print('train %d epoch loss: %.3f acc: %.3f ' %
              (epoch + 1, loss_meter.mean, 100 * top1.mean / output.size(0)))
        scheduler.step()
    x1 = range(0, Config.epoch)
    y1 = train_loss_list

    y3 = train_accuracy_list

    plt.subplot(2, 1, 1)
    plt.plot(x1, y1, 'o-')

    plt.legend(["train_loss"])
    plt.title('Loss vs. epochs')
    plt.ylabel('Loss')
    plt.subplot(2, 1, 2)
    plt.plot(x1, y3, '.-')
    plt.legend("train_accuracy")
    plt.xlabel('Accuracy vs. epoches')
    plt.ylabel('Accuracy')
    plt.show()

    plt.savefig("pw_LSTM" + "_accuracy_loss.jpg")
    torch.save(model.state_dict(), "%s_%s.pth" % (Config.model_prefix, epoch))
Example #15
def train(**kwargs):
    for k, v in kwargs.items():
        setattr(opt, k, v)

    opt.device = t.device('cuda:0') if opt.use_gpu else t.device('cpu')
    device = opt.device
    #vis = Visualizer(env=opt.env)

    # Get the data
    data, word2ix, ix2word = get_data(opt)
    data = t.from_numpy(data)  #[57580,125]
    dataloader = t.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=1)

    # Model definition
    model = PoetryModel(len(word2ix), 128, 256)
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    model.to(device)

    # AverageValueMeter tracks the running statistics of a quantity
    loss_meter = meter.AverageValueMeter()
    for epoch in range(opt.epoch):
        loss_meter.reset()  # reset at the start of every epoch

        for ii, data_ in tqdm.tqdm(enumerate(dataloader)):

            # Training
            # contiguous: view() can only be used on contiguous tensors. If transpose,
            # permute, etc. were applied first, call contiguous() to get a contiguous copy.
            # One way to see it: some tensors are not stored in a single block of memory
            # but in several, while view() relies on the storage being one block;
            # contiguous() rewrites the tensor into a contiguous memory layout.
            # In other words, contiguous() is called so that view() can be used.
            data_ = data_.long().transpose(
                1, 0).contiguous()  # data_ shape: [seq_len, batch_size]
            data_ = data_.to(device)
            optimizer.zero_grad()
            # input_ shape: [124, 128], target shape: [124, 128]
            input_, target = data_[:-1, :], data_[1:, :]
            output, _ = model(
                input_
            )  # output shape: [seq_len * batch_size, vocab_size], with seq_len = 124 here
            loss = criterion(
                output, target.view(-1))  # target must be reshaped to [seq_len * batch_size]

            loss.backward()
            optimizer.step()

            # update loss_meter
            loss_meter.add(loss.item())
            '''
            
            # Visualization
            if (1 + ii) % opt.plot_every == 0:

                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                vis.plot('loss', loss_meter.value()[0])

                # Original poems from this batch
                poetrys = [[ix2word[_word] for _word in data_[:, _iii].tolist()]
                           for _iii in range(data_.shape[1])][:16]
                vis.text('</br>'.join([''.join(poetry) for poetry in poetrys]), win=u'origin_poem')

                gen_poetries = []
                # Generate 8 poems, one starting with each of these characters
                for word in list(u'春江花月夜凉如水'):
                    gen_poetry = ''.join(generate(model, word, ix2word, word2ix))
                    gen_poetries.append(gen_poetry)
                vis.text('</br>'.join([''.join(poetry) for poetry in gen_poetries]), win=u'gen_poem')
            '''
        # print the loss at the end of every epoch
        print('epoch:%d, loss:%.3f' % (epoch, loss_meter.value()[0]))

        # room for improvement: evaluate on a validation set and keep only the best model
        t.save(model.state_dict(), '%s_%s.pth' % (opt.model_prefix, epoch))

    return ix2word, word2ix
Example #16
for i in range(len(data)):
    data[i] = toList(data[i])
    data[i].append("<EOP>")
# save the word dic for sample method
p.dump(word_to_ix, open('wordDic', 'wb'))  # file() is Python 2 only; pickle needs binary mode

# save all available words
# wordList = open('wordList','w')
# for w in word_to_ix:
#     wordList.write(w.encode('utf-8'))
# wordList.close()

model = PoetryModel(len(word_to_ix), 256, 256)
model.cuda()  # running on GPU; to run on CPU, delete all .cuda() calls
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.NLLLoss()

one_hot_var_target = {}
for w in word_to_ix:
    one_hot_var_target.setdefault(w, make_one_hot_vec_target(w, word_to_ix))

epochNum = 100
TRAINSIZE = len(data)
batch = 200


def test():
    model.eval()
    v = int(TRAINSIZE / batch)
    loss = 0