Example #1
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, args):
    """
    Max Likelihood Pretraining for the generator
    """
    num_data = len(real_data_samples)
    total_loss = 0
    for i in range(0, num_data, args.g_bsz):
        inp, target = helpers.prepare_generator_batch(
            real_data_samples[i:i + args.g_bsz],
            start_letter=args.start_letter,
            gpu=args.cuda)
        gen_opt.zero_grad()
        loss = gen.batchNLLLoss(inp, target)
        loss.backward()
        gen_opt.step()

        total_loss += loss.item()

        if (i / args.g_bsz) % ceil(ceil(num_data / float(args.g_bsz)) /
                                   10.) == 0:  # roughly every 10% of an epoch
            print('.', end='')
            sys.stdout.flush()

    # each loss in a batch is loss per sample
    total_loss = total_loss / ceil(
        num_data / float(args.g_bsz)) / args.max_seq_len

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval,
                                               args)
    return oracle_loss, total_loss
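All of these examples rely on helpers.prepare_generator_batch to turn a slice of sequences into (inp, target) pairs for teacher forcing. The tensor dumps in Example #13 below suggest that inp is the sequence shifted right by one position with the start letter prepended, while target is the unshifted sequence. A minimal sketch along those lines, with the exact behaviour of the repo helper treated as an assumption:

import torch

def prepare_generator_batch_sketch(samples, start_letter=0, gpu=False):
    # Hypothetical re-implementation of helpers.prepare_generator_batch:
    #   inp    = [start_letter, x_1, ..., x_{T-1}]
    #   target = [x_1, ..., x_T]
    batch_size, seq_len = samples.size()
    target = samples.long()
    inp = torch.zeros(batch_size, seq_len, dtype=torch.long)
    inp[:, 0] = start_letter
    inp[:, 1:] = target[:, :seq_len - 1]
    if gpu:
        inp, target = inp.cuda(), target.cuda()
    return inp, target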
Example #2
File: main.py Project: weizaiff/seqGAN
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):  # pretraining on real data
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER,
                                                          gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.data.item()

            if (i / BATCH_SIZE) % ceil(
                            ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
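The end-of-epoch normalisation above (divide by the number of batches and then by MAX_SEQ_LEN) only yields a per-word loss if gen.batchNLLLoss averages over the batch but sums over the time steps. A minimal sketch of a loss with that convention, assuming the generator exposes per-step log-probabilities (names and shapes here are illustrative, not the repo's actual API):

import torch.nn as nn

def batch_nll_loss_sketch(log_probs, target):
    # log_probs: (batch, seq_len, vocab) log-probabilities; target: (batch, seq_len) token ids
    # NLL averaged over the batch at each step and summed over the sequence,
    # so a later division by MAX_SEQ_LEN gives a per-word average.
    loss_fn = nn.NLLLoss(reduction='mean')
    loss = 0
    for t in range(target.size(1)):
        loss = loss + loss_fn(log_probs[:, t, :], target[:, t])
    return loss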
Example #3
def train_generator_MLE(gen, gen_opt, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER,
                                                          gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.data.item()

            if (i / BATCH_SIZE) % ceil(
                            ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()
        # Generate LSTM samples
        path='output/MSE-{}.samples'.format(epoch)
        generateSamples(gen, path)

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN


        print(' average_train_NLL = %.4f' % (total_loss))
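generateSamples is project-specific and not shown in this listing. A minimal sketch under the assumption that gen.sample(n) returns an (n, MAX_SEQ_LEN) tensor of token ids (only the call signature is taken from the example; the body is a guess):

def generate_samples_sketch(gen, path, num_samples=128):
    # Dump generator samples to disk, one space-separated token-id sequence per line.
    samples = gen.sample(num_samples)          # assumed shape: (num_samples, MAX_SEQ_LEN)
    with open(path, 'w') as fout:
        for row in samples.tolist():
            fout.write(' '.join(str(tok) for tok in row) + '\n')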
Example #4
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print("epoch %d:" % (epoch + 1), end=" ")
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE],
                                                          start_letter=START_LETTER, gpu=CUDA)
            # inp is what the generator is fed; target is the real text, and the NLL of inp
            # against target is the loss that optimizes the generator
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.data.item()
            # ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) is the number of batches m; split those m
            # batches into 10 chunks and use i to check whether we just crossed a 10% boundary
            if (i / BATCH_SIZE) % ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample; the batch loss is accumulated per word over the
        # sequence, so dividing by MAX_SEQ_LEN as well gives the loss per word of each sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL, i.e. how well the generator's own samples
        # score under the oracle (a measure of the generator's sampling quality)
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print("average_train_NLL=%.4f, oracle_sample_NLL=%.4f" % (total_loss, oracle_loss))
Example #5
File: main.py Project: skyerhxx/SeqGAN
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """

    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(
            s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen,
                                               oracle,
                                               POS_NEG_SAMPLES,
                                               BATCH_SIZE,
                                               MAX_SEQ_LEN,
                                               start_letter=START_LETTER,
                                               gpu=CUDA)

    print(' oracle_sample_NLL = %.4f' % oracle_loss)
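gen.batchPGLoss is the policy-gradient (REINFORCE) step: the log-likelihood of the sampled tokens is weighted by the discriminator's reward and negated, so that gradient descent raises the probability of highly rewarded sequences. A minimal sketch assuming the generator exposes per-step log-probabilities (the tensor names and shapes are assumptions):

def batch_pg_loss_sketch(log_probs, target, rewards):
    # log_probs: (batch, seq_len, vocab) log-probabilities from the generator
    # target:    (batch, seq_len) sampled token ids
    # rewards:   (batch,) discriminator score per sequence
    token_logp = log_probs.gather(2, target.unsqueeze(2)).squeeze(2)   # (batch, seq_len)
    return -(token_logp.sum(dim=1) * rewards).mean()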
Example #6
def train_generator_PG(gen, gen_opt, dis, train_iter, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    global pg_count
    global best_advbleu
    pg_count += 1
    num_sentences = 0
    total_loss = 0
    rollout = Rollout(gen, update_learning_rate)
    for i, data in enumerate(train_iter):
        if i == num_batches:
            break
        src_data_wrap = data.source
        ans = data.answer[0]
        # tgt_data = data.target[0].permute(1, 0)
        passage = src_data_wrap[0].permute(1, 0)

        if CUDA:
            scr_data = data.source[0].to(device)  # lengths x batch_size
            scr_lengths = data.source[1].to(device)
            ans = ans.to(device)
            ans_p = ans.permute(1, 0)
            src_data_wrap = (scr_data, scr_lengths, ans)
            passage = passage.to(device)
            passage = (passage, ans_p)

        num_sentences += scr_data.size(1)
        with torch.no_grad():
            samples, _ = gen.sample(src_data_wrap)        # 64 batch_size works best
            rewards = rollout.get_reward(samples, passage, src_data_wrap, rollout_size, dis, src_rev, rev, train_ref, tgt_pad)

        inp, target = helpers.prepare_generator_batch(samples, gpu=CUDA)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(src_data_wrap, inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
        total_loss += pg_loss.item()
        rollout.update_params() # TODO: DON'T KNOW WHY

    gen.eval()
    # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
    valid_bleu = evaluation.evalModel(gen, val_iter, pg_count, rev, src_special, tgt_special, tgt_ref, src_rev)
    print('Validation bleu-4 = %g' % (valid_bleu * 100))
    if valid_bleu > best_advbleu:
        best_advbleu = valid_bleu
        torch.save(gen.state_dict(), 'advparams.pkl')
        print('save model')
    # train_bleu = evaluation.evalModel(gen, train_iter)
    # print('training bleu = %g' % (train_bleu * 100))
    gen.train()

    print("\npg_loss on %d bactches : %.4f" %(i+1, total_loss/num_batches))
Example #7
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    # Train the generator with policy gradients, using the reward from the discriminator
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # a sample of BATCH_SIZE * 2 = 64 sequences
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        reward = dis.batchClassify(target)  # classifier probability used as the reward
        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, reward)
        pg_loss.backward()
        gen_opt.step()

    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                               start_letter=START_LETTER, gpu=CUDA)
    print('oracle_sample_NLL = %.4f' % oracle_loss)
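dis.batchClassify returns one probability per sequence, which the policy-gradient examples use directly as the reward. A minimal sketch of a discriminator with that interface (the architecture and sizes below are assumptions, not the repos' actual model):

import torch
import torch.nn as nn

class DiscriminatorSketch(nn.Module):
    def __init__(self, vocab_size=5000, emb_dim=32, hidden_dim=32):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.gru = nn.GRU(emb_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, 1)

    def batchClassify(self, seqs):
        # seqs: (batch, seq_len) token ids -> (batch,) probability of being real
        _, h = self.gru(self.emb(seqs))            # h: (1, batch, hidden_dim)
        return torch.sigmoid(self.out(h.squeeze(0))).squeeze(1)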
Example #8
def train_generator_PG(gen, gen_opt, dis, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """

    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # 64 works best
        inp, target = helpers.prepare_generator_batch(
            s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
Example #9
def train_generator_PG(gen, gen_opt, dis, batch_size, episodes, num_batches, Sample_Size=20):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """

    for batch in range(num_batches):
        random_sample_index = np.random.choice(len(episodes), Sample_Size, replace=False)
        random_episodes = episodes[random_sample_index]
        s, condition = gen.sample(random_episodes, idx_BOC)        # 64 works best
        inp, target = helpers.prepare_generator_batch(s, gpu=CUDA)
        rewards = dis.batchClassify(target, condition)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards, condition)
        pg_loss.backward()
        gen_opt.step()
        print("PG Loss = %f"%pg_loss.data[0])
Example #10
def train_generator_PG(gen, gen_opt, validation_data_samples, dis, num_batches,_id=0):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """

    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE*2)        # 64 works best
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # Generate LSTM samples
    path='output/ADV-{}.samples'.format(_id)
    generateSamples(gen, path)
Example #11
def calculatePPL(gen, testpath):
    """Mean per-sentence perplexity of the generator on a test file (base-2 convention)."""
    testset = loadData(testpath)
    testset_tensor = torch.tensor(testset)

    length = []
    with open(testpath, 'r') as fin:
        for line in fin:
            length.append(getLength(line))
    length = np.array(length)

    nll_all = []
    TEST_SIZE = testset_tensor.shape[0]
    for i in tqdm(range(0, TEST_SIZE)):
        inp, target = helpers.prepare_generator_batch(testset_tensor[i:i + 1], start_letter=START_LETTER,
                                                      gpu=CUDA)
        nll = gen.batchNLLLoss(inp, target)
        nll_all.append(float(nll.data.cpu()))
    nll_all = np.array(nll_all)

    return np.mean(2 ** (nll_all / length))
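calculatePPL converts each sentence's total NLL into a perplexity via 2 ** (NLL / length), then averages over the test set (keeping the source's base-2 convention). A quick check with made-up numbers:

import numpy as np

nll_all = np.array([30.0, 12.0])         # hypothetical total NLL per sentence
length = np.array([10, 6])               # hypothetical sentence lengths
ppl = np.mean(2 ** (nll_all / length))   # mean(2**3, 2**2) = 6.0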
Example #12
def train_generator_PG(gen, gen_opt, dis, oracle, args):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """
    sample_buf = torch.zeros(args.g_bsz * args.max_seq_len, args.vocab_size)
    if args.cuda:
        sample_buf = sample_buf.cuda()
    for batch in range(args.g_steps):
        s = gen.sample(args.g_bsz)
        inp, target = helpers.prepare_generator_batch(
            s, start_letter=args.start_letter, gpu=args.cuda)
        # get reward from oracle
        #s_oh = helpers.get_oh(s, sample_buf)
        #rewards = dis.batchClassify(Variable(s_oh))
        rewards = oracle.batchLL(inp, target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()
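Example #12 swaps the discriminator reward for the oracle's per-sequence log-likelihood (oracle.batchLL). A minimal sketch of such a reward, assuming the oracle exposes per-step log-probabilities (the tensor names and shapes are assumptions):

def batch_ll_sketch(oracle_log_probs, target):
    # oracle_log_probs: (batch, seq_len, vocab) log-probabilities under the oracle
    # target:           (batch, seq_len) sampled token ids
    # Per-sequence log-likelihood, used here as the reward for batchPGLoss.
    token_logp = oracle_log_probs.gather(2, target.unsqueeze(2)).squeeze(2)
    return token_logp.sum(dim=1)               # (batch,)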
Example #13
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER, # real_data_samples (10000, 20)
                                                          gpu=CUDA)
            # inp: (32, 20), [[   0,   87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144, 2960, 3784,
            #          2351, 3609,   92, 3391, 2187,  168, 4767, 4973],
            # target: (32, 20)
            #          tensor([[  87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144, 2960, 3784, 2351,
            #          3609,   92, 3391, 2187,  168, 4767, 4973,  619],
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.data.item()

            if (i / BATCH_SIZE) % ceil(
                            ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
Example #14
pretrained_dis_path = "seq30_b64_dim_200_v12028_mlep400, advp20_posnef_10700_dis.pth"
pretrained_oracle_path = "seq30_b64_dim_200_v12028_mlep400, advp20_posnef_10700_oracle.pth"

# load pre-train model
model_dic = torch.load(
    "seq30_b64_dim_200_v12028_mlep100_advp_20_posneg_10700.pth")
oracle.load_state_dict(model_dic['oracle'])

oracle_opt = optim.Adam(oracle.parameters(), lr=1e-2)
out, hid = oracle.forward(inp, hidden_mat)
total_loss = 0

for i in range(0, BATCH_SIZE * 1000, BATCH_SIZE):  # 300, 800, 100000
    batch = getbatch(real_data, i, i + BATCH_SIZE)
    inp, target = helpers.prepare_generator_batch(batch,
                                                  start_letter=START_LETTER,
                                                  gpu=CUDA)
    oracle_opt.zero_grad()
    loss = oracle.batchNLLLoss(inp, target)
    loss.backward()
    oracle_opt.step()

    total_loss += loss.data.item()

    # each loss in a batch is loss per sample; print the running per-word average
    if i % (BATCH_SIZE * 10) == 0:
        print("loss:", total_loss / (i / BATCH_SIZE + 1) / MAX_SEQ_LEN)

print("save oracle")
Example #15
def train_generator_MLE(gen, gen_opt, train_iter, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    best_bleu = 0
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1))
        total_loss = 0
        num_words = 0
        report_loss = 0
        report_num = 0
        for i, data in enumerate(train_iter):
            tgt_data = data.target[0]
            src_data_wrap = data.source
            ans = data.answer[0]

            if CUDA:
                scr_data = data.source[0].to(device)
                scr_lengths = data.source[1].to(device)
                ans = ans.to(device)
                src_data_wrap = (scr_data, scr_lengths, ans)

            tgt_lengths = data.target[1]
            tgt_lengths = torch.LongTensor(tgt_lengths)
            num_words += tgt_lengths.sum().item()

            tgt_data = tgt_data.permute(1, 0)   # --> batch x length
            inp, target = helpers.prepare_generator_batch(tgt_data, gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(src_data_wrap, inp, target)   # inp means decoder inp, target means decoder target.
            loss.div(tgt_data.size(1)).backward()
            # loss.backward()
            gen_opt.step()

            report_loss += loss.item()
            report_num += tgt_data.size(1)
            total_loss += loss.item()

            # if i % 20 == -1 % 20:
            #     print(("inter loss = %.4f") % (report_loss / report_num))
            #     report_loss = 0
            #     report_num = 0

        loss_perword = total_loss / num_words
        train_ppl = math.exp(min(loss_perword, 100))
        print('loss  = %.4f' % (total_loss / len(train_iter.dataset)))
        print('ppl  = %.4f' % train_ppl)

        # evaluate blue scores
        # valid data
        # if epoch%5 == -1%5:
        gen.eval()
        # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
        valid_bleu = evaluation.evalModel(gen, val_iter, epoch, rev, src_special, tgt_special, tgt_ref, src_rev)
        print('Validation bleu-4 = %g' % (valid_bleu * 100))
        if valid_bleu > best_bleu:
            best_bleu = valid_bleu
            torch.save(gen.state_dict(), 'params.pkl')
            print('save '+str(epoch + 1)+' epoch model')

        gen_opt.updateLearningRate(valid_bleu)
        #train_bleu = evaluation.evalModel(gen, train_iter)
        #print('training bleu = %g' % (train_bleu * 100))
        gen.train()
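Example #15 reports a per-word perplexity: the summed NLL is divided by the number of target words and exponentiated, with the exponent capped at 100 to avoid overflow. With made-up numbers:

import math

total_loss, num_words = 46051.7, 10000         # hypothetical totals for one epoch
loss_perword = total_loss / num_words          # 4.60517 nats per word
train_ppl = math.exp(min(loss_perword, 100))   # ~100.0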
Example #16
def train_generator_MLE(gen, gen_opt, episodes, valid_episodes, batch_size, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    #print("batch_size = %d"%batch_size)
    #print("epochs = %d"%epochs)
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1))
        sys.stdout.flush()
        total_loss = 0
        total_size = 0
        
        print("train:")
        print("len(episodes) = %d"%len(episodes))
        for batch_idx in range(0, len(episodes), batch_size):
            #print("batch_idx = %d"%batch_idx)
        #for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            #print("1")
            personas_your = get_persona_batch(episodes[batch_idx:batch_idx+batch_size], 1)
            #print("2")
            personas_partner = get_persona_batch(episodes[batch_idx:batch_idx+batch_size], 0)
            #print("3")
            turn_batch_list = get_dialog_batches(episodes[batch_idx:batch_idx+batch_size])
            #print("4")

            inp, target = helpers.prepare_generator_batch(turn_batch_list, gpu=CUDA)
            #print("5")
            gen_opt.zero_grad()
            #print("6")
            loss = gen.batchNLLLoss(inp, target, personas_your, personas_partner)
            #print("7")
            loss.backward()
            gen_opt.step()

            print("epoch: %d, batch_idx: %d, loss per sample = %f"%(epoch+1, batch_idx, loss.data[0]/turn_batch_list.size(0)/turn_batch_list.size(1)))

            #total_loss += loss.data[0]
            #total_size += turn_batch_list.size(0)

            #if (i / BATCH_SIZE) % ceil(
                            #ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                #print('.', end='')
                #sys.stdout.flush()
            #if idx % 10 == 0:
                #print('.', end='')
                #sys.stdout.flush()

        if epoch % 5 == 0:
            print("valid:")
            for batch_idx in range(0, len(valid_episodes), batch_size):
            #for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
                personas_your = get_persona_batch(valid_episodes[batch_idx:batch_idx+batch_size], 1)
                personas_partner = get_persona_batch(valid_episodes[batch_idx:batch_idx+batch_size], 0)
                turn_batch_list = get_dialog_batches(valid_episodes[batch_idx:batch_idx+batch_size])
                inp, target = helpers.prepare_generator_batch(turn_batch_list, gpu=CUDA)
                #gen_opt.zero_grad()
                loss = gen.batchNLLLoss(inp, target, personas_your, personas_partner)
                #loss.backward()
                #gen_opt.step()

                print("epoch: %d, batch_idx: %d, loss per sample = %f"%(epoch+1, batch_idx, loss.data[0]/turn_batch_list.size(0)/turn_batch_list.size(1)))
Example #17
                true_loss = loss_fn(true_out, torch.zeros([data.batch_size]).type(torch.cuda.FloatTensor))
                fake_out = dis.batchClassify(fake_tgt_data, (passage, ans)) # hidden = none over here
                fake_loss = loss_fn(fake_out, torch.ones([data.batch_size]).type(torch.cuda.FloatTensor))
                loss = true_loss + fake_loss
                loss.backward()
                dis_optimizer.step()
            else:
                gen.train()
                real_sample = tgt_data
                real_length = data.target[1]
                with torch.no_grad():
                    samples, _ = gen.sample(src_data_wrap)  # 64 batch_size works best
                    rewards = rollout.get_reward(samples, (passage, ans ), src_data_wrap, rollout_size, dis, src_rev, rev,
                                                 train_ref, tgt_pad)

                inp, target = helpers.prepare_generator_batch(samples, gpu=CUDA)

                gen_optimizer.zero_grad()
                pg_loss = gen.batchPGLoss(src_data_wrap, inp, target, rewards)
                pg_loss.backward()
                gen_optimizer.step()
                rollout.update_params()  # TODO: DON'T KNOW WHY

        gen.eval()
        # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
        valid_bleu = evaluation.evalModel(gen, val_iter, pg_count, rev, src_special, tgt_special, tgt_ref, src_rev)
        print('Validation bleu-4 = %g' % (valid_bleu * 100))

        # print('\n--------\nEPOCH %d\n--------' % (epoch+1))
        # # TRAIN GENERATOR
        # print('\nAdversarial Training Generator : ', end='')