Example #1
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    """
    The generator is trained using policy gradients, using the reward from the discriminator.
    Training is done for num_batches batches.
    """

    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # sample 2*BATCH_SIZE sequences (64 works best in practice)
        inp, target = helpers.prepare_generator_batch(
            s, start_letter=START_LETTER, gpu=CUDA)
        rewards = dis.batchClassify(target)

        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, rewards)
        pg_loss.backward()
        gen_opt.step()

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen,
                                               oracle,
                                               POS_NEG_SAMPLES,
                                               BATCH_SIZE,
                                               MAX_SEQ_LEN,
                                               start_letter=START_LETTER,
                                               gpu=CUDA)

    print(' oracle_sample_NLL = %.4f' % oracle_loss)
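Note: gen.batchPGLoss is not shown on this page. For reference, a minimal sketch of such a REINFORCE-style policy-gradient loss (weighting each sequence's log-likelihood by its discriminator reward) could look like the following; the function name, tensor shapes, and the assumption that the generator exposes per-step logits are all hypothetical:

import torch
import torch.nn.functional as F

def batch_pg_loss_sketch(logits, target, rewards):
    """Hypothetical REINFORCE-style loss.

    logits:  (batch, seq_len, vocab) raw generator scores (assumed)
    target:  (batch, seq_len) sampled token ids
    rewards: (batch,) discriminator probabilities per sequence
    """
    log_probs = F.log_softmax(logits, dim=-1)
    # log-prob of each sampled token: (batch, seq_len)
    tok_lp = log_probs.gather(2, target.unsqueeze(2)).squeeze(2)
    # scale each sequence's log-likelihood by its reward; negate to minimize
    return -(tok_lp.sum(dim=1) * rewards).mean()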
Example #2
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):  # pretraining on real data
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER,
                                                          gpu=CUDA)
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i // BATCH_SIZE) % ceil(
                            ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
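helpers.prepare_generator_batch itself is not reproduced on this page. Judging by the annotated tensors in Example #7 below, it builds a standard teacher-forcing pair: inp is the sequence shifted right with a start token prepended, target is the sequence unchanged. A hedged sketch under that assumption:

import torch

def prepare_generator_batch_sketch(samples, start_letter=0, gpu=False):
    """Sketch: build (inp, target) for teacher forcing.

    samples: (batch, seq_len) LongTensor of token ids (assumed shape).
    """
    batch_size, seq_len = samples.size()
    inp = torch.empty(batch_size, seq_len, dtype=torch.long)
    inp[:, 0] = start_letter                 # prepend the start token
    inp[:, 1:] = samples[:, :seq_len - 1]    # shift the sequence right by one
    target = samples.long()
    if gpu:
        inp, target = inp.cuda(), target.cuda()
    return inp, target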
Example #3
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Maximum-likelihood pretraining for the generator
    """
    for epoch in range(epochs):
        print("epoch %d:" % (epoch + 1), end=" ")
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER, gpu=CUDA)
            # inp is what the generator sees; target is the real text shifted by one step.
            # The loss of predicting target from inp is what optimizes the generator.
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()
            # ceil(POS_NEG_SAMPLES / BATCH_SIZE) is the number m of batches per epoch;
            # dividing m into 10 chunks, i tells us when another 10% has completed.
            if (i // BATCH_SIZE) % ceil(ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample: the loss is accumulated per word
        # over seq_len steps, so dividing by MAX_SEQ_LEN yields the per-sample loss
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL: the oracle scores the
        # generator's own samples, i.e. measures how well it generates on its own
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN, start_letter=START_LETTER, gpu=CUDA)

        print("average_train_NLL = %.4f, oracle_sample_NLL = %.4f" % (total_loss, oracle_loss))
Example #4
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, args):
    """
    Max Likelihood Pretraining for the generator
    """
    num_data = len(real_data_samples)
    total_loss = 0
    for i in range(0, num_data, args.g_bsz):
        inp, target = helpers.prepare_generator_batch(
            real_data_samples[i:i + args.g_bsz],
            start_letter=args.start_letter,
            gpu=args.cuda)
        gen_opt.zero_grad()
        loss = gen.batchNLLLoss(inp, target)
        loss.backward()
        gen_opt.step()

        total_loss += loss.item()  # loss.data[0] is a deprecated PyTorch 0.3 idiom and errors on modern versions

        if (i // args.g_bsz) % ceil(ceil(num_data / float(args.g_bsz)) /
                                    10.) == 0:  # roughly every 10% of an epoch
            print('.', end='')
            sys.stdout.flush()

    # each loss in a batch is loss per sample
    total_loss = total_loss / ceil(
        num_data / float(args.g_bsz)) / args.max_seq_len

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval,
                                               args)
    return oracle_loss, total_loss
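helpers.batchwise_oracle_nll, used by every example for evaluation, samples from the generator and scores those samples under the oracle's NLL. A sketch assuming the positional signature used in the other examples, and reusing the prepare_generator_batch sketch from above:

import torch
from math import ceil

def batchwise_oracle_nll_sketch(gen, oracle, num_samples, batch_size, max_seq_len,
                                start_letter=0, gpu=False):
    """Sketch: mean per-token oracle NLL of the generator's own samples."""
    total = 0.0
    with torch.no_grad():
        for _ in range(0, num_samples, batch_size):
            s = gen.sample(batch_size)
            inp, target = prepare_generator_batch_sketch(s, start_letter, gpu)
            total += oracle.batchNLLLoss(inp, target).item() / max_seq_len
    return total / ceil(num_samples / float(batch_size))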
Example #5
def train_generator_PG(gen, gen_opt, oracle, dis, num_batches):
    # Train the generator with policy gradients, using the discriminator's output as the reward
    for batch in range(num_batches):
        s = gen.sample(BATCH_SIZE * 2)  # sample 2*BATCH_SIZE (= 64) sequences
        inp, target = helpers.prepare_generator_batch(s, start_letter=START_LETTER, gpu=CUDA)
        reward = dis.batchClassify(target)  # discriminator probability used as the reward
        gen_opt.zero_grad()
        pg_loss = gen.batchPGLoss(inp, target, reward)
        pg_loss.backward()
        gen_opt.step()

    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN, start_letter=START_LETTER, gpu=CUDA)
    print('oracle_sample_NLL = %.4f' % oracle_loss)
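dis.batchClassify is, per the comment above, a forward pass that returns one probability per sequence. A minimal sketch of a discriminator exposing that interface (the architecture is an assumption; the actual one may differ, e.g. a CNN):

import torch
import torch.nn as nn

class DiscriminatorSketch(nn.Module):
    """Minimal sequence classifier: embed -> GRU -> per-sequence probability."""

    def __init__(self, vocab_size, emb_dim=32, hidden_dim=32):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.gru = nn.GRU(emb_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, 1)

    def batchClassify(self, seqs):           # seqs: (batch, seq_len) token ids
        _, h = self.gru(self.emb(seqs))      # h: (1, batch, hidden_dim)
        return torch.sigmoid(self.out(h.squeeze(0))).view(-1)  # (batch,)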
Example #6
def train_generator_MLE(gen, gen_opt, oracle, dataloader, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        start_time = time.time()
        print('epoch %d/%d : ' % (epoch + 1, epochs), end='')
        sys.stdout.flush()
        total_loss = 0

        select_samples = dataloader.train

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):

            batch = select_samples[i:i + BATCH_SIZE]
            inp, target = split_data(batch, BATCH_SIZE)

            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i // BATCH_SIZE) % ceil(
                    ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) /
                    10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(
            POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen,
                                                   oracle,
                                                   POS_NEG_SAMPLES,
                                                   BATCH_SIZE,
                                                   MAX_SEQ_LEN,
                                                   start_letter=START_LETTER,
                                                   gpu=CUDA)

        msg = ' average_train_NLL = %.4f, oracle_sample_NLL = %.4f, time = %.2f' % (
            total_loss, oracle_loss, time.time() - start_time)
        log.append(msg)
        print(msg)
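split_data is a local helper that is not shown on this page; given the call site, it presumably builds the same (inp, target) teacher-forcing pair that helpers.prepare_generator_batch produces in the other examples. A hypothetical sketch (the name and arguments are taken from the call above; the behavior is an assumption):

import torch

def split_data_sketch(batch, batch_size, start_letter=0):
    """Hypothetical: derive (inp, target) from a raw (batch, seq_len) tensor."""
    target = batch[:batch_size].long()
    inp = torch.full_like(target, start_letter)   # fill with the start token
    inp[:, 1:] = target[:, :-1]                   # shift right by one step
    return inp, target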
Example #7
def train_generator_MLE(gen, gen_opt, oracle, real_data_samples, epochs):
    """
    Max Likelihood Pretraining for the generator
    """
    for epoch in range(epochs):
        print('epoch %d : ' % (epoch + 1), end='')
        sys.stdout.flush()
        total_loss = 0

        for i in range(0, POS_NEG_SAMPLES, BATCH_SIZE):
            inp, target = helpers.prepare_generator_batch(real_data_samples[i:i + BATCH_SIZE], start_letter=START_LETTER, # real_data_samples (10000, 20)
                                                          gpu=CUDA)
            # inp:    (32, 20), first row e.g.
            #   [   0,   87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144,
            #    2960, 3784, 2351, 3609,   92, 3391, 2187,  168, 4767, 4973]
            # target: (32, 20), first row e.g.
            #   [  87, 4410, 3560, 1699, 3485, 1407, 4982, 3391, 1144, 2960,
            #    3784, 2351, 3609,   92, 3391, 2187,  168, 4767, 4973,  619]
            # i.e. target is inp shifted left by one token
            gen_opt.zero_grad()
            loss = gen.batchNLLLoss(inp, target)
            loss.backward()
            gen_opt.step()

            total_loss += loss.item()

            if (i // BATCH_SIZE) % ceil(
                            ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / 10.) == 0:  # roughly every 10% of an epoch
                print('.', end='')
                sys.stdout.flush()

        # each loss in a batch is loss per sample
        total_loss = total_loss / ceil(POS_NEG_SAMPLES / float(BATCH_SIZE)) / MAX_SEQ_LEN

        # sample from generator and compute oracle NLL
        oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN,
                                                   start_letter=START_LETTER, gpu=CUDA)

        print(' average_train_NLL = %.4f, oracle_sample_NLL = %.4f' % (total_loss, oracle_loss))
Example #8
def train_generator_PG_rollout(gen, gen_opt, oracle, dis, num_batches):
    """
    New function with roll-out policy
    """

    for batch in range(num_batches):
        seq, h = gen.sample_rollout_init(BATCH_SIZE*2)
        for t in range(1, MAX_SEQ_LEN):
            loss = 0
            for n in range(MC_SIZE):
                inp, target = gen.mc(seq, t, gpu=CUDA)
                rewards = dis.batchClassify(target)
                pg_loss = gen.batchPGLoss(inp, target, rewards)
                loss += pg_loss
            loss /= MC_SIZE
            gen_opt.zero_grad()
            loss.backward()
            gen_opt.step()
            seq, h = gen.sample_rollout(BATCH_SIZE*2, seq, h, t)

    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, POS_NEG_SAMPLES, BATCH_SIZE, MAX_SEQ_LEN, start_letter=START_LETTER, gpu=CUDA)

    print(' oracle_sample_NLL = %.4f' % oracle_loss)    
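gen.mc and gen.sample_rollout are specific to this variant and are not shown. Conceptually, at step t each Monte-Carlo call freezes the prefix seq[:, :t] and samples the remaining tokens from the generator's own policy, so that the discriminator can reward a complete sequence (SeqGAN's roll-out policy). A hedged sketch of that completion step, assuming a hypothetical gen.step(tok, h) -> (logits, h) single-step interface:

import torch

def mc_complete_sketch(gen, seq, t, max_seq_len):
    """Sketch: complete the fixed prefix seq[:, :t] to full length."""
    out = seq.clone()
    h = None
    for i in range(t, max_seq_len):
        logits, h = gen.step(out[:, i - 1], h)    # hypothetical single-step API
        probs = torch.softmax(logits, dim=-1)
        out[:, i] = torch.multinomial(probs, 1).squeeze(1)
    return out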
Example #9
    # gen.load_state_dict(torch.load(pretrained_gen_path))

    # PRETRAIN DISCRIMINATOR
    print('\nStarting Discriminator Training...')
    dis_optimizer = optim.Adagrad(dis.parameters())
    train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle, 50, 3)

    torch.save(dis.state_dict(), pretrained_dis_path)
    # dis.load_state_dict(torch.load(pretrained_dis_path))

    # ADVERSARIAL TRAINING
    print('\nStarting Adversarial Training...')
    oracle_loss = helpers.batchwise_oracle_nll(gen,
                                               oracle,
                                               POS_NEG_SAMPLES,
                                               BATCH_SIZE,
                                               MAX_SEQ_LEN,
                                               start_letter=START_LETTER,
                                               gpu=CUDA)
    print('\nInitial Oracle Sample Loss : %.4f' % oracle_loss)

    for epoch in range(ADV_TRAIN_EPOCHS):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        # TRAIN GENERATOR
        print('\nAdversarial Training Generator : ', end='')
        sys.stdout.flush()
        train_generator_PG(gen, gen_optimizer, oracle, dis, 1)

        # TRAIN DISCRIMINATOR
        print('\nAdversarial Training Discriminator : ')
        train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle, 5,
Example #10
# PRETRAIN DISCRIMINATOR
dis_optimizer = optim.Adagrad(dis.parameters())
if args.pre_d_load is not None:
    print("Load pretrained D")
    dis.load_state_dict(torch.load(args.pre_d_load))
else:
    print('\nStarting Discriminator Training...')
    train_discriminator(dis, dis_optimizer, oracle_samples, gen, oracle,
                        args.d_pre_steps, args.d_pre_epochs, args)
if args.pre_d_save is not None:
    torch.save(dis.state_dict(), args.pre_d_save)

# ADVERSARIAL TRAINING
print('\nStarting Adversarial Training...')
oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval, args)
print('\nInitial Oracle Sample Loss : %.4f' % oracle_loss)

for epoch in range(args.mle_epochs, args.mle_epochs + args.adv_epochs):
    print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
    # TRAIN GENERATOR
    print('\nAdversarial Training Generator : ', end='')
    sys.stdout.flush()
    train_generator_PG(gen, gen_optimizer, dis, oracle, args)
    # sample from generator and compute oracle NLL
    oracle_loss = helpers.batchwise_oracle_nll(gen, oracle, args.num_eval,
                                               args)
    print(' oracle_sample_NLL = %.4f' % oracle_loss)
    logger.scalar_summary("oracle_loss", oracle_loss, epoch + 1)

    # TRAIN DISCRIMINATOR
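logger.scalar_summary above suggests a TensorBoard-style logger. If that helper is unavailable, the same scalar logging can be done with PyTorch's built-in torch.utils.tensorboard (a swapped-in equivalent, not this snippet's original logger):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/seqgan')  # hypothetical log directory
# equivalent to: logger.scalar_summary("oracle_loss", oracle_loss, epoch + 1)
writer.add_scalar('oracle_loss', oracle_loss, epoch + 1)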