Example #1
def pretrain(sess, generator, target_lstm, train_discriminator):
    # samples = generate_samples(sess, target_lstm, BATCH_SIZE, generated_num)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    gen_data_loader.create_batches(positive_samples)
    results = OrderedDict({'exp_name': PREFIX})

    #  pre-train generator
    print('Start pre-training...')
    start = time.time()
    for epoch in tqdm(range(PRE_EPOCH_NUM)):
        print(' gen pre-train')
        loss = pre_train_epoch(sess, generator, gen_data_loader)
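        # evaluate at epoch 10 and at every 40th epoch (epoch 0 included)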
        if epoch == 10 or epoch % 40 == 0:
            samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('\t test_loss {}, train_loss {}'.format(test_loss, loss))
            mm.compute_results(samples, train_samples, ord_dict, results)

    samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
    likelihood_data_loader.create_batches(samples)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)

    samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
    likelihood_data_loader.create_batches(samples)

    print('Start training discriminator...')
    for i in tqdm(range(dis_alter_epoch)):
        print(' discriminator pre-train')
        d_loss, acc, ypred_for_auc = train_discriminator()
    end = time.time()
    print('Total time was {:.4f}s'.format(end - start))
    return
Example #2
def pretrain(sess, generator, target_lstm, train_discriminator):
    # samples = generate_samples(sess, target_lstm, BATCH_SIZE, generated_num)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    gen_data_loader.create_batches(positive_samples)
    results = OrderedDict({'exp_name': PREFIX})

    #  pre-train generator
    print('Start pre-training...')
    for epoch in range(PRE_EPOCH_NUM):
        print('pre-train epoch:', epoch)
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch == 10 or epoch % 40 == 0:
            samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('\t test_loss {}, train_loss {}'.format(test_loss, loss))
            mm.compute_results(samples, train_samples, ord_dict, results)

    samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
    likelihood_data_loader.create_batches(samples)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)

    samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
    likelihood_data_loader.create_batches(samples)

    print('Start training discriminator...')
    for i in range(dis_alter_epoch):
        print('epoch {}'.format(i))
        d_loss, acc = train_discriminator()

    return
Example #3
def main():
    random.seed(SEED)
    np.random.seed(SEED)
    stringGenerator = TextGenerator('../corpus/index2word.pickle',
                                    '../corpus/word2index.pickle',
                                    '../corpus/all.code')

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    #vocab_size = 5000
    vocab_size = len(stringGenerator.index2Word)

    generator = get_trainable_model(vocab_size)
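    # Load the oracle parameters, then re-initialize the embedding and the output
    # projection so their shapes match the corpus vocabulary size.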
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'))
    target_params[0] = np.random.rand(vocab_size, 32).astype(np.float32)
    target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32)
    target_params[-1] = np.random.rand(vocab_size).astype(np.float32)
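    # args: vocab_size, batch_size, emb_dim, hidden_dim, seq_length, start_token, params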
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    #generate_samples(sess, target_lstm, 64, 10000, positive_file)
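    # saveSamplesToFile args are presumably (sequence_length, num_samples, output_path)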
    stringGenerator.saveSamplesToFile(20, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            stringGenerator.saveSamplesToFile(20, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    #generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    stringGenerator.saveSamplesToFile(20, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    log.close()
Example #4
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    # load data (likelihood?)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 68


    # load generator with parameters
    generator = get_trainable_model(vocab_size)
    # target_params = cPickle.load(open('save/target_params.pkl'))
    target_params = initialize_parameters(vocab_size)

    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN, target_params)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate synthetic data to stand in for the original data
    # generate_samples(sess, target_lstm, 64, 100, positive_file)


    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    print 'final pre-train epoch ', 'test_loss ', test_loss
    buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    log.close()
Example #5
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    # assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    best_score = 9.5

    generator = get_trainable_model(vocab_size)
    # target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate_samples(sess, target_lstm, 64, 10000, positive_file)
    positive_data = np.load(positive_file).tolist()
    gen_data_loader.create_batches(positive_data)

    log = open('log/ss_experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start scheduled sampling training...'
    log.write('scheduled sampling training...\n')
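    # Scheduled sampling: curriculum_rate (presumably the probability of feeding the
    # ground-truth token) starts at 0.9 and is annealed by 0.002 per epoch toward 0.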
    curriculum_rate = .9
    for epoch in xrange(EPOCH_NUM):
        curriculum_rate = max(0.0, curriculum_rate - 0.002)
        loss = pre_train_epoch(sess, generator, gen_data_loader, curriculum_rate)
        # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
        # likelihood_data_loader.create_batches(eval_file)
        # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
        # print 'pre-train epoch ', epoch, 'curriculum rate:', curriculum_rate, 'test_loss ', test_loss
        # buffer = str(epoch) + ' ' + str(curriculum_rate) + ' ' + str(test_loss) + '\n'
        buffer = str(epoch) + ' ' + str(curriculum_rate) + ' ' + str(loss) + '\n'
        print(buffer)
        log.write(buffer)

        # if test_loss < best_score:
        #     best_score = test_loss
        #     print 'best score: ', test_loss
        #     generate_samples(sess, generator, BATCH_SIZE, 100000, eval_file)
        #     likelihood_data_loader.create_batches(eval_file)
        #     significance_test(sess, target_lstm, likelihood_data_loader, 'significance/schedule_sampling.txt')

    log.close()

    generate_samples(sess, generator, BATCH_SIZE, 100, final_trans_file)
Example #6
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'))
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

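    # The oracle LSTM writes 10,000 "real" sequences to positive_file.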
    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After supervised-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    log.close()
Example #7

def pre_train_epoch(sess, trainable_model, data_loader):
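    # One supervised (MLE) pass over the full dataset; returns the mean
    # cross-entropy loss across batches.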
    supervised_g_losses = []
    data_loader.reset_pointer()

    for it in range(data_loader.num_batch):
        batch = data_loader.next_batch()
        _, g_loss, g_pred = trainable_model.pretrain_step(sess, batch)
        supervised_g_losses.append(g_loss)

    return np.mean(supervised_g_losses)


# This is a hack. I don't even use the Likelihood data loader tbh
likelihood_data_loader = Gen_Data_loader(BATCH_SIZE)


def pretrain(sess, generator, target_lstm, train_discriminator):
    # samples = generate_samples(sess, target_lstm, BATCH_SIZE, generated_num)
    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    gen_data_loader.create_batches(positive_samples)
    results = OrderedDict({'exp_name': PREFIX})

    #  pre-train generator
    print('Start pre-training...')
    start = time.time()
    for epoch in tqdm(range(PRE_EPOCH_NUM)):
        print(' gen pre-train')
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch == 10 or epoch % 40 == 0:
            samples = generate_samples(sess, generator, BATCH_SIZE, SAMPLE_NUM)
            likelihood_data_loader.create_batches(samples)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print('\t test_loss {}, train_loss {}'.format(test_loss, loss))
            mm.compute_results(samples, train_samples, ord_dict, results)
Example #8
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    best_score = 9.1
    generator = get_trainable_model(vocab_size)
    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    ################################################################
    gen_data_loader.create_batches(positive_file)
    references = load_references(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt')

    rollout = ROLLOUT(generator, references)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            print 'start calculating BLEU...'
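            # PG-BLEU reward: a single rollout scored by BLEU against the references;
            # the (1/3, 1/3, 1/3) tuple appears to weight 1- to 3-grams uniformly.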
            rewards = rollout.get_reward(sess, samples, 1, (1.0 / 3, 1.0 / 3, 1.0 / 3))
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss], feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader, 'significance/pg_bleu.txt')

        rollout.update_params()

    log.close()
Example #9
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 68
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    # load generator with parameters
    generator = get_trainable_model(vocab_size)
    target_params = initialize_parameters(vocab_size)

    target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM,
                              SEQ_LENGTH, START_TOKEN, target_params)

    # CNNs
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=SEQ_LENGTH,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
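    # aggregation_method=2 corresponds to AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
    # which sums gradients in place to reduce peak memory usage.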
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open(logpath, 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            file_name = 'target_generate/pretrain_epoch' + str(epoch) + '.pkl'
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

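            # keep sample dumps only from every 100th epoch to save disk space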
            if epoch % 100 != 0:
                os.remove(file_name)

    file_name = 'target_generate/pretrain_finished.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    file_name = 'target_generate/supervise.pkl'
    generate_samples(sess, generator, BATCH_SIZE, generated_num, file_name)
    likelihood_data_loader.create_batches(file_name)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    os.remove(file_name)

    print 'Start training discriminator...'
    for i in range(dis_alter_epoch):
        print 'dis_alter_epoch : ' + str(i)
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
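            # unpacking an empty final batch raises ValueError; such batches are skipped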
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

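    # The rollout policy is a delayed copy of the generator; 0.8 is the update rate
    # used when syncing its parameters toward the generator's.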
    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
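            # Monte Carlo search: 16 rollouts per partial sequence, with completions
            # scored by the discriminator CNN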
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:

            file_name = 'target_generate/reinforce_batch' + str(
                total_batch) + '.pkl'

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             file_name)
            likelihood_data_loader.create_batches(file_name)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if total_batch % 50 != 0:
                os.remove(file_name)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            # for _ in range(2):

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
Example #10
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = get_trainable_model(vocab_size)

    # oracle model : target lstm
    # target_params = cPickle.load(open('save/target_params.pkl'))
    # target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, SEQ_LENGTH, 0, target_params)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(
            sequence_length=SEQ_LENGTH,
            num_classes=2,
            vocab_size=vocab_size,
            embedding_size=dis_embedding_dim,
            filter_sizes=dis_filter_sizes,
            num_filters=dis_num_filters,
            l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [param for param in tf.trainable_variables() if 'discriminator' in param.name]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # czq
    # generate real data
    # generate_samples(sess, target_lstm, 64, 10000, positive_file)

    # store real data for next step
    positive_data = np.load(positive_file).tolist()
    gen_data_loader.create_batches(positive_data)

    log = open('log/seq_mle_experiment-log.txt', 'w')
    #  pre-train generator
    print '#########################################################################'
    print 'Start pre-training generator...'
    log.write('pre-training...\n')

    for epoch in xrange(PRE_EPOCH_NUM):
        # print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            # likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            # buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            buffer = 'pre-trained generator:' + str(epoch) + ' ' + str(loss)
            print(buffer)
            log.write(buffer + '\n')

    # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    # likelihood_data_loader.create_batches(eval_file)
    # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    # buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    buffer = 'After pre-training:' + ' ' + str(loss)
    print(buffer)
    log.write(buffer + '\n')

    # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    # likelihood_data_loader.create_batches(eval_file)
    # significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt')

    # for testing purposes only
    generate_samples(sess, generator, BATCH_SIZE, 100, final_trans_file_mle)

    # exit(0)

    print 'Start pre-training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(
            zip(dis_x_train, dis_y_train), dis_batch_size, dis_num_epochs
        )

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except Exception as e:
                # print str(e)
                raise

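        # report the discriminator loss on the last batch of this epoch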
        loss = sess.run(cnn.loss, feed)
        buffer = 'pre-train discriminator' + ' ' + str(loss)
        print buffer
        log.write(buffer + '\n')

    rollout = ROLLOUT(generator, 0.8)
    print('Before GAN')
    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    # for tensorboard
    # writer = tf.summary.FileWriter('./tb_logs', graph=tf.get_default_graph())

    for total_batch in range(TOTAL_BATCH):
        print 'progress', total_batch, '/', TOTAL_BATCH
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss, pre_loss = sess.run([generator.g_updates, generator.g_loss, generator.pretrain_loss],
                                           feed_dict=feed)
            buffer = 'G-step:' + str(TRAIN_ITER) + ':' + str(g_loss) + '|' + str(pre_loss)
            log.write(buffer + '\n')
            print(buffer)
            # if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            #     generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            #     likelihood_data_loader.create_batches(eval_file)
            #     # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            #     # buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            #     # print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            #     log.write(buffer)

            # if test_loss < best_score:
            #     best_score = test_loss
            #     print 'best score: ', test_loss
            #     significance_test(sess, target_lstm, likelihood_data_loader, 'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print('Start training discriminator')
        log.write('training discriminator...\n')

        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

            loss = sess.run(cnn.loss, feed)
            buffer = 'discriminator' + ' ' + str(loss)
            print buffer
            log.write(buffer + '\n')

    log.close()

    # save the model
    # saver = tf.train.Saver({"gen": generator})
    # saver.save(sess, 'my-model')

    # generate samples
    generate_samples(sess, generator, BATCH_SIZE, 100, final_trans_file_seqgan)
Example #11
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000

    best_score = 9.1
    generator = get_trainable_model(vocab_size)
    # target_lstm = TARGET_LSTM(vocab_size, BATCH_SIZE, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # generate_samples(sess, target_lstm, 64, 10000, positive_file)
    ################################################################
    positive_data = np.load(positive_file).tolist()
    gen_data_loader.create_batches(positive_data)
    references = load_references(positive_data)

    log = open('log/pg_experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            #     generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            #     likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            # buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            # log.write(buffer)
            print 'pre-train epoch ', epoch, 'loss ', loss
            buffer = str(epoch) + ' ' + str(loss) + '\n'
            log.write(buffer)

    # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    # likelihood_data_loader.create_batches(eval_file)
    # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    # buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    print 'After pre-train epoch ', loss
    buffer = str(loss) + '\n'
    log.write(buffer)

    # generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    # likelihood_data_loader.create_batches(eval_file)
    # significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt')

    rollout = ROLLOUT(generator, references)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            print 'start calculating BLEU...'
            rewards = rollout.get_reward(sess, samples, 1, (1.0 / 3, 1.0 / 3, 1.0 / 3))
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss], feed_dict=feed)

            # if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            #     generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            #     likelihood_data_loader.create_batches(eval_file)
            # test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            # buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            # print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            # log.write(buffer)

            # if test_loss < best_score:
            #     best_score = test_loss
            #     print 'best score: ', test_loss
            #     significance_test(sess, target_lstm, likelihood_data_loader, 'significance/pg_bleu.txt')
            print('Current loss:' + str(total_batch) + ':' + str(g_loss))
        rollout.update_params()

    log.close()

    generate_samples(sess, generator, BATCH_SIZE, 100, final_trans_file)
Example #12
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    stringGenerator = TextGenerator('../corpus/index2word.pickle',
                                    '../corpus/word2index.pickle',
                                    '../corpus/all.code')

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = len(stringGenerator.index2Word)
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'))
    target_params[0] = np.random.rand(vocab_size, 32).astype(np.float32)
    target_params[-2] = np.random.rand(32, vocab_size).astype(np.float32)
    target_params[-1] = np.random.rand(vocab_size).astype(np.float32)
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    #generate_samples(sess, target_lstm, 64, 10000, positive_file)
    stringGenerator.saveSamplesToFile(20, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
Example #13
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000
    dis_data_loader = Dis_dataloader()

    best_score = 1000
    generator = get_trainable_model(vocab_size)
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'))
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    with tf.variable_scope('discriminator'):
        cnn = TextCNN(
            sequence_length=20,
            num_classes=2,
            vocab_size=vocab_size,
            embedding_size=dis_embedding_dim,
            filter_sizes=dis_filter_sizes,
            num_filters=dis_num_filters,
            l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [param for param in tf.trainable_variables() if 'discriminator' in param.name]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss, cnn_params, aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars, global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader, 'significance/supervise.txt')

    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(
            zip(dis_x_train, dis_y_train), dis_batch_size, dis_num_epochs
        )

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss], feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader, 'significance/seqgan.txt')

        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()
Example #14
def main():
    random.seed(SEED)
    np.random.seed(SEED)

    assert START_TOKEN == 0

    gen_data_loader = Gen_Data_loader(BATCH_SIZE)
    likelihood_data_loader = Likelihood_data_loader(BATCH_SIZE)
    vocab_size = 5000
    dis_data_loader = Dis_dataloader()

    best_score = 1000

    # initialize an LSTM object and use it to initialize the PoemGen object
    generator = get_trainable_model(vocab_size)

    # cPickle is an object serialization library

    # the loaded pickle object will be an array of numbers
    # later, these params will be used to initialize the target LSTM
    target_params = cPickle.load(open('save/target_params.pkl', 'rb'))
    # print target_params

    # time.sleep(1000)  # debugging leftover; would stall the run for ~17 minutes

    # This appears to be the target (oracle) model, which uses an RNN
    target_lstm = TARGET_LSTM(vocab_size, 64, 32, 32, 20, 0, target_params)

    # This is the discriminator which uses CNN
    with tf.variable_scope('discriminator'):
        cnn = TextCNN(sequence_length=20,
                      num_classes=2,
                      vocab_size=vocab_size,
                      embedding_size=dis_embedding_dim,
                      filter_sizes=dis_filter_sizes,
                      num_filters=dis_num_filters,
                      l2_reg_lambda=dis_l2_reg_lambda)

    cnn_params = [
        param for param in tf.trainable_variables()
        if 'discriminator' in param.name
    ]
    # Define Discriminator Training procedure
    dis_global_step = tf.Variable(0, name="global_step", trainable=False)
    dis_optimizer = tf.train.AdamOptimizer(1e-4)
    dis_grads_and_vars = dis_optimizer.compute_gradients(cnn.loss,
                                                         cnn_params,
                                                         aggregation_method=2)
    dis_train_op = dis_optimizer.apply_gradients(dis_grads_and_vars,
                                                 global_step=dis_global_step)

    config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.5
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    generate_samples(sess, target_lstm, 64, 10000, positive_file)
    gen_data_loader.create_batches(positive_file)

    log = open('log/experiment-log.txt', 'w')
    #  pre-train generator
    # Initialize the generator with MLE estimators
    print 'Start pre-training...'
    log.write('pre-training...\n')
    for epoch in xrange(PRE_EPOCH_NUM):
        print 'pre-train epoch:', epoch
        loss = pre_train_epoch(sess, generator, gen_data_loader)
        if epoch % 5 == 0:
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            print 'pre-train epoch ', epoch, 'test_loss ', test_loss
            buffer = str(epoch) + ' ' + str(test_loss) + '\n'
            log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
    buffer = 'After pre-training:' + ' ' + str(test_loss) + '\n'
    log.write(buffer)

    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    likelihood_data_loader.create_batches(eval_file)
    significance_test(sess, target_lstm, likelihood_data_loader,
                      'significance/supervise.txt')

    print 'Start training discriminator...'
    for _ in range(dis_alter_epoch):
        generate_samples(sess, generator, BATCH_SIZE, generated_num,
                         negative_file)

        #  train discriminator
        dis_x_train, dis_y_train = dis_data_loader.load_train_data(
            positive_file, negative_file)
        dis_batches = dis_data_loader.batch_iter(zip(dis_x_train, dis_y_train),
                                                 dis_batch_size,
                                                 dis_num_epochs)

        for batch in dis_batches:
            try:
                x_batch, y_batch = zip(*batch)
                feed = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dis_dropout_keep_prob
                }
                _, step = sess.run([dis_train_op, dis_global_step], feed)
            except ValueError:
                pass

    rollout = ROLLOUT(generator, 0.8)

    print '#########################################################################'
    print 'Start Reinforcement Training Generator...'
    log.write('Reinforcement Training...\n')

    for total_batch in range(TOTAL_BATCH):
        for it in range(TRAIN_ITER):
            samples = generator.generate(sess)
            rewards = rollout.get_reward(sess, samples, 16, cnn)
            feed = {generator.x: samples, generator.rewards: rewards}
            _, g_loss = sess.run([generator.g_updates, generator.g_loss],
                                 feed_dict=feed)

        if total_batch % 1 == 0 or total_batch == TOTAL_BATCH - 1:
            # The trainable model 'generator' is an RNN model from PoemGen

            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             eval_file)
            likelihood_data_loader.create_batches(eval_file)
            test_loss = target_loss(sess, target_lstm, likelihood_data_loader)
            buffer = str(total_batch) + ' ' + str(test_loss) + '\n'
            print 'total_batch: ', total_batch, 'test_loss: ', test_loss
            log.write(buffer)

            if test_loss < best_score:
                best_score = test_loss
                print 'best score: ', test_loss
                significance_test(sess, target_lstm, likelihood_data_loader,
                                  'significance/seqgan.txt')

        # sync the rollout policy's parameters with the updated generator
        rollout.update_params()

        # generate for discriminator
        print 'Start training discriminator'
        for _ in range(5):
            generate_samples(sess, generator, BATCH_SIZE, generated_num,
                             negative_file)

            dis_x_train, dis_y_train = dis_data_loader.load_train_data(
                positive_file, negative_file)
            dis_batches = dis_data_loader.batch_iter(
                zip(dis_x_train, dis_y_train), dis_batch_size, 3)

            for batch in dis_batches:
                try:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        cnn.input_x: x_batch,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: dis_dropout_keep_prob
                    }
                    _, step = sess.run([dis_train_op, dis_global_step], feed)
                except ValueError:
                    pass

    log.close()