Exemplo n.º 1
0
def main():
    args = parser.parse_args()
    pp.pprint(vars(args))
    config = vars(args)

    # train with different datasets
    if args.dataset == 'oracle':
        oracle_model = OracleLstm(num_vocabulary=args.vocab_size, batch_size=args.batch_size, emb_dim=args.gen_emb_dim,
                                  hidden_dim=args.hidden_dim, sequence_length=args.seq_len,
                                  start_token=args.start_token)
        oracle_loader = OracleDataLoader(args.batch_size, args.seq_len)
        gen_loader = OracleDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=args.vocab_size, batch_size=args.batch_size,
                                         seq_len=args.seq_len, gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads, hidden_dim=args.hidden_dim,
                                         start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size, seq_len=args.seq_len,
                                                 vocab_size=args.vocab_size, dis_emb_dim=args.dis_emb_dim,
                                                 num_rep=args.num_rep, sn=args.sn)
        oracle_train(generator, discriminator, oracle_model, oracle_loader, gen_loader, config)

    elif args.dataset in ['image_coco', 'emnlp_news']:
        data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        seq_len, vocab_size = text_precess(data_file)
        config['seq_len'] = seq_len # override the sequence length
        config['vocab_size'] = vocab_size
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

        oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=vocab_size, batch_size=args.batch_size,
                                         seq_len=seq_len, gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads, hidden_dim=args.hidden_dim,
                                         start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size, seq_len=seq_len,
                                                 vocab_size=vocab_size, dis_emb_dim=args.dis_emb_dim,
                                                 num_rep=args.num_rep, sn=args.sn)
        f_classifier = models.get_classifier(args.f_architecture, scope="f_classifier", batch_size=args.batch_size, seq_len=seq_len,
                                                 vocab_size=vocab_size, dis_emb_dim=args.f_emb_dim,
                                                 num_rep=args.num_rep, sn=args.sn)
        real_train(generator, discriminator, f_classifier, oracle_loader, config)

    else:
        raise NotImplementedError('{}: unknown dataset!'.format(args.dataset))
Exemplo n.º 2
0
data_file = os.path.join('data', '{}.txt'.format(dataset))
oracle_file = os.path.join(test_samples_dir, 'oracle_{}.txt'.format(dataset))
test_file = os.path.join('data', 'testdata/test.txt')

if not os.path.exists(test_samples_dir):
    os.makedirs(test_samples_dir)

seq_len, vocab_size = text_precess(data_file)
print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

generator = models.get_generator(args.g_architecture, vocab_size=vocab_size, batch_size=args.batch_size,
                                 seq_len=seq_len, gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                 head_size=args.head_size, num_heads=args.num_heads, hidden_dim=args.hidden_dim,
                                 start_token=args.start_token)

oracle_loader = RealDataLoader(args.batch_size, seq_len)

# placeholder definitions
x_real = tf.placeholder(tf.int32, [args.batch_size, seq_len], name="x_real")  # tokens of oracle sequences

temperature = tf.Variable(1., trainable=False, name='temperature')

x_fake_onehot_appr, x_fake, g_pretrain_loss, gen_o = generator(x_real=x_real, temperature=temperature)


with tf.Session() as sess:
    # tf.global_variables_initializer().run()
    new_saver = tf.train.import_meta_graph(meta_file)
    new_saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

    index_word_dict = get_oracle_file(data_file, oracle_file, seq_len)
Exemplo n.º 3
0
def main():
    args = parser.parse_args()
    # pp.pprint(vars(args))
    config = vars(args)

    # train with different datasets
    if args.dataset == 'oracle':
        oracle_model = OracleLstm(num_vocabulary=args.vocab_size,
                                  batch_size=args.batch_size,
                                  emb_dim=args.gen_emb_dim,
                                  hidden_dim=args.hidden_dim,
                                  sequence_length=args.seq_len,
                                  start_token=args.start_token)
        oracle_loader = OracleDataLoader(args.batch_size, args.seq_len)
        gen_loader = OracleDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture,
                                         vocab_size=args.vocab_size,
                                         batch_size=args.batch_size,
                                         seq_len=args.seq_len,
                                         gen_emb_dim=args.gen_emb_dim,
                                         mem_slots=args.mem_slots,
                                         head_size=args.head_size,
                                         num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim,
                                         start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture,
                                                 batch_size=args.batch_size,
                                                 seq_len=args.seq_len,
                                                 vocab_size=args.vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim,
                                                 num_rep=args.num_rep,
                                                 sn=args.sn)
        oracle_train(generator, discriminator, oracle_model, oracle_loader,
                     gen_loader, config)

    elif args.dataset in ['image_coco', 'emnlp_news', 'emnlp_news_small']:
        data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        seq_len, vocab_size, word_index_dict, index_word_dict = text_precess(
            data_file)
        config['seq_len'] = seq_len
        config['vocab_size'] = vocab_size
        # print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

        oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture,
                                         vocab_size=vocab_size,
                                         batch_size=args.batch_size,
                                         seq_len=seq_len,
                                         gen_emb_dim=args.gen_emb_dim,
                                         mem_slots=args.mem_slots,
                                         head_size=args.head_size,
                                         num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim,
                                         start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture,
                                                 batch_size=args.batch_size,
                                                 seq_len=seq_len,
                                                 vocab_size=vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim,
                                                 num_rep=args.num_rep,
                                                 sn=args.sn)

        # print("gen params = ", count_params(generator.trainable_variables))
        # print("disc params = ", count_params(discriminator.trainable_variables))
        # sys.stdout.flush()

        load_model = False
        if config['load_saved_model'] != "":
            log_dir_path = os.path.dirname(config['load_saved_model'])
            config['log_dir'] = log_dir_path
            config['sample_dir'] = os.path.join(
                os.path.split(log_dir_path)[0], 'samples')
            index_word_dict = load_index_to_word_dict(
                os.path.join(config['log_dir'], "index_to_word_dict.json"))
            word_index_dict = {v: k for k, v in index_word_dict.items()}
            load_model = True
        else:
            if not os.path.exists(config['log_dir']):
                os.makedirs(config['log_dir'])
            json.dump(
                index_word_dict,
                open(
                    os.path.join(config['log_dir'], "index_to_word_dict.json"),
                    'w'))
            json.dump(
                word_index_dict,
                open(
                    os.path.join(config['log_dir'], "word_to_index_dict.json"),
                    'w'))

        pp.pprint(config)
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        sys.stdout.flush()
        real_train(generator,
                   discriminator,
                   oracle_loader,
                   config,
                   word_index_dict,
                   index_word_dict,
                   load_model=load_model)

        if args.dataset == "emnlp_news" or args.dataset == "emnlp_news_small":
            call([
                "python", 'bleu_post_training_emnlp.py',
                os.path.join(os.path.split(config['log_dir'])[0], 'samples'),
                'na'
            ],
                 cwd=".")
        elif args.dataset == "image_coco":
            call([
                "python", 'bleu_post_training.py',
                os.path.join(os.path.split(config['log_dir'])[0], 'samples'),
                'na'
            ],
                 cwd=".")

    elif args.dataset in ['ace0_small']:
        # data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        # seq_len, vocab_size, word_index_dict, index_word_dict = text_precess(data_file)
        seq_len = config['seq_len']
        vocab_size = config['vocab_size']
        # # print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

        # oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture,
                                         vocab_size=config['vocab_size'],
                                         batch_size=args.batch_size,
                                         seq_len=config['seq_len'],
                                         gen_emb_dim=args.gen_emb_dim,
                                         mem_slots=args.mem_slots,
                                         head_size=args.head_size,
                                         num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim,
                                         start_token=args.start_token)
        discriminator = models.get_discriminator(
            args.d_architecture,
            batch_size=args.batch_size,
            seq_len=config['seq_len'],
            vocab_size=config['vocab_size'],
            dis_emb_dim=args.dis_emb_dim,
            num_rep=args.num_rep,
            sn=args.sn)

        # print("gen params = ", count_params(generator.trainable_variables))
        # print("disc params = ", count_params(discriminator.trainable_variables))
        # sys.stdout.flush()

        load_model = False
        if config['load_saved_model'] != "":
            log_dir_path = os.path.dirname(config['load_saved_model'])
            config['log_dir'] = log_dir_path
            config['sample_dir'] = os.path.join(
                os.path.split(log_dir_path)[0], 'samples')
            index_word_dict = load_index_to_word_dict(
                os.path.join(config['log_dir'], "index_to_word_dict.json"))
            word_index_dict = {v: k for k, v in index_word_dict.items()}
            load_model = True
        else:
            if not os.path.exists(config['log_dir']):
                os.makedirs(config['log_dir'])
            # json.dump(index_word_dict, open(os.path.join(config['log_dir'], "index_to_word_dict.json"), 'w'))
            # json.dump(word_index_dict, open(os.path.join(config['log_dir'], "word_to_index_dict.json"), 'w'))

        pp.pprint(config)
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        sys.stdout.flush()
        real_train_traj(generator,
                        discriminator,
                        None,
                        config,
                        None,
                        None,
                        load_model=load_model)

        # if args.dataset == "emnlp_news" or args.dataset == "emnlp_news_small":
        #     call(["python", 'bleu_post_training_emnlp.py', os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'], cwd=".")
        # elif args.dataset == "image_coco":
        #     call(["python", 'bleu_post_training.py', os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'], cwd=".")
    else:
        raise NotImplementedError('{}: unknown dataset!'.format(args.dataset))