def __init__(self, opt):
        super(RelGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = RelGAN_G(cfg.mem_slots, cfg.num_heads, cfg.head_size, cfg.gen_embed_dim, cfg.gen_hidden_dim,
                            cfg.vocab_size, cfg.max_seq_len, cfg.padding_idx, gpu=cfg.CUDA)
        self.dis = RelGAN_D(cfg.dis_embed_dim, cfg.max_seq_len, cfg.num_rep, cfg.vocab_size, cfg.padding_idx,
                            gpu=cfg.CUDA)
        self.init_model()

        # Optimizer
        self.gen_opt = optim.Adam(self.gen.parameters(), lr=cfg.gen_lr)
        self.gen_adv_opt = optim.Adam(self.gen.parameters(), lr=cfg.gen_adv_lr)
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.adv_criterion = nn.BCEWithLogitsLoss()

        # DataLoader
        self.gen_data = GenDataIter(self.gen.sample(cfg.batch_size, cfg.batch_size))

        # Metrics
        self.bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                         real_text=tensor_to_tokens(self.test_data.target, self.test_data.index_word_dict),
                         gram=[2, 3, 4, 5])
        self.self_bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                              real_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                              gram=3)
def LSTM():
    #Step 1: Read data from files and put them into a list
    test_sentences = list()
    with open(args.test_data, 'r') as f:
        for line in f:
            test_sentences.append(line.replace("<eos>", ""))
    temp = list()
    for i in range(0, len(test_sentences)):
        if test_sentences[i] != '\n':
            temp.append(test_sentences[i])
    test_sentences = temp
    print(test_sentences)


    real_sentences = list()
    with codecs.open(args.real_data,'r',encoding='utf8',errors='ignore') as f:
        for line in f:
            real_sentences.append(line)

    #Step 2: BLEU Score
    print("LSTM - LSTM double layer encoding")
    for i in range(1, args.gram + 1):
        bleu = BLEU(test_sentences, real_sentences, i)
        bleu_score = bleu.get_score(ignore=False)
        print("BLEU{} score:{}".format(i,bleu_score)) 
Example #3
    def __init__(self, opt):
        super(LeakGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = LeakGAN_G(cfg.gen_embed_dim, cfg.gen_hidden_dim, cfg.vocab_size, cfg.max_seq_len,
                             cfg.padding_idx, cfg.goal_size, cfg.step_size, cfg.CUDA)
        self.dis = LeakGAN_D(cfg.dis_embed_dim, cfg.vocab_size, cfg.padding_idx, gpu=cfg.CUDA)
        self.init_model()

        # optimizer
        mana_params, work_params = self.gen.split_params()
        mana_opt = optim.Adam(mana_params, lr=cfg.gen_lr)
        work_opt = optim.Adam(work_params, lr=cfg.gen_lr)

        self.gen_opt = [mana_opt, work_opt]
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.BCEWithLogitsLoss()

        # DataLoader
        self.gen_data = GenDataIter(self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis))
        self.dis_data = DisDataIter(self.gen_data.random_batch()['target'], self.train_data.random_batch()['target'])

        # Metrics
        self.bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                         real_text=tensor_to_tokens(self.test_data.target, self.test_data.index_word_dict), gram=3)
        self.self_bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                              real_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                              gram=3)
 def __init__(self, encoder_layer_num, decoder_layer_num, hidden_dim, batch_size, learning_rate, dropout, init_train = True):
     self.encoder_layer_num = encoder_layer_num
     self.decoder_layer_num = decoder_layer_num
     self.hidden_dim = hidden_dim
     self.batch_size = batch_size
     self.learning_rate = learning_rate
     self.dropout = dropout
     self.init_train = init_train
     #---------fix----------
     self.vocab_size = cfg.vocab_size
     self.max_length = cfg.max_length
     self.embedding_matrix = make_embedding_matrix(cfg.all_captions)
     self.SOS_token = cfg.SOS_token
     self.EOS_token = cfg.EOS_token
     self.idx2word_dict = load_dict()
     #----------------------
     
     self.bleu = BLEU('BLEU', gram=[2,3,4,5])
     #self.bleu.reset(test_text = gen_tokens, real_text = self.test_data.tokens)
           
     if init_train:
         self._init_train()
         train_week_stock, train_month_stock, t_month_stock,train_input_cap_vector, train_output_cap_vector = load_training_data()
         self.train_data = batch_generator(train_week_stock, train_month_stock, t_month_stock,train_input_cap_vector, train_output_cap_vector, self.batch_size)
         self.total_iter = len(train_input_cap_vector)
         
         self._init_eval()
         val_week_stock, val_month_stock, val_t_month_stock,val_input_cap_vector, val_output_cap_vector = load_val_data()
         self.val_data = batch_generator(val_week_stock, val_month_stock, val_t_month_stock,val_input_cap_vector, val_output_cap_vector, self.batch_size)
         self.val_total_iter = len(val_input_cap_vector)
    def __init__(self, opt):
        super(SeqGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = SeqGAN_G(cfg.gen_embed_dim, cfg.gen_hidden_dim, cfg.vocab_size, cfg.max_seq_len,
                            cfg.padding_idx, cfg.temperature, gpu=cfg.CUDA)
        self.dis = SeqGAN_D(cfg.dis_embed_dim, cfg.vocab_size, cfg.padding_idx, gpu=cfg.CUDA)
        self.init_model()

        # Optimizer
        self.gen_opt = optim.Adam(self.gen.parameters(), lr=cfg.gen_lr)
        self.gen_adv_opt = optim.Adam(self.gen.parameters(), lr=cfg.gen_lr)
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()

        # DataLoader
        self.gen_data = GenDataIter(self.gen.sample(cfg.batch_size, cfg.batch_size))
        self.dis_data = DisDataIter(self.train_data.random_batch()['target'], self.gen_data.random_batch()['target'])

        # Metrics
        self.bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                         real_text=tensor_to_tokens(self.test_data.target, self.test_data.index_word_dict), gram=3)
        self.self_bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                              real_text=tensor_to_tokens(self.gen_data.target, self.index_word_dict),
                          gram=3)
Example #6
    def __init__(self, opt):
        super(RelbarGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = RelbarGAN_G(cfg.mem_slots,
                               cfg.num_heads,
                               cfg.head_size,
                               cfg.gen_embed_dim,
                               cfg.gen_hidden_dim,
                               cfg.vocab_size,
                               cfg.max_seq_len,
                               cfg.padding_idx,
                               cfg.temperature,
                               cfg.eta,
                               gpu=cfg.CUDA)
        self.dis = RelbarGAN_D(cfg.dis_embed_dim,
                               cfg.max_seq_len,
                               cfg.num_rep,
                               cfg.vocab_size,
                               cfg.padding_idx,
                               gpu=cfg.CUDA)
        self.init_model()

        # Optimizer
        self.gen_opt = optim.Adam(self.gen.parameters(), lr=cfg.gen_lr)
        self.gen_adv_opt = optim.Adam(itertools.chain(
            self.gen.parameters(), [self.gen.temperature, self.gen.eta]),
                                      lr=cfg.gen_adv_lr)
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_pretrain_criterion = nn.BCEWithLogitsLoss()

        # DataLoader
        self.gen_data = GenDataIter(
            self.gen.sample(cfg.batch_size, cfg.batch_size))
        self.dis_data = DisDataIter(self.train_data.random_batch()['target'],
                                    self.gen_data.random_batch()['target'])

        # Metrics
        bleu_gram = list(range(2, cfg.max_seq_len +
                               1)) if cfg.max_seq_len < 5 else [2, 3, 4, 5]
        self.bleu = BLEU(test_text=tensor_to_tokens(self.gen_data.target,
                                                    self.index_word_dict),
                         real_text=tensor_to_tokens(
                             self.test_data.target,
                             self.test_data.index_word_dict),
                         gram=bleu_gram)
        self.self_bleu = BLEU(
            test_text=tensor_to_tokens(self.gen_data.target,
                                       self.index_word_dict),
            real_text=tensor_to_tokens(self.gen_data.target,
                                       self.index_word_dict),
            gram=3)
    def __init__(self, opt):
        self.log = create_logger(__name__, silent=False, to_disk=True,
                                 log_file=cfg.log_filename if cfg.if_test
                                 else [cfg.log_filename, cfg.save_root + 'log.txt'])
        self.sig = Signal(cfg.signal_file)
        self.opt = opt
        self.show_config()

        self.clas = None

        # load dictionary
        self.word2idx_dict, self.idx2word_dict = load_dict(cfg.dataset)

        # Dataloader
        try:
            self.train_data = GenDataIter(cfg.train_data)
            self.test_data = GenDataIter(cfg.test_data, if_test_data=True)
        except:
            pass

        try:
            self.train_data_list = [GenDataIter(cfg.cat_train_data.format(i)) for i in range(cfg.k_label)]
            self.test_data_list = [GenDataIter(cfg.cat_test_data.format(i), if_test_data=True) for i in
                                   range(cfg.k_label)]
            self.clas_data_list = [GenDataIter(cfg.cat_test_data.format(str(i)), if_test_data=True) for i in
                                   range(cfg.k_label)]

            self.train_samples_list = [self.train_data_list[i].target for i in range(cfg.k_label)]
            self.clas_samples_list = [self.clas_data_list[i].target for i in range(cfg.k_label)]
        except:
            pass

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()
        self.clas_criterion = nn.CrossEntropyLoss()

        # Optimizer
        self.clas_opt = None

        # Metrics
        self.bleu = BLEU('BLEU', gram=[2, 3, 4, 5], if_use=cfg.use_bleu)
        self.nll_gen = NLL('NLL_gen', if_use=cfg.use_nll_gen, gpu=cfg.CUDA)
        self.nll_div = NLL('NLL_div', if_use=cfg.use_nll_div, gpu=cfg.CUDA)
        self.self_bleu = BLEU('Self-BLEU', gram=[2, 3, 4], if_use=cfg.use_self_bleu)
        self.clas_acc = ACC(if_use=cfg.use_clas_acc)
        self.ppl = PPL(self.train_data, self.test_data, n_gram=5, if_use=cfg.use_ppl)
        self.all_metrics = [self.bleu, self.nll_gen, self.nll_div, self.self_bleu, self.ppl]
def leakGAN():
    test_sentences = list()
    with codecs.open(args.test_data, 'r',encoding='utf8',errors='ignore') as f:
        for line in f:
            line = line.split(' ', 1)[1] #remove initial EOS
            test_sentences.append(line)
    real_sentences = list()
    with codecs.open(args.real_data,'r',encoding='utf8',errors='ignore') as f:
        for line in f:
            real_sentences.append(line)

    #Step 2: BLEU Score
    print("LSTM - LeakGAN double layer encoding")
    for i in range(1, args.gram + 1):
        bleu = BLEU(test_sentences, real_sentences, i)
        bleu_score = bleu.get_score(ignore=False)
        print("BLEU{} score:{}".format(i,bleu_score))
Example #9
    def __init__(self, opt):
        super(LeakGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = LeakGAN_G(cfg.gen_embed_dim, cfg.gen_hidden_dim,
                             cfg.vocab_size, cfg.max_seq_len, cfg.padding_idx,
                             cfg.goal_size, cfg.step_size, cfg.CUDA)
        self.dis = LeakGAN_D(cfg.dis_embed_dim,
                             cfg.vocab_size,
                             cfg.padding_idx,
                             gpu=cfg.CUDA)

        #LSTM
        self.corpus = dataa.Corpus('dataset/emnlp_news/')
        self.lstm = LSTM.RNNModel('LSTM', len(self.corpus.dictionary), 200,
                                  600, 3, 0.2, False)
        if (cfg.CUDA):
            self.dis.cuda()
            self.gen.cuda()
        self.init_model()

        # optimizer
        mana_params, work_params = self.gen.split_params()
        mana_opt = optim.Adam(mana_params, lr=cfg.gen_lr)
        work_opt = optim.Adam(work_params, lr=cfg.gen_lr)

        self.gen_opt = [mana_opt, work_opt]
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()

        # DataLoader
        self.gen_data = GenDataIter(
            self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis))
        self.dis_data = DisDataIter(self.gen_data.random_batch()['target'],
                                    self.oracle_data.random_batch()['target'])

        # Metrics
        self.bleu3 = BLEU(test_text=tensor_to_tokens(self.gen_data.target,
                                                     self.index_word_dict),
                          real_text=tensor_to_tokens(self.test_data.target,
                                                     self.index_word_dict),
                          gram=3)
Example #10
class BasicInstructor:
    def __init__(self, opt):
        self.log = create_logger(__name__,
                                 silent=False,
                                 to_disk=True,
                                 log_file=cfg.log_filename if cfg.if_test else
                                 [cfg.log_filename, cfg.save_root + 'log.txt'])
        self.sig = Signal(cfg.signal_file)
        self.opt = opt
        self.show_config()

        self.clas = None

        # load dictionary
        self.word2idx_dict, self.idx2word_dict = load_dict(cfg.dataset)

        # Dataloader
        try:
            self.train_data = GenDataIter(cfg.train_data)
            self.test_data = GenDataIter(cfg.test_data, if_test_data=True)
        except:
            pass

        try:
            self.train_data_list = [
                GenDataIter(cfg.cat_train_data.format(i))
                for i in range(cfg.k_label)
            ]
            self.test_data_list = [
                GenDataIter(cfg.cat_test_data.format(i), if_test_data=True)
                for i in range(cfg.k_label)
            ]
            self.clas_data_list = [
                GenDataIter(cfg.cat_test_data.format(str(i)),
                            if_test_data=True) for i in range(cfg.k_label)
            ]

            self.train_samples_list = [
                self.train_data_list[i].target for i in range(cfg.k_label)
            ]
            self.clas_samples_list = [
                self.clas_data_list[i].target for i in range(cfg.k_label)
            ]
        except:
            pass

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()
        self.clas_criterion = nn.CrossEntropyLoss()

        # Optimizer
        self.clas_opt = None

        # Metrics
        self.bleu = BLEU('BLEU', gram=[2, 3, 4, 5], if_use=cfg.use_bleu)
        self.nll_gen = NLL('NLL_gen', if_use=cfg.use_nll_gen, gpu=cfg.CUDA)
        self.nll_div = NLL('NLL_div', if_use=cfg.use_nll_div, gpu=cfg.CUDA)
        self.self_bleu = BLEU('Self-BLEU',
                              gram=[2, 3, 4],
                              if_use=cfg.use_self_bleu)
        self.clas_acc = ACC(if_use=cfg.use_clas_acc)
        self.ppl = PPL(self.train_data,
                       self.test_data,
                       n_gram=5,
                       if_use=cfg.use_ppl)
        self.all_metrics = [
            self.bleu, self.nll_gen, self.nll_div, self.self_bleu, self.ppl
        ]

    def _run(self):
        print('Nothing to run in Basic Instructor!')
        pass

    def _test(self):
        pass

    def init_model(self):
        if cfg.dis_pretrain:
            self.log.info('Load pre-trained discriminator: {}'.format(
                cfg.pretrained_dis_path))
            self.dis.load_state_dict(torch.load(cfg.pretrained_dis_path))
        if cfg.gen_pretrain:
            self.log.info('Load MLE pre-trained generator: {}'.format(
                cfg.pretrained_gen_path))
            self.gen.load_state_dict(torch.load(cfg.pretrained_gen_path))

        if cfg.CUDA:
            self.gen = self.gen.cuda()
            self.dis = self.dis.cuda()

    def train_gen_epoch(self, model, data_loader, criterion, optimizer):
        total_loss = 0
        for i, data in enumerate(data_loader):
            inp, target = data['input'], data['target']
            if cfg.CUDA:
                inp, target = inp.cuda(), target.cuda()

            hidden = model.init_hidden(data_loader.batch_size)
            pred = model.forward(inp, hidden)
            loss = criterion(pred, target.view(-1))
            self.optimize(optimizer, loss, model)
            total_loss += loss.item()
        return total_loss / len(data_loader)

    def train_dis_epoch(self, model, data_loader, criterion, optimizer):
        total_loss = 0
        total_acc = 0
        total_num = 0
        for i, data in enumerate(data_loader):
            inp, target = data['input'], data['target']
            if cfg.CUDA:
                inp, target = inp.cuda(), target.cuda()

            pred = model.forward(inp)
            loss = criterion(pred, target)
            self.optimize(optimizer, loss, model)

            total_loss += loss.item()
            total_acc += torch.sum((pred.argmax(dim=-1) == target)).item()
            total_num += inp.size(0)

        total_loss /= len(data_loader)
        total_acc /= total_num
        return total_loss, total_acc

    def train_classifier(self, epochs):
        """
        Train a classifier used to compute the classification-accuracy metric for category text generation.

        Note: the train and test data for the classifier are swapped relative to the generator:
        the classifier is trained on self.clas_samples_list (the generator's test split) and
        evaluated on self.train_samples_list, because its job is to score samples produced by
        a generator that was itself trained on self.train_samples_list.

        Since synthetic (oracle) data has no test split, the synthetic-data experiments
        don't need a classifier.
        """
        import copy

        # Prepare data for Classifier
        clas_data = CatClasDataIter(self.clas_samples_list)
        eval_clas_data = CatClasDataIter(self.train_samples_list)
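        # Note: clas_data (the classifier's training set) is built from clas_samples_list,
        # i.e. the generator's test split, while eval_clas_data uses the generator's
        # training split -- the swap described in the docstring above.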

        max_acc = 0
        best_clas = None
        for epoch in range(epochs):
            c_loss, c_acc = self.train_dis_epoch(self.clas, clas_data.loader,
                                                 self.clas_criterion,
                                                 self.clas_opt)
            _, eval_acc = self.eval_dis(self.clas, eval_clas_data.loader,
                                        self.clas_criterion)
            if eval_acc > max_acc:
                best_clas = copy.deepcopy(
                    self.clas.state_dict())  # save the best classifier
                max_acc = eval_acc
            self.log.info(
                '[PRE-CLAS] epoch %d: c_loss = %.4f, c_acc = %.4f, eval_acc = %.4f, max_eval_acc = %.4f',
                epoch, c_loss, c_acc, eval_acc, max_acc)
        self.clas.load_state_dict(
            copy.deepcopy(best_clas))  # Reload the best classifier

    @staticmethod
    def eval_dis(model, data_loader, criterion):
        total_loss = 0
        total_acc = 0
        total_num = 0
        with torch.no_grad():
            for i, data in enumerate(data_loader):
                inp, target = data['input'], data['target']
                if cfg.CUDA:
                    inp, target = inp.cuda(), target.cuda()

                pred = model.forward(inp)
                loss = criterion(pred, target)
                total_loss += loss.item()
                total_acc += torch.sum((pred.argmax(dim=-1) == target)).item()
                total_num += inp.size(0)
            total_loss /= len(data_loader)
            total_acc /= total_num
        return total_loss, total_acc

    @staticmethod
    def optimize_multi(opts, losses):
        for i, (opt, loss) in enumerate(zip(opts, losses)):
            opt.zero_grad()
            loss.backward(retain_graph=True if i < len(opts) - 1 else False)
            opt.step()
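        # retain_graph=True on all but the last loss keeps the shared computation graph
        # alive, e.g. so LeakGAN's worker loss can still backprop after the manager loss.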

    @staticmethod
    def optimize(opt, loss, model=None, retain_graph=False):
        opt.zero_grad()
        loss.backward(retain_graph=retain_graph)
        if model is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.clip_norm)
        opt.step()

    def show_config(self):
        self.log.info(100 * '=')
        self.log.info('> training arguments:')
        for arg in vars(self.opt):
            self.log.info('>>> {0}: {1}'.format(arg, getattr(self.opt, arg)))
        self.log.info(100 * '=')

    def cal_metrics(self, fmt_str=False):
        """
        Calculate metrics
        :param fmt_str: whether to return a formatted string for logging
        """
        with torch.no_grad():
            # Prepare data for evaluation
            eval_samples = self.gen.sample(cfg.samples_num, 4 * cfg.batch_size)
            gen_data = GenDataIter(eval_samples)
            gen_tokens = tensor_to_tokens(eval_samples, self.idx2word_dict)
            gen_tokens_s = tensor_to_tokens(self.gen.sample(200, 200),
                                            self.idx2word_dict)

            # Reset metrics
            self.bleu.reset(test_text=gen_tokens,
                            real_text=self.test_data.tokens)
            self.nll_gen.reset(self.gen, self.train_data.loader)
            self.nll_div.reset(self.gen, gen_data.loader)
            self.self_bleu.reset(test_text=gen_tokens_s, real_text=gen_tokens)
            self.ppl.reset(gen_tokens)

        if fmt_str:
            return ', '.join([
                '%s = %s' % (metric.get_name(), metric.get_score())
                for metric in self.all_metrics
            ])
        else:
            return [metric.get_score() for metric in self.all_metrics]
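    # Illustrative usage (not in the original source): the concrete instructors typically log
    #   self.log.info('epoch %d : %s' % (epoch, self.cal_metrics(fmt_str=True)))
    # so fmt_str=True yields a single "name = score, ..." string per evaluation.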

    def cal_metrics_with_label(self, label_i):
        assert type(label_i) == int, 'missing label'

        with torch.no_grad():
            # Prepare data for evaluation
            eval_samples = self.gen.sample(cfg.samples_num,
                                           8 * cfg.batch_size,
                                           label_i=label_i)
            gen_data = GenDataIter(eval_samples)
            gen_tokens = tensor_to_tokens(eval_samples, self.idx2word_dict)
            gen_tokens_s = tensor_to_tokens(
                self.gen.sample(200, 200, label_i=label_i), self.idx2word_dict)
            clas_data = CatClasDataIter([eval_samples], label_i)

            # Reset metrics
            self.bleu.reset(test_text=gen_tokens,
                            real_text=self.test_data_list[label_i].tokens)
            self.nll_gen.reset(self.gen, self.train_data_list[label_i].loader,
                               label_i)
            self.nll_div.reset(self.gen, gen_data.loader, label_i)
            self.self_bleu.reset(test_text=gen_tokens_s, real_text=gen_tokens)
            self.clas_acc.reset(self.clas, clas_data.loader)
            self.ppl.reset(gen_tokens)

        return [metric.get_score() for metric in self.all_metrics]

    def comb_metrics(self, fmt_str=False):
        all_scores = [
            self.cal_metrics_with_label(label_i)
            for label_i in range(cfg.k_label)
        ]
        all_scores = np.array(
            all_scores).T.tolist()  # each row for each metric

        if fmt_str:
            return ', '.join([
                '%s = %s' % (metric.get_name(), score)
                for (metric, score) in zip(self.all_metrics, all_scores)
            ])
        return all_scores

    def _save(self, phase, epoch):
        """Save model state dict and generator's samples"""
        if phase != 'ADV':
            torch.save(
                self.gen.state_dict(),
                cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_{}_{}_{:05d}.txt'.format(
            phase, cfg.samples_num, epoch)
        samples = self.gen.sample(5000, cfg.batch_size)
        write_tokens(save_sample_path,
                     tensor_to_tokens(samples, self.idx2word_dict))

    def update_temperature(self, i, N):
        self.gen.temperature.data = torch.Tensor(
            [get_fixed_temperature(cfg.temperature, i, N, cfg.temp_adpt)])
        if cfg.CUDA:
            self.gen.temperature.data = self.gen.temperature.data.cuda()
Example #11
 def evaluate(self):
     ''' Evaluates the generator using various metrics. '''
     bleu = BLEU(model=self.G)
     perplexity = Perplexity(model=self.G)
     return bleu, perplexity
class LeakGANInstructor(BasicInstructor):
    def __init__(self, opt):
        super(LeakGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = LeakGAN_G(cfg.gen_embed_dim, cfg.gen_hidden_dim,
                             cfg.vocab_size, cfg.max_seq_len, cfg.padding_idx,
                             cfg.goal_size, cfg.step_size, cfg.CUDA)
        self.dis = LeakGAN_D(cfg.dis_embed_dim,
                             cfg.vocab_size,
                             cfg.padding_idx,
                             gpu=cfg.CUDA)
        self.init_model()

        # optimizer
        mana_params, work_params = self.gen.split_params()
        mana_opt = optim.Adam(mana_params, lr=cfg.gen_lr)
        work_opt = optim.Adam(work_params, lr=cfg.gen_lr)

        self.gen_opt = [mana_opt, work_opt]
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()

        # DataLoader
        self.gen_data = GenDataIter(
            self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis))
        self.dis_data = DisDataIter(self.gen_data.random_batch()['target'],
                                    self.oracle_data.random_batch()['target'])

        # Metrics
        self.bleu3 = BLEU(test_text=tensor_to_tokens(self.gen_data.target,
                                                     self.index_word_dict),
                          real_text=tensor_to_tokens(self.test_data.target,
                                                     self.index_word_dict),
                          gram=3)

    def _run(self):
        for inter_num in range(cfg.inter_epoch):
            self.log.info('>>> Interleaved Round %d...' % inter_num)
            self.sig.update()  # update signal
            if self.sig.pre_sig:
                # =====DISCRIMINATOR PRE-TRAINING=====
                if not cfg.dis_pretrain:
                    self.log.info('Starting Discriminator Training...')
                    self.train_discriminator(cfg.d_step, cfg.d_epoch)
                    if cfg.if_save and not cfg.if_test:
                        torch.save(self.dis.state_dict(),
                                   cfg.pretrained_dis_path)
                        print('Save pre-trained discriminator: {}'.format(
                            cfg.pretrained_dis_path))

                # =====GENERATOR MLE TRAINING=====
                if not cfg.gen_pretrain:
                    self.log.info('Starting Generator MLE Training...')
                    self.pretrain_generator(cfg.MLE_train_epoch)
                    if cfg.if_save and not cfg.if_test:
                        torch.save(self.gen.state_dict(),
                                   cfg.pretrained_gen_path)
                        print('Save pre-trained generator: {}'.format(
                            cfg.pretrained_gen_path))
            else:
                self.log.info(
                    '>>> Stop by pre_signal! Skip to adversarial training...')
                break

        # =====ADVERSARIAL TRAINING=====
        self.log.info('Starting Adversarial Training...')
        self.log.info('Initial generator: %s' %
                      (str(self.cal_metrics(fmt_str=True))))

        for adv_epoch in range(cfg.ADV_train_epoch):
            self.log.info('-----\nADV EPOCH %d\n-----' % adv_epoch)
            self.sig.update()
            if self.sig.adv_sig:
                self.adv_train_generator(cfg.ADV_g_step)  # Generator
                self.train_discriminator(cfg.ADV_d_step, cfg.ADV_d_epoch,
                                         'ADV')  # Discriminator

                if adv_epoch % cfg.adv_log_step == 0:
                    if cfg.if_save and not cfg.if_test:
                        self._save('ADV', adv_epoch)
            else:
                self.log.info(
                    '>>> Stop by adv_signal! Finishing adversarial training...'
                )
                break

    def _test(self):
        print('>>> Begin test...')
        self._run()
        pass

    def pretrain_generator(self, epochs):
        """
        Max Likelihood Pretraining for the gen

        - gen_opt: [mana_opt, work_opt]
        """
        for epoch in range(epochs):
            self.sig.update()
            if self.sig.pre_sig:
                pre_mana_loss = 0
                pre_work_loss = 0

                # =====Train=====
                for i, data in enumerate(self.oracle_data.loader):
                    inp, target = data['input'], data['target']
                    if cfg.CUDA:
                        inp, target = inp.cuda(), target.cuda()

                    mana_loss, work_loss = self.gen.pretrain_loss(
                        target, self.dis)
                    self.optimize_multi(self.gen_opt, [mana_loss, work_loss])
                    pre_mana_loss += mana_loss.data.item()
                    pre_work_loss += work_loss.data.item()
                pre_mana_loss = pre_mana_loss / len(self.oracle_data.loader)
                pre_work_loss = pre_work_loss / len(self.oracle_data.loader)

                # =====Test=====
                if epoch % cfg.pre_log_step == 0:
                    self.log.info(
                        '[MLE-GEN] epoch %d : pre_mana_loss = %.4f, pre_work_loss = %.4f, %s'
                        % (epoch, pre_mana_loss, pre_work_loss,
                           self.cal_metrics(fmt_str=True)))

                    if cfg.if_save and not cfg.if_test:
                        self._save('MLE', epoch)
            else:
                self.log.info(
                    '>>> Stop by pre signal, skip to adversarial training...')
                break

    def adv_train_generator(self, g_step, current_k=0):
        """
        The generator is trained via policy gradient, using rewards from the discriminator.
        Training is done for g_step batches.
        """

        rollout_func = rollout.ROLLOUT(self.gen, cfg.CUDA)
        adv_mana_loss = 0
        adv_work_loss = 0
        for step in range(g_step):
            with torch.no_grad():
                gen_samples = self.gen.sample(
                    cfg.batch_size, cfg.batch_size, self.dis,
                    train=True)  # !!! train=True, the only place
                inp, target = self.gen_data.prepare(gen_samples, gpu=cfg.CUDA)

            # =====Train=====
            rewards = rollout_func.get_reward_leakgan(
                target, cfg.rollout_num, self.dis,
                current_k).cpu()  # reward with MC search
            mana_loss, work_loss = self.gen.adversarial_loss(
                target, rewards, self.dis)

            # update parameters
            self.optimize_multi(self.gen_opt, [mana_loss, work_loss])
            adv_mana_loss += mana_loss.data.item()
            adv_work_loss += work_loss.data.item()
        # =====Test=====
        self.log.info(
            '[ADV-GEN] adv_mana_loss = %.4f, adv_work_loss = %.4f, %s' %
            (adv_mana_loss / g_step, adv_work_loss / g_step,
             self.cal_metrics(fmt_str=True)))

    def train_discriminator(self, d_step, d_epoch, phrase='MLE'):
        """
        Training the discriminator on real_data_samples (positive) and generated samples from gen (negative).
        Samples are drawn d_step times, and the discriminator is trained for d_epoch epochs each time.
        """
        for step in range(d_step):
            # prepare loader for training
            pos_samples = self.oracle_data.target
            neg_samples = self.gen.sample(cfg.samples_num, cfg.batch_size,
                                          self.dis)
            self.dis_data.reset(pos_samples, neg_samples)

            for epoch in range(d_epoch):
                # =====Train=====
                d_loss, train_acc = self.train_dis_epoch(
                    self.dis, self.dis_data.loader, self.dis_criterion,
                    self.dis_opt)

            # =====Test=====
            self.log.info(
                '[%s-DIS] d_step %d: d_loss = %.4f, train_acc = %.4f,' %
                (phrase, step, d_loss, train_acc))

    def cal_metrics(self, fmt_str=False):
        self.gen_data.reset(
            self.gen.sample(cfg.samples_num, cfg.batch_size, self.dis))
        self.bleu3.test_text = tensor_to_tokens(self.gen_data.target,
                                                self.index_word_dict)
        bleu3_score = self.bleu3.get_score(ignore=False)

        with torch.no_grad():
            gen_nll = 0
            for data in self.oracle_data.loader:
                inp, target = data['input'], data['target']
                if cfg.CUDA:
                    inp, target = inp.cuda(), target.cuda()
                loss = self.gen.batchNLLLoss(target, self.dis)
                gen_nll += loss.item()
            gen_nll /= len(self.oracle_data.loader)

        if fmt_str:
            return 'BLEU-3 = %.4f, gen_NLL = %.4f,' % (bleu3_score, gen_nll)
        return bleu3_score, gen_nll

    def _save(self, phrase, epoch):
        torch.save(
            self.gen.state_dict(),
            cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phrase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_{}_{:05d}.txt'.format(
            phrase, epoch)
        samples = self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis)
        write_tokens(save_sample_path,
                     tensor_to_tokens(samples, self.index_word_dict))
class Model():
    def __init__(self, encoder_layer_num, decoder_layer_num, hidden_dim, batch_size, learning_rate, dropout, init_train = True):
        self.encoder_layer_num = encoder_layer_num
        self.decoder_layer_num = decoder_layer_num
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.dropout = dropout
        self.init_train = init_train
        #---------fix----------
        self.vocab_size = cfg.vocab_size
        self.max_length = cfg.max_length
        self.embedding_matrix = make_embedding_matrix(cfg.all_captions)
        self.SOS_token = cfg.SOS_token
        self.EOS_token = cfg.EOS_token
        self.idx2word_dict = load_dict()
        #----------------------
        
        self.bleu = BLEU('BLEU', gram=[2,3,4,5])
        #self.bleu.reset(test_text = gen_tokens, real_text = self.test_data.tokens)
              
        if init_train:
            self._init_train()
            train_week_stock, train_month_stock, t_month_stock,train_input_cap_vector, train_output_cap_vector = load_training_data()
            self.train_data = batch_generator(train_week_stock, train_month_stock, t_month_stock,train_input_cap_vector, train_output_cap_vector, self.batch_size)
            self.total_iter = len(train_input_cap_vector)
            
            self._init_eval()
            val_week_stock, val_month_stock, val_t_month_stock,val_input_cap_vector, val_output_cap_vector = load_val_data()
            self.val_data = batch_generator(val_week_stock, val_month_stock, val_t_month_stock,val_input_cap_vector, val_output_cap_vector, self.batch_size)
            self.val_total_iter = len(val_input_cap_vector)
            
    # Let TensorFlow grow GPU memory allocation on demand.
    def gpu_session_config(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        return config

    def _init_train(self):
        self.train_graph = tf.Graph()
        with self.train_graph.as_default():
            with tf.variable_scope('encoder_input'):
                self.week_input = tf.placeholder(tf.float64, shape= [None, 7], name='week_input')
                self.month_input = tf.placeholder(tf.float64, shape=[None, 28], name='month_input')
                self.t_month_input = tf.placeholder(tf.float64, shape=[None, 84], name='t_month_input')

            with tf.variable_scope("decoder_input"):
                self.decoder_input = tf.placeholder(tf.int32, [None, self.max_length], name='input')
                self.decoder_target = tf.placeholder(tf.int32, [None, self.max_length], name='target')
                self.decoder_targets_length = tf.placeholder(tf.int32, shape = [self.batch_size, ], name = 'targets_length')
                
            encoded_output, encoded_state = encoder_module(self.week_input,
                                                         self.month_input,
                                                         self.t_month_input,
                                                         self.encoder_layer_num,
                                                         self.decoder_layer_num,
                                                         self.hidden_dim)

            decoder_output, decoder_state = decoder_module(encoded_state,
                                                          encoded_output,
                                                          self.decoder_input,
                                                          self.decoder_targets_length,
                                                          self.embedding_matrix,
                                                          self.decoder_layer_num,
                                                          self.hidden_dim,
                                                          self.max_length,
                                                          self.vocab_size,
                                                          self.batch_size,
                                                          self.dropout,
                                                          self.SOS_token, 
                                                          self.EOS_token, 
                                                          train = True)

            self.logits = decoder_output.rnn_output
            # training output
            self.sample_id = decoder_output.sample_id
            
            self._init_optimizer()
            
            self.train_init = tf.global_variables_initializer()
            self.train_saver = tf.train.Saver()
        self.train_session = tf.Session(graph=self.train_graph, config = self.gpu_session_config())
        

    def _init_optimizer(self):
        #loss mask
        mask = tf.cast(tf.sequence_mask(self.decoder_targets_length, self.max_length),tf.float64)
        self.loss = tf.contrib.seq2seq.sequence_loss(logits= self.logits,
                                                    targets = self.decoder_target,
                                                    weights = mask,
                                                    average_across_timesteps = True,
                                                    average_across_batch = True)
        #tf.summary.scalar('loss', self.loss)
        #self.summary_op = tf.summary.merge_all()
 
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients,5.0)
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(zip(clipped_gradients, params))
        

    # Computed per batch
    def cal_metrics(self, infer_text, real_text):
        self.bleu.reset(infer_text = infer_text, real_text = real_text)
        return self.bleu.get_score()
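    # Note: with gram=[2, 3, 4, 5] configured in __init__, get_score() is expected to return
    # one value per n-gram order; eval() below accumulates these into all_bleu (BLEU-2..BLEU-5).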

    # bleu, greedy/beam search init
    def _init_eval(self):
        self.eval_graph = tf.Graph()
        with self.eval_graph.as_default():
            self.eval_week_input = tf.placeholder(tf.float64, shape= [None, 7])
            self.eval_month_input = tf.placeholder(tf.float64, shape=[None, 28])
            self.eval_t_month_input = tf.placeholder(tf.float64, shape=[None, 84])
            self.eval_decoder_targets_length = tf.placeholder(tf.int32, shape = [self.batch_size, ])
            eval_encoded_output, eval_encoded_state = encoder_module(self.eval_week_input,
                                                                     self.eval_month_input,
                                                                     self.eval_t_month_input,
                                                                     self.encoder_layer_num,
                                                                     self.decoder_layer_num,
                                                                     self.hidden_dim)
                
            self.eval_decoder_output, eval_decoder_state = decoder_module(eval_encoded_state,
                                                                          eval_encoded_output,
                                                                          None,
                                                                          self.eval_decoder_targets_length,
                                                                          self.embedding_matrix,
                                                                          self.decoder_layer_num,
                                                                          self.hidden_dim,
                                                                          self.max_length,
                                                                          self.vocab_size,
                                                                          self.batch_size,
                                                                          self.dropout,
                                                                          self.SOS_token, 
                                                                          self.EOS_token, 
                                                                          train = False)
            
            self.predicted_ids = tf.identity(self.eval_decoder_output.predicted_ids)
            self.eval_saver = tf.train.Saver()
        self.eval_session = tf.Session(graph=self.eval_graph,config=self.gpu_session_config())       
            
    def train_epoch(self, epochs):
        if not self.init_train:
            raise Exception('Train graph is not initialized')
        with self.train_graph.as_default():
            if os.path.isfile(cfg.save_path + '.meta'):
                print("##########################")
                print('#     Model restore..    #')
                print("##########################")
                self.train_saver.restore(self.train_session, cfg.save_path)
            else:
                self.train_session.run(self.train_init)
            total_loss = 0
            total_step = 0
            start_time =time.time()
            for e in range(epochs):
                for step in range(self.total_iter// self.batch_size):
                    data = next(self.train_data)
                    week_stock = data['week_stock']
                    month_stock = data['month_stock']
                    t_month_stock = data['t_month_stock']
                    decoder_input = data['decoder_input']
                    decoder_target = data['decoder_target']
                    batch_seq = batch_seq_len(data['decoder_target'])
                    _, loss, sample_id = self.train_session.run([self.optimizer, self.loss, self.sample_id], 
                                                            feed_dict = {self.week_input : week_stock,
                                                                         self.month_input : month_stock,
                                                                         self.t_month_input : t_month_stock,
                                                                         self.decoder_input : decoder_input,
                                                                         self.decoder_target : decoder_target,
                                                                        self.decoder_targets_length : batch_seq})
#                     total_loss += loss
#                 total_step += self.total_iter
#                 loss = total_loss/total_step
                end = time.time()
                print('epoch: {}|{}  minibatch loss: {:.6f}   Time: {:.1f} min'.format(e+1, epochs, loss, (end-start_time)/60 ))
                
                if e % 50 ==0:
                    self.train_saver.save(self.train_session, cfg.save_path)
                    # Pick a random sample index (sid) and decode its training target/output text
                    sid = random.randint(0, self.batch_size-1)
                    target_text = decode_text(decoder_target[sid],self.idx2word_dict)
                    output_text = decode_text(sample_id[sid],self.idx2word_dict)
                    print('============ training sample text =============')
                    print('training_target :' + target_text)
                    print('training_output :' + output_text)
                    print('===============================================')
                    self.eval()

    def eval(self):
        with self.eval_graph.as_default():
            self.eval_saver.restore(self.eval_session, cfg.save_path)
            all_bleu = [0] * 4
            eval_mask_weights = tf.ones(shape=[self.batch_size, self.max_length],dtype=tf.float64)
            for step in range(self.val_total_iter//self.batch_size):
                data = next(self.val_data)
                week_stock = data['week_stock']
                month_stock = data['month_stock']
                t_month_stock = data['t_month_stock']
                batch_seq = batch_seq_len(data['decoder_target'])
                #beam search_output
                beam_output = self.eval_session.run([self.predicted_ids], 
                                                    feed_dict = {self.eval_week_input : week_stock,
                                                                 self.eval_month_input : month_stock,
                                                                 self.eval_t_month_input : t_month_stock,
                                                                 self.eval_decoder_targets_length : batch_seq
                                                                })   
                
                target_text = idx_to_text(data['decoder_input'][:,1:],self.idx2word_dict) 
                target_text = remove_sent_pad(target_text)
                
                beam_output = np.squeeze(np.array(beam_output),axis=0)
                output_text = idx_to_text(beam_output[:,:,0], self.idx2word_dict)
                bleu_score = self.cal_metrics(target_text, output_text)

                for idx,score in enumerate(bleu_score):
                    all_bleu[idx] += score
            print('================ BLEU score ================')
            num_batches = self.val_total_iter // self.batch_size
            for idx, bleu in enumerate(all_bleu):  # gram orders 2, 3, 4, 5
                print('BLEU-{} : {}'.format(idx + 2, bleu / max(num_batches, 1)))
            sid = random.randint(0, self.batch_size-1)
            target_text = decode_text(data['decoder_target'][sid],self.idx2word_dict)
            output_text = decode_text(beam_output[sid,:,0],self.idx2word_dict)
            print('============= Beam search text =============')
            print('infer_target : ' + target_text)
            print('beam_search  : ' + output_text)
            print('============================================')
Example #14
class LeakGANInstructor(BasicInstructor):
    def __init__(self, opt):
        super(LeakGANInstructor, self).__init__(opt)

        # generator, discriminator
        self.gen = LeakGAN_G(cfg.gen_embed_dim, cfg.gen_hidden_dim,
                             cfg.vocab_size, cfg.max_seq_len, cfg.padding_idx,
                             cfg.goal_size, cfg.step_size, cfg.CUDA)
        self.dis = LeakGAN_D(cfg.dis_embed_dim,
                             cfg.vocab_size,
                             cfg.padding_idx,
                             gpu=cfg.CUDA)

        #LSTM
        self.corpus = dataa.Corpus('dataset/emnlp_news/')
        self.lstm = LSTM.RNNModel('LSTM', len(self.corpus.dictionary), 200,
                                  600, 3, 0.2, False)
        if (cfg.CUDA):
            self.dis.cuda()
            self.gen.cuda()
        self.init_model()

        # optimizer
        mana_params, work_params = self.gen.split_params()
        mana_opt = optim.Adam(mana_params, lr=cfg.gen_lr)
        work_opt = optim.Adam(work_params, lr=cfg.gen_lr)

        self.gen_opt = [mana_opt, work_opt]
        self.dis_opt = optim.Adam(self.dis.parameters(), lr=cfg.dis_lr)

        # Criterion
        self.mle_criterion = nn.NLLLoss()
        self.dis_criterion = nn.CrossEntropyLoss()

        # DataLoader
        self.gen_data = GenDataIter(
            self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis))
        self.dis_data = DisDataIter(self.gen_data.random_batch()['target'],
                                    self.oracle_data.random_batch()['target'])

        # Metrics
        self.bleu3 = BLEU(test_text=tensor_to_tokens(self.gen_data.target,
                                                     self.index_word_dict),
                          real_text=tensor_to_tokens(self.test_data.target,
                                                     self.index_word_dict),
                          gram=3)

    def _run(self):
        for inter_num in range(cfg.inter_epoch):
            self.log.info('>>> Interleaved Round %d...' % inter_num)
            self.sig.update()  # update signal
            if self.sig.pre_sig:
                # =====DISCRIMINATOR PRE-TRAINING=====
                if not cfg.dis_pretrain:
                    self.log.info('Starting Discriminator Training...')
                    self.train_discriminator(cfg.d_step, cfg.d_epoch)
                    if cfg.if_save and not cfg.if_test:
                        torch.save(self.dis.state_dict(),
                                   cfg.pretrained_dis_path)
                        print('Save pre-trained discriminator: {}'.format(
                            cfg.pretrained_dis_path))

                # =====GENERATOR MLE TRAINING=====
                if not cfg.gen_pretrain:
                    self.log.info('Starting Generator MLE Training...')
                    self.pretrain_generator(cfg.MLE_train_epoch)
                    if cfg.if_save and not cfg.if_test:
                        torch.save(self.gen.state_dict(),
                                   cfg.pretrained_gen_path)
                        print('Save pre-trained generator: {}'.format(
                            cfg.pretrained_gen_path))
            else:
                self.log.info(
                    '>>> Stop by pre_signal! Skip to adversarial training...')
                break

        # =====ADVERSARIAL TRAINING=====
        self.log.info('Starting Adversarial Training...')
        self.log.info('Initial generator: %s' %
                      (str(self.cal_metrics(fmt_str=True))))

        for adv_epoch in range(cfg.ADV_train_epoch):
            self.log.info('-----\nADV EPOCH %d\n-----' % adv_epoch)
            self.sig.update()
            if self.sig.adv_sig:
                self.adv_train_generator(cfg.ADV_g_step)  # Generator
                self.train_discriminator(cfg.ADV_d_step, cfg.ADV_d_epoch,
                                         'ADV')  # Discriminator

                if adv_epoch % cfg.adv_log_step == 0:
                    if cfg.if_save and not cfg.if_test:
                        self._save('ADV', adv_epoch)
            else:
                self.log.info(
                    '>>> Stop by adv_signal! Finishing adversarial training...'
                )
                break

    def string2bins(self, bit_string, n_bins):
        n_bits = int(math.log(n_bins, 2))
        return [
            bit_string[i:i + n_bits] for i in range(0, len(bit_string), n_bits)
        ]
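    # Illustrative example (assumed behaviour, not in the original source): with
    # n_bins = 4 (2 bits per chunk), string2bins('110100', 4) -> ['11', '01', '00'];
    # each chunk is later turned into a bin index via int(chunk, 2).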

    def LSTM_layer_1(self, intermediate_file, bins_num):
        print('>>> Begin test...')
        print('Begin with LSTM Layer')
        #First layer- LSTM layer
        epoch_start_time = time.time()
        seed = 1111
        data_root = './decode/'
        #Reproducibility
        torch.manual_seed(seed)
        if cfg.CUDA:
            torch.cuda.manual_seed(seed)
        with open("leakGAN_instructor/real_data/emnlp_news.pt", 'rb') as f:
            self.lstm = torch.load(f)
        if cfg.CUDA:
            self.lstm.cuda()
        emnlp_data = 'dataset/emnlp_news/'
        corpus = dataa.Corpus(emnlp_data)
        ntokens = len(corpus.dictionary)
        idx2word_file = data_root + "idx2word_1.txt"
        word2idx_file = data_root + "word2idx_1.txt"
        with open(idx2word_file, "wb") as fp:  #Pickling
            pickle.dump(corpus.dictionary.idx2word, fp)
        with open(word2idx_file, "wb") as fp:  #Pickling
            pickle.dump(corpus.dictionary.word2idx, fp)
        hidden = self.lstm.init_hidden(1)
        input = torch.randint(ntokens, (1, 1), dtype=torch.long)

        if cfg.CUDA:
            input.data = input.data.cuda()
        print("Finished Initializing LSTM Model")
        #Step 1: Get secret data
        secret_file = open("leakGAN_instructor/real_data/secret_file.txt", 'r')
        secret_data = secret_file.read()
        #Step 2: Compress string into binary string
        bit_string = ''.join(
            bin(ord(letter))[2:].zfill(8) for letter in secret_data)
        #print(bit_string)
        bit_string = '111011100101000111000011110111101111110111000110011010110110'  # hard-coded test string that overrides the secret data read above
        #In this first layer we use 256 bins (8 bits each), so 64 secret bits are encoded as 8 words
        #Step 3: Divide into bins
        secret_text = [
            int(i, 2) for i in self.string2bins(bit_string, bins_num)
        ]  #convert to bins
        #Step 4: Divide vocabulary into bins, zero words not in the bin
        if bins_num >= 2:
            tokens = list(range(ntokens))  #indices of words
            random.shuffle(tokens)  #randomize

            #Words in each bin
            words_in_bin = int(ntokens / bins_num)
            #leftover tokens (ntokens % bins_num) are spread over the first bins below
            leftover = int(ntokens % bins_num)
            bins = [
                tokens[i:i + words_in_bin]
                for i in range(0, ntokens - leftover, words_in_bin)
            ]  # words to keep in each bin
            for i in range(len(bins)):
                if (i == leftover):
                    break
                bins[i].append(tokens[i + words_in_bin * bins_num])
            print("Len of bins in 1st layer: {}".format(len(bins)))
            #save bins into key 1
            key1 = data_root + "lstm_key1.txt"
            with open(key1, "wb") as fp:  #Pickling
                pickle.dump(bins, fp)
            zero = [list(set(tokens) - set(bin_)) for bin_ in bins]
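            # zero[i] lists the token indices *outside* bin i; during generation their
            # sampling weights are zeroed (see index_fill_ below), forcing the emitted
            # word to come from the bin that encodes the current secret chunk.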

        print('Finished Initializing First LSTM Layer')
        print('time: {:5.2f}s'.format(time.time() - epoch_start_time))
        print('-' * 89)

        intermediate_file = data_root + intermediate_file
        with open(intermediate_file, 'w') as outf:
            w = 0
            i = 1
            temperature = 1.5
            bin_sequence_length = len(secret_text)  # number of secret chunks to embed
            print("bin sequence length", bin_sequence_length)
            while i <= bin_sequence_length:
                epoch_start_time = time.time()
                output, hidden = self.lstm(input, hidden)

                # token ids outside the current bin
                zero_index = zero[secret_text[i - 1]]
                zero_index = torch.LongTensor(zero_index)
                word_weights = output.squeeze().data.div(
                    temperature).exp().cpu()
                word_weights.index_fill_(0, zero_index, 0)  # only in-bin words can be sampled
                word_idx = torch.multinomial(word_weights, 1)[0]

                input.data.fill_(word_idx)
                word = corpus.dictionary.idx2word[word_idx]
                i += 1
                w += 1
                word = word.encode('ascii', 'ignore').decode('ascii')
                outf.write(word + ' ')
        print("Generated intermediate short steganographic text")
        print("Intermediate text saved in following file: {}".format(
            intermediate_file))

    def LSTM_layer_2(self, secret_file, final_file, bins_num):
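        """
        Second (LSTM) stego layer: re-encode the intermediate words produced by
        LSTM_layer_1 as 13-bit vocabulary indices, split the LSTM vocabulary into
        bins_num random bins, and generate the final cover text with the same
        bin-restricted sampling. The bin assignment is pickled as a second key
        (lstm_key2.txt) and the result is written to final_file.
        """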
        print('Final LSTM Layer')
        # Second (final) layer: LSTM layer
        data_root = './decode/'
        epoch_start_time = time.time()
        seed = 1111
        #Reproducibility
        torch.manual_seed(seed)
        if cfg.CUDA:
            torch.cuda.manual_seed(seed)
        with open("leakGAN_instructor/real_data/emnlp_news.pt", 'rb') as f:
            self.lstm = torch.load(f)
        if cfg.CUDA:
            self.lstm.cuda()
        emnlp_data = 'dataset/emnlp_news/'
        corpus = dataa.Corpus(emnlp_data)
        #save dictionary
        idx2word_file = data_root + "idx2word_2.txt"
        word2idx_file = data_root + "word2idx_2.txt"
        with open(idx2word_file, "wb") as fp:  #Pickling
            pickle.dump(corpus.dictionary.idx2word, fp)
        with open(word2idx_file, "wb") as fp:  #Pickling
            pickle.dump(corpus.dictionary.word2idx, fp)
        ntokens = len(corpus.dictionary)
        hidden = self.lstm.init_hidden(1)
        input = torch.randint(ntokens, (1, 1), dtype=torch.long)

        if cfg.CUDA:
            input.data = input.data.cuda()
        print("Finished Initializing LSTM Model")
        #Step 1: Get intermediate text produced by the first layer
        with open(data_root + secret_file, 'r') as f:
            secret_data = f.read().split()
        #Step 2: Encode each word's vocabulary index as a 13-bit binary string
        bit_string = ''
        for data in secret_data:
            print("Data: {}".format(data))
            idWord = corpus.dictionary.word2idx[data]
            bit_string += '{0:{fill}13b}'.format(int(idWord), fill='0')
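        # fixed-width 13-bit codes assume the LSTM vocabulary has at most 2**13 = 8192 entries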
        #print(ntokens)
        print("Bit String: {}".format(bit_string))
        print("Length of Bit String: {}".format(len(bit_string)))
        #print(bit_string)
        #Step 3: Divide into bins
        secret_text = [
            int(i, 2) for i in self.string2bins(bit_string, bins_num)
        ]  #convert to bins
        #Step 4: Divide vocabulary into bins, zero words not in the bin
        if bins_num >= 2:
            tokens = list(range(ntokens))  # indices of words
            random.shuffle(tokens)  # shuffle so the bin assignment acts as a key

            # Number of words in each bin
            words_in_bin = int(ntokens / bins_num)
            # leftover tokens (ntokens % bins_num) are distributed one per bin below
            leftover = int(ntokens % bins_num)
            bins = [
                tokens[i:i + words_in_bin]
                for i in range(0, ntokens - leftover, words_in_bin)
            ]  # words to keep in each bin

            for i in range(0, leftover):
                bins[i].append(tokens[i + words_in_bin * bins_num])

            #save bins into key 2
            key1 = data_root + "lstm_key2.txt"
            with open(key1, "wb") as fp:  #Pickling
                pickle.dump(bins, fp)
            zero = [list(set(tokens) - set(bin_)) for bin_ in bins]  # for each bin, the token ids to suppress

        print('Finished Initializing Second LSTM Layer')
        print('time: {:5.2f}s'.format(time.time() - epoch_start_time))
        print('-' * 89)

        final_file = data_root + final_file
        with open(final_file, 'w') as outf:
            w = 0
            i = 1
            temperature = 1.5
            bin_sequence_length = len(secret_text)  # number of cover words to generate
            print("bin sequence length", bin_sequence_length)
            while i <= bin_sequence_length:
                epoch_start_time = time.time()
                output, hidden = self.lstm(input, hidden)

                # token ids outside the current bin
                zero_index = zero[secret_text[i - 1]]
                zero_index = torch.LongTensor(zero_index)
                word_weights = output.squeeze().data.div(
                    temperature).exp().cpu()
                word_weights.index_fill_(0, zero_index, 0)  # only in-bin words can be sampled
                word_idx = torch.multinomial(word_weights, 1)[0]

                input.data.fill_(word_idx)
                word = corpus.dictionary.idx2word[word_idx]
                i += 1
                w += 1
                word = word.encode('ascii', 'ignore').decode('ascii')
                outf.write(word + ' ')
        print("Generated final steganographic text")
        print("Final text saved in following file: {}".format(
            str(data_root + final_file)))

    def leakGAN_layer(self, secret_file, final_file, bins_num):
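        """
        Second (LeakGAN) stego layer: load the adversarially trained generator,
        re-encode the intermediate words as 13-bit vocabulary indices, split the
        LeakGAN vocabulary into bins_num random bins, and generate the final
        cover text by restricting each sampling step to the bin selected by the
        secret bits. The bin assignment is pickled as leakGAN_key.txt.
        """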
        #Second Layer = LeakGAN layer
        print('>>> Begin Second Layer...')
        data_root = './decode/'
        torch.nn.Module.dump_patches = True
        epoch_start_time = time.time()
        # Set the random seed manually for reproducibility.
        seed = 1111
        torch.manual_seed(seed)
        if cfg.CUDA:
            torch.cuda.manual_seed(seed)
        #Step 1: load the most accurate model
        with open("leakGAN_instructor/real_data/gen_ADV_00028.pt", 'rb') as f:
            self.gen.load_state_dict(torch.load(f))
        print("Finish Loading")
        self.gen.eval()

        #Step 2: Get intermediate text produced by the first layer
        with open(data_root + secret_file, 'r') as f:
            secret_data = f.read().split()
        #Step 3: Encode each word's vocabulary index as a 13-bit binary string
        bit_string = ''
        #The LSTM corpus dictionary is needed to map the intermediate words back to indices
        emnlp_data = 'dataset/emnlp_news/'
        corpus = dataa.Corpus(emnlp_data)
        for data in secret_data:
            print("Data: {}".format(data))
            idWord = corpus.dictionary.word2idx[data]
            bit_string += '{0:{fill}13b}'.format(int(idWord), fill='0')

        secret_text = [
            int(i, 2) for i in self.string2bins(bit_string, bins_num)
        ]  #convert to bins
        corpus_leak = self.index_word_dict
        if bins_num >= 2:
            ntokens = len(corpus_leak)
            tokens = list(range(ntokens))  # indices of words in the LeakGAN vocabulary
            random.shuffle(tokens)  # shuffle so the bin assignment acts as a key
            # Number of words in each bin
            words_in_bin = int(ntokens / bins_num)
            # leftover tokens (ntokens % bins_num) are distributed one per bin below
            leftover = int(ntokens % bins_num)
            bins = [
                tokens[i:i + words_in_bin]
                for i in range(0, ntokens - leftover, words_in_bin)
            ]  # words to keep in each bin
            for i in range(0, leftover):
                bins[i].append(tokens[i + words_in_bin * bins_num])
            #save bins into leakGAN key
            key2 = data_root + 'leakGAN_key.txt'
            with open(key2, "wb") as fp:  #Pickling
                pickle.dump(bins, fp)
            zero = [list(set(tokens) - set(bin_)) for bin_ in bins]  # for each bin, the token ids to suppress
        print('Finished Initializing Second LeakGAN Layer')
        print('time: {:5.2f}s'.format(time.time() - epoch_start_time))
        print('-' * 89)
        out_file = data_root + final_file
        w = 0
        i = 1
        bin_sequence_length = len(secret_text)  # number of secret bin indices to embed
        print("bin sequence length", bin_sequence_length)
        batch_size = cfg.batch_size
        seq_len = cfg.max_seq_len

        feature_array = torch.zeros(
            (batch_size, seq_len + 1, self.gen.goal_out_size))
        goal_array = torch.zeros(
            (batch_size, seq_len + 1, self.gen.goal_out_size))
        leak_out_array = torch.zeros((batch_size, seq_len + 1, cfg.vocab_size))
        samples = torch.zeros(batch_size, seq_len + 1).long()
        work_hidden = self.gen.init_hidden(batch_size)
        mana_hidden = self.gen.init_hidden(batch_size)
        leak_inp = torch.LongTensor([cfg.start_letter] * batch_size)
        real_goal = self.gen.goal_init[:batch_size, :]

        if cfg.CUDA:
            feature_array = feature_array.cuda()
            goal_array = goal_array.cuda()
            leak_out_array = leak_out_array.cuda()

        goal_array[:, 0, :] = real_goal  # g0 = goal_init
        if_sample = True
        no_log = False
        index = cfg.start_letter
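        # Autoregressive generation: at every step the discriminator extracts a
        # leaked feature from the partial sentence, the manager updates the goal
        # vector, and the worker's output distribution is masked to the current
        # secret bin before one token is sampled for each sequence in the batch.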
        while i <= seq_len:

            dis_inp = torch.zeros(batch_size, bin_sequence_length).long()
            if i > 1:
                dis_inp[:, :i - 1] = samples[:, :i - 1]  # cut sentences
                leak_inp = samples[:, i - 2]

            if torch.cuda.is_available():
                dis_inp = dis_inp.cuda()
                leak_inp = leak_inp.cuda()
            feature = self.dis.get_feature(dis_inp).unsqueeze(0)
            #print(feature)
            feature_array[:, i - 1, :] = feature.squeeze(0)
            out, cur_goal, work_hidden, mana_hidden = self.gen(index,
                                                               leak_inp,
                                                               work_hidden,
                                                               mana_hidden,
                                                               feature,
                                                               real_goal,
                                                               no_log=no_log,
                                                               train=False)
            leak_out_array[:, i - 1, :] = out

            goal_array[:, i, :] = cur_goal.squeeze(1)
            if i > 0 and i % self.gen.step_size == 0:
                real_goal = torch.sum(goal_array[:, i - 3:i + 1, :], dim=1)
                if i / self.gen.step_size == 1:
                    real_goal += self.gen.goal_init[:batch_size, :]
            # Sample one token
            if not no_log:
                out = torch.exp(out)
            # indices that have to be zeroed, as they are not in the current bin
            zero_index = zero[secret_text[i - 1]]
            zero_index = torch.LongTensor(zero_index)
            if cfg.CUDA:
                zero_index = zero_index.cuda()
            word_weights = out
            word_weights = word_weights.index_fill_(
                1, zero_index, 0)  # zero the probability of every out-of-bin word
            word_weights = torch.multinomial(word_weights, 1).view(
                -1)  # sample one in-bin word per sequence in the batch
            #print("Out after: {}".format(word_weights))
            samples[:, i] = word_weights
            leak_inp = word_weights
            i += 1
            w += 1
        leak_out_array = leak_out_array[:, :seq_len, :]
        write_tokens(out_file, tensor_to_tokens(samples, self.index_word_dict))
        print("Generated final steganographic text")
        print("Final steganographic text saved in the following file: {}".format(
            out_file))

    def _test_2_layers(self):
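        """
        Run the two-layer pipeline: LSTM first layer with 4096 bins, then either
        the LeakGAN layer (if cfg.leakGAN) or a second LSTM layer with 4 bins.
        """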
        self.LSTM_layer_1("intermediate.txt", 4096)
        if cfg.leakGAN:
            self.leakGAN_layer("intermediate.txt", "final_leakgan.txt", 4)
        else:
            self.LSTM_layer_2("intermediate.txt", "final_lstm.txt", 4)

    def _test(self):
        print('>>> Begin test...')

    def pretrain_generator(self, epochs):
        """
        Max Likelihood Pretraining for the gen

        - gen_opt: [mana_opt, work_opt]
        """
        for epoch in range(epochs):
            self.sig.update()
            if self.sig.pre_sig:
                pre_mana_loss = 0
                pre_work_loss = 0

                # =====Train=====
                for i, data in enumerate(self.oracle_data.loader):
                    inp, target = data['input'], data['target']
                    if cfg.CUDA:
                        inp, target = inp.cuda(), target.cuda()

                    mana_loss, work_loss = self.gen.pretrain_loss(
                        target, self.dis)
                    self.optimize_multi(self.gen_opt, [mana_loss, work_loss])
                    pre_mana_loss += mana_loss.data.item()
                    pre_work_loss += work_loss.data.item()
                pre_mana_loss = pre_mana_loss / len(self.oracle_data.loader)
                pre_work_loss = pre_work_loss / len(self.oracle_data.loader)

                # =====Test=====
                if epoch % cfg.pre_log_step == 0:
                    self.log.info(
                        '[MLE-GEN] epoch %d : pre_mana_loss = %.4f, pre_work_loss = %.4f, %s'
                        % (epoch, pre_mana_loss, pre_work_loss,
                           self.cal_metrics(fmt_str=True)))

                    if cfg.if_save and not cfg.if_test:
                        self._save('MLE', epoch)
            else:
                self.log.info(
                    '>>> Stop by pre signal, skip to adversarial training...')
                break

    def adv_train_generator(self, g_step, current_k=0):
        """
        The gen is trained using policy gradients, using the reward from the discriminator.
        Training is done for num_batches batches.
        """

        rollout_func = rollout.ROLLOUT(self.gen, cfg.CUDA)
        adv_mana_loss = 0
        adv_work_loss = 0
        for step in range(g_step):
            with torch.no_grad():
                gen_samples = self.gen.sample(
                    cfg.batch_size, cfg.batch_size, self.dis,
                    train=True)  # !!! train=True, the only place
                inp, target = self.gen_data.prepare(gen_samples, gpu=cfg.CUDA)

            # =====Train=====
            rewards = rollout_func.get_reward_leakgan(
                target, cfg.rollout_num, self.dis,
                current_k).cpu()  # reward with MC search
            mana_loss, work_loss = self.gen.adversarial_loss(
                target, rewards, self.dis)

            # update parameters
            self.optimize_multi(self.gen_opt, [mana_loss, work_loss])
            adv_mana_loss += mana_loss.data.item()
            adv_work_loss += work_loss.data.item()
        # =====Test=====
        self.log.info(
            '[ADV-GEN] adv_mana_loss = %.4f, adv_work_loss = %.4f, %s' %
            (adv_mana_loss / g_step, adv_work_loss / g_step,
             self.cal_metrics(fmt_str=True)))

    def train_discriminator(self, d_step, d_epoch, phrase='MLE'):
        """
        Training the discriminator on real_data_samples (positive) and generated samples from gen (negative).
        Samples are drawn d_step times, and the discriminator is trained for d_epoch d_epoch.
        """
        for step in range(d_step):
            # prepare loader for training
            pos_samples = self.oracle_data.target
            neg_samples = self.gen.sample(cfg.samples_num, cfg.batch_size,
                                          self.dis)
            self.dis_data.reset(pos_samples, neg_samples)

            for epoch in range(d_epoch):
                # =====Train=====
                d_loss, train_acc = self.train_dis_epoch(
                    self.dis, self.dis_data.loader, self.dis_criterion,
                    self.dis_opt)

            # =====Test=====
            self.log.info(
                '[%s-DIS] d_step %d: d_loss = %.4f, train_acc = %.4f,' %
                (phrase, step, d_loss, train_acc))

    def cal_metrics(self, fmt_str=False):
        self.gen_data.reset(
            self.gen.sample(cfg.samples_num, cfg.batch_size, self.dis))
        self.bleu3.test_text = tensor_to_tokens(self.gen_data.target,
                                                self.index_word_dict)
        bleu3_score = self.bleu3.get_score(ignore=False)

        with torch.no_grad():
            gen_nll = 0
            for data in self.oracle_data.loader:
                inp, target = data['input'], data['target']
                if cfg.CUDA:
                    inp, target = inp.cuda(), target.cuda()
                loss = self.gen.batchNLLLoss(target, self.dis)
                gen_nll += loss.item()
            gen_nll /= len(self.oracle_data.loader)

        if fmt_str:
            return 'BLEU-3 = %.4f, gen_NLL = %.4f,' % (bleu3_score, gen_nll)
        return bleu3_score, gen_nll

    def _save(self, phrase, epoch):
        torch.save(
            self.gen.state_dict(),
            cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phrase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_{}_{:05d}.txt'.format(
            phrase, epoch)
        samples = self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis)
        write_tokens(save_sample_path,
                     tensor_to_tokens(samples, self.index_word_dict))