Example #1
    def _save(self, phase, epoch):
        """Save model state dict and generator's samples"""
        if phase != 'ADV':
            torch.save(self.gen.state_dict(), cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_{}_{:05d}.txt'.format(phase, epoch)

        samples = self.gen.sample(10000, cfg.batch_size)
        write_tokens(save_sample_path, tensor_to_tokens(samples, self.idx2word_dict))
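
All of the _save variants on this page call write_tokens and tensor_to_tokens, whose definitions are not shown here. A minimal sketch of what such helpers could look like, assuming samples is a (batch, seq_len) LongTensor of word indices and that idx2word_dict maps each index key to a word string (both the bodies and the key format are assumptions, not the repository's confirmed implementation):

def tensor_to_tokens(tensor, dictionary):
    # Convert a batch of index sequences into lists of word tokens (sketch).
    tokens = []
    for sent in tensor:
        # .tolist() works on a torch LongTensor row; the str() key format is an assumption
        tokens.append([dictionary[str(int(idx))] for idx in sent.tolist()])
    return tokens

def write_tokens(filename, tokens):
    # Write one sentence per line, tokens separated by spaces (sketch).
    with open(filename, 'w') as fout:
        for sent in tokens:
            fout.write(' '.join(sent) + '\n')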
Example #2
    def _save(self, phase, epoch):
        """Save model state dict and generator's samples"""
        torch.save(
            self.gen.state_dict(),
            cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_{}_{:05d}.txt'.format(
            phase, epoch)
        samples = self.gen.sample(cfg.batch_size, cfg.batch_size, self.dis)
        write_tokens(save_sample_path,
                     tensor_to_tokens(samples, self.idx2word_dict))
Example #3
    def _save(self, phase, epoch):
        """Save model state dict and generator's samples"""
        for i in range(cfg.k_label):
            if phase != 'ADV':
                torch.save(self.gen_list[i].state_dict(),
                           cfg.save_model_root + 'gen{}_{}_{:05d}.pt'.format(i, phase, epoch))
            save_sample_path = cfg.save_samples_root + 'samples_d{}_{}_{:05d}.txt'.format(i, phase, epoch)
            samples = self.gen_list[i].sample(cfg.batch_size, cfg.batch_size)
            write_tokens(save_sample_path, tensor_to_tokens(samples, self.idx2word_dict))
Example #4
    def _save(self, phase, epoch, label_i=None):
        """Save model state dict and generator's samples for the given label"""
        assert type(label_i) == int
        torch.save(
            self.gen.state_dict(),
            cfg.save_model_root + 'gen_{}_{:05d}.pt'.format(phase, epoch))
        save_sample_path = cfg.save_samples_root + 'samples_c{}_{}_{:05d}.txt'.format(
            label_i, phase, epoch)
        samples = self.gen.sample(cfg.batch_size,
                                  cfg.batch_size,
                                  label_i=label_i)
        write_tokens(save_sample_path,
                     tensor_to_tokens(samples, self.idx2word_dict))
Example #5
    def leakGAN_layer(self, secret_file, final_file, bins_num):
        """Second layer (LeakGAN layer): embed the secret text into generated text via vocabulary bins"""
        print('>>> Begin Second Layer...')
        data_root = './decode/'
        torch.nn.Module.dump_patches = True
        epoch_start_time = time.time()
        # Set the random seed manually for reproducibility.
        seed = 1111
        random.seed(seed)
        torch.manual_seed(seed)
        #Step 1: load the most accurate model
        with open("leakGAN_instructor/real_data/gen_ADV_00028.pt", 'rb') as f:
            self.gen.load_state_dict(torch.load(f))
        print("Finish Loading")
        self.gen.eval()

        # Step 2: Read the intermediate (secret) text
        with open(data_root + secret_file, 'r') as f:
            secret_data = f.read().split()
        # Step 3: Compress the word sequence into a binary string
        bit_string = ''
        # The LSTM corpus dictionary is needed to map each secret word to its index
        emnlp_data = 'dataset/emnlp_news/'
        corpus = dataa.Corpus(emnlp_data)
        for data in secret_data:
            print("Data: {}".format(data))
            idWord = corpus.dictionary.word2idx[data]
            bit_string += '{0:{fill}13b}'.format(int(idWord), fill='0')  # fixed-width 13-bit encoding of the word index

        secret_text = [
            int(i, 2) for i in self.string2bins(bit_string, bins_num)
        ]  # convert each bit chunk into an integer bin index
        corpus_leak = self.index_word_dict
        if bins_num >= 2:
            ntokens = len(corpus_leak)
            tokens = list(range(ntokens))  # * args.replication_factor
            #print(ntokens)
            random.shuffle(tokens)
            #Words in each bin
            words_in_bin = int(ntokens / bins_num)
            # leftover tokens (ntokens % bins_num) are spread one per bin below
            leftover = int(ntokens % bins_num)
            bins = [
                tokens[i:i + words_in_bin]
                for i in range(0, ntokens - leftover, words_in_bin)
            ]  # words to keep in each bin
            for i in range(0, leftover):
                bins[i].append(tokens[i + words_in_bin * bins_num])
            #save bins into leakGAN key
            key2 = data_root + 'leakGAN_key.txt'
            with open(key2, "wb") as fp:  #Pickling
                pickle.dump(bins, fp)
            zero = [list(set(tokens) - set(bin_)) for bin_ in bins]  # for each bin: the token indices outside it
        print('Finished Initializing Second LeakGAN Layer')
        print('time: {:5.2f}s'.format(time.time() - epoch_start_time))
        print('-' * 89)
        out_file = data_root + final_file
        i = 1
        bin_sequence_length = len(secret_text)
        print("bin sequence length", bin_sequence_length)
        batch_size = cfg.batch_size
        seq_len = cfg.max_seq_len

        # Rollout buffers for LeakGAN features, goals and word distributions
        feature_array = torch.zeros(
            (batch_size, seq_len + 1, self.gen.goal_out_size))
        goal_array = torch.zeros(
            (batch_size, seq_len + 1, self.gen.goal_out_size))
        leak_out_array = torch.zeros((batch_size, seq_len + 1, cfg.vocab_size))
        samples = torch.zeros(batch_size, seq_len + 1).long()
        work_hidden = self.gen.init_hidden(batch_size)
        mana_hidden = self.gen.init_hidden(batch_size)
        leak_inp = torch.LongTensor([cfg.start_letter] * batch_size)
        real_goal = self.gen.goal_init[:batch_size, :]

        if cfg.CUDA:
            feature_array = feature_array.cuda()
            goal_array = goal_array.cuda()
            leak_out_array = leak_out_array.cuda()

        goal_array[:, 0, :] = real_goal  # g0 = goal_init
        no_log = False
        index = cfg.start_letter
        while i <= seq_len:

            dis_inp = torch.zeros(batch_size, bin_sequence_length).long()
            if i > 1:
                dis_inp[:, :i - 1] = samples[:, :i - 1]  # copy the tokens generated so far
                leak_inp = samples[:, i - 2]  # previous token becomes the next worker input

            if torch.cuda.is_available():
                dis_inp = dis_inp.cuda()
                leak_inp = leak_inp.cuda()
            feature = self.dis.get_feature(dis_inp).unsqueeze(0)
            #print(feature)
            feature_array[:, i - 1, :] = feature.squeeze(0)
            out, cur_goal, work_hidden, mana_hidden = self.gen(index,
                                                               leak_inp,
                                                               work_hidden,
                                                               mana_hidden,
                                                               feature,
                                                               real_goal,
                                                               no_log=no_log,
                                                               train=False)
            leak_out_array[:, i - 1, :] = out

            goal_array[:, i, :] = cur_goal.squeeze(1)
            # refresh the manager goal every step_size steps
            if i > 0 and i % self.gen.step_size == 0:
                real_goal = torch.sum(goal_array[:, i - 3:i + 1, :], dim=1)
                if i / self.gen.step_size == 1:
                    real_goal += self.gen.goal_init[:batch_size, :]
            # Sample one token
            if not no_log:
                out = torch.exp(out)
            # indices that have to be zeroed because they are not in the current bin
            zero_index = zero[secret_text[i - 1]]
            #zero_index.append(0)
            zero_index = torch.LongTensor(zero_index)
            if cfg.CUDA:
                zero_index = zero_index.cuda()
            word_weights = out.index_fill_(
                1, zero_index, 0)  # zero the probability of every out-of-bin token
            word_weights = torch.multinomial(word_weights, 1).view(
                -1)  # sample one in-bin token per sequence, weighted by its probability
            #print("Out after: {}".format(word_weights))
            samples[:, i] = word_weights
            leak_inp = word_weights  # feed the chosen token back as the next input
            i += 1
        leak_out_array = leak_out_array[:, :seq_len, :]
        write_tokens(out_file, tensor_to_tokens(samples, self.index_word_dict))
        print("Generated final steganographic text")
        print("Final steganographic text saved to: {}".format(out_file))
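
Example #5 depends on self.string2bins, which is not shown on this page. A minimal sketch of how the bit-chunking and the vocabulary-bin partition could work, assuming each chunk carries log2(bins_num) bits and mirroring the bin construction inside leakGAN_layer (the standalone names string2bins and build_bins and the demo values below are illustrative, not the repository's confirmed code):

import math
import random

def string2bins(bit_string, bins_num):
    # Split a bit string into chunks of log2(bins_num) bits each (assumed behaviour).
    bits_per_chunk = int(math.log2(bins_num))
    return [bit_string[i:i + bits_per_chunk]
            for i in range(0, len(bit_string), bits_per_chunk)]

def build_bins(ntokens, bins_num, seed=1111):
    # Partition vocabulary indices into bins_num random bins, as in leakGAN_layer.
    tokens = list(range(ntokens))
    random.seed(seed)
    random.shuffle(tokens)
    words_in_bin = ntokens // bins_num
    leftover = ntokens % bins_num
    bins = [tokens[i:i + words_in_bin]
            for i in range(0, ntokens - leftover, words_in_bin)]
    for i in range(leftover):
        bins[i].append(tokens[words_in_bin * bins_num + i])
    return bins

# Example: with 4 bins, every 2-bit chunk selects the bin for one generated token.
chunks = string2bins('0110111000', 4)      # ['01', '10', '11', '10', '00']
bin_ids = [int(c, 2) for c in chunks]      # [1, 2, 3, 2, 0]
bins = build_bins(ntokens=10, bins_num=4)  # bin sizes: [3, 3, 2, 2]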