示例#1
0
def batch_processing(batch_data, args):
    """Collate seq2seq samples into padded batch arrays.

    Args:
        batch_data: list of dicts with 'encode_seq_ids', 'decode_seq_ids'
            and 'decoder_labels' token-id sequences.
        args: unused here; kept for a uniform batch-processing interface.

    Returns:
        ([encode_ids, encode_lens, decode_ids, decode_lens], labels) where
        id arrays are padded with config.PAD_WORD and the length vectors
        are float32 (lengths measured before padding).
    """
    enc_ids = [sample['encode_seq_ids'] for sample in batch_data]
    dec_ids = [sample['decode_seq_ids'] for sample in batch_data]
    labels = [sample['decoder_labels'] for sample in batch_data]

    # Lengths are taken from the raw (unpadded) sequences.
    enc_lens = np.array([len(seq) for seq in enc_ids]).astype(np.float32)
    dec_lens = np.array([len(seq) for seq in dec_ids]).astype(np.float32)

    enc_ids = utils.pad_sequence(enc_ids, padder=config.PAD_WORD)
    dec_ids = utils.pad_sequence(dec_ids, padder=config.PAD_WORD)
    labels = utils.pad_sequence(labels, padder=config.PAD_WORD)

    return [enc_ids, enc_lens, dec_ids, dec_lens], labels
示例#2
0
    def __next__(self):
        """Return the next batch as (input, input_lengths, target, target_lengths).

        Sequences are sorted by input length (descending, stable), padded
        to the batch maximum, and stacked into (seq x batch) LongTensors.
        Raises StopIteration when fewer than batch_size samples remain.
        """
        if self.data_num - self.idx < self.batch_size:
            raise StopIteration
        batch_indices = self.indices[self.idx : self.idx + self.batch_size]

        input_seqs = []
        target_seqs = []
        for i in batch_indices:
            src, tgt = self.pairs[i][0], self.pairs[i][1]
            input_seqs.append(indexesFromSentence(self.lang, src, self.vocab_size))
            target_seqs.append(indexesFromSentence(self.lang, tgt, self.vocab_size))

        # Stable sort by input length, longest first (same ordering the
        # original zip/sort/unzip produced).
        order = sorted(range(len(input_seqs)),
                       key=lambda k: len(input_seqs[k]), reverse=True)
        input_seqs = [input_seqs[k] for k in order]
        target_seqs = [target_seqs[k] for k in order]

        # Per-sequence lengths, then pad each side to its own max length.
        input_lengths = [len(s) for s in input_seqs]
        target_lengths = [len(s) for s in target_seqs]
        input_padded = [pad_sequence(s, max(input_lengths)) for s in input_seqs]
        target_padded = [pad_sequence(s, max(target_lengths)) for s in target_seqs]

        # (batch x seq) -> (seq x batch)
        input_tensor = torch.LongTensor(input_padded).transpose(0, 1)
        target_tensor = torch.LongTensor(target_padded).transpose(0, 1)

        if self.use_cuda:
            input_tensor = input_tensor.cuda()
            target_tensor = target_tensor.cuda()

        self.idx += self.batch_size

        return input_tensor, input_lengths, target_tensor, target_lengths
示例#3
0
    def infer(
        self,
        reps_context,
        context_sizes,
        num_steps=None,
    ):
        """Roll out the prior RNN over the aggregated context.

        Args:
            reps_context: per-episode context representations.
            context_sizes: number of context elements per episode.
            num_steps: optional override for self.num_steps.

        Returns:
            list of sampled latents, one tensor (with a singleton step
            dim at position 1) per rollout step.
        """
        num_episodes = len(reps_context)

        # Initialize prior RNN state (one entry per layer).
        states_p = self.rnn_p.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        hiddens_p = [state[0] for state in states_p]

        latents = []
        input_p = None
        total_steps = self.num_steps if num_steps is None else num_steps
        for _ in range(total_steps):
            if input_p is None:
                # Aggregate the context once: pad, sum over context
                # elements, reshape to a (B, C, H, W) feature map.
                ctx = pad_sequence(reps_context, context_sizes)
                ctx = torch.sum(ctx, dim=1)
                input_p = ctx.view(-1, self.nc_context,
                                   self.z_height, self.z_width)

            # One prior step.
            zs, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
                input_p, states_p)

            # Concatenate multi-layer samples along channels, keep a
            # singleton step dimension.
            if len(zs) > 1:
                latents.append(torch.cat(zs, dim=1).unsqueeze(1))
            else:
                latents.append(zs[0].unsqueeze(1))

        return latents
示例#4
0
def query_model(sess, input_node, predictions, vocab, rev_vocab, max_seq_len,
                output_embs_for_all_vocab):
    """Write the top-10 candidate words for each concept description.

    For every line in the descriptions file, tokenizes it, pads the ids to
    max_seq_len, runs the model, and ranks the whole vocabulary by cosine
    similarity to the predicted embedding; candidates are written
    space-separated, one line per description.

    Args:
        sess: TensorFlow session holding the trained model.
        input_node: placeholder fed with the padded token ids.
        predictions: model output tensor (predicted embedding).
        vocab: token -> id mapping.
        rev_vocab: id -> token mapping.
        max_seq_len: fixed input length to pad to.
        output_embs_for_all_vocab: (V, D) embedding matrix ranked against.
    """
    top = 10  # number of candidates per description; loop-invariant
    with tf.gfile.GFile("data/definitions/concept_descriptions.tok",
                        mode="r") as data_file:
        with tf.gfile.GFile("data/output/concept_BOW.txt",
                            mode="w") as output_file:
            for line in data_file:
                token_ids = utils.sentence_to_token_ids(line, vocab)
                # Skips the first token — presumably the concept word
                # itself, keeping only its description. TODO confirm.
                padded_ids = np.asarray(
                    utils.pad_sequence(token_ids[1:], max_seq_len))

                input_data = np.asarray([padded_ids])
                model_preds = sess.run(predictions,
                                       feed_dict={input_node: input_data})
                # Cosine similarity of the prediction vs. every vocab
                # embedding; NaNs (e.g. zero vectors) become 0.
                sims = 1 - np.squeeze(
                    dist.cdist(model_preds,
                               output_embs_for_all_vocab,
                               metric="cosine"))
                sims = np.nan_to_num(sims)
                candidate_ids = sims.argsort()[::-1][:top]
                candidates = [rev_vocab[idx] for idx in candidate_ids]
                for cand in candidates:
                    output_file.write(cand + " ")
                    print(cand + " ")
                output_file.write("\n")
                output_file.flush()
                print("\n")
示例#5
0
def query_model(sess, input_node, predictions, vocab, rev_vocab, max_seq_len,
                output_embs_for_all_vocab):
    """Interactive loop: read a definition from stdin and print the top-N
    vocabulary candidates ranked by cosine similarity to the model's
    predicted embedding.

    Loops forever; terminate with EOF / keyboard interrupt. Raises
    ValueError if the candidate-count input is not an integer.
    """
    while True:
        sys.stdout.write("Type a definition: ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sys.stdout.write("Number of candidates: ")
        sys.stdout.flush()
        top = int(sys.stdin.readline())
        token_ids = utils.sentence_to_token_ids(sentence, vocab)
        padded_ids = np.asarray(utils.pad_sequence(token_ids, max_seq_len))

        input_data = np.asarray([padded_ids])
        model_preds = sess.run(predictions, feed_dict={input_node: input_data})
        # Cosine similarity of the prediction against every vocab embedding.
        sims = 1 - np.squeeze(
            dist.cdist(model_preds, output_embs_for_all_vocab,
                       metric="cosine"))
        # Guard against NaNs (e.g. zero embedding vectors).
        sims = np.nan_to_num(sims)
        candidate_ids = sims.argsort()[::-1][:top]
        candidates = [rev_vocab[idx] for idx in candidate_ids]

        print("\n Top %s candidates from the RNN model:" % top)
        for ii, cand in enumerate(candidates):
            print("%s: %s" % (ii + 1, cand))

        sys.stdout.flush()
        # NOTE(review): this extra readline consumes and discards one more
        # input line before the next prompt — looks like a "press Enter to
        # continue" pause, but may be leftover code; confirm intent.
        sentence = sys.stdin.readline()
示例#6
0
 def read_file(self, vocab_file, data_file, max_seq_len):
     """Convert each text row of a TSV file into padded word indices.

     Args:
         vocab_file: path to a pickled vocab object exposing word2index().
         data_file: tab-separated file with a 'text' column.
         max_seq_len: length every sequence is padded to.

     Returns:
         list of padded index sequences, one per row.
     """
     with open(vocab_file, 'rb') as f:
         lang = pkl.load(f)
     df = pd.read_csv(data_file, delimiter='\t')
     indexed = []
     for text in df['text']:
         ids = [lang.word2index(tok) for tok in text.split(' ')]
         indexed.append(pad_sequence(ids, max_seq_len))
     return indexed
示例#7
0
def batch_process_relation(batch_data, args, Train=None):
    """Collate relation-prediction samples into padded batch arrays.

    Args:
        batch_data: list of dicts with 't1_id', 't2_id', 'label', 't1', 't2'.
        args: config carrying cos_neighbors, num_contexts, node_to_id,
            use_context, use_aux_loss and link_set.
        Train: unused; kept for interface compatibility.

    Returns:
        ([t1_ids, t1_contexts, t2_ids, t2_contexts], labels, aux_labels);
        aux_labels stays an empty list unless both use_context and
        use_aux_loss are set.
    """
    t1_batch = [d['t1_id'] for d in batch_data]
    t2_batch = [d['t2_id'] for d in batch_data]
    label_batch = [d['label'] for d in batch_data]

    # Sample neighbor contexts for both terms of every pair; keep the
    # per-sample t1-then-t2 call order (retrieval may be stochastic).
    t1_contexts = []
    t2_contexts = []
    for d in batch_data:
        t1_contexts.append(
            context_retriever(d['t1'], args.cos_neighbors,
                              args.num_contexts, args.node_to_id))
        t2_contexts.append(
            context_retriever(d['t2'], args.cos_neighbors,
                              args.num_contexts, args.node_to_id))

    t1_batch = np.array(t1_batch)
    t2_batch = np.array(t2_batch)
    t1_contexts = utils.pad_sequence(t1_contexts, padder=0)
    t2_contexts = utils.pad_sequence(t2_contexts, padder=0)
    label_batch = np.array(label_batch)

    aux_label_batch = []
    if args.use_context and args.use_aux_loss:
        # Auxiliary target: 1 iff a (t1-context, t2-context) pair is a
        # known link, flattened row-major per sample.
        for row1, row2 in zip(t1_contexts, t2_contexts):
            aux_label_batch.append([
                1 if (int(a), int(b)) in args.link_set else 0
                for a in row1 for b in row2
            ])
        aux_label_batch = np.array(aux_label_batch)

    return [t1_batch, t1_contexts, t2_batch,
            t2_contexts], label_batch, aux_label_batch
示例#8
0
    def featurize(self, batch):
        """Convert a batch of examples into padded tensor features.

        Builds BERT token-id tensors for the utterance ([CLS] ... [SEP])
        and every table context, plus masks and per-table column
        start/end offsets. 'query_pointer' is only populated while
        training; otherwise it is None.
        """
        feat = defaultdict(list)
        cls_token = self.bert_tokenizer.cls_token
        sep_token = self.bert_tokenizer.sep_token
        to_ids = self.bert_tokenizer.convert_tokens_to_ids
        for ex in batch:
            if self.training:
                feat['query_pointer'].append(torch.tensor(ex['pointer_query']))

            # [CLS] question tokens [SEP]
            feat['utterance'].append(torch.tensor(
                to_ids([cls_token] + ex['g_question_toks'] + [sep_token])))

            tables, tables_mask = [], []
            starts, ends = [], []
            for table in ex['query_context']:
                ids = torch.tensor(to_ids(table['toks']))
                tables.append(ids)
                tables_mask.append(torch.ones_like(ids))
                starts.append([col['start'] for col in table['columns']])
                ends.append([col['end'] for col in table['columns']])
            feat['tables'].append(
                utils.pad_sequence(tables, self.bert_tokenizer.pad_token_id,
                                   self.device))
            feat['tables_mask'].append(
                utils.pad_sequence(tables_mask, 0, self.device).float())
            feat['starts'].append(starts)
            feat['ends'].append(ends)

        if self.training:
            feat['query_pointer'] = utils.pad_sequence(
                feat['query_pointer'], self.pad_id, self.device)
        else:
            feat['query_pointer'] = None

        feat['utterance_mask'] = utils.pad_sequence(
            [torch.ones(len(t)) for t in feat['utterance']], 0, self.device)
        feat['utterance'] = utils.pad_sequence(feat['utterance'], self.pad_id,
                                               self.device)

        feat['batch'] = batch
        return feat
示例#9
0
    def __init__(self, dataset, batch_sampler):
        """Pre-collate all batches; pad variable-length ndarray fields.

        Args:
            dataset: indexable dataset of samples.
            batch_sampler: iterable of lists of sample indices.
        """
        self.batches = []
        for sample_ids in batch_sampler:
            raw = self._collate_fn([dataset[i] for i in sample_ids])
            collated = []
            for field in raw:
                # Array-valued fields get padded; everything else passes
                # through unchanged.
                if isinstance(field[0], np.ndarray):
                    field = pad_sequence(field)
                collated.append(field)
            self.batches.append(collated)
示例#10
0
def batch_process_term(batch_data, args):
    """Collate term-classification samples into padded batch arrays.

    Args:
        batch_data: list of dicts with 'y', 'word_ids', 'word_len' and
            'ngram_ids'.
        args: unused here; kept for a uniform batch-processing interface.

    Returns:
        ([word_ids, word_lengths, ngram_ids, ngram_lengths], y) where the
        id arrays are padded int matrices and lengths are float32 vectors.
    """
    y = np.array([s['y'] for s in batch_data])
    word_ids = utils.pad_sequence(
        [s['word_ids'] for s in batch_data]).astype(int)
    word_lengths = np.array(
        [s['word_len'] for s in batch_data]).astype(np.float32)
    ngram_ids = utils.pad_sequence(
        [s['ngram_ids'] for s in batch_data]).astype(int)
    # Ngram length is measured, not stored in the sample.
    ngram_length = np.array(
        [len(s['ngram_ids']) for s in batch_data]).astype(np.float32)

    return [word_ids, word_lengths, ngram_ids, ngram_length], y
示例#11
0
def batch_process_ns(batch_data, args):
    """Collate negative-sampling samples into padded batch arrays.

    Args:
        batch_data: list of dicts with 'word_ids', 'word_len', 'ngram_ids'
            and 'context'.
        args: unused here; kept for a uniform batch-processing interface.

    Returns:
        [word_ids, word_lengths, ngram_ids, ngram_lengths, contexts] with
        padded int id matrices, float32 length vectors and a context array.
    """
    word_ids = utils.pad_sequence(
        [s['word_ids'] for s in batch_data]).astype(int)
    word_lengths = np.array(
        [s['word_len'] for s in batch_data]).astype(np.float32)
    ngram_ids = utils.pad_sequence(
        [s['ngram_ids'] for s in batch_data]).astype(int)
    # Ngram length is measured, not stored in the sample.
    ngram_length = np.array(
        [len(s['ngram_ids']) for s in batch_data]).astype(np.float32)
    contexts = np.array([s['context'] for s in batch_data])

    return [word_ids, word_lengths, ngram_ids, ngram_length, contexts]
示例#12
0
File: sql2nl.py  Project: vzhong/gazp
 def featurize(self, batch):
     """Build padded context tensors (and, while training, pointer
     targets) for a batch of question-generation examples.

     Returns a dict with 'context' / 'context_mask' padded tensors,
     'utt_pointer' (padded tensor when training, else None) and the raw
     'batch'.
     """
     feat = defaultdict(list)
     # NOTE: the original also fetched cls_token/sep_token here but never
     # used them; the unused locals were removed.
     for ex in batch:
         tens = torch.tensor(
             self.bert_tokenizer.convert_tokens_to_ids(
                 ex['question_context']))
         feat['context'].append(tens)
         feat['context_mask'].append(torch.ones_like(tens))
         if self.training:
             feat['utt_pointer'].append(torch.tensor(
                 ex['pointer_question']))
     feat['context'] = utils.pad_sequence(feat['context'], self.pad_id,
                                          self.device)
     feat['context_mask'] = utils.pad_sequence(feat['context_mask'], 0,
                                               self.device).float()
     # Pointer targets only exist during training.
     if self.training:
         feat['utt_pointer'] = utils.pad_sequence(
             feat['utt_pointer'], self.pad_id, self.device)
     else:
         feat['utt_pointer'] = None
     feat['batch'] = batch
     return feat
示例#13
0
 def read_file(self, vocab_file, data_file, max_seq_len, num_sample):
     """Sample rows of a TSV file and convert each text row to padded
     word indices.

     Args:
         vocab_file: path to a pickled vocab object exposing word2index().
         data_file: tab-separated file with a 'text' column.
         max_seq_len: length every sequence is padded to (for the CNN).
         num_sample: if None, shuffle all rows; otherwise sample this
             many rows with replacement.

     Returns:
         list of padded index sequences, one per sampled row.
     """
     with open(vocab_file, 'rb') as f:
         lang = pkl.load(f)
     df = pd.read_csv(data_file, delimiter='\t')
     if num_sample is None:
         df = df.sample(frac=1).reset_index(drop=True)
     else:
         df = df.sample(n=num_sample, replace=True)
     indexed = []
     for text in df['text']:
         ids = [lang.word2index(tok) for tok in text.split(' ')]
         indexed.append(pad_sequence(ids, max_seq_len))  # pad for the CNN
     return indexed
示例#14
0
    def index_sentences(self, sentences):
        """Index a batch of sentences and pack into a torch tensor.

        Args:
            sentences: (list) of one-line/string sentences.

        Returns:
            (torch.Tensor) [batch_size, max_length].
        """
        padded = [
            utils.pad_sequence(self.indexer.get_ids(sentence),
                               constants.PAD, self.max_length)
            for sentence in sentences
        ]
        return torch.from_numpy(np.array(padded))
    def __call__(self, batch):
        """Collate a batch into (input_ids, attention_mask[, targets]).

        Targets are split out of the batch and appended to the output
        only when the collator was configured with targets.
        """
        has_targets = self._targets is not None
        if has_targets:
            sequences, targets = list(zip(*batch))
        else:
            sequences = list(batch)

        input_ids, attention_mask = pad_sequence(
            sequences,
            max_seq_length=self._max_length,
            pad_token_id=self._pad_token_id)

        if has_targets:
            return input_ids, attention_mask, torch.tensor(targets)
        return input_ids, attention_mask
示例#16
0
    def get_batch_data(self, indices, unit, neg_num):
        """Assemble a padded, length-sorted batch of text and audio features.

        Text features (and their phoneme/char label arrays) are taken for
        all indices plus the first half again (the extra copies pair with
        the positive/negative audio samples appended below). Both text and
        audio are sorted by length descending before padding; the returned
        *_invert arrays map sorted positions back to original order.

        Args:
            indices: sample indices for this batch.
            unit: 'phn' or 'char' — which text representation to use.
            neg_num: number of negative audio samples appended per item in
                the first half of the batch.

        Returns:
            (batch_data, batch_length, batch_invert,
             batch_txt, batch_txt_length, batch_txt_invert, batch_txt_labels)
        """
        # txt
        if unit == 'phn':
            batch_txt = [self.txt_feat[index] for index in indices] \
                + [self.txt_feat[index] for index in indices[:len(indices)//2]]
            batch_txt_labels = [self.phn_idx_arrays[index] for index in indices] \
                + [self.phn_idx_arrays[index] for index in indices[:len(indices)//2]]
        elif unit == 'char':
            batch_txt = [self.txt_feat_char[index] for index in indices] \
                + [self.txt_feat_char[index] for index in indices[:len(indices)//2]]
            batch_txt_labels = [self.char_idx_arrays[index] for index in indices] \
                + [self.char_idx_arrays[index] for index in indices[:len(indices)//2]]
        else:
            # NOTE(review): bare `raise` with no active exception triggers
            # "RuntimeError: No active exception to re-raise" — probably
            # intended as `raise ValueError(unit)`.
            raise
        batch_txt_length = torch.tensor([len(wrd) for wrd in batch_txt],
                                        device=device)
        # Sort text by length, longest first; keep the permutation.
        batch_txt_order = np.array(
            sorted(range(len(batch_txt)),
                   key=lambda k: len(batch_txt[k]),
                   reverse=True))
        batch_txt = np.array(batch_txt)[batch_txt_order]
        batch_txt_labels = np.array(batch_txt_labels)[batch_txt_order]
        batch_txt_length = batch_txt_length[batch_txt_order]
        batch_txt = pad_sequence(batch_txt).to(device)
        batch_txt_labels = pad_sequence(batch_txt_labels).to(device)

        # target
        batch_data = [self.feat[index] for index in indices]

        # randomly select pos & neg
        pos_neg_indices = indices[:len(indices) // 2]

        # for idx in pos_neg_indices:
        # spk = self.wrd_idx2spk[idx]

        # # feat_pos
        # idx_pos = random.choice(self.spk2wrd_idx[spk])
        # batch_data.append(self.feat[idx_pos])

        # for idx in pos_neg_indices:
        # spk = self.wrd_idx2spk[idx]

        # # feat_neg
        # self.spks.remove(spk)
        # rand_spk = random.choice(self.spks)
        # self.spks.append(spk)
        # idx_neg = random.choice(self.spk2wrd_idx[rand_spk])
        # batch_data.append(self.feat[idx_neg])

        # neg paired
        # For each of neg_num rounds, append one audio sample whose word
        # differs from the anchor's word (rejection sampling).
        for i in range(neg_num):
            for idx in pos_neg_indices:
                wrd = self.wrds[idx]
                neg_paired_index = idx
                neg_paired_wrd = wrd
                while neg_paired_wrd == wrd:
                    neg_paired_index = random.randint(0, len(self.wrds) - 1)
                    neg_paired_wrd = self.wrds[neg_paired_index]
                batch_data.append(self.feat[neg_paired_index])

        batch_length = torch.tensor([len(wrd) for wrd in batch_data],
                                    device=device)
        # Sort audio by length, longest first; keep the permutation.
        batch_order = np.array(
            sorted(range(len(batch_data)),
                   key=lambda k: len(batch_data[k]),
                   reverse=True))
        batch_data = np.array(batch_data)[batch_order]
        batch_length = batch_length[batch_order]
        batch_data = pad_sequence(batch_data).to(device)

        # invert indices for tracing
        batch_invert = np.zeros_like(batch_order)
        for i, j in enumerate(batch_order):
            batch_invert[j] = i
        batch_invert = torch.tensor(batch_invert, device=device)

        batch_txt_invert = np.zeros_like(batch_txt_order)
        for i, j in enumerate(batch_txt_order):
            batch_txt_invert[j] = i
        batch_txt_invert = torch.tensor(batch_txt_invert, device=device)

        # batch_data: target,( paired,) pos, neg
        # batch_txt: target,( paired)
        return batch_data, batch_length, batch_invert, \
               batch_txt, batch_txt_length, batch_txt_invert, batch_txt_labels
示例#17
0
    def get_train_test_data(self, run_id=1, protocol_type="xs"):
        """Split sequences into zero-padded train/test pose arrays.

        Args:
            run_id: index of the predefined train/test split to use.
            protocol_type: "xs" (split by subject) or "xv" (split by
                viewpoint); any other value skips every sequence.

        Returns:
            (X_train, Y_train, X_test, Y_test) numpy arrays, with every
            sequence zero-padded to the global maximum length.
        """
        # initialize train and test
        X_train, Y_train = [], []
        X_test, Y_test = [], []

        # predefined membership sets for this protocol/run
        train_sets = self.train_test_sets[protocol_type][run_id][0]
        test_sets = self.train_test_sets[protocol_type][run_id][1]

        # iterate through sequences
        for seq in self.sequences:
            # flattened per-frame poses & one-hot labels
            poses = np.array([f.pose.flatten() for f in seq.frames])

            if self.train_subclasses:
                targets = np.array([f.min_cls for f in seq.frames])
            else:
                targets = np.array([f.maj_cls for f in seq.frames])

            targets_one_hot = utils.one_hot_encoding(
                targets, nb_classes=self.get_nb_classes())

            # temporal subsampling
            poses = utils.subsampling(
                poses, sampling_factor=self.sampling_factor)
            targets_one_hot = utils.subsampling(
                targets_one_hot, sampling_factor=self.sampling_factor)

            # The split key is the subject for "xs", the viewpoint for
            # "xv" — the two original branches were identical otherwise.
            if protocol_type == "xs":
                key = seq.subject
            elif protocol_type == "xv":
                key = seq.viewpoint
            else:
                continue  # unknown protocol: skip, as the original did

            if key in train_sets:
                X_train.append(poses)
                Y_train.append(targets_one_hot)
            elif key in test_sets:
                X_test.append(poses)
                Y_test.append(targets_one_hot)
            else:
                print("Sequence is not contained in TRAIN neither in TEST...")

        # maximal sequence length across both splits
        max_seq_len_train = max([len(s) for s in X_train])
        max_seq_len_test = max([len(s) for s in X_test])
        max_seq_len = max([max_seq_len_train, max_seq_len_test])

        # zero padding to the common maximum
        X_train = np.array([utils.pad_sequence(s, max_seq_len) for s in X_train])
        Y_train = np.array([utils.pad_sequence(s, max_seq_len) for s in Y_train])
        X_test = np.array([utils.pad_sequence(s, max_seq_len) for s in X_test])
        Y_test = np.array([utils.pad_sequence(s, max_seq_len) for s in Y_test])

        return X_train, Y_train, X_test, Y_test
示例#18
0
    def forward(
            self,
            reps_context,
            context_sizes,
            reps_target,
            target_sizes,
            input_tuples,
            #img_target=None, img_queries=None, img_batch_sizes=[], img_target_indices=[],
            #hpt_target=None, hpt_queries=None, hpt_batch_sizes=[], hpt_target_indices=[],
            num_steps=None,
            beta=1.0,
            std=1.0,
            is_grayscale=False,
            do_sum=True):
        """Posterior/prior latent rollout plus per-modality reconstruction.

        Each step samples z from the posterior RNN (conditioned on context
        and target) and accumulates the KL against the prior RNN; the
        collected latents then drive one renderer per modality to compute
        a Gaussian reconstruction likelihood.

        Args:
            reps_context: per-episode context representations.
            context_sizes: context element counts per episode.
            reps_target: per-episode target representations.
            target_sizes: target element counts per episode.
            input_tuples: per-modality (target, queries, target_indices,
                batch_sizes) tuples aligned with self.dims.
            num_steps: optional override for self.num_steps.
            beta: weight on the KL term in the total loss.
            std: fixed std of the reconstruction Gaussian.
            is_grayscale: convert image modalities to grayscale first.
            do_sum: sum losses to scalars; otherwise keep per-episode terms.

        Returns:
            (mean_recons, latents, loss, info) where info carries detached
            'likelihood', 'kl' and per-modality likelihoods (computed
            without std, detached).
        """
        # init
        num_episodes = len(reps_context)
        #assert len(set([index for _, _, mod_target_indices, _ in input_tuples for index in mod_target_indices])) == num_episodes
        loss_kl = 0
        ''' forward posterior / prior '''
        # init states
        states_p = self.rnn_p.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        states_q = self.rnn_q.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        hiddens_p = [state_p[0] for state_p in states_p]
        hiddens_q = [state_q[0] for state_q in states_q]
        latents = []
        init_input_q = False
        init_input_p = False
        for i in range(num_steps if num_steps is not None else self.num_steps):
            # aggregate observations (posterior)
            # Done once on the first step: pad, sum over elements, reshape
            # to (B, C, H, W); posterior input is [target ; context].
            if not init_input_q:
                reps_context = pad_sequence(reps_context, context_sizes)
                reps_context = torch.sum(reps_context, dim=1)
                reps_context = reps_context.view(-1, self.nc_context,
                                                 self.z_height, self.z_width)

                reps_target = pad_sequence(reps_target, target_sizes)
                reps_target = torch.sum(reps_target, dim=1)
                reps_target = reps_target.view(-1, self.nc_context,
                                               self.z_height, self.z_width)

                input_q = torch.cat([reps_target, reps_context], dim=1)
                init_input_q = True

            # forward posterior
            means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
                input_q, states_q, hiddens_p)

            # sample z from posterior
            zs = self.rnn_q.sample(means_q, logvars_q)

            # aggregate observations (prior)
            if not init_input_p:
                input_p = reps_context
                init_input_p = True

            # forward prior (teacher-forced with the posterior samples)
            _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
                input_p, states_p, latents_q=zs)

            # append z to latent
            latents += [torch.cat(zs, dim=1).unsqueeze(1)
                        ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

            # update accumulated KL (summed over layers)
            for j in range(self.num_layers):
                loss_kl += loss_kld_gaussian_vs_gaussian(means_q[j],
                                                         logvars_q[j],
                                                         means_p[j],
                                                         logvars_p[j],
                                                         do_sum=do_sum)
        ''' likelihood '''
        info = {}
        info['mod_likelihoods'] = []
        loss_likelihood = 0 if do_sum else loss_kl.new_zeros(loss_kl.size())
        mean_recons = []
        for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
            channels, height, width, _, mtype = dim
            mod_target, mod_queries, mod_target_indices, mod_batch_sizes = input_tuple
            if len(mod_queries) > 0:  # is not None:
                num_mod_data = len(mod_target)
                assert sum(mod_batch_sizes) == num_mod_data

                # run renderer (likelihood)
                mod_mean_recon = self._forward_renderer(
                    idx, mod_queries, latents, num_episodes, mod_batch_sizes,
                    mod_target_indices)

                # convert to gray scale
                if mtype == 'image' and is_grayscale:
                    mod_mean_recon = rgb2gray(mod_mean_recon)
                    mod_target = rgb2gray(mod_target)

                # estimate recon loss
                loss_mod_likelihood = loss_recon_gaussian_w_fixed_var(
                    mod_mean_recon,
                    mod_target,
                    std=std,
                    add_logvar=False,
                    do_sum=do_sum)

                # estimate recon loss without std (detached, reporting only)
                loss_mod_likelihood_nostd = loss_recon_gaussian_w_fixed_var(
                    mod_mean_recon.detach(), mod_target, do_sum=do_sum)
            else:
                # Modality absent from this batch: empty recon, no loss.
                mod_mean_recon = reps_context.new_zeros(
                    0, channels, height, width)
                loss_mod_likelihood = None
                loss_mod_likelihood_nostd = None

            # append to list
            mean_recons += [mod_mean_recon]
            info['mod_likelihoods'] += [loss_mod_likelihood_nostd]

            # add to loss_likelihood
            if loss_mod_likelihood is not None:
                # sum to each episode
                if not do_sum:
                    _mod_batch_sizes = [
                        0
                    ] + np.cumsum(mod_batch_sizes).tolist()
                    for i, t_idx in enumerate(mod_target_indices):
                        loss_likelihood[t_idx] += torch.sum(
                            loss_mod_likelihood[
                                _mod_batch_sizes[i]:_mod_batch_sizes[i + 1]])
                else:
                    loss_likelihood += loss_mod_likelihood
        ''' loss '''
        # sum loss
        loss = loss_likelihood + beta * loss_kl

        # additional loss info
        info['likelihood'] = loss_likelihood.detach()
        info['kl'] = loss_kl.detach()

        # return
        #return img_mean_recon, hpt_mean_recon, None, loss, info
        return mean_recons, latents, loss, info
示例#19
0
    def generate(
            self,
            reps_context,
            context_sizes,
            input_tuples,
            #img_queries, img_batch_sizes,
            #hpt_queries, hpt_batch_sizes,
            num_steps=None,
            is_grayscale=False):
        """Sample latents from the prior and render each modality.

        Unlike forward(), this uses only the prior RNN (no target / no
        posterior), so it can generate for unseen queries.

        Args:
            reps_context: per-episode context representations.
            context_sizes: context element counts per episode.
            input_tuples: per-modality (queries, batch_sizes) tuples
                aligned with self.dims.
            num_steps: optional override for self.num_steps.
            is_grayscale: convert image modalities to grayscale.

        Returns:
            (mean_recons, None) where mean_recons holds one reconstruction
            per modality (None for modalities without queries).
        """
        # init
        num_episodes = len(reps_context)

        # init states
        states_p = self.rnn_p.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        hiddens_p = [state_p[0] for state_p in states_p]
        latents = []
        init_input_p = False
        for i in range(num_steps if num_steps is not None else self.num_steps):
            # Aggregate context once on the first step: pad, sum over
            # elements, reshape to a (B, C, H, W) feature map.
            if not init_input_p:
                reps_context = pad_sequence(reps_context, context_sizes)
                reps_context = torch.sum(reps_context, dim=1)
                reps_context = reps_context.view(-1, self.nc_context,
                                                 self.z_height, self.z_width)
                input_p = reps_context
                init_input_p = True

            # forward prior
            zs, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
                input_p, states_p)

            # append z to latent
            latents += [torch.cat(zs, dim=1).unsqueeze(1)
                        ] if len(zs) > 1 else [zs[0].unsqueeze(1)]
        ''' forward renderers '''
        mean_recons = []
        for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
            channels, height, width, _, mtype = dim
            mod_queries, mod_batch_sizes = input_tuple

            # forward image renderer
            if len(mod_queries) > 0:
                # forward image renderer
                mod_mean_recon = self._forward_renderer(
                    idx, mod_queries, latents, num_episodes, mod_batch_sizes)

                # convert to gray scale
                if mtype == 'image' and is_grayscale:
                    mod_mean_recon = rgb2gray(mod_mean_recon)
            else:
                mod_mean_recon = None  #reps_context.new_zeros(1, channels, height, width)

            # append to list
            mean_recons += [mod_mean_recon]

        ## temporary
        #img_mean_recon, hpt_mean_recon = mean_recons[0], mean_recons[1]

        # return
        #return img_mean_recon, hpt_mean_recon, None
        return mean_recons, None
示例#20
0
    def fit(self, x, y, epochs=5, generator_sample=32, discriminator_sample=8):
        """Train the generator/discriminator pair GAN-style.

        Per epoch: (1) a pure language-model step on the generator with
        categorical cross-entropy; (2) a joint step where the
        discriminator is trained on real/generated pairs and the
        generator is trained to fool it. Metrics are printed each epoch.

        Args:
            x: source token sequences (encoder side).
            y: target token sequences (decoder side).
            epochs: number of training epochs.
            generator_sample: batch size for the generator step.
            discriminator_sample: batch size for the discriminator step.
        """
        gen_loss_tracker = keras.metrics.Mean(name="loss")
        gen_metric = keras.metrics.CategoricalAccuracy(name="categorical_acc")
        disc_loss_tracker = keras.metrics.Mean(name="loss")
        disc_metric = keras.metrics.CategoricalAccuracy(name="categorical_acc")

        # Vectorize and pad the full dataset once, up front.
        x_encoder_sequences = pad_sequence([make_vectors(x[i], self.word2id) for i in range(len(x))], max_length=MAX_LENGTH)
        x_decoder_sequences = pad_sequence([make_vectors(y[i], self.word2id, is_target=True) for i in range(len(y))],
                                           max_length=MAX_LENGTH)
        y_target_main = pad_sequence([make_vectors(y[i], self.word2id) for i in range(len(x))], max_length=MAX_LENGTH)


        for epoch in range(epochs):
            # generator
            x_enc_sample, x_dec_sample, y_target_sample = sample_generator_data(x_encoder_sequences,
                                                                                x_decoder_sequences, y_target_main,
                                                                                len(self.word2id),
                                                                                sample_size=generator_sample)

            x_sample_1, x_sample_2, y_target = sample_discriminator_data(x_encoder_sequences, x_decoder_sequences,
                                                                         y_target_main, len(self.word2id), self.generator,
                                                                         sample_size=discriminator_sample)

            # Indices of discriminator samples that came from the generator
            # (label [1, 0]); currently collected but not used below.
            gen_indices = [ind for ind in range(discriminator_sample) if y_target[ind][0] == 1]
            x_sample_1_gen = np.array([x_sample_1[ind] for ind in gen_indices]).reshape((len(gen_indices), -1))
            x_sample_2_gen = np.array([x_sample_2[ind] for ind in gen_indices]).reshape((len(gen_indices), -1))
            y_target_gen = np.array([y_target[ind] for ind in gen_indices]).reshape((len(gen_indices), -1))

            # only generator - simple language model training
            with tf.GradientTape(persistent=True) as tape_gen:
                y_pred_gen = self.generator.model([x_enc_sample, x_dec_sample], training=True)  # Forward pass
                gen_loss = tf.reduce_sum(keras.losses.categorical_crossentropy(y_target_sample, y_pred_gen))

            gen_trainable_vars = self.generator.model.trainable_variables
            gen_gradients = tape_gen.gradient(gen_loss, gen_trainable_vars)
            # Update weights
            self.generator.model.optimizer.apply_gradients(zip(gen_gradients, gen_trainable_vars))

            # both generator and discriminator (GAN-style)
            with tf.GradientTape(persistent=True) as tape:
                y_pred_gen = self.generator.model([x_enc_sample, x_dec_sample], training=True)  # Forward pass
                y_pred_disc = self.discriminator.model([x_sample_1, x_sample_2], training=True) # Forward pass
                # One-hot encode the encoder inputs to feed the
                # discriminator alongside the generator's soft outputs.
                x_input = np.zeros((generator_sample, MAX_LENGTH, len(self.word2id)))
                for sample in range(generator_sample):
                    for seq_num in range(MAX_LENGTH):
                        x_input[sample][seq_num][x_enc_sample[sample][seq_num]] = 1
                target = to_categorical([0 for i in range(generator_sample)], num_classes=2)   # 0 if from generator 1 if real

                y_pred_disc_gen = self.discriminator.model([x_input, y_pred_gen], training=True)

                # Compute our own loss
                disc_loss = keras.losses.categorical_crossentropy(y_target, y_pred_disc)
                gen_loss = keras.losses.categorical_crossentropy(target, y_pred_disc_gen)

            # Compute gradients
            gen_trainable_vars = self.generator.model.trainable_variables
            gen_gradients = tape.gradient(gen_loss, gen_trainable_vars)
            disc_trainable_vars = self.discriminator.model.trainable_variables
            disc_gradients = tape.gradient(disc_loss, disc_trainable_vars)

            # Update weights
            self.generator.model.optimizer.apply_gradients(zip(gen_gradients, gen_trainable_vars))
            self.discriminator.model.optimizer.apply_gradients(zip(disc_gradients, disc_trainable_vars))

            # Compute our own metrics
            gen_loss_tracker.update_state(gen_loss)
            gen_metric.update_state(y_target_sample, y_pred_gen)
            disc_loss_tracker.update_state(disc_loss)
            disc_metric.update_state(y_target, y_pred_disc)

            print_metrics = {"gen_loss": gen_loss_tracker.result().numpy(),
                             "gen_metric": gen_metric.result().numpy(),
                             "disc_loss": disc_loss_tracker.result().numpy(),
                             "disc_metric": disc_metric.result().numpy()}
            print(f"Epoch - {epoch} \n Metrics - {print_metrics}")
示例#21
0
def batchify_fn(batch):
    """Collate a batch of samples into padded, field-major model inputs.

    Args:
        batch: list of samples, each an iterable of per-field sequences
            (all samples must have the same number of fields).

    Returns:
        list: one padded array per field, produced by ``pad_sequence``.
    """
    # zip(*batch) transposes the sample-major batch into field-major groups;
    # the original wrapped it in an identity comprehension ([raw for raw in
    # zip(*batch)]) and rebound `batch`, both of which are unnecessary.
    return [pad_sequence(field) for field in zip(*batch)]
示例#22
0
    def predict(self,
                reps_context,
                context_sizes,
                reps_target,
                target_sizes,
                input_tuples,
                num_steps=None,
                beta=1.0,
                std=1.0,
                is_grayscale=False,
                use_uint8=True):
        """Sample latents from the posterior and score/reconstruct the targets.

        Rolls out the posterior (``rnn_q``) and prior (``rnn_p``) recurrent
        cores for a number of steps, sampling latents ``z`` from the posterior
        at each step.  Two latent-space quantities are accumulated: ``loss_kl``
        (KL of posterior vs prior via ``loss_kld_gaussian_vs_gaussian``) and
        ``logprob_kl`` (per-sample ``log p_prior(z) - log q_posterior(z)``).
        Each modality is then rendered and scored under a fixed-variance
        Gaussian likelihood.

        Args:
            reps_context: per-episode context representations; padded,
                sum-pooled over entries, and reshaped to
                ``(-1, nc_context, z_height, z_width)`` on the first step.
            context_sizes: valid-entry counts per episode for padding.
            reps_target: per-episode target representations (posterior input),
                aggregated the same way as ``reps_context``.
            target_sizes: valid-entry counts per episode for ``reps_target``.
            input_tuples: one ``(target, queries, target_indices,
                batch_sizes)`` tuple per modality, aligned with ``self.dims``.
            num_steps: number of inference steps; falls back to
                ``self.num_steps`` when ``None``.
            beta: weight on the KL term in the (locally computed) ``loss``.
            std: fixed standard deviation for the Gaussian likelihood terms.
            is_grayscale: if True, image modalities are converted to grayscale
                before scoring.
            use_uint8: if True, image values are scaled to [0, 255] before
                scoring; otherwise divided down to [0, 1] when grayscaling.

        Returns:
            tuple: ``(mean_recons, latents, logprob, info)`` — per-modality
            reconstructions, per-step sampled latents, the combined
            likelihood + latent log-prob estimate, and a dict of per-modality
            loss terms plus ``'likelihood'`` and ``'kl'``.
        """
        # init
        num_episodes = len(reps_context)
        logprob_kl = 0
        loss_kl = 0
        ''' forward posterior / prior '''
        # init states
        states_p = self.rnn_p.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        states_q = self.rnn_q.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        hiddens_p = [state_p[0] for state_p in states_p]
        # NOTE(review): hiddens_q is reassigned by rnn_q below before any use.
        hiddens_q = [state_q[0] for state_q in states_q]
        latents = []
        # The pad/sum/reshape aggregation runs once, on the first iteration.
        init_input_q = False
        init_input_p = False
        for i in range(num_steps if num_steps is not None else self.num_steps):
            # aggregate observations (posterior)
            if not init_input_q:
                reps_context = pad_sequence(reps_context, context_sizes)
                reps_context = torch.sum(reps_context, dim=1)
                reps_context = reps_context.view(-1, self.nc_context,
                                                 self.z_height, self.z_width)

                reps_target = pad_sequence(reps_target, target_sizes)
                reps_target = torch.sum(reps_target, dim=1)
                reps_target = reps_target.view(-1, self.nc_context,
                                               self.z_height, self.z_width)

                # Posterior conditions on both target and context reps.
                input_q = torch.cat([reps_target, reps_context], dim=1)
                init_input_q = True

            # forward posterior
            means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
                input_q, states_q, hiddens_p)

            # sample z from posterior
            zs = self.rnn_q.sample(means_q, logvars_q)

            # aggregate observations (prior)
            if not init_input_p:
                input_p = reps_context
                init_input_p = True

            # forward prior
            _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
                input_p, states_p, latents_q=zs)

            # append z to latent
            latents += [torch.cat(zs, dim=1).unsqueeze(1)
                        ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

            # update accumulated KL
            # logprob_kl accumulates log p(z) - log q(z) per sample
            # (do_sum=False keeps the per-element shape).
            for j in range(self.num_layers):
                loss_kl += loss_kld_gaussian_vs_gaussian(
                    means_q[j], logvars_q[j], means_p[j], logvars_p[j])
                logprob_kl += logprob_gaussian(
                    means_p[j],  #.view(num_episodes, -1),
                    logvars_p[j],  #.view(num_episodes, -1),
                    zs[j],  #.view(num_episodes, -1),
                    do_sum=False)
                logprob_kl += -logprob_gaussian(
                    means_q[j],  #.view(num_episodes, -1),
                    logvars_q[j],  #.view(num_episodes, -1),
                    zs[j],  #.view(num_episodes, -1),
                    do_sum=False)
        ''' likelihood '''
        info = {}
        info['logprob_mod_likelihoods'] = []
        logprob_likelihood = 0
        info['mod_likelihoods'] = []
        loss_likelihood = 0
        mean_recons = []
        for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
            channels, height, width, _, mtype = dim
            mod_target, mod_queries, mod_target_indices, mod_batch_sizes = input_tuple
            if len(mod_queries) > 0:  # is not None:
                num_mod_data = len(mod_target)
                assert sum(mod_batch_sizes) == num_mod_data

                # run renderer (likelihood)
                mod_mean_recon = self._forward_renderer(
                    idx, mod_queries, latents, num_episodes, mod_batch_sizes,
                    mod_target_indices).detach()

                # convert to gray scale
                if mtype == 'image' and is_grayscale:
                    mod_mean_recon = rgb2gray(mod_mean_recon)
                    mod_target = rgb2gray(mod_target)
                    if not use_uint8:
                        mod_mean_recon = mod_mean_recon / 255
                        mod_target = mod_target / 255
                elif mtype == 'image' and use_uint8:
                    mod_mean_recon = 255 * mod_mean_recon
                    mod_target = 255 * mod_target

                # estimate recon loss
                loss_mod_likelihood = loss_recon_gaussian_w_fixed_var(
                    mod_mean_recon, mod_target, std=std,
                    add_logvar=False).detach()
                logprob_mod_likelihood = logprob_gaussian_w_fixed_var(
                    mod_mean_recon,  #.view(num_episodes, -1),
                    mod_target,  #.view(num_episodes, -1),
                    std=std,
                    do_sum=False).detach()

                # estimate recon loss without std
                # NOTE(review): loss_mod_likelihood_nostd is computed but
                # never stored or returned in the visible code.
                loss_mod_likelihood_nostd = loss_recon_gaussian_w_fixed_var(
                    mod_mean_recon.detach(), mod_target).detach()
                #logprob_mod_likelihood_nostd = logprob_gaussian_w_fixed_var(
                #            mod_mean_recon.detach(), #.view(num_episodes, -1),
                #            mod_target, #.view(num_episodes, -1),
                #            do_sum=False).detach()

                # sum per episode
                logprob_mod_likelihood = sum_tensor_per_episode(
                    logprob_mod_likelihood, mod_batch_sizes,
                    mod_target_indices, num_episodes)
            else:
                # Modality absent from this batch: emit an empty recon tensor
                # and skip its loss terms.
                mod_mean_recon = reps_context.new_zeros(
                    0, channels, height, width)
                loss_mod_likelihood = None
                loss_mod_likelihood_nostd = None
                logprob_mod_likelihood = None

            # add to loss_likelihood
            if loss_mod_likelihood is not None:
                loss_likelihood += loss_mod_likelihood
            if logprob_mod_likelihood is not None:
                logprob_likelihood += logprob_mod_likelihood

            # append to list
            mean_recons += [mod_mean_recon]
            info['mod_likelihoods'] += [loss_mod_likelihood]
            info['logprob_mod_likelihoods'] += [logprob_mod_likelihood]
        ''' loss '''
        # sum loss
        # NOTE(review): `loss` is computed but only `logprob` is returned.
        loss = loss_likelihood + beta * loss_kl
        logprob = logprob_likelihood + logprob_kl

        # additional loss info
        # loss_likelihood stays the int 0 when no modality contributed.
        info['likelihood'] = loss_likelihood.detach() if type(
            loss_likelihood) is not int else 0
        info['kl'] = loss_kl.detach()

        # return
        #return img_mean_recon, hpt_mean_recon, None, loss, info
        #return mean_recons, latents, loss, info
        return mean_recons, latents, logprob, info
示例#23
0
    def infogain(self,
                 reps_context,
                 context_sizes,
                 reps_target,
                 target_sizes,
                 input_tuples,
                 num_steps=None,
                 beta=1.0,
                 std=1.0):
        """Estimate the latent information gain of targets over the context.

        Mirrors the posterior/prior rollout of ``predict`` but accumulates
        only the per-sample log-density difference
        ``log q_posterior(z) - log p_prior(z)`` (a Monte-Carlo KL estimate)
        over all steps and layers; no rendering or likelihood is computed.

        Args:
            reps_context: per-episode context representations; padded,
                sum-pooled, and reshaped to
                ``(-1, nc_context, z_height, z_width)`` on the first step.
            context_sizes: valid-entry counts per episode for padding.
            reps_target: per-episode target representations (posterior input).
            target_sizes: valid-entry counts per episode for ``reps_target``.
            input_tuples: accepted for API symmetry with ``predict``; unused
                in this method's visible body.
            num_steps: number of inference steps; falls back to
                ``self.num_steps`` when ``None``.
            beta: unused here (kept for signature symmetry with ``predict``).
            std: unused here (kept for signature symmetry with ``predict``).

        Returns:
            tuple: ``(None, latents, loss_kl.detach(), info)`` — no
            reconstructions, the per-step sampled latents, the detached
            accumulated KL estimate, and ``info['kl']`` holding the same
            value.
        """
        # init
        num_episodes = len(reps_context)
        loss_kl = 0
        ''' forward posterior / prior '''
        # init states
        states_p = self.rnn_p.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        states_q = self.rnn_q.init_state(num_episodes,
                                         [self.z_height, self.z_width])
        hiddens_p = [state_p[0] for state_p in states_p]
        # NOTE(review): hiddens_q is reassigned by rnn_q below before any use.
        hiddens_q = [state_q[0] for state_q in states_q]
        latents = []
        # The pad/sum/reshape aggregation runs once, on the first iteration.
        init_input_q = False
        init_input_p = False
        for i in range(num_steps if num_steps is not None else self.num_steps):
            # aggregate observations (posterior)
            if not init_input_q:
                reps_context = pad_sequence(reps_context, context_sizes)
                reps_context = torch.sum(reps_context, dim=1)
                reps_context = reps_context.view(-1, self.nc_context,
                                                 self.z_height, self.z_width)

                reps_target = pad_sequence(reps_target, target_sizes)
                reps_target = torch.sum(reps_target, dim=1)
                reps_target = reps_target.view(-1, self.nc_context,
                                               self.z_height, self.z_width)

                # Posterior conditions on both target and context reps.
                input_q = torch.cat([reps_target, reps_context], dim=1)
                init_input_q = True

            # forward posterior
            means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
                input_q, states_q, hiddens_p)

            # sample z from posterior
            zs = self.rnn_q.sample(means_q, logvars_q)

            # aggregate observations (prior)
            if not init_input_p:
                input_p = reps_context
                init_input_p = True

            # forward prior
            _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
                input_p, states_p, latents_q=zs)

            # append z to latent
            latents += [torch.cat(zs, dim=1).unsqueeze(1)
                        ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

            # update accumulated KL
            # Accumulates log q(z) - log p(z) per sample (Monte-Carlo KL;
            # do_sum=False keeps the per-element shape).
            for j in range(self.num_layers):
                #loss_kl += loss_kld_gaussian_vs_gaussian(means_q[j], logvars_q[j], means_p[j], logvars_p[j], do_sum=False)
                loss_kl += logprob_gaussian(
                    means_q[j],  #.view(num_episodes, -1),
                    logvars_q[j],  #.view(num_episodes, -1),
                    zs[j],  #.view(num_episodes, -1),
                    do_sum=False)
                loss_kl += -logprob_gaussian(
                    means_p[j],  #.view(num_episodes, -1),
                    logvars_p[j],  #.view(num_episodes, -1),
                    zs[j],  #.view(num_episodes, -1),
                    do_sum=False)
        ''' loss '''
        # additional loss info
        info = {}
        info['kl'] = loss_kl.detach()

        # return
        #return img_mean_recon, hpt_mean_recon, None, loss, info
        #return mean_recons, latents, loss, info
        return None, latents, loss_kl.detach(), info
示例#24
0
def convert_example(example,
                    vocabs,
                    encoding_model='ernie-1.0',
                    feat=None,
                    mode='train',
                    fix_len=20):
    """Builds model inputs for dependency parsing task.

    Converts one CoNLL-style example into index arrays: word (and optional
    feature) ids framed by boundary markers, plus head-arc and relation ids
    when gold annotations are available (mode != "test").
    """
    word_vocab, feat_vocab, rel_vocab = vocabs

    # LSTM encoders use [BOS]/[EOS] boundary tokens; pretrained transformer
    # encoders reuse their [CLS]/[SEP] specials instead.
    use_lstm = encoding_model == "lstm"
    bos_token, eos_token = ("[BOS]", "[EOS]") if use_lstm else ("[CLS]", "[SEP]")
    word_bos_index = word_vocab.to_indices(bos_token)
    word_eos_index = word_vocab.to_indices(eos_token)

    if feat_vocab:
        feat_bos_index = feat_vocab.to_indices("[BOS]")
        feat_eos_index = feat_vocab.to_indices("[EOS]")

    arc_bos_index, arc_eos_index = 0, 1
    rel_bos_index = rel_vocab.to_indices("[BOS]")
    rel_eos_index = rel_vocab.to_indices("[EOS]")

    # Gold heads/relations exist only outside test mode.
    if mode != "test":
        arcs = np.array(
            [arc_bos_index] + list(example["HEAD"]) + [arc_eos_index],
            dtype=int)
        rels = np.array(
            [rel_bos_index] + rel_vocab.to_indices(example["DEPREL"]) +
            [rel_eos_index],
            dtype=int)

    if use_lstm:
        words = np.array(
            [word_bos_index] + word_vocab.to_indices(example["FORM"]) +
            [word_eos_index],
            dtype=int)

        if feat == "pos":
            feats = np.array(
                [feat_bos_index] + feat_vocab.to_indices(example["CPOS"]) +
                [feat_eos_index],
                dtype=int)
        else:
            # Character-level features: one id list per word, truncated to
            # fix_len and padded into a fixed-width matrix.
            char_ids = [[feat_vocab.to_indices(token) for token in word]
                        for word in example["FORM"]]
            char_ids = [[feat_bos_index]] + char_ids + [[feat_eos_index]]
            feats = pad_sequence(
                [np.array(ids[:fix_len], dtype=int) for ids in char_ids],
                fix_len=fix_len)

        if mode == "test":
            return words, feats
        return words, feats, arcs, rels

    # Non-LSTM encoders consume per-word character/subword id matrices.
    char_ids = [[word_vocab.to_indices(char) for char in word]
                for word in example["FORM"]]
    char_ids = [[word_bos_index]] + char_ids + [[word_eos_index]]
    words = pad_sequence(
        [np.array(ids[:fix_len], dtype=int) for ids in char_ids],
        fix_len=fix_len)
    if mode == "test":
        return [words]
    return words, arcs, rels
示例#25
0
(2) max_length
"""

# Longest sequence in each split; used as the padding target below.
max_len_inp_train = max_length(input_tensor_train)
max_len_inp_valid = max_length(input_tensor_val)
max_len_inp_test = max_length(input_tensor_test)

max_len_tgt_train = max_length(target_tensor_train)
max_len_tgt_valid = max_length(target_tensor_val)
max_len_tgt_test = max_length(target_tensor_test)
"""
(3) padding sequence
"""

# Pad every sequence in each split out to that split's own maximum length.
input_tensor_train = [pad_sequence(seq, max_len_inp_train) for seq in input_tensor_train]
target_tensor_train = [pad_sequence(seq, max_len_tgt_train) for seq in target_tensor_train]

input_tensor_valid = [pad_sequence(seq, max_len_inp_valid) for seq in input_tensor_val]
target_tensor_valid = [pad_sequence(seq, max_len_tgt_valid) for seq in target_tensor_val]

input_tensor_test = [pad_sequence(seq, max_len_inp_test) for seq in input_tensor_test]