def batch_processing(batch_data, args):
    """Collate encoder/decoder samples into padded arrays plus lengths.

    Args:
        batch_data: list of dicts with 'encode_seq_ids', 'decode_seq_ids'
            and 'decoder_labels' (variable-length token-id lists).
        args: unused here; kept so all collate functions share a signature.

    Returns:
        ([encode_seq_ids, encode_seq_len, decode_seq_ids, decode_seq_len],
         decoder_labels) where id/label arrays are padded with
        config.PAD_WORD and the length arrays are float32 numpy arrays of
        the original (pre-padding) lengths.
    """
    encode_seq_ids = []
    encode_seq_len = []
    decode_seq_ids = []
    decode_seq_len = []
    decoder_labels = []
    for data in batch_data:
        encode_seq_ids.append(data['encode_seq_ids'])
        encode_seq_len.append(len(data['encode_seq_ids']))
        decode_seq_ids.append(data['decode_seq_ids'])
        decode_seq_len.append(len(data['decode_seq_ids']))
        decoder_labels.append(data['decoder_labels'])
    # Pad the ragged id lists to rectangular arrays; keep the true lengths
    # so downstream models can mask the padding.
    encode_seq_ids = utils.pad_sequence(encode_seq_ids, padder=config.PAD_WORD)
    encode_seq_len = np.array(encode_seq_len).astype(np.float32)
    decode_seq_ids = utils.pad_sequence(decode_seq_ids, padder=config.PAD_WORD)
    decode_seq_len = np.array(decode_seq_len).astype(np.float32)
    decoder_labels = utils.pad_sequence(decoder_labels, padder=config.PAD_WORD)
    return [encode_seq_ids, encode_seq_len, decode_seq_ids, decode_seq_len], decoder_labels
def __next__(self):
    """Produce the next (input, lengths, target, lengths) batch.

    Stops iteration once fewer than ``batch_size`` samples remain, so the
    final partial batch is dropped.
    """
    if self.data_num - self.idx < self.batch_size:
        raise StopIteration
    chosen = self.indices[self.idx:self.idx + self.batch_size]
    picked = [self.pairs[i] for i in chosen]
    src = [indexesFromSentence(self.lang, p[0], self.vocab_size) for p in picked]
    tgt = [indexesFromSentence(self.lang, p[1], self.vocab_size) for p in picked]
    # Sort jointly by source length, longest first (packed-RNN convention).
    ordered = sorted(zip(src, tgt), key=lambda st: len(st[0]), reverse=True)
    src, tgt = zip(*ordered)
    # Record true lengths, then pad each side to its own max length.
    src_lens = [len(s) for s in src]
    src_padded = [pad_sequence(s, max(src_lens)) for s in src]
    tgt_lens = [len(s) for s in tgt]
    tgt_padded = [pad_sequence(s, max(tgt_lens)) for s in tgt]
    # (batch, seq) -> (seq, batch)
    src_tensor = torch.LongTensor(src_padded).transpose(0, 1)
    tgt_tensor = torch.LongTensor(tgt_padded).transpose(0, 1)
    if self.use_cuda:
        src_tensor = src_tensor.cuda()
        tgt_tensor = tgt_tensor.cuda()
    self.idx += self.batch_size
    return src_tensor, src_lens, tgt_tensor, tgt_lens
def infer(
        self,
        reps_context,
        context_sizes,
        num_steps=None,
):
    """Roll the prior RNN forward and collect sampled latents.

    Args:
        reps_context: per-episode context representations; padded, summed
            over the context axis and reshaped into feature maps on the
            first loop iteration.
        context_sizes: valid-entry counts per episode, consumed by
            pad_sequence.
        num_steps: optional override for self.num_steps sampling steps.

    Returns:
        list of per-step latent tensors (layer samples concatenated on
        dim 1 when the prior RNN has multiple layers).
    """
    # init
    num_episodes = len(reps_context)

    # init states
    states_p = self.rnn_p.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    hiddens_p = [state_p[0] for state_p in states_p]
    latents = []
    init_input_p = False
    for i in range(num_steps if num_steps is not None else self.num_steps):
        # The prior input is constant across steps, so build it only once:
        # pad ragged contexts, sum over entries, reshape to feature maps.
        if not init_input_p:
            reps_context = pad_sequence(reps_context, context_sizes)
            reps_context = torch.sum(reps_context, dim=1)
            reps_context = reps_context.view(-1, self.nc_context,
                                             self.z_height, self.z_width)
            input_p = reps_context
            init_input_p = True

        # forward prior
        zs, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
            input_p, states_p)

        # append z to latent
        latents += [torch.cat(zs, dim=1).unsqueeze(1)
                    ] if len(zs) > 1 else [zs[0].unsqueeze(1)]
    return latents
def query_model(sess, input_node, predictions, vocab, rev_vocab, max_seq_len,
                output_embs_for_all_vocab):
    """Run the model over every concept-description line and write the
    top-10 nearest-vocabulary candidates (cosine similarity) to the BOW
    output file, echoing each candidate to stdout."""
    with tf.gfile.GFile("data/definitions/concept_descriptions.tok",
                        mode="r") as data_file:
        with tf.gfile.GFile("data/output/concept_BOW.txt",
                            mode="w") as output_file:
            for line in data_file:
                top = 10
                # Drop the head word (first token) before padding.
                token_ids = utils.sentence_to_token_ids(line, vocab)
                batch = np.asarray(
                    [np.asarray(utils.pad_sequence(token_ids[1:],
                                                   max_seq_len))])
                preds = sess.run(predictions, feed_dict={input_node: batch})
                # Cosine similarity against every output embedding.
                cosine = dist.cdist(preds, output_embs_for_all_vocab,
                                    metric="cosine")
                sims = np.nan_to_num(1 - np.squeeze(cosine))
                best = sims.argsort()[::-1][:top]
                for ii, cand in enumerate(rev_vocab[idx] for idx in best):
                    output_file.write(cand + " ")
                    print(cand + " ")
                output_file.write("\n")
                output_file.flush()
                print("\n")
def query_model(sess, input_node, predictions, vocab, rev_vocab, max_seq_len,
                output_embs_for_all_vocab):
    """Interactive loop: read a definition from stdin, run the model, and
    print the requested number of nearest-vocabulary candidates. Loops
    forever (exit with EOF / interrupt)."""
    def _prompt(text):
        # Write the prompt, flush, and return one line of user input.
        sys.stdout.write(text)
        sys.stdout.flush()
        return sys.stdin.readline()

    while True:
        sentence = _prompt("Type a definition: ")
        top = int(_prompt("Number of candidates: "))
        token_ids = utils.sentence_to_token_ids(sentence, vocab)
        batch = np.asarray(
            [np.asarray(utils.pad_sequence(token_ids, max_seq_len))])
        preds = sess.run(predictions, feed_dict={input_node: batch})
        # Cosine similarity against every output embedding.
        sims = np.nan_to_num(
            1 - np.squeeze(dist.cdist(preds, output_embs_for_all_vocab,
                                      metric="cosine")))
        best = sims.argsort()[::-1][:top]
        print("\n Top %s candidates from the RNN model:" % top)
        for ii, cand in enumerate(rev_vocab[idx] for idx in best):
            print("%s: %s" % (ii + 1, cand))
        sys.stdout.flush()
        # Consume the trailing line before re-prompting.
        sentence = sys.stdin.readline()
def read_file(self, vocab_file, data_file, max_seq_len):
    """Load a pickled vocabulary and index every row of the TSV's 'text'
    column, padding each id sequence to max_seq_len.

    Returns:
        list of padded id sequences, one per row.
    """
    with open(vocab_file, 'rb') as fh:
        lang = pkl.load(fh)
    frame = pd.read_csv(data_file, delimiter='\t')
    return [
        pad_sequence([lang.word2index(tok) for tok in text.split(' ')],
                     max_seq_len)
        for text in frame['text']
    ]
def batch_process_relation(batch_data, args, Train=None):
    """Collate relation samples into id arrays, padded neighbor-context
    arrays, labels, and (optionally) auxiliary link labels.

    Args:
        batch_data: list of dicts with 't1_id', 't2_id', 'label', plus raw
            terms 't1'/'t2' used for neighbor retrieval.
        args: config namespace; reads cos_neighbors, num_contexts,
            node_to_id, use_context, use_aux_loss, link_set.
        Train: unused; presumably kept for a uniform collate signature.

    Returns:
        ([t1_batch, t1_contexts, t2_batch, t2_contexts], label_batch,
         aux_label_batch). aux_label_batch remains an empty list when the
        auxiliary loss is disabled.
    """
    t1_batch = []
    t2_batch = []
    label_batch = []
    t1_contexts = []
    t2_contexts = []
    aux_label_batch = []
    for data in batch_data:
        t1_batch.append(data['t1_id'])
        t2_batch.append(data['t2_id'])
        label_batch.append(data['label'])
        # sample neighbors for each endpoint term
        t1_ctx = context_retriever(data['t1'], args.cos_neighbors,
                                   args.num_contexts, args.node_to_id)
        t1_contexts.append(t1_ctx)
        t2_ctx = context_retriever(data['t2'], args.cos_neighbors,
                                   args.num_contexts, args.node_to_id)
        t2_contexts.append(t2_ctx)
    t1_batch = np.array(t1_batch)
    t2_batch = np.array(t2_batch)
    t1_contexts = utils.pad_sequence(t1_contexts, padder=0)
    t2_contexts = utils.pad_sequence(t2_contexts, padder=0)
    label_batch = np.array(label_batch)
    if args.use_context and args.use_aux_loss:
        # Auxiliary label: 1 iff the (t1, t2) context pair is a known link.
        # NOTE(review): this iterates the *padded* context arrays, so the
        # pad value 0 also enters the (int(t1), int(t2)) pairs — confirm
        # node id 0 never appears in args.link_set.
        for i in range(len(batch_data)):
            cur_aux_label = []
            for t1 in t1_contexts[i]:
                for t2 in t2_contexts[i]:
                    if (int(t1), int(t2)) in args.link_set:
                        cur_aux_label.append(1)
                    else:
                        cur_aux_label.append(0)
            aux_label_batch.append(cur_aux_label)
        aux_label_batch = np.array(aux_label_batch)
    return [t1_batch, t1_contexts, t2_batch, t2_contexts], label_batch, aux_label_batch
def featurize(self, batch):
    """Build padded tensor features for a batch of examples.

    Per example: the BERT-tokenized utterance (wrapped in [CLS]/[SEP]),
    one padded id matrix per example for its 'query_context' tables (with
    a matching mask), the per-table column start/end offsets, and — during
    training only — the padded pointer supervision.

    Returns:
        dict with keys 'utterance', 'utterance_mask', 'tables',
        'tables_mask', 'starts', 'ends', 'query_pointer' (None at
        inference) and the raw 'batch'.
    """
    feat = defaultdict(list)
    cls_token = self.bert_tokenizer.cls_token
    sep_token = self.bert_tokenizer.sep_token
    for ex in batch:
        if self.training:
            # Pointer targets only exist for training examples.
            feat['query_pointer'].append(torch.tensor(ex['pointer_query']))
        feat['utterance'].append(
            torch.tensor(
                self.bert_tokenizer.convert_tokens_to_ids(
                    [cls_token] + ex['g_question_toks'] + [sep_token])))
        tables = []
        tables_mask = []
        starts, ends = [], []
        for t in ex['query_context']:
            tens = torch.tensor(
                self.bert_tokenizer.convert_tokens_to_ids(t['toks']))
            tables.append(tens)
            tables_mask.append(torch.ones_like(tens))
            # Column token spans within this table's token sequence.
            starts.append([c['start'] for c in t['columns']])
            ends.append([c['end'] for c in t['columns']])
        # Pad this example's tables to a rectangular matrix.
        feat['tables'].append(
            utils.pad_sequence(tables, self.bert_tokenizer.pad_token_id,
                               self.device))
        feat['tables_mask'].append(
            utils.pad_sequence(tables_mask, 0, self.device).float())
        feat['starts'].append(starts)
        feat['ends'].append(ends)
    feat['query_pointer'] = utils.pad_sequence(
        feat['query_pointer'], self.pad_id,
        self.device) if self.training else None
    feat['utterance_mask'] = utils.pad_sequence(
        [torch.ones(len(t)) for t in feat['utterance']], 0, self.device)
    feat['utterance'] = utils.pad_sequence(feat['utterance'], self.pad_id,
                                           self.device)
    feat['batch'] = batch
    return feat
def __init__(self, dataset, batch_sampler):
    """Eagerly materialize every batch: collate each sampled index group
    and pad any collated field whose elements are numpy arrays."""
    self.batches = []
    for sample_ids in batch_sampler:
        collated = self._collate_fn(
            [dataset[sid] for sid in sample_ids])
        padded = [
            pad_sequence(field) if isinstance(field[0], np.ndarray) else field
            for field in collated
        ]
        self.batches.append(padded)
def batch_process_term(batch_data, args):
    """Collate term samples into padded id arrays plus labels.

    Args:
        batch_data: list of dicts with 'y', 'word_ids', 'word_len',
            'ngram_ids'.
        args: unused; kept for a uniform collate-fn signature.

    Returns:
        ([word_ids, word_lengths, ngram_ids, ngram_lengths], y) — id
        arrays padded and cast to int, lengths as float32 arrays.
    """
    labels = np.array([s['y'] for s in batch_data])
    word_ids = utils.pad_sequence(
        [s['word_ids'] for s in batch_data]).astype(int)
    word_lengths = np.array(
        [s['word_len'] for s in batch_data]).astype(np.float32)
    ngram_ids = utils.pad_sequence(
        [s['ngram_ids'] for s in batch_data]).astype(int)
    ngram_lengths = np.array(
        [len(s['ngram_ids']) for s in batch_data]).astype(np.float32)
    return [word_ids, word_lengths, ngram_ids, ngram_lengths], labels
def batch_process_ns(batch_data, args):
    """Collate negative-sampling samples into padded id arrays, lengths
    and context ids.

    Args:
        batch_data: list of dicts with 'word_ids', 'word_len',
            'ngram_ids', 'context'.
        args: unused; kept for a uniform collate-fn signature.

    Returns:
        [word_ids, word_lengths, ngram_ids, ngram_lengths, contexts].
    """
    word_ids = utils.pad_sequence(
        [s['word_ids'] for s in batch_data]).astype(int)
    word_lengths = np.array(
        [s['word_len'] for s in batch_data]).astype(np.float32)
    ngram_ids = utils.pad_sequence(
        [s['ngram_ids'] for s in batch_data]).astype(int)
    ngram_lengths = np.array(
        [len(s['ngram_ids']) for s in batch_data]).astype(np.float32)
    contexts = np.array([s['context'] for s in batch_data])
    return [word_ids, word_lengths, ngram_ids, ngram_lengths, contexts]
def featurize(self, batch):
    """Build padded tensor features for a batch of question-context
    examples.

    Per example: the token ids of 'question_context' with a matching
    ones-mask, and — during training only — the padded pointer
    supervision.

    Returns:
        dict with keys 'context', 'context_mask', 'utt_pointer' (None at
        inference) and the raw 'batch'.
    """
    # Fix: removed unused locals cls_token/sep_token — this variant never
    # wraps the tokens (unlike the sibling featurize that builds
    # [CLS] ... [SEP] sequences).
    feat = defaultdict(list)
    for ex in batch:
        ids = torch.tensor(
            self.bert_tokenizer.convert_tokens_to_ids(
                ex['question_context']))
        feat['context'].append(ids)
        feat['context_mask'].append(torch.ones_like(ids))
        if self.training:
            # Pointer targets only exist for training examples.
            feat['utt_pointer'].append(torch.tensor(
                ex['pointer_question']))
    feat['context'] = utils.pad_sequence(feat['context'], self.pad_id,
                                         self.device)
    feat['context_mask'] = utils.pad_sequence(feat['context_mask'], 0,
                                              self.device).float()
    feat['utt_pointer'] = utils.pad_sequence(
        feat['utt_pointer'], self.pad_id,
        self.device) if self.training else None
    feat['batch'] = batch
    return feat
def read_file(self, vocab_file, data_file, max_seq_len, num_sample):
    """Load a pickled vocabulary, (re)sample rows of the TSV, and return
    padded id sequences for the 'text' column.

    When num_sample is None the whole frame is shuffled; otherwise
    num_sample rows are drawn with replacement.
    """
    with open(vocab_file, 'rb') as fh:
        lang = pkl.load(fh)
    frame = pd.read_csv(data_file, delimiter='\t')
    if num_sample is None:
        frame = frame.sample(frac=1).reset_index(drop=True)
    else:
        frame = frame.sample(n=num_sample, replace=True)
    # pad sequence for CNN
    return [
        pad_sequence([lang.word2index(tok) for tok in text.split(' ')],
                     max_seq_len)
        for text in frame['text']
    ]
def index_sentences(self, sentences):
    """Index a batch of sentences and pack into a torch tensor.

    Args:
        sentences: (list) of one-line/string sentences.

    Returns:
        (torch.Tensor) [batch_size, max_length].
    """
    rows = []
    for sentence in sentences:
        ids = self.indexer.get_ids(sentence)
        rows.append(utils.pad_sequence(ids, constants.PAD, self.max_length))
    return torch.from_numpy(np.array(rows))
def __call__(self, batch):
    """Collate a batch into (input_ids, attention_mask[, targets]).

    When targets are configured, each batch item is a (sequence, target)
    pair and a target tensor is appended to the output tuple; otherwise
    items are bare sequences.
    """
    has_targets = self._targets is not None
    if has_targets:
        sequences, targets = list(zip(*batch))
    else:
        sequences = list(batch)
    input_ids, attention_mask = pad_sequence(
        sequences,
        max_seq_length=self._max_length,
        pad_token_id=self._pad_token_id)
    if has_targets:
        return input_ids, attention_mask, torch.tensor(targets)
    return input_ids, attention_mask
def get_batch_data(self, indices, unit, neg_num):
    """Assemble one training batch of text and audio features.

    The text side duplicates the first half of the batch (the "paired"
    copies), sorts by length descending, pads, and records the inverse
    permutation so callers can restore the original order. The audio side
    appends neg_num negative examples (a random different word) per
    first-half index before the same sort/pad/invert treatment.

    Args:
        indices: word indices for this batch.
        unit: 'phn' or 'char' — which text features/labels to use.
        neg_num: negatives to draw per positive.

    Returns:
        (batch_data, batch_length, batch_invert,
         batch_txt, batch_txt_length, batch_txt_invert, batch_txt_labels).
    """
    # txt
    if unit == 'phn':
        batch_txt = [self.txt_feat[index] for index in indices] \
            + [self.txt_feat[index] for index in indices[:len(indices)//2]]
        batch_txt_labels = [self.phn_idx_arrays[index] for index in indices] \
            + [self.phn_idx_arrays[index] for index in indices[:len(indices)//2]]
    elif unit == 'char':
        batch_txt = [self.txt_feat_char[index] for index in indices] \
            + [self.txt_feat_char[index] for index in indices[:len(indices)//2]]
        batch_txt_labels = [self.char_idx_arrays[index] for index in indices] \
            + [self.char_idx_arrays[index] for index in indices[:len(indices)//2]]
    else:
        # NOTE(review): bare `raise` with no active exception raises a
        # RuntimeError — consider `raise ValueError(unit)` instead.
        raise
    batch_txt_length = torch.tensor([len(wrd) for wrd in batch_txt],
                                    device=device)
    # Sort text by length, longest first; keep the permutation for tracing.
    batch_txt_order = np.array(
        sorted(range(len(batch_txt)), key=lambda k: len(batch_txt[k]),
               reverse=True))
    batch_txt = np.array(batch_txt)[batch_txt_order]
    batch_txt_labels = np.array(batch_txt_labels)[batch_txt_order]
    batch_txt_length = batch_txt_length[batch_txt_order]
    batch_txt = pad_sequence(batch_txt).to(device)
    batch_txt_labels = pad_sequence(batch_txt_labels).to(device)
    # target
    batch_data = [self.feat[index] for index in indices]
    # randomly select pos & neg
    pos_neg_indices = indices[:len(indices) // 2]
    # for idx in pos_neg_indices:
    #     spk = self.wrd_idx2spk[idx]
    #     # feat_pos
    #     idx_pos = random.choice(self.spk2wrd_idx[spk])
    #     batch_data.append(self.feat[idx_pos])
    # for idx in pos_neg_indices:
    #     spk = self.wrd_idx2spk[idx]
    #     # feat_neg
    #     self.spks.remove(spk)
    #     rand_spk = random.choice(self.spks)
    #     self.spks.append(spk)
    #     idx_neg = random.choice(self.spk2wrd_idx[rand_spk])
    #     batch_data.append(self.feat[idx_neg])
    # neg paired: rejection-sample a random index whose word differs.
    for i in range(neg_num):
        for idx in pos_neg_indices:
            wrd = self.wrds[idx]
            neg_paired_index = idx
            neg_paired_wrd = wrd
            while neg_paired_wrd == wrd:
                neg_paired_index = random.randint(0, len(self.wrds) - 1)
                neg_paired_wrd = self.wrds[neg_paired_index]
            batch_data.append(self.feat[neg_paired_index])
    batch_length = torch.tensor([len(wrd) for wrd in batch_data],
                                device=device)
    # Sort audio by length, longest first, mirroring the text side.
    batch_order = np.array(
        sorted(range(len(batch_data)), key=lambda k: len(batch_data[k]),
               reverse=True))
    batch_data = np.array(batch_data)[batch_order]
    batch_length = batch_length[batch_order]
    batch_data = pad_sequence(batch_data).to(device)
    # invert indices for tracing
    batch_invert = np.zeros_like(batch_order)
    for i, j in enumerate(batch_order):
        batch_invert[j] = i
    batch_invert = torch.tensor(batch_invert, device=device)
    batch_txt_invert = np.zeros_like(batch_txt_order)
    for i, j in enumerate(batch_txt_order):
        batch_txt_invert[j] = i
    batch_txt_invert = torch.tensor(batch_txt_invert, device=device)
    # batch_data: target,( paired,) pos, neg
    # batch_txt: target,( paired)
    return batch_data, batch_length, batch_invert, \
        batch_txt, batch_txt_length, batch_txt_invert, batch_txt_labels
def get_train_test_data(self, run_id=1, protocol_type="xs"):
    """Split sequences into zero-padded train/test pose arrays and
    one-hot label arrays.

    Args:
        run_id: split index inside self.train_test_sets.
        protocol_type: "xs" (split by subject) or "xv" (split by
            viewpoint).

    Returns:
        X_train, Y_train, X_test, Y_test — numpy arrays padded to the
        maximum sequence length across both splits.

    NOTE(review): the max(...) calls below raise ValueError if either
    split ends up empty — confirm every protocol/run assigns sequences to
    both sets.
    """
    # initialize train and test
    X_train = []
    Y_train = []
    X_test = []
    Y_test = []
    # sets
    train_sets = self.train_test_sets[protocol_type][run_id][0]
    test_sets = self.train_test_sets[protocol_type][run_id][1]
    # iterate through sequences
    for _, seq in enumerate(self.sequences):
        # poses & labels (minority vs. majority class per frame)
        poses = np.array([f.pose.flatten() for f in seq.frames])
        if self.train_subclasses:
            targets = np.array([f.min_cls for f in seq.frames])
        else:
            targets = np.array([f.maj_cls for f in seq.frames])
        targets_one_hot = utils.one_hot_encoding(
            targets, nb_classes=self.get_nb_classes())
        # subsampling
        poses = utils.subsampling(poses,
                                  sampling_factor=self.sampling_factor)
        targets_one_hot = utils.subsampling(
            targets_one_hot, sampling_factor=self.sampling_factor)
        # assign to set (by subject for "xs", by viewpoint for "xv")
        if protocol_type == "xs":
            if seq.subject in train_sets:
                X_train.append(poses)
                Y_train.append(targets_one_hot)
            elif seq.subject in test_sets:
                X_test.append(poses)
                Y_test.append(targets_one_hot)
            else:
                print("Sequence is not contained in TRAIN neither in TEST...")
        elif protocol_type == "xv":
            if seq.viewpoint in train_sets:
                X_train.append(poses)
                Y_train.append(targets_one_hot)
            elif seq.viewpoint in test_sets:
                X_test.append(poses)
                Y_test.append(targets_one_hot)
            else:
                print("Sequence is not contained in TRAIN neither in TEST...")
    # maximal sequence length (shared by both splits so shapes agree)
    max_seq_len_train = max([len(s) for s in X_train])
    max_seq_len_test = max([len(s) for s in X_test])
    max_seq_len = max([max_seq_len_train, max_seq_len_test])
    # zero padding
    X_train = np.array([utils.pad_sequence(s, max_seq_len) for s in X_train])
    Y_train = np.array([utils.pad_sequence(s, max_seq_len) for s in Y_train])
    X_test = np.array([utils.pad_sequence(s, max_seq_len) for s in X_test])
    Y_test = np.array([utils.pad_sequence(s, max_seq_len) for s in Y_test])
    return X_train, Y_train, X_test, Y_test
def forward(
        self,
        reps_context,
        context_sizes,
        reps_target,
        target_sizes,
        input_tuples,
        #img_target=None, img_queries=None, img_batch_sizes=[], img_target_indices=[],
        #hpt_target=None, hpt_queries=None, hpt_batch_sizes=[], hpt_target_indices=[],
        num_steps=None,
        beta=1.0,
        std=1.0,
        is_grayscale=False,
        do_sum=True):
    """Training forward pass: posterior/prior rollout, per-modality
    reconstruction likelihoods, and the beta-weighted ELBO-style loss.

    Args:
        reps_context / context_sizes: ragged context representations and
            their valid lengths.
        reps_target / target_sizes: ragged target representations and
            their valid lengths.
        input_tuples: per-modality (target, queries, target_indices,
            batch_sizes) tuples aligned with self.dims.
        num_steps: optional override for self.num_steps rollout steps.
        beta: KL weight in the final loss.
        std: fixed likelihood std.
        is_grayscale: convert image modalities to gray before the loss.
        do_sum: sum losses over the batch; otherwise keep per-episode.

    Returns:
        (mean_recons, latents, loss, info) with per-modality detail in
        info['mod_likelihoods'].
    """
    # init
    num_episodes = len(reps_context)
    #assert len(set([index for _, _, mod_target_indices, _ in input_tuples for index in mod_target_indices])) == num_episodes
    loss_kl = 0

    ''' forward posterior / prior '''
    # init states
    states_p = self.rnn_p.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    states_q = self.rnn_q.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    hiddens_p = [state_p[0] for state_p in states_p]
    hiddens_q = [state_q[0] for state_q in states_q]
    latents = []
    init_input_q = False
    init_input_p = False
    for i in range(num_steps if num_steps is not None else self.num_steps):
        # aggregate observations (posterior) — built once, first iteration
        if not init_input_q:
            reps_context = pad_sequence(reps_context, context_sizes)
            reps_context = torch.sum(reps_context, dim=1)
            reps_context = reps_context.view(-1, self.nc_context,
                                             self.z_height, self.z_width)
            reps_target = pad_sequence(reps_target, target_sizes)
            reps_target = torch.sum(reps_target, dim=1)
            reps_target = reps_target.view(-1, self.nc_context,
                                           self.z_height, self.z_width)
            input_q = torch.cat([reps_target, reps_context], dim=1)
            init_input_q = True

        # forward posterior
        means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
            input_q, states_q, hiddens_p)

        # sample z from posterior
        zs = self.rnn_q.sample(means_q, logvars_q)

        # aggregate observations (prior) — context only
        if not init_input_p:
            input_p = reps_context
            init_input_p = True

        # forward prior (conditioned on the posterior samples)
        _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
            input_p, states_p, latents_q=zs)

        # append z to latent
        latents += [torch.cat(zs, dim=1).unsqueeze(1)
                    ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

        # update accumulated KL (per layer, per step)
        for j in range(self.num_layers):
            loss_kl += loss_kld_gaussian_vs_gaussian(means_q[j],
                                                     logvars_q[j],
                                                     means_p[j],
                                                     logvars_p[j],
                                                     do_sum=do_sum)

    ''' likelihood '''
    info = {}
    info['mod_likelihoods'] = []
    # Per-episode accumulator when not summing, scalar otherwise.
    loss_likelihood = 0 if do_sum else loss_kl.new_zeros(loss_kl.size())
    mean_recons = []
    for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
        channels, height, width, _, mtype = dim
        mod_target, mod_queries, mod_target_indices, mod_batch_sizes = input_tuple
        if len(mod_queries) > 0:  # is not None:
            num_mod_data = len(mod_target)
            assert sum(mod_batch_sizes) == num_mod_data

            # run renderer (likelihood)
            mod_mean_recon = self._forward_renderer(
                idx, mod_queries, latents, num_episodes, mod_batch_sizes,
                mod_target_indices)

            # convert to gray scale
            if mtype == 'image' and is_grayscale:
                mod_mean_recon = rgb2gray(mod_mean_recon)
                mod_target = rgb2gray(mod_target)

            # estimate recon loss
            loss_mod_likelihood = loss_recon_gaussian_w_fixed_var(
                mod_mean_recon, mod_target, std=std, add_logvar=False,
                do_sum=do_sum)

            # estimate recon loss without std (monitoring only — detached)
            loss_mod_likelihood_nostd = loss_recon_gaussian_w_fixed_var(
                mod_mean_recon.detach(), mod_target, do_sum=do_sum)
        else:
            # Modality absent from this batch: empty recon, no loss.
            mod_mean_recon = reps_context.new_zeros(
                0, channels, height, width)
            loss_mod_likelihood = None
            loss_mod_likelihood_nostd = None

        # append to list
        mean_recons += [mod_mean_recon]
        info['mod_likelihoods'] += [loss_mod_likelihood_nostd]

        # add to loss_likelihood
        if loss_mod_likelihood is not None:
            # sum to each episode
            if not do_sum:
                _mod_batch_sizes = [0] + np.cumsum(mod_batch_sizes).tolist()
                for i, t_idx in enumerate(mod_target_indices):
                    loss_likelihood[t_idx] += torch.sum(
                        loss_mod_likelihood[
                            _mod_batch_sizes[i]:_mod_batch_sizes[i + 1]])
            else:
                loss_likelihood += loss_mod_likelihood

    ''' loss '''
    # sum loss
    loss = loss_likelihood + beta * loss_kl

    # additional loss info
    info['likelihood'] = loss_likelihood.detach()
    info['kl'] = loss_kl.detach()

    # return
    #return img_mean_recon, hpt_mean_recon, None, loss, info
    return mean_recons, latents, loss, info
def generate(
        self,
        reps_context,
        context_sizes,
        input_tuples,
        #img_queries, img_batch_sizes,
        #hpt_queries, hpt_batch_sizes,
        num_steps=None,
        is_grayscale=False):
    """Sample latents from the prior and render each requested modality.

    Args:
        reps_context / context_sizes: ragged context representations and
            their valid lengths.
        input_tuples: per-modality (queries, batch_sizes) tuples aligned
            with self.dims; modalities with no queries yield None.
        num_steps: optional override for self.num_steps rollout steps.
        is_grayscale: convert image reconstructions to gray.

    Returns:
        (mean_recons, None) — one reconstruction (or None) per modality.
    """
    # init
    num_episodes = len(reps_context)

    # init states
    states_p = self.rnn_p.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    hiddens_p = [state_p[0] for state_p in states_p]
    latents = []
    init_input_p = False
    for i in range(num_steps if num_steps is not None else self.num_steps):
        # Build the (constant) prior input once, on the first iteration.
        if not init_input_p:
            reps_context = pad_sequence(reps_context, context_sizes)
            reps_context = torch.sum(reps_context, dim=1)
            reps_context = reps_context.view(-1, self.nc_context,
                                             self.z_height, self.z_width)
            input_p = reps_context
            init_input_p = True

        # forward prior
        zs, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
            input_p, states_p)

        # append z to latent
        latents += [torch.cat(zs, dim=1).unsqueeze(1)
                    ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

    ''' forward renderers '''
    mean_recons = []
    for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
        channels, height, width, _, mtype = dim
        mod_queries, mod_batch_sizes = input_tuple

        # forward image renderer
        if len(mod_queries) > 0:
            # forward image renderer
            mod_mean_recon = self._forward_renderer(
                idx, mod_queries, latents, num_episodes, mod_batch_sizes)

            # convert to gray scale
            if mtype == 'image' and is_grayscale:
                mod_mean_recon = rgb2gray(mod_mean_recon)
        else:
            mod_mean_recon = None  #reps_context.new_zeros(1, channels, height, width)

        # append to list
        mean_recons += [mod_mean_recon]

    ## temporary
    #img_mean_recon, hpt_mean_recon = mean_recons[0], mean_recons[1]

    # return
    #return img_mean_recon, hpt_mean_recon, None
    return mean_recons, None
def fit(self, x, y, epochs=5, generator_sample=32, discriminator_sample=8):
    """Adversarially train the seq2seq generator and the discriminator.

    Each epoch: (1) a plain language-model step on the generator, then
    (2) a GAN-style step where the discriminator is trained on real vs.
    generated pairs and the generator is trained to fool it.

    Args:
        x, y: parallel lists of source/target token sequences.
        epochs: number of training epochs.
        generator_sample: batch size for the generator steps.
        discriminator_sample: batch size for the discriminator step.

    NOTE(review): x_sample_1_gen / x_sample_2_gen / y_target_gen are
    computed but never used below — confirm intent.
    """
    gen_loss_tracker = keras.metrics.Mean(name="loss")
    gen_metric = keras.metrics.CategoricalAccuracy(name="categorical_acc")
    disc_loss_tracker = keras.metrics.Mean(name="loss")
    disc_metric = keras.metrics.CategoricalAccuracy(name="categorical_acc")
    # Vectorize and pad the full corpus once, before the epoch loop.
    x_encoder_sequences = pad_sequence(
        [make_vectors(x[i], self.word2id) for i in range(len(x))],
        max_length=MAX_LENGTH)
    x_decoder_sequences = pad_sequence(
        [make_vectors(y[i], self.word2id, is_target=True)
         for i in range(len(y))],
        max_length=MAX_LENGTH)
    y_target_main = pad_sequence(
        [make_vectors(y[i], self.word2id) for i in range(len(x))],
        max_length=MAX_LENGTH)
    for epoch in range(epochs):
        # generator
        x_enc_sample, x_dec_sample, y_target_sample = sample_generator_data(
            x_encoder_sequences, x_decoder_sequences, y_target_main,
            len(self.word2id), sample_size=generator_sample)
        x_sample_1, x_sample_2, y_target = sample_discriminator_data(
            x_encoder_sequences, x_decoder_sequences, y_target_main,
            len(self.word2id), self.generator,
            sample_size=discriminator_sample)
        # Indices of discriminator samples that came from the generator
        # (label [1, 0]).
        gen_indices = [ind for ind in range(discriminator_sample)
                       if y_target[ind][0] == 1]
        x_sample_1_gen = np.array(
            [x_sample_1[ind] for ind in gen_indices]).reshape(
                (len(gen_indices), -1))
        x_sample_2_gen = np.array(
            [x_sample_2[ind] for ind in gen_indices]).reshape(
                (len(gen_indices), -1))
        y_target_gen = np.array(
            [y_target[ind] for ind in gen_indices]).reshape(
                (len(gen_indices), -1))
        # only generator - simple language model training
        with tf.GradientTape(persistent=True) as tape_gen:
            y_pred_gen = self.generator.model(
                [x_enc_sample, x_dec_sample], training=True)  # Forward pass
            gen_loss = tf.reduce_sum(
                keras.losses.categorical_crossentropy(y_target_sample,
                                                      y_pred_gen))
        gen_trainable_vars = self.generator.model.trainable_variables
        gen_gradients = tape_gen.gradient(gen_loss, gen_trainable_vars)
        # Update weights
        self.generator.model.optimizer.apply_gradients(
            zip(gen_gradients, gen_trainable_vars))
        # both generator and discriminator (GAN-style)
        with tf.GradientTape(persistent=True) as tape:
            y_pred_gen = self.generator.model(
                [x_enc_sample, x_dec_sample], training=True)  # Forward pass
            y_pred_disc = self.discriminator.model(
                [x_sample_1, x_sample_2], training=True)  # Forward pass
            # One-hot encode the encoder sample for the discriminator.
            x_input = np.zeros(
                (generator_sample, MAX_LENGTH, len(self.word2id)))
            for sample in range(generator_sample):
                for seq_num in range(MAX_LENGTH):
                    x_input[sample][seq_num][
                        x_enc_sample[sample][seq_num]] = 1
            target = to_categorical(
                [0 for i in range(generator_sample)],
                num_classes=2)  # 0 if from generator 1 if real
            y_pred_disc_gen = self.discriminator.model(
                [x_input, y_pred_gen], training=True)
            # Compute our own loss
            disc_loss = keras.losses.categorical_crossentropy(
                y_target, y_pred_disc)
            gen_loss = keras.losses.categorical_crossentropy(
                target, y_pred_disc_gen)
        # Compute gradients (persistent tape: used twice)
        gen_trainable_vars = self.generator.model.trainable_variables
        gen_gradients = tape.gradient(gen_loss, gen_trainable_vars)
        disc_trainable_vars = self.discriminator.model.trainable_variables
        disc_gradients = tape.gradient(disc_loss, disc_trainable_vars)
        # Update weights
        self.generator.model.optimizer.apply_gradients(
            zip(gen_gradients, gen_trainable_vars))
        self.discriminator.model.optimizer.apply_gradients(
            zip(disc_gradients, disc_trainable_vars))
        # Compute our own metrics
        gen_loss_tracker.update_state(gen_loss)
        gen_metric.update_state(y_target_sample, y_pred_gen)
        disc_loss_tracker.update_state(disc_loss)
        disc_metric.update_state(y_target, y_pred_disc)
        print_metrics = {"gen_loss": gen_loss_tracker.result().numpy(),
                         "gen_metric": gen_metric.result().numpy(),
                         "disc_loss": disc_loss_tracker.result().numpy(),
                         "disc_metric": disc_metric.result().numpy()}
        print(f"Epoch - {epoch} \n Metrics - {print_metrics}")
def batchify_fn(batch):
    """Transpose a batch of per-sample tuples into per-field groups and
    pad each field to a rectangular array."""
    return [pad_sequence(field) for field in zip(*batch)]
def predict(self,
            reps_context,
            context_sizes,
            reps_target,
            target_sizes,
            input_tuples,
            num_steps=None,
            beta=1.0,
            std=1.0,
            is_grayscale=False,
            use_uint8=True):
    """Evaluation pass: posterior/prior rollout plus per-episode
    log-probability estimates (everything detached — no gradients kept).

    Args mirror forward(); use_uint8 controls the 0-255 vs. 0-1 value
    scale used for the image likelihood.

    Returns:
        (mean_recons, latents, logprob, info) where logprob combines the
        reconstruction log-likelihood and the importance-weight KL term.
    """
    # init
    num_episodes = len(reps_context)
    logprob_kl = 0
    loss_kl = 0

    ''' forward posterior / prior '''
    # init states
    states_p = self.rnn_p.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    states_q = self.rnn_q.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    hiddens_p = [state_p[0] for state_p in states_p]
    hiddens_q = [state_q[0] for state_q in states_q]
    latents = []
    init_input_q = False
    init_input_p = False
    for i in range(num_steps if num_steps is not None else self.num_steps):
        # aggregate observations (posterior) — built once, first iteration
        if not init_input_q:
            reps_context = pad_sequence(reps_context, context_sizes)
            reps_context = torch.sum(reps_context, dim=1)
            reps_context = reps_context.view(-1, self.nc_context,
                                             self.z_height, self.z_width)
            reps_target = pad_sequence(reps_target, target_sizes)
            reps_target = torch.sum(reps_target, dim=1)
            reps_target = reps_target.view(-1, self.nc_context,
                                           self.z_height, self.z_width)
            input_q = torch.cat([reps_target, reps_context], dim=1)
            init_input_q = True

        # forward posterior
        means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
            input_q, states_q, hiddens_p)

        # sample z from posterior
        zs = self.rnn_q.sample(means_q, logvars_q)

        # aggregate observations (prior)
        if not init_input_p:
            input_p = reps_context
            init_input_p = True

        # forward prior
        _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
            input_p, states_p, latents_q=zs)

        # append z to latent
        latents += [torch.cat(zs, dim=1).unsqueeze(1)
                    ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

        # update accumulated KL: analytic KL for the loss, and
        # log p(z) - log q(z) at the samples for the log-prob estimate.
        for j in range(self.num_layers):
            loss_kl += loss_kld_gaussian_vs_gaussian(
                means_q[j], logvars_q[j], means_p[j], logvars_p[j])
            logprob_kl += logprob_gaussian(
                means_p[j],  #.view(num_episodes, -1),
                logvars_p[j],  #.view(num_episodes, -1),
                zs[j],  #.view(num_episodes, -1),
                do_sum=False)
            logprob_kl += -logprob_gaussian(
                means_q[j],  #.view(num_episodes, -1),
                logvars_q[j],  #.view(num_episodes, -1),
                zs[j],  #.view(num_episodes, -1),
                do_sum=False)

    ''' likelihood '''
    info = {}
    info['logprob_mod_likelihoods'] = []
    logprob_likelihood = 0
    info['mod_likelihoods'] = []
    loss_likelihood = 0
    mean_recons = []
    for idx, (dim, input_tuple) in enumerate(zip(self.dims, input_tuples)):
        channels, height, width, _, mtype = dim
        mod_target, mod_queries, mod_target_indices, mod_batch_sizes = input_tuple
        if len(mod_queries) > 0:  # is not None:
            num_mod_data = len(mod_target)
            assert sum(mod_batch_sizes) == num_mod_data

            # run renderer (likelihood)
            mod_mean_recon = self._forward_renderer(
                idx, mod_queries, latents, num_episodes, mod_batch_sizes,
                mod_target_indices).detach()

            # convert to gray scale (and rescale per use_uint8)
            if mtype == 'image' and is_grayscale:
                mod_mean_recon = rgb2gray(mod_mean_recon)
                mod_target = rgb2gray(mod_target)
                if not use_uint8:
                    mod_mean_recon = mod_mean_recon / 255
                    mod_target = mod_target / 255
            elif mtype == 'image' and use_uint8:
                mod_mean_recon = 255 * mod_mean_recon
                mod_target = 255 * mod_target

            # estimate recon loss
            loss_mod_likelihood = loss_recon_gaussian_w_fixed_var(
                mod_mean_recon, mod_target, std=std,
                add_logvar=False).detach()
            logprob_mod_likelihood = logprob_gaussian_w_fixed_var(
                mod_mean_recon,  #.view(num_episodes, -1),
                mod_target,  #.view(num_episodes, -1),
                std=std,
                do_sum=False).detach()

            # estimate recon loss without std
            loss_mod_likelihood_nostd = loss_recon_gaussian_w_fixed_var(
                mod_mean_recon.detach(), mod_target).detach()
            #logprob_mod_likelihood_nostd = logprob_gaussian_w_fixed_var(
            #    mod_mean_recon.detach(), #.view(num_episodes, -1),
            #    mod_target, #.view(num_episodes, -1),
            #    do_sum=False).detach()

            # sum per episode
            logprob_mod_likelihood = sum_tensor_per_episode(
                logprob_mod_likelihood, mod_batch_sizes,
                mod_target_indices, num_episodes)
        else:
            # Modality absent from this batch: empty recon, no terms.
            mod_mean_recon = reps_context.new_zeros(
                0, channels, height, width)
            loss_mod_likelihood = None
            loss_mod_likelihood_nostd = None
            logprob_mod_likelihood = None

        # add to loss_likelihood
        if loss_mod_likelihood is not None:
            loss_likelihood += loss_mod_likelihood
        if logprob_mod_likelihood is not None:
            logprob_likelihood += logprob_mod_likelihood

        # append to list
        mean_recons += [mod_mean_recon]
        info['mod_likelihoods'] += [loss_mod_likelihood]
        info['logprob_mod_likelihoods'] += [logprob_mod_likelihood]

    ''' loss '''
    # sum loss
    loss = loss_likelihood + beta * loss_kl
    logprob = logprob_likelihood + logprob_kl

    # additional loss info (0 if no modality contributed)
    info['likelihood'] = loss_likelihood.detach() if type(
        loss_likelihood) is not int else 0
    info['kl'] = loss_kl.detach()

    # return
    #return img_mean_recon, hpt_mean_recon, None, loss, info
    #return mean_recons, latents, loss, info
    return mean_recons, latents, logprob, info
def infogain(self,
             reps_context,
             context_sizes,
             reps_target,
             target_sizes,
             input_tuples,
             num_steps=None,
             beta=1.0,
             std=1.0):
    """Estimate the information gain log q(z) - log p(z) at posterior
    samples (a Monte-Carlo KL estimate), without any likelihood terms.

    Args mirror forward(); input_tuples, beta and std are accepted for
    signature compatibility but unused here.

    Returns:
        (None, latents, detached per-episode KL estimate, info).
    """
    # init
    num_episodes = len(reps_context)
    loss_kl = 0

    ''' forward posterior / prior '''
    # init states
    states_p = self.rnn_p.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    states_q = self.rnn_q.init_state(num_episodes,
                                     [self.z_height, self.z_width])
    hiddens_p = [state_p[0] for state_p in states_p]
    hiddens_q = [state_q[0] for state_q in states_q]
    latents = []
    init_input_q = False
    init_input_p = False
    for i in range(num_steps if num_steps is not None else self.num_steps):
        # aggregate observations (posterior) — built once, first iteration
        if not init_input_q:
            reps_context = pad_sequence(reps_context, context_sizes)
            reps_context = torch.sum(reps_context, dim=1)
            reps_context = reps_context.view(-1, self.nc_context,
                                             self.z_height, self.z_width)
            reps_target = pad_sequence(reps_target, target_sizes)
            reps_target = torch.sum(reps_target, dim=1)
            reps_target = reps_target.view(-1, self.nc_context,
                                           self.z_height, self.z_width)
            input_q = torch.cat([reps_target, reps_context], dim=1)
            init_input_q = True

        # forward posterior
        means_q, logvars_q, hiddens_q, states_q = self.rnn_q(
            input_q, states_q, hiddens_p)

        # sample z from posterior
        zs = self.rnn_q.sample(means_q, logvars_q)

        # aggregate observations (prior)
        if not init_input_p:
            input_p = reps_context
            init_input_p = True

        # forward prior
        _, means_p, logvars_p, hiddens_p, states_p = self.rnn_p(
            input_p, states_p, latents_q=zs)

        # append z to latent
        latents += [torch.cat(zs, dim=1).unsqueeze(1)
                    ] if len(zs) > 1 else [zs[0].unsqueeze(1)]

        # update accumulated KL: sampled log q(z) - log p(z) per layer
        for j in range(self.num_layers):
            #loss_kl += loss_kld_gaussian_vs_gaussian(means_q[j], logvars_q[j], means_p[j], logvars_p[j], do_sum=False)
            loss_kl += logprob_gaussian(
                means_q[j],  #.view(num_episodes, -1),
                logvars_q[j],  #.view(num_episodes, -1),
                zs[j],  #.view(num_episodes, -1),
                do_sum=False)
            loss_kl += -logprob_gaussian(
                means_p[j],  #.view(num_episodes, -1),
                logvars_p[j],  #.view(num_episodes, -1),
                zs[j],  #.view(num_episodes, -1),
                do_sum=False)

    ''' loss '''
    # additional loss info
    info = {}
    info['kl'] = loss_kl.detach()

    # return
    #return img_mean_recon, hpt_mean_recon, None, loss, info
    #return mean_recons, latents, loss, info
    return None, latents, loss_kl.detach(), info
def convert_example(example,
                    vocabs,
                    encoding_model='ernie-1.0',
                    feat=None,
                    mode='train',
                    fix_len=20):
    """Builds model inputs for dependency parsing task.

    Args:
        example: dict with CoNLL-style fields FORM, CPOS, HEAD, DEPREL.
        vocabs: (word_vocab, feat_vocab, rel_vocab) triple.
        encoding_model: "lstm" uses [BOS]/[EOS] word indexing plus a
            feature channel; anything else is treated as a pretrained
            encoder ([CLS]/[SEP], char-level word pieces, no feats).
        feat: for the lstm path, "pos" selects CPOS features; otherwise
            characters of each FORM token are used.
        mode: "test" omits arc/rel targets from the return value.
        fix_len: fixed width for char-level id matrices.

    Returns:
        lstm path: (words, feats[, arcs, rels]);
        pretrained path: ([words]) in test mode, else (words, arcs, rels).
    """
    word_vocab, feat_vocab, rel_vocab = vocabs

    # Boundary markers differ: lstm vocabularies carry BOS/EOS, while
    # pretrained vocabularies reuse CLS/SEP.
    if encoding_model == "lstm":
        word_bos_index = word_vocab.to_indices("[BOS]")
        word_eos_index = word_vocab.to_indices("[EOS]")
    else:
        word_bos_index = word_vocab.to_indices("[CLS]")
        word_eos_index = word_vocab.to_indices("[SEP]")

    if feat_vocab:
        feat_bos_index = feat_vocab.to_indices("[BOS]")
        feat_eos_index = feat_vocab.to_indices("[EOS]")

    arc_bos_index, arc_eos_index = 0, 1

    rel_bos_index = rel_vocab.to_indices("[BOS]")
    rel_eos_index = rel_vocab.to_indices("[EOS]")

    # Targets are only built outside test mode.
    if mode != "test":
        arcs = list(example["HEAD"])
        arcs = [arc_bos_index] + arcs + [arc_eos_index]
        arcs = np.array(arcs, dtype=int)

        rels = rel_vocab.to_indices(example["DEPREL"])
        rels = [rel_bos_index] + rels + [rel_eos_index]
        rels = np.array(rels, dtype=int)

    if encoding_model == "lstm":
        words = word_vocab.to_indices(example["FORM"])
        words = [word_bos_index] + words + [word_eos_index]
        words = np.array(words, dtype=int)

        if feat == "pos":
            feats = feat_vocab.to_indices(example["CPOS"])
            feats = [feat_bos_index] + feats + [feat_eos_index]
            feats = np.array(feats, dtype=int)
        else:
            # Char-level features: one fixed-width id row per word.
            feats = [[feat_vocab.to_indices(token) for token in word]
                     for word in example["FORM"]]
            feats = [[feat_bos_index]] + feats + [[feat_eos_index]]
            feats = pad_sequence(
                [np.array(ids[:fix_len], dtype=int) for ids in feats],
                fix_len=fix_len)
        if mode == "test":
            return words, feats
        return words, feats, arcs, rels
    else:
        # Pretrained path: each word becomes a fixed-width row of char ids.
        words = [[word_vocab.to_indices(char) for char in word]
                 for word in example["FORM"]]
        words = [[word_bos_index]] + words + [[word_eos_index]]
        words = pad_sequence(
            [np.array(ids[:fix_len], dtype=int) for ids in words],
            fix_len=fix_len)
        if mode == "test":
            return [words]
        return words, arcs, rels
(2) max_length """ max_len_inp_train = max_length(input_tensor_train) max_len_inp_valid = max_length(input_tensor_val) max_len_inp_test = max_length(input_tensor_test) max_len_tgt_train = max_length(target_tensor_train) max_len_tgt_valid = max_length(target_tensor_val) max_len_tgt_test = max_length(target_tensor_test) """ (3) padding sequence """ input_tensor_train = [ pad_sequence(x, max_len_inp_train) for x in input_tensor_train ] target_tensor_train = [ pad_sequence(x, max_len_tgt_train) for x in target_tensor_train ] input_tensor_valid = [ pad_sequence(x, max_len_inp_valid) for x in input_tensor_val ] target_tensor_valid = [ pad_sequence(x, max_len_tgt_valid) for x in target_tensor_val ] input_tensor_test = [ pad_sequence(x, max_len_inp_test) for x in input_tensor_test ]