def draw_p_noise(self, batch_size, edit_dim):
    """Sample random edit vectors: uniform direction, norm uniform in [0, norm_max]."""
    rand_draw = GPUVariable(torch.randn(batch_size, edit_dim))
    rand_draw = rand_draw / torch.norm(rand_draw, p=2, dim=1).expand(
        batch_size, edit_dim)
    rand_norms = (torch.rand(batch_size, 1) * self.norm_max).expand(
        batch_size, edit_dim)
    return rand_draw * GPUVariable(rand_norms)
def sample_vMF(self, mu, kappa):
    """vMF sampler in pytorch.

    Args:
        mu (Tensor): of shape (batch_size, 2*word_dim)
        kappa (Float): controls dispersion. kappa of inf is no dispersion.
    """
    batch_size, id_dim = mu.size()
    result_list = []
    for i in range(batch_size):
        munorm = mu[i].norm().expand(id_dim)
        munoise = self.add_norm_noise(munorm, self.norm_eps)
        if float(mu[i].norm().data.cpu().numpy()) > 1e-10:
            # sample offset from center (on sphere) with spread kappa
            w = self._sample_weight(kappa, id_dim)
            wtorch = GPUVariable(w * torch.ones(id_dim))

            # sample a point v on the unit sphere that's orthogonal to mu
            v = self._sample_orthonormal_to(mu[i] / munorm, id_dim)

            # compute new point
            scale_factr = torch.sqrt(
                GPUVariable(torch.ones(id_dim)) - torch.pow(wtorch, 2))
            orth_term = v * scale_factr
            muscale = mu[i] * wtorch / munorm
            sampled_vec = (orth_term + muscale) * munoise
        else:
            rand_draw = GPUVariable(torch.randn(id_dim))
            rand_draw = rand_draw / torch.norm(rand_draw, p=2).expand(id_dim)
            rand_norms = (torch.rand(1) * self.norm_eps).expand(id_dim)
            sampled_vec = rand_draw * GPUVariable(rand_norms)
        result_list.append(sampled_vec)

    return torch.stack(result_list, 0)
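# Note: sample_vMF above relies on self._sample_weight and self._sample_orthonormal_to,
# which are not shown in this section. The sketch below is a standalone, numpy-only
# illustration of the standard rejection-sampling scheme (Wood, 1994) that such helpers
# typically implement; the names and details here are illustrative assumptions, not
# necessarily the exact implementation used elsewhere in the codebase.
import numpy as np

def _sample_weight_sketch(kappa, dim):
    """Rejection-sample w = cos(angle to mu) for a vMF with concentration kappa."""
    b = (-2. * kappa + np.sqrt(4. * kappa ** 2 + (dim - 1) ** 2)) / (dim - 1)
    x0 = (1. - b) / (1. + b)
    c = kappa * x0 + (dim - 1) * np.log(1. - x0 ** 2)
    while True:
        z = np.random.beta((dim - 1) / 2., (dim - 1) / 2.)
        w = (1. - (1. + b) * z) / (1. - (1. - b) * z)
        u = np.random.uniform(0., 1.)
        if kappa * w + (dim - 1) * np.log(1. - x0 * w) - c >= np.log(u):
            return w

def _sample_orthonormal_to_sketch(mu):
    """Sample a unit vector orthogonal to the unit vector mu."""
    v = np.random.randn(mu.shape[0])
    orth = v - mu * np.dot(mu, v)   # remove the component along mu
    return orth / np.linalg.norm(orth)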
def test_split(self):
    input_embeds = GPUVariable(torch.LongTensor([
        # batch item 1
        [[1, 2], [2, 3], [5, 6]],
        # batch item 2
        [[4, 8], [3, 5], [0, 0]],
    ]))

    input_mask = GPUVariable(torch.FloatTensor([
        [1, 1, 1],
        [1, 1, 0],
    ]))

    sb = SequenceBatch(input_embeds, input_mask)

    elements = sb.split()
    input_list = [e.values for e in elements]
    mask_list = [e.mask for e in elements]

    assert len(input_list) == 3
    assert_tensor_equal(input_list[0], [[1, 2], [4, 8]])
    assert_tensor_equal(input_list[1], [[2, 3], [3, 5]])
    assert_tensor_equal(input_list[2], [[5, 6], [0, 0]])

    assert len(mask_list) == 3
    assert_tensor_equal(mask_list[0], [[1], [1]])
    assert_tensor_equal(mask_list[1], [[1], [1]])
    assert_tensor_equal(mask_list[2], [[1], [0]])
def test_reduce_max(self, some_seq_batch):
    with pytest.raises(ValueError):
        # should complain about empty sequence
        SequenceBatch.reduce_max(some_seq_batch)

    values = GPUVariable(torch.FloatTensor([
        [[1, 2], [4, 5], [4, 4]],  # actual max is in later elements, but should be suppressed by mask
        [[0, -4], [43, -5], [-1, -20]],  # note that all elements in 2nd dim are negative
    ]))
    mask = GPUVariable(torch.FloatTensor([
        [1, 0, 0],
        [1, 1, 0],
    ]))
    seq_batch = SequenceBatch(values, mask)
    result = SequenceBatch.reduce_max(seq_batch)

    assert_tensor_equal(result, [
        [1, 2],
        [43, -4],
    ])
def from_sequences(cls, sequences, vocab, min_seq_length=0):
    """Convert a batch of sequences into a SequenceBatch.

    Args:
        sequences (list[list[unicode]])
        vocab (WordVocab)
        min_seq_length (int): enforce that the Tensor representing the
            SequenceBatch have at least this many columns.

    Returns:
        SequenceBatch
    """
    batch_size = len(sequences)
    if batch_size == 0:
        seq_length = 0
    else:
        seq_length = max(len(seq) for seq in sequences)  # max seq length in batch
    seq_length = max(seq_length, min_seq_length)  # make sure it is at least min_seq_length

    shape = (batch_size, seq_length)
    values = np.zeros(shape, dtype=np.int64)  # pad with zeros
    mask = np.zeros(shape, dtype=np.float32)
    for i, seq in enumerate(sequences):
        for j, word in enumerate(seq):
            values[i, j] = vocab.word2index(word)
            mask[i, j] = 1.0

    return SequenceBatch(GPUVariable(torch.from_numpy(values)),
                         GPUVariable(torch.from_numpy(mask)))
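# Hypothetical usage sketch for from_sequences: the contract is that ragged unicode
# sequences become a zero-padded index matrix plus a float mask. The toy vocab below
# is a stand-in for WordVocab, used only to make the example self-contained.
class _ToyVocab(object):
    def __init__(self, words):
        self._word2index = {w: i for i, w in enumerate(words)}

    def word2index(self, word):
        return self._word2index[word]

# SequenceBatch.from_sequences([[u'a', u'b', u'c'], [u'b']],
#                              _ToyVocab(['<pad>', 'a', 'b', 'c']))
# would yield
#   values = [[1, 2, 3],        mask = [[1., 1., 1.],
#             [2, 0, 0]]                [1., 0., 0.]]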
def sample_vMF(self, mu, kappa):
    """vMF sampler in pytorch.

    Args:
        mu (Tensor): of shape (batch_size, 2*word_dim)
        kappa (Float): controls dispersion. kappa of inf is no dispersion.
    """
    batch_size, id_dim = mu.size()
    result_list = []
    for i in range(batch_size):
        munorm = mu[i].norm().expand(id_dim)

        # sample offset from center (on sphere) with spread kappa
        w = self._sample_weight(kappa, id_dim)
        wtorch = GPUVariable(w * torch.ones(id_dim))

        # sample a point v on the unit sphere that's orthogonal to mu
        v = self._sample_orthonormal_to(mu[i] / munorm, id_dim)

        # compute new point
        scale_factr = torch.sqrt(
            GPUVariable(torch.ones(id_dim)) - torch.pow(wtorch, 2))
        orth_term = v * scale_factr
        muscale = mu[i] * wtorch / munorm
        sampled_vec = (orth_term + muscale)
        result_list.append(sampled_vec)

    return torch.stack(result_list, 0)
def encoder_generate_edits(self, encoder_input):
    """Draw uniform random vectors with given norm, and use as edit vector."""
    source_words = encoder_input.source_words
    source_word_embeds = self.editor.encoder.token_embedder.embed_seq_batch(
        source_words)
    insert_embeds = self.editor.encoder.token_embedder.embed_seq_batch(
        encoder_input.insert_words)
    delete_embeds = self.editor.encoder.token_embedder.embed_seq_batch(
        encoder_input.delete_words)
    insert_embeds_exact = self.editor.encoder.token_embedder.embed_seq_batch(
        encoder_input.insert_exact_words)
    delete_embeds_exact = self.editor.encoder.token_embedder.embed_seq_batch(
        encoder_input.delete_exact_words)

    source_encoder_output = self.editor.encoder.source_encoder(
        source_word_embeds.split())
    source_embeds_list = source_encoder_output.combined_states
    source_embeds = SequenceBatch.cat(source_embeds_list)

    # the final hidden states in both the forward and backward direction, concatenated
    source_embeds_final = torch.cat(
        source_encoder_output.final_states, 1)  # (batch_size, hidden_dim)

    edit_encoded = self.editor.encoder.edit_encoder(
        insert_embeds, insert_embeds_exact, delete_embeds, delete_embeds_exact)

    # the random vector is computed as in rand_p_noise (see in edit_encoder)
    torch.manual_seed(7)
    batch_size, edit_dim = edit_encoded.size()
    rand_draw = GPUVariable(torch.randn(batch_size, edit_dim))
    rand_draw = rand_draw / torch.norm(rand_draw, p=2, dim=1).expand(
        batch_size, edit_dim)
    rand_norms = (torch.rand(batch_size, 1) *
                  self.editor.encoder.edit_encoder.norm_max).expand(
                      batch_size, edit_dim)
    edit_embed = rand_draw * GPUVariable(rand_norms)

    agenda = self.editor.encoder.agenda_maker(source_embeds_final, edit_embed)
    return EncoderOutput(source_embeds, insert_embeds_exact,
                         delete_embeds_exact, agenda)
def test_embed_indices(self, embedder):
    indices = GPUVariable(torch.LongTensor([
        [0, 1],
        [2, 2],
        [4, 5],
    ]))

    embeds = embedder.embed_indices(indices)
    assert_tensor_equal(embeds, [
        [[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]],
        [[0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0]],
        [[0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1]],
    ])

    indices = GPUVariable(torch.LongTensor([
        [[0, 1], [1, 0]],
        [[2, 2], [3, 2]],
    ]))

    embeds = embedder.embed_indices(indices)
    assert_tensor_equal(embeds, [
        [[[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]],
         [[0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]]],
        [[[0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0]],
         [[0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0]]],
    ])
def _drop_seq_batch(self, seq_batch, word_vocab, keep_rate):
    """Word dropout: replace each dropped token with UNK, keeping the mask unchanged."""
    batch_sz, max_seq_len = seq_batch.values.size()
    keep = (torch.rand(batch_sz, max_seq_len) < keep_rate).long()  # 1 = keep, 0 = drop
    keep[:, 0] = 1  # do not drop start token
    unk_index = word_vocab.word2index(word_vocab.UNK)
    kept = seq_batch.values * GPUVariable(keep)
    unkd = GPUVariable((1 - keep) * unk_index)  # dropped positions become UNK
    values = kept + unkd
    return SequenceBatch(values, seq_batch.mask)
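# Standalone sketch of the word-dropout arithmetic in _drop_seq_batch above: kept
# positions keep their token id, dropped positions are overwritten with the UNK id.
# Plain torch only; UNK_INDEX and the toy values are made up for illustration.
import torch

token_ids = torch.LongTensor([[5, 9, 2, 7]])          # one sequence of token ids
UNK_INDEX = 3
keep = (torch.rand(token_ids.size()) < 0.75).long()   # 1 = keep, 0 = drop
keep[:, 0] = 1                                        # never drop the start token
dropped = token_ids * keep + (1 - keep) * UNK_INDEX
# positions where keep == 0 now hold UNK_INDEX; the rest are unchanged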
def clear_cache(self):
    # Keep empty tuple cached, for SequenceBatch
    self._cache.clear()
    self._cache.cache(
        [tuple()],
        [(GPUVariable(torch.zeros(self._embed_dim)),
          SequenceBatchElement(
              GPUVariable(torch.zeros(1, self._embed_dim)),
              GPUVariable(torch.zeros(1))))])
def _query_embeds(self, states, query_entries):
    """Given a batch of states, embed the keys and values of each state's query.

    Args:
        states (list[MiniWoBState])

    Returns:
        entry_embeds (SequenceBatch): batch x num_keys x (2 * embed_dim)
            the keys and values concatenated
    """
    fields_batch = [state.fields for state in states]
    # list[list[list[unicode]]] (batch x num_keys x key length)
    values_batch = [[word_tokenize(value) for value in fields.values]
                    for fields in fields_batch]
    keys_batch = [[word_tokenize(key) for key in fields.keys]
                  for fields in fields_batch]

    # Pad
    batch_size = len(fields_batch)
    max_num_fields = max(len(values) for values in values_batch)
    max_num_fields = max(max_num_fields, 1)  # Ensure non-empty
    mask = torch.ones(batch_size, max_num_fields)
    assert len(keys_batch) == len(values_batch) == len(mask)
    for keys, values, submask in zip(keys_batch, values_batch, mask):
        assert len(keys) == len(values)
        if len(keys) < max_num_fields:
            submask[len(keys):] = 0.
            keys.extend(
                [[UtteranceVocab.PAD] for _ in xrange(
                    max_num_fields - len(keys))])
            values.extend(
                [[UtteranceVocab.PAD] for _ in xrange(
                    max_num_fields - len(values))])

    # Flatten to list[list[unicode]] (batch * num_keys) x key length
    keys_batch = flatten(keys_batch)
    values_batch = flatten(values_batch)

    # Embed and mask (batch * num_keys) x embed_dim
    key_embeds, _ = self._utterance_embedder(keys_batch)
    key_embeds = key_embeds.view(
        batch_size, max_num_fields, self._utterance_embedder.embed_dim)
    value_embeds, _ = self._utterance_embedder(values_batch)
    value_embeds = value_embeds.view(
        batch_size, max_num_fields, self._utterance_embedder.embed_dim)
    key_embeds = SequenceBatch(key_embeds, GPUVariable(mask))
    value_embeds = SequenceBatch(value_embeds, GPUVariable(mask))

    entry_embed_values = torch.cat(
        [key_embeds.values, value_embeds.values], 2)
    entry_embeds = SequenceBatch(entry_embed_values, key_embeds.mask)

    return entry_embeds
def some_seq_batch(self):
    values = GPUVariable(torch.FloatTensor([
        [[1, 2], [4, 5], [4, 4]],
        [[0, 4], [43, 5], [-1, 20]],
        [[-1, 20], [43, 5], [0, 0]],
    ]))
    mask = GPUVariable(torch.FloatTensor([
        [1, 1, 0],
        [1, 0, 0],
        [0, 0, 0],
    ]))
    return SequenceBatch(values, mask)
def forward(self, dom_elements, alignment_fields):
    """Computes the alignments. An element aligns iff elem.text in utterance
    and elem.text != "".

    Args:
        dom_elements (list[list[DOMElement]]): batch of set of DOM elements
            (padded to be unragged)
        alignment_fields (list[Fields]): batch of fields. Alignments computed
            with the values of the fields.

    Returns:
        Variable[FloatTensor]: batch x num_elems x embed_dim
            The aligned embeddings per DOM element
    """
    batch_size = len(dom_elements)
    assert batch_size > 0
    num_dom_elems = len(dom_elements[0])
    assert num_dom_elems > 0

    # mask: batch_size x num_dom_elems x num_buckets
    alignments = np.zeros(
        (batch_size, num_dom_elems, self._num_buckets)).astype(np.float32)

    # Calculate the alignment matrix between elems and fields
    for batch_idx in xrange(len(dom_elements)):
        for dom_idx, dom in enumerate(dom_elements[batch_idx]):
            keys = alignment_fields[batch_idx].keys
            vals = alignment_fields[batch_idx].values
            for key, val in zip(keys, vals):
                if dom.text and dom.text in val:
                    align_idx = self._keys2index.word2index(key)
                    alignments[batch_idx, dom_idx, align_idx] = 1.

    # Flatten alignments for SequenceBatch
    # (batch * num_dom_elems) x num_buckets
    alignments = GPUVariable(torch.from_numpy(alignments.reshape(
        (batch_size * num_dom_elems, self._num_buckets))))

    # (batch * num_dom_elems) x num_buckets x embed_dim
    expanded_alignment_embeds = self._alignment_embeds.expand(
        batch_size * num_dom_elems, self._num_buckets, self.embed_dim)
    alignment_seq_batch = SequenceBatch(
        expanded_alignment_embeds, alignments, left_justify=False)

    # (batch * num_dom_elems) x alignment_embed_dim
    alignment_embeds = SequenceBatch.reduce_sum(alignment_seq_batch)
    return alignment_embeds.view(batch_size, num_dom_elems, self.embed_dim)
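# Standalone sketch of the alignment matrix built in forward above: a binary
# (num_elems x num_buckets) indicator of which field keys an element's text matches.
# The names and toy data below are stand-ins for illustration only.
import numpy as np

elem_texts = ['submit', 'john', '']        # dom.text per element
field_keys = ['name', 'button']
field_vals = ['john smith', 'submit']
key2index = {k: i for i, k in enumerate(field_keys)}

alignment_matrix = np.zeros((len(elem_texts), len(field_keys)), dtype=np.float32)
for d, text in enumerate(elem_texts):
    for key, val in zip(field_keys, field_vals):
        if text and text in val:
            alignment_matrix[d, key2index[key]] = 1.
# alignment_matrix == [[0, 1], [1, 0], [0, 0]]: the empty-text element aligns to nothing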
def from_sequences(cls, sequences, vocab_or_vocabs, min_seq_length=0,
                   volatile=False):
    """Convert a batch of sequences into a SequenceBatch.

    Args:
        sequences (list[list[unicode]])
        vocab_or_vocabs (WordVocab|list[WordVocab]): either a single vocab,
            or a list of vocabs, one per sequence
        min_seq_length (int): enforce that the Tensor representing the
            SequenceBatch have at least this many columns.
        volatile (bool): whether to make Variables volatile (don't track grads)

    Returns:
        SequenceBatch
    """
    # determine dimensions
    batch_size = len(sequences)
    if batch_size == 0:
        seq_length = 0
    else:
        seq_length = max(len(seq) for seq in sequences)  # max seq length in batch
    seq_length = max(seq_length, min_seq_length)  # make sure it is at least min_seq_length
    shape = (batch_size, seq_length)

    # set up vocabs
    if isinstance(vocab_or_vocabs, list):
        vocabs = vocab_or_vocabs
        assert len(vocabs) == batch_size
    else:
        # duplicate a single vocab
        assert isinstance(vocab_or_vocabs, Vocab)
        vocabs = [vocab_or_vocabs] * batch_size

    # build arrays
    values = np.zeros(shape, dtype=np.int64)  # pad with zeros
    mask = np.zeros(shape, dtype=np.float32)
    for i, (seq, vocab) in enumerate(izip(sequences, vocabs)):
        for j, word in enumerate(seq):
            values[i, j] = vocab.word2index(word)
            mask[i, j] = 1.0

    return SequenceBatch(
        GPUVariable(torch.from_numpy(values), volatile=volatile),
        GPUVariable(torch.from_numpy(mask), volatile=volatile))
def test_gated_update():
    h = GPUVariable(torch.FloatTensor([
        [1, 2, 3],
        [4, 5, 6],
    ]))
    h_new = GPUVariable(torch.FloatTensor([
        [-1, 2, 3],
        [4, 8, 0],
    ]))
    update = GPUVariable(torch.FloatTensor([[0], [1]]))  # only update the second row

    out = gated_update(h, h_new, update)

    assert_tensor_equal(out, [[1, 2, 3], [4, 8, 0]])
def add_norm_noise(self, munorm, eps):
    """
    KL loss is -log(maxvalue/eps):
    cut at maxvalue - eps, and add [0, eps] noise.
    """
    trand = torch.rand(1).expand(munorm.size()) * eps
    return self.normclip(munorm) + GPUVariable(trand)
def __new__(cls, candidate_selector, candidate_probs, candidates=None):
    """Select candidates.

    Args:
        candidate_selector (Callable[[Variable[FloatTensor]], list[int]]):
            takes candidate_probs and returns a batch of selections (integers)
        candidate_probs (Variable[FloatTensor]): of shape
            (batch_size, num_candidates)
        candidates (list[list[object]]): a batch of candidate sets, where
            each set is a list of candidates
    """
    indices = candidate_selector(candidate_probs)

    if candidates is not None:
        assert len(candidates) == len(indices)
        selected = [thing_list[index]
                    for thing_list, index in zip(candidates, indices)]
    else:
        selected = None

    indices = GPUVariable(torch.LongTensor(indices))  # (batch_size,)
    probs = torch.gather(
        candidate_probs, 1, torch.unsqueeze(indices, 1))  # (batch_size, 1)
    probs = torch.squeeze(probs, 1)  # (batch_size,)

    cls._check_shapes(selected, probs, indices)

    self = super(Selection, cls).__new__(
        cls, selected, probs, indices, candidates, candidate_probs)
    return self
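# Standalone sketch of the gather trick used in __new__ above: pick out, for each row,
# the probability of the chosen candidate index. Toy numbers only.
import torch

candidate_probs = torch.FloatTensor([[0.1, 0.7, 0.2],
                                     [0.5, 0.3, 0.2]])
chosen = torch.LongTensor([1, 0])  # chosen candidate per row
selected_probs = torch.gather(candidate_probs, 1, chosen.unsqueeze(1)).squeeze(1)
# selected_probs == tensor([0.7, 0.5])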
def forward(self, memory_cells, query):
    """Performs sentinel attention with a sentinel of 0. Returns the
    AttentionOutput where the weights do not include the sentinel weight.

    Args:
        memory_cells (Variable[FloatTensor]): batch x num_cells x cell_dim
        query (Variable[FloatTensor]): batch x query_dim

    Returns:
        AttentionOutput: weights do not include sentinel weights
    """
    batch_size, _, cell_dim = memory_cells.values.size()
    sentinel = self._sentinel_embed.expand(batch_size, 1, cell_dim)
    sentinel_mask = GPUVariable(torch.ones(batch_size, 1))

    cell_values_with_sentinel = torch.cat([memory_cells.values, sentinel], 1)
    cell_masks_with_sentinel = torch.cat(
        [memory_cells.mask, sentinel_mask], 1)
    cells_with_sentinel = SequenceBatch(
        cell_values_with_sentinel, cell_masks_with_sentinel,
        left_justify=False)

    attention_output = super(SentinelAttention, self).forward(
        cells_with_sentinel, query)
    weights_with_sentinel = attention_output.weights

    # TODO: Bring this line in after torch v0.2.0
    # weights_without_sentinel = weights_with_sentinel[batch_size, :-1]
    # attention_output = AttentionOutput(
    #     weights=weights_without_sentinel, context=attention_output.context)
    return attention_output
def __init__(self, num_embeddings, embedding_dim, initial_embeddings,
             **kwargs):
    """Constructs TrainFlagEmbedding with embeddings initialized with
    initial_embeddings.

    Args:
        num_embeddings (int)
        embedding_dim (int)
        initial_embeddings (np.array): (num_embeddings, embedding_dim)
        trainable (bool): if False, weights matrix will not change.
            (default True)
        kwargs: all other supported keywords in torch.nn.Embeddings.
    """
    super(TrainFlagEmbedding, self).__init__()
    trainable = kwargs.pop("trainable", True)
    self._trainable = trainable
    if trainable:
        embedding = Embedding(num_embeddings, embedding_dim, **kwargs)
        embedding.weight.data.set_(torch.from_numpy(initial_embeddings))
        self._embedding = embedding
        self._weight = embedding.weight
    else:
        self._weight = GPUVariable(torch.from_numpy(initial_embeddings))
def forward(self, states):
    # pass a list (not a generator) to np.stack; generator support is deprecated
    states = GPUVariable(torch.FloatTensor(
        np.stack([state.goal.all_but_cum_reward for state in states])))
    hidden = F.relu(self._layer1(states))
    output = F.relu(self._layer2(hidden))
    return output
def generate_edits(self, encoder_input, norm):
    """Draw uniform random vectors with given norm, and use as edit vector."""
    source_words = encoder_input.source_words
    source_word_embeds = self.token_embedder.embed_seq_batch(source_words)
    insert_embeds = self.token_embedder.embed_seq_batch(
        encoder_input.insert_words)
    delete_embeds = self.token_embedder.embed_seq_batch(
        encoder_input.delete_words)
    insert_embeds_exact = self.token_embedder.embed_seq_batch(
        encoder_input.insert_exact_words)
    delete_embeds_exact = self.token_embedder.embed_seq_batch(
        encoder_input.delete_exact_words)

    source_encoder_output = self.source_encoder(source_word_embeds.split())
    source_embeds_list = source_encoder_output.combined_states
    source_embeds = SequenceBatch.cat(source_embeds_list)

    # the final hidden states in both the forward and backward direction, concatenated
    source_embeds_final = torch.cat(
        source_encoder_output.final_states, 1)  # (batch_size, hidden_dim)

    edit_encoded = self.edit_encoder(insert_embeds, delete_embeds)

    # tensors expose size(), not a callable shape(); unpack it for torch.randn
    rand_vec = torch.randn(*edit_encoded.size())
    edit_embed = GPUVariable(
        rand_vec / torch.norm(rand_vec, 2, dim=1).expand_as(rand_vec) * norm)

    agenda = self.agenda_maker(source_embeds_final, edit_embed)
    return EncoderOutput(source_embeds, insert_embeds_exact,
                         delete_embeds_exact, agenda)
def forward(self, states):
    cum_rewards = torch.LongTensor(
        np.stack([state.goal.cum_reward for state in states]))
    cum_rewards = cum_rewards.view(-1, 1)
    states = GPUVariable(torch.FloatTensor(
        np.stack([state.goal.all_but_cum_reward for state in states])))

    # one-hot encode the integer cumulative reward (5 buckets)
    reward_one_hot = torch.FloatTensor(cum_rewards.shape[0], 5)
    reward_one_hot.zero_()
    reward_one_hot.scatter_(1, cum_rewards, 1)
    reward_one_hot = GPUVariable(reward_one_hot)

    reward_embed = F.relu(self._reward_embedder(reward_one_hot))
    state_embed = F.relu(self._layer1(states))
    hidden = torch.cat([state_embed, reward_embed], dim=1)
    output = F.relu(self._layer2(hidden))
    return output
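# Standalone sketch of the scatter_-based one-hot encoding used above: each row's
# integer reward index becomes a 1 in the corresponding column. Toy sizes only.
import torch

reward_buckets = torch.LongTensor([[2], [0], [4]])  # (batch, 1) integer bucket ids
one_hot = torch.zeros(reward_buckets.size(0), 5)
one_hot.scatter_(1, reward_buckets, 1)
# row 0 -> [0,0,1,0,0], row 1 -> [1,0,0,0,0], row 2 -> [0,0,0,0,1]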
def forward(self, encoder_input, train_mode=True):
    """Encode.

    Args:
        encoder_input (EncoderInput)

    Returns:
        EncoderOutput, cost (0 in this case)
    """
    context_agenda, all_channel_embeds = self.ctx_code_out(encoder_input)
    if self.use_vae and train_mode:
        if self.use_target:
            target_agenda = self.target_out(encoder_input)
            vae_agenda, vae_loss = self.vae_wrap(
                context_agenda + target_agenda, True)
        else:
            vae_agenda, vae_loss = self.vae_wrap(context_agenda, True)
    else:
        vae_agenda = context_agenda / torch.sqrt(
            torch.sum(context_agenda ** 2.0, dim=1)).expand_as(context_agenda)
        vae_loss = GPUVariable(torch.zeros(1))

    return EncoderOutput(
        all_channel_embeds, vae_agenda, encoder_input.token_embedder), vae_loss
def _interpolate_examples(self, ex_a, ex_b):
    """
    Args:
        [unicode], [unicode]
    Returns:
        [[unicode]]
    """
    examples = [ex_a, ex_b]
    enc_input = self.encoder.preprocess(examples)
    agenda, _ = self.encoder(enc_input)
    agenda_ = agenda.data.cpu().numpy()
    agendas = self._interpolate_vectors(agenda_[0, :], agenda_[1, :])

    samples = []
    for i, ag_ in enumerate(agendas):
        ag = GPUVariable(torch.FloatTensor(ag_.reshape(1, self.agenda_dim)))
        # beam, _ = self.sample_decoder.decode(
        #     [0], ag, beam_size=1, prefix_hints=[[]])
        beam, _ = self.beam_decoder.decode(
            [0], ag, weighted_value_estimators=[], beam_size=1,
            prefix_hints=[[]], sibling_penalty=0)
        samples.append(beam[0][0])
    return samples
def seq_batch_noise(self, seq_batch, draw_noise):
    """Returns a noisy version of seq_batch, in which every vector is noisy
    and unit norm.

    :param seq_batch (SequenceBatch): a sequence batch of elements
    :return: noisy version of seq_batch
    """
    values = seq_batch.values
    mask = seq_batch.mask
    batch_size, max_edits, w_embed_size = values.size()

    new_values = GPUVariable(torch.from_numpy(
        np.zeros((batch_size, max_edits, w_embed_size), dtype=np.float32)))
    m_expand = mask.unsqueeze(2).expand(batch_size, max_edits, w_embed_size)

    for max_edit in range(max_edits):
        # masked positions get pure noise; unmasked positions get vMF-perturbed vectors
        phint = self.sample_vMF(values[:, max_edit, :], self.noise_scaler)
        prand = self.draw_p_noise(batch_size, w_embed_size)
        new_values[:, max_edit, :] = (
            phint * m_expand[:, max_edit, :]
            + prand * (1 - m_expand[:, max_edit, :]))

    return SequenceBatch(values=new_values * draw_noise, mask=mask)
def test_embed(self):
    sequences = [
        [],
        [1, 2, 3],
        [3, 3],
        [2],
    ]

    vocab = SimpleVocab([0, 1, 2, 3, 4])
    indices = SequenceBatch.from_sequences(sequences, vocab)

    embeds = GPUVariable(torch.FloatTensor([
        [0, 0],
        [2, 2],    # 1
        [3, 4],    # 2
        [-10, 1],  # 3
        [11, -1],  # 4
    ]))

    embedded = SequenceBatch.embed(indices, embeds)
    correct = np.array([
        [[0, 0], [0, 0], [0, 0]],
        [[2, 2], [3, 4], [-10, 1]],
        [[-10, 1], [-10, 1], [0, 0]],
        [[3, 4], [0, 0], [0, 0]],
    ], dtype=np.float32)
    assert_tensor_equal(embedded.values, correct)
def _variable(self, v):
    batch_size = int(v.size()[0])
    indices = np.expand_dims(np.arange(batch_size), 1)  # (batch_size, 1)
    dup_indices = np.tile(indices, (1, self.beam_size))  # (batch_size, beam_size)
    dup_indices = dup_indices.flatten()  # (batch_size * beam_size,)
    dup_indices = GPUVariable(torch.from_numpy(dup_indices))
    return torch.index_select(v, 0, dup_indices)
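# Standalone sketch of the row-duplication trick in _variable above: each batch row
# is repeated beam_size times via index_select. Toy numbers only.
import numpy as np
import torch

v = torch.FloatTensor([[1., 2.], [3., 4.]])  # (batch_size=2, dim=2)
beam_size = 3
dup = np.tile(np.arange(2).reshape(-1, 1), (1, beam_size)).flatten()  # [0,0,0,1,1,1]
out = torch.index_select(v, 0, torch.from_numpy(dup))
# out has shape (6, 2): row [1, 2] three times, then row [3, 4] three times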
def _pad_elements(self, dom_elems):
    """Takes a batch of dom element lists. Returns the batch with pads so
    that each batch is the same length, and masks.

    Args:
        dom_elems (list[list[DOMElement]]): unpadded batch

    Returns:
        list[list[DOMElement]], Variable[FloatTensor]: batch x num_elems
    """
    # Pad everything to be the same as longest list
    num_elems = max(len(dom_list) for dom_list in dom_elems)
    mask = torch.ones(len(dom_elems), num_elems)
    for dom_list, submask in zip(dom_elems, mask):
        # Avoid empty slice torch errors
        if len(dom_list) < num_elems:
            submask[len(dom_list): num_elems] = 0.
            dom_list.extend(
                [DOMElementPAD()] * (num_elems - len(dom_list)))

        # TODO: Get rid of this hack.
        # TODO(kelvin): WARNING: this hack also means that we cannot ATTEND to these items
        for i, elem in enumerate(dom_list):
            # never click text elements
            if elem.tag == "t":
                submask[i] = 0.

    return dom_elems, GPUVariable(mask)
def test_log_sum_exp(self):
    values = GPUVariable(torch.FloatTensor([
        [0, 1, -2, -3],
        [-2, -5, 1, 0],
    ]))
    mask = GPUVariable(torch.FloatTensor([
        [1, 1, 1, 0],
        [1, 1, 0, 0],
    ]))
    seq_batch = SequenceBatch(values, mask, left_justify=False)
    result = SequenceBatch.log_sum_exp(seq_batch)
    correct = [1.3490122167681864, -1.9514126484262577]
    assert_tensor_equal(result, correct)
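# Quick numpy check of the expected values in test_log_sum_exp above: with the mask
# applied, row 0 reduces over [0, 1, -2] and row 1 over [-2, -5].
import numpy as np

row0 = np.log(np.exp(0) + np.exp(1) + np.exp(-2))  # ~= 1.3490122167681864
row1 = np.log(np.exp(-2) + np.exp(-5))             # ~= -1.9514126484262577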
def forward(self, states):
    # Normal embeddings
    states = GPUVariable(torch.FloatTensor(
        np.stack([state.goal.numpy() for state in states])))
    hidden = F.relu(self._layer1(states))
    output = F.relu(self._layer2(hidden))
    return output