def forward_i(self, data, iword_indicator, iword_numerals, iword_numeral_length):
    v = LT(data)
    v = v.cuda() if self.is_cuda else v
    embed = self.ivectors(v)  # B x T x F
    if iword_numerals.size()[0] == 0:
        return embed
    iword_numerals = iword_numerals.cuda() if self.is_cuda else iword_numerals
    iword_numeral_length = iword_numeral_length.cuda() if self.is_cuda else iword_numeral_length
    iword_numeral_length_permuted, perm_idx = iword_numeral_length.sort(0, descending=True)
    iword_numerals_permuted = iword_numerals[perm_idx]
    packed_input = pack_padded_sequence(iword_numerals_permuted,
                                        iword_numeral_length_permuted,
                                        batch_first=True)
    invert_perm_idx = self.invert_permutation(perm_idx)
    # assert t.equal(iword_numerals_permuted[invert_perm_idx], iword_numerals)
    # assert iword_indicator.sum() == iword_numerals.size()[0]
    if self.scheme == 'LSTM':
        _, (hn, cn) = self.digital_RNN_i(packed_input)
    else:
        _, hn = self.digital_RNN_i(packed_input)
    # TODO: how to check?
    embed[iword_indicator] = hn.squeeze(0)[invert_perm_idx]
    return embed
def forward_o(self, data, owords_indicator, owords_numerals, owords_numeral_length):
    v = LT(data)
    v = v.cuda() if self.ovectors.weight.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0:
        return embed
    owords_numerals = owords_numerals.cuda() if self.is_cuda else owords_numerals
    owords_numeral_length = owords_numeral_length.cuda() if self.is_cuda else owords_numeral_length
    owords_numeral_length_permuted, perm_idx = owords_numeral_length.sort(0, descending=True)
    owords_numerals_permuted = owords_numerals[perm_idx]
    packed_input = pack_padded_sequence(owords_numerals_permuted,
                                        owords_numeral_length_permuted,
                                        batch_first=True)
    invert_perm_idx = self.invert_permutation(perm_idx)
    assert t.equal(owords_numerals_permuted[invert_perm_idx], owords_numerals)
    assert owords_indicator.sum() == owords_numerals.size()[0]
    if self.scheme == 'LSTM':
        _, (hn, cn) = self.digital_RNN_o(packed_input)
    else:
        _, hn = self.digital_RNN_o(packed_input)
    embed[owords_indicator] = hn.squeeze(0)[invert_perm_idx]
    return embed
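# Both methods above rely on self.invert_permutation, which is not included in this
# snippet. A minimal sketch of such a helper, assuming it only needs to return the
# indices that undo the length-sort permutation (so hn.squeeze(0)[invert_perm_idx]
# restores the pre-sort ordering); this is an assumption, not the original code:
def invert_permutation(self, perm_idx):
    # inverse[perm_idx[i]] = i, so indexing with `inverse` reverses the sort
    inverse = perm_idx.new_empty(perm_idx.size(0))
    inverse[perm_idx] = t.arange(perm_idx.size(0), device=perm_idx.device)
    return inverse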
def decode(self, encoder_outputs: Tensor, encoder_output_lengths: Tensor) -> Tensor:
    """
    Greedily decode encoder_outputs.

    Args:
        encoder_outputs (torch.FloatTensor): An output sequence of the encoder.
            `FloatTensor` of size ``(batch, seq_length, dimension)``
        encoder_output_lengths (torch.LongTensor): The length of encoder outputs. ``(batch)``

    Returns:
        * outputs (torch.LongTensor): Greedily decoded token indices of shape
          ``(batch, max_length)``.
    """
    hidden_states, attn = None, None
    outputs = list()

    batch_size = encoder_outputs.size(0)
    input_var = LongTensor([self.sos_id] * batch_size).view(batch_size, 1)
    if torch.cuda.is_available():
        input_var = input_var.cuda()

    for di in range(self.max_length):
        step_outputs, hidden_states, attn = self.forward_step(
            input_var=input_var,
            hidden_states=hidden_states,
            encoder_outputs=encoder_outputs,
            attn=attn,
        )
        # Feed the most likely token back in as the next input (greedy search).
        input_var = step_outputs.topk(1)[1]
        outputs.append(input_var)

    outputs = torch.stack(outputs, dim=1).squeeze(2)
    return outputs
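# A minimal usage sketch for decode(); the encoder module, its call signature and
# the decoder variable name are assumptions, not part of the snippet above:
# encoder_outputs, encoder_output_lengths = encoder(inputs, input_lengths)
# predictions = decoder.decode(encoder_outputs, encoder_output_lengths)
# # predictions: LongTensor of shape (batch, max_length) with greedily decoded token ids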
def predict(self, masked_sentence, fold_case=False):
    """Predict the masked word in `masked_sentence`.

    Note that the output probability distribution is unnormalized.

    Parameters
    ----------
    masked_sentence : str
        Sentence with one token masked out
    fold_case : bool
        Whether or not to average predictions over different casings.

    Returns
    -------
    pd.DataFrame
        The unnormalized probability distribution over BERT's vocab of
        each word in the masked position.
    """
    tokens = START + self.tokenize(masked_sentence) + END
    target_index = tokens.index(MASK)
    token_ids = self.tokens_to_ids(tokens)
    tensor = LongTensor(token_ids).unsqueeze(0)
    if self.gpu:
        tensor = tensor.cuda()
    probs = self.model(tensor)[0][0, target_index]
    if self.gpu:
        probs = probs.cpu()
    probs = pd.DataFrame(probs.data.numpy(), index=self.index, columns=["p"])
    if fold_case:
        probs.index = probs.index.str.lower()
        return probs.groupby("word").mean()
    return probs
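# A short usage sketch for predict(); the wrapper class name, its constructor and
# the exact mask token are assumptions (they are not shown in this snippet):
# bert = MaskedLMWrapper(gpu=True)                       # hypothetical class holding predict()
# probs = bert.predict("The capital of France is [MASK] .", fold_case=True)
# print(probs.sort_values("p", ascending=False).head(10))  # top candidate words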
def _computer_score(self, emissions: torch.Tensor, tags: torch.LongTensor,
                    mask: torch.ByteTensor) -> torch.Tensor:
    # batch second
    assert emissions.dim() == 3 and tags.dim() == 2
    assert emissions.shape[:2] == tags.shape
    assert emissions.size(2) == self.num_tags
    assert mask.shape == tags.shape
    assert mask[0].all()

    tags = tags.cuda()  # shape: (seq_length, batch_size), e.g. 62 x 32
    seq_length, batch_size = tags.shape
    mask = mask.float().cuda()

    # self.start_transitions holds the score of going from START to every other tag
    # (END excluded); these are randomly initialised parameters.
    # tags[0] has shape (batch_size,): the first tag of each sentence.
    score = self.start_transitions[tags[0]]
    # code.interact(local=locals())
    # Emission score of the first word in every sentence.
    score += emissions[0, torch.arange(batch_size), tags[0]]

    for i in range(1, seq_length):  # [1, 2, ..., seq_length - 1]
        # if mask[i].sum() == 0:
        #     break
        # transitions[i][j] is the score of moving from tag i to tag j (A_ij).
        score += self.transitions[tags[i - 1], tags[i]] * mask[i]
        # Emission score P_{i, y_i}, masked out for padded positions.
        score += emissions[i, torch.arange(batch_size), tags[i]] * mask[i]

    # Get the tag of the last real (non-padded) word of each sample.
    # shape: (batch_size,), the true length of each sequence:
    # .long() casts to integer, .sum(dim=0) counts the tokens per sentence.
    seq_ends = mask.long().sum(dim=0) - 1
    # Tag of the last word in each sample.
    last_tags = tags[seq_ends, torch.arange(batch_size)]
    # shape: (batch_size,), add the transition score from each sample's last tag to END.
    score += self.end_transitions[last_tags]

    return score
def wrap(b: torch.LongTensor):
    if b is None:
        return b
    if len(b.size()) > 1 and isinstance(b, list):
        b = torch.stack(b, 0)
    b = b.contiguous()
    if self.cuda:
        b = b.cuda()
    b = Variable(b, volatile=self.volatile, requires_grad=False)
    return b
def forward_o(self, data, owords_indicator, owords_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0:
        return embed
    numeral_embed = self.get_numeral_embed_batch(owords_numerals)
    # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    #   => [num_of_numerals x embedding_size]
    embed[owords_indicator] = numeral_embed
    return embed
def forward_o(self, data, owords_indicator, owords_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ovectors(v)
    if owords_numerals.size()[0] == 0 or self.gmm_posterior is None:
        return embed
    # [num_of_numerals x prototype_size]
    prototype_weights = self.get_numeral_embed_weights_batch(owords_numerals)
    # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    #   => [num_of_numerals x embedding_size]
    numeral_embed = t.matmul(prototype_weights, self.oprototypes_embeddings)
    embed[owords_indicator] = numeral_embed
    return embed
def forward(self,
            entities: torch.LongTensor,   # [e1, ..., en] : [batch, ent_n]
            relations: torch.LongTensor   # [s1, ..., sm] : [batch, rel_size]
            ) -> torch.FloatTensor:
    assert entities.size()[-1] == relations.size()[-1] - 1, \
        "size entity list should match relation list"
    if torch.cuda.is_available():
        entities, relations = entities.cuda(), relations.cuda()

    ent_embed = self.e_embedding(entities)    # [batch, len_ent, e_embed]
    rel_embed = self.r_embedding(relations)   # [batch, len_ent - 1, r_embed]

    null_to_cat = self.null.repeat(relations.size()[0], 1, 1)
    rel_embed = self.concat([rel_embed, null_to_cat], dim=1)

    ent_proj = self.W_eh(ent_embed)
    rel_proj = self.W_rh(rel_embed)
    rnn_out, _ = self.RNN(ent_proj + rel_proj)

    return self.sim_score(rnn_out, self.r_embedding.weight)
def forward_i(self, data, iword_indicator, iword_numerals):
    v = LT(data)
    v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
    embed = self.ivectors(v)
    if iword_numerals.size()[0] == 0:
        return embed
    # prototype_weights = self.get_numeral_embed_weights_batch(iword_numerals)  # [num_of_numerals x prototype_size]
    # numeral_embed = t.matmul(prototype_weights, self.iprototypes_embeddings)
    #     # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    #     #   => [num_of_numerals x embedding_size]
    numeral_embed = self.get_numeral_embed_batch(iword_numerals)
    embed[iword_indicator] = numeral_embed
    return embed
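# forward_i and forward_o above call self.get_numeral_embed_batch, which is not
# included in this snippet. Judging from the expanded prototype-based variant shown
# earlier, it presumably maps each numeral to a soft mixture of prototype embeddings.
# A rough sketch for the input side (an assumption, not the original method; the real
# implementation likely chooses input or output prototypes depending on the caller):
def get_numeral_embed_batch(self, numerals):
    # [num_of_numerals x prototype_size]
    prototype_weights = self.get_numeral_embed_weights_batch(numerals)
    # [num_of_numerals x prototype_size] x [prototype_size x embedding_size]
    #   => [num_of_numerals x embedding_size]
    return t.matmul(prototype_weights, self.iprototypes_embeddings)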
def next_target(self, mode, cuda, device_id):
    if mode == TRAIN_MODE:
        target_id = self.train.next_items(1)[0]
    elif mode == DEV_MODE:
        target_id = self.dev.next_items(1)[0]
    elif mode == TEST_MODE:
        target_id = self.test.next_items(1)[0]

    _1d_feature, _2d_feature = get_features(target_id)
    contact_map = read_contact_map(target_id)

    # Convert to FloatTensors
    _1d_feature = FloatTensor(np.expand_dims(_1d_feature, 0))
    _2d_feature = FloatTensor(np.expand_dims(_2d_feature, 0))
    contact_map = LongTensor(np.expand_dims(contact_map, 0))

    if cuda:
        _1d_feature = _1d_feature.cuda(device_id)
        _2d_feature = _2d_feature.cuda(device_id)
        contact_map = contact_map.cuda(device_id)

    return target_id, _1d_feature, _2d_feature, contact_map
def forward(self, x: torch.LongTensor):
    [batch_size, sent_len] = x.size()
    # Pad each sequence with `self._context` padding tokens on both sides so every
    # position has a full window.
    padding_ = Variable(torch.LongTensor([[self._c_pad] * self._context] * batch_size))
    x = torch.cat((padding_, x, padding_), dim=1)
    if self.gpu:
        x = x.cuda()

    embedding = torch.stack([
        embed(x[:, i:(i + sent_len)])
        for i, embed in enumerate(self.embedding)
    ], dim=3)

    multiple = []
    for i in range(2 * self._context + 1):
        multiple.append(embedding[:, :, :, self._context] * embedding[:, :, :, i])
    multiple = torch.stack(multiple, dim=3).sum(dim=3)

    context = [
        self.context.view(1, self._embed_size, self._attn)
        for _ in range(batch_size)
    ]
    context = torch.cat(context, 0).contiguous()

    multi_rep = F.tanh(self.attn_linear(multiple))
    alpha = torch.bmm(multi_rep, context)  # [batch, len, embed] x [batch, embed, attn]
    alpha = torch.softmax(alpha, 1)        # [batch, len, attn]
    alpha = torch.transpose(alpha, 1, 2)   # [batch, attn, len]
    multiple = torch.bmm(alpha, multiple).view(batch_size, -1)  # [batch, attn x embed]

    return self.classifier(multiple)
def learn(self, state, action, reward, next_state, done):
    # Memorize experience
    self.memory.append((state, action, reward, next_state, done))
    self.episode_reward += reward
    self.total_steps += 1

    # End of episode
    if done:
        self.num_episode += 1  # Episode counter
        self.logger.log_dict(
            self.total_steps, {
                'episode_reward': self.episode_reward,
                'memory_size': len(self.memory),
            })
        self.epsilons.append(self.epsilon)  # Log epsilon value
        # Epsilon decay
        self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_end)
        self.episode_reward = 0

        # Periodically update target network with current one
        if self.num_episode % self.target_update_interval == 0:
            self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())

    # Train when we have enough experiences in the replay memory
    if len(self.memory) > self.batch_size:
        # Sample batch of experience
        batch = random.sample(self.memory, self.batch_size)
        state, action, reward, next_state, done = zip(*batch)

        action = LongTensor(action)
        reward = Tensor(reward)
        done = Tensor(done)
        if torch.cuda.is_available():
            action = action.cuda()
            reward = reward.cuda()
            done = done.cuda()

        # Q-value for current state given current action
        q_values = self.qnetwork(state)
        q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

        # Compute the TD target
        next_q_values = self.target_qnetwork(next_state)
        next_q_value = next_q_values.max(1)[0]
        td_target = reward + self.gamma * next_q_value * (1 - done)

        # Optimize quadratic loss
        loss = (q_value - td_target.detach()).pow(2).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.logger.log_dict(
            self.total_steps, {
                'dqn/loss': loss.data.cpu().numpy(),
                'dqn/reward': reward.mean().data.cpu().numpy(),
            })
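# A minimal interaction-loop sketch showing how learn() is typically driven; the gym
# environment, the agent class name and its act() method are assumptions, not part of
# the snippet above:
# import gym
# env = gym.make("CartPole-v1")
# agent = DQNAgent(...)                                   # hypothetical agent holding learn()
# for episode in range(num_episodes):
#     state = env.reset()
#     done = False
#     while not done:
#         action = agent.act(state)                       # epsilon-greedy action selection
#         next_state, reward, done, _ = env.step(action)
#         agent.learn(state, action, reward, next_state, done)
#         state = next_state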
def forward_2(self, nodes):
    v = LT(nodes.data.numpy())
    v = v.cuda() if self.vectors_2.weight.is_cuda else v
    return self.vectors_2(v)
def get_activations(ims, model, batch_size=50, dims=2048, cuda=False, verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- ims         : Tensor of input images
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the
                     number of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    # if ims.size(0) % batch_size != 0:
    #     print(('Warning: number of images is not a multiple of the '
    #            'batch size. Some samples are going to be ignored.'))
    # if batch_size > ims.size(0):
    #     print(('Warning: batch size is bigger than the data size. '
    #            'Setting batch size to data size'))
    #     batch_size = ims.size(0)

    n_batches = ims.size(0) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))
    for i in range(n_batches):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        cur_index = LongTensor(range(start, end))
        if cuda:
            cur_index = cur_index.cuda()
        batch = index_select(ims, 0, Variable(cur_index))
        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
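# A short usage sketch: FID statistics are usually derived from these activations by
# taking the mean and covariance over the sample dimension. The downstream FID
# computation itself is not shown here, and the variable names are illustrative:
# act = get_activations(ims, inception_model, batch_size=50, dims=2048, cuda=True)
# mu = np.mean(act, axis=0)            # (dims,)
# sigma = np.cov(act, rowvar=False)    # (dims, dims)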
def forward(self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            verb_indicator: torch.LongTensor,
            tags: torch.LongTensor = None,
            training: bool = False,  # added by ph to make function consistent with other model
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
    """
    Parameters
    ----------
    tokens : Dict[str, torch.LongTensor], required
        The output of ``TextField.as_array()``, which should typically be passed directly to a
        ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
        tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
        Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
        for the ``TokenIndexers`` when you created the ``TextField`` representing your
        sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
        which knows how to combine different word representations into a single vector per
        token in your input.
    verb_indicator: torch.LongTensor, required.
        An integer ``SequenceFeatureField`` representation of the position of the verb
        in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
        all zeros, in the case that the sentence has no verbal predicate.
    tags : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer gold class labels
        of shape ``(batch_size, num_tokens)``
    metadata : ``List[Dict[str, Any]]``, optional, (default = None)
        metadata containing the original words in the sentence and the verb to compute the
        frame for, under 'words' and 'verb' keys, respectively.
    training : added by ph to make function consistent with other model - does nothing

    Returns
    -------
    An output dictionary consisting of:
    logits : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        unnormalised log probabilities of the tag classes.
    class_probabilities : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        a distribution of the tag classes per word.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimised.
    """
    # added by ph
    tokens['tokens'] = tokens['tokens'].cuda()
    verb_indicator = verb_indicator.cuda()
    if tags is not None:
        tags = tags.cuda()

    embedded_text_input = self.embedding_dropout(self.text_field_embedder(tokens))
    mask = get_text_field_mask(tokens)
    embedded_verb_indicator = self.binary_feature_embedding(verb_indicator.long())
    # Concatenate the verb feature onto the embedded text. This now
    # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
    embedded_text_with_verb_indicator = torch.cat([embedded_text_input,
                                                   embedded_verb_indicator], -1)
    batch_size, sequence_length, _ = embedded_text_with_verb_indicator.size()

    encoded_text = self.encoder(embedded_text_with_verb_indicator, mask)

    logits = self.tag_projection_layer(encoded_text)
    reshaped_log_probs = logits.view(-1, self.num_classes)
    class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
        [batch_size, sequence_length, self.num_classes])
    output_dict = {"logits": logits,
                   "class_probabilities": class_probabilities,
                   "mask": mask}
    # We need to retain the mask in the output dictionary
    # so that we can crop the sequences to remove padding
    # when we do viterbi inference in self.decode.

    if tags is not None:
        loss = sequence_cross_entropy_with_logits(logits,
                                                  tags,
                                                  mask,
                                                  label_smoothing=self._label_smoothing)
        output_dict["loss"] = loss

    # added by ph
    output_dict['softmax_3d'] = class_probabilities.detach().cpu().numpy()

    return output_dict
def forward_o(self, data):
    v = LT(data)
    v = v.cuda() if self.ovectors.weight.is_cuda else v
    return self.ovectors(v)
def forward_o(self, data):
    v = LT(data)
    v = v.cuda() if self.ovectors.weight.is_cuda else v
    return t.matmul(self.sm(self.ovectors(v)), t.transpose(self.oW, 1, 0))
def get_batches(self, enable_cuda):
    """Create batches from data in class.

    Args:
        enable_cuda (bool): cuda batches or not

    Returns:
        list of batches
    """
    # Sort lines by the length of the English sentences
    sorted_lengths = [[len(x), len(y), self.word_positions(x), self.word_positions(y), x, y]
                      for x, y in zip(self.lines_e, self.lines_f)]
    sorted_lengths.sort()

    batches = []
    # Go through data in steps of batch size
    for i in range(0, len(sorted_lengths) - self.batch_size, self.batch_size):
        max_french = max([x[1] for x in sorted_lengths[i:i + self.batch_size]])
        max_english = max([x[0] for x in sorted_lengths[i:i + self.batch_size]])
        batch_french = LongTensor(self.batch_size, max_french)
        batch_english = LongTensor(self.batch_size, max_english)
        batch_english_pos = LongTensor(self.batch_size, max_english)
        batch_french_pos = LongTensor(self.batch_size, max_french)

        for j, data in enumerate(sorted_lengths[i:i + self.batch_size]):
            # Map words to indices and pad with EOS tag
            fline = self.pad_list(data[5], False, max_french,
                                  pad=self.dict_f.word2index['</s>'])
            eline = self.pad_list(data[4], True, max_english,
                                  pad=self.dict_e.word2index['</s>'])
            batch_french[j, :] = LongTensor(fline)
            batch_english[j, :] = LongTensor(eline)
            e_pos = data[2] + [data[2][-1]] * (max_english - len(data[2]))
            f_pos = data[3] + [data[3][-1]] * (max_french - len(data[3]))
            batch_english_pos[j, :] = LongTensor(e_pos)
            batch_french_pos[j, :] = LongTensor(f_pos)

        batch_english = Variable(batch_english)
        batch_english_pos = Variable(batch_english_pos)
        batch_french = Variable(batch_french)
        batch_french_pos = Variable(batch_french_pos)
        if enable_cuda:
            batch_english = batch_english.cuda()
            batch_english_pos = batch_english_pos.cuda()
            batch_french = batch_french.cuda()
            batch_french_pos = batch_french_pos.cuda()
        batches.append((batch_english, batch_english_pos, batch_french))

    random.shuffle(batches)
    return batches
def learn(self, state, action, reward, next_state, done):
    # Memorize experience
    self.memory.append((state, action, reward, next_state, done))
    self.episode_reward += reward
    self.total_steps += 1

    # New experiences get the current maximum priority
    if len(self.priorities) > 0:
        max_priority = np.max(self.priorities)
    else:
        max_priority = 1.0
    self.priorities.append(max_priority)

    # End of episode
    if done:
        self.num_episode += 1  # Episode counter
        self.logger.log_dict(
            self.total_steps, {
                'episode_reward': self.episode_reward,
                'memory_size': len(self.memory),
            })
        self.epsilons.append(self.epsilon)  # Log epsilon value
        # Epsilon decay
        self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_end)
        self.episode_reward = 0

        # Periodically update target network with current one
        if self.num_episode % self.target_update_interval == 0:
            self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())

    # Train when we have enough experiences in the replay memory
    if len(self.memory) > self.batch_size:
        prios = np.array(self.priorities)
        probs = prios ** self.alpha
        probs /= probs.sum()

        # Sample batch of experience
        indices = np.random.choice(len(self.memory), self.batch_size, p=probs)
        batch = [self.memory[idx] for idx in indices]
        state, action, reward, next_state, done = zip(*batch)

        # Importance sampling
        total = len(self.memory)
        weights = (total * probs[indices]) ** (-self.beta)
        weights /= weights.max()
        weights = np.array(weights, dtype=np.float32)

        action = LongTensor(action)
        reward = Tensor(reward)
        done = Tensor(done)
        weights = Tensor(weights)
        if torch.cuda.is_available():
            action = action.cuda()
            reward = reward.cuda()
            done = done.cuda()
            weights = weights.cuda()

        # Q-value for current state given current action
        q_values = self.qnetwork(state)
        q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

        # Compute the TD target
        next_q_values = self.target_qnetwork(next_state)
        next_q_value = next_q_values.max(1)[0]
        td_target = reward + self.gamma * next_q_value * (1 - done)

        # Optimize quadratic loss
        loss = (q_value - td_target.detach()).abs()

        # We use the individual losses as priorities
        priorities = loss + 1e-5
        for idx, prio in zip(indices, priorities):
            self.priorities[idx] = prio.item()

        # Optimize Q-network as usual
        loss = (loss * weights).pow(2).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.logger.log_dict(
            self.total_steps, {
                'dqn/loss': loss.data.cpu().numpy(),
                'dqn/reward': reward.mean().data.cpu().numpy(),
            })