def __call__(self, tokens: torch.LongTensor, prefix_mask: torch.LongTensor):
    # Only consider unpadded tokens for mapping to the OOV id
    padding_mask = tokens.new_ones(tokens.size(), dtype=torch.bool)
    for pad in self.excludes:
        padding_mask &= (tokens != pad)
    # Only mask prefixes since the others won't be attended;
    # cast to bool in case the prefix mask arrives as a LongTensor
    padding_mask &= prefix_mask.bool()
    # Create a uniformly random mask selecting either the original words or OOV tokens
    dropout_mask = (tokens.new_empty(tokens.size(), dtype=torch.float).uniform_() < self.mask_prob)
    oov_mask = dropout_mask & padding_mask
    oov_fill = tokens.new_empty(tokens.size(), dtype=torch.long).fill_(self.oov)
    result = torch.where(oov_mask, oov_fill, tokens)
    return result, oov_mask
from typing import Optional, Tuple

import torch
from torch import Tensor, LongTensor


def scatter_sort(
    src: Tensor,
    index: LongTensor,
    descending: bool = False,
    dim_size: Optional[int] = None,
    out: Optional[Tuple[Tensor, LongTensor]] = None,
) -> Tuple[Tensor, LongTensor]:
    if src.ndimension() > 1:
        raise ValueError("Only implemented for 1D tensors")
    if dim_size is None:
        dim_size = int(index.max()) + 1
    if out is None:
        result_values = torch.empty_like(src)
        result_indexes = index.new_empty(src.shape)
    else:
        result_values, result_indexes = out
    # Number of elements per segment, assuming `index` is grouped contiguously
    sizes = (
        index.new_zeros(dim_size)
        .scatter_add_(dim=0, index=index, src=torch.ones_like(index))
        .tolist()
    )
    # Sort each contiguous segment independently
    start = 0
    for size in sizes:
        end = start + size
        values, indexes = torch.sort(src[start:end], dim=0, descending=descending)
        result_values[start:end] = values
        result_indexes[start:end] = indexes + start
        start = end
    return result_values, result_indexes
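# Illustrative usage sketch for `scatter_sort` (not part of the original source).
# It assumes `index` is already grouped contiguously (e.g. [0, 0, 1, 1, 1]), which
# the sequential walk over segments above relies on.
src = torch.tensor([3.0, 1.0, 5.0, 4.0, 2.0])
index = torch.tensor([0, 0, 1, 1, 1])
values, perm = scatter_sort(src, index)
# values -> tensor([1., 3., 2., 4., 5.])
# perm   -> tensor([1, 0, 4, 3, 2])  (positions within the flat input)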
from typing import List

import torch


def token_dropout(tokens: torch.LongTensor,
                  oov_token: int,
                  exclude_tokens: List[int],
                  p: float = 0.2,
                  training: bool = True) -> torch.LongTensor:
    """During training, randomly replaces some of the non-padding tokens with a mask token
    with probability ``p``.

    Adapted from https://github.com/Hyperparticle/udify

    Args:
        tokens: The current batch of padded sentences with word ids
        oov_token: The mask token
        exclude_tokens: The tokens for padding the input batch
        p: The probability a word gets mapped to the unknown token (Default value = 0.2)
        training: Applies the dropout if set to ``True`` (Default value = True)

    Returns:
        A copy of the input batch with token dropout applied
    """
    if training and p > 0:
        # This creates a mask that only considers unpadded tokens for mapping to oov
        padding_mask = tokens.new_ones(tokens.size(), dtype=torch.bool)
        for pad in exclude_tokens:
            padding_mask &= (tokens != pad)
        # Create a uniformly random mask selecting either the original words or OOV tokens
        dropout_mask = (tokens.new_empty(tokens.size(), dtype=torch.float).uniform_() < p)
        oov_mask = dropout_mask & padding_mask
        oov_fill = tokens.new_empty(tokens.size(), dtype=torch.long).fill_(oov_token)
        result = torch.where(oov_mask, oov_fill, tokens)
        return result
    else:
        return tokens
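# Illustrative usage sketch for `token_dropout` (not part of the original source);
# the token ids below are arbitrary placeholders, with 0 acting as the padding id.
batch = torch.tensor([[5, 8, 2, 0, 0],
                      [7, 3, 9, 4, 0]])
dropped = token_dropout(batch, oov_token=1, exclude_tokens=[0], p=0.2, training=True)
# Padding positions (id 0) are never replaced; every other position becomes 1
# with probability 0.2, and keeps its original id otherwise.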
def forward(self, inp: torch.FloatTensor, tgt: torch.LongTensor):
    if inp.size(0) != tgt.size(0):
        raise RuntimeError('Input and target should have the same size '
                           'in the batch dimension.')

    num_elms = 0
    entry_size = tgt.size(0)
    output = inp.new_zeros(entry_size)       # log probabilities
    gather_inds = tgt.new_empty(entry_size)  # tgt indices in head

    for i in range(self.n_clusters + 1):
        target_mask, rel_inds = \
            get_cluster_members(i, tgt, self.cutoffs, self.ent_slices)
        # Members of the current cluster
        members = target_mask.nonzero().squeeze()
        if members.numel() == 0:
            continue
        if i == 0:
            # Head cluster: also needs the relative indices within the head
            gather_inds.index_copy_(0, members, rel_inds[target_mask])
        else:
            # Tail clusters, including entity clusters
            cluster_index = self.cutoffs[0] + i - 1
            gather_inds.index_fill_(0, members, cluster_index)
            # Subset of the input whose elements belong to this cluster
            input_subset = inp.index_select(0, members)
            # Forward through the cluster projection and gather target log-probs
            cluster_output = self.tail[i - 1](input_subset)
            cluster_logprob = F.log_softmax(cluster_output, dim=1)
            relative_target = rel_inds[target_mask]
            local_logprob = \
                cluster_logprob.gather(1, relative_target.unsqueeze(1))
            output.index_copy_(0, members, local_logprob.squeeze(1))
        num_elms += members.numel()

    if num_elms != entry_size:
        logger.error('used_rows ({}) and batch_size ({}) do not match'
                     ''.format(num_elms, entry_size))
        raise RuntimeError("Target values should be in [0, {}], "
                           "but values in range [{}, {}] "
                           "were found.".format(self.n_classes - 1,
                                                tgt.min().item(),
                                                tgt.max().item()))

    head_output = self.head(inp)
    head_logprob = F.log_softmax(head_output, dim=1)
    output += head_logprob.gather(1, gather_inds.unsqueeze(1)).squeeze()
    # Return the negative log-likelihood
    return -output