# Imports these excerpts rely on. `pad_sequence_to_length` and `DataArray` are helpers
# from the surrounding library (in AllenNLP they live in `allennlp.common.util` and
# `allennlp.data.fields.field` respectively); adjust the paths to match your codebase.
import itertools
from typing import Dict, List

import torch

from allennlp.common.util import pad_sequence_to_length
from allennlp.data.fields.field import DataArray


def pad_token_sequence(self,
                       tokens: Dict[str, List[List[int]]],
                       desired_num_tokens: Dict[str, int],
                       padding_lengths: Dict[str, int]) -> Dict[str, List[List[int]]]:
    # Pad the tokens. `tokens` has only one key...
    key = list(tokens.keys())[0]
    padded_tokens = pad_sequence_to_length(tokens[key],
                                           desired_num_tokens[key],
                                           default_value=self.get_padding_token)

    # Pad the characters within the tokens.
    desired_token_length = padding_lengths['num_token_characters']
    longest_token: List[int] = max(tokens[key], key=len, default=[])
    padding_value = 0
    if desired_token_length > len(longest_token):
        # Since we want to pad to greater than the longest token, we add a "dummy token"
        # so we can take advantage of the fast implementation of itertools.zip_longest.
        padded_tokens.append([padding_value] * desired_token_length)
    # Pad the list of lists to the longest sublist, appending 0's.
    padded_tokens = list(zip(*itertools.zip_longest(*padded_tokens, fillvalue=padding_value)))
    if desired_token_length > len(longest_token):
        # Remove the "dummy token".
        padded_tokens.pop()
    # Truncate all the tokens to the desired length, and return the result.
    return {key: [list(token[:desired_token_length]) for token in padded_tokens]}
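# A minimal standalone sketch of the character-padding step above, under the assumption
# that character ids are plain ints and 0 is the padding id (the helper name is
# illustrative, not part of the library). itertools.zip_longest pads every character
# list to the length of the longest one, and the temporary "dummy token" forces that
# length up to `desired_token_length` when no real token is long enough.
def _pad_character_lists(char_ids: List[List[int]],
                         desired_token_length: int,
                         padding_value: int = 0) -> List[List[int]]:
    padded = list(char_ids)
    longest_token = max(char_ids, key=len, default=[])
    if desired_token_length > len(longest_token):
        padded.append([padding_value] * desired_token_length)  # dummy token
    padded = list(zip(*itertools.zip_longest(*padded, fillvalue=padding_value)))
    if desired_token_length > len(longest_token):
        padded.pop()  # drop the dummy token
    return [list(token[:desired_token_length]) for token in padded]

# _pad_character_lists([[3, 4, 5], [7]], desired_token_length=4)
# -> [[3, 4, 5, 0], [7, 0, 0, 0]]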
def pad_token_sequence(self,
                       tokens: Dict[str, List[int]],
                       desired_num_tokens: Dict[str, int],
                       padding_lengths: Dict[str, int]) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
    return {key: pad_sequence_to_length(val, desired_num_tokens[key])
            for key, val in tokens.items()}
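# For reference, a minimal sketch of the padding semantics assumed above (this is not
# the library's implementation of `pad_sequence_to_length`): truncate the sequence to
# the desired length, then right-pad with `default_value()` until it reaches that length.
def _pad_to_length_sketch(sequence: List[int],
                          desired_length: int,
                          default_value=lambda: 0) -> List[int]:
    padded = list(sequence[:desired_length])
    while len(padded) < desired_length:
        padded.append(default_value())
    return padded

# _pad_to_length_sketch([5, 8, 2], 5) -> [5, 8, 2, 0, 0], mirroring how the indexer
# above pads a value like {"tokens": [5, 8, 2]} (the key name is illustrative) when
# desired_num_tokens["tokens"] == 5.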
def as_tensor(self, padding_lengths: Dict[str, int]) -> DataArray:
    padded_field_list = pad_sequence_to_length(self.field_list,
                                               padding_lengths['num_fields'],
                                               self.field_list[0].empty_field)
    # Here we're removing the scoping on the padding length keys that we added in
    # `get_padding_lengths`; see the note there for more detail.
    child_padding_lengths = {key.replace('list_', '', 1): value
                             for key, value in padding_lengths.items()
                             if key.startswith('list_')}
    padded_fields = [field.as_tensor(child_padding_lengths)
                     for field in padded_field_list]
    return self.field_list[0].batch_tensors(padded_fields)
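# Illustrative example of the key un-scoping above (the keys follow the "list_" prefix
# convention used by the method; the concrete values are made up): only the "list_"-
# prefixed entries are child padding lengths, and stripping the prefix recovers the
# key names the child fields expect.
example_padding_lengths = {'num_fields': 3, 'list_num_tokens': 7, 'list_num_token_characters': 12}
example_child_lengths = {key.replace('list_', '', 1): value
                         for key, value in example_padding_lengths.items()
                         if key.startswith('list_')}
# example_child_lengths == {'num_tokens': 7, 'num_token_characters': 12}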
def as_tensor(self, padding_lengths: Dict[str, int]) -> torch.Tensor:
    desired_num_tokens = padding_lengths['num_tokens']
    padded_tags = pad_sequence_to_length(self._indexed_labels, desired_num_tokens)
    tensor = torch.LongTensor(padded_tags)
    return tensor
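# Small usage sketch (the tag ids are made up, and a padding id of 0 is assumed as the
# default fill value): a tag sequence of length 3 padded out to num_tokens == 5 becomes
# a 1-D LongTensor with zeros in the padded positions.
example_tags = pad_sequence_to_length([2, 0, 1], 5)  # -> [2, 0, 1, 0, 0]
example_tag_tensor = torch.LongTensor(example_tags)  # tensor([2, 0, 1, 0, 0])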