def _sort_by_padding(self, instances: List[Instance]) -> List[Instance]:
    """
    Return `instances` reordered by ascending padding length.

    Each instance is ranked by the lengths selected through `self._sorting_keys`, a
    list of `(field_name, padding_key)` tuples that are compared in the order in
    which they are listed. If no sorting keys are set, `self._guess_sorting_keys`
    is called with `instances` before ranking. Instances whose keys compare equal
    keep their input order.
    """
    if not self._sorting_keys:
        logger.info("No sorting keys given; trying to guess a good one")
        self._guess_sorting_keys(instances)
        logger.info(f"Using {self._sorting_keys} as the sorting keys")

    keyed_instances = []
    for candidate in instances:
        # Padding lengths can only be requested from an indexed instance.
        candidate.index_fields(self.vocab)
        lengths = cast(Dict[str, Dict[str, float]], candidate.get_padding_lengths())
        if self._padding_noise > 0.0:
            lengths = {
                name: add_noise_to_dict_values(per_field, self._padding_noise)
                for name, per_field in lengths.items()
            }
        sort_key = [lengths[name][key] for name, key in self._sorting_keys]
        keyed_instances.append((sort_key, candidate))

    # Rank on the length lists only; the instances themselves are never compared.
    ranked = sorted(keyed_instances, key=lambda pair: pair[0])
    return [candidate for _, candidate in ranked]
def _sort_by_padding(
    self,
    instances: List[Instance],
    sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
    padding_noise: float = 0.0,
) -> List[Instance]:
    """
    Order ``instances`` by ascending padding length.

    ``sorting_keys`` is a list of ``(field_name, padding_key)`` tuples; the lengths
    they select are compared in the order in which the keys are listed. Every
    instance is indexed with ``self.vocab`` before its padding lengths are read.
    When ``padding_noise`` is positive, each field's lengths are passed through
    ``add_noise_to_dict_values`` before ranking. Instances whose keys compare
    equal keep their input order.
    """
    ranked_items = []
    length_keys = []
    for item in instances:
        # ``get_padding_lengths`` needs an indexed instance.
        item.index_fields(self.vocab)
        lengths = cast(Dict[str, Dict[str, float]], item.get_padding_lengths())
        if padding_noise > 0.0:
            lengths = {
                name: add_noise_to_dict_values(values, padding_noise)
                for name, values in lengths.items()
            }
        length_keys.append([lengths[name][key] for name, key in sorting_keys])
        ranked_items.append(item)

    # Stable argsort over the length keys; the instances are never compared.
    order = sorted(range(len(length_keys)), key=length_keys.__getitem__)
    return [ranked_items[position] for position in order]
def _sort_dataset_by_padding(
    dataset: Dataset,
    sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
    padding_noise: float = 0.0,
) -> Dataset:
    """
    Build a new ``Dataset`` holding ``dataset.instances`` in ascending order of
    padding length.

    ``sorting_keys`` is a list of ``(field_name, padding_key)`` tuples; the lengths
    they select are compared in the order in which the keys are listed. When
    ``padding_noise`` is positive, each field's lengths are passed through
    ``add_noise_to_dict_values`` before ranking. Instances whose keys compare
    equal keep their input order.
    """

    def length_key(instance):
        # One list of lengths per instance, in ``sorting_keys`` order.
        lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths())
        if padding_noise > 0.0:
            lengths = {
                name: add_noise_to_dict_values(values, padding_noise)
                for name, values in lengths.items()
            }
        return [lengths[name][key] for name, key in sorting_keys]

    decorated = [(length_key(instance), instance) for instance in dataset.instances]
    # Rank on the length lists only; the instances themselves are never compared.
    decorated.sort(key=lambda entry: entry[0])
    return Dataset([instance for _, instance in decorated])
def sort_by_padding(instances: List[Instance],
                    sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
                    vocab: Vocabulary,
                    padding_noise: float = 0.0) -> List[Instance]:
    """
    Return ``instances`` reordered by ascending padding length.

    ``sorting_keys`` is a list of ``(field_name, padding_key)`` tuples; the lengths
    they select are compared in the order in which the keys are listed. Every
    instance is indexed with ``vocab`` before its padding lengths are read. When
    ``padding_noise`` is positive, each field's lengths are passed through
    ``add_noise_to_dict_values`` before ranking. Instances whose keys compare
    equal keep their input order.
    """
    decorated = []
    for entry in instances:
        # Padding lengths can only be requested from an indexed instance.
        entry.index_fields(vocab)
        measured = cast(Dict[str, Dict[str, float]], entry.get_padding_lengths())
        lengths = (
            {
                field: add_noise_to_dict_values(sizes, padding_noise)
                for field, sizes in measured.items()
            }
            if padding_noise > 0.0
            else measured
        )
        decorated.append(([lengths[field][key] for field, key in sorting_keys], entry))

    # Rank on the length lists only; the instances themselves are never compared.
    return [entry for _, entry in sorted(decorated, key=lambda pair: pair[0])]
def sort_by_padding_modified(
        instances: List[Instance],
        sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
        vocab: Vocabulary,
        padding_noise: float = 0.0) -> List[Instance]:
    """
    Return ``instances`` reordered by ascending padding length.

    In addition to the lengths reported by ``get_padding_lengths``, each instance
    gets a ``"sentences"`` entry whose ``"num_sentences"`` value is the length of
    ``instance.fields['tokens'].field_list``, so that count can be used as a
    sorting key. ``sorting_keys`` is a list of ``(field_name, padding_key)``
    tuples; the lengths they select are compared in the order in which the keys
    are listed. When ``padding_noise`` is positive, each field's lengths are
    passed through ``add_noise_to_dict_values`` before ranking. Instances whose
    keys compare equal keep their input order.
    """

    def length_key(instance):
        # Padding lengths can only be requested from an indexed instance.
        instance.index_fields(vocab)
        lengths = instance.get_padding_lengths()
        # Written into the dict returned by the instance, not into a copy.
        lengths["sentences"] = {
            "num_sentences": len(instance.fields['tokens'].field_list)
        }
        lengths = cast(Dict[str, Dict[str, float]], lengths)
        if padding_noise > 0.0:
            lengths = {
                name: add_noise_to_dict_values(values, padding_noise)
                for name, values in lengths.items()
            }
        return [lengths[name][key] for name, key in sorting_keys]

    decorated = [(length_key(instance), instance) for instance in instances]
    # Rank on the length lists only; the instances themselves are never compared.
    decorated.sort(key=lambda entry: entry[0])
    return [instance for _, instance in decorated]
def sort_by_padding(instances,
                    sorting_keys,
                    vocab,
                    padding_noise=0.0):
    u"""
    Sorts the instances by their padding lengths, using the keys in
    ``sorting_keys`` (in the order in which they are provided).

    Parameters
    ----------
    instances :
        Iterable of instances. Each one is indexed with ``vocab`` and then asked
        for its padding lengths.
    sorting_keys :
        List of ``(field_name, padding_key)`` tuples selecting the lengths to
        rank by.
    vocab :
        Passed unchanged to ``instance.index_fields``.
    padding_noise :
        When positive, each field's lengths are passed through
        ``add_noise_to_dict_values`` with this value before ranking.

    Returns
    -------
    A new list holding the instances in ascending order of the selected lengths.
    Instances whose keys compare equal keep their input order.
    """
    instances_with_lengths = []
    for instance in instances:
        # Make sure instance is indexed before calling .get_padding_lengths
        instance.index_fields(vocab)
        # ``str`` rather than ``unicode``: the cast is only a hint for the type
        # checker, and ``unicode`` is not a builtin on Python 3, so referring to
        # it here raised NameError on every call.
        padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths())
        if padding_noise > 0.0:
            padding_lengths = {
                field_name: add_noise_to_dict_values(field_lengths, padding_noise)
                for field_name, field_lengths in padding_lengths.items()
            }
        sort_key = [padding_lengths[field_name][padding_key]
                    for (field_name, padding_key) in sorting_keys]
        instances_with_lengths.append((sort_key, instance))

    # Rank on the length lists only; the instances themselves are never compared.
    instances_with_lengths.sort(key=lambda pair: pair[0])
    return [pair[-1] for pair in instances_with_lengths]
def sort_by_padding(
        instances: List[Instance],
        sorting_keys: List[Tuple[str, str]],  # pylint: disable=invalid-sequence-index
        vocab: Vocabulary,
        padding_noise: float = 0.0) -> List[Instance]:
    """
    Sorts the instances by their padding lengths, using the keys in
    ``sorting_keys`` (in the order in which they are provided), after dropping
    every instance that cannot be indexed.

    Parameters
    ----------
    instances :
        List of instances. Each one is indexed with ``vocab``; those whose
        ``index_fields`` call raises an ``Exception`` are removed from this list
        **in place** and are not part of the result.
    sorting_keys :
        List of ``(field_name, padding_key)`` tuples selecting the lengths to
        rank by.
    vocab :
        Passed unchanged to ``instance.index_fields``.
    padding_noise :
        When positive, each field's lengths are passed through
        ``add_noise_to_dict_values`` with this value before ranking.

    Returns
    -------
    A new list holding the remaining instances in ascending order of the
    selected lengths. Instances whose keys compare equal keep their input order.
    """
    # Index everything up front so that instances which cannot be indexed are
    # dropped before any padding length is requested.
    failed_positions = []
    for position, instance in enumerate(instances):
        try:
            instance.index_fields(vocab)
        except Exception:  # pylint: disable=broad-except
            # Best-effort skip. ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            failed_positions.append(position)

    if failed_positions:
        # The count is a lazy %-argument; without a placeholder the logging
        # module fails to format the record and the count is never reported.
        logger.info("Ignored Instances: %d", len(failed_positions))
        # Positions were collected in ascending order; delete from the back so
        # the earlier positions stay valid.
        for position in reversed(failed_positions):
            del instances[position]

    instances_with_lengths = []
    for instance in instances:
        # Already indexed by the loop above.
        padding_lengths = cast(Dict[str, Dict[str, float]], instance.get_padding_lengths())
        if padding_noise > 0.0:
            padding_lengths = {
                field_name: add_noise_to_dict_values(field_lengths, padding_noise)
                for field_name, field_lengths in padding_lengths.items()
            }
        sort_key = [
            padding_lengths[field_name][padding_key]
            for (field_name, padding_key) in sorting_keys
        ]
        instances_with_lengths.append((sort_key, instance))

    # Rank on the length lists only; the instances themselves are never compared.
    instances_with_lengths.sort(key=lambda pair: pair[0])
    return [pair[-1] for pair in instances_with_lengths]