def transform(self, data_pack: DataPack, verbose: int = 1) -> DataPack:
    """
    Preprocess a data pack and return the processed copy.

    Steps, all performed on a copy of ``data_pack``:

    1. run the chained ``self._units`` over the text;
    2. run the fitted ``vocab_unit`` from ``self._context`` over both
       text sides;
    3. when ``self._truncated_length_left`` /
       ``self._truncated_length_right`` are set, run the matching
       truncated-length unit over that side;
    4. append text lengths and call ``drop_empty``.

    :param data_pack: Inputs to be preprocessed.
    :param verbose: Verbosity.

    :return: Transformed data as :class:`DataPack` object.
    """
    # Keyword arguments shared by every ``apply_on_text`` call below.
    in_place = {'inplace': True, 'verbose': verbose}

    processed = data_pack.copy()
    processed.apply_on_text(chain_transform(self._units), **in_place)

    # NOTE: the ``filter_unit`` pass over the right text
    # (``self._context['filter_unit'].transform``, mode='right') is
    # currently disabled in this preprocessor.
    vocab_transform = self._context['vocab_unit'].transform
    processed.apply_on_text(vocab_transform, mode='both', **in_place)

    if self._truncated_length_left:
        truncate_left = self._left_truncatedlength_unit.transform
        processed.apply_on_text(truncate_left, mode='left', **in_place)
    if self._truncated_length_right:
        truncate_right = self._right_truncatedlength_unit.transform
        processed.apply_on_text(truncate_right, mode='right', **in_place)

    processed.append_text_length(inplace=True, verbose=verbose)
    processed.drop_empty(inplace=True)
    return processed
def transform(self, data_pack: DataPack, verbose: int = 1) -> DataPack:
    """
    Encode a data pack with ``self.bert_encode`` and return the result.

    A copy of ``data_pack`` is processed: both text sides are passed
    through ``self.bert_encode``, each side is then optionally truncated
    (when the corresponding ``self._truncated_length_*`` value is set),
    text lengths are appended and ``drop_empty`` is called.

    :param data_pack: Inputs to be preprocessed.
    :param verbose: Verbosity.

    :return: Transformed data as :class:`DataPack` object.
    """
    encoded = data_pack.copy()
    encoded.apply_on_text(
        self.bert_encode,
        mode='both',
        inplace=True,
        multiprocessing=self.multiprocessing,
        verbose=verbose,
    )

    # Truncation is optional per side and does not use multiprocessing.
    if self._truncated_length_left:
        left_truncate = ChainTransform(self._left_truncated_length_unit)
        encoded.apply_on_text(
            left_truncate, mode='left', inplace=True, verbose=verbose)
    if self._truncated_length_right:
        right_truncate = ChainTransform(self._right_truncated_length_unit)
        encoded.apply_on_text(
            right_truncate, mode='right', inplace=True, verbose=verbose)

    encoded.append_text_length(
        inplace=True,
        verbose=verbose,
        multiprocessing=self.multiprocessing,
    )
    encoded.drop_empty(inplace=True)
    return encoded
def transform(self, data_pack: DataPack, verbose: int = 1) -> DataPack:
    """
    Apply transformation on data.

    Both text sides are passed through ``self._tokenizer.encode``, text
    lengths are appended and ``drop_empty`` is called.

    The work is done on a copy of ``data_pack`` so the caller's object
    is not modified by the in-place operations below; use the returned
    object to access the transformed data.

    :param data_pack: Inputs to be preprocessed.
    :param verbose: Verbosity.

    :return: Transformed data as :class:`DataPack` object.
    """
    # Copy first: every step below runs with ``inplace=True`` and would
    # otherwise overwrite (and drop rows from) the caller's data pack.
    data_pack = data_pack.copy()

    data_pack.apply_on_text(self._tokenizer.encode,
                            mode='both', inplace=True, verbose=verbose)
    data_pack.append_text_length(inplace=True, verbose=verbose)
    data_pack.drop_empty(inplace=True)
    return data_pack
def transform(self, data_pack: DataPack, verbose: int = 1) -> DataPack:
    """
    Apply transformation on data, create truncated length representation.

    The processing chain is the preprocessor's default units, followed
    by the fitted ``vocab_unit`` from ``self._context`` and a
    :class:`units.TruncatedLength` unit. Afterwards text lengths are
    appended and ``drop_empty`` is called.

    The work is done on a copy of ``data_pack`` so the caller's object
    is not modified by the in-place operations below; use the returned
    object to access the transformed data.

    :param data_pack: Inputs to be preprocessed.
    :param verbose: Verbosity.

    :return: Transformed data as :class:`DataPack` object.
    """
    # Copy first: every step below runs with ``inplace=True`` and would
    # otherwise overwrite (and drop rows from) the caller's data pack.
    data_pack = data_pack.copy()

    units_ = self._default_units()
    units_.append(self._context['vocab_unit'])
    # The truncation length is hard-coded to 30 with 'post' mode.
    units_.append(
        units.TruncatedLength(text_length=30, truncate_mode='post'))
    func = chain_transform(units_)

    data_pack.apply_on_text(func, inplace=True, verbose=verbose)
    data_pack.append_text_length(inplace=True, verbose=verbose)
    data_pack.drop_empty(inplace=True)
    return data_pack
def transform(self, data_pack: DataPack, verbose: int = 1) -> DataPack:
    """
    Apply transformation on data, create truncated length representation.

    Steps, all performed in place on a copy of ``data_pack``:

    1. run the chained ``self._units`` over the text;
    2. run the fitted ``filter_unit`` from ``self._context`` over the
       right text;
    3. run the fitted ``vocab_unit`` from ``self._context`` over both
       text sides;
    4. when ``self._truncated_length_left`` /
       ``self._truncated_length_right`` are set, run the matching
       truncated-length unit over that side;
    5. append text lengths and call ``drop_empty``.

    :param data_pack: Inputs to be preprocessed.
    :param verbose: Verbosity.

    :return: Transformed data as :class:`DataPack` object.
    """
    data_pack = data_pack.copy()

    # ``inplace=True`` is required here: the return value is not used,
    # so without it the result of the unit chain would be discarded and
    # the later steps would run on unprocessed text.
    data_pack.apply_on_text(chain_transform(self._units), inplace=True,
                            verbose=verbose)
    data_pack.apply_on_text(self._context['filter_unit'].transform,
                            mode='right', inplace=True, verbose=verbose)
    data_pack.apply_on_text(self._context['vocab_unit'].transform,
                            mode='both', inplace=True, verbose=verbose)

    if self._truncated_length_left:
        data_pack.apply_on_text(self._left_truncatedlength_unit.transform,
                                mode='left', inplace=True, verbose=verbose)
    if self._truncated_length_right:
        data_pack.apply_on_text(self._right_truncatedlength_unit.transform,
                                mode='right', inplace=True, verbose=verbose)

    data_pack.append_text_length(inplace=True, verbose=verbose)
    data_pack.drop_empty(inplace=True)
    return data_pack