def fit(self, data_pack: DataPack, verbose=1):
    """
    Fit pre-processing context for transformation.

    The text is first passed through the default processor units, then
    a filter unit is built from the result with ``mode='right'`` and
    applied with the same mode. Finally a vocabulary unit is built from
    the filtered data. The filter unit, vocabulary unit, vocabulary size
    and input shapes are stored in the preprocessor context.

    :param data_pack: data_pack to be preprocessed.
    :param verbose: Verbosity.
    :return: class:`BasicPreprocessor` instance.
    """
    pipeline = chain_transform(self._default_processor_units())
    processed = data_pack.apply_on_text(pipeline, verbose=verbose)

    # Build the filter unit from the processed data, then apply it;
    # both steps are restricted to mode='right'.
    filter_unit = build_unit_from_data_pack(
        self._filter_unit,
        processed,
        flatten=False,
        mode='right',
        verbose=verbose,
    )
    processed = processed.apply_on_text(
        filter_unit.transform,
        mode='right',
        verbose=verbose,
    )
    self._context['filter_unit'] = filter_unit

    # The vocabulary is built from the already-filtered data.
    vocab = build_vocab_unit(processed, verbose=verbose)
    term_count = len(vocab.state['term_index'])
    self._context['vocab_unit'] = vocab
    # NOTE(review): the extra slot presumably reserves an index
    # (e.g. padding) -- confirm against the vocabulary unit.
    self._context['vocab_size'] = term_count + 1

    left_shape = (self._fixed_length_left,)
    right_shape = (self._fixed_length_right,)
    self._context['input_shapes'] = [left_shape, right_shape]
    return self
def fit(self, data_pack: DataPack, verbose: int = 1):
    """
    Fit pre-processing context for transformation.

    Applies the default processor units to the text of ``data_pack``,
    builds a vocabulary unit from the result and stores it in the
    preprocessor context under ``'vocab_unit'``.

    :param data_pack: data_pack to be preprocessed.
    :param verbose: Verbosity.
    :return: class:`NaivePreprocessor` instance.
    """
    pipeline = chain_transform(self._default_processor_units())
    processed = data_pack.apply_on_text(pipeline, verbose=verbose)
    self._context['vocab_unit'] = build_vocab_unit(processed, verbose=verbose)
    return self
def fit(self, data_pack: DataPack, verbose=1):
    """
    Fit pre-processing context for transformation.

    Applies the default processor units to the text of ``data_pack``
    and builds a vocabulary unit from the result. The vocabulary unit
    and the two input shapes (both sized by the vocabulary) are stored
    in the preprocessor context.

    :param data_pack: data_pack to be preprocessed.
    :param verbose: Verbosity.
    :return: class:`DSSMPreprocessor` instance.
    """
    pipeline = chain_transform(self._default_processor_units())
    processed = data_pack.apply_on_text(pipeline, verbose=verbose)

    vocab = build_vocab_unit(processed, verbose=verbose)
    self._context['vocab_unit'] = vocab

    # Both inputs share one dimension: the number of entries in the
    # term index plus one.
    dim = len(vocab.state['term_index']) + 1
    self._context['input_shapes'] = [(dim,), (dim,)]
    return self
def fit(self, data_pack: DataPack, verbose=1):
    """
    Fit pre-processing context for transformation.

    Extends the default processor units with an ``NgramLetterUnit``,
    applies the chain to the text of ``data_pack`` and builds a
    vocabulary unit from the result. The vocabulary unit and the input
    shapes ``(fixed_length, vocab_size)`` for the left and right sides
    are stored in the preprocessor context.

    :param data_pack: Data_pack to be preprocessed.
    :param verbose: Verbosity.
    :return: class:`CDSSMPreprocessor` instance.
    """
    steps = self._default_processor_units()
    # Append in place, after the default units, so the n-gram letter
    # unit runs last in the chain.
    steps.append(processor_units.NgramLetterUnit())
    processed = data_pack.apply_on_text(
        chain_transform(steps),
        verbose=verbose,
    )

    vocab = build_vocab_unit(processed, verbose=verbose)
    self._context['vocab_unit'] = vocab

    # Second dimension of each input: term-index entries plus one.
    width = len(vocab.state['term_index']) + 1
    left_shape = (self._fixed_length_left, width)
    right_shape = (self._fixed_length_right, width)
    self._context['input_shapes'] = [left_shape, right_shape]
    return self