示例#1
0
 def _default_processor_units(cls) -> list:
     """
     Build the default chain of processor units.

     Every call instantiates the units anew and returns them in a new
     list, so the caller is free to extend the result.

     :return: A list holding, in order, a tokenize unit, a lowercase
         unit, a punctuation-removal unit and a stop-word-removal unit.
     """
     # Instantiate in the same order the units appear in the result.
     tokenizer = processor_units.TokenizeUnit()
     lowercaser = processor_units.LowercaseUnit()
     punc_remover = processor_units.PuncRemovalUnit()
     stop_remover = processor_units.StopRemovalUnit()
     return [tokenizer, lowercaser, punc_remover, stop_remover]
示例#2
0
 def __init__(self,
              fixed_length_left: int = 30,
              fixed_length_right: int = 30,
              filter_mode: str = 'df',
              filter_low_freq: float = 2,
              filter_high_freq: float = float('inf'),
              remove_stop_words: bool = False):
     """
     Set up the processor units this preprocessor relies on.

     :param fixed_length_left: Length handed to the fixed-length unit
         built for the left side (created with ``pad_mode='post'``).
     :param fixed_length_right: Length handed to the fixed-length unit
         built for the right side (created with ``pad_mode='post'``).
     :param filter_mode: Forwarded as ``mode`` to the frequency filter
         unit.
     :param filter_low_freq: Forwarded as ``low`` to the frequency filter
         unit.
     :param filter_high_freq: Forwarded as ``high`` to the frequency
         filter unit.
     :param remove_stop_words: When truthy, one extra stop-word-removal
         unit is appended to the default unit list.
     """
     super().__init__()

     # Remember the requested lengths before building the units from them.
     self._fixed_length_left = fixed_length_left
     self._fixed_length_right = fixed_length_right

     left_unit = processor_units.FixedLengthUnit(
         self._fixed_length_left,
         pad_mode='post',
     )
     self._left_fixedlength_unit = left_unit

     right_unit = processor_units.FixedLengthUnit(
         self._fixed_length_right,
         pad_mode='post',
     )
     self._right_fixedlength_unit = right_unit

     frequency_filter = processor_units.FrequencyFilterUnit(
         low=filter_low_freq,
         high=filter_high_freq,
         mode=filter_mode,
     )
     self._filter_unit = frequency_filter

     # Start from the default chain; optionally extend it in place.
     self._default_units = self._default_processor_units()
     if not remove_stop_words:
         return
     self._default_units.append(processor_units.StopRemovalUnit())