Exemplo n.º 1
0
 def test_value_not_in_list(self):
     """Selecting 'REGEX:.*Baum' with regex disabled must raise ValueError.

     NOTE(review): presumably the entry is then treated as a literal label
     that is not among the elements -- confirm against LabelSelector.
     """
     labels = ['ApfelBaum', 'BirnenBaum', 'AprikosenBaum', 'Katze', 'Hund']
     label_selector = LabelSelector(elements=labels)
     with self.assertRaises(ValueError):
         label_selector.select(selection=['REGEX:.*Baum'],
                               enable_regex=False)
Exemplo n.º 2
0
 def test_regex(self):
     """With regex enabled, 'REGEX:.*Baum' selects the three '...Baum' labels.

     The selection result must report indexes 0-2 as selected and
     indexes 3-4 as unselected.
     """
     labels = ['ApfelBaum', 'BirnenBaum', 'AprikosenBaum', 'Katze', 'Hund']
     outcome = LabelSelector(elements=labels).select(
         selection=['REGEX:.*Baum'], enable_regex=True)

     self.assertListEqual([0, 1, 2], outcome.indexes)
     self.assertListEqual([3, 4], outcome.indexes_unselected)
Exemplo n.º 3
0
    def _process2d(self, processor_input: StandardDataFormat) -> StandardDataFormat:
        """Replace NaN entries in the configured columns with ``self.replacement``.

        The columns are looked up by matching ``self.fields`` against the
        input labels. The input's data array is copied first, so the
        replacement is applied to the copy only.

        :param processor_input: input whose ``labels`` and ``data`` are read.
        :return: result of ``processor_input.modify_copy`` with the filled data.
        """
        selection = LabelSelector(processor_input.labels).select(self.fields)
        filled = processor_input.data.copy()

        for column in selection.indexes:
            # Boolean row mask of the NaN cells in this column.
            nan_rows = np.isnan(filled[:, column])
            filled[nan_rows, column] = self.replacement

        return processor_input.modify_copy(data=filled)
    def get(self) -> StandardDataFormat:
        """Fetch the required fields and return them labelled by alias.

        The fetched columns are located either by field alias or by field
        name, depending on ``self.source_returns_alias``. The returned labels
        are always the field aliases.

        :return: a ``StandardDataFormat`` carrying the fetched timestamps,
            the alias labels and the selected data columns.
        """
        fields = self._get_fields()
        fetched = self._fetch(fields)

        returns_alias = self.source_returns_alias
        AbstractDatasourceAdapter._check_fields_availability(
            fetched, fields, using_alias=returns_alias)

        aliases = [field.alias for field in fields]

        # The source labels its columns by alias or by name; look them up
        # with the matching key.
        lookup_keys = [
            field.alias if returns_alias else field.name for field in fields
        ]
        column_positions = LabelSelector(elements=fetched.labels).select(
            selection=lookup_keys).indexes

        # Per the original author's note, this indexing ensures the column
        # ordering is correct (presumably following the required fields --
        # verify against LabelSelector.select).
        ordered_data = fetched.data[:, column_positions]

        return StandardDataFormat(timestamps=fetched.timestamps,
                                  labels=aliases,
                                  data=ordered_data)
Exemplo n.º 5
0
    def _process2d(self,
                   processor_input: StandardDataFormat) -> StandardDataFormat:
        """Scale the configured columns and write them back into a data copy.

        If ``self.state`` holds a (truthy) transformer, it is reused via
        ``transform``. Otherwise a new transformer is created from
        ``self.scaler`` / ``self.kwargs``, fitted with ``fit_transform`` and
        stored in ``self.state``.

        :param processor_input: input whose selected columns are scaled.
        :return: result of ``processor_input.modify_copy`` with scaled columns.
        """
        column_values = ColumnSelector(
            self.fields).process(processor_input).data

        scaler = self.state
        if not scaler:
            scaler = create_instance(qualified_name=self.scaler,
                                     kwargs=self.kwargs)
            scaled_values = scaler.fit_transform(column_values)
            # Original author's note: the state is saved only during the
            # training process; each training has a unique identifier that
            # is associated with the state data.
            self.state = scaler
        else:
            scaled_values = scaler.transform(column_values)

        target_columns = LabelSelector(
            elements=processor_input.labels).select(self.fields).indexes
        result = processor_input.data.copy()
        result[:, target_columns] = scaled_values

        return processor_input.modify_copy(data=result)
Exemplo n.º 6
0
    def _process2d(self,
                   processor_input: StandardDataFormat) -> StandardDataFormat:
        """Set the cells flagged by ``Outlier`` to NaN in the configured columns.

        The columns are those named by the ``inputField`` key of each entry
        in ``self.generate``. The input's data array is copied before any
        cell is overwritten.

        :param processor_input: input whose ``labels`` and ``data`` are read.
        :return: result of ``processor_input.modify_copy`` with flagged cells
            replaced by NaN.
        """
        input_fields = [
            ConfigReader.from_dict(rule).get_or_error(
                key="inputField", context="OutlierRemover Config")
            for rule in self.generate
        ]
        columns = LabelSelector(elements=processor_input.labels).select(
            selection=input_fields).indexes

        # Expand the 2D feature array into a 3D array holding exactly one
        # 2D entry, as a masked array.
        selected_3d = np.expand_dims(processor_input.data[:, columns], axis=0)
        grouped = np.ma.array(selected_3d)

        flagged = Outlier(sequence=np.nan, generate=self.generate) \
            .affected_index(grouped_data=grouped)
        # Drop the leading axis added above to get back to 2D.
        flagged = np.squeeze(flagged, axis=0)

        cleaned = processor_input.data.copy()
        # Indexing with the selected columns is done into a temporary that is
        # modified and then written back explicitly.
        selected = cleaned[:, columns]
        selected[flagged] = np.nan
        cleaned[:, columns] = selected

        return processor_input.modify_copy(data=cleaned)
Exemplo n.º 7
0
 def _process3d(self,
                processor_input: StandardDataFormat) -> StandardDataFormat:
     """Select the configured columns from 3D input.

     ``self._columns`` is matched against the input labels (regex matching
     controlled by ``self.enable_regex``) and the resulting indexes are
     passed to ``self._select_columns`` with ``is_3d=True``.

     :param processor_input: input whose labels are matched.
     :return: whatever ``self._select_columns`` returns for these indexes.
     """
     label_selector = LabelSelector(elements=processor_input.labels)
     selection = label_selector.select(self._columns,
                                       enable_regex=self.enable_regex)
     return self._select_columns(processor_input,
                                 selection.indexes,
                                 is_3d=True)
Exemplo n.º 8
0
    def _process2d(self, processor_input: StandardDataFormat) -> StandardDataFormat:
        """Keep the columns reported by ``LabelSelector.without(self._columns)``.

        NOTE(review): presumably these are all columns except those listed in
        ``self._columns`` -- confirm against LabelSelector.without.

        :param processor_input: input whose labels are matched.
        :return: whatever ``ColumnSelector._select_columns`` returns.
        """
        label_selector = LabelSelector(elements=processor_input.labels)
        remaining = label_selector.without(self._columns).indexes

        # noinspection PyProtectedMember
        return ColumnSelector._select_columns(processor_input, remaining)