Exemplo n.º 1
0
    def test_error_transform_num(self):
        """Check that ``transform_num`` rejects invalid ``num_bins`` values.

        An empty string is expected to raise :obj:`TypeError` and a negative
        value is expected to raise :obj:`ValueError`.
        """
        features, _ = load_xy(0)

        # Each pair is (expected exception type, invalid ``num_bins`` value).
        invalid_cases = (
            (TypeError, ''),
            (ValueError, -1),
        )

        for expected_error, bad_num_bins in invalid_cases:
            with pytest.raises(expected_error):
                _internal.transform_num(features, num_bins=bad_num_bins)
Exemplo n.º 2
0
    def _set_data_categoric(self, transform_num: bool,
                            num_bins: bool = None) -> np.ndarray:
        """Returns categorical data from the fitted dataset.

        Parameters
        ----------
        transform_num : :obj:`bool`
            If True, then all numeric-type data are discretized using an
            equal-frequency histogram. Otherwise, this method ignores these
            attributes.

        num_bins : :obj:`bool`, optional
            Number of bins of the discretization histogram. This argument is
            used only if ``transform_num`` is True. If this argument value is
            :obj:`NoneType`, then it is set to min(2, c), where ``c`` is the
            cubic root of the number of instances of the fitted dataset.

        Returns
        -------
        :obj:`np.ndarray`
            Processed categorical data. If no need for changes from the
            original dataset, then this method does not create a copy of it to
            prevent unnecessary memory usage. Otherwise, this method returns a
            modified version of the original categorical data, thus consuming
            more memory.

        Raises
        ------
        TypeError:
            If either ``X`` or ``_attr_indexes_cat`` instance attributes are
            :obj:`NoneType`. This can be avoided passing valid data to fit and
            first calling ``_fill_col_ind_by_type`` instance method before this
            method.
        """
        if self.X is None:
            raise TypeError("It is necessary to fit valid data into the "
                            'model before setting up categoric data. ("X" '
                            'attribute is "NoneType").')

        if self._attr_indexes_cat is None:
            raise TypeError("No information about indexes of categoric "
                            "attributes. Please be sure to call method "
                            '"_fill_col_ind_by_type" before this method.')

        data_cat = self.X[:, self._attr_indexes_cat]

        if transform_num:
            data_num_discretized = _internal.transform_num(
                self.X[:, self._attr_indexes_num], num_bins=num_bins)

            if data_num_discretized is not None:
                data_cat = np.concatenate((data_cat, data_num_discretized),
                                          axis=1)

        return data_cat
    def _set_data_categoric(cls,
                            N,
                            C,
                            transform_num: bool,
                            num_bins: bool = None) -> np.ndarray:
        data_cat = C.to_numpy()

        if transform_num and not N.empty:
            data_num_discretized = _internal.transform_num(N,
                                                           num_bins=num_bins)

            if data_num_discretized is not None:
                data_cat = np.concatenate((data_cat, data_num_discretized),
                                          axis=1)
        return data_cat