def test_error_transform_num(self):
    """Check that ``_internal.transform_num`` rejects invalid ``num_bins``.

    An empty string is expected to raise :obj:`TypeError` and a negative
    value is expected to raise :obj:`ValueError`.
    """
    # Only the attribute matrix is needed; the target is discarded.
    X, _ = load_xy(0)

    # (expected exception, invalid ``num_bins`` value) pairs.
    invalid_cases = (
        (TypeError, ''),
        (ValueError, -1),
    )

    for expected_error, bad_num_bins in invalid_cases:
        with pytest.raises(expected_error):
            _internal.transform_num(X, num_bins=bad_num_bins)
def _set_data_categoric(self, transform_num: bool,
                        num_bins: int = None) -> np.ndarray:
    """Returns categorical data from the fitted dataset.

    Parameters
    ----------
    transform_num : :obj:`bool`
        If True, the columns of ``X`` selected by ``_attr_indexes_num``
        are discretized with ``_internal.transform_num`` and appended
        (along axis 1) to the categorical columns. Otherwise, this
        method ignores these attributes.

    num_bins : :obj:`int`, optional
        Number of bins forwarded to ``_internal.transform_num``. This
        argument is used only if ``transform_num`` is True. If this
        argument value is :obj:`NoneType`, the number of bins is chosen
        by ``_internal.transform_num`` (NOTE(review): previously
        documented here as min(2, c), where ``c`` is the cubic root of
        the number of instances of the fitted dataset -- confirm against
        ``_internal.transform_num``).

    Returns
    -------
    :obj:`np.ndarray`
        The columns of ``X`` selected by ``_attr_indexes_cat``. If
        ``transform_num`` is True and ``_internal.transform_num`` does
        not return :obj:`NoneType`, its result is concatenated to those
        columns along axis 1, which allocates a new array.

    Raises
    ------
    TypeError
        If either ``X`` or ``_attr_indexes_cat`` instance attributes are
        :obj:`NoneType`, or if ``transform_num`` is True while
        ``_attr_indexes_num`` is :obj:`NoneType`. The
        ``_attr_indexes_cat`` case can be avoided passing valid data to
        fit and first calling ``_fill_col_ind_by_type`` instance method
        before this method.
    """
    if self.X is None:
        raise TypeError("It is necessary to fit valid data into the "
                        'model before setting up categoric data. ("X" '
                        'attribute is "NoneType").')

    if self._attr_indexes_cat is None:
        raise TypeError("No information about indexes of categoric "
                        "attributes. Please be sure to call method "
                        '"_fill_col_ind_by_type" before this method.')

    data_cat = self.X[:, self._attr_indexes_cat]

    if not transform_num:
        return data_cat

    # Indexing with None would silently insert a new axis instead of
    # selecting columns, so fail early with an explicit error.
    if self._attr_indexes_num is None:
        raise TypeError("No information about indexes of numeric "
                        'attributes ("_attr_indexes_num" attribute is '
                        '"NoneType").')

    data_num_discretized = _internal.transform_num(
        self.X[:, self._attr_indexes_num], num_bins=num_bins)

    # ``transform_num`` may return None; in that case there is nothing to
    # append and the categorical columns are returned unchanged.
    if data_num_discretized is None:
        return data_cat

    return np.concatenate((data_cat, data_num_discretized), axis=1)
def _set_data_categoric(cls, N, C, transform_num: bool,
                        num_bins: int = None) -> np.ndarray:
    """Build the categorical data array from pre-split attribute tables.

    Parameters
    ----------
    N : table of numeric attributes
        Object exposing an ``empty`` attribute; it is passed as-is to
        ``_internal.transform_num``. Presumably a pandas DataFrame --
        TODO confirm against the caller.

    C : table of categorical attributes
        Object exposing ``to_numpy()``, whose result is the base of the
        returned array. Presumably a pandas DataFrame -- TODO confirm
        against the caller.

    transform_num : :obj:`bool`
        If True and ``N`` is not empty, ``N`` is discretized with
        ``_internal.transform_num`` and the result is appended (along
        axis 1) to the categorical data. Otherwise ``N`` is ignored.

    num_bins : :obj:`int`, optional
        Number of bins forwarded to ``_internal.transform_num``. Used
        only when the discretization actually runs.

    Returns
    -------
    :obj:`np.ndarray`
        ``C.to_numpy()``, concatenated along axis 1 with the discretized
        numeric data when that step runs and does not return
        :obj:`NoneType`.
    """
    data_cat = C.to_numpy()

    # Nothing to discretize: either not requested or no numeric columns.
    if not transform_num or N.empty:
        return data_cat

    data_num_discretized = _internal.transform_num(N, num_bins=num_bins)

    # ``transform_num`` may return None; keep the categorical data as-is.
    if data_num_discretized is None:
        return data_cat

    return np.concatenate((data_cat, data_num_discretized), axis=1)