def test_function_transformer_raise_error_with_mixed_dtype(X_type):
    """Check that `FunctionTransformer.check_inverse` raises error on mixed dtype."""
    # Forward mapping mixes str->int and int->str entries on purpose so that
    # the container holds heterogeneous (non-numerical) values.
    mapping = {"one": 1, "two": 2, "three": 3, 5: "five", 6: "six"}
    inverse_mapping = {mapped: original for original, mapped in mapping.items()}
    dtype = "object"

    raw_values = ["one", "two", "three", "one", "one", 5, 6]
    data = _convert_container(
        raw_values, X_type, columns_name=["value"], dtype=dtype
    )

    def func(X):
        mapped = [mapping[_safe_indexing(X, i)] for i in range(X.size)]
        return np.array(mapped, dtype=object)

    def inverse_func(X):
        restored = [inverse_mapping[x] for x in X]
        return _convert_container(
            restored, X_type, columns_name=["value"], dtype=dtype
        )

    transformer = FunctionTransformer(
        func=func,
        inverse_func=inverse_func,
        validate=False,
        check_inverse=True,
    )

    msg = "'check_inverse' is only supported when all the elements in `X` is numerical."
    with pytest.raises(ValueError, match=msg):
        transformer.fit(data)
class LogLGBM(LGBMRegressor):
    """LightGBM regressor trained on a transformed (shifted) target.

    The target is shifted by ``+1`` and passed through a target-specific
    scaler before fitting; predictions are mapped back with the scaler's
    inverse transform and shifted by ``-1``.

    Parameters
    ----------
    target : str, default=None
        Name of the target column. ``"Oil_norm"`` selects a Box-Cox
        ``PowerTransformer``; ``"Gas_norm"`` and ``"Water_norm"`` select a
        ``log1p``/``expm1`` ``FunctionTransformer``. Any other value leaves
        the model without a scaler and ``fit`` raises ``ValueError``.
    **kwargs
        Forwarded unchanged to ``LGBMRegressor.__init__``.
    """

    def __init__(self, target=None, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor argument as an attribute so that
        # ``get_params()`` / ``sklearn.base.clone`` can read it back.
        self.target = target
        self.target_scaler = self._make_target_scaler(target)

    @staticmethod
    def _make_target_scaler(target):
        """Return the scaler associated with ``target`` (``None`` if unknown)."""
        if target == "Oil_norm":
            return PowerTransformer(method='box-cox', standardize=False)
        if target in ("Gas_norm", "Water_norm"):
            return FunctionTransformer(func=np.log1p, inverse_func=np.expm1)
        return None

    def fit(self, X, Y, **kwargs):
        """Fit the target scaler on ``Y + 1`` and the regressor on the scaled target.

        Raises
        ------
        ValueError
            If the model was built with a ``target`` that has no scaler.
        """
        if self.target_scaler is None:
            raise ValueError(
                "No target scaler is defined for target {!r}; expected one of "
                "'Oil_norm', 'Gas_norm', 'Water_norm'.".format(self.target)
            )
        # ``np.asarray`` accepts both a pandas Series and a plain array.
        shifted = np.asarray(Y).reshape(-1, 1) + 1
        self.target_scaler.fit(shifted)
        y_train = pd.Series(self.target_scaler.transform(shifted).reshape(-1))
        super().fit(X, y_train, **kwargs)
        return self

    def predict(self, X):
        """Predict in the scaled space and map back to the original target scale."""
        preds = super().predict(X).reshape(-1, 1)
        preds = self.target_scaler.inverse_transform(preds) - 1
        return preds[:, 0]
class TensorScaler(TransformerMixin):
    """Scaling for 3D tensors.

    Assumes the size is (..., length, input_channels), reshapes to
    (..., input_channels), performs the method operation and then reshapes
    back.

    Arguments:
        method (str): Scaling method, one of ('stdsc', 'ma', 'mms'). When it
            is a string it takes precedence over ``scaling_function``.
        scaling_function (transformer): Specification of an sklearn
            transformer that performs a scaling operation. Only used when
            ``method`` is not a string. If both are ``None`` an identity
            ``FunctionTransformer`` is used.

    Raises:
        ValueError: If ``method`` is a string that is not a key of
            ``SCALERS``.
    """

    def __init__(self, method="stdsc", scaling_function=None):
        self.scaling = method

        if method is None and scaling_function is None:
            self.scaler = FunctionTransformer(func=None)
        elif isinstance(method, str):
            # Look the class up first: calling the result of ``.get``
            # directly would fail with an opaque "NoneType is not callable"
            # before any validation could run.
            scaler_cls = SCALERS.get(method)
            if scaler_cls is None:
                raise ValueError(
                    "Scalings allowed are {}, recieved {}.".format(
                        SCALERS.keys(), method
                    )
                )
            self.scaler = scaler_cls()
        else:
            self.scaler = scaling_function

    @apply_fit_to_channels
    def fit(self, data, labels=None):
        """Fit the wrapped scaler on ``data``; ``labels`` is ignored."""
        self.scaler.fit(data)
        return self

    @apply_transform_to_channels
    def transform(self, data):
        """Apply the fitted scaler and return the result as a ``torch.Tensor``."""
        return torch.Tensor(self.scaler.transform(data))
class FunctionTransformerPrim(primitive):
    """Primitive wrapping a default (identity) ``FunctionTransformer``.

    ``random_state`` is accepted for signature compatibility but is not used
    by this primitive.
    """

    def __init__(self, random_state=0):
        super(FunctionTransformerPrim, self).__init__(name='FunctionTransformer')
        self.id = 11
        self.hyperparams = []
        self.type = 'feature preprocess'
        self.description = (
            "Constructs a transformer from an arbitrary callable. "
            "A FunctionTransformer forwards its X (and optionally y) "
            "arguments to a user-defined function or function object and "
            "returns the result of this function. This is useful for "
            "stateless transformations such as taking the log of "
            "frequencies, doing custom scaling, etc."
        )
        self.hyperparams_run = {'default': True}
        self.scaler = FunctionTransformer()
        self.accept_type = 'c_t'

    def can_accept(self, data):
        """Delegate the acceptance check to ``can_accept_c``."""
        return self.can_accept_c(data)

    def is_needed(self, data):
        """Always report the primitive as applicable."""
        return True

    def fit(self, data):
        """Fit the wrapped transformer on ``data['X']``."""
        data = handle_data(data)
        self.scaler.fit(data['X'])

    def produce(self, data):
        """Transform ``data['X']`` and return ``{0: output}``.

        The transformed columns are renamed to ``"<name>_qntl"`` and the
        resulting frame carries a fresh ``RangeIndex``.
        """
        output = handle_data(data)
        # NOTE(review): the "_qntl" suffix looks inherited from a quantile
        # primitive; kept because downstream code may rely on the names.
        cols = ["{}_qntl".format(x) for x in output['X'].columns]

        transformed = self.scaler.transform(output['X'])
        # The transformer may hand back a DataFrame (no validation) or an
        # ndarray. ``pd.DataFrame(df, columns=new_names)`` would *select*
        # the non-existing new names and yield an all-NaN frame, so build
        # the frame first and rename the columns afterwards.
        frame = pd.DataFrame(transformed).reset_index(drop=True)
        frame.columns = cols
        output['X'] = frame

        return {0: output}
def test_function_transformer(self):
    """The ONNX export of ``FunctionTransformer(custom_fct)`` matches sklearn."""
    inputs = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)
    expected_literal = numpy.array(
        [[6.1, 0.], [3.5, 0.]], dtype=numpy.float32)

    transformer = FunctionTransformer(custom_fct)
    transformer.fit(inputs)
    sk_output = transformer.transform(inputs)
    self.assertEqualArray(expected_literal, sk_output)

    # Convert to ONNX and compare the runtime output with scikit-learn's.
    inference = OnnxInference(to_onnx(transformer, inputs))
    onnx_output = inference.run({'X': inputs})
    self.assertEqualArray(sk_output, onnx_output['variable'])
def test_function_transformer_fft_abs(self):
    """ONNX export of the FFT/abs custom functions matches sklearn per runtime."""
    cases = [('py', custom_fft_abs), ('ort', custom_fft_abs_ort)]
    for runtime_name, function in cases:
        with self.subTest(runtime=runtime_name):
            inputs = numpy.array(
                [[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)

            transformer = FunctionTransformer(function)
            transformer.fit(inputs)
            sk_output = transformer.transform(inputs)

            inference = OnnxInference(to_onnx(transformer, inputs))
            onnx_output = inference.run({'X': inputs})
            self.assertEqualArray(
                sk_output, onnx_output['variable'], decimal=5)
def test_function_transformer_pickle(self):
    """A fitted ``FunctionTransformer`` survives a pickle round trip."""
    inputs = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)

    transformer = FunctionTransformer(custom_fct)
    transformer.fit(inputs)
    before = transformer.transform(inputs)

    # Serialise to bytes and load the copy back from those bytes.
    payload = pickle.dumps(transformer)
    restored = pickle.loads(payload)

    after = restored.transform(inputs)
    self.assertEqualArray(before, after)
class _FunctionTransformerImpl:
    """Thin wrapper that forwards ``fit``/``transform`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when it is given."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transformation of ``X``."""
        return self._wrapped_model.transform(X)
def test_functiontransformer_vs_sklearn():
    """Compare msmbuilder's FunctionTransformer with scikit-learn's."""
    # Reference: sklearn.preprocessing.FunctionTransformer, fitted on the
    # concatenated trajectories.
    reference = FunctionTransformerR()
    reference.fit(np.concatenate(trajs))

    # Under test: msmbuilder.preprocessing.FunctionTransformer, fitted on
    # the list of trajectories.
    candidate = FunctionTransformer()
    candidate.fit(trajs)

    expected = reference.transform(trajs[0])
    actual = candidate.transform(trajs)[0]

    np.testing.assert_array_almost_equal(expected, actual)
class Noise:
    """
    This transformer adds gaussian noise to an embeddingset.

    The noise is drawn from a private random generator seeded with ``seed``
    on every call, so repeated transforms of the same embeddingset give the
    same result and the global NumPy random state is left untouched.

    Arguments:
        sigma: the amount of gaussian noise to add
        seed: seed value for random number generator

    Usage:

    ```python
    from whatlies.language import SpacyLanguage
    from whatlies.transformers import Noise

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "niece", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "blue", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = SpacyLanguage("en_core_web_md")
    emb = lang[words]

    emb.transform(Noise(3))
    ```
    """

    def __init__(self, sigma=0.1, seed=42):
        self.is_fitted = False
        self.sigma = sigma
        self.seed = seed
        self.tfm = FunctionTransformer(self._add_noise)

    def _add_noise(self, X):
        """Return ``X`` plus gaussian noise drawn from a freshly seeded generator."""
        # A local ``RandomState`` seeded with ``seed`` produces the same
        # stream as ``np.random.seed(seed)`` followed by ``np.random.normal``
        # without clobbering the process-wide generator.
        rng = np.random.RandomState(self.seed)
        return X + rng.normal(0, self.sigma, X.shape)

    def __call__(self, embset):
        """Fit on first use, then transform ``embset``."""
        if not self.is_fitted:
            self.fit(embset)
        return self.transform(embset)

    def fit(self, embset):
        """Fit the underlying transformer on the vectors of ``embset``."""
        names, X = embset_to_X(embset=embset)
        self.tfm.fit(X)
        self.is_fitted = True

    def transform(self, embset):
        """Return a new ``EmbeddingSet`` whose vectors have noise added."""
        names, X = embset_to_X(embset=embset)
        new_vecs = self.tfm.transform(X)
        new_dict = new_embedding_dict(names, new_vecs, embset)
        return EmbeddingSet(
            new_dict,
            name=f"{embset.name}",
        )
class PasstroughEncoder(BaseEstimator, TransformerMixin):
    """Encoder that passes its input through a validating identity transformer."""

    def __init__(self, passthrough=True):
        self.passthrough = passthrough

    def fit(self, X, y=None):
        """Create and fit the identity ``FunctionTransformer``; ``y`` is ignored."""
        identity = FunctionTransformer(func=None, validate=True)
        self.encoder = identity
        identity.fit(X)
        return self

    def transform(self, X):
        """Return ``X`` as transformed by the fitted identity transformer."""
        return self.encoder.transform(X)
def get_target_variable_scaler(self, y, target_column, group_type=None):
    """Build, fit and register the scaler configured for ``target_column``.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Target values; a NumPy array or a pandas Series.
    target_column : str
        Key into ``self.targetScaling`` selecting the scaler type:
        ``"identity"``, ``"log"``, or anything else for a
        ``PowerTransformer``.
    group_type : optional
        Not used by this method.

    Returns
    -------
    The fitted scaler, which is also stored in
    ``self.targetScalers[target_column]``.
    """
    assert target_column in self.targetScaling
    scaler_type = self.targetScaling[target_column]

    if scaler_type == "identity":
        scaler = FunctionTransformer()
    elif scaler_type == "log":
        scaler = FunctionTransformer(func=np.log, inverse_func=np.exp)
    else:
        # Any other configured value falls back to a power transform.
        scaler = PowerTransformer()

    # Convert first: multi-dimensional indexing (``y[:, np.newaxis]``) is
    # not supported on a pandas Series by current pandas versions.
    scaler.fit(np.asarray(y)[:, np.newaxis])

    self.targetScalers[target_column] = scaler
    return scaler
def FncTran(df, target):
    """Scale the features of ``df``, pass them through an identity transformer
    and return them joined with the target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data containing the feature columns and ``target``.
    target : str
        Name of the target column.

    Returns
    -------
    pandas.DataFrame
        Transformed features plus the target, aligned on the target's index,
        with rows containing NaN removed.
    """
    # Split into X and y datasets.
    X_init = df.drop(target, axis=1)
    y_init = df[target]

    dum = FunctionTransformer()
    scaled = RobScale(X_init)

    print('Function transformer fitting...')
    fit = dum.fit(scaled)
    print('Function transforming...')
    dfit = pd.DataFrame(fit.transform(scaled))

    # ``join_axes`` was removed from ``pd.concat``; reindexing the result on
    # the target's index is the documented replacement. Rows that end up
    # with NaNs (e.g. from index misalignment) are dropped.
    dfity = pd.concat([dfit, y_init], axis=1).reindex(y_init.index).dropna()

    print('The encoded data has shape:', dfity.shape, '\n\n')
    return dfity
def test_vectorize_sklearn(constraint, axis):
    """A vectorized constraint is idempotent when used as an sklearn transform."""
    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import FunctionTransformer

    # Dataset.
    samples = load_iris().data

    # Build the transform from the vectorized constraint.
    #XXX: inverse?
    t = FunctionTransformer(func=vectorize(constraint, axis), validate=False)

    # Applying the transform to already-transformed data must change nothing.
    constrained = t.fit(samples).transform(samples)
    assert np.all(t._transform(constrained) == constrained)
def fit(self, X: pd.DataFrame, y=None):
    """Fit one encoder per known feature present in ``X``.

    Each encoder is fitted on the non-missing rows of its column only and
    stored in ``self._transf`` under the feature name. ``y`` is ignored.
    """
    # Categorical features: one-hot encoders.
    for feat in self._cat_feat:
        if feat not in X:
            continue
        present_rows = X[feat].notna()
        one_hot = OneHotEncoder(handle_unknown='ignore', sparse=False)
        self._transf[feat] = one_hot.fit(X.loc[present_rows, [feat]])

    # Label and confidence features: function-based encoders.
    function_encoded = (
        (self._label_feat, self.encode_labels),
        (self._conf_feat, self.encode_confidence),
    )
    for feat, encoder in function_encoded:
        if feat not in X:
            continue
        present_rows = X[feat].notna()
        wrapped = FunctionTransformer(func=encoder, validate=True)
        self._transf[feat] = wrapped.fit(X.loc[present_rows, [feat]])

    return self
def test_check_inverse():
    """``check_inverse`` warns only when func/inverse_func are not inverses."""
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    containers = [
        X_dense,
        sparse.csr_matrix(X_dense),
        sparse.csc_matrix(X_dense),
    ]
    warning_message = (
        "The provided functions are not strictly"
        " inverse of each other. If you are sure you"
        " want to proceed regardless, set"
        " 'check_inverse=False'."
    )

    for X in containers:
        accept_sparse = sparse.issparse(X)

        # sqrt / around are not inverses of each other: a warning is expected.
        trans = FunctionTransformer(
            func=np.sqrt,
            inverse_func=np.around,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        with pytest.warns(UserWarning, match=warning_message):
            trans.fit(X)

        # expm1 / log1p are proper inverses: any UserWarning is an error.
        trans = FunctionTransformer(
            func=np.expm1,
            inverse_func=np.log1p,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            Xt = trans.fit_transform(X)

        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    one_sided = (
        {"func": np.expm1, "inverse_func": None},
        {"func": None, "inverse_func": np.expm1},
    )
    for functions in one_sided:
        trans = FunctionTransformer(
            func=functions["func"],
            inverse_func=functions["inverse_func"],
            check_inverse=True,
            validate=True,
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            trans.fit(X_dense)
class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
    """Meta-estimator to regress on a transformed target.

    Useful for applying a non-linear transformation to the target ``y`` in
    regression problems. This transformation can be given as a Transformer
    such as the QuantileTransformer or as a function and its inverse such as
    ``log`` and ``exp``.

    The computation during ``fit`` is::

        regressor.fit(X, func(y))

    or::

        regressor.fit(X, transformer.transform(y))

    The computation during ``predict`` is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))

    Read more in the :ref:`User Guide <transformed_target_regressor>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    regressor : object, default=None
        Regressor object such as derived from ``RegressorMixin``. This
        regressor will automatically be cloned each time prior to fitting.
        If regressor is ``None``, ``LinearRegression()`` is created and used.

    transformer : object, default=None
        Estimator object such as derived from ``TransformerMixin``. Cannot be
        set at the same time as ``func`` and ``inverse_func``. If
        ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,
        the transformer will be an identity transformer. Note that the
        transformer will be cloned during fitting. Also, the transformer is
        restricting ``y`` to be a numpy array.

    func : function, default=None
        Function to apply to ``y`` before passing to ``fit``. Cannot be set at
        the same time as ``transformer``. The function needs to return a
        2-dimensional array. If ``func`` is ``None``, the function used will be
        the identity function.

    inverse_func : function, default=None
        Function to apply to the prediction of the regressor. Cannot be set at
        the same time as ``transformer`` as well. The function needs to return
        a 2-dimensional array. The inverse function is used to return
        predictions to the same space of the original training labels.

    check_inverse : bool, default=True
        Whether to check that ``transform`` followed by ``inverse_transform``
        or ``func`` followed by ``inverse_func`` leads to the original targets.

    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in ``fit`` and ``predict``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.compose import TransformedTargetRegressor
    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
    ...                                 func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    TransformedTargetRegressor(...)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([2.])

    Notes
    -----
    Internally, the target ``y`` is always converted into a 2-dimensional array
    to be used by scikit-learn transformers. At the time of prediction, the
    output will be reshaped to a have the same number of dimensions as ``y``.

    See :ref:`examples/compose/plot_transformed_target.py
    <sphx_glr_auto_examples_compose_plot_transformed_target.py>`.
    """

    @_deprecate_positional_args
    def __init__(self, regressor=None, *, transformer=None,
                 func=None, inverse_func=None, check_inverse=True):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse

    def _fit_transformer(self, y):
        """Check transformer and fit transformer.

        Create the default transformer, fit it and make additional inverse
        check on a subset (optional).
        """
        functions_given = (self.func is not None
                           or self.inverse_func is not None)

        if self.transformer is not None:
            if functions_given:
                raise ValueError("'transformer' and functions 'func'/"
                                 "'inverse_func' cannot both be set.")
            self.transformer_ = clone(self.transformer)
        else:
            if self.func is not None and self.inverse_func is None:
                raise ValueError("When 'func' is provided, 'inverse_func' must"
                                 " also be provided")
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse,
            )

        # XXX: sample_weight is not currently passed to the
        # transformer. However, if transformer starts using sample_weight, the
        # code should be modified accordingly. At the time to consider the
        # sample_prop feature, it is also a good use case to be considered.
        self.transformer_.fit(y)

        if not self.check_inverse:
            return

        # Round-trip roughly every tenth sample through the transformer.
        stride = max(1, y.shape[0] // 10)
        y_sel = _safe_indexing(y, slice(None, None, stride))
        y_sel_t = self.transformer_.transform(y_sel)
        round_trip = self.transformer_.inverse_transform(y_sel_t)
        if not np.allclose(y_sel, round_trip):
            warnings.warn(
                "The provided functions or transformer are not strictly"
                " inverse of each other. If you are sure you want to"
                " proceed regardless, set 'check_inverse=False'",
                UserWarning,
            )

    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            Parameters passed to the ``fit`` method of the underlying
            regressor.

        Returns
        -------
        self : object
        """
        y = check_array(y, accept_sparse=False, force_all_finite=True,
                        allow_nd=True, ensure_2d=False, dtype='numeric')

        # Remember the dimensionality of the target so that predictions can
        # be returned with a similar shape.
        self._training_dim = y.ndim

        # Transformers operate on 2d data, so promote a 1d target to a
        # single column.
        y_2d = y.reshape(-1, 1) if y.ndim == 1 else y
        self._fit_transformer(y_2d)

        # Transform the target and go back to 1d if it is a single column.
        y_trans = self.transformer_.transform(y_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True. Therefore, we need to check the number of dimension
        # first.
        if y_trans.ndim == 2 and y_trans.shape[1] == 1:
            y_trans = y_trans.squeeze(axis=1)

        if self.regressor is not None:
            self.regressor_ = clone(self.regressor)
        else:
            from ..linear_model import LinearRegression
            self.regressor_ = LinearRegression()

        self.regressor_.fit(X, y_trans, **fit_params)

        return self

    def predict(self, X):
        """Predict using the base regressor, applying inverse.

        The regressor is used to predict and the ``inverse_func`` or
        ``inverse_transform`` is applied before returning the prediction.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        y_hat : ndarray of shape (n_samples,)
            Predicted values.
        """
        check_is_fitted(self)
        pred = self.regressor_.predict(X)

        pred_2d = pred.reshape(-1, 1) if pred.ndim == 1 else pred
        pred_trans = self.transformer_.inverse_transform(pred_2d)

        single_column = pred_trans.ndim == 2 and pred_trans.shape[1] == 1
        if self._training_dim == 1 and single_column:
            pred_trans = pred_trans.squeeze(axis=1)

        return pred_trans

    def _more_tags(self):
        return {'poor_score': True, 'no_validation': True}

    @property
    def n_features_in_(self):
        # For consistency with other estimators we raise a AttributeError so
        # that hasattr() returns False the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            message = "{} object has no n_features_in_ attribute.".format(
                self.__class__.__name__)
            raise AttributeError(message) from nfe

        return self.regressor_.n_features_in_
# Imports are placed before their first use in this section.
from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import FunctionTransformer

ax7.plot(interp_data[:, 6])
plt.show()

# Normalise the data: use either this or the transformation below.
scalar = preprocessing.MinMaxScaler()
scalar.fit(interp_data)
norm = scalar.transform(interp_data)

# Transformation technique (z-score); keep only one of the two,
# either normalisation or transformation.
transformer = FunctionTransformer(stats.zscore)
transformer.fit(interp_data)
transf = transformer.transform(interp_data)

# Supervised t-1 formulation without unnecessary columns.
# If scaled, use norm; if transformed, use transf.
# ``np.nan`` is used because the ``np.NaN`` alias no longer exists in NumPy 2.
temp = shift(transf, 1, cval=np.nan)
temp = temp[:, 1:]
Last_column = shift(transf[:, 6], 1, cval=np.nan)

# Reform to supervised.
data_t1 = np.column_stack([temp, Last_column])

# Train and test data + reformulation (without NaN: the first row is dropped).
# If scaled, use norm; if transformed, use transf.
data_t1 = data_t1[1:15000]
Y = transf[1:15000, 0]
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split

# -------------------------------------------------------------------
# Load the data set.
data = load_breast_cancer()
X = data.data
y = data.target


# -------------------------------------------------------------------
def function1(z):
    """Element-wise feature transformation applied before training.

    Alternatives that were tried: ``np.log1p(z)`` and ``np.power(z, 4)``.
    """
    return np.sqrt(z)


f = FunctionTransformer(func=function1, validate=True)
f.fit(X)
x_f = f.transform(X)

# -------------------------------------------------------------------
# No random_state is set, so the split (and the reported scores) vary
# between runs.
x_train, x_test, y_train, y_test = train_test_split(x_f, y, test_size=0.2)

# -------------------------------------------------------------------
logreg = LogisticRegression(max_iter=1000)
logreg.fit(x_train, y_train)
result = logreg.predict(x_test)

print(accuracy_score(y_test, result))
conf = confusion_matrix(y_test, result)
print('confusion matrix \n', conf)
class AdvancedTransformedTargetRegressor(TransformedTargetRegressor):
    """Expand :class:`sklearn.compose.TransformedTargetRegressor`."""

    @property
    def coef_(self):
        """numpy.ndarray: Model coefficients."""
        return self.regressor_.coef_

    @property
    def feature_importances_(self):
        """numpy.ndarray: Feature importances."""
        return self.regressor_.feature_importances_

    def fit(self, x_data, y_data, **fit_kwargs):
        """Expand :meth:`fit` to accept kwargs."""
        y_2d, regressor_kwargs = self.fit_transformer_only(
            y_data, **fit_kwargs)

        # Transform the target; a single-column result goes back to 1D
        transformed = self.transformer_.transform(y_2d)
        if transformed.ndim == 2 and transformed.shape[1] == 1:
            transformed = transformed.squeeze(axis=1)

        # Fall back to linear regression if no regressor is given
        if self.regressor is not None:
            self.regressor_ = clone(self.regressor)
        else:
            self.regressor_ = LinearRegression()

        # Fit regressor with kwargs
        self.regressor_.fit(x_data, transformed, **regressor_kwargs)
        return self

    def fit_transformer_only(self, y_data, **fit_kwargs):
        """Fit only ``transformer`` step."""
        y_data = check_array(y_data,
                             accept_sparse=False,
                             force_all_finite=True,
                             ensure_2d=False,
                             dtype='numeric')
        self._training_dim = y_data.ndim

        # Split kwargs; only the regressor part is used
        # FIXME: Transformer does NOT use transformer_kwargs
        regressor_kwargs = self._get_fit_params(fit_kwargs)[1]

        # Transformers are designed to modify X which is 2D, so a 1D target
        # becomes a single column
        y_2d = y_data.reshape(-1, 1) if y_data.ndim == 1 else y_data
        self._fit_transformer(y_2d)
        return (y_2d, regressor_kwargs)

    def predict(self, x_data, always_return_1d=True, **predict_kwargs):
        """Expand :meth:`predict()` to accept kwargs."""
        check_is_fitted(self)
        if not hasattr(self, 'regressor_'):
            raise NotFittedError(
                f"Regressor of {self.__class__} is not fitted yet, call fit() "
                f"first")

        # Kwargs for returning variance or covariance
        if 'return_std' in predict_kwargs:
            regressor_args = getfullargspec(self.regressor_.predict).args
            if 'return_std' in regressor_args:
                raise NotImplementedError(
                    f"Using keyword argument 'return_std' for final regressor "
                    f"{self.regressor_.__class__} is not supported yet, only "
                    f"'return_var' is allowed. Expand the regressor to accept "
                    f"'return_var' instead (see 'esmvaltool/diag_scripts/mlr"
                    f"/models/gpr_sklearn.py' for an example)")
        mlr.check_predict_kwargs(predict_kwargs)
        return_var = predict_kwargs.get('return_var', False)
        return_cov = predict_kwargs.get('return_cov', False)
        with_error = return_var or return_cov

        # Prediction (first element only if an error estimate comes along)
        prediction = self.regressor_.predict(x_data, **predict_kwargs)
        pred = prediction[0] if with_error else prediction
        pred_2d = pred.reshape(-1, 1) if pred.ndim == 1 else pred
        pred_trans = self.transformer_.inverse_transform(pred_2d)
        if self._to_be_squeezed(pred_trans,
                                always_return_1d=always_return_1d):
            pred_trans = pred_trans.squeeze(axis=1)
        if not with_error:
            return pred_trans

        # Return scaled variance or covariance if desired
        err = prediction[1]
        if not hasattr(self.transformer_, 'scale_'):
            raise NotImplementedError(
                f"Transforming of additional prediction output (e.g. by "
                f"'return_var' or 'return_cov') is not supported for "
                f"transformer {self.transformer_.__class__} yet, the "
                f"necessary attribute 'scale_' is missing")
        scale = self.transformer_.scale_
        if scale is not None:
            err *= scale**2
        if self._to_be_squeezed(err, always_return_1d=always_return_1d):
            err = err.squeeze(axis=1)
        return (pred_trans, err)

    def _get_fit_params(self, fit_kwargs):
        """Separate ``transformer`` and ``regressor`` kwargs."""
        steps = [
            ('transformer', self.transformer),
            ('regressor', self.regressor),
        ]
        fit_params = _get_fit_parameters(fit_kwargs, steps, self.__class__)
        for step_name in ('transformer', 'regressor'):
            fit_params.setdefault(step_name, {})

        # FIXME
        transformer_params = fit_params['transformer']
        if transformer_params:
            raise NotImplementedError(
                f"Fit parameters {fit_params['transformer']} for transformer "
                f"{self.transformer.__class__} of {self.__class__} are not "
                f"supported at the moment")

        return (transformer_params, fit_params['regressor'])

    def _fit_transformer(self, y_data):
        """Check transformer and fit transformer."""
        functions_given = (self.func is not None
                           or self.inverse_func is not None)
        if self.transformer is not None and functions_given:
            raise ValueError("'transformer' and functions 'func'/"
                             "'inverse_func' cannot both be set.")

        if self.transformer is None:
            if self.func is not None and self.inverse_func is None:
                raise ValueError(
                    "When 'func' is provided, 'inverse_func' must also be "
                    "provided")
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse)
        else:
            self.transformer_ = clone(self.transformer)

        self.transformer_.fit(y_data)
        if not self.check_inverse:
            return

        # Round-trip roughly every tenth sample through the transformer
        stride = max(1, y_data.shape[0] // 10)
        y_sel = _safe_indexing(y_data, slice(None, None, stride))
        y_sel_t = self.transformer_.transform(y_sel)
        round_trip = self.transformer_.inverse_transform(y_sel_t)
        if not np.allclose(y_sel, round_trip):
            warnings.warn(
                "The provided functions or transformer are "
                "not strictly inverse of each other. If "
                "you are sure you want to proceed regardless, "
                "set 'check_inverse=False'", UserWarning)

    def _to_be_squeezed(self, array, always_return_1d=True):
        """Check if ``array`` should be squeezed or not."""
        single_column = array.ndim == 2 and array.shape[1] == 1
        if always_return_1d:
            return single_column
        return single_column and self._training_dim == 1
class KCenterGreedy(QueryStrategy):
    """K-Center-Greedy

    This class implements K-Center-Greedy active learning algorithm [1]_.

    Parameters
    ----------
    transformer: :py:class:`An sklearn estimator supporting transform and/or fit_transform` object instance
        Estimator used to embed the samples before distances are computed.
        It is re-fitted on the labeled pool on every query. Defaults to an
        identity ``FunctionTransformer``.

    References
    ----------
    .. [1] Core-Set...
    """

    def __init__(self, *args, **kwargs):
        super(KCenterGreedy, self).__init__(*args, **kwargs)

        self.transformer = kwargs.pop('transformer', None)
        if self.transformer is None:
            self.transformer = FunctionTransformer()
        if not hasattr(self.transformer, "transform"):
            raise TypeError(
                "transformer has method: .transform()"
            )
        # The transformer is not fitted here; it is (re)fitted on the
        # labeled pool at query time.

    def make_query(self, n=1):
        """Return the index of the sample to be queried and labeled and
        selection score of each sample. Read-only.

        No modification to the internal states.

        Parameters
        ----------
        n : int, default=1
            Number of samples to select. Fewer are returned if the unlabeled
            pool is exhausted first.

        Returns
        -------
        ask_ids : list
            The batch of indexes of the next unlabeled samples to be
            queried and labeled.
        """
        dataset = self.dataset

        # Train CNNs (models) from scratch (retrain) after each iteration [1]_.
        X_lbl_curr, y_lbl_curr = dataset.get_labeled_entries()
        # Work on a private copy: the mask is updated while selecting and
        # must not leak back into the dataset.
        idx_lbl_mask = np.array(dataset.get_labeled_mask(), dtype=bool)
        X = dataset._X

        self.transformer.fit(X_lbl_curr, y_lbl_curr)
        embed = self.transformer.transform(X)

        # Reference. KH Huang
        # https://github.com/ariapoy/deep-active-learning/blob/master/query_strategies/kcenter_greedy.py#L15
        dist_mat = cdist(embed, embed, metric="euclidean")
        # Rows: unlabeled samples; columns: current centers (labeled samples).
        dist_mat_ublxlbl = dist_mat[~idx_lbl_mask, :][:, idx_lbl_mask]

        # Ids of the unlabeled samples, kept aligned with the rows of
        # ``dist_mat_ublxlbl``. The dataset itself is not updated during the
        # loop, so re-reading the ids from it on each pass would map row
        # positions through a stale list once a row has been removed.
        unlabeled_entry_ids, _ = dataset.get_unlabeled_entries()
        unlabeled_entry_ids = list(unlabeled_entry_ids)

        res = []
        for _ in range(n):
            if not unlabeled_entry_ids:
                # Nothing left to select.
                break

            # scores: min_{j \in s}, (s: label pool)
            scores = np.min(dist_mat_ublxlbl, axis=1)
            ask_id_pos = np.argmax(scores)
            ask_id = unlabeled_entry_ids.pop(ask_id_pos)
            res.append(ask_id)

            # The chosen sample becomes a center: drop its row and add its
            # distances to the remaining unlabeled samples as a new column.
            idx_lbl_mask[ask_id] = True
            dist_mat_ublxlbl = np.delete(dist_mat_ublxlbl, ask_id_pos, 0)
            dist_mat_ublxlbl = np.append(
                dist_mat_ublxlbl,
                dist_mat[~idx_lbl_mask, ask_id][:, None],
                axis=1,
            )

        return res
def empty_transformer():
    """Return an identity ``FunctionTransformer`` fitted on random data."""
    identity = FunctionTransformer(validate=True)
    # 1000 samples x 10 features, uniformly drawn from [20, 30).
    sample = np.random.uniform(20, 30, (1000, 10))
    identity.fit(sample)
    return identity
import warnings

import tensorflow as tf

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Fix the seeds for reproducibility.
seed = 444
np.random.seed(seed)
tf.set_random_seed(seed)

df = pd.read_csv("forestfires.csv", index_col=None)
features = ['temp', 'RH', 'wind', 'rain']

Y = df['area'].values
transformer = FunctionTransformer(np.log1p, inverse_func=np.expm1)
unscaled_Y = Y
# Hand the target to the transformer as an explicit single column and
# flatten the result again. Indexing the output of a 1-D input with ``[0]``
# only yields the full vector on scikit-learn releases that silently
# reshaped 1-D data to one row; on later releases it gives a single value
# or raises.
transformer.fit(unscaled_Y.reshape(-1, 1))
Y = transformer.transform(unscaled_Y.reshape(-1, 1)).ravel()

X = df[features]
input_size = len(features)


def nn1_model():
    """Build and compile a small fully connected regression network.

    The network takes ``input_size`` features, has two hidden ReLU layers
    (8 and 4 units) and one linear output unit; it is compiled with mean
    absolute error loss and the Adam optimizer.
    """
    model = Sequential()
    model.add(Dense(units=8, input_dim=input_size, activation='relu'))
    model.add(Dense(units=4, activation='relu'))
    model.add(Dense(units=1))
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model
def log_transformer():
    """Return a log/exp ``FunctionTransformer`` fitted on random data."""
    log_exp = FunctionTransformer(np.log, np.exp, validate=True)
    # 1000 samples x 10 features, uniformly drawn from [20, 30).
    sample = np.random.uniform(20, 30, (1000, 10))
    log_exp.fit(sample)
    return log_exp
def custom_transformer():
    """Return a square/sqrt ``FunctionTransformer`` fitted on random data.

    The transformer additionally carries a ``func_inC`` attribute holding
    the expression template ``'pow({x}, 2)'``.
    """
    square_sqrt = FunctionTransformer(np.square, np.sqrt, validate=True)
    square_sqrt.func_inC = 'pow({x}, 2)'
    # 1000 samples x 10 features, uniformly drawn from [20, 30).
    sample = np.random.uniform(20, 30, (1000, 10))
    square_sqrt.fit(sample)
    return square_sqrt
class NNPredictorNumerical:
    """Feed-forward network on numerical features with a log-scaled target.

    The target is transformed with ``log1p`` before training and predictions
    are mapped back with ``expm1``.

    Parameters
    ----------
    numerical_features : list
        Names of the numerical feature columns; their count defines the
        network input size.
    data
        Stored on the instance as ``self.data``; not used by the methods of
        this class.
    **kwargs
        Forwarded to :meth:`build_full_network`.
    """

    def __init__(self, numerical_features, data, **kwargs):
        self.numerical_features = numerical_features
        self.scaler = StandardScaler()
        # Pass the ufunc itself: calling ``np.expm1()`` without arguments
        # raises at construction time.
        self.target_scaler = FunctionTransformer(
            func=np.log1p, inverse_func=np.expm1
        )
        self.model = None
        self.data = data
        self.inputs = []
        self.build_full_network(**kwargs)

    def build_full_network(self, optimizer=None):
        """Build and compile the network and store it in ``self.model``.

        Parameters
        ----------
        optimizer : optional
            Optimizer used to compile the model. Defaults to a fresh
            ``SGD(lr=0.001)`` per call, so that optimizer state is never
            shared between models.
        """
        if optimizer is None:
            optimizer = SGD(lr=0.001)

        # Dense stack on the numerical inputs, with dropout in between.
        input_num = Input(shape=(len(self.numerical_features),))
        dense_num = Dense(256, activation="relu")(input_num)
        m = Dropout(rate=0.2)(dense_num)
        dense_num = Dense(128, activation="relu")(m)
        m = Dropout(rate=0.2)(dense_num)
        dense_num = Dense(64, activation="relu")(m)
        m = Dense(16, activation="relu")(dense_num)
        m = Dropout(rate=0.2)(m)
        m = Dense(8, activation="relu")(m)
        m = Dropout(rate=0.2)(m)
        m = Dense(4, activation="relu")(m)
        output = Dense(1, activation="linear")(m)

        model = Model(input_num, output)
        model.compile(loss="mae", optimizer=optimizer)
        self.model = model

    def fit(self, x, y, **kwargs):
        """Fit the target scaler and train the model on the scaled target.

        ``kwargs`` are forwarded to the model's ``fit``.
        """
        y = y.reshape(-1, 1)
        self.target_scaler.fit(y)
        y = self.target_scaler.transform(y)
        self.model.fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        """Predict and map the output back to the original target scale."""
        y = self.model.predict(x, **kwargs)
        return self.target_scaler.inverse_transform(y)

    def preprocess_data(self, X_train, X_val, X_test):
        """Impute and standardise the numerical features of the three splits.

        Missing values are filled with the training mean of each feature and
        the scaler is fitted on the training split only. The frames passed
        in are modified in place.

        Returns
        -------
        tuple of list
            One single-element list per split (train, val, test), each
            holding the array of scaled numerical features.
        """
        for c in self.numerical_features:
            mu = np.nanmean(X_train[c])
            X_train[c] = X_train[c].fillna(mu)
            X_test[c] = X_test[c].fillna(mu)
            X_val[c] = X_val[c].fillna(mu)

        # Fit the scaler on the training split only.
        self.scaler.fit(X_train[self.numerical_features])
        X_train[self.numerical_features] = self.scaler.transform(
            X_train[self.numerical_features]
        )
        X_test[self.numerical_features] = self.scaler.transform(
            X_test[self.numerical_features]
        )
        X_val[self.numerical_features] = self.scaler.transform(
            X_val[self.numerical_features]
        )

        input_list_train = [X_train[self.numerical_features].values]
        input_list_val = [X_val[self.numerical_features].values]
        input_list_test = [X_test[self.numerical_features].values]

        return input_list_train, input_list_val, input_list_test
import numpy as np
from sklearn.preprocessing import FunctionTransformer

# Small 3x4 example matrix.
X = [
    [4, 1, 2, 2],
    [1, 3, 9, 3],
    [5, 7, 5, 1],
]


def function1(z):
    """Return the element-wise square root of ``z``."""
    return np.sqrt(z)


FT = FunctionTransformer(func=function1)
# ``fit`` returns the transformer itself, so the calls can be chained.
newdata = FT.fit(X).transform(X)
newdata