def _fit_single_estimator(estimator, X, y, sample_weight=None,
                          message_clsname=None, message=None):
    """Private function used to fit an estimator within a job."""
    if sample_weight is not None:
        try:
            with _print_elapsed_time(message_clsname, message):
                estimator.fit(X, y, sample_weight=sample_weight)
        except TypeError as exc:
            if "unexpected keyword argument 'sample_weight'" in str(exc):
                raise TypeError(
                    "Underlying estimator {} does not support sample "
                    "weights.".format(estimator.__class__.__name__)
                ) from exc
            raise
    elif message_clsname == "catboost":
        # NOTE: X_validation, y_validation and getCatgoricalFeatures are
        # not defined here; they are assumed to come from the enclosing
        # module's scope.
        kwargs = {}
        kwargs['X'] = X
        kwargs['y'] = y
        kwargs['cat_features'] = getCatgoricalFeatures(X_validation)
        kwargs['eval_set'] = (X_validation, y_validation)
        estimator.fit(**kwargs)
    else:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y)
    return estimator
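# A minimal sketch of calling the helper above; LogisticRegression and the
# toy data are illustrative assumptions, not part of the original source.
# _print_elapsed_time is assumed to be in scope, as in the function itself.
import numpy as np
from sklearn.linear_model import LogisticRegression

X_demo = np.random.RandomState(0).normal(size=(20, 3))
y_demo = np.tile([0, 1], 10)
est = _fit_single_estimator(LogisticRegression(), X_demo, y_demo,
                            sample_weight=np.ones(20),
                            message_clsname="Ensemble", message="fitting")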
def fit(self, X: np.ndarray, y: np.ndarray = None,
        logger: ProcessLogger = None, prefix: str = None,
        **fit_params: Dict):
    if self.configuration is None and self.cfg_cache is None:
        raise ValueError(
            'Pipeline is not configured yet. Either call '
            'set_hyperparameters or provide a ConfigGenerator')

    fit_params_steps = self._check_fit_params(**fit_params)
    Xt = self._fit(X, y, logger=logger, prefix=prefix, **fit_params_steps)
    with _print_elapsed_time("Pipeline",
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != "passthrough":
            # Configure estimator on the fly if necessary
            if self.configuration is None:
                config = self._get_config_for_step(
                    len(self.steps) - 1, prefix, self.steps[-1][0], logger)
                self._final_estimator.set_hyperparameters(
                    configuration=config.get_dictionary())

            fit_params_last_step = fit_params_steps[self.steps[-1][0]]
            self._final_estimator.fit(Xt, y, **fit_params_last_step)
    return self
def _fit_transform_one(transformer, X_train, y_train, X_valid=None,
                       y_valid=None, X_test=None, y_test=None,
                       resource_manager=None, message_clsname='',
                       message=None):
    """
    Fits ``transformer`` to ``X_train`` and ``y_train``. The transformed
    result is returned with the fitted transformer.
    """
    transformer.resource_manager = resource_manager
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            result = transformer.fit_transform(X_train, y_train, X_valid,
                                               y_valid, X_test, y_test)
        else:
            result = transformer.fit(
                X_train, y_train, X_valid, y_valid, X_test, y_test
            ).transform(X_train, X_valid, X_test, y_train)
    transformer.resource_manager = None
    return result, transformer
def fit_predict(self, X, y=None, **fit_params):
    """Apply ``fit_predict`` of the last step in the pipeline after
    transforms.

    Applies the ``fit_transform``s of the pipeline to the data, followed
    by the ``fit_predict`` method of the final estimator in the pipeline.
    Valid only if the final estimator implements ``fit_predict``.

    Parameters
    ----------
    X : iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.

    y : iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of each step, where
        each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.

    Returns
    -------
    y_pred : array-like
    """
    fit_params_steps = self._check_fit_params(**fit_params)
    Xt, yt = self._fit(X, y, **fit_params_steps)

    fit_params_last_step = fit_params_steps[self.steps[-1][0]]
    with _print_elapsed_time('Pipeline',
                             self._log_message(len(self.steps) - 1)):
        y_pred = self.steps[-1][-1].fit_predict(Xt, yt,
                                                **fit_params_last_step)
    return y_pred
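# Hypothetical end-to-end use of fit_predict through a pipeline whose final
# step is a clusterer; the stock sklearn Pipeline stands in here for the
# custom yt-aware variant above (an assumption), since KMeans implements
# fit_predict in both cases.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()),
                 ("km", KMeans(n_clusters=2, n_init=10, random_state=0))])
labels = pipe.fit_predict(np.random.RandomState(0).normal(size=(30, 4)))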
def fit_transform(self, X, y=None, **fit_params):
    """Fit the model and transform with the final estimator.

    Fits all the transforms one after the other and transforms the data,
    then uses fit_transform on transformed data with the final estimator.

    Parameters
    ----------
    X : iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.

    y : iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of each step, where
        each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.

    Returns
    -------
    Xt : array-like, shape = [n_samples, n_transformed_features]
        Transformed samples.
    """
    last_step = self._final_estimator
    Xt, yt, fit_params = self._fit(X, y, **fit_params)
    with _print_elapsed_time('Pipeline',
                             self._log_message(len(self.steps) - 1)):
        if last_step == 'passthrough':
            return Xt, yt
        if hasattr(last_step, 'fit_transform'):
            return _wrap_result(
                last_step.fit_transform(Xt, yt, **fit_params), yt)
        else:
            return _wrap_result(
                last_step.fit(Xt, yt, **fit_params).transform(Xt), yt)
def _fit_transform_one(transformer, X, y, weight, message_clsname='',
                       message=None, **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is
    returned with the fitted transformer. If ``weight`` is not ``None``,
    the result will be multiplied by ``weight``.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            # Transformers that also transform y return an (Xt, yt) pair;
            # on any failure, re-fit and pass y through unchanged.
            try:
                Xt, yt = transformer.fit_transform(X, y, **fit_params)
            except Exception:
                Xt = transformer.fit_transform(X, y, **fit_params)
                yt = y
        else:
            try:
                Xt, yt = transformer.fit(X, y, **fit_params).transform(X, y)
            except Exception:
                Xt = transformer.fit(X, y, **fit_params).transform(X)
                yt = y
    if weight is None:
        return Xt, yt, transformer
    return Xt * weight, yt, transformer
def fit(self, X, y=None, **fit_params):
    """Fit the model.

    Fit all the transforms/samplers one after the other and
    transform/sample the data, then fit the transformed/sampled data
    using the final estimator.

    Parameters
    ----------
    X : iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.

    y : iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.

    **fit_params : dict of str -> object
        Parameters passed to the ``fit`` method of each step, where
        each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.

    Returns
    -------
    self : Pipeline
        This estimator.
    """
    fit_params_steps = self._check_fit_params(**fit_params)
    Xt, yt = self._fit(X, y, **fit_params_steps)
    with _print_elapsed_time("Pipeline",
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != "passthrough":
            fit_params_last_step = fit_params_steps[self.steps[-1][0]]
            self._final_estimator.fit(Xt, yt, **fit_params_last_step)
    return self
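# Illustrative end-to-end use of a sampler-aware pipeline like the one
# above; imbalanced-learn's Pipeline exposes this same fit contract, and
# the step names and estimators here are assumptions for the demo.
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

X_demo, y_demo = make_classification(weights=[0.9, 0.1], random_state=0)
pipe = Pipeline([("scale", StandardScaler()),
                 ("under", RandomUnderSampler(random_state=0)),
                 ("clf", LogisticRegression())])
pipe.fit(X_demo, y_demo)  # resampling happens only during fit, not predict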
def _fit_transform_one(transformer, X, y, weight, message_clsname='',
                       message=None, **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is
    returned with the fitted transformer. If ``weight`` is not ``None``,
    the result will be multiplied by ``weight``.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            res = transformer.fit_transform(X, y, **fit_params)
        else:
            res = transformer.fit(X, y, **fit_params).transform(X)

    # X-and-y / X-or-y transformers return an (Xt, yt) pair that must be
    # unpacked before the fitted transformer is appended.
    if isinstance(transformer, (XAndYTransformer, XOrYTransformer)):
        if weight is not None:
            res = res * weight
        return *res, transformer
    if weight is None:
        return res, transformer
    return res * weight, transformer
def fit(self, X, y=None, **fit_params):
    """
    Fits the model: fits all the transforms one after the other and
    transforms the data, then fits the transformed data using the final
    estimator.

    :param X: iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.
    :param y: iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.
    :param fit_params: dict of string -> object
        Parameters passed to the ``fit`` method of each step, where each
        parameter name is prefixed such that parameter ``p`` for step
        ``s`` has key ``s__p``.
    :return: self, Pipeline, this estimator
    """
    fit_params_steps = self._check_fit_params(**fit_params)
    Xt = self._fit(X, y, **fit_params_steps)
    with _print_elapsed_time('OnnxPipeline',
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != 'passthrough':
            fit_params_last_step = fit_params_steps[self.steps[-1][0]]
            self._final_estimator.fit(Xt, y, **fit_params_last_step)
    return self
def _fit_single_estimator(estimator, X, y, sample_weight=None,
                          message_clsname=None, message=None):
    """Private function used to fit an estimator within a job."""
    if sample_weight is not None:
        try:
            with _print_elapsed_time(message_clsname, message):
                estimator.fit(X, y, sample_weight=sample_weight)
        except TypeError as exc:
            if "unexpected keyword argument 'sample_weight'" in str(exc):
                raise TypeError(
                    "Underlying estimator {} does not support sample "
                    "weights.".format(estimator.__class__.__name__)
                ) from exc
            raise
    else:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y)
    return estimator
def _fit_resample_one(sampler, X, y, message_clsname="", message=None,
                      **fit_params):
    with _print_elapsed_time(message_clsname, message):
        X_res, y_res = sampler.fit_resample(X, y, **fit_params)
        return X_res, y_res, sampler
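# Hedged usage sketch for _fit_resample_one: any object exposing
# fit_resample(X, y) works; RandomUnderSampler and the generated data are
# just examples, not taken from the source.
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(weights=[0.9, 0.1], random_state=0)
X_res, y_res, sampler = _fit_resample_one(
    RandomUnderSampler(random_state=0), X_demo, y_demo,
    message_clsname="Pipeline", message="undersampling")
print(Counter(y_res))  # classes are now balanced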
def _fit_one(transformer, X, y, weight, message_clsname='', message=None,
             **fit_params):
    """Fits ``transformer`` to ``X`` and ``y``."""
    with _print_elapsed_time(message_clsname, message):
        return transformer.fit(X, y, **fit_params)
def _fit(self, X, y=None, **fit_params_steps):
    # shallow copy of steps - this should really be steps_
    if hasattr(self, 'raw_steps_') and self.raw_steps_ is not None:  # pylint: disable=E0203
        # Let's reuse the previous training.
        self.steps = list(self.raw_steps_)  # pylint: disable=E0203
        self.raw_steps_ = list(self.raw_steps_)
    else:
        self.steps = list(self.steps)
        self.raw_steps_ = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(_fit_transform_one)

    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue
        if hasattr(memory, 'location'):
            # joblib >= 0.12
            if memory.location is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        else:
            cloned_transformer = clone(transformer)

        # Fit or load from cache the current transformer
        x_train = X
        X, fitted_transformer = fit_transform_one_cached(
            cloned_transformer, X, y, None,
            message_clsname='Pipeline',
            message=self._log_message(step_idx),
            **fit_params_steps[name])

        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.raw_steps_[step_idx] = (name, fitted_transformer)
        self.steps[step_idx] = (
            name, self._to_onnx(name, fitted_transformer, x_train))
    return X
def partial_fit(self, X, y=None, classes=None, **fit_params):
    """Fit the model.

    Fit all the transforms/samplers one after the other and
    transform/sample the data, then fit the transformed/sampled data
    using the final estimator.

    Parameters
    ----------
    X : iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.

    y : iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.

    **fit_params : dict of str -> object
        Parameters passed to the ``fit`` method of each step, where
        each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.

    Returns
    -------
    self : Pipeline
        This estimator.
    """
    if not hasattr(self, "Xt_"):
        self.Xt_ = None
        self.yt_ = None
    if self.Xt_ is None or self.yt_ is None:
        Xt, yt, _ = self._fit(X, y)
        self.Xt_ = Xt
        self.yt_ = yt
    else:
        Xt = self.Xt_
        yt = self.yt_
    with _print_elapsed_time("Pipeline",
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != "passthrough":
            # the try...except block is a workaround until tune-sklearn
            # updates
            try:
                self._final_estimator.partial_fit(Xt, yt, classes=classes,
                                                  **fit_params)
            except TypeError:
                self._final_estimator.partial_fit(Xt, yt, **fit_params)
            self._carry_over_final_estimator_fit_vars()
    return self
def _fit(self, X, y=None, **fit_params_steps):
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
    fit_resample_one_cached = memory.cache(_fit_resample_one)

    for (step_idx, name, transformer) in self._iter(
            with_final=False, filter_passthrough=False,
            filter_resample=False):
        if transformer is None or transformer == "passthrough":
            with _print_elapsed_time("Pipeline",
                                     self._log_message(step_idx)):
                continue
        try:
            # joblib >= 0.12
            mem = memory.location
        except AttributeError:
            mem = memory.cachedir
        finally:
            cloned_transformer = clone(transformer) if mem else transformer
        # Fit or load from cache the current transformer
        if hasattr(cloned_transformer, "transform") or hasattr(
                cloned_transformer, "fit_transform"):
            X, fitted_transformer = fit_transform_one_cached(
                cloned_transformer, X, y, None,
                message_clsname="Pipeline",
                message=self._log_message(step_idx),
                **fit_params_steps[name],
            )
        elif hasattr(cloned_transformer, "fit_resample"):
            X, y, fitted_transformer = fit_resample_one_cached(
                cloned_transformer, X, y,
                message_clsname="Pipeline",
                message=self._log_message(step_idx),
                **fit_params_steps[name],
            )
        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    return X, y
def _fit(self, X, y=None, **fit_params):
    self.steps = list(self.steps)
    self._validate_steps()
    fit_params_steps = self._get_fit_params_steps(fit_params)

    if not MLCache.has_cache(self.cache_name):
        self.cache_ = MLCache.create_cache(self.cache_name)
    else:
        self.cache_ = MLCache.get_cache(self.cache_name)

    Xt = X
    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue
        params = transformer.get_params()
        params['__class__'] = transformer.__class__.__name__
        params['X'] = Xt
        if ((hasattr(transformer, 'is_classifier') and
                transformer.is_classifier()) or
                (hasattr(transformer, 'is_regressor') and
                 transformer.is_regressor())):
            params['y'] = y
        cached = self.cache_.get(params)
        if cached is None:
            cloned_transformer = clone(transformer)
            Xt, fitted_transformer = _fit_transform_one(
                cloned_transformer, Xt, y, None,
                message_clsname='PipelineCache',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
            self.cache_.cache(params, fitted_transformer)
        else:
            fitted_transformer = cached
            Xt = fitted_transformer.transform(Xt)
        self.steps[step_idx] = (name, fitted_transformer)
    if isskl023():
        return Xt
    if self._final_estimator == 'passthrough':
        return Xt, {}
    return Xt, fit_params_steps[self.steps[-1][0]]
def fit_resample(self, X, y=None, sample_weight=None, **fit_params):
    """Fit the model and sample with the final estimator.

    Fits all the transformers/samplers one after the other and
    transform/sample the data, then uses fit_resample on transformed
    data with the final estimator.

    Parameters
    ----------
    X : iterable
        Training data. Must fulfill input requirements of first step of
        the pipeline.

    y : iterable, default=None
        Training targets. Must fulfill label requirements for all steps
        of the pipeline.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights, resampled along with ``X`` and ``y``.

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of each step, where
        each parameter name is prefixed such that parameter ``p`` for
        step ``s`` has key ``s__p``.

    Returns
    -------
    Xt : array-like of shape (n_samples, n_transformed_features)
        Transformed samples.

    yt : array-like of shape (n_samples,)
        Transformed target.
    """
    fit_params_steps = self._check_fit_params(**fit_params)
    if sample_weight is None:
        Xt, yt = self._fit(X, y, **fit_params_steps)
    else:
        Xt, yt, sample_weight = self._fit(X, y,
                                          sample_weight=sample_weight,
                                          **fit_params_steps)
    last_step = self._final_estimator
    with _print_elapsed_time("Pipeline",
                             self._log_message(len(self.steps) - 1)):
        if last_step == "passthrough":
            return Xt
        fit_params_last_step = fit_params_steps[self.steps[-1][0]]
        if hasattr(last_step, "fit_resample"):
            return last_step.fit_resample(Xt, yt, **fit_params_last_step)
def _fit_transform_one(transformer, X, y, weight, message_clsname='',
                       message=None, **fit_params):
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, "fit_transform"):
            res = transformer.fit_transform(X, y, **fit_params)
        else:
            res = transformer.fit(X, y, **fit_params).transform(X)
    # if we have a weight for this transformer, multiply output
    if weight is None:
        return res, transformer
    return res * weight, transformer
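# Minimal usage sketch for the canonical _fit_transform_one above;
# StandardScaler stands in for any transformer and the data is made up
# for the demo (both are assumptions).
import numpy as np
from sklearn.preprocessing import StandardScaler

X_demo = np.arange(12, dtype=float).reshape(6, 2)
Xt, fitted = _fit_transform_one(StandardScaler(), X_demo, None, weight=None,
                                message_clsname="Pipeline",
                                message="scaling step")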
def _fit(self, X, y=None, fit_index=None, **fit_params_steps):
    # shallow copy of steps - this should really be steps_
    self.steps = list(self.steps)
    self._validate_steps()

    # For each transformer
    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        with _print_elapsed_time('Pipeline', self._log_message(step_idx)):

            # Skip if passthrough
            if transformer is None or transformer == 'passthrough':
                continue

            # Clone transformer
            cloned_transformer = clone(transformer)

            # Get the correct fit_transform params
            fit_trans_params = _get_est_fit_params(
                estimator=cloned_transformer,
                mapping=self.mapping_,
                fit_index=fit_index,
                other_params=fit_params_steps[name],
                copy_mapping=False)

            # Fit-transform with the current transformer
            X = cloned_transformer.fit_transform(X=X, y=y,
                                                 **fit_trans_params)

            # Print if an estimator is skipped, if verbose
            if cloned_transformer.estimator_ is None:
                if self.verbose:
                    print('Skipping Step:', name, 'due to empty scope.',
                          flush=True)

            # Replace the transformer of the step with the
            # cloned and now fitted transformer
            self.steps[step_idx] = (name, cloned_transformer)

    return X
def _fit(self, X: np.ndarray, y: np.ndarray = None,
         logger: ProcessLogger = None, prefix: str = None,
         **fit_params_steps: Dict):
    # shallow copy of steps - this should really be steps_
    self.steps = list(self.steps)
    self._validate_steps()

    Xt = X
    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue

        cloned_transformer = clone(transformer)

        # Configure transformer on the fly if necessary
        if self.configuration is None:
            config: Configuration = self._get_config_for_step(
                step_idx, prefix, name, logger)
            cloned_transformer.set_hyperparameters(
                configuration=config.get_dictionary())

        start = timeit.default_timer()
        Xt, fitted_transformer = _fit_transform_one(
            cloned_transformer, Xt, y, None,
            message_clsname='Pipeline',
            message=self._log_message(step_idx),
            **fit_params_steps[name])
        self.fit_time += timeit.default_timer() - start

        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    return Xt
def fit(self, X_train, y_train, X_valid=None, y_valid=None, X_test=None,
        y_test=None):
    ret = self._fit(X_train, y_train, X_valid, y_valid, X_test, y_test)
    X_train = ret["X_train"]
    X_valid = ret.get("X_valid")
    X_test = ret.get("X_test")
    y_train = ret.get("y_train")
    self.last_data = ret
    with _print_elapsed_time('Pipeline',
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != 'passthrough':
            self._final_estimator.fit(X_train, y_train, X_valid, y_valid,
                                      X_test, y_test)
    return self
def _fit_resample_one(sampler, X, y, sample_weight=None, message_clsname="",
                      message=None, **fit_params):
    with _print_elapsed_time(message_clsname, message):
        out = sampler.fit_resample(X, y, sample_weight=sample_weight,
                                   **fit_params)
    if sample_weight is None:
        X_res, y_res = out
        return X_res, y_res, sampler
    else:
        X_res, y_res, sample_weight_res = out
        return X_res, y_res, sample_weight_res, sampler
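# A toy sampler to exercise the sample_weight branch above; it is purely
# hypothetical, since most real samplers do not accept sample_weight in
# fit_resample.
import numpy as np

class EverySecondRowSampler:
    """Keeps every second row and propagates the matching weights."""
    def fit_resample(self, X, y, sample_weight=None, **kwargs):
        idx = np.arange(len(y))[::2]
        if sample_weight is None:
            return X[idx], y[idx]
        return X[idx], y[idx], sample_weight[idx]

X_res, y_res, w_res, sampler = _fit_resample_one(
    EverySecondRowSampler(), np.eye(4), np.array([0, 1, 0, 1]),
    sample_weight=np.full(4, 0.5), message_clsname="Pipeline",
    message="toy resampling")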
def fit(self, X_train, y_train, X_valid=None, y_valid=None, X_test=None,
        y_test=None, intermediate_result=None):
    result = self._fit(X_train, y_train, X_valid, y_valid, X_test,
                       y_test, intermediate_result)
    X_train = result["X_train"]
    X_valid = result.get("X_valid")
    X_test = result.get("X_test")
    y_train = result.get("y_train")
    self.last_data = result
    with _print_elapsed_time('Pipeline',
                             self._log_message(len(self.steps) - 1)):
        self._final_estimator.resource_manager = self.resource_manager
        self._final_estimator.fit(X_train, y_train, X_valid, y_valid,
                                  X_test, y_test)
        self._final_estimator.resource_manager = None
    return self
def _fit_transform_one(transformer, X, y, weight, message_clsname='',
                       message=None, **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is
    returned with the fitted transformer. If ``weight`` is not ``None``,
    the result will be multiplied by ``weight``.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            res = transformer.fit_transform(X, y, **fit_params)
        else:
            res = transformer.fit(X, y, **fit_params).transform(X)
        X, y = _wrap_result(res, y)
    if weight is None:
        return X, y, transformer
    return X * weight, y, transformer
def _fit(self, X, y=None, **fit_params_steps):
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(skpipeline._fit_transform_one)

    conf_score = None
    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue
        if hasattr(memory, 'location'):
            # joblib >= 0.12
            if memory.location is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        elif hasattr(memory, 'cachedir'):
            # joblib < 0.11
            if memory.cachedir is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        else:
            cloned_transformer = clone(transformer)
        # Fit or load from cache the current transformer
        if hasattr(cloned_transformer, "transform") or hasattr(
                cloned_transformer, "fit_transform"):
            res, fitted_transformer = fit_transform_one_cached(
                cloned_transformer, X, y, None,
                message_clsname='Pipeline',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
            # This ugly if/else can be removed if transformers return
            # additional values (e.g. `conf_score`) in a dict, which can
            # be appended to the `fit_params_steps` dict.
            if isinstance(res, tuple):
                if len(res) == 3:
                    X, y, conf_score = res
                elif len(res) == 2:
                    X, y = res
            else:
                X = res
        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    return X, y, conf_score
def fit(self, X, y=None, mapping=None, fit_index=None, **fit_params):
    if isinstance(X, pd.DataFrame):
        # Set train data index
        fit_index = X.index
        # Cast to np array
        X = np.array(X)
    if isinstance(y, (pd.DataFrame, pd.Series)):
        # Cast to np array
        y = np.array(y)

    if self.cache_loc is not None:
        # Compute the hash for this fit and store it as an attribute
        self.hash_ = pipe_hash([X, y, mapping, fit_index, fit_params],
                               self.steps)
        # Check if the hash exists - if it does, load and return
        if os.path.exists(self._get_hash_loc()):
            self._load_from_hash()
            return self
        # Otherwise, continue to fit as normal

    # Set internal mapping as either the passed mapping or
    # a newly initialized 1:1 mapping.
    if mapping is not None:
        self.mapping_ = mapping.copy()
    else:
        self.mapping_ = {i: i for i in range(X.shape[1])}

    # The base parent fit
    # -------------------
    # Get fit params as indexed by each step
    fit_params_steps = self._check_fit_params(**fit_params)

    # Fit and transform X for all but the last step.
    Xt = self._fit(X, y, fit_index=fit_index, **fit_params_steps)

    # Fit the final step
    with _print_elapsed_time('Pipeline',
                             self._log_message(len(self.steps) - 1)):
        if self._final_estimator != 'passthrough':

            # Get the last step's fit params
            fit_params_last_step = fit_params_steps[self.steps[-1][0]]

            # Add mapping and train data index if valid
            fit_params_last_step = _get_est_fit_params(
                self._final_estimator,
                mapping=self.mapping_,
                fit_index=fit_index,
                other_params=fit_params_last_step,
                copy_mapping=False)

            # Fit the final estimator
            self._final_estimator.fit(Xt, y, **fit_params_last_step)

    # If cached fitting is enabled, hash the fitted pipe here
    if self.cache_loc is not None:
        self._hash_fit()

    return self
def test_print_elapsed_time(message, expected, capsys, monkeypatch):
    monkeypatch.setattr(timeit, 'default_timer', lambda: 0)
    with _print_elapsed_time('ABC', message):
        monkeypatch.setattr(timeit, 'default_timer', lambda: 0.1)
    assert capsys.readouterr().out == expected
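# For reference, a minimal re-implementation of the context manager these
# snippets rely on. scikit-learn's real _print_elapsed_time also pads the
# message with dots to a fixed width, so treat this as a behavioral sketch
# under that assumption, not the actual API.
from contextlib import contextmanager
import timeit

@contextmanager
def _print_elapsed_time_sketch(source, message=None):
    if message is None:
        yield  # silent when no message is given
    else:
        start = timeit.default_timer()
        yield
        print("[%s] %s, total=%6.1fs"
              % (source, message, timeit.default_timer() - start))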
def _fit(self, X, y=None, **fit_params):
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
    fit_resample_one_cached = memory.cache(_fit_resample_one)

    fit_params_steps = {name: {} for name, step in self.steps
                        if step is not None}
    for pname, pval in fit_params.items():
        if '__' not in pname:
            raise ValueError(
                "Pipeline.fit does not accept the {} parameter. "
                "You can pass parameters to specific steps of your "
                "pipeline using the stepname__parameter format, e.g. "
                "`Pipeline.fit(X, y, logisticregression__sample_weight"
                "=sample_weight)`.".format(pname))
        step, param = pname.split("__", 1)
        fit_params_steps[step][param] = pval

    for (step_idx, name, transformer) in self._iter(
            with_final=False, filter_passthrough=False,
            filter_resample=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue
        if hasattr(memory, "location"):
            # joblib >= 0.12
            if memory.location is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        elif hasattr(memory, "cachedir"):
            # joblib <= 0.11
            if memory.cachedir is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        # Fit or load from cache the current transformer
        if hasattr(cloned_transformer, "transform") or hasattr(
                cloned_transformer, "fit_transform"):
            X, fitted_transformer = fit_transform_one_cached(
                cloned_transformer, X, y, None,
                message_clsname='Pipeline',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
        elif hasattr(cloned_transformer, "fit_resample"):
            X, y, fitted_transformer = fit_resample_one_cached(
                cloned_transformer, X, y,
                message_clsname='Pipeline',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    if self._final_estimator == "passthrough":
        return X, y, {}
    return X, y, fit_params_steps[self.steps[-1][0]]
def _fit(self, X, y=None, **fit_params_steps):
    # shallow copy of steps - this should really be steps_
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(_fit_transform_one)

    for (step_idx, name, transformer) in self._iter(with_final=False,
                                                    filter_passthrough=False):
        if transformer is None or transformer == 'passthrough':
            with _print_elapsed_time('Pipeline',
                                     self._log_message(step_idx)):
                continue
        if hasattr(memory, 'location'):
            # joblib >= 0.12
            if memory.location is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        elif hasattr(memory, 'cachedir'):
            # joblib < 0.11
            if memory.cachedir is None:
                # we do not clone when caching is disabled to
                # preserve backward compatibility
                cloned_transformer = transformer
            else:
                cloned_transformer = clone(transformer)
        else:
            cloned_transformer = clone(transformer)
        if not self._skip_transform(cloned_transformer):
            if isinstance(cloned_transformer, YTransformer):
                y, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, y, X, None,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
            elif isinstance(cloned_transformer,
                            (XAndYTransformer, XOrYTransformer)):
                X, y, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, X, y, None,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
            else:
                # Fit or load from cache the current transformer
                X, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, X, y, None,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
        else:
            # Not in train mode and the train-only wrapper is set:
            # leave the transformer untouched.
            fitted_transformer = cloned_transformer
        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    return X, y
def _fit(self, X, y=None, **fit_params):
    self.steps = list(self.steps)
    self._validate_steps()

    # Setup the memory
    memory = check_memory(self.memory)

    fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
    fit_resample_one_cached = memory.cache(_fit_resample_one)

    fit_params_steps = {name: {} for name, step in self.steps
                        if step is not None}
    for pname, pval in fit_params.items():
        if "__" not in pname:
            raise ValueError(
                f"Pipeline.fit does not accept the {pname} parameter. "
                "You can pass parameters to specific steps of your "
                "pipeline using the stepname__parameter format, e.g. "
                "`Pipeline.fit(X, y, logisticregression__sample_weight"
                "=sample_weight)`.")
        step, param = pname.split("__", 1)
        fit_params_steps[step][param] = pval

    for (step_idx, name, transformer) in self._iter(
            with_final=False, filter_passthrough=False,
            filter_resample=False):
        if transformer is None or transformer == "passthrough":
            with _print_elapsed_time("Pipeline",
                                     self._log_message(step_idx)):
                continue
        try:
            # joblib >= 0.12
            mem = memory.location
        except AttributeError:
            mem = memory.cachedir
        finally:
            cloned_transformer = clone(transformer) if mem else transformer
        # Fit or load from cache the current transformer
        if hasattr(cloned_transformer, "transform") or hasattr(
                cloned_transformer, "fit_transform"):
            X, fitted_transformer = fit_transform_one_cached(
                cloned_transformer, X, y, None,
                message_clsname="Pipeline",
                message=self._log_message(step_idx),
                **fit_params_steps[name],
            )
        elif hasattr(cloned_transformer, "fit_resample"):
            X, y, fitted_transformer = fit_resample_one_cached(
                cloned_transformer, X, y,
                message_clsname="Pipeline",
                message=self._log_message(step_idx),
                **fit_params_steps[name],
            )
        # Replace the transformer of the step with the fitted
        # transformer. This is necessary when loading the transformer
        # from the cache.
        self.steps[step_idx] = (name, fitted_transformer)
    if self._final_estimator == "passthrough":
        return X, y, {}
    return X, y, fit_params_steps[self.steps[-1][0]]