示例#1
0
def _fit_single_estimator(estimator,
                          X,
                          y,
                          sample_weight=None,
                          message_clsname=None,
                          message=None):
    """Private function used to fit an estimator within a job.

    When ``sample_weight`` is given it is forwarded to ``fit``; estimators
    whose ``fit`` does not accept the keyword raise a descriptive TypeError.
    A ``message_clsname`` of ``"catboost"`` takes a special fit path that
    builds CatBoost-specific keyword arguments.
    """
    if sample_weight is not None:
        try:
            with _print_elapsed_time(message_clsname, message):
                estimator.fit(X, y, sample_weight=sample_weight)
        except TypeError as exc:
            # Only translate the "fit() got an unexpected keyword" failure;
            # any other TypeError is re-raised untouched.
            if "unexpected keyword argument 'sample_weight'" in str(exc):
                raise TypeError(
                    "Underlying estimator {} does not support sample weights.".
                    format(estimator.__class__.__name__)) from exc
            raise
    elif message_clsname == "catboost":
        # NOTE(review): relies on module-level ``X_validation``/``y_validation``
        # and ``getCatgoricalFeatures`` (sic) being defined elsewhere in the
        # module — confirm they exist before calling with message_clsname set.
        # This branch also skips the _print_elapsed_time wrapper — verify
        # that is intentional.
        kwargs = {}
        kwargs['X'] = X
        kwargs['y'] = y
        kwargs['cat_features'] = getCatgoricalFeatures(X_validation)
        kwargs['eval_set'] = (X_validation, y_validation)
        estimator.fit(**kwargs)
    else:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y)
    return estimator
示例#2
0
    def fit(self,
            X: np.ndarray,
            y: np.ndarray = None,
            logger: ProcessLogger = None,
            prefix: str = None,
            **fit_params: Dict):
        """Fit all pipeline steps, then the final estimator.

        The pipeline must either have a fixed configuration
        (``set_hyperparameters``) or a configuration cache to draw from;
        otherwise a ValueError is raised before any work is done.
        """
        if self.configuration is None and self.cfg_cache is None:
            raise ValueError(
                'Pipeline is not configured yet. Either call set_hyperparameters or provide a ConfigGenerator'
            )

        step_params = self._check_fit_params(**fit_params)
        Xt = self._fit(X, y, logger=logger, prefix=prefix, **step_params)

        final_idx = len(self.steps) - 1
        with _print_elapsed_time("Pipeline", self._log_message(final_idx)):
            if self._final_estimator != "passthrough":
                if self.configuration is None:
                    # No fixed configuration: draw one for the last step now.
                    config = self._get_config_for_step(
                        final_idx, prefix, self.steps[-1][0], logger)
                    self._final_estimator.set_hyperparameters(
                        configuration=config.get_dictionary())
                self._final_estimator.fit(Xt, y,
                                          **step_params[self.steps[-1][0]])

        return self
示例#3
0
def _fit_transform_one(transformer,
                       X_train,
                       y_train,
                       X_valid=None,
                       y_valid=None,
                       X_test=None,
                       y_test=None,
                       resource_manager=None,
                       message_clsname='',
                       message=None):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned
    with the fitted transformer. If ``weight`` is not ``None``, the result will
    be multiplied by ``weight``.
    """
    # The resource manager is attached only for the duration of the fit
    # and detached again before returning.
    transformer.resource_manager = resource_manager
    with _print_elapsed_time(message_clsname, message):
        if not hasattr(transformer, 'fit_transform'):
            fitted = transformer.fit(X_train, y_train, X_valid, y_valid,
                                     X_test, y_test)
            result = fitted.transform(X_train, X_valid, X_test, y_train)
        else:
            result = transformer.fit_transform(X_train, y_train, X_valid,
                                               y_valid, X_test, y_test)
    transformer.resource_manager = None
    return result, transformer
示例#4
0
    def fit_predict(self, X, y=None, **fit_params):
        """Transform the data, then ``fit_predict`` with the last step.

        All transform steps are fitted and applied to ``X``/``y``; the
        resulting data is handed to the final estimator's ``fit_predict``.
        Valid only when the final estimator implements ``fit_predict``.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of the first
            pipeline step.
        y : iterable, default=None
            Training targets. Must fulfill label requirements of all steps.
        **fit_params : dict of string -> object
            Step-prefixed fit parameters (``s__p`` routes ``p`` to step ``s``).

        Returns
        -------
        y_pred : array-like
        """
        params_by_step = self._check_fit_params(**fit_params)
        Xt, yt = self._fit(X, y, **params_by_step)

        last_name = self.steps[-1][0]
        last_estimator = self.steps[-1][-1]
        with _print_elapsed_time('Pipeline',
                                 self._log_message(len(self.steps) - 1)):
            y_pred = last_estimator.fit_predict(Xt, yt,
                                                **params_by_step[last_name])
        return y_pred
示例#5
0
 def fit_transform(self, X, y=None, **fit_params):
     """Fit all steps, then ``fit_transform`` with the final estimator.

     Every transform step is fitted and applied in order; the final
     estimator is then fitted on (and used to transform) the result.

     Parameters
     ----------
     X : iterable
         Training data. Must fulfill input requirements of the first step.
     y : iterable, default=None
         Training targets. Must fulfill label requirements of all steps.
     **fit_params : dict of string -> object
         Step-prefixed fit parameters (``s__p`` routes ``p`` to step ``s``).

     Returns
     -------
     Xt : array-like, shape = [n_samples, n_transformed_features]
         Transformed samples
     """
     final = self._final_estimator
     Xt, yt, fit_params = self._fit(X, y, **fit_params)
     with _print_elapsed_time('Pipeline',
                              self._log_message(len(self.steps) - 1)):
         if final == 'passthrough':
             return Xt, yt
         if not hasattr(final, 'fit_transform'):
             res = final.fit(Xt, yt, **fit_params).transform(Xt)
         else:
             res = final.fit_transform(Xt, yt, **fit_params)
         return _wrap_result(res, yt)
示例#6
0
def _fit_transform_one(transformer,
                       X,
                       y,
                       weight,
                       message_clsname='',
                       message=None,
                       **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned
    with the fitted transformer. If ``weight`` is not ``None``, the result will
    be multiplied by ``weight``.

    Transformers may return either ``(Xt, yt)`` or just ``Xt``; in the
    latter case the incoming ``y`` is passed through unchanged.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            try:
                Xt, yt = transformer.fit_transform(X, y, **fit_params)
            except Exception:
                # Best-effort fallback: transformer returned X only.
                Xt = transformer.fit_transform(X, y, **fit_params)
                yt = y
        else:
            try:
                Xt, yt = transformer.fit(X, y, **fit_params).transform(X, y)
            except Exception:
                # Best-effort fallback: transform(X) signature without y.
                Xt = transformer.fit(X, y, **fit_params).transform(X)
                yt = y

    if weight is not None:
        return Xt * weight, yt, transformer
    return Xt, yt, transformer
    def fit(self, X, y=None, **fit_params):
        """Fit the whole pipeline.

        Each transform/sampler is fitted and applied to the data in order,
        and the final estimator is then fitted on the transformed/sampled
        result.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of the first
            pipeline step.

        y : iterable, default=None
            Training targets. Must fulfill label requirements of all steps.

        **fit_params : dict of str -> object
            Step-prefixed fit parameters (``s__p`` routes ``p`` to step
            ``s``).

        Returns
        -------
        self : Pipeline
            This estimator.
        """
        params_by_step = self._check_fit_params(**fit_params)
        Xt, yt = self._fit(X, y, **params_by_step)
        last_idx = len(self.steps) - 1
        with _print_elapsed_time("Pipeline", self._log_message(last_idx)):
            if self._final_estimator != "passthrough":
                last_params = params_by_step[self.steps[-1][0]]
                self._final_estimator.fit(Xt, yt, **last_params)
        return self
示例#8
0
File: pipeline.py — Project: tcsvn/pyadlml
def _fit_transform_one(transformer,
                       X,
                       y,
                       weight,
                       message_clsname='',
                       message=None,
                       **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned
    with the fitted transformer. If ``weight`` is not ``None``, the result will
    be multiplied by ``weight``.

    X/Y transformers return a ``(Xt, yt)`` tuple that is unpacked in front
    of the fitted transformer; plain transformers return ``Xt`` directly.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            res = transformer.fit_transform(X, y, **fit_params)
        else:
            res = transformer.fit(X, y, **fit_params).transform(X)

    # Compute the type test once instead of repeating the isinstance chain
    # in every branch (behavior is otherwise identical to the four-way elif).
    returns_xy = isinstance(transformer, (XAndYTransformer, XOrYTransformer))
    if weight is not None:
        res = res * weight
    if returns_xy:
        return *res, transformer
    return res, transformer
示例#9
0
    def fit(self, X, y=None, **fit_params):
        """
        Fits every transform in order, then fits the final estimator on
        the transformed data.

        :param X: iterable
            Training data. Must fulfill input requirements of the first
            pipeline step.
        :param y: iterable, default=None
            Training targets. Must fulfill label requirements of all steps.
        :param fit_params: dict of string -> object
            Step-prefixed fit parameters (``s__p`` routes ``p`` to step
            ``s``).
        :return: self, Pipeline, this estimator
        """
        params_per_step = self._check_fit_params(**fit_params)
        Xt = self._fit(X, y, **params_per_step)
        last_idx = len(self.steps) - 1
        with _print_elapsed_time('OnnxPipeline', self._log_message(last_idx)):
            if self._final_estimator != 'passthrough':
                self._final_estimator.fit(
                    Xt, y, **params_per_step[self.steps[-1][0]])

        return self
示例#10
0
def _fit_single_estimator(estimator, X, y, sample_weight=None,
                          message_clsname=None, message=None):
    """Private function used to fit an estimator within a job.

    ``sample_weight`` is forwarded to ``fit`` when given; estimators whose
    ``fit`` does not accept the keyword raise a descriptive TypeError.
    """
    if sample_weight is None:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y)
        return estimator

    try:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y, sample_weight=sample_weight)
    except TypeError as exc:
        # Only translate the missing-keyword failure; re-raise the rest.
        if "unexpected keyword argument 'sample_weight'" in str(exc):
            raise TypeError(
                "Underlying estimator {} does not support sample weights."
                .format(estimator.__class__.__name__)
            ) from exc
        raise
    return estimator
示例#11
0
def _fit_resample_one(sampler,
                      X,
                      y,
                      message_clsname="",
                      message=None,
                      **fit_params):
    """Resample ``(X, y)`` with ``sampler`` inside a timed block.

    Returns the resampled data together with the fitted sampler.
    """
    with _print_elapsed_time(message_clsname, message):
        resampled_X, resampled_y = sampler.fit_resample(X, y, **fit_params)
        return resampled_X, resampled_y, sampler
示例#12
0
def _fit_one(transformer,
             X,
             y,
             weight,
             message_clsname='',
             message=None,
             **fit_params):
    """
    Fits ``transformer`` to ``X`` and ``y``.

    ``weight`` is accepted for signature compatibility but not used here.
    """
    with _print_elapsed_time(message_clsname, message):
        fitted = transformer.fit(X, y, **fit_params)
    return fitted
示例#13
0
    def _fit(self, X, y=None, **fit_params_steps):
        """Fit all non-final steps, caching fitted transformers.

        ``raw_steps_`` keeps the fitted (non-ONNX) transformers so a later
        refit can reuse them; ``steps`` holds the ONNX-converted versions
        produced by ``self._to_onnx``. Returns the transformed ``X``.
        """
        # shallow copy of steps - this should really be steps_
        if hasattr(self, 'raw_steps_') and self.raw_steps_ is not None:  # pylint: disable=E0203
            # Let's reuse the previous training.
            self.steps = list(self.raw_steps_)  # pylint: disable=E0203
            self.raw_steps_ = list(self.raw_steps_)
        else:
            self.steps = list(self.steps)
            self.raw_steps_ = list(self.steps)

        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(_fit_transform_one)

        for (step_idx, name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue

            if hasattr(memory, 'location'):
                # joblib >= 0.12
                if memory.location is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            else:
                # Older joblib without the ``location`` attribute.
                cloned_transformer = clone(transformer)

            # Fit or load from cache the current transformer
            # Keep the pre-transform data: _to_onnx needs the step's input.
            x_train = X
            X, fitted_transformer = fit_transform_one_cached(
                cloned_transformer,
                X,
                y,
                None,
                message_clsname='Pipeline',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.raw_steps_[step_idx] = (name, fitted_transformer)
            self.steps[step_idx] = (name,
                                    self._to_onnx(name, fitted_transformer,
                                                  x_train))
        return X
示例#14
0
    def partial_fit(self, X, y=None, classes=None, **fit_params):
        """Incrementally fit the model.

        On the first call all transforms/samplers are fitted and the
        transformed/sampled data is cached on ``Xt_``/``yt_``; subsequent
        calls reuse the cache and only ``partial_fit`` the final estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of the first
            pipeline step.

        y : iterable, default=None
            Training targets. Must fulfill label requirements of all steps.

        classes : array-like, default=None
            Forwarded to the final estimator's ``partial_fit`` when it
            accepts the keyword.

        **fit_params : dict of str -> object
            Parameters passed to the final estimator's ``partial_fit``.

        Returns
        -------
        self : Pipeline
            This estimator.
        """
        try:
            self.Xt_
        except AttributeError:
            # First call: the cached transformed data does not exist yet.
            # (was a bare ``except:`` — narrowed so real errors propagate)
            self.Xt_ = None
            self.yt_ = None
        if self.Xt_ is None or self.yt_ is None:
            Xt, yt, _ = self._fit(X, y)
            self.Xt_ = Xt
            self.yt_ = yt
        else:
            Xt = self.Xt_
            yt = self.yt_
        with _print_elapsed_time("Pipeline",
                                 self._log_message(len(self.steps) - 1)):
            if self._final_estimator != "passthrough":
                # the try...except block is a workaround until tune-sklearn updates
                try:
                    self._final_estimator.partial_fit(Xt,
                                                      yt,
                                                      classes=classes,
                                                      **fit_params)
                except TypeError:
                    # Estimator does not accept ``classes``.
                    self._final_estimator.partial_fit(Xt, yt, **fit_params)
        self._carry_over_final_estimator_fit_vars()
        return self
示例#15
0
    def _fit(self, X, y=None, **fit_params_steps):
        """Fit all non-final steps, dispatching transformers vs samplers.

        Transformers (``transform``/``fit_transform``) change only ``X``;
        samplers (``fit_resample``) change both ``X`` and ``y``. Fitted
        steps are written back into ``self.steps``. Returns ``(X, y)``
        after all non-final steps.
        """
        self.steps = list(self.steps)
        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
        fit_resample_one_cached = memory.cache(_fit_resample_one)

        for (step_idx, name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False,
                                        filter_resample=False):
            if transformer is None or transformer == "passthrough":
                with _print_elapsed_time("Pipeline",
                                         self._log_message(step_idx)):
                    continue

            try:
                # joblib >= 0.12
                mem = memory.location
            except AttributeError:
                # joblib < 0.12 exposes ``cachedir`` instead.
                mem = memory.cachedir
            finally:
                # Clone only when caching is enabled, to preserve
                # backward compatibility when it is disabled.
                cloned_transformer = clone(transformer) if mem else transformer

            # Fit or load from cache the current transformer
            if hasattr(cloned_transformer, "transform") or hasattr(
                    cloned_transformer, "fit_transform"):
                X, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    None,
                    message_clsname="Pipeline",
                    message=self._log_message(step_idx),
                    **fit_params_steps[name],
                )
            elif hasattr(cloned_transformer, "fit_resample"):
                X, y, fitted_transformer = fit_resample_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    message_clsname="Pipeline",
                    message=self._log_message(step_idx),
                    **fit_params_steps[name],
                )
            # NOTE(review): a step with neither transform/fit_transform nor
            # fit_resample would leave ``fitted_transformer`` unbound here —
            # presumably _validate_steps rules that out; confirm.
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        return X, y
示例#16
0
    def _fit(self, X, y=None, **fit_params):
        """Fit all non-final steps using an MLCache-backed transformer cache.

        The cache key is the transformer's params plus its class name and
        the current input (and ``y`` for classifier/regressor steps). On a
        cache hit the stored fitted transformer is reused and only
        ``transform`` is run. The return contract depends on the installed
        scikit-learn version (see ``isskl023``).
        """

        self.steps = list(self.steps)
        self._validate_steps()
        fit_params_steps = self._get_fit_params_steps(fit_params)
        if not MLCache.has_cache(self.cache_name):
            self.cache_ = MLCache.create_cache(self.cache_name)
        else:
            self.cache_ = MLCache.get_cache(self.cache_name)
        Xt = X
        for (step_idx, name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue

            # Build the cache key: hyperparameters + class + current input.
            params = transformer.get_params()
            params['__class__'] = transformer.__class__.__name__
            params['X'] = Xt
            # Supervised steps also key on the targets.
            if ((hasattr(transformer, 'is_classifier')
                 and transformer.is_classifier())
                    or (hasattr(transformer, 'is_regressor')
                        and transformer.is_regressor())):
                params['y'] = y
            cached = self.cache_.get(params)
            if cached is None:
                # Cache miss: fit a clone and store the fitted transformer.
                cloned_transformer = clone(transformer)
                Xt, fitted_transformer = _fit_transform_one(
                    cloned_transformer,
                    Xt,
                    y,
                    None,
                    message_clsname='PipelineCache',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
                self.cache_.cache(params, fitted_transformer)
            else:
                # Cache hit: reuse the fitted transformer, transform only.
                fitted_transformer = cached
                Xt = fitted_transformer.transform(Xt)

            self.steps[step_idx] = (name, fitted_transformer)
        # sklearn >= 0.23 expects just Xt; older versions expect
        # (Xt, fit_params for the final step).
        if isskl023():
            return Xt
        if self._final_estimator == 'passthrough':
            return Xt, {}
        return Xt, fit_params_steps[self.steps[-1][0]]
    def fit_resample(self, X, y=None, sample_weight=None, **fit_params):
        """Fit the model and sample with the final estimator.

        Fits all the transformers/samplers one after the other and
        transform/sample the data, then uses fit_resample on transformed
        data with the final estimator.

        Parameters
        ----------
        X : iterable
            Training data. Must fulfill input requirements of first step of the
            pipeline.

        y : iterable, default=None
            Training targets. Must fulfill label requirements for all steps of
            the pipeline.

        sample_weight : array-like, default=None
            Per-sample weights; when given, ``_fit`` also returns the
            resampled weights alongside ``Xt``/``yt``.

        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of each step, where
            each parameter name is prefixed such that parameter ``p`` for step
            ``s`` has key ``s__p``.

        Returns
        -------
        Xt : array-like of shape (n_samples, n_transformed_features)
            Transformed samples.

        yt : array-like of shape (n_samples, n_transformed_features)
            Transformed target.

        Notes
        -----
        NOTE(review): if the final step is neither "passthrough" nor
        implements ``fit_resample``, control falls off the end and ``None``
        is returned implicitly — confirm whether this snippet is truncated.
        """
        fit_params_steps = self._check_fit_params(**fit_params)

        if sample_weight is None:
            Xt, yt = self._fit(X, y, **fit_params_steps)
        else:
            Xt, yt, sample_weight = self._fit(X,
                                              y,
                                              sample_weight=sample_weight,
                                              **fit_params_steps)

        last_step = self._final_estimator
        with _print_elapsed_time("Pipeline",
                                 self._log_message(len(self.steps) - 1)):
            if last_step == "passthrough":
                return Xt
            fit_params_last_step = fit_params_steps[self.steps[-1][0]]
            if hasattr(last_step, "fit_resample"):
                return last_step.fit_resample(Xt, yt, **fit_params_last_step)
示例#18
0
def _fit_transform_one(transformer,
                       X,
                       y,
                       weight,
                       message_clsname='',
                       message=None,
                       **fit_params):
    """Fit ``transformer`` and return its transformed output.

    Returns ``(result, fitted transformer)``; the result is scaled by
    ``weight`` when one is supplied.
    """
    with _print_elapsed_time(message_clsname, message):
        if not hasattr(transformer, "fit_transform"):
            res = transformer.fit(X, y, **fit_params).transform(X)
        else:
            res = transformer.fit_transform(X, y, **fit_params)
    # if we have a weight for this transformer, multiply output
    if weight is not None:
        return res * weight, transformer
    return res, transformer
示例#19
0
File: BPtPipeline.py — Project: sahahn/BPt
    def _fit(self, X, y=None, fit_index=None, **fit_params_steps):
        """Fit and apply all non-final steps in order.

        Each transformer is cloned, given fit params resolved through
        ``_get_est_fit_params`` (which injects the shared mapping and
        ``fit_index``), and fit-transformed on the running ``X``. The
        cloned, fitted transformer replaces the step. Returns the
        transformed ``X``.
        """

        # shallow copy of steps - this should really be steps_
        self.steps = list(self.steps)
        self._validate_steps()

        # For each transformer
        for (step_idx, name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False):

            with _print_elapsed_time('Pipeline', self._log_message(step_idx)):

                # Skip if passthrough
                if (transformer is None or transformer == 'passthrough'):
                    continue

                # Clone transformer
                cloned_transformer = clone(transformer)

                # Get the correct fit_transform params
                fit_trans_params =\
                    _get_est_fit_params(
                        estimator=cloned_transformer,
                        mapping=self.mapping_,
                        fit_index=fit_index,
                        other_params=fit_params_steps[name],
                        copy_mapping=False)

                # Fit transform the current transformer
                X = cloned_transformer.fit_transform(X=X,
                                                     y=y,
                                                     **fit_trans_params)

                # Print if an estimator is skipped, if verbose
                # NOTE(review): assumes every transformer exposes
                # ``estimator_`` after fitting — confirm for all step types.
                if cloned_transformer.estimator_ is None:
                    if self.verbose:
                        print('Skipping Step:',
                              name,
                              'due to empty scope.',
                              flush=True)

                # Replace the transformer of the step with the
                # cloned and now fitted transformer
                self.steps[step_idx] = (name, cloned_transformer)

        return X
示例#20
0
    def _fit(self,
             X: np.ndarray,
             y: np.ndarray = None,
             logger: ProcessLogger = None,
             prefix: str = None,
             **fit_params_steps: Dict):
        """Fit and transform all non-final steps.

        Steps are cloned, configured on the fly when no fixed configuration
        exists, and fit-transformed in order while accumulating the total
        fit time in ``self.fit_time``. Returns the transformed data.
        """
        # Shallow copy so fitted transformers can be written back per step.
        self.steps = list(self.steps)
        self._validate_steps()

        Xt = X
        for step_idx, name, transformer in self._iter(
                with_final=False, filter_passthrough=False):
            if transformer is None or transformer == 'passthrough':
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue

            cloned = clone(transformer)

            if self.configuration is None:
                # No fixed configuration: draw one for this step now.
                config: Configuration = self._get_config_for_step(
                    step_idx, prefix, name, logger)
                cloned.set_hyperparameters(
                    configuration=config.get_dictionary())

            tic = timeit.default_timer()
            Xt, fitted = _fit_transform_one(
                cloned,
                Xt,
                y,
                None,
                message_clsname='Pipeline',
                message=self._log_message(step_idx),
                **fit_params_steps[name])
            self.fit_time += timeit.default_timer() - tic

            # Write the fitted transformer back onto the step; required
            # when the transformer was loaded from a cache.
            self.steps[step_idx] = (name, fitted)
        return Xt
示例#21
0
 def fit(self,
         X_train,
         y_train,
         X_valid=None,
         y_valid=None,
         X_test=None,
         y_test=None):
     """Fit all steps on the splits, then fit the final estimator.

     ``_fit`` returns a dict with the transformed splits; the original
     ``y_valid``/``y_test`` are passed through unchanged.
     """
     ret = self._fit(X_train, y_train, X_valid, y_valid, X_test, y_test)
     self.last_data = ret
     X_train = ret["X_train"]
     y_train = ret.get("y_train")
     X_valid = ret.get("X_valid")
     X_test = ret.get("X_test")
     with _print_elapsed_time('Pipeline',
                              self._log_message(len(self.steps) - 1)):
         if self._final_estimator != 'passthrough':
             self._final_estimator.fit(X_train, y_train, X_valid, y_valid,
                                       X_test, y_test)
     return self
def _fit_resample_one(sampler,
                      X,
                      y,
                      sample_weight=None,
                      message_clsname="",
                      message=None,
                      **fit_params):
    """Resample ``(X, y)`` with ``sampler``, forwarding ``sample_weight``.

    Returns ``(X_res, y_res, sampler)``; when sample weights are used the
    resampled weights are included: ``(X_res, y_res, sw_res, sampler)``.
    """
    with _print_elapsed_time(message_clsname, message):

        out = sampler.fit_resample(X,
                                   y,
                                   sample_weight=sample_weight,
                                   **fit_params)

        if sample_weight is not None:
            X_res, y_res, sample_weight_res = out
            return X_res, y_res, sample_weight_res, sampler
        X_res, y_res = out
        return X_res, y_res, sampler
示例#23
0
 def fit(self,
         X_train,
         y_train,
         X_valid=None,
         y_valid=None,
         X_test=None,
         y_test=None,
         intermediate_result=None):
     """Fit all steps on the splits, then fit the final estimator.

     The resource manager is lent to the final estimator only for the
     duration of its fit and detached again afterwards.
     """
     result = self._fit(X_train, y_train, X_valid, y_valid, X_test, y_test,
                        intermediate_result)
     self.last_data = result
     X_train = result["X_train"]
     y_train = result.get("y_train")
     X_valid = result.get("X_valid")
     X_test = result.get("X_test")
     with _print_elapsed_time('Pipeline',
                              self._log_message(len(self.steps) - 1)):
         self._final_estimator.resource_manager = self.resource_manager
         self._final_estimator.fit(X_train, y_train, X_valid, y_valid,
                                   X_test, y_test)
         self._final_estimator.resource_manager = None
     return self
示例#24
0
def _fit_transform_one(transformer,
                       X,
                       y,
                       weight,
                       message_clsname='',
                       message=None,
                       **fit_params):
    """Fit ``transformer`` on ``(X, y)`` and return its transformed output.

    Returns ``(X_out, y_out, fitted_transformer)``; when ``weight`` is not
    ``None`` the transformed ``X`` is scaled by ``weight`` first.
    """
    with _print_elapsed_time(message_clsname, message):
        if hasattr(transformer, 'fit_transform'):
            res = transformer.fit_transform(X, y, **fit_params)
        else:
            # No combined fit_transform available: fit, then transform.
            res = transformer.fit(X, y, **fit_params).transform(X)

    X, y = _wrap_result(res, y)

    if weight is not None:
        return X * weight, y, transformer
    return X, y, transformer
示例#25
0
    def _fit(self, X, y=None, **fit_params_steps):
        """Fit/transform every step except the final estimator.

        Parameters are the pipeline input ``X``/``y`` plus per-step fit
        params keyed by step name.  Returns the transformed ``(X, y)`` and
        ``conf_score`` -- the optional third value a transformer may emit
        (``None`` when no transformer produced one).
        """
        # Shallow copy so fitted transformers can be written back per step.
        self.steps = list(self.steps)
        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(skpipeline._fit_transform_one)

        conf_score = None
        for (step_idx,
             name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False):
            if transformer is None or transformer == 'passthrough':
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue

            if hasattr(memory, 'location'):
                # joblib >= 0.12
                if memory.location is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            elif hasattr(memory, 'cachedir'):
                # joblib < 0.11
                if memory.cachedir is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            else:
                cloned_transformer = clone(transformer)

            # Fit or load from cache the current transformer
            # NOTE(review): if a step has neither ``transform`` nor
            # ``fit_transform``, ``fitted_transformer`` is unbound below --
            # presumably _validate_steps rules that out; confirm.
            if hasattr(cloned_transformer, "transform") or hasattr(
                    cloned_transformer, "fit_transform"
            ):
                res, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer, X, y, None,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name]
                )
                # This ugly if/else can be removed if Transformers return
                # additional values (i.e. `conf_score`) in dict. Can be
                # appended to `fit_params_steps` dict.
                if isinstance(res, tuple):
                    if len(res) == 3:
                        X, y, conf_score = res
                    elif len(res) == 2:
                        X, y = res
                else:
                    X = res

            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)

        return X, y, conf_score
示例#26
0
文件: BPtPipeline.py 项目: sahahn/BPt
    def fit(self, X, y=None, mapping=None, fit_index=None, **fit_params):
        """Fit the pipeline, with optional on-disk caching of the fitted state.

        Parameters
        ----------
        X : array-like or pd.DataFrame
            Training data; a DataFrame's index is captured as ``fit_index``
            before ``X`` is cast to a numpy array.
        y : array-like, pd.Series or pd.DataFrame, optional
            Targets; cast to a numpy array when pandas-typed.
        mapping : dict, optional
            Feature-index mapping forwarded to the final estimator; when
            ``None``, an identity mapping over ``X``'s columns is used.
        fit_index : optional
            Train-data index; overwritten by ``X.index`` for DataFrames.
        **fit_params
            Per-step fit parameters in ``stepname__param`` format.

        Returns
        -------
        self
        """

        if isinstance(X, pd.DataFrame):

            # Set train data index
            fit_index = X.index

            # Cast to np array
            X = np.array(X)

        if isinstance(y, (pd.DataFrame, pd.Series)):

            # Cast to np array
            y = np.array(y)

        if self.cache_loc is not None:

            # Compute the hash for this fit
            # Store as an attribute
            self.hash_ = pipe_hash([X, y, mapping, fit_index, fit_params],
                                   self.steps)

            # Check if hash exists - if it does load
            # (skips fitting entirely and restores the cached fitted state)
            if os.path.exists(self._get_hash_loc()):
                self._load_from_hash()

                # end / return!
                return self

            # Otherwise, continue to fit as normal

        # Set internal mapping as either passed mapping or
        # initialize a new 1:1 mapping.
        if mapping is not None:
            self.mapping_ = mapping.copy()
        else:
            self.mapping_ = {i: i for i in range(X.shape[1])}

        # The base parent fit
        # -------------------

        # Get fit params as indexed by each step
        fit_params_steps = self._check_fit_params(**fit_params)

        # Fit and transform X for all but the last step.
        Xt = self._fit(X, y, fit_index=fit_index, **fit_params_steps)

        # Fit the final step
        with _print_elapsed_time('Pipeline',
                                 self._log_message(len(self.steps) - 1)):
            if self._final_estimator != 'passthrough':

                # Get last params fit params
                fit_params_last_step = fit_params_steps[self.steps[-1][0]]

                # Add mapping and train data index if valid
                fit_params_last_step =\
                    _get_est_fit_params(self._final_estimator,
                                        mapping=self.mapping_,
                                        fit_index=fit_index,
                                        other_params=fit_params_last_step,
                                        copy_mapping=False)

                # Fit the final estimator
                self._final_estimator.fit(Xt, y, **fit_params_last_step)

        # If cache fit enabled, hash fitted pipe here
        if self.cache_loc is not None:
            self._hash_fit()

        return self
示例#27
0
def test_print_elapsed_time(message, expected, capsys, monkeypatch):
    """Elapsed-time output matches *expected* for a simulated 0.1 s span."""
    def frozen(value):
        # Return a zero-argument timer stub pinned at ``value``.
        return lambda: value

    # Timer reads 0 on entry and 0.1 on exit, so the reported elapsed
    # time is deterministic regardless of real wall-clock time.
    monkeypatch.setattr(timeit, 'default_timer', frozen(0))
    with _print_elapsed_time('ABC', message):
        monkeypatch.setattr(timeit, 'default_timer', frozen(0.1))
    assert capsys.readouterr().out == expected
示例#28
0
    def _fit(self, X, y=None, **fit_params):
        """Fit/transform (or fit/resample) all but the final pipeline step.

        Returns the transformed ``(X, y)`` and the fit params destined for
        the final step (an empty dict when the final step is
        ``"passthrough"``).

        Raises
        ------
        ValueError
            If a fit param is not in ``stepname__param`` format.
        """
        self.steps = list(self.steps)
        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
        fit_resample_one_cached = memory.cache(_fit_resample_one)

        fit_params_steps = {
            name: {}
            for name, step in self.steps if step is not None
        }
        for pname, pval in fit_params.items():
            if '__' not in pname:
                raise ValueError(
                    "Pipeline.fit does not accept the {} parameter. "
                    "You can pass parameters to specific steps of your "
                    "pipeline using the stepname__parameter format, e.g. "
                    "`Pipeline.fit(X, y, logisticregression__sample_weight"
                    "=sample_weight)`.".format(pname))
            step, param = pname.split("__", 1)
            fit_params_steps[step][param] = pval
        for (step_idx, name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False,
                                        filter_resample=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue
            if hasattr(memory, "location"):
                # joblib >= 0.12
                if memory.location is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            elif hasattr(memory, "cachedir"):
                # joblib <= 0.11
                if memory.cachedir is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    # BUG FIX: this branch previously fell through without
                    # assigning, leaving ``cloned_transformer`` unbound (or
                    # stale from a prior iteration) whenever caching was
                    # enabled under joblib <= 0.11.
                    cloned_transformer = clone(transformer)
            else:
                cloned_transformer = clone(transformer)
            # Fit or load from cache the current transformer
            if hasattr(cloned_transformer, "transform") or hasattr(
                    cloned_transformer, "fit_transform"):
                X, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    None,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
            elif hasattr(cloned_transformer, "fit_resample"):
                X, y, fitted_transformer = fit_resample_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    message_clsname='Pipeline',
                    message=self._log_message(step_idx),
                    **fit_params_steps[name])
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        if self._final_estimator == "passthrough":
            return X, y, {}
        return X, y, fit_params_steps[self.steps[-1][0]]
示例#29
0
文件: pipeline.py 项目: tcsvn/pyadlml
    def _fit(self, X, y=None, **fit_params_steps):
        """Fit/transform all but the final step, dispatching on transformer
        kind: y-only (``YTransformer``), joint X/y
        (``XAndYTransformer``/``XOrYTransformer``), or X-only (default).

        Returns the transformed ``(X, y)``.
        """
        # shallow copy of steps - this should really be steps_
        self.steps = list(self.steps)
        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(_fit_transform_one)

        for (step_idx,
             name,
             transformer) in self._iter(with_final=False,
                                        filter_passthrough=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
                    continue

            if hasattr(memory, 'location'):
                # joblib >= 0.12
                if memory.location is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            elif hasattr(memory, 'cachedir'):
                # joblib < 0.11
                if memory.cachedir is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
                    cloned_transformer = transformer
                else:
                    cloned_transformer = clone(transformer)
            else:
                cloned_transformer = clone(transformer)

            if not self._skip_transform(cloned_transformer):
                if isinstance(cloned_transformer, YTransformer):
                    # y-only transformers receive (y, X) in swapped order.
                    y, fitted_transformer = fit_transform_one_cached(
                        cloned_transformer, y, X, None,
                        message_clsname='Pipeline',
                        message=self._log_message(step_idx),
                        **fit_params_steps[name])
                elif isinstance(cloned_transformer,
                                (XAndYTransformer, XOrYTransformer)):
                    X, y, fitted_transformer = fit_transform_one_cached(
                        cloned_transformer, X, y, None,
                        message_clsname='Pipeline',
                        message=self._log_message(step_idx),
                        **fit_params_steps[name])
                else:
                    # Fit or load from cache the current transformer
                    X, fitted_transformer = fit_transform_one_cached(
                        cloned_transformer, X, y, None,
                        message_clsname='Pipeline',
                        message=self._log_message(step_idx),
                        **fit_params_steps[name])
            else:
                # do nothing if it is not trainmode and the trainonly wrapper set (true)
                fitted_transformer = cloned_transformer
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        return X, y
示例#30
0
def test_print_elapsed_time(message, expected, capsys, monkeypatch):
    # Pin the timer to t=0 before entering the context, then advance it to
    # t=0.1 inside, so the reported elapsed time is a deterministic 0.1 s.
    monkeypatch.setattr(timeit, 'default_timer', lambda: 0)
    with _print_elapsed_time('ABC', message):
        monkeypatch.setattr(timeit, 'default_timer', lambda: 0.1)
    # Captured stdout must match the parametrized expectation exactly.
    assert capsys.readouterr().out == expected
示例#31
0
    def _fit(self, X, y=None, **fit_params):
        """Fit/transform (or fit/resample) all but the final pipeline step.

        Returns the transformed ``(X, y)`` and the fit params destined for
        the final step (an empty dict when the final step is
        ``"passthrough"``).

        Raises
        ------
        ValueError
            If a fit param is not in ``stepname__param`` format.
        """
        self.steps = list(self.steps)
        self._validate_steps()
        # Setup the memory
        memory = check_memory(self.memory)

        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
        fit_resample_one_cached = memory.cache(_fit_resample_one)

        fit_params_steps = {name: {} for name, step in self.steps if step is not None}
        for pname, pval in fit_params.items():
            if "__" not in pname:
                raise ValueError(
                    f"Pipeline.fit does not accept the {pname} parameter. "
                    "You can pass parameters to specific steps of your "
                    "pipeline using the stepname__parameter format, e.g. "
                    "`Pipeline.fit(X, y, logisticregression__sample_weight"
                    "=sample_weight)`."
                )
            step, param = pname.split("__", 1)
            fit_params_steps[step][param] = pval
        for (step_idx, name, transformer) in self._iter(
            with_final=False, filter_passthrough=False, filter_resample=False
        ):
            if transformer is None or transformer == "passthrough":
                with _print_elapsed_time("Pipeline", self._log_message(step_idx)):
                    continue

            # BUG FIX: the previous try/except/finally raised NameError when
            # ``memory`` had neither ``location`` (joblib >= 0.12) nor
            # ``cachedir`` (joblib < 0.12): the except clause itself raised
            # AttributeError, leaving ``mem`` unbound inside ``finally``.
            # Probe explicitly and clone defensively for unknown memory
            # objects, matching the sibling pipeline implementations.
            if hasattr(memory, "location"):
                # joblib >= 0.12
                mem = memory.location
            elif hasattr(memory, "cachedir"):
                # joblib < 0.12
                mem = memory.cachedir
            else:
                mem = True  # unknown memory implementation: always clone
            # We do not clone when caching is disabled (mem falsy) to
            # preserve backward compatibility.
            cloned_transformer = clone(transformer) if mem else transformer

            # Fit or load from cache the current transformer
            if hasattr(cloned_transformer, "transform") or hasattr(
                cloned_transformer, "fit_transform"
            ):
                X, fitted_transformer = fit_transform_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    None,
                    message_clsname="Pipeline",
                    message=self._log_message(step_idx),
                    **fit_params_steps[name],
                )
            elif hasattr(cloned_transformer, "fit_resample"):
                X, y, fitted_transformer = fit_resample_one_cached(
                    cloned_transformer,
                    X,
                    y,
                    message_clsname="Pipeline",
                    message=self._log_message(step_idx),
                    **fit_params_steps[name],
                )
            # Replace the transformer of the step with the fitted
            # transformer. This is necessary when loading the transformer
            # from the cache.
            self.steps[step_idx] = (name, fitted_transformer)
        if self._final_estimator == "passthrough":
            return X, y, {}
        return X, y, fit_params_steps[self.steps[-1][0]]