Пример #1
0
 def fit_transform(self, X, y=None, sample_weight=None, **kwargs):
     '''Call the configured function on X and normalize what it returns.

     The callable is obtained with ``import_callable(self.func)`` and is
     invoked as ``func(X, **kw)`` where ``kw`` is built from ``y``,
     ``sample_weight`` and ``**kwargs``, then overridden by ``self.kwargs``
     (so values stored on the instance win over call-time values).

     :X: sample passed positionally to the callable
     :y: optional target, forwarded as keyword ``y``
     :sample_weight: optional weights, forwarded as keyword ``sample_weight``
     :\\*\\*kwargs: extra keyword arguments forwarded to the callable

     Returns the result of ``_split_pipeline_output`` applied to the
     callable's output, with the incoming X, y, sample_weight supplied
     alongside it (presumably as fallbacks - confirm in sample_pipeline).
     '''
     from elm.sample_util.sample_pipeline import _split_pipeline_output
     call_kwargs = {'y': y, 'sample_weight': sample_weight}
     call_kwargs.update(kwargs)
     # Instance-level kwargs are applied last and therefore take precedence.
     call_kwargs.update(self.kwargs)
     modifier = import_callable(self.func)
     result = modifier(X, **call_kwargs)
     return _split_pipeline_output(
         result, X, y, sample_weight, 'ModifySample')
Пример #2
0
    def create_sample(self, **data_source):
        '''
        Normalize a sampler's output, or directly supplied X, y and
        sample_weight, through _split_pipeline_output (the existing
        contract is a tuple of (X, y, sample_weight)).

        The keyword arguments in data_source are used in one of two ways:

            :sampler: a function taking (*args, **kwargs), returning an
                      X ElmStore or tuple of (X, y, sample_weight) with X
                      as ElmStore.  It is only invoked (through
                      create_sample_from_data_source, which receives all
                      of data_source) when "sampler" is truthy and
                      both X and y are None.
            :sampler_args: typically created by unpacking each element of
                      the "args_list" given to other methods of this class.

        OR

            - :X:, :y:, and/or :sample_weight: keys/values, with X as an
                ElmStore, in which case they are passed through as the
                tuple (X, y, sample_weight).  See usage in ensemble.

        Raises ValueError when data_source has neither a "sampler" nor an
        "args_list" key and X, y and sample_weight are all None.
        '''
        from elm.sample_util.sample_pipeline import (
            create_sample_from_data_source,
            _split_pipeline_output,
        )
        X = data_source.get('X')
        y = data_source.get('y')
        logger.info('Call create_sample')
        sample_weight = data_source.get('sample_weight')

        # Only key presence is checked here, not whether the value is set.
        names_a_source = 'sampler' in data_source or 'args_list' in data_source
        nothing_given = X is None and y is None and sample_weight is None
        if not names_a_source and nothing_given:
            raise ValueError(
                'Expected "sampler" or "args_list" in "data_source" or X, y, and/or sample_weight'
            )

        if not data_source.get('sampler') or X is not None or y is not None:
            # Explicit data (or no usable sampler): pass values through.
            sampled = (X, y, sample_weight)
        else:
            sampled = create_sample_from_data_source(**data_source)

        # Fall back to a repr of the inputs when no _context is set on self.
        context = getattr(self, '_context', repr(data_source))
        return _split_pipeline_output(sampled,
                                      X=X,
                                      y=y,
                                      sample_weight=sample_weight,
                                      context=context)
Пример #3
0
    def _run_steps(self,
                   X=None,
                   y=None,
                   sample_weight=None,
                   sampler=None,
                   args_list=None,
                   sklearn_method='fit',
                   method_kwargs=None,
                   new_params=None,
                   partial_fit_batches=1,
                   return_X=False,
                   **data_source):
        '''Evaluate each fit/transform step in self.steps.  Used
        by fit, transform, predict and related methods.

        :X: sample; when X, y and sample_weight are all None the sample
            is obtained from self.create_sample instead
        :y: target; falls back to method_kwargs['y'] when None
        :sample_weight: weights; falls back to
            method_kwargs['sample_weight'] when None
        :sampler: forwarded to self.create_sample
        :args_list: forwarded to self.create_sample
        :sklearn_method: name of the method looked up on the final
            estimator.  Any name containing "predict" switches the
            intermediate steps to transform mode.
        :method_kwargs: dict of extra arguments, passed to
            self._post_run_pipeline for non-StepMixin final estimators
        :new_params: if truthy, the steps run on
            self.unfitted_copy(\\*\\*new_params) rather than on self
        :partial_fit_batches: accepted for interface compatibility; not
            read in this method body
        :return_X: for predict-like methods, return (pred, X) rather
            than just pred
        :\\*\\*data_source: forwarded to self.create_sample

        Returns:

            - predict-like methods: the prediction, or (pred, X) if
              return_X is true
            - "fit" / "partial_fit": the (possibly copied) pipeline,
              after self._score_estimator has been called
            - anything else (transform, fit_transform most likely): the
              result of _split_pipeline_output on the estimator output

        Raises ValueError if, after at least one intermediate step, X is
        not an ElmStore / xr.Dataset, or if the final estimator has no
        attribute named sklearn_method.
        '''
        from elm.sample_util.sample_pipeline import _split_pipeline_output
        method_kwargs = method_kwargs or {}
        if y is None:
            y = method_kwargs.get('y')
        if sample_weight is None:
            # Bug fix: this fallback used to read the 'y' key, which
            # silently reused the target array as the sample weights.
            sample_weight = method_kwargs.get('sample_weight')
        prepare_for = 'predict' if 'predict' in sklearn_method else 'train'
        if new_params:
            # Rebinding self: everything below operates on the copy.
            self = self.unfitted_copy(**new_params)
        fit_func = None
        if X is None and y is None and sample_weight is None:
            X, y, sample_weight = self.create_sample(X=X,
                                                     y=y,
                                                     sampler=sampler,
                                                     args_list=args_list,
                                                     **data_source)
        else:
            X, y, sample_weight = _split_pipeline_output(
                X, X, y, sample_weight, sklearn_method)
        # Every step except the last one transforms the sample.
        for _, step_cls in self.steps[:-1]:

            if prepare_for == 'train':
                fit_func = step_cls.fit_transform
            else:
                fit_func = step_cls.transform
                if not hasattr(getattr(step_cls, '_estimator', None),
                               'transform'):
                    # Estimator such as TSNE with no transform method, just fit_transform
                    fit_func = step_cls.fit_transform
            func_out = fit_func(X, y=y, sample_weight=sample_weight)
            if func_out is not None:
                # A step returning None leaves X, y, sample_weight as is.
                X, y, sample_weight = _split_pipeline_output(
                    func_out, X, y, sample_weight, repr(fit_func))
        # fit_func is only set when at least one intermediate step ran.
        if fit_func and not isinstance(X, (ElmStore, xr.Dataset)):
            raise ValueError('Expected the return value of {} to be an '
                             'elm.readers:ElmStore'.format(fit_func))
        fitter_or_predict = getattr(self._estimator, sklearn_method, None)
        if fitter_or_predict is None:
            raise ValueError(
                'Final estimator in Pipeline {} has no method {}'.format(
                    self._estimator, sklearn_method))
        if not isinstance(self._estimator, STEPS.StepMixin):

            args, kwargs = self._post_run_pipeline(fitter_or_predict,
                                                   self._estimator,
                                                   X,
                                                   y=y,
                                                   prepare_for=prepare_for,
                                                   sample_weight=sample_weight,
                                                   method_kwargs=method_kwargs)

        else:
            kwargs = {'y': y, 'sample_weight': sample_weight}
            args = (X, )
        if 'predict' in sklearn_method:
            X = args[0]
            pred = fitter_or_predict(X.flat.values, **kwargs)
            if return_X:
                return pred, X
            return pred

        output = fitter_or_predict(*args, **kwargs)
        # NOTE(review): 'fit_predict' contains "predict", so it has
        # already returned above and never matches here.
        if sklearn_method in ('fit', 'partial_fit', 'fit_predict'):
            self._score_estimator(X, y=y, sample_weight=sample_weight)
            return self
        # transform or fit_transform most likely
        return _split_pipeline_output(output, X, y, sample_weight,
                                      'fit_transform')