def fit_transform(self, X, y=None, sample_weight=None, **kwargs):
    '''Apply the user-supplied sample-modifying callable to X.

    The callable named by ``self.func`` is resolved with
    ``import_callable`` and invoked as ``func(X, **keywords)``.

    Keywords are assembled in this order, later entries overriding
    earlier ones:

    1. ``y`` and ``sample_weight`` as given to this method
    2. any extra ``**kwargs`` given to this method
    3. ``self.kwargs`` (configured keywords take precedence)

    Parameters:
        :X: sample passed positionally to the callable
        :y: optional target data forwarded as a keyword
        :sample_weight: optional weights forwarded as a keyword
        :kwargs: additional keywords forwarded to the callable

    Returns:
        Whatever ``_split_pipeline_output`` produces from the callable's
        output; the ``y`` and ``sample_weight`` handed to it are the ones
        given to this method, not values overridden by ``self.kwargs``.
    '''
    from elm.sample_util.sample_pipeline import _split_pipeline_output

    call_kwargs = {'y': y, 'sample_weight': sample_weight}
    call_kwargs.update(kwargs)
    # Configured keywords win over anything supplied at call time.
    call_kwargs.update(self.kwargs)

    modifier = import_callable(self.func)
    result = modifier(X, **call_kwargs)
    return _split_pipeline_output(result, X, y, sample_weight,
                                  'ModifySample')
def create_sample(self, **data_source):
    r'''Standardize a sampler's output, or given X / y / sample_weight,
    into a tuple of (X, y, sample_weight).

    Calls the sampler if one is given in data_source.

    :\*\*data_source: should have:

        :sampler: a function taking (\*args, \*\*kwargs), returning an X
                  ElmStore or tuple of (X, y, sample_weight) with X as
                  ElmStore.  Arguments to the sampler are sampler_args,
                  and \*\*data_source is also passed.
        :sampler_args: if passed to this function, sampler_args are
                  typically created by unpacking each element of
                  "args_list" given to other methods in this class.

      OR the \*\*data_source may have:

        - :X:, :y:, and/or :sample_weight: keys/values, with X as an
          ElmStore, in which case this function just passes them
          through.  See usage in ensemble.

    The sampler is only called when data_source has a truthy "sampler"
    and both X and y are None; otherwise (X, y, sample_weight) are
    passed through as given.

    Returns:
        The result of ``_split_pipeline_output`` applied to the sampler
        output or to the pass-through tuple.

    Raises:
        ValueError: if data_source has neither a "sampler" nor an
            "args_list" key and X, y and sample_weight are all None.
    '''
    from elm.sample_util.sample_pipeline import create_sample_from_data_source
    from elm.sample_util.sample_pipeline import _split_pipeline_output

    X = data_source.get("X", None)
    y = data_source.get('y', None)
    logger.info('Call create_sample')
    sample_weight = data_source.get('sample_weight', None)

    has_sampler_keys = 'sampler' in data_source or 'args_list' in data_source
    nothing_given = all(item is None for item in (X, y, sample_weight))
    if not has_sampler_keys and nothing_given:
        raise ValueError(
            'Expected "sampler" or "args_list" in "data_source" or X, y, and/or sample_weight'
        )

    if data_source.get('sampler') and X is None and y is None:
        output = create_sample_from_data_source(**data_source)
    else:
        # Either data was supplied directly or no usable sampler exists:
        # hand the (possibly None) values through for standardization.
        output = (X, y, sample_weight)

    # Prefer the instance's _context label; fall back to a repr of the
    # data source so the downstream helper still gets a context value.
    context = getattr(self, '_context', repr(data_source))
    return _split_pipeline_output(output, X=X, y=y,
                                  sample_weight=sample_weight,
                                  context=context)
def _run_steps(self, X=None, y=None, sample_weight=None, sampler=None,
               args_list=None, sklearn_method='fit', method_kwargs=None,
               new_params=None, partial_fit_batches=1, return_X=False,
               **data_source):
    '''Evaluate each fit/transform step in self.steps.  Used by
    fit, transform, predict and related methods.

    Parameters:
        :X, y, sample_weight: sample data.  If all three are None the
            sample is built with self.create_sample from sampler /
            args_list / \\*\\*data_source; otherwise they are standardized
            with _split_pipeline_output.
        :sampler, args_list: forwarded to self.create_sample.
        :sklearn_method: name of the method to call on the final
            estimator (self._estimator).  Any name containing "predict"
            is run in prediction mode.
        :method_kwargs: optional dict; its "y" and "sample_weight"
            entries are used as fallbacks when the corresponding
            arguments are None, and the dict is forwarded to
            self._post_run_pipeline.
        :new_params: if truthy, the steps run on
            self.unfitted_copy(\\*\\*new_params) rather than on self.
        :partial_fit_batches: accepted for interface compatibility; not
            used in this method.
        :return_X: in prediction mode, return (pred, X) instead of pred.

    Returns:
        - prediction mode: pred, or (pred, X) if return_X
        - "fit" / "partial_fit": the (possibly copied) pipeline itself
        - otherwise: the final method's output standardized by
          _split_pipeline_output

    Raises:
        ValueError: if, after at least one intermediate step has run, X
            is not an ElmStore / xr.Dataset, or if the final estimator
            has no attribute named sklearn_method.
    '''
    from elm.sample_util.sample_pipeline import _split_pipeline_output

    method_kwargs = method_kwargs or {}
    if y is None:
        y = method_kwargs.get('y')
    if sample_weight is None:
        # Bug fix: this fallback previously read the 'y' key, which made
        # the targets double as sample weights.
        sample_weight = method_kwargs.get('sample_weight')

    # NOTE: 'fit_predict' contains 'predict', so it is prepared and
    # executed as a prediction (see the early return further down).
    prepare_for = 'predict' if 'predict' in sklearn_method else 'train'

    if new_params:
        # Rebind the local name so every later step, and the value
        # returned for fit / partial_fit, refers to the unfitted copy.
        self = self.unfitted_copy(**new_params)

    fit_func = None
    if X is None and y is None and sample_weight is None:
        X, y, sample_weight = self.create_sample(X=X, y=y, sampler=sampler,
                                                 args_list=args_list,
                                                 **data_source)
    else:
        X, y, sample_weight = _split_pipeline_output(
            X, X, y, sample_weight, sklearn_method)

    # Run every step except the last; the last one is the final estimator.
    for _, step_cls in self.steps[:-1]:
        if prepare_for == 'train':
            fit_func = step_cls.fit_transform
        else:
            fit_func = step_cls.transform
            if not hasattr(getattr(step_cls, '_estimator', None),
                           'transform'):
                # Estimator such as TSNE with no transform method,
                # just fit_transform
                fit_func = step_cls.fit_transform
        func_out = fit_func(X, y=y, sample_weight=sample_weight)
        if func_out is not None:
            X, y, sample_weight = _split_pipeline_output(
                func_out, X, y, sample_weight, repr(fit_func))

    # fit_func stays None when no intermediate step ran, in which case
    # the type of X is not checked here.
    if fit_func and not isinstance(X, (ElmStore, xr.Dataset)):
        raise ValueError('Expected the return value of {} to be an '
                         'elm.readers:ElmStore'.format(fit_func))

    fitter_or_predict = getattr(self._estimator, sklearn_method, None)
    if fitter_or_predict is None:
        raise ValueError(
            'Final estimator in Pipeline {} has no method {}'.format(
                self._estimator, sklearn_method))

    if not isinstance(self._estimator, STEPS.StepMixin):
        # Final estimator is not a pipeline step type: let
        # _post_run_pipeline build the positional / keyword arguments.
        args, kwargs = self._post_run_pipeline(fitter_or_predict,
                                               self._estimator,
                                               X,
                                               y=y,
                                               prepare_for=prepare_for,
                                               sample_weight=sample_weight,
                                               method_kwargs=method_kwargs)
    else:
        kwargs = {'y': y, 'sample_weight': sample_weight}
        args = (X, )

    if 'predict' in sklearn_method:
        X = args[0]
        pred = fitter_or_predict(X.flat.values, **kwargs)
        if return_X:
            return pred, X
        return pred

    output = fitter_or_predict(*args, **kwargs)
    if sklearn_method in ('fit', 'partial_fit', 'fit_predict'):
        self._score_estimator(X, y=y, sample_weight=sample_weight)
        return self
    # transform or fit_transform most likely
    return _split_pipeline_output(output, X, y, sample_weight,
                                  'fit_transform')