Пример #1
0
 def __init__(self,
              wrapped,
              epochs,
              fit_only=True,
              repeat_in_test_mode=False):
     """
     Initialize the base step and mixin, then store the repetition settings.

     :param wrapped: the wrapped step
     :param epochs: number of repetitions (stored as-is)
     :param fit_only: stored flag; presumably restricts repetition to fitting
     :param repeat_in_test_mode: stored flag for repetition in test mode
     """
     BaseStep.__init__(self)
     MetaStepMixin.__init__(self, wrapped)
     self.epochs = epochs
     self.fit_only = fit_only
     self.repeat_in_test_mode = repeat_in_test_mode
Пример #2
0
    def __init__(self, wrapped: BaseStep, then_unflatten: bool = True):
        """
        Initialize base step and mixins, remember the unflatten flag, and
        start with empty length-bookkeeping lists.

        :param wrapped: the wrapped step
        :param then_unflatten: stored flag controlling unflattening
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        ResumableStepMixin.__init__(self)
        ForceHandleMixin.__init__(self)

        self.then_unflatten = then_unflatten
        # Lengths of flattened data inputs / expected outputs, filled later.
        self.len_di, self.len_eo = [], []
Пример #3
0
    def __init__(self,
                 pipeline: Union[BaseStep, NamedTupleList],
                 validation_size: float = None,
                 batch_size: int = None,
                 batch_metrics: Dict[str, Callable] = None,
                 shuffle_in_each_epoch_at_train: bool = True,
                 seed: int = None,
                 n_epochs: int = 1,
                 epochs_metrics: Dict[str, Callable] = None,
                 scoring_function: Callable = None,
                 cache_folder: str = None,
                 print_epoch_metrics=False,
                 print_batch_metrics=False):
        """
        :param pipeline: pipeline to wrap with an epoch repeater, a validation split wrapper, and a mini batch sequential pipeline
        :param validation_size: ratio for validation size between 0 and 1
        :param batch_size: batch size for the mini batch sequential pipeline
        :param batch_metrics: metrics to calculate for each processed mini batch
        :param shuffle_in_each_epoch_at_train: whether to shuffle data at train time each epoch
        :param seed: random seed for the data shuffling that can be done at each epoch when the param shuffle_in_each_epoch_at_train is True
        :param n_epochs: number of epochs
        :param epochs_metrics: metrics to calculate for each epoch
        :param scoring_function: scoring function with two arguments (y_true, y_pred)
        :param cache_folder: cache folder to be used inside the pipeline
        :param print_epoch_metrics: whether or not to print epoch metrics
        :param print_batch_metrics: whether or not to print batch metrics
        """
        # Fresh dicts per instance: a mutable default in the signature would
        # be shared across all instances.
        if epochs_metrics is None:
            epochs_metrics = {}
        if batch_metrics is None:
            batch_metrics = {}

        self.final_scoring_metric = scoring_function
        self.epochs_metrics = epochs_metrics
        self.n_epochs = n_epochs
        self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
        self.batch_size = batch_size
        self.batch_metrics = batch_metrics
        self.validation_size = validation_size
        self.print_batch_metrics = print_batch_metrics
        self.print_epoch_metrics = print_epoch_metrics

        # Build the wrapped step inside-out: mini-batching first, then
        # optional train-time shuffling, then validation split, then the
        # epoch repeater as the outermost layer.
        wrapped = pipeline
        wrapped = self._create_mini_batch_pipeline(wrapped)

        if shuffle_in_each_epoch_at_train:
            wrapped = TrainShuffled(wrapped=wrapped, seed=seed)

        wrapped = self._create_validation_split(wrapped)
        wrapped = self._create_epoch_repeater(wrapped)

        # NOTE(review): base/mixin initialization happens after the attribute
        # assignments above — presumably deliberate so the fully wrapped step
        # is what MetaStepMixin receives; confirm before reordering.
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        EvaluableStepMixin.__init__(self)
        ForceHandleMixin.__init__(self, cache_folder)
Пример #4
0
    def __init__(
            self,
            pipeline: BaseStep,
            validation_splitter: 'BaseValidationSplitter',
            refit_trial: bool,
            scoring_callback: ScoringCallback,
            hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
            hyperparams_repository: HyperparamsRepository = None,
            n_trials: int = 10,
            epochs: int = 1,
            callbacks: List[BaseCallback] = None,
            refit_scoring_function: Callable = None,
            print_func: Callable = None,
            cache_folder_when_no_handle=None):
        """
        Configure the AutoML loop.

        :param pipeline: pipeline to tune
        :param validation_splitter: splitter producing train/validation splits
        :param refit_trial: stored flag controlling refitting of the best trial
        :param scoring_callback: main scoring callback, prepended to ``callbacks``
        :param hyperparams_optimizer: selection strategy; defaults to
            ``RandomSearchHyperparameterSelectionStrategy()`` when None
        :param hyperparams_repository: trial repository; defaults to a
            ``HyperparamsJSONRepository`` rooted at ``cache_folder_when_no_handle``
        :param n_trials: number of trials (stored as ``self.n_trial``)
        :param epochs: number of epochs, forwarded to the ``Trainer``
        :param callbacks: extra callbacks forwarded to the ``Trainer``
        :param refit_scoring_function: scoring function stored for refitting
        :param print_func: logging function; defaults to the builtin ``print``
        :param cache_folder_when_no_handle: cache folder for the handle-only mixin
        """
        BaseStep.__init__(self)
        ForceHandleOnlyMixin.__init__(self,
                                      cache_folder=cache_folder_when_no_handle)

        self.validation_split_function: BaseValidationSplitter = validation_splitter

        if print_func is None:
            print_func = print

        if hyperparams_optimizer is None:
            hyperparams_optimizer = RandomSearchHyperparameterSelectionStrategy(
            )
        self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = hyperparams_optimizer

        if hyperparams_repository is None:
            hyperparams_repository = HyperparamsJSONRepository(
                hyperparams_optimizer, cache_folder_when_no_handle)
        else:
            hyperparams_repository.set_strategy(hyperparams_optimizer)

        # Bug fix: the original assigned this attribute twice (once annotated
        # HyperparamsJSONRepository, once HyperparamsRepository); keep the
        # single, correctly-annotated assignment.
        self.hyperparams_repository: HyperparamsRepository = hyperparams_repository

        self.pipeline: BaseStep = pipeline
        self.print_func: Callable = print_func

        self.n_trial: int = n_trials

        self.refit_scoring_function: Callable = refit_scoring_function

        if callbacks is None:
            callbacks = []

        callbacks: List[BaseCallback] = [scoring_callback] + callbacks

        self.refit_trial: bool = refit_trial

        self.trainer = Trainer(callbacks=callbacks,
                               epochs=epochs,
                               print_func=self.print_func)
Пример #5
0
 def __init__(self,
              transform_callback_function,
              fit_callback_function,
              more_arguments: List = tuple(),
              transform_function=None,
              hyperparams=None):
     """
     Store the fit/transform callbacks and their extra arguments.

     :param transform_callback_function: callback stored for transform time
     :param fit_callback_function: callback stored for fit time
     :param more_arguments: extra arguments stored for the callbacks
     :param transform_function: optional transform function (stored as-is)
     :param hyperparams: hyperparams forwarded to the base step
     """
     BaseStep.__init__(self, hyperparams)
     self.transform_callback_function = transform_callback_function
     self.fit_callback_function = fit_callback_function
     self.more_arguments = more_arguments
     self.transform_function = transform_function
Пример #6
0
 def __init__(self,
              handle_fit_callback,
              handle_transform_callback,
              handle_fit_transform_callback):
     """
     Store the three handle-time callbacks after initializing the mixin
     and base step.

     :param handle_fit_callback: callback stored for handle_fit
     :param handle_transform_callback: callback stored for handle_transform
     :param handle_fit_transform_callback: callback stored for handle_fit_transform
     """
     ForceMustHandleMixin.__init__(self)
     BaseStep.__init__(self)
     self.handle_fit_callback = handle_fit_callback
     self.handle_transform_callback = handle_transform_callback
     self.handle_fit_transform_callback = handle_fit_transform_callback
Пример #7
0
    def __init__(self, wrapped=None, scoring_function: Callable = r2_score):
        """
        Base class for validation wrappers; keeps a scoring function used to
        compute the score of the validation split.

        :param wrapped: optional wrapped step
        :param scoring_function: scoring function with two arguments (y_true, y_pred)
        :type scoring_function: Callable
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        self.scoring_function = scoring_function
Пример #8
0
    def __init__(self, wrapped):
        """
        Wrap a scikit-learn MetaEstimatorMixin for usage in Neuraxle.

        This class is similar to Neuraxle's SKLearnWrapper, which wraps a
        plain scikit-learn BaseEstimator.

        :param wrapped: a scikit-learn object of type "MetaEstimatorMixin".
        """
        BaseStep.__init__(self)
        # NOTE: the mixin is intentionally initialized without a wrapped step
        # here; the estimator is kept on a dedicated attribute instead.
        MetaStepMixin.__init__(self)

        # TODO: use self.set_step of the MetaStepMixin instead?
        self.wrapped_sklearn_metaestimator = wrapped
Пример #9
0
    def __init__(self,
                 wrapped: BaseStep,
                 copy_op=copy.deepcopy,
                 cache_folder_when_no_handle=None):
        """
        :param wrapped: step to be copied
        :param copy_op: copy operation used for the wrapped step (deepcopy by default)
        :param cache_folder_when_no_handle: cache folder for the handle-only mixin
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)

        self.set_step(wrapped)
        self.copy_op = copy_op
        # Copies created later are collected here.
        self.steps: List[BaseStep] = []
Пример #10
0
    def __init__(self,
                 wrapped,
                 epochs,
                 repeat_in_test_mode=False,
                 cache_folder_when_no_handle=None):
        """
        :param wrapped: the wrapped step
        :param epochs: number of repetitions (stored as-is)
        :param repeat_in_test_mode: stored flag for repetition in test mode
        :param cache_folder_when_no_handle: cache folder for the handle-only mixin
        """
        BaseStep.__init__(self)
        MetaStep.__init__(self, wrapped)
        ForceHandleOnlyMixin.__init__(self,
                                      cache_folder=cache_folder_when_no_handle)

        self.epochs = epochs
        self.repeat_in_test_mode = repeat_in_test_mode
Пример #11
0
    def __init__(self,
                 hyperparameter_optimizer: BaseHyperparameterOptimizer,
                 validation_technique: BaseCrossValidationWrapper = None,
                 higher_score_is_better=True):
        """
        :param hyperparameter_optimizer: optimizer stored for later use
        :param validation_technique: cross-validation wrapper; a
            KFoldCrossValidationWrapper is created when None
        :param higher_score_is_better: stored flag for score comparison
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, None)

        self.validation_technique = (
            validation_technique
            if validation_technique is not None
            else KFoldCrossValidationWrapper()
        )
        self.hyperparameter_optimizer = hyperparameter_optimizer
        self.higher_score_is_better = higher_score_is_better
Пример #12
0
    def save_best_model(self, step: BaseStep):
        """
        Save the given step inside the best retrained model folder, named
        after the hash of its flat hyperparams.

        :param step: step to save
        :return: the saved step
        """
        flat_hp = step.get_hyperparams().to_flat_as_dict_primitive()
        trial_hash = self._get_trial_hash(flat_hp)
        save_context = ExecutionContext(self.best_retrained_model_folder)
        step.set_name(trial_hash).save(save_context, full_dump=True)
        return step
Пример #13
0
    def __init__(self,
                 wrapped: BaseStep,
                 copy_op=copy.deepcopy,
                 cache_folder_when_no_handle=None):
        """
        :param wrapped: step to be copied
        :param copy_op: copy operation used for the wrapped step (deepcopy by default)
        :param cache_folder_when_no_handle: cache folder for the handle-only mixin
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)
        ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)
        # Register the additional saver used by this step.
        self.savers.append(TruncableJoblibStepSaver())

        self.set_step(wrapped)
        self.copy_op = copy_op
        # Copies created later are collected here.
        self.steps_as_tuple: List[NamedTupleList] = []
Пример #14
0
    def __init__(self, callback_function, more_arguments: List = tuple(),
                 hyperparams=None, fit_callback_function=None, transform_function=None):
        """
        Create the callback step with a function and extra arguments to send
        to the function.

        :param callback_function: the function that will be called on events
        :param more_arguments: extra arguments that will be sent to the callback
            after the processed data (optional)
        :param hyperparams: hyperparams forwarded to the base step
        :param fit_callback_function: optional fit-time callback (stored as-is)
        :param transform_function: optional transform function (stored as-is)
        """
        BaseStep.__init__(self, hyperparams=hyperparams)
        self.callback_function = callback_function
        self.more_arguments = more_arguments
        self.fit_callback_function = fit_callback_function
        self.transform_function = transform_function
Пример #15
0
 def __init__(self,
              wrapped,
              transform_callback_function,
              fit_callback_function,
              inverse_transform_callback_function=None,
              more_arguments: List = tuple(),
              hyperparams=None):
     """
     Store the transform/fit/inverse-transform callbacks and their extra
     arguments around a wrapped step.

     :param wrapped: the wrapped step
     :param transform_callback_function: callback stored for transform time
     :param fit_callback_function: callback stored for fit time
     :param inverse_transform_callback_function: optional inverse-transform callback
     :param more_arguments: extra arguments stored for the callbacks
     :param hyperparams: hyperparams forwarded to the base step
     """
     MetaStepMixin.__init__(self, wrapped)
     BaseStep.__init__(self, hyperparams)
     self.transform_callback_function = transform_callback_function
     self.fit_callback_function = fit_callback_function
     self.inverse_transform_callback_function = inverse_transform_callback_function
     self.more_arguments = more_arguments
Пример #16
0
 def __init__(
         self,
         wrapped_sklearn_predictor,
         hyperparams_space: HyperparameterSpace = None,
         return_all_sklearn_default_params_on_get=False
 ):
     """
     Wrap a scikit-learn estimator; its current ``get_params()`` become the
     step's hyperparams, and the class name is appended to the step name.

     :param wrapped_sklearn_predictor: a scikit-learn BaseEstimator instance
     :param hyperparams_space: optional hyperparams space for the base step
     :param return_all_sklearn_default_params_on_get: stored flag for hyperparam retrieval
     :raises ValueError: when the predictor is not a BaseEstimator
     """
     if not isinstance(wrapped_sklearn_predictor, BaseEstimator):
         raise ValueError("The wrapped_sklearn_predictor must be an instance of scikit-learn's BaseEstimator.")
     self.wrapped_sklearn_predictor = wrapped_sklearn_predictor
     sk_params: HyperparameterSamples = HyperparameterSamples(wrapped_sklearn_predictor.get_params())
     BaseStep.__init__(self, hyperparams=sk_params, hyperparams_space=hyperparams_space)
     self.return_all_sklearn_default_params_on_get = return_all_sklearn_default_params_on_get
     self.name += "_" + wrapped_sklearn_predictor.__class__.__name__
Пример #17
0
    def __init__(self, wrapped: BaseStep, enabled: bool = True, nullified_return_value=None):
        """
        :param wrapped: the wrapped step
        :param enabled: initial value of the "enabled" hyperparam
        :param nullified_return_value: stored value; defaults to an empty list when None
        """
        enabled_hp = HyperparameterSamples({
            OPTIONAL_ENABLED_HYPERPARAM: enabled
        })
        BaseStep.__init__(self, hyperparams=enabled_hp)
        MetaStepMixin.__init__(self, wrapped)
        ForceMustHandleMixin.__init__(self)

        self.nullified_return_value = [] if nullified_return_value is None else nullified_return_value
Пример #18
0
    def _set_hyperparams(self, hyperparams: HyperparameterSamples) -> BaseStep:
        """
        Set hyperparams for base step, and the wrapped sklearn_predictor.

        :param hyperparams: hyperparams to set
        :return: ``self.hyperparams`` as stored on the base step
            (NOTE(review): the ``-> BaseStep`` annotation and a previous
            ":return: self" doc did not match the actual return value —
            confirm which one callers rely on)
        """
        # set the step hyperparams, and set the wrapped sklearn predictor params
        BaseStep._set_hyperparams(self, hyperparams)
        # Re-separate and flatten the keys before handing them to sklearn's
        # set_params — presumably converting to the nested-param naming
        # sklearn expects; confirm what DEFAULT_SEPARATOR is.
        self.wrapped_sklearn_predictor.set_params(**hyperparams.with_separator(
            RecursiveDict.DEFAULT_SEPARATOR).to_flat_dict())

        return self.hyperparams
Пример #19
0
    def __init__(self,
                 wrapped: BaseStep,
                 enabled: bool = True,
                 nullified_return_value=None,
                 cache_folder_when_no_handle=None):
        """
        :param wrapped: the wrapped step
        :param enabled: initial value of the "enabled" hyperparam
        :param nullified_return_value: stored value; defaults to an empty list when None
        :param cache_folder_when_no_handle: cache folder for the handle-only mixin
        """
        enabled_hp = HyperparameterSamples(
            {OPTIONAL_ENABLED_HYPERPARAM: enabled})
        BaseStep.__init__(self, hyperparams=enabled_hp)
        MetaStepMixin.__init__(self, wrapped)
        ForceHandleOnlyMixin.__init__(self, cache_folder_when_no_handle)

        self.nullified_return_value = [] if nullified_return_value is None else nullified_return_value
Пример #20
0
    def __init__(
        self,
        wrapped: BaseStep,
        cache_folder: str = DEFAULT_CACHE_FOLDER,
        value_hasher: 'BaseValueHasher' = None,
    ):
        """
        :param wrapped: the wrapped step
        :param cache_folder: folder stored as the value-caching location
        :param value_hasher: hasher for cached values; Md5Hasher is created when None
        """
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)

        self.value_hasher = value_hasher if value_hasher is not None else Md5Hasher()
        self.value_caching_folder = cache_folder
Пример #21
0
    def _update_hyperparams(self, hyperparams: HyperparameterSamples) -> BaseStep:
        """
        Update hyperparams for base step, and the wrapped sklearn_predictor.

        :param hyperparams: hyperparams to merge into the current ones
        :return: the flattened updated hyperparams
            (NOTE(review): despite the ``-> BaseStep`` annotation, this
            returns ``self.hyperparams.to_flat()``, not ``self`` — confirm
            which one callers rely on)
        """
        # flatten the step hyperparams, and update the wrapped sklearn predictor params
        hyperparams = HyperparameterSamples(hyperparams)
        BaseStep._update_hyperparams(self, hyperparams.to_flat())
        # Push the merged hyperparams down into the sklearn estimator,
        # re-separated — presumably into the nested-param naming sklearn's
        # set_params expects; confirm what DEFAULT_SEPARATOR is.
        self.wrapped_sklearn_predictor.set_params(
            **self.hyperparams.with_separator(RecursiveDict.DEFAULT_SEPARATOR).to_flat_as_dict_primitive()
        )

        return self.hyperparams.to_flat()
Пример #22
0
 def __init__(
     self,
     wrapped=None,
     n_iter: int = 10,
     higher_score_is_better: bool = True,
     validation_technique: BaseCrossValidation = None,
     refit=True,
 ):
     """
     :param wrapped: optional wrapped step (the mixin is only initialized when given)
     :param n_iter: number of iterations (stored as-is)
     :param higher_score_is_better: stored flag for score comparison
     :param validation_technique: cross-validation wrapper; a fresh
         ``KFoldCrossValidation()`` is created when None
     :param refit: stored refit flag
     """
     if wrapped is not None:
         MetaStepMixin.__init__(self, wrapped)
     BaseStep.__init__(self)

     if validation_technique is None:
         # Bug fix: the default used to be ``KFoldCrossValidation()`` in the
         # signature, which is evaluated once at definition time and shared
         # by every instance. Build a fresh instance per object instead.
         validation_technique = KFoldCrossValidation()

     self.n_iter = n_iter
     self.higher_score_is_better = higher_score_is_better
     self.validation_technique: BaseCrossValidation = validation_technique
     self.refit = refit
Пример #23
0
 def __init__(self,
              wrapped: BaseStep,
              test_size: float,
              scoring_function=r2_score,
              run_validation_split_in_test_mode=True):
     """
     :param wrapped: wrapped step
     :param test_size: ratio for test size between 0 and 1
     :param scoring_function: scoring function with two arguments (y_true, y_pred)
     :param run_validation_split_in_test_mode: stored flag for splitting in test mode
     """
     MetaStepMixin.__init__(self, wrapped)
     BaseStep.__init__(self)
     self.test_size = test_size
     self.scoring_function = scoring_function
     self.run_validation_split_in_test_mode = run_validation_split_in_test_mode
Пример #24
0
    def __init__(self,
                 wrapped: BaseStep,
                 metrics: Dict,
                 name: str = None,
                 print_metrics=False,
                 print_fun=print):
        """
        :param wrapped: the wrapped step
        :param metrics: metrics dict, also passed to ``_initialize_metrics``
        :param name: optional step name, forwarded to the base step
        :param print_metrics: stored flag controlling metric printing
        :param print_fun: function used for printing (builtin print by default)
        """
        BaseStep.__init__(self, name=name)
        MetaStepMixin.__init__(self, wrapped)

        self.metrics: Dict = metrics
        self._initialize_metrics(metrics)

        self.print_fun = print_fun
        self.print_metrics = print_metrics
        # Steps start out enabled.
        self.enabled = True
Пример #25
0
 def _get_hyperparams(self):
     """
     Return the hyperparams, either rebuilt from the wrapped sklearn
     predictor's ``get_params()`` (with base-estimator entries removed)
     or from the base step, depending on the configured flag.
     """
     if not self.return_all_sklearn_default_params_on_get:
         return BaseStep._get_hyperparams(self)
     hp = self.wrapped_sklearn_predictor.get_params()
     self._delete_base_estimator_from_dict(hp)
     return HyperparameterSamples(hp)
Пример #26
0
    def _will_process(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseStep', DataContainer):
        """
        Flatten data container before any processing is done on the wrapped step.

        :param data_container: data container to flatten
        :param context: execution context
        :return: (flattened data container, execution context)
        :rtype: ('BaseStep', DataContainer)
        """
        data_container, context = BaseStep._will_process(
            self, data_container, context)

        # When no expected outputs were given, substitute a NaN-filled
        # placeholder shaped like the data inputs so both sides flatten
        # symmetrically.
        # NOTE(review): ``np.empty_like`` inherits the inputs' dtype; filling
        # an integer-dtype array with NaN raises ValueError — presumably
        # data_inputs are float/object here, confirm with callers.
        if data_container.expected_outputs is None:
            expected_outputs = np.empty_like(
                np.array(data_container.data_inputs))
            expected_outputs.fill(np.nan)
            data_container.set_expected_outputs(expected_outputs)

        # Record the original per-item lengths so the step can restore the
        # nested structure later.
        di, self.len_di = self._flatten_list(data_container.data_inputs)
        eo, self.len_eo = self._flatten_list(data_container.expected_outputs)

        flattened_data_container = DataContainer(
            summary_id=data_container.summary_id,
            data_inputs=di,
            expected_outputs=eo,
            sub_data_containers=data_container.sub_data_containers)

        return flattened_data_container, context
Пример #27
0
    def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
        """
        Train the pipeline once using the configured validation splitter,
        tracking training and validation metrics for each epoch.

        This is a convenience shortcut around :meth:`execute_trial`, which
        offers the full flexibility.

        :param pipeline: pipeline to train on
        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: the executed trial
        """
        container = DataContainer(
            data_inputs=data_inputs, expected_outputs=expected_outputs)
        splits: List[Tuple[DataContainer, DataContainer]] = \
            self.validation_split_function.split_data_container(container)

        trial: Trial = Trial(
            pipeline=pipeline,
            hyperparams=pipeline.get_hyperparams(),
            main_metric_name=self.get_main_metric_name(),
        )

        self.execute_trial(
            pipeline=pipeline,
            trial_number=1,
            repo_trial=trial,
            context=ExecutionContext(),
            validation_splits=splits,
            n_trial=1,
            delete_pipeline_on_completion=False,
        )

        return trial
Пример #28
0
    def __init__(self,
                 transform_callback_function=None,
                 fit_callback_function=None,
                 more_arguments: List = tuple(),
                 transform_function=None,
                 hyperparams=None):
        """
        :param transform_callback_function: transform-time callback; a fresh
            TapeCallbackFunction is created when None
        :param fit_callback_function: fit-time callback; a fresh
            TapeCallbackFunction is created when None
        :param more_arguments: extra arguments stored for the callbacks
        :param transform_function: optional transform function (stored as-is)
        :param hyperparams: hyperparams forwarded to the base step
        """
        BaseStep.__init__(self, hyperparams)

        self.transform_callback_function = (
            transform_callback_function
            if transform_callback_function is not None
            else TapeCallbackFunction())
        self.fit_callback_function = (
            fit_callback_function
            if fit_callback_function is not None
            else TapeCallbackFunction())
        self.more_arguments = more_arguments
        self.transform_function = transform_function
Пример #29
0
    def __init__(self,
                 wrapped: BaseStep,
                 auto_ml_algorithm: AutoMLAlgorithm,
                 hyperparams_repository: HyperparamsRepository = None,
                 n_iters: int = 100,
                 refit=True):
        """
        :param wrapped: step handed to the auto-ML algorithm via ``set_step``
        :param auto_ml_algorithm: algorithm that becomes the wrapped step of the mixin
        :param hyperparams_repository: repository; InMemoryHyperparamsRepository is created when None
        :param n_iters: number of iterations (stored as-is)
        :param refit: stored refit flag
        """
        BaseStep.__init__(self)

        self.refit = refit
        # The algorithm wraps the given step; the mixin then wraps the algorithm.
        MetaStepMixin.__init__(self, auto_ml_algorithm.set_step(wrapped))
        NonTransformableMixin.__init__(self)

        self.hyperparams_repository = (
            hyperparams_repository
            if hyperparams_repository is not None
            else InMemoryHyperparamsRepository()
        )
        self.n_iters = n_iters
Пример #30
0
    def __init__(
            self,
            pipeline: Union[BaseStep, NamedTupleList],
            validation_size: float = 0.0,
            batch_size: int = None,
            batch_metrics: Dict[str, Callable] = None,
            shuffle_in_each_epoch_at_train: bool = True,
            seed: int = None,
            n_epochs: int = 1,
            epochs_metrics: Dict[str, Callable] = None,
            scoring_function: Callable = None,
            metrics_plotting_step: BaseStep = None,
            cache_folder: str = None,
            print_epoch_metrics=False,
            print_batch_metrics=False
    ):
        """
        :param pipeline: pipeline to wrap with an epoch repeater, a validation split wrapper, and a mini batch sequential pipeline
        :param validation_size: ratio for validation size between 0 and 1
        :param batch_size: batch size for the mini batch sequential pipeline
        :param batch_metrics: metrics to calculate for each processed mini batch
        :param shuffle_in_each_epoch_at_train: whether to shuffle data at train time each epoch
        :param seed: random seed for the per-epoch shuffling
        :param n_epochs: number of epochs
        :param epochs_metrics: metrics to calculate for each epoch
        :param scoring_function: scoring function with two arguments (y_true, y_pred)
        :param metrics_plotting_step: optional step stored for metrics plotting
        :param cache_folder: cache folder to be used inside the pipeline
        :param print_epoch_metrics: whether or not to print epoch metrics
        :param print_batch_metrics: whether or not to print batch metrics
        """
        # Fresh dicts per instance: a mutable default in the signature would
        # be shared across all instances.
        if epochs_metrics is None:
            epochs_metrics = {}
        if batch_metrics is None:
            batch_metrics = {}

        self.final_scoring_metric = scoring_function
        self.epochs_metrics = epochs_metrics
        self.n_epochs = n_epochs
        self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
        self.batch_size = batch_size
        self.batch_metrics = batch_metrics
        self.validation_size = validation_size
        self.metrics_plotting_step = metrics_plotting_step
        self.print_batch_metrics = print_batch_metrics
        self.print_epoch_metrics = print_epoch_metrics

        # Build the wrapped step inside-out: mini-batching first, then
        # optional train-time shuffling, then validation split, then the
        # epoch repeater as the outermost layer.
        wrapped = pipeline
        wrapped = self._create_mini_batch_pipeline(wrapped)

        if shuffle_in_each_epoch_at_train:
            wrapped = TrainShuffled(wrapped=wrapped, seed=seed)

        wrapped = self._create_validation_split(wrapped)
        wrapped = self._create_epoch_repeater(wrapped)

        # NOTE(review): base/Pipeline initialization happens after the
        # attribute assignments above — presumably deliberate so the fully
        # wrapped step is what Pipeline receives; confirm before reordering.
        BaseStep.__init__(self)
        Pipeline.__init__(self, [wrapped], cache_folder=cache_folder)