def __init__(self, steps: NamedTupleList, batch_size=None, cache_folder=None):
    Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
    ForceHandleMixin.__init__(self)

    self.__validate_barriers_batch_size(batch_size)
    self.__patch_missing_barrier(batch_size)
    self.__patch_barriers_batch_size(batch_size)
def __init__(self, wrapped: BaseTransformer, then_unflatten: bool = True):
    MetaStep.__init__(self, wrapped)
    ResumableStepMixin.__init__(self)
    ForceHandleMixin.__init__(self)

    self.then_unflatten = then_unflatten
    self.len_di = []
    self.len_eo = []
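# Illustrative sketch (not the library's code) of the flatten / unflatten idea the attributes
# above support: nested data is flattened before the wrapped step runs, the original lengths
# are remembered (as in len_di / len_eo), and the output is re-nested afterwards when
# then_unflatten is True. The helper names below are assumptions for illustration only.
def _toy_flatten(nested):
    lengths = [len(inner) for inner in nested]
    flat = [item for inner in nested for item in inner]
    return flat, lengths


def _toy_unflatten(flat, lengths):
    nested, start = [], 0
    for length in lengths:
        nested.append(flat[start:start + length])
        start += length
    return nested


flat, lengths = _toy_flatten([[1, 2], [3, 4, 5]])
assert _toy_unflatten(flat, lengths) == [[1, 2], [3, 4, 5]]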
def __init__(
        self,
        pipeline: Union[BaseStep, NamedTupleList],
        validation_size: float = None,
        batch_size: int = None,
        batch_metrics: Dict[str, Callable] = None,
        shuffle_in_each_epoch_at_train: bool = True,
        seed: int = None,
        n_epochs: int = 1,
        epochs_metrics: Dict[str, Callable] = None,
        scoring_function: Callable = None,
        cache_folder: str = None,
        print_epoch_metrics=False,
        print_batch_metrics=False
):
    """
    :param pipeline: pipeline to wrap with an epoch repeater, a validation split wrapper, and a mini batch sequential pipeline
    :param validation_size: ratio for validation size between 0 and 1
    :param batch_size: batch size for the mini batch sequential pipeline
    :param batch_metrics: metrics to calculate for each processed mini batch
    :param shuffle_in_each_epoch_at_train: whether to shuffle the data at the beginning of each training epoch
    :param seed: random seed for the data shuffling that can be done at each epoch when shuffle_in_each_epoch_at_train is True
    :param n_epochs: number of epochs
    :param epochs_metrics: metrics to calculate for each epoch
    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    :param cache_folder: cache folder to be used inside the pipeline
    :param print_epoch_metrics: whether to print epoch metrics
    :param print_batch_metrics: whether to print batch metrics
    """
    if epochs_metrics is None:
        epochs_metrics = {}
    if batch_metrics is None:
        batch_metrics = {}

    self.final_scoring_metric = scoring_function
    self.epochs_metrics = epochs_metrics
    self.n_epochs = n_epochs
    self.shuffle_in_each_epoch_at_train = shuffle_in_each_epoch_at_train
    self.batch_size = batch_size
    self.batch_metrics = batch_metrics
    self.validation_size = validation_size
    self.print_batch_metrics = print_batch_metrics
    self.print_epoch_metrics = print_epoch_metrics

    wrapped = pipeline
    wrapped = self._create_mini_batch_pipeline(wrapped)
    if shuffle_in_each_epoch_at_train:
        wrapped = TrainShuffled(wrapped=wrapped, seed=seed)
    wrapped = self._create_validation_split(wrapped)
    wrapped = self._create_epoch_repeater(wrapped)

    BaseStep.__init__(self)
    MetaStepMixin.__init__(self, wrapped)
    EvaluableStepMixin.__init__(self)
    ForceHandleMixin.__init__(self, cache_folder)
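# Illustrative sketch (not the library's code): the constructor above builds its behaviour by
# successively wrapping the inner pipeline with mini-batching, optional shuffling, a validation
# split, and an epoch repeater. The toy classes below only demonstrate that composition order;
# every name here is a hypothetical stand-in.
class _ToyWrapper:
    def __init__(self, wrapped):
        self.wrapped = wrapped


class _ToyMiniBatcher(_ToyWrapper): pass
class _ToyShuffler(_ToyWrapper): pass
class _ToyValidationSplit(_ToyWrapper): pass
class _ToyEpochRepeater(_ToyWrapper): pass


def _toy_build(inner_step, shuffle_in_each_epoch_at_train=True):
    wrapped = _ToyMiniBatcher(inner_step)
    if shuffle_in_each_epoch_at_train:
        wrapped = _ToyShuffler(wrapped)
    wrapped = _ToyValidationSplit(wrapped)
    return _ToyEpochRepeater(wrapped)  # the epoch repeater ends up outermost


assert type(_toy_build("model")).__name__ == "_ToyEpochRepeater"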
def __init__(
        self,
        pipeline: BaseStep,
        validation_splitter: 'BaseValidationSplitter',
        refit_trial: bool,
        scoring_callback: ScoringCallback,
        hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
        hyperparams_repository: HyperparamsRepository = None,
        n_trials: int = 10,
        epochs: int = 1,
        callbacks: List[BaseCallback] = None,
        refit_scoring_function: Callable = None,
        print_func: Callable = None,
        cache_folder_when_no_handle=None,
        continue_loop_on_error=True
):
    BaseStep.__init__(self)
    ForceHandleMixin.__init__(self, cache_folder=cache_folder_when_no_handle)

    self.validation_splitter: BaseValidationSplitter = validation_splitter

    if print_func is None:
        print_func = print

    if hyperparams_optimizer is None:
        hyperparams_optimizer = RandomSearchHyperparameterSelectionStrategy()
    self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = hyperparams_optimizer

    if hyperparams_repository is None:
        hyperparams_repository = HyperparamsJSONRepository(hyperparams_optimizer, cache_folder_when_no_handle)
    else:
        hyperparams_repository.set_strategy(hyperparams_optimizer)
    self.hyperparams_repository: HyperparamsRepository = hyperparams_repository

    self.pipeline: BaseStep = pipeline
    self.print_func: Callable = print_func
    self.n_trial: int = n_trials
    self.refit_scoring_function: Callable = refit_scoring_function
    self.refit_trial: bool = refit_trial

    self.error_types_to_raise = (SystemError, SystemExit, EOFError, KeyboardInterrupt) \
        if continue_loop_on_error else (Exception,)

    self.trainer = Trainer(
        epochs=epochs,
        scoring_callback=scoring_callback,
        callbacks=callbacks,
        print_func=self.print_func,
        validation_splitter=validation_splitter,
        hyperparams_repository=hyperparams_repository
    )
def __init__(self, steps: NamedTupleList, batch_size=None, include_incomplete_batch: bool = None,
             default_value_data_inputs=None, default_value_expected_outputs=None, cache_folder=None):
    Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
    ForceHandleMixin.__init__(self)

    self.default_value_data_inputs = default_value_data_inputs
    self.default_value_expected_outputs = default_value_expected_outputs

    self.__validate_barriers_batch_size(batch_size=batch_size)
    self.__patch_missing_barrier(
        batch_size=batch_size,
        include_incomplete_batch=include_incomplete_batch,
        default_value_data_inputs=default_value_data_inputs,
        default_value_expected_outputs=default_value_expected_outputs
    )
    self.__patch_barriers_batch_size(batch_size)
def __init__(self, steps: NamedTupleList, batch_size=None, keep_incomplete_batch: bool = None,
             default_value_data_inputs=AbsentValuesNullObject(), default_value_expected_outputs=None,
             cache_folder=None, mute_joiner_batch_size_warning: bool = True):
    Pipeline.__init__(self, steps=steps, cache_folder=cache_folder)
    ForceHandleMixin.__init__(self)

    self.default_value_data_inputs = default_value_data_inputs
    self.default_value_expected_outputs = default_value_expected_outputs

    self.__validate_barriers_batch_size(batch_size=batch_size)
    self.__patch_missing_barrier(
        batch_size=batch_size,
        keep_incomplete_batch=keep_incomplete_batch,
        default_value_data_inputs=default_value_data_inputs,
        default_value_expected_outputs=default_value_expected_outputs
    )
    self.mute_joiner_batch_size_warning = mute_joiner_batch_size_warning
    self.__patch_barriers_batch_size(batch_size)
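# Illustrative sketch (not the library's implementation) of the mini-batching behaviour that
# batch_size, keep_incomplete_batch and the default values control above: the data is cut into
# fixed-size batches, and the last, shorter batch is either dropped, kept as-is, or padded with
# a default value. The helper name and padding strategy below are assumptions for illustration.
from itertools import islice


def _toy_minibatches(data_inputs, batch_size, keep_incomplete_batch=True, default_value=None):
    iterator = iter(data_inputs)
    while True:
        batch = list(islice(iterator, batch_size))
        if not batch:
            return
        if len(batch) < batch_size:
            if not keep_incomplete_batch:
                return  # drop the incomplete final batch
            if default_value is not None:
                batch += [default_value] * (batch_size - len(batch))  # pad it to full size
        yield batch


print(list(_toy_minibatches(range(7), 3)))                               # [[0, 1, 2], [3, 4, 5], [6]]
print(list(_toy_minibatches(range(7), 3, keep_incomplete_batch=False)))  # [[0, 1, 2], [3, 4, 5]]
print(list(_toy_minibatches(range(7), 3, default_value=0)))              # [[0, 1, 2], [3, 4, 5], [6, 0, 0]]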
def __init__(self):
    BaseStep.__init__(self)
    ForceHandleMixin.__init__(self)

def __init__(self):
    BaseTransformer.__init__(self)
    ForceHandleMixin.__init__(self)

def __init__(self):
    Identity.__init__(self)
    ForceHandleMixin.__init__(self)
def __init__(
        self,
        pipeline: BaseStep,
        validation_splitter: 'BaseValidationSplitter',
        refit_trial: bool,
        scoring_callback: ScoringCallback,
        hyperparams_optimizer: BaseHyperparameterSelectionStrategy = None,
        hyperparams_repository: HyperparamsRepository = None,
        n_trials: int = 10,
        epochs: int = 1,
        callbacks: List[BaseCallback] = None,
        refit_scoring_function: Callable = None,
        cache_folder_when_no_handle=None,
        n_jobs=-1,
        continue_loop_on_error=True
):
    """
    Notes on multiprocessing: using a multiprocess-safe hyperparams repository is recommended,
    although it is, most of the time, not necessary. Beware of the behaviour of the
    HyperparamsRepository's observers/subscribers. Context instances are not shared between
    trials but copied, and so are the AutoML loop and the DataContainers.

    :param pipeline: The pipeline, or BaseStep, which will be used by the AutoML loop.
    :param validation_splitter: A :class:`BaseValidationSplitter` instance to split data between training and validation set.
    :param refit_trial: A boolean indicating whether to refit the best trial after the AutoML loop has finished.
    :param scoring_callback: The scoring callback to use during training.
    :param hyperparams_optimizer: A :class:`BaseHyperparameterSelectionStrategy` instance that can be queried for new sets of hyperparameters.
    :param hyperparams_repository: A :class:`HyperparamsRepository` instance to store experiment status and results.
    :param n_trials: The number of different hyperparameter sets to try.
    :param epochs: The number of epochs to perform for each trial.
    :param callbacks: A list of callbacks to perform after each epoch.
    :param refit_scoring_function: A scoring function to use on a refit call.
    :param cache_folder_when_no_handle: Default cache folder used if the AutoML loop isn't called through handler functions.
    :param n_jobs: If n_jobs is in (-1, None, 1), the AutoML loop is executed in a single thread. If n_jobs > 1, n_jobs threads are launched; if n_jobs < -1, (n_cpus + 1 + n_jobs) threads are launched.
    :param continue_loop_on_error: Whether to keep looping over the remaining trials when a trial raises a non-critical error (critical errors such as SystemExit and KeyboardInterrupt are always raised).
    """
    BaseStep.__init__(self)
    ForceHandleMixin.__init__(self, cache_folder=cache_folder_when_no_handle)

    self.validation_splitter: BaseValidationSplitter = validation_splitter

    if hyperparams_optimizer is None:
        hyperparams_optimizer = RandomSearchHyperparameterSelectionStrategy()
    self.hyperparameter_optimizer: BaseHyperparameterSelectionStrategy = hyperparams_optimizer

    if hyperparams_repository is None:
        hyperparams_repository = HyperparamsJSONRepository(hyperparams_optimizer, cache_folder_when_no_handle)
    else:
        hyperparams_repository.set_strategy(hyperparams_optimizer)
    self.hyperparams_repository: HyperparamsRepository = hyperparams_repository

    self.pipeline: BaseStep = pipeline
    self.n_trial: int = n_trials
    self.refit_scoring_function: Callable = refit_scoring_function
    self.refit_trial: bool = refit_trial
    self.n_jobs = n_jobs

    self.error_types_to_raise = (SystemError, SystemExit, EOFError, KeyboardInterrupt) \
        if continue_loop_on_error else (Exception,)

    self.trainer = Trainer(
        epochs=epochs,
        scoring_callback=scoring_callback,
        callbacks=callbacks,
        validation_splitter=validation_splitter
    )
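# Illustrative sketch of the n_jobs convention described in the docstring above; the helper
# name is an assumption and is not part of the class.
import multiprocessing


def _toy_resolve_n_jobs(n_jobs):
    if n_jobs in (-1, None, 1):
        return 1                                      # run in a single thread
    if n_jobs > 1:
        return n_jobs                                 # launch n_jobs workers
    return multiprocessing.cpu_count() + 1 + n_jobs   # e.g. n_jobs == -2 -> cpu_count() - 1


print(_toy_resolve_n_jobs(-1), _toy_resolve_n_jobs(4), _toy_resolve_n_jobs(-2))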