def put(self, value: DataContainer):
    """
    Enqueue the given data container as a task for this step.

    The container is copied before being wrapped, and the task is tagged
    with this step's name.

    :param value: data container to enqueue
    :return: None
    """
    task = QueuedPipelineTask(
        step_name=self.name,
        data_container=value.copy()
    )
    self.queue.put(task)
def fit_trial_split(
        self,
        trial_split: TrialSplit,
        train_data_container: DataContainer,
        validation_data_container: DataContainer,
        context: ExecutionContext
) -> TrialSplit:
    """
    Fit the trial split for up to ``self.epochs`` epochs.

    Each epoch fits on a copy of the training data, predicts on copies of
    both the training and the validation data, and passes those predictions
    to the callbacks. A truthy callback result stops the loop early.

    :param trial_split: trial split to execute
    :param train_data_container: train data container
    :param validation_data_container: validation data container
    :param context: execution context
    :return: executed trial split
    """

    def context_for(phase):
        # Every call gets its own copy so the caller's context is left untouched.
        return context.copy().set_execution_phase(phase)

    for epoch_number in range(self.epochs):
        context.logger.info('epoch {}/{}'.format(epoch_number + 1, self.epochs))

        trial_split = trial_split.fit_trial_split(
            train_data_container.copy(),
            context_for(ExecutionPhase.TRAIN)
        )

        # Predictions on the train set also run in the VALIDATION phase.
        train_predictions = trial_split.predict_with_pipeline(
            train_data_container.copy(),
            context_for(ExecutionPhase.VALIDATION)
        )
        validation_predictions = trial_split.predict_with_pipeline(
            validation_data_container.copy(),
            context_for(ExecutionPhase.VALIDATION)
        )

        stop_requested = self.callbacks.call(
            trial_split=trial_split,
            epoch_number=epoch_number,
            total_epochs=self.epochs,
            input_train=train_data_container,
            pred_train=train_predictions,
            input_val=validation_data_container,
            pred_val=validation_predictions,
            context=context_for(ExecutionPhase.VALIDATION),
            is_finished_and_fitted=False,
        )
        if stop_requested:
            break

        # Saves the metrics
        # NOTE(review): the source was flattened onto one line; saving once per
        # epoch, after the early-stop check, is the reading assumed here.
        # Confirm against the original layout.
        trial_split.save_parent_trial()

    return trial_split
def _fit_data_container(self, data_container: DataContainer, context: ExecutionContext):
    """
    Run ``self.n_iters`` hyperparameter trials with the wrapped AutoML strategy.

    For every trial the next hyperparameters are requested from the wrapped
    step, applied to it, and registered in the hyperparams repository. The
    wrapped step is then fit-transformed on a copy of the data; the resulting
    ``data_inputs`` are saved as the trial score. Any exception raised during
    a trial is printed and saved as a failed trial instead of being raised.

    When ``self.refit`` is truthy, a model obtained from
    ``self._load_virgin_best_model()`` is fitted on a copy of the data and
    stored in ``self.best_model``.

    :param data_container: data container to fit on
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: fitted self
    :rtype: BaseStep
    """
    for trial_number in range(self.n_iters):
        trial_container = self._load_auto_ml_data(trial_number)
        hyperparams = self.wrapped.find_next_best_hyperparams(trial_container)

        self.wrapped = self.wrapped.update_hyperparams(hyperparams)
        self.hyperparams_repository.create_new_trial(hyperparams)

        try:
            self.wrapped, scored_container = self.wrapped.handle_fit_transform(
                data_container.copy(),
                context
            )
            self.hyperparams_repository.save_score_for_success_trial(
                hyperparams,
                scored_container.data_inputs
            )
        except Exception as error:
            # Best effort: a failing trial is reported and recorded, and the
            # remaining trials still run.
            print(traceback.format_exc())
            self.hyperparams_repository.save_failure_for_trial(hyperparams, error)

    if not self.refit:
        return self

    self.best_model = self._load_virgin_best_model().handle_fit(
        data_container.copy(),
        context
    )
    return self
def fit_trial_split(
        self,
        trial_split: TrialSplit,
        train_data_container: DataContainer,
        validation_data_container: DataContainer,
        context: ExecutionContext
) -> TrialSplit:
    """
    Fit the trial split for up to ``self.epochs`` epochs.

    Each epoch announces itself through ``self.print_func``, fits on a copy
    of the training data, predicts on copies of the training and validation
    data, and passes both predictions to the callbacks. A truthy callback
    result stops the loop early.

    :param trial_split: trial to execute
    :param train_data_container: train data container
    :param validation_data_container: validation data container
    :param context: execution context
    :return: executed trial
    """
    for epoch_number in range(self.epochs):
        self.print_func('\nepoch {}/{}'.format(epoch_number + 1, self.epochs))

        trial_split = trial_split.fit_trial_split(train_data_container.copy(), context)
        train_predictions = trial_split.predict_with_pipeline(
            train_data_container.copy(), context
        )
        validation_predictions = trial_split.predict_with_pipeline(
            validation_data_container.copy(), context
        )

        stop_requested = self.callbacks.call(
            trial=trial_split,
            epoch_number=epoch_number,
            total_epochs=self.epochs,
            input_train=train_data_container,
            pred_train=train_predictions,
            input_val=validation_data_container,
            pred_val=validation_predictions,
            # The callbacks are always told the trial is not finished yet.
            is_finished_and_fitted=False
        )
        if stop_requested:
            break

    return trial_split
def _fit_data_container(self, data_container: DataContainer, context: ExecutionContext) -> 'BaseStep':
    """
    Fit the wrapped step repeatedly through its ``handle_fit`` method.

    The number of repetitions comes from ``self._get_epochs()``. Each
    repetition receives its own copy of the data container, and the wrapped
    step is replaced by whatever ``handle_fit`` returns.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: fitted self
    :rtype: BaseStep
    """
    remaining_epochs = self._get_epochs()
    while remaining_epochs > 0:
        self.wrapped = self.wrapped.handle_fit(data_container.copy(), context)
        remaining_epochs -= 1

    return self
def test_handle_predict_should_handle_transform_with_initial_is_train_mode_after_predict():
    """
    Check that ``handle_transform`` still yields ``[4, 4]`` after ``handle_predict``.

    The pipeline holds a ``MultiplyByN(2)`` inside a ``TestOnlyWrapper`` and
    a ``MultiplyByN(4)`` inside a ``TrainOnlyWrapper``. The expected output of
    ``[4, 4]`` for an input of ``[1, 1]`` matches only the train-only step
    being applied. Presumably this means the pipeline is back in its initial
    train mode after predicting; that is inferred from the test name.
    """
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()

    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), transform_tape, fit_tape)
    )
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), transform_tape, fit_tape)
    )
    pipeline = Pipeline([test_only_step, train_only_step])

    ones = DataContainer(
        data_inputs=np.array([1, 1]),
        expected_outputs=np.array([1, 1])
    )

    # Predict on a copy so the later transform sees the untouched input.
    pipeline.handle_predict(data_container=ones.copy(), context=ExecutionContext())
    transformed = pipeline.handle_transform(ones, ExecutionContext())

    expected = np.array([4, 4])
    assert np.array_equal(transformed.data_inputs, expected)
def _fit_transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> (
        'BaseStep', DataContainer):
    """
    Fit the wrapped step for the extra epochs, then fit-transform it once.

    Unless ``self.fit_only`` is truthy, the wrapped step is first fitted
    ``self.epochs - 1`` times, each time on a copy of the data container.
    The wrapped step is then always fit-transformed once on the data
    container itself, and that result is returned.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: (fitted self, data container)
    :rtype: (BaseStep, DataContainer)
    """
    # NOTE(review): the source was flattened onto one line; the fit_only guard
    # is read as covering only the extra fit epochs. Confirm against the
    # original layout.
    extra_fit_count = 0 if self.fit_only else self.epochs - 1

    for _epoch in range(extra_fit_count):
        self.wrapped = self.wrapped.handle_fit(data_container.copy(), context)

    fitted_wrapped, transformed = self.wrapped.handle_fit_transform(data_container, context)
    self.wrapped = fitted_wrapped

    return self, transformed