def test_optimizer(self):
    class ResultNode(PipelineNode):
        def fit(self, X_train, Y_train):
            return {'loss': X_train.shape[1],
                    'info': {'train_a': X_train.shape[1], 'train_b': Y_train.shape[1]}}

        def get_hyperparameter_search_space(self, **pipeline_config):
            cs = CS.ConfigurationSpace()
            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
            return cs

        def get_pipeline_config_options(self):
            return [
                ConfigOption("result_logger_dir", default=".", type="directory"),
                ConfigOption("optimize_metric", default="a", type=str),
            ]

    logger = logging.getLogger('hpbandster')
    logger.setLevel(logging.ERROR)
    logger = logging.getLogger('autonet')
    logger.setLevel(logging.ERROR)

    pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

    pipeline_config = pipeline.get_pipeline_config(num_iterations=1, budget_type='epochs',
                                                   result_logger_dir=".")
    pipeline.fit_pipeline(pipeline_config=pipeline_config,
                          X_train=np.random.rand(15, 10), Y_train=np.random.rand(15, 5),
                          X_valid=None, Y_valid=None,
                          result_loggers=[json_result_logger(directory=".", overwrite=True)],
                          dataset_info=None, shutdownables=[])

    result_of_opt_pipeline = pipeline[OptimizationAlgorithm.get_name()].fit_output['optimized_hyperparameter_config']
    print(pipeline[OptimizationAlgorithm.get_name()].fit_output)

    self.assertIn(result_of_opt_pipeline[ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'],
                  list(range(0, 31)))
def fit(self, X_train, Y_train, X_valid=None, Y_valid=None, refit=True, **autonet_config):
    """Fit AutoNet to training data.

    Arguments:
        X_train {array} -- Training data.
        Y_train {array} -- Targets of training data.

    Keyword Arguments:
        X_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None})
        Y_valid {array} -- Validation targets. Will be ignored if cv_splits > 1. (default: {None})
        refit {bool} -- Whether the final architecture should be trained again after the search. (default: {True})
        **autonet_config -- Configure AutoNet for your needs. You can also configure AutoNet in the constructor. Call print_help() for more info.

    Returns:
        optimized_hyperparameter_config -- The best hyperparameter configuration found.
        final_metric_score -- The final score of the specified train metric.
    """
    self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config))

    self.pipeline.fit_pipeline(pipeline_config=self.autonet_config,
                               X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)

    output = self.pipeline[OptimizationAlgorithm.get_name()].fit_output
    self.optimized_hyperparameter_config = output["optimized_hyperparamater_config"]
    self.optimized_hyperparameter_config_budget = output["budget"]

    if refit:
        self.refit(X_train, Y_train, X_valid, Y_valid,
                   self.optimized_hyperparameter_config, self.autonet_config)
    return self.optimized_hyperparameter_config, output['final_metric_score']
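# Usage sketch for fit(). It assumes a concrete AutoNet subclass such as
# AutoNetClassification is importable from the autoPyTorch package and that
# the constructor accepts the same config options as the pipeline (as used
# in the tests above); data shapes and option values are illustrative only.
import numpy as np
from autoPyTorch import AutoNetClassification  # assumed import path

X_train = np.random.rand(200, 10)
Y_train = np.random.randint(0, 2, size=200)

autonet = AutoNetClassification(budget_type='epochs', num_iterations=1, log_level='error')

# fit() runs the hyperparameter optimization and, with refit=True (the default),
# retrains the best configuration on the full training data before returning.
best_config, final_score = autonet.fit(X_train, Y_train, refit=True)
print(best_config, final_score)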
def score(self, X_test, Y_test, return_loss_value=False):
    """Calculate the score on test data using the specified optimize_metric.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Keyword Arguments:
        return_loss_value {bool} -- Whether to return the metric's loss value instead of its score. (default: {False})

    Returns:
        score -- The score for the test data.
    """
    # run predict pipeline
    X_test, Y_test = self.check_data_array_types(X_test, Y_test)
    autonet_config = self.autonet_config or self.base_config
    self.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X_test)
    Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']

    # one-hot encode the targets so they match the network output
    OHE = self.pipeline[OneHotEncoding.get_name()]
    Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

    metric = self.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']
    if return_loss_value:
        return metric.get_loss_value(Y_pred, Y_test)
    return metric(Y_pred, Y_test)
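# Usage sketch for score(), assuming `autonet` has already been fitted as in
# the fit() sketch above; the test data below is illustrative only.
X_test = np.random.rand(50, 10)
Y_test = np.random.randint(0, 2, size=50)

test_score = autonet.score(X_test, Y_test)                           # value of the optimize_metric
test_loss = autonet.score(X_test, Y_test, return_loss_value=True)    # metric's loss value instead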
def test_optimizer(self):
    class ResultNode(PipelineNode):
        def fit(self, X_train, Y_train):
            return {'loss': X_train.shape[1],
                    'info': {'a': X_train.shape[1], 'b': Y_train.shape[1]}}

        def get_hyperparameter_search_space(self, **pipeline_config):
            cs = CS.ConfigurationSpace()
            cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
            return cs

    logger = logging.getLogger('hpbandster')
    logger.setLevel(logging.ERROR)
    logger = logging.getLogger('autonet')
    logger.setLevel(logging.ERROR)

    pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])

    pipeline_config = pipeline.get_pipeline_config(num_iterations=1, budget_type='epochs')
    pipeline.fit_pipeline(pipeline_config=pipeline_config,
                          X_train=torch.rand(15, 10), Y_train=torch.rand(15, 5),
                          X_valid=None, Y_valid=None, one_hot_encoder=None)

    result_of_opt_pipeline = pipeline[OptimizationAlgorithm.get_name()].fit_output['optimized_hyperparamater_config']

    self.assertIn(result_of_opt_pipeline[ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'],
                  list(range(0, 31)))
def get_default_pipeline(cls):
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
    from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
    from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
    from autoPyTorch.pipeline.nodes.imputation import Imputation
    from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
    from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
    from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
    from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
    from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
    from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
    from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
    from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
    from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
    from autoPyTorch.pipeline.nodes.train_node import TrainNode

    # build the pipeline
    pipeline = Pipeline([
        AutoNetSettings(),
        OptimizationAlgorithm([
            CrossValidation([
                Imputation(),
                NormalizationStrategySelector(),
                OneHotEncoding(),
                PreprocessorSelector(),
                ResamplingStrategySelector(),
                EmbeddingSelector(),
                NetworkSelector(),
                OptimizerSelector(),
                LearningrateSchedulerSelector(),
                LogFunctionsSelector(),
                MetricSelector(),
                LossModuleSelector(),
                TrainNode()
            ])
        ])
    ])

    cls._apply_default_pipeline_settings(pipeline)
    return pipeline
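# Sketch of how individual nodes of the default pipeline are addressed
# elsewhere in this code base: Pipeline behaves like a mapping keyed by node
# name, so a node instance can be looked up via its get_name(). Calling
# get_default_pipeline() on AutoNetClassification is an assumption here, used
# only to obtain a concrete pipeline object.
pipeline = AutoNetClassification.get_default_pipeline()
optimizer_node = pipeline[OptimizationAlgorithm.get_name()]
metric_node = pipeline[MetricSelector.get_name()]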
def score(self, X_test, Y_test):
    """Calculate the score on test data using the specified train_metric.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Returns:
        score -- The score for the test data.
    """
    # run predict pipeline
    self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
    Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']

    # one-hot encode the targets so they match the network output
    OHE = self.pipeline[OneHotEncoding.get_name()]
    Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])

    metric = self.pipeline[MetricSelector.get_name()].fit_output['train_metric']
    return metric(torch.from_numpy(Y_test), torch.from_numpy(Y_pred))
def predict(self, X, return_probabilities=False):
    """Predict the targets for a data matrix X.

    Arguments:
        X {array} -- The data matrix.

    Keyword Arguments:
        return_probabilities {bool} -- Whether to return a tuple whose second entry is the raw network output. (default: {False})

    Returns:
        result -- The predicted targets.
    """
    # run predict pipeline
    self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X)
    Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y']

    # reverse the one-hot encoding to recover the original label representation
    OHE = self.pipeline[OneHotEncoding.get_name()]
    result = OHE.reverse_transform_y(Y_pred, OHE.fit_output['y_one_hot_encoder'])
    return result if not return_probabilities else (result, Y_pred)
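# Usage sketch for predict(), assuming `autonet` has been fitted as in the
# fit() sketch above; X_new is illustrative data with the training feature count.
X_new = np.random.rand(5, 10)

labels = autonet.predict(X_new)                                      # predicted targets only
labels, probabilities = autonet.predict(X_new, return_probabilities=True)  # also the raw network output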