def test_network_selector(self):
    """A fitted NetworkSelector must produce one of the registered network types."""
    pipeline = Pipeline([NetworkSelector()])

    network_selector = pipeline[NetworkSelector.get_name()]
    network_selector.add_network("mlpnet", MlpNet)
    network_selector.add_network("shapedmlpnet", ShapedMlpNet)
    network_selector.add_final_activation('none', nn.Sequential())

    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config["random_seed"] = 42
    sampled_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    # Fit on tiny random data; only the sampled network type matters here.
    pipeline.fit_pipeline(hyperparameter_config=sampled_config,
                          pipeline_config=pipeline_config,
                          X=torch.rand(3, 3), Y=torch.rand(3, 2),
                          embedding=nn.Sequential())

    sampled_network = pipeline[network_selector.get_name()].fit_output['network']
    self.assertIn(type(sampled_network), [MlpNet, ShapedMlpNet])
def test_optimizer(self):
    """Run OptimizationAlgorithm over a dummy node and inspect the optimized config."""

    class ResultNode(PipelineNode):
        # Dummy leaf node: loss/info are derived from the data shapes only.
        def fit(self, X_train, Y_train):
            return {'loss': X_train.shape[1],
                    'info': {'a': X_train.shape[1], 'b': Y_train.shape[1]}}

        def get_hyperparameter_search_space(self, **pipeline_config):
            cs = CS.ConfigurationSpace()
            cs.add_hyperparameter(
                CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30))
            return cs

    # Silence the optimizer's loggers for the duration of the test.
    for logger_name in ('hpbandster', 'autonet'):
        logging.getLogger(logger_name).setLevel(logging.ERROR)

    pipeline = Pipeline([OptimizationAlgorithm([ResultNode()])])
    pipeline_config = pipeline.get_pipeline_config(num_iterations=1,
                                                   budget_type='epochs')
    pipeline.fit_pipeline(pipeline_config=pipeline_config,
                          X_train=torch.rand(15, 10), Y_train=torch.rand(15, 5),
                          X_valid=None, Y_valid=None, one_hot_encoder=None)

    optimized_config = pipeline[OptimizationAlgorithm.get_name()].fit_output[
        'optimized_hyperparamater_config']
    sampled_value = optimized_config[
        ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper']
    # The sampled hyperparameter must lie inside the declared [0, 30] range.
    self.assertIn(sampled_value, list(range(0, 31)))
def test_loss_selector(self):
    """LossModuleSelector must instantiate exactly the loss named in the config."""
    pipeline = Pipeline([LossModuleSelector()])
    selector = pipeline[LossModuleSelector.get_name()]
    selector.add_loss_module("L1", nn.L1Loss)
    selector.add_loss_module("cross_entropy", nn.CrossEntropyLoss,
                             LossWeightStrategyWeighted(), True)

    pipeline_config = pipeline.get_pipeline_config(
        loss_modules=["L1", "cross_entropy"])
    hyper_config = pipeline.get_hyperparameter_search_space(
        **pipeline_config).sample_configuration()

    # First round: force the plain L1 loss.
    hyper_config["LossModuleSelector:loss_module"] = "L1"
    pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                          train_indices=np.array([0, 1, 2]),
                          X=np.random.rand(3, 3), Y=np.random.rand(3, 2),
                          pipeline_config=pipeline_config, tmp=None)
    selected_loss = pipeline[selector.get_name()].fit_output['loss_function']
    self.assertEqual(type(selected_loss.function), nn.L1Loss)

    # Second round: force weighted cross entropy on one-hot targets.
    hyper_config["LossModuleSelector:loss_module"] = "cross_entropy"
    pipeline.fit_pipeline(hyperparameter_config=hyper_config,
                          train_indices=np.array([0, 1, 2]),
                          X=np.random.rand(3, 3),
                          Y=np.array([[1, 0], [0, 1], [1, 0]]),
                          pipeline_config=pipeline_config, tmp=None)
    selected_loss = pipeline[selector.get_name()].fit_output['loss_function']
    self.assertEqual(type(selected_loss.function), nn.CrossEntropyLoss)
    # A maximally confident prediction of the correct class yields zero loss.
    self.assertEqual(selected_loss(torch.tensor([[0.0, 10000.0]]),
                                   torch.tensor([[0, 1]])), 0)
def test_lr_scheduler_selector(self):
    """The sampled lr scheduler must be one of the two registered scheduler types."""
    pipeline = Pipeline([NetworkSelector(),
                         OptimizerSelector(),
                         LearningrateSchedulerSelector()])

    network_selector = pipeline[NetworkSelector.get_name()]
    network_selector.add_network("mlpnet", MlpNet)
    network_selector.add_network("shapedmlpnet", ShapedMlpNet)
    network_selector.add_final_activation('none', nn.Sequential())

    optimizer_selector = pipeline[OptimizerSelector.get_name()]
    optimizer_selector.add_optimizer("adam", AdamOptimizer)
    optimizer_selector.add_optimizer("sgd", SgdOptimizer)

    scheduler_selector = pipeline[LearningrateSchedulerSelector.get_name()]
    scheduler_selector.add_lr_scheduler("step", SchedulerStepLR)
    scheduler_selector.add_lr_scheduler("exp", SchedulerExponentialLR)

    pipeline_config = pipeline.get_pipeline_config()
    pipeline_config["random_seed"] = 42
    sampled_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    pipeline.fit_pipeline(hyperparameter_config=sampled_config,
                          pipeline_config=pipeline_config,
                          X=torch.rand(3, 3), Y=torch.rand(3, 2),
                          embedding=nn.Sequential(), training_techniques=[],
                          train_indices=np.array([0, 1, 2]))

    # The scheduler is stashed inside the first training technique's components.
    sampled_scheduler = pipeline[scheduler_selector.get_name()].fit_output[
        'training_techniques'][0].training_components['lr_scheduler']
    self.assertIn(type(sampled_scheduler),
                  [lr_scheduler.ExponentialLR, lr_scheduler.StepLR])
def test_selector(self):
    """The optimizer produced by OptimizerSelector must be a registered type."""
    pipeline = Pipeline([NetworkSelector(), OptimizerSelector()])

    network_selector = pipeline[NetworkSelector.get_name()]
    network_selector.add_network("mlpnet", MlpNet)
    network_selector.add_network("shapedmlpnet", ShapedMlpNet)
    network_selector.add_final_activation('none', nn.Sequential())

    optimizer_selector = pipeline[OptimizerSelector.get_name()]
    optimizer_selector.add_optimizer("adam", AdamOptimizer)
    optimizer_selector.add_optimizer("sgd", SgdOptimizer)

    pipeline_config = pipeline.get_pipeline_config()
    sampled_config = pipeline.get_hyperparameter_search_space().sample_configuration()
    pipeline.fit_pipeline(hyperparameter_config=sampled_config,
                          pipeline_config=pipeline_config,
                          X_train=torch.rand(3, 3), Y_train=torch.rand(3, 2),
                          embedding=nn.Sequential())

    sampled_optimizer = optimizer_selector.fit_output['optimizer']
    self.assertIn(type(sampled_optimizer), [optim.Adam, optim.SGD])
class SubPipelineNode(PipelineNode):
    """Pipeline node that forwards fitting and prediction to an inner pipeline."""

    def __init__(self, sub_pipeline_nodes):
        super(SubPipelineNode, self).__init__()
        # Wrap the given nodes in their own pipeline.
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        # Register with the outer pipeline, then re-parent the inner one.
        super(SubPipelineNode, self).set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        # Delegate: the inner pipeline's fit result is this node's fit result.
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        return self.sub_pipeline.predict_pipeline(**kwargs)
class SubPipelineNode(PipelineNode):
    """Pipeline node that delegates fit/predict to a nested sub-pipeline and
    clones that sub-pipeline independently of the base-node clone."""

    def __init__(self, sub_pipeline_nodes):
        super().__init__()
        self.sub_pipeline = Pipeline(sub_pipeline_nodes)

    def set_pipeline(self, pipeline):
        # Register with the outer pipeline, then re-parent the inner one.
        super().set_pipeline(pipeline)
        self.sub_pipeline.set_parent_pipeline(pipeline)

    def fit(self, **kwargs):
        return self.sub_pipeline.fit_pipeline(**kwargs)

    def predict(self, **kwargs):
        return self.sub_pipeline.predict_pipeline(**kwargs)

    def clone(self):
        # Clone the sub-pipeline ourselves; the base clone skips it (along
        # with the other attributes that must not be shallow-copied).
        cloned_sub_pipeline = self.sub_pipeline.clone()
        cloned_node = super().clone(skip=("pipeline", "fit_output",
                                          "predict_output", "child_node",
                                          "sub_pipeline"))
        cloned_node.sub_pipeline = cloned_sub_pipeline
        return cloned_node
def test_cross_validation(self):
    """Exercise the CrossValidation node with k-fold, validation-split,
    stratified k-fold and explicit-validation-set configurations.

    The DataSetInfo setup and the fit_pipeline invocation were duplicated
    four times in the original; they are factored into ``run_cv`` below.
    """

    class ResultNode(PipelineNode):
        # Dummy leaf node: reports row sums of the split data as loss/info,
        # so the expected aggregated values can be computed by hand.
        def fit(self, X, Y, train_indices, valid_indices):
            return {'loss': np.sum(X[valid_indices]),
                    'info': {'a': np.sum(X[train_indices]),
                             'b': np.sum(X[valid_indices])}}

    pipeline = Pipeline([CrossValidation([ResultNode()])])
    pipeline["CrossValidation"].add_cross_validator(
        "k_fold", KFold, lambda x: x.reshape((-1,)))
    pipeline["CrossValidation"].add_cross_validator(
        "stratified_k_fold", StratifiedKFold, lambda x: x.reshape((-1,)))

    def run_cv(pipeline_config, x, y, x_valid=None, y_valid=None):
        # One fit_pipeline call with a DataSetInfo matching (x, y).
        # Note: the search space is built before random_seed is written,
        # matching the original ordering in every segment.
        config_space = pipeline.get_hyperparameter_search_space(**pipeline_config)
        pipeline_config["random_seed"] = 42
        dataset_info = DataSetInfo()
        dataset_info.categorical_features = [None] * 3
        dataset_info.x_shape = x.shape
        dataset_info.y_shape = y.shape
        return pipeline.fit_pipeline(
            hyperparameter_config=config_space, pipeline_config=pipeline_config,
            X_train=x, Y_train=y, X_valid=x_valid, Y_valid=y_valid,
            budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None,
            optimize_start_time=time.time(), refit=False,
            dataset_info=dataset_info, rescore=False)

    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y_train = np.array([[1], [0], [1]])

    # 3-fold CV: loss is averaged over the single-row validation folds.
    cv_result = run_cv(pipeline.get_pipeline_config(
        cross_validator="k_fold", cross_validator_args={"n_splits": 3}),
        x_train, y_train)
    self.assertEqual(cv_result['loss'], 15)
    self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15})

    # 30% hold-out validation split.
    cv_result = run_cv(pipeline.get_pipeline_config(validation_split=0.3),
                       x_train, y_train)
    self.assertEqual(cv_result['loss'], 24)
    self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24})

    # Stratified 3-fold CV on a larger, class-balanced training set;
    # the small set is kept for the explicit-validation case below.
    x_valid = x_train
    y_valid = y_train
    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                        [10, 11, 12], [13, 14, 15], [16, 17, 18]])
    y_train = np.array([[1], [1], [0], [0], [1], [0]])
    cv_result = run_cv(pipeline.get_pipeline_config(
        cross_validator="stratified_k_fold",
        cross_validator_args={"n_splits": 3}), x_train, y_train)
    self.assertEqual(cv_result['loss'], 57)
    self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57})

    # Explicit validation set, no cross validator configured.
    cv_result = run_cv(pipeline.get_pipeline_config(),
                       x_train, y_train, x_valid, y_valid)
    self.assertEqual(cv_result['loss'], 45)
    self.assertDictEqual(cv_result['info'], {'a': 171, 'b': 45})
def test_cross_validation(self):
    """CrossValidation behavior with cv_splits, validation_split and
    stratified splits on hand-computable toy data."""

    class ResultNode(PipelineNode):
        # Dummy node: matrix sums of the splits stand in for train/valid scores.
        def fit(self, X_train, X_valid):
            return {'loss': np.sum(X_valid),
                    'info': {'a': np.sum(X_train), 'b': np.sum(X_valid)}}

    pipeline = Pipeline([CrossValidation([ResultNode()])])
    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y_train = np.array([[1], [0], [1]])

    # --- plain 3-fold cross validation ---
    config = pipeline.get_pipeline_config(cv_splits=3)
    config_space = pipeline.get_hyperparameter_search_space(**config)
    config['categorical_features'] = None
    result = pipeline.fit_pipeline(hyperparameter_config=config_space,
                                   pipeline_config=config,
                                   X_train=x_train, Y_train=y_train,
                                   X_valid=None, Y_valid=None,
                                   budget=5, budget_type=BudgetTypeEpochs,
                                   one_hot_encoder=None,
                                   optimize_start_time=time.time())
    self.assertEqual(result['loss'], 15)
    self.assertDictEqual(result['info'], {'a': 30, 'b': 15})

    # --- 30% hold-out validation split ---
    config = pipeline.get_pipeline_config(validation_split=0.3)
    config_space = pipeline.get_hyperparameter_search_space(**config)
    config['categorical_features'] = None
    result = pipeline.fit_pipeline(hyperparameter_config=config_space,
                                   pipeline_config=config,
                                   X_train=x_train, Y_train=y_train,
                                   X_valid=None, Y_valid=None,
                                   budget=5, budget_type=BudgetTypeEpochs,
                                   one_hot_encoder=None,
                                   optimize_start_time=time.time())
    self.assertEqual(result['loss'], 24)
    self.assertDictEqual(result['info'], {'a': 21, 'b': 24})

    # --- stratified 3-fold split on a larger, class-balanced sample ---
    x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                        [10, 11, 12], [13, 14, 15], [16, 17, 18]])
    y_train = np.array([[1], [1], [0], [0], [1], [0]])
    config = pipeline.get_pipeline_config(cv_splits=3,
                                          use_stratified_cv_split=True)
    config_space = pipeline.get_hyperparameter_search_space(**config)
    config['categorical_features'] = None
    result = pipeline.fit_pipeline(hyperparameter_config=config_space,
                                   pipeline_config=config,
                                   X_train=x_train, Y_train=y_train,
                                   X_valid=None, Y_valid=None,
                                   budget=5, budget_type=BudgetTypeEpochs,
                                   one_hot_encoder=None,
                                   optimize_start_time=time.time())
    self.assertEqual(result['loss'], 57)
    self.assertDictEqual(result['info'], {'a': 114, 'b': 57})