def fit(self, pipeline_config, instance):
    """Read the data behind ``instance`` into a new ``DataManager``.

    Args:
        pipeline_config: Mapping providing ``'problem_type'``,
            ``'data_manager_verbose'`` and ``'test_split'``.
        instance: Dataset reference, passed unchanged to
            ``DataManager.read_data``.

    Returns:
        dict: ``{"data_manager": <manager>}`` where the manager has had
        ``read_data`` called on it.

    Raises:
        AssertionError: If ``pipeline_config['problem_type']`` is not one
            of the three feature-based problem types.
    """
    classification_types = ['feature_classification', 'feature_multilabel']
    # Only feature-based problems are handled by this node.
    assert pipeline_config['problem_type'] in classification_types + ['feature_regression']

    manager = DataManager(verbose=pipeline_config["data_manager_verbose"])
    # Everything accepted above that is not regression counts as classification.
    is_classification = pipeline_config["problem_type"] in classification_types
    manager.read_data(
        instance,
        is_classification=is_classification,
        test_split=pipeline_config["test_split"],
    )
    return {"data_manager": manager}
def fit(self, pipeline_config, instance):
    """Read the training data (and an optional explicit test set) into a manager.

    A ``DataManager`` is used for the feature-based problem types; every
    other problem type falls through to an ``ImageManager``.

    Args:
        pipeline_config: Mapping providing ``'problem_type'``,
            ``'data_manager_verbose'`` and ``'test_instances'``;
            ``'test_split'`` is read only when ``'test_instances'`` is None.
        instance: Dataset reference, passed unchanged to ``read_data``.

    Returns:
        dict: ``{"data_manager": dm}``. When ``'test_instances'`` is given,
        ``dm.X_test`` / ``dm.Y_test`` are taken from a second manager that
        read those instances; otherwise ``read_data`` is asked to split off
        a test set using ``pipeline_config['test_split']``.
    """
    feature_problems = ('feature_classification', 'feature_multilabel', 'feature_regression')
    classification_problems = ('feature_classification', 'feature_multilabel', 'image_classification')

    problem_type = pipeline_config['problem_type']
    verbose = pipeline_config["data_manager_verbose"]
    test_instances = pipeline_config['test_instances']
    is_classification = problem_type in classification_problems

    # Get data manager for train, val, test data
    manager_cls = DataManager if problem_type in feature_problems else ImageManager
    dm = manager_cls(verbose=verbose)

    if test_instances is None:
        # Use test split
        dm.read_data(instance,
                     is_classification=is_classification,
                     test_split=pipeline_config["test_split"])
        return {"data_manager": dm}

    # Use given test set: read both sets without any internal test split.
    dm_test = manager_cls(verbose=verbose)
    dm.read_data(instance, is_classification=is_classification, test_split=0.0)
    dm_test.read_data(test_instances, is_classification=is_classification, test_split=0.0)

    y_test = dm_test.Y_train
    if is_classification:
        # Class targets are stored as int32. Regression targets must keep
        # their dtype: an integer cast would truncate continuous values.
        y_test = y_test.astype(np.int32)
    dm.X_test, dm.Y_test = dm_test.X_train, y_test

    return {"data_manager": dm}
from autoPyTorch import AutoNetClassification, AutoNetMultilabel
import autoPyTorch.pipeline.nodes as autonet_nodes
from autoPyTorch.components.metrics.additional_logs import test_result
import autoPyTorch.components.metrics as autonet_metrics
from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 5 defined testcases
TEST_CASE = 1

# Test cases configured in this section, keyed by TEST_CASE. Each entry is
# (.info file relative to dataset_dir, metric, additional metrics).
_SPARSE_TEST_CASES = {
    # TEST CASE 1: Sparse data
    1: ("automl/newsgroups/newsgroups_public.info", "pac_metric", ["accuracy"]),
    # TEST CASE 2: Sparse binary data
    2: ("automl/dorothea/dorothea_public.info", "auc_metric", ["accuracy"]),
    # TEST CASE 3: Multilabel, sparse, binary, cv
    3: ("automl/tania/tania_public.info", "pac_metric", []),
}

if TEST_CASE in _SPARSE_TEST_CASES:
    # Load the data first, then publish the metric settings for that case.
    dm.read_data(os.path.join(dataset_dir, _SPARSE_TEST_CASES[TEST_CASE][0]), is_classification=True)
    metric, additional_metrices = _SPARSE_TEST_CASES[TEST_CASE][1:]
import os import sys import logging from autoPyTorch import AutoNetClassification from autoPyTorch.data_management.data_manager import DataManager from autoPyTorch.utils.mem_test_thread import MemoryLogger dm = DataManager(verbose=1) dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'datasets')) dm.read_data(os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv"), is_classification=True) # 5620 samples, 10 classes, 65 features ---> 98% validation accuracy mem_logger = MemoryLogger() mem_logger.start() try: autonet = AutoNetClassification(early_stopping_patience=15, budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='error') res = autonet.fit(X_train=dm.X,
import logging from autoPyTorch import AutoNetClassification from autoPyTorch.data_management.data_manager import DataManager dm = DataManager(verbose=1) dataset_dir = os.path.abspath( os.path.join(os.path.dirname(__file__), '..', '..', 'datasets')) # choose between the 10 classification testcases on real data. TEST_CASE = 4 if TEST_CASE == 1: dm.read_data(os.path.join(dataset_dir, "classification/dataset_22_mfeat-zernike.csv"), is_classification=True) # 2000 samples, 10 classes, 48 features if TEST_CASE == 2: dm.read_data(os.path.join(dataset_dir, "classification/phpbL6t4U.csv"), is_classification=True) # 13910 samples, 6 classes, 128 features if TEST_CASE == 3: dm.read_data(os.path.join(dataset_dir, "classification/php0iVrYT.csv"), is_classification=True) # 748 samples, 2 classes, 4 features if TEST_CASE == 4: dm.read_data(os.path.join(dataset_dir,
# Extend the import path with the directory two levels above this file's
# folder (presumably the repository root, so autoPyTorch resolves; confirm).
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
import logging

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 10 classification testcases on real data.
TEST_CASE = 4

# Identifier strings handed to dm.read_data for the test cases configured in
# this section, keyed by TEST_CASE.
_OPENML_TEST_CASES = {
    1: "openml:22",    # 2000 samples, 10 classes, 48 features
    2: "openml:1476",  # 13910 samples, 6 classes, 128 features
    3: "openml:1464",  # 748 samples, 2 classes, 4 features
    4: "openml:31",
    5: "openml:28",
}

if TEST_CASE in _OPENML_TEST_CASES:
    dm.read_data(_OPENML_TEST_CASES[TEST_CASE], is_classification=True)