def fit(self, pipeline_config, instance):
    """Build a DataManager for the given instance and read its data.

    Only feature-based problem types are accepted; classification and
    multilabel problems are flagged so targets get classification handling.
    Returns a dict exposing the filled manager as "data_manager".
    """
    supported_types = ('feature_classification', 'feature_multilabel',
                       'feature_regression')
    assert pipeline_config['problem_type'] in supported_types

    classification_types = ('feature_classification', 'feature_multilabel')
    data_manager = DataManager(verbose=pipeline_config["data_manager_verbose"])
    data_manager.read_data(
        instance,
        is_classification=pipeline_config["problem_type"] in classification_types,
        test_split=pipeline_config["test_split"])
    return {"data_manager": data_manager}
# ---- Example 2 ----
    def fit(self, pipeline_config, instance):
        """Create and fill a data manager for train, val and test data.

        Feature-based problem types use a ``DataManager``; anything else
        (image data) uses an ``ImageManager``.  When explicit test instances
        are configured, they are loaded through a second manager and attached
        as the test split; otherwise a random test split of size
        ``pipeline_config["test_split"]`` is taken from the training data.
        Returns ``{"data_manager": dm}``.
        """
        feature_types = ('feature_classification', 'feature_multilabel',
                         'feature_regression')
        # Single source of truth for the classification check — the original
        # repeated this membership list three times.
        is_classification = pipeline_config["problem_type"] in (
            'feature_classification', 'feature_multilabel', 'image_classification')

        manager_class = (DataManager
                         if pipeline_config['problem_type'] in feature_types
                         else ImageManager)
        dm = manager_class(verbose=pipeline_config["data_manager_verbose"])

        if pipeline_config['test_instances'] is not None:
            # Use the given test set: read training data without splitting ...
            dm.read_data(instance,
                         is_classification=is_classification,
                         test_split=0.0)
            # ... and load the test instances through a second manager.
            dm_test = manager_class(verbose=pipeline_config["data_manager_verbose"])
            dm_test.read_data(pipeline_config['test_instances'],
                              is_classification=is_classification,
                              test_split=0.0)
            dm.X_test = dm_test.X_train
            # BUG FIX: only cast targets to int for classification problems.
            # The original cast unconditionally, which would corrupt
            # continuous regression targets to int32.
            dm.Y_test = (dm_test.Y_train.astype(np.int32)
                         if is_classification else dm_test.Y_train)
        else:
            # Use a random test split of the training data.
            dm.read_data(instance,
                         is_classification=is_classification,
                         test_split=pipeline_config["test_split"])

        return {"data_manager": dm}
# ---- Example 3 ----
from autoPyTorch import AutoNetClassification, AutoNetMultilabel
import autoPyTorch.pipeline.nodes as autonet_nodes
from autoPyTorch.components.metrics.additional_logs import test_result
import autoPyTorch.components.metrics as autonet_metrics

from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 5 defined testcases
TEST_CASE = 1

# Per test case: (info file relative to dataset_dir, metric, extra metrics).
_TEST_CASES = {
    # TEST CASE 1: Sparse data
    1: ("automl/newsgroups/newsgroups_public.info", "pac_metric", ["accuracy"]),
    # TEST CASE 2: Sparse binary data
    2: ("automl/dorothea/dorothea_public.info", "auc_metric", ["accuracy"]),
    # TEST CASE 3: Multilabel, sparse, binary, cv
    3: ("automl/tania/tania_public.info", "pac_metric", []),
}

if TEST_CASE in _TEST_CASES:
    info_file, metric, additional_metrices = _TEST_CASES[TEST_CASE]
    dm.read_data(os.path.join(dataset_dir, info_file), is_classification=True)
# ---- Example 4 ----
import os
import sys
import logging

from autoPyTorch import AutoNetClassification

from autoPyTorch.data_management.data_manager import DataManager

from autoPyTorch.utils.mem_test_thread import MemoryLogger

dm = DataManager(verbose=1)
_base_dir = os.path.dirname(__file__)
dataset_dir = os.path.abspath(os.path.join(_base_dir, '..', 'datasets'))

# optdigits: 5620 samples, 10 classes, 65 features ---> 98% validation accuracy
_csv_path = os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv")
dm.read_data(_csv_path, is_classification=True)

# Track memory usage of the run in a background thread.
mem_logger = MemoryLogger()
mem_logger.start()

try:
    autonet = AutoNetClassification(early_stopping_patience=15, budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='error')

    res = autonet.fit(X_train=dm.X,
import logging

from autoPyTorch import AutoNetClassification

from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 10 classification testcases on real data.
TEST_CASE = 4

# CSV file per test case, relative to dataset_dir.
_CSV_BY_CASE = {
    1: "classification/dataset_22_mfeat-zernike.csv",  # 2000 samples, 10 classes, 48 features
    2: "classification/phpbL6t4U.csv",                 # 13910 samples, 6 classes, 128 features
    3: "classification/php0iVrYT.csv",                 # 748 samples, 2 classes, 4 features
}

if TEST_CASE in _CSV_BY_CASE:
    dm.read_data(os.path.join(dataset_dir, _CSV_BY_CASE[TEST_CASE]),
                 is_classification=True)

if TEST_CASE == 4:
    dm.read_data(os.path.join(dataset_dir,
# ---- Example 6 ----
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
import logging

from autoPyTorch import AutoNetClassification

from autoPyTorch.data_management.data_manager import DataManager

dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..', 'datasets'))

# choose between the 10 classification testcases on real data.
TEST_CASE = 4

# OpenML dataset id per test case.
_OPENML_BY_CASE = {
    1: "openml:22",    # 2000 samples, 10 classes, 48 features
    2: "openml:1476",  # 13910 samples, 6 classes, 128 features
    3: "openml:1464",  # 748 samples, 2 classes, 4 features
    4: "openml:31",
    5: "openml:28",
}

if TEST_CASE in _OPENML_BY_CASE:
    dm.read_data(_OPENML_BY_CASE[TEST_CASE], is_classification=True)