Example #1
# (Test method from a unittest.TestCase; classification_components is
# autosklearn.pipeline.components.classification.)
def test_add_classifier(self):
    # The add-on registry starts out empty.
    self.assertEqual(len(classification_components._addons.components), 0)
    # Registering a custom component adds it to the registry...
    classification_components.add_classifier(DummyClassifier)
    self.assertEqual(len(classification_components._addons.components), 1)
    # ...and it then appears in the pipeline's hyperparameter search space.
    cs = SimpleClassificationPipeline().get_hyperparameter_search_space()
    self.assertIn('DummyClassifier', str(cs))
    # Clean up the module-level registry so other tests start fresh.
    del classification_components._addons.components['DummyClassifier']
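The test registers a DummyClassifier component. For context, here is a minimal sketch of what such a component could look like, following auto-sklearn's documented component API (details vary by version; the class body below is illustrative, not the project's actual test fixture):

from ConfigSpace.configuration_space import ConfigurationSpace
from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, PREDICTIONS


class DummyClassifier(AutoSklearnClassificationAlgorithm):
    def __init__(self, random_state=None):
        self.estimator = None

    def fit(self, X, y):
        # Wrap any sklearn-style estimator; a majority-class baseline here.
        from sklearn.dummy import DummyClassifier as SKDummy
        self.estimator = SKDummy(strategy='most_frequent')
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        return self.estimator.predict(X)

    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'Dummy', 'name': 'DummyClassifier',
                'handles_regression': False, 'handles_classification': True,
                'handles_multiclass': True, 'handles_multilabel': False,
                'is_deterministic': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        # No tunable hyperparameters for this baseline.
        return ConfigurationSpace()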
Example #3
# -*- encoding: utf-8 -*-

import numpy as np
from autosklearn.pipeline.components.classification import add_classifier
import time
import autosklearn.automl as autosk
from autosklearn.constants import BINARY_CLASSIFICATION
from component import DeepFeedNet

dataset_dir = 'PATH/TO/DATASET/'  # placeholder: the path was elided in the original

# Load our training data
X_train = np.load(dataset_dir + 'train.npy')
y_train = np.load(dataset_dir + 'train_labels.npy')

add_classifier(DeepFeedNet.DeepFeedNet)

ensemble_size = 1  # note: unused here; ensemble_size=0 is passed directly below
# Create model
modl = autosk.AutoML(time_left_for_this_task=600, per_run_time_limit=90,
                     delete_tmp_folder_after_terminate=False,
                     tmp_dir='tmp/autosk_tmp', output_dir='tmp/autosk_out',
                     log_dir='tmp/autosk_log',
                     include_estimators=['DeepFeedNet'],
                     include_preprocessors=['NoPreprocessing'],
                     ensemble_size=0,
                     ensemble_nbest=0,
                     initial_configurations_via_metalearning=0,
                     seed=50,
                     ml_memory_limit=2048,
                     metadata_directory=None)
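The snippet is cut off after the constructor. A plausible continuation, sketched under the assumption that this era's low-level autosklearn.automl.AutoML exposes fit(X, y, task=...) (the BINARY_CLASSIFICATION import suggests as much):

# Hypothetical continuation: fit on the loaded arrays, then predict.
# (The exact fit() signature varies across old auto-sklearn versions.)
modl.fit(X_train, y_train, task=BINARY_CLASSIFICATION)

X_test = np.load(dataset_dir + 'test.npy')  # assumed file name
print(modl.predict(X_test))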
Example #4
import datetime
import gc
import math
import multiprocessing
import multiprocessing.pool
import time
from collections import defaultdict  # needed by _name_estimators below
from multiprocessing import Queue
from typing import Callable

import pandas as pd
import six
from six.moves import queue
from optuna import trial as trial_module  # assumed: trial_module is optuna's trial module
from sklearn.utils import check_X_y

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.pipeline.components import classification

gc.enable()


# LogisticRegressionSK and LogisticRegressionSMAC are custom component
# classes defined elsewhere in the original project.
classification.add_classifier(LogisticRegressionSK)
classification.add_classifier(LogisticRegressionSMAC)

ObjectiveFuncType = Callable[[trial_module.Trial], float]


def _name_estimators(estimators):
    """Generate names for estimators."""

    names = [type(estimator).__name__.lower() for estimator in estimators]
    namecount = defaultdict(int)
    for est, name in zip(estimators, names):
        namecount[name] += 1

    # Unique names stay plain; duplicates get numeric suffixes below.
    # (Truncated in the original; completed after scikit-learn's helper.)
    for k, v in list(six.iteritems(namecount)):
        if v == 1:
            del namecount[k]

    for i in reversed(range(len(estimators))):
        name = names[i]
        if name in namecount:
            names[i] += "-%d" % namecount[name]
            namecount[name] -= 1

    return list(zip(names, estimators))
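For reference, a quick illustration of what the completed helper returns (a sketch; any sklearn estimators work):

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# -> [('logisticregression-1', ...), ('logisticregression-2', ...),
#     ('decisiontreeclassifier', ...)]
print(_name_estimators([LogisticRegression(), LogisticRegression(),
                        DecisionTreeClassifier()]))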
Example #5
import pickle

import numpy as np
from sklearn.metrics import balanced_accuracy_score

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.pipeline.components.classification import add_classifier

# load_train_test_data, CLS_TASKS, BINARY_CLS, MULTICLASS_CLS, CATEGORICAL,
# LightGBM, Logistic_Regression and save_dir are project-specific names
# assumed to be defined elsewhere in the original repository.


def evaluate_ml_algorithm(dataset,
                          algo,
                          run_id,
                          obj_metric,
                          time_limit=600,
                          seed=1,
                          task_type=None):
    if algo == 'lightgbm':
        _algo = ['LightGBM']
        add_classifier(LightGBM)
    elif algo == 'logistic_regression':
        _algo = ['Logistic_Regression']
        add_classifier(Logistic_Regression)
    else:
        _algo = [algo]
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        task_type = BINARY_CLS if len(set(
            train_data.data[1])) == 2 else MULTICLASS_CLS
    print(set(train_data.data[1]))

    raw_data, test_raw_data = load_train_test_data(dataset,
                                                   task_type=MULTICLASS_CLS)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in raw_data.feature_types
    ]
    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl = AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=180,
        n_jobs=1,
        include_estimators=_algo,
        initial_configurations_via_metalearning=0,
        ensemble_memory_limit=16384,
        ml_memory_limit=16384,
        # tmp_folder='/var/folders/0t/mjph32q55hd10x3qr_kdd2vw0000gn/T/autosklearn_tmp',
        ensemble_size=1,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67})
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    print('Eval num: %d' % (len(valid_results)))

    validation_score = np.max(valid_results)

    # Test performance.
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = balanced_accuracy_score(y_test, predictions)

    # Print statistics about the auto-sklearn run such as number of
    # iterations, number of models failed with a time out.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_score)
    print("Test Accuracy      :", test_score)

    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (dataset, algo, obj_metric,
                                                   run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, validation_score, test_score, task_type],
                    f)
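A call such as the following would run a single benchmark; the dataset and metric names are placeholders, since the original driver code is not shown:

if __name__ == '__main__':
    # Hypothetical invocation; 'pc4' and 'bal_acc' are illustrative values.
    evaluate_ml_algorithm(dataset='pc4', algo='lightgbm', run_id=0,
                          obj_metric='bal_acc', time_limit=600, seed=1)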