Exemplo n.º 1
0
def run_benchmark_simulation(study, baseline_name):
    """Build and run the light OpenML classifier benchmark for one baseline.

    The baseline name is also used as the experiment name. The simulation
    is created with seed 1 and a population size of 20, then run with
    200 steps, 25 epochs and a ``max_time`` of 60 * 60 (presumably
    seconds -- confirm against ``run_simulation``). Finally the combined
    reports under the simulation path are updated.

    Args:
        study: Study name; first path component below the simulations root.
        baseline_name: Name used to look up the baseline configuration.
    """
    # Fixed run parameters for this benchmark.
    seed, population_size = 1, 20
    epochs, steps = 25, 200
    max_time = 60 * 60

    experiment = baseline_name
    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]
    output_path = get_simulations_path().joinpath(study).joinpath(experiment)

    utility.prepare_OpenML()
    simulation = make_openml_light_classifier_simulation(
        study,
        experiment,
        baseline_name,
        task_id,
        seed,
        population_size,
        output_path,
    )
    run_simulation(simulation, steps, epochs, max_time)

    report_manager = autem.ReportManager(output_path)
    report_manager.update_combined_reports()
Exemplo n.º 2
0
def run_test_simulation(baseline_name=None, seed=None):
    """Run a snapshot test simulation for a baseline.

    Args:
        baseline_name: Baseline to use; when None the value returned by
            ``get_test_baseline_name()`` is used.
        seed: Seed for the simulation. When None the experiment is named
            after the baseline alone and seed 2 is used; otherwise the
            experiment name is ``"<baseline>_<seed>"``.
    """
    if baseline_name is None:
        baseline_name = get_test_baseline_name()

    # The experiment name only carries the seed when one was given
    # explicitly, so it must be derived before the default is applied.
    if seed is None:
        experiment = baseline_name
        seed = 2
    else:
        experiment = "%s_%d" % (baseline_name, seed)

    study = get_test_study()
    version = get_test_version()

    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = get_test_simulations_path().joinpath(study).joinpath(experiment)
    cache_location = str(output_path.joinpath("cache"))

    # Resolve the OpenML task into its dataset id and dataset name.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    simulation_name = "Test %s_%s_v%d" % (study, experiment, version)
    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Snapshot(),
        baselines.BaselineStats(baseline_name),
        reporters.Csv(output_path),
        hyper_learners.ClassificationSnapshot(),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
        memory=cache_location,
    )
    simulation.run()
Exemplo n.º 3
0
def run_balance_scale_mastery(seed):
    """Run the mastery workflow with an SVM learner on "balance-scale".

    The run belongs to the "DEV" study; the experiment name embeds the
    baseline name and the seed. Results are reported as CSV under the
    benchmark simulations path.

    Args:
        seed: Integer seed; used in the experiment name and applied to
            the simulation settings.
    """
    study = "DEV"
    baseline_name = "balance-scale"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path().joinpath(study).joinpath(experiment)
    )

    # Resolve the OpenML task into its dataset id and dataset name.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score),
        workflows.MasteryWorkflow(),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(output_path),
    ]
    simulation = autem.Simulation(simulation_name, components)

    # Identity, parallelism and seed are applied through the settings
    # object rather than constructor arguments.
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(4)
    settings.set_seed(seed)

    simulation.run()
Exemplo n.º 4
0
def make_benchmark_simulation(study, baseline_name, configuration, learner):
    """Build (without running) the benchmark simulation for a baseline.

    Args:
        study: Study name; first path component below the simulations root.
        baseline_name: Name used to look up the baseline configuration; it
            is also used as the experiment name and as the ``dataset``
            identity entry.
        configuration: Key into ``simulation_builders``. When None, the
            baseline's own ``"Configuration"`` entry is used.
        learner: Learner passed to the simulation builder. When None, the
            baseline's own ``"Learner"`` entry is used.

    Returns:
        The simulation produced by the selected builder, with identity,
        n_jobs, seed and memory applied through its settings; or None
        (after printing a message) when ``configuration`` is not a key of
        ``simulation_builders``.
    """
    experiment = baseline_name
    baseline_configuration = baselines.get_baseline_configuration(baseline_name)

    if configuration is None:
        configuration = baseline_configuration["Configuration"]
    if learner is None:
        learner = baseline_configuration["Learner"]

    # Reject unknown configurations before doing any OpenML lookup, so an
    # invalid request does not cost a remote call.
    if configuration not in simulation_builders:
        print("Baseline %s configuration %s does not exist" % (baseline_name, configuration))
        return None

    # OpenML is prepared before the first OpenML call in this function.
    utility.prepare_OpenML()
    task_id = baseline_configuration["task_id"]
    data_id = openml.tasks.get_task(task_id).dataset_id
    version = get_version()

    # The quotes are part of the simulation name itself.
    name = "'%s_%s_%s v%d'" % (study, experiment, configuration, version)
    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': baseline_name,
        'version': version,
        'configuration': configuration,
    }

    n_jobs = get_n_jobs()
    seed = 1
    path = get_simulations_path().joinpath(study).joinpath(experiment)
    memory = str(path.joinpath("cache"))

    simulation_builder = simulation_builders[configuration]
    simulation = simulation_builder(name, identity, data_id, learner, path)

    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    # Use the value from get_n_jobs() rather than a hard-coded job count.
    settings.set_n_jobs(n_jobs)
    settings.set_seed(seed)
    settings.set_memory(memory)
    return simulation
Exemplo n.º 5
0
def run_cylinder_bands_mastery():
    """Run the mastery workflow with an SVM learner on "cylinder-bands".

    The run belongs to the "DEV" study, uses the baseline name as the
    experiment name, seed 1 and ``n_jobs=4``, and reports results as CSV
    under the benchmark simulations path.
    """
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    seed = 1
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path().joinpath(study).joinpath(experiment)
    )

    # Resolve the OpenML task into its dataset id and dataset name.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Mastery(["Learner"]),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(output_path),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()
Exemplo n.º 6
0
import sklearn.compose
import sklearn.preprocessing
import sklearn.impute
import sklearn.feature_selection
import sklearn.tree
import sklearn.svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, make_scorer
import benchmark.utility as utility
import pandas as pd
import numpy as np
import json
from pprint import pprint

# Runs once at import time, before any of the definitions below are used.
# NOTE(review): presumably sets up the OpenML client configuration --
# confirm in benchmark.utility.
utility.prepare_OpenML()

class ComponentReference:
    """Value holder for a key, a step name and one optional argument."""

    def __init__(self, key, step_name, argument_1=None):
        # The arguments are stored unchanged under the same names.
        self.key, self.step_name, self.argument_1 = key, step_name, argument_1

class BaseConverter:
    """Base class for converters identified by a class name.

    ``convert`` is not implemented here and always raises
    ``NotImplementedError``; subclasses are expected to override it.
    """

    def __init__(self, class_name, factory=None):
        # NOTE(review): ``factory`` is accepted but neither stored nor
        # used by this base class.
        self.class_name = class_name

    def convert(self, component):
        """Convert ``component``; must be provided by a subclass."""
        raise NotImplementedError
Exemplo n.º 7
0
def run_cylinder_bands_custom():
    """Run a hand-assembled pipeline simulation on "cylinder-bands".

    The run belongs to the "DEV" study, uses the baseline name as the
    experiment name, seed 1 and ``n_jobs=4``. The pipeline is described
    by one ``autem.Choice`` per stage (scaler, selector, reducer,
    approximator, learner); results are reported as CSV under the
    benchmark simulations path.
    """
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    seed = 1
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    output_path = (
        benchmark.get_simulations_path().joinpath(study).joinpath(experiment)
    )

    # Resolve the OpenML task into its dataset id and dataset name.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version,
    }

    # Components are created in the same order in which they are handed
    # to the simulation.
    loader = loaders.OpenMLLoader(data_id)
    scorer = scorers.Accuracy()
    workflow = workflows.Standard()
    baseline_stats = baselines.BaselineStats(baseline_name)

    scaler_choice = autem.Choice("Scaler", [
        preprocessors.RobustScaler(),
        preprocessors.StandardScaler(),
        preprocessors.BoxCoxTransform(),
        preprocessors.YeoJohnsonTransform(),
    ])

    # Only VarianceThreshold is enabled; NoSelector and SelectPercentile
    # are currently switched off.
    selector_choice = autem.Choice("Selector", [
        preprocessors.VarianceThreshold(),
    ])

    # Only NoReducer is enabled; FastICA, FeatureAgglomeration and PCA
    # are currently switched off.
    reducer_choice = autem.Choice("Reducer", [
        preprocessors.NoReducer(),
    ])

    approximator_choice = autem.Choice("Approximator", [
        preprocessors.NoApproximator(),
    ])

    # Only PolySVC is enabled; LinearSVC and RadialBasisSVC are currently
    # switched off.
    learner_choice = autem.Choice("Learner", [
        learners.PolySVC(),
    ])

    reporter = reporters.Csv(output_path)

    components = [
        loader,
        scorer,
        workflow,
        baseline_stats,
        scaler_choice,
        selector_choice,
        reducer_choice,
        approximator_choice,
        learner_choice,
        reporter,
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()