示例#1
0
def make_snapshot_simulation(name, identity, data_id, learner, path):
    """Assemble a snapshot-workflow simulation without running it.

    Args:
        name: Name handed to ``autem.Simulation``.
        identity: Mapping; only its ``'dataset'`` entry is read here and
            passed to ``baselines.BaselineStats``.
        data_id: Identifier passed to ``loaders.OpenMLLoader``.
        learner: Key into the module-level ``learner_builders`` mapping; the
            selected builder is called with no arguments.
        path: Passed to ``reporters.Csv``.

    Returns:
        The constructed ``autem.Simulation``.
    """
    # The components are created in the same order the simulation sees them.
    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.SnapshotWorkflow(),
        baselines.BaselineStats(identity['dataset']),
        learner_builders[learner](),
        reporters.Csv(path),
    ]
    return autem.Simulation(name, components)
示例#2
0
def make_short_standard_simulation(name, identity, data_id, learner, path):
    """Assemble a standard-workflow simulation capped at three species.

    Args:
        name: Name handed to ``autem.Simulation``.
        identity: Mapping; only its ``'dataset'`` entry is read here and
            passed to ``baselines.BaselineStats``.
        data_id: Identifier passed to ``loaders.OpenMLLoader``.
        learner: Key into the module-level ``learner_builders`` mapping; the
            selected builder is called with no arguments.
        path: Passed to ``reporters.Csv``.

    Returns:
        The constructed ``autem.Simulation``; it is not run here.
    """
    # The components are created in the same order the simulation sees them.
    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.StandardWorkflow(),
        baselines.BaselineStats(identity['dataset']),
        learner_builders[learner](),
        reporters.Csv(path),
    ]
    built = autem.Simulation(name, components)

    # "Short" variant: the settings wrapper sets max species to 3.
    autem.SimulationSettings(built).set_max_species(3)
    return built
示例#3
0
def run_test_simulation(baseline_name=None, seed=None):
    """Build and run a snapshot test simulation for one baseline.

    Args:
        baseline_name: Baseline to look up via
            ``baselines.get_baseline_configuration``. When ``None`` the value
            of ``get_test_baseline_name()`` is used.
        seed: Seed passed to the simulation. When ``None`` the seed defaults
            to 2 and the experiment label carries no seed suffix.

    Returns:
        None; the simulation is run for its effects.
    """
    if baseline_name is None:
        baseline_name = get_test_baseline_name()

    # The experiment label only gets a "_<seed>" suffix for an explicit seed,
    # so it has to be derived before the default seed is filled in.
    if seed is None:
        experiment = baseline_name
    else:
        experiment = "%s_%d" % (baseline_name, seed)

    study = get_test_study()
    if seed is None:
        seed = 2
    version = get_test_version()

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_dir = get_test_simulations_path().joinpath(study)
    path = study_dir.joinpath(experiment)
    memory = str(path.joinpath("cache"))

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    simulation_name = "Test %s_%s_v%d" % (study, experiment, version)
    identity = {
        "study": study,
        "experiment": experiment,
        "dataset": dataset_name,
        "version": version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Snapshot(),
        baselines.BaselineStats(baseline_name),
        reporters.Csv(path),
        hyper_learners.ClassificationSnapshot(),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
        memory=memory,
    )
    simulation.run()
示例#4
0
def run_balance_scale_mastery(seed):
    """Run the mastery-workflow simulation for the "balance-scale" baseline.

    Args:
        seed: Seed applied through ``SimulationSettings.set_seed``; it is
            also formatted with ``%d`` into the experiment label.

    Returns:
        None; the simulation is run for its effects.
    """
    baseline_name = "balance-scale"
    study = "DEV"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_dir = benchmark.get_simulations_path().joinpath(study)
    path = study_dir.joinpath(experiment)

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        "study": study,
        "experiment": experiment,
        "dataset": dataset_name,
        "version": version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score),
        workflows.MasteryWorkflow(),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(simulation_name, components)

    # Run-time options are applied through the settings wrapper.
    options = autem.SimulationSettings(simulation)
    options.set_identity(identity)
    options.set_n_jobs(4)
    options.set_seed(seed)

    simulation.run()
示例#5
0
def run_cylinder_bands_mastery():
    """Run the mastery simulation for the "cylinder-bands" baseline.

    Uses a fixed seed of 1, the study label "DEV" and four jobs. The
    workflow is ``workflows.Mastery(["Learner"])`` with the
    ``hyper_learners.ClassificationSVM`` learner.

    Returns:
        None; the simulation is run for its effects.
    """
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name  # the experiment is named after the baseline
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_dir = benchmark.get_simulations_path().joinpath(study)
    path = study_dir.joinpath(experiment)

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        "study": study,
        "experiment": experiment,
        "dataset": dataset_name,
        "version": version,
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Mastery(["Learner"]),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()
示例#6
0
def make_standard_simulation(study, baseline_name, hyperlearner):
    """Build and configure a standard-workflow simulation without running it.

    Args:
        study: Study label; used in the simulation name, the output path and
            the identity mapping.
        baseline_name: Name used to look up the hyper configuration (which
            supplies the OpenML ``task_id``); also used as the dataset label
            and as the last component of the output path.
        hyperlearner: Key into the module-level ``learner_builders`` mapping;
            the selected builder is called with no arguments. The key itself
            is recorded in the identity mapping.

    Returns:
        The configured ``autem.Simulation``.
    """
    prepare_OpenML()

    hyper_configuration = configuration.get_hyper_configuration(baseline_name)
    task_id = hyper_configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    name = "%s %s" % (baseline_name, study)
    path = configuration.get_hyper_simulations_path().joinpath(study).joinpath(
        baseline_name)
    n_jobs = 4
    seed = 1
    memory = str(path.joinpath("cache"))

    identity = {
        'study': study,
        'dataset': baseline_name,
        'scorer': 'League1x10',
        'workflow': 'standard',
        'learner': hyperlearner,
    }

    simulation = autem.Simulation(name, [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.StandardWorkflow(),
        learner_builders[hyperlearner](),
        reporters.Csv(path),
    ])

    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    # Pass the named local rather than repeating the literal, so the job
    # count has a single source of truth.
    settings.set_n_jobs(n_jobs)
    settings.set_seed(seed)
    settings.set_memory(memory)

    return simulation
示例#7
0
def run_cylinder_bands_custom():
    """Run a hand-assembled pipeline simulation for "cylinder-bands".

    Uses a fixed seed of 1, the study label "DEV" and four jobs. The
    pipeline is given as explicit ``autem.Choice`` groups ("Scaler",
    "Selector", "Reducer", "Approximator", "Learner") instead of a
    prepackaged hyper learner.

    Returns:
        None; the simulation is run for its effects.
    """
    seed = 1
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name  # the experiment is named after the baseline
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_dir = benchmark.get_simulations_path().joinpath(study)
    path = study_dir.joinpath(experiment)

    # Resolve the OpenML dataset behind the baseline's task.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        "study": study,
        "experiment": experiment,
        "dataset": dataset_name,
        "version": version,
    }

    # Each stage is bound to a name in the order it is handed to the
    # simulation, so construction order is unchanged.
    loader = loaders.OpenMLLoader(data_id)
    scorer = scorers.Accuracy()
    workflow = workflows.Standard()
    baseline_stats = baselines.BaselineStats(baseline_name)

    scaler_choice = autem.Choice("Scaler", [
        preprocessors.RobustScaler(),
        preprocessors.StandardScaler(),
        preprocessors.BoxCoxTransform(),
        preprocessors.YeoJohnsonTransform(),
    ])

    # Feature selectors (disabled alternatives kept for reference).
    selector_choice = autem.Choice("Selector", [
        # preprocessors.NoSelector(),
        # preprocessors.SelectPercentile(),
        preprocessors.VarianceThreshold(),
    ])

    # Feature reducers (disabled alternatives kept for reference).
    reducer_choice = autem.Choice("Reducer", [
        preprocessors.NoReducer(),
        # preprocessors.FastICA(),
        # preprocessors.FeatureAgglomeration(),
        # preprocessors.PCA(),
    ])

    approximator_choice = autem.Choice("Approximator", [
        preprocessors.NoApproximator(),
    ])

    # Learners (disabled alternatives kept for reference).
    learner_choice = autem.Choice("Learner", [
        # learners.LinearSVC(),
        learners.PolySVC(),
        # learners.RadialBasisSVC(),
    ])

    reporter = reporters.Csv(path)

    components = [
        loader,
        scorer,
        workflow,
        baseline_stats,
        scaler_choice,
        selector_choice,
        reducer_choice,
        approximator_choice,
        learner_choice,
        reporter,
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()