示例#1
0
def make_snapshot_simulation(name, identity, data_id, learner, path):
    """Build (without running) a simulation that uses the snapshot workflow.

    Args:
        name: Name handed to ``autem.Simulation``.
        identity: Mapping; only its ``'dataset'`` entry is read here and it
            is forwarded to ``baselines.BaselineStats``.
        data_id: Identifier passed to ``loaders.OpenMLLoader``.
        learner: Key into the ``learner_builders`` mapping defined outside
            this function; the builder found there is called with no
            arguments to produce the learner component.
        path: Destination passed to ``reporters.Csv``.

    Returns:
        The constructed ``autem.Simulation`` instance.
    """
    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.SnapshotWorkflow(),
        baselines.BaselineStats(identity['dataset']),
        learner_builders[learner](),
        reporters.Csv(path),
    ]
    return autem.Simulation(name, components)
示例#2
0
def make_short_standard_simulation(name, identity, data_id, learner, path):
    """Build (without running) a standard-workflow simulation with 3 species.

    Args:
        name: Name handed to ``autem.Simulation``.
        identity: Mapping; only its ``'dataset'`` entry is read here and it
            is forwarded to ``baselines.BaselineStats``.
        data_id: Identifier passed to ``loaders.OpenMLLoader``.
        learner: Key into the ``learner_builders`` mapping defined outside
            this function; the builder found there is called with no
            arguments to produce the learner component.
        path: Destination passed to ``reporters.Csv``.

    Returns:
        The constructed ``autem.Simulation`` instance, after
        ``set_max_species(3)`` has been applied through its settings object.
    """
    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.StandardWorkflow(),
        baselines.BaselineStats(identity['dataset']),
        learner_builders[learner](),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(name, components)

    # Settings are applied through a SimulationSettings wrapper around the
    # simulation rather than through constructor keywords.
    autem.SimulationSettings(simulation).set_max_species(3)
    return simulation
示例#3
0
def run_test_simulation(baseline_name=None, seed=None):
    """Assemble and run a snapshot simulation for a test baseline.

    Args:
        baseline_name: Baseline to run. When ``None`` the value returned by
            ``get_test_baseline_name()`` is used.
        seed: Optional integer seed. When given, it is appended to the
            experiment name; when ``None`` the experiment name is just the
            baseline name and the simulation is seeded with 2.

    Returns:
        None. The simulation is run for its side effects.
    """
    if baseline_name is None:
        baseline_name = get_test_baseline_name()

    # The experiment label only carries a seed suffix when the caller
    # supplied one; the fallback seed of 2 never appears in the label.
    if seed is None:
        experiment = baseline_name
    else:
        experiment = "%s_%d" % (baseline_name, seed)

    study = get_test_study()
    if seed is None:
        seed = 2
    version = get_test_version()

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_path = get_test_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)
    memory = str(path.joinpath("cache"))

    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    simulation_name = "Test %s_%s_v%d" % (study, experiment, version)
    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Snapshot(),
        baselines.BaselineStats(baseline_name),
        reporters.Csv(path),
        hyper_learners.ClassificationSnapshot(),
    ]
    autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
        memory=memory,
    ).run()
示例#4
0
def run_balance_scale_mastery(seed):
    """Run a mastery-workflow SVM simulation on the "balance-scale" baseline.

    Args:
        seed: Integer seed. It is formatted with ``%d`` into the experiment
            name and passed to ``settings.set_seed``.

    Returns:
        None. The simulation is run for its side effects.
    """
    study = "DEV"
    baseline_name = "balance-scale"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score),
        workflows.MasteryWorkflow(),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(simulation_name, components)

    # Run-time options are applied through the settings wrapper.
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(4)
    settings.set_seed(seed)

    simulation.run()
示例#5
0
def run_cylinder_bands_mastery():
    """Run a mastery-workflow SVM simulation on the "cylinder-bands" baseline.

    The study ("DEV"), seed (1) and job count (4) are fixed inside the
    function; the experiment name equals the baseline name.

    Returns:
        None. The simulation is run for its side effects.
    """
    seed = 1
    study = "DEV"
    baseline_name = experiment = "cylinder-bands"
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Mastery(["Learner"]),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    ).run()
示例#6
0
def make_standard_simulation(study, baseline_name, hyperlearner):
    """Build (without running) a standard-workflow simulation for a baseline.

    Args:
        study: Study label; used in the simulation name, the output path and
            the identity mapping.
        baseline_name: Baseline whose hyper configuration supplies the
            OpenML ``task_id``; also used as the ``'dataset'`` identity
            value and as the last component of the output path.
        hyperlearner: Key into the ``learner_builders`` mapping defined
            outside this function; the builder found there is called with no
            arguments. The key itself is recorded as the ``'learner'``
            identity value.

    Returns:
        The constructed ``autem.Simulation`` with identity, job count, seed
        and memory location applied through ``autem.SimulationSettings``.
    """
    prepare_OpenML()

    hyper_configuration = configuration.get_hyper_configuration(baseline_name)
    task_id = hyper_configuration["task_id"]
    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    name = "%s %s" % (baseline_name, study)
    path = configuration.get_hyper_simulations_path().joinpath(study).joinpath(
        baseline_name)
    n_jobs = 4
    seed = 1
    # The cache directory lives next to the CSV output for this baseline.
    memory = str(path.joinpath("cache"))

    identity = {
        'study': study,
        'dataset': baseline_name,
        'scorer': 'League1x10',
        'workflow': 'standard',
        'learner': hyperlearner,
    }

    simulation = autem.Simulation(name, [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score, [[1, 4, 5]]),
        workflows.StandardWorkflow(),
        learner_builders[hyperlearner](),
        reporters.Csv(path),
    ])

    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    # Use the local defined above instead of repeating the literal, so the
    # job count has a single source of truth.
    settings.set_n_jobs(n_jobs)
    settings.set_seed(seed)
    settings.set_memory(memory)

    return simulation
示例#7
0
def make_openml_light_classifier_simulation(study, experiment, baseline_name, task_id, seed, population_size, path, properties=None):
    """Build (without running) a classifier simulation for an OpenML task.

    Args:
        study: Study label; used in the simulation name and stored under
            the ``'study'`` property.
        experiment: Experiment label; used in the simulation name and stored
            under the ``'experiment'`` property.
        baseline_name: Name passed to ``baselines.BaselineStats``.
        task_id: OpenML task id; used to look up the dataset and passed to
            ``evaluators.OpenMLRater``.
        seed: Seed forwarded to ``autem.Simulation``.
        population_size: Population size forwarded to ``autem.Simulation``.
        path: Output location passed to ``reporters.Path``.
        properties: Optional mapping of extra properties. It is copied, so
            the caller's mapping is never modified; the ``'study'``,
            ``'experiment'``, ``'dataset'`` and ``'version'`` keys are
            always overwritten in the copy.

    Returns:
        The constructed ``autem.Simulation`` (``n_jobs`` is fixed at 6).
    """
    # A fresh dict per call: a mutable default (or the caller's own dict)
    # would otherwise be shared and overwritten across simulations.
    properties = {} if properties is None else dict(properties)

    task = openml.tasks.get_task(task_id)
    data_id = task.dataset_id
    dataset = openml.datasets.get_dataset(data_id)
    dataset_name = dataset.name
    version = get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)
    properties['study'] = study
    properties['experiment'] = experiment
    properties['dataset'] = dataset_name
    properties['version'] = version

    simulation = autem.Simulation(
        simulation_name,
        [
            loaders.OpenMLLoader(data_id),
            scorers.Accuracy(),

            evaluators.ChoicePredictedScoreEvaluator(),
            makers.TopChoiceMaker(),

            evaluators.ScoreEvaluator(),
            evaluators.AccuracyContest(),
            evaluators.DiverseContest(0.99),
            evaluators.VotingContest(),
            evaluators.SurvivalJudge(),
            evaluators.PromotionJudge(),

            evaluators.CrossValidationRater(),
            evaluators.OpenMLRater(task_id),
            evaluators.DummyClassifierAccuracy(),
            evaluators.ValidationAccuracy(),
            baselines.BaselineStats(baseline_name),

            reporters.Path(path),

            # Scalers
            autem.Choice("Scaler", [
                preprocessors.MaxAbsScaler(),
                preprocessors.MinMaxScaler(),
                preprocessors.Normalizer(),
                preprocessors.RobustScaler(),
                preprocessors.StandardScaler(),
                preprocessors.Binarizer(),
                preprocessors.BoxCoxTransform(),
                preprocessors.YeoJohnsonTransform(),
            ]),

            # Feature Selectors
            autem.Choice("Selector", [
                preprocessors.NoSelector(),
                preprocessors.SelectPercentile(),
                preprocessors.VarianceThreshold(),
            ]),

            # Feature Reducers
            autem.Choice("Reducer", [
                preprocessors.NoReducer(),
                preprocessors.FastICA(),
                preprocessors.FeatureAgglomeration(),
                preprocessors.PCA(),
            ]),

            # Approximators
            autem.Choice("Approximator", [
                preprocessors.NoApproximator(),
                preprocessors.RBFSampler(),
                preprocessors.Nystroem(),
            ]),

            # Learners
            autem.Choice("Learner", [
                learners.GaussianNB(),
                learners.BernoulliNB(),
                learners.MultinomialNB(),
                learners.DecisionTreeClassifier(),
                learners.KNeighborsClassifier(),
                learners.LinearSVC(),
                learners.RadialBasisSVC(),
                learners.PolySVC(),
                learners.LogisticRegression(),
                learners.LinearDiscriminantAnalysis(),

                learners.RandomForestClassifier(),
                learners.ExtraTreesClassifier(),
            ]),
        ],
        population_size=population_size,
        seed=seed,
        properties=properties,
        n_jobs=6)
    return simulation
示例#8
0
def run_cylinder_bands_custom():
    """Run a hand-restricted simulation on the "cylinder-bands" baseline.

    The study ("DEV"), seed (1) and job count (4) are fixed inside the
    function; the experiment name equals the baseline name. Each pipeline
    stage is an ``autem.Choice`` limited to the options listed below.

    Returns:
        None. The simulation is run for its side effects.
    """
    seed = 1
    study = "DEV"
    baseline_name = experiment = "cylinder-bands"
    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    configuration = baselines.get_baseline_configuration(baseline_name)
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    utility.prepare_OpenML()
    task = openml.tasks.get_task(configuration["task_id"])
    data_id = task.dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = dict(
        study=study,
        experiment=experiment,
        dataset=dataset_name,
        version=version,
    )

    # Components are created in the same order they are handed to the
    # simulation.
    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Standard(),
        baselines.BaselineStats(baseline_name),
    ]

    # Scalers
    components.append(autem.Choice("Scaler", [
        preprocessors.RobustScaler(),
        preprocessors.StandardScaler(),
        preprocessors.BoxCoxTransform(),
        preprocessors.YeoJohnsonTransform(),
    ]))

    # Feature selectors (NoSelector and SelectPercentile are disabled here).
    components.append(autem.Choice("Selector", [
        preprocessors.VarianceThreshold(),
    ]))

    # Feature reducers (FastICA, FeatureAgglomeration and PCA are disabled
    # here).
    components.append(autem.Choice("Reducer", [
        preprocessors.NoReducer(),
    ]))

    # Approximators
    components.append(autem.Choice("Approximator", [
        preprocessors.NoApproximator(),
    ]))

    # Learners (LinearSVC and RadialBasisSVC are disabled here).
    components.append(autem.Choice("Learner", [
        learners.PolySVC(),
    ]))

    components.append(reporters.Csv(path))

    autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    ).run()