def run_benchmark_simulation(study, baseline_name):
    """Build and run the light OpenML classifier simulation for a baseline.

    The experiment is named after the baseline. After the run finishes, the
    combined reports under the experiment's output directory are refreshed.

    Args:
        study: Name of the study; used as the first directory level under
            the simulations path and passed on to the simulation factory.
        baseline_name: Key used to look up the baseline configuration
            (which supplies the OpenML ``task_id``).
    """
    experiment = baseline_name
    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]

    # Fixed run budget for every benchmark experiment.
    seed = 1
    population_size = 20
    steps = 200
    epochs = 25
    max_time = 60 * 60  # presumably seconds (one hour) - confirm in run_simulation

    study_path = get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    utility.prepare_OpenML()
    simulation = make_openml_light_classifier_simulation(
        study,
        experiment,
        baseline_name,
        task_id,
        seed,
        population_size,
        path,
    )
    run_simulation(simulation, steps, epochs, max_time)

    report_manager = autem.ReportManager(path)
    report_manager.update_combined_reports()
def run_test_simulation(baseline_name=None, seed=None):
    """Run a snapshot-workflow test simulation against an OpenML baseline.

    Args:
        baseline_name: Baseline to test. When None, the value returned by
            ``get_test_baseline_name()`` is used.
        seed: Random seed. When None, seed 2 is used and the experiment is
            named after the baseline alone; otherwise the experiment name is
            ``"<baseline>_<seed>"``.
    """
    if baseline_name is None:
        baseline_name = get_test_baseline_name()

    # The experiment name reflects the seed exactly as the caller passed it,
    # so it must be decided before the default seed is substituted.
    if seed is None:
        experiment = baseline_name
        seed = 2
    else:
        experiment = "%s_%d" % (baseline_name, seed)

    study = get_test_study()
    version = get_test_version()
    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]

    path = get_test_simulations_path().joinpath(study).joinpath(experiment)
    memory = str(path.joinpath("cache"))

    # Resolve the OpenML task to its dataset id and dataset name.
    utility.prepare_OpenML()
    data_id = openml.tasks.get_task(task_id).dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    simulation_name = "Test %s_%s_v%d" % (study, experiment, version)
    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Snapshot(),
        baselines.BaselineStats(baseline_name),
        reporters.Csv(path),
        hyper_learners.ClassificationSnapshot(),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
        memory=memory,
    )
    simulation.run()
def run_balance_scale_mastery(seed):
    """Run the mastery workflow with an SVM hyper-learner on "balance-scale".

    The run is filed under the "DEV" study with an experiment name of
    ``"mastery_balance-scale_s<seed>"``. Identity, job count and seed are
    applied through ``autem.SimulationSettings`` after construction.

    Args:
        seed: Integer seed; used both in the experiment name and as the
            simulation seed.
    """
    study = "DEV"
    baseline_name = "balance-scale"
    experiment = "mastery_%s_s%d" % (baseline_name, seed)

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    # Resolve the OpenML task to its dataset id and dataset name.
    utility.prepare_OpenML()
    data_id = openml.tasks.get_task(task_id).dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.LeagueScorer(scorers.accuracy_score),
        workflows.MasteryWorkflow(),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(simulation_name, components)

    # Run parameters are attached via the settings wrapper rather than
    # passed to the Simulation constructor.
    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    settings.set_n_jobs(4)
    settings.set_seed(seed)

    simulation.run()
def make_benchmark_simulation(study, baseline_name, configuration, learner):
    """Build (without running) a benchmark simulation for a baseline.

    Args:
        study: Study name; first directory level under the simulations path
            and part of the simulation name/identity.
        baseline_name: Key for ``baselines.get_baseline_configuration``; also
            used as the experiment name and the identity's ``dataset`` value.
        configuration: Key into ``simulation_builders``. When None, the
            baseline's own ``"Configuration"`` entry is used.
        learner: Learner passed to the builder. When None, the baseline's
            own ``"Learner"`` entry is used.

    Returns:
        The simulation created by the selected builder, with identity,
        job count, seed and cache location applied, or None (after printing
        a message) when ``configuration`` is not a known builder.
    """
    experiment = baseline_name
    baseline_configuration = baselines.get_baseline_configuration(baseline_name)

    if configuration is None:
        configuration = baseline_configuration["Configuration"]
    if learner is None:
        learner = baseline_configuration["Learner"]

    # Reject an unknown configuration before doing any OpenML work, so a
    # typo does not cost a task download.
    if configuration not in simulation_builders:
        print("Baseline %s configuration %s does not exist" % (baseline_name, configuration))
        return None

    # OpenML must be prepared before the first OpenML call, as every other
    # runner in this file does; previously the task was fetched first.
    utility.prepare_OpenML()
    task = openml.tasks.get_task(baseline_configuration["task_id"])
    data_id = task.dataset_id

    version = get_version()
    name = "'%s_%s_%s v%d'" % (study, experiment, configuration, version)
    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': baseline_name,
        'version': version,
        'configuration': configuration,
    }

    n_jobs = get_n_jobs()
    seed = 1
    path = get_simulations_path().joinpath(study).joinpath(experiment)
    memory = str(path.joinpath("cache"))

    simulation_builder = simulation_builders[configuration]
    simulation = simulation_builder(name, identity, data_id, learner, path)

    settings = autem.SimulationSettings(simulation)
    settings.set_identity(identity)
    # Honour the configured job count; it was previously computed and then
    # ignored in favour of a hard-coded 4.
    settings.set_n_jobs(n_jobs)
    settings.set_seed(seed)
    settings.set_memory(memory)
    return simulation
def run_cylinder_bands_mastery():
    """Run a mastery workflow over the "Learner" choice on "cylinder-bands".

    The run is filed under the "DEV" study, uses seed 1 and four jobs, and
    writes CSV reports to the experiment's directory under the benchmark
    simulations path.
    """
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    seed = 1

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    # Resolve the OpenML task to its dataset id and dataset name.
    utility.prepare_OpenML()
    data_id = openml.tasks.get_task(task_id).dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Mastery(["Learner"]),
        baselines.BaselineStats(baseline_name),
        hyper_learners.ClassificationSVM(),
        reporters.Csv(path),
    ]
    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()
def run_cylinder_bands_custom():
    """Run a hand-assembled pipeline simulation on "cylinder-bands".

    The pipeline is spelled out as explicit ``autem.Choice`` groups (scaler,
    selector, reducer, approximator, learner) instead of a packaged
    hyper-learner. The run is filed under the "DEV" study, uses seed 1 and
    four jobs, and writes CSV reports to the experiment's directory.
    """
    study = "DEV"
    baseline_name = "cylinder-bands"
    experiment = baseline_name
    seed = 1

    version = benchmark.get_version()
    simulation_name = "%s_%s_v%d" % (study, experiment, version)

    task_id = baselines.get_baseline_configuration(baseline_name)["task_id"]
    study_path = benchmark.get_simulations_path().joinpath(study)
    path = study_path.joinpath(experiment)

    # Resolve the OpenML task to its dataset id and dataset name.
    utility.prepare_OpenML()
    data_id = openml.tasks.get_task(task_id).dataset_id
    dataset_name = openml.datasets.get_dataset(data_id).name

    identity = {
        'study': study,
        'experiment': experiment,
        'dataset': dataset_name,
        'version': version
    }

    components = [
        loaders.OpenMLLoader(data_id),
        scorers.Accuracy(),
        workflows.Standard(),
        baselines.BaselineStats(baseline_name),

        # Scalers
        autem.Choice("Scaler", [
            preprocessors.RobustScaler(),
            preprocessors.StandardScaler(),
            preprocessors.BoxCoxTransform(),
            preprocessors.YeoJohnsonTransform()
        ]),

        # Feature selectors.
        # Disabled alternatives: preprocessors.NoSelector(),
        # preprocessors.SelectPercentile()
        autem.Choice("Selector", [
            preprocessors.VarianceThreshold()
        ]),

        # Feature reducers.
        # Disabled alternatives: preprocessors.FastICA(),
        # preprocessors.FeatureAgglomeration(), preprocessors.PCA()
        autem.Choice("Reducer", [
            preprocessors.NoReducer(),
        ]),

        # Approximators
        autem.Choice("Approximator", [
            preprocessors.NoApproximator(),
        ]),

        # Learners.
        # Disabled alternatives: learners.LinearSVC(),
        # learners.RadialBasisSVC()
        autem.Choice("Learner", [
            learners.PolySVC(),
        ]),

        reporters.Csv(path),
    ]

    simulation = autem.Simulation(
        simulation_name,
        components,
        seed=seed,
        n_jobs=4,
        identity=identity,
    )
    simulation.run()