Exemplo n.º 1
0
def prepare_classifier(configuration_space_wrapper: ConfigSpaceWrapper,
                       task: openml.tasks.OpenMLTask, run_defaults: bool):
    """Build the classifier for ``task``, optionally with sampled settings.

    The configuration space is assembled from the wrapper and turned into
    either a pipeline or a bare estimator, depending on the wrapper's
    ``wrapped_in_pipeline`` flag. Unless ``run_defaults`` is set, one
    configuration is sampled from the space and applied to the classifier
    through ``set_params``.

    Parameters
    ----------
    configuration_space_wrapper : ConfigSpaceWrapper
        Provides ``assemble()`` and the ``wrapped_in_pipeline`` flag.
    task : openml.tasks.OpenMLTask
        Task whose dataset supplies the logged qualities and, for
        pipelines, the nominal / numeric feature indices.
    run_defaults : bool
        When true, the classifier is returned as constructed, without
        sampling a configuration.

    Returns
    -------
    The object produced by ``sklearnbot.sklearn.as_pipeline`` or
    ``sklearnbot.sklearn.as_estimator``, with sampled hyperparameters
    applied when ``run_defaults`` is false.
    """
    configuration_space = configuration_space_wrapper.assemble()

    # Resolve the dataset once and reuse it; every ``task.get_dataset()``
    # call is a separate lookup of the same object.
    dataset = task.get_dataset()
    data_qualities = dataset.qualities
    logging.info('Obtained task %d (%s); %s attributes; %s observations',
                 task.task_id, dataset.name,
                 data_qualities['NumberOfFeatures'],
                 data_qualities['NumberOfInstances'])

    # obtain deserialized classifier
    if configuration_space_wrapper.wrapped_in_pipeline:
        # The column indices are only needed by the pipeline variant, so
        # they are computed here rather than unconditionally. The target
        # column is passed as the list of features to leave out.
        nominal_indices = dataset.get_features_by_type(
            'nominal', [task.target_name])
        numeric_indices = dataset.get_features_by_type(
            'numeric', [task.target_name])
        # NOTE(review): positional order here is (numeric, nominal);
        # confirm it matches the signature of ``as_pipeline``.
        classifier = sklearnbot.sklearn.as_pipeline(configuration_space,
                                                    numeric_indices,
                                                    nominal_indices)
    else:
        # NOTE(review): the meaning of the positional ``False`` is not
        # visible from this function; see ``as_estimator``.
        classifier = sklearnbot.sklearn.as_estimator(configuration_space,
                                                     False)

    # sample configuration and set hyperparameters
    if run_defaults:
        logging.info('Running default configuration')
        return classifier

    configuration = configuration_space.sample_configuration(1)
    hyperparameters = configuration.get_dictionary()
    logging.info('Configuration: %s', hyperparameters)
    classifier.set_params(**hyperparameters)
    return classifier
Exemplo n.º 2
0
def run_optimizer_on_task(task_id: int,
                          configuration_space_wrapper: ConfigSpaceWrapper,
                          output_dir: str,
                          upload_and_delete: bool) \
        -> typing.Tuple[bool, typing.Optional[int], typing.Optional[str]]:
    """Run a search-CV optimizer on an OpenML task and store the run.

    The task is fetched by id, an optimizer is built from the assembled
    configuration space via ``sklearnbot.sklearn.as_search_cv``, and the
    resulting run is written to
    ``<output_dir>/<task_id>/<random uuid>``. When ``upload_and_delete``
    is set, the run is then published and the local directory removed.

    Parameters
    ----------
    task_id : int
        Identifier of the OpenML task to run on.
    configuration_space_wrapper : ConfigSpaceWrapper
        Provides ``assemble()`` to build the configuration space.
    output_dir : str
        Root directory under which the run directory is created.
    upload_and_delete : bool
        Publish the run and delete the local copy afterwards.

    Returns
    -------
    tuple of (bool, int or None, str or None)
        ``(True, run.run_id, local_run_dir)`` on success, where
        ``local_run_dir`` is ``None`` if the local copy was deleted after
        publishing. ``(False, None, local_run_dir)`` if an
        ``OpenMLServerException`` or ``OpenMLRunsExistError`` was raised.
        In that case the returned path is the intended run directory; it
        has only been written if the failure happened after the run was
        stored. Other exception types propagate to the caller.
    """
    local_run_dir = os.path.join(output_dir, str(task_id), str(uuid.uuid4()))
    try:
        # obtain task
        task = openml.tasks.get_task(task_id)

        configuration_space = configuration_space_wrapper.assemble()

        # Resolve the dataset once and reuse it for logging and for the
        # feature lookups instead of calling ``task.get_dataset()``
        # repeatedly.
        dataset = task.get_dataset()
        data_name = dataset.name
        data_qualities = dataset.qualities
        logging.info('Obtained task %d (%s); %s attributes; %s observations',
                     task.task_id, data_name,
                     data_qualities['NumberOfFeatures'],
                     data_qualities['NumberOfInstances'])

        # obtain feature indices per type; the target column is passed as
        # the list of features to leave out
        nominal_indices = dataset.get_features_by_type(
            'nominal', [task.target_name])
        numeric_indices = dataset.get_features_by_type(
            'numeric', [task.target_name])

        # obtain prepared optimizer
        # NOTE(review): positional order here is (nominal, numeric);
        # confirm it matches the signature of ``as_search_cv``.
        optimizer = sklearnbot.sklearn.as_search_cv(configuration_space,
                                                    nominal_indices,
                                                    numeric_indices)

        # invoke OpenML run
        run = openml.runs.run_model_on_task(optimizer, task)
        score = run.get_metric_fn(sklearn.metrics.accuracy_score)
        logging.info('Task %d - %s; Accuracy: %0.2f',
                     task_id, data_name, score.mean())

        # Always store locally first, so a failed upload leaves the run
        # on disk.
        run.to_filesystem(local_run_dir, store_model=False)
        if upload_and_delete:
            run = run.publish()
            shutil.rmtree(local_run_dir)
            local_run_dir = None
        return True, run.run_id, local_run_dir
    except (openml.exceptions.OpenMLServerException,
            openml.exceptions.OpenMLRunsExistError):
        # Both failure modes are reported the same way: print the
        # traceback and signal failure to the caller without raising.
        traceback.print_exc()
        return False, None, local_run_dir