Example #1
    def run(self):
        # We optimize s on a log scale, as we expect that the performance varies
        # logarithmically across s
        s_max = self.x_train.shape[0]
        s_min = s_max // 27
        subsets = [27] * 8
        subsets.extend([9] * 4)
        subsets.extend([3] * 2)
        subsets.extend([1] * 1)
        # subsets = [64, 32, 16]

        # Defining the bounds and dimensions of the
        # input space (configuration space + environment space)
        # We also optimize the hyperparameters of the svm on a log scale
        lower = np.array([1e-3, 1e-5, 1e-5])
        upper = np.array([1e5, 10, 1e-1])

        # Start Fabolas to optimize the objective function
        try:
            results = fabolas(objective_function=self.objective_function,
                              lower=lower,
                              upper=upper,
                              s_min=s_min,
                              s_max=s_max,
                              n_init=len(subsets),
                              num_iterations=1000,
                              n_hypers=30,
                              subsets=subsets,
                              output_path=self.output_path,
                              inc_estimation="last_seen")
        except ValueError as e:
            print(e)
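Fabolas expects the objective to take a configuration x and a subset size s and to return a (loss, cost) pair. self.objective_function is not shown in this example; a minimal sketch of the expected shape (in the example the data lives on self, here it is passed explicitly for self-containment, and build_model is a hypothetical helper mapping a config vector to an sklearn-style estimator):

import time
import numpy as np

def objective_function(x, s, x_train, y_train, x_valid, y_valid, build_model):
    # Train on a random subset of s points and time the evaluation
    start_time = time.time()
    idx = np.random.permutation(x_train.shape[0])[:int(s)]
    model = build_model(x)  # hypothetical factory, not part of the original example
    model.fit(x_train[idx], y_train[idx])
    # The validation error is the value fabolas minimizes
    error = 1.0 - model.score(x_valid, y_valid)
    cost = time.time() - start_time  # fabolas models this cost to trade off s
    return error, cost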
Example #2
    def test_bayesian_optimization(self):
        res = fabolas(objective_function=objective,
                      lower=self.lower,
                      upper=self.upper,
                      subsets=[10, 20],
                      s_min=10,
                      s_max=10000,
                      n_init=1,
                      num_iterations=3)

        assert len(res["x_opt"]) == self.lower.shape[0]
        assert np.all(np.array(res["x_opt"]) >= self.lower)
        assert np.all(np.array(res["x_opt"]) <= self.upper)
Example #3
    def test_bayesian_optimization(self):
        res = fabolas(objective_function=objective,
                      lower=self.lower,
                      upper=self.upper,
                      subsets=[10, 20],
                      s_min=10,
                      s_max=10000,
                      n_init=2,
                      num_iterations=3)

        assert len(res["x_opt"]) == self.lower.shape[0]
        assert np.all(np.array(res["x_opt"]) >= self.lower)
        assert np.all(np.array(res["x_opt"]) <= self.upper)
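All of these examples read the same keys from the dict that fabolas returns: "x_opt" (the incumbent configuration), "X", "y" and "c" (the evaluated configurations, their losses and their costs), and "incumbents" (the incumbent trajectory). A short inspection sketch, assuming a finished res as in the test above:

# Inspect a finished run (keys as used throughout these examples)
x_opt = res["x_opt"]                    # incumbent configuration
X, y, c = res["X"], res["y"], res["c"]  # evaluated configs, losses, costs
print("incumbent:", x_opt)
print("best observed loss:", min(y))
print("total evaluation cost:", sum(c))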
Example #4
# Initial data setup
#################################################
version = 1
n_init_num = 5
budget_iter = 80
initial = 0
x_init_dir = home_dir + '/IBO_master/experiments_IBO/' + exp_name + '/initial_data/'
x_init_name = 'x_init_{}_v{}.pkl'.format(exp_name, version)

#################################################
# IBO main function
#################################################
for it in range(n_runs):
    results_over_runs[it] = fabolas(objective_function,
                                    lower=lower,
                                    upper=upper,
                                    s_min=s_min,
                                    s_max=s_max,
                                    init_data=x_init_dir + x_init_name,
                                    subsets=[1],
                                    num_iterations=budget_iter)
#################################################
# Saving the results
#################################################
output_main_dir = home_dir + '/IBO_master/experiments_IBO/' + exp_name + '/output_main/'
results_file = output_main_dir + "results_{}_{}_init{}_budget{}_v{}.pkl".format(
    exp_name, method, n_init_num, budget_iter, version)
with open(results_file, "wb") as fh:
    pickle.dump(results_over_runs, fh)
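For completeness, the saved results can be restored later with the matching pickle.load call:

with open(results_file, "rb") as fh:
    results_over_runs = pickle.load(fh)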
Example #5
os.makedirs(output_path, exist_ok=True)


def objective(x, s):
    dataset_fraction = s / s_max

    res = f.objective_function(x, dataset_fraction=dataset_fraction)
    return res["function_value"], res["cost"]

info = f.get_meta_information()
bounds = np.array(info['bounds'])
lower = bounds[:, 0]
upper = bounds[:, 1]
results = fabolas(objective_function=objective, lower=lower, upper=upper,
                  s_min=s_min, s_max=s_max, n_init=10, num_iterations=num_iterations,
                  n_hypers=30, subsets=subsets,
                  rng=rng, output_path=output_path)

results["run_id"] = run_id
results['X'] = results['X'].tolist()
results['y'] = results['y'].tolist()
results['c'] = results['c'].tolist()

test_error = []
current_inc = None
current_inc_val = None

key = "incumbents"

for inc in results["incumbents"]:
    print(inc)
Example #6
subsets.extend([32] * 2)
subsets.extend([4] * 1)


def objective(x, s):
    dataset_fraction = s / s_max

    res = f.objective_function(x, dataset_fraction=dataset_fraction)
    return res["function_value"], res["cost"]

info = f.get_meta_information()
bounds = np.array(info['bounds'])
lower = bounds[:, 0]
upper = bounds[:, 1]
results = fabolas(objective_function=objective, lower=lower, upper=upper,
                  s_min=s_min, s_max=s_max, n_init=len(subsets), num_iterations=num_iterations,
                  n_hypers=30, subsets=subsets, inc_estimation="mean",
                  rng=rng)

results["run_id"] = run_id
results['X'] = results['X'].tolist()
results['y'] = results['y'].tolist()
results['c'] = results['c'].tolist()

test_error = []
cum_cost = 0

for i, inc in enumerate(results["incumbents"]):
    y = f.objective_function_test(np.array(inc))["function_value"]
    test_error.append(y)

    # Compute the time it would have taken to evaluate this configuration
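    # (The snippet is truncated here. A hypothetical continuation, following
    # the bookkeeping pattern above, would accumulate the per-evaluation
    # costs stored in results["c"]:)
    cum_cost += results["c"][i]
    print("test error %.4f, cumulative cost %.2f s" % (y, cum_cost))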
Example #7
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("run_id")
    parser.add_argument("dataset")
    parser.add_argument("num_iterations", type=int)
    parser.add_argument("s_min")
    parser.add_argument("s_max")
    #    parser.add_argument("subsets")
    #    parser.add_argument("seed")

    args = parser.parse_args()

    benchmark_function = {
        "LeNet": lenet_function,
    }[args.dataset]

    s_min, s_max = int(args.s_min), int(args.s_max)

    config_space = ImageAugmentation.get_config_space()

    hyperparameters = config_space.get_hyperparameters()

    lower = []
    upper = []

    for hyperparameter in hyperparameters:
        if hasattr(hyperparameter, "lower"):
            lower_bound = hyperparameter.lower
            upper_bound = hyperparameter.upper
        else:
            domain = hyperparameter.choices
            lower_bound, upper_bound = min(domain), max(domain)

        lower.append(lower_bound)
        upper.append(upper_bound)

    lower = np.array(lower)
    upper = np.array(upper)

    # Wrap the benchmark so fabolas sees a plain (x, s) objective
    def wrapped_benchmark(x, s):
        return benchmark_function(x=x, s=int(s), config_space=config_space)

    # Start Fabolas to optimize the objective function
    results = fabolas(wrapped_benchmark,
                      lower=lower,
                      upper=upper,
                      s_min=s_min,
                      s_max=s_max,
                      num_iterations=50)

    x_best = results["x_opt"]
    print("best configuration", x_best)
    # Evaluate the incumbent once more at the full training-set size
    print(benchmark_function(x=x_best, s=s_max, config_space=config_space))

    path = path_join(
        abspath("."),
        "Workspace/MastersThesis/AutoDA/experiments/results/mnist/fabolas")
    with open(os.path.join(path, "fabolas_optimized_%d.json" % args.run_id),
              "w") as fh:
        json.dump(np.asarray(x_best).tolist(), fh)  # ensure JSON-serializable
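lenet_function itself is outside this snippet; the lambda passed to fabolas fixes its expected signature. A stub sketch of that interface (train_lenet_on_subset is a hypothetical helper, not part of the original code):

def lenet_function(x, s, config_space):
    """Sketch of the benchmark interface implied by the call above."""
    # x: continuous configuration vector within [lower, upper]
    # s: training-set size chosen by Fabolas
    # Must return (validation_error, cost), as fabolas requires
    validation_error, cost = train_lenet_on_subset(x, int(s), config_space)  # hypothetical
    return validation_error, cost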
Example #8
    def optimize(self) -> TuningResult:
        """
        Method performs a hyperparameter optimization run according to the selected HPO-method.
        :return: result: TuningResult
            TuningResult-object that contains the results of this optimization run.
        :return:
        """

        # Convert the skopt hyperparameter space into a continuous space for RoBO
        hp_space_lower = np.zeros(shape=(len(self.hp_space), ))
        hp_space_upper = np.zeros(shape=(len(self.hp_space), ))

        for i in range(len(self.hp_space)):
            if type(self.hp_space[i]) == skopt.space.space.Integer:
                hp_space_lower[i] = self.hp_space[i].low
                hp_space_upper[i] = self.hp_space[i].high

            elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                n_choices = len(list(self.hp_space[i].categories))
                hp_space_lower[i] = 0
                hp_space_upper[i] = n_choices - 1

            elif type(self.hp_space[i]) == skopt.space.space.Real:
                hp_space_lower[i] = self.hp_space[i].low
                hp_space_upper[i] = self.hp_space[i].high

            else:
                raise Exception(
                    'The skopt HP-space could not be converted correctly!')

        # Set the random seed of the random number generator
        rand_num_generator = np.random.RandomState(seed=self.random_seed)

        # Optimize on the predefined n_func_evals and measure the wall clock times
        start_time = time.time()
        self.times = []  # Initialize a list for saving the wall clock times

        # Use a warmstart configuration (only possible for BOHAMIANN, not FABOLAS)
        if self.do_warmstart == 'Yes':

            # Initialize numpy arrays for saving the warmstart configuration and the warmstart loss
            warmstart_config = np.zeros(shape=(1, len(self.hp_space)))
            warmstart_loss = np.zeros(shape=(1, 1))

            # Retrieve the default hyperparameters and the default loss for the ML-algorithm
            default_params = self.get_warmstart_configuration()

            try:

                # Dictionary for saving the warmstart HP-configuration (only contains the HPs
                # that are part of the 'tuned' HP-space)
                warmstart_dict = {}

                # Iterate over all HPs of this ML-algorithm's tuned HP-space and append the default values to
                # the numpy array
                for i in range(len(self.hp_space)):

                    this_param = self.hp_space[i].name

                    # Categorical HPs need to be encoded as integer values for RoBO
                    if type(self.hp_space[i]) == skopt.space.space.Categorical:

                        choices = self.hp_space[i].categories
                        this_warmstart_value_cat = default_params[this_param]
                        dict_value = this_warmstart_value_cat

                        # Find the index of the default / warmstart HP in the list of possible choices
                        this_warmstart_value = list(choices).index(
                            this_warmstart_value_cat)

                    # For all non-categorical HPs
                    else:
                        this_warmstart_value = default_params[this_param]
                        dict_value = this_warmstart_value

                        # For some HPs (e.g. max_depth of RF) the default value is None,
                        # even though the tuned dtype is numeric (e.g. int)
                        if this_warmstart_value is None:
                            # Try to impute these values by the mean value
                            this_warmstart_value = int(
                                0.5 *
                                (self.hp_space[i].low + self.hp_space[i].high))
                            dict_value = this_warmstart_value

                    # Pass the warmstart value to the according numpy array
                    warmstart_config[0, i] = this_warmstart_value
                    warmstart_dict[this_param] = dict_value

                # Pass the default loss to the according numpy array
                warmstart_loss[0, 0] = self.get_warmstart_loss(
                    warmstart_dict=warmstart_dict)

                # Pass the warmstart configuration as a kwargs dict
                kwargs = {'X_init': warmstart_config, 'Y_init': warmstart_loss}

                # Set flag to indicate that a warmstart took place
                did_warmstart = True

            except Exception:
                print('Warmstarting RoBO failed!')
                kwargs = {}

                # Set flag to indicate that NO warmstart took place
                did_warmstart = False

        # No warmstart requested
        else:
            kwargs = {}

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

        # Select the specified HPO-tuning method
        try:
            if self.hpo_method == 'Fabolas':

                # Budget correct? // Set further parameters?
                s_max = len(
                    self.x_train
                )  # Maximum number of data points for the training data set
                s_min = int(
                    0.05 * s_max
                )  # Minimum number of data points for the training data set
                n_init = int(self.n_func_evals /
                             3)  # Requirement of the fabolas implementation

                result_dict = fabolas(
                    objective_function=self.objective_fabolas,
                    s_min=s_min,
                    s_max=s_max,
                    lower=hp_space_lower,
                    upper=hp_space_upper,
                    num_iterations=self.n_func_evals,
                    rng=rand_num_generator,
                    n_init=n_init)
                run_successful = True

            elif self.hpo_method == 'Bohamiann':

                if did_warmstart:
                    # A single initial design point (warm start hyperparameter configuration)
                    kwargs['n_init'] = 1

                # Budget correct? // Set further parameters?
                result_dict = bayesian_optimization(
                    objective_function=self.objective_bohamiann,
                    lower=hp_space_lower,
                    upper=hp_space_upper,
                    model_type='bohamiann',
                    num_iterations=self.n_func_evals,
                    rng=rand_num_generator,
                    **kwargs)
                run_successful = True

            else:
                raise Exception('Unknown HPO-method!')

        # Algorithm crashed
        except Exception:
            # Add a warning here
            run_successful = False

        # If the optimization run was successful, determine the optimization results
        if run_successful:

            for i in range(len(self.times)):
                # Subtract the start time to receive the wall clock time of each function evaluation
                self.times[i] = self.times[i] - start_time
            wall_clock_time = max(self.times)

            # Insert timestamp of 0.0 for the warm start hyperparameter configuration
            if did_warmstart:
                self.times.insert(0, 0.0)

            # Timestamps
            timestamps = self.times

            # Losses (not incumbent losses)
            losses = result_dict['y']

            evaluation_ids = list(range(1, len(losses) + 1))
            best_loss = min(losses)

            configurations = ()
            for config in result_dict['X']:
                # Cut off the unused Fabolas budget value at the end
                config = config[:len(self.hp_space)]
                config_dict = {}

                for i in range(len(config)):
                    if type(self.hp_space[i]) == skopt.space.space.Integer:
                        config_dict[self.hp_space[i].name] = int(
                            round(config[i]))

                    elif type(
                            self.hp_space[i]) == skopt.space.space.Categorical:
                        config_dict[self.hp_space[i].name] = list(
                            self.hp_space[i].categories)[int(round(config[i]))]

                    elif type(self.hp_space[i]) == skopt.space.space.Real:
                        config_dict[self.hp_space[i].name] = config[i]

                    else:
                        raise Exception(
                            'The continuous HP-space could not be converted correctly!'
                        )

                configurations = configurations + (config_dict, )

            # Find the best hyperparameter configuration (incumbent)
            best_configuration = {}
            x_opt = result_dict['x_opt']

            for i in range(len(x_opt)):
                if type(self.hp_space[i]) == skopt.space.space.Integer:
                    best_configuration[self.hp_space[i].name] = int(
                        round(x_opt[i]))

                elif type(self.hp_space[i]) == skopt.space.space.Categorical:
                    best_configuration[self.hp_space[i].name] = list(
                        self.hp_space[i].categories)[int(round(x_opt[i]))]

                elif type(self.hp_space[i]) == skopt.space.space.Real:
                    best_configuration[self.hp_space[i].name] = x_opt[i]

                else:
                    raise Exception(
                        'The continuous HP-space could not be converted correctly!'
                    )

        # Run not successful (algorithm crashed)
        else:
            evaluation_ids, timestamps, losses, configurations, best_loss, best_configuration, wall_clock_time = \
                self.impute_results_for_crash()

        # Pass the results to a TuningResult-Object
        result = TuningResult(evaluation_ids=evaluation_ids,
                              timestamps=timestamps,
                              losses=losses,
                              configurations=configurations,
                              best_loss=best_loss,
                              best_configuration=best_configuration,
                              wall_clock_time=wall_clock_time,
                              successful=run_successful,
                              did_warmstart=did_warmstart)

        return result
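The integer encoding of categorical hyperparameters used above round-trips as follows: a category maps to its index in the choices on the way into RoBO's continuous box, and a continuous coordinate is rounded back to an index on the way out. A minimal sketch with an illustrative skopt Categorical dimension:

import skopt

dim = skopt.space.space.Categorical(['gini', 'entropy'], name='criterion')

# Encode: a category becomes its index in the choices (0 .. n_choices - 1)
encoded = list(dim.categories).index('entropy')  # -> 1

# Decode: round a continuous coordinate proposed by RoBO back to a choice
proposed = 0.8  # e.g. one coordinate of x_opt
decoded = list(dim.categories)[int(round(proposed))]  # -> 'entropy'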
Example #9
    # Validate this hyperparameter configuration on the full validation data
    y = 1 - clf.score(X_val, y_val)

    c = time.time() - start_time

    return y, c

# Load the data
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()


# We optimize s on a log scale, as we expect that the performance varies
# logarithmically across s
s_min = 100
s_max = 50000

# Defining the bounds and dimensions of the
# input space (configuration space + environment space)
# We also optimize the hyperparameters of the svm on a log scale
lower = np.array([-10, -10])
upper = np.array([10, 10])

# Start Fabolas to optimize the objective function
res = fabolas(objective_function, lower=lower, upper=upper,
              s_min=s_min, s_max=s_max, num_iterations=100)

x_best = res["x_opt"]
print(x_best)
print(objective_function(x_best[:, :-1], s=x_best[:, None, -1]))
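This example starts mid-function: the validation and cost lines at its top are the tail of the objective. A plausible reconstruction of the missing head, modelled on RoBO's canonical SVM demo (the SVC choice and the log-scale mapping of the [-10, 10] bounds are assumptions); it would continue with the validation and cost lines shown above:

import time
import numpy as np
from sklearn import svm

def objective_function(x, s):
    # Time the evaluation so fabolas can model its cost
    start_time = time.time()

    # Train on a random subset of s data points
    shuffle = np.random.permutation(y_train.shape[0])
    train_subset = X_train[shuffle[:int(s)]]
    train_targets_subset = y_train[shuffle[:int(s)]]

    # The [-10, 10] bounds suggest C and gamma live on a log scale
    clf = svm.SVC(C=np.exp(float(x[0])), gamma=np.exp(float(x[1])))
    clf.fit(train_subset, train_targets_subset)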

Example #10
    dataset_fraction = s / s_max

    res = f.objective_function(x, dataset_fraction=dataset_fraction)
    return res["function_value"], res["cost"]


info = f.get_meta_information()
bounds = np.array(info['bounds'])
lower = bounds[:, 0]
upper = bounds[:, 1]
results = fabolas(objective_function=objective,
                  lower=lower,
                  upper=upper,
                  s_min=s_min,
                  s_max=s_max,
                  n_init=len(subsets),
                  num_iterations=num_iterations,
                  n_hypers=30,
                  subsets=subsets,
                  inc_estimation="mean",
                  rng=rng)

results["run_id"] = run_id
results['X'] = results['X'].tolist()
results['y'] = results['y'].tolist()
results['c'] = results['c'].tolist()

test_error = []
cum_cost = 0

for i, inc in enumerate(results["incumbents"]):
Example #11
    dataset_fraction = s / s_max

    res = f.objective_function(x, dataset_fraction=dataset_fraction)
    return res["function_value"], res["cost"]


info = f.get_meta_information()
bounds = np.array(info['bounds'])
lower = bounds[:, 0]
upper = bounds[:, 1]
results = fabolas(objective_function=objective,
                  lower=lower,
                  upper=upper,
                  s_min=s_min,
                  s_max=s_max,
                  n_init=len(subsets),
                  num_iterations=num_iterations,
                  n_hypers=30,
                  subsets=subsets,
                  rng=rng,
                  output_path=output_path,
                  inc_estimation="last_seen")

results["run_id"] = run_id
results['X'] = results['X'].tolist()
results['y'] = results['y'].tolist()
results['c'] = results['c'].tolist()

test_error = []
current_inc = None
current_inc_val = None
Example #12
os.makedirs(output_path, exist_ok=True)


def objective(x, s):
    dataset_fraction = s / s_max

    res = f.objective_function(x, dataset_fraction=dataset_fraction)
    return res["function_value"], res["cost"]

info = f.get_meta_information()
bounds = np.array(info['bounds'])
lower = bounds[:, 0]
upper = bounds[:, 1]
results = fabolas(objective_function=objective, lower=lower, upper=upper,
                  s_min=s_min, s_max=s_max, n_init=len(subsets), num_iterations=num_iterations,
                  n_hypers=30, subsets=subsets,
                  rng=rng, output_path=output_path, inc_estimation="last_seen")

results["run_id"] = run_id
results['X'] = results['X'].tolist()
results['y'] = results['y'].tolist()
results['c'] = results['c'].tolist()

test_error = []
current_inc = None
current_inc_val = None

key = "incumbents"

for inc in results["incumbents"]:
    print(inc)
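Relative to the "mean" variants above, inc_estimation="last_seen" takes the incumbent from the best evaluation seen so far instead of estimating it from the model's posterior mean. Either way, the recorded trajectory can be scored offline, following the pattern of the "mean" example (f is the benchmark object these snippets assume):

test_error = [f.objective_function_test(np.array(inc))["function_value"]
              for inc in results["incumbents"]]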
Example #13
def run_fabolas(X_train,
                X_test,
                y_train,
                y_test,
                names,
                sensitive_ids,
                ranking_functions=[],
                clf=None,
                min_accuracy=0.0,
                min_fairness=0.0,
                min_robustness=0.0,
                max_number_features=None,
                max_search_time=np.inf,
                cv_splitter=None):

    X_train_fab, X_val, y_train_fab, y_val = train_test_split(X_train,
                                                              y_train,
                                                              test_size=0.2,
                                                              random_state=42,
                                                              stratify=y_train)

    start_time = time.time()

    auc_scorer = make_scorer(roc_auc_score,
                             greater_is_better=True,
                             needs_threshold=True)
    #fair_train = make_scorer(true_positive_rate_score, greater_is_better=True, sensitive_data=X_train[:, sensitive_ids[0]])
    fair_val = make_scorer(true_positive_rate_score,
                           greater_is_better=True,
                           sensitive_data=X_val[:, sensitive_ids[0]])

    def f_clf1(mask):
        model = Pipeline([('selection', MaskSelection(mask)), ('clf', clf)])

        return model

    def f_to_min1(x, s):

        opt_start_time = time.time()

        # fabolas may pass the subset size as a float; make it usable for slicing
        s = int(s)
        mask = x > 0.5

        model = f_clf1(mask)

        if np.sum(model.named_steps['selection'].mask) == 0:
            return 4, (time.time() - opt_start_time)

        s_max = y_train_fab.shape[0]
        shuffle = np.random.permutation(np.arange(s_max))

        train_subset = X_train_fab[shuffle[:s]]
        train_targets_subset = y_train_fab[shuffle[:s]]

        model.fit(train_subset, pd.DataFrame(train_targets_subset))

        cv_acc = auc_scorer(model, X_val, pd.DataFrame(y_val))
        cv_fair = 1.0 - fair_val(model, X_val, pd.DataFrame(y_val))
        cv_robust = 1.0 - robust_score_test(
            X_test=X_val,
            y_test=y_val,
            model=model.named_steps['clf'],
            feature_selector=model.named_steps['selection'],
            scorer=auc_scorer)

        cv_number_features = float(
            np.sum(
                model.named_steps['selection']._get_support_mask())) / float(
                    len(model.named_steps['selection']._get_support_mask()))

        print("accuracy: " + str(cv_acc) + ' fair: ' + str(cv_fair) + ' k: ' +
              str(cv_number_features))

        loss = 0.0
        if cv_acc >= min_accuracy and \
          cv_fair >= min_fairness and \
          cv_robust >= min_robustness and \
          cv_number_features <= max_number_features:
            if min_fairness > 0.0:
                loss += (min_fairness - cv_fair)
            if min_accuracy > 0.0:
                loss += (min_accuracy - cv_acc)
            if min_robustness > 0.0:
                loss += (min_robustness - cv_robust)
            if max_number_features < 1.0:
                loss += (cv_number_features - max_number_features)

        else:
            if min_fairness > 0.0 and cv_fair < min_fairness:
                loss += (min_fairness - cv_fair)**2
            if min_accuracy > 0.0 and cv_acc < min_accuracy:
                loss += (min_accuracy - cv_acc)**2
            if min_robustness > 0.0 and cv_robust < min_robustness:
                loss += (min_robustness - cv_robust)**2
            if max_number_features < 1.0 and cv_number_features > max_number_features:
                loss += (cv_number_features - max_number_features)**2

        print("loss: " + str(loss))

        return loss, (time.time() - opt_start_time)

    cv_fair = 0
    cv_acc = 0
    cv_robust = 0
    cv_number_features = 1.0

    number_of_evaluations = 0
    # --- start fabolas ---

    # Defining the bounds and dimensions of the input space:
    # one relaxed binary mask entry in [0, 1] per feature
    lower = np.zeros(X_train.shape[1])
    upper = np.ones(X_train.shape[1])

    # Start Fabolas to optimize the objective function
    print("xshape: " + str(X_train.shape))
    res = fabolas(f_to_min1,
                  lower=lower,
                  upper=upper,
                  s_min=100,
                  s_max=X_train.shape[0],
                  num_iterations=1000,
                  n_init=10,
                  chain_length=5,
                  burnin=5,
                  n_hypers=10)
    '''
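MaskSelection is a custom transformer that this snippet does not define; the pipeline usage above implies it selects the columns where the relaxed mask exceeds 0.5. A minimal sketch of such a transformer (the real class may differ):

from sklearn.base import BaseEstimator, TransformerMixin

class MaskSelection(BaseEstimator, TransformerMixin):
    """Select feature columns according to a boolean mask (sketch)."""

    def __init__(self, mask):
        self.mask = mask

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X[:, self.mask]

    def _get_support_mask(self):
        return self.mask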