Example #1
    def transform(self, scenario: ASlibScenario):
        '''
            transform ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
        '''
        if self.scaler:
            self.logger.debug("Applying StandardScaler")

            values = self.scaler.transform(scenario.feature_data.values)

            scenario.feature_data = pd.DataFrame(
                data=values,
                index=scenario.feature_data.index,
                columns=scenario.feature_data.columns)

        return scenario
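
A minimal usage sketch of the fit/transform pattern shared by the transform examples on this page (scale the values, then rebuild the DataFrame with the original index and columns); the FeatureScaler class below is a hypothetical stand-in, not part of the original code:

import pandas as pd
from sklearn.preprocessing import StandardScaler

class FeatureScaler:
    # Hypothetical stand-in for the preprocessing step above.
    def __init__(self):
        self.scaler = StandardScaler()

    def fit(self, feature_data: pd.DataFrame):
        self.scaler.fit(feature_data.values)

    def transform(self, feature_data: pd.DataFrame) -> pd.DataFrame:
        # Same pattern as the transform above: scale the values, then
        # rebuild the DataFrame with the original index and columns.
        values = self.scaler.transform(feature_data.values)
        return pd.DataFrame(values, index=feature_data.index,
                            columns=feature_data.columns)

df = pd.DataFrame({"f0": [1.0, 2.0, 3.0], "f1": [10.0, 20.0, 30.0]})
fs = FeatureScaler()
fs.fit(df)
print(fs.transform(df))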
Example #2
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):
        print("Run fit on " + self.get_name() + " for fold " + str(fold))
        self.num_algorithms = len(scenario.algorithms)

        # create all bootstrap samples
        bootstrap_samples, out_of_sample_samples = self.generate_bootstrap_sample(
            scenario, fold, self.num_base_learner)

        weights_denorm = list()

        # train each base learner on a different sample
        for index in range(self.num_base_learner):
            self.current_iteration = index + 1
            self.base_learners.append(copy.deepcopy(self.base_learner))
            original_scenario = copy.deepcopy(scenario)
            (scenario.feature_data, scenario.performance_data,
             scenario.runstatus_data, scenario.feature_runstatus_data,
             scenario.feature_cost_data) = bootstrap_samples[index]
            self.base_learners[index].fit(scenario, fold,
                                          amount_of_training_instances)
            if self.weighting:
                if self.weight_type == 'oos':
                    (scenario.feature_data, scenario.performance_data,
                     scenario.runstatus_data, scenario.feature_runstatus_data,
                     scenario.feature_cost_data) = out_of_sample_samples[index]
                elif self.weight_type == 'original_set':
                    scenario = original_scenario
                weights_denorm.append(
                    base_learner_performance(scenario,
                                             len(scenario.feature_data),
                                             self.base_learners[index]))

            #if self.current_iteration != self.num_base_learner:
            #    write_to_database(scenario, self, fold)

        # Invert values (lowest, i.e. best, gets the highest weight) and
        # normalize; skip when weighting is disabled and the list is empty.
        if weights_denorm:
            weights_denorm = [
                max(weights_denorm) / float(i + 1) for i in weights_denorm
            ]
            self.weights = [
                float(i) / max(weights_denorm) for i in weights_denorm
            ]
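
A standalone sketch of the same invert-and-normalize weighting, assuming the collected values are per-learner losses where lower is better; the function name and sample numbers are illustrative only:

import numpy as np

def invert_and_normalize(losses):
    # Turn losses into weights: the lowest loss gets the highest weight,
    # then scale so the largest weight is 1.0 (mirrors the loop above).
    losses = np.asarray(losses, dtype=float)
    inverted = losses.max() / (losses + 1.0)  # +1 avoids division by zero
    return inverted / inverted.max()

print(invert_and_normalize([2.0, 0.0, 4.0]))  # best learner (loss 0) gets weight 1.0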
Example #3
    def transform(self, scenario: ASlibScenario):
        '''
            transform ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
        '''
        self.logger.debug("Impute Missing Feature Values")

        values = self.imputer.transform(scenario.feature_data.values)
        scenario.feature_data = pd.DataFrame(
            data=values,
            index=scenario.feature_data.index,
            columns=scenario.feature_data.columns)

        return scenario
Example #4
    def transform(self, scenario: ASlibScenario):
        '''
            transform ASLib scenario data

            Arguments
            ---------
            scenario: data.aslib_scenario.ASlibScenario
                ASlib Scenario with all data in pandas

            Returns
            -------
            data.aslib_scenario.ASlibScenario
        '''
        if self.pca:
            self.logger.debug("Applying PCA")
            values = self.pca.transform(scenario.feature_data.values)

            scenario.feature_data = pd.DataFrame(
                data=values,
                index=scenario.feature_data.index,
                columns=["f%d" % (i) for i in range(values.shape[1])])

        return scenario
Example #5
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):

        # setup the ensemble
        self.create_base_learner()
        self.scenario_name = scenario.scenario
        self.fold = fold
        self.num_algorithms = len(scenario.algorithms)

        num_instances = len(scenario.instances)
        feature_data = scenario.feature_data.to_numpy()
        performance_data = scenario.performance_data.to_numpy()

        # new features in matrix [instances x predictions]
        if self.new_feature_type == 'full':
            new_feature_data = np.zeros(
                (num_instances, self.num_algorithms * len(self.base_learners)))

        elif self.new_feature_type == 'small':
            new_feature_data = np.zeros(
                (num_instances, len(self.base_learners)))

        else:
            sys.exit('Wrong new feature type option')

        # if predictions are precomputed
        if self.pre_computed:
            for base_learner in self.base_learners:
                self.predictions.append(
                    load_pickle(filename='predictions/' +
                                base_learner.get_name() + '_' +
                                scenario.scenario + '_' + str(fold)))

        # create new features for every base learner on each instance
        for learner_index, base_learner in enumerate(self.base_learners):

            # load precomputed predictions
            if self.pre_computed:
                if self.cross_validation:
                    predictions = load_pickle(
                        filename='predictions/cross_validation_' +
                        base_learner.get_name() + '_' + scenario.scenario +
                        '_' + str(fold))
                else:
                    predictions = load_pickle(
                        filename='predictions/full_trainingdata_' +
                        base_learner.get_name() + '_' + scenario.scenario +
                        '_' + str(fold))

            # create predictions if they are not precomputed
            else:

                # if cross validation is used (h2o)
                if self.cross_validation:
                    predictions = np.zeros(
                        (num_instances, self.num_algorithms))
                    instance_counter = 0

                    for sub_fold in range(1, 11):
                        test_scenario, training_scenario = split_scenario(
                            scenario, sub_fold, num_instances)

                        # train base learner
                        base_learner.fit(training_scenario, fold,
                                         amount_of_training_instances)

                        # create new feature data
                        for instance_number in range(
                                instance_counter, instance_counter +
                                len(test_scenario.instances)):
                            prediction = base_learner.predict(
                                feature_data[instance_number], instance_number)
                            predictions[instance_number] = prediction.flatten()

                        instance_counter = instance_counter + len(
                            test_scenario.instances)

                    # fit the recreated base learners on the original
                    # training data (a distinct loop variable avoids
                    # shadowing the outer base_learner)
                    self.create_base_learner()
                    for final_learner in self.base_learners:
                        final_learner.fit(scenario, fold,
                                          amount_of_training_instances)

                # if no cross validation is used
                else:
                    base_learner.fit(scenario, fold,
                                     amount_of_training_instances)

                    predictions = np.zeros(
                        (len(scenario.instances), self.num_algorithms))

                    for instance_id, instance_feature in enumerate(
                            feature_data):
                        predictions[instance_id] = base_learner.predict(
                            instance_feature, instance_id)

            # insert predictions to new feature data matrix
            for i in range(num_instances):
                if self.new_feature_type == 'full':
                    for algo_num in range(self.num_algorithms):
                        new_feature_data[i][
                            algo_num + self.num_algorithms *
                            learner_index] = predictions[i][algo_num]

                elif self.new_feature_type == 'small':
                    new_feature_data[i][learner_index] = np.argmin(
                        predictions[i])

        # add predictions to the features of the instances
        if self.new_feature_type == 'full':
            new_columns = np.arange(self.num_algorithms *
                                    len(self.base_learners))

        elif self.new_feature_type == 'small':
            new_columns = np.arange(len(self.base_learners))

        new_feature_data = pd.DataFrame(new_feature_data,
                                        index=scenario.feature_data.index,
                                        columns=new_columns)

        if self.meta_learner_input == 'full':
            new_feature_data = pd.concat(
                [scenario.feature_data, new_feature_data], axis=1, sort=False)

        elif self.meta_learner_input == 'predictions_only':
            pass

        else:
            sys.exit('Wrong meta learner input type option')

        scenario.feature_data = new_feature_data

        # meta learner selection
        if self.meta_learner_type == 'per_algorithm_regressor':
            self.meta_learner = PerAlgorithmRegressor(
                feature_importances=self.feature_importance)
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SUNNY':
            self.meta_learner = SUNNY()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'ISAC':
            self.meta_learner = ISAC()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SATzilla-11':
            self.meta_learner = SATzilla11()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'multiclass':
            self.meta_learner = MultiClassAlgorithmSelector(
                feature_importance=self.feature_importance)
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'Expectation':
            self.meta_learner = SurrogateSurvivalForest(
                criterion='Expectation')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'PAR10':
            self.meta_learner = SurrogateSurvivalForest(criterion='PAR10')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'RandomForest':
            self.meta_learner = RandomForestClassifier(random_state=fold)
        elif self.meta_learner_type == 'SVM':
            self.meta_learner = LinearSVC(random_state=fold, max_iter=10000)

        # feature selection
        if self.feature_selection == 'variance_threshold':
            self.feature_selector = VarianceThreshold(threshold=.8 * (1 - .8))
            self.feature_selector.fit(scenario.feature_data)
            scenario.feature_data = pd.DataFrame(
                data=self.feature_selector.transform(scenario.feature_data))
        elif self.feature_selection == 'select_k_best':
            self.feature_selector = SelectKBest(f_classif,
                                                k=self.num_algorithms)
            label_performance_data = [np.argmin(x) for x in performance_data]
            self.imputer = SimpleImputer()
            scenario.feature_data = self.imputer.fit_transform(
                scenario.feature_data)
            self.feature_selector.fit(scenario.feature_data,
                                      label_performance_data)
            scenario.feature_data = pd.DataFrame(
                data=self.feature_selector.transform(scenario.feature_data))

        # fit meta learner
        if self.algorithm_selection_algorithm:
            self.meta_learner.fit(scenario, fold, amount_of_training_instances)
        else:
            label_performance_data = [np.argmin(x) for x in performance_data]

            self.pipe = Pipeline([('imputer', SimpleImputer()),
                                  ('standard_scaler', StandardScaler())])
            x_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                              label_performance_data)

            self.meta_learner.fit(x_train, label_performance_data)
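
A small self-contained illustration of the two meta-feature layouts built in the loop above: 'full' keeps every algorithm's predicted performance per base learner, while 'small' keeps only the index of the predicted best algorithm. Shapes and prediction values are made up for the illustration:

import numpy as np

num_learners, num_instances, num_algorithms = 2, 2, 3
# predictions[learner][instance] holds one predicted performance value
# per algorithm (lower is better)
predictions = np.array([[[3.0, 1.0, 2.0], [5.0, 6.0, 4.0]],
                        [[2.0, 2.5, 1.0], [7.0, 3.0, 9.0]]])

# 'full': num_algorithms * num_learners new columns per instance
full = np.hstack([predictions[l] for l in range(num_learners)])
# 'small': one column per learner, the index of its predicted best algorithm
small = np.column_stack([predictions[l].argmin(axis=1)
                         for l in range(num_learners)])

print(full.shape)  # (2, 6)
print(small)       # [[1 2]
                   #  [2 1]]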
Example #6
    def fit(self, scenario: ASlibScenario, fold: int,
            amount_of_training_instances: int):
        self.create_base_learner()
        self.scenario_name = scenario.scenario
        self.fold = fold
        self.num_algorithms = len(scenario.algorithms)
        num_instances = len(scenario.instances)
        feature_data = scenario.feature_data.to_numpy()
        performance_data = scenario.performance_data.to_numpy()
        new_feature_data = np.zeros(
            (num_instances, self.num_algorithms * len(self.base_learners)))

        for learner_index, base_learner in enumerate(self.base_learners):

            instance_counter = 0

            predictions = np.zeros((num_instances, self.num_algorithms))

            if self.pre_computed:
                predictions = load_pickle(
                    filename='predictions/cross_validation_' +
                    base_learner.get_name() + '_' + scenario.scenario + '_' +
                    str(fold))
            else:
                for sub_fold in range(1, 11):
                    test_scenario, training_scenario = split_scenario(
                        scenario, sub_fold, num_instances)

                    # train base learner
                    base_learner.fit(training_scenario, fold,
                                     amount_of_training_instances)

                    # create new feature data
                    for instance_number in range(
                            instance_counter,
                            instance_counter + len(test_scenario.instances)):
                        prediction = base_learner.predict(
                            feature_data[instance_number], instance_number)
                        predictions[instance_number] = prediction.flatten()

                    instance_counter = instance_counter + len(
                        test_scenario.instances)

            for i in range(num_instances):
                for algo_num in range(self.num_algorithms):
                    new_feature_data[i][
                        algo_num + self.num_algorithms *
                        learner_index] = predictions[i][algo_num]

        if self.pre_computed:
            for base_learner in self.base_learners:
                self.predictions.append(
                    load_pickle(filename='predictions/' +
                                base_learner.get_name() + '_' +
                                scenario.scenario + '_' + str(fold)))
        else:
            self.create_base_learner()
            for base_learner in self.base_learners:
                base_learner.fit(scenario, fold, amount_of_training_instances)

        # add predictions to the features of the instances
        new_feature_data = pd.DataFrame(
            new_feature_data,
            index=scenario.feature_data.index,
            columns=np.arange(self.num_algorithms * len(self.base_learners)))
        new_feature_data = pd.concat([scenario.feature_data, new_feature_data],
                                     axis=1,
                                     sort=False)
        scenario.feature_data = new_feature_data

        # meta learner selection
        if self.meta_learner_type == 'per_algorithm_regressor':
            self.meta_learner = PerAlgorithmRegressor()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SUNNY':
            self.meta_learner = SUNNY()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'ISAC':
            self.meta_learner = ISAC()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'SATzilla-11':
            self.meta_learner = SATzilla11()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'multiclass':
            self.meta_learner = MultiClassAlgorithmSelector()
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'Expectation':
            self.meta_learner = SurrogateSurvivalForest(
                criterion='Expectation')
            self.algorithm_selection_algorithm = True
        elif self.meta_learner_type == 'DecisionTree':
            self.meta_learner = DecisionTreeClassifier()
        elif self.meta_learner_type == 'RandomForest':
            self.meta_learner = RandomForestClassifier()

        if self.algorithm_selection_algorithm:
            self.meta_learner.fit(scenario, fold, amount_of_training_instances)
        else:
            label_performance_data = [np.argmin(x) for x in performance_data]

            self.pipe = Pipeline([('imputer', SimpleImputer()),
                                  ('standard_scaler', StandardScaler())])
            X_train = self.pipe.fit_transform(scenario.feature_data.to_numpy(),
                                              label_performance_data)

            self.meta_learner.fit(X_train, label_performance_data)
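
The long if/elif chain that selects the meta learner could also be written as a dispatch table. A sketch of that design, shown with the two sklearn learners only so it stays self-contained; the project-specific selectors (SUNNY, ISAC, SATzilla11, ...) would be registered the same way, and the function name is hypothetical:

import sys
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC

def make_meta_learner(name, fold):
    # One factory per meta-learner name; keys mirror the strings used
    # in the if/elif chain above.
    factories = {
        'RandomForest': lambda: RandomForestClassifier(random_state=fold),
        'SVM': lambda: LinearSVC(random_state=fold, max_iter=10000),
    }
    if name not in factories:
        sys.exit('Wrong meta learner type option')
    return factories[name]()

print(make_meta_learner('RandomForest', fold=0))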