示例#1
0
    def perform(self):
        """Run the boosting experiment for this dataset.

        Grid-searches the AdaBoost estimator count and base-tree max depth,
        then re-runs an intentionally overfit variant (deep base trees, fixed
        estimator count) to produce its iteration learning curve only.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
        # NOTE: an unused `alphas = np.arange(1, 11)` scratch variable was removed.
        max_depths = np.arange(1, 41, 1)  # np.arange(1, 11)
        base = learners.DTLearner(criterion='gini', class_weight='balanced', random_state=self._details.seed)
        of_base = learners.DTLearner(criterion='gini', class_weight='balanced', random_state=self._details.seed)

        booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1, base_estimator=base,
                                           random_state=self._details.seed)
        # Overfit variant: same configuration, but its param grid below removes depth regularization.
        of_booster = learners.BoostingLearner(algorithm='SAMME', learning_rate=1, base_estimator=of_base,
                                              random_state=self._details.seed)

        # TODO: No 90 here?
        params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 100],
                  'Boost__base_estimator__max_depth': max_depths}
        iteration_params = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
        # Fixed deep trees + many estimators: deliberately prone to overfitting.
        of_params = {'Boost__base_estimator__max_depth': 100, 'Boost__n_estimators': 50}
        complexity_param = {'name': 'Boost__n_estimators', 'display_name': 'Estimator count', 'x_scale': 'log',
                            'values': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}

        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name, booster,
                                       'Boost', 'Boost', params, complexity_param=complexity_param,
                                       seed=self._details.seed, threads=self._details.threads, verbose=self._verbose)
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       of_booster, 'Boost_OF', 'Boost', of_params, seed=self._details.seed,
                                       iteration_params=iteration_params, threads=self._details.threads,
                                       verbose=self._verbose, iteration_lc_only=True)
    def perform(self):
        """Boosting experiment: grid-search base-tree depth, learning rate and
        estimator count, then run an unconstrained ("overfit") variant for its
        iteration learning curve only. A previously found best-parameter set is
        applied up front, which also skips the grid search.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
        depth_grid = np.arange(1, 11, 1)

        # NOTE: Criterion may need to be adjusted here depending on the dataset
        weak = learners.DTLearner(criterion='entropy', class_weight='balanced', max_depth=10,
                                  random_state=self._details.seed)
        weak_unbounded = learners.DTLearner(criterion='entropy', class_weight='balanced',
                                            random_state=self._details.seed)

        ensemble = learners.BoostingLearner(algorithm='SAMME', learning_rate=1, base_estimator=weak,
                                            random_state=self._details.seed)
        of_ensemble = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                               base_estimator=weak_unbounded,
                                               random_state=self._details.seed)

        lr_grid = [2 ** exp / 100 for exp in range(7)] + [1]
        estimator_grid = [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100]
        grid = {'Boost__n_estimators': estimator_grid,
                'Boost__learning_rate': lr_grid,
                'Boost__base_estimator__max_depth': depth_grid}
        iteration_details = {
            'params': {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
        }
        of_grid = {'Boost__base_estimator__max_depth': None}
        #complexity_param = {'name': 'Boost__learning_rate', 'display_name': 'Learning rate', 'x_scale': 'log',
        #                    'values': [(2**x)/100 for x in range(7)]+[1]}
        complexity_param = {'name': 'Boost__n_estimators', 'display_name': 'N_estimators', 'x_scale': 'linear',
                            'values': estimator_grid}

        # Known best params from a previous grid search; when set, the grid
        # search is skipped and only the graphs are rebuilt.
        best_params = None
        #
        # Dataset 1:
        # best_params = {'base_estimator__max_depth': 4, 'learning_rate': 0.32, 'n_estimators': 20}
        #
        # Dataset 2:
        best_params = {'base_estimator__max_depth': 5, 'learning_rate': 0.64, 'n_estimators': 45}

        if best_params is not None:
            ensemble.set_params(**best_params)
            of_ensemble.set_params(**best_params)

        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       ensemble, 'Boost', 'Boost', grid, complexity_param=complexity_param,
                                       iteration_details=iteration_details, best_params=best_params,
                                       seed=self._details.seed, threads=self._details.threads, verbose=self._verbose)

        # TODO: This should turn OFF regularization
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       of_ensemble, 'Boost_OF', 'Boost', of_grid, seed=self._details.seed,
                                       iteration_details=iteration_details,
                                       best_params=best_params, threads=self._details.threads,
                                       verbose=self._verbose, iteration_lc_only=True)
示例#3
0
    def perform(self):
        """Decision-tree experiment: grid-search criterion, max depth and class
        weight, reusing known best parameters for this dataset when available
        in ``ds_best_params``.
        """
        # TODO: Clean up the older alpha stuff?
        depth_grid = np.arange(1, 51, 1)
        grid = {'DT__criterion': ['gini', 'entropy'],
                'DT__max_depth': depth_grid,
                'DT__class_weight': ['balanced', None]}  # , 'DT__max_leaf_nodes': max_leaf_nodes}
        complexity_param = {'name': 'DT__max_depth', 'display_name': 'Max Depth', 'values': depth_grid}

        tree = learners.DTLearner(random_state=self._details.seed)
        # Apply previously-found best params for this dataset, if recorded.
        known = self._details.ds_best_params
        best_params = known['DT'] if known is not None and 'DT' in known else None
        if best_params is not None:
            tree.set_params(**best_params)
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       tree, 'DT', 'DT', grid, complexity_param=complexity_param,
                                       seed=self._details.seed, threads=self._details.threads,
                                       best_params=best_params, verbose=self._verbose)
示例#4
0
    def perform(self):
        """Decision-tree experiment: grid-search criterion and max depth with
        balanced class weights; depth is the model-complexity axis.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/DT.py
        depth_grid = np.arange(1, 25, 1)
        grid = {'DT__criterion': ['gini', 'entropy'],
                'DT__max_depth': depth_grid,
                'DT__class_weight': ['balanced']}
        complexity_param = {'name': 'DT__max_depth', 'display_name': 'Max Depth', 'values': depth_grid}

        tree = learners.DTLearner(random_state=self._details.seed)
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       tree, 'DT', 'DT', grid, complexity_param=complexity_param,
                                       seed=self._details.seed, threads=self._details.threads,
                                       verbose=self._verbose)
示例#5
0
    def perform(self):
        """Decision-tree experiment: grid-search criterion, depth and class
        weight. A previously-found best-parameter set is applied up front,
        which also tells the experiment runner to skip the grid search.
        """
        # TODO: Clean up the older alpha stuff?
        depth_grid = np.arange(1, 51, 1)
        grid = {'DT__criterion': ['gini', 'entropy'],
                'DT__max_depth': depth_grid,
                'DT__class_weight': ['balanced', None]}  # , 'DT__max_leaf_nodes': max_leaf_nodes}
        complexity_param = {'name': 'DT__max_depth', 'display_name': 'Max Depth', 'values': depth_grid}

        # Known best params from a previous grid search; when set, the grid
        # search is skipped and only the graphs are rebuilt.
        best_params = None
        #
        # Dataset 1:
        # best_params = {'criterion': 'gini', 'max_depth': 5, 'class_weight': 'balanced'}
        #
        # Dataset 2:
        best_params = {'criterion': 'entropy', 'max_depth': 14, 'class_weight': 'balanced'}

        tree = learners.DTLearner(random_state=self._details.seed)
        if best_params is not None:
            tree.set_params(**best_params)

        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       tree, 'DT', 'DT', grid,
                                       complexity_param=complexity_param, seed=self._details.seed,
                                       threads=self._details.threads,
                                       best_params=best_params,
                                       verbose=self._verbose)
    def perform(self):
        """Boosting experiment: grid-search base-tree depth, learning rate and
        estimator count, plus an unconstrained ("overfit") variant run for its
        iteration learning curve only. Known best params for this dataset,
        when recorded, are applied and the grid search is skipped.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
        depth_grid = np.arange(1, 11, 1)

        # NOTE: Criterion may need to be adjusted here depending on the dataset
        weak = learners.DTLearner(criterion='gini', class_weight='balanced', max_depth=7,
                                  random_state=self._details.seed)
        weak_unbounded = learners.DTLearner(criterion='gini', class_weight='balanced',
                                            random_state=self._details.seed)

        ensemble = learners.BoostingLearner(algorithm='SAMME', learning_rate=1, base_estimator=weak,
                                            random_state=self._details.seed)
        of_ensemble = learners.BoostingLearner(algorithm='SAMME', learning_rate=1,
                                               base_estimator=weak_unbounded,
                                               random_state=self._details.seed)

        lr_grid = [2 ** exp / 100 for exp in range(7)] + [1]
        grid = {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100],
                'Boost__learning_rate': lr_grid,
                'Boost__base_estimator__max_depth': depth_grid}
        iteration_details = {
            'params': {'Boost__n_estimators': [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]}
        }
        of_grid = {'Boost__base_estimator__max_depth': None}
        complexity_param = {'name': 'Boost__learning_rate', 'display_name': 'Learning rate', 'x_scale': 'log',
                            'values': lr_grid}

        # Apply previously-found best params for this dataset, if recorded.
        known = self._details.ds_best_params
        best_params = known['Boost'] if known is not None and 'Boost' in known else None

        if best_params is not None:
            ensemble.set_params(**best_params)
            of_ensemble.set_params(**best_params)
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       ensemble, 'Boost', 'Boost', grid, complexity_param=complexity_param,
                                       iteration_details=iteration_details, best_params=best_params,
                                       seed=self._details.seed, threads=self._details.threads, verbose=self._verbose)

        # TODO: This should turn OFF regularization
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name,
                                       of_ensemble, 'Boost_OF', 'Boost', of_grid, seed=self._details.seed,
                                       iteration_details=iteration_details,
                                       best_params=best_params, threads=self._details.threads,
                                       verbose=self._verbose, iteration_lc_only=True)
示例#7
0
    def perform(self):
        """Run the decision-tree experiment.

        Grid-searches criterion, max depth and class weight, logging whether
        the grid search will run or known best parameters are being reused.
        """
        # TODO: Clean up the older alpha stuff?
        max_depths = np.arange(1, 51, 1)
        params = {
            "DT__criterion": ["gini", "entropy"],
            "DT__max_depth": max_depths,
            "DT__class_weight": ["balanced", None]
        }  # , "DT__max_leaf_nodes": max_leaf_nodes}
        complexity_param = {
            "name": "DT__max_depth",
            "display_name": "Max Depth",
            "values": max_depths
        }

        # Alternative grid over leaf-node count instead of depth:
        # max_leaf_nodes = np.arange(10, 200, 10)
        # params = {"DT__criterion": ["gini", "entropy"],
        #           "DT__class_weight": ["balanced", None], "DT__max_leaf_nodes": max_leaf_nodes}
        # complexity_param = {
        #     "name": "DT__max_leaf_nodes", "display_name": "Max Leaf Nodes", "values": max_leaf_nodes}

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # Seed: 2702306879, 3882803657
        # best_params = {'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 11}
        #
        # Dataset 2:
        # best_params = {"criterion": "entropy", "max_depth": 4, "class_weight": "balanced"}

        learner = learners.DTLearner(random_state=self._details.seed)
        if best_params is not None:
            learner.set_params(**best_params)
            # Fixed garbled log message ("GridSearchCV will is skipped").
            self.log(
                "Best parameters are provided, GridSearchCV is skipped")
        else:
            self.log(
                "Best parameters are not provided, GridSearchCV is scheduled")

        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       "DT",
                                       "DT",
                                       params,
                                       complexity_param=complexity_param,
                                       seed=self._details.seed,
                                       threads=self._details.threads,
                                       best_params=best_params,
                                       verbose=self._verbose)
示例#8
0
    def perform(self):
        """Run the decision-tree experiment for the supported datasets.

        Both supported datasets ("poisonous_mushrooms" and "spam") use an
        identical gini/max-depth grid, so the previously duplicated per-dataset
        branches are collapsed into a single membership test.
        """
        # TODO: Clean up the older alpha stuff?
        max_depths = np.arange(1, 50, 1)

        params = None
        complexity_param = None
        # The two datasets shared byte-identical grids; one branch suffices.
        if self._details.ds_name in ("poisonous_mushrooms", "spam"):
            params = {
                "DT__criterion": ["gini"],
                "DT__max_depth": max_depths,
            }  # , 'DT__max_leaf_nodes': max_leaf_nodes}
            complexity_param = {
                "name": "DT__max_depth",
                "display_name": "Max Depth",
                "values": max_depths,
            }

        best_params = None
        # Known best values from earlier runs; uncomment to skip the grid search.
        # if self._details.ds_name == "poisonous_mushrooms":
        #     best_params = {"criterion": "gini", "max_depth": 7}
        # elif self._details.ds_name == "spam":
        #     best_params = {"criterion": "gini", "max_depth": 50}

        learner = learners.DTLearner(random_state=self._details.seed)
        if best_params is not None:
            learner.set_params(**best_params)

        experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "DT",
            "DT",
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            threads=self._details.threads,
            best_params=best_params,
            verbose=self._verbose,
        )
示例#9
0
    def perform(self):
        """Run the boosting experiment over pruned decision trees.

        Grid-searches estimator count, learning rate and the base tree's
        pruning alpha. When ``bparams`` is set, the grid is pinned to the known
        best values for the current dataset so only the graphs are rebuilt.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
        alphas = [x / 1000 for x in range(-10, 40, 4)]
        crit = "entropy"
        lr = [(2**x) / 100 for x in range(7)] + [1]
        n_estimators = [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        n_estimators_iter = [
            1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 140, 160,
            200, 240, 300
        ]
        # /output-ew2
        if 'enhancer-b' == self._details.ds_name and self._details.bparams:
            alphas = [0.05]
            crit = "gini"
            # NOTE: dead intermediate assignments removed — lr was first set to
            # [0.16] + a 10**(x/8) sweep and n_estimators to n_estimators_iter,
            # but both were immediately overwritten with the values below.
            lr = [0.32]
            n_estimators = [5]
        if 'wine-qual' == self._details.ds_name and self._details.bparams:
            alphas = [0.014]
            crit = "gini"
            lr = [0.16]  # use old lr range here
            n_estimators = [20]

        # NOTE: Criterion may need to be adjusted here depending on the dataset
        base = learners.DTLearner(criterion=crit,
                                  class_weight='balanced',
                                  random_state=self._details.seed)
        of_base = learners.DTLearner(criterion=crit,
                                     class_weight='balanced',
                                     random_state=self._details.seed)

        booster = learners.BoostingLearner(algorithm='SAMME.R',
                                           learning_rate=1,
                                           base_estimator=base,
                                           random_state=self._details.seed)
        # NOTE(review): of_booster is configured but never passed to an
        # experiment below — confirm whether the overfit run was removed on purpose.
        of_booster = learners.BoostingLearner(algorithm='SAMME.R',
                                              learning_rate=1,
                                              base_estimator=of_base,
                                              random_state=self._details.seed)

        params = {
            'Boost__n_estimators': n_estimators,
            'Boost__learning_rate': lr,
            'Boost__base_estimator__alpha': alphas,
            'Boost__random_state': [self._details.seed],
            'Boost__base_estimator__random_state': [self._details.seed]
        }
        iteration_details = {
            'params': {
                'Boost__n_estimators': n_estimators_iter
            }
        }
        of_params = {'Boost__base_estimator__alpha': -1}
        complexity_param = {
            'name': 'Boost__learning_rate',
            'display_name': 'Learning rate',
            'x_scale': 'log',
            'values': [10**(x / 8) for x in range(-32, 16)]
        }

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # best_params = {'base_estimator__max_depth': 8, 'learning_rate': 0.32, 'n_estimators': 90}
        #
        # Dataset 2:
        # best_params = {'base_estimator__max_depth': 6, 'learning_rate': 0.16, 'n_estimators': 20}

        if best_params is not None:
            booster.set_params(**best_params)
            of_booster.set_params(**best_params)

        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       booster,
                                       'Boost',
                                       'Boost',
                                       params,
                                       complexity_param=complexity_param,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       seed=self._details.seed,
                                       threads=self._details.threads,
                                       verbose=self._verbose)
示例#10
0
    def perform(self):
        """Boosting experiment: grid-search base-tree depth, learning rate and
        estimator count over an AdaBoost (SAMME) ensemble of shallow trees.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/Boosting.py
        depth_grid = np.arange(1, 15, 1)

        # NOTE: Criterion may need to be adjusted here depending on the dataset
        weak = learners.DTLearner(criterion="entropy", class_weight="balanced", max_depth=5,
                                  random_state=self._details.seed)
        weak_unbounded = learners.DTLearner(criterion="entropy", class_weight="balanced",
                                            random_state=self._details.seed)

        ensemble = learners.BoostingLearner(algorithm="SAMME", learning_rate=1, base_estimator=weak,
                                            random_state=self._details.seed)
        of_ensemble = learners.BoostingLearner(algorithm="SAMME", learning_rate=1,
                                               base_estimator=weak_unbounded,
                                               random_state=self._details.seed)

        lr_grid = [2 ** exp / 100 for exp in range(7)] + [1]
        grid = {
            "Boost__n_estimators": [1, 2, 5, 10, 20, 30, 45, 60, 80, 90, 100],
            "Boost__learning_rate": lr_grid,
            "Boost__base_estimator__max_depth": depth_grid,
        }
        iteration_details = {
            "params": {
                "Boost__n_estimators": [1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
            }
        }
        of_params = {"Boost__base_estimator__max_depth": None}
        complexity_param = {
            "name": "Boost__learning_rate",
            "display_name": "Learning rate",
            "x_scale": "log",
            "values": lr_grid,
        }

        # Known best params from earlier grid searches, kept for reference;
        # uncommenting them skips the grid search and rebuilds only the graphs.
        best_params = None
        # if self._details.ds_name == "spam":
        #     best_params = {
        #         "base_estimator__max_depth": 10,
        #         "learning_rate": 0.32,
        #         "n_estimators": 30,
        #     }
        # elif self._details.ds_name == "poisonous_mushrooms":
        #     best_params = {
        #         "base_estimator__max_depth": 10,
        #         "learning_rate": 0.08,
        #         "n_estimators": 60,
        #     }
        # Dataset 1:
        # best_params = {'base_estimator__max_depth': 8, 'learning_rate': 0.32, 'n_estimators': 90}
        # Dataset 2:
        # best_params = {'base_estimator__max_depth': 6, 'learning_rate': 0.16, 'n_estimators': 20}
        # if best_params is not None:
        #     ensemble.set_params(**best_params)
        #     of_ensemble.set_params(**best_params)

        experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            ensemble,
            "Boost",
            "Boost",
            grid,
            complexity_param=complexity_param,
            iteration_details=iteration_details,
            best_params=best_params,
            seed=self._details.seed,
            threads=self._details.threads,
            verbose=self._verbose,
        )
示例#11
0
    def perform(self):
        """Run the pruned decision-tree experiment.

        Grid-searches criterion, pruning alpha and class weight (pruning, not
        depth, controls complexity here). When ``bparams`` is set, the search
        grid is collapsed to the known best single values for the current
        dataset. The experiment's resulting best params are captured.
        """
        # TODO: Clean up the older alpha stuff?
        # NOTE: only referenced by the commented-out depth-based grid below;
        # kept so that grid can be re-enabled without edits.
        max_depths = np.arange(1, 21, 1)
        #alphas = [-1,-1e-3,-(1e-3)*10**-0.5, -1e-2, -(1e-2)*10**-0.5,-1e-1,-(1e-1)*10**-0.5, 0, (1e-1)*10**-0.5,1e-1,(1e-2)*10**-0.5,1e-2,(1e-3)*10**-0.5,1e-3]
        alphas = [x / 1000 for x in range(-40, 40, 4)]

        #params = {'DT__criterion': ['gini', 'entropy'],
        #          'DT__max_depth': max_depths,
        #          'alpha' : alphas,
        #          'DT__class_weight': ['balanced', None]
        #}  # , 'DT__max_leaf_nodes': max_leaf_nodes}
        params = {
            'DT__criterion': ['gini', 'entropy'],
            'DT__alpha': alphas,
            'DT__class_weight': ['balanced'],
            'DT__random_state': [self._details.seed]
        }

        complexity_param = {
            'name': 'DT__alpha',
            'display_name': 'alpha',
            'values': alphas
        }

        best_params = None
        # Known best values per dataset; with bparams set the search grid is
        # pinned to these single values (single-value grid == no real search).
        #
        # Dataset 1:
        params_wine = {
            'DT__criterion': 'gini',
            'DT__alpha': 0.008,
            'DT__class_weight': 'balanced'
        }
        if self._details.ds_name == "wine-qual" and self._details.bparams:
            # Pin each overlapping grid entry to its single known-best value.
            for key, value in params_wine.items():
                if key in params:
                    params[key] = [value]

        #
        # Dataset 2:
        params_enhancer = {
            'DT__criterion': 'gini',
            'DT__alpha': 0.008,
            'DT__class_weight': 'balanced'
        }
        if self._details.ds_name == "enhancer-b" and self._details.bparams:
            for key, value in params_enhancer.items():
                if key in params:
                    params[key] = [value]

        learner = learners.DTLearner(random_state=self._details.seed)
        # best_params is always None at this point; guard kept for parity with
        # the other experiments' template.
        if best_params is not None:
            learner.set_params(**best_params)

        best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'DT',
            'DT',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            threads=self._details.threads,
            best_params=best_params,
            verbose=self._verbose,
            apply_pruning=True)