Example #1
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        alphas = [10 ** -x for x in np.arange(-1, 5.01, 1 / 2)]
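        # Exponents run from -1 to 5 in half steps, so alpha sweeps from 10 down to 1e-5 on a log grid.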

        # TODO: Allow for tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        hiddens = [(h,) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
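        # 1-, 2-, and 3-layer architectures, each layer sized d, d // 2, or 2 * d (9 candidate topologies).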

        params = {'MLP__activation': ['relu', 'tanh'], 'MLP__alpha': alphas,
                  'MLP__hidden_layer_sizes': hiddens}

        timing_params = {'MLP__early_stopping': True}
        iteration_params = {'MLP__max_iter':
                            [2 ** x for x in range(11)] + [2000, 2200, 2400, 2600, 2800,
                                                           3000]}
        complexity_param = {'name': 'MLP__alpha', 'display_name': 'Alpha', 'x_scale': 'log',
                            'values': alphas}
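        # The alpha sweep above feeds the model-complexity (validation) curve on a log axis;
        # the max_iter values feed the iteration learning curve.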

        learner = learners.ANNLearner(tol=1e-8, verbose=self._verbose)
        best_params = experiments.perform_experiment(
            self._details.ds, self._details.ds_name, self._details.ds_readable_name, learner, 'ANN', 'MLP',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_pipe_params=timing_params, iteration_params=iteration_params,
            threads=self._details.threads, verbose=self._verbose)

        of_params = best_params.copy()
        of_params['MLP__alpha'] = 0
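        # The 'ANN_OF' rerun keeps the best params but zeroes alpha (no L2 penalty), so the
        # iteration-only learning curve shows the unregularized, overfitting-prone behavior.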
        learner = learners.ANNLearner()
        experiments.perform_experiment(self._details.ds, self._details.ds_name, self._details.ds_readable_name, learner,
                                       'ANN_OF', 'MLP', of_params, seed=self._details.seed, timing_params=timing_params,
                                       iteration_pipe_params=timing_params, iteration_params=iteration_params,
                                       threads=self._details.threads, verbose=self._verbose,
                                       iteration_lc_only=True)
Example #2
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        alphas = [10**-x for x in np.arange(-1, 9.01, 0.5)]

        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        hiddens = [(h, ) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
        learning_rates = sorted([(2**x) / 1000 for x in range(8)] + [0.000001])
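        # Learning rates double from 0.001 up to 0.128, plus a very small 1e-6 baseline.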

        params = {
            'MLP__activation': ['relu', 'logistic'],
            'MLP__alpha': alphas,
            'MLP__learning_rate_init': learning_rates,
            'MLP__hidden_layer_sizes': hiddens
        }

        timing_params = {'MLP__early_stopping': False}
        iteration_details = {
            'x_scale': 'log',
            'params': {
                'MLP__max_iter': [2**x for x in range(12)] +
                [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
            },
            'pipe_params': timing_params
        }
        complexity_param = {
            'name': 'MLP__alpha',
            'display_name': 'Alpha',
            'x_scale': 'log',
            'values': alphas
        }

        best_params = None
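        # Reuse best params stored with the dataset (if any) so the grid search can be skipped
        # and only the curves are rebuilt.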
        if self._details.ds_best_params is not None and 'ANN' in self._details.ds_best_params:
            best_params = self._details.ds_best_params['ANN']
        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)

        cv_best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'ANN',
            'MLP',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_details=iteration_details,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose)

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params['MLP__alpha'] = 0
        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'ANN_OF',
                                       'MLP',
                                       of_params,
                                       seed=self._details.seed,
                                       timing_params=timing_params,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)
Example #3
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py

        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]

        timing_params = {"MLP__early_stopping": False}
        iteration_details = {
            "x_scale": "log",
            "params": {
                "MLP__max_iter": [2 ** x for x in range(12)]
                + [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]
            },
            "pipe_params": timing_params,
        }

        params = None
        complexity_param = None
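        # The search grid and the alpha complexity sweep are defined per dataset below; any other
        # dataset name leaves them as None.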
        if self._details.ds_name == "spam":
            # Search for good alphas
            alphas = [10 ** -x for x in np.arange(0, 9.01, 0.5)]
            hiddens = [(h,) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
            learning_rates = [0.064]
            # learning_rates = sorted([(2 ** x) / 1000 for x in range(8)] + [0.000001])
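            # The learning rate is pinned to a single value here; the broader sweep above is kept
            # commented out for reference.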
            params = {
                "MLP__activation": ["logistic"],
                "MLP__alpha": alphas,
                "MLP__learning_rate_init": learning_rates,
                "MLP__hidden_layer_sizes": hiddens,
            }
            complexity_param = {
                "name": "MLP__alpha",
                "display_name": "Alpha",
                "x_scale": "log",
                "values": alphas,
            }
        elif self._details.ds_name == "poisonous_mushrooms":
            # Search for good alphas
            alphas = [10 ** -x for x in np.arange(0, 9.01, 0.5)]
            # hiddens = [(16,)]  # [(h,) * l for l in [1, 2] for h in [d, d // 2, d * 2]]
            hiddens = [(h,) * l for l in [1, 2] for h in [d, d // 2, d * 2]]
            learning_rates = [
                0.064
            ]  # sorted([(2**x)/1000 for x in range(8)]+[0.000001])
            params = {
                "MLP__activation": ["logistic"],
                "MLP__alpha": alphas,
                "MLP__learning_rate_init": learning_rates,
                "MLP__hidden_layer_sizes": hiddens,
            }
            complexity_param = {
                "name": "MLP__alpha",
                "display_name": "Alpha",
                "x_scale": "log",
                "values": alphas,
            }
        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # if self._details.ds_name == 'spam':
        #     best_params = {'activation': 'logistic', 'alpha': 1.00E-06, 'hidden_layer_sizes': (16,), 'learning_rate_init': 0.064}
        # elif self._details.ds_name == 'poisonous_mushrooms':
        #     best_params = {'activation': 'logistic', 'alpha': 0.003162278, 'hidden_layer_sizes': (16,),
        #                    'learning_rate_init': 0.064}

        learner = learners.ANNLearner(
            max_iter=3000,
            early_stopping=True,
            random_state=self._details.seed,
            verbose=self._verbose,
        )
        if best_params is not None:
            learner.set_params(**best_params)
        cv_best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "ANN",
            "MLP",
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            iteration_details=iteration_details,
            timing_params=timing_params,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose,
        )

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params["MLP__alpha"] = 0
        learner = learners.ANNLearner(
            max_iter=3000,
            early_stopping=True,
            random_state=self._details.seed,
            verbose=self._verbose,
        )
        if best_params is not None:
            learner.set_params(**best_params)
        experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "ANN_OF",
            "MLP",
            of_params,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_details=iteration_details,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose,
            iteration_lc_only=True,
        )
Example #4
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        alphas = [10**-x for x in np.arange(-3, 9.01, 0.5)]
        alphas = [0] + alphas
        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        hiddens = [(h, ) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
        learning_rates = sorted([(2**x) / 1000 for x in range(8)] + [0.000001])

        params = {
            'MLP__activation': ['relu', 'logistic', 'tanh'],
            'MLP__alpha': alphas,
            'MLP__learning_rate_init': learning_rates,
            'MLP__hidden_layer_sizes': hiddens,
            'MLP__random_state': [self._details.seed],
            'MLP__beta_1': [0.5, 0.9, 0.99, 0.999],
            'MLP__beta_2': [0.5, 0.9, 0.99, 0.999]
        }
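        # beta_1/beta_2 are Adam's exponential-decay rates for the first and second moment
        # estimates; they only take effect with the default 'adam' solver.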

        timing_params = {'MLP__early_stopping': False}
        iteration_details = {
            'x_scale': 'log',
            'params': {
                'MLP__max_iter':
                [2**x for x in range(12)] + [2100, 2400, 2700, 3000]
            },
            'pipe_params': timing_params
        }
        complexity_param = {
            'name': 'MLP__alpha',
            'display_name': 'Alpha',
            'x_scale': 'log',
            'values': alphas
        }

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # best_params = {'activation': 'relu', 'alpha': 1.0, 'hidden_layer_sizes': (36, 36),
        #                'learning_rate_init': 0.016}
        # Dataset 2:
        # best_params = {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (16, 16),
        #                'learning_rate_init': 0.064}
        alpha = 0
        params_wine = {
            'MLP__activation': 'tanh',
            'MLP__alpha': 0.1,
            'MLP__learning_rate_init': 0.064,
            'MLP__hidden_layer_sizes': (12, 12),
            'MLP__beta_1': 0.99,
            'MLP__beta_2': 0.99
        }
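        # When best-params mode is requested for a dataset, the grid below collapses each matching
        # key to a single pinned value.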
        if self._details.ds_name == "wine-qual" and self._details.bparams:
            alpha = 0.1
            for k in params.keys():
                if k in params_wine.keys():
                    params[k] = [params_wine.get(k)]
        params_enhancer = {
            'MLP__activation': 'logistic',
            'MLP__alpha': 0.001,
            'MLP__learning_rate_init': 0.128,
            'MLP__hidden_layer_sizes': (38, 38),
            'MLP__beta_1': 0.5,
            'MLP__beta_2': 0.999
        }
        if self._details.ds_name == "enhancer-b" and self._details.bparams:
            alpha = 0.001
            for k in params.keys():
                if k in params_enhancer.keys():
                    params[k] = [params_enhancer.get(k)]
        # if self._details.ds_name == "wine-qual":
        #     best_params = params_wine

        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        cv_best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'ANN',
            'MLP',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_details=iteration_details,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose)

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params['MLP__alpha'] = alpha
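        # alpha here is the dataset-specific value chosen above (0 by default, 0.1 for wine-qual,
        # 0.001 for enhancer-b in best-params mode), so regularization is not necessarily disabled.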
        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'ANN_',
                                       'MLP',
                                       of_params,
                                       seed=self._details.seed,
                                       timing_params=timing_params,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)
Example #5
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        # alphas = [10 ** -x for x in np.arange(-1, 9.01, 0.5)]

        # YS trying larger intervals

        # alphas = [10 ** -x for x in np.arange(-1, 5.01, 1)]
        alphas = [10**-x for x in np.arange(1, 3.01, 1)]

        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        hiddens = [(h, ) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
        # learning_rates = sorted([(2**x)/1000 for x in range(8)]+[0.000001])

        # YS trying larger intervals
        learning_rates = sorted([(4**x) / 1000 for x in range(3)])
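        # Coarser sweep than the original: 0.001, 0.004, and 0.016 (powers of 4 divided by 1000).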

        # YS trying larger intervals
        # params = {'MLP__activation': ['relu', 'logistic'], 'MLP__alpha': alphas,
        params = {
            'MLP__activation': ['relu', 'logistic'],
            'MLP__alpha': alphas,
            'MLP__learning_rate_init': learning_rates,
            'MLP__hidden_layer_sizes': hiddens
        }

        # YS changing early stopping to True
        # timing_params = {'MLP__early_stopping': False}
        timing_params = {'MLP__early_stopping': True}
        iteration_details = {
            'x_scale': 'log',

            # 'params': {'MLP__max_iter':
            #                 [2 ** x for x in range(12)] + [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900,
            #                                                3000]},
            # YS cutting the max_iter
            'params': {
                'MLP__max_iter': [2**x for x in range(11)]
            },
            'pipe_params': timing_params
        }
        complexity_param = {
            'name': 'MLP__alpha',
            'display_name': 'Alpha',
            'x_scale': 'log',
            'values': alphas
        }

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # best_params = {'activation': 'relu', 'alpha': 1.0, 'hidden_layer_sizes': (36, 36),
        #                'learning_rate_init': 0.016}
        # Dataset 2:
        # best_params = {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (16, 16),
        #                'learning_rate_init': 0.064}

        # learner = learners.ANNLearner(max_iter=3000, early_stopping=True, random_state=self._details.seed,
        #                               verbose=self._verbose)
        # YS cutting the max_iter

        learner = learners.ANNLearner(max_iter=2000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        cv_best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'ANN',
            'MLP',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_details=iteration_details,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose)

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params['MLP__alpha'] = 0

        # learner = learners.ANNLearner(max_iter=3000, early_stopping=True, random_state=self._details.seed,
        #                               verbose=self._verbose)
        # YS cutting the max_iter
        learner = learners.ANNLearner(max_iter=2000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'ANN_OF',
                                       'MLP',
                                       of_params,
                                       seed=self._details.seed,
                                       timing_params=timing_params,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)
Example #6
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        alphas = [3, 1, 0.5, 0.25, 0.10, 0.005, 0.001]

        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        hiddens = [(2, 2), (4, 4), (8, 8), (16, 16), (32, 32), (64, 64),
                   (128, 128)]
        learning_rates = [0.000001, 0.0001, 0.001, 0.01, 0.1, 0.5]
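        # Hidden sizes are fixed two-layer pairs from (2, 2) to (128, 128) and the learning rates an
        # explicit list, so the feature count d computed above is not actually used here.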

        params = {
            'MLP__activation': ['relu', 'logistic'],
            'MLP__alpha': alphas,
            'MLP__learning_rate_init': learning_rates,
            'MLP__hidden_layer_sizes': hiddens
        }

        timing_params = {'MLP__early_stopping': False}
        iteration_details = {
            'x_scale': 'log',
            'params': {
                'MLP__max_iter': [1000, 2500, 5000, 10000, 30000]
            },
            'pipe_params': timing_params
        }
        complexity_param = {
            'name': 'MLP__alpha',
            'display_name': 'Alpha',
            'x_scale': 'log',
            'values': alphas
        }

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # best_params = {'activation': 'relu', 'alpha': 1.0, 'hidden_layer_sizes': (36, 36),
        #                'learning_rate_init': 0.016}
        # Dataset 2:
        # best_params = {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (16, 16),
        #                'learning_rate_init': 0.064}

        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        cv_best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'ANN',
            'MLP',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            timing_params=timing_params,
            iteration_details=iteration_details,
            best_params=best_params,
            threads=self._details.threads,
            verbose=self._verbose)

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params['MLP__alpha'] = 0
        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)
        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'ANN_OF',
                                       'MLP',
                                       of_params,
                                       seed=self._details.seed,
                                       timing_params=timing_params,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)
Example #7
    def perform(self):
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/ANN.py
        # Search for good alphas
        """
        alphas:
        - constrains/penalizes our max weights: high of 10 and diminishing drastically
        - recall: the larger the weight for an attr, the more it dominates, can lead to OVERFITTING
        """
        alphas = [10 ** -x for x in np.arange(-1, 9.01, 0.5)]

        # TODO: Allow for better tuning of hidden layers based on dataset provided
        d = self._details.ds.features.shape[1]
        """
        hiddens
        - based on the number of features (Xs, or attrs) in our data set
        - we test 1-2-3 layers using a multiple or division of # Xs.
        - ex: 23 attributes: test 11, 23, 46 hidden layer sizes
        """
        hiddens = [(h,) * l for l in [1, 2, 3] for h in [d, d // 2, d * 2]]
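        # For example, with d = 23 features this expands to:
        #   [(23,), (11,), (46,), (23, 23), (11, 11), (46, 46), (23, 23, 23), (11, 11, 11), (46, 46, 46)]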
        """
        https://machinelearningmastery.com/understand-the-dynamics-of-learning-rate-on-deep-learning-neural-networks/
        learning_rates:
        - hyper-parameter that controls how much to change the model in response to the estimated error 
                each time the model weights are updated. 
        - a value too small may result in a long training process that could get stuck, 
        - a value too large may result in learning a sub-optimal set of weights too fast or an unstable training process
        - may be the most important hyper-parameter when configuring ANN.
        - small positive value, often in the range between 0.0 and 1.0.
        """
        learning_rates = sorted([(2**x)/1000 for x in range(8)]+[0.000001])

        """
        logistic: the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).
        relu: the rectified linear unit function, returns f(x) = max(0, x)
        """
        params = {'MLP__activation': ['relu', 'logistic'],
                  'MLP__alpha': alphas,
                  'MLP__learning_rate_init': learning_rates,
                  'MLP__hidden_layer_sizes': hiddens}

        timing_params = {'MLP__early_stopping': False}

        iteration_details = {'x_scale': 'log',
                             'params': {'MLP__max_iter': [2 ** x for x in range(12)] +
                                                         [2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000]},
                             'pipe_params': timing_params}

        complexity_param = {'name': 'MLP__alpha',
                            'display_name': 'Alpha',
                            'x_scale': 'log',
                            'values': alphas}

        best_params = None
        # Uncomment to select known best params from grid search. This will skip the grid search and just rebuild
        # the various graphs
        #
        # Dataset 1:
        # best_params = {'activation': 'relu', 'alpha': 1.0, 'hidden_layer_sizes': (36, 36),
        #                'learning_rate_init': 0.016}
        # Dataset 2:
        # best_params = {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (16, 16),
        #                'learning_rate_init': 0.064}

        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)

        if best_params is not None:
            learner.set_params(**best_params)
        cv_best_params = experiments.perform_experiment(self._details.ds,
                                                        self._details.ds_name,
                                                        self._details.ds_readable_name,
                                                        learner,
                                                        'ANN',
                                                        'MLP',
                                                        params,
                                                        complexity_param=complexity_param,
                                                        seed=self._details.seed,
                                                        timing_params=timing_params,
                                                        iteration_details=iteration_details,
                                                        best_params=best_params,
                                                        threads=self._details.threads,
                                                        verbose=self._verbose)

        # TODO: This should turn OFF regularization
        of_params = cv_best_params.copy()
        of_params['MLP__alpha'] = 0
        learner = learners.ANNLearner(max_iter=3000,
                                      early_stopping=True,
                                      random_state=self._details.seed,
                                      verbose=self._verbose)
        if best_params is not None:
            learner.set_params(**best_params)

        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'ANN_OF',
                                       'MLP',
                                       of_params,
                                       seed=self._details.seed,
                                       timing_params=timing_params,
                                       iteration_details=iteration_details,
                                       best_params=best_params,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)