Exemplo n.º 1
0
    def perform(self):
        """Run the RBF SVM experiment followed by its ``SVM_RBF_OF`` variant.

        The first call to ``experiments.perform_experiment`` receives candidate
        values for alpha, max_iter and gamma fraction, with gamma fraction as
        the complexity parameter. The parameters it returns are copied, alpha
        is overridden with 1e-16, and a second experiment is run with
        ``iteration_lc_only=True``.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
        details = self._details
        n_samples = details.ds.features.shape[0]

        # Exponents 1, 1.5, ..., 9 give alphas from 1e-1 down to 1e-9.
        alpha_grid = [10**-exponent for exponent in np.arange(1, 9.01, 1 / 2)]
        gamma_grid = np.arange(0.2, 2.1, 0.2)

        search_space = {
            'SVM__alpha': alpha_grid,
            'SVM__max_iter': [int((1e6 / n_samples) / .8) + 1],
            'SVM__gamma_frac': gamma_grid,
        }
        gamma_curve = {
            'name': 'SVM__gamma_frac',
            'display_name': 'Gamma Fraction',
            'values': gamma_grid,
        }
        # Powers of two from 1 up to 2048.
        max_iter_sweep = {'SVM__max_iter': [2**power for power in range(12)]}

        # Arguments that are identical for both experiments.
        dataset_args = (details.ds, details.ds_name, details.ds_readable_name)
        shared_options = {
            'seed': details.seed,
            'iteration_params': max_iter_sweep,
            'threads': details.threads,
            'verbose': self._verbose,
        }

        search_learner = learners.SVMLearner(tol=None)
        winning_params = experiments.perform_experiment(
            *dataset_args,
            search_learner,
            'SVM_RBF',
            'SVM',
            search_space,
            complexity_param=gamma_curve,
            **shared_options)

        # Follow-up run: same parameters, but with alpha forced to 1e-16.
        overfit_params = winning_params.copy()
        overfit_params['SVM__alpha'] = 1e-16
        overfit_learner = learners.SVMLearner(n_jobs=details.threads)
        experiments.perform_experiment(
            *dataset_args,
            overfit_learner,
            'SVM_RBF_OF',
            'SVM',
            overfit_params,
            iteration_lc_only=True,
            **shared_options)
Exemplo n.º 2
0
    def perform(self):
        """Run the linear-kernel and RBF-kernel SVM experiments in sequence.

        Each kernel gets two ``experiments.perform_experiment`` calls: one that
        is given the candidate parameter values plus a complexity curve over C,
        and a follow-up (``*_OF``) that is handed a copy of the returned
        parameters together with ``iteration_lc_only=True``.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
        details = self._details
        n_samples = details.ds.features.shape[0]
        n_features = details.ds.features.shape[1]

        # Candidate values as changed by YS. The original values were:
        #   gamma: np.arange(1/features, 2.1, 0.2)
        #   tol:   np.arange(1e-8, 1e-1, 0.01)
        #   C:     np.arange(0.001, 2.5, 0.25)
        #   iters: [-1, int((1e6/samples)/.8)+1]   (unchanged)
        gamma_grid = np.arange(1 / n_features, 1.1, 0.25)
        tol_grid = np.arange(1e-3, 1e-1, 0.03)
        c_grid = np.arange(0.1, 2.5, 0.5)
        max_iter_grid = [-1, int((1e6 / n_samples) / .8) + 1]

        # Assign known best params from an earlier grid search here to skip the
        # grid search and just rebuild the various graphs.
        #
        # Dataset 1:
        #   known_best_linear = {'C': 0.5, 'class_weight': 'balanced', 'loss': 'squared_hinge',
        #                        'max_iter': 1478, 'tol': 0.06000001}
        #   known_best_rbf = {'C': 2.0, 'class_weight': 'balanced', 'decision_function_shape': 'ovo',
        #                     'gamma': 0.05555555555555555, 'max_iter': -1, 'tol': 1e-08}
        # Dataset 2:
        #   known_best_linear = {'C': 1.0, 'class_weight': 'balanced', 'loss': 'hinge', 'dual': True,
        #                        'max_iter': 70, 'tol': 0.08000001}
        #   known_best_rbf = {'C': 1.5, 'class_weight': 'balanced', 'decision_function_shape': 'ovo',
        #                     'gamma': 0.125, 'max_iter': -1, 'tol': 0.07000001}
        known_best_linear = None
        known_best_rbf = None

        # One dict shared by all four experiments. The original sweep was
        # [2**x for x in range(12)]; YS changed it to powers of four.
        iteration_details = {
            'x_scale': 'log',
            'params': {
                'SVM__max_iter': [4**power for power in range(6)]
            },
        }

        def penalty_curve():
            # Fresh dict and array per experiment. The original values were
            # np.arange(0.001, 2.5, 0.1); YS changed them.
            return {
                'name': 'SVM__C',
                'display_name': 'Penalty',
                'values': np.arange(0.1, 2.5, 0.5),
            }

        def run(learner, label, param_values, known_best, **extra):
            # Pre-load known parameters (if any) before handing the learner over.
            if known_best is not None:
                learner.set_params(**known_best)
            return experiments.perform_experiment(
                details.ds,
                details.ds_name,
                details.ds_readable_name,
                learner,
                label,
                'SVM',
                param_values,
                seed=details.seed,
                iteration_details=iteration_details,
                best_params=known_best,
                threads=details.threads,
                verbose=self._verbose,
                **extra)

        # ---- Linear SVM ----
        linear_grid = {
            'SVM__max_iter': max_iter_grid,
            'SVM__tol': tol_grid,
            'SVM__class_weight': ['balanced'],
            'SVM__C': c_grid,
        }
        linear_curve = penalty_curve()

        # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
        #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
        #       available it may be your only option
        linear_best = run(learners.LinearSVMLearner(dual=False),
                          'SVMLinear',
                          linear_grid,
                          known_best_linear,
                          complexity_param=linear_curve)

        linear_of_params = linear_best.copy()
        run(learners.LinearSVMLearner(dual=True),
            'SVMLinear_OF',
            linear_of_params,
            known_best_linear,
            iteration_lc_only=True)

        # ---- RBF SVM ----
        rbf_grid = {
            'SVM__max_iter': max_iter_grid,
            'SVM__tol': tol_grid,
            'SVM__class_weight': ['balanced'],
            'SVM__C': c_grid,
            'SVM__decision_function_shape': ['ovo', 'ovr'],
            'SVM__gamma': gamma_grid,
        }
        rbf_curve = penalty_curve()

        rbf_best = run(learners.SVMLearner(kernel='rbf'),
                       'SVM_RBF',
                       rbf_grid,
                       known_best_rbf,
                       complexity_param=rbf_curve)

        rbf_of_params = rbf_best.copy()
        run(learners.SVMLearner(kernel='rbf'),
            'SVM_RBF_OF',
            rbf_of_params,
            known_best_rbf,
            iteration_lc_only=True)
Exemplo n.º 3
0
reducer names -> functions that take reduced dim as an arg
"""
def _kernel_pca(kernel):
    """Build a reducer factory that uses KernelPCAReduction with ``kernel``."""

    def build(dim):
        return reduction.KernelPCAReduction(dim, kernel=kernel)

    return build


def _no_reduction(dim):
    """Return a NoopReduction; the requested dimension is ignored."""
    return reduction.NoopReduction()


# Reducer name -> callable that takes the reduced dimension as its argument.
REDUCERS = {
    "select": reduction.SelectKBestReduction,
    "pca-linear": _kernel_pca('linear'),
    "pca-cosine": _kernel_pca('cosine'),
    "none": _no_reduction,
}
"""
learner names -> functions that return learner instances
"""
def _svm_with_kernel(kernel):
    """Build a zero-argument factory for an SVMLearner using ``kernel``."""

    def build():
        return learners.SVMLearner(kernel=kernel)

    return build


def _k_neighbors():
    """Return a KNeighborsLearner built with no arguments."""
    return learners.KNeighborsLearner()


def _decision_tree():
    """Return a DecisionTreeLearner built with no arguments."""
    return learners.DecisionTreeLearner()


# Learner name -> zero-argument callable that returns a learner instance.
LEARNERS = {
    "nb": learners.GaussianNBLearner,
    "svm-linear": _svm_with_kernel('linear'),
    "svm-rbf": _svm_with_kernel('rbf'),
    "svm-poly": _svm_with_kernel('poly'),
    "knn": _k_neighbors,
    "tree": _decision_tree,
}
"""
Data format:

{
  "body": string
  "post_ups": int
  "subreddit_id": string
  "created": float (timestamp)
  "downs": int
  "author": string
Exemplo n.º 4
0
    def perform(self):
        """Run the RBF SVM experiment and its ``SVM_RBF_OF`` follow-up.

        The first ``experiments.perform_experiment`` call is given candidate
        values for max_iter, tol, C, decision_function_shape and gamma, with C
        as the complexity parameter. A copy of the parameters it returns is
        then passed to a second call made with ``iteration_lc_only=True``.

        If ``self._details.ds_best_params`` contains an ``'SVM_RBF'`` entry,
        those parameters are set on the learners and forwarded as
        ``best_params``.

        NOTE: no linear-kernel experiment is run here. The linear parameter
        dicts and the ``'SVM_Linear'`` best-params lookup that used to be built
        in this method were never passed to anything, so they were removed.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
        samples = self._details.ds.features.shape[0]
        features = self._details.ds.features.shape[1]

        gamma_fracs = np.arange(1 / features, 2.1, 0.2)
        tols = np.arange(1e-8, 1e-1, 0.01)
        C_values = np.arange(0.001, 2.5, 0.25)
        iters = [-1, int((1e6 / samples) / .8) + 1]

        # Optional pre-computed parameters for this dataset.
        best_params_rbf = None
        if self._details.ds_best_params is not None and 'SVM_RBF' in self._details.ds_best_params:
            best_params_rbf = self._details.ds_best_params['SVM_RBF']

        iteration_details = {
            'x_scale': 'log',
            'params': {
                'SVM__max_iter': [2**x for x in range(12)]
            },
        }

        # RBF SVM
        # With more than two classes only 'ovo' is tried; otherwise both shapes are.
        if len(np.unique(self._details.ds.classes)) > 2:
            decision_functions = ['ovo']
        else:
            decision_functions = ['ovo', 'ovr']
        params = {
            'SVM__max_iter': iters,
            'SVM__tol': tols,
            'SVM__class_weight': ['balanced'],
            'SVM__C': C_values,
            'SVM__decision_function_shape': decision_functions,
            'SVM__gamma': gamma_fracs
        }
        complexity_param = {
            'name': 'SVM__C',
            'display_name': 'Penalty',
            'values': np.arange(0.001, 2.5, 0.1)
        }

        learner = learners.SVMLearner(kernel='rbf')
        if best_params_rbf is not None:
            learner.set_params(**best_params_rbf)
        best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            'SVM_RBF',
            'SVM',
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            iteration_details=iteration_details,
            best_params=best_params_rbf,
            threads=self._details.threads,
            verbose=self._verbose)

        # Follow-up run on a fresh learner, using a copy of the returned parameters.
        of_params = best_params.copy()
        learner = learners.SVMLearner(kernel='rbf')
        if best_params_rbf is not None:
            learner.set_params(**best_params_rbf)
        experiments.perform_experiment(self._details.ds,
                                       self._details.ds_name,
                                       self._details.ds_readable_name,
                                       learner,
                                       'SVM_RBF_OF',
                                       'SVM',
                                       of_params,
                                       seed=self._details.seed,
                                       iteration_details=iteration_details,
                                       best_params=best_params_rbf,
                                       threads=self._details.threads,
                                       verbose=self._verbose,
                                       iteration_lc_only=True)
Exemplo n.º 5
0
    def perform(self):
        """Run the linear-kernel and RBF-kernel SVM experiments in sequence.

        Each kernel gets two ``experiments.perform_experiment`` calls: one that
        is given candidate parameter values plus a complexity curve over C,
        and a follow-up (``*_OF``) that is handed a copy of the returned
        parameters together with ``iteration_details`` and
        ``iteration_lc_only=True``.

        NOTE: the RBF run only supplies candidates for C, class_weight, tol and
        decision_function_shape. An earlier dict that also listed max_iter and
        gamma was overwritten before it was ever used; that dead dict and the
        gamma grid that only fed it have been removed, so the values actually
        passed on are unchanged.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
        samples = self._details.ds.features.shape[0]

        tols = np.arange(1e-8, 1e-1, 0.01)
        C_values = np.arange(0.001, 2.5, 0.25)
        iters = [-1, int((1e6 / samples) / 0.8) + 1]

        # Assign known best params from an earlier grid search here to skip the
        # grid search and just rebuild the various graphs, e.g. keyed on
        # self._details.ds_name:
        #
        #   "spam":
        #     best_params_linear = {"C": 0.101, "class_weight": "balanced", "loss": "squared_hinge",
        #                           "max_iter": 33, "tol": 1.00e-08}
        #     best_params_rbf = {"C": 0.251, "class_weight": "balanced",
        #                        "decision_function_shape": "ovo", "tol": 0.07000001}
        #   "poisonous_mushrooms":
        #     best_params_linear = {"C": 0.001, "class_weight": "balanced", "loss": "squared_hinge",
        #                           "max_iter": 42, "tol": 1.00e-08}
        #     best_params_rbf = {"C": 0.251, "class_weight": "balanced",
        #                        "decision_function_shape": "ovo", "tol": 0.06000001}
        best_params_linear = None
        best_params_rbf = None

        # Only passed to the two *_OF experiments below.
        iteration_details = {
            "x_scale": "log",
            "params": {
                "SVM__max_iter": [2**x for x in range(12)]
            },
        }

        # Linear SVM
        params = {
            "SVM__max_iter": iters,
            "SVM__tol": tols,
            # A single explicit weighting: class label 1 gets weight 10.
            "SVM__class_weight": [{
                1: 10
            }],
            "SVM__C": C_values,
        }
        complexity_param = {
            "name": "SVM__C",
            "display_name": "Penalty",
            "values": np.arange(0.001, 2.5, 0.1),
        }

        # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
        #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
        #       available it may be your only option
        learner = learners.LinearSVMLearner(dual=False)
        if best_params_linear is not None:
            learner.set_params(**best_params_linear)
        best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "SVMLinear",
            "SVM",
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            best_params=best_params_linear,
            threads=self._details.threads,
            verbose=self._verbose,
        )

        of_params = best_params.copy()
        learner = learners.LinearSVMLearner(dual=True)
        if best_params_linear is not None:
            learner.set_params(**best_params_linear)
        experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "SVMLinear_OF",
            "SVM",
            of_params,
            seed=self._details.seed,
            iteration_details=iteration_details,
            best_params=best_params_linear,
            threads=self._details.threads,
            verbose=self._verbose,
            iteration_lc_only=True,
        )

        # RBF SVM
        params = {
            "SVM__C": C_values,
            "SVM__class_weight": ["balanced"],
            "SVM__tol": tols,
            "SVM__decision_function_shape": ["ovo", "ovr"],
        }
        complexity_param = {
            "name": "SVM__C",
            "display_name": "Penalty",
            "values": np.arange(0.001, 2.5, 0.25),
        }

        learner = learners.SVMLearner(kernel="rbf")
        if best_params_rbf is not None:
            learner.set_params(**best_params_rbf)
        best_params = experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "SVM_RBF",
            "SVM",
            params,
            complexity_param=complexity_param,
            seed=self._details.seed,
            best_params=best_params_rbf,
            threads=self._details.threads,
            verbose=self._verbose,
        )

        of_params = best_params.copy()
        learner = learners.SVMLearner(kernel="rbf")
        if best_params_rbf is not None:
            learner.set_params(**best_params_rbf)
        experiments.perform_experiment(
            self._details.ds,
            self._details.ds_name,
            self._details.ds_readable_name,
            learner,
            "SVM_RBF_OF",
            "SVM",
            of_params,
            seed=self._details.seed,
            iteration_details=iteration_details,
            best_params=best_params_rbf,
            threads=self._details.threads,
            verbose=self._verbose,
            iteration_lc_only=True,
        )
Exemplo n.º 6
0
    def perform(self):
        """Run the linear-kernel and RBF-kernel SVM experiments in sequence.

        Each kernel gets two ``experiments.perform_experiment`` calls: one that
        is given the candidate parameter values plus a complexity curve over C,
        and a follow-up (``*_OF``) that is handed a copy of the returned
        parameters together with ``iteration_lc_only=True``.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-1/blob/master/SVM.py
        details = self._details
        n_samples = details.ds.features.shape[0]
        n_features = details.ds.features.shape[1]

        gamma_grid = np.arange(1 / n_features, 2.1, 0.2)
        tol_grid = np.arange(1e-8, 1e-1, 0.01)
        c_grid = np.arange(0.001, 2.5, 0.25)
        max_iter_grid = [-1, int((1e6 / n_samples) / .8) + 1]

        # Assign known best params from an earlier grid search here to skip the
        # grid search and just rebuild the various graphs.
        #
        # Dataset 1 (credit default):
        #   known_best_linear = {
        #       "C": 0.251, "class_weight": "balanced", "dual": False, "fit_intercept": True,
        #       "intercept_scaling": 1, "loss": "squared_hinge", "max_iter": 42,
        #       "multi_class": "ovr", "penalty": "l2", "tol": 0.020000010000000002,
        #       "verbose": False}
        #   known_best_rbf = {
        #       "C": 0.751, "cache_size": 200, "class_weight": "balanced", "coef0": 0,
        #       "decision_function_shape": "ovo", "degree": 3, "gamma": 0.043478260869565216,
        #       "kernel": "rbf", "max_iter": -1, "probability": False, "shrinking": True,
        #       "tol": 0.08000001, "verbose": False}
        # Dataset 2:
        #   known_best_linear = {
        #       "C": 0.251, "class_weight": "balanced", "dual": False, "fit_intercept": True,
        #       "intercept_scaling": 1, "loss": "squared_hinge", "max_iter": 42,
        #       "multi_class": "ovr", "penalty": "l2", "tol": 0.020000010000000002,
        #       "verbose": False}
        #   known_best_rbf = {
        #       "C": 1.501, "cache_size": 200, "class_weight": "balanced", "coef0": 0,
        #       "decision_function_shape": "ovo", "degree": 3, "gamma": 0.0056179775280898875,
        #       "kernel": "rbf", "max_iter": -1, "probability": False, "shrinking": True,
        #       "tol": 0.09000000999999999, "verbose": False}
        known_best_linear = None
        known_best_rbf = None

        # One dict shared by all four experiments: max_iter swept over 1..2048.
        iteration_details = {
            'x_scale': 'log',
            'params': {
                'SVM__max_iter': [2**power for power in range(12)]
            },
        }

        def penalty_curve():
            # Fresh dict and array per experiment.
            return {
                'name': 'SVM__C',
                'display_name': 'Penalty',
                'values': np.arange(0.001, 2.5, 0.1),
            }

        def run(learner, label, param_values, known_best, **extra):
            # Pre-load known parameters (if any) before handing the learner over.
            if known_best is not None:
                learner.set_params(**known_best)
            return experiments.perform_experiment(
                details.ds,
                details.ds_name,
                details.ds_readable_name,
                learner,
                label,
                'SVM',
                param_values,
                seed=details.seed,
                iteration_details=iteration_details,
                best_params=known_best,
                threads=details.threads,
                verbose=self._verbose,
                **extra)

        # ---- Linear SVM ----
        linear_grid = {
            'SVM__max_iter': max_iter_grid,
            'SVM__tol': tol_grid,
            'SVM__class_weight': ['balanced'],
            'SVM__C': c_grid,
        }
        linear_curve = penalty_curve()

        # NOTE: If this is causing issues, try the RBFSVMLearner. Passing use_linear=True will use a linear kernel
        #       and passing use_linear=False will use the RBF kernel. This method is slower but if libsvm is not
        #       available it may be your only option
        linear_best = run(learners.LinearSVMLearner(dual=False),
                          'SVMLinear',
                          linear_grid,
                          known_best_linear,
                          complexity_param=linear_curve)

        linear_of_params = linear_best.copy()
        run(learners.LinearSVMLearner(dual=True),
            'SVMLinear_OF',
            linear_of_params,
            known_best_linear,
            iteration_lc_only=True)

        # ---- RBF SVM ----
        rbf_grid = {
            'SVM__max_iter': max_iter_grid,
            'SVM__tol': tol_grid,
            'SVM__class_weight': ['balanced'],
            'SVM__C': c_grid,
            'SVM__decision_function_shape': ['ovo', 'ovr'],
            'SVM__gamma': gamma_grid,
        }
        rbf_curve = penalty_curve()

        rbf_best = run(learners.SVMLearner(kernel='rbf'),
                       'SVM_RBF',
                       rbf_grid,
                       known_best_rbf,
                       complexity_param=rbf_curve)

        rbf_of_params = rbf_best.copy()
        run(learners.SVMLearner(kernel='rbf'),
            'SVM_RBF_OF',
            rbf_of_params,
            known_best_rbf,
            iteration_lc_only=True)