Example #1
    def preliminary_search(self, dataset, to_fix, repetitions):
        """
        This function implements a preliminary search which fixes some of the
        hyperparameters to their current best values and then searches again
        for the remaining hyperparameters.

        Parameters
        ----------
        dataset: int
            the dataset's index in self.monks

        to_fix: list
            a list of hyperparameters' names to fix

        repetitions: int
            the number of cross-validation repetitions

        Returns
        -------
        """
        for prm in to_fix:
            self.selection.\
                search(self.train_set[:, 1:],
                       self.train_set[:, 0].reshape(-1, 1), save_results=True,
                       fname='../data/model_selection_results_monk_{}.json'.
                       format(dataset + 1), par_name=prm.upper())

            best_hyps = self.selection.\
                select_best_hyperparams(
                    fname='../data/model_selection_results_monk_{}.json'.
                    format(dataset + 1))
            self.param_ranges[prm] = best_hyps[0]['hyperparams'][prm]

            self.grid = val.HyperGrid(self.param_ranges,
                                      size=self.grid_size,
                                      seed=datetime.now())
            self.selection = val.ModelSelectionCV(self.grid, repetitions=1)
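
As a minimal usage sketch (not part of the original snippet), the method above could be driven as shown below; `tester` is a hypothetical instance of the enclosing test class, and fixing `eta` and `alpha` first is only an illustrative choice.

# hypothetical driver for preliminary_search(): fix the learning rate and
# the momentum to their current best values before searching again for the
# remaining hyperparameters
tester.preliminary_search(dataset=0, to_fix=['eta', 'alpha'], repetitions=2)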
Example #2
# defining grid
param_ranges = dict()
param_ranges['eta'] = (0.02, 2.0)
param_ranges['alpha'] = 0.001
param_ranges['batch_size'] = (1, 100)
param_ranges['hidden_sizes'] = [(1, 100), (10, 20)]
param_ranges['reg_lambda'] = (0.0, 0.1)
param_ranges['reg_method'] = 'l2'
param_ranges['epochs'] = 200

imp.reload(val)

# uniform grid
grid_size = 3
grid = val.HyperGrid(param_ranges, size=grid_size, random=False)

i = 0
for hyperparam in grid:
    pprint(hyperparam)
    i += 1

# random grid
grid_size = 10
grid = val.HyperGrid(param_ranges, size=grid_size, random=True)
len(grid)

selection = val.ModelSelectionCV(grid=grid, repetitions=2)

start = time.time()
selection.search(X, y, nfolds=3)
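
A possible continuation of the snippet above, timing the random-grid search and inspecting the winning configuration; the no-argument call to select_best_hyperparams() mirrors Example #5, but relying on its defaults here is an assumption.

# hypothetical continuation: report the elapsed time and look at the best
# configuration found by the random-grid search
elapsed = time.time() - start
print('search took {:.2f} seconds'.format(elapsed))

# no-argument call as in Example #5; using the defaults here is an assumption
best_hyps = selection.select_best_hyperparams()
pprint(best_hyps)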
Example #3
    def test(self,
             dataset,
             repetitions=1,
             preliminary_search=False,
             to_fix=[]):
        """
        This function implements the testing procedure for the monks'
        datasets. It permits two kinds of search for the best
        hyperparameters. The first kind simply performs a search over one
        HyperGrid and selects the set of hyperparameters which returns the
        best result. The second kind performs a deeper search, by first
        searching for the best values of some hyperparameters, fixing them,
        and then searching again for the values of the remaining ones.

        Parameters
        ----------
        dataset: int or list
            either a single index or a list of indexes, each one representing
            a dataset in self.monks

        repetitions: int
            the number of cross-validation repetitions
            (Default value = 1)

        preliminary_search: bool
            whether or not to execute a preliminary search for the best value
            for some hyperparameters, fix them, and search again for the
            remaining hyperparameters
            (Default value = False)

        to_fix: list
            a list of hyperparameters that must be fixed
            (Default value = [])

        Returns
        -------
        """
        if type(dataset) == int:
            assert dataset >= 0 and dataset <= 2
            dataset = [dataset]
        else:
            assert len(dataset) > 0 and len(dataset) <= 3

        for ds in dataset:
            print 'TESTING MONK DATASET {}\n'.format(ds + 1)

            self.train_set = pd.\
                read_csv(self.monks[ds][0], names=['class'] +
                         ['x{}'.format(j) for j in range(17)]).values
            self.test_set = pd.\
                read_csv(self.monks[ds][1], names=['class'] +
                         ['x{}'.format(j) for j in range(17)]).values

            self.grid = val.HyperGrid(self.param_ranges,
                                      size=self.grid_size,
                                      seed=datetime.now())
            self.selection = val.ModelSelectionCV(self.grid,
                                                  repetitions=repetitions)

            # PRELIMINARY SEARCH FOR SOME OF THE PARAMETERS ###################

            if preliminary_search:
                assert len(to_fix) != 0
                self.preliminary_search(ds, to_fix, repetitions)

            # SEARCHING FOR THE OTHER PARAMETERS ##############################

            self.selection.search(
                self.train_set[:, 1:],
                self.train_set[:, 0].reshape(-1, 1),
                save_results=True,
                fname='../data/model_selection_results_monk_{}.json'.format(
                    ds + 1),
                par_name='REMAINING PARAMETERS')

            best_model = self.selection.\
                select_best_model(
                    self.train_set[:, 1:],
                    self.train_set[:, 0].reshape(-1, 1),
                    fname='../data/model_selection_results_monk_{}.json'.
                    format(ds + 1))

            y_pred = best_model.predict(self.test_set[:, 1:])
            y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                                         y_pred).reshape(-1, 1)
            print '\n\n\n'
            bca = metrics.BinaryClassifierAssessment(
                self.test_set[:, 0].reshape(-1, 1), y_pred)

            self.save_best_result(ds, best_model, bca)
            self.plot_best_result(ds, best_model)

            self.param_ranges = self.param_ranges_backup.copy()
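
For illustration only, the test method above might be invoked as in the sketch below; `tester` is a hypothetical instance of the enclosing class, and running all three monk datasets while fixing `eta` and `alpha` first is an assumed configuration.

# hypothetical driver for test(): run the three monk datasets with a
# preliminary search that fixes eta and alpha before tuning the rest
tester.test(dataset=[0, 1, 2],
            repetitions=3,
            preliminary_search=True,
            to_fix=['eta', 'alpha'])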
Example #4
    print 'STARTED WITH LOSS {}, ENDED WITH {}'.\
        format(neural_net.error_per_epochs[0], neural_net.error_per_epochs[-1])

    u.plot_learning_curve(neural_net,
                          fname='/Users/gianmarco/Desktop/learning_curve.pdf')

if raw_input('TESTING K-FOLD CROSS VALIDATION?[Y/N] ') == 'Y':
    cross_val = val.KFoldCrossValidation(X, y, neural_net)
    tqdm.write('AGGREGATED RESULTS: \n')
    pprint(cross_val.aggregated_results)

if raw_input('TESTING GRID SEARCH?[Y/N] ') == 'Y':
    param_ranges = dict()
    param_ranges['eta'] = (0.02, 2.0)
    param_ranges['alpha'] = 0.001
    param_ranges['batch_size'] = (1, 100)
    param_ranges['hidden_sizes'] = [(1, 100), (10, 20)]
    param_ranges['reg_lambda'] = (0.0, 0.1)
    param_ranges['reg_method'] = 'l2'
    param_ranges['epochs'] = 200

    grid_size = 10
    grid = val.HyperGrid(param_ranges, size=grid_size)

    selection = val.ModelSelectionCV(grid, repetitions=2)
    selection.search(X, y)
    results = selection.load_results()

    best_model = selection.select_best_model(X, y)
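
As a sketch only, the selected model could then be assessed the same way Example #3 does; X_test, y_test, the np alias for numpy, and the metrics module are assumed to be available in this script.

    # hypothetical follow-up, mirroring Example #3: evaluate the selected
    # model on held-out data (X_test and y_test are assumed to exist)
    y_pred = best_model.predict(X_test)
    y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                                 y_pred).reshape(-1, 1)
    bca = metrics.BinaryClassifierAssessment(y_test.reshape(-1, 1), y_pred)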
Example #5
        if beta_m == 'mhs':
            param_ranges['rho'] = (0., 1.)
        else:
            param_ranges['rho'] = 0.0

        if beta_m == 'dl':
            param_ranges['t'] = (0., 1.)
        else:
            param_ranges['t'] = 0.0

    param_ranges['optimizer'] = opt
    param_ranges['hidden_sizes'] = [4, 8]
    param_ranges['activation'] = 'sigmoid'
    param_ranges['task'] = 'classifier'

    grid = val.HyperGrid(param_ranges, grid_size, random=True)
    selection = val.ModelSelectionCV(
        grid,
        fname=fpath +
        'monks_{}_experiment_{}_results.json.gz'.format(ds, experiment))
    selection.search(X_design, y_design, nfolds=nfolds)
    best_hyperparameters = selection.select_best_hyperparams()

    json_name = ''

    if opt == 'SGD':
        json_name = '../data/final_setup/{}/{}/'.format(opt, type_m) + \
            'monks_{}_best_hyperparameters_{}.json'.format(ds, opt.lower())
    else:
        json_name = '../data/final_setup/{}/'.format(opt) + \
            'monks_{}_best_hyperparameters_{}_{}.json'.format(ds, opt.lower(),