def _fit_learner(self, verbose=False):
        def time_seconds():
            return default_timer()

        # Create learner from configuration
        learner = self.model(**self.configurations)
        # Train learner
        if self.__class__.__bases__[0] == EvaluatorSklearn:
            start_time = time_seconds()
            learner.fit(
                get_input_variables(self.training_set).values,
                get_target_variable(self.training_set).values)
            training_time = time_seconds() - start_time
        else:
            start_time = time_seconds()
            learner.fit(
                get_input_variables(self.training_set).values,
                get_target_variable(self.training_set).values, self.metric,
                verbose)
            training_time = time_seconds() - start_time
        # testing_value = self._calculate_value(learner, self.testing_set)
        return {
            'learner': learner,
            # 'testing_value': testing_value,
            'training_time': training_time
        }
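
A minimal sketch of the two data-access helpers these snippets rely on, assuming the target variable is stored in the last column of the pandas DataFrame (the snippets read the resulting frame either through .values or through the older, deprecated .as_matrix() alias of the same array view):

# Hypothetical helpers; the column layout is an assumption, not part of the library shown above.
def get_input_variables(data_set):
    # Every column except the last one is treated as an input feature.
    return data_set.iloc[:, :-1]

def get_target_variable(data_set):
    # The last column is treated as the target variable.
    return data_set.iloc[:, -1]
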
Example #2
 def test_fit(self):
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     self.ftne.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=self.ftne.champion)
     prediction = self.ftne.predict(get_input_variables(self.validation).as_matrix())
     self.assertEqual(len(prediction), len(get_target_variable(self.validation).as_matrix()))

    def test_predict(self):
        print("testing predict()...")
        self.ensemble_learner.fit(get_input_variables(self.training).values,
                                  get_target_variable(self.training).values, RootMeanSquaredError, verbose=True)

        prediction = self.ensemble_learner.predict(get_input_variables(self.validation).values)
        self.assertTrue(expr=len(prediction) == len(get_target_variable(self.validation).values))
Example #4
 def _select_best_learner(self,
                          time_limit=TIME_LIMIT_SECONDS,
                          time_buffer=TIME_BUFFER,
                          verbose=False):
     # Best learner found so far (best validation value).
     best_learner = None
     # Best validation value found so far (direction given by metric.greater_is_better).
     best_validation_value = float(
         '-Inf') if self.metric.greater_is_better else float('Inf')
     # List of (configuration, validation value) pairs.
     validation_value_list = list()
     # Current time in seconds.
     time_seconds = lambda: default_timer()
     # Random order of configurations.
     shuffle(self.configurations)
     # Number of configurations run.
     number_of_runs = 0
     # Start of run.
     run_start = time_seconds()
     # Time left.
     time_left = lambda: time_limit - (time_seconds() - run_start)
     # Iterate through all configurations.
     for configuration in tqdm(self.configurations):
         # Create learner from configuration.
         learner = self.model(**configuration)
         # Train learner.
         if self.__class__.__bases__[0] == EvaluatorSklearn:
             learner.fit(
                 get_input_variables(self.training_set).as_matrix(),
                 get_target_variable(self.training_set).as_matrix())
         else:
             learner.fit(
                 get_input_variables(self.training_set).as_matrix(),
                 get_target_variable(self.training_set).as_matrix(),
                 self.metric, verbose)
         # Calculate validation value.
         validation_value = self._calculate_value(learner,
                                                  self.validation_set)
         # If this validation value is better than the best so far, keep the learner and its value.
         if is_better(validation_value, best_validation_value, self.metric):
             best_learner = learner
             best_validation_value = validation_value
         # Add configuration and validation error to validation error list.
         validation_value_list.append((configuration, validation_value))
         # Increase number of runs.
         number_of_runs += 1
         # Calculate time left.
         run_end = time_left()
         # Calculate time expected for next run.
         run_expected = (time_limit - run_end) / number_of_runs
         # If no time left or time expected for next run is greater than time left, break.
         if run_end < 0 or run_end * (1 + time_buffer) < run_expected:
             break
     # When all configurations tested, return best learner.
     return {
         'best_learner': best_learner,
         'validation_value_list': validation_value_list
     }
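
_select_best_learner compares candidate and best validation values through an is_better helper. A minimal sketch of the behaviour it assumes, driven by the metric's greater_is_better flag that the method already reads when initialising best_validation_value (hypothetical, for illustration only):

# Hypothetical sketch of is_better: compare two metric values according to the
# metric's optimisation direction.
def is_better(candidate_value, best_value, metric):
    if metric.greater_is_better:
        return candidate_value > best_value
    return candidate_value < best_value
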
Example #5
    def test_predict(self):
        self.ensemble_learner.fit(
            get_input_variables(self.training).as_matrix(),
            get_target_variable(self.training).as_matrix(),
            RootMeanSquaredError,
            verbose=True)

        prediction = self.ensemble_learner.predict(
            get_input_variables(self.validation).as_matrix())
        self.assertTrue(expr=len(prediction) == len(
            get_target_variable(self.validation).as_matrix()))
Example #6
 def test_predict(self): 
     print("testing predict()...")
     base_learner = SemanticLearningMachine(50, ErrorDeviationVariationCriterion(0.25), 2, 1, 10, Mutation2())
     ensemble_learner = EnsembleRandomIndependentWeighting(base_learner, 100, weight_range=2)
     X = get_input_variables(self.training).values
     y = get_target_variable(self.training).values
     def time_seconds(): return default_timer()
     start_time = time_seconds()
     ensemble_learner.fit(X, y, RootMeanSquaredError, verbose=False)
     print("time to train algorithm: ", (time_seconds()-start_time))
     start_time = time_seconds()
     prediction = ensemble_learner.predict(get_input_variables(self.validation).values)
     print("time to predict algorithm: ", (time_seconds()-start_time))
     self.assertTrue(expr=len(prediction) == len(get_target_variable(self.validation).values))
     print()
Example #7
    def pickup(self):

        print('Entering ensemble pickup for dataset:', self.data_set_name)

        outer_cv = 0

        outer_folds = self._get_outer_folds(outer_cv)
        for training_outer_index, testing_index in outer_folds.split(
                get_input_variables(self.samples).values,
                get_target_variable(self.samples).values):

            print('\tIndex of outer fold:', outer_cv)

            training_outer, testing = pd.DataFrame(
                self.samples.values[training_outer_index]), pd.DataFrame(
                    self.samples.values[testing_index])

            algorithm = self.best_result[outer_cv]['best_overall_algorithm']
            configuration = self.best_result[outer_cv][
                'best_overall_configuration']
            self._run_ensembles(outer_cv, algorithm.get_corresponding_algo(),
                                configuration, training_outer, testing,
                                self.metric)

            outer_cv += 1

        benchmark_to_pickle(self)

        print('Leaving ensemble pickup for dataset:', self.data_set_name)
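
pickup() ends by serialising the benchmark object with benchmark_to_pickle. A minimal sketch of such a helper, assuming a plain pickle dump named after the dataset (the real file naming and location are not shown in these snippets):

import pickle

# Hypothetical serialisation helper; the file naming scheme is an assumption.
def benchmark_to_pickle(benchmark):
    file_name = '%s_benchmark.pkl' % benchmark.data_set_name
    with open(file_name, 'wb') as output_file:
        pickle.dump(benchmark, output_file)
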
Example #8
 def test_fit(self):
     self.ensemble_learner.fit(
         get_input_variables(self.training).as_matrix(),
         get_target_variable(self.training).as_matrix(),
         RootMeanSquaredError,
         verbose=True)
     self.assertTrue(expr=self.ensemble_learner.learners)
Example #9
 def _calculate_solution_value(self, solution, data_set, learner):
     X = get_input_variables(data_set).as_matrix()
     target = get_target_variable(data_set).as_matrix()
     neural_network = FeedForwardNetwork.create(solution,
                                                learner.configuration)
     prediction = self._predict_neural_network(neural_network, X)
     return self.metric.evaluate(prediction, target)

 def test_benchmark_slm(self):
     print('Test BenchmarkSLM()...')
     algorithm = BenchmarkSLM(10, MaxGenerationsCriterion(10), 3, 0.01, 50,
                              Mutation2())
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     log = algorithm.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=log)
     print()

 def test_ols(self):
     print('OLS tests of fit()...')
     algorithm = SemanticLearningMachine(100, MaxGenerationsCriterion(200),
                                         3, 'optimized', 50, Mutation2())
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     algorithm.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=algorithm.champion)
     print()

 def test_benchmark_neat(self):
     print('Test BenchmarkNEAT()...')
     algorithm = BenchmarkNEAT(10, MaxGenerationsCriterion(10), 4, 1, 1,
                               0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     log = algorithm.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=log)
     print()

 def test_edv(self):
     print('EDV tests of fit()...')
     algorithm = SemanticLearningMachine(
         100, ErrorDeviationVariationCriterion(0.25), 3, 0.01, 50,
         Mutation2())
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     algorithm.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=algorithm.champion)
     print()
Example #14
 def test_ols(self):
     print('OLS tests of fit()...')
     def time_seconds(): return default_timer()
     start_time = time_seconds()
     algorithm = SemanticLearningMachine(100, MaxGenerationsCriterion(200), 3, 'optimized', 50, Mutation2(), RootMeanSquaredError, True)
     X = get_input_variables(self.training).values
     y = get_target_variable(self.training).values
     start_time = time_seconds()
     algorithm.fit(X, y, RootMeanSquaredError, verbose=False)
     print("time to train algorithm: ", (time_seconds()-start_time))
     self.assertTrue(expr=algorithm.champion)
     print()

 def test_slm_ols_wo_edv(self):
     print("testing fit() for SLM (OLS) without EDV ...")
     base_learner = SemanticLearningMachine(50, MaxGenerationsCriterion(20), 2, 'optimized', 10, Mutation2())
     ensemble_learner = EnsembleBoosting(base_learner, 100, meta_learner=median, learning_rate=1)
     X = get_input_variables(self.training).values
     y = get_target_variable(self.training).values
     def time_seconds(): return default_timer()
     start_time = time_seconds()
     ensemble_learner.fit(X, y, RootMeanSquaredError, verbose=False)
     print("time to train algorithm: ", (time_seconds()-start_time))
     self.assertTrue(expr=ensemble_learner.learners)
     print() 
Example #16
 def test_tie(self):
     print('TIE tests of fit()...')
     def time_seconds(): return default_timer()
     start_time = time_seconds()
     algorithm = SemanticLearningMachine(100, TrainingImprovementEffectivenessCriterion(0.25), 3, 0.01, 50, Mutation2(), RootMeanSquaredError, True)
     X = get_input_variables(self.training).values
     y = get_target_variable(self.training).values
     start_time = time_seconds()
     algorithm.fit(X, y, RootMeanSquaredError, verbose=False)
     print("time to train algorithm: ", (time_seconds()-start_time))
     self.assertTrue(expr=algorithm.champion)
     print()

 def test_benchmark_sga(self):
     print('Test BenchmarkSGA()...')
     topology = create_network_from_topology([2, 2])
     algorithm = BenchmarkSGA(10, MaxGenerationsCriterion(10), topology,
                              SelectionOperatorTournament(5),
                              MutationOperatorGaussian(0.1),
                              CrossoverOperatorArithmetic(), 0.01, 0.25)
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     log = algorithm.fit(X, y, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=log)
     print()
Example #18
 def test_slm_fls(self):
     print("testing fit() for SLM (FLS) ...")
     base_learner = SemanticLearningMachine(50, MaxGenerationsCriterion(100), 2, 1, 10, Mutation2())
     ensemble_learner = EnsembleRandomIndependentWeighting(base_learner, 100, weight_range=1)
     X = get_input_variables(self.training).values
     y = get_target_variable(self.training).values
     def time_seconds(): return default_timer()
     start_time = time_seconds()
     ensemble_learner.fit(X, y, RootMeanSquaredError, verbose=False)
     print("time to train algorithm: ", (time_seconds()-start_time))
     self.assertTrue(expr=ensemble_learner.learners)
     print()

 def test_fit(self):
     print("testing fit()...")
     self.ensemble_learner.fit(get_input_variables(self.training).values,
                               get_target_variable(self.training).values, RootMeanSquaredError, verbose=True)
     self.assertTrue(expr=self.ensemble_learner.learners)
Example #20
 def _calculate_value(self, learner, data_set):
     prediction = learner.predict(get_input_variables(data_set).as_matrix())
     target = get_target_variable(data_set).as_matrix()
     return self.metric.evaluate(prediction, target)
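
_calculate_value only assumes that the metric exposes evaluate(prediction, target); the selection logic elsewhere also reads a greater_is_better flag. A minimal sketch of the RootMeanSquaredError metric used throughout these tests, under those assumptions (the library's actual implementation may differ):

import numpy as np

# Hypothetical sketch of the metric interface: a direction flag plus a static evaluate().
class RootMeanSquaredError:
    greater_is_better = False  # lower RMSE is better

    @staticmethod
    def evaluate(prediction, target):
        return np.sqrt(np.mean((np.asarray(prediction) - np.asarray(target)) ** 2))
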
Example #21
    def run_nested_cv(self):
        """ runs benchmark study on a nested cross-validation environment """

        print('Entering run_nested_cv for dataset:', self.data_set_name)

        outer_cv = 0
        outer_folds = self._get_outer_folds(outer_cv)
        for training_outer_index, testing_index in outer_folds.split(
                get_input_variables(self.samples).values,
                get_target_variable(self.samples).values):

            print('\tIndex of outer fold:', outer_cv)

            training_outer, testing = pd.DataFrame(
                self.samples.values[training_outer_index]), pd.DataFrame(
                    self.samples.values[testing_index])

            if self.classification:
                best_overall_validation_value = float('-Inf')
            else:
                best_overall_validation_value = float(
                    '-Inf') if self.metric.greater_is_better else float('Inf')

            for key in self.models.keys():

                print('\t\tAlgorithm with key:', key)

                if not self.results[key][outer_cv]:

                    if self.classification:
                        best_validation_value = float('-Inf')
                    else:
                        best_validation_value = float(
                            '-Inf'
                        ) if self.metric.greater_is_better else float('Inf')

                    validation_value_list = list()
                    for configuration in range(
                            self.models[key]['max_combinations']):

                        print('\t\t\tIndex of algorithm configuration:',
                              len(validation_value_list))

                        if (len(self.models[key]['algorithms'])) > 1:
                            option = randint(0, 2)
                            algorithm = self.models[key]['algorithms'][option]
                            config = self.models[key]['configuration_method'](
                                option)
                        else:
                            algorithm = self.models[key]['algorithms'][0]
                            if (key == 'mlpc_sgd' or key == 'mlpc_adam'
                                    or key == 'mlpr_sgd'
                                    or key == 'mlpr_adam'):
                                # version from 01-22
                                # config = self.models[key]['configuration_method'](self.get_data_set_size(training_outer))
                                # version from 01-25
                                batch_size = int(training_outer.shape[0] /
                                                 _INNER_FOLDS)
                                # batch_size = int(training_outer.shape[0] / _INNER_FOLDS) * 2
                                config = self.models[key][
                                    'configuration_method'](batch_size)
                            else:
                                config = self.models[key][
                                    'configuration_method']()

                        inner_folds = self._get_inner_folds(outer_cv)
                        tmp_valid_training_values_list = list()
                        for training_inner_index, validation_index in inner_folds.split(
                                get_input_variables(training_outer).values,
                                get_target_variable(training_outer).values):

                            print('\t\t\t\tIndex of inner fold:',
                                  len(tmp_valid_training_values_list))

                            training_inner, validation = pd.DataFrame(
                                training_outer.values[training_inner_index]
                            ), pd.DataFrame(
                                training_outer.values[validation_index])

                            results = self._evaluate_algorithm(
                                algorithm=algorithm,
                                configurations=config,
                                training_set=training_inner,
                                validation_set=None,
                                testing_set=validation,
                                metric=self.metric)

                            # print('results[testing_value] =', results['testing_value'], ', results[training_value] =', results['training_value'])

                            if self.classification:
                                tmp_valid_training_values_list.append(
                                    (results['testing_accuracy'],
                                     results['training_accuracy']))
                            else:
                                tmp_valid_training_values_list.append(
                                    (results['testing_value'],
                                     results['training_value']))

                        # Calculate average validation value and check if the current value is better than the best one
                        average_validation_value = mean(
                            tmp_valid_training_values_list, axis=0)[0]
                        average_training_value = mean(
                            tmp_valid_training_values_list, axis=0)[1]
                        if self.classification:
                            if average_validation_value > best_validation_value:
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                        else:
                            if is_better(average_validation_value,
                                         best_validation_value, self.metric):
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value

                        # Add configuration and validation error to validation error list.
                        validation_value_list.append(
                            (configuration, average_validation_value))

                    self.results[key][outer_cv] = self._evaluate_algorithm(
                        algorithm=best_algorithm,
                        configurations=best_configuration,
                        training_set=training_outer,
                        validation_set=None,
                        testing_set=testing,
                        metric=self.metric)

                    self.results[key][outer_cv][
                        'best_configuration'] = best_configuration
                    self.results[key][outer_cv][
                        'avg_inner_validation_error'] = best_validation_value
                    self.results[key][outer_cv][
                        'avg_inner_training_error'] = best_training_value
                    if self.classification:
                        self.results[key][outer_cv][
                            'avg_inner_validation_accuracy'] = best_validation_value
                        self.results[key][outer_cv][
                            'avg_inner_training_accuracy'] = best_training_value

                    # # Serialize benchmark
                    # benchmark_to_pickle(self)

                    if self.classification:
                        if best_validation_value > best_overall_validation_value:
                            best_overall_key = best_key
                            best_overall_algorithm = best_algorithm
                            best_overall_configuration = best_configuration
                            best_overall_validation_value = best_validation_value
                    else:
                        if is_better(best_validation_value,
                                     best_overall_validation_value,
                                     self.metric):
                            best_overall_key = best_key
                            best_overall_algorithm = best_algorithm
                            best_overall_configuration = best_configuration
                            best_overall_validation_value = best_validation_value

            print(
                '\tBest overall configuration found for outer fold with index',
                outer_cv)

            self.best_result[outer_cv] = self._evaluate_algorithm(
                algorithm=best_overall_algorithm,
                configurations=best_overall_configuration,
                training_set=training_outer,
                validation_set=None,
                testing_set=testing,
                metric=self.metric)
            self.best_result[outer_cv][
                'best_overall_algorithm'] = best_overall_algorithm
            self.best_result[outer_cv][
                'best_overall_configuration'] = best_overall_configuration
            self.best_result[outer_cv]['best_overall_key'] = best_overall_key
            if self.ensembles is not None:
                print('\tCreating ensembles')
                self._run_ensembles(
                    outer_cv, best_overall_algorithm.get_corresponding_algo(),
                    best_overall_configuration, training_outer, testing,
                    self.metric)
            else:
                print('\tNo ensembles to create')

            outer_cv += 1

        # Serialize benchmark
        benchmark_to_pickle(self)

        print('Leaving run_nested_cv for dataset:', self.data_set_name)
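
run_nested_cv obtains its splitters from _get_outer_folds and _get_inner_folds, which must return objects with a scikit-learn style split(X, y) method. A minimal sketch under that assumption (_OUTER_FOLDS is hypothetical; _INNER_FOLDS is the constant referenced above when deriving the MLP batch size):

from sklearn.model_selection import KFold

_OUTER_FOLDS = 3  # hypothetical; only _INNER_FOLDS appears in the snippet above
_INNER_FOLDS = 3

# Hypothetical fold helpers on the benchmark class; a classification benchmark
# might use StratifiedKFold instead.
def _get_outer_folds(self, seed):
    return KFold(n_splits=_OUTER_FOLDS, shuffle=True, random_state=seed)

def _get_inner_folds(self, seed):
    return KFold(n_splits=_INNER_FOLDS, shuffle=True, random_state=seed)
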
Example #22
 def _calculate_network_value(self, network, data_set):
     predictions = network.predict(
         get_input_variables(data_set).as_matrix())
     target = get_target_variable(data_set).as_matrix()
     return self.metric.evaluate(predictions, target)
Example #23
    def run_nested_cv(self):
        """ runs benchmark study on a nested cross-validation environment """

        #=======================================================================
        # print('self.learning_metric =', self.learning_metric)
        # print('self.selection_metric =', self.selection_metric)
        #=======================================================================

        print('Entering run_nested_cv for dataset:', self.dataset_name)

        outer_cv = 0
        outer_folds = self._get_outer_folds(outer_cv)
        for training_outer_index, testing_index in outer_folds.split(
                get_input_variables(self.samples).values,
                get_target_variable(self.samples).values):

            print('\n\tIndex of outer fold:', outer_cv)

            training_outer, testing = pd.DataFrame(
                self.samples.values[training_outer_index]), pd.DataFrame(
                    self.samples.values[testing_index])

            if self.classification:
                best_overall_validation_value = float('-Inf')
            else:
                best_overall_validation_value = float(
                    '-Inf'
                ) if self.selection_metric.greater_is_better else float('Inf')

            for key in self.models.keys():

                print('\t\tAlgorithm with key:', key)

                if not self.results[key][outer_cv]:

                    if self.classification:
                        best_validation_value = float('-Inf')
                    else:
                        best_validation_value = float(
                            '-Inf'
                        ) if self.selection_metric.greater_is_better else float(
                            'Inf')

                    validation_value_list = list()
                    for configuration in range(
                            self.models[key]['max_combinations']):

                        print('\n\t\t\tIndex of algorithm configuration:',
                              len(validation_value_list))

                        if (len(self.models[key]['algorithms'])) > 1:
                            option = randint(0, 2)
                            algorithm = self.models[key]['algorithms'][option]
                            config = self.models[key]['configuration_method'](
                                option)
                        else:
                            algorithm = self.models[key]['algorithms'][0]
                            #===================================================
                            # if (key == 'mlpc_sgd' or key == 'mlpc_adam' or key == 'mlpr_sgd' or key == 'mlpr_adam'):
                            #===================================================
                            if key.startswith('mlp'):
                                # version from 01-22
                                # config = self.models[key]['configuration_method'](self.get_dataset_size(training_outer))
                                # version from 01-25
                                batch_size = int(training_outer.shape[0] /
                                                 _INNER_FOLDS)
                                # batch_size = int(training_outer.shape[0] / _INNER_FOLDS) * 2
                                config = self.models[key][
                                    'configuration_method'](batch_size)
                            else:
                                config = self.models[key][
                                    'configuration_method']()

                        inner_folds = self._get_inner_folds(outer_cv)
                        tmp_valid_training_values_list = list()
                        for training_inner_index, validation_index in inner_folds.split(
                                get_input_variables(training_outer).values,
                                get_target_variable(training_outer).values):

                            print('\t\t\t\tIndex of inner fold:',
                                  len(tmp_valid_training_values_list))

                            training_inner, validation = pd.DataFrame(
                                training_outer.values[training_inner_index]
                            ), pd.DataFrame(
                                training_outer.values[validation_index])

                            results = self._evaluate_algorithm(
                                algorithm=algorithm,
                                configurations=config,
                                training_set=training_inner,
                                validation_set=None,
                                testing_set=validation,
                                metric=self.learning_metric)

                            # print('results[testing_value] =', results['testing_value'], ', results[training_value] =', results['training_value'])

                            if self.classification:
                                tmp_valid_training_values_list.append(
                                    (results['testing_accuracy'],
                                     results['training_accuracy']))
                            else:
                                tmp_valid_training_values_list.append(
                                    (results['testing_value'],
                                     results['training_value']))

                        # Calculate average validation value and check if the current value is better than the best one
                        average_validation_value = mean(
                            tmp_valid_training_values_list, axis=0)[0]
                        average_training_value = mean(
                            tmp_valid_training_values_list, axis=0)[1]

                        if self.classification:
                            print(
                                "\t\t\tAverage AUROC training vs. validation: %.3f vs. %.3f"
                                % (average_training_value,
                                   average_validation_value))
                        else:
                            print(
                                "\t\t\tAverage RMSE training vs. validation: %.3f vs. %.3f"
                                % (average_training_value,
                                   average_validation_value))

                        if self.classification:
                            if average_validation_value > best_validation_value:
                                #===============================================
                                # print('\n\t\t\t\t\tClassification: %.3f is better than %.3f\n' % (average_validation_value, best_validation_value))
                                #===============================================
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                            #===================================================
                            # else:
                            #     print('\n\t\t\t\t\tClassification: %.3f is worse (!) than %.3f\n' % (average_validation_value, best_validation_value))
                            #===================================================
                        else:
                            if is_better(average_validation_value,
                                         best_validation_value,
                                         self.selection_metric):
                                #===============================================
                                # print('\n\t\t\t\t\tRegression: %.3f is better than %.3f\n' % (average_validation_value, best_validation_value))
                                #===============================================
                                best_algorithm = algorithm
                                best_key = key
                                best_configuration = config
                                best_validation_value = average_validation_value
                                best_training_value = average_training_value
                            #===================================================
                            # else:
                            #     print('\n\t\t\t\t\tRegression: %.3f is worse (!) than %.3f\n' % (average_validation_value, best_validation_value))
                            #===================================================

                        # Add configuration and validation error to validation error list.
                        validation_value_list.append(
                            (configuration, average_validation_value))
                    """ all allowed configurations assessed of a given variant/algorithm/method (key) """
                    print(
                        '\n\t\tEvaluating best configuration in outer fold with index',
                        outer_cv)
                    self.results[key][outer_cv] = self._evaluate_algorithm(
                        algorithm=best_algorithm,
                        configurations=best_configuration,
                        training_set=training_outer,
                        validation_set=None,
                        testing_set=testing,
                        metric=self.learning_metric)
                    self.results[key][outer_cv][
                        'best_configuration'] = best_configuration
                    self.results[key][outer_cv][
                        'avg_inner_validation_error'] = best_validation_value
                    self.results[key][outer_cv][
                        'avg_inner_training_error'] = best_training_value
                    if self.classification:
                        self.results[key][outer_cv][
                            'avg_inner_validation_accuracy'] = best_validation_value
                        self.results[key][outer_cv][
                            'avg_inner_training_accuracy'] = best_training_value

                    if self.classification:
                        print(
                            "\n\t\tAUROC training vs. test: %.3f vs. %.3f" %
                            (self.results[key][outer_cv]['training_accuracy'],
                             self.results[key][outer_cv]['testing_accuracy']))
                        #=======================================================
                        # print("\n\t\tAlgorithm %s, AUROC training vs. test: %.3f vs. %.3f" % (key, self.results[key][outer_cv]['training_accuracy'], self.results[key][outer_cv]['testing_accuracy']))
                        #=======================================================
                    else:
                        print("\n\t\tRMSE training vs. test: %.3f vs. %.3f" %
                              (self.results[key][outer_cv]['training_value'],
                               self.results[key][outer_cv]['testing_value']))
                        #=======================================================
                        # print("\n\t\tAlgorithm %s, RMSE training vs. test: %.3f vs. %.3f" % (key, self.results[key][outer_cv]['training_value'], self.results[key][outer_cv]['testing_value']))
                        #=======================================================

                    best_overall_algorithm = best_algorithm
                    best_overall_configuration = best_configuration
                    best_overall_key = best_key

                    self.best_result[outer_cv] = self.results[key][outer_cv]
                    self.best_result[outer_cv][
                        'best_overall_algorithm'] = best_overall_algorithm
                    self.best_result[outer_cv][
                        'best_overall_configuration'] = best_overall_configuration
                    self.best_result[outer_cv][
                        'best_overall_key'] = best_overall_key

                    # # Serialize benchmark
                    # benchmark_to_pickle(self)

            outer_cv += 1

        # Serialize benchmark
        benchmark_to_pickle(self)

        print('Leaving run_nested_cv for dataset:', self.dataset_name)
Example #24
 def _calculate_accuracy(self, learner, dataset):
     prediction = learner.predict(get_input_variables(dataset).values)
     target = get_target_variable(dataset).values
     return Accuracy.evaluate(prediction, target.astype(int))

 def test_fit(self):
     X = get_input_variables(self.training).as_matrix()
     y = get_target_variable(self.training).as_matrix()
     self.neat.fit(X, y, Accuracy, verbose=True)
     self.assertTrue(expr=self.neat.champion)
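
_calculate_accuracy assumes the Accuracy metric exposes a static evaluate(prediction, target) returning the share of correctly predicted labels. A minimal sketch under that assumption (how real-valued predictions are mapped to class labels here is a guess):

import numpy as np

# Hypothetical sketch of the Accuracy metric; the rounding step is an assumption.
class Accuracy:
    greater_is_better = True  # higher accuracy is better

    @staticmethod
    def evaluate(prediction, target):
        predicted_labels = np.asarray(prediction).round().astype(int)
        return float(np.mean(predicted_labels == np.asarray(target)))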