Exemplo n.º 1
0
    def predict(self, F, datainfo, timeinfo):
        """Fit a single LightGBM classifier (budget permitting) and score ``F``.

        Steps, in order:

        1. Merge ``extract(datainfo, timeinfo)`` into ``self._info``.
        2. Load the test batch with ``get_data(F, self._info)`` and run both
           the test batch and ``self._train_data`` through ``self._transform``.
        3. If ``self._should_correct`` is set, compute per-row training
           weights with ``correct_covariate_shift``.
        4. Tune hyperparameters once; the result is cached in
           ``self._best_hyperparameters`` and reused on later calls.
        5. Refit ``self._classifier`` when ``has_sufficient_time`` allows it,
           or unconditionally when no classifier has been fitted yet.
        6. Return column 1 of ``predict_proba`` on the transformed test batch.

        Args:
            F: Handle for the test batch; passed straight to ``get_data``.
            datainfo: Dataset metadata; passed straight to ``extract``.
            timeinfo: Timing metadata; passed straight to ``extract``.

        Returns:
            One score per transformed test row (column 1 of ``predict_proba``;
            presumably the positive-class probability -- confirm against the
            label encoding).
        """
        print('\nFile: {} Class: {} Function: {} State: {}'.format(
            'architectures.py', 'OriginalEnsemble', 'predict', 'Start'))

        info = extract(datainfo, timeinfo)
        self._info.update(info)
        print_time_info(self._info)

        test_data = get_data(F, self._info)
        print('test_data.shape: {}'.format(test_data.shape))

        transformed_test_data = self._transform(test_data, DataType.TEST)
        train_data = self._transform(self._train_data, DataType.TRAIN)

        train_labels = self._train_labels
        print('transformed_test_data.shape: {}'.format(
            transformed_test_data.shape))
        print('train_data.shape: {}'.format(train_data.shape))

        # Covariate-shift correction compares the training set against a test
        # sample that is never larger than the training set itself.
        train_weights = None
        if self._should_correct:
            size = min(len(train_data), len(transformed_test_data))
            train_weights = correct_covariate_shift(
                train_data,
                self._test_sampler.sample(transformed_test_data, size),
                self._random_state, self._correction_threshold,
                self._correction_n_splits)

        # Hyperparameters are tuned once; the profile is only needed for that.
        if self._best_hyperparameters is None:
            fixed_hyperparameters, search_space = Profile.parse_profile(
                self._profile)
            tuner = HyperparametersTuner(fixed_hyperparameters, search_space,
                                         self._max_evaluations)
            self._best_hyperparameters = tuner.get_best_hyperparameters(
                train_data, train_labels, self._validation_ratio,
                self._random_state)
            print('self._best_hyperparameters: {}'.format(
                self._best_hyperparameters))

        # Always fit on the first call, even when the budget is exhausted:
        # without a fitted model the predict_proba call below cannot succeed.
        if has_sufficient_time(
                self._dataset_budget_threshold,
                self._info) or getattr(self, '_classifier', None) is None:
            # The model is fitted on the full training set and nothing is
            # evaluated here, so no validation split is made.
            self._classifier = LGBMClassifier()
            self._classifier.set_params(**self._best_hyperparameters)
            self._classifier.fit(train_data,
                                 train_labels,
                                 sample_weight=train_weights)
        else:
            # Keep using the classifier fitted on an earlier call.
            print('Time budget exceeded.')

        predictions = self._classifier.predict_proba(
            transformed_test_data)[:, 1]
        self._iteration += 1
        print('predictions.shape: {}'.format(predictions.shape))
        print('File: {} Class: {} Function: {} State: {} \n'.format(
            'architectures.py', 'OriginalEnsemble', 'predict', 'End'))
        return predictions
Exemplo n.º 2
0
    def predict(self, F, datainfo, timeinfo):
        """Score ``F`` with a weighted ensemble of LightGBM classifiers.

        On every call the test batch is loaded and transformed, the stored
        training data is re-transformed, optional covariate-shift weights are
        computed and (once only) hyperparameters are tuned.

        When ``has_sufficient_time`` allows it, or when the ensemble is still
        empty, a new classifier is fitted and appended. Every member's weight
        is recomputed with ``compute_weight`` on a fresh stratified validation
        split. If the ensemble then exceeds ``self._ensemble_size``, the
        member chosen by ``remove_worst_classifier`` is dropped.

        Args:
            F: Handle for the test batch; passed straight to ``get_data``.
            datainfo: Dataset metadata; passed straight to ``extract``.
            timeinfo: Timing metadata; passed straight to ``extract``.

        Returns:
            One score per transformed test row: column 1 of ``predict_proba``
            for a single-member ensemble, otherwise the weighted sum of the
            members' column-1 outputs divided by the sum of the weights.
        """
        print('\nFile: {} Class: {} Function: {} State: {}'.format(
            'architectures.py', 'OriginalEnsemble', 'predict', 'Start'))

        self._info.update(extract(datainfo, timeinfo))
        print_time_info(self._info)

        raw_test = get_data(F, self._info)
        print('test_data.shape: {}'.format(raw_test.shape))

        transformed_test_data = self._transform(raw_test, DataType.TEST)
        train_data = self._transform(self._train_data, DataType.TRAIN)
        train_labels = self._train_labels

        print('transformed_test_data.shape: {}'.format(
            transformed_test_data.shape))
        print('train_data.shape: {}'.format(train_data.shape))

        # The test sample used for shift correction is capped at the size of
        # the training set.
        sample_size = min(len(train_data), len(transformed_test_data))
        if self._should_correct:
            test_sample = self._test_sampler.sample(transformed_test_data,
                                                    sample_size)
            train_weights = correct_covariate_shift(
                train_data, test_sample, self._random_state,
                self._correction_threshold, self._correction_n_splits)
        else:
            train_weights = None

        fixed_hyperparameters, search_space = Profile.parse_profile(
            self._profile)
        if self._best_hyperparameters is None:
            # Tuned on the first call only; later calls reuse the result.
            self._best_hyperparameters = HyperparametersTuner(
                fixed_hyperparameters, search_space,
                self._max_evaluations).get_best_hyperparameters(
                    train_data, train_labels, self._validation_ratio,
                    self._random_state)
            print('self._best_hyperparameters: {}'.format(
                self._best_hyperparameters))

        # An empty ensemble forces a fit regardless of the remaining budget.
        if has_sufficient_time(self._dataset_budget_threshold,
                               self._info) or len(self._classifiers) == 0:
            # Only the validation halves of the split are used, to weight the
            # ensemble members.
            _, validation_data, _, validation_labels = train_test_split(
                train_data,
                train_labels,
                test_size=self._validation_ratio,
                random_state=self._random_state,
                shuffle=True,
                stratify=train_labels)

            # NOTE(review): the new member is fitted on all of train_data,
            # which contains validation_data, so its weight is computed on
            # rows it was trained on -- confirm this is intended.
            new_classifier = LGBMClassifier()
            new_classifier.set_params(**self._best_hyperparameters)
            new_classifier.fit(train_data,
                               train_labels,
                               sample_weight=train_weights)
            new_scores = new_classifier.predict_proba(validation_data)[:, 1]
            new_weight = compute_weight(new_scores, validation_labels,
                                        self._epsilon)

            # Re-weight the existing members on the current validation split.
            refreshed_weights = np.array([])
            for member in self._classifiers:
                member_scores = member.predict_proba(validation_data)[:, 1]
                refreshed_weights = np.append(
                    refreshed_weights,
                    compute_weight(member_scores, validation_labels,
                                   self._epsilon))

            self._classifiers = np.append(self._classifiers, new_classifier)
            self._ensemble_weights = np.append(refreshed_weights, new_weight)
            print('self._ensemble_weights: {}'.format(self._ensemble_weights))

            if len(self._classifiers) > self._ensemble_size:
                worst_index = remove_worst_classifier(self._classifiers,
                                                      validation_data,
                                                      validation_labels)
                print('Removed classifier: {}'.format(worst_index))
                self._classifiers = np.delete(self._classifiers, worst_index)
                self._ensemble_weights = np.delete(self._ensemble_weights,
                                                   worst_index)
        else:
            print('Time budget exceeded.')

        if len(self._classifiers) == 1:
            # A lone member is used directly; its weight is irrelevant.
            predictions = self._classifiers[0].predict_proba(
                transformed_test_data)[:, 1]
        else:
            # Weighted average of the members' column-1 outputs.
            weighted_sum = np.zeros(len(transformed_test_data))
            for position, member in enumerate(self._classifiers):
                weight = self._ensemble_weights[position]
                member_scores = member.predict_proba(
                    transformed_test_data)[:, 1]
                weighted_sum = weighted_sum + weight * member_scores
            predictions = weighted_sum / np.sum(self._ensemble_weights)

        self._iteration += 1
        print('predictions.shape: {}'.format(predictions.shape))
        print('File: {} Class: {} Function: {} State: {} \n'.format(
            'architectures.py', 'OriginalEnsemble', 'predict', 'End'))
        return predictions