def create_and_save(self):
        """
        Create and save the dataset using the arguments passed in from the console.  This will shuffle the validation
        data, create a classifier, and select the correct classifications that are seen.  The resulting list
        of images that are correctly classified is then saved to an NPY file.
        :return:
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('-rs',
                            '--random-state',
                            default=1024,
                            help='Random state for the shuffling.')
        parser.add_argument('-n',
                            '--number',
                            default=100,
                            help='Number of correct classifications to store.')
        arguments = vars(parser.parse_args())

        random_state = int(arguments['random_state'])
        number = int(arguments['number'])

        print('')
        print('Starting %s Image and Label Extraction...' % self.name)
        print('')
        print('Parameters:')
        print('    Random State: %s' % random_state)
        print('          Number: %s' % number)
        print('')

        use_project_path()

        # Load and shuffle the validation dataset
        image_df = pd.read_csv('data/full_image_dataset.csv')
        image_df = image_df.sample(
            frac=1.0, random_state=random_state).reset_index(drop=True)

        # Create the CNN to be tested
        model = self.network(weights='imagenet')

        accumulator = list()

        # Step through the dataframe and keep all images correctly classified.
        for index, row in image_df.iterrows():
            scaled_image = image.img_to_array(
                image.load_img(row['image'], target_size=(224, 224)))
            model_input = self.preprocessor(
                np.expand_dims(scaled_image.copy(), axis=0))
            raw_predictions = model.predict(model_input)
            if int(self.class_index[row['label']]) == int(
                    np.argmax(raw_predictions)):
                accumulator.append(
                    [scaled_image,
                     int(self.class_index[row['label']])])
            if len(accumulator) >= number:
                break

        # dtype=object is needed because each entry pairs an image array with an int.
        np.save('data/%s_%s_correct.npy' % (self.name, number),
                np.array(accumulator, dtype=object))
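The saved array pairs each preprocessed 224x224 image with its integer class index, stored with
object dtype. A minimal sketch of reading it back (the 'vgg16' and 100 in the file name stand in
for self.name and number):

import numpy as np

# Load the (image, class_index) pairs saved by create_and_save above.
# allow_pickle=True is required because the array has object dtype.
pairs = np.load('data/vgg16_100_correct.npy', allow_pickle=True)
images = np.stack(pairs[:, 0])    # shape (N, 224, 224, 3)
labels = pairs[:, 1].astype(int)  # ImageNet class indices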
Example #2
import argparse

from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input
from keras.applications.densenet import DenseNet201
from keras.applications.densenet import preprocess_input as densenet_preprocess_input
from keras.applications.resnet_v2 import ResNet152V2
from keras.applications.resnet_v2 import preprocess_input as resnet_preprocess_input
import numpy as np

from filter import FourierUniformFilter
from utility import OptimizationSearch
from utility import use_project_path
from utility import save_filter_search_scores


if __name__ == '__main__':
    use_project_path()

    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--network', default='vgg16',
                        help='The network architecture to test: vgg16, vgg19, densenet, or resnet')
    parser.add_argument('-s', '--sample', default=100,
                        help='The sample file to use for testing.')
    parser.add_argument('-i', '--iterations', default=25,
                        help='The number of iterations to use for testing.')
    arguments = vars(parser.parse_args())

    NETWORK = arguments['network']
    SAMPLE = arguments['sample']
    ITERATIONS = int(arguments['iterations'])

    print('')
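The snippet is truncated here; presumably the parsed --network name is mapped to one of the
imported architectures and its matching preprocessing function. A hedged sketch of that dispatch
(the NETWORKS mapping is an assumption, not from the source):

# Hypothetical dispatch from the --network argument to a
# (model class, preprocess_input) pair, mirroring the imports above.
NETWORKS = {
    'vgg16': (VGG16, vgg16_preprocess_input),
    'vgg19': (VGG19, vgg19_preprocess_input),
    'densenet': (DenseNet201, densenet_preprocess_input),
    'resnet': (ResNet152V2, resnet_preprocess_input),
}
network_class, preprocess_input = NETWORKS[NETWORK]
model = network_class(weights='imagenet')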
Example #3
def load_data_frame():
    # pandas (pd) and data_filename are assumed to be imported/defined at
    # module level in the file this snippet was excerpted from.
    use_project_path()
    data_frame = pd.read_csv(data_filename, dtype='float')
    return data_frame
Example #4
    def run_classification_search_experiment(
            self,
            scoring,
            sample=None,
            random_state=None,
            test_size=0.25,
            n_jobs=-1,
            n_iter=2,
            cv=5,
            verbose=3,
            multiclass=False,
            record_predict_proba=False):
        """
        The classification search makes use of a bayesian search to find the best hyper-parameters.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)

        search = BayesSearchCV(
            self.estimator,
            self.hyper_parameters.search_space,
            n_jobs=n_jobs,
            n_iter=n_iter,
            cv=cv,
            verbose=verbose,
            scoring=scoring,
            return_train_score=True
        )

        data_frame = self.df

        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        x_train, x_test, y_train, y_test = train_test_split(data_frame, data_frame[self.target], test_size=test_size)

        logger.time_log('Starting HyperParameter Search...')
        results = search.fit(x_train, y_train)
        logger.time_log('Search Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(results.best_estimator_, x_train)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(results.best_estimator_, x_test)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(results.best_estimator_, x_test)
            test_proba_evaluation_frame = EvaluationFrame(y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        evaluator = Evaluator(logger)
        evaluator.evaluate_classifier_result(
            results,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass
        )

        logger.close()

        self.hyper_parameters.params = results.best_params_
        self.hyper_parameters.save('%s_params.p' % self.name)

        self.trained_estimator = results.best_estimator_
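self.hyper_parameters.search_space is defined elsewhere in the project; BayesSearchCV (from
scikit-optimize) accepts a dict mapping estimator parameter names to skopt dimensions. A
hypothetical space for a random forest, for illustration only:

from skopt.space import Categorical, Integer, Real

# Illustrative search space; keys must match the estimator's parameter names.
search_space = {
    'n_estimators': Integer(50, 500),
    'max_depth': Integer(2, 20),
    'max_features': Categorical(['sqrt', 'log2']),
    'min_samples_leaf': Real(1e-3, 1e-1, prior='log-uniform'),
}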
Example #5
    def run_classification_experiment(
            self,
            sample=None,
            random_state=None,
            test_size=0.25,
            multiclass=False,
            record_predict_proba=False):
        """
        Running a classification experiment is used when only a single model run and fit is necessary.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)

        data_frame = self.df

        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        x_train, x_test, y_train, y_test = train_test_split(data_frame, data_frame[self.target], test_size=test_size)

        if self.hyper_parameters is not None:
            self.estimator.set_params(**self.hyper_parameters.params)

        logger.time_log('Training Model...')
        self.estimator.fit(x_train, y_train)
        logger.time_log('Training Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(self.estimator, x_train)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(self.estimator, x_test)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(self.estimator, x_test)
            test_proba_evaluation_frame = EvaluationFrame(y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        evaluator = Evaluator(logger)
        evaluator.evaluate_classifier_result(
            self.estimator,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass
        )

        logger.close()

        if self.hyper_parameters is not None:
            self.hyper_parameters.save('%s_params.p' % self.name)
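A hedged usage sketch; the enclosing experiment class is not shown in the source, so the
constructor and attributes below (df, target, estimator, name, hyper_parameters) are assumptions:

# Hypothetical driver for the method above.
experiment = ClassificationExperiment(...)  # constructor not in the source
experiment.run_classification_experiment(
    sample=10000,        # subsample 10k rows for a quicker run
    random_state=42,
    test_size=0.25,
    record_predict_proba=True)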
Example #6
    def run_classification_experiment(self,
                                      sample=None,
                                      random_state=None,
                                      test_size=0.20,
                                      multiclass=False,
                                      record_predict_proba=False,
                                      sampling=None,
                                      cv=5,
                                      verbose=True,
                                      transformer=None,
                                      fit_increment=None,
                                      warm_start=False,
                                      max_iters=None,
                                      n_jobs=-1):
        """
        Run a classification experiment with optional re-sampling, feature transformation,
        k-fold cross validation, and incremental (batched) fitting.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)
        evaluator = Evaluator(logger)

        data_frame = self.df

        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        x_train, x_test, y_train, y_test = train_test_split(
            data_frame, data_frame[self.target], test_size=test_size)

        if transformer is not None:
            logger.time_log('Fitting Transformer...')
            transformer.fit(x_train)
            logger.time_log('Transformer Fit Complete.\n')

        if sampling is not None:
            logger.time_log('Starting Data Re-Sampling...')
            logger.log('Original Training Shape is %s' % Counter(y_train))
            x_new, y_new = sampling.fit_resample(x_train, y_train)
            logger.log('Balanced Training Shape is %s' % Counter(y_new))
            if hasattr(x_train, 'columns'):
                x_new = pd.DataFrame(x_new, columns=x_train.columns)
            x_train, y_train = x_new, y_new
            logger.time_log('Re-Sampling Complete.\n')
            logger.time_log('Shuffling Re-Sampled Data.\n')
            x_train, y_train = shuffle(x_train,
                                       y_train,
                                       random_state=random_state)
            logger.time_log('Shuffling Complete.\n')

        if self.hyper_parameters is not None:
            self.estimator.set_params(**self.hyper_parameters.params)

        if cv is not None:
            # shuffle=True is required for random_state to have an effect;
            # recent scikit-learn raises an error when it is set without shuffling.
            kfold = StratifiedKFold(n_splits=cv, shuffle=True,
                                    random_state=random_state)
            logger.time_log('Cross Validating Model...')
            fold_scores = Parallel(n_jobs=n_jobs, verbose=3)(
                delayed(crossfold_classifier)
                (clone(self.estimator), transformer, x_train, y_train,
                 train_index, test_index, record_predict_proba, verbose,
                 fit_increment, warm_start, max_iters, random_state)
                for train_index, test_index in kfold.split(x_train, y_train))
            logger.time_log('Cross Validation Complete.\n')

        logger.time_log('Training Model...')
        if fit_increment is not None:
            if max_iters is not None:
                # Note: reusing the same random_state gives an identical
                # shuffle every epoch; pass None for a fresh order each time.
                for _ in range(max_iters):
                    x_iter_train, y_iter_train = shuffle(
                        x_train, y_train, random_state=random_state)
                    batch_fit_classifier(self.estimator,
                                         x_iter_train,
                                         y_iter_train,
                                         transformer=transformer,
                                         increment=fit_increment,
                                         verbose=verbose)
            else:
                batch_fit_classifier(self.estimator,
                                     x_train,
                                     y_train,
                                     transformer=transformer,
                                     increment=fit_increment,
                                     verbose=verbose)
        else:
            if transformer is not None:
                x_train_transformed = transformer.transform(x_train)
                self.estimator.fit(x_train_transformed, y_train)
            else:
                self.estimator.fit(x_train, y_train)
        logger.time_log('Training Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(self.estimator,
                                        x_train,
                                        transformer=transformer,
                                        verbose=verbose)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(self.estimator,
                                       x_test,
                                       transformer=transformer,
                                       verbose=verbose)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(self.estimator,
                                                       x_test,
                                                       transformer=transformer,
                                                       verbose=verbose)
            test_proba_evaluation_frame = EvaluationFrame(
                y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        if cv is not None:
            evaluator.evaluate_fold_scores(fold_scores)

        evaluator.evaluate_classifier_result(
            self.estimator,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass)

        logger.close()

        if self.hyper_parameters is not None:
            self.hyper_parameters.save('%s_params.p' % self.name)

        self.trained_estimator = self.estimator
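crossfold_classifier is imported from elsewhere in the project and runs one fold per joblib
worker. A minimal sketch of what such a fold worker plausibly does (the simplified signature and
accuracy scoring are assumptions, not the project's implementation):

from sklearn.metrics import accuracy_score

def crossfold_classifier_sketch(estimator, transformer, x, y,
                                train_index, test_index):
    """Fit a cloned estimator on one fold and score its validation split."""
    x_fold_train, x_fold_test = x.iloc[train_index], x.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]
    if transformer is not None:
        x_fold_train = transformer.transform(x_fold_train)
        x_fold_test = transformer.transform(x_fold_test)
    estimator.fit(x_fold_train, y_fold_train)
    return accuracy_score(y_fold_test, estimator.predict(x_fold_test))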
Example #7
    def run_classification_search_experiment(self,
                                             scoring,
                                             sample=None,
                                             random_state=None,
                                             test_size=0.20,
                                             n_jobs=-1,
                                             n_iter=2,
                                             cv=5,
                                             verbose=3,
                                             multiclass=False,
                                             record_predict_proba=False,
                                             sampling=None):
        """
        Bayesian hyper-parameter search (as in run_classification_search_experiment above) with
        optional re-sampling and shuffling of the training partition before the search.
        """
        use_project_path()

        logger = Logger('%s.txt' % self.name)

        search = BayesSearchCV(self.estimator,
                               self.hyper_parameters.search_space,
                               n_jobs=n_jobs,
                               n_iter=n_iter,
                               cv=cv,
                               verbose=verbose,
                               scoring=scoring,
                               return_train_score=True)

        data_frame = self.df

        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        x_train, x_test, y_train, y_test = train_test_split(
            data_frame, data_frame[self.target], test_size=test_size)

        if sampling is not None:
            logger.time_log('Starting Data Re-Sampling...')
            logger.log('Original Training Shape is %s' % Counter(y_train))
            x_new, y_new = sampling.fit_resample(x_train, y_train)
            logger.log('Balanced Training Shape is %s' % Counter(y_new))
            if hasattr(x_train, 'columns'):
                x_new = pd.DataFrame(x_new, columns=x_train.columns)
            x_train, y_train = x_new, y_new
            logger.time_log('Re-Sampling Complete.\n')
            logger.time_log('Shuffling Re-Sampled Data.\n')
            x_train, y_train = shuffle(x_train,
                                       y_train,
                                       random_state=random_state)
            logger.time_log('Shuffling Complete.\n')

        logger.time_log('Starting HyperParameter Search...')
        results = search.fit(x_train, y_train)
        logger.time_log('Search Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(results.best_estimator_, x_train)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(results.best_estimator_, x_test)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(results.best_estimator_,
                                                       x_test)
            test_proba_evaluation_frame = EvaluationFrame(
                y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        evaluator = Evaluator(logger)
        evaluator.evaluate_classifier_result(
            results,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass)

        logger.close()

        self.hyper_parameters.params = results.best_params_
        self.hyper_parameters.save('%s_params.p' % self.name)

        self.trained_estimator = results.best_estimator_
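The sampling argument only needs a fit_resample(X, y) method, which matches the imbalanced-learn
sampler interface. A hedged usage sketch (the experiment object and argument values are
illustrative):

from imblearn.over_sampling import RandomOverSampler

# Hypothetical call: over-sample minority classes before the Bayesian search.
experiment.run_classification_search_experiment(
    scoring='f1_weighted',
    n_iter=25,
    cv=5,
    sampling=RandomOverSampler(random_state=42),
    record_predict_proba=True)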