def test_model_analysis(self):
    num_files = 1
    data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
    data_storage = data_path + 'test_1.pkl'
    catalogue.rgb_list(data_tups, storage_location=data_storage)
    plot_tups = handling.pickled_data_loader(data_path, 'test_1')
    test_set_filenames = preprocessing.hold_out_test_set(
        data_path, number_of_files_per_class=num_files)
    test_set_list, learning_set_list = catalogue.data_set_split(
        plot_tups, test_set_filenames)
    train, val = catalogue.learning_set(image_list=learning_set_list,
                                        split=split,
                                        classes=['noise', 'one'],
                                        iterator_mode='arrays')
    testing_set = catalogue.test_set(image_list=test_set_list,
                                     classes=['noise', 'one'],
                                     iterator_mode='arrays')
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')
    result = reporting.model_analysis(model, testing_set, test_set_list)

    assert isinstance(result, pd.DataFrame)
def test_report_on_metrics(self):
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    testing = test_set(path, batch_size=batch_size, classes=classes,
                       iterator_mode=None)
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')
    conf_matrix, report = cnn.report_on_metrics(
        model, testing, target_names=['noisy', 'not_noisy'])

    assert isinstance(conf_matrix, np.ndarray), \
        'the confusion matrix should be contained in a numpy array'
    assert isinstance(report, str), 'the report should be a string'
def test_evaluate_model(self):
    # define the sets and the model to use for the rest of the testing
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    testing = test_set(path, batch_size=batch_size, classes=classes,
                       iterator_mode=None)
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')
    results = cnn.evaluate_model(model, testing)

    assert isinstance(results, list), \
        'the model performance should be stored in a list'
    assert results[1] <= 1, \
        'the accuracy should be a number not larger than 1'
def test_build_model(self):
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')

    assert isinstance(train, keras.preprocessing.image.DirectoryIterator), \
        'the training set should be an image iterator type of object'
    assert isinstance(val, keras.preprocessing.image.DirectoryIterator), \
        'the validation set should be an image iterator type of object'
    assert isinstance(model, keras.engine.sequential.Sequential), \
        'the CNN model should be a keras sequential model'
    assert isinstance(history, keras.callbacks.callbacks.History), \
        'the history should be the output of a callback function'
def test_plot_history(self):
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')
    _, ax = plt.subplots(1, 2)
    ax = cnn.plot_history(history)

    epochs, loss = ax[0].lines[0].get_xydata().T
    assert (loss == history.history['loss']).all(), \
        'the plot should contain the loss value per epoch'
    epochs, acc = ax[1].lines[0].get_xydata().T
    assert (acc == history.history['accuracy']).all(), \
        'the plot should contain the accuracy value per epoch'
def test_save_load_model(self):
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    model, history = cnn.build_model(train, val,
                                     config_path='./hardy/test/')
    saved_model = cnn.save_load_model(model=model, save=True,
                                      filepath='./hardy/test/model')
    assert saved_model == 'the model was correctly saved'
    model_loaded = cnn.save_load_model(load=True,
                                       filepath='./hardy/test/model')
    assert model_loaded, 'the model was not correctly loaded'

    # delete the model file after testing
    os.remove('./hardy/test/model')
    print('the saved model was correctly deleted after testing')
def test_report_generation(self):
    config_path = './hardy/test/'
    train, val = learning_set(path, split=split, classes=classes,
                              iterator_mode=None)
    model, history = cnn.build_model(train, val, config_path=config_path)
    metrics = cnn.evaluate_model(model, val)
    log_dir = './hardy/test/temp_report/'
    tuner.report_generation(model, history, metrics, log_dir,
                            save_model=False, config_path=config_path)

    report_dir = log_dir + 'report/'
    report_location = os.listdir(report_dir)
    for item in report_location:
        if item.endswith('.yaml'):
            with open(report_dir + item, 'r') as file:
                report = yaml.load(file, Loader=yaml.FullLoader)
            assert isinstance(report, dict), \
                'The filetype returned is not a dictionary'

    # remove the report files after checking they were correctly created
    shutil.rmtree(log_dir)
def classifier_wrapper(input_path, test_set_filenames, run_name,
                       config_path, classifier='tuner',
                       iterator_mode='arrays', split=0.1,
                       target_size=(80, 80), color_mode='rgb',
                       batch_size=32, image_path=None,
                       classes=['class_1', 'class_2'],
                       project_name='tuner_run', k_fold=False, k=None,
                       **kwarg):
    '''
    Single "universal" wrapping function to set up and run the CNN and
    tuner on any properly labeled image set.

    Operates in either of two formats:
    "arrays" : takes data as a list of image tuples
    "else"   : takes data as "image_path" pointing to sorted image folders

    Parameters
    ----------
    input_path : str
        path to the raw .csv files containing the data to classify
    test_set_filenames : list
        list containing the strings of filenames randomly selected to be
        part of the test set
    config_path : str
        string containing the path to the yaml file representing the
        classifier hyperparameters
    run_name : str
        name used to create a folder for storing the results of this run
    iterator_mode : str
        option to use images from arrays directly, or to save the .png
        files and use a directory iterator
    plot_format : str
        option for standard or RGB color gradient
    print_out : bool
        option for printing out feedback on the computational time taken
        to initialize the data and generate the images
    num_test_files_class : int or float
        number of files per class to select for the test set
    classifier : str
        option between cnn and tuner
    split : float
        fraction of the learning set to use for the validation step
    target_size : tuple
        image target size. Presented as a tuple indicating the number of
        pixels composing the two dimensions of the image (w x h)
    batch_size : int
        the number of files to group into a batch
    classes : list
        list containing strings of the classes the data is divided in.
        The class names represent the folder names the files are
        contained in
    project_name : str
        name of the folder to be created for storing the results of the
        tuning
    '''
    if iterator_mode == 'arrays':
        # load the pickled plot data generated in the preprocessing step
        image_data = handling.pickled_data_loader(input_path, run_name)

        assert image_data, 'No image_data list provided'

        test_set_list, learning_set_list = to_catalogue.data_set_split(
            image_data, test_set_filenames)

        if k_fold:
            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
        else:
            training_set, validation_set = to_catalogue.learning_set(
                image_list=learning_set_list, split=split, classes=classes,
                target_size=target_size, iterator_mode='arrays',
                batch_size=batch_size, color_mode=color_mode)

            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
    else:
        assert image_path, 'no path to the image folders was provided'

        training_set, validation_set = to_catalogue.learning_set(
            image_path, split=split, target_size=target_size,
            iterator_mode='from_directory', batch_size=batch_size,
            classes=classes)

        test_set = to_catalogue.test_set(image_path,
                                         target_size=target_size,
                                         classes=classes,
                                         iterator_mode='from_directory',
                                         batch_size=batch_size)

    if k_fold:
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))
    else:
        print('training set : {} batches of {} files'.format(
            len(training_set), batch_size))
        print('validation set : {} batches of {} files'.format(
            len(validation_set), batch_size))
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))

    if classifier == 'tuner':
        # warn search_function, 'no search function provided,
        # using default RandomSearch'
        tuner.build_param(config_path)

        output_path = preprocessing.save_to_folder(input_path,
                                                   project_name,
                                                   run_name)
        tuned_model = tuner.run_tuner(training_set, validation_set,
                                      project_name=output_path)
        model, history, metrics = tuner.best_model(tuned_model,
                                                   training_set,
                                                   validation_set,
                                                   test_set)
        conf_matrix, report = cnn.report_on_metrics(model, test_set)
        tuner.report_generation(model, history, metrics, output_path,
                                tuner=tuned_model, save_model=True)
    else:
        if k_fold:
            assert k, 'the number of folds needs to be provided'

            validation_score, model, history, final_score = \
                cnn.k_fold_model(k, config_path=config_path,
                                 target_size=target_size,
                                 classes=classes, batch_size=batch_size,
                                 color_mode=color_mode,
                                 iterator_mode=iterator_mode,
                                 image_list=learning_set_list,
                                 test_set=test_set)

            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name,
                                                       run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model, history, final_score,
                                    output_path, tuner=None,
                                    save_model=True,
                                    config_path=config_path,
                                    k_fold=k_fold, k=k)
        else:
            model, history = cnn.build_model(training_set, validation_set,
                                             config_path=config_path)

            metrics = cnn.evaluate_model(model, test_set)

            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name,
                                                       run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model, history, metrics, output_path,
                                    tuner=None, save_model=True,
                                    config_path=config_path)

    if iterator_mode == 'arrays':
        performance_evaluation = reporting.model_analysis(
            model, test_set, test_set_list)

        performance_evaluation.to_csv(output_path +
                                      'report/model_evaluation.csv')
    else:
        performance_evaluation = reporting.model_analysis(model, test_set)

        performance_evaluation.to_csv(output_path +
                                      'report/model_evaluation.csv')
    return
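

if __name__ == '__main__':
    # Example usage: a minimal sketch, not part of the original module.
    # The folder path, run name, class labels, and file count below are
    # placeholder assumptions, and the 'arrays' mode assumes a pickled
    # plot list named '<run_name>.pkl' already exists in the input folder
    # (as produced by the rgb_list/pickling step of the preprocessing).
    example_path = './local_data/'
    example_test_files = preprocessing.hold_out_test_set(
        example_path, number_of_files_per_class=10)
    classifier_wrapper(example_path, example_test_files,
                       run_name='example_run',
                       config_path='./hardy/test/',
                       classifier='cnn', iterator_mode='arrays',
                       split=0.2, classes=['noise', 'one'],
                       project_name='example_project')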