def eval_performance(pretrained_network: str, epochs: int, data_augmentation: bool,
                     prefix: str, overwrite=False, dataset='test') -> float:
    """ Returns the accuracy of the model.

    If the model does not exist, trains a new model before computing the accuracy.

    Args:
        pretrained_network: pre trained network for bottleneck features.
                            Can be ['vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception']
        epochs: number of epochs to train the model
        data_augmentation: use or not data augmentation
        prefix: prefix used to build the weights' filename
        overwrite: if True, trains and saves the weight file again, otherwise does nothing
        dataset: accuracy will be computed on training, validation or test set based on this input
                 Possible options in ['train', 'test', 'valid'].
    Returns:
        accuracy in [0, 1] interval
    """
    model_file = paths.get_weights_filename(pretrained_network, prefix, epochs, data_augmentation)
    # Guard clause: train first if the weights are missing (or a retrain was requested),
    # then re-enter to run the evaluation path against the freshly saved weights.
    if not os.path.exists(model_file) or overwrite:
        print("Model does not exist.\nTraining model")
        train_transfer_learning_net(pretrained_network, epochs, data_augmentation, prefix)
        return eval_performance(pretrained_network, epochs, data_augmentation, prefix,
                                dataset=dataset)

    print(f"Model already trained.\nWeights' file at\t{model_file}")
    files, labels = datasets.load_dataset(dataset=dataset)
    bottle_file = os.path.join(paths.Folders().bottleneck_features,
                               f'bottleneck_{pretrained_network.lower()}.pkl')
    if os.path.exists(bottle_file):
        bottleneck_features = bf.load_bottleneck(dataset=dataset, bottle_file=bottle_file)
    else:
        # Tensors are only needed when bottleneck features must be recomputed, so they
        # are built lazily here (the original computed them unconditionally).
        tensors = list(map(preprocess.path_to_tensor, ct.progr(files)))
        bottleneck_features = bf.extract_bottleneck_features_list(pretrained_network, tensors)
    # NOTE(review): 133 is hard-coded here while the training path derives it from
    # datasets.get_dog_names() — presumably both equal 133; verify they stay in sync.
    model = bn.build_transfer_learning_netwok(
        input_shape=bottleneck_features[0].shape, n_of_classes=133)
    model.load_weights(model_file)
    pred = model.predict(bottleneck_features)
    # Accuracy over argmax-decoded predictions vs argmax-decoded one-hot labels.
    acc = metrics.get_accuracy(np.array([np.argmax(x) for x in pred]),
                               np.array([np.argmax(y) for y in labels]))
    return acc
def save_best_model() -> None:
    """ Saves the best model to 'data/saved_models/transfer_learning_model.h5' """
    best_model, _ = load_best_model()
    target_path = os.path.join(paths.Folders().models, 'transfer_learning_model.h5')
    best_model.save(target_path)
""" This module contains the functions to handle training, validation and test datasets """ import os import numpy as np import pandas as pd from sklearn.datasets import load_files from keras.utils import np_utils from dog_breed.common import paths folders = paths.Folders() def load_data(path: str): """ Loads file names and their labels (as categorical) Args: path: Returns: dog_files: list of filenames of dog images dog_targets: categorical (one hot encoded) labels for each file in the dog_files list """ print(f'Loading from file: {path}') data = load_files(path) dog_files = np.array(data['filenames']) dog_targets = np_utils.to_categorical(np.array(data['target']), 133) return dog_files, dog_targets def _load_training(training_folder=folders.training_data):
This module contains functions and methods to an initial data analysis. """ import pandas as pd import numpy as np from sklearn.datasets import load_files import matplotlib.pyplot as plt import seaborn as sns from dog_breed.data import datasets from dog_breed.common import paths from dog_breed.common import graph plt.style.use('seaborn') DOG_NAMES = datasets.get_dog_names() FOLDERS = paths.Folders() def _get_labels_from_data(data) -> pd.Series: unique, counts = np.unique(data['target'], return_counts=True) labels_set = pd.Series(index=[DOG_NAMES[u] for u in unique], data=counts) return labels_set.sort_values(ascending=False) def _get_labels_from_folder(folder) -> pd.Series: data = load_files(folder) return _get_labels_from_data(data) def get_train_labels(folder=FOLDERS.training_data) -> pd.Series: return _get_labels_from_folder(folder)
return pd.DataFrame() else: for i in saved_models_df.index: epochs = int(saved_models_df.loc[i, 'epochs']) data_augmentation = saved_models_df.loc[i, 'augmented'] saved_models_df.loc[i, 'test_accuracy'] = eval_cnn( epochs=epochs, data_augmentation=data_augmentation, dataset='test', ) return saved_models_df.sort_values(by='test_accuracy', ascending=False) if __name__ == '__main__': defaults = { 'models_folder': paths.Folders().models, 'report_folder': paths.Folders().models, } parser = argparse.ArgumentParser( description= 'Evaluates the performance (in terms of test accuracy) of the saved models.' ) parser.add_argument( '-m', '--models_folder', default=defaults['models_folder'], help= f"Folder containing the weights' (.hdf5) files (default: {defaults['models_folder']})", )
def train_transfer_learning_net(pretrained_network: str, epochs: int, data_augmentation: bool,
                                prefix: str, overwrite=False) -> None:
    """ Computes bottleneck features for training and validation data and trains the Sequential network.

    Weights will be saved in 'data/saved_models/' with name
        if data augmentation is used -> <prefix>_<epochs>_A_weight.best.<pre_trained_net>.hdf5
        else -> <prefix>_<epochs>_weight.best.<pre_trained_net>.hdf5

    Args:
        pretrained_network: pre trained network for bottleneck features.
                            Can be ['vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception']
        epochs: number of epochs to train the model
        data_augmentation: use or not data augmentation
        prefix: prefix used to build the weights' filename
        overwrite: if True, trains and saves the weight file again, otherwise does nothing
    Returns:
        None, but weights will be saved in the preselected location
    """
    model_file = paths.get_weights_filename(pretrained_network, prefix, epochs, data_augmentation)
    # Guard clause: nothing to do when the weights already exist and no retrain was asked.
    if os.path.exists(model_file) and not overwrite:
        print(f"Model already trained.\nWeights' file at\t{model_file}")
        return

    args_train = {
        'data_augmentation': data_augmentation,
        'epochs': epochs,
        'prefix': prefix,
        'overwrite': overwrite,
        'bottleneck_network': pretrained_network,
    }
    # load train/validation file names and labels
    train_files, y_train = datasets.load_dataset(dataset='train')
    valid_files, y_valid = datasets.load_dataset(dataset='valid')
    # Reuse cached bottleneck features when available; otherwise compute them from scratch.
    bottle_file = os.path.join(paths.Folders().bottleneck_features,
                               f'bottleneck_{pretrained_network.lower()}.pkl')
    if os.path.exists(bottle_file):
        # Context manager closes the pickle file (the original leaked the handle).
        with open(bottle_file, 'rb') as fh:
            bottleneck_train, bottleneck_valid, _ = pickle.load(fh)
    else:
        # Tensors are only needed to extract bottleneck features, so they are built
        # lazily here (the original computed them even when the cache existed).
        tensors_train = list(map(preprocess.path_to_tensor, ct.progr(train_files)))
        tensors_valid = list(map(preprocess.path_to_tensor, ct.progr(valid_files)))
        bottleneck_train = bf.extract_bottleneck_features_list(pretrained_network, tensors_train)
        bottleneck_valid = bf.extract_bottleneck_features_list(pretrained_network, tensors_valid)

    n_of_classes = len(datasets.get_dog_names())
    model = bn.build_transfer_learning_netwok(
        input_shape=bottleneck_train[0].shape, n_of_classes=n_of_classes)
    train_and_predict_tl.train_network_tl(
        network=model,
        **args_train,
        training_set=(bottleneck_train, y_train),
        validation_set=(bottleneck_valid, y_valid),
    )
    model_file = paths.get_weights_filename(pretrained_network, prefix, epochs, data_augmentation)
    print(f"Weights saved at\t{model_file}")