Example #1
0
def eval_performance(pretrained_network: str,
                     epochs: int,
                     data_augmentation: bool,
                     prefix: str,
                     overwrite=False,
                     dataset='test') -> float:
    """ Returns the accuracy of the model.
    If the model does not exist (or a retrain is requested via `overwrite`),
    trains a new model before computing the accuracy.
    Args:
        pretrained_network: pre-trained network for bottleneck features.
                            Can be ['vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception']
        epochs: number of epochs to train the model
        data_augmentation: use or not data augmentation
        prefix: prefix used to build the weights' file name
        overwrite: if True, trains and saves the weight file again, otherwise
                   reuses the existing weights
        dataset: accuracy will be computed on training, validation or test set based on this input
                 Possible options in ['train', 'test', 'valid'].
    Returns:
        accuracy in [0, 1] interval
    """
    model_file = paths.get_weights_filename(pretrained_network, prefix, epochs,
                                            data_augmentation)
    if not os.path.exists(model_file) or overwrite:
        print("Model does not exist.\nTraining model")
        # BUG FIX: propagate `overwrite` — previously it was dropped here, so a
        # retrain requested with overwrite=True was silently skipped whenever a
        # weights file already existed.
        train_transfer_learning_net(pretrained_network, epochs,
                                    data_augmentation, prefix,
                                    overwrite=overwrite)
        # The model now exists on disk; recurse once (without overwrite) to
        # evaluate it.
        return eval_performance(pretrained_network,
                                epochs,
                                data_augmentation,
                                prefix,
                                dataset=dataset)

    print(f"Model already trained.\nWeights' file at\t{model_file}")
    files, labels = datasets.load_dataset(dataset=dataset)

    bottle_file = os.path.join(
        paths.Folders().bottleneck_features,
        f'bottleneck_{pretrained_network.lower()}.pkl')
    if os.path.exists(bottle_file):
        bottleneck_features = bf.load_bottleneck(dataset=dataset,
                                                 bottle_file=bottle_file)
    else:
        # Tensors are only needed when the cache is missing; computing them is
        # the expensive step, so it is done lazily here (the original computed
        # them unconditionally).
        tensors = list(map(preprocess.path_to_tensor, ct.progr(files)))
        bottleneck_features = bf.extract_bottleneck_features_list(
            pretrained_network, tensors)

    # NOTE(review): 133 matches the dog-breed class count hard-coded elsewhere
    # in this project (see load_data) — confirm against datasets.get_dog_names().
    model = bn.build_transfer_learning_netwok(
        input_shape=bottleneck_features[0].shape, n_of_classes=133)
    model.load_weights(model_file)
    pred = model.predict(bottleneck_features)
    # Both predictions and labels are one-hot / score vectors; compare argmax.
    acc = metrics.get_accuracy(np.array([np.argmax(x) for x in pred]),
                               np.array([np.argmax(y) for y in labels]))
    return acc
Example #2
0
def save_best_model() -> None:
    """Save the best model to 'data/saved_models/transfer_learning_model.h5'."""
    best_model, _ = load_best_model()
    target_path = os.path.join(paths.Folders().models,
                               'transfer_learning_model.h5')
    best_model.save(target_path)
Example #3
0
"""
This module contains the functions to handle training, validation and test datasets
"""
import os
import numpy as np
import pandas as pd

from sklearn.datasets import load_files
from keras.utils import np_utils

from dog_breed.common import paths

folders = paths.Folders()


def load_data(path: str, n_classes: int = 133):
    """ Loads file names and their labels (as categorical)
    Args:
        path: folder containing the images, organised in one sub-folder per class
              (the layout expected by sklearn's load_files)
        n_classes: total number of classes for the one-hot encoding
                   (defaults to 133, the number of dog breeds in this project)
    Returns:
        dog_files: list of filenames of dog images
        dog_targets: categorical (one hot encoded) labels for each file in the dog_files list
    """
    print(f'Loading from file: {path}')
    data = load_files(path)
    dog_files = np.array(data['filenames'])
    # Generalized: the class count was previously hard-coded to 133.
    dog_targets = np_utils.to_categorical(np.array(data['target']), n_classes)
    return dog_files, dog_targets


def _load_training(training_folder=folders.training_data):
Example #4
0
This module contains functions and methods for an initial data analysis.
"""
import pandas as pd
import numpy as np
from sklearn.datasets import load_files
import matplotlib.pyplot as plt
import seaborn as sns

from dog_breed.data import datasets
from dog_breed.common import paths
from dog_breed.common import graph

plt.style.use('seaborn')

DOG_NAMES = datasets.get_dog_names()
FOLDERS = paths.Folders()


def _get_labels_from_data(data) -> pd.Series:
    """Return per-breed sample counts from a load_files-style dict.

    The series is indexed by breed name (via DOG_NAMES) and sorted from the
    most to the least frequent breed.
    """
    breeds, occurrences = np.unique(data['target'], return_counts=True)
    breed_names = [DOG_NAMES[breed] for breed in breeds]
    counts = pd.Series(data=occurrences, index=breed_names)
    return counts.sort_values(ascending=False)


def _get_labels_from_folder(folder) -> pd.Series:
    """Load the images under *folder* and return per-breed counts, descending."""
    return _get_labels_from_data(load_files(folder))


def get_train_labels(folder=FOLDERS.training_data) -> pd.Series:
    """Per-breed image counts for the training set, most frequent first."""
    labels = _get_labels_from_folder(folder)
    return labels
Example #5
0
        return pd.DataFrame()
    else:
        for i in saved_models_df.index:
            epochs = int(saved_models_df.loc[i, 'epochs'])
            data_augmentation = saved_models_df.loc[i, 'augmented']
            saved_models_df.loc[i, 'test_accuracy'] = eval_cnn(
                epochs=epochs,
                data_augmentation=data_augmentation,
                dataset='test',
            )
    return saved_models_df.sort_values(by='test_accuracy', ascending=False)


if __name__ == '__main__':
    defaults = {
        'models_folder': paths.Folders().models,
        'report_folder': paths.Folders().models,
    }

    parser = argparse.ArgumentParser(
        description=
        'Evaluates the performance (in terms of test accuracy) of the saved models.'
    )

    parser.add_argument(
        '-m',
        '--models_folder',
        default=defaults['models_folder'],
        help=
        f"Folder containing the weights' (.hdf5) files (default: {defaults['models_folder']})",
    )
Example #6
0
def train_transfer_learning_net(pretrained_network: str,
                                epochs: int,
                                data_augmentation: bool,
                                prefix: str,
                                overwrite=0) -> None:
    """ Computes bottleneck features for training and validation data and trains the Sequential network.
    Weights will be saved in 'data/saved_models/' with name
    if data augmentation is used ->     <prefix>_<epochs>_A_weight.best.<pre_trained_net>.hdf5
                            else ->     <prefix>_<epochs>_weight.best.<pre_trained_net>.hdf5
    Args:
        pretrained_network: pre-trained network for bottleneck features.
                            Can be ['vgg16', 'vgg19', 'resnet50', 'inceptionv3', 'xception']
        epochs: number of epochs to train the model
        data_augmentation: use or not data augmentation
        prefix: prefix used to build the weights' file name
        overwrite: if truthy, trains and saves the weight file again, otherwise does nothing
    Returns:
        None, but weights will be saved in the preselected location
    """
    model_file = paths.get_weights_filename(pretrained_network, prefix, epochs,
                                            data_augmentation)
    if os.path.exists(model_file) and not overwrite:
        print(f"Model already trained.\nWeights' file at\t{model_file}")
        return

    args_train = {
        'data_augmentation': data_augmentation,
        'epochs': epochs,
        'prefix': prefix,
        'overwrite': overwrite,
        'bottleneck_network': pretrained_network,
    }
    # load train/validation file names and labels
    train_files, y_train = datasets.load_dataset(dataset='train')
    valid_files, y_valid = datasets.load_dataset(dataset='valid')

    bottle_file = os.path.join(
        paths.Folders().bottleneck_features,
        f'bottleneck_{pretrained_network.lower()}.pkl')
    if os.path.exists(bottle_file):
        # Cached pickle holds (train, valid, test) bottleneck features.
        # `with` ensures the file handle is closed (the original leaked it).
        with open(bottle_file, 'rb') as fh:
            bottleneck_train, bottleneck_valid, _ = pickle.load(fh)
    else:
        # Tensors are only needed when the cache is missing; computing them is
        # the expensive step, so it is done lazily here (the original computed
        # them unconditionally, even when the cache was present).
        tensors_train = list(
            map(preprocess.path_to_tensor, ct.progr(train_files)))
        tensors_valid = list(
            map(preprocess.path_to_tensor, ct.progr(valid_files)))
        bottleneck_train = bf.extract_bottleneck_features_list(
            pretrained_network, tensors_train)
        bottleneck_valid = bf.extract_bottleneck_features_list(
            pretrained_network, tensors_valid)

    n_of_classes = len(datasets.get_dog_names())
    model = bn.build_transfer_learning_netwok(
        input_shape=bottleneck_train[0].shape, n_of_classes=n_of_classes)

    train_and_predict_tl.train_network_tl(
        network=model,
        **args_train,
        training_set=(bottleneck_train, y_train),
        validation_set=(bottleneck_valid, y_valid),
    )

    # model_file was already computed above with identical arguments;
    # no need to recompute it.
    print(f"Weights saved at\t{model_file}")