Exemplo n.º 1
0
def load_cv_data(validation_data_folder, percent):
    """
    Load the five cross-validation folds for one data percentage.

    Args:
        validation_data_folder (str):
            Folder containing the sub-directories Fold1, ..., Fold5.
        percent:
            Value interpolated into the file names, so each fold is read
            from ``X_{percent}%.txt`` and ``y_{percent}%.txt``.

    Returns:
        list: Five ``(X, y)`` tuples ordered from Fold1 to Fold5, holding
        whatever ``utils.load_data_from_txt_file`` returns for each file.
    """
    def read_fold(fold):
        # X is read before y, one fold at a time.
        features = utils.load_data_from_txt_file(
            f"{validation_data_folder}/Fold{fold}/X_{percent}%.txt"
        )
        # Only the y file is loaded with the extra positional ``True`` flag.
        targets = utils.load_data_from_txt_file(
            f"{validation_data_folder}/Fold{fold}/y_{percent}%.txt", True
        )
        return features, targets

    return [read_fold(number) for number in range(1, 6)]
Exemplo n.º 2
0
def load_learning_curve_data(learning_curve_data_folder):
    """
    Load the learning curve feature subsets (X only).

    Only the ``X_train_*`` files are read. No ``y_train_*`` file is loaded
    and no labels are returned.

    Args:
        learning_curve_data_folder (str):
            Directory to the folder containing the data. This
            folder must contain the following files
                X_train_10%.txt, X_train_20%.txt,..., X_train_100%.txt

    Returns:
        numpy.ndarray
            The 10 loaded subsets, ordered from 10% to 100%. Each subset is
            converted with ``np.asarray`` and the resulting list is then
            converted with ``np.asarray`` as a whole.

            NOTE(review): if the subsets have different numbers of rows the
            list is ragged; recent NumPy releases are expected to raise
            ``ValueError`` for that instead of building an object array --
            confirm against the NumPy version in use.

    Example usage
        >>> subsets_X = load_learning_curve_data("/path/to/folder/with/data")
        >>> for X in subsets_X:
        >>>     # Train on X
    """
    all_X_train = [
        np.asarray(
            utils.load_data_from_txt_file(
                f"{learning_curve_data_folder}/X_train_{percent}%.txt"
            )
        )
        for percent in range(10, 101, 10)  # 10, 20, ..., 100
    ]
    return np.asarray(all_X_train)
Exemplo n.º 3
0
def load_data_custom(learning_curve_data_folder):
    """
    Load only the smallest and the largest learning curve subsets.

    The percentages come from ``range(10, 101, 90)``, i.e. just 10 and 100.

    Args:
        learning_curve_data_folder (str):
            Folder containing ``X_train_10%.txt``, ``y_train_10%.txt``,
            ``X_train_100%.txt`` and ``y_train_100%.txt``.

    Returns:
        (all_X_train, all_y_train)
            Two lists with two entries each (10% first, then 100%), holding
            whatever ``utils.load_data_from_txt_file`` returns per file.
    """
    loaded_pairs = []
    for pct in range(10, 101, 90):  # yields 10 and 100 only
        features_path = f"{learning_curve_data_folder}/X_train_{pct}%.txt"
        targets_path = f"{learning_curve_data_folder}/y_train_{pct}%.txt"
        # X is read before y; only the y file gets the extra ``True`` flag.
        features = utils.load_data_from_txt_file(features_path)
        targets = utils.load_data_from_txt_file(targets_path, True)
        loaded_pairs.append((features, targets))

    all_X_train = [features for features, _ in loaded_pairs]
    all_y_train = [targets for _, targets in loaded_pairs]
    return (all_X_train, all_y_train)
Exemplo n.º 4
0
def generate_bmeans():
    """
    Load the five files ``mu_k_1.txt`` ... ``mu_k_5.txt``.

    The files are read from the hard-coded relative folder
    ``P3/MeanInitialization/Part_b``.

    Returns:
        numpy.ndarray
            The five loaded files in order (1 to 5), each converted with
            ``np.asarray`` and then combined with ``np.asarray``.
    """
    loaded_means = [
        np.asarray(
            utils.load_data_from_txt_file(
                f"P3/MeanInitialization/Part_b/mu_k_{num}.txt"
            )
        )
        for num in range(1, 6)  # file indices 1..5
    ]
    return np.asarray(loaded_means)
Exemplo n.º 5
0
def load_all_cross_validation_data(validation_data_folder):
    """
    Load the train/test feature files of all five cross validation folds.

    Args:
        validation_data_folder (str):
            Directory to the folder containing the data.
            This directory must contain, directly inside it, the files
                X_train_fold1.txt, ..., X_train_fold5.txt
                X_test_fold1.txt,  ..., X_test_fold5.txt

    Returns:
        all_folds (list)
            all_folds is a list of 5 elements, ordered from fold 1 to
            fold 5. Each element is a tuple (X_train, X_test) where
            X_train is the content of X_train_fold{n}.txt and
            X_test is the content of X_test_fold{n}.txt, both exactly as
            returned by ``utils.load_data_from_txt_file``.
            No label (y) files are read by this function.

    Example usage:
        >>> all_folds = load_all_cross_validation_data("/path/to/folder/with/CV-data")
        >>> X_train, X_test = all_folds[2]  # Data of fold number 3

    """
    all_folds = []

    for fold in range(1, 6):
        train_file = f"{validation_data_folder}/X_train_fold{fold}.txt"
        test_file = f"{validation_data_folder}/X_test_fold{fold}.txt"
        # The second tuple slot holds the fold's test features, not labels.
        X_train = utils.load_data_from_txt_file(train_file)
        X_test = utils.load_data_from_txt_file(test_file)
        all_folds.append((X_train, X_test))
    return all_folds
Exemplo n.º 6
0
# # Gaussian Mixture Models

import numpy as np
import matplotlib.pyplot as plt
import utils
from scipy.stats import multivariate_normal
from gmm import GaussianMixtureModel

# Load "P3/X_test.txt" at module level (executed as soon as this file runs).
X_test_all = utils.load_data_from_txt_file("P3/X_test.txt")
# NOTE(review): the same file is read a second time under another name;
# presumably so the two names hold independent copies -- confirm both are needed.
test_data = utils.load_data_from_txt_file("P3/X_test.txt")


def plot_contour_gaussian(ax, mean, covariance, eps=1e-2):
    """ Plot the contour of a 2d Gaussian distribution with given mean and 
    covariance matrix

    Args:
        ax (matplotlib.axes.Axes):
            Subplot used to plot the contour
        mean (numpy.array):
            Mean of the gaussian distribution
        covariance (numpy.array):
            Covariance matrix of the distribution
        eps:
            The cut off to draw the contour plot. The higher the value, 
            the smaller the contour plot.

    Returns:
        None

    """