def test_first_last_elements_X(self):
        """Check reference coefficients in the first and last rows of Xl and Xu.

        For each of the two feature matrices returned by ``load_data``, the
        first two columns and the last column of the first and last rows are
        compared with hard-coded reference values, to 6 decimal places.
        """
        # Reference values for Xl
        Xl_first_row_first2c = 0.898881, 1.305756
        Xl_last_row_first2c = 1.393745, 0.1967
        Xl_first_row_lastc = -0.3353498396562418
        Xl_last_row_lastc = -0.025671111438439326

        # Reference values for Xu
        Xu_first_row_first2c = 1.567927, 0.752978
        Xu_last_row_first2c = -0.256243, 0.481008
        Xu_first_row_lastc = 0.5988862660840178
        Xu_last_row_lastc = 0.3787646988802795

        Xl, _, Xu = load_data(self.filename)

        # (actual, expected, error message) triples, checked in this order.
        checks = [
            (Xl[0, :2], Xl_first_row_first2c,
             "Wrong values in Xl (first row, first 2 columns)."),
            (Xl[-1, :2], Xl_last_row_first2c,
             "Wrong values in Xl (last row, first 2 columns)."),
            (Xl[0, -1], Xl_first_row_lastc,
             "Wrong value in Xl (first row, last column)."),
            (Xl[-1, -1], Xl_last_row_lastc,
             "Wrong value in Xl (last row, last column)."),
            (Xu[0, :2], Xu_first_row_first2c,
             "Wrong values in Xu (first row, first 2 columns)."),
            (Xu[-1, :2], Xu_last_row_first2c,
             "Wrong values in Xu (last row, first 2 columns)."),
            (Xu[0, -1], Xu_first_row_lastc,
             "Wrong value in Xu (first row, last column)."),
            (Xu[-1, -1], Xu_last_row_lastc,
             "Wrong value in Xu (last row, last column)."),
        ]
        for actual, expected, msg in checks:
            # The two arrays are passed positionally: the keyword names of
            # these parameters differ between NumPy versions (x/y vs
            # actual/desired), positional arguments work with both.
            np.testing.assert_array_almost_equal(
                actual, expected, decimal=6, err_msg=msg)
    def test_first_last_elements_y(self):
        """Check the first five and last five labels returned by load_data."""
        # Reference values
        y_first5r = np.array([1985, 2005, 1998, 1973, 1957])
        y_last5r = np.array([2006, 1960, 1976, 1988, 2003])

        _, yl, _ = load_data(self.filename)

        # The arrays are passed positionally: the keyword names of these
        # parameters differ between NumPy versions (x/y vs actual/desired).
        np.testing.assert_array_equal(
            yl[:5],
            y_first5r,
            err_msg="Wrong values in yl(first 5 elements).")
        np.testing.assert_array_equal(
            yl[-5:],
            y_last5r,
            err_msg="Wrong values in yl (last 5 elements).")
# Example #3
# 0
def learn_best_predictor_and_predict_test_data(filename):
    """Fit a predictor on part of the labeled data and predict the unlabeled data.

    The data are read with ``load_data(filename)`` and the labeled part is
    passed through ``randomize_data``. The first 500 resulting rows are used
    for training and the remaining rows are held out. The model obtained from
    ``learn_all_with_Mp`` is fitted on the training rows, its mean squared
    error on the held-out rows is printed, and its predictions on the
    unlabeled data are saved to ``io/test_prediction_results.npy``.

    Parameters
    ----------
    filename
        Path handed to ``load_data``.

    Returns
    -------
    None
    """
    n_train = 500

    features, targets, unlabeled_features = load_data(filename)
    shuffled_features, shuffled_targets = randomize_data(features, targets)

    # Split: first n_train rows for training, everything after is held out.
    train_features = shuffled_features[:n_train, :]
    train_targets = shuffled_targets[:n_train]
    holdout_features = shuffled_features[n_train:, :]
    holdout_targets = shuffled_targets[n_train:]

    model = learn_all_with_Mp(train_features, train_targets)
    model.fit(train_features, train_targets)

    train_predictions = model.predict(train_features)
    holdout_predictions = model.predict(holdout_features)

    # The training error is computed but only the held-out error is reported.
    train_mse = [mean_squared_error(train_targets, train_predictions)]
    holdout_mse = [mean_squared_error(holdout_targets, holdout_predictions)]
    print("la performance du reste des données étiquetées = ", holdout_mse)

    # Predict the unlabeled data and store the result on disk.
    unlabeled_predictions = model.predict(unlabeled_features)
    np.save("io/test_prediction_results.npy", unlabeled_predictions)
    def test_data_shape(self):
        """Check type, number of dimensions and shape of the arrays from load_data."""
        # Reference values
        expected_n_labeled = 4578
        expected_dim = 90
        expected_n_unlabeled = 2289

        Xl, yl, Xu = load_data(self.filename)

        # Type checks
        for arr in (Xl, yl, Xu):
            self.assertIsInstance(arr, np.ndarray)

        # Number-of-dimensions checks
        for arr, expected_ndim in ((Xl, 2), (yl, 1), (Xu, 2)):
            self.assertEqual(arr.ndim, expected_ndim)

        # Shape checks
        labeled_shape = (expected_n_labeled, expected_dim)
        unlabeled_shape = (expected_n_unlabeled, expected_dim)
        np.testing.assert_array_equal(Xl.shape, labeled_shape)
        # yl.shape is compared against the bare integer, not a 1-tuple.
        np.testing.assert_array_equal(yl.shape, expected_n_labeled)
        np.testing.assert_array_equal(Xu.shape, unlabeled_shape)
# Example #5
# 0
import matplotlib.pyplot as plt
from algorithms.data_utils import load_data
""" Build the histogram of the years of the songs from the labeled training
data and export the figure to the image file plots/hist_year.png
"""

# Import data.
# The path uses a forward slash: in a normal string literal "\Y" is an
# invalid escape sequence, and a backslash separator only works on Windows.
X_labeled, y_labeled, X_unlabeled = load_data('io/YearPredictionMSD_100.npz')

# Visualization: histogram of the labels, saved to disk.
plt.figure("hist")
plt.title("The years present in the train data")
plt.xlabel("Years")
plt.ylabel("number of music")
plt.hist(y_labeled)
plt.savefig('plots/hist_year.png')
import numpy as np
from algorithms.data_utils import load_data


""" Load training data and print dimensions as well as a few coefficients
in the first and last places and at random locations.
"""

# The path uses a forward slash: in a normal string literal "\Y" is an
# invalid escape sequence, and a backslash separator only works on Windows.
YearPredictionMSD_100 = load_data("io/YearPredictionMSD_100.npz")

print("Keys of dict data are : {}".format(list(YearPredictionMSD_100)))


# Print each loaded element followed by its dtype, rank and shape.
for arr in YearPredictionMSD_100:
    print("Features : {}".format(arr))
    print("Type : {}".format(arr.dtype))
    print("number of dimensions : {}".format(arr.ndim))
    print("Shape : {}".format(arr.shape))
    print("\n")

# Element at index 1 is reported as y_labeled.
print("For y_labeled :")
print("first five values : {}".format(YearPredictionMSD_100[1][:5]))
print("last five values : {}".format(YearPredictionMSD_100[1][-5:]))

# Positions, within the loaded data, of the two elements reported below,
# and the display name used for each of them (same order).
X_data = [0, 2]
X_data_names = ['X_labeled', 'X_unlabeled']
# For each of the two elements, print a few coefficients of its first and
# last rows.
for i in range(2) :
    print("For feature : {}".format(X_data_names[i]))
    # First two entries of the first row, then of the last row.
    print("2 first coefficients of the first line : {} ".format(YearPredictionMSD_100[X_data[i]][0][:2]))
    print("2 first coefficients of the last line : {} ".format(YearPredictionMSD_100[X_data[i]][-1][:2]))
    # Last entry of the first row.
    print("last coefficient of the first line : {} ".format(YearPredictionMSD_100[X_data[i]][0][-1]))