Пример #1
0
    def test_f_stat_advertising(self):
        '''
        Check lms.f_stat on the advertising data set.

        Sales is regressed on TV, radio and newspaper together; the resulting
        F-statistic, rounded to the nearest integer, must equal 570 (the
        reference value the original author took from ISLR pg. 76).
        '''
        advertising = pd.read_csv('Advertising.csv')

        # Response is sales; predictors are all three advertising columns.
        Y = advertising['sales']
        X = pd.DataFrame(advertising[['TV', 'radio', 'newspaper']])

        # Fit the linear model and predict on the same data it was fit on.
        model = LinearRegression()
        model.fit(X, Y)
        fitted = model.predict(X)

        # lms.f_stat is fed the tss and rss values plus the number of
        # observations and the number of predictor columns.
        total_ss = lms.tss(Y)
        residual_ss = lms.rss(Y.values, fitted)
        f_value = lms.f_stat(total_ss, residual_ss, Y.size, X.columns.size)

        self.assertEqual(round(f_value), 570)
Пример #2
0
    def test_p_value_advertising_full_X(self):
        '''
        Intended to check the p-value for newspaper in the model that uses
        TV, radio and newspaper together (ISLR pg. 74 per the original author).

        NOTE(review): as written, the value asserted on is the result of
        lms.standard_error, not a p-value, and the t-statistic computed below
        is never used. The expected value is negative, which presumably can
        never match a standard error -- confirm the intended quantity and its
        reference value before relying on this test.
        '''
        # NOTE(review): unused -- the assertion below hard-codes places=4.
        places = 5

        advertising = pd.read_csv('Advertising.csv')

        # Response is sales; predictors are all three advertising columns.
        Y = advertising['sales']
        X = pd.DataFrame(advertising[['TV', 'radio', 'newspaper']])

        # Fit the linear model and predict on the same data it was fit on.
        model = LinearRegression()
        model.fit(X, Y)
        fitted = model.predict(X)

        # The squared rse is what gets passed to lms.standard_error.
        residual_ss = lms.rss(Y.values, fitted)
        rse = lms.rse(residual_ss, Y.values.size, X.columns.size)
        variance = rse**2
        standard_error = lms.standard_error(variance, X['newspaper'].values)

        # coef_[2] is the newspaper coefficient (third column of X).
        # NOTE(review): result is computed but not asserted on.
        t_stat = lms.t_stat(model.coef_[2], standard_error)

        self.assertAlmostEqual(standard_error, -0.0059, places=4)
Пример #3
0
    def test_t_stat_advertising(self):
        '''
        Check lms.t_stat for each single-predictor advertising model.

        Sales is regressed on TV, radio and newspaper one at a time, and the
        t-statistic of each slope is compared with its reference value
        (ISLR pg. 68 and pg. 72 per the original author) to two decimal places.
        '''
        reference_t_stats = [('TV', 17.67), ('radio', 9.92), ('newspaper', 3.30)]
        places = 2

        advertising = pd.read_csv('Advertising.csv')
        Y = advertising['sales']

        for column, expected in reference_t_stats:
            # One-column design matrix for this predictor.
            X = pd.DataFrame(advertising[column])

            model = LinearRegression()
            model.fit(X, Y)

            # The squared rse is what gets passed to lms.standard_error.
            residual_ss = lms.rss(Y.values, model.predict(X))
            rse = lms.rse(residual_ss, Y.values.size, X.columns.size)
            standard_error = lms.standard_error(rse**2, X[column].values)

            # Only one predictor was fit, so coef_[0] is its slope.
            actual = lms.t_stat(model.coef_[0], standard_error)

            self.assertAlmostEqual(actual, expected, places=places)
Пример #4
0
    def test_rss_basic(self):
        '''
        Minimal lms.rss check on three-element arrays that is easy to verify by hand.
        '''
        first = np.array([1, 3, 7])
        second = np.array([0, 6, 9])

        # Element-wise differences are 1, -3, -2; the squares 1 + 9 + 4
        # give the expected 14.
        self.assertEqual(lms.rss(first, second), 14)
Пример #5
0
    def test_rss_len(self):
        '''
        Check lms.rss when one array is the other shifted by a constant.

        For every offset from 1 to 14 the expected result is the array
        length multiplied by the squared offset.
        '''
        base = np.arange(100)

        for offset in range(1, 15):
            shifted = base + offset

            self.assertEqual(lms.rss(base, shifted), base.size * offset**2)
Пример #6
0
    def test_rss_zero(self):
        '''
        .rss() should be zero when both inputs are identical.

        Checked once with integer arrays (np.arange) and once with float
        arrays (np.linspace).
        '''
        expected = 0

        # Integer inputs.
        Y1 = np.arange(100)
        Y2 = np.arange(100)

        self.assertEqual(lms.rss(Y1, Y2), expected)

        # Float inputs. The result is recomputed here so that this assertion
        # exercises the linspace arrays instead of re-checking the value
        # obtained from the integer arrays above.
        Y1 = np.linspace(0, 100, 3)
        Y2 = np.linspace(0, 100, 3)

        self.assertEqual(lms.rss(Y1, Y2), expected)
Пример #7
0
    def test_standard_error_advertising_full_X(self):
        '''
        Print the lms.standard_error result for TV, radio and newspaper next
        to its reference value (ISLR pg. 74 per the original author), using
        the model fit on all three columns.

        NOTE(review): nothing is asserted here -- the values are only
        printed and `places` is unused -- so this test cannot fail on a
        wrong value. Confirm whether an assertion was intended.
        '''
        reference_errors = [('TV', 0.001395), ('radio', 0.008611),
                            ('newspaper', 0.005871)]
        places = 4

        advertising = pd.read_csv('Advertising.csv')

        # Response is sales; predictors are all three advertising columns.
        Y = advertising['sales']
        X = pd.DataFrame(advertising[['TV', 'radio', 'newspaper']])

        # Fit the linear model and predict on the same data it was fit on.
        model = LinearRegression()
        model.fit(X, Y)
        fitted = model.predict(X)

        # The squared rse is shared by every column's standard error call.
        residual_ss = lms.rss(Y.values, fitted)
        rse = lms.rse(residual_ss, Y.values.size, X.columns.size)
        var = rse**2

        for column, expected in reference_errors:
            actual = lms.standard_error(var, X[column].values)

            print(column)
            print('Actual:\t\t', actual)
            print('Expected:\t', expected)
            print()
Пример #8
0
    def test_rse_basic(self):
        '''
        Check lms.rse on a four-element case that can be worked out by hand.

        The same rss (14) is passed with two different values of the third
        argument, and each result is compared to seven decimal places.
        '''
        first = np.array([1, 3, 7, 11])
        second = np.array([0, 6, 9, 11])

        # Differences are 1, -3, -2, 0, so rss = 14.
        residual_ss = lms.rss(first, second)

        # (third argument to lms.rse, expected result); the third argument
        # is presumably the number of predictors -- verify against lms.rse.
        cases = [
            (2, 3.74165738677),  # sqrt of 14
            (1, 2.64575131106),  # sqrt of 7
        ]

        for num_predictors, expected in cases:
            actual = lms.rse(residual_ss, first.size, num_predictors)

            self.assertAlmostEqual(actual, expected, places=7)