Example #1
 def fit(self, X, y, X_val=None, y_val=None):
     X = normalize(polynomial_features(X, degree=self.degree))
     X_val = None if X_val is None else normalize(
         polynomial_features(X_val, degree=self.degree))
     super(PolynomialRegression, self).fit(X=X,
                                           y=y,
                                           X_val=X_val,
                                           y_val=y_val)
def test_polynomial_features():
    from utils import polynomial_features

    x = np.random.random((3, 2)).tolist()
    result = []
    for k in [2, 3]:
        result.append('[TEST polynomial_features],' +
                      weights_to_string(polynomial_features(x, k=k)))
    return result
 def test_data_procession(self):
     features, values = generate_data_part_3()
     train_features, train_values = features[:100], values[:100]
     valid_features, valid_values = features[100:120], values[100:120]
     test_features, test_values = features[120:], values[120:]
     assert len(train_features) == len(train_values) == 100
     assert len(valid_features) == len(valid_values) == 20
     assert len(test_features) == len(test_values) == 30
     best_mse, best_k = 1e10, -1
     for k in [1, 3, 10]:
         train_features_extended = polynomial_features(train_features, k)
         model = LinearRegression(nb_features=k)
         model.train(train_features_extended, train_values)
         train_mse = mean_squared_error(train_values, model.predict(train_features_extended))
         valid_features_extended = polynomial_features(valid_features, k)
         valid_mse = mean_squared_error(valid_values, model.predict(valid_features_extended))
         print(f'[part 1.4.1]\tk: {k:d}\t'
               f'train mse: {train_mse:.5f}\tvalid mse: {valid_mse:.5f}')
         if valid_mse < best_mse:
             best_mse, best_k = valid_mse, k
 def test_add_polynomial_feature(self):
     features, values = generate_data_part_2()
     plt.scatter([x[0] for x in features], values, label='origin');
     for k in [2, 4, 10]:
         # TODO: confirm polynomial_features expands the features up to degree k
         features_extended = polynomial_features(features, k)
         model = LinearRegression(nb_features=k)
         model.train(features_extended, values)
         mse = mean_squared_error(values, model.predict(features_extended))
         print(f'[part 1.3.2]\tk: {k:d}\tmse: {mse:.5f}')
         plt.plot([x[0] for x in features], model.predict(features_extended), label=f'k={k}');
         plt.legend()
         plt.show()
 def predict(self, X_test, eti=False):
     X_test = np.array(X_test)
     # if polynomial features were used, apply the same transformation
     if self.poly_degree:
         X_test = polynomial_features(X_test, degree=self.poly_degree)
     y_pred = np.dot(X_test, self.w)
     #if the lower and upper boundaries for the 95% equal tail interval should be returned
     if eti:
         lower_w = self.eti[:, 0]
         upper_w = self.eti[:, 1]
         y_lower_pred = np.dot(X_test, lower_w)
         y_upper_pred = np.dot(X_test, upper_w)
         return y_pred, y_lower_pred, y_upper_pred
     return y_pred
Example #6
    def fit(self, X, y):
        #if polynomial transformation
        if self.poly_degree:
            X = polynomial_features(X, degree=self.poly_degree)

        n_samples, n_features = np.shape(X)
        X_X = X.T.dot(X)

        #least squares approximate of beta
        beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y)

        # the posterior parameters can be determined analytically
        # since we assume conjugate priors for the likelihoods.

        #normal prior / likelihood => Normal posterior
        mu_n = np.linalg.pinv(X_X + self.omega0).dot(
            X_X.dot(beta_hat) + self.omega0.dot(self.mu0))
        omega_n = X_X + self.omega0
        #scaled inverse chi-squared prior / likelihood => scaled inverse chi-squared posterior
        nu_n = self.nu0 + n_samples
        sigma_sq_n = (1.0 / nu_n) * (
            self.nu0 * self.sigma_sq0 +
            (y.T.dot(y) + self.mu0.T.dot(self.omega0).dot(self.mu0) -
             mu_n.T.dot(omega_n.dot(mu_n))))

        # simulate parameter values for n_draws
        beta_draws = np.empty((self.n_draws, n_features))
        for i in range(self.n_draws):
            sigma_sq = self._draw_scaled_inv_chi_sq(n=1,
                                                    df=nu_n,
                                                    scale=sigma_sq_n)
            beta = multivariate_normal.rvs(size=1,
                                           mean=mu_n[:, 0],
                                           cov=sigma_sq *
                                           np.linalg.pinv(omega_n))
            #save parameter draws
            beta_draws[i, :] = beta

        #Select the mean of the simulated variables as the ones used to make predictions
        self.w = np.mean(beta_draws, axis=0)

        # lower and upper boundary of the credible interval
        l_eti = 50 - self.cred_int / 2
        u_eti = 50 + self.cred_int / 2
        self.eti = np.array([[
            np.percentile(beta_draws[:, i], q=l_eti),
            np.percentile(beta_draws[:, i], q=u_eti)
        ] for i in range(n_features)])
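
The credible interval at the end of fit() is an equal tail interval read off the posterior draws with np.percentile. A tiny self-contained illustration of that percentile logic (toy data, not part of the original code):

import numpy as np

# Toy illustration: with cred_int = 95, the equal tail interval is bounded by
# the 2.5th and 97.5th percentiles of the simulated draws.
rng = np.random.default_rng(0)
draws = rng.normal(loc=1.0, scale=0.5, size=10_000)
cred_int = 95
l_eti, u_eti = 50 - cred_int / 2, 50 + cred_int / 2
print(np.percentile(draws, q=l_eti), np.percentile(draws, q=u_eti))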
Example #7
    def predict(self, X, eti=False):
        #if polynomial transformation
        if self.poly_degree:
            X = polynomial_features(X, degree=self.poly_degree)

        y_pred = X.dot(self.w)
        # if the lower and upper boundaries for the 95%
        # equal tail interval should be returned
        if eti:
            lower_w = self.eti[:, 0]
            upper_w = self.eti[:, 1]
            y_lower_pred = X.dot(lower_w)
            y_upper_pred = X.dot(upper_w)
            return y_pred, y_lower_pred, y_upper_pred

        return y_pred
Example #8
    def fit(self, X, y):
        X = normalize(polynomial_features(X, degree=self.degree))
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        self.training_errors = []
        n_features = X.shape[1]

        # Initialize weights randomly [-1/N, 1/N]
        limit = 1 / math.sqrt(n_features)
        self.w = np.random.uniform(-limit, limit, (n_features, ))

        # Do gradient descent for n_iterations
        for i in range(self.n_iterations):
            y_pred = X.dot(self.w)
            # Squared-error loss plus l1 regularization penalty
            mse = np.mean(0.5 * (y - y_pred)**2 +
                          self.alpha * np.linalg.norm(self.w, 1))
            self.training_errors.append(mse)
            # Gradient of the loss w.r.t. w (the sign term comes from the l1 penalty)
            grad_w = -(y - y_pred).dot(X) + self.alpha * np.sign(self.w)
            # Update the weights
            self.w -= self.learning_rate * grad_w
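
As a sanity check on the update above, the gradient of the squared-error term plus an l1 penalty can be verified numerically. This is a hypothetical standalone sketch with its own toy data; none of these names come from the original code:

import numpy as np

# Finite-difference check of: grad = -(y - Xw).dot(X) + alpha * sign(w)
rng = np.random.default_rng(1)
X = rng.normal(size=(50, 3))
w = rng.normal(size=3)
y = X.dot(w) + rng.normal(scale=0.1, size=50)
alpha = 0.1

def loss(w):
    return np.sum(0.5 * (y - X.dot(w)) ** 2) + alpha * np.linalg.norm(w, 1)

grad = -(y - X.dot(w)).dot(X) + alpha * np.sign(w)
eps = 1e-6
num_grad = np.array([(loss(w + eps * e) - loss(w - eps * e)) / (2 * eps)
                     for e in np.eye(3)])
print(np.allclose(grad, num_grad, atol=1e-4))  # expected: True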
Example #9
print('[part 1.3.1]\tmse: {mse:.5f}'.format(mse=mse))

plt.scatter([x[0] for x in features], values, label='origin');
plt.plot([x[0] for x in features], model.predict(features), label='predicted');
plt.legend()

from utils import polynomial_features

features, values = generate_data_part_2()
features = data[2]
values = data[3]

plt.scatter([x[0] for x in features], values, label='origin');

for k in [2, 4, 8]:
    features_extended = polynomial_features(features, k)
    #print(features_extended)
    model = LinearRegression(nb_features=k)
    model.train(features_extended, values)
    mse = mean_squared_error(values, model.predict(features_extended))
    print('[part 1.3.2]\tk: {k:d}\tmse: {mse:.5f}'.format(k=k, mse=mse))
    plt.plot([x[0] for x in features], model.predict(features_extended), label='k={k}'.format(k=k));
plt.legend()

from data import generate_data_part_3

features, values = generate_data_part_3()
features = data[4]
values = data[5]

train_features, train_values = features[:100], values[:100]
Example #10
 def predict(self, X):
     X = normalize(polynomial_features(X, degree=self.degree))
     return super(ElasticNet, self).predict(X)
Example #11
 def fit(self, X, y):
     X = normalize(polynomial_features(X, degree=self.degree))
     super(ElasticNet, self).fit(X, y)
Example #12
 def predict(self, X):
     X = normalize(polynomial_features(X, degree=self.degree))
     return super(PolynomialRidgeRegression, self).predict(X)
Example #13
 def fit(self, X, y):
     X = normalize(polynomial_features(X, degree=self.degree))
     super(PolynomialRidgeRegression, self).fit(X, y)
Example #14
 def predict(self, X):
     X = polynomial_features(X, degree=self.degree)
     return super(PolynomialRegression, self).predict(X)
Example #15
 def fit(self, X, y):
     X = polynomial_features(X, degree=self.degree)
     super(PolynomialRegression, self).fit(X, y)
 def test_polynomial_features(self):
     features = [[1.0, 2, 3], [4.0, 5, 6], [7.0, 8, 9]]
     assert_array_almost_equal(
         numpy.array([[1.0, 2, 3, 1, 4, 9], [4.0, 5, 6, 16, 25, 36],
                      [7.0, 8, 9, 49, 64, 81]]),
         numpy.array(polynomial_features(features, 2)))
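
The test above pins down the expected shape of polynomial_features: the original columns followed by their element-wise powers, with no interaction terms. A minimal sketch consistent with that test (the actual implementation used by these examples is not shown here, and some snippets pass the degree as k instead of degree, so treat this as an assumption):

import numpy as np

def polynomial_features(X, degree):
    # Append each feature raised to powers 2..degree after the original columns
    # (no cross terms), matching the expected output in the test above.
    X = np.asarray(X, dtype=float)
    return np.hstack([X] + [X ** p for p in range(2, degree + 1)])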
Example #17
 def predict(self, X_test):
     X_test = normalize(polynomial_features(X_test, degree=self.degree))
     return super(PolynomialRegression, self).predict(X_test=X_test)
Example #18
 def fit(self, X, y):
     X = polynomial_features(X, degree=self.degree)
     super().fit(X, y)
Example #19
 def predict(self, X):
     X = normalize(polynomial_features(X, degree=self.degree))
     # Insert constant ones for bias weights
     X = np.insert(X, 0, 1, axis=1)
     y_pred = X.dot(self.w)
     return y_pred
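
The np.insert(X, 0, 1, axis=1) call above is the usual bias-column trick, so that self.w[0] plays the role of the intercept. A quick illustration (the values here are arbitrary):

import numpy as np

X = np.array([[2.0, 3.0], [4.0, 5.0]])
print(np.insert(X, 0, 1, axis=1))
# [[1. 2. 3.]
#  [1. 4. 5.]]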