Exemplo n.º 1
0
def test_ridge():
    """Compare the home-made ridge regression with scikit-learn's.

    Random points (x, y) in the unit square are used to build three target
    surfaces (two polynomials and the Franke function). For each target the
    coefficients returned by ``LinReg.ridge`` must agree, within the
    ``np.allclose`` tolerance, with those of ``RidgeCV`` fitted with the same
    single penalty and no intercept on scikit-learn's polynomial features.
    """
    n_points = 100
    degree = 3
    penalty = 1.0

    x = np.random.uniform(0, 1, (n_points, 1))
    y = np.random.uniform(0, 1, (n_points, 1))

    targets = (
        2 * x**2 + 2 * x * y,
        3 * y**4,
        FrankeFunction(x, y),
    )

    for z in targets:
        # Coefficients from the implementation under test.
        model = LinReg(x, y, z, degree)
        model.lamb = penalty
        beta = model.ridge()

        # Reference coefficients from scikit-learn on its own design matrix.
        features = PolynomialFeatures(degree=degree)
        XY_sk = features.fit_transform(np.hstack((x, y)))
        reference = RidgeCV([penalty], fit_intercept=False)
        reference.fit(XY_sk, z)
        beta_sk = reference.coef_.reshape(-1, 1)

        assert np.allclose(beta, beta_sk)
Exemplo n.º 2
0
def boot_stats():
    """Bootstrap each regression method and print bias/variance statistics.

    One ``LinReg`` model is built from the module-level ``X`` and ``Y``.
    Ridge and lasso are run through ``model.bootstrap(100, method)`` for ten
    penalties log-spaced from 1e-4 to 1e5; OLS is run once only, with the
    first penalty of that grid set on the model. Every run contributes one
    row (train/test MSE, train/test R2, bias, variance) to the printed table.
    """
    penalty_grid = np.logspace(-4, 5, 10)
    model = LinReg(X, Y)

    rows = []
    print("Bootstrapping models:")
    for name in ('ols', 'ridge', 'lasso'):
        fit = getattr(model, name)

        # OLS gets a single run, so it only sees the first grid point.
        penalties = penalty_grid[:1] if name == 'ols' else penalty_grid

        for penalty in penalties:
            model.lamb = penalty
            (bias, variance, mse_train, mse_test,
             r2_train, r2_test) = model.bootstrap(100, fit)
            rows.append((name, penalty, mse_train, mse_test,
                         r2_train, r2_test, bias, variance))

    rule = "-" * 85
    row_format = "%8s|%8g|%11g|%10g|%10f|%10f|%10f|%10g|"

    print("\nMODEL ANALYSIS (BOOTSTRAP):")
    print("=" * 85)
    print(
        " Method | lambda | MSE Train | MSE Test | R2 Train |  R2 Test |   Bias   | Variance"
    )
    print(rule)

    for row in rows:
        print(row_format % row)

    print(rule)
Exemplo n.º 3
0
def test_design_matrix():
    """Compare the self-written design matrix with scikit-learn's.

    For polynomial degrees 2, 10 and 20 the ``XY`` attribute of ``LinReg``
    must equal, within the ``np.allclose`` tolerance, the matrix produced by
    ``PolynomialFeatures.fit_transform`` on the same random (x, y) points.
    """
    n_points = 100

    x = np.random.uniform(0, 1, (n_points, 1))
    y = np.random.uniform(0, 1, (n_points, 1))
    z = x + y

    for degree in (2, 10, 20):
        design = LinReg(x, y, z, degree).XY

        features = PolynomialFeatures(degree=degree)
        design_sk = features.fit_transform(np.hstack((x, y)))

        assert np.allclose(design, design_sk)
Exemplo n.º 4
0
def plot_stuff():
    """Plot the matrices of coupling constants fitted by each method.

    Draws a 2x3 grid of heat maps: one row per regularization strength
    (1e-4 and 1e-2) and one column per method (OLS, ridge, LASSO). Each
    coefficient vector is reshaped to ``(L, L)`` using the module-level ``L``
    and shown on a shared [-1, 1] 'seismic' colour scale. The LASSO panel of
    each row carries the colour bar. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    model = LinReg(X, Y)

    fig, axarr = plt.subplots(nrows=2, ncols=3)
    cmap_args = dict(vmin=-1., vmax=1., cmap='seismic')
    cbar_ticks = np.arange(-1.0, 1.0 + 0.25, 0.25)

    lambdas = [0.0001, 0.01]
    for (ax_ols, ax_ridge, ax_lasso), lamb in zip(axarr, lambdas):

        model.lamb = lamb

        J_ols = model.ols().reshape(L, L)
        J_ridge = model.ridge().reshape(L, L)
        J_lasso = model.lasso().reshape(L, L)

        # Raw strings keep the LaTeX backslashes (including the "\ " spacing
        # command) without relying on invalid escape sequences.
        ax_ols.imshow(J_ols, **cmap_args)
        ax_ols.set_title(r'$\mathrm{OLS}$', fontsize=16)
        ax_ols.tick_params(labelsize=16)

        ax_ridge.imshow(J_ridge, **cmap_args)
        ax_ridge.set_title(r'$\mathrm{Ridge},\ \lambda=%.4f$' % lamb,
                           fontsize=16)
        ax_ridge.tick_params(labelsize=16)

        im = ax_lasso.imshow(J_lasso, **cmap_args)
        ax_lasso.set_title(r'$\mathrm{LASSO},\ \lambda=%.4f$' % lamb,
                           fontsize=16)
        ax_lasso.tick_params(labelsize=16)

        # Attach a narrow colour bar to the right of the LASSO panel.
        divider = make_axes_locatable(ax_lasso)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im, cax=cax)

        # Pin the tick positions first so the labels below are guaranteed to
        # line up with them instead of with whatever ticks were auto-chosen.
        cbar.set_ticks(cbar_ticks)
        cbar.ax.set_yticklabels(cbar_ticks, fontsize=14)
        cbar.set_label('$J_{i,j}$',
                       labelpad=-40,
                       y=1.12,
                       fontsize=16,
                       rotation=0)

    plt.show()
Exemplo n.º 5
0
def stats():
    """Score every regression method over a grid of penalties and print it.

    One ``LinReg`` model is built from the module-level ``X`` and ``Y``.
    Ridge and lasso are fitted on the training split for ten penalties
    log-spaced from 1e-4 to 1e5; OLS is fitted once only, with the first
    penalty of that grid set on the model. For each fit the MSE and R2 on the
    training and test splits are computed and printed as one table row.
    """
    penalty_grid = np.logspace(-4, 5, 10)
    model = LinReg(X, Y)

    rows = []
    for name in ('ols', 'ridge', 'lasso'):
        fit = getattr(model, name)

        # OLS gets a single run, so it only sees the first grid point.
        penalties = penalty_grid[:1] if name == 'ols' else penalty_grid

        for penalty in penalties:
            model.lamb = penalty

            coeffs = fit(model.xTrain, model.yTrain)
            pred_train = model.xTrain @ coeffs
            pred_test = model.xTest @ coeffs

            rows.append((
                name,
                penalty,
                model.MSE(model.yTrain, pred_train),
                model.MSE(model.yTest, pred_test),
                model.R2(model.yTrain, pred_train),
                model.R2(model.yTest, pred_test),
            ))

    rule = "-" * 85
    row_format = "%8s|%8g|%11g|%10g|%10f|%10f|"

    print("\nMODEL ANALYSIS:")
    print("=" * 85)
    print(" Method | lambda | MSE Train | MSE Test | R2 Train |  R2 Test |")
    print(rule)

    for row in rows:
        print(row_format % row)

    print(rule)
Exemplo n.º 6
0
import matplotlib.pylab as plt
from matplotlib.ticker import MaxNLocator
import plotparams

N = 1500

x = np.random.uniform(0, 1, (N, 1))
y = np.random.uniform(0, 1, (N, 1))

for deg in [2, 5]:
    for sigma, noisy in zip([0, 0.5], ['', '_noisy']):
        noise = sigma * np.random.randn(N, 1)
        #z = 2*x**3 + y**2 + noise
        z = FrankeFunction(x, y) + noise

        a = LinReg(x, y, z, deg)
        a.split_data(frac=0.1)
        beta_ols = a.ols(a.XY_Train, a.z_Train)
        zpredict = a.XY_Test @ beta_ols
        mse = a.MSE(a.z_Test, zpredict)
        var = np.diag(a.var_ols)
        conf = a.conf_ols
        #print(beta_ols)
        #print(var)
        #print(conf)
        print("MSE: ", mse)

        ax = plt.figure().gca()

        ax.errorbar(range(len(var)),
                    beta_ols,