def _kernel_regression_and_rolling_mean(self, X, y, window):
    """
    Apply Nadaraya-Watson kernel regression to the time series, then smooth
    the predictions with a rolling mean.
    :param X: array-like of shape = [n_samples, n_features]
        The training input samples.
    :param y: array-like, shape = [n_samples]
        The target values.
    :param window: int
        Size of the moving window, i.e. the number of observations used to
        calculate each statistic.
    :return: tuple of 3 array-like, shape = [n_samples]
        Predicted target values, rolling mean of the predictions and
        rolling std of the target values.
    """
    kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))
    y_kr = kr.fit(X, y).predict(X)
    # pd.rolling_mean/pd.rolling_std were removed from pandas; use the
    # Series.rolling API instead (the data is 1-D, so no axis argument).
    y_rm = pd.Series(y_kr).rolling(window=window).mean().values
    y_std = pd.Series(y).rolling(window=window).std().values
    return y_kr, y_rm, y_std
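
# Hedged usage sketch (illustrative, not part of the original source): assuming
# `model` is an instance of the class that defines the helper above, a noisy
# series could be smoothed like this:
#
#     X_demo = np.linspace(0, 10, 200).reshape(-1, 1)
#     y_demo = np.sin(X_demo.ravel()) + 0.2 * np.random.randn(200)
#     y_kr, y_rm, y_std = model._kernel_regression_and_rolling_mean(
#         X_demo, y_demo, window=10)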
###############################################################################
# Fit regression models
svr = GridSearchCV(SVR(kernel='rbf'),
                   cv=5,
                   param_grid={
                       "C": [1e-1, 1e0, 1e1, 1e2],
                       "gamma": np.logspace(-2, 2, 10)
                   })
kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))
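# Passing an array of gamma values lets KernelRegression select the bandwidth
# internally (cf. the commented kreg._optimize_gamma call in a later snippet),
# while GridSearchCV tunes the SVR hyperparameters by 5-fold CV.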
t0 = time.time()
y_svr = svr.fit(X, y).predict(X)
print "SVR complexity and bandwidth selected and model fitted in %.3f s" \
    % (time.time() - t0)
t0 = time.time()
y_kr = kr.fit(X, y).predict(X)
print "KR including bandwith fitted in %.3f s" \
    % (time.time() - t0)

###############################################################################
# Visualize models
plt.scatter(X, y, c='k', label='data')
plt.plot(X, y_kr, c='g', label='Kernel Regression')
plt.plot(X, y_svr, c='r', label='SVR')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Kernel regression versus SVR')
plt.legend()

# Visualize learning curves
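# A minimal sketch of the truncated learning-curve code, assuming the svr and
# kr estimators from above and sklearn's learning_curve helper (the complete
# version appears in Example #7 below):
#
#     train_sizes, _, test_scores_svr = learning_curve(
#         svr, X, y, train_sizes=np.linspace(0.1, 1, 10),
#         scoring="neg_mean_squared_error", cv=10)
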
    if AUTOSCALE:
        scalefactor = max((1.0, obsdf.max().max(), fcdf.max().max()))
    else:
        scalefactor = 1.0
    scalefactors[node] = scalefactor

    outmeandict[node] = {}
    outvardict[node] = {}
    # Pull out each node's columns separately.
    for (k1, fcc), (k2, obsc) in zip(fcdf.items(), obsdf.items()):
        # print(k1)
        obsc = obsc.dropna() / scalefactor
        # .ix was removed from pandas; align on the observed index with .loc
        fcc = fcc.loc[obsc.index] / scalefactor

        # Kernel regression of observations on the forecast => conditional mean
        kreg.fit(fcc.values.reshape(-1, 1), obsc)
        meanpredict = kreg.predict(testx.reshape(-1, 1))
        # Select optimal bandwidth
        # kreg.gamma = kreg._optimize_gamma(gammas)

        # Our predicted mean based on point forecast
        # prediction = np.polyval(meanpolycoeff, fcc)
        prediction = np.interp(fcc, testx, meanpredict)
        # Calculate errors squared
        err2 = (prediction - obsc)**2
        # Fit variance curve
        kreg.fit(fcc.values.reshape(-1, 1), err2)
        varpredict = kreg.predict(testx.reshape(-1, 1))
        # Select optimal bandwidth
        # kreg.gamma = kreg._optimize_gamma(gammas)
        # Save the coefficients of the polynomial fit.
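        # Hedged sketch (the original snippet is truncated here): given the
        # outmeandict/outvardict initialisation above, the fitted curves could
        # plausibly be stored per column, e.g.
        #     outmeandict[node][k1] = meanpredict
        #     outvardict[node][k1] = np.maximum(varpredict, 0.0)  # variance >= 0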
###############################################################################
# Add noise to targets
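# (draws uniform noise in the interval (-0.25, 0.25])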
y += 0.5 * (0.5 - np.random.rand(y.size))

###############################################################################
# Fit regression models
svr = GridSearchCV(SVR(kernel='rbf'), cv=5,
                   param_grid={"C": [1e-1, 1e0, 1e1, 1e2],
                               "gamma": np.logspace(-2, 2, 10)})
kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))
t0 = time.time()
y_svr = svr.fit(X, y).predict(X)
print("SVR complexity and bandwidth selected and model fitted in %.3f s" \
    % (time.time() - t0))
t0 = time.time()
y_kr = kr.fit(X, y).predict(X)
print("KR including bandwith fitted in %.3f s" \
    % (time.time() - t0))

###############################################################################
# Visualize models
plt.scatter(X, y, c='k', label='data')
plt.plot(X, y_kr, c='g', label='Kernel Regression')
plt.plot(X, y_svr, c='r', label='SVR')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Kernel regression versus SVR')
plt.legend()

# Visualize learning curves (truncated; see Example #7 for the full code)
Example #5
import pandas as pd
from statsmodels.nonparametric.api import KernelReg
from kernel_regression import KernelRegression
from sklearn.model_selection import train_test_split as sk_split
import numpy as np

df = pd.read_csv("abalone.data", header=None)
print(df.shape)
X = df.loc[:, 1:7].to_numpy()
y = df.loc[:, 8].to_numpy().reshape(-1, 1)
print(X.shape)
print(y.shape)
X_train, X_test, y_train, y_test = sk_split(X, y, test_size=0.20)
kr = KernelRegression(kernel="rbf")
kr.fit(X_train, y_train)
print(len(X_test))
# Memory issues: split X_test into chunks; just two chunks shown here
X_test_1 = X_test[0:100, :]
X_test_2 = X_test[100:200, :]
pred_y_1 = kr.predict(X_test_1)
pred_y_2 = kr.predict(X_test_2)
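
# Hedged follow-up (not in the original): the chunked predictions could be
# scored against the matching targets, e.g. with mean squared error.
mse_1 = np.mean((pred_y_1.ravel() - y_test[0:100].ravel()) ** 2)
mse_2 = np.mean((pred_y_2.ravel() - y_test[100:200].ravel()) ** 2)
print("chunk MSEs: %.3f / %.3f" % (mse_1, mse_2))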

Example #6
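# Assumed context for this example (not shown in the snippet): numpy as np,
# matplotlib.pyplot as plt, and the project's LinearRegression,
# KernelRegression and random_regression_problem helpers.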
def plot_regression():
    np.random.seed(12345)
    fig, axes = plt.subplots(4, 4)
    for i, ax in enumerate(axes.flatten()):
        n_in = 1
        n_out = 1
        d = np.random.randint(1, 5)
        n_ex = np.random.randint(5, 500)
        std = np.random.randint(0, 1000)
        intercept = np.random.rand() * np.random.randint(-300, 300)
        X_train, y_train, X_test, y_test, coefs = random_regression_problem(
            n_ex, n_in, n_out, d=d, intercept=intercept, std=std, seed=i)

        LR = LinearRegression(fit_intercept=True)
        LR.fit(X_train, y_train)
        y_pred = LR.predict(X_test)
        loss = np.mean((y_test.flatten() - y_pred.flatten())**2)

        d = 3
        best_loss = np.inf
        for gamma in np.linspace(1e-10, 1, 100):
            for c0 in np.linspace(-1, 1000, 100):
                kernel = "PolynomialKernel(d={}, gamma={}, c0={})".format(
                    d, gamma, c0)
                KR_poly = KernelRegression(kernel=kernel)
                KR_poly.fit(X_train, y_train)
                y_pred_poly = KR_poly.predict(X_test)
                loss_poly = np.mean(
                    (y_test.flatten() - y_pred_poly.flatten())**2)
                if loss_poly <= best_loss:
                    KR_poly_best = kernel
                    best_loss = loss_poly

        print("Best kernel: {} || loss: {:.4f}".format(KR_poly_best,
                                                       best_loss))
        KR_poly = KernelRegression(kernel=KR_poly_best)
        KR_poly.fit(X_train, y_train)

        KR_rbf = KernelRegression(kernel="RBFKernel(gamma=0.01)")
        KR_rbf.fit(X_train, y_train)
        y_pred_rbf = KR_rbf.predict(X_test)
        loss_rbf = np.mean((y_test.flatten() - y_pred_rbf.flatten())**2)

        xmin = min(X_test) - 0.1 * (max(X_test) - min(X_test))
        xmax = max(X_test) + 0.1 * (max(X_test) - min(X_test))
        X_plot = np.linspace(xmin, xmax, 100)
        y_plot = LR.predict(X_plot)
        y_plot_poly = KR_poly.predict(X_plot)
        y_plot_rbf = KR_rbf.predict(X_plot)

        ax.scatter(X_test, y_test, alpha=0.5)
        ax.plot(X_plot, y_plot, label="OLS", alpha=0.5)
        ax.plot(X_plot,
                y_plot_poly,
                label="KR (poly kernel, d={})".format(d),
                alpha=0.5)
        ax.plot(X_plot, y_plot_rbf, label="KR (rbf kernel)", alpha=0.5)
        ax.legend()
        #  ax.set_title(
        #      "MSE\nLR: {:.2f} KR (poly): {:.2f}\nKR (rbf): {:.2f}".format(
        #          loss, loss_poly, loss_rbf
        #      )
        #  )

        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])

    plt.tight_layout()
    plt.savefig("img/kr_plots.png", dpi=300)
    plt.close("all")
Example #7
    def multivar_regress(self):
        #     X, y = self.regression_data()
        X, y = self.regression_data_split()
        X = np.array(X)
        y = np.array(y)

        pb = X[:, 0].argsort()
        Xb = X[pb]
        yb = y[pb]

        X1 = np.delete(X, 1, 1)
        p1 = X1[:, 0].argsort()
        X1 = X1[p1]
        y1 = y[p1]

        X2 = np.delete(X, 0, 1)
        p2 = X2[:, 0].argsort()
        X2 = X2[p2]
        y2 = y[p2]

        x_range = np.arange(0, 0.025, 0.001)  # generate a mesh
        y_range = np.arange(0, 1.3, 0.02)
        x_surf, y_surf = np.meshgrid(x_range, y_range)
        Xpred = np.stack((x_surf.flatten(), y_surf.flatten()), axis=1)

        svr = GridSearchCV(SVR(kernel='rbf'),
                           cv=5,
                           param_grid={
                               "C": [1e-1, 1e0, 1e1, 1e2],
                               "gamma": np.logspace(-2, 2, 10)
                           })
        kr = KernelRegression(kernel="rbf", gamma=np.logspace(-2, 2, 10))
        t0 = time.time()
        y_svrb = svr.fit(Xb, yb).predict(Xpred)
        print(
            "SVR complexity and bandwidth selected and model fitted in %.3f s"
            % (time.time() - t0))

        score_svr = svr.score(Xb, yb)
        y_svr1 = svr.fit(X1, y1).predict(np.expand_dims(x_range, 1))
        score_svr1 = svr.score(X1, y1)
        y_svr2 = svr.fit(X2, y2).predict(np.expand_dims(y_range, 1))
        score_svr2 = svr.score(X2, y2)

        t0 = time.time()
        y_krb = kr.fit(Xb, yb).predict(Xpred)
        print("KR including bandwith fitted in %.3f s" % (time.time() - t0))

        score_kr = kr.score(Xb, yb)
        y_kr1 = kr.fit(X1, y1).predict(np.expand_dims(x_range, 1))
        score_kr1 = kr.score(X1, y1)
        y_kr2 = kr.fit(X2, y2).predict(np.expand_dims(y_range, 1))
        score_kr2 = kr.score(X2, y2)

        print('R^2 / coeff determination:')
        print('  SVR model: cls_score=%0.3f bbox_pred=%0.3f both=%0.3f' %
              (score_svr1, score_svr2, score_svr))
        print('  KR model: cls_score=%0.3f bbox_pred=%0.3f both=%0.3f' %
              (score_kr1, score_kr2, score_kr))

        #     R^2 / coeff determination:
        #   SVR model: cls_score=0.675 bbox_pred=0.518 both=0.512
        #   KR model: cls_score=0.848 bbox_pred=0.320 both=0.881

        ###############################################################################
        # Visualize models
        #     fig = plt.figure()
        #     ax = fig.gca(projection='3d')               # to work in 3d
        #
        #     z_surf = np.reshape(y_krb, x_surf.shape)
        #     surf = ax.plot_surface(x_surf, y_surf, z_surf, cmap=cm.coolwarm, alpha=0.5, rstride=1, cstride=1);    # plot a 3d surface plot
        #     fig.colorbar(surf, shrink=0.5, aspect=5)
        #
        #     ax.scatter(X[:,0], X[:,1], y, s=1, c='k')                        # plot a 3d scatter plot
        #
        #     ax.set_xlabel('cls_score', fontsize=16)
        #     ax.set_ylabel('bbox_pred', fontsize=16)
        #     ax.set_zlabel('mAP', fontsize=16)
        #     plt.show()

        fig = plt.figure()
        plt.scatter(X1[:, 0], y1, c='k', s=1, label='data')
        #     plt.plot(x_range, y_kr1, c='g', label='Kernel Regression')
        #     plt.plot(x_range, y_svr1, c='r', label='SVR')
        plt.xlabel('cls_score')
        plt.ylabel('mAP')
        plt.ylim(0, 0.85)
        #     plt.title('Classification score difference as proxy for model performance/')
        plt.legend()
        plt.show()

        fig = plt.figure()
        plt.scatter(X2[:, 0], y2, c='k', s=1, label='data')
        #     plt.plot(y_range, y_kr2, c='g', label='Kernel Regression')
        #     plt.plot(y_range, y_svr2, c='r', label='SVR')
        plt.xlabel('bbox_pred')
        plt.ylabel('mAP')
        plt.ylim(0, 0.85)
        #     plt.title('Kernel regression versus SVR')
        plt.legend()
        plt.show()

        # Visualize learning curves
        plt.figure()
        train_sizes, train_scores_svr, test_scores_svr = \
            learning_curve(svr, X, y, train_sizes=np.linspace(0.1, 1, 10),
                           scoring="neg_mean_squared_error", cv=10)
        train_sizes_abs, train_scores_kr, test_scores_kr = \
            learning_curve(kr, X, y, train_sizes=np.linspace(0.1, 1, 10),
                           scoring="neg_mean_squared_error", cv=10)
        plt.plot(train_sizes,
                 test_scores_svr.mean(1),
                 'o-',
                 color="r",
                 label="SVR")
        plt.plot(train_sizes,
                 test_scores_kr.mean(1),
                 'o-',
                 color="g",
                 label="Kernel Regression")
        plt.yscale("symlog", linthreshy=1e-7)
        plt.ylim(-10, -0.01)
        plt.xlabel("Training size")
        plt.ylabel("Mean Squared Error")
        plt.title('Learning curves')
        plt.legend(loc="best")
        plt.show()