def regression_kriging_evaluation(year, features_=None):
    """Leave-one-out evaluation of a random-forest regression-kriging model.

    For every held-out index ``k`` the function asks ``build_features`` for
    a train/test split, fits a ``RegressionKriging`` model on the remaining
    coordinates, predicts the held-out sample and records the absolute
    error. The mean absolute error and its ratio to ``np.mean(Y)`` are
    printed before returning.

    Side effect: ``DeprecationWarning`` is silenced process-wide through
    ``warnings.filterwarnings``.

    Parameters
    ----------
    year
        Forwarded unchanged to ``extract_raw_samples``.
    features_ : list, optional
        Forwarded to ``build_features`` as ``features``. When omitted (or
        ``None``) a fresh ``['all']`` list is used, so no list object is
        shared between calls.

    Returns
    -------
    list
        One ``abs(Y_test - z[0])`` entry per held-out index, in index order.
    """
    from pykrige.rk import RegressionKriging
    from sklearn.ensemble import RandomForestRegressor
    import warnings

    warnings.filterwarnings("ignore", category=DeprecationWarning)

    if features_ is None:
        features_ = ['all']

    rf_model = RandomForestRegressor(n_estimators=2)

    Y, D, P, Tf, Gd = extract_raw_samples(year, crime_t=['total'])
    coords = get_centroid_ca()

    errors = []
    # NOTE(review): 77 is presumably the number of samples / centroids
    # returned by the helpers above -- confirm before generalizing.
    for k in range(77):
        X_train, X_test, Y_train, Y_test = build_features(
            Y, D, P, Tf, Y, Gd, Y, k,
            features=features_, taxi_norm="bydestination")
        if k == 0:
            # Parenthesized so the line is valid on Python 2 and Python 3.
            print(X_train.shape)

        # Drop the held-out location from the training coordinates and keep
        # it, as a one-row 2-D array, for prediction.
        coords_train = np.delete(coords, k, axis=0)
        coords_test = np.array(coords)[k, None]

        m_rk = RegressionKriging(regression_model=rf_model)
        m_rk.fit(X_train, coords_train, Y_train)
        z = m_rk.predict(X_test, coords_test)
        errors.append(abs(Y_test - z[0]))

    mean_error = np.mean(errors)
    # %-formatting yields the same "a b" text under both the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (mean_error, mean_error / np.mean(Y)))
    return errors
# Disabled debug output, kept for reference:
# print("[DEBUG] p_test[:10] : {}".format(p_test[:10]))
# print("[DEBUG] x_train[:10] : {}".format(x_train[:10]))
# print("[DEBUG] x_test[:10] : {}".format(x_test[:10]))
# print("[DEBUG] target_train[:10] : {}".format(target_train[:10]))
# print("[DEBUG] target_test[:10] : {}".format(target_test[:10]))
# assert False

# Wrap each candidate regressor in a regression-kriging model, fit it on the
# training split, then report scores and predictions for the test split.
for m in models:
    print('=' * 40)
    print('regression model:', m.__class__.__name__)

    m_rk = RegressionKriging(regression_model=m, n_closest_points=10)
    m_rk.fit(p_train, x_train, target_train)

    # Score of the underlying regressor alone (no kriging of residuals).
    regression_score = m_rk.regression_model.score(p_test, target_test)
    print('Regression Score: ', regression_score)

    # Score of the combined regression-kriging model.
    rk_score = m_rk.score(p_test, x_test, target_test)
    print('RK score: ', rk_score)

    # NOTE(review): the two prints below are assumed to belong to the loop
    # body (they use the per-model m_rk) -- confirm against the original
    # layout.
    predicted = m_rk.predict(p_test, x_test)
    print("predict: {}".format(predicted))
    print("truth value : {}".format(target_test))

# Reference notes on fit (apparently pasted help text; truncated here):
#
# fit(self, p, x, y)
#  |  fit the regression method and also Krige the residual
#  |
#  |  Parameters
#  |  ----------
#  |  p: ndarray
#  |      (Ns, d) array of predictor variables (Ns samples, d dimensions)
#  |      for regression
#  |  x: ndarray
#  |      ndarray of (x, y) points. Needs to be a (Ns, 2) array
#  |      corresponding to the lon/lat, for example 2d regression kriging.
#  |      array of Points, (x, y, z) pairs of shape (N, 3) for 3d kriging
#  |  y: ndarray
x_y = x_grd[:, :-1] ##numpy array of surface elevation z = x_grd[:, -1:] #%% #constract a randomforest model rf_model = RandomForestRegressor(n_estimators=100) #fit the regression kriging model m_rk = RegressionKriging(regression_model=rf_model, n_closest_points=2, method='ordinary', variogram_model='spherical') #%% m_rk.fit(p, x, sub1.iloc[:, 39]) pred_sph = m_rk.predict(z, x_y) #%% sph = pred_sph #%% #filter values sph[sph < 0.3] = 0 sph.resize(src.height, src.width) show(sph) #%% with rio.Env(): # Write an array as a raster band to a new 8-bit file. For # the new file's profile, we start with the profile of the source profile = src.profile