def test_neighbors_regressors_zero_distance():
    """Check neighbor regressors when a query sits exactly on a training sample.

    The second query point coincides with a training sample (distance zero),
    which is the delicate case for ``weights='distance'``.  A user-supplied
    weight callable is deliberately not exercised: handling zero distances
    is then the callable's own responsibility.
    """
    train_points = np.array([[1.0, 1.0], [1.0, 1.0], [2.0, 2.0], [2.5, 2.5]])
    train_targets = np.array([1.0, 1.5, 2.0, 0.0])
    queries = np.array([[1.1, 1.1], [2.0, 2.0]])
    search_radius = 0.2

    # Expected output of the radius-based model (same for both weightings).
    radius_expected = np.array([1.25, 2.0])
    # Expected output of the 2-nearest-neighbors model, per weighting.
    knn_expected = (
        ('uniform', np.array([1.25, 1.0])),
        ('distance', np.array([1.25, 2.0])),
    )

    for algorithm in ALGORITHMS:
        for weighting in ('uniform', 'distance'):
            radius_model = neighbors.RadiusNeighborsRegressor(
                radius=search_radius, weights=weighting, algorithm=algorithm)
            radius_model.fit(train_points, train_targets)
            assert_array_almost_equal(radius_expected,
                                      radius_model.predict(queries))

        for weighting, expected in knn_expected:
            knn_model = neighbors.KNeighborsRegressor(
                n_neighbors=2, weights=weighting, algorithm=algorithm)
            knn_model.fit(train_points, train_targets)
            assert_array_almost_equal(expected, knn_model.predict(queries))
def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
                                              n_features=5,
                                              n_test_pts=10,
                                              n_neighbors=3,
                                              random_state=0):
    """Test radius neighbors in multi-output regression with various weights.

    The target is the (normalised) Euclidean norm of each sample, duplicated
    into two identical output columns.  Predictions for slightly perturbed
    copies of the first ``n_test_pts`` training samples must have the right
    shape and stay within 0.3 of the true targets.

    ``n_neighbors`` is kept in the signature only for backward compatibility
    with existing callers; it is intentionally not forwarded, because
    ``RadiusNeighborsRegressor`` is parameterised by ``radius`` and has no
    ``n_neighbors`` constructor argument.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()
    # Two identical columns -> a 2-output regression problem.
    y = np.vstack([y, y]).T

    y_target = y[:n_test_pts]
    # Distinct name so the loop variable below does not shadow the list.
    weight_options = ['uniform', 'distance', _weight_func]

    for algorithm, weights in product(ALGORITHMS, weight_options):
        rnn = neighbors.RadiusNeighborsRegressor(weights=weights,
                                                 algorithm=algorithm)
        rnn.fit(X, y)
        # Tiny offset so queries are near, but not exactly on, training data.
        epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
        y_pred = rnn.predict(X[:n_test_pts] + epsilon)

        assert_equal(y_pred.shape, y_target.shape)
        assert_true(np.all(np.abs(y_pred - y_target) < 0.3))
def RNNPredict(self, rad=1.0):
    """
    Build a radius-neighbors regression model.

    Despite the name, nothing is fitted or predicted here: the estimator
    is only constructed and handed back to the caller.

    @param rad: radius passed to ``RadiusNeighborsRegressor``
    @return: the newly created (unfitted) regressor
    """
    return neighbors.RadiusNeighborsRegressor(radius=rad)
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
    """Check multi-output radius regression with uniform weighting.

    For both ``weights=None`` and ``weights='uniform'`` the prediction for a
    test point must equal the plain column-wise mean of the training targets
    of the neighbors returned by ``radius_neighbors``.
    """
    rng = check_random_state(0)
    n_samples, n_features, n_output = 40, 5, 4

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for algorithm, weights in product(ALGORITHMS, [None, 'uniform']):
        model = neighbors.RadiusNeighborsRegressor(weights=weights,
                                                   algorithm=algorithm)
        model.fit(X_train, y_train)

        # Reference prediction: average the targets of each neighborhood.
        neighborhoods = model.radius_neighbors(X_test, return_distance=False)
        reference = np.array(
            [y_train[members].mean(axis=0) for members in neighborhoods])

        y_pred = model.predict(X_test)

        assert_equal(reference.shape, y_test.shape)
        assert_equal(y_pred.shape, y_test.shape)
        assert_array_almost_equal(y_pred, reference)
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    """Exercise RadiusNeighborsRegressor on a radially symmetric target.

    Part one: for every algorithm and weighting, predictions for slightly
    perturbed copies of the first ``n_test_pts`` training samples must lie
    within ``radius / 2`` of the true targets.

    Part two: a query expected to have no training sample within ``radius``
    must trigger the "no neighbors" warning and yield NaN.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    # Target: Euclidean norm of each sample, rescaled so the maximum is 1.
    y = np.sqrt((X**2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]
    tolerance = radius / 2
    weight_choices = ('uniform', 'distance', _weight_func)

    for algorithm in ALGORITHMS:
        for weights in weight_choices:
            regressor = neighbors.RadiusNeighborsRegressor(
                radius=radius, weights=weights, algorithm=algorithm)
            regressor.fit(X, y)
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = regressor.predict(X[:n_test_pts] + epsilon)
            assert_true(np.all(np.abs(y_pred - y_target) < tolerance))

    # NaN must be returned (with a warning) when nothing is within the radius.
    empty_warning_msg = ("One or more samples have no neighbors "
                         "within specified radius; predicting NaN.")
    X_test_nan = np.ones((1, n_features)) * -1
    for weights in ('uniform', 'distance'):
        regressor = neighbors.RadiusNeighborsRegressor(
            radius=radius, weights=weights, algorithm='auto')
        regressor.fit(X, y)
        pred = assert_warns_message(UserWarning, empty_warning_msg,
                                    regressor.predict, X_test_nan)
        assert_true(np.all(np.isnan(pred)))
def run_Radius_Regression(train_data,
                          train_labels,
                          test_data,
                          test_labels,
                          radius=1.0,
                          weights='uniform',
                          algorithm='auto',
                          metric='minkowski'):
    """Fit a radius-neighbors regressor, score it on test data, print a report.

    Parameters
    ----------
    train_data, train_labels : training features and targets passed to ``fit``.
    test_data : features passed to ``predict``.
    test_labels : reference targets handed to ``compute_measure``.
    radius, weights, algorithm, metric : forwarded unchanged to
        ``neighbors.RadiusNeighborsRegressor``.

    Returns
    -------
    Whatever ``compute_measure(predicted_labels, test_labels)`` returns; the
    final report formats its first five items as MSE, mean, median, max, min.
    """
    print('Running {:f}-radius neighbors using the {:s} algorithm'.format(
        radius, algorithm))
    # ``weights`` and ``metric`` may be callables as well as strings; the
    # ``:s`` format spec raises TypeError for non-str objects, so convert
    # with ``!s`` instead (output is identical for plain strings).
    print('Weights - {!s}, Metric - {!s}'.format(weights, metric))

    regressor = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                   weights=weights,
                                                   algorithm=algorithm,
                                                   metric=metric)
    regressor.fit(train_data, train_labels)
    predicted_labels = regressor.predict(test_data)

    results = compute_measure(predicted_labels, test_labels)
    # NOTE(review): '{:4f}' means minimum width 4 with default precision;
    # '{:.4f}' (four decimals) may have been intended -- left unchanged.
    print(
        'Error - MSE: {:4f}, Mean: {:4f}, Median: {:4f}, Max: {:4f}, Min: {:4f}'
        .format(*results))
    return results
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    """Check radius-based regression accuracy near the training samples.

    The target is each sample's Euclidean norm scaled to a maximum of 1.
    For every algorithm / weighting pair, predictions for slightly perturbed
    copies of the first ``n_test_pts`` training samples must be within
    ``radius / 2`` of the true targets.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()

    y_target = y[:n_test_pts]
    max_error = radius / 2
    weight_choices = ('uniform', 'distance', _weight_func)

    for algorithm in ALGORITHMS:
        for weights in weight_choices:
            model = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                       weights=weights,
                                                       algorithm=algorithm)
            model.fit(X, y)
            # Small random shift so queries do not sit exactly on the data.
            epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_pred = model.predict(X[:n_test_pts] + epsilon)
            assert_true(np.all(np.abs(y_pred - y_target) < max_error))
def interpolate_raw_data_obj(self, raw_lons, raw_lats, raw_inv_obj,
                             raw_inv_obj_mask, interpolation_strategy):
    """Interpolate raw values of the investigated object onto the static grid.

    Steps:
      1. Load the static grid (``lons.npy``, ``lats.npy``, ``mask.npy``) from
         the directory returned by ``self.get_static_grid_path()``.
      2. Restrict the raw data to the bounds of the static grid and keep only
         the points flagged as known by ``raw_inv_obj_mask``.
      3. If ``self.investigated_obj__threshold`` is finite, clip the known
         raw values to ``[0, threshold]``.
      4. Select the static-grid nodes that have at least one known raw point
         within one (floored) grid step and that lie inside ``static_mask``.
      5. Predict values at those nodes with a distance-weighted neighbors
         regressor fitted on the known raw points.

    Parameters
    ----------
    raw_lons, raw_lats : arrays with the coordinates of the raw data.
    raw_inv_obj : array of raw values, indexed like the coordinates.
    raw_inv_obj_mask : boolean array, True where ``raw_inv_obj`` is known.
    interpolation_strategy : ``'radius'`` (``RadiusNeighborsRegressor`` with
        radius of five grid steps) or ``'neighbours'``
        (``KNeighborsRegressor`` with three neighbors).

    Returns
    -------
    Array with the shape of the static grid; interpolated values at the
    selected nodes and NaN everywhere else.

    Raises
    ------
    NotImplementedError
        If ``interpolation_strategy`` is not one of the supported values.
    """
    # Static grid
    static_grid_dir = self.get_static_grid_path()
    static_lons = np.load(os.path.join(static_grid_dir, 'lons.npy'))
    static_lats = np.load(os.path.join(static_grid_dir, 'lats.npy'))
    # Builtin ``bool``: the ``np.bool`` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    static_mask = np.load(os.path.join(static_grid_dir,
                                       'mask.npy')).astype(bool)

    # Form mask for raw data from satellite to constrain it on static data
    lons_cut_mask = self.form_cut_mask_on_bounds(
        raw_lons,
        bounds=(static_lons[:, 0].min(), static_lons[:, -1].max()))
    lats_cut_mask = self.form_cut_mask_on_bounds(
        raw_lats,
        bounds=(static_lats[-1].min(), static_lats[0].max()))
    cut_mask = np.logical_and(lons_cut_mask, lats_cut_mask)

    # Constrain raw data to newly formed mask
    raw_lons = raw_lons[cut_mask]
    raw_lats = raw_lats[cut_mask]
    raw_inv_obj = raw_inv_obj[cut_mask]
    raw_inv_obj_mask = raw_inv_obj_mask[cut_mask]

    # Get from raw data only known points
    raw_lons_known = raw_lons[raw_inv_obj_mask]
    raw_lats_known = raw_lats[raw_inv_obj_mask]
    raw_lons_lats_known = np.c_[raw_lons_known, raw_lats_known]
    raw_inv_obj_known = raw_inv_obj[raw_inv_obj_mask]

    # Adjacent literals instead of backslash continuations inside the
    # string, so source indentation does not leak into the log message.
    logger.info(
        f'Original investigated object statistics: '
        f'\nmin: {raw_inv_obj_known.min()}, '
        f'\nmax: {raw_inv_obj_known.max()}, '
        f'\nmean: {raw_inv_obj_known.mean()}, '
        f'\nmedian: {np.median(raw_inv_obj_known)}')

    if np.isfinite(self.investigated_obj__threshold):
        raw_inv_obj_known = np.clip(raw_inv_obj_known, 0,
                                    self.investigated_obj__threshold)
        logger.info(
            f'Original investigated object is clipped to: 0. '
            f'- {self.investigated_obj__threshold}')

    # Grid on which we will interpolate
    int_lons_lats = np.c_[static_lons.flatten(), static_lats.flatten()]

    # Radius in which a static node counts as "covered" by raw data: the
    # smaller of the two axis-aligned grid steps. Diagonal neighbours are
    # farther away than either step, so they need not be considered.
    min_grid_distance_lon = abs(static_lons[0][0] - static_lons[0][1])
    min_grid_distance_lat = abs(static_lats[0][0] - static_lats[1][0])
    min_grid_distance = iutils.floor_float(
        np.min([min_grid_distance_lat, min_grid_distance_lon]))

    # A static node is used only if at least one known raw point lies within
    # min_grid_distance of it. One batched query replaces a Python-level loop
    # over every grid node.
    tree = neighbors.KDTree(raw_lons_lats_known, leaf_size=2)
    neighbour_counts = tree.query_radius(int_lons_lats,
                                         r=min_grid_distance,
                                         count_only=True)
    int_inv_obj_mask = (neighbour_counts > 0).reshape(static_lons.shape)

    # Interpolate filtered nodes, find value based on raw data
    if interpolation_strategy == 'radius':
        knr = neighbors.RadiusNeighborsRegressor(
            radius=min_grid_distance * 5., weights='distance')  # ~ 5 km
    elif interpolation_strategy == 'neighbours':
        knr = neighbors.KNeighborsRegressor(n_neighbors=3,
                                            weights='distance')
    else:
        raise NotImplementedError(
            f'Unknown interpolation strategy: {interpolation_strategy!r}')

    knr.fit(raw_lons_lats_known, raw_inv_obj_known)

    # static_mask - shape of the lake,
    # int_inv_obj_mask - points where we can interpolate
    int_inv_obj_mask = np.logical_and(static_mask, int_inv_obj_mask)
    int_lons_lats_known = int_lons_lats[int_inv_obj_mask.flatten()]
    int_inv_obj_known = knr.predict(int_lons_lats_known)

    # HACK: To be safe that the value bounds are indeed respected
    int_inv_obj_known = np.clip(int_inv_obj_known, 0,
                                self.investigated_obj__threshold)
    assert int_inv_obj_known.min() >= 0 \
        and int_inv_obj_known.max() <= self.investigated_obj__threshold

    # Reconstruct int_inv_obj: NaN everywhere except the interpolated nodes
    int_inv_obj = np.full(static_lons.shape, np.nan)
    int_inv_obj[int_inv_obj_mask] = int_inv_obj_known

    return int_inv_obj
# Zero-initialised output buffers for the "clarity" target: one entry per
# training sample (tr_len) and per validation sample (val_len) for each
# base model (lsvm, nb, mlp, rnr, lasso).
# NOTE(review): presumably filled with per-model predictions later in the
# script -- the code that writes to them is outside this excerpt.
clarity_tr_prob_lsvm = np.zeros(tr_len)
clarity_val_prob_lsvm = np.zeros(val_len)
clarity_tr_prob_nb = np.zeros(tr_len)
clarity_val_prob_nb = np.zeros(val_len)
clarity_tr_prob_mlp = np.zeros(tr_len)
clarity_val_prob_mlp = np.zeros(val_len)
clarity_tr_prob_rnr = np.zeros(tr_len)
clarity_val_prob_rnr = np.zeros(val_len)
clarity_tr_prob_lasso = np.zeros(tr_len)
clarity_val_prob_lasso = np.zeros(val_len)

# Base estimators (all constructed unfitted here).
knn = neighbors.KNeighborsClassifier(n_neighbors=7, weights='distance')
# NOTE(review): newer scikit-learn releases spell this loss 'log_loss';
# confirm against the installed version.
lsvm = linear_model.SGDClassifier(loss='log', n_jobs=4)
nb = GaussianNB()
mlp = MLPClassifier(alpha=0.001, learning_rate='invscaling')
# rnr and lasso are regressors, unlike the classifiers above.
rnr = neighbors.RadiusNeighborsRegressor()
lasso = linear_model.Lasso(alpha=0.1)

print("ensemble model with numeric feature input for clarity")
print("ensemble xgb c")
# 5-fold stratified split with a fixed seed, so folds are reproducible.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=151)
for train_index, test_index in skf.split(tr_df_clarity_c, tr_clarity):
    # Slice features and labels for this fold.
    X_train, X_test = tr_df_clarity_c[train_index], tr_df_clarity_c[
        test_index]
    y_train, y_test = tr_clarity[train_index], tr_clarity[test_index]
    # Wrap both parts of the fold as DMatrix objects.
    dtrain_clarity = xgb.DMatrix(X_train, label=y_train)
    dtrain_other_clarity = xgb.DMatrix(X_test, label=y_test)
    # Train a booster on the in-fold part for 250 rounds.
    # NOTE(review): the loop body may continue beyond this excerpt.
    bst_clarity_train = xgb.train(param_clarity_c, dtrain_clarity, 250)
# Collect the [Lat, Long] pair of every training row.
for val in range(len(tdf)):
    tlist.append([tdf['Lat'][val], tdf['Long'][val]])
coordlist = np.asarray(tlist)
siglist = np.asarray(tdf['SinaldBm'])

# One entry per radius: predictions, absolute errors, relative errors.
p, a, r = [], [], []

for d in raios:
    prevsinal = []
    erroabs = []
    errorel = []

    # Distance-weighted radius regressor using the custom geodist metric.
    rnn = neighbors.RadiusNeighborsRegressor(radius=d,
                                             weights='distance',
                                             metric=geodist)
    yf = rnn.fit(coordlist, siglist)

    # Predict each validation row individually and record its errors.
    for val in range(len(vdf)):
        pred = yf.predict([[vdf['Lat'][val], vdf['Long'][val]]])
        estimate = pred[0]
        prevsinal.append(estimate)
        erroabs.append(abs(estimate - vdf['SinaldBm'][val]))
        errorel.append((estimate - vdf['SinaldBm'][val]) /
                       vdf['SinaldBm'][val])

    p.append(prevsinal)
    a.append(erroabs)
    r.append(errorel)

    print("Erro mádio quadrático de d=" + str(d) +
          ": %.2f" % mean_squared_error(vdf['SinaldBm'], prevsinal))
    print('Escore de Variância: %.2f' % r2_score(vdf['SinaldBm'], prevsinal))
    print(prevsinal)
datetime.datetime(2020, 11, 10, 00, 10) ] })
# NOTE(review): the line above is the tail of a statement that begins before
# this excerpt; it is left untouched.

# Cast the 'ar_ct' column to int, convert to a NumPy array, run it through
# the existing scaler and keep only column index 2 of the scaled result.
ten_min_distance['ar_ct'] = ten_min_distance['ar_ct'].astype(int)
ten_min_distance = ten_min_distance.to_numpy()
ten_min_distance = scaler.transform(ten_min_distance)[:, 2]
# Difference between the second and first scaled values; used below as the
# neighbor radius.
# NOTE(review): presumably the scaled length of a ten-minute interval --
# confirm against the statement that builds ten_min_distance.
ten_min_distance = ten_min_distance[1] - ten_min_distance[0]

# Hold out the last 3000 rows for testing; only the first three feature
# columns are used.
X = data[:-3000, :3]
y = labels[:-3000]
T = data[-3000:, :3]
Ty = labels[-3000:]
print(Ty.max())

# First positional argument of RadiusNeighborsRegressor is the radius.
knn = neighbors.RadiusNeighborsRegressor(ten_min_distance, weights='uniform')
print('fitting')
y_ = knn.fit(X, y).predict(T)

# Scatter is drawn before NaN predictions are filtered out below.
plt.scatter(Ty, y_, color='red', label='prediction')

# Drop pairs whose prediction is NaN. Order matters: Ty must be filtered
# first, while y_ still contains the NaNs that define the mask.
Ty = Ty[~np.isnan(y_)]
y_ = y_[~np.isnan(y_)]

# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
# (here: regress the predictions y_ on the true values Ty)
regr.fit(Ty.reshape(-1, 1), y_)
reg_y = regr.predict(Ty.reshape(-1, 1))
plt.plot(Ty, reg_y, color='blue', linewidth=3)
# plt.axis('tight')
return res
# NOTE(review): the line above is the tail of a function that begins before
# this excerpt; it is left untouched.


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, as a percentage.

    Both inputs are converted with ``np.asarray`` and passed to
    ``percentage_error`` (defined elsewhere in this file); the mean of the
    absolute values of its result is multiplied by 100.
    """
    return np.mean(np.abs(percentage_error(np.asarray(y_true), np.asarray(y_pred)))) * 100


# Load the dataset (semicolon-separated CSV).
path = r'C:\Users\Sergio\Documents\LebronPrediction\dataset'
df = pd.read_csv(path + "\\LJPredictionNew.csv",sep=";")
# NOTE(review): the result of head() is discarded, so this line has no
# effect when run as a script.
df.head()

# Single feature column 'Games', integer target column 'Points'.
X = pd.DataFrame(df['Games'])
y = pd.DataFrame(df['Points']).astype(int)

# NOTE(review): the first split is immediately overwritten by the second,
# so only the 70/30 split with random_state=123 is actually used.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=12)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=123)

# fit the first regression model
# Sweep radius 5..9 and both weighting schemes, printing metrics for each.
for n_radius in range(5, 10):
    # The enumerate index i is not used in the loop body.
    for i, weights in enumerate(['uniform', 'distance']):
        knn = neighbors.RadiusNeighborsRegressor(radius=float(n_radius), weights=weights)
        knn.fit(X_train, Y_train)
        # Flatten the prediction array to one dimension before scoring.
        Y_pred = knn.predict(X_test).ravel()
        print("\n", "The scores for",float(n_radius), "radius, and weight", weights)
        print("The mean absolute error is", mean_absolute_error(Y_test, Y_pred))
        print("The mean absolute percentage error is", mean_absolute_percentage_error(Y_test, Y_pred))
        print("The R^2 score is", r2_score(Y_test, Y_pred))
# NOTE(review): `sklearn.cross_validation` no longer exists in recent
# scikit-learn releases (superseded by `sklearn.model_selection`, which is
# also imported below) -- confirm against the installed version.
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn import neighbors
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# Input table (comma-separated CSV).
sfile = '/data2/mrs493/my_data2.csv'

df = pd.read_csv(sfile, sep=',')

# Candidate estimators, all with default settings. Only RFR is placed in the
# pipeline below; KNR, RNR and GNB are not used within this excerpt.
KNR = neighbors.KNeighborsRegressor()
RNR = neighbors.RadiusNeighborsRegressor()
RFR = RandomForestRegressor()
GNB = GaussianNB()

pipeline = make_pipeline(RFR)

# Random 80/20 split of the rows (no fixed seed, so it varies between runs).
train, test = train_test_split(df, test_size=0.2)

# Feature: the 'colour' column reshaped into a single-column 2-D array.
# NOTE(review): `sp` is imported outside this excerpt (presumably scipy);
# confirm that `sp.reshape` is available in the installed version.
colour_train = sp.reshape(train.colour.tolist(), (-1, 1))
colour_test = sp.reshape(test.colour.tolist(), (-1, 1))

# Target: the 'teff' column as a plain list.
temp_train = train.teff.tolist()
temp_test = test.teff.tolist()

pipeline.fit(colour_train, temp_train)
#fit the model to the current training set