Exemplo n.º 1
0
def test_neighbors_regressors_zero_distance():
    # Check radius-based and k-nearest regressors when a query point sits
    # exactly on top of a training sample (distance zero).
    train_X = np.array([[1.0, 1.0], [1.0, 1.0], [2.0, 2.0], [2.5, 2.5]])
    train_y = np.array([1.0, 1.5, 2.0, 0.0])
    queries = np.array([[1.1, 1.1], [2.0, 2.0]])
    radius = 0.2

    # The radius regressor must give the same answer for both weightings.
    expected_radius = np.array([1.25, 2.0])
    # The k-neighbors regressor (k=2) differs per weighting on the second
    # query, which coincides with the training sample [2.0, 2.0].
    expected_knn = (
        ('uniform', np.array([1.25, 1.0])),
        ('distance', np.array([1.25, 2.0])),
    )

    for algorithm in ALGORITHMS:
        # Callable weights (_weight_func) are deliberately left out: a
        # user-supplied function is expected to deal with zero distances
        # on its own.
        for weights in ('uniform', 'distance'):
            radius_model = neighbors.RadiusNeighborsRegressor(
                radius=radius, weights=weights, algorithm=algorithm)
            radius_model.fit(train_X, train_y)
            assert_array_almost_equal(expected_radius,
                                      radius_model.predict(queries))

        for weights, expected in expected_knn:
            knn_model = neighbors.KNeighborsRegressor(
                n_neighbors=2, weights=weights, algorithm=algorithm)
            knn_model.fit(train_X, train_y)
            assert_array_almost_equal(expected, knn_model.predict(queries))
Exemplo n.º 2
0
def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
                                              n_features=5,
                                              n_test_pts=10,
                                              n_neighbors=3,
                                              random_state=0):
    """Test radius neighbors in multi-output regression with various weights.

    The target is the (normalised) Euclidean norm of each sample, duplicated
    into two identical output columns.  Predictions for slightly perturbed
    training points must have the right shape and stay within 0.3 of the
    true targets.

    ``n_neighbors`` is kept in the signature only so existing callers do not
    break; it is intentionally unused because ``RadiusNeighborsRegressor``
    is configured by ``radius`` and takes no ``n_neighbors`` argument.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    y = np.sqrt((X ** 2).sum(1))
    y /= y.max()
    y = np.vstack([y, y]).T

    y_target = y[:n_test_pts]
    # Named differently from the loop variable below so the list of options
    # is not shadowed while iterating over it.
    weight_options = ['uniform', 'distance', _weight_func]

    for algorithm, weights in product(ALGORITHMS, weight_options):
        rnn = neighbors.RadiusNeighborsRegressor(weights=weights,
                                                 algorithm=algorithm)
        rnn.fit(X, y)
        # Tiny offset so query points do not coincide exactly with the
        # training samples.
        epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1)
        y_pred = rnn.predict(X[:n_test_pts] + epsilon)

        assert_equal(y_pred.shape, y_target.shape)
        assert_true(np.all(np.abs(y_pred - y_target) < 0.3))
Exemplo n.º 3
0
 def RNNPredict(self, rad = 1.0):
     """
         Build a radius-neighbors regression model.

         Despite the name, no fitting or prediction happens here; the
         newly constructed estimator is simply handed back to the caller.
         @param rad: the radius passed to the RadiusNeighborsRegressor
         @return: the new neighbors.RadiusNeighborsRegressor instance
     """
     return neighbors.RadiusNeighborsRegressor(radius=rad)
Exemplo n.º 4
0
def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
    # With uniform (or no) weighting, a multi-output radius-neighbors
    # prediction must equal the plain mean of the neighbors' targets.
    random_gen = check_random_state(0)
    n_samples, n_features, n_output = 40, 5, 4

    X = random_gen.rand(n_samples, n_features)
    y = random_gen.rand(n_samples, n_output)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for algorithm in ALGORITHMS:
        for weights in (None, 'uniform'):
            model = neighbors.RadiusNeighborsRegressor(weights=weights,
                                                       algorithm=algorithm)
            model.fit(X_train, y_train)

            # Reference prediction: average the training targets of every
            # neighbor found inside the radius of each test point.
            neighbor_sets = model.radius_neighbors(X_test,
                                                   return_distance=False)
            expected = np.array(
                [np.mean(y_train[members], axis=0)
                 for members in neighbor_sets])

            predicted = model.predict(X_test)

            assert_equal(expected.shape, y_test.shape)
            assert_equal(predicted.shape, y_test.shape)
            assert_array_almost_equal(predicted, expected)
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    # Radius-based neighbors regression: accuracy near training points and
    # NaN output (with a warning) when a query has no neighbors.
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    norms = np.sqrt((X ** 2).sum(1))
    y = norms / norms.max()

    expected = y[:n_test_pts]
    weight_choices = ('uniform', 'distance', _weight_func)

    for algorithm in ALGORITHMS:
        for weights in weight_choices:
            model = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                       weights=weights,
                                                       algorithm=algorithm)
            model.fit(X, y)
            # Nudge the queries slightly away from the training samples.
            jitter = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            predicted = model.predict(X[:n_test_pts] + jitter)
            assert_true(np.all(np.abs(predicted - expected) < radius / 2))

    # A query with no training sample inside the radius must yield NaN and
    # emit the documented UserWarning.
    no_neighbors_msg = ("One or more samples have no neighbors "
                        "within specified radius; predicting NaN.")
    isolated_query = np.full((1, n_features), -1.0)
    for weights in ('uniform', 'distance'):
        model = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                   weights=weights,
                                                   algorithm='auto')
        model.fit(X, y)
        nan_pred = assert_warns_message(UserWarning, no_neighbors_msg,
                                        model.predict, isolated_query)
        assert_true(np.all(np.isnan(nan_pred)))
def run_Radius_Regression(train_data,
                          train_labels,
                          test_data,
                          test_labels,
                          radius=1.0,
                          weights='uniform',
                          algorithm='auto',
                          metric='minkowski'):
    """Fit a radius-neighbors regressor, print its error measures, return them.

    Args:
        train_data: training features passed to ``fit``.
        train_labels: training targets passed to ``fit``.
        test_data: features to predict on.
        test_labels: ground truth compared against the predictions.
        radius: neighborhood radius given to the regressor.
        weights: weighting scheme given to the regressor; may be a string,
            ``None`` or a callable.
        algorithm: neighbor-search algorithm name.
        metric: distance metric given to the regressor; string or callable.

    Returns:
        Whatever ``compute_measure(predicted_labels, test_labels)`` returns.
        It is unpacked into five format fields below, so it presumably holds
        (MSE, mean, median, max, min) -- verify against ``compute_measure``.
    """
    print('Running {:f}-radius neighbors using the {:s} algorithm'.format(
        radius, algorithm))
    # ``!s`` rather than ``:s``: weights/metric can be callables (or None),
    # and applying the ``s`` format spec to those raises TypeError.  For
    # plain strings the printed text is unchanged.
    print('Weights - {!s}, Metric - {!s}'.format(weights, metric))
    regressor = neighbors.RadiusNeighborsRegressor(radius=radius,
                                                   weights=weights,
                                                   algorithm=algorithm,
                                                   metric=metric)
    regressor.fit(train_data, train_labels)
    predicted_labels = regressor.predict(test_data)

    results = compute_measure(predicted_labels, test_labels)
    print(
        'Error - MSE: {:4f}, Mean: {:4f}, Median: {:4f}, Max: {:4f}, Min: {:4f}'
        .format(*results))
    return results
Exemplo n.º 7
0
def test_radius_neighbors_regressor(n_samples=40,
                                    n_features=3,
                                    n_test_pts=10,
                                    radius=0.5,
                                    random_state=0):
    """Check radius-based neighbors regression near the training samples.

    The target is each sample's Euclidean norm scaled to a maximum of 1.
    For every algorithm and weighting, predictions at slightly perturbed
    training points must lie within ``radius / 2`` of the true targets.
    """
    rng = np.random.RandomState(random_state)
    X = 2 * rng.rand(n_samples, n_features) - 1
    distances_from_origin = np.sqrt((X ** 2).sum(1))
    y = distances_from_origin / distances_from_origin.max()

    y_expected = y[:n_test_pts]
    tolerance = radius / 2

    for algorithm in ALGORITHMS:
        for weights in ('uniform', 'distance', _weight_func):
            regressor = neighbors.RadiusNeighborsRegressor(
                radius=radius, weights=weights, algorithm=algorithm)
            regressor.fit(X, y)
            # Small random shift so queries are not exact training points.
            shift = 1E-5 * (2 * rng.rand(1, n_features) - 1)
            y_hat = regressor.predict(X[:n_test_pts] + shift)
            assert_true(np.all(np.abs(y_hat - y_expected) < tolerance))
Exemplo n.º 8
0
    def interpolate_raw_data_obj(self, raw_lons, raw_lats, raw_inv_obj,
                                 raw_inv_obj_mask, interpolation_strategy):
        """Interpolate raw (satellite) values onto the stored static grid.

        The static grid (``lons.npy``, ``lats.npy``, ``mask.npy``) is loaded
        from ``self.get_static_grid_path()``.  Raw points outside the grid's
        lon/lat bounds are dropped, only points flagged as known are kept,
        and a neighbors regressor fitted on them predicts values for the
        static nodes that are inside ``static_mask`` and have at least one
        known raw point within one grid step.

        Args:
            raw_lons: raw longitudes; must be indexable by a boolean mask.
            raw_lats: raw latitudes, aligned with ``raw_lons``.
            raw_inv_obj: raw values of the investigated object, aligned
                with ``raw_lons``.
            raw_inv_obj_mask: boolean mask aligned with ``raw_inv_obj``;
                True marks points whose value is known.
            interpolation_strategy: ``'radius'`` (RadiusNeighborsRegressor
                with 5 grid steps radius) or ``'neighbours'``
                (KNeighborsRegressor with 3 neighbours); both use
                distance weighting.

        Returns:
            Float array with the shape of the static longitude grid,
            holding interpolated values at the selected nodes and NaN
            everywhere else.

        Raises:
            NotImplementedError: for any other ``interpolation_strategy``.
        """
        # Static grid
        static_grid_dir = self.get_static_grid_path()
        static_lons = np.load(os.path.join(static_grid_dir, 'lons.npy'))
        static_lats = np.load(os.path.join(static_grid_dir, 'lats.npy'))
        # Builtin ``bool`` is used because the ``np.bool`` alias no longer
        # exists in current NumPy releases.
        static_mask = np.load(os.path.join(static_grid_dir,
                                           'mask.npy')).astype(bool)

        # Form mask for raw data from satellite to constrain it on static data
        lons_cut_mask = self.form_cut_mask_on_bounds(
            raw_lons,
            bounds=(static_lons[:, 0].min(), static_lons[:, -1].max()))
        lats_cut_mask = self.form_cut_mask_on_bounds(
            raw_lats, bounds=(static_lats[-1].min(), static_lats[0].max()))
        cut_mask = np.logical_and(lons_cut_mask, lats_cut_mask)

        # Constrain raw data to newly formed mask
        raw_lons = raw_lons[cut_mask]
        raw_lats = raw_lats[cut_mask]
        raw_inv_obj = raw_inv_obj[cut_mask]
        raw_inv_obj_mask = raw_inv_obj_mask[cut_mask]

        # Get from raw data only known points
        raw_lons_known = raw_lons[raw_inv_obj_mask]
        raw_lats_known = raw_lats[raw_inv_obj_mask]
        raw_lons_lats_known = np.c_[raw_lons_known, raw_lats_known]
        raw_inv_obj_known = raw_inv_obj[raw_inv_obj_mask]

        logger.info(f'Original investigated object statistics: \
            \nmin: {raw_inv_obj_known.min()}, \nmax: {raw_inv_obj_known.max()}, \
            \nmean: {raw_inv_obj_known.mean()}, \nmedian: {np.median(raw_inv_obj_known)}'
                    )

        if np.isfinite(self.investigated_obj__threshold):
            raw_inv_obj_known = np.clip(raw_inv_obj_known, 0,
                                        self.investigated_obj__threshold)
            logger.info(
                f'Original investigated object is clipped to: 0. - {self.investigated_obj__threshold}'
            )

        # Grid on which we will interpolate
        int_lons_lats = np.c_[static_lons.flatten(), static_lats.flatten()]

        # Defining in which radius to interpolate
        # It is actually euclidean metric, because 1 component equal 0
        # I do not consider diagonal points, because according to a + b < c, they are higher
        min_grid_distance_lon = abs(static_lons[0][0] - static_lons[0][1])
        min_grid_distance_lat = abs(static_lats[0][0] - static_lats[1][0])
        min_grid_distance = iutils.floor_float(
            np.min([min_grid_distance_lat, min_grid_distance_lon]))

        # Select points to be interpolated that lie in min grid distance
        # radius from raw data.  All static nodes are queried in a single
        # batched call instead of one Python-level call per node; a node is
        # usable when at least one known raw point falls inside the radius.
        tree = neighbors.KDTree(raw_lons_lats_known, leaf_size=2)
        neighbour_counts = tree.query_radius(int_lons_lats,
                                             r=min_grid_distance,
                                             count_only=True)
        int_inv_obj_mask = (neighbour_counts > 0).reshape(static_lons.shape)

        # Interpolate filtered nodes, find value based on raw data
        if interpolation_strategy == 'radius':
            knr = neighbors.RadiusNeighborsRegressor(
                radius=min_grid_distance * 5., weights='distance')  # ~ 5 km
        elif interpolation_strategy == 'neighbours':
            knr = neighbors.KNeighborsRegressor(n_neighbors=3,
                                                weights='distance')
        else:
            raise NotImplementedError()

        knr.fit(raw_lons_lats_known, raw_inv_obj_known)

        # static_mask - shape of the lake, int_inv_obj_mask - points where we can interpolate
        int_inv_obj_mask = np.logical_and(static_mask, int_inv_obj_mask)
        int_lons_lats_known = int_lons_lats[int_inv_obj_mask.flatten()]
        int_inv_obj_known = knr.predict(int_lons_lats_known)

        # HACK: To be safe that indeed borders are correct
        int_inv_obj_known = np.clip(int_inv_obj_known, 0,
                                    self.investigated_obj__threshold)
        assert int_inv_obj_known.min() >= 0 \
            and int_inv_obj_known.max() <= self.investigated_obj__threshold

        # Reconstruct int_inv_obj
        int_inv_obj = np.full(static_lons.shape, np.nan)
        int_inv_obj[int_inv_obj_mask] = int_inv_obj_known

        return int_inv_obj
Exemplo n.º 9
0
    clarity_tr_prob_lsvm = np.zeros(tr_len)
    clarity_val_prob_lsvm = np.zeros(val_len)
    clarity_tr_prob_nb = np.zeros(tr_len)
    clarity_val_prob_nb = np.zeros(val_len)
    clarity_tr_prob_mlp = np.zeros(tr_len)
    clarity_val_prob_mlp = np.zeros(val_len)
    clarity_tr_prob_rnr = np.zeros(tr_len)
    clarity_val_prob_rnr = np.zeros(val_len)
    clarity_tr_prob_lasso = np.zeros(tr_len)
    clarity_val_prob_lasso = np.zeros(val_len)

    knn = neighbors.KNeighborsClassifier(n_neighbors=7, weights='distance')
    lsvm = linear_model.SGDClassifier(loss='log', n_jobs=4)
    nb = GaussianNB()
    mlp = MLPClassifier(alpha=0.001, learning_rate='invscaling')
    rnr = neighbors.RadiusNeighborsRegressor()
    lasso = linear_model.Lasso(alpha=0.1)

    print("ensemble model with numeric feature input for clarity")

    print("ensemble xgb c")
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=151)
    for train_index, test_index in skf.split(tr_df_clarity_c, tr_clarity):
        X_train, X_test = tr_df_clarity_c[train_index], tr_df_clarity_c[
            test_index]
        y_train, y_test = tr_clarity[train_index], tr_clarity[test_index]

        dtrain_clarity = xgb.DMatrix(X_train, label=y_train)
        dtrain_other_clarity = xgb.DMatrix(X_test, label=y_test)

        bst_clarity_train = xgb.train(param_clarity_c, dtrain_clarity, 250)
Exemplo n.º 10
0
# Collect the (Lat, Long) pair of every training row as the feature matrix.
tlist.extend([tdf['Lat'][val], tdf['Long'][val]] for val in range(len(tdf)))

coordlist = np.asarray(tlist)
siglist = np.asarray(tdf['SinaldBm'])

# Per-radius results: predictions, absolute errors and relative errors.
p, a, r = [], [], []

for d in raios:
    prevsinal, erroabs, errorel = [], [], []
    rnn = neighbors.RadiusNeighborsRegressor(radius=d,
                                             weights='distance',
                                             metric=geodist)
    yf = rnn.fit(coordlist, siglist)

    # Predict each validation row individually and record its errors.
    for val in range(len(vdf)):
        pred = yf.predict([[vdf['Lat'][val], vdf['Long'][val]]])
        estimate = pred[0]
        observed = vdf['SinaldBm'][val]
        prevsinal.append(estimate)
        erroabs.append(abs(estimate - observed))
        errorel.append((estimate - observed) / observed)

    p.append(prevsinal)
    a.append(erroabs)
    r.append(errorel)

    # Summary for this radius: mean squared error, R^2 score and the raw
    # predictions.
    print("Erro mádio quadrático de d=" + str(d) +
          ": %.2f" % mean_squared_error(vdf['SinaldBm'], prevsinal))
    print('Escore de Variância: %.2f' % r2_score(vdf['SinaldBm'], prevsinal))
    print(prevsinal)
            datetime.datetime(2020, 11, 10, 00, 10)
        ]
    })
    ten_min_distance['ar_ct'] = ten_min_distance['ar_ct'].astype(int)
    ten_min_distance = ten_min_distance.to_numpy()
    ten_min_distance = scaler.transform(ten_min_distance)[:, 2]
    ten_min_distance = ten_min_distance[1] - ten_min_distance[0]

    X = data[:-3000, :3]
    y = labels[:-3000]

    T = data[-3000:, :3]
    Ty = labels[-3000:]
    print(Ty.max())

    knn = neighbors.RadiusNeighborsRegressor(ten_min_distance,
                                             weights='uniform')
    print('fitting')
    y_ = knn.fit(X, y).predict(T)

    plt.scatter(Ty, y_, color='red', label='prediction')
    Ty = Ty[~np.isnan(y_)]
    y_ = y_[~np.isnan(y_)]
    # Create linear regression object
    regr = linear_model.LinearRegression()

    # Train the model using the training sets
    regr.fit(Ty.reshape(-1, 1), y_)
    reg_y = regr.predict(Ty.reshape(-1, 1))
    plt.plot(Ty, reg_y, color='blue', linewidth=3)

    # plt.axis('tight')
Exemplo n.º 12
0
    return res

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, scaled by 100.

    Both inputs are converted to NumPy arrays and passed to
    ``percentage_error``; the mean of the absolute values of its result
    is multiplied by 100.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    abs_pct_errors = np.abs(percentage_error(actual, predicted))
    return np.mean(abs_pct_errors) * 100
    
path = r'C:\Users\Sergio\Documents\LebronPrediction\dataset'

df = pd.read_csv(path + "\\LJPredictionNew.csv",sep=";")
# Preview of the data; only shown in an interactive session.
df.head()

# Single feature ('Games') and integer target ('Points').
X = pd.DataFrame(df['Games'])
y = pd.DataFrame(df['Points']).astype(int)

# One 70/30 split.  An earlier split (test_size=0.2, random_state=12) was
# dropped because its results were overwritten before ever being used.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3, random_state=123)

# Fit the first regression model: sweep the radius from 5 to 9 and try both
# weighting schemes, printing the error metrics of every combination.
for n_radius in range(5, 10):
    radius = float(n_radius)

    for weights in ['uniform', 'distance']:
        knn = neighbors.RadiusNeighborsRegressor(radius=radius, weights=weights)
        knn.fit(X_train, Y_train)

        # The target was fitted as a one-column frame, so flatten the
        # prediction to one dimension.
        Y_pred = knn.predict(X_test).ravel()

        print("\n", "The scores for", radius, "radius, and weight", weights)
        print("The mean absolute error is", mean_absolute_error(Y_test, Y_pred))
        print("The mean absolute percentage error is", mean_absolute_percentage_error(Y_test, Y_pred))
        print("The R^2 score is", r2_score(Y_test, Y_pred))
Exemplo n.º 13
0
from sklearn import cross_validation
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn import neighbors
from sklearn.naive_bayes import GaussianNB

from sklearn.pipeline import make_pipeline

from sklearn.model_selection import train_test_split

# Load the table and hold out 20% of the rows for testing.
sfile = '/data2/mrs493/my_data2.csv'
df = pd.read_csv(sfile, sep=',')
train, test = train_test_split(df, test_size=0.2)

# Feature: the 'colour' column reshaped into a single-column 2-D array.
colour_train = sp.reshape(train['colour'].tolist(), (-1, 1))
colour_test = sp.reshape(test['colour'].tolist(), (-1, 1))

# Target: the 'teff' column as a plain list.
temp_train = train['teff'].tolist()
temp_test = test['teff'].tolist()

# Candidate estimators; only the random forest is placed in the pipeline.
KNR = neighbors.KNeighborsRegressor()
RNR = neighbors.RadiusNeighborsRegressor()
RFR = RandomForestRegressor()
GNB = GaussianNB()

pipeline = make_pipeline(RFR)

pipeline.fit(colour_train, temp_train)
# fit the model to the current training set