Exemplo n.º 1
0
def ransac_linear_regression(X, y, draw=False):
    """Fit a line to ``(X, y)`` with RANSAC, falling back to least squares.

    A ``RANSACRegressor`` (``random_state=0``) is tried first. If it raises
    ``ValueError``, a plain ``LinearRegression`` is fitted instead.

    Args:
        X: Training samples, passed unchanged to the estimator's ``fit``.
        y: Target values.
        draw: When true, scatter the data, plot the fitted line and call
            ``plt.show()``.

    Returns:
        The fitted estimator's bound ``predict`` method when the fit is
        accepted, otherwise ``None``. A fit is accepted when its score on
        the training data is positive, or (RANSAC only) when at least 80%
        of the samples are inliers.
    """
    from sklearn.linear_model import RANSACRegressor
    from sklearn.linear_model import LinearRegression

    try:
        reg = RANSACRegressor(random_state=0).fit(X, y)
    except ValueError:
        try:
            reg = LinearRegression().fit(X, y)
        except Exception:
            # Neither estimator could be fitted. ``Exception`` (rather than a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            return None
    prediction = reg.predict
    if draw:
        plt.xlim([0, 1920])
        plt.ylim([0, 1000])
        plt.scatter(X, y, color='yellowgreen', marker='.', label='Inliers')
        line_y = prediction(X)
        plt.plot(X, line_y, color='navy', label='Linear regressor')
        plt.show()

    # Score once and reuse the value for both the decision and the log line.
    score = reg.score(X, y)
    if score > 0:
        return prediction

    # Only RANSACRegressor exposes ``inlier_mask_``. After the
    # LinearRegression fallback there is no inlier ratio to consult, so a
    # non-positive score is a plain rejection instead of an AttributeError.
    inlier_mask = getattr(reg, 'inlier_mask_', None)
    if inlier_mask is None:
        logger.debug("Regression score was too low ({}), not accepting result."
                     .format(round(score, 2)))
        return None

    inlier_count = np.sum(inlier_mask)
    sample_count = inlier_mask.shape[0]
    if inlier_count >= 0.8 * sample_count:
        return prediction

    logger.debug("Regression score was too low ({}) and only {}% inliers, not accepting result."
                 .format(round(score, 2),
                         100 * round(inlier_count / sample_count, 2)))
    return None
Exemplo n.º 2
0
def RANSAC_Regressor():
    """Print the mean cross-validated R² of a RANSAC-wrapped linear model.

    Reads the module-level ``X`` and ``y``, wraps a ``LinearRegression`` in a
    ``RANSACRegressor`` and prints the mean of a 16-fold ``r2``
    cross-validation. Returns ``None``.

    NOTE(review): ``LinearRegression(normalize=True)`` is only accepted by
    scikit-learn releases before 1.2 — confirm the pinned version.
    """
    # RANSACRegressor clones its estimator inside ``fit``, so pre-fitting the
    # LinearRegression has no effect on the result and is not done here.
    lr = LinearRegression(normalize=True)
    rs = RANSACRegressor(lr)

    # Fit once on the full data. cross_val_score clones ``rs`` per fold, so
    # this fit only surfaces fitting errors early and keeps the random draws
    # consumed before cross-validation the same as before.
    rs.fit(X, y)

    rs_score = cross_val_score(rs, X, y, cv=16, scoring='r2')
    print("RANSAC_Regressor : ", rs_score.mean())
Exemplo n.º 3
0
def test_ransac_score():
    """Check RANSAC's score on the clean rows versus the two corrupted rows."""
    n_samples = 100
    X = np.arange(n_samples).reshape(-1, 1)

    # All-zero target except for the first two samples.
    y = np.zeros(n_samples)
    y[:2] = (1, 100)

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=0.5,
        random_state=0,
    )
    ransac_estimator.fit(X, y)

    clean_X, clean_y = X[2:], y[2:]
    assert_equal(ransac_estimator.score(clean_X, clean_y), 1)

    corrupted_X, corrupted_y = X[:2], y[:2]
    assert_less(ransac_estimator.score(corrupted_X, corrupted_y), 1)
Exemplo n.º 4
0
def test_ransac_score():
    """Score must be exactly 1 on rows 2.. and below 1 on rows 0-1."""
    X = np.arange(100).reshape(100, 1)
    y = np.zeros(100)
    # The first two targets are the only non-zero values.
    y[0], y[1] = 1, 100

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=0.5,
        random_state=0,
    ).fit(X, y)

    score_on_rest = ransac_estimator.score(X[2:], y[2:])
    assert_equal(score_on_rest, 1)

    score_on_first_two = ransac_estimator.score(X[:2], y[:2])
    assert_less(score_on_first_two, 1)
Exemplo n.º 5
0
def test_only_fit_contains_sample_weight():
    """Autologging an estimator whose ``fit`` takes ``sample_weight`` but whose
    ``score`` does not.

    ``RANSACRegressor.score`` is replaced by a recording stub that returns 0.
    The test asserts that fitting inside an MLflow run calls it exactly once
    with ``(X, y)``, and that the logged params, metrics, tags and artifacts
    match the model.
    """
    mlflow.sklearn.autolog()

    from sklearn.linear_model import RANSACRegressor

    assert "sample_weight" in _get_arg_names(RANSACRegressor.fit)
    assert "sample_weight" not in _get_arg_names(RANSACRegressor.score)

    mock_obj = mock.Mock()

    def mock_score(self, X, y):  # pylint: disable=unused-argument
        mock_obj(X, y)
        return 0

    # The stub must be a drop-in replacement for the real method.
    assert inspect.signature(
        RANSACRegressor.score) == inspect.signature(mock_score)

    # The patch is applied on the class itself, so it must be undone even if
    # an assertion below fails; otherwise every later user of
    # RANSACRegressor in this process would get a score of 0.
    original_score = RANSACRegressor.score
    RANSACRegressor.score = mock_score
    try:
        model = RANSACRegressor()
        X, y = get_iris()

        with mlflow.start_run() as run:
            model.fit(X, y)
            mock_obj.assert_called_once_with(X, y)

        run_id = run.info.run_id
        params, metrics, tags, artifacts = get_run_data(run_id)
        assert params == truncate_dict(
            stringify_dict_values(model.get_params(deep=True)))
        assert {TRAINING_SCORE: model.score(X, y)}.items() <= metrics.items()
        assert tags == get_expected_class_tags(model)
        assert MODEL_DIR in artifacts
        assert_predict_equal(load_model_by_run_id(run_id), model, X)
    finally:
        RANSACRegressor.score = original_score
def make_forecast(local_array, local_mf_forecast_horizon_days,
                  local_days_in_focus_frame):
    """Forecast each row of ``local_array`` with a RANSAC-wrapped ARD regression.

    For every row, the values are scaled by the row maximum, regressed against
    a day axis scaled to [0, 1], and predicted for the forecast inputs. The
    stacked predictions are finally multiplied by each row's maximum.

    Args:
        local_array: 2-D array indexed as ``[series, day]``; each row is
            copied into a buffer of ``local_days_in_focus_frame`` entries.
        local_mf_forecast_horizon_days: number of trailing day positions to
            predict, and the offset added to the day axis.
        local_days_in_focus_frame: length of the day axis used for fitting.

    Returns:
        Array with one row of predictions per input series.
    """
    local_forecast = []
    # simple normalization
    # NOTE(review): the largest day index is local_days_in_focus_frame - 1, so
    # a frame of a single day divides by zero here.
    days = np.array([day for day in range(local_days_in_focus_frame)])
    days = np.divide(days, np.amax(days))
    # Reusable float32 buffer: column 0 holds the scaled days, column 1 the
    # current series.
    x_y_data = np.zeros(shape=(days.shape[0], 2), dtype=np.dtype('float32'))
    x_y_data[:, 0] = days
    for local_time_serie in range(local_array.shape[0]):
        x_y_data[:, 1] = local_array[local_time_serie, :]
        x = x_y_data[:, 0].reshape(-1, 1)
        y = x_y_data[:, 1].reshape(-1, )
        y_max = np.amax(y)
        # Divide by the row maximum, or by 1 when that maximum is exactly 0.
        y = np.divide(y, y_max * (y_max != 0) + 1 * (y_max == 0))
        # NOTE(review): `base_estimator` and loss='squared_loss' are spellings
        # from older scikit-learn releases — confirm against the pinned version.
        regression = RANSACRegressor(base_estimator=ARDRegression(),
                                     min_samples=29,
                                     max_trials=2000,
                                     random_state=0,
                                     loss='squared_loss',
                                     residual_threshold=2.0).fit(x, y)
        score = regression.score(x, y)
        print('time_serie, score of RANdom SAmple Consensus algorithm',
              local_time_serie, score)
        # NOTE(review): `days` is already scaled to [0, 1] here, yet the horizon
        # is added unscaled; confirm the forecast inputs are meant to be in
        # these units.
        forecast_days = np.add(days, local_mf_forecast_horizon_days
                               )[-local_mf_forecast_horizon_days:].reshape(
                                   -1, 1)
        local_forecast_ts = regression.predict(forecast_days)
        local_forecast.append(local_forecast_ts)
    local_forecast = np.array(local_forecast)
    # simple denormalization
    # NOTE(review): rows whose maximum is 0 were divided by 1 above but are
    # multiplied by 0 here.
    local_array_max = np.amax(local_array, axis=1)
    local_forecast = np.multiply(
        local_forecast, local_array_max.reshape(local_array_max.shape[0], 1))
    print('local_forecast shape:', local_forecast.shape)
    return local_forecast
Exemplo n.º 7
0
 def fnRANSACRegressor(self, year, avgTemp, predictYear):
     """Fit a default RANSACRegressor on a 90/10 split of (year, avgTemp).

     Returns a tuple of the score on the held-out 10% and the model's
     predictions for ``predictYear`` (passed to ``predict`` unchanged).
     """
     split = train_test_split(year, avgTemp, test_size=0.1, random_state=42)
     train_years, test_years, train_temps, test_temps = split

     model = RANSACRegressor()
     # The estimator is given one feature column, hence the added axis.
     model.fit(train_years[:, np.newaxis], train_temps)

     held_out_score = model.score(test_years[:, np.newaxis], test_temps)
     return (held_out_score, model.predict(predictYear))
Exemplo n.º 8
0
    def _cfunc_ransac(x, y):
        """
        Fit a RANSAC regressor to an x/y data set and return its score.

        Args:
            x: (list<float>) independent property (x-axis)
            y: (list<float>) dependent property (y-axis)

        Returns: (float) score of the fitted regressor on the same data

        """
        from sklearn.linear_model import RANSACRegressor

        # Add a trailing axis so x is passed as a column of features.
        features = np.array(x)[:, None]
        model = RANSACRegressor(random_state=21)
        model.fit(features, y)
        return model.score(features, y)
Exemplo n.º 9
0
    def _align_ransac_inner(self, sp, mzs, ints):
        """Fit a RANSAC line mapping sample m/z values onto target m/z values.

        The sample peaks (``mzs``, ``ints``) are joined against
        ``self.target_spectrum`` with ``join_by_mz``. With more than 10 hits,
        the 100 highest-weighted hits are used to fit ``mz ~ sample_mz``.

        Returns:
            dict with ``sp``, slope ``M``, intercept ``C`` and ``score``. When
            a fit was made it also holds ``inliers``, ``align_peaks``,
            ``align_min`` and ``align_max``; otherwise it is the identity
            mapping (``M=1``, ``C=0``) with a score of 0.
        """
        sample_peaks = pd.DataFrame({
            'sample_mz': mzs,
            'sample_ints': ints
        })
        hits = join_by_mz(
            self.target_spectrum,
            'mz',
            sample_peaks,
            'sample_mz',
            self.analyzer,
            self.align_sigma_1,
        )

        # Not enough matched peaks for a fit: report the identity mapping.
        if not len(hits) > 10:
            return {'sp': sp, 'M': 1, 'C': 0, 'score': 0}

        # Rescale the sample intensities by the median target/sample ratio,
        # then weight each hit by log intensity times the accuracy term.
        scaled_ints = hits.sample_ints * np.median(hits.ints / hits.sample_ints)
        ints_accuracy = 0.5 - (scaled_ints / (scaled_ints + 1))
        hits['weight'] = np.log(hits.sample_ints) * ints_accuracy

        ranked = hits.sort_values('weight', ascending=False, ignore_index=True)
        hits = ranked.iloc[:100]

        X = hits.sample_mz.values.reshape(-1, 1)
        y = hits.mz.values
        bins = np.histogram_bin_edges(X, 2)
        threshold = peak_width(X[:, 0], self.analyzer, self.jitter_sigma_1)

        def covers_both_halves(X_subset, y_subset):
            # Require subsets include values from both the higher and lower
            # end of the mass range
            return np.histogram(X_subset, bins)[0].all()

        ransac = RANSACRegressor(
            min_samples=max(0.1, 3 / len(X)),
            residual_threshold=threshold,
            is_data_valid=covers_both_halves,
            loss='absolute_loss',
            stop_probability=1,
        )
        ransac.fit(X, y)

        fitted_line = ransac.estimator_
        return {
            'sp': sp,
            'M': fitted_line.coef_[0],
            'C': fitted_line.intercept_,
            'score': ransac.score(X, y),
            'inliers': np.count_nonzero(ransac.inlier_mask_),
            'align_peaks': len(hits),
            'align_min': hits.mz.min(),
            'align_max': hits.mz.max(),
        }
    def run(self, trainingDasaset, plotting):
        """Train a RANSACRegressor to predict ``int_rate``.

        With ``plotting == True`` the data is split 80/20, the model is fitted
        on the training part, and its score on the test part is printed and
        returned. Otherwise the model is fitted on all rows, predictions for
        two CSV files are written to the output folder, and 0 is returned.
        """
        y = trainingDasaset['int_rate']
        X = trainingDasaset.drop(columns=['int_rate'])
        clf = RANSACRegressor(random_state=42)

        if plotting == True:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=1)
            clf.fit(X_train, y_train)
            print(
                "###################################RANSACRegressor#############################"
            )
            accuracy = clf.score(X_test, y_test)
            print("score:" + str(accuracy))
            return accuracy

        clf.fit(X, y)
        # (input CSV, output CSV) pairs, processed in this order.
        export_jobs = (
            (
                "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/SiameseTrainingData.csv",
                "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictions.csv",
            ),
            (
                "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/OverallTestingData.csv",
                "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictionsTestData.csv",
            ),
        )
        for input_csv, output_csv in export_jobs:
            testData = pd.read_csv(input_csv)
            predictions = clf.predict(testData)
            np.savetxt(output_csv, predictions, delimiter=",")

        return 0
Exemplo n.º 11
0
def model_linear3(frame):
    """Fit a RANSAC-wrapped linear regression on ``frame`` and report it.

    RANSAC fits the linear model on inliers only, which limits the influence
    of outliers on the regression.

    Args:
        frame: pandas DataFrame; the last column is the target and all columns
            except the last seven are the features.

    Returns:
        Tuple ``(score, mean absolute error, fit time in seconds)``, with both
        metrics measured on a 30% hold-out split.
    """
    print("\n\n************************************************************")
    print("MODEL Linear regression ransac")

    target = frame.iloc[:, -1]
    features = frame.iloc[:, :-7]
    print("features=", features.columns)

    # Hold out 30% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3)

    model = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        loss='absolute_loss',
        residual_threshold=5.0,
        random_state=0,
    )

    # Time the fit only.
    started_at = time.time()
    model.fit(X_train, y_train)
    execution = time.time() - started_at

    y_pred = model.predict(X_test)
    held_out_score = model.score(X_test, y_test)

    print("score linear ransac: %.4f" % held_out_score)
    regression_error = mean_absolute_error(y_test, y_pred)
    print("regression_error: %.4f" % regression_error)
    return held_out_score, regression_error, execution
- Adjusted R²
"""

# Report two metrics for each fitted regressor: R² from score(X, Y) and the
# mean squared error of its test-set predictions.
# NOTE(review): R² is computed on (X, Y) while MSE is computed against y_test;
# confirm that scoring on (X, Y) rather than the test split is intended.

# Linear regression
print('R² linear reg.: %.2f' % lin.score(X, Y))  # R²
print('MSE linear reg.: %.2f' % mean_squared_error(y_test, y_predlin))  # MSE

# Ridge
print('R²: %.2f' % ridge.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_ridge))  # MSE

# Lasso
# NOTE(review): unlike the other models, this score is printed without a
# label or rounding.
print(lasso.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_lasso))  # MSE

# `elast` model
print('R²: %.2f' % elast.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_predelast))

# RANSAC
print('R²: %.2f' % ransac.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_predransac))

# `ts` model
print('R²: %.2f' % ts.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_predts))

# Huber
print('R²: %.2f' % huber.score(X, Y))  # R²
print('MSE: %.2f' % mean_squared_error(y_test, y_predhuber))
"""# Classification models"""

# 5.2.1 Logistic Regression
log = LogisticRegression(random_state=0,
                         solver='lbfgs',
                         multi_class='multinomial')
pred_log = log.fit(X_train, y_train).predict(
    X_test
Exemplo n.º 13
0
    def process_scan(self):
        """Turn the most recent lidar scan into floor measurements and publish them.

        A line is fitted with RANSAC to the readings assumed to be the ground.
        From it the method derives the lidar height and the floor inclination,
        rotates the scan by that inclination, and reports the x position just
        before the first obstacle or gap as ``visible_floor_distance``.

        Returns ``None`` in all cases. Nothing is published when there is no
        scan, no finite reading, no ground reading, the RANSAC fit raises
        ``ValueError``, or the score check fails.
        """

        # if there are no laser scans bail out
        if self.current_message is None:
            rospy.logwarn('no lidar scans received')
            return

        # to hold message to be published
        ld = lidar_data()

        # get last message
        msg = self.current_message

        # copy range readings from message to numpy array
        range_readings = np.array(msg.ranges)

        # get angle ranges from message
        angles = np.arange(msg.angle_min, msg.angle_max, msg.angle_increment)

        # patch warning: for some reason the simulation and physical units return messages of different sizes
        # Truncate both arrays to the shorter length so they stay aligned.
        min_length = min(len(angles), len(range_readings))
        range_readings = range_readings[:min_length]
        angles = angles[:min_length]

        # polar to Cartesian coordinates change. Notice x, z flipping (because angle is measured towards the z axis,
        # and not towards x axis)
        z, x = polar2cart(range_readings, angles)

        # TODO: ugly patch!!
        # One axis is negated depending on whether the data is simulated.
        if self.simulated_radar:
            x = -x

        else:
            z = -z

        # restrict to places where we have finite readings
        finite_indices = np.isfinite(x) & np.isfinite(z)
        x, z = x[finite_indices], z[finite_indices]

        # sort by x
        sorted_indices = np.argsort(x)
        x, z = x[sorted_indices], z[sorted_indices]

        # check that both x and z are not empty
        if len(x) * len(z) == 0:
            rospy.logerr("invalid laser scan readings")
            return

        # locations assumed to be the ground
        # (indices refer to the filtered, x-sorted arrays)
        ground_indices = np.where(
            (x > LidarProcess.GROUND_PLANE_ESTIMATION_MIN_DISTANCE)
            & (x < LidarProcess.GROUND_PLANE_ESTIMATION_MAX_DISTANCE))
        ground_x, ground_z = x[ground_indices], z[ground_indices]

        if min(len(ground_x), len(ground_z)) == 0:
            rospy.logwarn('unable to detect ground readings')
            return

        # estimate ground line using RANSAC
        try:
            ransac = RANSACRegressor(random_state=0).fit(
                ground_x.reshape(-1, 1), ground_z)

        except ValueError as e:
            rospy.logwarn('RANSAC error {}'.format(e))
            return

        # keep ransac score
        ransac_score = np.abs(ransac.score(ground_x.reshape(-1, 1), ground_z))

        # if readings do not fit a line then report error
        # NOTE(review): the scan is rejected when |score| is ABOVE the
        # threshold, while a higher regression score normally means a better
        # fit — confirm the comparison direction and what the threshold means.
        if ransac_score > LidarProcess.GROUND_PLANE_ESTIMATION_THRESHOLD:
            rospy.logwarn(
                'unable to detect ground. score is {}'.format(ransac_score))
            return

        # estimate line at all x's
        l = ransac.predict(x.reshape(-1, 1))

        # record lidar height (minus sign because the lidar is above the floor)
        ld.lidar_height = -ransac.predict([[0]])[0]

        # floor angle
        # NOTE(review): the denominator is zero when the smallest and largest
        # x coincide (e.g. a single finite reading).
        inclination = np.arctan((l[0] - l[-1]) / (x[-1] - x[0]))
        ld.floor_inclination_degrees = np.rad2deg(inclination)

        # rotation matrix to make ground level
        R = np.array([[np.cos(inclination), -np.sin(inclination)],
                      [np.sin(inclination),
                       np.cos(inclination)]])

        # transform readings
        T = np.matmul(R, np.vstack((x, z)))

        # subtract ground height
        T[1, :] += ld.lidar_height

        # get real heights (after the rotations, etc.)
        # `heights` is a view of T[1, :], so writes to it also change T.
        heights = T[1, :]

        # disregard the ground indices
        # if something bad happened there then hopefully we'll see it in the ransac score
        heights[ground_indices] = 0

        # difference between successive x measurements. stick a zero at the beginning to preserve the length of the
        # vector
        diff_x = np.append([0], np.abs(np.diff(T[0, :])))

        # where are there either obstacles or gaps in X or large heights
        bad_indices = np.where(
            np.bitwise_or(heights > LidarProcess.MAX_HEIGHT_ABOVE_FLOOR,
                          diff_x > LidarProcess.MAX_FLOOR_X_DIFF))[0]

        # if none found
        # NOTE(review): a fixed 1.0 is reported in this case — confirm it is
        # the intended value.
        if len(bad_indices) == 0:
            ld.visible_floor_distance = 1.0

        else:

            # find first place with obstacles or gaps
            # (one sample before the first bad index, clamped at 0)
            first_idx = max([0, np.min(bad_indices) - 1])

            # floor is visible where there is no large gap in x and no obstacle in z
            ld.visible_floor_distance = T[0][first_idx]

        if self.visualize:
            # Plotting dependencies are imported only when visualization is on.
            # `figure` is imported but not used below.
            from matplotlib import pylab as plt
            from drawnow import drawnow, figure

            def visualize():
                #plt.figure(1)
                # plt.subplot(1, 1, 1)
                #plt.scatter(x, z)
                #plt.scatter(ground_x, ground_z, color='blue')
                #plt.plot(x, ransac.predict(x.reshape(-1, 1)), color='red')

                #plt.figure(1)
                # plt.subplot(1, 1, 1)
                #plt.plot(range_readings)

                #plt.figure(2)
                #plt.plot(angles)
                #plt.plot(x, ransac.predict(x.reshape(-1, 1)), color='red')

                # Scatter the transformed readings and mark the reported
                # visible floor distance on the x axis.
                plt.figure(1)
                # plt.subplot(1, 1, 1)
                plt.scatter(T[0, :], T[1, :])
                # plt.scatter(x[1:], diff_z, color='green')
                # plt.scatter(T[0], T[1], color='green')
                plt.plot(ld.visible_floor_distance, 0, 'bx')

                plt.xlim([-0.5, 6])
                plt.ylim([-1, 3])

            drawnow(visualize)

        # publish results
        print("FLoor : " + str(ld.visible_floor_distance))
        self.lidar_proc.publish(ld)
Exemplo n.º 14
0
def fit_RANSAC(features_train, labels_train, features_pred):
    """Fit a default RANSACRegressor and predict labels for ``features_pred``.

    Also prints the model's score on the training data.

    Args:
        features_train: training features passed to ``fit``.
        labels_train: training targets passed to ``fit``.
        features_pred: features to predict for.

    Returns:
        The predictions for ``features_pred``.
    """
    model = RANSACRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    train_score = model.score(features_train, labels_train)
    # The original Python 2 ``print`` statement is a SyntaxError on Python 3.
    # Calling print() with one pre-formatted string emits the same text on
    # both; the two spaces after the colon reproduce the separator the print
    # statement inserted between its two arguments.
    print("RANSAC - coefficient of determination R^2 of the prediction:  %s"
          % (train_score,))
    return labels_pred
# predicted_values = model.predict(housing_data_input[:10])
# actual_values = housing_data_output[:10]
# print(np.sqrt((predicted_values -actual_values) ** 2))

# Fit a RANSAC-wrapped linear regression on the housing data.
# No random_state is passed to RANSACRegressor here.
ransac = RANSACRegressor(LinearRegression(),
                         min_samples=50,
                         max_trials=100,
                         residual_threshold=5.0)
ransac.fit(housing_data_input, housing_data_output)

# Check the accuracy score
# Split the input rows by the inlier mask from the fit and score each subset
# separately.
inliers = housing_data_input[ransac.inlier_mask_]
outliers = housing_data_input[~ransac.inlier_mask_]
print(
    "Inliers RANSAC Model score: ",
    ransac.score(inliers, housing_data_output[ransac.inlier_mask_]),
)
print(
    "Outliers RANSAC Model score: ",
    ransac.score(outliers, housing_data_output[~ransac.inlier_mask_]),
)

# For every input column, scatter the inlier rows' values against their targets.
# NOTE(review): no legend()/show() call is visible for these plots — the loop
# body may continue beyond this excerpt.
for column_label in housing_data_input.columns:
    # print(column_label)
    plt.title(column_label)
    plt.scatter(
        inliers[column_label],
        housing_data_output[ransac.inlier_mask_],
        label="inlier original data",
    )
    # plt.scatter(inliers[column_label], model.predict(inliers), label = "inlier fitted data")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import classification_report, confusion_matrix

# Load the train/test CSVs and drop every row that has a missing value.
# NOTE(review): the paths are absolute and specific to one machine.
train = pd.read_csv("C:/Users/HP/Desktop/train (1).csv")
test = pd.read_csv("C:/Users/HP/Desktop/test (2).csv")
train = train.dropna()
test = test.dropna()
# The result of head() is discarded, so this has no effect in a script.
train.head()

# Features are all columns except the last; the target is the column at
# position 1.
# NOTE(review): the two selections only agree for a two-column file; with more
# columns the target column would also be among the features — confirm.
X_train = np.array(train.iloc[:, :-1].values)
y_train = np.array(train.iloc[:, 1].values)
X_test = np.array(test.iloc[:, :-1].values)
y_test = np.array(test.iloc[:, 1].values)

# RANSAC regressor with default settings: fit on the training data, then
# predict and score on the test data.
from sklearn.linear_model import RANSACRegressor
model = RANSACRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = model.score(X_test, y_test)
# Plot the model's predictions over the training inputs.
plt.plot(X_train, model.predict(X_train), color='r')
plt.show()
print(accuracy)

# NOTE(review): the same value is printed a second time.
print(accuracy)
Exemplo n.º 17
0
                       is_model_valid=None,
                       max_trials=100,
                       stop_n_inliers=inf,
                       stop_score=inf,
                       stop_probability=0.99,
                       residual_metric=None,
                       loss='absolute_loss',
                       random_state=None)
# Theil-Sen regressor with every constructor argument spelled out.
rg_2 = TheilSenRegressor(fit_intercept=True,
                         copy_X=True,
                         max_subpopulation=10000.0,
                         n_subsamples=None,
                         max_iter=300,
                         tol=0.001,
                         random_state=None,
                         n_jobs=1,
                         verbose=False)
# Huber regressor with every constructor argument spelled out.
rg_3 = HuberRegressor(epsilon=1.35,
                      max_iter=100,
                      alpha=0.0001,
                      warm_start=False,
                      fit_intercept=True,
                      tol=1e-05)

# Fit all three regressors on the same training data (rg_1 is created above
# this excerpt).
rg_1.fit(X_train, Y_train)
rg_2.fit(X_train, Y_train)
rg_3.fit(X_train, Y_train)

# Score each model on the test data.
# NOTE(review): the return values are not stored or printed.
rg_1.score(X_test, Y_test)
rg_2.score(X_test, Y_test)
rg_3.score(X_test, Y_test)
Exemplo n.º 18
0
# `regressor` is created above this excerpt.
regressor.fit(X_train, y_train)
# Plain linear regression: fit on the training split, score on the test split.
# NOTE(review): `normalize=True` is only accepted by older scikit-learn
# releases — confirm against the pinned version.
lr = LinearRegression(normalize=True)
lr.fit(X_train, y_train)
lr_score = lr.score(X_test, y_test)
print("LR : ", lr_score)

# LR with cross_val_score: mean R² over 10 folds of the full (X, Y) data.
lr_scores_cv = cross_val_score(lr, X, Y, cv=10, scoring='r2')
print("LR with cv :", lr_scores_cv.mean())

# RANSAC_Regressor RS: linear regression wrapped in RANSAC.
lr = LinearRegression(normalize=True)
lr.fit(X_train, y_train)
rs = RANSACRegressor(lr)
rs.fit(X, Y)
# The next three expressions discard their values.
rs.score(X, Y)
rs.estimator_.intercept_
rs.estimator_.coef_

# The first assignment is immediately overwritten by the cross-validated
# scores, whose mean is what gets printed.
rs_score = rs.score(X, Y)
rs_score = cross_val_score(rs, X, Y, cv=10, scoring='r2')
print("RS: ", rs_score.mean())

# Ridge_and_Lasso -> RG
# The Ridge instance is replaced by RidgeCV on the next statement, so only
# RidgeCV is fitted and cross-validated.
rg = Ridge(alpha=0.001, normalize=True)
rg = RidgeCV(alphas=(1.0, 0.1, 0.01, 0.005, 0.0025, 0.001, 0.00025),
             normalize=True)
rg.fit(X, Y)
rg_scores = cross_val_score(rg, X, Y, cv=10, scoring='r2')
score = rg_scores.mean()
print("RG : ", score)