def test_ransac_residual_metric():
    """Custom ``residual_metric`` callables must reproduce the default fit.

    Uses the module-level ``X`` and ``y`` fixtures, both with a stacked
    three-column target and with the plain one-dimensional target.
    """
    def abs_sum_metric(dy):
        return np.sum(np.abs(dy), axis=1)

    def squared_sum_metric(dy):
        return np.sum(dy ** 2, axis=1)

    # Settings shared by every estimator in this test.
    shared = dict(min_samples=2, residual_threshold=5, random_state=0)

    base_estimator = LinearRegression()
    default_model = RANSACRegressor(base_estimator, **shared)
    abs_model = RANSACRegressor(base_estimator,
                                residual_metric=abs_sum_metric, **shared)
    squared_model = RANSACRegressor(base_estimator,
                                    residual_metric=squared_sum_metric,
                                    **shared)

    # multi-dimensional
    stacked_y = np.column_stack([y, y, y])
    for model in (default_model, abs_model, squared_model):
        model.fit(X, stacked_y)

    assert_equal(default_model.predict(X), abs_model.predict(X))
    assert_equal(default_model.predict(X), squared_model.predict(X))

    # one-dimensional
    default_model.fit(X, y)
    squared_model.fit(X, y)
    assert_equal(default_model.predict(X), squared_model.predict(X))
def test_ransac_min_n_samples():
    """Equivalent ``min_samples`` spellings agree; invalid ones raise.

    Uses the module-level ``X`` and ``y`` fixtures.
    """
    base_estimator = LinearRegression()

    def build(**overrides):
        # Every estimator shares the threshold and the seed.
        return RANSACRegressor(base_estimator, residual_threshold=5,
                               random_state=0, **overrides)

    reference = build(min_samples=2)
    equivalents = [
        build(min_samples=2. / X.shape[0]),  # same count given as a fraction
        build(min_samples=2.0),              # same count given as a float
        build(),                             # default min_samples
    ]
    invalid = [
        build(min_samples=-1),
        build(min_samples=5.2),
        build(min_samples=X.shape[0] + 1),
    ]

    reference.fit(X, y)
    for estimator in equivalents:
        estimator.fit(X, y)

    for estimator in equivalents:
        assert_equal(reference.predict(X), estimator.predict(X))

    for estimator in invalid:
        assert_raises(ValueError, estimator.fit, X, y)
def dumpster_ransac_fit(self, laser_points):
    """Fit a RANSAC line to laser points and derive dumpster location info.

    Column 1 of ``laser_points`` is the regressor input and column 0 the
    regression target.

    Returns:
        A 5-tuple ``(fit_err, end_points, lateral_dis, side_offset,
        object_width)``:

        * ``fit_err`` -- mean absolute residual of the fit over all points.
        * ``end_points`` -- squeezed array holding, for the largest and the
          smallest inlier x, the pair ``[predicted y, x]``.
        * ``lateral_dis`` -- the smaller of the two predicted end-point y
          values.
        * ``side_offset`` -- mean of the two end-point x values.
        * ``object_width`` -- absolute difference of the end-point x values.
    """
    ransac = RANSACRegressor(min_samples=self._ransac_min_sample)
    points_X = laser_points[:, 1].reshape(-1, 1)
    points_y = laser_points[:, 0]
    ransac.fit(points_X, points_y)

    # Normalized error: absolute residuals averaged over every input point.
    fit_err = np.sum(abs(ransac.predict(points_X) - points_y)) / len(points_y)

    # Inlier x values in descending order; the first and last entries are the
    # two ends of the fitted segment.
    inlier_points_X = sorted(points_X[ransac.inlier_mask_], reverse=True)
    points_x_start = inlier_points_X[0]
    points_y_start = ransac.predict([points_x_start])
    points_x_end = inlier_points_X[-1]
    points_y_end = ransac.predict([points_x_end])

    end_points = np.array([[points_y_start, points_x_start],
                           [points_y_end, points_x_end]]).squeeze()

    # Dumpster location info: lateral_dis, side_offset, object_width.
    lateral_dis = np.min([points_y_start, points_y_end])
    side_offset = np.mean([points_x_end, points_x_start])
    object_width = abs(points_x_start[0] - points_x_end[0])
    return fit_err, end_points, lateral_dis, side_offset, object_width
def ransac(points, threshold):
    """Fit a line ``y = k * x + d`` to ``points`` with RANSAC.

    Row 0 of ``points`` is used as the regressor input and the remaining
    rows as the target.

    Args:
        points: 2-D array with one column per sample.
        threshold: residual threshold handed to ``RANSACRegressor``.

    Returns:
        ``(k, d, inlier_mask)``. ``k`` and ``d`` are both ``0`` when fewer
        than three inliers were found; otherwise ``d`` is the model's
        prediction at ``x = 0`` and ``k`` the difference between its
        predictions at ``x = 1`` and ``x = 0``.
    """
    # Run RANSAC so that the localization stays robust.
    ransac_model = RANSACRegressor(LinearRegression(),
                                   max_trials=20,
                                   min_samples=3,
                                   loss='squared_loss',
                                   stop_n_inliers=8,
                                   residual_threshold=threshold,
                                   random_state=None)
    x = points[0:1, :].T
    y = points[1:, :].T
    ransac_model.fit(x, y)
    inlier_mask = ransac_model.inlier_mask_

    # Too few supporting points: report a degenerate line.
    if inlier_mask.sum() < 3:
        return 0, 0, inlier_mask

    # Read intercept and slope off the fitted model by probing x=0 and x=1.
    d = ransac_model.predict([[0]])[0, 0]
    k = ransac_model.predict([[1]])[0, 0] - d
    return k, d, inlier_mask
def test_loss_deprecated(old_loss, new_loss):
    """The old loss name warns on fit and predicts like the new name."""
    legacy = RANSACRegressor(loss=old_loss, random_state=0)
    expected_message = f"The loss '{old_loss}' was deprecated"
    with pytest.warns(FutureWarning, match=expected_message):
        legacy.fit(X, y)

    current = RANSACRegressor(loss=new_loss, random_state=0)
    current.fit(X, y)

    assert_allclose(legacy.predict(X), current.predict(X))
def test_loss_squared_loss_deprecated():
    """``loss="squared_loss"`` warns on fit and matches ``"squared_error"``."""
    legacy = RANSACRegressor(loss="squared_loss", random_state=0)
    with pytest.warns(
        FutureWarning, match="The loss 'squared_loss' was deprecated"
    ):
        legacy.fit(X, y)

    current = RANSACRegressor(loss="squared_error", random_state=0)
    current.fit(X, y)

    assert_allclose(legacy.predict(X), current.predict(X))
def test_ransac_none_estimator():
    """Passing ``None`` as the estimator matches an explicit LinearRegression."""
    explicit = RANSACRegressor(LinearRegression(), min_samples=2,
                               residual_threshold=5, random_state=0)
    # Same settings, passed positionally, with no base estimator supplied.
    implicit = RANSACRegressor(None, 2, 5, random_state=0)

    explicit.fit(X, y)
    implicit.fit(X, y)

    expected = explicit.predict(X)
    actual = implicit.predict(X)
    assert_array_almost_equal(expected, actual)
def test_ransac_none_estimator():
    """Passing ``None`` as the estimator matches an explicit LinearRegression.

    This variant compares the predictions with ``assert_equal``.
    """
    explicit = RANSACRegressor(LinearRegression(), min_samples=2,
                               residual_threshold=5, random_state=0)
    # Same settings, passed positionally, with no base estimator supplied.
    implicit = RANSACRegressor(None, 2, 5, random_state=0)

    explicit.fit(X, y)
    implicit.fit(X, y)

    expected = explicit.predict(X)
    actual = implicit.predict(X)
    assert_equal(expected, actual)
def test_ransac_residual_loss():
    """Custom and string-named losses must reproduce the default fit.

    Covers callable losses on a stacked three-column target, a callable loss
    on the one-dimensional target, and the ``"squared_error"`` loss name.
    Uses the module-level ``X`` and ``y`` fixtures.
    """
    def loss_multi1(y_true, y_pred):
        return np.sum(np.abs(y_true - y_pred), axis=1)

    def loss_multi2(y_true, y_pred):
        return np.sum((y_true - y_pred) ** 2, axis=1)

    def loss_mono(y_true, y_pred):
        return np.abs(y_true - y_pred)

    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(
        base_estimator, min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_estimator1 = RANSACRegressor(
        base_estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi1,
    )
    ransac_estimator2 = RANSACRegressor(
        base_estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi2,
    )

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)

    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator1.predict(X)
    )
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )

    # string-named loss
    ransac_estimator3 = RANSACRegressor(
        base_estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss="squared_error",
    )
    ransac_estimator3.fit(X, y)
    # Compare the estimator that was just fitted, so the string-named loss
    # is actually exercised by an assertion.
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator3.predict(X)
    )
def main():
    """Fit a RANSAC line on the ``RM`` housing feature and plot/print it."""
    # Training data and target variable.
    housing = HousingData(['RM'])
    X, y = housing.X, housing.y

    # Prepare and fit the RANSAC regressor.
    model = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        loss='absolute_loss',
        residual_threshold=5.0,
        random_state=0,
    )
    model.fit(X, y)

    # Show the prediction.
    plot_predictions(X.flatten(), y, model, xlabel='RM', ylabel='MEDV')

    # Plot inliers and outliers together with the fitted line.
    inliers = model.inlier_mask_
    outliers = np.logical_not(inliers)
    grid = np.arange(3, 10, 1)
    fitted = model.predict(grid[:, np.newaxis])

    scatter_specs = (
        (inliers, 'steelblue', 'o', 'inliers'),
        (outliers, 'limegreen', 's', 'outliers'),
    )
    for mask, colour, marker, label in scatter_specs:
        plt.scatter(X[mask], y[mask], c=colour, edgecolor='white',
                    marker=marker, label=label)
    plt.plot(grid, fitted, color='black')
    plt.xlabel('RM')
    plt.ylabel('MEDV')
    plt.legend()
    plt.show()

    # Sample of a non-standardized prediction.
    medv = model.predict([[5.0]])[0]
    print('RM = 5.0 -> MEDV = {:.3e}'.format(medv))

    # Weights of the final estimator.
    final_estimator = model.estimator_
    print('intercept = {i:.3e}, slope = {s:.3e}'.format(
        i=final_estimator.intercept_, s=final_estimator.coef_[0]))
def dumpster_ransac_fit(self, laser_points):
    """Fit a RANSAC line to laser points and return its two end points.

    Column 1 of ``laser_points`` is the regressor input and column 0 the
    regression target.

    Returns:
        Squeezed array holding, for the smallest and the largest inlier x,
        the pair ``[predicted y, x]``.
    """
    ransac = RANSACRegressor(min_samples=self._ransac_min_sample)
    points_X = laser_points[:, 1].reshape(-1, 1)
    points_y = laser_points[:, 0]
    ransac.fit(points_X, points_y)

    # Inlier x values in ascending order; only the two extremes are needed.
    inlier_points_X = sorted(points_X[ransac.inlier_mask_])
    first_x, last_x = inlier_points_X[0], inlier_points_X[-1]

    inlier_start = ransac.predict([first_x])
    inlier_end = ransac.predict([last_x])
    return np.array([[inlier_start, first_x],
                     [inlier_end, last_x]]).squeeze()
def runRANSAC(ds):
    """Fit a RANSAC regressor on ``ds`` and print its error and runtime.

    The data is split by ``dataTransform``. The reported runtime covers
    fitting, prediction and the error computation/printing.
    """
    print("\n\nrunning RANSAC")

    # Split the data.
    train_x, test_x, train_y, test_y = dataTransform(ds)

    # Begin timing.
    started = time.time()

    # Build the regressor, fit it and predict on the held-out part.
    model = RANSACRegressor(random_state=0)
    model.fit(train_x, train_y)
    predicted = model.predict(test_x)

    # Show the error.
    error = mean_squared_error(predicted, test_y)
    print("Error score is: ", error)

    finished = time.time()
    print("Runtime in seconds: ", finished - started)
def make_forecast(local_array, local_mf_forecast_horizon_days,
                  local_days_in_focus_frame):
    """Forecast every row of ``local_array`` with a RANSAC/ARD regression.

    Each row is treated as one time series. Its values are scaled by the row
    maximum (left unscaled when that maximum is 0), regressed against a day
    axis normalized to ``[0, 1]``, and the prediction is scaled back by the
    row maximum.

    Args:
        local_array: 2-D array, one time series per row.
        local_mf_forecast_horizon_days: number of forecast points per series.
        local_days_in_focus_frame: number of days on the regression axis.

    Returns:
        Array with one row of forecast values per input series.
    """
    local_forecast = []

    # Simple normalization of the day axis.
    days = np.arange(local_days_in_focus_frame)
    days = np.divide(days, np.amax(days))

    # The regressor input is identical for every series, so build it once.
    # It is kept in float32 like the per-series values below.
    x = days.astype(np.float32).reshape(-1, 1)

    # NOTE(review): the horizon (in days) is added to the already normalized
    # day axis -- confirm this is the intended extrapolation range.
    forecast_days = np.add(days, local_mf_forecast_horizon_days)[
        -local_mf_forecast_horizon_days:].reshape(-1, 1)

    # Reusable float32 buffer for the values of the current series.
    series_buffer = np.zeros(days.shape[0], dtype=np.dtype('float32'))

    for local_time_serie in range(local_array.shape[0]):
        series_buffer[:] = local_array[local_time_serie, :]
        y_max = np.amax(series_buffer)
        # Divide by the maximum, or by 1 when the maximum is 0.
        y = np.divide(series_buffer, y_max * (y_max != 0) + 1 * (y_max == 0))

        regression = RANSACRegressor(base_estimator=ARDRegression(),
                                     min_samples=29,
                                     max_trials=2000,
                                     random_state=0,
                                     loss='squared_loss',
                                     residual_threshold=2.0).fit(x, y)
        score = regression.score(x, y)
        print('time_serie, score of RANdom SAmple Consensus algorithm',
              local_time_serie, score)

        local_forecast.append(regression.predict(forecast_days))

    local_forecast = np.array(local_forecast)

    # Simple denormalization.
    local_array_max = np.amax(local_array, axis=1)
    local_forecast = np.multiply(
        local_forecast, local_array_max.reshape(local_array_max.shape[0], 1))
    print('local_forecast shape:', local_forecast.shape)
    return local_forecast
def fnRANSACRegressor(self, year, avgTemp, predictYear):
    """Fit RANSAC on a 90/10 split of ``year`` vs ``avgTemp``.

    Returns:
        ``(score on the held-out 10%, prediction for predictYear)``.
    """
    split = train_test_split(year, avgTemp, test_size=0.1, random_state=42)
    train_years, test_years, train_temps, test_temps = split

    model = RANSACRegressor()
    # The year vectors are turned into single-column matrices for the model.
    model.fit(train_years[:, np.newaxis], train_temps)

    held_out_score = model.score(test_years[:, np.newaxis], test_temps)
    forecast = model.predict(predictYear)
    return (held_out_score, forecast)
def ransacCurveFit(seg_img, degree=7, trials=100, low=55):
    """Fit a polynomial curve through the 255-valued pixels with RANSAC.

    The column index of every pixel equal to 255 is regressed on polynomial
    features of its row index. The fitted curve is then drawn into a blank
    image for the rows ``low <= row < max_row``, where ``max_row`` is the
    largest row index containing a 255-valued pixel.

    Args:
        seg_img: image array; pixels equal to 255 are the samples.
        degree: degree of the polynomial features.
        trials: ``max_trials`` for ``RANSACRegressor``.
        low: first row for which the curve is drawn.

    Returns:
        Float array shaped like ``seg_img`` with the curve pixels set to 255.
        Predicted columns that fall outside the image are skipped.
    """
    rows, cols = np.where(seg_img == 255)[:2]

    # Create polynomial features of the row index.
    poly = PolynomialFeatures(degree=degree, include_bias=True)
    design = poly.fit_transform(rows.reshape(-1, 1))

    # Create and fit the RANSAC model.
    ransac = RANSACRegressor(min_samples=0.3, max_trials=trials)
    ransac.fit(design, cols)

    curve_fit = np.zeros(seg_img.shape)

    # Rows to draw: integers from `low` up to, but excluding, the last row
    # holding a sample. Building them with arange keeps everything integral.
    upper = int(rows.max())
    x_plot = np.arange(low, upper)
    if x_plot.size == 0:
        return curve_fit

    x_plot_t = poly.fit_transform(x_plot.reshape(-1, 1))
    y_plot = ransac.predict(x_plot_t).astype('int')

    # Drop predictions outside the image instead of raising or wrapping
    # around through negative indices.
    inside = (y_plot >= 0) & (y_plot < seg_img.shape[1])
    curve_fit[x_plot[inside], y_plot[inside]] = 255
    return curve_fit
def evaluate_match(df_0, df_1, threshold_triangle=0.3, threshold_point=2):
    """Estimate and score a transformation between two point tables.

    Returns:
        ``(rotation, translation, score)`` taken from a RANSAC fit on the
        matched triangle centers, or ``(None, None, -1)`` when fewer than
        five triangles match.
    """
    V_0, c_0 = get_vc(df_0)
    V_1, c_1 = get_vc(df_1)
    i0, i1, triangle_distances = nearest_neighbors(V_0, V_1)

    # Matching triangles.
    matched = triangle_distances < threshold_triangle
    source_centers = c_0[i0[matched]]
    target_centers = c_1[i1[matched]]

    # Minimum to proceed.
    if sum(matched) < 5:
        return None, None, -1

    # Use the matching triangles to define the transformation.
    model = RANSACRegressor()
    model.fit(source_centers, target_centers)
    fitted = model.estimator_
    rotation, translation = fitted.coef_, fitted.intercept_

    # Score the transformation based on the triangle i, j centers.
    squared = cdist(model.predict(c_0), c_1, metric='sqeuclidean')
    nearest = np.sqrt(squared.min(axis=0))

    # Could use a fraction of the data range or the nearest-neighbor
    # distances within one point set.
    threshold_region = 50
    in_region = nearest < threshold_region
    score = (nearest[in_region] < threshold_point).mean()

    return rotation, translation, score
def get_outliers_by_ransac(self, table, column_indexes):
    '''
    Get outliers using RANSAC regression, which deals better with large
    outliers in the y direction, and is faster than Huber when the number of
    samples is very large. RANSAC outputs perfect precision (100%) but far
    from perfect recall (could be 50% - 60%) in our experiments.

    The last entry of ``column_indexes`` selects the target column of
    ``table``; all preceding entries select the feature columns.

    Returns a tuple ``(outliers, confidences)``: the row indexes flagged as
    outliers and, for each of them, a confidence obtained by min-max scaling
    its absolute residual into the range 0.9 - 0.99. When no row is flagged,
    ``outliers`` is an empty list and ``confidences`` an empty array.
    '''
    X = table[:, column_indexes[:-1]].astype(float)
    X = utils.enforce_columns(X)
    y = table[:, column_indexes[-1]].astype(float)

    # Preprocessing doesn't make any difference for RANSAC in our
    # experiments, so the raw values are used.
    model_ransac = RANSACRegressor(LinearRegression())
    model_ransac.fit(X, y)

    outlier_mask = np.logical_not(model_ransac.inlier_mask_)
    outliers = [idx for idx, val in enumerate(outlier_mask) if val]

    # Nothing flagged: skip the scaling step, which cannot handle an empty
    # residual array.
    if not outliers:
        return (outliers, np.array([]))

    residuals = abs(model_ransac.predict(X) - y)
    confidences = preprocessing.minmax_scale(residuals[outliers]) * 0.09 + 0.9
    return (outliers, confidences)
def ransac_fit(X, y):
    '''
    A robust fit: fit a RANSAC-wrapped linear regression on ``X``/``y``,
    print its slope and intercept, and plot inliers, outliers and the line.

    :return: None
    '''
    from sklearn.linear_model import RANSACRegressor

    model = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        residual_metric=lambda x: np.sum(np.abs(x), axis=1),
        residual_threshold=5.0,
        random_state=0,
    )
    model.fit(X, y)

    # Print the slope, the intercept and similar figures.
    final_estimator = model.estimator_
    print('Slope: %.3f' % final_estimator.coef_[0])
    print('Intercept: %.3f' % final_estimator.intercept_)

    # Plot.
    inliers = model.inlier_mask_
    outliers = np.logical_not(inliers)
    grid = np.arange(3, 10, 1)
    fitted = model.predict(grid[:, np.newaxis])

    scatter_specs = (
        (inliers, 'blue', 'o', 'Inliers'),
        (outliers, 'lightgreen', 's', 'Outliers'),
    )
    for mask, colour, marker, label in scatter_specs:
        plt.scatter(X[mask], y[mask], c=colour, marker=marker, label=label)
    plt.plot(grid, fitted, color='red')
    plt.xlabel('Average number of rooms [RM]')
    plt.ylabel('Price in $1000\'s [MEDV]')
    plt.legend(loc='upper left')
    plt.show()
def compute_heading(v, skip=10):
    """Estimate a heading angle from the edge of the valid data in ``v``.

    Every ``skip``-th row of ``v`` is scanned for values above -20000. Rows
    with fewer than 10 such values are ignored; for the others the last
    qualifying column index is recorded. A RANSAC line is fitted through the
    recorded positions and its slope converted to an angle.

    Args:
        v: 2-D array.
        skip: row stride used when sampling the edge.

    Returns:
        ``degrees(arctan(slope))`` of the fitted line.
    """
    # Scale factors applied to row and column indices.
    dy = -100.0
    dx = 100.0
    ny, _ = v.shape

    # Edge position per sampled row; NaN marks rows without enough data.
    ycoords = dy * np.arange(0, ny, skip)
    xcoords = np.full(ycoords.shape, np.nan)
    for cnt, i in enumerate(range(0, ny, skip)):
        good_ind = (v[i, :] > -20000).nonzero()[0]
        if len(good_ind) < 10:
            continue
        xcoords[cnt] = dx * good_ind[-1]

    # Solve the linear fit on the rows that produced an edge position.
    mask = np.isfinite(xcoords)
    ycoords, xcoords = ycoords[mask], xcoords[mask]
    X = np.column_stack((ycoords, np.ones_like(ycoords)))
    solver = RANSACRegressor().fit(X, xcoords)

    # Compute the heading from the coefficient of the y coordinate.
    slope = solver.estimator_.coef_[0]
    heading = np.degrees(np.arctan(slope))
    return heading
def display_results(dir_name, title):
    """Plot every ``out/<dir_name>/*.txt`` result with a RANSAC line fit.

    For each file a fixed baseline line, the squared loaded values and the
    RANSAC prediction are drawn. ``title`` is accepted but not used here.
    """
    names = glob.glob('out/%s/*.txt' % dir_name)
    for n in names:
        # File name without directory and extension, used as the curve label.
        clean_name = n.split('/')[-1].split('.')[0]
        x = np.arange(0, 256)
        plt.plot(x, (0.3 * x + 5), label='baseline', lw=7)
        # The file is unpacked into two sequences: x values and y values.
        x, y = np.loadtxt(n)
        y = np.square(y)
        plt.plot(x, y, ':', label=clean_name)
        ransac = RANSACRegressor(LinearRegression())
        # The regressor is given x as a single-column matrix.
        x = x.reshape(-1, 1)
        print(x.shape)
        # y.reshape(-1, 1)
        print(x, y)
        ransac.fit(x, y)
        y_ransac = ransac.predict(x)
        # Back to a flat vector for plotting.
        x = x.reshape(-1)
        plt.plot(x, y_ransac)
    # plt.legend()
    # NOTE(review): confirm the figure is meant to be shown once, after all
    # files have been drawn, rather than once per file.
    plt.show()
def __calculate_new_points(grid_points_inside, partition_point_cloud):
    """Build a point cloud for the grid points from a RANSAC altitude model.

    The model is fitted on the xy -> z data of ``partition_point_cloud``.
    Every new point gets the partition's mean color and classification 2.
    """
    # Calculate the RANSAC model.
    altitude_model = RANSACRegressor().fit(partition_point_cloud.get_xy(),
                                           partition_point_cloud.get_z())

    # With the RANSAC model, calculate the altitude for each grid point.
    altitudes = altitude_model.predict(grid_points_inside.get_xy())

    # Calculate the color for the new points: one constant channel each.
    mean_rgb = np.mean(partition_point_cloud.rgb, axis=0)
    [avg_red, avg_green, avg_blue] = mean_rgb
    red, green, blue = [
        np.full(grid_points_inside.len(), channel)
        for channel in (avg_red, avg_green, avg_blue)
    ]

    # Classify all new points as ground.
    classification = np.full(grid_points_inside.len(), 2, dtype=np.uint8)

    # Split xy into columns.
    x_column, y_column = np.hsplit(grid_points_inside.get_xy(), 2)

    # Return the point cloud.
    return PointCloud.with_dimensions(
        x_column.ravel(),
        y_column.ravel(),
        altitudes,
        classification,
        red,
        green,
        blue,
        grid_points_inside.indices,
    )
def prioritize(df_info_0, df_info_1, matches):
    """Produces an Nx2 array of tile (site) identifiers that are predicted
    to match within a search radius, based on existing matches.

    Expects info tables to contain tile (site) identifier as index and two
    columns of coordinates. Matches should be supplied as an Nx2 array of
    tile (site) identifiers.
    """
    known_source = df_info_0.loc[matches[:, 0]].values
    known_target = df_info_1.loc[matches[:, 1]].values

    with warnings.catch_warnings():
        # Ignore all caught warnings.
        warnings.filterwarnings("ignore")
        model = RANSACRegressor(min_samples=2)
        model.fit(known_source, known_target)

    # Rank all pairs by distance.
    predicted = model.predict(df_info_0.values)
    distances = cdist(predicted, df_info_1, metric='sqeuclidean')
    ranking = np.argsort(distances.flatten())
    rows, cols = np.unravel_index(ranking, distances.shape)

    candidates = list(zip(df_info_0.index[rows], df_info_1.index[cols]))
    return remove_overlap(candidates, matches)
def log_log_robust_regression(cfs, y, kind=0):
    """Robust linear regression of ``log(y)`` on ``log(cfs)``.

    Args:
        cfs: 1-D array; it is tiled once per column of the reshaped ``y``.
        y: array whose first dimension must be 40.
        kind: 0 for RANSAC, 1 for Theil-Sen, 2 for Huber.

    Returns:
        ``(coef, intercept, median of the per-sample values 1 - u / v)``
        where ``u`` is the squared residual and ``v`` the squared deviation
        of the sample from the mean.

    Raises:
        ValueError: if ``kind`` is not 0, 1 or 2.
    """
    assert y.shape[0] == 40
    y = y.reshape(40, -1)
    x = np.tile(cfs[:, np.newaxis], (1, y.shape[1]))

    # Work in log-log space, one sample per (cfs, y) pair.
    log_y = np.log(y).ravel()
    log_x = np.log(x).ravel()[:, np.newaxis]

    if kind == 0:
        model = RANSACRegressor()
    elif kind == 1:
        model = TheilSenRegressor(n_jobs=-1)
    elif kind == 2:
        model = HuberRegressor()
    else:
        raise ValueError

    model.fit(log_x, log_y)
    predicted = model.predict(log_x)

    squared_residual = np.square(log_y - predicted)
    squared_deviation = np.square(log_y - log_y.mean())
    R2 = 1. - squared_residual / squared_deviation

    # RANSAC keeps its coefficients on the wrapped final estimator; the other
    # two models expose them directly.
    fitted = model.estimator_ if kind == 0 else model
    return fitted.coef_, fitted.intercept_, np.median(R2)
def ransacregressor(X_train, X_test, y_train, y_test):
    """Fit a RANSAC linear regression and print train/test MSE and R^2.

    Returns:
        The fitted ``RANSACRegressor``.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import RANSACRegressor

    model = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        residual_threshold=5.0,
        random_state=1,
    )
    model.fit(X_train, y_train)
    print('RANSAC Regressor')

    train_pred = model.predict(X_train)
    test_pred = model.predict(X_test)

    mse_pair = (mean_squared_error(y_train, train_pred),
                mean_squared_error(y_test, test_pred))
    print('MSE train: %.3f, test: %.3f' % mse_pair)

    r2_pair = (r2_score(y_train, train_pred),
               r2_score(y_test, test_pred))
    print('R^2 train: %.3f, test: %.3f' % r2_pair)

    return model
def run(self, trainingDasaset, plotting):
    """Train a RANSAC regressor for ``int_rate`` and evaluate or export it.

    When ``plotting`` equals ``True`` the data is split 80/20, the model is
    fitted on the training part and its score on the test part is printed
    and returned. Otherwise the model is fitted on the whole dataset,
    predictions for two CSV files are written to CSV files, and 0 is
    returned.
    """
    accuracy = 0
    target = trainingDasaset['int_rate']
    features = trainingDasaset.drop(columns=[
        'int_rate',
    ])

    # Kept as an equality test so that only values equal to True take the
    # evaluation branch.
    if plotting == True:
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.2, random_state=1)
        clf = RANSACRegressor(random_state=42)
        clf.fit(X_train, y_train)
        print(
            "###################################RANSACRegressor#############################"
        )
        accuracy = clf.score(X_test, y_test)
        print("score:" + str(accuracy))
        return accuracy

    clf = RANSACRegressor(random_state=42)
    clf.fit(features, target)

    # (input CSV, output CSV) pairs, processed in this order.
    export_jobs = (
        (
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/SiameseTrainingData.csv",
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictions.csv",
        ),
        (
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/OverallTestingData.csv",
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictionsTestData.csv",
        ),
    )
    for input_path, output_path in export_jobs:
        testData = pd.read_csv(input_path)
        predictions = clf.predict(testData)
        np.savetxt(output_path, predictions, delimiter=",")

    return accuracy
def _ransac_regression(pts, regressor):
    """RANSAC-fit ``relative_peak_height`` against ``peak_size``.

    Args:
        pts: iterable of mappings with ``'peak_size'`` and
            ``'relative_peak_height'`` entries.
        regressor: base estimator wrapped by ``RANSACRegressor``.

    Returns:
        Dict with the fitted ``intercept`` and ``slope`` plus ``r_squared``
        and ``sd`` (square root of the mean squared error), both computed on
        the inliers only.
    """
    model = RANSACRegressor(regressor)
    sizes = np.array([pt['peak_size'] for pt in pts])
    heights = np.array([pt['relative_peak_height'] for pt in pts])
    X = sizes[:, np.newaxis]
    model.fit(X, heights)

    # Quality figures are based on the inliers only.
    inliers = model.inlier_mask_
    inlier_heights = heights[inliers]
    inlier_predictions = model.predict(X[inliers])

    root_mse = mean_squared_error(inlier_heights, inlier_predictions) ** .5
    r_squared = r2_score(inlier_heights, inlier_predictions)

    fitted = model.estimator_
    return {
        'intercept': fitted.intercept_,
        'r_squared': r_squared,
        'slope': fitted.coef_[0],
        'sd': root_mse,
    }
def detect_hotspots(self, imrgb, imtiff, min_height=0):
    """Detect hot spots in a thermal image registered to an RGB image.

    For every external contour of the positive region of the registered
    thermal image, a RANSAC model is fitted to the eroded interior. Pixels
    more than 75 above that model are flagged; flagged regions whose contour
    area is below 700 are drawn on the output image and returned.

    Args:
        imrgb: RGB image path (also used in the progress message).
        imtiff: thermal TIFF path.
        min_height: forwarded to ``register_thermal_RGB``.

    Returns:
        ``(ITERM, final_ctrs)`` -- the thermal image converted to RGB with
        the hot-spot contours drawn, and the list of those contours; or
        ``(None, None)`` when the registered image has no positive pixel.
    """
    print("Detectando puntos calientes en imagen " + imrgb)
    ITERM_reg, IRGB = self.register_thermal_RGB(imrgb, imtiff, min_height)

    valid = ITERM_reg > 0
    if not valid.any():
        return None, None

    vmean = ITERM_reg[valid].mean()

    # Display image: shift by the mean, rescale, clip to 0..255.
    ITERM = tiff.imread(imtiff).astype(float)
    ITERM = cv2.cvtColor(
        np.maximum(np.minimum((ITERM - vmean + 100) / (1.2), 255),
                   0).astype(np.uint8), cv2.COLOR_GRAY2RGB)

    # findContours returns (image, contours, hierarchy) on OpenCV 3 and
    # (contours, hierarchy) on OpenCV 4; [-2] is the contour list in both.
    ctrs = cv2.findContours(valid.astype(np.uint8), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

    IPRED = np.zeros(ITERM_reg.shape)
    for ctr in ctrs:
        # Filled mask of this contour and its eroded interior.
        I2 = (0 * ITERM_reg).astype(np.uint8)
        cv2.drawContours(I2, [ctr], -1, 255, -1)
        I3 = cv2.erode(I2, np.ones((10, 10)))
        idx = np.where(I3 > 0)

        # Discard the border band removed by the erosion.
        idx_contour = np.where((I3 == 0) & (I2 > 0))
        ITERM_reg[idx_contour] = 0

        if len(idx[0]) > 500:
            # Regress the value on (col, row, 1) over the interior pixels.
            X = np.concatenate(
                (np.array(idx).T[:, ::-1],
                 np.ones(len(idx[0])).reshape(-1, 1)),
                axis=1)
            Y = ITERM_reg[idx]
            model = RANSACRegressor(residual_threshold=20)
            model.fit(X, Y)
            IPRED[idx] = model.predict(X)
        else:
            # Region too small to model: ignore it.
            ITERM_reg[idx] = 0

    # Pixels well above the fitted model are hot-spot candidates.
    IFAIL = ((ITERM_reg - IPRED) > 75).astype(np.uint8)
    IFAIL = cv2.dilate(IFAIL, np.ones((10, 10)))
    ctrs = cv2.findContours(IFAIL, cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]

    final_ctrs = []
    for ctr in ctrs:
        if cv2.contourArea(ctr) < 700:
            cv2.drawContours(ITERM, [ctr], 0, (255, 0, 0), 2)
            final_ctrs.append(ctr)
    return ITERM, final_ctrs
def ransac_fit_predict(x, y, tx, iters=100):
    """Average the predictions of ``iters`` RANSAC linear models.

    :param x: 1-D training inputs.
    :param y: training targets.
    :param tx: 1-D inputs to predict for.
    :param iters: total number of individual `RANSACRegressor` models.

    Returns the mean of the predictions of all models.

    When the module-level ``SEED`` is an integer, model ``i`` is seeded with
    ``SEED + i``: results stay reproducible, but the models differ from one
    another, so averaging them is meaningful. Any other ``SEED`` value is
    passed through unchanged.
    """
    seed_is_int = isinstance(SEED, (int, np.integer))

    # Column-shaped inputs are the same for every model.
    train_inputs = x[:, None]
    predict_inputs = tx[:, None]

    pred_res = []
    for i in range(iters):
        seed = SEED + i if seed_is_int else SEED
        model_ransac = RANSACRegressor(LinearRegression(),
                                       max_trials=10000,
                                       random_state=seed)
        model_ransac.fit(train_inputs, y)
        pred_res.append(model_ransac.predict(predict_inputs))
    return np.mean(pred_res, axis=0)
def test_ransac_predict():
    """Two spikes in otherwise all-zero data must not affect the prediction."""
    n_samples = 100
    X = np.arange(n_samples)[:, None]

    # All-zero target with two outliers at the start.
    y = np.zeros((n_samples,))
    y[0] = 1
    y[1] = 100

    estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                residual_threshold=0.5, random_state=0)
    estimator.fit(X, y)

    expected = np.zeros(100)
    assert_equal(estimator.predict(X), expected)
def ransac(v, d):
    """Fit ``d`` against ``v`` with RANSAC and sample the fitted model.

    Returns:
        ``(line_v, line_d)`` -- a column of values from ``v.min()`` up to,
        but excluding, ``v.max()`` in steps of 1, and the model's prediction
        for each of them.
    """
    ransac_v = RR()
    ransac_v.fit(v, d)

    line_v = np.arange(v.min(), v.max())[:, np.newaxis]
    line_d = ransac_v.predict(line_v)
    return line_v, line_d
def test_ransac_predict():
    """Two spikes in otherwise all-zero data must not affect the prediction.

    This variant expects the prediction as a single-column array.
    """
    n_samples = 100
    X = np.arange(n_samples)[:, None]

    # All-zero target with two outliers at the start.
    y = np.zeros((n_samples, ))
    y[0] = 1
    y[1] = 100

    estimator = RANSACRegressor(LinearRegression(), min_samples=2,
                                residual_threshold=0.5, random_state=0)
    estimator.fit(X, y)

    expected = np.zeros((100, 1))
    assert_equal(estimator.predict(X), expected)
def RANSAC_m(X_ransac, y_ransac, predFeat=False):
    """Fit a RANSAC linear regression, print and plot it, optionally predict.

    Returns the prediction for ``predFeat`` when its type is defined in the
    ``numpy`` module; otherwise returns None.
    """
    # max_trials: maximum number of iterations.
    # min_samples: minimum number of randomly drawn samples used as inliers.
    # residual_metric: a lambda giving the absolute vertical distance between
    #     the fitted line and the sample points.
    # residual_threshold: only samples with a residual below this value are
    #     added to the inliers, the others become outliers. By default the
    #     MAD (median absolute deviation) is used to estimate the threshold.
    model = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=10,
        residual_metric=lambda x: np.sum(np.abs(x), axis=1),
        residual_threshold=1.0,
        random_state=0,
    )
    model.fit(X_ransac, y_ransac)

    fitted = model.estimator_
    print('Slope:%.3f;Intercept:%.3f' % (fitted.coef_[0], fitted.intercept_))

    inliers = model.inlier_mask_            # inlier mask
    outliers = np.logical_not(inliers)      # outlier mask
    grid = np.arange(0, 5, 0.5)
    fitted_line = model.predict(grid[:, np.newaxis])

    scatter_specs = (
        (inliers, 'blue', 'o', 'Inliers'),
        (outliers, 'lightgreen', 's', 'OutLiers'),
    )
    for mask, colour, marker, label in scatter_specs:
        plt.scatter(X_ransac[mask], y_ransac[mask], c=colour, marker=marker,
                    label=label)
    plt.plot(grid, fitted_line, color='red')
    plt.xlabel('hygiene_num')
    plt.ylabel('Price in $1000')
    plt.legend(loc='upper left')
    plt.show()

    # Check whether feature data to predict for was passed in.
    if type(predFeat).__module__ == 'numpy':
        return model.predict(predFeat)
def regression(read_grad_value, read_concentration_value, gradient_file,
               number_of_outliers):
    """Find the robust regression of the concentration and reads values.

    For every column of ``read_grad_value`` the log10 reads are regressed
    against the log10 concentrations with RANSAC, a two-panel figure is
    saved under the column's name, and the ``number_of_outliers`` samples
    with the largest squared residual are recorded.

    Args:
        read_grad_value: table of read values, one gradient per column.
        read_concentration_value: concentration values (``.values`` is used).
        gradient_file: table whose first column holds the sample keys.
        number_of_outliers: how many samples to report per gradient; 0
            reports none.

    Returns:
        DataFrame with one column per gradient holding the outlier keys.
    """
    table_with_outliers = {}
    key = gradient_file.iloc[:, 0].ravel()

    # log10(0) yields -inf; those entries are treated as 0.
    concentration_log10 = np.log10(read_concentration_value.values)
    concentration_log10[concentration_log10 == -inf] = 0

    for gradient in read_grad_value:
        read_grad_value_log10 = np.log10(read_grad_value[gradient])
        read_grad_value_log10[read_grad_value_log10 == -inf] = 0
        x_values = read_grad_value_log10.values.reshape(len(key), 1)
        y_values = concentration_log10.reshape(len(key), 1)

        clt_ransac = linear_model.RANSACRegressor(
            linear_model.LinearRegression())
        clt_ransac = clt_ransac.fit(x_values, y_values)

        fig, axes = plt.subplots(ncols=2, figsize=(15, 10))
        axes[0].scatter(x_values, y_values, c='g')
        axes[0].set_xlabel('Reads', fontsize=18)
        axes[0].set_ylabel('Concentration', fontsize=18)

        # Plot the robust regression, including inliers and outliers.
        # NOTE(review): plt.plot draws on pyplot's current axes, not
        # explicitly on axes[0] -- confirm the intended panel.
        plt.plot(x_values, clt_ransac.predict(x_values), color='blue')

        # A second, seeded estimator ranks the samples by squared residual.
        robust_estimator = RANSACRegressor(random_state=0)
        robust_estimator.fit(x_values, y_values)
        distance_pred = robust_estimator.predict(x_values)
        squared_error = (y_values - distance_pred)**2
        index = np.argsort(squared_error.ravel())

        # Split the ranking into inliers and the requested number of
        # outliers. Zero is handled separately because index[:-0] is empty
        # and index[-0:] is the whole ranking.
        if number_of_outliers == 0:
            inlier_index, outlier_index = index, index[:0]
        else:
            inlier_index = index[:-number_of_outliers]
            outlier_index = index[-number_of_outliers:]

        axes[1].scatter(x_values[inlier_index],
                        y_values[inlier_index],
                        c='b',
                        label='inliers',
                        alpha=0.5)
        axes[1].scatter(x_values[outlier_index],
                        y_values[outlier_index],
                        c='r',
                        label='outliers',
                        alpha=0.5)
        axes[1].set_xlabel('Reads', fontsize=18)
        axes[1].legend(loc=2)
        plt.title(gradient + '\n'
                  "The intercept values is " +
                  str(clt_ransac.estimator_.intercept_) + '\n' +
                  "The slope values is " + str(clt_ransac.estimator_.coef_),
                  fontsize=15)
        plt.savefig(gradient)
        plt.close()

        # Record the keys of the samples selected as outliers.
        table_with_outliers[gradient] = key[outlier_index]

    outliers_df = pd.DataFrame(table_with_outliers)
    return outliers_df
def train_robust(self):
    """Fit a RANSAC linear regression on the housing data and plot the line.

    The figure is written to the path returned by
    ``output_training_report()`` and then shown.
    """
    # Is the data loading shared with the other training methods?
    data_processor = data_processor_housing.Housing()
    result = data_processor.get_normal_data()

    # residual_metric: callable, optional.
    # The metric reduces the dimensionality of the residuals to 1 for
    # multi-dimensional targets (y.shape[1] > 1). By default the sum of the
    # absolute differences is used: the absolute loss.
    # https://code-examples.net/ja/docs/scikit_learn/modules/generated/sklearn.linear_model.ransacregressor
    slr = RANSACRegressor(LinearRegression(),
                          max_trials=100,
                          min_samples=50,
                          loss='absolute_loss',
                          residual_threshold=5.0,
                          random_state=0)
    slr.fit(result[0], result[1])

    # Inliers (normal values).
    inlier_mask = slr.inlier_mask_
    # Outliers.
    outlier_mask = np.logical_not(inlier_mask)

    line_X = np.arange(3, 15, 1)
    # Turn the grid into a column before predicting.
    line_y = slr.predict(line_X[:, np.newaxis])

    print(line_y)
    print(result[0].shape)
    print(result[0][inlier_mask].shape)
    print(result[0][outlier_mask].shape)

    # Predicted values (least squares).
    plt.plot(line_X, line_y, c='red')
    # NOTE(review): the x/y axis labels may be swapped -- confirm against the
    # feature returned by get_normal_data().
    plt.ylabel('Average number of rooms [RM]')
    plt.xlabel('price MEDV')

    # Save before showing: once the figure window has been shown and closed,
    # there is nothing left to save.
    plt.savefig(data_processor.output_training_report())
    plt.show()
    plt.close('all')
class RANSACRegressorImpl:
    """Thin wrapper that forwards ``fit``/``predict`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        """Store the hyperparameters and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` only when it is given.

        Returns this wrapper instance.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        return self._wrapped_model.predict(X)
def regression_information(dem, bilinear_interpolation_results):
    """Regress interpolated altitudes on DEM values and return the residual.

    The RANSAC model is fitted only on pixels whose DEM value is positive,
    but it is evaluated on every pixel so that the residual has the shape of
    ``dem``.

    Args:
        dem: DEM array.
        bilinear_interpolation_results: array with the same number of
            elements as ``dem``.

    Returns:
        ``(RANSAC_lr, residual)`` -- the fitted regressor and
        ``bilinear_interpolation_results - prediction`` reshaped to the
        original shape of ``dem``.
    """
    dem_shape = dem.shape
    dem = dem.flatten()
    bilinear_interpolation_results = bilinear_interpolation_results.flatten()

    # Fit on the pixels with a positive DEM value only.
    alt_data = np.column_stack((dem, bilinear_interpolation_results))
    alt_data = alt_data[np.where(alt_data[:, 0] > 0)]
    RANSAC_lr = RANSACRegressor(LinearRegression())
    RANSAC_lr.fit(alt_data[:, 0:1], alt_data[:, 1])

    # Predict for every pixel; ravel accepts both flat and single-column
    # prediction arrays.
    predict_result = np.ravel(RANSAC_lr.predict(dem.reshape(-1, 1)))

    residual = bilinear_interpolation_results - predict_result
    residual = np.reshape(residual, dem_shape)
    return RANSAC_lr, residual
def test_ransac_residual_loss():
    """Custom and string-named losses must reproduce the default fit.

    Covers callable losses on a stacked three-column target, a callable loss
    on the one-dimensional target, and the ``"squared_loss"`` loss name.
    Uses the module-level ``X`` and ``y`` fixtures.
    """
    def loss_multi1(y_true, y_pred):
        return np.sum(np.abs(y_true - y_pred), axis=1)

    def loss_multi2(y_true, y_pred):
        return np.sum((y_true - y_pred) ** 2, axis=1)

    def loss_mono(y_true, y_pred):
        return np.abs(y_true - y_pred)

    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi1)
    ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi2)

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)

    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator1.predict(X))
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    # string-named loss
    ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss="squared_loss")
    ransac_estimator3.fit(X, y)
    # Compare the estimator that was just fitted, so the string-named loss
    # is actually exercised by an assertion.
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator3.predict(X))
def fit_RANSAC(features_train, labels_train, features_pred):
    """Fit a RANSAC regressor and predict labels for ``features_pred``.

    Also prints the R^2 score of the model on the training data.

    Returns:
        The predicted labels for ``features_pred``.
    """
    model = RANSACRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    # print() call form, so the function also compiles on Python 3.
    print("RANSAC - coefficient of determination R^2 of the prediction: ",
          model.score(features_train, labels_train))
    return labels_pred
# Fit a RANSAC-wrapped linear regression on the module-level X and y.
ransac = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         loss='absolute_loss',
                         residual_threshold=5.0,
                         random_state=0)

ransac.fit(X, y)

# Boolean masks separating the samples RANSAC kept from those it rejected.
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

# Evaluate the fitted model on the integer grid 3..9 to draw the line.
line_X = np.arange(3, 10, 1)
line_y_ransac = ransac.predict(line_X[:, np.newaxis])

# Inliers and outliers are drawn with different colors and markers.
plt.scatter(X[inlier_mask], y[inlier_mask],
            c='steelblue', edgecolor='white',
            marker='o', label='Inliers')
plt.scatter(X[outlier_mask], y[outlier_mask],
            c='limegreen', edgecolor='white',
            marker='s', label='Outliers')
plt.plot(line_X, line_y_ransac, color='black', lw=2)
plt.xlabel('Average number of rooms [RM]')
plt.ylabel('Price in $1000s [MEDV]')
plt.legend(loc='upper left')

#plt.savefig('images/10_08.png', dpi=300)
plt.show()
# Passive Aggressive Regression print 'passive aggressive' par = PassiveAggressiveRegressor() par.fit(x, y) par_sts_scores = par.predict(xt) #par.fit(x[:, np.newaxis], y) #par_sts_scores = par.predict(xt[:, np.newaxis]) # RANSAC Regression print 'ransac' ransac = RANSACRegressor() #ransac.fit(x[:, np.newaxis], y) #ransac_sts_scores = ransac.predict(xt[:, np.newaxis]) ransac.fit(x, y) ransac_sts_scores = ransac.predict(xt) # Logistic Regression print 'logistic' lgr = LogisticRegression() #lgr.fit(x[:, np.newaxis], y) #lgr_sts_scores = lgr.predict(xt[:, np.newaxis]) lgr.fit(x, y) lgr_sts_scores = lgr.predict(xt) ''' # SLOW Regressors !!!! # Randomized Log Regression rlgr = RandomizedLogisticRegression()