def resolution_estimate(raw_data, n_spectra=25):
    slopes = []
    intercepts = []
    for _ in range(n_spectra):
        mzs, intensities = read_random_spectrum(raw_data)
        peak_positions = np.array(gradient(mzs, intensities)[-1])
        intensities_at_peaks = intensities[peak_positions]
        high_intensity_threshold = np.percentile(intensities_at_peaks, 40)
        peak_positions = peak_positions[intensities[peak_positions] > high_intensity_threshold]
        resolutions = []
        for peak_pos in peak_positions:
            resolutions.append(resolution_at_peak(peak_pos, mzs, intensities))
        resolutions = np.array(resolutions)
        mzs = mzs[peak_positions]
        mzs = mzs[resolutions > 0]
        resolutions = resolutions[resolutions > 0]
        # Robust log-log fit: resolution is modelled as a power law in m/z
        ransac = RANSACRegressor()
        ransac.fit(np.log(mzs).reshape((-1, 1)), np.log(resolutions).reshape((-1, 1)))
        slopes.append(ransac.estimator_.coef_[0][0])
        intercepts.append(ransac.estimator_.intercept_[0])
    slope = np.median(slopes)
    intercept = np.median(intercepts)
    return lambda mz: np.exp(intercept + slope * np.log(mz))
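# A minimal, self-contained sketch (not from the original codebase) of the
# robust log-log fit that resolution_estimate() performs per spectrum. The
# synthetic power law R = c * mz**s and all names below are illustrative
# assumptions.
import numpy as np
from sklearn.linear_model import RANSACRegressor

rng = np.random.default_rng(0)
mz = rng.uniform(100, 1000, 200)
res = 1e5 * mz ** -0.5 * rng.lognormal(0.0, 0.05, 200)  # noisy power law
res[:20] *= 0.1  # gross outliers that RANSAC should reject

ransac = RANSACRegressor()
ransac.fit(np.log(mz).reshape(-1, 1), np.log(res))
slope = ransac.estimator_.coef_[0]
intercept = ransac.estimator_.intercept_
estimate = lambda m: np.exp(intercept + slope * np.log(m))  # R(mz) model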
def get_outliers_by_ransac(self, table, column_indexes):
    '''
    Get outliers using RANSAC regression, which deals better with large
    outliers in the y direction and is faster than Huber when the number of
    samples is very large. In our experiments RANSAC gave perfect precision
    (100%) but far from perfect recall (roughly 50%-60%).
    '''
    X = table[:, column_indexes[:-1]].astype(float)
    X = utils.enforce_columns(X)
    y = table[:, column_indexes[-1]].astype(float)
    # preprocessing doesn't make any difference for RANSAC in our experiments
    # X = preprocessing.minmax_scale(X)
    # y = preprocessing.minmax_scale(y)
    model_ransac = RANSACRegressor(LinearRegression())
    model_ransac.fit(X, y)
    inlier_mask = model_ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    outliers = [idx for idx, val in enumerate(outlier_mask) if val]
    residuals = abs(model_ransac.predict(X) - y)
    # Map residual magnitude to a confidence score in [0.9, 0.99]
    confidences = preprocessing.minmax_scale(residuals[outliers]) * 0.09 + 0.9
    return (outliers, confidences)
def test_ransac_residual_metric():
    residual_metric1 = lambda dy: np.sum(np.abs(dy), axis=1)
    residual_metric2 = lambda dy: np.sum(dy ** 2, axis=1)
    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        residual_metric=residual_metric1)
    ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        residual_metric=residual_metric2)

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_equal(ransac_estimator0.predict(X), ransac_estimator1.predict(X))
    assert_equal(ransac_estimator0.predict(X), ransac_estimator2.predict(X))

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.fit(X, y)
    assert_equal(ransac_estimator0.predict(X), ransac_estimator2.predict(X))
def identify_linear_outliers(pts, win_size=7):
    # This runs a sliding window across the trace, performing a RANSAC
    # regression for each window. A point is considered an outlier if the
    # moving RANSAC never considers it an inlier.
    regressor = RANSACRegressor()
    x = np.arange(win_size, dtype=np.float64)
    x = np.expand_dims(x, axis=1)
    inlier_count = np.zeros_like(pts)
    npts = len(pts)
    for i in range(npts - win_size + 1):
        y = pts[i:i + win_size]
        # RANSAC of this section of the trace
        try:
            regressor.fit(x, y)
            inlier_inds = regressor.inlier_mask_
        except ValueError:
            # no consensus -- (almost) all the points were bad
            inlier_inds = []
        # accumulate the number of times each point was an inlier
        for j, inlier in enumerate(inlier_inds):
            if inlier:
                inlier_count[i + j] += 1
    # Note: the following line will always consider the first and last points
    # outliers! However, I don't think this will matter for downstream
    # analysis. -BK
    outlier_mask = np.logical_or(inlier_count < 2, pts == 0)
    # outlier_inds = np.where(outlier_mask)[0]
    #
    # # points that are exactly zero are always considered outliers
    # outlier_inds = np.append(outlier_inds, np.where(pts == 0)[0])
    return outlier_mask
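# Hedged usage sketch for identify_linear_outliers() above, on a synthetic
# trace (the data and the expected output are illustrative assumptions).
import numpy as np

trace = np.linspace(0.0, 10.0, 200)
trace[[50, 120]] = 40.0  # spike artifacts no window should keep as inliers
mask = identify_linear_outliers(trace, win_size=7)
print(np.where(mask)[0])  # spike indices, plus the always-flagged endpoints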
def ransac_fit(X, y):
    '''
    A robust fit.
    :return:
    '''
    from sklearn.linear_model import RANSACRegressor
    ransac = RANSACRegressor(LinearRegression(),
                             max_trials=100,
                             min_samples=50,
                             residual_metric=lambda x: np.sum(np.abs(x), axis=1),
                             residual_threshold=5.0,
                             random_state=0)
    ransac.fit(X, y)
    # Print the slope, intercept and related figures
    print('Slope: %.3f' % ransac.estimator_.coef_[0])
    print('Intercept: %.3f' % ransac.estimator_.intercept_)
    # plot
    inlier_mask = ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    line_X = np.arange(3, 10, 1)
    line_y_ransac = ransac.predict(line_X[:, np.newaxis])
    plt.scatter(X[inlier_mask], y[inlier_mask], c='blue', marker='o',
                label='Inliers')
    plt.scatter(X[outlier_mask], y[outlier_mask], c='lightgreen', marker='s',
                label='Outliers')
    plt.plot(line_X, line_y_ransac, color='red')
    plt.xlabel('Average number of rooms [RM]')
    plt.ylabel('Price in $1000\'s [MEDV]')
    plt.legend(loc='upper left')
    plt.show()
def test_ransac_stop_n_inliers():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, stop_n_inliers=2,
                                       random_state=0)
    ransac_estimator.fit(X, y)
    assert_equal(ransac_estimator.n_trials_, 1)
def test_ransac_predict():
    X = np.arange(100)[:, None]
    y = np.zeros((100,))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=0.5, random_state=0)
    ransac_estimator.fit(X, y)
    assert_equal(ransac_estimator.predict(X), np.zeros(100))
def test_ransac_sparse_csc():
    X_sparse = sparse.csc_matrix(X)

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(X_sparse, y)

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def test_ransac_none_estimator():
    base_estimator = LinearRegression()

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_none_estimator = RANSACRegressor(None, 2, 5, random_state=0)

    ransac_estimator.fit(X, y)
    ransac_none_estimator.fit(X, y)

    assert_array_almost_equal(ransac_estimator.predict(X),
                              ransac_none_estimator.predict(X))
def test_ransac_score():
    X = np.arange(100)[:, None]
    y = np.zeros((100,))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=0.5, random_state=0)
    ransac_estimator.fit(X, y)

    assert_equal(ransac_estimator.score(X[2:], y[2:]), 1)
    assert_less(ransac_estimator.score(X[:2], y[:2]), 1)
def test_ransac_default_residual_threshold():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def fit(self, angs, pts):
    print(angs.shape)
    print(pts.shape)
    model1 = RANSACRegressor(LinearRegression())
    model2 = RANSACRegressor(LinearRegression())
    model1.fit(angs[:, [0]], pts[:, 0])
    model2.fit(angs[:, [2]], pts[:, 1])
    self.m1, self.b1 = float(model1.estimator_.coef_), model1.estimator_.intercept_
    self.m2, self.b2 = float(model2.estimator_.coef_), model2.estimator_.intercept_
    print('Coefficients:')
    print(self.m1, self.b1, self.m2, self.b2)
def fit_plane(points):
    '''
    Fit a plane through a list of 3D points and return a, b, c, d such that
    the plane is ax + by + cz + d = 0.
    '''
    X = np.asarray([[p[0], p[1]] for p in points], dtype=float)
    y = [p[2] for p in points]
    model = RANSACRegressor(LinearRegression())
    model.fit(X, y)
    # The regression fits z = a*x + b*y + d, i.e. a*x + b*y - z + d = 0
    d = float(model.estimator_.intercept_)
    a, b = model.estimator_.coef_
    c = -1
    return a, b, c, d
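# Quick hedged check of fit_plane() on synthetic, noise-free points drawn
# from z = 2x + 3y + 1, i.e. 2x + 3y - z + 1 = 0 (illustrative data only).
import numpy as np

rng = np.random.default_rng(0)
xy = rng.uniform(-1.0, 1.0, (100, 2))
points = [(px, py, 2 * px + 3 * py + 1) for px, py in xy]
a, b, c, d = fit_plane(points)
print(a, b, c, d)  # expect approximately 2, 3, -1, 1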
def test_ransac_max_trials():
    base_estimator = LinearRegression()

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, max_trials=0,
                                       random_state=0)
    assert_raises(ValueError, ransac_estimator.fit, X, y)

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, max_trials=11,
                                       random_state=0)
    assert getattr(ransac_estimator, 'n_trials_', None) is None
    ransac_estimator.fit(X, y)
    assert_equal(ransac_estimator.n_trials_, 2)
def test_ransac_min_n_samples():
    base_estimator = LinearRegression()
    ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator2 = RANSACRegressor(base_estimator,
                                        min_samples=2. / X.shape[0],
                                        residual_threshold=5, random_state=0)
    ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=-1,
                                        residual_threshold=5, random_state=0)
    ransac_estimator4 = RANSACRegressor(base_estimator, min_samples=5.2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator5 = RANSACRegressor(base_estimator, min_samples=2.0,
                                        residual_threshold=5, random_state=0)
    ransac_estimator6 = RANSACRegressor(base_estimator,
                                        residual_threshold=5, random_state=0)
    ransac_estimator7 = RANSACRegressor(base_estimator,
                                        min_samples=X.shape[0] + 1,
                                        residual_threshold=5, random_state=0)

    ransac_estimator1.fit(X, y)
    ransac_estimator2.fit(X, y)
    ransac_estimator5.fit(X, y)
    ransac_estimator6.fit(X, y)

    assert_equal(ransac_estimator1.predict(X), ransac_estimator2.predict(X))
    assert_equal(ransac_estimator1.predict(X), ransac_estimator5.predict(X))
    assert_equal(ransac_estimator1.predict(X), ransac_estimator6.predict(X))
    assert_raises(ValueError, ransac_estimator3.fit, X, y)
    assert_raises(ValueError, ransac_estimator4.fit, X, y)
    assert_raises(ValueError, ransac_estimator7.fit, X, y)
def test_ransac_exceed_max_skips():
    def is_data_valid(X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_data_valid=is_data_valid,
                                       max_trials=5, max_skips=3)

    msg = "RANSAC skipped more iterations than `max_skips`"
    with pytest.raises(ValueError, match=msg):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_score():
    X = np.arange(100)[:, None]
    y = np.zeros((100,))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=0.5, random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.score(X[2:], y[2:]) == 1
    assert ransac_estimator.score(X[:2], y[:2]) < 1
def test_ransac_no_valid_model():
    def is_model_valid(estimator, X, y):
        return False

    estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(estimator,
                                       is_model_valid=is_model_valid,
                                       max_trials=5)

    msg = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=msg):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
def test_ransac_final_model_fit_sample_weight():
    X, y = make_regression(n_samples=1000, random_state=10)
    rng = check_random_state(42)
    sample_weight = rng.randint(1, 4, size=y.shape[0])
    sample_weight = sample_weight / sample_weight.sum()
    ransac = RANSACRegressor(base_estimator=LinearRegression(), random_state=0)
    ransac.fit(X, y, sample_weight=sample_weight)

    final_model = LinearRegression()
    mask_samples = ransac.inlier_mask_
    final_model.fit(X[mask_samples], y[mask_samples],
                    sample_weight=sample_weight[mask_samples])

    assert_allclose(ransac.estimator_.coef_, final_model.coef_, atol=1e-12)
def regression_information(dem, bilinear_interpolation_results):
    dem_shape = dem.shape
    dem = dem.flatten()
    bilinear_interpolation_results = bilinear_interpolation_results.flatten()
    alt_data = np.column_stack((dem, bilinear_interpolation_results))
    # Keep only cells with valid (positive) elevation
    alt_data = alt_data[np.where(alt_data[:, 0] > 0)]
    RANSAC_lr = RANSACRegressor(LinearRegression())
    RANSAC_lr.fit(alt_data[:, 0:1], alt_data[:, 1])
    predict_result = RANSAC_lr.predict(alt_data[:, 0:1]).transpose()[0]
    residual = bilinear_interpolation_results - predict_result
    residual = np.reshape(residual, dem_shape)
    return RANSAC_lr, residual
def test_ransac_sparse_csc():
    X_sparse = sparse.csc_matrix(X)

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, random_state=0)
    ransac_estimator.fit(X_sparse, y)

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def run_ransac(self, window=149):
    ransac = RANSACRegressor()
    ransac.fit(self.wave.reshape(-1, 1), self.sp_diff)
    inlier_masks = ransac.inlier_mask_
    outlier_masks = np.logical_not(ransac.inlier_mask_)
    # 5. Use `inlier_masks` to interpolate over the masked-out pixels
    spec_inliers = np.interp(self.wave, self.wave[inlier_masks],
                             self.flux[inlier_masks])
    self.cont = medfilt(spec_inliers, window)
def dumpster_ransac_fit(self, laser_points):
    # return the endpoints of the RANSAC-fitted line segment
    ransac = RANSACRegressor(min_samples=self._ransac_min_sample)
    points_X = laser_points[:, 1].reshape(-1, 1)
    points_y = laser_points[:, 0]
    ransac.fit(points_X, points_y)
    inlier_mask = ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    inlier_points_X = sorted(points_X[inlier_mask])
    inlier_pred = ransac.predict(inlier_points_X)
    inlier_start = ransac.predict([inlier_points_X[0]])
    inlier_end = ransac.predict([inlier_points_X[-1]])
    return np.array([[inlier_start, inlier_points_X[0]],
                     [inlier_end, inlier_points_X[-1]]]).squeeze()
def impute_rows(data, X_cols, y_cols):
    # Rows where all y columns are missing but every X column is present
    rows_idx = np.argwhere(
        np.logical_and(np.isnan(data[:, y_cols]).all(axis=1),
                       ~np.isnan(data[:, X_cols]).any(axis=1)))
    y_pred = np.zeros((len(rows_idx), len(y_cols)))
    if len(rows_idx) > 0:
        print("\tImputing", len(rows_idx), "rows")
        # Rows fully observed in both the X and y columns
        full_rows = np.argwhere(
            np.logical_and(~np.isnan(data[:, X_cols]).any(axis=1),
                           ~np.isnan(data[:, y_cols]).any(axis=1)))
        reg = RANSACRegressor()
        reg.fit(data[full_rows, X_cols], data[full_rows, y_cols])
        y_pred = reg.predict(data[rows_idx, X_cols]).clip(min=0)
    return (rows_idx, y_cols, y_pred)
def test_ransac_default_residual_threshold():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def _cfunc_ransac(x, y):
    """
    Get random sample consensus (RANSAC) regression score for data set.

    Args:
        x: (list<float>) independent property (x-axis)
        y: (list<float>) dependent property (y-axis)

    Returns:
        (float) RANSAC score
    """
    from sklearn.linear_model import RANSACRegressor
    r = RANSACRegressor(random_state=21)
    x_coeff = np.array(x)[:, np.newaxis]
    r.fit(x_coeff, y)
    return r.score(x_coeff, y)
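# Hedged usage sketch for _cfunc_ransac() (assumes numpy imported as np at
# module level, as the function body does): a perfectly linear relation
# scores 1.0.
print(_cfunc_ransac([1.0, 2.0, 3.0, 4.0, 5.0],
                    [2.0, 4.0, 6.0, 8.0, 10.0]))  # -> 1.0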
def test_ransac_is_model_valid():
    def is_model_valid(estimator, X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5,
                                       is_model_valid=is_model_valid,
                                       random_state=0)
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
def train_RANSACRegressionModel(
    X,
    y,
    base_estimator=None,
    min_samples=None,
    residual_threshold=None,
    is_data_valid=None,
    is_model_valid=None,
    max_trials=100,
    stop_n_inliers=inf,
    stop_score=inf,
    stop_probability=0.99,
    residual_metric=None,
    random_state=None,
):
    """
    Train a RANSAC regression model
    """
    model = RANSACRegressor(
        base_estimator=base_estimator,
        min_samples=min_samples,
        residual_threshold=residual_threshold,
        is_data_valid=is_data_valid,
        is_model_valid=is_model_valid,
        max_trials=max_trials,
        stop_n_inliers=stop_n_inliers,
        stop_score=stop_score,
        stop_probability=stop_probability,
        residual_metric=residual_metric,
        random_state=random_state,
    )
    model = model.fit(X, y)
    return model
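# Note: the residual_metric parameter accepted above belongs to older
# scikit-learn releases (deprecated in 0.18, removed in 0.20). A minimal
# sketch of the equivalent call on recent versions, using the loss
# parameter instead (parameter values here are illustrative):
from sklearn.linear_model import LinearRegression, RANSACRegressor

model = RANSACRegressor(LinearRegression(),
                        max_trials=100,
                        stop_probability=0.99,
                        loss='absolute_error',  # replaces residual_metric
                        random_state=0)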
def test_ransac_resid_thresh_no_inliers():
    # When residual_threshold=0.0 there are no inliers and a
    # ValueError with a message should be raised
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=0.0, random_state=0,
                                       max_trials=5)

    msg = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=msg):
        ransac_estimator.fit(X, y)

    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0
def test_ransac_multi_dimensional_targets():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, random_state=0)

    # 3-D target values
    yyy = np.column_stack([y, y, y])

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, yyy)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def _fit_line(self, X, Y, line_type):
    """
    Fits a robust (outlier-tolerant) line using the RANSAC regressor and
    returns two points from the line.
    """
    model = RANSACRegressor()
    model.fit(X, Y)
    pred = model.predict(X).astype(int)
    if line_type == 'vertical':
        # if vertical, predict y coordinates
        model_line = np.array([X[0][0], pred[0][0], X[-1][0], pred[-1][0]])
    elif line_type == 'horizontal':
        # if horizontal, predict x coordinates
        model_line = np.array([pred[0][0], X[0][0], pred[-1][0], X[-1][0]])
    else:
        raise ValueError("Argument line_type only takes the values "
                         "'horizontal' and 'vertical'")
    return model_line
def _align_ransac_inner(self, sp, mzs, ints):
    hits = join_by_mz(
        self.target_spectrum,
        'mz',
        pd.DataFrame({'sample_mz': mzs, 'sample_ints': ints}),
        'sample_mz',
        self.analyzer,
        self.align_sigma_1,
    )
    if len(hits) > 10:
        ints = hits.sample_ints * np.median(hits.ints / hits.sample_ints)
        ints_accuracy = 0.5 - (ints / (ints + 1))
        hits['weight'] = np.log(hits.sample_ints) * ints_accuracy
        hits = hits.sort_values('weight', ascending=False,
                                ignore_index=True).iloc[:100]
        X = hits.sample_mz.values.reshape(-1, 1)
        y = hits.mz.values
        bins = np.histogram_bin_edges(X, 2)
        threshold = peak_width(X[:, 0], self.analyzer, self.jitter_sigma_1)
        ransac = RANSACRegressor(
            # max_trials=10000,
            min_samples=max(0.1, 3 / len(X)),
            residual_threshold=threshold,
            # Require subsets to include values from both the higher and
            # lower end of the mass range
            is_data_valid=lambda X_subset, y_subset: np.histogram(
                X_subset, bins)[0].all(),
            loss='absolute_loss',
            stop_probability=1,
        )
        ransac.fit(X, y)
        return {
            'sp': sp,
            'M': ransac.estimator_.coef_[0],
            'C': ransac.estimator_.intercept_,
            'score': ransac.score(X, y),
            'inliers': np.count_nonzero(ransac.inlier_mask_),
            'align_peaks': len(hits),
            'align_min': hits.mz.min(),
            'align_max': hits.mz.max(),
        }
    else:
        return {'sp': sp, 'M': 1, 'C': 0, 'score': 0}
def ransacregressor(X_train, X_test, y_train, y_test):
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import RANSACRegressor
    ransac = RANSACRegressor(LinearRegression(), max_trials=100,
                             min_samples=50, residual_threshold=5.0,
                             random_state=1)
    ransac.fit(X_train, y_train)
    print('RANSAC Regressor')
    y_train_pred = ransac.predict(X_train)
    y_test_pred = ransac.predict(X_test)
    print('MSE train: %.3f, test: %.3f' % (
        mean_squared_error(y_train, y_train_pred),
        mean_squared_error(y_test, y_test_pred)))
    print('R^2 train: %.3f, test: %.3f' % (
        r2_score(y_train, y_train_pred),
        r2_score(y_test, y_test_pred)))
    return ransac
def run(self, training_dataset, plotting):
    dataset = training_dataset
    accuracy = 0
    y = dataset['int_rate']
    X = dataset.drop(columns=['int_rate'])
    if plotting:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=1)
        clf = RANSACRegressor(random_state=42)
        # clf = self.gridSearch(clf, X_train, y_train)
        clf.fit(X_train, y_train)
        print("###################################RANSACRegressor#############################")
        accuracy = clf.score(X_test, y_test)
        # pred = clf.predict(X_test)
        # accuracy = np.sqrt(metrics.mean_squared_error(y_test, pred))
        print("score:" + str(accuracy))
    else:
        clf = RANSACRegressor(random_state=42)
        # clf = self.gridSearch(clf, X, y)
        clf.fit(X, y)
        testData = pd.read_csv(
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/SiameseTrainingData.csv")
        predictions = clf.predict(testData)
        np.savetxt(
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictions.csv",
            predictions, delimiter=",")
        testData = pd.read_csv(
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/OverallTestingData.csv")
        predictions = clf.predict(testData)
        np.savetxt(
            "./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/RANSACRegressorPredictionsTestData.csv",
            predictions, delimiter=",")
    return accuracy
def _ransac_regression(pts, regressor):
    ransac = RANSACRegressor(regressor)
    x = np.array([a['peak_size'] for a in pts])
    y = np.array([b['relative_peak_height'] for b in pts])
    X = x[:, np.newaxis]
    ransac.fit(X, y)
    inlier_mask = ransac.inlier_mask_
    # Root-mean-square error and R^2 over the inliers only
    ransac_mse = mean_squared_error(y[inlier_mask],
                                    ransac.predict(X[inlier_mask])) ** .5
    ransac_r2 = r2_score(y[inlier_mask], ransac.predict(X[inlier_mask]))
    return {
        'intercept': ransac.estimator_.intercept_,
        'r_squared': ransac_r2,
        'slope': ransac.estimator_.coef_[0],
        'sd': ransac_mse,
    }
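# Hedged usage sketch for _ransac_regression() with synthetic dict records
# matching the keys the function expects (the data values are illustrative).
from sklearn.linear_model import LinearRegression

pts = [{'peak_size': i, 'relative_peak_height': 0.5 * i + 1.0}
       for i in range(20)]
stats = _ransac_regression(pts, LinearRegression())
print(stats['slope'], stats['intercept'])  # expect about 0.5 and 1.0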
def icp(a, na, b, nb, chronos={}):
    from sklearn.neighbors import KDTree
    kdt = KDTree(a)

    chronostart = timer()
    nndist, nnidx = kdt.query(b)
    nn_b_in_a = a[nnidx[:, 0], :]
    chrono = timer() - chronostart
    chrono_name = "Nearest neighbors"
    chronos[chrono_name] = chrono
    print("{} : {} ms".format(chrono_name, 1000. * chrono))

    normals_b_in_a = na[nnidx[:, 0], :]
    rotvec = np.cross(normals_b_in_a, nb, axis=-1)

    from sklearn.linear_model import RANSACRegressor
    ransac = RANSACRegressor()
    chronostart = timer()
    # Robustly estimate a single rotation vector shared by all point pairs:
    # regress the per-pair rotation vectors on a constant feature
    ransac.fit(np.zeros((len(rotvec), 1)), rotvec)
    bestrotvec = ransac.predict([[0]])[0]
    chrono = timer() - chronostart
    chrono_name = "RANSAC"
    chronos[chrono_name] = chrono
    print("{} : {} ms".format(chrono_name, 1000. * chrono))

    norm = np.linalg.norm(bestrotvec)
    theta = np.arcsin(norm) / 2
    vec = bestrotvec / norm
    costh = np.cos(theta)
    ncosth = 1 - costh
    sinth = np.sin(theta)
    ux, uy, uz = vec
    # Axis-angle to rotation matrix (Rodrigues' formula)
    R = np.array([
        [costh + ux * ux * ncosth,
         ux * uy * ncosth - uz * sinth,
         ux * uz * ncosth + uy * sinth],
        [uy * ux * ncosth + uz * sinth,
         costh + uy * uy * ncosth,
         uy * uz * ncosth - ux * sinth],
        [uz * ux * ncosth - uy * sinth,
         uz * uy * ncosth + ux * sinth,
         costh + uz * uz * ncosth],
    ])
    b_rot = R.dot(a.T).T
    return b_rot, R
def test_ransac_custom_base_estimator():
    base_estimator = DecisionTreeRegressor()
    estimator = RANSACRegressor(base_estimator=base_estimator, random_state=1)
    estimator.fit([[1], [2], [3]], [1, 2, 3])

    assembler = RANSACModelAssembler(estimator)
    actual = assembler.assemble()

    expected = ast.IfExpr(
        ast.CompExpr(ast.FeatureRef(0), ast.NumVal(2.5), ast.CompOpType.LTE),
        ast.NumVal(2.0),
        ast.NumVal(3.0))

    assert cmp_exprs(actual, expected)
def test_ransac_max_trials():
    base_estimator = LinearRegression()

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, max_trials=0,
                                       random_state=0)
    assert_raises(ValueError, ransac_estimator.fit, X, y)

    # There is a 1e-9 chance it will take these many trials. No good reason;
    # 1e-2 isn't enough, it can still happen.
    # 2 is what RANSAC defines as min_samples: X.shape[1] + 1
    max_trials = _dynamic_max_trials(len(X) - len(outliers), X.shape[0], 2,
                                     1 - 1e-9)
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2)
    for i in range(50):
        ransac_estimator.set_params(min_samples=2, random_state=i)
        ransac_estimator.fit(X, y)
        assert_less(ransac_estimator.n_trials_, max_trials + 1)
def ransacCurveFit(seg_img, pt_end=None, degree=7, trials=100, sampleNum=100):
    # Find all the data points in the mask
    data = np.where(seg_img == 255)
    X, y = data[0], data[1]
    X = X.reshape(-1, 1)
    # Create polynomial features to fit
    poly = PolynomialFeatures(degree=degree, include_bias=True)
    X = poly.fit_transform(X)
    # Create RANSAC model
    ransac = RANSACRegressor(min_samples=0.3, max_trials=trials)
    ransac.fit(X, y)
    # Prepare to plot the curve
    low = 55
    if pt_end is None:
        upper = max(X[:, 1])
def test_ransac_multi_dimensional_targets():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, random_state=0)

    # 3-D target values
    yyy = np.column_stack([y, y, y])

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, yyy)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
def test_ransac_is_data_valid():
    def is_data_valid(X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    rng = np.random.RandomState(0)
    X = rng.rand(10, 2)
    y = rng.rand(10, 1)

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5,
                                       is_data_valid=is_data_valid,
                                       random_state=0)
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
def get_classifier(training_file):
    print('Extracting classifier from json file...')
    with open(training_file, 'r') as f:
        mean_dict = json.load(f)
    target_vals = []
    input_vals = []
    targets = [region for region in mean_dict.keys()]
    for region in mean_dict:
        # Use RANSAC to drop outliers before training on the region's values
        ransac = RANSACRegressor()
        power = np.array(mean_dict[region])
        x = np.arange(len(power)).reshape(-1, 1)
        ransac.fit(x, power)
        inlier_mask = ransac.inlier_mask_
        region_ind = targets.index(region)
        for val in power[inlier_mask]:
            input_vals.append(val)
            target_vals.append(region_ind)

        # Alternative: RANSAC for outlier detection, using only the region
        # mean as input
        # ransac = RANSACRegressor()
        # power = np.array(mean_dict[region])
        # x = np.arange(len(power)).reshape(-1, 1)
        # ransac.fit(x, power)
        # y = ransac.predict(x)
        # input_vals.append(np.mean(y))
        # region_ind = targets.index(region)
        # target_vals.append(region_ind)

        # Alternative: use the values directly for the classifier
        # region_ind = targets.index(region)
        # for val in mean_dict[region]:
        #     target_vals.append(region_ind)
        #     input_vals.append(val)

    X = np.array(input_vals).reshape(-1, 1)
    Y = np.array(target_vals)
    classifier = GaussianNB()
    # classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(X, Y)
    return classifier, targets
def ransac():
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
        header=None, sep='\\s+')
    df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                  'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    ransac = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        residual_metric=lambda x: np.sum(np.abs(x), axis=1),
        residual_threshold=5.0,
        random_state=0)
    X = df[['RM']].values
    y = df['MEDV'].values
    ransac.fit(X, y)
    inlier_mask = ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    line_X = np.arange(3, 10, 1)
    line_y_ransac = ransac.predict(line_X[:, np.newaxis])
    plt.scatter(X[inlier_mask], y[inlier_mask], c='blue', marker='o',
                label='Inliers')
    plt.scatter(X[outlier_mask], y[outlier_mask], c='lightgreen', marker='s',
                label='Outliers')
    plt.plot(line_X, line_y_ransac, color='red')
    plt.xlabel('Average number of rooms [RM]')
    plt.ylabel('Price in $1000\'s [MEDV]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.savefig(PL10 + 'ransac_fit.png', dpi=300)
    plt.close()
    print('Slope: %.3f' % ransac.estimator_.coef_[0])
    print('Intercept: %.3f' % ransac.estimator_.intercept_)
def test_ransac_dynamic_max_trials():
    # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
    # Hartley, R.~I. and Zisserman, A., 2004,
    # Multiple View Geometry in Computer Vision, Second Edition,
    # Cambridge University Press, ISBN: 0521540518

    # e = 0%, min_samples = 2
    assert _dynamic_max_trials(100, 100, 2, 0.99) == 1

    # e = 5%, min_samples = 2
    assert _dynamic_max_trials(95, 100, 2, 0.99) == 2
    # e = 10%, min_samples = 2
    assert _dynamic_max_trials(90, 100, 2, 0.99) == 3
    # e = 30%, min_samples = 2
    assert _dynamic_max_trials(70, 100, 2, 0.99) == 7
    # e = 50%, min_samples = 2
    assert _dynamic_max_trials(50, 100, 2, 0.99) == 17

    # e = 5%, min_samples = 8
    assert _dynamic_max_trials(95, 100, 8, 0.99) == 5
    # e = 10%, min_samples = 8
    assert _dynamic_max_trials(90, 100, 8, 0.99) == 9
    # e = 30%, min_samples = 8
    assert _dynamic_max_trials(70, 100, 8, 0.99) == 78
    # e = 50%, min_samples = 8
    assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177

    # probability edge cases, min_samples = 10
    assert _dynamic_max_trials(1, 100, 10, 0) == 0
    assert _dynamic_max_trials(1, 100, 10, 1) == float('inf')

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       stop_probability=-0.1)
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       stop_probability=1.1)
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
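# The hand-checked values above follow the standard RANSAC trial-count
# formula N = ceil(log(1 - p) / log(1 - w**m)), with p the desired success
# probability, w the inlier ratio, and m = min_samples. A sketch
# reproducing the e = 50%, min_samples = 8 entry:
import numpy as np

p, w, m = 0.99, 0.5, 8
print(int(np.ceil(np.log(1 - p) / np.log(1 - w ** m))))  # -> 1177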
def test_ransac_fit_sample_weight():
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, weights)
    # sanity check
    assert ransac_estimator.inlier_mask_.shape[0] == n_samples

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False
    # check that mask is correct
    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)

    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    X_ = random_state.randint(0, 200, [10, 1])
    y_ = np.ndarray.flatten(0.2 * X_ + 2)
    sample_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    X_flat = np.append(
        np.repeat(X_, sample_weight, axis=0),
        np.repeat(outlier_X, outlier_weight, axis=0),
        axis=0,
    )
    y_flat = np.ndarray.flatten(
        np.append(
            np.repeat(y_, sample_weight, axis=0),
            np.repeat(outlier_y, outlier_weight, axis=0),
            axis=0,
        ))
    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    sample_weight = np.append(sample_weight, outlier_weight)
    X_ = np.append(X_, outlier_X, axis=0)
    y_ = np.append(y_, outlier_y)
    ransac_estimator.fit(X_, y_, sample_weight)
    assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that if base_estimator.fit doesn't support
    # sample_weight, raises error
    base_estimator = OrthogonalMatchingPursuit()
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=10)
    err_msg = f"{base_estimator.__class__.__name__} does not support sample_weight."
    with pytest.raises(ValueError, match=err_msg):
        ransac_estimator.fit(X, y, weights)
def ransac(df, xcols):
    # function to deal with outliers
    y = df['target_proxy']
    X = df[list(xcols)[0]]
    X = np.transpose(np.array([X]))

    # Standardize and split the training and test data
    X_std = standardize(X)
    ts = 0.3
    X_train, X_test, y_train, y_test = \
        train_test_split(X_std, y, test_size=ts, random_state=0)

    ransac = RANSACRegressor(
        LinearRegression(),
        max_trials=100,
        min_samples=50,
        residual_metric=lambda x: np.sum(np.abs(x), axis=1),
        residual_threshold=5.0,
        random_state=0)
    ransac.fit(X, y)
    inlier_mask = ransac.inlier_mask_
    outlier_mask = np.logical_not(inlier_mask)
    line_X = np.arange(3, 10, 1)
    line_y_ransac = ransac.predict(line_X[:, np.newaxis])
    plt.scatter(X[inlier_mask], y[inlier_mask], c='blue', marker='o',
                label='Inliers')
    plt.scatter(X[outlier_mask], y[outlier_mask], c='lightgreen', marker='s',
                label='Outliers')
    plt.plot(line_X, line_y_ransac, color='red')
    plt.xlabel('x-val')
    plt.ylabel('Returns')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(IMG_PATH + 'ransac_fit.png', dpi=300)
    plt.close()
def test_ransac_fit_sample_weight():
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, weights)
    # sanity check
    assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples)

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False
    # check that mask is correct
    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)

    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    X_ = random_state.randint(0, 200, [10, 1])
    y_ = np.ndarray.flatten(0.2 * X_ + 2)
    sample_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    X_flat = np.append(np.repeat(X_, sample_weight, axis=0),
                       np.repeat(outlier_X, outlier_weight, axis=0), axis=0)
    y_flat = np.ndarray.flatten(
        np.append(np.repeat(y_, sample_weight, axis=0),
                  np.repeat(outlier_y, outlier_weight, axis=0), axis=0))
    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    sample_weight = np.append(sample_weight, outlier_weight)
    X_ = np.append(X_, outlier_X, axis=0)
    y_ = np.append(y_, outlier_y)
    ransac_estimator.fit(X_, y_, sample_weight)
    assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that if base_estimator.fit doesn't support
    # sample_weight, raises error
    base_estimator = Lasso()
    ransac_estimator = RANSACRegressor(base_estimator)
    assert_raises(ValueError, ransac_estimator.fit, X, y, weights)
# # Fitting a robust regression model using RANSAC

ransac = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         loss='absolute_loss',
                         residual_threshold=5.0,
                         random_state=0)
ransac.fit(X, y)

inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

line_X = np.arange(3, 10, 1)
line_y_ransac = ransac.predict(line_X[:, np.newaxis])
plt.scatter(X[inlier_mask], y[inlier_mask],
            c='steelblue', edgecolor='white',
            marker='o', label='Inliers')
plt.scatter(X[outlier_mask], y[outlier_mask],
            c='limegreen', edgecolor='white',
            marker='s', label='Outliers')
plt.plot(line_X, line_y_ransac, color='black', lw=2)
plt.xlabel('Average number of rooms [RM]')
plt.ylabel('Price in $1000s [MEDV]')
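# Note: on scikit-learn >= 1.0 the loss above is spelled 'absolute_error'
# ('absolute_loss' was deprecated in 1.0 and removed in a later release), so
# a sketch of the same fit on recent versions would read:
ransac = RANSACRegressor(LinearRegression(),
                         max_trials=100,
                         min_samples=50,
                         loss='absolute_error',
                         residual_threshold=5.0,
                         random_state=0)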
def get_ransac(self, x, y):
    # RANSAC regressor
    ransac = RANSACRegressor(LinearRegression(), residual_threshold=5)
    ransac.fit(x, y)
    return ransac
def fit_RANSAC(features_train, labels_train, features_pred):
    model = RANSACRegressor()
    model.fit(features_train, labels_train)
    labels_pred = model.predict(features_pred)
    print("RANSAC - coefficient of determination R^2 of the prediction: ",
          model.score(features_train, labels_train))
    return labels_pred
# Passive Aggressive Regression
print('passive aggressive')
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
# par.fit(x[:, np.newaxis], y)
# par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print('ransac')
ransac = RANSACRegressor()
# ransac.fit(x[:, np.newaxis], y)
# ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
ransac.fit(x, y)
ransac_sts_scores = ransac.predict(xt)

# Logistic Regression
print('logistic')
lgr = LogisticRegression()
# lgr.fit(x[:, np.newaxis], y)
# lgr_sts_scores = lgr.predict(xt[:, np.newaxis])
lgr.fit(x, y)
lgr_sts_scores = lgr.predict(xt)

'''
# SLOW Regressors !!!!
# Randomized Log Regression
def test_ransac_residual_loss():
    loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
    loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
    loss_mono = lambda y_true, y_pred: np.abs(y_true - y_pred)
    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0)
    ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi1)
    ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss=loss_multi2)

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator1.predict(X))
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=2,
                                        residual_threshold=5, random_state=0,
                                        loss="squared_loss")
    ransac_estimator3.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))