Example #1
def test_ransac_max_trials():
    base_estimator = LinearRegression()

    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
                                       residual_threshold=5, max_trials=0,
                                       random_state=0)
    assert_raises(ValueError, ransac_estimator.fit, X, y)

    # there is a 1e-9 chance it will take this many trials. No good reason
    # 1e-2 isn't enough, it can still happen
    # 2 is what RANSAC defines as min_samples = X.shape[1] + 1
    max_trials = _dynamic_max_trials(
        len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9)
    ransac_estimator = RANSACRegressor(base_estimator, min_samples=2)
    for i in range(50):
        ransac_estimator.set_params(min_samples=2, random_state=i)
        ransac_estimator.fit(X, y)
        assert_less(ransac_estimator.n_trials_, max_trials + 1)
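These excerpts come from scikit-learn's RANSAC test suite, where X, y, and outliers are module-level fixtures and the assert_* helpers came from the (since removed) sklearn.utils.testing module. A hypothetical stand-in that makes the test above runnable, with made-up fixture values rather than the ones in the original test file:

# Hypothetical fixtures: a noisy line with a few gross outliers injected
# at known indices. The real values live in sklearn's test module.
import numpy as np
from sklearn.linear_model import LinearRegression, RANSACRegressor
# private helper; lives in sklearn.linear_model._ransac on recent versions
# (sklearn.linear_model.ransac on older ones)
from sklearn.linear_model._ransac import _dynamic_max_trials

rng = np.random.RandomState(0)
X = np.sort(rng.uniform(-10, 10, size=(100, 1)), axis=0)
y = (2.0 * X).ravel() + 1.0 + rng.normal(scale=0.5, size=100)
outliers = np.array([5, 20, 60, 80])   # indices of corrupted samples
y[outliers] += 100.0                   # gross offsets away from the line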
Example #2
def test_ransac_dynamic_max_trials():
    # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
    #   Hartley, R.~I. and Zisserman, A., 2004,
    #   Multiple View Geometry in Computer Vision, Second Edition,
    #   Cambridge University Press, ISBN: 0521540518

    # e = 0%, min_samples = 2
    assert_equal(_dynamic_max_trials(100, 100, 2, 0.99), 1)

    # e = 5%, min_samples = 2
    assert_equal(_dynamic_max_trials(95, 100, 2, 0.99), 2)
    # e = 10%, min_samples = 2
    assert_equal(_dynamic_max_trials(90, 100, 2, 0.99), 3)
    # e = 30%, min_samples = 2
    assert_equal(_dynamic_max_trials(70, 100, 2, 0.99), 7)
    # e = 50%, min_samples = 2
    assert_equal(_dynamic_max_trials(50, 100, 2, 0.99), 17)

    # e = 5%, min_samples = 8
    assert_equal(_dynamic_max_trials(95, 100, 8, 0.99), 5)
    # e = 10%, min_samples = 8
    assert_equal(_dynamic_max_trials(90, 100, 8, 0.99), 9)
    # e = 30%, min_samples = 8
    assert_equal(_dynamic_max_trials(70, 100, 8, 0.99), 78)
    # e = 50%, min_samples = 8
    assert_equal(_dynamic_max_trials(50, 100, 8, 0.99), 1177)

    # min_samples = 10; degenerate stop_probability values 0 and 1
    assert_equal(_dynamic_max_trials(1, 100, 10, 0), 0)
    assert_equal(_dynamic_max_trials(1, 100, 10, 1), float('inf'))

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       stop_probability=-0.1)
    assert_raises(ValueError, ransac_estimator.fit, X, y)
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       stop_probability=1.1)
    assert_raises(ValueError, ransac_estimator.fit, X, y)
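The helper exercised above implements the standard RANSAC bound on the number of trials, N = ceil(log(1 - p) / log(1 - w^s)) with inlier ratio w and sample size s (Hartley & Zisserman, Table 4.3). A sketch consistent with the values asserted above, assuming sklearn's argument order (n_inliers, n_samples, min_samples, probability):

import numpy as np

def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability):
    # probability that one draw of min_samples points is all inliers
    inlier_ratio = n_inliers / float(n_samples)
    nom = 1 - probability
    if nom == 0:
        return float('inf')  # certainty can never be guaranteed
    denom = 1 - inlier_ratio ** min_samples
    if denom == 0:
        return 1             # every point is an inlier: one trial suffices
    return abs(float(np.ceil(np.log(nom) / np.log(denom))))

# matches the hand-checked table values asserted above
assert _dynamic_max_trials(50, 100, 2, 0.99) == 17
assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177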
Example #5
    # Imports this excerpt relies on (it is a method of a RANSACRegressor-like
    # class, so `self` carries the usual RANSAC parameters):
    #   import itertools
    #   import warnings
    #   import numpy as np
    #   from itertools import compress
    #   from scipy.stats import poisson
    #   from sklearn.base import clone
    #   from sklearn.exceptions import ConvergenceWarning
    #   from sklearn.linear_model import LinearRegression
    #   from sklearn.linear_model._ransac import _dynamic_max_trials
    #   from sklearn.utils import check_array, check_consistent_length, \
    #       check_random_state
    #   from sklearn.utils.random import sample_without_replacement
    #   from sklearn.utils.validation import has_fit_parameter
    def fitSegm(self, X, y, segmList, sample_weight=None):

        merged = list(itertools.chain.from_iterable(segmList))

        X = check_array(X, accept_sparse='csr')
        y = check_array(y, ensure_2d=False)
        check_consistent_length(X, y)

        if self.base_estimator is not None:
            base_estimator = clone(self.base_estimator)
        else:
            base_estimator = LinearRegression()

        if self.min_samples is None:
            # assume linear model by default
            min_samples = (X.shape[1] + 1 if len(segmList) < 2
                           else X.shape[1])
        elif 0 < self.min_samples < 1:
            min_samples = np.ceil(self.min_samples * X.shape[0])
        elif self.min_samples >= 1:
            if self.min_samples % 1 != 0:
                raise ValueError("Absolute number of samples must be an "
                                 "integer value.")
            min_samples = self.min_samples
        else:
            raise ValueError("Value for `min_samples` must be scalar and "
                             "positive.")
        if min_samples > X.shape[0]:
            raise ValueError("`min_samples` may not be larger than number "
                             "of samples: n_samples = %d." % (X.shape[0]))

        if self.stop_probability < 0 or self.stop_probability > 1:
            raise ValueError("`stop_probability` must be in range [0, 1].")

        if self.residual_threshold is None:
            # MAD (median absolute deviation)
            residual_threshold = np.median(np.abs(y - np.median(y)))
        else:
            residual_threshold = self.residual_threshold

        if self.loss == "absolute_loss":
            if y.ndim == 1:
                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)
            else:
                loss_function = lambda \
                    y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)

        elif self.loss == "squared_loss":
            if y.ndim == 1:
                loss_function = lambda y_true, y_pred: (y_true - y_pred)**2
            else:
                loss_function = lambda \
                    y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)

        elif callable(self.loss):
            loss_function = self.loss

        else:
            raise ValueError(
                "loss should be 'absolute_loss', 'squared_loss' or a"
                " callable. Got %s." % self.loss)

        random_state = check_random_state(self.random_state)

        try:  # Not all estimators accept a random_state
            base_estimator.set_params(random_state=random_state)
        except ValueError:
            pass

        estimator_fit_has_sample_weight = has_fit_parameter(
            base_estimator, "sample_weight")
        estimator_name = type(base_estimator).__name__
        if (sample_weight is not None
                and not estimator_fit_has_sample_weight):
            raise ValueError("%s does not support sample_weight. Sample"
                             " weights are only used for the fit itself."
                             % estimator_name)
        if sample_weight is not None:
            sample_weight = np.asarray(sample_weight)

        n_inliers_best = 1
        score_best = -np.inf
        inlier_mask_best = None
        X_inlier_best = None
        y_inlier_best = None
        aicc_ = None
        self.n_skips_no_inliers_ = 0
        self.n_skips_invalid_data_ = 0
        self.n_skips_invalid_model_ = 0

        # max valid index within each segment (segment length - 1)
        size_sl = [len(s) - 1 for s in segmList]
        n_segments = len(size_sl)

        # number of data samples
        n_samples = X.shape[0]
        sample_idxs = np.arange(n_samples)

        self.n_trials_ = 0
        max_trials = self.max_trials

        while self.n_trials_ < max_trials:
            self.n_trials_ += 1
            if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
                    self.n_skips_invalid_model_) > self.max_skips:
                break

            # choose a random sample set: draw min_samples segments, then one
            # point from each chosen segment
            # (before this change, indices were drawn over samples directly:
            #  subset_idxs = sample_without_replacement(
            #      n_samples, min_samples, random_state=random_state))
            subset_idx_entries = sample_without_replacement(
                n_segments, min_samples, random_state=random_state)

            # use the seeded random_state rather than the global `random`
            # module so runs stay reproducible; randint's upper bound is
            # exclusive, hence size_sl[ss] + 1
            subset_idxs = np.asarray(
                [segmList[ss][random_state.randint(0, size_sl[ss] + 1)]
                 for ss in subset_idx_entries])

            X_subset = X[subset_idxs]
            y_subset = y[subset_idxs]

            # check if random sample set is valid
            if (self.is_data_valid is not None
                    and not self.is_data_valid(X_subset, y_subset)):
                self.n_skips_invalid_data_ += 1
                continue

            # fit model for current random sample set
            if sample_weight is None:
                base_estimator.fit(X_subset, y_subset)
            else:
                base_estimator.fit(X_subset,
                                   y_subset,
                                   sample_weight=sample_weight[subset_idxs])

            # check if estimated model is valid
            if (self.is_model_valid is not None and not self.is_model_valid(
                    base_estimator, X_subset, y_subset)):
                self.n_skips_invalid_model_ += 1
                continue

            # second validity check: enough of the sampled points themselves
            # (at least n_features) must be inliers of the fitted model
            y_pred_subset = base_estimator.predict(X_subset)
            residuals_ii = loss_function(y_subset, y_pred_subset)
            inlier_mask_subset_ii = residuals_ii < residual_threshold

            if np.sum(inlier_mask_subset_ii) < X.shape[1]:
                self.n_skips_invalid_model_ += 1
                continue

            # ---- inlier evaluation ----

            # residuals of all data for current random sample model
            y_pred = base_estimator.predict(X[merged])
            residuals_subset = loss_function(y[merged], y_pred)

            # classify data into inliers and outliers
            inlier_mask_subset = residuals_subset < residual_threshold
            n_inliers_subset = np.sum(inlier_mask_subset)
            # debug:
            # print(f"n_inliers_subset {n_inliers_subset} "
            #       f"from {inlier_mask_subset.shape}")

            # extract inlier data set
            inlier_idxs_subset = list(compress(merged, inlier_mask_subset))

            X_inlier_subset = X[inlier_idxs_subset]
            y_inlier_subset = y[inlier_idxs_subset]

            use_plain_evaluation = False  # stock RANSAC scoring, for reference
            if use_plain_evaluation:  # plain evaluation (basic approach)

                # skip if the consensus set is smaller than the minimal sample
                if n_inliers_subset < min_samples:
                    continue

                # fewer inliers -> skip current random sample
                if n_inliers_subset < n_inliers_best:
                    self.n_skips_no_inliers_ += 1
                    continue

                # score of inlier data set
                score_subset = base_estimator.score(X_inlier_subset,
                                                    y_inlier_subset)

                # same number of inliers but worse score -> skip current random
                # sample
                if (n_inliers_subset == n_inliers_best
                        and score_subset <= score_best):
                    continue

            else:  # per-segment evaluation, one term per calibration point

                # score that rewards spreading inliers across all segments
                indScore = 0
                cc = 0
                for sSeg in size_sl:

                    # rows of `merged` belonging to this segment
                    c_seg = range(cc, sSeg + cc)
                    cc += sSeg

                    # count this segment's inliers and map the count through a
                    # Poisson CDF (mean 0.3 * segment size), so each segment
                    # contributes a bounded [0, 1] term
                    nScore = np.sum(inlier_mask_subset[c_seg])
                    indScore += poisson.cdf(nScore, 0.3 * sSeg)

                if indScore <= score_best:
                    continue
                score_subset = indScore

            # save current random sample as best sample
            n_inliers_best = n_inliers_subset
            score_best = score_subset
            inlier_mask_best = inlier_mask_subset
            X_inlier_best = X_inlier_subset
            y_inlier_best = y_inlier_subset

            max_trials = min(
                max_trials,
                _dynamic_max_trials(n_inliers_best, n_samples, min_samples,
                                    self.stop_probability))

            # break if sufficient number of inliers or score is reached
            if (n_inliers_best >= self.stop_n_inliers
                    or score_best >= self.stop_score):
                break

        # if none of the iterations met the required criteria
        if inlier_mask_best is None:
            base_estimator.coef_ = -999  # sentinel: no valid consensus set
            if ((self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
                 self.n_skips_invalid_model_) > self.max_skips):
                raise ValueError(
                    "RANSAC skipped more iterations than `max_skips` without"
                    " finding a valid consensus set. Iterations were skipped"
                    " because each randomly chosen sub-sample failed the"
                    " passing criteria. See estimator attributes for"
                    " diagnostics (n_skips*).")
            else:
                raise ValueError(
                    "RANSAC could not find a valid consensus set. All"
                    " `max_trials` iterations were skipped because each"
                    " randomly chosen sub-sample failed the passing criteria."
                    " See estimator attributes for diagnostics (n_skips*).")
        else:
            if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
                    self.n_skips_invalid_model_) > self.max_skips:
                warnings.warn(
                    "RANSAC found a valid consensus set but exited"
                    " early due to skipping more iterations than"
                    " `max_skips`. See estimator attributes for"
                    " diagnostics (n_skips*).", ConvergenceWarning)
            # estimate final model using all inliers
            base_estimator.fit(X_inlier_best, y_inlier_best)
            self.estimator_ = base_estimator
            self.inlier_mask_ = inlier_mask_best
            return self
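Since fitSegm samples one point from each of min_samples randomly chosen segments, callers must supply segmList as a list of row-index lists, one per calibration point, covering the rows of X. A hypothetical driver, assuming a SegmentedRANSAC subclass of RANSACRegressor that adds the method above:

# Hypothetical usage; SegmentedRANSAC is an assumed RANSACRegressor
# subclass that defines fitSegm() as shown in Example #5.
import numpy as np

rng = np.random.RandomState(42)
X = rng.uniform(0, 10, size=(60, 1))
y = (3.0 * X).ravel() + rng.normal(scale=0.2, size=60)

# three segments of 20 row indices each, one per calibration point
segmList = [list(range(0, 20)), list(range(20, 40)), list(range(40, 60))]

model = SegmentedRANSAC(min_samples=2, residual_threshold=1.0,
                        max_trials=100, random_state=0)
model.fitSegm(X, y, segmList)
print(model.estimator_.coef_, int(np.sum(model.inlier_mask_)))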