Example #1
def get_fit_model(score_list, label_list):
    p_train = np.array(score_list)
    y_train = np.array(label_list)

    ir = IR()
    ir.fit(p_train, y_train)
    return ir
Example #2
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    y = np.array([10, 0, 2])
    y_ = np.array([4, 4, 4])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
Example #3
class HTMLTime(object):
    """
    >>> htmlTime = HTMLTime(pathToIDX)
    >>> t = htmlTime(frameNumber)
    """

    def __init__(self, idx):
        super(HTMLTime, self).__init__()
        self.idx = idx

        # load .idx file using pandas
        df = read_table(
            self.idx, sep=r'\s+',
            names=['frame_number', 'frame_type', 'bytes', 'seconds']
        )
        # np.float was removed in NumPy 1.20+; use the builtin float
        x = np.array(df['frame_number'], dtype=float)
        y = np.array(df['seconds'], dtype=float)

        # train isotonic regression
        self.ir = IsotonicRegression(y_min=np.min(y), y_max=np.max(y))
        self.ir.fit(x, y)

        # frame number support
        self.xmin = np.min(x)
        self.xmax = np.max(x)

    def __call__(self, frameNumber):

        return self.ir.transform([min(self.xmax,
                                      max(self.xmin, frameNumber)
                                      )])[0]
Example #4
def test_isotonic_regression_ties_secondary_():
    """
    Test isotonic regression fit, transform and fit_transform
    against the "secondary" ties method and "pituitary" data from R
     "isotone" package, as detailed in: J. d. Leeuw, K. Hornik, P. Mair,
     Isotone Optimization in R: Pool-Adjacent-Violators Algorithm
    (PAVA) and Active Set Methods

    Set values based on pituitary example and
     the following R command detailed in the paper above:
    > library("isotone")
    > data("pituitary")
    > res1 <- gpava(pituitary$age, pituitary$size, ties="secondary")
    > res1$x

    `isotone` version: 1.0-2, 2014-09-07
    R version: R version 3.1.1 (2014-07-10)
    """
    x = [8, 8, 8, 10, 10, 10, 12, 12, 12, 14, 14]
    y = [21, 23.5, 23, 24, 21, 25, 21.5, 22, 19, 23.5, 25]
    y_true = [22.22222, 22.22222, 22.22222, 22.22222, 22.22222, 22.22222,
              22.22222, 22.22222, 22.22222, 24.25, 24.25]

    # Check fit, transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_almost_equal(ir.transform(x), y_true, 4)
    assert_array_almost_equal(ir.fit_transform(x, y), y_true, 4)
Example #5
def test_fast_predict():
    # test that the faster prediction change doesn't
    # affect out-of-sample predictions:
    # https://github.com/scikit-learn/scikit-learn/pull/6206
    rng = np.random.RandomState(123)
    n_samples = 10 ** 3
    # X values over the -10,10 range
    X_train = 20.0 * rng.rand(n_samples) - 10
    y_train = np.less(rng.rand(n_samples),
                      expit(X_train)).astype('int64').astype('float64')

    weights = rng.rand(n_samples)
    # we also want to test that everything still works when some weights are 0
    weights[rng.rand(n_samples) < 0.1] = 0

    slow_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")
    fast_model = IsotonicRegression(y_min=0, y_max=1, out_of_bounds="clip")

    # Build interpolation function with ALL input data, not just the
    # non-redundant subset. The following 2 lines are taken from the
    # .fit() method, without removing unnecessary points
    X_train_fit, y_train_fit = slow_model._build_y(X_train, y_train,
                                                   sample_weight=weights,
                                                   trim_duplicates=False)
    slow_model._build_f(X_train_fit, y_train_fit)

    # fit with just the necessary data
    fast_model.fit(X_train, y_train, sample_weight=weights)

    X_test = 20.0 * rng.rand(n_samples) - 10
    y_pred_slow = slow_model.predict(X_test)
    y_pred_fast = fast_model.predict(X_test)

    assert_array_equal(y_pred_slow, y_pred_fast)
Example #6
def _gspv_interpolate_cloud(powers, velocities):
    from sklearn.isotonic import IsotonicRegression
    from scipy.interpolate import InterpolatedUnivariateSpline
    regressor = IsotonicRegression()
    regressor.fit(powers, velocities)
    x = np.linspace(min(powers), max(powers))
    y = regressor.predict(x)
    return InterpolatedUnivariateSpline(x, y, k=1, ext=3)
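A brief usage sketch (not part of the source; toy values chosen for illustration). Note that `ext=3` makes the spline return the boundary value outside the fitted power range, so extrapolation is clamped rather than linear:

import numpy as np

# Hypothetical usage of _gspv_interpolate_cloud with toy data:
powers = np.array([1.0, 2.0, 3.0, 4.0])
velocities = np.array([10.0, 12.0, 11.0, 15.0])
spline = _gspv_interpolate_cloud(powers, velocities)
print(spline(2.5))    # interpolated on the monotone (isotonic) fit
print(spline(100.0))  # ext=3: clamped to the boundary value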
Example #7
def test_isotonic_duplicate_min_entry():
    x = [0, 0, 1]
    y = [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)
    all_predictions_finite = np.all(np.isfinite(ir.predict(x)))
    assert_true(all_predictions_finite)
Example #8
# The parametrize decorator is restored here; the exact dtype list is assumed.
@pytest.mark.parametrize("y_dtype", [np.int32, np.int64, np.float32, np.float64])
def test_isotonic_mismatched_dtype(y_dtype):
    # regression test for #15004
    # check that data are converted when X and y dtype differ
    reg = IsotonicRegression()
    y = np.array([2, 1, 4, 3, 5], dtype=y_dtype)
    X = np.arange(len(y), dtype=np.float32)
    reg.fit(X, y)
    assert reg.predict(X).dtype == X.dtype
Example #9
def test_isotonic_duplicate_min_entry():
    x = [0, 0, 1]
    y = [0, 0, 1]

    ir = IsotonicRegression(increasing=True, out_of_bounds="clip")
    ir.fit(x, y)
    all_predictions_finite = np.all(np.isfinite(ir.predict(x)))
    assert all_predictions_finite
Example #10
def predict_probs(model, train_class, train_features, test_features, normalize_probs=None):
    """
    Fit a given binary classification model to training sample features
    and return predicted probabilities for the positive class for
    the training and test samples.
    """
    model.fit(train_features, train_class)
    train_prob, test_prob = [model.predict_proba(f)[:, 1] for f in (train_features, test_features)]
    if normalize_probs == "ROCSlope":
        # calibrate probabilities based on the estimated local slope
        # of the ROC curve
        chunk_size = 10  # number of instances for slope estimation
        n_train_pos = 301  # total number of positive (preictal) instances
        n_train_neg = 3766  # total negative (interictal)
        n_chunk_tot = 4000.0 / float(chunk_size)  # estimated total in test data
        # sort training data classes by predicted probability
        sort_order = train_prob.argsort()
        p_sorted = train_prob[sort_order]
        c_sorted = train_class[sort_order]
        ix = np.arange(len(train_prob))
        # loop over chunks (integer division keeps this Python 3 compatible)
        for i_ch in range(1 + (len(train_prob) - 1) // chunk_size):
            p_chunk, c_chunk = [
                x[np.where((ix >= i_ch * chunk_size) & (ix < (i_ch + 1) * chunk_size))[0]] for x in (p_sorted, c_sorted)
            ]
            pmin = np.min(p_chunk)
            pmax = np.max(p_chunk)
            # compute TPR/FPR (relative to the entire training set)
            tpr = np.sum(c_chunk) / float(n_train_pos)
            fpr = np.sum(1 - c_chunk) / float(n_train_neg)
            # compute probability transformation for this chunk
            qc = (2.0 / np.pi) * np.arctan(tpr / (fpr + 1.0e-3 / float(n_train_neg)))
            qmin = np.max((0.0, qc - 0.5 / float(n_chunk_tot)))
            qmax = np.min((1.0, qc + 0.5 / float(n_chunk_tot)))
            # transform probabilities
            tr_p_ch = np.where((train_prob > pmin) & (train_prob <= pmax))[0]
            train_prob[tr_p_ch] = qmin + (train_prob[tr_p_ch] - pmin) * (qmax - qmin) / (pmax - pmin)
            te_p_ch = np.where((test_prob > pmin) & (test_prob <= pmax))[0]
            test_prob[te_p_ch] = qmin + (test_prob[te_p_ch] - pmin) * (qmax - qmin) / (pmax - pmin)
    elif normalize_probs == "LogShift":
        # shift probabilities in log(p/(1-p)) so that a fraction f_pre
        # of the samples has probability > 0.5, where f_pre is the
        # fraction of preictal samples in the training data
        f_pre = len(np.where(train_class)[0]) / float(len(train_class))
        train_th, test_th = [sorted(p)[int((1.0 - f_pre) * len(p))] for p in (train_prob, test_prob)]
        train_prob, test_prob = [
            (1.0 - pth) * p / (pth + p - 2.0 * pth * p)
            for (pth, p) in zip((train_th, test_th), (train_prob, test_prob))
        ]
    elif normalize_probs == "IsoReg":
        # fit an isotonic regression model to training probabilities
        # and use the model to transform all probabilities
        prob_model = IsotonicRegression(out_of_bounds="clip")
        prob_model.fit(train_prob, train_class)
        train_prob, test_prob = [prob_model.transform(p) for p in (train_prob, test_prob)]
    elif normalize_probs is not None:
        sys.exit("Invalid value of normalize_probs: " + str(normalize_probs))
    return (train_prob, test_prob)
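A standalone check (not from the source) of the `LogShift` transform used above: since f / (1 - f) = (p / (1 - p)) * ((1 - p_th) / p_th), the transform is exactly an additive shift in log-odds, so it maps p = p_th to 0.5 and preserves the ranking of probabilities:

import numpy as np

# Sketch verifying the LogShift identity (threshold value chosen arbitrarily):
def log_shift(p, p_th):
    return (1.0 - p_th) * p / (p_th + p - 2.0 * p_th * p)

p = np.linspace(0.01, 0.99, 9)
print(log_shift(0.3, 0.3))                           # 0.5: the threshold maps to one half
print(bool(np.all(np.diff(log_shift(p, 0.3)) > 0)))  # True: ordering is preserved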
Example #11
def calibrate_row(row):
    calibrator = IsotonicRegression(y_min=0, y_max=1)
    x = lab[~np.isnan(lab[row])][row].values
    y = lab[~np.isnan(lab[row])]['labels'].values
    calibrator.fit(x, y)
    lab[row] = calibrator.predict(lab[row].values)
    amb[row] = calibrator.predict(amb[row].values)
    unl[row] = calibrator.predict(unl[row].values)
    scr[row] = calibrator.predict(scr[row].values)
Example #12
    def do_cv_pred(train, test, files):
        print("------- do preds --------")
        ensemble_col = [f[1] for f in files]
        train_x = train[ensemble_col]
        test_x = test[ensemble_col].values.reshape(-1)
        train_y = train["target"]

        submission = pd.DataFrame()
        submission["card_id"] = test["card_id"]
        submission["target"] = 0

        outliers = (train["target"] < -30).astype(int).values
        split_num = 5
        skf = model_selection.StratifiedKFold(n_splits=split_num,
                                              shuffle=True,
                                              random_state=4590)
        train_preds = []
        for idx, (train_index,
                  test_index) in enumerate(skf.split(train, outliers)):
            X_train, X_test = train_x.iloc[train_index], train_x.iloc[
                test_index]
            y_train, y_test = train_y.iloc[train_index], train_y.iloc[
                test_index]

            reg = IsotonicRegression()
            X_train = X_train.values.reshape(-1)
            X_test = X_test.values.reshape(-1)
            reg.fit(X_train, y_train)
            valid_set_pred = reg.predict(X_test)
            print(y_test.describe())
            temp = pd.DataFrame(valid_set_pred)
            print(temp.describe())
            score = evaluator.rmse(y_test, valid_set_pred)
            print(score)

            y_pred = reg.predict(test_x)
            submission["target"] = submission["target"] + y_pred
            train_id = train.iloc[test_index]
            train_cv_prediction = pd.DataFrame()
            train_cv_prediction["card_id"] = train_id["card_id"]
            train_cv_prediction["cv_pred"] = valid_set_pred
            train_preds.append(train_cv_prediction)

        train_output = pd.concat(train_preds, axis=0)

        submission["target"] = submission["target"] / split_num
        submission.to_csv(path_const.OUTPUT_SUB, index=False)

        train_output["cv_pred"] = np.clip(train_output["cv_pred"], -33.219281,
                                          18.0)
        train_output.to_csv(path_const.OUTPUT_OOF, index=False)

        df_pred = pd.merge(train[["card_id", "target"]],
                           train_output,
                           on="card_id")
        rmse_score = evaluator.rmse(df_pred["target"], df_pred["cv_pred"])
        print(rmse_score)
Example #13
    def isotonic(self):

        clf = IsotonicRegression()
        train_x = self.train_x.to_list()
        train_y = self.train_y.to_list()
        test_x = self.test_x.to_list()
        clf.fit(train_x, train_y)
        test_y_pred = clf.predict(test_x)
        return test_y_pred
Example #14
    def fit(self, A, Y, weights, fit_init=None, refit=False, increasing=True):
        # Fit the isotonic regression model.
        model = IsotonicRegression(increasing=increasing,
                                   out_of_bounds="clip",
                                   y_min=0.0,
                                   y_max=1.0)
        model.fit(X=A, y=Y, sample_weight=weights)
        self.model_obj = model
        return 0
Example #15
    def linear_regression(self, exp1, exp2, min_samples=5):

        X = []
        Y = []
        Xi = []
        for i in sorted(exp1):
            if i in exp2:
                Xi.append(i)
                X.append(exp2[i])
                Y.append(exp1[i])
        X = np.r_[X]
        Y = np.r_[Y]
        Xi = np.r_[Xi]

        if X.size < min_samples:
            rscore = 0
            slope = 0
            warning = False
        else:
            # clean the inputs by isotonic regression
            warning, increasing_bool = check_increasing(Xi, Y)
            IR = IsotonicRegression(increasing=increasing_bool)
            IR.fit(Xi, Y)
            Y1 = IR.predict(Xi)
            vi = np.where(np.diff(Y1) < 0)[0]
            pieces = np.split(vi, np.where(np.diff(vi) != 1)[0] + 1)
            si = 0
            for i in range(len(pieces) - 1):
                p1 = pieces[i]
                p2 = pieces[i + 1]
                if p1.size / (p2[0] - p1[0]) > 0.5:
                    si = p1[0]
                    break

            if si / X.size > 0.3:  # if more than 30% of the data would be discarded
                si = vi[0]
                if si / X.size > 0.3:
                    si = 0

            X = X[si:]
            Y = Y[si:]

            X = X[:, np.newaxis]
            huber = HuberRegressor().fit(X, Y)
            inlier_mask = np.logical_not(huber.outliers_)
            if inlier_mask.sum() < min_samples:
                rscore = 0
                slope = 0
            else:
                sX = X[inlier_mask]
                sY = Y[inlier_mask]
                rscore = huber.score(sX, sY)
                slope = huber.coef_[0]

        return rscore, slope, warning
Example #16
def train_rcir_cv(training_class,
                  training_scores,
                  validation_class,
                  validation_scores,
                  credible_level=.95,
                  y_min=0,
                  y_max=1,
                  merge_criterion='auc_roc'):
    isotonic_regression_model = IsotonicRegression(y_min=y_min,
                                                   y_max=y_max,
                                                   out_of_bounds='clip')
    isotonic_regression_model.fit(X=training_scores, y=training_class)
    models = []
    # Extract the interpolation model we need:
    tmp_x = isotonic_regression_model.f_.x
    tmp_y = isotonic_regression_model.f_.y
    # Do some corrections (if there are any)
    tmp = correct_for_point_bins(tmp_x, tmp_y)
    x = tmp['x']
    y = tmp['y']
    # Use new boundaries to create an interpolation model that does the heavy lifting of
    # reliably calibrated isotonic regression:
    interpolation_model = interp1d(x=x, y=y, bounds_error=False)
    interpolation_model._fill_value_below = min(y)
    interpolation_model._fill_value_above = max(y)
    training_probabilities = interpolation_model(training_scores)
    # The following array contains all information defining the IR transformation
    bin_summary = np.unique(training_probabilities, return_counts=True)
    credible_intervals = [
        credible_interval(np.round(p * n), n)
        for (p, n) in zip(bin_summary[0], bin_summary[1])
    ]
    width_of_intervals = np.array(
        [row['p_max'] - row['p_min'] for row in credible_intervals])
    rcir_model = {
        'model': interpolation_model,
        'credible level': credible_level,
        'credible intervals': credible_intervals,
        'width of intervals': width_of_intervals,
        'bin summary': bin_summary,
        'd': -1
    }
    metrics = estimate_performance(rcir_model['model'], validation_class,
                                   validation_scores)
    models.append([0, rcir_model['model'], metrics])
    while len(rcir_model['width of intervals']) > 2:  # there are still bins to merge
        rcir_model = merge_bin(rcir_model, training_class, training_scores,
                               merge_criterion)
        metrics = estimate_performance(rcir_model['model'], validation_class,
                                       validation_scores)
        models.append([0, rcir_model['model'], metrics])
    auc_scores = [item[2]['auc_roc'] for item in models]
    best_model_idx = auc_scores.index(max(auc_scores))
    return models[best_model_idx][1]
Example #17
def test_isotonic_regression_oob_raise():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    assert_raises(ValueError, ir.predict, [min(x) - 10, max(x) + 10])
Example #18
    def fit(self, X, y):
        np.random.seed(self.random_state)
        n, m = X.shape
        idx = np.arange(n)
        self.estimators = []

        if (self.distribution == "bernoulli"
                and (np.sum(y) < 3 or np.sum(y) > n - 3)):
            logging.error(("the target (y) needs to have "
                           "at least one example of each class"))
            return None

        i = 0
        while i < self.n_paloboost:
            mask = np.full(n, True)
            if self.block_size is not None:
                n_block = int(n / self.block_size) + 1
                mask_block = (np.random.rand(n_block) < self.subsample0)
                mask = np.repeat(mask_block, self.block_size)[:n]
            else:
                mask = (np.random.rand(n) < self.subsample0)

            X_i, y_i = X[mask, :], y[mask]
            X_j, y_j = X[~mask, :], y[~mask]

            if (self.distribution == "bernoulli"
                    and (np.unique(y_i).shape[0] == 1
                         or np.unique(y_j).shape[0] == 1)):
                continue

            est = PaloBoost(distribution=self.distribution,
                            learning_rate=self.learning_rate,
                            max_depth=self.max_depth,
                            n_estimators=self.n_estimators,
                            subsample=self.subsample1,
                            subsample_splts=self.subsample2,
                            random_state=i * self.n_estimators)
            est.fit(X_i, y_i)
            self.estimators.append(est)
            if self.feature_importances_ is None:
                self.feature_importances_ = est.feature_importances_
            else:
                self.feature_importances_ += est.feature_importances_

            if (self.distribution == "bernoulli" and self.calibrate):
                z_j = est.predict_proba(X_j)[:, 1]
                clb = IsotonicRegression(y_min=0,
                                         y_max=1,
                                         out_of_bounds="clip")
                clb.fit(z_j, y_j)
                self.calibrators.append(clb)
            i += 1

        self.feature_importances_ /= self.n_paloboost
Example #19
def test_isotonic_regression_ties_max():
    # Setup examples with ties on maximum
    x = [1, 2, 3, 4, 5, 5]
    y = [1, 2, 3, 4, 5, 6]
    y_true = [1, 2, 3, 4, 5.5, 5.5]

    # Check that we get identical results for fit/transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(y_true, ir.fit_transform(x, y))
Example #20
def isotonicFit(thr, prec, maxThr=999):
    thr = np.array(thr)
    prec = np.array(prec)
    prec = prec[thr <= maxThr]
    thr = thr[thr <= maxThr]
    objFun = lambda thr, alpha, beta: alpha * thr**beta

    isoReg = IsotonicRegression(y_min=0, y_max=1)
    isoReg.fit(thr, prec)
    #  joblib.dump(isoReg, "/home/rsanchez/Tesis/rriPredMethod/pyCode/webApp/rriPredWeb/media/scoreToPrecModel/mixed.isotonic.joblib")
    return lambda x: isoReg.predict(x), "isotonic"
Example #21
def test_isotonic_regression_oob_raise():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    assert_raises(ValueError, ir.predict, [min(x) - 10, max(x) + 10])
Example #22
def test_isotonic_regression_pickle():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    ir_ser = pickle.dumps(ir, pickle.HIGHEST_PROTOCOL)
    ir2 = pickle.loads(ir_ser)
    np.testing.assert_array_equal(ir.predict(x), ir2.predict(x))
Example #23
def test_isotonic_regression_ties_max():
    # Setup examples with ties on maximum
    x = [1, 2, 3, 4, 5, 5]
    y = [1, 2, 3, 4, 5, 6]
    y_true = [1, 2, 3, 4, 5.5, 5.5]

    # Check that we get identical results for fit/transform and fit_transform
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(y_true, ir.fit_transform(x, y))
Example #24
def test_isotonic_regression_oob_bad():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz")
    ir.fit(x, y)

    # Make sure that we throw an error for bad out_of_bounds value
    assert_raises(ValueError, ir.predict, [min(x) - 10, max(x) + 10])
Example #25
def isotonic_calibration_learner(df: pd.DataFrame,
                                 target_column: str = "target",
                                 prediction_column: str = "prediction",
                                 output_column: str = "calibrated_prediction",
                                 y_min: float = 0.0,
                                 y_max: float = 1.0) -> LearnerReturnType:
    """
    Fits a single feature isotonic regression to the dataset.

    Parameters
    ----------

    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
        The model will be trained to predict the target column
        from the features.

    target_column : str
        The name of the column in `df` that should be used as target for the model.
        This column should be binary, since this is a classification model.

    prediction_column : str
        The name of the column with the uncalibrated predictions from the model.

    output_column : str
        The name of the column with the calibrated predictions from the model.

    y_min: float
        Lower bound of Isotonic Regression

    y_max: float
        Upper bound of Isotonic Regression

    """

    clf = IsotonicRegression(y_min=y_min, y_max=y_max, out_of_bounds='clip')

    clf.fit(df[prediction_column], df[target_column])

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        return new_df.assign(**{output_column: clf.predict(new_df[prediction_column])})

    p.__doc__ = learner_pred_fn_docstring("isotonic_calibration_learner")

    log = {'isotonic_calibration_learner': {
        'output_column': output_column,
        'target_column': target_column,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sklearn.__version__,
        'training_samples': len(df)},
        'object': clf}

    return p, p(df), log
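A minimal usage sketch (toy data, not from the source), assuming fklearn's helpers used above are importable; the returned function appends the calibrated column to any DataFrame carrying the prediction column:

import pandas as pd

# Hypothetical usage of isotonic_calibration_learner with toy data:
toy = pd.DataFrame({"prediction": [0.1, 0.4, 0.35, 0.8],
                    "target":     [0,   0,    1,    1]})
predict_fn, calibrated, log = isotonic_calibration_learner(toy)
print(calibrated["calibrated_prediction"].tolist())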
Example #26
def test_isotonic_regression_pickle():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    ir_ser = pickle.dumps(ir, pickle.HIGHEST_PROTOCOL)
    ir2 = pickle.loads(ir_ser)
    np.testing.assert_array_equal(ir.predict(x), ir2.predict(x))
Example #27
def test_isotonic_regression_oob_bad():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz")
    ir.fit(x, y)

    # Make sure that we throw an error for bad out_of_bounds value
    assert_raises(ValueError, ir.predict, [min(x) - 10, max(x) + 10])
Example #28
    def fit(self, p_input, y):
        if self.method == 'isotonic':
            calibrator = IsotonicRegression(out_of_bounds='clip')
        elif self.method == 'sigmoid':
            calibrator = _SigmoidCalibration()
        calibrator.fit(p_input, y)
        if self.method == 'sigmoid':
            self.a = calibrator.a_
            self.b = calibrator.b_
        self.calibrator = calibrator

        return self
Example #29
def test_isotonic_regression_oob_bad():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing="auto", out_of_bounds="xyz")

    # Make sure that we throw an error for bad out_of_bounds value
    msg = "The argument ``out_of_bounds`` must be in 'nan', 'clip', 'raise'; got xyz"
    with pytest.raises(ValueError, match=msg):
        ir.fit(x, y)
Example #30
def isotonicFit(thr, prec, maxThr=999):
    thr = np.array(thr)
    prec = np.array(prec)
    prec = prec[thr <= maxThr]
    thr = thr[thr <= maxThr]
    objFun = lambda thr, alpha, beta: alpha * thr**beta

    isoReg = IsotonicRegression(y_min=0, y_max=1)
    isoReg.fit(thr, prec)
    if save_isotonic_modelFname:
        joblib.dump(isoReg, save_isotonic_modelFname)
    return lambda x: isoReg.predict(x), "isotonic"
Example #31
def test_isotonic_regression_oob_bad_after():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")

    # Make sure that we throw an error for bad out_of_bounds value in transform
    ir.fit(x, y)
    ir.out_of_bounds = "xyz"
    assert_raises(ValueError, ir.transform, x)
Example #32
def test_isotonic_regression_oob_nan():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    assert sum(np.isnan(y1)) == 2
Example #33
def test_isotonic_regression_oob_bad_after():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")

    # Make sure that we throw an error for bad out_of_bounds value in transform
    ir.fit(x, y)
    ir.out_of_bounds = "xyz"
    assert_raises(ValueError, ir.transform, x)
Example #34
def test_isotonic_regression_oob_nan():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="nan")
    ir.fit(x, y)

    # Predict from training and test x and check that we have two NaNs.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    assert_equal(sum(np.isnan(y1)), 2)
Example #35
class RankScoreIsoRegression():
    """
    References
    ----------
    `Predicting Self-reported Customer Satisfaction of Interactions with a 
     Corporate Call Center <http://ecmlpkdd2017.ijs.si/papers/paperID598.pdf>`,

    """
    def __init__(self,
                 mask_size=100,
                 pr_args={},
                 ir_args={"out_of_bounds": "clip"}):
        """
        Parameters
        ----------
        mask_size : int, (default=100)
            Length of the mask for smoothing rank scores
        pr_args : dict, (default={})
            Keyword arguments to PairwiseRankClf constructor
        ir_args : dict, (default={"out_of_bounds":"clip"})
            Keyword arguments to IsotonicRegression constructor

        """
        self.ir_args = ir_args
        self.pr_args = pr_args
        self.pr_clf = PairwiseRankClf(**pr_args)
        self.ir_model = IsotonicRegression(**ir_args)
        self.mask_size = mask_size

    def fit(self, X, y):
        self.pr_clf.fit(X, y)
        rank_scores = self.pr_clf.decision_function(X)

        if self.mask_size is None:
            self.ir_model.fit(rank_scores, y)
        else:
            mask = np.ones(self.mask_size)
            idx = np.argsort(rank_scores)
            rank_scores_ordered = rank_scores[idx]
            y_ordered = y[idx]
            rank_scores_smoothed = np.convolve(
                rank_scores_ordered, mask, mode="valid") / float(mask.size)
            y_smoothed = np.convolve(y_ordered, mask, mode="valid") / float(
                mask.size)
            self.ir_model.fit(rank_scores_smoothed, y_smoothed)
        return self

    def rank_scores(self, X):
        return self.pr_clf.decision_function(X)

    def predict(self, X):
        return self.ir_model.predict(self.rank_scores(X))
Example #36
def test_isotonic_regression_oob_raise():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="raise")
    ir.fit(x, y)

    # Check that an exception is thrown
    msg = 'A value in x_new is below the interpolation range'
    with pytest.raises(ValueError, match=msg):
        ir.predict([min(x) - 10, max(x) + 10])
Example #37
def biasCorrection(model, tune, target):
	"""Computes an isotonic regression to calibrated a pre-trained model
	
	:param model: The pre-trained model to calibrate
	:param tune: A Pandas dataframe with the data to use for calibration
	:param target: List of true values
	:return: The calibrated model
	"""
	inputValues = model.predict(tune)
	corrected_model = IsotonicRegression(out_of_bounds = 'clip')
	corrected_model.fit(inputValues, target.values)
		
	return corrected_model
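A hedged end-to-end sketch (toy data and model chosen here for illustration): the returned object is the fitted IsotonicRegression itself, so calibrated values are obtained by chaining it after the raw model's predictions:

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Hypothetical usage of biasCorrection with a deliberately misspecified model:
rng = np.random.RandomState(0)
X = pd.DataFrame({"x": rng.uniform(0, 10, 200)})
y = pd.Series(np.sqrt(X["x"]) + rng.normal(0, 0.1, 200))
model = LinearRegression().fit(X, y)
corrector = biasCorrection(model, X, y)
calibrated = corrector.predict(model.predict(X))  # isotonic-corrected predictions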
Example #38
class RC30(ClassifierMixin, BaseEstimator):

    def __init__(self, 
                n_estimators=30, 
                max_depth=3,
                min_samples_split=2,
                min_samples_leaf=1, 
                ctype="isotonic"):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.ctype = ctype

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        self.model = RandomForestClassifier(n_estimators=self.n_estimators,
                                            max_depth=self.max_depth,
                                            min_samples_split=self.min_samples_split,
                                            min_samples_leaf=self.min_samples_leaf)
        if self.ctype == "logistic":
            self.calibrator = LogisticRegression(C=1e20, solver="lbfgs")
        elif self.ctype == "isotonic":
            self.calibrator = IsotonicRegression(y_min=0, y_max=1,
                                                out_of_bounds="clip")
        X0, X1, y0, y1 = train_test_split(X, y, test_size=0.3) 
        self.model.fit(X0, y0)
        if self.ctype == "logistic":
            y_est = self.model.predict_proba(X1)[:,[1]]
            self.calibrator.fit(y_est, y1)
        elif self.ctype == "isotonic":
            y_est = self.model.predict_proba(X1)[:,1]
            self.calibrator.fit(y_est, y1)

        self.is_fitted_ = True
        return self
 
    def predict_proba(self, X):
        X = check_array(X)
        check_is_fitted(self, 'is_fitted_')
        
        if self.ctype == "logistic":
            return self.calibrator.predict_proba(
                    self.model.predict_proba(X)[:,[1]])
        elif self.ctype == "isotonic":
            n, m = X.shape
            y = np.zeros((n,2))
            y[:,1] = self.calibrator.predict(
                        self.model.predict_proba(X)[:,1])
            y[:,0] = 1 - y[:,1]
            return y
Example #39
class IsotonicCalibrator(BaseEstimator, TransformerMixin):
    """
    Calculates a likelihood ratio of a score value, provided it is from one of
    two distributions. Uses isotonic regression for interpolation.
    """
    def __init__(self, add_one=False, add_misleading=0):
        """
        Arguments:
            add_one: deprecated (same as add_misleading=1)
            add_misleading: int: add misleading data points on both sides (default: 0)
        """
        if add_one:
            warnings.warn(
                'parameter `add_one` is deprecated; use `add_misleading=1` instead'
            )

        self.add_misleading = (1 if add_one else 0) + add_misleading
        self._ir = IsotonicRegression()

    def fit(self, X, y, **fit_params):
        # prevent extreme LRs
        if 'add_misleading' in fit_params:
            n_misleading = fit_params['add_misleading']
        elif 'add_one' in fit_params:
            warnings.warn(
                'parameter `add_one` is deprecated; use `add_misleading=1` instead'
            )
            n_misleading = 1 if fit_params['add_one'] else 0
        else:
            n_misleading = self.add_misleading

        if n_misleading > 0:
            X = np.concatenate([
                X,
                np.ones(n_misleading) * (X.max() + 1),
                np.ones(n_misleading) * (X.min() - 1)
            ])
            y = np.concatenate(
                [y, np.zeros(n_misleading),
                 np.ones(n_misleading)])

        prior = np.sum(y) / y.size
        weight = y * (1 - prior) + (1 - y) * prior
        self._ir.fit(X, y, sample_weight=weight)

        return self

    def transform(self, X):
        self.p1 = self._ir.transform(X)
        self.p0 = 1 - self.p1
        return to_odds(self.p1)
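A usage sketch (toy scores, not from the source). `to_odds` is not shown above; it is assumed here to map a posterior probability p to the odds p / (1 - p), which equals the likelihood ratio because fit() reweights the two classes to equal effective priors:

import numpy as np

# Assumed stand-in for the helper that is not shown in this example:
def to_odds(p):
    return p / (1.0 - p)

rng = np.random.RandomState(0)
scores = np.concatenate([rng.normal(0, 1, 200), rng.normal(2, 1, 200)])
labels = np.concatenate([np.zeros(200), np.ones(200)])
cal = IsotonicCalibrator(add_misleading=1).fit(scores, labels)
print(cal.transform(np.array([-1.0, 0.0, 1.0, 3.0])))  # likelihood ratios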
Example #40
class IsotonicCalibration(BaseEstimator, TransformerMixin):
    """
    Построение модели изотонической регресии на наблюдениях:
    y_pred -> y_target
    """
    def __init__(self):
        self.calibration = IsotonicRegression(out_of_bounds="clip")

    def fit(self, y_pred: pd.Series, y_true: pd.Series):
        self.calibration.fit(y_pred, y_true)
        return self

    def transform(self, y_pred):
        return self.calibration.transform(y_pred)
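A minimal usage sketch (toy values chosen for illustration):

import pandas as pd

# Toy usage of IsotonicCalibration; out-of-range inputs are clipped:
calib = IsotonicCalibration().fit(pd.Series([0.2, 0.6, 0.4, 0.9]),
                                  pd.Series([0, 1, 0, 1]))
print(calib.transform(pd.Series([0.05, 0.5, 0.95])))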
Example #41
def bootstrap_calibrate_prob(labels,
                             weights,
                             probs,
                             n_calibrations=30,
                             threshold=0.,
                             symmetrize=False):
    """
    Bootstrap isotonic calibration (borrowed from tata-antares/tagging_LHCb):
    * randomly divide the data into train and test halves
    * fit isotonic regression on train and apply it to test
    * compute D2 and AUC on test using the calibrated probabilities p(B+)
    
    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, threshold used to binarize labels to 0/1
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    
    :return: D2 array and auc array
    """

    import numpy as np
    from sklearn.isotonic import IsotonicRegression
    # sklearn.cross_validation was removed in scikit-learn 0.20; use model_selection
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import roc_auc_score

    aucs = []
    D2_array = []
    labels = (labels > threshold) * 1

    for _ in range(n_calibrations):
        (train_probs, test_probs, train_labels, test_labels, train_weights,
         test_weights) = train_test_split(probs,
                                          labels,
                                          weights,
                                          train_size=0.5)
        iso_reg = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if symmetrize:
            iso_reg.fit(np.r_[train_probs, 1 - train_probs],
                        np.r_[train_labels > 0, train_labels <= 0],
                        np.r_[train_weights, train_weights])
        else:
            iso_reg.fit(train_probs, train_labels, train_weights)

        probs_calib = iso_reg.transform(test_probs)
        alpha = (1 - 2 * probs_calib)**2
        aucs.append(
            roc_auc_score(test_labels, test_probs, sample_weight=test_weights))
        D2_array.append(np.average(alpha, weights=test_weights))
    return np.array(D2_array), np.array(aucs)
Example #42
def test_isotonic_regression_oob_clip():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    y2 = ir.predict(x)
    assert max(y1) == max(y2)
    assert min(y1) == min(y2)
Example #43
def test_isotonic_regression_oob_clip():
    # Set y and x
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    x = np.arange(len(y))

    # Create model and fit
    ir = IsotonicRegression(increasing='auto', out_of_bounds="clip")
    ir.fit(x, y)

    # Predict from training and test x and check that min/max match.
    y1 = ir.predict([min(x) - 10, max(x) + 10])
    y2 = ir.predict(x)
    assert_equal(max(y1), max(y2))
    assert_equal(min(y1), min(y2))
Example #44
def bootstrap_calibrate_prob(labels, weights, probs, n_calibrations=30, group_column=None, threshold=0., symmetrize=False, plot=False):
    """
    Bootstrap isotonic calibration: 
     * randomly divide the data into train and test halves
     * fit isotonic regression on train and apply it to test
     * compute D2 and AUC on test using the calibrated probabilities p(B+)
    
    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels 
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, threshold used to binarize labels to 0/1
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-
    
    :return: D2 array and auc array
    """
    aucs = []
    D2_array = []
    labels = (labels > threshold) * 1
    
    for _ in range(n_calibrations):
        if group_column is not None:
            train_probs, test_probs, train_labels, test_labels, train_weights, test_weights = train_test_split_group(
                group_column, probs, labels, weights, train_size=0.5)
        else:
            train_probs, test_probs, train_labels, test_labels, train_weights, test_weights = train_test_split(
                probs, labels, weights, train_size=0.5)
        iso_est = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if symmetrize:
            train_weights = 0.5 * train_weights
            iso_est.fit(numpy.r_[train_probs, 1-train_probs], 
                        numpy.r_[train_labels > 0, train_labels <= 0],
                        numpy.r_[train_weights, train_weights])
        else:
            iso_est.fit(train_probs, train_labels, train_weights)
            
        probs_calib = iso_est.transform(test_probs)

        if plot:
            plt.figure(1, figsize=(6, 5))
            plt.scatter(train_probs, train_labels, color='black', zorder=20)
            X_test = numpy.linspace(0.001, 0.999, 500)
            y_test = iso_est.transform(X_test)
            plt.plot(X_test, y_test, color='blue', linewidth=3)
            plt.show()

        alpha = (1 - 2 * probs_calib) ** 2
        aucs.append(roc_auc_score(test_labels, test_probs, sample_weight=test_weights))
        D2_array.append(numpy.average(alpha, weights=test_weights))
    return D2_array, aucs
Example #45
def train_rcir(
        training_class,
        training_scores,  # , validation_class, validation_scores,
        credible_level=.95,
        d=1,
        y_min=0,
        y_max=1,
        merge_criterion='auc_roc'):
    # Function for training reliably calibrated isotonic regression (RCIR)
    # Returns an RCIR-model
    # First, create an ordinary isotonic regression model
    isotonic_regression_model = IsotonicRegression(y_min=y_min,
                                                   y_max=y_max,
                                                   out_of_bounds='clip')
    isotonic_regression_model.fit(X=training_scores, y=training_class)
    # Extract the interpolation model we need:
    tmp_x = isotonic_regression_model.f_.x
    tmp_y = isotonic_regression_model.f_.y
    # Do some corrections (if there are any)
    tmp = correct_for_point_bins(tmp_x, tmp_y)
    x = tmp['x']
    y = tmp['y']
    # Use new boundaries to create an interpolation model that does the heavy lifting of
    # reliably calibrated isotonic regression:
    interpolation_model = interp1d(x=x, y=y, bounds_error=False)
    interpolation_model._fill_value_below = min(y)
    interpolation_model._fill_value_above = max(y)
    training_probabilities = interpolation_model(training_scores)
    # The following array contains all information defining the IR transformation
    bin_summary = np.unique(training_probabilities, return_counts=True)
    credible_intervals = [
        credible_interval(np.round(p * n), n)
        for (p, n) in zip(bin_summary[0], bin_summary[1])
    ]
    width_of_intervals = np.array(
        [row['p_max'] - row['p_min'] for row in credible_intervals])
    rcir_model = {
        'model': interpolation_model,
        'credible level': credible_level,
        'credible intervals': credible_intervals,
        'width of intervals': width_of_intervals,
        'bin summary': bin_summary,
        'd': d
    }
    while (max(rcir_model['width of intervals']) > d):
        # Merge one more bin.
        rcir_model = merge_bin(rcir_model, training_class, training_scores,
                               merge_criterion)
    return (rcir_model)
Example #46
class Isotonic(Calibrator):
    def __init__(self):
        self.clf = IsotonicRegression(y_min=0.0,
                                      y_max=1.0,
                                      out_of_bounds='clip')

    def fit(self, y_pred, y_true):
        assert y_true is not None
        y_pred, y_true = Calibrator.validate(y_pred, y_true)
        self.clf.fit(y_pred, y_true)

    def predict(self, y_pred):
        y_pred, _ = Calibrator.validate(y_pred)
        y_calib = self.clf.predict(y_pred)
        return y_calib
Example #47
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0.0, y_max=1.0)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]), ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])
Example #48
class LLRIsotonicRegression(LLR):
    """Log-likelihood ratio estimation by isotonic regression"""

    def __init__(self, equal_priors=False):
        super(LLRIsotonicRegression, self).__init__()
        self.equal_priors = equal_priors

    def fit(self, X, Y):

        self.prior = self._get_prior(X, Y)

        scores, ratios = self._get_scores_ratios(X, Y)

        y_min = np.min(ratios)
        y_max = np.max(ratios)
        self.ir = IsotonicRegression(y_min=y_min, y_max=y_max)
        self.ir.fit(scores, ratios)

        return self

    def toLogLikelihoodRatio(self, scores):
        """Get log-likelihood ratio given scores

        Parameters
        ----------
        scores : numpy array
            Test scores

        Returns
        -------
        llr : numpy array
            Log-likelihood ratio array with same shape as input `scores`
        """
        x_min = np.min(self.ir.X_)
        x_max = np.max(self.ir.X_)

        oob_min = np.where(scores < x_min)
        oob_max = np.where(scores > x_max)
        ok = np.where((scores >= x_min) & (scores <= x_max))

        calibrated = np.zeros(scores.shape)
        calibrated[ok] = self.ir.transform(scores[ok])
        calibrated[oob_min] = self.ir.y_min
        calibrated[oob_max] = self.ir.y_max
        return calibrated
Example #49
def test_isotonic_dtype():
    y = [2, 1, 4, 3, 5]
    weights = np.array([.9, .9, .9, .9, .9], dtype=np.float64)
    reg = IsotonicRegression()

    for dtype in (np.int32, np.int64, np.float32, np.float64):
        for sample_weight in (None, weights.astype(np.float32), weights):
            y_np = np.array(y, dtype=dtype)
            expected_dtype = \
                check_array(y_np, dtype=[np.float64, np.float32],
                            ensure_2d=False).dtype

            res = isotonic_regression(y_np, sample_weight=sample_weight)
            assert_equal(res.dtype, expected_dtype)

            X = np.arange(len(y)).astype(dtype)
            reg.fit(X, y_np, sample_weight=sample_weight)
            res = reg.predict(X)
            assert_equal(res.dtype, expected_dtype)
Example #50
def calibrate_col(col):
    # isotonic not the best here, and faces numerical issues
    calibrator = IsotonicRegression(y_min=0, y_max=1)
    x = lab[~np.isnan(lab[col])][col].values
    y = lab[~np.isnan(lab[col])]['labels'].values
    try:
        # Old sklearn accepted 2-D inputs
        calibrator.fit(x.reshape(-1, 1), y)
        lab[col] = calibrator.predict(lab[col].values.reshape(-1, 1))
        amb[col] = calibrator.predict(amb[col].values.reshape(-1, 1))
        unl[col] = calibrator.predict(unl[col].values.reshape(-1, 1))
        scr[col] = calibrator.predict(scr[col].values.reshape(-1, 1))
    except ValueError:
        # Newer sklearn requires 1-D inputs
        calibrator.fit(x.ravel(), y)
        lab[col] = calibrator.predict(lab[col].values.ravel())
        amb[col] = calibrator.predict(amb[col].values.ravel())
        unl[col] = calibrator.predict(unl[col].values.ravel())
        scr[col] = calibrator.predict(scr[col].values.ravel())
Example #51
def test_isotonic_zero_weight_loop():
    # Test from @ogrisel's issue:
    # https://github.com/scikit-learn/scikit-learn/issues/4297

    # Get deterministic RNG with seed
    rng = np.random.RandomState(42)

    # Create regression and samples
    regression = IsotonicRegression()
    n_samples = 50
    x = np.linspace(-3, 3, n_samples)
    y = x + rng.uniform(size=n_samples)

    # Get some random weights and zero out
    w = rng.uniform(size=n_samples)
    w[5:8] = 0
    regression.fit(x, y, sample_weight=w)

    # This will hang in failure case.
    regression.fit(x, y, sample_weight=w)
Example #52
class IDXHack(object):
    """

    Usage
    =====
    >>> from mediaeval_util.repere import IDXHack
    >>> frame2time = IDXHack(args['--idx'])
    >>> trueTime = frame2time(opencvFrame, opencvTime)

    """

    def __init__(self, idx=None):
        super(IDXHack, self).__init__()
        self.idx = idx

        if self.idx:

            # load .idx file using pandas
            df = read_table(
                self.idx, sep=r'\s+',
                names=['frame_number', 'frame_type', 'bytes', 'seconds']
            )
            # np.float was removed in NumPy 1.20+; use the builtin float
            x = np.array(df['frame_number'], dtype=float)
            y = np.array(df['seconds'], dtype=float)

            # train isotonic regression
            self.ir = IsotonicRegression(y_min=np.min(y), y_max=np.max(y))
            self.ir.fit(x, y)

            # frame number support
            self.xmin = np.min(x)
            self.xmax = np.max(x)

    def __call__(self, opencvFrame, opencvTime):

        if self.idx is None:
            return opencvTime

        return self.ir.transform([min(self.xmax,
                                      max(self.xmin, opencvFrame)
                                      )])[0]
Example #53
def test_permutation_invariance():
    # check that fit is permutation invariant.
    # regression test of missing sorting of sample-weights
    ir = IsotonicRegression()
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [1, 41, 51, 1, 2, 5, 24]
    sample_weight = [1, 2, 3, 4, 5, 6, 7]
    x_s, y_s, sample_weight_s = shuffle(x, y, sample_weight, random_state=0)
    y_transformed = ir.fit_transform(x, y, sample_weight=sample_weight)
    y_transformed_s = ir.fit(x_s, y_s, sample_weight=sample_weight_s).transform(x)

    assert_array_equal(y_transformed, y_transformed_s)
Example #54
def bootstrap_calibrate_prob(labels, weights, probs, n_calibrations=30,
                             threshold=0., symmetrize=False):
    """
    Bootstrap isotonic calibration (borrowed from tata-antares/tagging_LHCb):
     * randomly divide the data into train and test halves
     * fit isotonic regression on train and apply it to test
     * compute D2 and AUC on test using the calibrated probabilities p(B+)

    :param probs: probabilities, numpy.array of shape [n_samples]
    :param labels: numpy.array of shape [n_samples] with labels
    :param weights: numpy.array of shape [n_samples]
    :param threshold: float, threshold used to binarize labels to 0/1
    :param symmetrize: bool, do symmetric calibration, ex. for B+, B-

    :return: D2 array and auc array
    """
    aucs = []
    D2_array = []
    labels = (labels > threshold) * 1

    for _ in range(n_calibrations):
        (train_probs, test_probs,
         train_labels, test_labels,
         train_weights, test_weights) = train_test_split(
            probs, labels, weights, train_size=0.5)
        iso_reg = IsotonicRegression(y_min=0, y_max=1, out_of_bounds='clip')
        if symmetrize:
            iso_reg.fit(np.r_[train_probs, 1-train_probs],
                        np.r_[train_labels > 0, train_labels <= 0],
                        np.r_[train_weights, train_weights])
        else:
            iso_reg.fit(train_probs, train_labels, train_weights)

        probs_calib = iso_reg.transform(test_probs)
        alpha = (1 - 2 * probs_calib) ** 2
        aucs.append(roc_auc_score(test_labels, test_probs,
                                  sample_weight=test_weights))
        D2_array.append(np.average(alpha, weights=test_weights))
    return np.array(D2_array), np.array(aucs)
Example #55
def test_isotonic_regression_with_ties_in_differently_sized_groups():
    """
    Non-regression test to handle issue 9432:
    https://github.com/scikit-learn/scikit-learn/issues/9432

    Compare against output in R:
    > library("isotone")
    > x <- c(0, 1, 1, 2, 3, 4)
    > y <- c(0, 0, 1, 0, 0, 1)
    > res1 <- gpava(x, y, ties="secondary")
    > res1$x

    `isotone` version: 1.1-0, 2015-07-24
    R version: R version 3.3.2 (2016-10-31)
    """
    x = np.array([0, 1, 1, 2, 3, 4])
    y = np.array([0, 0, 1, 0, 0, 1])
    y_true = np.array([0., 0.25, 0.25, 0.25, 0.25, 1.])
    ir = IsotonicRegression()
    ir.fit(x, y)
    assert_array_almost_equal(ir.transform(x), y_true)
    assert_array_almost_equal(ir.fit_transform(x, y), y_true)
Example #56
class IsotonicCalibrator(BaseEstimator, RegressorMixin):
    """Probability calibration with isotonic regression.

    Note
    ----
    This class backports and extends `sklearn.isotonic.IsotonicRegression`.
    """

    def __init__(self, y_min=None, y_max=None, increasing=True,
                 interpolation=False):
        """Constructor.

        Parameters
        ----------
        * `y_min` [optional]:
            If not `None`, set the lowest value of the fit to `y_min`.

        * `y_max` [optional]:
            If not `None`, set the highest value of the fit to `y_max`.

        * `increasing` [boolean or string, default=`True`]:
            If boolean, whether or not to fit the isotonic regression with `y`
            increasing or decreasing.
            The string value `"auto"` determines whether `y` should increase or
            decrease based on the Spearman correlation estimate's sign.

        * `interpolation` [boolean, default=`False`]:
            Whether linear interpolation is enabled or not.
        """
        self.y_min = y_min
        self.y_max = y_max
        self.increasing = increasing
        self.interpolation = interpolation

    def fit(self, T, y, sample_weight=None):
        """Fit using `T`, `y` as training data.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Training target.

        * `sample_weight` [array-like, shape=(n_samples,), optional]:
            Weights. If set to None, all weights will be set to 1.

        Returns
        -------
        * `self` [object]:
            `self`.

        Notes
        -----
        `T` is stored for future use, as `predict` needs T to interpolate
        new input data.
        """
        # Check input
        T = column_or_1d(T)

        # Fit isotonic regression
        self.ir_ = IsotonicRegression(y_min=self.y_min,
                                      y_max=self.y_max,
                                      increasing=self.increasing,
                                      out_of_bounds="clip")
        self.ir_.fit(T, y, sample_weight=sample_weight)

        # Interpolators
        if self.interpolation:
            p = self.ir_.transform(T)

            change_mask1 = (p - np.roll(p, 1)) > 0
            change_mask2 = np.roll(change_mask1, -1)
            change_mask1[0] = True
            change_mask1[-1] = True
            change_mask2[0] = True
            change_mask2[-1] = True

            self.interp1_ = interp1d(T[change_mask1], p[change_mask1],
                                     bounds_error=False,
                                     fill_value=(0., 1.))
            self.interp2_ = interp1d(T[change_mask2], p[change_mask2],
                                     bounds_error=False,
                                     fill_value=(0., 1.))

        return self

    def predict(self, T):
        """Calibrate data.

        Parameters
        ----------
        * `T` [array-like, shape=(n_samples,)]:
            Data to calibrate.

        Returns
        -------
        * `Tt` [array, shape=(n_samples,)]:
            Calibrated data.
        """
        if self.interpolation:
            T = column_or_1d(T)
            return 0.5 * (self.interp1_(T) + self.interp2_(T))

        else:
            return self.ir_.transform(T)
Example #57
def main(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
         dataset='mnist.pkl.gz', batch_size=20, n_hidden=[500, 500]):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient descent)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz


   """
    if add_noise==True:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize,
                             noise_prop=noise_proportion)
    else:
        datasets = load_data(dataset, nb_classes=nb_classes, binarize=binarize)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    print('Showing data samples')
    if nb_classes == 2:
        labels = ['odd', 'even']
    else:
        labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    imshow_samples(train_set_x.get_value(), train_set_y,
            valid_set_x.get_value(), valid_set_y, num_samples=4, labels=labels)
    plt.pause(0.0001)
    diary.save_figure(plt, filename='samples', extension='svg')

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=nb_classes
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_error_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_loss_model = theano.function(
        inputs=[index],
        outputs=classifier.loss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    training_accuracy_model = theano.function(
        inputs=[index],
        outputs=classifier.accuracy(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compiling a Theano function that computes the predictions on the
    # training data
    training_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_predictions_model = theano.function(
        inputs=[index],
        outputs=classifier.predictions(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # compiling a Theano function that computes the predictions on the
    # training data
    training_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    validation_scores_model = theano.function(
        inputs=[index],
        outputs=classifier.scores(),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
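                           # (e.g. a new best at iter 6000 extends
                           # patience to max(10000, 6000 * 2) = 12000)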
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    error_tra = np.zeros(n_epochs+1)
    error_val = np.zeros(n_epochs+1)
    accuracy_tra = np.zeros(n_epochs+1)
    accuracy_val = np.zeros(n_epochs+1)

    epoch = 0
    # Error in accuracy
    Y_train = train_set_y.eval()
    Y_valid = valid_set_y.eval()

    print('Model predict training scores')
    score_train = np.asarray([training_scores_model(i) for i
        in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]

    if output_activation == 'isotonic_regression':
        # 4. Calibrate the network with isotonic regression on the full
        #    training set
        ir = IsotonicRegression(increasing=True, out_of_bounds='clip',
                                y_min=_EPSILON, y_max=(1-_EPSILON))
        #   b. Calibrate the scores
        print('Learning Isotonic Regression from TRAINING set')
        ir.fit(score_train, Y_train)

    # 5. Evaluate the performance with probabilities
    #   b. Evaluation on validation set
    print('Model predict validation scores')
    score_val = np.asarray([validation_scores_model(i) for i
        in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
    if output_activation == 'isotonic_regression':
        prob_train = ir.predict(score_train)
        print('IR predict validation probabilities')
        prob_val = ir.predict(score_val)
    else:
        prob_train = score_train
        prob_val = score_val

    error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
    error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
    accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
    accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

    diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
    diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

        # Error in accuracy
        #training_loss = [training_loss_model(i) for i
        #                     in range(n_train_batches)]
        #validation_loss = [validation_loss_model(i) for i
        #                     in range(n_valid_batches)]
        #error_tra[epoch] = numpy.mean(training_loss)
        #error_val[epoch] = numpy.mean(validation_loss)
        #training_acc = [training_accuracy_model(i) for i
        #                     in range(n_train_batches)]
        #validation_acc = [validation_accuracy_model(i) for i
        #                     in range(n_valid_batches)]
        #accuracy_tra[epoch] = numpy.mean(training_acc)
        #accuracy_val[epoch] = numpy.mean(validation_acc)

        print('Model predict training scores')
        score_train = np.asarray([training_scores_model(i) for i
            in range(n_train_batches)]).reshape(-1,nb_classes)[:,1]

        if output_activation == 'isotonic_regression':
            # 4. Re-fit the isotonic regression calibrator on the full
            #    training set scores
            #   b. Calibrate the scores
            print('Learning Isotonic Regression from TRAINING set')
            ir.fit(score_train, Y_train)

        # 5. Evaluate the performance with probabilities
        #   b. Evaluation on validation set
        print('Model predict validation scores')
        score_val = np.asarray([validation_scores_model(i) for i
            in range(n_valid_batches)]).reshape(-1,nb_classes)[:,1]
        if output_activation == 'isotonic_regression':
            prob_train = ir.predict(score_train)
            print('IR predict validation probabilities')
            prob_val = ir.predict(score_val)
        else:
            prob_train = score_train
            prob_val = score_val

        error_tra[epoch] = compute_loss(prob_train, Y_train, loss)
        error_val[epoch] = compute_loss(prob_val, Y_valid, loss)
        accuracy_tra[epoch] = compute_accuracy(prob_train, Y_train)
        accuracy_val[epoch] = compute_accuracy(prob_val, Y_valid)

        diary.add_entry('training', [error_tra[epoch], accuracy_tra[epoch]])
        diary.add_entry('validation', [error_val[epoch], accuracy_val[epoch]])

        plot_error(error_tra, error_val, epoch, 'loss')
        diary.save_figure(plt, filename='error', extension='svg')
        plot_accuracy(accuracy_tra, accuracy_val, epoch)
        diary.save_figure(plt, filename='accuracy', extension='svg')
        if nb_classes == 2:
            #prob_train = np.asarray([training_scores_model(i) for i
            #                 in range(n_train_batches)]).reshape(-1,nb_classes)
            #prob_val = np.asarray([validation_scores_model(i) for i
            #                 in range(n_valid_batches)]).reshape(-1,nb_classes)
            if output_activation == 'isotonic_regression':
                prob_lin = ir.predict(score_lin)
                plot_reliability_diagram(prob_train, Y_train,
                                     prob_val, Y_valid, epoch,
                                     prob_lin, score_lin)
            else:
                plot_reliability_diagram(prob_train, Y_train,
                                     prob_val, Y_valid, epoch)
            diary.save_figure(plt, filename='reliability_diagram', extension='svg')
            plot_histogram_scores(prob_train, prob_val, epoch=epoch)
            diary.save_figure(plt, filename='histogram_scores', extension='svg')
        plt.pause(0.0001)

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
Example #58
0
from sklearn.pipeline import Pipeline
from sklearn.ensemble import (AdaBoostClassifier, GradientBoostingClassifier,
                              RandomForestClassifier)
from sklearn.isotonic import IsotonicRegression
from keras.models import Sequential
from keras.layers import Dense


"""
Predictions
"""
from keras.callbacks import History
hist = History()
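
# NOTE: init= below is the Keras 1.x keyword; Keras 2 renamed it to
# kernel_initializer=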

model = Sequential()
model.add(Dense(21, input_dim=16, init='uniform', activation='relu'))
model.add(Dense(80, init='uniform', activation='relu'))
model.add(Dense(80, init='uniform', activation='relu'))
model.add(Dense(1, init='uniform', activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


# NOTE: the second assignment overwrites the first, so the AdaBoost ensemble
# is built on the random forest, not the gradient boosting classifier
clf = GradientBoostingClassifier(n_estimators=100, verbose=2, learning_rate=0.05, max_depth=3, min_samples_leaf=1, random_state=1)
clf = RandomForestClassifier(n_estimators=100, verbose=2)
bdt = AdaBoostClassifier(base_estimator=clf, n_estimators=100)

bdt.fit(x2, training_target)

# x2 and y2 are the training and evaluation feature matrices of the
# original snippet; they are not defined here
proba = bdt.predict_proba(y2)

ir = IsotonicRegression(out_of_bounds='clip')

# the original line was `proba1 = ir.fit()`, which raises a TypeError:
# isotonic regression needs 1-D scores plus matching labels
# (`eval_target` is a hypothetical name for the labels of `y2`)
proba1 = ir.fit_transform(proba[:, 1], eval_target)
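
For comparison, a minimal working sketch of isotonic probability calibration
using scikit-learn's CalibratedClassifierCV (synthetic data; every name here
is illustrative, not from the original snippet):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=1)
X_fit, X_eval, y_fit, y_eval = train_test_split(X, y, random_state=1)

base = RandomForestClassifier(n_estimators=100, random_state=1)
calibrated = CalibratedClassifierCV(base, method='isotonic', cv=3)
calibrated.fit(X_fit, y_fit)

# isotonic-calibrated probabilities for the positive class
proba = calibrated.predict_proba(X_eval)[:, 1]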
Example #59
0
import numpy as np
from scipy.interpolate import interp1d
from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
from sklearn.isotonic import IsotonicRegression


class InterpolatedIsotonicRegression(BaseEstimator, TransformerMixin,
                                     RegressorMixin):
    """Interpolated Isotonic Regression model.

        apply linear interpolation to transform piecewise constant isotonic
        regression model into piecewise linear model
    """

    def __init__(self, y_min=None, y_max=None, increasing=True,
                 out_of_bounds='nan'):
        self.y_min = y_min
        self.y_max = y_max
        self.increasing = increasing
        self.out_of_bounds = out_of_bounds

    def fit(self, X, y, sample_weight=None):
        """Fit the model using X, y as training data.
        Parameters
        ----------
        X : array-like, shape=(n_samples,)
            Training data.
        y : array-like, shape=(n_samples,)
            Training target.
        sample_weight : array-like, shape=(n_samples,), optional, default: None
            Weights. If set to None, all weights will be set to 1 (equal
            weights).
        Returns
        -------
        self : object
            Returns an instance of self.
        Notes
        -----
        X is stored for future use, as `transform` needs X to interpolate
        new input data.
        """
        self.iso_ = IsotonicRegression(y_min=self.y_min,
                                       y_max=self.y_max,
                                       increasing=self.increasing,
                                       out_of_bounds=self.out_of_bounds)
        self.iso_.fit(X, y, sample_weight=sample_weight)

        p = self.iso_.transform(X)
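        # keep the first point of each constant step (change_mask1) and the
        # last point of each step (change_mask2)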
        change_mask1 = (p - np.roll(p, 1)) > 0
        change_mask2 = np.roll(change_mask1, -1)
        change_mask1[0] = True
        change_mask1[-1] = True
        change_mask2[0] = True
        change_mask2[-1] = True

        self.iso_interp1_ = interp1d(X[change_mask1],
                                     p[change_mask1],
                                     bounds_error=False,
                                     fill_value=(0., 1.))
        self.iso_interp2_ = interp1d(X[change_mask2],
                                     p[change_mask2],
                                     bounds_error=False,
                                     fill_value=(0., 1.))

        return self

    def transform(self, T):
        """Transform new data by linear interpolation
        Parameters
        ----------
        T : array-like, shape=(n_samples,)
            Data to transform.
        Returns
        -------
        T_ : array, shape=(n_samples,)
            The transformed data
        """
        return 0.5 * (self.iso_interp1_(T) + self.iso_interp2_(T))

    def predict(self, T):
        """Predict new data by linear interpolation.
        Parameters
        ----------
        T : array-like, shape=(n_samples,)
            Data to transform.
        Returns
        -------
        T_ : array, shape=(n_samples,)
            Transformed data.
        """
        return self.transform(T)
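
A short usage sketch for the class above (synthetic, monotone-ish data;
assumes the imports added at the top of this example):

X = np.linspace(0., 1., 11)
y = np.array([0., 0., .1, .1, .1, .5, .5, .9, .9, 1., 1.])

iir = InterpolatedIsotonicRegression(y_min=0., y_max=1.,
                                     out_of_bounds='clip')
iir.fit(X, y)

# predictions vary linearly between the steps of the underlying isotonic fit
print(iir.predict(np.array([0.05, 0.42, 0.95])))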
Example #60
0
# error: the original snippet fits IsotonicRegression on multi-column
# DataFrames, which isotonic regression (a 1-D method) rejects

from sklearn.model_selection import train_test_split  # was sklearn.cross_validation, removed in sklearn 0.20
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.isotonic import IsotonicRegression


df = pd.read_csv('newtest.csv')
df1 = pd.read_csv('newtest1.csv')
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X = df1.drop(['tag'], axis=1)
Y = df1.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

# isotonic regression needs 1-D inputs: fit on a single feature column and
# flatten the single-column target frame
ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(X_train.iloc[:, 0], Y_train.values.ravel())


print(ir.score(X_test.iloc[:, 0], Y_test.values.ravel()))