Пример #1
0
    def _get_RF(self, X_values, y_values, model_num, random_state):
        """Gather out-of-fold random-forest residuals and error estimates.

        Runs a repeated K-fold cross-validation (5 splits, 4 repeats, seeded
        with ``random_state``). For every fold a fresh ``rf.RF`` is trained on
        the training rows with ``model_num`` and queried on the held-out rows.

        Args:
            X_values: Feature data; must support indexing by an integer
                index array (assumed to be a NumPy array -- TODO confirm).
            y_values: Targets aligned with ``X_values``, indexed the same way.
            model_num: Forwarded unchanged to ``RF.train``.
            random_state: Seed handed to ``RepeatedKFold``.

        Returns:
            Tuple ``(residuals, model_errors)`` of flattened arrays holding,
            for all folds in iteration order, ``y_test - prediction`` and the
            second value returned by ``RF.predict``.
        """
        splitter = RepeatedKFold(n_splits=5, n_repeats=4,
                                 random_state=random_state)

        # Start each collection with an empty float array so the joined result
        # gets the same dtype promotion as growing from ``np.asarray([])``.
        residual_parts = [np.asarray([])]
        error_parts = [np.asarray([])]

        for fit_rows, held_out_rows in splitter.split(X_values):
            forest = rf.RF()
            forest.train(X_values[fit_rows], y_values[fit_rows], model_num)
            # The ``True`` flag presumably asks for error estimates as well;
            # confirm against ``rf.RF.predict``.
            predicted, fold_errors = forest.predict(X_values[held_out_rows],
                                                    True)
            residual_parts.append(y_values[held_out_rows] - predicted)
            error_parts.append(fold_errors)

        # axis=None flattens every piece before joining them end to end.
        return (np.concatenate(residual_parts, axis=None),
                np.concatenate(error_parts, axis=None))
def find_stats(X_values, y_values, stdev):
    """Cross-validate a random forest and return its absolute residuals.

    Uses a repeated K-fold split (5 splits, 4 repeats, fixed seed 91936274).
    On every fold a new ``rf.RF`` is fitted with ``train_synth`` (passing
    ``std=stdev``) and evaluated with ``predict_no_divide`` on the held-out
    rows.

    Args:
        X_values: Feature data; must support indexing by an integer index
            array (assumed to be a NumPy array -- TODO confirm).
        y_values: Targets aligned with ``X_values``, indexed the same way.
        stdev: Forwarded to ``RF.train_synth`` as ``std``.

    Returns:
        Tuple ``(abs_residuals, model_errors)`` of flattened arrays covering
        all folds in iteration order: ``|y_test - prediction|`` and the second
        value returned by ``RF.predict_no_divide``.
    """
    # define cross-validation splits
    splitter = RepeatedKFold(n_splits=5, n_repeats=4, random_state=91936274)

    print("finding rf scale factors")

    # Start each collection with an empty float array so the joined result
    # gets the same dtype promotion as growing from ``np.asarray([])``.
    residual_parts = [np.asarray([])]
    error_parts = [np.asarray([])]

    for fit_rows, held_out_rows in splitter.split(X_values):
        forest = rf.RF()
        forest.train_synth(X_values[fit_rows], y_values[fit_rows], std=stdev)
        predicted, fold_errors = forest.predict_no_divide(
            X_values[held_out_rows], True)
        residual_parts.append(y_values[held_out_rows] - predicted)
        error_parts.append(fold_errors)

    # axis=None flattens every piece before joining them end to end.
    all_errors = np.concatenate(error_parts, axis=None)
    all_residuals = np.concatenate(residual_parts, axis=None)

    return abs(all_residuals), all_errors
Пример #3
0
	def _get_RF(self, X_train, y_train, X_test, y_test, model_num):
		"""Fit a random forest and return test residuals with error estimates.

		A new ``rf.RF`` is trained on ``(X_train, y_train)`` with ``model_num``
		and then asked to predict ``X_test``.

		Returns:
			Tuple ``(residuals, model_errors)`` where ``residuals`` is
			``y_test - predictions`` and ``model_errors`` is the second value
			returned by ``RF.predict``.
		"""
		forest = rf.RF()
		forest.train(X_train, y_train, model_num)
		# The ``True`` flag presumably asks for error estimates as well;
		# confirm against ``rf.RF.predict``.
		y_hat, error_estimates = forest.predict(X_test, True)
		return y_test - y_hat, error_estimates
Пример #4
0
        y_train, y_test = Y.iloc[train_index_2], Y.iloc[test_index_2]

        testGroup2 = np.delete(groups2, train_index_2)

        if checkAlreadyDone(testGroup2[0], alreadyDone):
            continue

        frames = [X_test_1, X_test]
        twoTest = pd.concat(frames)

        yTest = [y_test_1, y_test]
        yFrames = pd.concat(yTest)

        testFinal = np.concatenate((testGroup, testGroup2))

        RF = rf.RF()
        RF.train(X_train, y_train, std=y_std)

        GPR = gpr.GPR()
        GPR.train(X_train,
                  y_train,
                  userkernel=gprsavedkernel,
                  std=y_std,
                  optimizer_restarts=0)
        # Here instead of res, sigma try calculating domain prediction for the test data.

        gpr_pred, GPR_errors = GPR.predict(twoTest, True)
        rf_pred, RF_errors = RF.predict(twoTest, True)
        RF_errors = rfslope * RF_errors + rfintercept

        # Start measuring on different thresholds
def find_stats(X_values, y_values):
    """Cross-validate GPR and RF models; keep RF stats below the GPR cutoff.

    Both models are evaluated on the same repeated K-fold split (5 splits,
    4 repeats, fixed seed 91936274). The mean of the scaled GPR model errors
    is used as a cutoff, and only the RF absolute residuals / model errors
    whose matching GPR model error is strictly below that cutoff are
    returned.

    NOTE: this function reads two names that are not parameters and must
    exist at module level: ``standard_deviation`` (passed to both models as
    ``std`` and used to scale the GPR errors) and ``outerctr`` (only used in
    the progress message).

    Args:
        X_values: Feature data; must support indexing by an integer index
            array (assumed to be a NumPy array -- TODO confirm).
        y_values: Targets aligned with ``X_values``, indexed the same way.

    Returns:
        Tuple ``(cutoff, res, sigma)``:
            cutoff: Mean of the scaled GPR model errors over all folds.
            res: RF absolute residuals at positions where the GPR model
                error is below ``cutoff``.
            sigma: RF model errors at those same positions.
    """
    # define cross-validation splits
    rkf = RepeatedKFold(n_splits=5, n_repeats=4, random_state=91936274)

    # GPR
    # Each collection starts with an empty float array so the joined result
    # has the same dtype promotion as growing from ``np.asarray([])``.
    gpr_error_parts = [np.asarray([])]
    for ctr, (train_index, test_index) in enumerate(rkf.split(X_values),
                                                    start=1):
        print("GPR: {}/20 (iteration: {}/10)".format(ctr, outerctr))
        GPR = gpr.GPR()
        GPR.train_synth(X_values[train_index],
                        y_values[train_index],
                        std=standard_deviation,
                        kernelchoice=1,
                        optimizer_restarts=10)
        # Only the error estimates are needed; the GPR predictions themselves
        # never contribute to the returned values.
        _, gpr_errors = GPR.predict_no_divide(X_values[test_index], True)
        gpr_error_parts.append(gpr_errors / standard_deviation)
    # axis=None flattens every piece before joining them end to end.
    GPR_model_errors = np.concatenate(gpr_error_parts, axis=None)

    # define quantities to return
    GPR_model_error_std = np.std(GPR_model_errors)
    print("GPR standard deviation of model errors: {}".format(
        GPR_model_error_std))
    GPR_model_error_mean = np.mean(GPR_model_errors)
    print("GPR mean of model errors: {}".format(GPR_model_error_mean))

    cutoff = GPR_model_error_mean

    # RF
    print("finding rf scale factors")
    rf_error_parts = [np.asarray([])]
    rf_residual_parts = [np.asarray([])]
    # The integer random_state makes this second split() call yield the same
    # folds as the GPR loop, so position i refers to the same test sample in
    # the GPR and RF arrays.
    for train_index, test_index in rkf.split(X_values):
        RF = rf.RF()
        RF.train_synth(X_values[train_index],
                       y_values[train_index],
                       std=standard_deviation)
        rf_pred, RF_errors = RF.predict_no_divide(X_values[test_index], True)
        rf_error_parts.append(RF_errors)
        rf_residual_parts.append(y_values[test_index] - rf_pred)
    RF_model_errors = np.concatenate(rf_error_parts, axis=None)
    abs_residuals = abs(np.concatenate(rf_residual_parts, axis=None))

    # remove rf model errors and residuals that have gpr model error over
    # cutoff: select all qualifying positions in one indexing step instead of
    # appending element by element.
    keep = np.flatnonzero(GPR_model_errors < cutoff)
    res = abs_residuals[keep]
    sigma = RF_model_errors[keep]

    return cutoff, res, sigma