Exemplo n.º 1
0
    def test_icp_regression_tree(self):
        """Run IcpRegressor on a decision tree, with and without normalization.

        The Boston housing sample indices are shuffled and cut into three
        consecutive thirds: training, calibration and test. The same split is
        used twice: first with a plain absolute-error nonconformity function,
        then with one that adds a normalizer backed by a 1-nearest-neighbour
        regressor. For each variant the intervals predicted at significance
        0.1 are printed next to the true targets and the interval sizes.
        Nothing is asserted; the test only checks that the pipeline runs.
        """
        boston = load_boston()

        # Shuffle the sample indices and cut them into three thirds.
        shuffled = np.random.permutation(boston.target.size)
        first_cut = int(shuffled.size / 3)
        second_cut = int(2 * shuffled.size / 3)
        train = shuffled[:first_cut]
        calibrate = shuffled[first_cut:second_cut]
        test = shuffled[second_cut:]

        def fit_and_report(nc_function):
            # Fit on the first third, calibrate on the second, then print one
            # row per test sample: interval bounds, true target, interval size.
            icp = IcpRegressor(nc_function)
            icp.fit(boston.data[train, :], boston.target[train])
            icp.calibrate(boston.data[calibrate, :], boston.target[calibrate])

            intervals = icp.predict(boston.data[test, :], significance=0.1)
            sizes = intervals[:, 1] - intervals[:, 0]
            rows = np.vstack([intervals.T, boston.target[test], sizes.T]).T
            print(pd.DataFrame(rows, columns=["min", "max", "truth", "size"]))

        # Without normalization.
        tree = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
        fit_and_report(RegressorNc(tree, AbsErrorErrFunc()))

        # With normalization.
        tree = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
        neighbours = RegressorAdapter(KNeighborsRegressor(n_neighbors=1))
        scaler = RegressorNormalizer(tree, neighbours, AbsErrorErrFunc())
        fit_and_report(RegressorNc(tree, AbsErrorErrFunc(), scaler))
Exemplo n.º 2
0
    def test_acp_regression_tree(self):
        """Compare three AggregatedCp sampling strategies on the diabetes data.

        Two thirds of the shuffled samples are used for fitting and the rest
        for testing. One aggregated conformal regressor is built per sampler
        (random sub-sampling, cross sampling, bootstrap), each around a
        default DecisionTreeRegressor. For every model the error rate at
        significance 0.1 and the table of predicted intervals versus the true
        targets are printed. Nothing is asserted.
        """
        diabetes = load_diabetes()

        # Shuffle the sample indices; the first two thirds are for training.
        order = np.random.permutation(diabetes.target.size)
        cut = int(2 * order.size / 3)
        train, test = order[:cut], order[cut:]

        truth = diabetes.target[test]
        significance = 0.1

        # One aggregated predictor per sampler, in this fixed order.
        samplers = (
            ("ACP-RandomSubSampler", RandomSubSampler),
            ("ACP-CrossSampler", CrossSampler),
            ("ACP-BootstrapSampler", BootstrapSampler),
        )
        models = {
            label: AggregatedCp(
                IcpRegressor(
                    RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
                sampler_cls(),
            )
            for label, sampler_cls in samplers
        }

        for label, acp in models.items():
            acp.fit(diabetes.data[train, :], diabetes.target[train])
            # The prediction made without a significance is what
            # reg_mean_errors receives; the one made at the chosen
            # significance is what gets tabulated.
            unrestricted = acp.predict(diabetes.data[test, :])
            intervals = acp.predict(diabetes.data[test, :],
                                    significance=significance)
            frame = pd.DataFrame(np.vstack((intervals.T, truth)).T,
                                 columns=["min", "max", "truth"])
            print("\n{}".format(label))
            print("Error rate: {}".format(
                reg_mean_errors(unrestricted, truth, significance)))
            print(frame)
Exemplo n.º 3
0
def CF_QuanVal(X, Y, estimator, conformalSignificance):
    """Fit an aggregated conformal regressor and print its intervals.

    An ``AggregatedCp`` with a ``BootstrapSampler`` is built around
    ``estimator``, fitted on the first 30 rows of ``X``/``Y`` and used to
    predict the remaining rows. The mean interval width, the individual
    widths and both raw prediction arrays are printed.

    Parameters
    ----------
    X, Y:
        Predictor matrix and target values; they are sliced with
        ``[:30]`` and ``[30:]``.
    estimator:
        Underlying regressor wrapped in a ``RegressorAdapter``.
    conformalSignificance:
        Currently unused (see the review note in the body).

    Returns
    -------
    The fitted ``AggregatedCp`` instance.
    """
    print("Starting quantitative conformal prediction validation")

    icp = AggregatedCp(IcpRegressor(RegressorNc(RegressorAdapter(estimator))),
                       BootstrapSampler())

    # NOTE(review): an earlier, disabled variant used a normalized
    # nonconformity function and conformal_cross_val_score over the levels
    # [0.05, 0.1, 0.2, conformalSignificance]. The live code below ignores
    # conformalSignificance and hard-codes a 30-row training split and a
    # 0.25 significance -- confirm whether that is still intended.
    icp.fit(X[:30], Y[:30])
    prediction = icp.predict(X[30:])
    prediction_sign = icp.predict(X[30:], significance=0.25)

    # Column 0 is the lower bound and column 1 the upper bound, so the width
    # is upper - lower (the previous lower - upper gave negative widths).
    interval = prediction_sign[:, 1] - prediction_sign[:, 0]
    print(np.mean(interval))
    print(interval)
    print("\n")
    print(prediction)
    print(prediction_sign)
    return icp
Exemplo n.º 4
0
    def CF_quantitative_validation(self):
        ''' Performs internal validation for conformal quantitative models.

        For each entry in a fixed list of seeds, the data are split 75/25,
        an aggregated conformal regressor (bootstrap sampling) is fitted on
        the training part, and the test part is predicted at
        ``self.conformalSignificance``. The mean interval width and the
        fraction of test values lying strictly inside their interval are
        averaged over the runs and rounded to two decimals.

        Side effects: sets ``self.conformal_accuracy`` and
        ``self.conformal_mean_interval``.

        Returns ``(True, (results,))`` where ``results`` is a list of
        ``(key, label, value)`` tuples. Any exception raised while
        validating is logged and re-raised.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        # One aggregated conformal estimator is built and scored per seed.
        seeds = [5, 7, 35]
        # Mean interval width of each run.
        interval_means = []
        # Accuracy of each run.
        accuracies = []
        results = []
        try:
            for seed in seeds:
                # Generate training and test sets.
                # NOTE(review): with shuffle=False the split does not depend
                # on random_state, so every run sees the same train/test
                # partition and the runs differ only through the bootstrap
                # sampling. Confirm whether shuffle=True was intended.
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=0.25, random_state=seed, shuffle=False)
                # Create the aggregated conformal regressor.
                conformal_pred = AggregatedCp(
                    IcpRegressor(RegressorNc(RegressorAdapter(
                        self.estimator))), BootstrapSampler())
                # Fit conformal regressor to the data
                conformal_pred.fit(X_train, Y_train)

                # Perform prediction on test set
                prediction = conformal_pred.predict(X_test,
                                                    self.conformalSignificance)
                lower = prediction[:, 0]
                upper = prediction[:, 1]
                # Interval width is upper - lower. The previous
                # |lower| - |upper| is not a width: it is negative for
                # positive bounds and meaningless when the bounds straddle 0.
                interval_means.append(np.mean(upper - lower))

                Y_test = Y_test.reshape(-1, 1)
                # Boolean mask of instances whose true value lies strictly
                # inside the predicted interval.
                inside_interval = ((lower.reshape(-1, 1) < Y_test) &
                                   (upper.reshape(-1, 1) > Y_test))
                # Accuracy: fraction of test instances inside the interval.
                accuracies.append(np.sum(inside_interval) / len(Y_test))
        except Exception as e:
            LOG.error(f'Quantitative conformal validation'
                      f' failed with exception: {e}')
            raise

        # Average over the runs and keep two decimals.
        self.conformal_accuracy = float(f"{np.mean(accuracies):.2f}")
        self.conformal_mean_interval = float(f"{np.mean(interval_means):.2f}")

        # Add quality metrics to results.
        results.append(('Conformal_mean_interval', 'Conformal mean interval',
                        self.conformal_mean_interval))
        results.append(('Conformal_accuracy', 'Conformal accuracy',
                        self.conformal_accuracy))

        return True, (results, )
Exemplo n.º 5
0
    def build(self):
        """Build a quantitative PLSR estimator, optionally with a conformal wrapper.

        Returns
        -------
        tuple
            ``(False, message)`` when the model is not quantitative or when
            ``self.failed`` is set; otherwise ``(True, results)`` where
            ``results`` is a list of ``(key, label, value)`` tuples.

        Side effects: may replace ``self.cv``, sets ``self.estimator`` (in
        the non-tuned branch) and ``self.conformal_pred`` (when
        ``self.conformal`` is set), and fits the estimator on copies of
        ``self.X`` / ``self.Y``.
        """
        if not self.quantitative:
            print("PLSR only applies to quantitative data")
            return False, "PLSR only applies to quantitative data"

        if self.failed:
            return False, "Error initiating model"

        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
        ]

        if self.cv:
            # NOTE(review): 46 is passed as the second argument of
            # getCrossVal -- presumably a random seed; confirm.
            self.cv = getCrossVal(self.cv, 46, self.n, self.p)

        if self.tune:
            # 'auto' uses the parent-class optimizer, 'manual' this class's.
            # NOTE(review): any other value of self.optimiz silently skips
            # the optimization but the model is still reported as optimized.
            if self.optimiz == 'auto':
                super(PLSR, self).optimize(X, Y, PLS_r(
                    **self.estimator_parameters), self.tune_parameters)
            elif self.optimiz == 'manual':
                self.optimize(X, Y, PLS_r(
                    **self.estimator_parameters), self.tune_parameters)

            results.append(
                ('model', 'model type', 'PLSR quantitative (optimized)'))

        else:
            print("Building  Quantitative PLSR")
            self.estimator = PLS_r(**self.estimator_parameters)
            results.append(('model', 'model type', 'PLSR quantitative'))

        if self.conformal:
            underlying_model = RegressorAdapter(self.estimator)
            # The normalizing model wraps the same estimator. A 1-NN
            # normalizer used to be built here and was immediately
            # overwritten; that dead assignment has been removed.
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(
                underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())

            self.conformal_pred.fit(X, Y)
            # A second 'model' entry is appended on purpose: it overrides
            # the non-conformal one.
            results.append(
                ('model', 'model type', 'conformal PLSR quantitative'))

        self.estimator.fit(X, Y)

        return True, results
Exemplo n.º 6
0
def CF_QuanCal(X, Y, estimator):
    """Fit an aggregated, normalized conformal regressor on ``X``/``Y``.

    The nonconformity function uses an absolute-error function on
    ``estimator`` and a ``RegressorNormalizer`` whose normalizing model is a
    shallow copy of ``estimator``. The inductive predictor is aggregated
    with a ``RandomSubSampler``.

    Parameters
    ----------
    X, Y:
        Training predictors and targets, passed to ``AggregatedCp.fit``.
    estimator:
        Underlying regressor.

    Returns
    -------
    The fitted ``AggregatedCp`` instance.
    """
    nc = RegressorNc(
        RegressorAdapter(estimator),
        AbsErrorErrFunc(),
        RegressorNormalizer(estimator, copy.copy(estimator),
                            AbsErrorErrFunc()),
    )
    # The sampler belongs to AggregatedCp. A misplaced parenthesis used to
    # pass it as the second positional argument of IcpRegressor instead, so
    # AggregatedCp fell back to its default sampler.
    acp = AggregatedCp(IcpRegressor(nc), RandomSubSampler())
    acp.fit(X, Y)
    return acp
Exemplo n.º 7
0
    def __init__(self, model, sklearn_model: bool):
        r"""Adapt `model` for conformal prediction.

        The input `model` is wrapped in an inductive conformal predictor
        (stored as ``self.icp``) so it can be used to create confidence
        intervals with conformal prediction.

        Parameters
        ----------
        model:
            Model we want to use as the underlying model to generate predictions and the
            confidence interval. This model can only be a scikit learn model, LGBMRegressor,
            LGBMClassifier, XGBRegressor, XGBClassifier, CatBoostRegressor or CatBoostClassifier.
        sklearn_model: bool
            This variable indicates if the model belongs to scikit learn or not.

        Examples
        --------
        >>> model = lightgbm.LGBMRegressor()
        >>> cp = Adapt_to_CP(model, sklearn_model=False)
        """
        self.model = model

        # NOTE(review): when the model matches none of the branches below,
        # ``self.icp`` is never assigned.
        if sklearn_model:
            # scikit-learn models: the nonconformity function comes from
            # NcFactory.create_nc.
            if is_classifier(model):
                self.icp = IcpClassifier(NcFactory.create_nc(model))
            elif is_regressor(model):
                self.icp = IcpRegressor(NcFactory.create_nc(model))
            return

        # Other models go through NonConformistAdapter first. An object whose
        # class is named "Booster" is handled like a regressor.
        model_adapter = NonConformistAdapter(model)
        if is_classifier(model):
            self.icp = IcpClassifier(ClassifierNc(model_adapter))
        elif is_regressor(model) or model.__class__.__name__ == "Booster":
            self.icp = IcpRegressor(RegressorNc(model_adapter))
Exemplo n.º 8
0
    def test_oob_calibration(self):
        """Cross-validate out-of-bag conformal predictors on iris and diabetes.

        A random-forest classifier (iris) and a random-forest regressor
        (diabetes), both with 100 trees and ``oob_score=True``, are wrapped
        in the out-of-bag conformal predictors and evaluated with 5
        iterations of 5-fold cross-validation at significance levels 0.05,
        0.1 and 0.2. The scores averaged per significance level are printed.
        Nothing is asserted.
        """

        def run_cv(icp_cv, dataset, scoring_funcs):
            # Same cross-validation settings for both tasks.
            return cross_val_score(
                icp_cv,
                dataset.data,
                dataset.target,
                iterations=5,
                folds=5,
                scoring_funcs=scoring_funcs,
                significance_levels=[0.05, 0.1, 0.2],
            )

        def report(title, scores):
            # Drop the bookkeeping columns and average per significance.
            print(title)
            trimmed = scores.drop(["fold", "iter"], axis=1)
            print(trimmed.groupby(["significance"]).mean())

        # Classification
        iris = load_iris()
        forest = RandomForestClassifier(n_estimators=100, oob_score=True)
        classifier_cp = OobCpClassifier(
            ClassifierNc(OobClassifierAdapter(forest)))
        class_scores = run_cv(ClassIcpCvHelper(classifier_cp), iris,
                              [class_mean_errors, class_avg_c])
        report("Classification: iris", class_scores)

        # Regression, absolute error
        diabetes = load_diabetes()
        forest = RandomForestRegressor(n_estimators=100, oob_score=True)
        regressor_cp = OobCpRegressor(
            RegressorNc(OobRegressorAdapter(forest)))
        reg_scores = run_cv(RegIcpCvHelper(regressor_cp), diabetes,
                            [reg_mean_errors, reg_median_size])
        report("Absolute error regression: diabetes", reg_scores)
Exemplo n.º 9
0
    def build(self):
        ''' Build a quantitative PLSR model from the X and Y matrices.

        When the 'tune' parameter is set, the estimator is optimized either
        with the parent-class optimizer ('auto') or with this class's
        ``optimize`` ('manual'); otherwise a ``PLS_r`` is instantiated from
        ``self.estimator_parameters``. The estimator is then fitted. When
        the 'conformal' parameter is set, ``self.estimator`` is replaced by
        an aggregated conformal regressor built around a shallow copy of the
        fitted estimator, and fitted again.

        Returns ``(True, results)`` on success, where ``results`` is a list
        of ``(key, label, value)`` tuples, or ``(False, message)`` on a
        handled failure.

        NOTE(review): the 'optimize' key is popped from
        ``self.estimator_parameters`` in place, so a second call on the same
        instance will not find it. Confirm whether the mutation is relied
        upon elsewhere before switching to a local copy.
        '''

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        if self.param.getVal('tune'):

            # Optimize estimator using sklearn-gridsearch
            if self.estimator_parameters['optimize'] == 'auto':
                try:

                    LOG.info('Optimizing PLSR using SK-LearnGridSearch')

                    # Remove optimize key from parameter dictionary
                    # to avoid sklearn estimator error (unexpected keyword)
                    self.estimator_parameters.pop("optimize")

                    super(PLSR, self).optimize(X, Y, PLS_r(
                        **self.estimator_parameters),
                        self.param.getDict('PLSR_optimize'))

                except Exception as e:
                    message = (f'Error performing SK-LearnGridSearch'
                               f' on PLSR estimator with exception {e}')
                    LOG.error(message)
                    return False, message

            # Optimize using flame implementation (recommended)
            elif self.estimator_parameters['optimize'] == 'manual':

                LOG.info('Optimizing PLSR using manual method')

                # Remove optimize key from parameter dictionary
                # to avoid sklearn estimator error (unexpected keyword)
                self.estimator_parameters.pop("optimize")

                success, message = self.optimize(X, Y, PLS_r(
                    **self.estimator_parameters),
                    self.param.getDict('PLSR_optimize'))

                if not success:
                    return False, message

            else:
                LOG.error('Type of tune not recognized, check the input')
                return False, 'Type of tune not recognized, check the input'

            results.append(('model', 'model type', 'PLSR quantitative (optimized)'))

        else:
            LOG.info('Building Quantitative PLSR with no optimization')
            try:
                # Remove optimize key from parameters to avoid error
                # as the sklearn estimator does not have this key
                self.estimator_parameters.pop("optimize")

                self.estimator = PLS_r(**self.estimator_parameters)
            except Exception as e:
                # The returned message used to name 'PLS_da' although this
                # is the PLS_r estimator; it now matches the log message.
                message = f'Error at PLS_r instantiation with exception {e}'
                LOG.error(message)
                return False, message

            results.append(('model', 'model type', 'PLSR quantitative'))

        # Fit estimator to the data
        self.estimator.fit(X, Y)

        if not self.param.getVal('conformal'):
            return True, results

        # Keep a shallow copy of the fitted estimator; self.estimator is
        # replaced by the conformal predictor below.
        self.estimator_temp = copy(self.estimator)
        try:

            LOG.info('Building PLSR aggregated conformal predictor')

            # Both the underlying and the normalizing model wrap the same
            # copied estimator.
            underlying_model = RegressorAdapter(self.estimator_temp)
            normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())

            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc), BootstrapSampler())

        except Exception as e:
            message = (f'Error building aggregated PLSR conformal'
                       f' regressor with exception: {e}')
            LOG.error(message)
            return False, message

        # Fit conformal estimator to the data
        self.estimator.fit(X, Y)

        # A second 'model' entry is appended on purpose: it overrides the
        # non-conformal one.
        results.append(('model', 'model type', 'conformal PLSR quantitative'))

        return True, results
Exemplo n.º 10
0
    def build(self):
        '''Build a new XGBOOST model with the X and Y numpy matrices.

        Depending on the 'quantitative' parameter an ``XGBRegressor`` or an
        ``XGBClassifier`` is created from ``self.estimator_parameters``.
        With 'tune' set the estimator is passed to ``self.optimize``;
        otherwise it is fitted directly. With 'conformal' set,
        ``self.estimator`` is replaced by an aggregated conformal predictor
        built around a shallow copy of the estimator and fitted.

        Returns ``(True, results)`` on success, where ``results`` is a list
        of ``(key, label, value)`` tuples. Returns ``(False, message)`` when
        xgboost cannot be imported or when the optimization fails.

        Raises: any exception raised while building/fitting the plain or the
        conformal estimator is logged and re-raised.
        '''

        try:
            from xgboost.sklearn import XGBClassifier
            from xgboost.sklearn import XGBRegressor
        except Exception:
            return False,  'XGboost not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        quantitative = self.param.getVal('quantitative')

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing XGBOOST estimator")

            try:
                # Check type of model
                if quantitative:
                    self.estimator = XGBRegressor(
                                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model','model type','XGBOOST quantitative (optimized)'))
                else:
                    # The former get_params()/params['num_class'] = 2 lines
                    # were dropped: the dict was never passed back to the
                    # estimator, so they had no effect.
                    self.estimator = XGBClassifier(
                                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator,
                                  self.tune_parameters)
                    results.append(('model','model type','XGBOOST qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing XGBOOST estimator with exception {e}'

        else:
            try:
                if quantitative:
                    LOG.info("Building Quantitative XGBOOST model")
                    self.estimator = XGBRegressor(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST quantitative'))
                else:
                    LOG.info("Building Qualitative XGBOOST model")
                    self.estimator = XGBClassifier(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                # The exception has always propagated from here; the
                # unreachable "return False, ..." after the raise was
                # removed and the failure is now logged first.
                LOG.error(f'Exception building XGBOOST estimator with exception {e}')
                raise

        # Keep a shallow copy of the estimator; self.estimator may be
        # replaced by the conformal predictor below.
        self.estimator_temp = copy(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results

        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative XGBOOST model")

                # Both the underlying and the normalizing model wrap the
                # same copied estimator.
                underlying_model = RegressorAdapter(self.estimator_temp)
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(
                                underlying_model,
                                normalizing_model,
                                AbsErrorErrFunc())
                nc = RegressorNc(underlying_model,
                                    AbsErrorErrFunc(),
                                    normalizer)

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                                BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(('model', 'model type', 'conformal XGBOOST quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative XGBOOST model")

                self.estimator = AggregatedCp(
                                    IcpClassifier(
                                        ClassifierNc(
                                            ClassifierAdapter(self.estimator_temp),
                                            MarginErrFunc()
                                        )
                                    ),
                                    BootstrapSampler())

                # Fit estimator to the data
                self.estimator.fit(X, Y)
                results.append(('model', 'model type', 'conformal XGBOOST qualitative'))

        except Exception as e:
            # Same as above: log, then let the exception propagate.
            LOG.error(f'Exception building conformal XGBOOST estimator with exception {e}')
            raise

        return True, results



## Overriding of parent methods

    # def CF_quantitative_validation(self):
    #     ''' performs validation for conformal quantitative models '''

      

    # def CF_qualitative_validation(self):
    #     ''' performs validation for conformal qualitative models '''


    # def quantitativeValidation(self):
    #     ''' performs validation for quantitative models '''

    # def qualitativeValidation(self):
    #     ''' performs validation for qualitative models '''


    # def validate(self):
    #     ''' Validates the model and computes suitable model quality scoring values'''


    # def optimize(self, X, Y, estimator, tune_parameters):
    #     ''' optimizes a model using a grid search over a range of values for diverse parameters'''


    # def regularProject(self, Xb, results):
    #     ''' projects a collection of query objects in a regular model, for obtaining predictions '''


    # def conformalProject(self, Xb, results):
    #     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''


    # def project(self, Xb, results):
    #     ''' Uses the X matrix provided as argument to predict Y'''
Exemplo n.º 11
0
    def build(self):
        '''Build a new DL model with the X and Y numpy matrices '''

        try:
            from keras.wrappers.scikit_learn import KerasClassifier
            from keras.wrappers.scikit_learn import KerasRegressor
        except Exception as e:
            return False, 'Keras not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing Keras estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = KerasRegressor(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS quantitative (optimized)'))
                else:
                    self.estimator = KerasClassifier(
                        **self.estimator_parameters)
                    #params = self.estimator.get_params()
                    #params['num_class'] = 2
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing KERAS estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):

                    LOG.info("Building Quantitative KERAS mode")
                    self.estimator = KerasRegressor(
                        build_fn=self.create_model,
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras quantitative'))
                else:

                    LOG.info("Building Qualitative Keras model")
                    self.estimator = KerasClassifier(
                        build_fn=self.create_model,
                        dim=self.X.shape[1],
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                raise e
                return False, f'Exception building Keras estimator with exception {e}'

        self.estimator_temp = clone(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results
        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative Keras model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                normalizing_model = RegressorAdapter(
                    KNeighborsRegressor(n_neighbors=15))
                # normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                # self.conformal_pred = AggregatedCp(IcpRegressor
                # (RegressorNc(RegressorAdapter(self.estimator))),
                #                                   BootstrapSampler())

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative Keras model")

                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())

                # Fit estimator to the data
                print('build finished')
                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras qualitative'))

        except Exception as e:
            raise e
            return False, f'Exception building conformal Keras estimator with exception {e}'

        return True, []
Exemplo n.º 12
0
    def test_cross_validation(self):
        """Run ``cross_val_score`` on one classifier and four regressors.

        Every scenario wraps an inductive conformal predictor in its
        cross-validation helper, scores it with 5 iterations of 5 folds at
        significance levels 0.05, 0.1 and 0.2, and prints the mean of each
        score column grouped by significance. The method contains no
        assertions; it only exercises the pipelines.
        """

        def report(title, cv_helper, dataset, metrics):
            # Cross-validate one wrapped predictor, drop the bookkeeping
            # columns and print the per-significance averages.
            table = cross_val_score(
                cv_helper,
                dataset.data,
                dataset.target,
                iterations=5,
                folds=5,
                scoring_funcs=metrics,
                significance_levels=[0.05, 0.1, 0.2],
            )
            print(title)
            averaged = table.drop(["fold", "iter"], axis=1)
            print(averaged.groupby(["significance"]).mean())

        def forest_regressor():
            # Fresh adapter around a 100-tree random forest regressor.
            return RegressorAdapter(RandomForestRegressor(n_estimators=100))

        # ---------------------------------------------------------------------
        # Classification, margin error (iris)
        # ---------------------------------------------------------------------
        iris = load_iris()
        margin_nc = ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc())
        report("Classification: iris",
               ClassIcpCvHelper(IcpClassifier(margin_nc)),
               iris,
               [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Regression, absolute error (diabetes)
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        abs_nc = RegressorNc(forest_regressor(), AbsErrorErrFunc())
        report("Absolute error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(abs_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, normalized absolute error (diabetes)
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        base_model = forest_regressor()
        scale_model = forest_regressor()
        scaler = RegressorNormalizer(base_model, scale_model,
                                     AbsErrorErrFunc())
        normalized_abs_nc = RegressorNc(base_model, AbsErrorErrFunc(), scaler)
        report("Normalized absolute error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(normalized_abs_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, signed error without normalization (diabetes)
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        signed_nc = RegressorNc(forest_regressor(), SignErrorErrFunc())
        report("Signed error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(signed_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, normalized signed error (diabetes)
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        base_model = forest_regressor()
        scale_model = forest_regressor()
        # The normalizer is built with the absolute error function while the
        # nonconformity scorer itself uses the signed error function.
        scaler = RegressorNormalizer(base_model, scale_model,
                                     AbsErrorErrFunc())
        normalized_signed_nc = RegressorNc(base_model, SignErrorErrFunc(),
                                           scaler)
        report("Normalized signed error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(normalized_signed_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])
Exemplo n.º 13
0
# Shuffle the sample indices, then take the first two thirds for training and
# the remainder for testing. `data` is defined outside this excerpt.
idx = np.random.permutation(data.target.size)
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# Ground-truth targets of the held-out samples, plus the column labels and
# significance level set up for the evaluation loop below.
truth = data.target[test]
columns = ['min', 'max', 'truth']
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

# Three aggregated conformal predictors that differ only in the sampler passed
# to AggregatedCp; each wraps an IcpRegressor over a default
# DecisionTreeRegressor.
models = {
    'ACP-RandomSubSampler':
    AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        BootstrapSampler())
}

# -----------------------------------------------------------------------------
# Train, predict and evaluate
# -----------------------------------------------------------------------------
# NOTE(review): dict.iteritems() exists only in Python 2; under Python 3 this
# needs models.items(). The loop body is not part of this excerpt.
for name, model in models.iteritems():
Exemplo n.º 14
0
from nonconformist.nc import RegressorNc, abs_error, abs_error_inv


def split_data(data, n_train, n_test):
    """Randomly split ``data`` into two parts in the ratio n_train : n_test.

    ``n_train`` and ``n_test`` are relative weights, not absolute counts: the
    first part receives ``n_train * len(data) // (n_train + n_test)`` items
    and the second part receives every remaining item. Items are assigned
    through one ``np.random.permutation`` draw, so the result depends on the
    global NumPy random state.

    Returns a ``(first, second)`` tuple obtained by indexing ``data`` with
    the two index arrays.
    """
    total = len(data)
    first_count = n_train * total // (n_train + n_test)
    order = np.random.permutation(total)
    first_idx, second_idx = order[:first_count], order[first_count:total]
    return data[first_idx], data[second_idx]

# Load the "auto-mpg" table and pass it through Orange's Impute preprocessor
# (presumably to fill in missing values -- confirm against Orange's docs).
data = Orange.data.Table("auto-mpg")
imp = Impute()
data = imp(data)

# For ten significance levels evenly spaced between 0.01 and 0.1, average the
# error rate and interval width of an inductive conformal regressor over ten
# random splits.
for sig in np.linspace(0.01, 0.1, 10):
    errs, szs = [], []
    for rep in range(10):
        # 2:1 split into train/test, then 2:1 again into train/calibration.
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        icp = IcpRegressor(RegressorNc(DecisionTreeRegressor(), abs_error, abs_error_inv))
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)

        # Fraction of test targets that fall inside their [p[0], p[1]] interval.
        acc = sum(p[0] <= y <= p[1] for p, y in zip(pred, test.Y))/len(pred)
        err = 1-acc
        # Mean interval width.
        sz = sum(p[1]-p[0] for p in pred)/len(pred)
        errs.append(err)
        szs.append(sz)
    # One summary line per significance level: level, mean error, mean width.
    print(sig, np.mean(errs), np.mean(szs))
def run_equalized_coverage_experiment(dataset_name,
                                      method,
                                      seed,
                                      save_to_csv=True,
                                      test_ratio=0.2):
    """Run the marginal, joint and groupwise conformal experiments.

    The dataset is split into train and test sets, the train set is further
    halved into a proper training set and a calibration set, and three
    conformal procedures are run with the model family chosen by ``method``:
    a marginal one, a conditional one calibrated jointly, and a conditional
    one with a separate model per category. Coverage and length statistics
    are collected through ``append_statistics`` and optionally appended to
    ``./results/results.csv``.

    This function relies on the module-level names ``condition`` and
    ``append_statistics``, which are not defined here.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``datasets.GetDataset``; also used to derive the
        ``"<name>_non_white"`` / ``"<name>_white"`` group labels.
    method : str
        ``"net"`` for the MSE network with the signed error function, or
        ``"qnet"`` for the quantile network with the asymmetric quantile
        error function. Any other value runs no experiment.
    seed : int
        Seed for ``random``, NumPy, torch and the train/test split.
    save_to_csv : bool, optional
        When true, append the collected rows to ``./results/results.csv``.
    test_ratio : float, optional
        Fraction of the data held out as the test set.

    Returns
    -------
    None
    """
    random_state_train_test = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # The presence of a /scratch directory selects the cluster data path;
    # otherwise the local path is used.
    if os.path.isdir('/scratch'):
        dataset_base_path = '/scratch/users/yromano/data/regression_data/'
    else:
        dataset_base_path = '/Users/romano/mydata/regression_data/'

    # desired miscoverage error
    alpha = 0.1

    # labels of the two groups, forwarded to append_statistics
    dataset_name_group_0 = dataset_name + "_non_white"
    dataset_name_group_1 = dataset_name + "_white"

    # load the dataset
    X, y = datasets.GetDataset(dataset_name, dataset_base_path)

    # divide the dataset into test and train based on the test_ratio parameter
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=random_state_train_test)

    # compute input dimensions
    n_train = x_train.shape[0]
    in_shape = x_train.shape[1]

    # divide the data into proper training set and calibration set; both get
    # n_half indices, so one sample is left out when n_train is odd
    idx = np.random.permutation(n_train)
    n_half = int(np.floor(n_train / 2))
    idx_train, idx_cal = idx[:n_half], idx[n_half:2 * n_half]

    # zero mean and unit variance scaling, fitted on the proper training rows
    # only and then applied to the whole train set and the test set
    scalerX = StandardScaler()
    scalerX = scalerX.fit(x_train[idx_train])
    x_train = scalerX.transform(x_train)
    x_test = scalerX.transform(x_test)

    # log-transform the responses
    y_train = np.log(1.0 + y_train)
    y_test = np.log(1.0 + y_test)

    # reshape the data
    x_train = np.asarray(x_train)
    y_train = np.squeeze(np.asarray(y_train))
    x_test = np.asarray(x_test)
    y_test = np.squeeze(np.asarray(y_test))

    # display basic information
    print("Dataset: %s" % (dataset_name))
    print(
        "Dimensions: train set (n=%d, p=%d) ; test set (n=%d, p=%d)" %
        (x_train.shape[0], x_train.shape[1], x_test.shape[0], x_test.shape[1]))

    # accumulators filled by append_statistics
    dataset_name_vec = []
    method_vec = []
    coverage_vec = []
    length_vec = []
    seed_vec = []
    test_ratio_vec = []

    # Network settings shared by both model families.
    net_kwargs = dict(
        model=None,
        fit_params=None,
        in_shape=in_shape,
        hidden_size=64,  # hidden dimension of the network
        learn_func=torch.optim.Adam,  # pytorch's optimizer object
        epochs=1000,  # number of epochs
        batch_size=64,  # mini-batch size
        dropout=0.1,  # dropout regularization rate
        lr=0.0005,  # learning rate
        wd=1e-6,  # weight decay regularization
        test_ratio=0.1,  # ratio of held-out data, used in cross-validation
        random_state=1,  # seed for splitting the data in cross-validation
    )

    # make_nc() builds a brand-new model and nonconformity scorer on every
    # call, so no experiment reuses a model from a previous one.
    if method == "net":
        label = "Conformal Neural Network"

        def make_nc():
            return RegressorNc(helper.MSENet_RegressorAdapter(**net_kwargs),
                               SignErrorErrFunc())
    elif method == "qnet":
        label = "CQR Neural Network"
        # desired quantiles, plus the quantile-network-only switch
        qnet_kwargs = dict(net_kwargs,
                           quantiles=[0.05, 0.95],
                           use_rearrangement=False)

        def make_nc():
            return RegressorNc(
                helper.AllQNet_RegressorAdapter(**qnet_kwargs),
                QuantileRegAsymmetricErrFunc())
    else:
        make_nc = None

    if make_nc is not None:

        def record(method_name, y_lower, y_upper):
            # compute and print average coverage and average length, then
            # store them in the accumulators
            coverage_sample, length_sample = (
                helper.compute_coverage_per_sample(y_test, y_lower, y_upper,
                                                   alpha, method_name, x_test,
                                                   condition))
            append_statistics(coverage_sample, length_sample, method_name,
                              dataset_name_vec, method_vec, coverage_vec,
                              length_vec, seed_vec, test_ratio_vec, seed,
                              test_ratio, dataset_name_group_0,
                              dataset_name_group_1)

        # Marginal: run_icp is called without the condition function.
        nc = make_nc()
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha)
        record("Marginal " + label, y_lower, y_upper)

        # Conditional (joint): one model, with the condition function passed
        # to run_icp.
        nc = make_nc()
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha, condition)
        record("Conditional " + label + " (joint)", y_lower, y_upper)

        # Conditional (groupwise): one model per distinct category that
        # condition() assigns to the training rows.
        category_map = np.array([
            condition((x_train[i, :], None)) for i in range(x_train.shape[0])
        ])
        categories = np.unique(category_map)
        nc_list = [make_nc() for _ in categories]
        y_lower, y_upper = helper.run_icp_sep(nc_list, x_train, y_train,
                                              x_test, idx_train, idx_cal,
                                              alpha, condition)
        record("Conditional " + label + " (groupwise)", y_lower, y_upper)

    ############### Summary

    coverage_str = 'Coverage (expected ' + str(100 - alpha * 100) + '%)'

    if save_to_csv:

        outdir = './results/'
        # exist_ok avoids failing when the directory appears between a check
        # and the creation (e.g. several runs started at once)
        os.makedirs(outdir, exist_ok=True)

        out_name = outdir + 'results.csv'

        df = pd.DataFrame({
            'name': dataset_name_vec,
            'method': method_vec,
            coverage_str: coverage_vec,
            'Avg. Length': length_vec,
            'seed': seed_vec,
            'train test ratio': test_ratio_vec
        })

        # append to the rows written by earlier runs, if any
        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)

        df.to_csv(out_name, index=False)
Exemplo n.º 16
0
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        Works on copies of self.X and self.Y. Depending on self.tune the
        estimator is either obtained through self.optimize or created
        directly from self.estimator_parameters; self.quantitative selects
        the regressor or the classifier variant. When self.conformal is set,
        an aggregated conformal predictor is additionally built on top of
        the estimator and fitted. The estimator itself is fitted last.

        Returns False when self.failed is set; otherwise returns the tuple
        (True, results), where results is a list of (key, label, value)
        tuples describing the model.
        '''

        if self.failed:
            # NOTE(review): this path returns a bare False while the success
            # path returns a (True, results) tuple -- confirm what callers
            # expect before unifying the two shapes.
            return False

        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
        ]

        if self.cv:
            self.cv = getCrossVal(self.cv,
                                  self.estimator_parameters["random_state"],
                                  self.n, self.p)

        if self.tune:
            if self.quantitative:
                self.optimize(X, Y, RandomForestRegressor(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF quantitative (optimized)'))
            else:
                self.optimize(X, Y, RandomForestClassifier(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF qualitative (optimized)'))
        elif self.quantitative:
            log.info("Building Quantitative RF model")
            # class_weight is removed before the parameters are handed to
            # the regressor
            self.estimator_parameters.pop('class_weight', None)

            self.estimator = RandomForestRegressor(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF quantitative'))
        else:
            log.info("Building Qualitative RF model")
            self.estimator = RandomForestClassifier(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF qualitative'))

        if self.conformal:
            if self.quantitative:
                # Both the underlying and the normalizing model wrap the
                # same estimator, each in its own adapter.
                underlying_model = RegressorAdapter(self.estimator)
                normalizing_model = RegressorAdapter(self.estimator)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                                   BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF quantitative'))

            else:
                self.conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF qualitative'))

        self.estimator.fit(X, Y)

        return True, results


#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Exemplo n.º 17
0
def run_experiment(dataset_name,
                   test_method,
                   random_state_train_test,
                   save_to_csv=True):
    """ Estimate prediction intervals and print the average length and coverage

    Parameters
    ----------

    dataset_name : array of strings, list of datasets
    test_method  : string, method to be tested, estimating
                   the 90% prediction interval
    random_state_train_test : integer, random seed to be used
    save_to_csv : boolean, save average length and coverage to csv (True)
                  or not (False)

    """

    dataset_name_vec = []
    method_vec = []
    coverage_vec = []
    length_vec = []
    seed_vec = []

    seed = random_state_train_test
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    coverage_linear=0
    length_linear=0
    coverage_linear_local=0
    length_linear_local=0

    coverage_net=0
    length_net=0
    coverage_net_local=0
    length_net_local=0

    coverage_forest=0
    length_forest=0
    coverage_forest_local=0
    length_forest_local=0

    coverage_cp_qnet=0
    length_cp_qnet=0
    coverage_qnet=0
    length_qnet=0

    coverage_cp_sign_qnet=0
    length_cp_sign_qnet=0

    coverage_cp_re_qnet=0
    length_cp_re_qnet=0
    coverage_re_qnet=0
    length_re_qnet=0

    coverage_cp_sign_re_qnet=0
    length_cp_sign_re_qnet=0

    coverage_cp_qforest=0
    length_cp_qforest=0
    coverage_qforest=0
    length_qforest=0

    coverage_cp_sign_qforest=0
    length_cp_sign_qforest=0


    # determines the size of test set
    test_ratio = 0.2

    # conformal prediction miscoverage level
    significance = 0.1
    # desired quantile levels, used by the quantile regression methods
    quantiles = [0.05, 0.95]

    # Random forests parameters (shared by conditional quantile random forests
    # and conditional mean random forests regression).
    n_estimators = 1000 # usual random forests n_estimators parameter
    min_samples_leaf = 1 # default parameter of sklearn

    # Quantile random forests parameters.
    # See QuantileForestRegressorAdapter class for more details
    quantiles_forest = [5, 95]
    CV_qforest = True
    coverage_factor = 0.85
    cv_test_ratio = 0.05
    cv_random_state = 1
    cv_range_vals = 30
    cv_num_vals = 10

    # Neural network parameters  (shared by conditional quantile neural network
    # and conditional mean neural network regression)
    # See AllQNet_RegressorAdapter and MSENet_RegressorAdapter in helper.py
    nn_learn_func = torch.optim.Adam
    epochs = 1000
    lr = 0.0005
    hidden_size = 64
    batch_size = 64
    dropout = 0.1
    wd = 1e-6

    # Ask for a reduced coverage when tuning the network parameters by
    # cross-validation to avoid too conservative initial estimation of the
    # prediction interval. This estimation will be conformalized by CQR.
    quantiles_net = [0.1, 0.9]


    # local conformal prediction parameter.
    # See RegressorNc class for more details.
    beta = 1
    beta_net = 1

    # local conformal prediction parameter. The local ridge regression method
    # uses nearest neighbor regression as the MAD estimator.
    # Number of neighbors used by nearest neighbor regression.
    n_neighbors = 11

    print(dataset_name)
    sys.stdout.flush()

    try:
        # load the dataset
        X, y = datasets.GetDataset(dataset_name, base_dataset_path)
    except:
        print("CANNOT LOAD DATASET!")
        return

    # Dataset is divided into test and train data based on test_ratio parameter
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_ratio,
                                                        random_state=random_state_train_test)

    # zero mean and unit variance scaling of the train and test features
    scalerX = StandardScaler()
    scalerX = scalerX.fit(X_train)
    X_train = scalerX.transform(X_train)
    X_test = scalerX.transform(X_test)

    # scale the labels by dividing each by the mean absolute response
    max_ytrain = np.mean(np.abs(y_train))
    y_train = y_train/max_ytrain
    y_test = y_test/max_ytrain

    # fit a simple ridge regression model (sanity check)
    model = linear_model.RidgeCV()
    model = model.fit(X_train, y_train)
    predicted_data = model.predict(X_test).astype(np.float32)

    # calculate the normalized mean squared error
    print("Ridge relative error: %f" % (np.sum((y_test-predicted_data)**2)/np.sum(y_test**2)))
    sys.stdout.flush()

    # reshape the data
    X_train = np.asarray(X_train)
    y_train = np.squeeze(np.asarray(y_train))
    X_test = np.asarray(X_test)
    y_test = np.squeeze(np.asarray(y_test))

    # input dimensions
    n_train = X_train.shape[0]
    in_shape = X_train.shape[1]

    print("Size: train (%d, %d), test (%d, %d)" % (X_train.shape[0], X_train.shape[1], X_test.shape[0], X_test.shape[1]))
    sys.stdout.flush()

    # set seed for splitting the data into proper train and calibration
    np.random.seed(seed)
    idx = np.random.permutation(n_train)

    # divide the data into proper training set and calibration set
    n_half = int(np.floor(n_train/2))
    idx_train, idx_cal = idx[:n_half], idx[n_half:2*n_half]

    ######################## Linear

    if 'linear' == test_method:

        model = linear_model.RidgeCV()
        nc = RegressorNc(model)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Ridge")
        coverage_linear, length_linear = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Ridge")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge')
        coverage_vec.append(coverage_linear)
        length_vec.append(length_linear)
        seed_vec.append(seed)

        nc = NcFactory.create_nc(
            linear_model.RidgeCV(),
            normalizer_model=KNeighborsRegressor(n_neighbors=n_neighbors)
        )

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Ridge-L")
        coverage_linear_local, length_linear_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Ridge-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge-L')
        coverage_vec.append(coverage_linear_local)
        length_vec.append(length_linear_local)
        seed_vec.append(seed)

    ######################### Neural net

    if 'neural_net' == test_method:

        model = helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape = in_shape,
                                               hidden_size = hidden_size,
                                               learn_func = nn_learn_func,
                                               epochs = epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state)
        nc = RegressorNc(model)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Net")
        coverage_net, length_net = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Net")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Net')
        coverage_vec.append(coverage_net)
        length_vec.append(length_net)
        seed_vec.append(seed)

        normalizer_adapter = helper.MSENet_RegressorAdapter(model=None,
                                                            fit_params=None,
                                                            in_shape = in_shape,
                                                            hidden_size = hidden_size,
                                                            learn_func = nn_learn_func,
                                                            epochs = epochs,
                                                            batch_size=batch_size,
                                                            dropout=dropout,
                                                            lr=lr,
                                                            wd=wd,
                                                            test_ratio=cv_test_ratio,
                                                            random_state=cv_random_state)
        adapter = helper.MSENet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape = in_shape,
                                                hidden_size = hidden_size,
                                                learn_func = nn_learn_func,
                                                epochs = epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state)

        normalizer = RegressorNormalizer(adapter,
                                         normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta_net)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Net-L")
        coverage_net_local, length_net_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Net-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Net-L')
        coverage_vec.append(coverage_net_local)
        length_vec.append(length_net_local)
        seed_vec.append(seed)

    ################## Random Forest

    if 'random_forest' == test_method:

        model = RandomForestRegressor(n_estimators=n_estimators,min_samples_leaf=min_samples_leaf, random_state=0)
        nc = RegressorNc(model, AbsErrorErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"RF")
        coverage_forest, length_forest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('RF')
        coverage_vec.append(coverage_forest)
        length_vec.append(length_forest)
        seed_vec.append(seed)

        normalizer_adapter = RandomForestRegressor(n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=0)
        adapter = RandomForestRegressor(n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=0)
        normalizer = RegressorNormalizer(adapter,
                                         normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"RF-L")
        coverage_forest_local, length_forest_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"RF-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('RF-L')
        coverage_vec.append(coverage_forest_local)
        length_vec.append(length_forest_local)
        seed_vec.append(seed)

    ################## Quantile Net

    if 'quantile_net' == test_method:

        model_full = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"QNet")
        coverage_qnet, length_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QNet")

        dataset_name_vec.append(dataset_name)
        method_vec.append('QNet')
        coverage_vec.append(coverage_qnet)
        length_vec.append(length_qnet)
        seed_vec.append(seed)

    if 'cqr_quantile_net' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles_net,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Net")
        coverage_cp_qnet, length_cp_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Net')
        coverage_vec.append(coverage_cp_qnet)
        length_vec.append(length_cp_qnet)
        seed_vec.append(seed)


    if 'cqr_asymmetric_quantile_net' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles_net,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Sign Net")
        coverage_cp_sign_qnet, length_cp_sign_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Sign Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_qnet)
        length_vec.append(length_cp_sign_qnet)
        seed_vec.append(seed)


    ################### Rearrangement Quantile Net

    if 'rearrangement' == test_method:

        model_full = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=True)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange QNet")
        coverage_re_qnet, length_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange QNet")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange QNet')
        coverage_vec.append(coverage_re_qnet)
        length_vec.append(length_re_qnet)
        seed_vec.append(seed)

    if 'cqr_rearrangement' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                                 fit_params=None,
                                                 in_shape = in_shape,
                                                 hidden_size = hidden_size,
                                                 quantiles = quantiles_net,
                                                 learn_func = nn_learn_func,
                                                 epochs = epochs,
                                                 batch_size=batch_size,
                                                 dropout=dropout,
                                                 lr=lr,
                                                 wd=wd,
                                                 test_ratio=cv_test_ratio,
                                                 random_state=cv_random_state,
                                                 use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange CQR Net")
        coverage_cp_re_qnet, length_cp_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange CQR Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Net')
        coverage_vec.append(coverage_cp_re_qnet)
        length_vec.append(length_cp_re_qnet)
        seed_vec.append(seed)


    if 'cqr_asymmetric_rearrangement' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                                 fit_params=None,
                                                 in_shape = in_shape,
                                                 hidden_size = hidden_size,
                                                 quantiles = quantiles_net,
                                                 learn_func = nn_learn_func,
                                                 epochs = epochs,
                                                 batch_size=batch_size,
                                                 dropout=dropout,
                                                 lr=lr,
                                                 wd=wd,
                                                 test_ratio=cv_test_ratio,
                                                 random_state=cv_random_state,
                                                 use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange CQR Sign Net")
        coverage_cp_sign_re_qnet, length_cp_sign_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange CQR Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_re_qnet)
        length_vec.append(length_cp_sign_re_qnet)
        seed_vec.append(seed)

    ################### Quantile Random Forest

    if 'quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=False
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals

        model_full = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=np.dot(100,quantiles),
                                                      params = params_qforest)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"QRF")
        coverage_qforest, length_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QRF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('QRF')
        coverage_vec.append(coverage_qforest)
        length_vec.append(length_qforest)
        seed_vec.append(seed)

    if 'cqr_quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals


        model = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=quantiles_forest,
                                                      params = params_qforest)

        nc = RegressorNc(model, QuantileRegErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR RF")
        coverage_cp_qforest, length_cp_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR RF')
        coverage_vec.append(coverage_cp_qforest)
        length_vec.append(length_cp_qforest)
        seed_vec.append(seed)

    if 'cqr_asymmetric_quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals


        model = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=quantiles_forest,
                                                      params = params_qforest)

        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Sign RF")
        coverage_cp_sign_qforest, length_cp_sign_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Sign RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign RF')
        coverage_vec.append(coverage_cp_sign_qforest)
        length_vec.append(length_cp_sign_qforest)
        seed_vec.append(seed)


#        tmp = model.predict(X_test)
#        y_lower = tmp[:,0]
#        y_upper = tmp[:,1]
#        if plot_results:
#            helper.plot_func_data(y_test,y_lower,y_upper,"QRF")
#        coverage_qforest, length_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QRF")
#
#        dataset_name_vec.append(dataset_name)
#        method_vec.append('QRF')
#        coverage_vec.append(coverage_qforest)
#        length_vec.append(length_qforest)
#        seed_vec.append(seed)



    ############### Summary

    coverage_str = 'Coverage (expected ' + str(100 - significance*100) + '%)'
    results = np.array([[dataset_name, coverage_str, 'Avg. Length', 'Seed'],
                     ['CP Linear', coverage_linear, length_linear, seed],
                     ['CP Linear Local', coverage_linear_local, length_linear_local, seed],
                     ['CP Neural Net', coverage_net, length_net, seed],
                     ['CP Neural Net Local', coverage_net_local, length_net_local, seed],
                     ['CP Random Forest', coverage_forest, length_forest, seed],
                     ['CP Random Forest Local', coverage_forest_local, length_forest_local, seed],
                     ['CP Quantile Net', coverage_cp_qnet, length_cp_qnet, seed],
                     ['CP Asymmetric Quantile Net', coverage_cp_sign_qnet, length_cp_sign_qnet, seed],
                     ['Quantile Net', coverage_qnet, length_qnet, seed],
                     ['CP Rearrange Quantile Net', coverage_cp_re_qnet, length_cp_re_qnet, seed],
                     ['CP Asymmetric Rearrange Quantile Net', coverage_cp_sign_re_qnet, length_cp_sign_re_qnet, seed],
                     ['Rearrange Quantile Net', coverage_re_qnet, length_re_qnet, seed],
                     ['CP Quantile Random Forest', coverage_cp_qforest, length_cp_qforest, seed],
                     ['CP Asymmetric Quantile Random Forest', coverage_cp_sign_qforest, length_cp_sign_qforest, seed],
                     ['Quantile Random Forest', coverage_qforest, length_qforest, seed]])

    results_ = pd.DataFrame(data=results[1:,1:],
                      index=results[1:,0],
                      columns=results[0,1:])

    print("== SUMMARY == ")
    print("dataset name: " + dataset_name)
    print(results_)
    sys.stdout.flush()

    if save_to_csv:
        results = pd.DataFrame(results)

        outdir = './results/'
        if not os.path.exists(outdir):
            os.mkdir(outdir)

        out_name = outdir + 'results.csv'

        df = pd.DataFrame({'name': dataset_name_vec,
                           'method': method_vec,
                           coverage_str : coverage_vec,
                           'Avg. Length' : length_vec,
                           'seed': seed_vec})

        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)

        df.to_csv(out_name, index=False)
Exemplo n.º 18
0
Arquivo: RF.py Projeto: e7dal/flame
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        The estimator is a random forest regressor when the 'quantitative'
        parameter is truthy and a random forest classifier otherwise. When
        the 'tune' parameter is set the estimator is handed to
        ``self.optimize``; otherwise it is fitted directly. When the
        'conformal' parameter is set, the estimator is then wrapped in an
        aggregated inductive conformal predictor and fitted again.

        Returns a tuple ``(success, payload)``: ``(True, results)`` where
        results is a list of (key, description, value) tuples, or
        ``(False, message)`` when any of the building stages raises.
        '''

        # Work on private copies of the data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
            ('model', 'model type', 'RF'),
        ]

        conformal = self.param.getVal('conformal')

        if self.param.getVal('tune'):
            # Grid-search path: no explicit fit() here; presumably
            # self.optimize leaves self.estimator ready to use -- TODO confirm
            LOG.info("Optimizing RF estimator")

            try:
                forest_cls = (RandomForestRegressor
                              if self.param.getVal('quantitative')
                              else RandomForestClassifier)
                self.estimator = forest_cls(**self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
            except Exception as e:
                return False, f'Exception optimizing RF estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):
                    forest_cls = RandomForestRegressor
                else:
                    forest_cls = RandomForestClassifier

                self.estimator = forest_cls(**self.estimator_parameters)

                # The plain-model message is only logged when no conformal
                # wrapper follows (that stage logs its own message)
                if not conformal:
                    if forest_cls is RandomForestRegressor:
                        LOG.info("Building Quantitative RF model")
                    else:
                        LOG.info("Building Qualitative RF model")

                self.estimator.fit(X, Y)
            except Exception as e:
                return False, f'Exception building RF estimator with exception {e}'

        # Nothing else to do for non-conformal models
        if not conformal:
            return True, results

        # Keep a copy of the plain estimator; it becomes the underlying
        # model of the conformal predictor built below
        self.estimator_temp = copy(self.estimator)

        try:
            if self.param.getVal('quantitative'):
                # Conformal regressor, normalized with a KNN model whose
                # number of neighbors comes from the conformal settings
                conformal_settings = self.param.getDict('conformal_settings')
                LOG.info("Building conformal Quantitative RF model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                knn = KNeighborsRegressor(
                    n_neighbors=conformal_settings['KNN_NN'])
                self.normalizing_model = RegressorAdapter(knn)
                normalizer = RegressorNormalizer(
                    underlying_model,
                    copy(self.normalizing_model),
                    AbsErrorErrFunc())
                icp = IcpRegressor(
                    RegressorNc(underlying_model, AbsErrorErrFunc(),
                                normalizer))
            else:
                # Conformal classifier using a margin error function
                LOG.info("Building conformal Qualitative RF model")

                adapter = ClassifierAdapter(self.estimator_temp)
                icp = IcpClassifier(ClassifierNc(adapter, MarginErrFunc()))

            # Aggregate the inductive predictor with bootstrap sampling and
            # fit the resulting estimator to the data
            self.estimator = AggregatedCp(icp, BootstrapSampler())
            self.estimator.fit(X, Y)

        except Exception as e:
            return False, f'Exception building conformal RF estimator with exception {e}'

        return True, results


## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Exemplo n.º 19
0
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

print('Classification: iris')
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
# Regression with the absolute-error nonconformity function (diabetes data)
# -----------------------------------------------------------------------------
data = load_diabetes()

# Random forest (100 trees) wrapped as an inductive conformal regressor
forest_adapter = RegressorAdapter(RandomForestRegressor(n_estimators=100))
icp = IcpRegressor(RegressorNc(forest_adapter, AbsErrorErrFunc()))
icp_cv = RegIcpCvHelper(icp)

# Evaluation settings: 5 iterations of 5 folds, scored at three
# significance levels
cv_options = dict(iterations=5,
                  folds=5,
                  scoring_funcs=[reg_mean_errors, reg_median_size],
                  significance_levels=[0.05, 0.1, 0.2])
scores = cross_val_score(icp_cv, data.data, data.target, **cv_options)

print('Absolute error regression: diabetes')
# Drop the bookkeeping columns, then average the scores per significance level
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
Exemplo n.º 20
0
def cv(df, parameters):
    """Score plain and normalized conformal regressors on three data splits.

    For each of the split ratios 0.5, 0.66 and 0.84 an inductive conformal
    regressor (ICP) is fitted twice on the standardized data: once with a
    KNN-based normalizer ("NCP") and once without ("CP"). The resulting
    intervals are mapped back to the original ``NetPosUsd`` scale and scored
    with ``qd_objective``. The mean losses over the three splits are written
    as one row to ``<algorithm>_cv.csv`` (appended if the file exists).

    Args:
        df: DataFrame with a ``NetPosUsd`` target column and a ``QdfTime``
            column (dropped); all remaining columns are used as features.
        parameters: dict with the keys ``algorithm``,
            ``randomized_calibration`` and ``alpha_``; every other entry is
            forwarded to the selected estimator.

    Raises:
        KeyError: if one of the three mandatory keys is missing.
        ValueError: if ``parameters['algorithm']`` is not supported.
    """
    end = len(df) - 120
    out = np.zeros(3)
    out2 = np.zeros(3)

    # Estimator hyper-parameters = everything except the control keys.
    p = parameters.copy()
    p.pop('algorithm')
    p.pop('randomized_calibration')
    p.pop('alpha_')

    name = parameters.get('algorithm')
    alpha = parameters.get('alpha_')
    # Equality (not truthiness) is kept on purpose so that only True / 1
    # switches the randomized calibration on, exactly as before.
    randomized = parameters.get('randomized_calibration') == True  # noqa: E712

    if name == 'RandomForest':
        algorithm = RandomForestRegressor(**p)
    elif name == 'K-NearestNeighbours':
        algorithm = KNeighborsRegressor(**p)
    elif name == 'LightGBM':
        algorithm = LGBMRegressor(**p)
    elif name == 'LassoRegression':
        algorithm = Lasso(**p)
    elif name == 'NeuralNetwork':
        # This estimator takes the parameter dict positionally.
        algorithm = NeuralNetworkAlgorithm(p)
    elif name == 'LSTM':
        algorithm = BiLSTM(**p)
    else:
        raise ValueError('Unsupported algorithm: {!r}'.format(name))

    # The result row is the hyper-parameter dict itself plus alpha_ and the
    # two losses. NOTE(review): `d` deliberately stays an alias of `p` (as in
    # the original), so an estimator that kept a reference to `p` sees the
    # added keys too - confirm whether a copy would be preferable.
    d = p
    d['alpha_'] = alpha

    # Keep the raw target statistics to undo the standardization later.
    m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()
    df = df.drop(['QdfTime'], axis=1)
    mean = df.mean(axis=0)
    std = df.std(axis=0)
    df = (df - mean) / std

    features = df.drop(['NetPosUsd'], axis=1)
    target = df['NetPosUsd']
    n_cal = int(end / 6)  # size of the calibration set and of the test set

    for i, ratio in enumerate([.5, 0.66, .84]):
        split = int(end * ratio)

        # The test window always directly follows the split position.
        test = features.iloc[split:split + n_cal, :].values
        ytest = target.iloc[split:split + n_cal].values

        if randomized:
            # Draw the calibration rows at random from everything before the
            # split; the remaining rows form the training set.
            pool = features.iloc[:split, :].values
            ypool = target.iloc[:split].values
            choose = np.random.choice(len(pool), n_cal, replace=False)
            mask = np.ones(len(pool), dtype=bool)
            mask[choose] = False

            calibrate, ycalibrate = pool[choose, :], ypool[choose]
            train, ytrain = pool[mask, :], ypool[mask]
        else:
            # Chronological layout: train | calibrate | test.
            train = features.iloc[:split - n_cal, :].values
            ytrain = target.iloc[:split - n_cal].values
            calibrate = features.iloc[split - n_cal:split, :].values
            ycalibrate = target.iloc[split - n_cal:split].values

        # ---------------------------------------------------------------------
        # Normalized conformal predictor (NCP)
        # ---------------------------------------------------------------------
        underlying_model = RegressorAdapter(algorithm)
        normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
        normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        # Predict before the estimator is refitted below, then undo the
        # standardization of the target.
        ncp = icp.predict(test, significance=alpha) * s + m

        # ---------------------------------------------------------------------
        # Plain conformal predictor (CP), same underlying estimator
        # ---------------------------------------------------------------------
        underlying_model = RegressorAdapter(algorithm)
        nc = RegressorNc(underlying_model, AbsErrorErrFunc())
        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        cp = icp.predict(test, significance=alpha) * s + m

        dfncp = pd.DataFrame({
            'NCP_lower': ncp[:, 0],
            'NCP_upper': ncp[:, 1],
            'NetPosUsd': ytest * s + m,
            'CP_lower': cp[:, 0],
            'CP_upper': cp[:, 1],
        })

        out[i] = qd_objective(dfncp.NetPosUsd, dfncp['CP_lower'], dfncp['CP_upper'], alpha)
        out2[i] = qd_objective(dfncp.NetPosUsd, dfncp['NCP_lower'], dfncp['NCP_upper'], alpha)

    d['CP_loss'] = np.mean(out)
    d['NCP_loss'] = np.mean(out2)

    # Append to an existing result file, otherwise create it with a header.
    out_path = name + '_cv.csv'
    row = pd.DataFrame(data=d, index=[0])
    if os.path.exists(out_path):
        row.to_csv(out_path, mode='a', header=False, index=False)
    else:
        row.to_csv(out_path, encoding='utf-8', index=False)
Exemplo n.º 21
0
def run_experiment(cur_test_method, cur_dataset_name, cur_batch_size,
                   cur_lr_loss, cur_lr_dis, cur_loss_steps, cur_dis_steps,
                   cur_mu_val, cur_epochs, cur_model_type, cur_regression_type,
                   cur_random_state, cur_second_scale, num_experiments):
    """Run repeated fair-regression experiments with group-conditional ICP.

    For each of ``num_experiments`` repetitions the data set is re-split with
    seed ``cur_random_state + i``, the selected learner is trained inside a
    group-conditional inductive conformal regressor (significance 0.1, groups
    given by the sign of the first input column, i.e. the attribute ``A``),
    and coverage, interval length, MSE and a fairness p-value are computed.
    One row per repetition is appended to ``./results/results.csv`` and a
    running summary is printed.

    Args:
        cur_test_method: one of ``"AdversarialDebiasing"``, ``"FairDummies"``,
            ``"HGR"`` or ``"Baseline"``.
        cur_dataset_name: data set name passed to
            ``get_dataset.get_train_test_data``.
        cur_batch_size, cur_lr_loss, cur_loss_steps, cur_dis_steps,
        cur_mu_val, cur_epochs, cur_model_type, cur_second_scale:
            hyper-parameters forwarded to the selected learner.
        cur_lr_dis: only recorded in the result row; no learner receives it.
        cur_regression_type: must be ``"mreg"``.
        cur_random_state: base random seed.
        num_experiments: number of repetitions.

    Raises:
        ValueError: if ``cur_regression_type`` is not ``"mreg"``, or (when the
            first learner is built) if ``cur_test_method`` is unknown.
    """
    method = cur_test_method

    seed = cur_random_state
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    dataset = cur_dataset_name
    batch_size = cur_batch_size
    lr_loss = cur_lr_loss        # step size to minimize loss
    loss_steps = cur_loss_steps  # inner epochs to fit loss
    dis_steps = cur_dis_steps    # inner epochs to fit GAN's classifier
    epochs = cur_epochs          # total number of epochs
    model_type = cur_model_type

    # utility loss; only mean regression is implemented
    if cur_regression_type != "mreg":
        raise ValueError(
            "Unsupported regression type: {!r}".format(cur_regression_type))
    cost_pred = torch.nn.MSELoss()
    out_shape = 1

    metric = "equalized_odds"

    print(dataset)
    print(method)
    sys.stdout.flush()

    avg_length_0 = np.zeros(num_experiments)
    avg_length_1 = np.zeros(num_experiments)

    avg_coverage_0 = np.zeros(num_experiments)
    avg_coverage_1 = np.zeros(num_experiments)

    avg_p_val = np.zeros(num_experiments)
    mse = np.zeros(num_experiments)

    def build_learner(in_shape):
        """Instantiate the learner selected by ``method`` for ``in_shape`` inputs."""
        if method == "AdversarialDebiasing":
            return adv_debiasing.AdvDebiasingRegLearner(
                lr=lr_loss,
                N_CLF_EPOCHS=loss_steps,
                N_ADV_EPOCHS=dis_steps,
                N_EPOCH_COMBINED=epochs,
                cost_pred=cost_pred,
                in_shape=in_shape,
                batch_size=batch_size,
                model_type=model_type,
                out_shape=out_shape,
                lambda_vec=cur_mu_val)
        if method == 'FairDummies':
            return fair_dummies_learning.EquiRegLearner(
                lr=lr_loss,
                pretrain_pred_epochs=0,
                pretrain_dis_epochs=0,
                epochs=epochs,
                loss_steps=loss_steps,
                dis_steps=dis_steps,
                cost_pred=cost_pred,
                in_shape=in_shape,
                batch_size=batch_size,
                model_type=model_type,
                lambda_vec=cur_mu_val,
                second_moment_scaling=cur_second_scale,
                out_shape=out_shape)
        if method == 'HGR':
            return continuous_fairness.HGR_Reg_Learner(
                lr=lr_loss,
                epochs=epochs,
                mu=cur_mu_val,
                cost_pred=cost_pred,
                in_shape=in_shape,
                out_shape=out_shape,
                batch_size=batch_size,
                model_type=model_type)
        if method == 'Baseline':
            # Same learner as FairDummies, but trained for prediction only:
            # all epochs go to pre-training and the penalty terms are zero.
            return fair_dummies_learning.EquiRegLearner(
                lr=lr_loss,
                pretrain_pred_epochs=epochs,
                pretrain_dis_epochs=0,
                epochs=0,
                loss_steps=0,
                dis_steps=0,
                cost_pred=cost_pred,
                in_shape=in_shape,
                batch_size=batch_size,
                model_type=model_type,
                lambda_vec=0,
                second_moment_scaling=0,
                out_shape=out_shape)
        raise ValueError("Unknown test method: {!r}".format(method))

    # function that extracts the group identifier
    def condition(x, y=None):
        return int(x[0][0] > 0)

    outdir = './results/'
    out_name = outdir + 'results.csv'
    full_name = cur_test_method + "_" + cur_model_type + "_" + cur_regression_type

    for i in range(num_experiments):

        # Split into train, calibration and test
        X, A, Y, X_cal, A_cal, Y_cal, X_test, A_test, Y_test = get_dataset.get_train_test_data(
            base_path, dataset, seed + i)
        in_shape = X.shape[1]

        print("n train = " + str(X.shape[0]) + " p = " + str(X.shape[1]))
        print("n calibration = " + str(X_cal.shape[0]))
        print("n test = " + str(X_test.shape[0]))

        sys.stdout.flush()

        # Defined inside the loop because the learner depends on this
        # split's input width.
        class RegAdapter(RegressorAdapter):
            """Adapter exposing the selected learner to nonconformist."""

            def __init__(self, model=None, fit_params=None, params=None):
                super(RegAdapter, self).__init__(model, fit_params)
                # Instantiate model
                self.learner = build_learner(in_shape)

            def fit(self, x, y):
                self.learner.fit(x, y)

            def predict(self, x):
                return self.learner.predict(x)

        fairness_reg = RegAdapter(model=None)
        nc = RegressorNc(fairness_reg, AbsErrorErrFunc())
        icp = IcpRegressor(nc, condition=condition)

        # The attribute A is prepended as the first input column; this is
        # the column that `condition` inspects.
        input_data_train = np.concatenate((A[:, np.newaxis], X), 1)
        icp.fit(input_data_train, Y)

        input_data_cal = np.concatenate((A_cal[:, np.newaxis], X_cal), 1)
        icp.calibrate(input_data_cal, Y_cal)

        input_data_test = np.concatenate((A_test[:, np.newaxis], X_test), 1)
        Yhat_test = icp.predict(input_data_test, significance=0.1)

        # compute and print average coverage and average length
        coverage_sample, length_sample = compute_coverage_per_sample(
            Y_test, Yhat_test[:, 0], Yhat_test[:, 1], 0.1, method,
            input_data_test, condition)

        avg_coverage, avg_length = compute_coverage_len(
            Y_test, Yhat_test[:, 0], Yhat_test[:, 1])
        avg_length_0[i] = np.mean(length_sample[0])
        avg_coverage_0[i] = np.mean(coverage_sample[0])
        avg_length_1[i] = np.mean(length_sample[1])
        avg_coverage_1[i] = np.mean(coverage_sample[1])

        # Point predictions of the fitted learner (not the intervals).
        Yhat_out_cal = fairness_reg.learner.predict(input_data_cal)
        Yhat_out_test = fairness_reg.learner.predict(input_data_test)

        if out_shape == 1:
            mse[i] = np.mean((Yhat_out_test - Y_test)**2)
            # Reference value: MSE of always predicting the test mean.
            MSE_trivial = np.mean((np.mean(Y_test) - Y_test)**2)
            print("MSE = " + str(mse[i]) + "MSE Trivial = " + str(MSE_trivial))

        p_val = utility_functions.fair_dummies_test_regression(
            Yhat_out_cal,
            A_cal,
            Y_cal,
            Yhat_out_test,
            A_test,
            Y_test,
            num_reps=1,
            num_p_val_rep=1000,
            reg_func_name="Net")

        avg_p_val[i] = p_val

        print("experiment = " + str(i + 1))

        print("Coverage 0 = " + str(avg_coverage_0[i]))
        print("Coverage 1 = " + str(avg_coverage_1[i]))

        print("Length 0 = " + str(avg_length_0[i]))
        print("Length 1 = " + str(avg_length_1[i]))
        print("MSE = " + str(mse[i]))

        print("p_val = " + str(p_val))
        sys.stdout.flush()

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(outdir, exist_ok=True)

        df = pd.DataFrame({
            'method': [cur_test_method],
            'dataset': [cur_dataset_name],
            'batch_size': [cur_batch_size],
            'lr_loss': [cur_lr_loss],
            'lr_dis': [cur_lr_dis],
            'loss_steps': [cur_loss_steps],
            'dis_steps': [cur_dis_steps],
            'mu_val': [cur_mu_val],
            'epochs': [cur_epochs],
            'random_state': [seed + i],
            'model_type': [cur_model_type],
            'metric': [metric],
            'cur_second_scale': [cur_second_scale],
            'regression_type': [cur_regression_type],
            'avg_length': [avg_length],
            'avg_coverage': [avg_coverage],
            'avg_length_0': [avg_length_0[i]],
            'avg_length_1': [avg_length_1[i]],
            'mse': [mse[i]],
            'avg_coverage_0': [avg_coverage_0[i]],
            'avg_coverage_1': [avg_coverage_1[i]],
            'p_val': [p_val],
            'full_name': [full_name]
        })

        # Append this run to the rows already stored on disk, if any.
        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)

        df.to_csv(out_name, index=False)

        # Running averages over the experiments finished so far.
        print(full_name)
        print(
            "Num experiments %02d | Avg MSE = %.4f | Avg Length 0 = %.4f | Avg Length 1 = %.4f | Avg Coverage 0 = %.4f | Avg Coverage 1 = %.4f | Avg p_val = %.4f | min p_val = %.4f"
            % (i + 1, np.mean(mse[:i + 1]), np.mean(avg_length_0[:i + 1]),
               np.mean(avg_length_1[:i + 1]), np.mean(avg_coverage_0[:i + 1]),
               np.mean(avg_coverage_1[:i + 1]), np.mean(
                   avg_p_val[:i + 1]), np.min(avg_p_val[:i + 1])))
        print("======== Done =========")
        sys.stdout.flush()
Exemplo n.º 22
0
def _load_xy(filepath):
    """Return the ``X`` and ``y`` arrays stored in the file at *filepath*."""
    archive = np.load(filepath)
    try:
        return archive["X"], archive["y"]
    finally:
        # np.load keeps .npz archives open until they are closed; objects
        # without a close() method need no cleanup.
        close = getattr(archive, "close", None)
        if close is not None:
            close()


def evaluate(model_filepath, train_filepath, test_filepath,
             calibrate_filepath, significance=0.05):
    """Evaluate model to estimate power.

    When the ``split.calibrate_split`` entry of ``params.yaml`` is 0, the
    stored model predicts the test set directly. Otherwise the model is
    wrapped in an inductive conformal regressor, fitted on the train set and
    calibrated on the calibrate set; the point prediction is then the
    midpoint of each predicted interval, and the intervals are saved and
    plotted. In both cases MSE and R2 are printed, plotted and written as
    JSON to ``METRICS_FILE_PATH``.

    Args:
        model_filepath (str): Path to model.
        train_filepath (str): Path to train set.
        test_filepath (str): Path to test set.
        calibrate_filepath (str): Path to calibrate set.
        significance (float): Conformal prediction error level used for the
            intervals. Defaults to 0.05, the previously hard-coded value.

    """

    METRICS_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Load parameters (only the "split" section is needed here).
    with open("params.yaml") as params_file:
        params_split = yaml.safe_load(params_file)["split"]

    X_test, y_test = _load_xy(test_filepath)

    trained_model = models.load_model(model_filepath)

    if params_split["calibrate_split"] == 0:
        y_pred = trained_model.predict(X_test)
    else:
        # NOTE: no normalizer is used; nonconformity is the plain absolute
        # error of the wrapped model.
        nc = RegressorNc(
            MyCustomModel(trained_model),
            err_func=AbsErrorErrFunc(),  # non-conformity function
        )
        model = IcpRegressor(nc)

        # Fit on the train set; targets are flattened to 1-D first.
        X_train, y_train = _load_xy(train_filepath)
        y_train = y_train.reshape((y_train.shape[0], ))
        model.fit(X_train, y_train)

        # Calibrate model.
        X_calibrate, y_calibrate = _load_xy(calibrate_filepath)
        y_calibrate = y_calibrate.reshape((y_calibrate.shape[0], ))
        model.calibrate(X_calibrate, y_calibrate)

        print(f"Calibration: {X_calibrate.shape}")

        # Predictions will contain the intervals. We need to compute the
        # middle points to get the actual predictions y.
        predictions = model.predict(X_test, significance=significance)
        lower = predictions[:, 0]
        upper = predictions[:, 1]

        # Compute middle points.
        y_pred = lower + (upper - lower) / 2

        # Reshape to put it in the same format as without calibration set.
        y_pred = y_pred.reshape((y_pred.shape[0], 1))

        # Build data frame with predictions.
        my_results = list(
            zip(np.reshape(y_test, (y_test.shape[0], )),
                np.reshape(y_pred, (y_pred.shape[0], )), lower, upper))

        df_predictions = pd.DataFrame(my_results,
                                      columns=[
                                          'ground_truth', 'predicted',
                                          'lower_bound', 'upper_bound'
                                      ])

        save_predictions(df_predictions)

        plot_intervals(df_predictions)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("MSE: {}".format(mse))
    print("R2: {}".format(r2))

    plot_prediction(y_test, y_pred, inputs=X_test, info="(R2: {})".format(r2))
    plot_individual_predictions(y_test, y_pred)

    with open(METRICS_FILE_PATH, "w") as f:
        json.dump(dict(mse=mse, r2=r2), f)
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases - confirm the pinned version or switch to another data set.
data = load_boston()

# Shuffle all sample indices, then cut them into three consecutive thirds:
# first third -> training, second third -> calibration, rest -> test.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Without normalization
# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# A decision tree is wrapped in the absolute-error nonconformity function
# and used by an inductive conformal regressor.
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
nc = RegressorNc(underlying_model, AbsErrorErrFunc())
icp = IcpRegressor(nc)
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# `prediction` has one row per test sample; column 0 is used as the lower
# and column 1 as the upper interval bound below.
prediction = icp.predict(data.data[test, :], significance=0.1)
header = ['min','max','truth','size']
# Interval width = upper bound - lower bound.
size = prediction[:, 1] - prediction[:, 0]
# One row per test sample: lower bound, upper bound, true target, width.
table = np.vstack([prediction.T, data.target[test], size.T]).T
df = pd.DataFrame(table, columns=header)
print(df)

# -----------------------------------------------------------------------------
Exemplo n.º 24
0
def train_and_test_quantile_QCP(parameters):
    """Run conformalized quantile regression on the 29 hourly data files.

    For every file ``data/EURUSD_NETPOSUSD_hourly_for_regresion<i>.csv``
    (i = 0..28) the columns are standardized, the last 120 rows are used as
    the test set and the last ``validation`` rows before them as the
    calibration set. The selected quantile estimator is run through
    ``helper.run_icp`` and the intervals, mapped back to the original
    ``NetPosUsd`` scale, are written to
    ``QCP<algorithm>_<alpha_>_<validation>.csv`` (created for i == 0,
    appended afterwards).

    Args:
        parameters: dict with the keys ``algorithm`` (one of
            ``QuantileGradientBoosting``, ``QuantileLightGBM``,
            ``QuantileRegression``, ``QuantileRandomForest``,
            ``QuantileKNN``), ``alpha_`` and ``validation``; the remaining
            entries are passed to the estimator as ``params``.

    Raises:
        KeyError: if one of the three mandatory keys is missing.
        ValueError: if ``parameters['algorithm']`` is not supported.
    """
    import os

    # Maps the public algorithm name to the estimator class in `helper`.
    estimator_names = {
        'QuantileGradientBoosting': 'QuantileGradientBoosting',
        'QuantileLightGBM': 'QuantileLightGBM',
        'QuantileRegression': 'QuantileRegression',
        'QuantileRandomForest': 'QuantileForestRegressorAdapterNew',
        'QuantileKNN': 'QuantileKNN',
    }

    params = parameters.copy()
    params.pop('algorithm')
    alpha_ = params.pop('alpha_')
    validation = params.pop('validation')

    algorithm = parameters.get('algorithm')
    if algorithm not in estimator_names:
        raise ValueError('Unsupported algorithm: {!r}'.format(algorithm))

    # Lower / upper quantile given to the estimator; the "100 -" shows that
    # alpha_ is used on a percent scale here.
    quantiles_forest = [(alpha_ / 2), (100 - alpha_ / 2)]

    out_path = ('QCP' + algorithm + '_' + str(np.round(alpha_).astype(int))
                + '_' + str(validation) + '.csv')
    header = ['QCP_lower', 'QCP_upper', 'NetPosUsd', 'prediction']

    for i in tqdm(range(29)):

        # os.path.join replaces the former 'data\\...' literal, which
        # contained an invalid escape sequence and only worked on Windows.
        path = os.path.join(
            'data', 'EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv')
        df = pd.read_csv(path).drop(['Unnamed: 0', 'QdfTime'], axis=1).fillna(0)

        # Keep the raw target statistics to undo the standardization later.
        m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()

        mean = df.mean(axis=0)
        std = df.std(axis=0)
        df = (df - mean) / std

        # Everything before this position is train + calibration, the last
        # 120 rows are the test set.
        n_fit = len(df) - 120
        features = df.drop(['NetPosUsd'], axis=1)
        train = 1 * features.iloc[:n_fit, :].values
        test = 1 * features.iloc[n_fit:, :].values

        ytrain = df['NetPosUsd'][:n_fit].values
        ytest = df['NetPosUsd'].iloc[n_fit:]

        # The calibration set is the last `validation` rows of the fit range.
        idx_train = np.arange(n_fit - validation)
        idx_cal = np.arange(n_fit - validation, n_fit)

        # A fresh estimator is built for every file.
        estimator_cls = getattr(helper, estimator_names[algorithm])
        quantile_estimator = estimator_cls(model=None, quantiles=quantiles_forest, params=params)

        nc = RegressorNc(quantile_estimator, QuantileRegErrFunc())

        # run CQR procedure
        # NOTE(review): `alpha` is not defined inside this function, so it
        # must come from module scope - confirm it exists and matches
        # parameters['alpha_'] on the scale expected by helper.run_icp.
        lower, upper = helper.run_icp(nc, train, ytrain, test, idx_train, idx_cal, alpha)

        # Undo the standardization of the target.
        lower = lower * s + m
        upper = upper * s + m
        ytest = ytest * s + m

        # The 'prediction' column is the interval midpoint.
        size = upper / 2 + lower / 2
        table = np.vstack([lower, upper, ytest, size]).T

        dfncp = pd.DataFrame(table, columns=header)

        if i == 0:
            # First file: (re)create the output including the header row.
            dfncp.to_csv(out_path, encoding='utf-8', index=False)
        else:
            dfncp.to_csv(out_path, mode='a', header=False, index=False)
Exemplo n.º 25
0
    def CF_quantitative_validation(self):
        ''' Performs internal validation for conformal quantitative models.

        Runs a shuffled K-fold cross-validation ('ModelValidationN' splits,
        fixed random_state) in which an aggregated conformal regressor built
        on self.estimator_temp is fitted on each training fold and predicts
        an interval for every object of the held-out fold at the
        'conformalSignificance' level.

        Stores self.conformal_interval_medians, self.conformal_accuracy and
        self.conformal_mean_interval and returns (True, results), where
        results['quality'] is a list of (key, label, value) tuples.

        Any exception raised during the cross-validation is logged and
        re-raised.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        info = []
        kf = KFold(n_splits=self.param.getVal('ModelValidationN'),
                   shuffle=True,
                   random_state=46)
        # Copy Y vector to use it as template to assign predictions
        Y_pred = copy.copy(Y).tolist()
        try:
            for train_index, test_index in kf.split(X):
                # Generate training and test sets
                X_train, X_test = X[train_index], X[test_index]
                Y_train = Y[train_index]
                # Create the aggregated conformal regressor.
                conformal_pred = AggregatedCp(
                    IcpRegressor(
                        RegressorNc(RegressorAdapter(self.estimator_temp))),
                    BootstrapSampler())
                # Fit conformal regressor to the data
                conformal_pred.fit(X_train, Y_train)

                # Perform prediction on test set
                prediction = conformal_pred.predict(
                    X_test, self.param.getVal('conformalSignificance'))
                # Assign each prediction to its original index
                for el, interval in zip(test_index, prediction):
                    Y_pred[el] = interval

        except Exception as e:
            LOG.error(f'Quantitative conformal validation'
                      f' failed with exception: {e}')
            raise

        # After the loop every entry holds the two interval bounds.
        Y_pred = np.asarray(Y_pred)
        lower, upper = Y_pred[:, 0], Y_pred[:, 1]
        # Mean width of the predicted intervals
        interval_mean = np.mean(np.abs(lower - upper))
        # Boolean mask of the objects whose true value lies inside its
        # predicted interval. Y is flattened so the comparison is
        # element-wise for both 1-D and column-vector Y; comparing a column
        # of bounds with a 1-D Y would broadcast to an n x n matrix and
        # inflate the count.
        y_true = np.asarray(Y).reshape(-1)
        inside_interval = (lower < y_true) & (upper > y_true)
        # Fraction of objects inside their interval.
        accuracy = np.sum(inside_interval) / len(Y)

        # Midpoint of every interval (mean of the two bounds).
        self.conformal_interval_medians = (np.mean(Y_pred, axis=1))
        # Cut into two decimals.
        self.conformal_accuracy = float("{0:.2f}".format(accuracy))
        self.conformal_mean_interval = float("{0:.2f}".format(interval_mean))

        # Add quality metrics to results.
        info.append(('Conformal_mean_interval', 'Conformal mean interval',
                     self.conformal_mean_interval))
        info.append(('Conformal_accuracy', 'Conformal accuracy',
                     self.conformal_accuracy))
        info.append(
            ('Conformal_interval_medians', 'Conformal interval medians',
             self.conformal_interval_medians))
        info.append(('Conformal_prediction_ranges',
                     'Conformal prediction ranges', Y_pred))

        results = {}
        results['quality'] = info
        return True, results
Exemplo n.º 26
0
def train_and_test_cp_algo(i):
    """Fit normalized and plain conformal BiLSTM predictors on data file *i*.

    The ``NetPosUsd`` series of
    ``data/EURUSD_NETPOSUSD_hourly_for_regresion<i>.csv`` is transformed with
    ``mlog_trans`` and turned into sliding windows of 96 values, each used to
    predict the value that follows it. The last 120 windows are the test
    set, the 3480 windows before them the calibration set and the rest the
    training set. For every significance level 5 %, 10 %, ..., 95 % the
    intervals of the normalized ("NCP") and the plain ("CP") predictor are
    transformed back with ``mlog_inverse`` and written to one CSV file per
    predictor and level (created for i == 0, appended otherwise).

    Args:
        i: index of the data file; also decides whether the output files are
            created (0) or appended to (any other value).
    """
    import os

    window = 96
    n_test = 120
    n_cal = 3480

    # os.path.join replaces the former 'data\\...' literal, which contained
    # an invalid escape sequence and only worked on Windows.
    path = os.path.join(
        'data', 'EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv')
    df = pd.read_csv(path).drop(['QdfTime', 'Unnamed: 0'], axis=1).fillna(0)

    # Untransformed test targets and the statistics handed to mlog_inverse
    # are taken before the target column is overwritten.
    y_raw_test = df.NetPosUsd[-n_test:]
    median_ = df.NetPosUsd.median()
    mad_ = mad(df.NetPosUsd.values)
    df.NetPosUsd = mlog_trans(df.NetPosUsd.values)

    data = df.NetPosUsd.values

    # Row k holds data[k:k + window]; its target is data[k + window].
    X = np.array([data[k:k + window] for k in range(data.shape[0] - window)])
    y = data[window:]

    # Chronological layout: train | calibrate (n_cal) | test (n_test).
    split = X.shape[0] - n_test - n_cal
    train, ytrain = X[:split, :], y[:split]
    calibrate, ycalibrate = X[split:split + n_cal, :], y[split:split + n_cal]
    test = X[-n_test:]

    # Normalized conformal predictor. Each predictor gets its OWN BiLSTM
    # instance: with a shared instance, fitting the second predictor would
    # retrain the model underneath the first one after it was calibrated.
    underlying_model = RegressorAdapter(BiLSTM({'window': window}))
    normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
    normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp.fit(train, ytrain)
    icp.calibrate(calibrate, ycalibrate)

    # Plain (non-normalized) conformal predictor.
    underlying_model2 = RegressorAdapter(BiLSTM({'window': window}))
    nc2 = RegressorNc(underlying_model2, AbsErrorErrFunc())
    icp2 = IcpRegressor(nc2)
    icp2.fit(train, ytrain)
    icp2.calibrate(calibrate, ycalibrate)

    def interval_frame(model, prefix, significance):
        """Predict the test intervals and return them on the original scale."""
        prediction = model.predict(test, significance=significance)
        lower = mlog_inverse(prediction[:, 0], median_, mad_)
        upper = mlog_inverse(prediction[:, 1], median_, mad_)
        # The 'prediction' column is the interval midpoint.
        size = upper / 2 + lower / 2
        table = np.vstack([lower, upper, y_raw_test, size.T]).T
        header = [prefix + '_lower', prefix + '_upper', 'NetPosUsd', 'prediction']
        return pd.DataFrame(table, columns=header)

    def write_frame(frame, file_name):
        """Create the file for the first data file, append for later ones."""
        if i == 0:
            frame.to_csv(file_name, encoding='utf-8', index=False)
        else:
            frame.to_csv(file_name, mode='a', header=False, index=False)

    for a in tqdm(np.linspace(5, 95, 19)):
        # `a` is a percentage; the predictors expect a fraction.
        dfncp = interval_frame(icp, 'NCP', a / 100)
        dfcp = interval_frame(icp2, 'CP', a / 100)

        suffix = ('_' + 'cudaLSTM' + '_' + str(np.round(a).astype(int))
                  + '_' + 'calibrationwindow' + str(n_cal) + '.csv')
        write_frame(dfcp, 'CP' + suffix)
        write_frame(dfncp, 'NCP' + suffix)
Exemplo n.º 27
0
from nonconformist.base import RegressorAdapter
from nonconformist.icp import IcpRegressor
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, SignErrorErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
# NOTE(review): load_boston is, as far as I know, no longer shipped by recent
# scikit-learn releases - confirm the pinned version before running this.
data = load_boston()

# Shuffle the sample indices and cut them into three consecutive thirds:
# proper training set, calibration set and test set.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Inductive conformal regressor around a decision tree, scored with the
# signed-error nonconformity function.
icp = IcpRegressor(
    RegressorNc(RegressorAdapter(DecisionTreeRegressor()), SignErrorErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.05)
header = np.array(['min', 'max', 'Truth'])
# One row per test sample: interval bounds followed by the true target.
table = np.vstack([prediction.T, data.target[test]]).T
# Pass the labels as column names. Stacking them on top of the table would
# coerce every number to a string and turn the labels into data row 0.
df = pd.DataFrame(table, columns=header)
print(df)
Exemplo n.º 28
0
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

# Report the classification scores, averaged per significance level once the
# bookkeeping columns have been removed.
print('Classification: iris')
scores = scores.drop(columns=['fold', 'iter'])
classification_summary = scores.groupby(['significance']).mean()
print(classification_summary)

# -----------------------------------------------------------------------------
# Regression, absolute error
# -----------------------------------------------------------------------------
data = load_diabetes()

# Build the out-of-bag conformal regressor layer by layer: random forest ->
# OOB adapter -> nonconformity function -> conformal predictor -> CV helper.
forest = RandomForestRegressor(n_estimators=100, oob_score=True)
nc_function = RegressorNc(OobRegressorAdapter(forest))
icp = OobCpRegressor(nc_function)
icp_cv = RegIcpCvHelper(icp)

scores = cross_val_score(
    icp_cv,
    data.data,
    data.target,
    iterations=5,
    folds=5,
    scoring_funcs=[reg_mean_errors, reg_median_size],
    significance_levels=[0.05, 0.1, 0.2],
)

# Same report as above, now for the regression run.
print('Absolute error regression: diabetes')
scores = scores.drop(columns=['fold', 'iter'])
regression_summary = scores.groupby(['significance']).mean()
print(regression_summary)
Exemplo n.º 29
0
    def build(self):
        '''Build a new SVM model with the X and Y numpy matrices.

        Depending on the 'tune' parameter the estimator is either obtained
        through ``self.optimize`` or instantiated directly from
        ``self.estimator_parameters``; 'quantitative' selects ``svm.SVR``
        over ``svm.SVC``. The estimator is then fitted on copies of
        ``self.X`` / ``self.Y``. When 'conformal' is set, ``self.estimator``
        is replaced by an aggregated conformal predictor built around a copy
        of the fitted estimator and fitted again.

        Returns:
            tuple: ``(True, results)`` where ``results`` is a list of
            ``(key, label, value)`` tuples describing the model.

        Note:
            Exceptions raised while optimizing, instantiating or wrapping the
            estimator are logged and not re-raised.
        '''

        # Work on copies so the stored matrices are left untouched.
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):
            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(('model', 'model type',
                                    'SVM quantitative (optimized)'))

                else:
                    self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(
                        ('model', 'model type', 'SVM qualitative (optimized)'))
                LOG.debug('SVM estimator optimized')
            except Exception as e:
                LOG.error(f'Exception optimizing SVM '
                          f'estimator with exception {e}')
        else:
            try:
                LOG.info("Building  SVM model")
                if self.param.getVal('quantitative'):
                    LOG.info("Building Quantitative SVM-R model")
                    self.estimator = svm.SVR(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM quantitative'))
                else:
                    self.estimator = svm.SVC(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM qualitative'))
            except Exception as e:
                LOG.error(f'Exception building SVM '
                          f'estimator with exception {e}')

        # Fit estimator to the data
        # NOTE(review): this runs even when the block above only logged an
        # exception, so self.estimator may not be the estimator intended.
        self.estimator.fit(X, Y)

        # Keep a copy of the fitted estimator to wrap in the conformal
        # predictor, since self.estimator is reassigned below.
        self.estimator_temp = copy(self.estimator)
        if self.param.getVal('conformal'):
            try:
                LOG.info("Building aggregated conformal SVM model")
                if self.param.getVal('quantitative'):
                    # NOTE: the underlying and the normalizing adapters wrap
                    # the same estimator copy.
                    underlying_model = RegressorAdapter(self.estimator_temp)
                    normalizing_model = RegressorAdapter(self.estimator_temp)
                    normalizer = RegressorNormalizer(underlying_model,
                                                     normalizing_model,
                                                     AbsErrorErrFunc())
                    nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                     normalizer)

                    self.estimator = AggregatedCp(IcpRegressor(nc),
                                                  BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # Appended after the non-conformal 'model' entry;
                    # presumably the later entry wins downstream - TODO confirm.
                    results.append(
                        ('model', 'model type', 'conformal SVM quantitative'))

                else:
                    self.estimator = AggregatedCp(
                        IcpClassifier(
                            ClassifierNc(
                                ClassifierAdapter(self.estimator_temp),
                                MarginErrFunc())), BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # Appended after the non-conformal 'model' entry;
                    # presumably the later entry wins downstream - TODO confirm.
                    results.append(
                        ('model', 'model type', 'conformal SVM qualitative'))
            except Exception as e:
                LOG.error(f'Exception building aggregated conformal SVM '
                          f'estimator with exception {e}')
        return True, results
Exemplo n.º 30
0
def train_and_test_cp_algo(parameters):
    """Fit, calibrate and evaluate an inductive conformal regressor per file.

    For each of the 29 CSV files
    ``data/EURUSD_NETPOSUSD_hourly_for_regresion<i>.csv`` the data is
    standardised column-wise, split into training, calibration and test
    parts (the test part is the last 120 rows), and a conformal regressor is
    fitted and calibrated. The prediction intervals, the true target and the
    interval midpoint are mapped back to the original ``NetPosUsd`` scale and
    written to
    ``<WhichCP>_<algorithm>_<alpha in percent>_calibrationwindow<size>.csv``:
    the first file creates it (with header), the others append to it.

    Parameters
    ----------
    parameters : dict
        Must contain the control keys ``'algorithm'``,
        ``'randomized_calibration'``, ``'alpha_'``, ``'calibration_size'``
        and ``'WhichCP'``. Every other entry is forwarded to the selected
        estimator's constructor. The dict itself is not modified.

    Raises
    ------
    KeyError
        If one of the control keys is missing.
    ValueError
        If ``parameters['algorithm']`` is not one of the supported names.
    """
    import os  # local import so the block does not rely on a file-level one

    # Strip the control keys; what remains are estimator hyper-parameters.
    p = parameters.copy()
    for control_key in ('algorithm', 'randomized_calibration', 'alpha_',
                        'calibration_size', 'WhichCP'):
        p.pop(control_key)

    # Lazily evaluated factories: only the selected estimator class has to be
    # available, and a fresh estimator can be created for every file.
    builders = {
        'RandomForest': lambda: RandomForestRegressor(**p),
        'K-NearestNeighbours': lambda: KNeighborsRegressor(**p),
        'LightGBM': lambda: LGBMRegressor(**p),
        'LassoRegression': lambda: Lasso(**p),
        # This wrapper receives the parameter dict itself, not keyword args.
        'NeuralNetwork': lambda: NeuralNetworkAlgorithm(p),
        'LSTM': lambda: BiLSTM(**p),
        'GradientBoosting': lambda: GradientBoostingRegressor(**p),
    }
    algorithm_name = parameters.get('algorithm')
    if algorithm_name not in builders:
        # Fail before any file is read instead of hitting an unbound local
        # variable halfway through the first iteration.
        raise ValueError(
            f'Unknown algorithm {algorithm_name!r}; '
            f'expected one of {sorted(builders)}')

    which_cp = parameters.get('WhichCP')
    alpha = parameters.get('alpha_')
    calibration_size = parameters.get('calibration_size')
    target = 'NetPosUsd'
    n_test = 120  # number of trailing rows held out for testing

    # All files are collected in one result file.
    out_path = (
        which_cp + '_' + algorithm_name + '_'
        + str(np.round(alpha * 100).astype(int)) + '_'
        + 'calibrationwindow' + str(calibration_size) + '.csv')

    for i in tqdm(range(29)):
        algorithm = builders[algorithm_name]()

        path = os.path.join(
            'data', 'EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv')
        df = pd.read_csv(path).drop(['Unnamed: 0', 'QdfTime'], axis=1).fillna(0)

        # Remember the target's location/scale to undo the standardisation.
        m, s = df[target].mean(), df[target].std()

        # Standardise every column (target included).
        df = (df - df.mean(axis=0)) / df.std(axis=0)

        features = df.drop([target], axis=1)
        y = df[target].values

        # Kept as an equality test so that only True-equal values enable the
        # randomized split, exactly as before.
        if parameters.get('randomized_calibration') == True:  # noqa: E712
            # Calibration rows are drawn at random (without replacement) from
            # everything preceding the test window; the rest is training data.
            split = len(df) - n_test
            pool_x = features.iloc[:split, :].values
            pool_y = y[:split]
            choose = np.random.choice(
                len(pool_x), calibration_size, replace=False)
            mask = np.ones(len(pool_x), dtype=bool)
            mask[choose] = False

            train, calibrate = pool_x[mask, :], pool_x[choose, :]
            ytrain, ycalibrate = pool_y[mask], pool_y[choose]
            test = features.iloc[split:, :].values
            ytest = y[split:]
        else:
            # Chronological split: training rows, then the calibration
            # window, then the test window.
            split = len(df) - n_test - calibration_size
            train = features.iloc[:split, :].values
            calibrate = features.iloc[split:split + calibration_size, :].values
            test = features.iloc[-n_test:, :].values

            ytrain = y[:split]
            ycalibrate = y[split:split + calibration_size]
            ytest = y[-n_test:]

        underlying_model = RegressorAdapter(algorithm)
        if which_cp == 'NCP':
            # Normalized variant: a k-NN model supplies the normalizer.
            normalizing_model = RegressorAdapter(
                KNeighborsRegressor(n_neighbors=50))
            normalizer = RegressorNormalizer(
                underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            prefix = 'NCP'
        else:
            nc = RegressorNc(underlying_model, AbsErrorErrFunc())
            prefix = 'CP'

        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)

        # ---------------------------------------------------------------------
        # Predict
        # ---------------------------------------------------------------------
        prediction = icp.predict(test, significance=alpha)
        header = [prefix + '_lower', prefix + '_upper', 'NetPosUsd',
                  'prediction']
        # Midpoint of each interval; stored in the 'prediction' column.
        size = prediction[:, 1] / 2 + prediction[:, 0] / 2

        # Undo the standardisation of the target.
        prediction = prediction * s + m
        ytest = ytest * s + m
        size = size * s + m

        table = np.vstack([prediction.T, ytest, size.T]).T
        result = pd.DataFrame(table, columns=header)

        # The first file creates the CSV with a header row, later ones append.
        is_first = i == 0
        result.to_csv(out_path, mode='w' if is_first else 'a',
                      header=is_first, index=False, encoding='utf-8')

        del algorithm