def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a homogeneous random forest surrogate model.

        Validates the config against the store's parameter space, initializes the
        RegressionModel base state (including a fresh fit-state object), flattens
        any hierarchy in the input/output spaces, and creates the tree estimators.

        :param model_config: point from homogeneous_random_forest_config_store.parameter_space
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("HomogeneousRandomForestRegressionModel")

        # Reject configs that don't belong to this model's declared parameter space.
        assert model_config in homogeneous_random_forest_config_store.parameter_space

        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space,
                                 fit_state=HomogeneousRandomForestFitState(input_space=input_space,
                                                                           output_space=output_space))

        # Flatten hierarchical subgrids so downstream code sees flat dimension lists.
        self._input_space_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.input_space)
        self._output_space_adapter = HierarchicalToFlatHypergridAdapter(adaptee=self.output_space)

        self.target_dimension_names = [dim.name for dim in self._output_space_adapter.dimensions]
        assert len(self.target_dimension_names) == 1, "Single target predictions for now."

        self._decision_trees = []
        self._create_estimators()
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a regression-enhanced random forest surrogate model.

        Chains two hypergrid adapters over the input space — one-hot encoding for
        categorical dimensions, then a polynomial-basis expansion of the continuous
        ones — and initializes all fit-time state to None until fit() runs.

        :param model_config: point from RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("RegressionEnhancedRandomForestRegressionModel")

        assert model_config in RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE

        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)
        self.model_config = model_config

        # One-hot encode categorical input dimensions; dropping the first level of
        # each encoding avoids perfectly collinear columns in the design matrix.
        self.one_hot_encoder_adapter = CategoricalToOneHotEncodedHypergridAdapter(adaptee=input_space,
                                                                                  merge_all_categorical_dimensions=True,
                                                                                  drop='first')
        # Expand continuous dimensions into polynomial basis features up to the
        # configured monomial degree. include_bias adds a constant (all-ones)
        # column, which simplifies the one-hot-encoding logic downstream.
        self.polynomial_features_adapter = ContinuousToPolynomialBasisHypergridAdapter(
            adaptee=self.one_hot_encoder_adapter.target,
            degree=self.model_config.max_basis_function_degree,
            include_bias=True,
            interaction_only=False)
        self.input_space = input_space

        self.input_dimension_names = [dim.name for dim in self.input_space.dimensions]
        self.output_dimension_names = [dim.name for dim in self.output_space.dimensions]

        self.base_regressor_ = None
        self.random_forest_regressor_ = None

        # Fit-time state: estimator kwargs, feature-screening results, the hat
        # matrix and variance pieces used for confidence intervals. All populated
        # during fit().
        for attr_name in ("random_forest_kwargs", "root_model_kwargs",
                          "detected_feature_indices_", "screening_root_model_coef_",
                          "fit_X_", "partial_hat_matrix_",
                          "base_regressor_standard_error_", "dof_",
                          "variance_estimate_", "root_model_gradient_coef_",
                          "polynomial_features_powers_",
                          "categorical_zero_cols_idx_to_delete_"):
            setattr(self, attr_name, None)

        self.scaler_ = StandardScaler()
        self._trained = False
# ---- Example 3 (separator from the snippet-aggregation source; original marker "示例#3", vote count 0) ----
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a decision-tree surrogate model wrapping sklearn's DecisionTreeRegressor.

        Maps categorical input dimensions to discrete ones (sklearn trees only
        consume numbers), constructs the underlying regressor from the config,
        and initializes the per-leaf statistics used for variance estimates.

        :param model_config: point from decision_tree_config_store.parameter_space
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("DecisionTreeRegressionModel")

        assert model_config in decision_tree_config_store.parameter_space
        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)

        self._input_space_adapter = CategoricalToDiscreteHypergridAdapter(adaptee=self.input_space)

        self.input_dimension_names = [dim.name for dim in self._input_space_adapter.dimensions]
        self.target_dimension_names = [dim.name for dim in self.output_space.dimensions]
        self.logger.debug(f"Input dimensions: {str(self.input_dimension_names)}; Target dimensions: {str(self.target_dimension_names)}.")

        assert len(self.target_dimension_names) == 1, "For now (and perhaps forever) we only support single target per tree."

        cfg = self.model_config
        # The config uses 0 (and 1 for max_leaf_nodes) as "unlimited" sentinels;
        # sklearn expects None for the unlimited case.
        effective_max_depth = cfg.max_depth if cfg.max_depth != 0 else None
        effective_max_leaf_nodes = cfg.max_leaf_nodes if cfg.max_leaf_nodes not in (0, 1) else None
        self._regressor = DecisionTreeRegressor(criterion=cfg.criterion,
                                                splitter=cfg.splitter,
                                                max_depth=effective_max_depth,
                                                min_samples_split=cfg.min_samples_split,
                                                min_samples_leaf=cfg.min_samples_leaf,
                                                min_weight_fraction_leaf=cfg.min_weight_fraction_leaf,
                                                max_features=cfg.max_features,
                                                random_state=cfg.get("random_state", None),
                                                max_leaf_nodes=effective_max_leaf_nodes,
                                                min_impurity_decrease=cfg.min_impurity_decrease,
                                                ccp_alpha=cfg.ccp_alpha)

        # Per-leaf bookkeeping used to compute the variance in predictions.
        self._observations_per_leaf = dict()
        self._mean_per_leaf = dict()
        self._mean_variance_per_leaf = dict()
        self._sample_variance_per_leaf = dict()
        self._count_per_leaf = dict()

        self._trained = False
# ---- Example 4 (separator from the snippet-aggregation source; original marker "示例#4", vote count 0) ----
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a regression-enhanced random forest surrogate model.

        One-hot encodes categorical input dimensions, sets up the polynomial
        basis transform for the linear root model, and initializes all fit-time
        state to None until fit() populates it.

        :param model_config: point from RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("RegressionEnhancedRandomForestRegressionModel")

        assert model_config in RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE

        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)
        self.model_config = model_config

        # One-hot encode categorical input dimensions; dropping the first level of
        # each encoding avoids perfectly collinear columns.
        self.one_hot_encoder_adapter = CategoricalToOneHotEncodedHypergridAdapter(adaptee=input_space,
                                                                                  merge_all_categorical_dimensions=True,
                                                                                  drop='first')
        self.input_space = input_space

        self.input_dimension_names = [dim.name for dim in self.input_space.dimensions]
        self.output_dimension_names = [dim.name for dim in self.output_space.dimensions]

        self.base_regressor_ = None
        self.random_forest_regressor_ = None

        # Basis-feature transform used by the linear root model.
        self.polynomial_features_transform_ = PolynomialFeatures(degree=self.model_config.max_basis_function_degree)

        # Fit-time state: estimator kwargs, feature-screening results, hat matrix
        # and variance pieces. All populated during fit().
        for attr_name in ("random_forest_kwargs", "root_model_kwargs",
                          "detected_feature_indices_", "screening_root_model_coef_",
                          "fit_X_", "partial_hat_matrix_",
                          "base_regressor_standard_error_", "dof_",
                          "variance_estimate_", "root_model_gradient_coef_",
                          "polynomial_features_powers_",
                          "categorical_zero_cols_idx_to_delete_"):
            setattr(self, attr_name, None)

        self.scaler_ = StandardScaler()
        self._trained = False
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a regression-enhanced random forest surrogate model.

        Captures the dimension names from the input/output spaces, sets up the
        polynomial basis transform for the linear root model, and initializes all
        fit-time state — including the categorical dummy-variable bookkeeping —
        to None until fit() populates it.

        :param model_config: point from RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("RegressionEnhancedRandomForestRegressionModel")

        assert model_config in RegressionEnhancedRandomForestRegressionModelConfig.CONFIG_SPACE

        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)
        self.model_config = model_config
        self.input_dimension_names = [dim.name for dim in self.input_space.dimensions]
        self.output_dimension_names = [dim.name for dim in self.output_space.dimensions]

        self.base_regressor_ = None
        self.random_forest_regressor_ = None

        # Basis-feature transform used by the linear root model.
        self.polynomial_features_transform_ = PolynomialFeatures(degree=self.model_config.max_basis_function_degree)

        # Fit-time state: estimator kwargs, feature-screening results, hat matrix,
        # variance pieces, and categorical dummy-variable maps. Populated by fit().
        for attr_name in ("random_forest_kwargs", "root_model_kwargs",
                          "detected_feature_indices_", "screening_root_model_coef_",
                          "fit_X_", "partial_hat_matrix_",
                          "base_regressor_standard_error_", "dof_",
                          "variance_estimate_", "root_model_gradient_coef_",
                          "polynomial_features_powers_",
                          "num_dummy_vars_", "num_categorical_dims_",
                          "continuous_dim_col_names_", "categorical_dim_col_names_",
                          "dummy_var_map_", "dummy_var_cols_",
                          "categorical_zero_cols_idx_to_delete_"):
            setattr(self, attr_name, None)
# ---- Example 6 (separator from the snippet-aggregation source; original marker "示例#6", vote count 0) ----
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        """Build a cross-validated lasso surrogate model wrapping sklearn's LassoCV.

        One-hot encodes categorical input dimensions so the linear model only
        sees numeric columns, asserts a single target dimension, and constructs
        the LassoCV regressor from the config.

        :param model_config: point from lasso_cross_validated_config_store.parameter_space
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("LassoRegressionModel")

        assert model_config in lasso_cross_validated_config_store.parameter_space
        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)

        # Adapter to accommodate categorical dimensions: one-hot encode them,
        # dropping the first level of each encoding to avoid collinear columns.
        self.one_hot_encoder_adapter = CategoricalToOneHotEncodedHypergridAdapter(adaptee=input_space,
                                                                                  merge_all_categorical_dimensions=True,
                                                                                  drop='first')

        self.input_dimension_names = [dim.name for dim in self.one_hot_encoder_adapter.dimensions]
        self.continuous_dimension_names = [dim.name
                                           for dim in self.one_hot_encoder_adapter.target.dimensions
                                           if isinstance(dim, ContinuousDimension)]
        self.target_dimension_names = [dim.name for dim in self.output_space.dimensions]
        self.logger.debug(f"Input dimensions: {str(self.input_dimension_names)}; Target dimensions: {str(self.target_dimension_names)}.")

        assert len(self.target_dimension_names) == 1, "For now (and perhaps forever) we only support single target per Lasso model."

        # Keyword arguments forwarded verbatim to sklearn's LassoCV.
        cfg = self.model_config
        self.lasso_model_kwargs = dict(eps=cfg.eps,
                                       n_alphas=cfg.num_alphas,
                                       alphas=None,
                                       fit_intercept=cfg.fit_intercept,
                                       normalize=cfg.normalize,
                                       precompute=cfg.precompute,
                                       max_iter=cfg.max_iter,
                                       tol=cfg.tol,
                                       copy_X=cfg.copy_x,
                                       cv=cfg.num_cross_validations,
                                       verbose=cfg.verbose,
                                       n_jobs=cfg.num_jobs,
                                       positive=cfg.positive,
                                       random_state=None,
                                       selection=cfg.selection)
        self._regressor = LassoCV(**self.lasso_model_kwargs)
        self._trained: bool = False

        # Populated/updated during fit(); used for standard-error computations.
        self.categorical_zero_cols_idx_to_delete_ = None
        self.dof_ = 0
        self.partial_hat_matrix_ = 0
        self.regressor_standard_error_ = 0
# ---- Example 7 (separator from the snippet-aggregation source; original marker "示例#7", vote count 0) ----
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger: logging.Logger = None):
        """Build a cross-validated lasso surrogate model wrapping sklearn's LassoCV.

        One-hot encodes categorical input dimensions so the linear model only
        sees numeric columns, asserts a single target dimension, constructs the
        LassoCV regressor from the config, and exposes a flag letting RERF skip
        input filtering on predict.

        :param model_config: point from lasso_cross_validated_config_store.parameter_space
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("LassoRegressionModel")

        assert model_config in lasso_cross_validated_config_store.parameter_space
        RegressionModel.__init__(self,
                                 model_type=type(self),
                                 model_config=model_config,
                                 input_space=input_space,
                                 output_space=output_space)

        # Adapter to accommodate categorical dimensions: one-hot encode them,
        # dropping the first level of each encoding to avoid collinear columns.
        self.one_hot_encoder_adapter = CategoricalToOneHotEncodedHypergridAdapter(adaptee=input_space,
                                                                                  merge_all_categorical_dimensions=True,
                                                                                  drop='first')
        self.input_dimension_names = self.input_space.dimension_names

        self._projected_input_dimension_names = [dim.name for dim in self.one_hot_encoder_adapter.dimensions]
        self.continuous_dimension_names = [dim.name
                                           for dim in self.one_hot_encoder_adapter.target.dimensions
                                           if isinstance(dim, ContinuousDimension)]
        self.target_dimension_names = [dim.name for dim in self.output_space.dimensions]
        self.logger.debug(f"Input dimensions: {str(self._projected_input_dimension_names)}; Target dimensions: {str(self.target_dimension_names)}.")

        assert len(self.target_dimension_names) == 1, "For now (and perhaps forever) we only support single target per Lasso model."

        # Keyword arguments forwarded verbatim to sklearn's LassoCV.
        cfg = self.model_config
        self.lasso_model_kwargs = dict(eps=cfg.eps,
                                       n_alphas=cfg.num_alphas,
                                       alphas=None,
                                       fit_intercept=cfg.fit_intercept,
                                       normalize=cfg.normalize,
                                       precompute=cfg.precompute,
                                       max_iter=cfg.max_iter,
                                       tol=cfg.tol,
                                       copy_X=cfg.copy_x,
                                       cv=cfg.num_cross_validations,
                                       verbose=cfg.verbose,
                                       n_jobs=cfg.num_jobs,
                                       positive=cfg.positive,
                                       random_state=None,
                                       selection=cfg.selection)
        self._regressor = LassoCV(**self.lasso_model_kwargs)
        self._trained: bool = False
        self.last_refit_iteration_number = None

        # Populated/updated during fit(); used for standard-error computations.
        self.categorical_zero_cols_idx_to_delete_ = None
        self.dof_ = 0
        self.partial_hat_matrix_ = 0
        self.regressor_standard_error_ = 0

        # When this model serves as the base regressor inside RERF, its inputs are
        # polynomial combinations of RERF's inputs, so exact dimension bounds are
        # unknowable and get approximated by the empirical min/max. Those
        # approximations are biased inward (lower bound too large, upper bound too
        # small), so at scoring time inputs can legitimately fall outside them and
        # we still want predictions for those points. Setting this flag makes
        # predict() skip input filtering to allow that.
        self.skip_input_filtering_on_predict = False
    def __init__(
            self,
            model_config: Point,
            input_space: Hypergrid,
            output_space: Hypergrid,
            logger: logging.Logger = None
    ):
        """Build a regression-enhanced random forest surrogate model.

        Forces the hyper-parameter-search flag on, adjusts the nested lasso and
        random-forest configs to satisfy sklearn constraints, chains the
        polynomial-basis and one-hot-encoding adapters over the input space, and
        initializes all fit-time state to None until fit() populates it.

        :param model_config: point from regression_enhanced_random_forest_config_store.parameter_space
        :param input_space: hypergrid describing the model's feature space
        :param output_space: hypergrid describing the model's target space
        :param logger: optional logger; a default one is created when omitted
        """
        self.logger = logger if logger is not None else create_logger("RegressionEnhancedRandomForestRegressionModel")

        assert model_config in regression_enhanced_random_forest_config_store.parameter_space
        RegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )

        self.model_config = model_config
        self.model_config.perform_initial_root_model_hyper_parameter_search = True

        # Constraints required by the sklearn classes ReRF composes:
        #  - the intercept term is added during design-matrix construction, so the
        #    lasso root model must not fit its own intercept;
        #  - normalization is disabled because the random forest would also need
        #    the scaled features, so any scaling must be managed by ReRF directly.
        model_config.lasso_regression_model_config.fit_intercept = False
        model_config.lasso_regression_model_config.normalize = False
        # sklearn requires bootstrapping whenever out-of-bag scoring is requested.
        if model_config.sklearn_random_forest_regression_model_config.oob_score:
            model_config.sklearn_random_forest_regression_model_config.bootstrap = True

        # Expand continuous dimensions into polynomial basis features up to the
        # configured monomial degree. include_bias adds a constant (all-ones)
        # column, which simplifies the one-hot-encoding logic downstream.
        self.polynomial_features_adapter = ContinuousToPolynomialBasisHypergridAdapter(
            adaptee=input_space,
            degree=self.model_config.max_basis_function_degree,
            include_bias=True,
            interaction_only=False
        )
        # One-hot encode categorical input dimensions; dropping the first level of
        # each encoding avoids perfectly collinear columns.
        self.one_hot_encoder_adapter = CategoricalToOneHotEncodedHypergridAdapter(
            adaptee=self.polynomial_features_adapter,
            merge_all_categorical_dimensions=True,
            drop='first'
        )

        self.input_dimension_names = [dim.name for dim in self.input_space.dimensions]
        self._projected_input_dimension_names = [dim.name for dim in self.one_hot_encoder_adapter.dimensions]
        self.continuous_dimension_names = [dim.name
                                           for dim in self.one_hot_encoder_adapter.target.dimensions
                                           if isinstance(dim, ContinuousDimension)]
        self.output_dimension_names = [dim.name for dim in self.output_space.dimensions]

        self.base_regressor_ = None
        self.random_forest_regressor_ = None
        self.x_is_design_matrix = False

        # Fit-time state: estimator kwargs, feature-screening results, hat matrix
        # and variance pieces. All populated during fit().
        for attr_name in ("random_forest_kwargs", "root_model_kwargs",
                          "detected_feature_indices_", "screening_root_model_coef_",
                          "fit_X_", "partial_hat_matrix_",
                          "base_regressor_standard_error_", "dof_",
                          "variance_estimate_", "root_model_gradient_coef_",
                          "polynomial_features_powers_",
                          "categorical_zero_cols_idx_to_delete_"):
            setattr(self, attr_name, None)

        self._trained = False
        self.last_refit_iteration_number = None
# ---- Example 9 (separator from the snippet-aggregation source; original marker "示例#9", vote count 0) ----
    def __init__(
            self,
            model_config: RegressionEnhancedRandomForestRegressionModelConfig,
            input_space: Hypergrid,
            output_space: Hypergrid,
            logger=None
    ):
        if logger is None:
            logger = create_logger("RegressionEnhancedRandomForestRegressionModel")
        self.logger = logger

        assert RegressionEnhancedRandomForestRegressionModelConfig.contains(model_config)
        RegressionModel.__init__(
            self,
            model_type=type(self),
            model_config=model_config,
            input_space=input_space,
            output_space=output_space
        )

        self.input_dimension_names = [dimension.name for dimension in self.input_space.dimensions]
        self.output_dimension_names = [dimension.name for dimension in self.output_space.dimensions]
        self._input_space_dimension_name_mappings = {
            dimension.name: Dimension.flatten_dimension_name(dimension.name)
            for dimension in self.input_space.dimensions
        }

        self._output_space_dimension_name_mappings = {
            dimension.name: Dimension.flatten_dimension_name(dimension.name)
            for dimension in self.output_space.dimensions
        }

        self.base_regressor_ = None
        self.base_regressor_config = dict()
        self.base_regressor_config = self.model_config.boosting_root_model_config
        if self.model_config.boosting_root_model_name == SklearnLassoRegressionModelConfig.__name__:
            self.base_regressor_ = linear_model.Lasso(
                alpha=self.base_regressor_config.alpha,
                fit_intercept=self.base_regressor_config.fit_intercept,
                normalize=self.base_regressor_config.normalize,
                precompute=self.base_regressor_config.precompute,
                copy_X=self.base_regressor_config.copy_x,
                max_iter=self.base_regressor_config.max_iter,
                tol=self.base_regressor_config.tol,
                warm_start=self.base_regressor_config.warm_start,
                positive=self.base_regressor_config.positive,
                random_state=self.base_regressor_config.random_state,
                selection=self.base_regressor_config.selection
            )
        elif self.model_config.boosting_root_model_name == SklearnRidgeRegressionModelConfig.__name__:
            self.base_regressor_ = linear_model.Ridge(
                alpha=self.base_regressor_config.alpha,
                fit_intercept=self.base_regressor_config.fit_intercept,
                normalize=self.base_regressor_config.normalize,
                copy_X=self.base_regressor_config.copy_x,
                max_iter=self.base_regressor_config.max_iter,
                tol=self.base_regressor_config.tol,
                random_state=self.base_regressor_config.random_state,
                solver=self.base_regressor_config.solver
            )
        else:
            self.logger('Boosting base model name "{0}" not supported currently.' \
                        .format(self.model_config.boosting_root_model_name))

        rf_config = self.model_config.random_forest_model_config
        self.random_forest_regressor_ = RandomForestRegressor(
            n_estimators=rf_config.n_estimators,
            criterion=rf_config.criterion,
            max_depth=rf_config.max_depth_value,
            min_samples_split=rf_config.min_samples_split,
            min_samples_leaf=rf_config.min_samples_leaf,
            min_weight_fraction_leaf=rf_config.min_weight_fraction_leaf,
            max_features=rf_config.max_features,
            max_leaf_nodes=rf_config.max_leaf_nodes_value,
            min_impurity_decrease=rf_config.min_impurity_decrease,
            bootstrap=rf_config.bootstrap,
            oob_score=rf_config.oob_score,
            n_jobs=rf_config.n_jobs,
            warm_start=rf_config.warm_start,
            ccp_alpha=rf_config.ccp_alpha,
            max_samples=rf_config.max_sample_value
        )

        # set up basis feature transform
        self.polynomial_features_transform_ = None
        if self.model_config.max_basis_function_degree > 1:
            self.polynomial_features_transform_ = \
                PolynomialFeatures(degree=self.model_config.max_basis_function_degree)

        self.random_forest_kwargs = None
        self.root_model_kwargs = None
        self.detected_feature_indices_ = None
        self.screening_root_model_coef_ = None
        self.fit_X_ = None
        self.partial_hat_matrix_ = None
        self.base_regressor_standard_error_ = None
        self.dof_ = None
        self.variance_estimate_ = None
        self.root_model_gradient_coef_ = None