def test_real():
    """Check that params.real accepts real scalars and rejects booleans."""

    # numeric inputs compare equal to the corresponding float
    for given, expected in ((1, 1.0), (-2.0, -2.0)):
        assert params.real(given) == expected

    # booleans must be rejected
    for flag in (True, False):
        with pytest.raises(InvalidParameterError):
            params.real(flag)
def shaded_line(
    self,
    positions: np.ndarray,
    values: List[np.ndarray],
    color_idx: int = 0,
    label: Optional[str] = None,
    quantile_width: float = 0.5,
    alpha: float = 0.2,
    show_extrema: bool = True,
    **kwargs,
):
    """Plot the per-location median as a line with a shaded quantile band.

    Parameters:
        positions: 1-d array of locations on the horizontal axis
        values: one array of samples per location;
            len(values) must equal len(positions)
        color_idx: index into the configured color set
        label: label attached to the median line
        quantile_width: fraction of the distribution covered by the band,
            centered on the median; 0.5 (default) shades from the
            25th to the 75th percentile
        alpha: transparency of the shaded band
        show_extrema: if true, the per-location minimum and maximum
            are drawn as dashed lines
        kwargs: forwarded to every plotting call
    """

    # parameter validation
    positions = params.real_vector(positions)
    values = params.tuple_(values, params.real_vector, arity=len(positions))
    color_idx = params.integer(color_idx, from_=0, below=len(self.configuration.color_set))
    quantile_width = params.real(quantile_width, from_=0, to=1)
    alpha = params.real(alpha, from_=0, to=1)

    def per_location(statistic, *extra):
        """Apply a summary statistic to the samples of each location."""
        return [statistic(samples, *extra) for samples in values]

    line_color = self.configuration.color(color_idx)

    # quantile band is symmetric around the median
    band_low = 0.5 - quantile_width / 2.0
    band_high = 0.5 + quantile_width / 2.0

    center = per_location(np.median)
    shade_bottom = per_location(np.quantile, band_low)
    shade_top = per_location(np.quantile, band_high)

    self.ax.plot(positions, center, linestyle="-", color=line_color, label=label, **kwargs)
    self.ax.fill_between(
        positions, shade_bottom, shade_top, color=line_color, alpha=alpha, **kwargs
    )

    if show_extrema:
        smallest = per_location(np.min)
        largest = per_location(np.max)
        for extreme in (smallest, largest):
            self.ax.plot(positions, extreme, linestyle="--", color=line_color, **kwargs)
def __init__(
    self,
    rng: int = None,
    num_seeds: int = 1,
    resolution: int = 64,
    max_relative_jump: float = 1.0,
    dimensions_varied: Union[str, float, int] = "all",
    max_iters: Optional[int] = None,
    max_evals: Optional[int] = None,
    **kwargs,
):
    """Initialize state.

    Parameters:
        rng: pseudo-random number generator seed
        num_seeds: number of starting points; also the number of points
            chosen at the end of each iteration
        resolution: number of points sampled along a single dimension
            for a single seed
        max_relative_jump: maximum relative step size along a single dimension.
            For a dimension of length `L` and a seed with value `x` along it,
            candidates are `resolution` linearly spaced points from
            [x - max_relative_jump * L, x + max_relative_jump * L]
            (clipped by the bounds). `max_relative_jump` must be on (0, 1];
            for a value of 1, the entire range is always considered.
        dimensions_varied: how many randomly selected dimensions to explore
            with each step. 'all' indicates all dimensions, an integer directly
            specifies the number of dimensions, and a float on (0, 1) indicates
            the fraction of the total.
        max_iters: maximum number of iterations
        max_evals: maximum number of function evaluations (a soft maximum:
            once it is reached, the current iteration finishes)

    At least one of max_iters and max_evals must be given.

    TODO: add tolerance stopping conditions
    """

    super().__init__(rng=rng, **kwargs)

    def positive_count(arg):
        return params.integer(arg, above=0)

    def proper_fraction(arg):
        return params.real(arg, above=0.0, below=1.0)

    def all_keyword(arg):
        return params.enumeration(arg, {"all"})

    def at_least_one(arg):
        return params.integer(arg, from_=1)

    self._num_seeds = params.integer(num_seeds, from_=1)
    self._resolution = params.integer(resolution, from_=2)
    self._max_relative_jump = params.real(max_relative_jump, above=0.0, to=1.0)
    self._dimensions_varied = params.any_(
        dimensions_varied, positive_count, proper_fraction, all_keyword
    )
    self._max_iters = params.optional_(max_iters, at_least_one)
    self._max_evals = params.optional_(max_evals, at_least_one)

    # without any limit the optimizer would never terminate
    limits = (self._max_iters, self._max_evals)
    if all(limit is None for limit in limits):
        raise InvalidParameterError("at least one stopping condition defined", "all Nones")
def __init__(self, mean=0.0, stddev=1.0, **kwargs):
    """Initialize state.

    Parameters:
        mean: mean of the normal distribution; any real number
        stddev: standard deviation of the normal distribution;
            must be strictly positive
        All parameters from base class 'Noise' initializer
    """

    super().__init__(**kwargs)

    # validate first, then store
    location = params.real(mean)
    spread = params.real(stddev, above=0)

    self._mean = location
    self._stddev = spread
def __init__(
    self,
    rng: int = None,
    maxiter: int = 1000,
    local_search_options: Optional[dict] = None,
    initial_temp: float = 5230.0,
    restart_temp_ratio: float = 2e-05,
    visit: float = 2.62,
    accept: float = -5.0,
    maxfun: int = int(1e7),
    no_local_search: bool = False,
    **kwargs
):
    """Initialize state.

    Scipy-specific parameters are passed through.

    Parameters:
        rng: integer seed. Will be used to generate a new seed each time
            the optimizer is run.
        maxiter: The maximum number of iterations, where one iteration is one
            round of simulated annealing followed by one use of a local
            optimizer to find a local min.
        local_search_options: an optional kwargs dictionary to pass to the
            local minimizer, scipy.optimize.minimize:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
            If no args are passed then the minimizer defaults to the L-BFGS-B
            method, since the problems being studied have bounds but no
            constraints.
        initial_temp: The initial temperature; use higher values to facilitate
            a wider search and more easily escape local minima.
            Must be on (0.01, 5e4].
        restart_temp_ratio: The temperature, relative to the initial
            temperature, at which the annealing process restarts.
            Must be on (0, 1).
        visit: a parameter of the visiting distribution. A higher value
            corresponds to a heavier tail and longer potential jumps.
            Must be on (0, 3].
        accept: a parameter of the acceptance distribution. A lower value means
            that uphill moves are less likely to be accepted.
            Must be on (-1e4, -5].
        maxfun: soft limit for the total number of function evaluation calls
            that may be exceeded only during a local optimization step if the
            quota is reached therein. Positive integer; defaults to 10**7.
        no_local_search: if true then the local search step is skipped, and
            this reduces to a generalized simulated annealing optimizer.
    """

    super().__init__(rng=rng, **kwargs)

    self._maxiter = params.integer(maxiter, from_=1)
    # TODO: verify dictionaries
    self._local_search_options = local_search_options or {}
    self._initial_temp = params.real(initial_temp, above=0.01, to=5e4)
    self._restart_temp_ratio = params.real(restart_temp_ratio, above=0.0, below=1.0)
    self._visit = params.real(visit, above=0.0, to=3.0)
    self._accept = params.real(accept, above=-1e4, to=-5.0)
    # the default is a true integer so it always satisfies the integer validator
    self._maxfun = params.integer(maxfun, from_=1)
    self._no_local_search = params.boolean(no_local_search)
def __init__(
    self, visualization_type: str = "points", rectify: Union[float, bool] = False, **kwargs
):
    """Initialize generalized function plot.

    Parameters:
        visualization_type: how to visualize generalized functions;
            either a single value applied to all curves or a sequence
            with one value per curve. Possible values:
            "points" (default), "box-whisker", "shaded-line"
        rectify: whether and by how much each curve's values are displaced
            horizontally to visually disentangle markers of different curves
            at the same location. A non-negative number gives the amount,
            True requests automatic displacement, False (default) leaves
            horizontal positions unmodified. For logarithmic horizontal
            axis scaling the rectification factor is applied in log-space.

    Examples:
        # show three curves with automatic horizontal rectification
        __init__(visualization_type=("points", "points", "box-whisker"), rectify=True)
    """

    super().__init__(**kwargs)

    # parameter validation
    def single_type(arg):
        return params.enumeration(arg, {"points", "box-whisker", "shaded-line"})

    def several_types(arg):
        # arity can only be tested in evaluate()
        return params.tuple_(arg, single_type)

    def displacement(arg):
        return params.real(arg, from_=0)

    self._visualization_type = params.any_(visualization_type, single_type, several_types)
    self._rectify = params.any_(rectify, displacement, params.boolean)
def __init__(
    self,
    internal_hp_optimization: bool = True,
    kernel: Optional[Kernel] = None,
    alpha: Union[float, Sequence] = 1e-5,
    optimizer="fmin_l_bfgs_b",
    n_restarts_optimizer=0,
    normalize_y=False,
    random_state: int = None,
    **kwargs
):
    """Initialize state.

    sklearn-specific parameters are passed through to the implementation.

    Parameters:
        internal_hp_optimization: if True, hyperparameters are optimized
            "internally" by the Gaussian process, that is, scikit-learn
            optimizes hyperparameters and for smlb the learner has no
            hyperparameters; if False, hyperparameters are optimized by smlb
            (and scikit-learn does not optimize any hyperparameters).
            Currently only True is supported.
        kernel: scikit-learn kernel; if None, the sum of an RBF (Gaussian)
            kernel and a WhiteKernel is used as default
        alpha: regularization constant (scalar or vector); added as-is to
            kernel matrix diagonal. Equivalent to adding a "WhiteKernel";
            the default is the corresponding value from scikit-learn's
            WhiteKernel, and different from scikit-learn's
            GaussianProcessRegressor.
        optimizer: hyperparameter optimization algorithm;
            used only if internal_hp_optimization is True
        n_restarts_optimizer: number of times optimizer is restarted;
            only used if internal_hp_optimization is True
        normalize_y: whether to subtract the mean of the labels
        random_state: integer seed, or None (default) for no fixed seed

    See skl.gaussian_process.GaussianProcessRegressor parameters.
    """

    super().__init__(**kwargs)

    internal_hp_optimization = params.boolean(internal_hp_optimization)
    kernel = params.any_(kernel, lambda arg: params.instance(arg, Kernel), params.none)
    # incomplete check for alpha as dimension becomes known only at fitting time
    alpha = params.any_(
        alpha,
        lambda arg: params.real(arg, from_=0),
        lambda arg: params.real_vector(arg, domain=[0, np.inf]),
    )
    # todo: check optimizer, requires params.union (of string and callable) and params.function
    normalize_y = params.boolean(normalize_y)
    # None is the declared default and must pass validation alongside integer seeds
    random_state = params.any_(random_state, params.integer, params.none)

    if kernel is None:
        kernel = skl.gaussian_process.kernels.RBF() + skl.gaussian_process.kernels.WhiteKernel()

    assert internal_hp_optimization is True  # external HP optimization not yet supported

    self._model = skl.gaussian_process.GaussianProcessRegressor(
        kernel=kernel,
        alpha=alpha,
        optimizer=optimizer,
        n_restarts_optimizer=n_restarts_optimizer,
        normalize_y=normalize_y,
        random_state=random_state,
    )
def __init__(
    self,
    optimizer_names: Optional[List[str]] = None,
    log_scale: bool = False,
    quantile_width: float = 0.5,
    show_extrama: bool = True,
    **kwargs,
):
    """Initialize optimization trajectory plot.

    Parameters:
        optimizer_names: optional sequence of strings naming the optimizers
        log_scale: if True, the horizontal axis defaults to logarithmic
            scaling, otherwise to linear scaling
        quantile_width: real number on [0, 1]; stored for later use
        show_extrama: boolean flag, stored as the 'show extrema' setting
            (note the spelling of the parameter name)
        All remaining parameters are passed to the base class initializer;
        'rectify' and 'visualization_type' are always overridden with
        False and "shaded-line", respectively.
    """

    names_check = lambda arg: params.sequence(arg, type_=str)
    self._optimizer_names = params.optional_(optimizer_names, names_check)
    self._show_extrema = params.boolean(show_extrama)

    horizontal_scale = "log" if params.boolean(log_scale) else "linear"
    self._quantile_width = params.real(quantile_width, from_=0, to=1)

    # defaults that callers may override explicitly
    kwargs.setdefault("axes_scales", (horizontal_scale, "linear"))
    kwargs.setdefault("axes_labels", ("function evaluations", "best score", None, None))

    # settings this plot type always enforces
    kwargs.update(rectify=False, visualization_type="shaded-line")

    super().__init__(**kwargs)
def __init__(self, target: float, goal: str = "maximize", **kwargs):
    """Initialize state.

    Parameters:
        target: real-valued target
        goal: either "maximize" (default) or "minimize"; stored internally
            as a direction of +1 or -1, respectively
        All remaining parameters are passed to the base class initializer.
    """

    super().__init__(**kwargs)

    self._target = params.real(target)

    # sign convention: +1 for maximization, -1 for minimization
    direction_of = {"maximize": 1, "minimize": -1}
    validated_goal = params.enumeration(goal, {"maximize", "minimize"})
    self._direction = direction_of[validated_goal]
def __init__(self, value: float = 0, **kwargs):
    """Initialize state.

    Parameters:
        value: the constant to return; any real number, 0 by default
        All parameters from base class 'Noise' initializer
    """

    super().__init__(**kwargs)

    constant = params.real(value)
    self._value = constant
def __init__(self, bias_correction: float = 0, **kwargs):
    """Initialize metric.

    Parameters:
        bias_correction: non-negative real number d; 0 (default) applies no
            correction. For a positive value d, division is by n-d.
            Bessel's correction (d=1) is unbiased for variance estimators,
            but not for standard deviation estimators. While there is no
            value that works across all distributions, d=1.5 is a
            reasonably good correction.
    """

    # validated and stored before the base class initializer runs
    correction = params.real(bias_correction, from_=0)
    self._bias_correction = correction

    super().__init__(**kwargs)
def __init__(
    self,
    fits: bool = True,
    fit_lambda: float = 1e-7,
    fit_weights: Optional[str] = None,
    base=10,
    **kwargs,
):
    """Initialize learning curve plot.

    Parameters:
        fits: if True, show estimated asymptotic fits
        fit_lambda: regularization strength for asymptotic fits;
            non-negative, defaults to 1e-7
        fit_weights: if and how to weight fits; one of
            None: no weighting,
            "variance": weigh by variance for each training set size
        base: base for logarithmic plotting; must be at least 2

    All parameters from base classes, in particular
    GeneralizedFunctionPlot and Plot.
    """

    # learning curve-specific defaults, used only if not explicitly set
    kwargs.setdefault("axes_scales", ("log", "log"))
    kwargs.setdefault("axes_labels", ("training set size", "evaluation metric", None, None))

    super().__init__(**kwargs)

    def variance_keyword(arg):
        return params.enumeration(arg, {"variance"})

    # parameters
    self._fits = params.boolean(fits)
    self._fit_lambda = params.real(fit_lambda, from_=0)
    self._fit_weights = params.any_(fit_weights, variance_keyword, params.none)
    self._base = params.real(base, from_=2)

    def log_in_base(x):
        # change of base: log_b(x) = ln(x) / ln(b)
        return np.log(x) / np.log(self._base)

    def power_of_base(x):
        return np.power(self._base, x)

    self._logf = log_in_base
    self._powf = power_of_base
def evaluate(self, results, **kwargs):
    """Evaluate learning curve plot.

    Parameters:
        results: sequence of curve data, where each curve datum is a sequence
            of tuples (n,fx) of training set size n (positive number) and
            performance values fx (sequence of real numbers).
    """

    # parameter validation
    def check_point(point):
        return params.tuple_(
            point, lambda size: params.real(size, above=0), params.real_vector, arity=2
        )

    def check_curve(curve):
        return params.tuple_(curve, check_point)

    results = params.tuple_(results, check_curve)

    super().evaluate(results=results, **kwargs)

    # fitted values are mapped back through the power function for a log-scaled vertical axis
    if self.axes_scales[1] == "log":
        to_axis_scale = self._powf
    else:
        to_axis_scale = lambda arg: arg

    if not self._fits:
        return

    # asymptotic estimates
    fits = tuple(self.asymptotic_fit(curve) for curve in results)

    # evaluate each fit on a common grid spanning all observed training set sizes
    distinct_sizes = np.unique([point[0] for curve in results for point in curve])
    grid = np.linspace(start=np.min(distinct_sizes), stop=np.max(distinct_sizes), num=25)

    self._fit_data = np.empty(shape=(len(results), 2, len(grid)))
    for row, (offset, slope, _, _) in enumerate(fits):
        fitted = [to_axis_scale(offset + slope * self._logf(n)) for n in grid]
        self._fit_data[row, 0, :] = grid
        self._fit_data[row, 1, :] = fitted

    summaries = []
    for offset, slope, residuals, variance in fits:
        summaries.append(
            {
                "offset": offset,
                "slope": slope,
                "residuals": residuals,
                "variance": variance,
            }
        )
    self.add_auxiliary("asymptotic_fits", tuple(summaries))
def _calculate_li_above(mean, stddev, target):
    """Likelihood that a normally distributed value exceeds the target.

    Parameters:
        mean: mean of the normal distribution
        stddev: standard deviation of the normal distribution; non-negative
        target: value to exceed

    Returns:
        probability in [0, 1]; for zero standard deviation this is
        1.0 if the mean is strictly above the target and 0.0 otherwise
    """

    stddev = params.real(stddev, from_=0.0)

    # degenerate distribution: all mass sits at the mean
    if stddev == 0:
        return 1.0 if mean > target else 0.0

    standardized = (target - mean) / (stddev * math.sqrt(2))
    return 0.5 * (1 - erf(standardized))
def evaluate(self, results, **kwargs):
    """Evaluate optimization trajectory plot.

    Parameters:
        results: sequence of curve data, where each curve datum is a sequence
            of tuples (index, scores) of function evaluation number (positive
            number) and best scores found after that many evaluations
            (sequence of real numbers).
    """

    def check_evaluation_number(arg):
        return params.real(arg, above=0)

    def check_point(point):
        # each point is a pair (evaluation number, vector of scores)
        return params.tuple_(point, check_evaluation_number, params.real_vector, arity=2)

    def check_curve(curve):
        return params.tuple_(curve, check_point)

    validated = params.tuple_(results, check_curve)

    super().evaluate(results=validated, **kwargs)
def __init__(self, D, r0, a, domain=(0, np.inf), **args):
    """Initialize state.

    Parameters:
        D: potential parameter determining well depth -D; positive
        r0: potential parameter determining location r0 of minimum; positive
        a: potential parameter, where 1/a is proportional to well width; positive
        domain: domain of dataset; defaults to [0,inf)
        All parameters from base class 'ComputedLabelsVectorSpaceData' initializer

    Raises:
        InvalidParameterError: on invalid parameter values
    """

    depth = params.real(D, above=0)
    minimum_at = params.real(r0, above=0)
    inverse_width = params.real(a, above=0)

    self._d = depth
    self._r0 = minimum_at
    self._a = inverse_width

    def morsef(r):
        """Evaluate Morse potential at a sequence of vectors r.

        Parameters:
            r: n x 1 matrix of n one-dimensional vectors

        Returns:
            vector of n Morse potential values, one per row of r
        """

        r = params.real_matrix(r, ncols=1)

        decay = np.exp(-self._a * (r - self._r0))
        potential = self._d * (np.square(decay) - 2 * decay)

        # flatten the n x 1 result to a vector of length n
        return potential.reshape(len(r))

    super().__init__(dimensions=1, function=morsef, domain=domain, **args)
def __init__(
    self,
    uncertainties: Optional[str] = None,
    loss: str = "ls",
    alpha: float = 0.9,
    learning_rate: float = 0.1,
    subsample: float = 1.0,
    n_estimators: int = 100,
    criterion: str = "mse",
    max_depth: int = 3,
    min_samples_split: Union[int, float] = 2,
    min_samples_leaf: Union[int, float] = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: Union[int, float, str, None] = None,
    max_leaf_nodes: Optional[int] = None,
    min_impurity_decrease: float = 0.0,
    # min_impurity_split deprecated
    random_state: int = None,
    ccp_alpha: float = 0.0,
    init: Optional[Any] = None,
    validation_fraction: float = 0.1,
    n_iter_no_change: Optional[int] = None,
    tol: float = 0.0001,
    **kwargs,
):
    """Initialize state.

    sklearn-specific parameters are passed through to the implementation.

    Parameters:
        uncertainties: whether and how to compute predictive uncertainties;
            the only possible choice is None, as no predictive
            uncertainties are returned
        loss: loss function to optimize; valid values are "ls" (least squares),
            "lad" (least absolute deviation), "huber" (Huber's loss),
            "quantile" (quantile regression). Use alpha parameter for
            huber and quantile.
        alpha: quantile for "huber" and "quantile" loss functions; on (0, 1)
        learning_rate: value by which to shrink contribution of consecutive
            trees; trade-off with n_estimators
        subsample: fraction of samples for fitting base learners; if <1 results
            in Stochastic Gradient Boosting. Reducing subsample reduces
            variance and increases bias.
        n_estimators: number of decision trees
        criterion: either Friedman improved score ("friedman_rmse"),
            variance reduction ("mse", mean squared error),
            or, mean absolute error ("mae")
        max_depth: maximum depth of a tree; default is 3
        min_samples_split: minimum number of samples required to split an
            internal node; float numbers indicate a fraction of number of
            training samples
        min_samples_leaf: minimum number of training samples required in a
            leaf node; float numbers indicate a fraction of number of
            training samples
        min_weight_fraction_leaf: minimum weighted fraction of weights
            required in a leaf node
        max_features: number of features considered when splitting; integers
            directly specify the number, floating point values specify which
            fraction of all features to use; "auto" uses all features,
            "sqrt" and "log2" use square root and binary logarithm of
            number of features
        max_leaf_nodes: maximum number of leaves a tree can have
        min_impurity_decrease: minimum impurity decrease required for splitting
        random_state: pseudo-random number generator seed;
            an integer, or None (default) for no fixed seed
        ccp_alpha: complexity parameter for minimal cost-complexity pruning.
        init: estimator for initial predictions; can be 'zero' for constant
            zero predictions; passed through without validation
        validation_fraction: fraction of training data to set aside for early
            stopping; only with n_iter_no_change
        n_iter_no_change: set to integer to stop after no improvement
            (beyond tol) for that many rounds
        tol: tolerance for early stopping; only improvements larger than tol
            are considered

    The sklearn.GradientBoostingRegressor parameters `oob_score`, `verbose`,
    `warm_start` are not considered.

    See skl.ensemble.GradientBoostingRegressor parameters.
    """

    super().__init__(**kwargs)

    # validate parameters

    self._uncertainties = params.enumeration(uncertainties, {None})

    loss = params.enumeration(loss, {"ls", "lad", "huber", "quantile"})
    alpha = params.real(alpha, above=0, below=1)
    learning_rate = params.real(learning_rate, above=0, to=1)
    subsample = params.real(subsample, above=0, to=1)
    n_estimators = params.integer(n_estimators, from_=1)
    criterion = params.enumeration(criterion, {"friedman_rmse", "mse", "mae"})
    max_depth = params.any_(max_depth, lambda arg: params.integer(arg, from_=1), params.none)
    min_samples_split = params.any_(
        min_samples_split,
        lambda arg: params.integer(arg, from_=2),
        lambda arg: params.real(arg, above=0.0, to=1.0),
    )
    min_samples_leaf = params.any_(
        min_samples_leaf,
        lambda arg: params.integer(arg, from_=1),
        lambda arg: params.real(arg, above=0.0, to=1.0),
    )
    min_weight_fraction_leaf = params.real(min_weight_fraction_leaf, from_=0.0, to=1.0)
    max_features = params.any_(
        max_features,
        lambda arg: params.integer(arg, above=0),
        lambda arg: params.real(arg, above=0.0, to=1.0),
        lambda arg: params.enumeration(arg, {"auto", "sqrt", "log2"}),
        params.none,
    )
    max_leaf_nodes = params.any_(
        max_leaf_nodes, lambda arg: params.integer(arg, from_=1), params.none
    )
    min_impurity_decrease = params.real(min_impurity_decrease, from_=0.0)
    # None is the declared default and must pass validation alongside integer seeds
    random_state = params.any_(random_state, params.integer, params.none)
    ccp_alpha = params.real(ccp_alpha, from_=0.0)
    # no validation for init (no class signature validator)
    validation_fraction = params.real(validation_fraction, above=0, below=1)
    n_iter_no_change = params.any_(
        n_iter_no_change, lambda arg: params.integer(arg, from_=0), params.none
    )
    tol = params.real(tol, from_=0)

    self._model = skl.ensemble.GradientBoostingRegressor(
        loss=loss,
        alpha=alpha,
        learning_rate=learning_rate,
        subsample=subsample,
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        random_state=random_state,
        ccp_alpha=ccp_alpha,
        init=init,
        validation_fraction=validation_fraction,
        n_iter_no_change=n_iter_no_change,
        tol=tol,
    )
def __init__(
    self,
    num_trees: int = -1,
    use_jackknife: bool = True,
    bias_learner: Optional[BaseLoloLearner] = None,
    leaf_learner: Optional[BaseLoloLearner] = None,
    subset_strategy: Union[str, int, float] = "auto",
    min_leaf_instances: int = 1,
    max_depth: int = 2 ** 30,
    uncertainty_calibration: bool = False,
    randomize_pivot_location: bool = False,
    # randomly_rotate_features: bool = False,  currently in develop branch
    **kwargs
):
    """Initialize random forest model.

    See lolo Scala source code for initialization parameters:
    https://github.com/CitrineInformatics/lolo/blob/develop/src/main/scala/io/citrine/lolo/learners/RandomForest.scala

    When using `uncertainty_calibration=False` (the default), the number of
    trees `num_trees` should be set to a multiple of the number n of training
    samples, `num_trees = 4 * n` or higher. When using
    `uncertainty_calibration=True`, `num_trees = 64` is sufficient.

    Parameters:
        num_trees: number of trees in the forest; a positive integer,
            or -1 to use the number of training samples
        use_jackknife: whether to use jackknife-based variance estimates
        bias_learner: algorithm used to model bias
        leaf_learner: algorithm used at each leaf of the random forest
        subset_strategy: strategy to determine number of features used at each split
            "auto": use the default for lolo (all features for regression,
                sqrt for classification)
            "log2": use the base 2 log of the number of features
            "sqrt": use the square root of the number of features
            integer: set the number of features explicitly
            float: use a certain fraction of the features
        min_leaf_instances: minimum number of instances per leaf; positive integer
        max_depth: maximum depth of decision trees
        uncertainty_calibration: whether to empirically re-calibrate predicted
            uncertainties based on out-of-bag residuals
        randomize_pivot_location: whether to draw pivots randomly or always
            select the midpoint

    The lolo option `randomly_rotate_features` is not accepted yet.
    """

    super().__init__(**kwargs)

    def learner_or_none(arg):
        return params.any_(arg, lambda obj: params.instance(obj, BaseLoloLearner), params.none)

    def positive_integer(arg):
        return params.integer(arg, above=0)

    # validate parameters
    num_trees = params.any_(
        num_trees,
        positive_integer,
        lambda count: params.integer(count, from_=-1, to=-1),
    )
    use_jackknife = params.boolean(use_jackknife)
    bias_learner = learner_or_none(bias_learner)
    leaf_learner = learner_or_none(leaf_learner)
    subset_strategy = params.any_(
        subset_strategy,
        lambda choice: params.enumeration(choice, {"auto", "log2", "sqrt"}),
        positive_integer,
        lambda fraction: params.real(fraction, above=0),
    )
    min_leaf_instances = positive_integer(min_leaf_instances)
    # the default 2**30 works for 32 bit or larger architectures
    max_depth = positive_integer(max_depth)
    uncertainty_calibration = params.boolean(uncertainty_calibration)
    randomize_pivot_location = params.boolean(randomize_pivot_location)

    # set up model
    model_arguments = {
        "num_trees": num_trees,
        "use_jackknife": use_jackknife,
        "bias_learner": bias_learner,
        "leaf_learner": leaf_learner,
        "subset_strategy": subset_strategy,
        "min_leaf_instances": min_leaf_instances,
        "max_depth": max_depth,
        "uncertainty_calibration": uncertainty_calibration,
        "randomize_pivot_location": randomize_pivot_location,
    }
    try:
        self._model = RandomForestRegressor(**model_arguments)
    except Py4JJavaError as e:
        raise BenchmarkError("instantiating lolo model failed") from e

    self._with_uncertainties = use_jackknife  # otherwise, deviations will be zero
def __init__(
    self,
    rng: int = None,
    strategy: str = "best1bin",
    maxiter: int = 1000,
    popsize: int = 15,
    tol: float = 0.01,
    mutation=(0.5, 1),
    recombination: float = 0.7,
    **kwargs
):
    """Initialize state.

    Scipy-specific parameters are passed through.

    Parameters:
        rng: integer seed. Will be used to generate a new seed each time
            the optimizer is run.
        strategy: The differential evolution strategy to use. See
            documentation for complete list and explanations.
        maxiter: The maximum number of generations over which the entire
            population is evolved.
        popsize: A multiplier for setting the total population size.
        tol: Relative tolerance for convergence.
        mutation: The mutation constant. Either a number between 0 and 2 or a
            tuple (min, max), in which case the mutation constant is randomly
            selected uniformly from between min and max with each generation.
        recombination: The recombination constant. Must be between 0 and 1.
    """

    super().__init__(rng=rng, **kwargs)

    bin_and_exp_variants = (
        "best1bin",
        "best1exp",
        "rand1exp",
        "randtobest1exp",
        "currenttobest1exp",
        "best2exp",
        "rand2exp",
        "randtobest1bin",
        "currenttobest1bin",
        "best2bin",
        "rand2bin",
        "rand1bin",
    )

    self._strategy = params.enumeration(strategy, set(bin_and_exp_variants))
    self._maxiter = params.integer(maxiter, from_=1)
    self._popsize = params.integer(popsize, from_=1)
    self._tol = params.real(tol, above=0.0)

    def mutation_constant(value, low=0.0):
        """Single mutation constant on [low, 2]."""
        return params.real(value, from_=low, to=2.0)

    def mutation_interval(pair):
        """Pair (min, max) of mutation constants with min <= max."""
        return params.tuple_(
            pair,
            mutation_constant,
            lambda upper: mutation_constant(upper, low=pair[0]),
            arity=2,
        )

    self._mutation = params.any_(mutation, mutation_constant, mutation_interval)
    self._recombination = params.real(recombination, from_=0.0, to=1.0)
def test_mutation_range(arg, low=0.0):
    """Validate that arg is a real number on the closed interval [low, 2].

    Parameters:
        arg: value to validate
        low: inclusive lower bound; 0 by default

    Returns:
        the result of params.real for arg
    """

    upper_limit = 2.0
    validated = params.real(arg, from_=low, to=upper_limit)
    return validated
def __init__(
    self,
    rng: int = None,
    uncertainties: Optional[str] = None,
    n_estimators: int = 100,
    criterion: str = "mse",
    max_depth: Optional[int] = None,
    min_samples_split: Union[int, float] = 2,
    min_samples_leaf: Union[int, float] = 1,
    min_weight_fraction_leaf: float = 0.0,
    max_features: Union[int, float, str, None] = "auto",
    max_leaf_nodes: Optional[int] = None,
    min_impurity_decrease: float = 0.0,
    # min_impurity_split deprecated
    bootstrap: bool = True,
    n_jobs: Optional[int] = None,
    ccp_alpha: float = 0.0,
    max_samples: Optional[Union[int, float]] = None,
    **kwargs,
):
    """Initialize state.

    sklearn-specific parameters are passed through to the implementation.

    Parameters:
        rng: pseudo-random number generator seed; passed to the base class
            initializer
        uncertainties: whether and how to compute predictive uncertainties;
            choices are
            None; by default, no predictive uncertainties are returned;
            "naive"; uses the ensemble's standard deviation
        n_estimators: number of decision trees
        criterion: either variance reduction ("mse", mean squared error),
            or, mean absolute error ("mae")
        max_depth: maximum depth of a tree; default is restricted only by
            min_samples_leaf
        min_samples_split: minimum number of samples required to split an
            internal node; float numbers indicate a fraction of number of
            training samples
        min_samples_leaf: minimum number of training samples required in a
            leaf node; float numbers indicate a fraction of number of
            training samples
        min_weight_fraction_leaf: minimum weighted fraction of weights
            required in a leaf node
        max_features: number of features considered when splitting; integers
            directly specify the number, floating point values specify which
            fraction of all features to use; "auto" uses all features,
            "sqrt" and "log2" use square root and binary logarithm of
            number of features
        max_leaf_nodes: maximum number of leaves a tree can have
        min_impurity_decrease: minimum impurity decrease required for splitting
        bootstrap: if False, the whole dataset is used to build trees
        n_jobs: number of parallel jobs; -1 to use all available processors;
            None means 1
        ccp_alpha: complexity parameter for minimal cost-complexity pruning.
        max_samples: number of input samples to draw during bootstrap;
            integers (at least 1) directly specify the number, floating point
            values on (0, 1] specify which fraction of samples to use;
            all by default

    The sklearn.ExtraTreesRegressor parameters `oob_score`, `verbose`,
    `warm_start` are not considered.

    See skl.ensemble.ExtraTreesRegressor parameters.
    """

    super().__init__(rng=rng, **kwargs)

    # validate parameters

    self._uncertainties = params.enumeration(uncertainties, {None, "naive"})

    n_estimators = params.integer(n_estimators, from_=1)
    criterion = params.enumeration(criterion, {"mse", "mae"})
    max_depth = params.any_(max_depth, lambda arg: params.integer(arg, from_=1), params.none)
    min_samples_split = params.any_(
        min_samples_split,
        lambda arg: params.integer(arg, from_=2),
        lambda arg: params.real(arg, above=0.0, to=1.0),
    )
    min_samples_leaf = params.any_(
        min_samples_leaf,
        lambda arg: params.integer(arg, from_=1),
        lambda arg: params.real(arg, above=0.0, to=1.0),
    )
    min_weight_fraction_leaf = params.real(min_weight_fraction_leaf, from_=0.0, to=1.0)
    max_features = params.any_(
        max_features,
        lambda arg: params.integer(arg, above=0),
        lambda arg: params.real(arg, above=0.0, to=1.0),
        lambda arg: params.enumeration(arg, {"auto", "sqrt", "log2"}),
        params.none,
    )
    max_leaf_nodes = params.any_(
        max_leaf_nodes, lambda arg: params.integer(arg, from_=1), params.none
    )
    min_impurity_decrease = params.real(min_impurity_decrease, from_=0.0)
    bootstrap = params.boolean(bootstrap)
    n_jobs = params.any_(
        n_jobs,
        lambda arg: params.integer(arg, from_=-1, to=-1),
        lambda arg: params.integer(arg, from_=1),
        params.none,
    )
    ccp_alpha = params.real(ccp_alpha, from_=0.0)
    # a fraction of exactly zero would draw no samples and is rejected by
    # scikit-learn; exclude it here, like for the other fraction parameters
    max_samples = params.any_(
        max_samples,
        lambda arg: params.integer(arg, from_=1),
        lambda arg: params.real(arg, above=0.0, to=1.0),
        params.none,
    )

    self._model = ExtraTreesRegressor(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        bootstrap=bootstrap,
        n_jobs=n_jobs,
        ccp_alpha=ccp_alpha,
        max_samples=max_samples,
    )