def __init__(
    self,
    rng: int = None,
    num_seeds: int = 1,
    resolution: int = 64,
    max_relative_jump: float = 1.0,
    dimensions_varied: Union[str, float, int] = "all",
    max_iters: Optional[int] = None,
    max_evals: Optional[int] = None,
    **kwargs,
):
    """Initialize state.

    Parameters:
        rng: pseudo-random number generator seed
        num_seeds: the number of starting points, and the number of points chosen
            at the end of each iteration
        resolution: the number of points to sample along a single dimension for a single seed
        max_relative_jump: the maximum relative step size along a single dimension.
            If a given dimension has length `L` and a seed has value `x` along that dimension,
            then the candidates are `resolution` linearly spaced points from the range
            [x - max_relative_jump * L, x + max_relative_jump * L] (clipped by the bounds).
            `max_relative_jump` must be on (0, 1]. For a value of 1, the entire range
            is always considered.
        dimensions_varied: how many randomly selected dimensions to explore with each step.
            'all' indicates all dimensions. An integer directly specifies the number of
            dimensions. A float on (0, 1) indicates the fraction of the total.
        max_iters: the maximum number of iterations
        max_evals: the maximum number of function evaluations (this is a soft maximum:
            once it is reached, the current iteration finishes)

    TODO: add tolerance stopping conditions
    """
    super().__init__(rng=rng, **kwargs)

    self._num_seeds = params.integer(num_seeds, from_=1)
    self._resolution = params.integer(resolution, from_=2)
    self._max_relative_jump = params.real(max_relative_jump, above=0.0, to=1.0)
    self._dimensions_varied = params.any_(
        dimensions_varied,
        lambda arg: params.integer(arg, above=0),
        lambda arg: params.real(arg, above=0.0, below=1.0),
        lambda arg: params.enumeration(arg, {"all"}),
    )
    self._max_iters = params.optional_(max_iters, lambda arg: params.integer(arg, from_=1))
    self._max_evals = params.optional_(max_evals, lambda arg: params.integer(arg, from_=1))
    if self._max_iters is None and self._max_evals is None:
        raise InvalidParameterError("at least one stopping condition defined", "all Nones")
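
# Illustrative sketch of the candidate generation described in the docstring above
# (standalone; the bounds, seed value, and variable names are made up for the example):
import numpy as np

lo, hi = 0.0, 10.0                            # bounds of one dimension, so L = 10
x, max_relative_jump, resolution = 4.0, 0.25, 5
L = hi - lo
candidates = np.linspace(x - max_relative_jump * L, x + max_relative_jump * L, resolution)
candidates = np.clip(candidates, lo, hi)      # [1.5, 2.75, 4.0, 5.25, 6.5], all within bounds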
def __init__(self, font_size: int = 11, color_set: int = 1, **kwargs):
    """Initialize plot configuration.

    Parameters:
        font_size: base font size in absolute points
        color_set: color scheme
    """
    super().__init__(**kwargs)

    self._font_size = params.integer(font_size, above=0)
    self._color_set = self.PREDEFINED_COLORSETS[
        params.integer(color_set, from_=0, below=len(self.PREDEFINED_COLORSETS))
    ]
def __init__(
    self,
    rng: int = None,
    maxiter: int = 1000,
    local_search_options: Optional[dict] = None,
    initial_temp: float = 5230.0,
    restart_temp_ratio: float = 2e-05,
    visit: float = 2.62,
    accept: float = -5.0,
    maxfun: int = int(1e7),
    no_local_search: bool = False,
    **kwargs,
):
    """Initialize state.

    Scipy-specific parameters are passed through.

    Parameters:
        rng: integer seed. Will be used to generate a new seed each time the optimizer is run.
        maxiter: the maximum number of iterations, where one iteration is one round of
            simulated annealing followed by one use of a local optimizer to find a local minimum.
        local_search_options: an optional kwargs dictionary to pass to the local minimizer,
            scipy.optimize.minimize:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
            If no args are passed, the minimizer defaults to the L-BFGS-B method, since the
            problems being studied have bounds but no constraints.
        initial_temp: the initial temperature. Higher values facilitate a wider search and
            make it easier to escape local minima.
        restart_temp_ratio: the temperature, relative to the initial temperature, at which
            the annealing process restarts.
        visit: a parameter of the visiting distribution. A higher value corresponds to a
            heavier tail and longer potential jumps.
        accept: a parameter of the acceptance distribution. A lower value means that uphill
            moves are less likely to be accepted.
        maxfun: soft limit for the total number of function evaluation calls; it may be
            exceeded only if the quota is reached during a local optimization step.
        no_local_search: if True, the local search step is skipped, and this reduces to a
            generalized simulated annealing optimizer.
    """
    super().__init__(rng=rng, **kwargs)

    self._maxiter = params.integer(maxiter, from_=1)
    self._local_search_options = local_search_options or {}  # TODO: verify dictionaries
    self._initial_temp = params.real(initial_temp, above=0.01, to=5e4)
    self._restart_temp_ratio = params.real(restart_temp_ratio, above=0.0, below=1.0)
    self._visit = params.real(visit, above=0.0, to=3.0)
    self._accept = params.real(accept, above=-1e4, to=-5.0)
    self._maxfun = params.integer(maxfun, from_=1)
    self._no_local_search = params.boolean(no_local_search)
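
# Illustrative sketch of the underlying SciPy routine these parameters feed into,
# with the defaults from above (assumes SciPy is installed; newer SciPy versions
# name the local-minimizer argument `minimizer_kwargs` rather than `local_search_options`):
import numpy as np
from scipy.optimize import dual_annealing

result = dual_annealing(
    lambda x: np.sum(x ** 2),        # toy objective with minimum at the origin
    bounds=[(-5.0, 5.0)] * 3,
    maxiter=1000,
    initial_temp=5230.0,
    restart_temp_ratio=2e-05,
    visit=2.62,
    accept=-5.0,
    maxfun=int(1e7),
    no_local_search=False,
    seed=42,
)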
def next_grid_size(self, data: VectorSpaceData, n: int):
    r"""Number of samples for smallest evenly-spaced grid with at least n vertices.

    \[ k = \lceil \sqrt[d]{n} \rceil , \]

    where $d$ is the dimensionality of the vector space.

    Parameters:
        data: sampled dataset
        n: number of samples the grid must contain

    Returns:
        smallest number of samples per dimension for an evenly-spaced grid
        that has at least n points
    """
    n = params.integer(n, above=0)
    d = data.dimensions

    # fails for n = 3125 due to rounding error:
    #   int(math.ceil(math.pow(n, 1. / self.dimensions)))
    # works, but adds a dependency:
    #   k = decimal.Decimal(n) ** (decimal.Decimal(1) / decimal.Decimal(self.dimensions))
    #   return int(k.to_integral_exact(rounding=decimal.ROUND_CEILING))
    k = int(np.floor(np.power(float(n), 1.0 / d)))
    return k if k ** d >= n else k + 1
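
# Worked example of the rounding pitfall documented in the comments above (values
# assume IEEE-754 doubles): for n = 3125 and d = 5 the exact answer is k = 5, since
# 5**5 == 3125, but math.pow(3125, 1/5) lands slightly above 5.0, so ceiling rounds
# up to 6. The floor-then-check approach recovers the correct value.
import math
import numpy as np

n, d = 3125, 5
naive = int(math.ceil(math.pow(n, 1.0 / d)))    # 6, one too many
k = int(np.floor(np.power(float(n), 1.0 / d)))  # floor guards against overshoot -> 5
k = k if k ** d >= n else k + 1                 # bump only if undershot; stays 5 here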
def __init__(self, failmode, num_samples: int):
    """Initialize failure handler.

    Parameters:
        failmode: how to handle failed descriptor calculations, either due to rejected
            SMILES encodings or failing descriptor code. Possible values:
            "raise" [default]: raise a BenchmarkError
            "drop": drop the sample; returned Data will have fewer samples
            ("mask", mask): where `mask` is a NumPy array with dtype bool whose entries
                will be set to True for failures
            ("index", index): where `index` is an empty list to which the indices of
                failed entries will be appended
        num_samples: number of samples that are transformed
    """
    self.num_samples = params.integer(num_samples, from_=0)
    self.failmode = self.failmode(failmode)  # validate via the class-level failmode() test

    if is_sequence(self.failmode) and self.failmode[0] == "mask":
        self.failmode = "mask"
        if len(failmode[1]) != self.num_samples:
            raise InvalidParameterError(
                f"failure mode mask of length {self.num_samples}", len(failmode[1])
            )
        self.mask = failmode[1]
        self.mask.fill(False)

    if is_sequence(self.failmode) and self.failmode[0] == "index":
        self.failmode = "index"
        self.index = failmode[1]

    self.failures = []  # list of indices of failed samples
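
# Illustrative failmode arguments matching the forms documented above; each value
# below is a valid input for the `failmode` parameter of this initializer:
import numpy as np

failmode_raise = "raise"                            # raise on the first failure
failmode_drop = "drop"                              # silently drop failed samples
failmode_mask = ("mask", np.zeros(10, dtype=bool))  # failures flagged in the array
failmode_index = ("index", [])                      # failure indices appended to the list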
def __init__(
    self,
    dimensions: int,
    function: Optional[Callable[[np.ndarray], Sequence[L]]] = None,
    domain: Optional[Sequence[Tuple[float, float]]] = None,
    **kwargs,
):
    """Initialize vector space data.

    If no function is specified, data are unlabeled.
    If a domain is specified, samples must be within that domain.

    Parameters:
        dimensions: dimensionality of vector space; positive finite integer
        function: a function that accepts a real matrix (vectors are rows) and returns
            a corresponding sequence of labels. If not specified, data are unlabeled.
        domain: domain in the form of a hypercube, if specified; given as a sequence of
            intervals [a, b], where a <= b. If only a single interval is specified,
            it is used for all dimensions.

    Raises:
        InvalidParameterError: for invalid arguments
    """
    self._dimensions = params.integer(dimensions, above=0)
    self._function = params.optional_(
        function, lambda arg: params.callable(arg, num_pos_or_kw=1)
    )
    self._domain = params.optional_(
        domain, lambda arg: params.hypercube_domain(arg, self._dimensions)
    )

    super().__init__(**kwargs)
def box_whisker(self, positions, values, color=0, widths=0.5, **kwargs):
    """Draw box-whisker plots.

    Parameters:
        positions: where to place plots on horizontal axis
        values: samples for each location
        color: color index
        widths: widths of boxes
    """
    positions = params.real_vector(positions)
    values = params.tuple_(values, params.real_vector, arity=len(positions))
    color = params.integer(color, from_=0, below=len(self.configuration.color_set))
    widths = params.real_vector(widths, dimensions=len(positions), domain=(0, 999))

    color = self.configuration.color(color)
    self.ax.boxplot(
        values,
        positions=positions,
        whis=(0, 100),
        bootstrap=None,
        widths=widths,
        notch=False,
        showmeans=True,
        boxprops={"color": color},
        whiskerprops={"color": color},
        capprops={"color": color},
        meanprops={"marker": "*", "markerfacecolor": color, "markeredgecolor": color},
        medianprops={"color": color},
        manage_ticks=False,
        **kwargs,
    )
def __init__(
    self,
    internal_hp_optimization: bool = True,
    kernel: Optional[Kernel] = None,
    alpha: Union[float, Sequence] = 1e-5,
    optimizer="fmin_l_bfgs_b",
    n_restarts_optimizer=0,
    normalize_y=False,
    random_state: int = None,
    **kwargs,
):
    """Initialize state.

    sklearn-specific parameters are passed through to the implementation.

    Parameters:
        internal_hp_optimization: if True, hyperparameters are optimized "internally"
            by the Gaussian process, that is, scikit-learn optimizes hyperparameters
            and for smlb the learner has no hyperparameters; if False, hyperparameters
            are optimized by smlb (and scikit-learn does not optimize any hyperparameters)
        kernel: scikit-learn kernel; if None, a single Gaussian kernel is used as default
        alpha: regularization constant (scalar or vector); added as-is to kernel matrix
            diagonal. Equivalent to adding a "WhiteKernel"; the default is the corresponding
            value from scikit-learn's WhiteKernel, and different from scikit-learn's
            GaussianProcessRegressor.
        optimizer: hyperparameter optimization algorithm; used only if
            internal_hp_optimization is True
        n_restarts_optimizer: number of times optimizer is restarted; only used if
            internal_hp_optimization is True
        normalize_y: whether to subtract the mean of the labels
        random_state: integer seed

    See skl.gaussian_process.GaussianProcessRegressor parameters.
    """
    super().__init__(**kwargs)

    internal_hp_optimization = params.boolean(internal_hp_optimization)
    kernel = params.any_(kernel, lambda arg: params.instance(arg, Kernel), params.none)
    # incomplete check for alpha as dimension becomes known only at fitting time
    alpha = params.any_(
        alpha,
        lambda arg: params.real(arg, from_=0),
        lambda arg: params.real_vector(arg, domain=[0, np.inf]),
    )
    # todo: check optimizer, requires params.union (of string and callable) and params.function
    normalize_y = params.boolean(normalize_y)
    random_state = params.integer(random_state)

    if kernel is None:
        kernel = skl.gaussian_process.kernels.RBF() + skl.gaussian_process.kernels.WhiteKernel()

    assert internal_hp_optimization is True  # external HP optimization not yet supported

    self._model = skl.gaussian_process.GaussianProcessRegressor(
        kernel=kernel,
        alpha=alpha,
        optimizer=optimizer,
        n_restarts_optimizer=n_restarts_optimizer,
        normalize_y=normalize_y,
        random_state=random_state,
    )
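
# Sketch of the alpha <-> WhiteKernel relationship noted in the docstring above
# (assumes scikit-learn): a fixed alpha on the kernel-matrix diagonal plays the same
# regularizing role as a white-noise term, except that a WhiteKernel's noise level
# is itself fitted during hyperparameter optimization while alpha stays fixed.
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

gp_fixed = GaussianProcessRegressor(kernel=RBF(), alpha=1e-5)
gp_fitted = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(noise_level=1e-5), alpha=0.0)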
def __init__(self, seed):
    """Initializes state of pseudo-random number generator.

    Parameters:
        seed: key to initialize pseudo-random number generator
    """
    seed = params.integer(seed, from_=0, to=2**32 - 1)
    self._random = np.random.RandomState(seed=seed)
def __init__(self, num_samples: int, domain: Optional[Any] = None, rng=None, **kwargs):
    """Initialize state.

    Parameters:
        num_samples: number of samples to draw
        domain: (sub)domain to sample from; by default, the dataset's domain is used
        rng: pseudo-random number generator seed
    """
    super().__init__(rng=rng, **kwargs)

    self._num_samples = params.integer(num_samples, above=0)
    self._sampler = RandomVectorSampler(size=self._num_samples, domain=domain, rng=rng)
def _indices_testf(self, indices: Sequence[Any]):
    return params.optional_(
        indices,
        lambda arg: list(  # NumPy indexing expects a list
            params.any_(
                arg,
                lambda arg: params.tuple_(arg, None, arity=0),  # empty tuple
                lambda arg: params.tuple_(
                    arg, lambda arg: params.integer(arg, from_=0, below=self.num_samples)
                ),
            )
        ),
    )
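
# Illustrative inputs the validator above accepts, shown directly against the
# underlying `params` tests (assuming num_samples = 5): None passes through,
# an empty tuple stands for an empty selection, and a tuple of in-range integers
# is converted to a list for NumPy indexing.
params.optional_(None, lambda arg: arg)                        # -> None
list(params.tuple_((), None, arity=0))                         # -> []
list(params.tuple_((0, 2, 4),
                   lambda arg: params.integer(arg, from_=0, below=5)))  # -> [0, 2, 4]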
def shaded_line(
    self,
    positions: np.ndarray,
    values: List[np.ndarray],
    color_idx: int = 0,
    label: Optional[str] = None,
    quantile_width: float = 0.5,
    alpha: float = 0.2,
    show_extrema: bool = True,
    **kwargs,
):
    """Draw a line plot with shaded quantiles.

    Parameters:
        positions: 1-d array of point locations on the horizontal axis
        values: list of arrays, each one containing all of the values at a given location;
            len(values) must equal len(positions)
        color_idx: color index
        label: line label
        quantile_width: fraction of the range to shade. For the default value, 0.5,
            shade from the 25th percentile to the 75th percentile.
        alpha: shading alpha level
        show_extrema: whether or not to draw dashed lines at the best/worst point
    """
    positions = params.real_vector(positions)
    values = params.tuple_(values, params.real_vector, arity=len(positions))
    color_idx = params.integer(color_idx, from_=0, below=len(self.configuration.color_set))
    quantile_width = params.real(quantile_width, from_=0, to=1)
    alpha = params.real(alpha, from_=0, to=1)

    color = self.configuration.color(color_idx)
    lower_bound = 0.5 - quantile_width / 2.0
    upper_bound = 0.5 + quantile_width / 2.0
    median = [np.median(samples) for samples in values]
    lower_shading = [np.quantile(samples, lower_bound) for samples in values]
    upper_shading = [np.quantile(samples, upper_bound) for samples in values]

    self.ax.plot(positions, median, linestyle="-", color=color, label=label, **kwargs)
    self.ax.fill_between(
        positions,
        lower_shading,
        upper_shading,
        color=color,
        alpha=alpha,
        **kwargs,
    )
    if show_extrema:
        min_val = [np.min(samples) for samples in values]
        max_val = [np.max(samples) for samples in values]
        self.ax.plot(positions, min_val, linestyle="--", color=color, **kwargs)
        self.ax.plot(positions, max_val, linestyle="--", color=color, **kwargs)
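
# Worked example of the quantile bounds computed above: for the default
# quantile_width = 0.5, the shaded band spans the 25th to 75th percentile
# of the samples at each position.
import numpy as np

samples = np.random.default_rng(0).normal(size=1000)
lower_bound, upper_bound = 0.5 - 0.5 / 2.0, 0.5 + 0.5 / 2.0   # 0.25, 0.75
band = np.quantile(samples, lower_bound), np.quantile(samples, upper_bound)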
def test_all_():
    """Tests all_ meta test."""

    # special case: single test
    assert params.all_(None, lambda arg: params.none(arg)) is None
    with pytest.raises(InvalidParameterError):
        params.all_("_", lambda arg: params.none(arg))

    # special case: and
    assert (
        params.all_(
            2, lambda arg: params.integer(arg, above=1), lambda arg: params.integer(arg, from_=2)
        )
        == 2
    )
    assert (
        params.all_(
            3,
            lambda arg: params.integer(arg, above=1),
            lambda arg: params.integer(arg, from_=2),
            lambda arg: params.integer(arg, from_=3),
        )
        == 3
    )

    # fail in first testf
    with pytest.raises(InvalidParameterError):
        params.all_(
            1, lambda arg: params.integer(arg, above=1), lambda arg: params.integer(arg, from_=2)
        )

    # fail in last testf
    with pytest.raises(InvalidParameterError):
        params.all_(
            2,
            lambda arg: params.integer(arg, above=1),
            lambda arg: params.integer(arg, from_=2),
            lambda arg: params.integer(arg, above=2),
        )
def __init__(self, size, domain=None, rng=None, **kwargs):
    """Initialize state.

    Parameters:
        size: number of samples to draw
        domain: (sub)domain to sample from; by default, the dataset's domain is used
        rng: pseudo-random number generator seed
    """
    super().__init__(rng=rng, **kwargs)

    self._size = params.integer(size, from_=0)  # no upper bound for infinite spaces
    self._domain = domain
def __init__(self, size, **kwargs):
    """Initialize state.

    Parameters:
        size: number of samples to draw

    All arguments of Sampler and Random base classes.
    """
    super().__init__(**kwargs)

    self._size = params.integer(size, from_=0)  # partial validation (upper bound validated in apply)
def test_optional_():
    """Test optional_ meta test."""

    # only testf and None are valid
    assert params.optional_(None, params.integer) is None
    assert params.optional_(1, params.integer) == 1
    with pytest.raises(InvalidParameterError):
        params.optional_("x", params.integer)
    with pytest.raises(InvalidParameterError):
        params.optional_(1, lambda arg: params.integer(arg, above=1))

    # default value
    assert params.optional_(1, params.integer, default=2) == 1
    assert params.optional_(None, params.integer, default=2) == 2
def line(self, line, color=0, **kwargs):
    """Draw a line.

    Parameters:
        line: n x 2 matrix of n points in two dimensions
        color: color index
    """
    line = params.real_matrix(line, ncols=2)
    color = params.integer(color, from_=0, below=len(self.configuration.color_set))

    self.ax.plot(
        line[:, 0], line[:, 1], linestyle="-", color=self.configuration.color(color), **kwargs
    )
def __init__(self, dimensions=6, **kwargs):
    """Initialize state.

    Parameters:
        dimensions: dimensionality; at least 5; 6 in original paper;
            higher dimensions do not change the function

    Raises:
        InvalidParameterError: on invalid parameter values
    """
    dimensions = params.integer(dimensions, from_=5)
    domain = params.hypercube_domain((0, 1), dimensions=dimensions)

    super().__init__(
        dimensions=dimensions, function=self.__class__.friedman1979, domain=domain, **kwargs
    )
def color(self, i: int):
    """Query color from current colorset.

    Color is in RGB color space. Colors do not cycle.

    Parameters:
        i: index of queried color

    The rationale for not cycling colors is to alert the user that there are not
    enough unique colors, as opposed to not being able to tell apart elements of
    the plot. If cycling is desired, pass i modulo the length of the color scheme.
    """
    i = params.integer(i, from_=0, below=len(self._color_set))
    return self._color_set[i]
def __init__(self, size, domain: Optional[Any] = None, rng=None, **kwargs):
    """Initialize sampler.

    Parameters:
        size: number of vector samples to draw
        domain: (sub)domain to sample from; default is to use the data's domain
            if available, or the unit hypercube otherwise
        rng: pseudo-random number generator used

    Returns:
        IndexedFiniteData of vectors
    """
    super().__init__(rng=rng, **kwargs)

    self._size = params.integer(size, from_=0)  # no upper bound on number of vectors to draw
    self._domain = params.optional_(domain, lambda arg: params.hypercube_domain(arg))
def __init__(
    self, input_: TabularData, output: PredictiveDistribution, scores: Sequence[float], **kwargs
):
    super().__init__(**kwargs)

    self._input: TabularData = params.instance(input_, TabularData)
    self._output: PredictiveDistribution = params.instance(output, PredictiveDistribution)
    # total number of function evaluations during this step
    self._num_evaluations: int = params.integer(self._input.num_samples, from_=1)
    self._scores: Sequence[float] = params.any_(
        scores,
        lambda arg: params.sequence(arg, length=1, type_=float),
        lambda arg: params.sequence(arg, length=self._num_evaluations, type_=float),
    )
def __init__(self, rng=None, **kwargs):
    """Initialize state.

    Parameters:
        rng: seed (key) for pseudo-random number generator. This parameter must be
            specified to encourage correct usage of pseudo-random numbers
            throughout the benchmark.
    """
    super().__init__(**kwargs)

    if rng is None:
        raise InvalidParameterError(
            "rng seed", "nothing", "pseudo-random number generator seed must be specified"
        )

    rng = params.integer(rng, from_=0, to=2**32 - 1)
    self._random = PseudoRandomNumberGenerator(seed=rng)
def points(self, points, color=0, **kwargs):
    """Draw set of points.

    Parameters:
        points: n x 2 matrix of n points in two dimensions
        color: color index
    """
    points = params.real_matrix(points, ncols=2)
    color = params.integer(color, from_=0, below=len(self.configuration.color_set))

    self.ax.plot(
        points[:, 0],
        points[:, 1],
        linestyle="",
        marker="o",
        color=self.configuration.color(color),
        **kwargs,
    )
def __init__(self, dimensions: int, **kwargs):
    """Initialize Schwefel 26 test function.

    Parameters:
        dimensions: dimensionality of the problem

    Raises:
        InvalidParameterError: on invalid parameter values

    Examples:
        __init__(dimensions=2)
    """
    dimensions = params.integer(dimensions, above=0)
    domain = params.hypercube_domain((-500, 500), dimensions=dimensions)

    super().__init__(
        dimensions=dimensions, function=self.__class__.schwefel26_1981, domain=domain, **kwargs
    )
def apply(self, data: Data, **kwargs) -> Data:
    """Draw random subset of data.

    Parameters:
        data: dataset to sample from

    Returns:
        random subset of data
    """
    data = params.instance(data, Data)
    if not data.is_finite:
        raise InvalidParameterError("finite Data", type(data).__name__)
    size = params.integer(
        self._size, from_=0, to=data.num_samples
    )  # validate upper bound (see __init__)

    ind = self.random.choice(data.num_samples, size=size, replace=False)

    return data.subset(ind)
def noise(self, shape=None):
    """Add Gaussian noise to labels.

    Parameters:
        shape: shape of noise vector, matrix or higher-order tensor

    Returns:
        a numerical array of given shape containing independent identically
        distributed Gaussian noise

    Raises:
        InvalidParameterError: for invalid parameters
    """
    # valid shapes are either a positive integer or a tuple of positive integers
    is_pos_int = lambda arg: params.integer(arg, from_=1)
    is_tuple = lambda arg: params.tuple_(arg, is_pos_int)
    shape = params.any_(shape, is_pos_int, is_tuple)

    return self.random.normal(self._mean, self._stddev, size=shape)
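
# Illustrative shape arguments accepted by the validation above: a positive integer
# or a tuple of positive integers, forwarded as NumPy's `size` argument.
import numpy as np

rng = np.random.default_rng(0)
rng.normal(0.0, 1.0, size=5)       # vector of 5 noise samples
rng.normal(0.0, 1.0, size=(3, 2))  # 3 x 2 matrix of noise samples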
def noise(self, shape=None):
    """Return no noise.

    A constant value is returned.

    Parameters:
        shape: shape of noise vector, matrix or higher-order tensor

    Returns:
        a numerical array of given shape containing a constant value

    Raises:
        InvalidParameterError: for invalid parameters
    """
    # valid shapes are either a positive integer or a tuple of positive integers
    is_pos_int = lambda arg: params.integer(arg, from_=1)
    is_tuple = lambda arg: params.tuple_(arg, is_pos_int)
    shape = params.any_(shape, is_pos_int, is_tuple)

    return np.full(shape, self._value)
def __init__(
    self,
    data: VectorSpaceData,
    model: Learner,
    scorer: Scorer,
    optimizers: Sequence[Optimizer],
    evaluations: Sequence[Evaluation] = (OptimizationTrajectoryPlot(),),
    num_trials: int = 1,
    training_data: Optional[Data] = None,
):
    """Initialize workflow.

    Parameters:
        data: vector space dataset to optimize over
        model: learner to be trained on the data
        scorer: scorer applied to the learner's predictions
        optimizers: optimizers to evaluate
        evaluations: evaluations to perform on the results
        num_trials: number of optimization trials to run
        training_data: optional data on which to train the learner
    """
    self._data = params.instance(data, VectorSpaceData)
    self._scorer = params.instance(scorer, Scorer)
    self._model = params.instance(model, Learner)
    self._optimizers = params.sequence(optimizers, type_=Optimizer)
    self._evaluations = params.tuple_(
        evaluations, lambda arg: params.instance(arg, Evaluation)
    )
    self._num_trials = params.integer(num_trials, from_=1)
    self._training_data = params.optional_(
        training_data, lambda arg: params.instance(arg, Data)
    )
def best_score_trajectory(
    self, maximize: bool = True, length: Optional[int] = None
) -> Sequence[float]:
    """Calculate the best score found so far as a function of the number of function evaluations.

    Parameters:
        maximize: whether the goal is to maximize (True) or minimize (False) the score
        length: total length of the result. If larger than the actual number of function
            evaluations, the result is padded with the best value. If smaller, the result
            is truncated. If None, the result is returned as-is.

    Returns:
        a sequence of floats, each one corresponding to the best score found at that
        point in the optimization trajectory
    """
    maximize = params.boolean(maximize)
    length = params.optional_(length, lambda arg: params.integer(arg, from_=1))

    best_score = np.empty(self.num_evaluations)
    idx = 0
    best_score_so_far = self.steps[0].scores[0]
    direction = 1.0 if maximize else -1.0

    for optimization_iter in self.steps:
        for eval_ in optimization_iter.scores:
            if eval_ * direction > best_score_so_far * direction:
                best_score_so_far = eval_
            best_score[idx] = best_score_so_far * direction
            idx += 1

    if length is not None:
        extra_padding = length - len(best_score)
        if extra_padding < 0:
            return best_score[:extra_padding]  # TODO: raise a warning?
        return np.pad(best_score, ((0, extra_padding),), mode="edge")
    else:
        return best_score
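
# Worked example of the padding/truncation behaviour above, in pure NumPy:
import numpy as np

best = np.array([1.0, 3.0, 3.0, 4.0])  # best-so-far after 4 evaluations
np.pad(best, ((0, 2),), mode="edge")   # length=6 -> [1, 3, 3, 4, 4, 4] (padded with best)
best[: 3 - len(best)]                  # length=3 -> [1, 3, 3]          (truncated)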
def full_grid(self, data: VectorSpaceData, samples_per_dim: int, domain=None):
    """Full multi-dimensional evenly-spaced grid.

    For one sample per dimension, the result is a single vector, the mean of the domain.

    Parameters:
        data: sampled dataset
        samples_per_dim: number of evenly-spaced samples to take in each dimension
        domain: (sub)domain to sample from; by default, data's domain is used

    Returns:
        two-dimensional NumPy array where samples are rows
    """
    data = params.instance(data, VectorSpaceData)
    k = params.integer(samples_per_dim, above=0)  # positive integer
    domain = data.domain if domain is None else domain
    domain = params.hypercube_domain(domain, data.dimensions)

    if k == 1:
        return np.mean(domain, axis=1).reshape((1, -1))

    locs = (np.linspace(xfrom, xto, k) for xfrom, xto in domain)
    return np.asfarray(list(itertools.product(*locs)))
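
# Illustrative grid construction for d = 2 and samples_per_dim = 3 over the
# domain [0, 1] x [0, 2]; the Cartesian product yields the 9 grid vertices as rows:
import itertools
import numpy as np

domain = [(0.0, 1.0), (0.0, 2.0)]
locs = (np.linspace(a, b, 3) for a, b in domain)
grid = np.asarray(list(itertools.product(*locs)), dtype=float)  # shape (9, 2)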