def test_experiment_function() -> None:
    ifunc = base.ExperimentFunction(
        _arg_return,
        p.Instrumentation(  # type: ignore
            p.Choice([1, 12]),
            "constant",
            p.Array(shape=(2, 2)),
            constkwarg="blublu",
            plop=p.Choice([3, 4]),
        ),
    )
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    output = ifunc(*args0, **kwargs0)  # this is very stupid and should be removed when Parameter is in use
    args: tp.Any = output[0]  # type: ignore
    kwargs: tp.Any = output[1]  # type: ignore
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    instru_str = (
        "Instrumentation(Tuple(Choice(choices=Tuple(1,12),"
        "weights=Array{(1,2)}),constant,"
        "Array{(2,2)}),"
        "Dict(constkwarg=blublu,plop=Choice(choices=Tuple(3,4),"
        "weights=Array{(1,2)})))"
    )
    testing.printed_assert_equal(
        ifunc.descriptors,
        {
            "dimension": 8,
            "name": "_arg_return",
            "function_class": "ExperimentFunction",
            "parametrization": instru_str,
        },
    )
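# The _arg_return helper is not shown in this section. A minimal stand-in
# consistent with the assertions in these tests simply echoes its inputs:
def _arg_return(*args: tp.Any, **kwargs: tp.Any) -> tp.Tuple[tp.Tuple[tp.Any, ...], tp.Dict[str, tp.Any]]:
    return args, kwargs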
def _make_pyomo_variable_to_parametrization(model_component: pyomo.Var, params: ParamDict) -> ParamDict:
    # https://pyomo.readthedocs.io/en/stable/pyomo_modeling_components/Sets.html
    # Refer to the implementation in pyomo/core/base/var.py
    # To further improve the readability of this function, we should find out how to
    # represent {None: ng.p.Scalar(), 1: ng.p.Scalar()} in ng.p.Dict.
    # We do not adopt nested parametrization, which would require type information to
    # distinguish string from int keys. Such conversion would have to be done in
    # _pyomo_obj_function_wrapper and _pyomo_constraint_wrapper, which slows down optimization.
    if not isinstance(model_component, (pyomo.base.var.IndexedVar, pyomo.base.var.SimpleVar)):
        raise NotImplementedError
    # Normally, Pyomo creates a set for the indices used by a variable.
    for k, v in model_component._data.items():
        if isinstance(v, pyomo.base.var._GeneralVarData):
            if v.is_fixed():
                raise NotImplementedError
            if k is None:
                params_name = str(model_component.name)
            else:
                params_name = f"{model_component.name}[{_convert_to_ng_name(k)}]"
            if isinstance(v.domain, pyomo.RangeSet):
                params = _make_pyomo_range_set_to_parametrization(v.domain, params, params_name)
            elif isinstance(v.domain, pyomo.Set) and v.domain.isfinite():
                if v.domain.isordered():
                    params[params_name] = p.Choice(list(v.domain.ordered_data()))
                else:
                    params[params_name] = p.Choice(list(v.domain.data()))
            else:
                raise NotImplementedError(f"Cannot handle domain type {type(v.domain)}")
        else:
            raise NotImplementedError(f"Cannot handle variable type {type(v)}")
    return params
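# --- Usage sketch (not part of the library): how the converter above might be
# exercised on a tiny model. Assumptions: `pyomo` is `pyomo.environ`, and
# ParamDict is a Dict[str, p.Parameter] as the surrounding code suggests.
import pyomo.environ as pyomo

model = pyomo.ConcreteModel()
model.s = pyomo.Set(initialize=[2, 4, 8])
model.x = pyomo.Var(domain=pyomo.Reals)  # continuous domain, expected to hit the RangeSet branch
model.z = pyomo.Var(domain=model.s)      # finite domain, becomes p.Choice([2, 4, 8])

params: ParamDict = {}
for component in [model.x, model.z]:
    params = _make_pyomo_variable_to_parametrization(component, params)
# params now maps "x" to an unbounded p.Scalar() and "z" to a p.Choice over {2, 4, 8}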
def test_experiment_function() -> None:
    param = p.Instrumentation(
        p.Choice([1, 12]),
        "constant",
        p.Array(shape=(2, 2)),
        constkwarg="blublu",
        plop=p.Choice([3, 4]),
    )
    with pytest.raises(RuntimeError):
        base.ExperimentFunction(_arg_return, param)
    param.set_name("myparam")
    ifunc = base.ExperimentFunction(_arg_return, param)
    np.testing.assert_equal(ifunc.dimension, 8)
    data = [-100.0, 100, 1, 2, 3, 4, 100, -100]
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    output: tp.Any = ifunc(*args0, **kwargs0)
    args: tp.Any = output[0]
    kwargs: tp.Any = output[1]
    testing.printed_assert_equal(args, [12, "constant", [[1, 2], [3, 4]]])
    testing.printed_assert_equal(kwargs, {"constkwarg": "blublu", "plop": 3})
    testing.printed_assert_equal(
        ifunc.descriptors,
        {
            "dimension": 8,
            "name": "_arg_return",
            "function_class": "ExperimentFunction",
            "parametrization": "myparam",
        },
    )
def test_deterministic_data_setter() -> None:
    instru = p.Instrumentation(p.Choice([0, 1, 2, 3]), y=p.Choice([0, 1, 2, 3]))
    ifunc = base.ExperimentFunction(_Callable(), instru)
    data = [0.01, 0, 0, 0, 0.01, 0, 0, 0]
    for _ in range(20):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=True).value
        testing.printed_assert_equal(args, [0])
        testing.printed_assert_equal(kwargs, {"y": 0})
    arg_sum, kwarg_sum = 0, 0
    for _ in range(24):
        args, kwargs = ifunc.parametrization.spawn_child().set_standardized_data(data, deterministic=False).value
        arg_sum += args[0]
        kwarg_sum += kwargs["y"]
    assert arg_sum != 0
    assert kwarg_sum != 0
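# For context: the test above hinges on how p.Choice decodes standardized data
# (same nevergrad version as these tests). A deterministic read-out always picks
# the highest-weight option, while the stochastic one samples from the softmax:
choice = p.Choice([0, 1, 2, 3]).spawn_child()
choice.set_standardized_data([0.01, 0, 0, 0], deterministic=True)
assert choice.value == 0  # argmax of the weights, reproducible across calls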
def _make_pyomo_range_set_to_parametrization(
    domain: pyomo.RangeSet, params: ParamDict, params_name: str
) -> ParamDict:
    # https://pyomo.readthedocs.io/en/stable/pyomo_modeling_components/Sets.html
    # Refer to the implementation in pyomo/core/base/set.py
    ranges = list(domain.ranges())
    num_ranges = len(ranges)
    if num_ranges == 1 and ranges[0].step in [-1, 0, 1]:
        if isinstance(ranges[0], pyomo.base.range.NumericRange):
            lb, ub = ranges[0].start, ranges[0].end
            if ranges[0].step < 0:
                lb, ub = ub, lb
            # Open bounds are shifted by one ulp toward the inside of the interval,
            # so the nevergrad bounds remain strict.
            if (lb is not None) and (not ranges[0].closed[0]):
                lb = float(np.nextafter(lb, np.inf))
            if (ub is not None) and (not ranges[0].closed[1]):
                ub = float(np.nextafter(ub, -np.inf))
            params[params_name] = p.Scalar(lower=lb, upper=ub)
            if ranges[0].step in [-1, 1]:
                # May consider using nested parametrization instead
                params[params_name].set_integer_casting()  # type: ignore
        else:
            raise NotImplementedError(f"Cannot handle range type {type(ranges[0])}")
    elif isinstance(domain, pyomo.FiniteSimpleRangeSet):
        # Need to handle step size; assumes the ranges do not overlap
        params[params_name] = p.Choice([range(*r) for r in domain.ranges()])
    else:
        raise NotImplementedError(f"Cannot handle domain type {type(domain)}")
    return params
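# For the open-interval handling above: np.nextafter returns the closest
# representable float in the given direction, i.e. the tightest closed bound
# strictly inside an open range. For example:
lb = np.nextafter(5.0, np.inf)   # smallest float strictly above 5.0
ub = np.nextafter(5.0, -np.inf)  # largest float strictly below 5.0
assert ub < 5.0 < lb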
def test_bound_scaler() -> None:
    ref = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2,)).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    output = scaler.transform([1.0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    np.testing.assert_almost_equal(values["lr"], 1000)
    # again, on the middle point
    output = scaler.transform([0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    np.testing.assert_almost_equal(param.value[1]["lr"], 1.0)
    np.testing.assert_almost_equal(param.value[1]["stuff"], 0.5)
def test_bound_scaler() -> None:
    ref = p.Instrumentation(
        p.Array(shape=(1, 2)).set_bounds(-12, 12, method="arctan"),
        p.Array(shape=(2,)).set_bounds(-12, 12, full_range_sampling=False),
        lr=p.Log(lower=0.001, upper=1000),
        stuff=p.Scalar(lower=-1, upper=2),
        unbounded=p.Scalar(lower=-1, init=0.0),
        value=p.Scalar(),
        letter=p.Choice("abc"),
    )
    # make sure the order is preserved using the legacy split method
    expected = [x[1] for x in split_as_data_parameters(ref)]
    assert p.helpers.list_data(ref) == expected
    # check the bounds
    param = ref.spawn_child()
    scaler = utils.BoundScaler(param)
    output = scaler.transform([1.0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    (array1, array2), values = param.value
    np.testing.assert_array_almost_equal(array1, [[12, 12]])
    np.testing.assert_array_almost_equal(array2, [1, 1])
    assert values["stuff"] == 2
    assert values["unbounded"] == 1
    assert values["value"] == 1
    assert values["lr"] == pytest.approx(1000)
    # again, on the middle point
    output = scaler.transform([0] * param.dimension, lambda x: x)
    param.set_standardized_data(output)
    assert param.value[1]["lr"] == pytest.approx(1.0)
    assert param.value[1]["stuff"] == pytest.approx(0.5)
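# Both copies of this test pin the same contract: the all-ones vector maps every
# bounded parameter to its upper bound, and the zero vector to the middle of its
# range, which for p.Log is the geometric midpoint. That convention also shows up
# in the parameter's default value (sketch, same nevergrad version as the tests):
lr = p.Log(lower=0.001, upper=1000)
assert lr.value == pytest.approx(1.0)  # geometric midpoint: sqrt(0.001 * 1000) == 1.0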
def _make_parametrization(name: str, dimension: int, bounding_method: str = "bouncing", rolling: bool = False) -> p.Array:
    """Creates the appropriate parametrization for a photonics problem

    Parameters
    ----------
    name: str
        problem name, among bragg, chirped and morpho
    dimension: int
        size of the problem, among 16, 40 and 60 (morpho) or 80 (bragg and chirped)
    bounding_method: str
        transform type for the bounding ("arctan", "tanh", "bouncing" or "clipping", see `Array.bounded`)
    rolling: bool
        whether to add a rolling translation to the custom mutations

    Returns
    -------
    Array
        the parametrization for the problem
    """
    if name == "bragg":
        shape = (2, dimension // 2)
        bounds = [(2, 3), (30, 180)]
    elif name == "chirped":
        shape = (1, dimension)
        bounds = [(30, 180)]
    elif name == "morpho":
        shape = (4, dimension // 4)
        bounds = [(0, 300), (0, 600), (30, 600), (0, 300)]
    else:
        raise NotImplementedError(f"Transform for {name} is not implemented")
    divisor = max(2, len(bounds))
    assert not dimension % divisor, f"points length should be a multiple of {divisor}, got {dimension}"
    assert shape[0] * shape[1] == dimension, f"Cannot work with dimension {dimension} for {name}: not divisible by {shape[0]}."
    b_array = np.array(bounds)
    assert b_array.shape[0] == shape[0]  # pylint: disable=unsubscriptable-object
    init = np.sum(b_array, axis=1, keepdims=True).dot(np.ones((1, shape[1]))) / 2
    array = p.Array(init=init)
    if bounding_method not in ("arctan", "tanh"):
        # sigma must be adapted for clipping and constraint methods
        sigma = p.Array(init=[[10.0]] if name != "bragg" else [[0.03], [10.0]]).set_mutation(exponent=2.0)  # type: ignore
        array.set_mutation(sigma=sigma)
    if rolling:
        array.set_mutation(custom=p.Choice(["gaussian", "cauchy", p.mutation.Translation(axis=1)]))
    array.set_bounds(b_array[:, [0]], b_array[:, [1]], method=bounding_method, full_range_sampling=True)
    array.set_recombination(p.mutation.Crossover(axis=1)).set_name("")
    assert array.dimension == dimension, f"Unexpected {array} for dimension {dimension}"
    return array
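# Usage sketch: building the bragg parametrization and sampling within bounds.
# (The physical reading of the two rows, indices then thicknesses, is an assumption.)
param = _make_parametrization("bragg", 80)  # shape (2, 40)
sample = param.sample().value
assert sample.shape == (2, 40)
assert (sample[0] >= 2).all() and (sample[0] <= 3).all()
assert (sample[1] >= 30).all() and (sample[1] <= 180).all()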
def test_instrumented_function_kwarg_order() -> None:
    ifunc = base.ExperimentFunction(
        _arg_return,
        p.Instrumentation(  # type: ignore
            kw4=p.Choice([1, 0]),
            kw2="constant",
            kw3=p.Array(shape=(2, 2)),
            kw1=p.Scalar(2.0).set_mutation(sigma=2.0),
        ),
    )
    np.testing.assert_equal(ifunc.dimension, 7)
    data = np.array([-1, 1, 2, 3, 4, 100, -100])
    args0, kwargs0 = ifunc.parametrization.spawn_child().set_standardized_data(data).value
    # this is very stupid and should be removed when Parameter is in use
    kwargs: tp.Any = ifunc(*args0, **kwargs0)[1]  # type: ignore
    testing.printed_assert_equal(kwargs, {"kw1": 0, "kw2": "constant", "kw3": [[1, 2], [3, 4]], "kw4": 1})
@testing.parametrized(
    floats=((p.Scalar(), p.Scalar(init=12.0)), True, False),
    array_int=((p.Scalar(), p.Array(shape=(1,)).set_integer_casting()), False, False),
    softmax_noisy=((p.Choice(["blue", "red"]), p.Array(shape=(1,))), True, True),
    softmax_deterministic=((p.Choice(["blue", "red"], deterministic=True), p.Array(shape=(1,))), False, False),
    ordered_discrete=((p.TransitionChoice([True, False]), p.Array(shape=(1,))), False, False),
)
def test_parametrization_continuous_noisy(variables: tp.Tuple[p.Parameter, ...], continuous: bool, noisy: bool) -> None:
    instru = p.Instrumentation(*variables)
    assert instru.descriptors.continuous == continuous
    assert instru.descriptors.deterministic != noisy
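# The descriptors checked above are also directly accessible; for instance, a
# stochastic softmax Choice is continuous but not deterministic:
instru = p.Instrumentation(p.Choice(["blue", "red"]))
assert instru.descriptors.continuous
assert not instru.descriptors.deterministic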
def __init__(
    self, regressor: str, data_dimension: tp.Optional[int] = None, dataset: str = "artificial", overfitter: bool = False
) -> None:
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data = 120  # default for artificial functions
    self._cross_val_num = 10  # number of cross-validation folds
    # Dimension does not make sense if we use a real-world dataset.
    assert bool("artificial" in dataset) == bool(data_dimension is not None)
    # Variables for storing the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray
    # Variables for storing the cross-validation splits.
    self.X_train_cv: tp.List[tp.Any] = []  # This will be the list of training subsets.
    self.X_valid_cv: tp.List[tp.Any] = []  # This will be the list of validation subsets.
    self.y_train_cv: tp.List[tp.Any] = []
    self.y_valid_cv: tp.List[tp.Any] = []
    self.X_train: np.ndarray
    self.y_train: np.ndarray
    self.X_test: np.ndarray
    self.y_test: np.ndarray
    evalparams: tp.Dict[str, tp.Any] = {}
    if regressor == "decision_tree_depth":
        # Only the depth is tuned.
        parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        # We optimize only the depth, so we fix all parameters other than the depth.
        params = dict(noise_free=False, criterion="mse", min_samples_split=0.00001, regressor="decision_tree",
                      alpha=1.0, learning_rate="no", activation="no", solver="no")
    elif regressor == "any":
        # First we define the list of parameters in the optimization.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
            min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
            regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
            solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
            alpha=p.Log(lower=0.0000001, upper=1.0),  # Complexity penalization.
        )
        # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
        params = dict(noise_free=False)
    elif regressor == "decision_tree":
        # We specify below the list of hyperparameters for the decision trees.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=0.0000001, upper=1),
            regressor="decision_tree",
        )
        params = dict(noise_free=False, alpha=1.0, learning_rate="no", regressor="decision_tree",
                      activation="no", solver="no")
        evalparams = dict(params, criterion="mse", min_samples_split=0.00001)
    elif regressor == "mlp":
        # Let us define the parameters of the neural network.
        parametrization = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=0.0000001, upper=1.0),
        )
        params = dict(noise_free=False, regressor="mlp", depth=-3, criterion="no", min_samples_split=0.1)
    else:
        raise ValueError(f"Problem type {regressor} undefined!")
    # Build eval params if not specified.
    if not evalparams:
        evalparams = dict(params)
    # For the evaluation we remove the noise (unless overfitter).
    evalparams["noise_free"] = not overfitter
    super().__init__(partial(self._ml_parametrization, **params), parametrization.set_name(""))
    self._evalparams = evalparams
    self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
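# Hypothetical usage sketch -- assumes this __init__ belongs to a class named
# MLTuning (the class name is not given in this section) and a standard optimizer:
import nevergrad as ng

func = MLTuning("decision_tree_depth", data_dimension=10)
optimizer = ng.optimizers.OnePlusOne(parametrization=func.parametrization, budget=50)
recommendation = optimizer.minimize(func)
print(recommendation.kwargs)  # e.g. {"depth": 12}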
def __init__(self, regressor: str, data_dimension: tp.Optional[int] = None, dataset: str = "artificial",
             overfitter: bool = False) -> None:
    self.regressor = regressor
    self.data_dimension = data_dimension
    self.dataset = dataset
    self.overfitter = overfitter
    self._descriptors: tp.Dict[str, tp.Any] = {}
    self.add_descriptors(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
    self.name = regressor + f"Dim{data_dimension}"
    self.num_data: int = 0
    # Dimension does not make sense if we use a real-world dataset.
    assert bool("artificial" in dataset) == bool(data_dimension is not None)
    # Variables for storing the training set and the test set.
    self.X: np.ndarray = np.array([])
    self.y: np.ndarray
    # Variables for storing the cross-validation splits.
    self.X_train: tp.List[tp.Any] = []  # This will be the list of training subsets.
    self.X_valid: tp.List[tp.Any] = []  # This will be the list of validation subsets.
    self.y_train: tp.List[tp.Any] = []
    self.y_valid: tp.List[tp.Any] = []
    self.X_test: np.ndarray
    self.y_test: np.ndarray
    if regressor == "decision_tree_depth":
        # Only the depth is tuned.
        parametrization = p.Instrumentation(depth=p.Scalar(lower=1, upper=1200).set_integer_casting())
        # We optimize only the depth, so we fix all other parameters, using "partial".
        super().__init__(
            partial(self._ml_parametrization, noise_free=False, criterion="mse", min_samples_split=0.00001,
                    regressor="decision_tree", alpha=1.0, learning_rate="no", activation="no", solver="no"),
            parametrization)
        # For the evaluation, we remove the noise (unless overfitter).
        self.evaluation_function = partial(
            self._ml_parametrization,  # type: ignore
            noise_free=not overfitter, criterion="mse", min_samples_split=0.00001, regressor="decision_tree",
            alpha=1.0, learning_rate="no", activation="no", solver="no")
    elif regressor == "any":
        # First we define the list of parameters in the optimization.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),  # Depth, in case we use a decision tree.
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),  # Criterion for building the decision tree.
            min_samples_split=p.Log(lower=0.0000001, upper=1),  # Min ratio of samples in a node for splitting.
            regressor=p.Choice(["mlp", "decision_tree"]),  # Type of regressor.
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),  # Activation function, in case we use a net.
            solver=p.Choice(["lbfgs", "sgd", "adam"]),  # Numerical optimizer.
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),  # Learning rate schedule.
            alpha=p.Log(lower=0.0000001, upper=1.0),  # Complexity penalization.
        )
        # Only the dimension is fixed, so "partial" is just used for fixing the dimension.
        # noise_free is False (meaning that we consider the cross-validation loss) during the optimization.
        super().__init__(partial(self._ml_parametrization, noise_free=False), parametrization)
        # For the evaluation we use the test set, which is big, so noise_free=True (unless overfitter).
        self.evaluation_function = partial(
            self._ml_parametrization,  # type: ignore
            noise_free=not overfitter)
    elif regressor == "decision_tree":
        # We specify below the list of hyperparameters for the decision trees.
        parametrization = p.Instrumentation(
            depth=p.Scalar(lower=1, upper=1200).set_integer_casting(),
            criterion=p.Choice(["mse", "friedman_mse", "mae"]),
            min_samples_split=p.Log(lower=0.0000001, upper=1),
            regressor="decision_tree",
        )
        # We use "partial" to fix the parameters of the neural network, given that we work on the decision tree only.
        super().__init__(
            partial(self._ml_parametrization, noise_free=False, alpha=1.0, learning_rate="no",
                    regressor="decision_tree", activation="no", solver="no"),
            parametrization)
        # For the test we just switch noise_free to True (unless overfitter).
        self.evaluation_function = partial(
            self._ml_parametrization,  # type: ignore
            criterion="mse", min_samples_split=0.00001, regressor="decision_tree", noise_free=not overfitter,
            alpha=1.0, learning_rate="no", activation="no", solver="no")
    elif regressor == "mlp":
        # Let us define the parameters of the neural network.
        parametrization = p.Instrumentation(
            activation=p.Choice(["identity", "logistic", "tanh", "relu"]),
            solver=p.Choice(["lbfgs", "sgd", "adam"]),
            regressor="mlp",
            learning_rate=p.Choice(["constant", "invscaling", "adaptive"]),
            alpha=p.Log(lower=0.0000001, upper=1.0),
        )
        # And, using partial, we get rid of the parameters of the decision tree (we work on the neural net,
        # not on the decision tree).
        super().__init__(
            partial(self._ml_parametrization, noise_free=False, regressor="mlp", depth=-3, criterion="no",
                    min_samples_split=0.1),
            parametrization)
        self.evaluation_function = partial(
            self._ml_parametrization,  # type: ignore
            regressor="mlp", noise_free=not overfitter, depth=-3, criterion="no", min_samples_split=0.1)
    else:
        raise ValueError(f"Problem type {regressor} undefined!")
    self.register_initialization(regressor=regressor, data_dimension=data_dimension, dataset=dataset, overfitter=overfitter)
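# The evaluation_function attribute above separates the noisy objective
# (cross-validation loss) used during optimization from the final noise-free test
# score. A sketch of how a benchmark loop might consume it (the optimizer and
# func/recommendation names are assumptions, as in the sketch further above):
recommendation = optimizer.minimize(func)            # optimizes the noisy CV loss
test_loss = func.evaluation_function(**recommendation.kwargs)  # noise-free scoring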