def result(self):
    """Package the optimizer's current state as an ``OptimizeResult``.

    Returns
    -------
    OptimizeResult
        ``x`` is read from ``self._xmin``, ``fun`` from ``self._fvalue``,
        ``message`` from ``self._message`` and ``nit`` from
        ``self._step_record``.
    """
    # Keyword order matches the order in which the fields are reported.
    return OptimizeResult(
        x=self._xmin,
        fun=self._fvalue,
        message=self._message,
        nit=self._step_record,
    )
def get_optimization_results(t, N, factors, taskset):
    """Build one ``OptimizeResult`` per entry of ``factors``.

    Parameters
    ----------
    t : int
        Iteration counter; stored as ``nit`` and used to derive ``nfev``.
    N : int
        Multiplier used in the evaluation count ``(t + 1) * 2 * N``.
    factors : sequence
        Indexable collection whose items expose ``theta`` and ``f_opt``.
    taskset : object
        Object exposing ``normalizers``; a deep copy is stored as each
        result's ``message``.

    Returns
    -------
    list of OptimizeResult
        One result per factor, in the same order as ``factors``.
    """

    def _summarise(factor):
        # Deep copies keep the returned results independent from the live
        # factor / taskset objects.
        summary = OptimizeResult()
        summary.x = deepcopy(factor.theta)
        summary.fun = factor.f_opt
        summary.nit = t
        summary.nfev = (t + 1) * 2 * N
        summary.message = deepcopy(taskset.normalizers)
        return summary

    return [_summarise(factors[index]) for index in range(len(factors))]
def _int_spontaneous_raman(self, z_array, raman_matrix, alphap_fiber, freq_array, cr_raman_matrix, freq_diff, ase_bc, bn_array, temperature): spontaneous_raman_scattering = OptimizeResult() dx = self.solver_params.z_resolution h = ph.value('Planck constant') kb = ph.value('Boltzmann constant') power_ase = np.nan * np.ones(raman_matrix.shape) int_pump = cumtrapz(raman_matrix, z_array, dx=dx, axis=1, initial=0) for f_ind, f_ase in enumerate(freq_array): cr_raman = cr_raman_matrix[f_ind, :] vibrational_loss = f_ase / freq_array[:f_ind] eta = 1 / (np.exp( (h * freq_diff[f_ind, f_ind + 1:]) / (kb * temperature)) - 1) int_fiber_loss = -alphap_fiber[f_ind] * z_array int_raman_loss = np.sum( (cr_raman[:f_ind] * vibrational_loss * int_pump[:f_ind, :].transpose()).transpose(), axis=0) int_raman_gain = np.sum( (cr_raman[f_ind + 1:] * int_pump[f_ind + 1:, :].transpose()).transpose(), axis=0) int_gain_loss = int_fiber_loss + int_raman_gain + int_raman_loss new_ase = np.sum( (cr_raman[f_ind + 1:] * (1 + eta) * raman_matrix[f_ind + 1:, :].transpose()).transpose() * h * f_ase * bn_array[f_ind], axis=0) bc_evolution = ase_bc[f_ind] * np.exp(int_gain_loss) ase_evolution = np.exp(int_gain_loss) * cumtrapz( new_ase * np.exp(-int_gain_loss), z_array, dx=dx, initial=0) power_ase[f_ind, :] = bc_evolution + ase_evolution spontaneous_raman_scattering.x = power_ase spontaneous_raman_scattering.success = True spontaneous_raman_scattering.message = "Spontaneous Raman Scattering evaluated successfully" return spontaneous_raman_scattering
def scipy_nlopt_cobyla(*args, **kwargs):
    """Run NLopt's COBYLA and report the outcome as a SciPy ``OptimizeResult``.

    Parameters
    ----------
    args[0] : callable
        Target function to be minimized.
    args[1] : sequence of float
        ``x0``, the starting point of the minimization.
    bounds : sequence of (min, max) pairs
        Required keyword. Box bounds, one pair per coordinate:
        ``[[min, max], [min, max], ...]``.
    ftol_rel : float, optional
        Keyword forwarded to ``opt.set_ftol_rel``.
    xtol_rel : float, optional
        Keyword forwarded to ``opt.set_xtol_rel``.

    At least one of ``ftol_rel`` / ``xtol_rel`` should be specified.

    Returns
    -------
    OptimizeResult
        With ``x``, ``fun`` and ``success`` always set.  ``status`` holds
        NLopt's last result code; it is not set when
        ``nlopt.RoundoffLimited`` is raised (in that case ``x`` is the
        starting point and ``message`` is ``'nlopt.RoundoffLimited'``).
    """
    bounds = kwargs['bounds']
    target, start = args[0], args[1]
    answ = OptimizeResult()

    opt = nlopt.opt(nlopt.LN_COBYLA, len(start))
    opt.set_lower_bounds([pair[0] for pair in bounds])
    opt.set_upper_bounds([pair[1] for pair in bounds])
    if 'ftol_rel' in kwargs:
        opt.set_ftol_rel(kwargs['ftol_rel'])
    if 'xtol_rel' in kwargs:
        # Fix: this used to call set_ftol_rel, which silently replaced the
        # function tolerance and never installed a tolerance on x.
        opt.set_xtol_rel(kwargs['xtol_rel'])
    opt.set_min_objective(target)

    x0 = list(start)
    try:
        x1 = opt.optimize(x0)
    except nlopt.RoundoffLimited:
        # Report the starting point as a failed run instead of propagating.
        answ.x = x0
        answ.fun = target(x0)
        answ.success = False
        answ.message = 'nlopt.RoundoffLimited'
        return answ

    answ.x = x1
    answ.fun = target(x1)
    status = opt.last_optimize_result()
    # Result codes 3 and 4 are the only ones treated as success here
    # (presumably NLopt's FTOL_REACHED / XTOL_REACHED - confirm).
    answ.success = status in (3, 4)
    answ.status = status
    if not answ.fun == opt.last_optimum_value():
        # Single-argument print works identically on Python 2 and 3.
        print("Something's wrong,  {} {}".format(
            answ.fun, opt.last_optimum_value()))
    return answ
def get_optimization_results(t, population, factorial_cost, scalar_fitness,
                             skill_factor, message):
    """Collect one ``OptimizeResult`` per distinct skill factor.

    The number of results equals the number of distinct values in
    ``skill_factor``; result ``k`` is built for task index ``k``.

    Each result carries:

    * ``x`` / ``fun`` - as returned by ``get_best_individual`` for task ``k``;
    * ``message`` - the ``message`` argument, unchanged;
    * ``nit`` - ``t``;
    * ``nfev`` - ``(t + 1) * (len(population) // 2)``;
    * ``mean`` / ``std`` - as returned by ``get_statistics`` for task ``k``.

    Returns
    -------
    list of OptimizeResult
    """
    task_count = len(set(skill_factor))
    half_population = len(population) // 2

    summaries = []
    for task in range(task_count):
        best_x, best_fun = get_best_individual(
            population, factorial_cost, scalar_fitness, skill_factor, task)
        mean, std = get_statistics(factorial_cost, skill_factor, task)
        summaries.append(OptimizeResult(
            x=best_x,
            fun=best_fun,
            message=message,
            nit=t,
            nfev=(t + 1) * half_population,
            mean=mean,
            std=std,
        ))
    return summaries
def dual_annealing(func, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None, x0=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a tuple of any additional fixed parameters needed to
        completely specify the function.
    bounds : sequence, shape (n, 2)
        Bounds for variables. ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        A value of zero or less performs no search iteration: the result
        then holds the initial energy state.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `~numpy.random.mtrand.RandomState` instance}, optional
        If `seed` is not specified the `~numpy.random.mtrand.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single N-D starting point.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if ``restart_temp_ratio``
        lies outside (0, 1), or if the bounds contain inf/nan values or are
        not strictly ordered ``min < max``.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-D problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
     -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """  # noqa: E501
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    # Split the (min, max) pairs into one array of lower and one of upper bounds
    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer. The options dict is only unpacked here, so
    # the shared default ``{}`` is never mutated.
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    if maxiter <= 0:
        # range(maxiter) would be empty, so the inner loop could never set
        # need_to_stop and the outer loop would spin forever.
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    # Run the search loop
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1
    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
def model_policy_gradient(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        learning_rate: float = 1e-2,
        decay_rate: float = 0.96,
        decay_steps: int = 5,
        log_sigma_init: float = -5.0,
        max_iterations: int = 1000,
        batch_size: int = 10,
        radius_coeff: float = 3.0,
        warmup_steps: int = 10,
        batch_size_model: int = 65536,
        save_func_vals: bool = False,
        random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model.
    The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The learning rate for the policy gradient.
        decay_rate: The learning decay rate for the Adam optimizer.
        decay_steps: The learning decay steps for the Adam optimizer.
        log_sigma_init: The initial value for the sigma of the policy
            in the log scale.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample in each iteration. The cost
            of evaluation of these samples are computed through the
            quantum computer cost model.
        radius_coeff: The ratio determining the size of the radius around
            the current iterate to sample points from to build the quadratic
            model. The ratio is with respect to the maximal ratio of the
            samples from the current policy.
        warmup_steps: The number of steps before the model policy gradient is
            performed. Before these steps, we use the policy gradient without
            the model.
        batch_size_model: The model sample batch size.
            After we fit the quadratic model, we use the model to evaluate
            on big enough batch of samples.
        save_func_vals: Whether to compute and save the function values for
            the current value of parameter.
        random_state: A seed (int) or `np.random.RandomState` class to use when
            generating random values. If not set, defaults to using the module
            methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult. Besides ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message`` it carries the lists ``x_iters``, ``xs_iters``,
        ``ys_iters`` and ``func_vals`` filled during the run.
    """
    random_state = value.parse_random_state(random_state)

    # Work on copies so the caller's known values are never modified.
    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # set up the first and second moment estimate
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # set up lr schedule and optimizer
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    while num_iter < max_iterations:
        # get samples from the current policy to evaluate
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # current sampling radius (maximal)
        max_radius = max(
            (np.linalg.norm(x - current_x) for x in new_xs), default=0)

        reward = [-y for y in new_ys]

        # warmup steps control whether to use the model to estimate the f
        if num_iter >= warmup_steps:
            # Determine points to use to build model
            model_xs = []
            model_ys = []
            for x, y in zip(known_xs, known_ys):
                if np.linalg.norm(x - current_x) < radius_coeff * max_radius:
                    model_xs.append(x)
                    model_ys.append(y)
            # safer way without the `SVD` not converging
            try:
                # Fix: the model used to be built around ``x``, the stale
                # loop variable left over from the filter above (the last
                # known point). It is queried below with
                # ``new_xs - current_x``, so it has to be built around
                # ``current_x`` (assumes the third argument is the
                # expansion centre - confirm against _get_quadratic_model).
                model = _get_quadratic_model(model_xs, model_ys, current_x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model)
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # subtract baseline
        reward = reward - np.mean(reward)

        # analytic derivatives (natural gradient policy gradient)
        raw_mean_grad = np.dot(z.T, reward)
        raw_log_sigma_grad = np.dot(z.T**2, reward)
        delta_mean = raw_mean_grad * sigma
        delta_log_sigma = raw_log_sigma_grad / np.sqrt(2)

        delta_mean_norm = np.linalg.norm(raw_mean_grad)
        delta_log_sigma_norm = np.linalg.norm(raw_log_sigma_grad)

        # Normalize, but leave an exactly-zero gradient untouched: dividing
        # by a zero norm (e.g. all rewards equal) would turn the iterate
        # into NaN for the rest of the run.
        if delta_mean_norm > 0:
            delta_mean = delta_mean / delta_mean_norm
        if delta_log_sigma_norm > 0:
            delta_log_sigma = delta_log_sigma / delta_log_sigma_norm

        # gradient ascend to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.
    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate decay
            exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
        tol: The algorithm terminates when the difference between the current
            iterate and the next suggested iterate is smaller than this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult. Besides ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message`` it carries the lists ``x_iters``, ``xs_iters``,
        ``ys_iters``, ``func_vals`` and ``model_vals`` filled during the run.
        ``nfev`` counts only calls actually made to ``f``.
    """

    # Work on copies so the caller's known values are never modified.
    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value (new_xs[0] is the current iterate itself)
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)

        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update
        current_x -= decayed_rate * model_gradient
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    if converged:
        # The value at the final iterate was already measured in the last
        # pass (as new_ys[0]); no further call to f is made.
        final_val = res.func_vals[-1]
    else:
        final_val = f(current_x)
        res.func_vals.append(final_val)
        # Fix: count the extra evaluation only when it actually happens.
        # It used to be added unconditionally, so nfev was one too high
        # whenever the run converged.
        total_evals += 1

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x_iters.append(current_x)
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
def dual_annealing(func, x0, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a tuple of any additional fixed parameters needed to
        completely specify the function.
    x0 : ndarray, shape(n,)
        Coordinates of a single initial starting point. If ``None`` is
        provided, initial coordinates are automatically generated (using the
        ``reset`` method from the internal ``EnergyState`` class).
    bounds : sequence, shape (n, 2)
        Bounds for variables. ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        A value of zero or less performs no search iteration: the result
        then holds the initial energy state.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `numpy.random.RandomState` instance}, optional
        If `seed` is not specified the `numpy.random.RandomState` singleton is
        used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination. ``nit`` is the number of completed search
        iterations. ``success`` is False when the strategy chain or the
        local search requested the stop, True otherwise; ``status`` is
        always 0.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if ``restart_temp_ratio``
        lies outside (0, 1), or if the bounds contain inf/nan values or are
        not strictly ordered ``min < max``.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, None, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
     -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    # Split the (min, max) pairs into one array of lower and one of upper bounds
    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        # Fix: the message used to read "are note consistent".
        raise ValueError('Bounds are not consistent min < max')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer. The options dict is only unpacked here, so
    # the shared default ``{}`` is never mutated.
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    # Run the search loop
    need_to_stop = False
    iteration = 0
    message = []
    success = True
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    if maxiter <= 0:
        # range(maxiter) would be empty, so the inner loop could never set
        # need_to_stop and the outer loop would spin forever.
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            # Fix: the counter used to be incremented *before* this check,
            # so only maxiter - 1 chain runs happened while nit reported
            # maxiter. It is now advanced once a pass has completed.
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    success = False
                    break
            iteration += 1

    # Return the OptimizeResult
    res = OptimizeResult()
    res.x = energy_state.xbest
    res.fun = energy_state.ebest
    res.nit = iteration
    res.nfev = func_wrapper.nfev
    res.njev = func_wrapper.ngev
    res.message = message
    # Added for parity with the usual OptimizeResult fields.
    res.success = success
    res.status = 0
    return res
def optimize_minimize_mhmcmc_cluster(objective, bounds, args=(), x0=None, T=1, N=3, burnin=100000, maxiter=1000000,
                                     target_ar=0.4, ar_tolerance=0.05, cluster_eps=DEFAULT_CLUSTER_EPS,
                                     rnd_seed=None, collect_samples=None, logger=None):
    r"""
    Minimize objective function and return up to N local minima solutions.

    The parameter space is explored with a random walk: a burn-in phase of `burnin` steps
    (nothing recorded except the acceptance rate), followed by `maxiter - burnin` main steps
    during which accepted points are cached, histogrammed and optionally sampled. The cached
    points with the lowest objective values are then clustered in normalized bounds
    coordinates, the objective is re-evaluated at each cluster mean, and the best N cluster
    means are returned.

    :param objective: Objective function to minimize. Called as ``objective(x, *args)`` and
        expected to return a float.
    :type objective: Callable(x, \*args) -> float
    :param bounds: Bounds of the parameter space.
    :type bounds: scipy.optimize.Bounds
    :param args: Any additional fixed parameters needed to completely specify the objective function.
    :type args: tuple or list
    :param x0: Initial guess. If None, will be selected randomly and uniformly within the parameter bounds.
    :type x0: numpy.array with same shape as elements of bounds
    :param T: The "temperature" parameter for the accept or reject criterion. To sample the domain well,
        should be in the order of the typical difference in local minima objective valuations.
    :type T: float
    :param N: Maximum number of minima to return
    :type N: int
    :param burnin: Number of random steps to discard before starting to accumulate statistics.
    :type burnin: int
    :param maxiter: Maximum number of steps to take (including burnin).
    :type maxiter: int
    :param target_ar: Target acceptance rate of point samples generated by stepping.
    :type target_ar: float between 0 and 1
    :param ar_tolerance: Tolerance on the acceptance rate before actively adapting the step size.
    :type ar_tolerance: float
    :param cluster_eps: Point proximity tolerance for DBSCAN clustering, in normalized bounds coordinates.
    :type cluster_eps: float
    :param rnd_seed: Random seed to force deterministic behaviour. If None, a seed is derived from the
        current time. The seed actually used is returned in the result as `rnd_seed`.
    :type rnd_seed: int
    :param collect_samples: If not None and integral type, collect collect_samples at regular intervals
        and return as part of solution.
    :type collect_samples: int or NoneType
    :param logger: Logger instance for outputting log messages.
    :return: OptimizeResult containing solution(s) and solver data. Besides `x` and `fun` (one entry per
        returned minimum) it carries `clusters`, `cluster_funvals`, `bins`, `distribution`,
        `acceptance_rate`, `samples`, `sample_funvals`, `bounds`, `version` and `rnd_seed`.
    :rtype: scipy.optimize.OptimizeResult with additional attributes
    :raises AssertionError: If `maxiter < 2 * burnin`, if `collect_samples` is not a positive int,
        or if `x0` lies outside `bounds`.
    """

    @call_counter
    def obj_counted(*args):
        return objective(*args)
    # end func

    # NOTE: these input checks are asserts (kept for backward compatibility with callers), so they
    # are skipped when Python runs with -O.
    assert maxiter >= 2 * burnin, "maxiter {} should be at least twice burnin steps {}".format(
        maxiter, burnin)
    main_iter = maxiter - burnin

    if collect_samples is not None:
        assert isinstance(collect_samples, int), "collect_samples expected to be integral type"
        assert collect_samples > 0, "collect_samples expected to be positive"
    # end if

    beta = 1.0 / T

    # Seed the global numpy RNG; when no seed is given, derive one from the clock so that it can
    # still be reported in the result.
    if rnd_seed is None:
        rnd_seed = int(time.time() * 1000) % (1 << 31)
    # end if
    np.random.seed(rnd_seed)
    if logger:
        logger.info('Using random seed {}'.format(rnd_seed))
    # end if

    if x0 is None:
        x0 = np.random.uniform(bounds.lb, bounds.ub)
    # end if
    assert np.all((x0 >= bounds.lb) & (x0 <= bounds.ub))
    x = x0.copy()
    funval = obj_counted(x, *args)

    # Set up stepper with adaptive acceptance rate
    stepper = BoundedRandNStepper(bounds)
    stepper = AdaptiveStepsize(stepper, accept_rate=target_ar, ar_tolerance=ar_tolerance, interval=50)

    # -------------------------------
    # DO BURN-IN
    accepted_burnin = 0
    tracked_range = tqdm(range(burnin), total=burnin, desc='BURN-IN')
    # Route stepper messages through the progress bar's write().
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for _ in tracked_range:
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        # Always accept an improvement; otherwise accept with probability exp(log_alpha).
        # The random draw only happens when the step is not an improvement.
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            stepper.notify_accept()
            accepted_burnin += 1
        # end if
    # end for
    # burnin == 0 is permitted by the checks above, so avoid dividing by zero.
    if burnin > 0:
        ar = float(accepted_burnin) / burnin
        if logger:
            logger.info("Burn-in acceptance rate: {}".format(ar))
        # end if
    # end if

    # -------------------------------
    # DO MAIN LOOP
    if collect_samples is not None:
        nsamples = min(collect_samples, main_iter)
        # Guard the degenerate case main_iter == 0 (loop below does not run, cadence is unused).
        sample_cadence = main_iter / nsamples if nsamples > 0 else 0.0
        samples = np.zeros((nsamples, len(x)))
        samples_fval = np.zeros(nsamples)
    # end if
    accepted = 0
    minima_sorted = SortedList(key=lambda rec: rec[1])  # Sort by objective function value
    hist = HistogramIncremental(bounds, nbins=100)
    # Cached a lot of potential minimum values, as these need to be clustered before return N results
    N_cached = int(np.ceil(N * main_iter / 500))
    next_sample = 0.0
    sample_count = 0
    tracked_range = tqdm(range(main_iter), total=main_iter, desc='MAIN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for i in tracked_range:
        # Record the current chain state every sample_cadence iterations.
        if collect_samples and i >= next_sample:
            assert sample_count < collect_samples
            samples[sample_count] = x
            samples_fval[sample_count] = funval
            sample_count += 1
            next_sample += sample_cadence
        # end if
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            minima_sorted.add((x, funval))
            # Keep only the N_cached best points: drop the one with the largest objective value.
            if len(minima_sorted) > N_cached:
                minima_sorted.pop()
            # end if
            stepper.notify_accept()
            hist += x
            accepted += 1
        # end if
    # end for
    stepper.logger = None
    ar = float(accepted) / main_iter if main_iter > 0 else 0.0
    if logger:
        logger.info("Acceptance rate: {}".format(ar))
        logger.info("Best minima (before clustering):\n{}".format(
            np.array([_mx[0] for _mx in minima_sorted[:10]])))
    # end if

    # -------------------------------
    # Cluster minima and associate each cluster with a local minimum.
    # Using a normalized coordinate space for cluster detection.
    x_range = bounds.ub - bounds.lb
    if len(minima_sorted) > 0:
        pts = np.array([rec[0] for rec in minima_sorted])
        fvals = np.array([rec[1] for rec in minima_sorted])
        pts_norm = (pts - bounds.lb) / x_range
        _, labels = dbscan(pts_norm, eps=cluster_eps, min_samples=21, n_jobs=-1)
    else:
        # No step was accepted in the main loop: there is nothing to cluster. Use empty
        # containers so that the result reports "no clusters" instead of raising.
        pts = np.empty((0, len(x)))
        fvals = np.empty(0)
        labels = np.empty(0, dtype=int)
    # end if

    # Compute mean of each cluster and evaluate objective function at cluster mean locations.
    # Cluster labels are iterated as 0..max(labels); negative labels are never visited.
    n_clusters = max(labels) + 1 if len(labels) > 0 else 0
    minima_candidates = []
    for grp in range(n_clusters):
        mask = (labels == grp)
        mean_loc = np.mean(pts[mask, :], axis=0)
        # Evaluate objective function precisely at the mean location of each cluster
        fval = obj_counted(mean_loc, *args)
        minima_candidates.append((mean_loc, grp, fval))
    # end for

    # Rank minima locations by objective function.
    minima_candidates.sort(key=lambda c: c[2])

    # Pick up to N solutions
    solutions = minima_candidates[:N]

    # Put results into OptimizeResult container.
    # Add histograms to output result (in form of scipy.stats.rv_histogram)
    solution = OptimizeResult()
    solution.x = np.array([s[0] for s in solutions])
    solution.clusters = [pts[(labels == s[1])] for s in solutions]
    solution.cluster_funvals = [fvals[(labels == s[1])] for s in solutions]
    solution.bins = hist.bins
    solution.distribution = hist.histograms
    solution.acceptance_rate = ar
    solution.success = True
    solution.status = 0
    if len(solutions) > 0:
        solution.message = 'SUCCESS: Found {} local minima'.format(len(solutions))
    else:
        solution.message = 'WARNING: Found no clusters within tolerance {}'.format(cluster_eps)
    # end if
    solution.fun = np.array([s[2] for s in solutions])
    solution.jac = None
    solution.nfev = obj_counted.counter
    solution.njev = 0
    solution.nit = main_iter
    solution.maxcv = None
    solution.samples = samples if collect_samples else None
    solution.sample_funvals = samples_fval if collect_samples else None
    solution.bounds = bounds
    solution.version = 's0.3'  # Solution version for future traceability
    solution.rnd_seed = rnd_seed

    return solution