def import_best(self, walkers: ExportedWalkers):
    """
    Import the best walker from the target :class:`ExportedWalkers` if it \
    improves the best value present in the :class:`Swarm`'s walkers.

    Args:
        walkers: Walkers containing the best walker that will be imported \
                 if it improves the current best value found.

    Returns:
        None.

    """
    if self._imported_best_is_better(walkers):
        best_ix = walkers.get_best_index(self.swarm.walkers.minimize)
        best_reward = judo.copy(walkers.rewards[best_ix])
        best_state = judo.copy(walkers.states[best_ix])
        best_obs = judo.copy(walkers.observs[best_ix])
        best_id = judo.copy(walkers.id_walkers[best_ix])
        self.swarm.walkers.states.update(
            best_reward=best_reward,
            best_state=best_state,
            best_obs=best_obs,
            best_id=best_id,
        )
        self.swarm.walkers.fix_best()
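A minimal usage sketch, not taken from the source: `best_manager` stands in for the object that wraps the local :class:`Swarm` and tracks its best walker, and `exported` for an :class:`ExportedWalkers` batch received from another distributed worker.

# `best_manager` and `exported` are hypothetical names assumed to already exist.
# The call is a no-op unless the imported walker improves the current best value.
best_manager.import_best(exported)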
def update_states(self, env_states, model_states, best_ix):
    """Update the data of the root state."""
    self.root_env_states.update(other=env_states)
    self.root_model_states.update(other=model_states)
    if self.accumulate_rewards:
        cum_rewards = self.root_walkers_states.cum_rewards
        cum_rewards = cum_rewards + self.root_env_states.rewards
    else:
        cum_rewards = self.root_env_states.rewards
    dt = self.root_model_states.dt if hasattr(self.root_model_states, "dt") else 1.0
    times = dt + self.root_walker.times
    root_id = tensor(self.walkers.states.id_walkers[best_ix])
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        times=times,
        id_walkers=tensor([root_id]),
    )
    self.root_walker = OneWalker(
        reward=judo.copy(cum_rewards[0]),
        observ=judo.copy(self.root_env_states.observs[0]),
        state=judo.copy(self.root_env_states.states[0]),
        time=judo.copy(times[0]),
        id_walker=root_id.squeeze(),
    )
def copy(self):
    """Return a copy of the current instance."""
    new_walkers = ExportedWalkers(batch_size=len(self))
    new_walkers.update(
        id_walkers=judo.copy(self.id_walkers),
        rewards=judo.copy(self.rewards),
        states=judo.copy(self.states),
        observs=judo.copy(self.observs),
    )
    return new_walkers
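A short sketch of the copy semantics, assuming `walkers` is an initialized :class:`ExportedWalkers`:

clone = walkers.copy()  # `walkers` is assumed to exist
# judo.copy duplicates the underlying tensors instead of aliasing them,
# so mutating the clone leaves the original batch untouched.
clone.rewards[:] = 0.0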
def minimize_batch(self, x: typing.Tensor) -> Tuple[typing.Tensor, typing.Tensor]:
    """
    Minimize a batch of points.

    Args:
        x: Array representing a batch of points to be optimized, stacked \
           across the first dimension.

    Returns:
        Tuple of arrays containing the local optimum found for each point, \
        and an array with the value assigned to each of those optima.

    """
    x = judo.to_numpy(judo.copy(x))
    with Backend.use_backend("numpy"):
        result = judo.zeros_like(x)
        rewards = judo.zeros((x.shape[0], 1))
        for i in range(x.shape[0]):
            new_x, reward = self.minimize_point(x[i, :])
            result[i, :] = new_x
            rewards[i, :] = float(reward)
    # Cast the bounds and results back to the active backend after leaving numpy.
    self.bounds.high = tensor(self.bounds.high)
    self.bounds.low = tensor(self.bounds.low)
    result, rewards = tensor(result), tensor(rewards)
    return result, rewards
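A usage sketch under the assumption that `optimizer` is an initialized instance exposing ``minimize_batch`` (the two points are arbitrary):

from judo import tensor

x = tensor([[0.5, -0.3], [1.2, 0.8]])  # batch of 2 points stacked across the first dimension
optima, values = optimizer.minimize_batch(x)  # `optimizer` is a hypothetical instance
# optima has the same shape as x; values has shape (2, 1).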
def copy(self) -> "States": """Crete a copy of the current instance.""" param_dict = { str(name): judo.copy(val) if judo.is_tensor(val) else copy.deepcopy(val) for name, val in self.items() } return States(batch_size=self.n, **param_dict)
def reset(self, batch_size: int = 1, **kwargs) -> StatesEnv:
    """Reset the environment, sampling the initial states from a standard normal distribution."""
    states = super(LennardJones, self).reset(batch_size=batch_size, **kwargs)
    new_states = random_state.normal(0, scale=1.0, size=states.states.shape)
    states.update(observs=new_states, states=judo.copy(new_states))
    return states
def update_states(self, best_ix):
    """Update the data of the root walker after an internal Swarm iteration has finished."""
    # The accumulation of rewards is already done in the internal Swarm
    cum_rewards = self.root_walkers_states.cum_rewards
    times = self.root_walkers_states.times + self.root_walker.times
    root_id = tensor(self.walkers.states.id_walkers[best_ix])
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        id_walkers=tensor([root_id]),
        times=times,
    )
    self.root_walker = OneWalker(
        reward=judo.copy(cum_rewards[0]),
        observ=judo.copy(self.root_env_states.observs[0]),
        state=judo.copy(self.root_env_states.states[0]),
        time=judo.copy(times[0]),
        id_walker=root_id,
    )
def reset(
    self,
    env_states: StatesEnv = None,
    model_states: StatesModel = None,
    walkers_states: StatesWalkers = None,
) -> None:
    """
    Restart all the internal states involved in the algorithm iteration.

    After reset, a new run of the algorithm will be ready to be launched.
    """
    if walkers_states is not None:
        self.states.update(walkers_states)
    else:
        self.states.reset()
    self.env_states.times = judo.copy(self.env_states.times)
    self.env_states.times[:] = -1.0
    # Preserve the walker ids across the update so the lineage is not lost.
    old_ids = judo.copy(self.states.id_walkers)
    self.update_states(env_states=env_states, model_states=model_states)
    self.states.id_walkers = old_ids
    self._epoch = 0
def reset(
    self,
    root_walker: OneWalker = None,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        root_walker: Walker representing the initial state of the search. \
                     The walkers will be reset to this walker, and it will \
                     be added to the root of the :class:`StateTree` if any.
        model_states: :class:`StatesModel` that defines the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that defines the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that defines the internal \
                        states of the :class:`Walkers`.

    """
    self._epoch = 0
    env_states = self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
    # Add the corresponding root_walker data to env_states
    if root_walker is not None:
        if not isinstance(root_walker, OneWalker):
            raise ValueError(
                "Root walker needs to be an instance of OneWalker, got %s instead."
                % type(root_walker)
            )
        env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)
    model_states = (
        self.model.reset(batch_size=len(self.walkers), env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    root_id = (
        self.walkers.get("id_walkers")[0]
        if root_walker is None
        else judo.copy(root_walker.id_walkers[0])
    )
    self.walkers.states.id_walkers[:] = root_id
    self.walkers.states.best_id = root_id
    if self.tree is not None:
        self.tree.reset(
            root_id=root_id,
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=self.walkers.states,
        )
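A usage sketch, assuming `swarm` is an initialized :class:`Swarm` and `root` is a :class:`OneWalker` compatible with its :class:`Environment` (see the construction example at the end of this section):

swarm.reset(root_walker=root)  # `swarm` and `root` are assumed to exist
# Every walker now shares the root's id, and the StateTree (if any) is rooted at it.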
def update_best(self, walkers: ExportedWalkers):
    """
    Update the values tracked by the walker if the passed \
    :class:`ExportedWalkers` contain a better value.

    Args:
        walkers: The current best values will be compared against the \
                 walkers of this instance of :class:`ExportedWalkers`.

    Returns:
        None

    """
    curr_best = self.get_best_reward(self.minimize)
    other_best = walkers.get_best_reward(self.minimize)
    other_improves = curr_best > other_best if self.minimize else curr_best < other_best
    if other_improves:
        ix = walkers.get_best_index(self.minimize)
        self.states = judo.copy(walkers.states[ix])
        self.observs = judo.copy(walkers.observs[ix])
        self.rewards = judo.copy(walkers.rewards[ix])
        self.id_walkers = judo.copy(walkers.id_walkers[ix])
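A sketch of the comparison logic, assuming `best` is the single-walker tracker exposing ``update_best`` and `remote` is an :class:`ExportedWalkers` batch, both created with ``minimize=False``:

best.update_best(remote)  # `best` and `remote` are hypothetical, assumed initialized
# With minimize=False the copy only happens when remote's best reward is
# strictly greater than the reward currently stored in `best`.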
def step_walkers(self) -> None:
    """
    Make the walkers evolve to their next state sampling an action from the \
    :class:`Model` and applying it to the :class:`Environment`.
    """
    model_states = self.walkers.model_states
    env_states = self.walkers.env_states
    parent_ids = judo.copy(self.walkers.states.id_walkers) if self.tree is not None else None
    model_states = self.model.predict(
        env_states=env_states, model_states=model_states, walkers_states=self.walkers.states
    )
    env_states = self.env.step(model_states=model_states, env_states=env_states)
    self.walkers.update_states(env_states=env_states, model_states=model_states)
    self.update_tree(parent_ids)
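A sketch of the loop that drives this method, assuming `swarm` is an initialized :class:`Swarm`; in the full algorithm the swarm also balances and clones the walkers after each step:

swarm.reset()
for _ in range(100):  # hypothetical fixed budget of epochs
    swarm.step_walkers()
    # cloning / balancing of the walkers would run here in the real run loop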
async def step_walkers(self) -> None:
    """
    Make the walkers evolve to their next state sampling an action from the \
    :class:`Model` and applying it to the :class:`Environment`.
    """
    model_states = self.walkers.get("model_states")
    env_states = self.walkers.get("env_states")
    walkers_states = self.walkers.get("states")
    parent_ids = judo.copy(self.walkers.get("id_walkers")) if self.tree is not None else None
    model_states = self.model.predict(
        env_states=env_states, model_states=model_states, walkers_states=walkers_states
    )
    env_states = await self.env.step.remote(model_states=model_states, env_states=env_states)
    self.walkers.update_states(env_states=env_states, model_states=model_states)
    self.update_tree(parent_ids)
def __init__(
    self,
    state: Tensor,
    observ: Tensor,
    reward: Scalar,
    id_walker=None,
    time=0.0,
    state_dict: StateDict = None,
    **kwargs,
):
    """
    Initialize a :class:`OneWalker`.

    Args:
        state: Non batched numpy array defining the state of the walker.
        observ: Non batched numpy array defining the observation of the walker.
        reward: Scalar value representing the reward of the walker.
        id_walker: Hash of the provided state. If None it will be calculated \
                   when the :class:`OneWalker` is initialized.
        time: Time step of the current walker. Measures the length of the path \
              followed by the walker.
        state_dict: External :class:`StateDict` that overrides the default values.
        **kwargs: Additional data needed to define the walker. Its structure \
                  needs to be defined in the provided ``state_dict``. These \
                  attributes will be assigned to the :class:`StatesEnv` of \
                  the :class:`Swarm`.

    """
    self.id_walkers = None
    self.rewards = None
    self.observs = None
    self.states = None
    self.times = None
    self._observs_size = observ.shape
    self._observs_dtype = observ.dtype
    self._states_size = state.shape
    self._states_dtype = state.dtype
    self._rewards_dtype = tensor(reward).dtype
    # Accept an external definition of the param_dict values
    walkers_dict = self.get_params_dict()
    if state_dict is not None:
        for k, v in state_dict.items():
            if k in ["observs", "states"]:  # These two come from the observ and state arguments
                continue
            if k in walkers_dict:
                walkers_dict[k] = v
    super(OneWalker, self).__init__(batch_size=1, state_dict=walkers_dict)
    # Keyword arguments must be defined in state_dict
    if state_dict is not None:
        for k in kwargs.keys():
            if k not in state_dict:
                raise ValueError(
                    "The provided attributes must be defined in state_dict.\n"
                    "state_dict: %s\nkwargs: %s" % (state_dict, kwargs)
                )
    self.observs[:] = judo.copy(observ)
    self.states[:] = judo.copy(state)
    self.rewards[:] = judo.copy(reward) if judo.is_tensor(reward) else copy.deepcopy(reward)
    self.times[:] = judo.copy(time) if judo.is_tensor(time) else copy.deepcopy(time)
    self.id_walkers[:] = (
        judo.copy(id_walker.squeeze()) if id_walker is not None else hasher.hash_tensor(state)
    )
    self.update(**kwargs)
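A construction sketch with arbitrary numpy arrays; since ``id_walker`` is omitted, the hash is computed from the state:

import numpy

walker = OneWalker(
    state=numpy.zeros(4, dtype=numpy.float32),   # hypothetical state shape
    observ=numpy.zeros(4, dtype=numpy.float32),  # hypothetical observation shape
    reward=0.0,
)
# walker.states, walker.observs, walker.rewards and walker.times hold the same
# data batched with batch_size=1, ready to seed Swarm.reset(root_walker=walker).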