def get_plot_data(self, swarm: Swarm = None): """Extract the best reward found by the swarm and create a \ :class:`pandas.DataFrame` to keep track of it.""" columns = ["Epoch", "Best Reward", "Deaths", "Clones"] if swarm is None: data = pandas.DataFrame( { "Epoch": [], "Best Reward": [], "Deaths": [], "Clones": [] }, columns=columns) else: oobs = swarm.get("oobs") will_clone = swarm.get("will_clone") best_reward = swarm.get("best_reward") epoch = swarm.get("epoch") deaths = float(oobs.sum()) / len(swarm) clones = float(will_clone.sum()) / len(swarm) data = pandas.DataFrame( { "Epoch": [int(epoch)], "Best Reward": ["{:.4f}".format(float(best_reward))], "Deaths": ["{:.2f}%".format(100 * deaths)], "Clones": ["{:.2f}%".format(100 * clones)], }, columns=columns, ) return data
def get_plot_data(self, swarm: Swarm = None) -> numpy.ndarray:
    """Extract the frame from the :class:`AtariEnv` that the target \
    :class:`Swarm` contains."""
    if swarm is None:
        return numpy.zeros((210, 160, 3))
    state = swarm.get("best_state")
    return self.image_from_state(swarm=swarm, state=state)
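A minimal usage sketch for the frame extraction above, assuming a Swarm that has already been run; the `frame_plot` instance name is hypothetical and matplotlib is used only for display:

import matplotlib.pyplot as plt

# Hypothetical instance of the plot class that defines get_plot_data above.
frame = frame_plot.get_plot_data(swarm)  # RGB array of shape (210, 160, 3)
plt.imshow(frame.astype("uint8"))
plt.axis("off")
plt.show()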
def get_plot_data(self, swarm: Swarm = None): """Extract the best reward found by the swarm and create a \ :class:`pandas.DataFrame` to keep track of it.""" if swarm is None: data = pandas.DataFrame({ "x": [], "best_val": [] }, columns=["x", "best_val"]) else: data = pandas.DataFrame( { "x": [int(swarm.get("epoch"))], "best_val": [float(swarm.get("best_reward"))] }, columns=["x", "best_val"], ) return data
def create_cartpole_swarm():
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x),
        walkers=Walkers,
        env=lambda: DiscreteEnv(ClassicControl("CartPole-v0")),
        reward_limit=121,
        n_walkers=150,
        max_epochs=300,
        reward_scale=2,
    )
    return swarm
def create_cartpole_swarm():
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x),
        walkers=Walkers,
        env=lambda: DiscreteEnv(ClassicControl()),
        n_walkers=20,
        max_iters=200,
        prune_tree=True,
        reward_scale=2,
    )
    return swarm
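A hedged usage sketch for either CartPole factory above; `swarm.run()` and `swarm.get("best_reward")` follow the fragile Swarm API as used in the other snippets, and the printout is illustrative only:

swarm = create_cartpole_swarm()
swarm.run()  # search until reward_limit or the epoch/iteration budget is exhausted
print("best reward:", float(swarm.get("best_reward")))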
def create_atari_swarm():
    env = AtariEnvironment(name="MsPacman-ram-v0")
    dt = GaussianDt(min_dt=10, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        env=lambda: DiscreteEnv(env),
        n_walkers=6,
        max_epochs=10,
        reward_scale=2,
        reward_limit=1,
    )
    return swarm
def create_atari_swarm():
    env = AtariEnvironment(name="MsPacman-ram-v0", clone_seeds=True, autoreset=True)
    dt = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        walkers=Walkers,
        env=lambda: DiscreteEnv(env),
        n_walkers=67,
        max_epochs=500,
        reward_scale=2,
        reward_limit=751,
    )
    return swarm
def get_plot_data(self, swarm: Swarm, attr: str):
    """
    Extract the data of the attribute of the :class:`Swarm` that will be \
    represented as a histogram.

    Args:
        swarm: Target :class:`Swarm`.
        attr: Attribute of the target :class:`States` that will be plotted.

    Returns:
        Histogram containing the target data.

    """
    if swarm is None:
        return super(SwarmHistogram, self).get_plot_data(swarm)
    data = swarm.get(attr)  # swarm cannot be None past the early return above
    self._update_lims(data)
    return super(SwarmHistogram, self).get_plot_data(data)
def create_atari_swarm():
    env = ParallelEnvironment(
        env_class=AtariEnvironment,
        name="MsPacman-ram-v0",
        clone_seeds=True,
        autoreset=True,
        blocking=False,
    )
    dt = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        walkers=Walkers,
        env=lambda: DiscreteEnv(env),
        n_walkers=67,
        max_iters=20,
        prune_tree=True,
        reward_scale=2,
    )
    return swarm
def append_swarm(self, swarm: Swarm, mode=None):
    """
    Extract the replay data from a :class:`Swarm` and incorporate it to the \
    already saved experiences.
    """
    # Extract data from the swarm
    mode = self.mode if mode is None else mode
    if mode == "best_state":
        # Replay only the branch that ends in the best state found by the swarm
        data = next(
            swarm.tree.iterate_branch(swarm.best_id, batch_size=-1, names=self.names)
        )
        self.append(**dict(zip(self.names, data)))
    elif mode == "best_leaf":
        # Replay the branch that ends in the leaf with the highest cumulative reward
        best_leaf = swarm.walkers.states.id_walkers[swarm.get("cum_rewards").argmax()]
        data = next(
            swarm.tree.iterate_branch(best_leaf, batch_size=-1, names=self.names)
        )
        self.append(**dict(zip(self.names, data)))
    elif mode == "branches":
        # Replay every branch of the search tree
        for node_id in swarm.tree.leafs:
            data = next(
                swarm.tree.iterate_branch(node_id, batch_size=-1, names=self.names)
            )
            self.append(**dict(zip(self.names, data)))
    else:
        # Sample nodes of the tree at random
        data = next(
            swarm.tree.iterate_nodes_at_random(batch_size=-1, names=self.names)
        )
        self.append(**dict(zip(self.names, data)))
    # Log the new size of the replay memory
    self._log.info("Memory now contains %s samples" % len(self))
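The branch iteration used above can also be driven directly; a minimal sketch, assuming a finished swarm whose tree recorded the (hypothetical) column names below:

# Replay the branch ending in the best state, mirroring the "best_state" mode above.
names = ["observs", "actions", "rewards"]  # hypothetical; must match the names stored in the tree
data = next(swarm.tree.iterate_branch(swarm.best_id, batch_size=-1, names=names))
batch = dict(zip(names, data))  # one array per requested name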
def mathy_swarm(config: SwarmConfig, env_callable=None) -> Swarm:
    if env_callable is None:
        env_callable = lambda: FragileMathyEnv(
            name="mathy_v0", repeat_problem=config.single_problem
        )
    if config.use_mp:
        env_callable = ParallelEnv(env_callable=env_callable)
    tree_callable = None
    if config.history:
        tree_callable = lambda: HistoryTree(prune=True, names=config.history_names)
    swarm = Swarm(
        model=lambda env: DiscreteMasked(env=env),
        env=env_callable,
        tree=tree_callable,
        reward_limit=EnvRewards.WIN,
        n_walkers=config.n_walkers,
        max_epochs=config.max_iters,
        reward_scale=1,
        distance_scale=3,
        distance_function=mathy_dist,
        show_pbar=False,
    )
    return swarm
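A hedged construction sketch; only the SwarmConfig fields referenced by mathy_swarm above are set, and it is assumed the config accepts them as keyword arguments:

config = SwarmConfig(
    n_walkers=64,                 # number of walkers in the swarm
    max_iters=100,                # forwarded to Swarm as max_epochs
    single_problem=False,         # forwarded to FragileMathyEnv as repeat_problem
    use_mp=False,                 # wrap the env callable in ParallelEnv when True
    history=True,                 # attach a pruning HistoryTree when True
    history_names=["states", "actions", "rewards"],  # hypothetical names
)
swarm = mathy_swarm(config)
swarm.run()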
def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
    """Return the normalized ``distances`` of the walkers."""
    distances: numpy.ndarray = judo.to_numpy(swarm.get("distances"))
    return distances
def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
    """Return the normalized ``virtual_rewards`` of the walkers."""
    virtual_rewards: numpy.ndarray = judo.to_numpy(swarm.get("virtual_rewards"))
    return virtual_rewards
def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
    """Return the normalized ``cum_rewards`` of the walkers."""
    rewards: numpy.ndarray = judo.to_numpy(relativize(swarm.get("cum_rewards")))
    return rewards