Example #1
    def get_plot_data(self, swarm: Swarm = None):
        """Extract the best reward found by the swarm and create a \
        :class:`pandas.DataFrame` to keep track of it."""
        columns = ["Epoch", "Best Reward", "Deaths", "Clones"]
        if swarm is None:
            # No swarm yet: return an empty DataFrame with the expected columns.
            data = pandas.DataFrame(
                {"Epoch": [], "Best Reward": [], "Deaths": [], "Clones": []},
                columns=columns,
            )
        else:
            oobs = swarm.get("oobs")
            will_clone = swarm.get("will_clone")
            best_reward = swarm.get("best_reward")
            epoch = swarm.get("epoch")
            # Fractions of walkers that went out of bounds or will clone this epoch.
            deaths = float(oobs.sum()) / len(swarm)
            clones = float(will_clone.sum()) / len(swarm)
            data = pandas.DataFrame(
                {
                    "Epoch": [int(epoch)],
                    "Best Reward": ["{:.4f}".format(float(best_reward))],
                    "Deaths": ["{:.2f}%".format(100 * deaths)],
                    "Clones": ["{:.2f}%".format(100 * clones)],
                },
                columns=columns,
            )
        return data
Example #2
    def get_plot_data(self, swarm: Swarm = None) -> numpy.ndarray:
        """Extract the frame from the :class:`AtariEnv` that the target \
        :class:`Swarm` contains."""
        if swarm is None:
            # No swarm yet: return a black frame with the standard Atari resolution.
            return numpy.zeros((210, 160, 3))
        state = swarm.get("best_state")
        return self.image_from_state(swarm=swarm, state=state)
Example #3
    def get_plot_data(self, swarm: Swarm = None):
        """Extract the best reward found by the swarm and create a \
        :class:`pandas.DataFrame` to keep track of it."""
        columns = ["x", "best_val"]
        if swarm is None:
            # No swarm yet: return an empty DataFrame with the expected columns.
            data = pandas.DataFrame({"x": [], "best_val": []}, columns=columns)
        else:
            data = pandas.DataFrame(
                {
                    "x": [int(swarm.get("epoch"))],
                    "best_val": [float(swarm.get("best_reward"))],
                },
                columns=columns,
            )
        return data
Example #4
def create_cartpole_swarm():
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x),
        walkers=Walkers,
        env=lambda: DiscreteEnv(ClassicControl("CartPole-v0")),
        reward_limit=121,
        n_walkers=150,
        max_epochs=300,
        reward_scale=2,
    )
    return swarm
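
A minimal usage sketch for the factory above (an illustrative addition, not from the source); it assumes fragile's ``Swarm`` exposes a ``run()`` entry point and a ``best_reward`` attribute:

def run_cartpole_example():
    # Build the swarm and run it until reward_limit or max_epochs is reached.
    swarm = create_cartpole_swarm()
    swarm.run()  # Assumed sampling-loop entry point.
    print("best reward:", swarm.best_reward)  # best_reward is assumed here.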
Example #5
def create_cartpole_swarm():
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x),
        walkers=Walkers,
        env=lambda: DiscreteEnv(ClassicControl()),
        n_walkers=20,
        max_iters=200,
        prune_tree=True,
        reward_scale=2,
    )
    return swarm
Example #6
def create_atari_swarm():
    env = AtariEnvironment(name="MsPacman-ram-v0", )
    dt = GaussianDt(min_dt=10, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        env=lambda: DiscreteEnv(env),
        n_walkers=6,
        max_epochs=10,
        reward_scale=2,
        reward_limit=1,
    )
    return swarm
Example #7
def create_atari_swarm():
    env = AtariEnvironment(name="MsPacman-ram-v0",
                           clone_seeds=True,
                           autoreset=True)
    dt = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        walkers=Walkers,
        env=lambda: DiscreteEnv(env),
        n_walkers=67,
        max_epochs=500,
        reward_scale=2,
        reward_limit=751,
    )
    return swarm
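
Here the ``GaussianDt`` critic samples how many time steps each action is applied for, so the walkers explore at varying temporal resolutions. Running the swarm follows the same assumed pattern as the CartPole sketch above:

swarm = create_atari_swarm()
swarm.run()  # Assumed fragile entry point, as in the CartPole example.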
Example #8
    def get_plot_data(self, swarm: Swarm, attr: str):
        """
        Extract the data of the attribute of the :class:`Swarm` that will be \
        represented as a histogram.

        Args:
            swarm: Target :class:`Swarm`.
            attr: Attribute of the target :class:`States` that will be plotted.

        Returns:
            Histogram containing the target data.

        """
        if swarm is None:
            return super(SwarmHistogram, self).get_plot_data(swarm)
        data = swarm.get(attr)  # The early return above guarantees swarm is not None.
        self._update_lims(data)
        return super(SwarmHistogram, self).get_plot_data(data)
Example #9
def create_atari_swarm():
    env = ParallelEnvironment(
        env_class=AtariEnvironment,
        name="MsPacman-ram-v0",
        clone_seeds=True,
        autoreset=True,
        blocking=False,
    )
    dt = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    swarm = Swarm(
        model=lambda x: DiscreteUniform(env=x, critic=dt),
        walkers=Walkers,
        env=lambda: DiscreteEnv(env),
        n_walkers=67,
        max_iters=20,
        prune_tree=True,
        reward_scale=2,
    )
    return swarm
Example #10
    def append_swarm(self, swarm: Swarm, mode=None):
        """
        Extract the replay data from a :class:`Swarm` and incorporate it into the \
        already saved experiences.
        """
        mode = self.mode if mode is None else mode
        if mode == "best_state":
            # Replay only the branch that leads to the best state found.
            data = next(swarm.tree.iterate_branch(swarm.best_id, batch_size=-1, names=self.names))
            self.append(**dict(zip(self.names, data)))
        elif mode == "best_leaf":
            # Replay the branch ending at the leaf with the highest cumulative reward.
            best_leaf = swarm.walkers.states.id_walkers[swarm.get("cum_rewards").argmax()]
            data = next(swarm.tree.iterate_branch(best_leaf, batch_size=-1, names=self.names))
            self.append(**dict(zip(self.names, data)))
        elif mode == "branches":
            # Replay every branch of the search tree.
            for node_id in swarm.tree.leafs:
                data = next(swarm.tree.iterate_branch(node_id, batch_size=-1, names=self.names))
                self.append(**dict(zip(self.names, data)))
        else:
            # Default: sample nodes from the tree at random.
            data = next(swarm.tree.iterate_nodes_at_random(batch_size=-1, names=self.names))
            self.append(**dict(zip(self.names, data)))
        self._log.info("Memory now contains %s samples" % len(self))
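
A hedged call sketch (the class that owns ``append_swarm`` is not shown in the source; ``memory`` stands for an instance of it): after a swarm recording a :class:`HistoryTree` finishes running, its best branch can be replayed into the memory.

memory.append_swarm(swarm, mode="best_state")  # Keep only the branch to the best state.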
Example #11
def mathy_swarm(config: SwarmConfig, env_callable=None) -> Swarm:
    if env_callable is None:
        env_callable = lambda: FragileMathyEnv(
            name="mathy_v0", repeat_problem=config.single_problem)
    if config.use_mp:
        env_callable = ParallelEnv(env_callable=env_callable)
    tree_callable = None
    if config.history:
        tree_callable = lambda: HistoryTree(prune=True,
                                            names=config.history_names)
    swarm = Swarm(
        model=lambda env: DiscreteMasked(env=env),
        env=env_callable,
        tree=tree_callable,
        reward_limit=EnvRewards.WIN,
        n_walkers=config.n_walkers,
        max_epochs=config.max_iters,
        reward_scale=1,
        distance_scale=3,
        distance_function=mathy_dist,
        show_pbar=False,
    )
    return swarm
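
A hedged construction sketch for the config consumed above; the ``SwarmConfig`` field names are exactly those that ``mathy_swarm`` reads, while the values and ``history_names`` entries are illustrative:

config = SwarmConfig(
    n_walkers=64,  # Number of parallel walkers in the swarm.
    max_iters=100,  # Passed to Swarm as max_epochs.
    use_mp=False,  # If True, wrap the env callable in ParallelEnv.
    history=True,  # If True, record the search in a pruned HistoryTree.
    history_names=["states", "actions"],  # Illustrative column names.
    single_problem=True,  # Forwarded as repeat_problem to FragileMathyEnv.
)
swarm = mathy_swarm(config)
swarm.run()  # Assumed fragile entry point.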
Example #12
    def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
        """Return the normalized ``distances`` of the walkers."""
        distances: numpy.ndarray = judo.to_numpy(swarm.get("distances"))
        return distances
Example #13
    def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
        """Return the normalized ``virtual_rewards`` of the walkers."""
        virtual_rewards: numpy.ndarray = judo.to_numpy(swarm.get("virtual_rewards"))
        return virtual_rewards
Example #14
    def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
        """Return the normalized ``cum_rewards`` of the walkers."""
        rewards: numpy.ndarray = judo.to_numpy(relativize(swarm.get("cum_rewards")))
        return rewards
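
These three variants differ only in which walker statistic they expose as the z coordinate. A hedged sketch of a fourth variant, assuming the walkers' states also store a ``rewards`` entry reachable through ``swarm.get``:

    def get_z_coords(self, swarm: Swarm, X: numpy.ndarray = None):
        """Return the instantaneous ``rewards`` of the walkers (illustrative)."""
        rewards: numpy.ndarray = judo.to_numpy(swarm.get("rewards"))
        return rewards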