class ActorProxy(object):
    """Thin wrapper around a ``Proxy`` that dispatches roll-out requests to remote actor workers.

    Args:
        proxy_params: Keyword arguments forwarded to the ``Proxy`` constructor. ``component_type`` is
            always set to ``"actor"`` by this class.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    def roll_out(self, model_dict: dict = None, epsilon_dict: dict = None, done: bool = False,
                 return_details: bool = True):
        """Request a roll-out from every ``"actor_worker"`` peer and merge the replies.

        Args:
            model_dict (dict): Sent to the workers under ``PayloadKey.MODEL``.
            epsilon_dict (dict): Sent to the workers under ``PayloadKey.EPSILON``.
            done (bool): If True, only a ``PayloadKey.DONE`` notification is broadcast and no roll-out
                is requested.
            return_details (bool): Sent to the workers under ``PayloadKey.RETURN_DETAILS``.

        Returns:
            ``(None, None)`` when ``done`` is True. Otherwise a 2-tuple ``(performance, exp_by_agent)``:
            ``performance`` maps each reply's source to its ``PayloadKey.PERFORMANCE`` entry, and
            ``exp_by_agent`` maps each agent ID to a ``defaultdict(list)`` holding the concatenation of
            the per-key experience lists found in all replies.
        """
        # Guard clause: the final call only notifies the peers and returns nothing useful.
        if done:
            self._proxy.ibroadcast(
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True},
            )
            return None, None

        # Every worker gets its own (freshly built) payload dict with identical content.
        destinations = [
            (
                worker,
                {
                    PayloadKey.MODEL: model_dict,
                    PayloadKey.EPSILON: epsilon_dict,
                    PayloadKey.RETURN_DETAILS: return_details,
                },
            )
            for worker in self._proxy.peers["actor_worker"]
        ]
        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=destinations,
        )

        performance, exp_by_agent = {}, {}
        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            if reply.payload[PayloadKey.EXPERIENCE] is None:
                continue
            for agent_id, exp_set in reply.payload[PayloadKey.EXPERIENCE].items():
                # One defaultdict(list) per agent, created on first sight of that agent.
                merged = exp_by_agent.setdefault(agent_id, defaultdict(list))
                for key, values in exp_set.items():
                    merged[key].extend(values)

        return performance, exp_by_agent
class ActorProxy(object):
    """Proxy wrapper that forwards roll-out requests to remote actor workers.

    Args:
        proxy_params: Keyword arguments used to build the underlying ``Proxy`` instance
            (``component_type`` is fixed to ``"actor"``).
    """
    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    @staticmethod
    def _merge_experiences(exp_by_agent: dict, experiences: dict):
        """Extend the per-agent experience lists in ``exp_by_agent`` (in place) with ``experiences``."""
        for agent_id, exp_set in experiences.items():
            try:
                bucket = exp_by_agent[agent_id]
            except KeyError:
                # First time this agent shows up: start an empty list-valued mapping for it.
                bucket = exp_by_agent[agent_id] = defaultdict(list)
            for key, values in exp_set.items():
                bucket[key].extend(values)

    def roll_out(self,
                 model_dict: dict = None,
                 epsilon_dict: dict = None,
                 done: bool = False,
                 return_details: bool = True):
        """Dispatch roll-out requests to the remote actor workers and gather what they send back.

        The signature is intended to match ``SimpleActor``'s ``roll_out`` so that a ``SimpleLearner`` can
        call either one without caring whether the roll-out happens locally or remotely.

        Args:
            model_dict (dict): Models for the remote agents to load before rolling out; forwarded under
                ``PayloadKey.MODEL`` (may be None).
            epsilon_dict (dict): Exploration rate by agent; forwarded under ``PayloadKey.EPSILON``.
            done (bool): If True, this is the last call. A ``PayloadKey.DONE`` notification is broadcast
                (used to signal remote actor workers to exit) and no roll-out is requested.
            return_details (bool): Forwarded under ``PayloadKey.RETURN_DETAILS``; asks the workers to send
                experiences in addition to performance metrics.

        Returns:
            ``(None, None)`` if ``done`` is True; otherwise ``(performance, exp_by_agent)`` where
            ``performance`` is keyed by the replying peer and ``exp_by_agent`` holds the experiences of
            all replies merged per agent ID.
        """
        if done:
            exit_signal = {PayloadKey.DONE: True}
            self._proxy.ibroadcast(tag=MessageTag.ROLLOUT,
                                   session_type=SessionType.NOTIFICATION,
                                   payload=exit_signal)
            return None, None

        # Build one (destination, payload) pair per actor worker.
        payloads = []
        for peer in self._proxy.peers["actor_worker"]:
            request = {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EPSILON: epsilon_dict,
                PayloadKey.RETURN_DETAILS: return_details
            }
            payloads.append((peer, request))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(tag=MessageTag.ROLLOUT,
                                      session_type=SessionType.TASK,
                                      destination_payload_list=payloads)

        performance = {}
        exp_by_agent = {}
        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            # A reply may carry no experiences at all, in which case there is nothing to merge.
            if reply.payload[PayloadKey.EXPERIENCE] is not None:
                self._merge_experiences(exp_by_agent, reply.payload[PayloadKey.EXPERIENCE])

        return performance, exp_by_agent
Exemplo n.º 3
0
def master(group_name: str, worker_num: int, is_immediate: bool = False):
    """Create the master proxy, send an ``"INC"`` notification to its peers and print every reply.

    Args:
        group_name (str): Identifier for the group of all communication components.
        worker_num (int): The number of workers the proxy expects as peers.
        is_immediate (bool): Selects how the notification is sent.
            If True (async mode), ``ibroadcast`` is used: it hands back session ids, leaving room to do
            higher-priority work before the replies are fetched with ``receive_by_id``.
            If False (sync mode), ``broadcast`` is used and its return value is taken as the replies.
    """
    proxy = Proxy(
        group_name=group_name,
        component_type="master",
        expected_peers={"worker": worker_num},
    )

    if not is_immediate:
        # Sync mode: the broadcast call itself yields the replied messages.
        replied_msgs = proxy.broadcast(tag="INC", session_type=SessionType.NOTIFICATION)
    else:
        # Async mode: send first, collect the replies by session id afterwards.
        session_ids = proxy.ibroadcast(tag="INC", session_type=SessionType.NOTIFICATION)
        # do some tasks with higher priority here.
        replied_msgs = proxy.receive_by_id(session_ids)

    for reply in replied_msgs:
        print(
            f"{proxy.component_name} get receive notification from {reply.source} with message session stage "
            f"{reply.session_stage}."
        )
Exemplo n.º 4
0
class ActorProxy(object):
    """Actor proxy that manages a set of remote actors.

    Args:
        group_name (str): Identifier of the group to which the actor belongs. It must be the same group name
            assigned to the actors (and roll-out clients, if any).
        num_actors (int): Expected number of actors in the group identified by ``group_name``.
        update_trigger (str): Number or percentage of ``MessageTag.FINISHED`` messages required to trigger
            learner updates, i.e., model training. Defaults to None, in which case the number of remote
            actors known to the proxy is used.
        proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy`` class
            for details. Defaults to None.
    """
    def __init__(
        self,
        group_name: str,
        num_actors: int,
        update_trigger: str = None,
        proxy_options: dict = None
    ):
        self.agent = None
        peers = {"actor": num_actors}
        if proxy_options is None:
            proxy_options = {}
        self._proxy = Proxy(group_name, "learner", peers, **proxy_options)
        self._actors = self._proxy.peers_name["actor"]  # remote actor ID's
        self._registry_table = RegisterTable(self._proxy.peers_name)
        if update_trigger is None:
            # Without an explicit trigger, wait for a FINISHED message from every actor.
            update_trigger = len(self._actors)
        self._registry_table.register_event_handler(
            f"actor:{MessageTag.FINISHED.value}:{update_trigger}", self._on_rollout_finish
        )
        self.logger = InternalLogger("ACTOR_PROXY")

    def roll_out(self, index: int, training: bool = True, model_by_agent: dict = None, exploration_params=None):
        """Collect roll-out data from remote actors.

        Args:
            index (int): Index of roll-out requests.
            training (bool): If true, the roll-out request is for training purposes.
            model_by_agent (dict): Models to be broadcast to remote actors for inference. Defaults to None.
            exploration_params: Exploration parameters to be used by the remote roll-out actors. Defaults to None.

        Returns:
            A 2-tuple ``(env_metrics, details)`` unpacked from the first entry of the registry table's
            result once enough ``MessageTag.FINISHED`` messages for ``index`` have been pushed.

        Raises:
            RuntimeError: If the proxy's message stream ends before the registry table reports a result
                for this roll-out. (Previously this situation surfaced as an ``UnboundLocalError``.)
        """
        payload = {
            PayloadKey.ROLLOUT_INDEX: index,
            PayloadKey.TRAINING: training,
            PayloadKey.MODEL: model_by_agent,
            PayloadKey.EXPLORATION_PARAMS: exploration_params
        }
        self._proxy.iscatter(MessageTag.ROLLOUT, SessionType.TASK, [(actor, payload) for actor in self._actors])
        self.logger.info(f"Sent roll-out requests to {self._actors} for ep-{index}")

        # Receive roll-out results from remote actors
        for msg in self._proxy.receive():
            # NOTE(review): the check discards every message whose index differs from ``index``, while the
            # log text says "or greater" -- confirm which of the two reflects the intended behavior.
            if msg.payload[PayloadKey.ROLLOUT_INDEX] != index:
                self.logger.info(
                    f"Ignore a message of type {msg.tag} with ep {msg.payload[PayloadKey.ROLLOUT_INDEX]} "
                    f"(expected {index} or greater)"
                )
                continue
            if msg.tag == MessageTag.FINISHED:
                # Once the registry table returns a non-empty result, its first entry is unpacked as
                # (metrics, details) -- presumably the return value of ``_on_rollout_finish``.
                result = self._registry_table.push(msg)
                if result:
                    env_metrics, details = result[0]
                    return env_metrics, details

        # Only reachable if receive() stops yielding messages before the trigger condition is met.
        raise RuntimeError(
            f"Message stream ended before roll-out results for ep-{index} were collected"
        )

    def _on_rollout_finish(self, messages: List[Message]):
        """Build per-source metrics and details dicts from the given messages.

        Args:
            messages (List[Message]): Messages handed over by the registry table for the registered event.

        Returns:
            A 2-tuple ``(metrics, details)``, each a dict keyed by message source.
        """
        metrics = {msg.source: msg.payload[PayloadKey.METRICS] for msg in messages}
        details = {msg.source: msg.payload[PayloadKey.DETAILS] for msg in messages}
        return metrics, details

    def terminate(self):
        """Tell the remote actors to exit."""
        self._proxy.ibroadcast(
            component_type="actor", tag=MessageTag.EXIT, session_type=SessionType.NOTIFICATION
        )
        self.logger.info("Exiting...")
class ActorProxy(object):
    """Learner-side proxy wrapper that sends roll-out requests to remote actors.

    Args:
        proxy_params: Keyword arguments used to build the underlying ``Proxy`` instance
            (``component_type`` is fixed to ``"learner"``).
        experience_collecting_func (Callable): Called with a dict that maps each replying source to its
            ``PayloadKey.DETAILS`` entry; its return value is handed back to the caller of ``roll_out``.
    """
    def __init__(self, proxy_params, experience_collecting_func: Callable):
        self._proxy = Proxy(component_type="learner", **proxy_params)
        self._experience_collecting_func = experience_collecting_func

    def roll_out(self,
                 model_dict: dict = None,
                 exploration_params=None,
                 done: bool = False,
                 return_details: bool = True):
        """Dispatch roll-out requests to the remote actors and gather their replies.

        The signature is intended to match ``SimpleActor``'s ``roll_out`` so that a ``SimpleLearner`` can
        call either one without caring whether the roll-out happens locally or remotely.

        Args:
            model_dict (dict): Models for the remote agents to load before rolling out; forwarded under
                ``PayloadKey.MODEL`` (may be None).
            exploration_params: Exploration parameters; forwarded under ``PayloadKey.EXPLORATION_PARAMS``.
            done (bool): If True, this is the last call. A ``PayloadKey.DONE`` notification is broadcast to
                the ``"actor"`` components (used to signal them to exit) and no roll-out is requested.
            return_details (bool): Forwarded under ``PayloadKey.RETURN_DETAILS``. When False, the
                experience collecting function is not called and the second return value is None.

        Returns:
            ``(None, None)`` if ``done`` is True; otherwise ``(performance, details)`` where ``performance``
            is a list of ``(source, performance)`` pairs, one per reply, and ``details`` is the output of
            the experience collecting function (or None).
        """
        if done:
            exit_signal = {PayloadKey.DONE: True}
            self._proxy.ibroadcast(component_type="actor",
                                   tag=MessageTag.ROLLOUT,
                                   session_type=SessionType.NOTIFICATION,
                                   payload=exit_signal)
            return None, None

        # Build one (destination, payload) pair per remote actor.
        payloads = []
        for peer in self._proxy.peers_name["actor"]:
            request = {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EXPLORATION_PARAMS: exploration_params,
                PayloadKey.RETURN_DETAILS: return_details
            }
            payloads.append((peer, request))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(tag=MessageTag.ROLLOUT,
                                      session_type=SessionType.TASK,
                                      destination_payload_list=payloads)

        # Two separate passes over the replies: performance pairs first, then details by source.
        performance = []
        for reply in replies:
            performance.append((reply.source, reply.payload[PayloadKey.PERFORMANCE]))

        details_by_source = {}
        for reply in replies:
            details_by_source[reply.source] = reply.payload[PayloadKey.DETAILS]

        if return_details:
            details = self._experience_collecting_func(details_by_source)
        else:
            details = None

        return performance, details