Example #1
  def build(self, name='r2d2'):
    """Build the distributed agent topology."""
    program = lp.Program(name=name)

    with program.group('replay'):
      replay = program.add_node(lp.ReverbNode(self.replay))

    with program.group('counter'):
      counter = program.add_node(lp.CourierNode(self.counter))

    with program.group('learner'):
      learner = program.add_node(lp.CourierNode(self.learner, replay, counter))

    with program.group('cacher'):
      cacher = program.add_node(
          lp.CacherNode(learner, refresh_interval_ms=2000, stale_after_ms=4000))

    with program.group('evaluator'):
      program.add_node(lp.CourierNode(self.evaluator, cacher, counter))

    # Generate an epsilon for each actor.
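    # np.logspace(1, 8, n, base=0.4) spans 0.4**1 down to 0.4**8; flipping puts
    # the epsilons in ascending order (roughly 6.6e-4 up to 0.4), so the first
    # actor created below explores the least and the last explores the most.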
    epsilons = np.flip(np.logspace(1, 8, self._num_actors, base=0.4), axis=0)

    with program.group('actor'):
      for epsilon in epsilons:
        program.add_node(
            lp.CourierNode(self.actor, replay, cacher, counter, epsilon))

    return program
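
A minimal sketch of how a program built this way is typically launched locally with Launchpad (this assumes the dm-launchpad `lp.launch` API; `agent` stands in for an instance of any of the builder classes in these examples, with constructor arguments omitted):

import launchpad as lp

# `agent` is an already-constructed distributed agent exposing the `build`
# method shown above; its constructor arguments are omitted here.
program = agent.build()

# Run every node group in the current process, one thread per node. Other
# launch types (e.g. LOCAL_MULTI_PROCESSING) follow the same pattern.
lp.launch(program, launch_type=lp.LaunchType.LOCAL_MULTI_THREADING)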
Example #2
  def build(self, name='impala'):
    """Build the distributed agent topology."""
    program = lp.Program(name=name)

    with program.group('replay'):
      queue = program.add_node(lp.ReverbNode(self.queue))

    with program.group('counter'):
      counter = program.add_node(lp.CourierNode(self.counter))

    with program.group('learner'):
      learner = program.add_node(
          lp.CourierNode(self.learner, queue, counter))

    with program.group('evaluator'):
      program.add_node(lp.CourierNode(self.evaluator, learner, counter))

    with program.group('cacher'):
      cacher = program.add_node(
          lp.CacherNode(learner, refresh_interval_ms=2000, stale_after_ms=4000))

    with program.group('actor'):
      for _ in range(self._num_actors):
        program.add_node(lp.CourierNode(self.actor, queue, cacher, counter))

    return program
Example #3
    def build(self, name: str = "madqn") -> Any:
        """Build the distributed system as a graph program.

        Args:
            name (str, optional): system name. Defaults to "madqn".

        Returns:
            Any: graph program for distributed system training.
        """

        program = lp.Program(name=name)

        with program.group("replay"):
            replay = program.add_node(lp.ReverbNode(self.replay))

        with program.group("counter"):
            counter = program.add_node(lp.CourierNode(self.counter, self._checkpoint))

        if self._max_executor_steps:
            with program.group("coordinator"):
                _ = program.add_node(lp.CourierNode(self.coordinator, counter))

        with program.group("trainer"):
            trainer = program.add_node(lp.CourierNode(self.trainer, replay, counter))

        with program.group("evaluator"):
            program.add_node(lp.CourierNode(self.evaluator, trainer, counter, trainer))

        if not self._num_caches:
            # Use the trainer as a single variable source.
            sources = [trainer]
        else:
            with program.group("cacher"):
                # Create a set of trainer caches.
                sources = []
                for _ in range(self._num_caches):
                    cacher = program.add_node(
                        lp.CacherNode(
                            trainer, refresh_interval_ms=2000, stale_after_ms=4000
                        )
                    )
                    sources.append(cacher)

        with program.group("executor"):
            # Add executors which pull round-robin from our variable sources.
            for executor_id in range(self._num_exectors):
                source = sources[executor_id % len(sources)]
                program.add_node(
                    lp.CourierNode(
                        self.executor,
                        executor_id,
                        replay,
                        source,
                        counter,
                        trainer,
                    )
                )

        return program
Example #4
    def build(self, name='dqn'):
        """Build the distributed agent topology."""
        program = lp.Program(name=name)

        with program.group('replay'):
            replay = program.add_node(lp.ReverbNode(self.replay))

        with program.group('counter'):
            counter = program.add_node(lp.CourierNode(self.counter))

            if self._max_actor_steps:
                program.add_node(
                    lp.CourierNode(self.coordinator, counter,
                                   self._max_actor_steps))

        with program.group('learner'):
            learner = program.add_node(
                lp.CourierNode(self.learner, replay, counter))

        with program.group('evaluator'):
            program.add_node(lp.CourierNode(self.evaluator, learner, counter))

        # Generate an epsilon for each actor.
        epsilons = np.flip(np.logspace(1, 8, self._num_actors, base=0.4),
                           axis=0)

        with program.group('cacher'):
            # Create a set of learner caches.
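            # Note: this assumes self._num_caches >= 1; with zero caches the
            # actor group below would have no variable source to pull from.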
            sources = []
            for _ in range(self._num_caches):
                cacher = program.add_node(
                    lp.CacherNode(learner,
                                  refresh_interval_ms=2000,
                                  stale_after_ms=4000))
                sources.append(cacher)

        with program.group('actor'):
            # Add actors which pull round-robin from our variable sources.
            for actor_id, epsilon in enumerate(epsilons):
                source = sources[actor_id % len(sources)]
                program.add_node(
                    lp.CourierNode(self.actor, replay, source, counter,
                                   epsilon))

        return program
Example #5
    def build(self, name='dmpo'):
        """Build the distributed agent topology."""
        program = lp.Program(name=name)

        with program.group('replay'):
            replay = program.add_node(lp.ReverbNode(self.replay))

        with program.group('counter'):
            counter = program.add_node(lp.CourierNode(self.counter))

            if self._max_actor_steps:
                _ = program.add_node(
                    lp.CourierNode(self.coordinator, counter,
                                   self._max_actor_steps))

        with program.group('learner'):
            learner = program.add_node(
                lp.CourierNode(self.learner, replay, counter))

        with program.group('evaluator'):
            program.add_node(lp.CourierNode(self.evaluator, learner, counter))

        if not self._num_caches:
            # Use our learner as a single variable source.
            sources = [learner]
        else:
            with program.group('cacher'):
                # Create a set of learner caches.
                sources = []
                for _ in range(self._num_caches):
                    cacher = program.add_node(
                        lp.CacherNode(learner,
                                      refresh_interval_ms=2000,
                                      stale_after_ms=4000))
                    sources.append(cacher)

        with program.group('actor'):
            # Add actors which pull round-robin from our variable sources.
            for actor_id in range(self._num_actors):
                source = sources[actor_id % len(sources)]
                program.add_node(
                    lp.CourierNode(self.actor, replay, source, counter,
                                   actor_id))

        return program
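
Across these examples the cacher nodes play the same role: rather than every actor or executor querying the learner directly for parameters, a small set of caches is refreshed from the learner every 2 seconds (refresh_interval_ms=2000) and treated as stale after 4 seconds (stale_after_ms=4000), and the workers pull from those caches round-robin. This keeps the learner from being swamped by variable requests as the number of workers grows; examples #3 and #5 additionally fall back to the trainer/learner itself as the single variable source when no caches are configured.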