Example #1
def main(_):
    environment_factory = lp_utils.partial_kwargs(helpers.make_environment,
                                                  domain_name=FLAGS.domain,
                                                  task_name=FLAGS.task)

    batch_size = 32
    sequence_length = 20
    gradient_steps_per_actor_step = 1.0
    samples_per_insert = (gradient_steps_per_actor_step * batch_size *
                          sequence_length)
    num_actors = 1

    program = svg0_prior.DistributedSVG0(
        environment_factory=environment_factory,
        network_factory=lp_utils.partial_kwargs(
            svg0_prior.make_default_networks),
        batch_size=batch_size,
        sequence_length=sequence_length,
        samples_per_insert=samples_per_insert,
        entropy_regularizer_cost=1e-4,
        max_replay_size=int(2e6),
        target_update_period=250,
        num_actors=num_actors).build()

    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
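Example #1 is an excerpt and omits its imports. Below is a minimal sketch of the imports such an Acme/Launchpad launcher typically assumes; the module paths depend on the Acme version, and the `helpers` module and flag defaults are illustrative placeholders rather than part of the original script:

# Assumed imports for the SVG0 launcher above; paths, flag defaults and the
# local `helpers` module are illustrative only.
from absl import app
from absl import flags
import launchpad as lp
from acme.agents.tf import svg0_prior
from acme.utils import lp_utils
import helpers  # hypothetical local module providing make_environment

FLAGS = flags.FLAGS
flags.DEFINE_string('domain', 'cartpole', 'Control suite domain name.')
flags.DEFINE_string('task', 'balance', 'Control suite task name.')

if __name__ == '__main__':
    app.run(main)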
Example #2
def main(_: Any) -> None:

    # Environment.
    environment_factory = functools.partial(
        debugging_utils.make_environment,
        env_name=FLAGS.env_name,
        action_space=FLAGS.action_space,
    )

    # Networks.
    network_factory = lp_utils.partial_kwargs(
        madqn.make_default_networks,
        archecture_type=ArchitectureType.recurrent)

    # Checkpointer appends "Checkpoints" to checkpoint_dir
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Log every [log_every] seconds.
    log_every = 10
    logger_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=log_every,
    )

    # Distributed program.
    program = madqn.MADQN(
        environment_factory=environment_factory,
        network_factory=network_factory,
        logger_factory=logger_factory,
        num_executors=1,
        exploration_scheduler_fn=LinearExplorationScheduler,
        epsilon_min=0.05,
        epsilon_decay=5e-4,
        optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        trainer_fn=madqn.training.MADQNRecurrentTrainer,
        executor_fn=madqn.execution.MADQNRecurrentExecutor,
        batch_size=32,
    ).build()

    # Ensure that only the trainer runs on the GPU, while the other processes run on the CPU.
    gpu_id = -1
    env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=env_vars),
        "executor": PythonProcess(env=env_vars),
    }

    # Launch.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
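In the Mava launchers above, setting CUDA_VISIBLE_DEVICES to -1 hides the GPU from the evaluator and executor processes, while the trainer's empty resource list leaves any local GPU visible to it. A hedged variant, assuming you instead want to pin the trainer to a specific GPU (device 0 here):

# Sketch only: pin the trainer to GPU 0 and keep the other processes on CPU.
local_resources = {
    "trainer": PythonProcess(env={"CUDA_VISIBLE_DEVICES": "0"}),
    "evaluator": PythonProcess(env={"CUDA_VISIBLE_DEVICES": "-1"}),
    "executor": PythonProcess(env={"CUDA_VISIBLE_DEVICES": "-1"}),
}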
Example #3
    def test_agent(self, distributional_critic):
        # Create objectives.
        reward_objectives, qvalue_objectives = make_objectives()

        network_factory = lp_utils.partial_kwargs(
            make_networks, distributional_critic=distributional_critic)

        agent = mompo.DistributedMultiObjectiveMPO(
            reward_objectives,
            qvalue_objectives,
            environment_factory=make_environment,
            network_factory=network_factory,
            num_actors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #4
def main(_: Any) -> None:

    # environment
    environment_factory = functools.partial(smac_utils.make_environment,
                                            map_name=FLAGS.map_name)

    # Networks.
    network_factory = lp_utils.partial_kwargs(
        vdn.make_default_networks, policy_networks_layer_sizes=[64, 64])

    # Checkpointer appends "Checkpoints" to checkpoint_dir
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Log every [log_every] seconds.
    log_every = 10
    logger_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=log_every,
    )

    # distributed program
    program = vdn.VDN(
        environment_factory=environment_factory,
        network_factory=network_factory,
        logger_factory=logger_factory,
        num_executors=1,
        exploration_scheduler_fn=LinearExplorationScheduler,
        epsilon_min=0.05,
        epsilon_decay=1e-5,
        optimizer=snt.optimizers.SGD(learning_rate=1e-2),
        checkpoint_subpath=checkpoint_dir,
        batch_size=512,
        executor_variable_update_period=100,
        target_update_period=200,
        max_gradient_norm=10.0,
        eval_loop_fn=MonitorParallelEnvironmentLoop,
        eval_loop_fn_kwargs={
            "path": checkpoint_dir,
            "record_every": 100
        },
    ).build()

    # launch
    gpu_id = -1
    env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=env_vars),
        "executor": PythonProcess(env=env_vars),
    }
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
Example #5
    def test_agent(self):
        env_factory = lambda x: fakes.fake_atari_wrapped(oar_wrapper=True)
        net_factory = lambda spec: networks.R2D2AtariNetwork(spec.num_values)

        agent = r2d2.DistributedR2D2(
            environment_factory=env_factory,
            network_factory=net_factory,
            num_actors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
            replay_period=1,
            burn_in_length=1,
            trace_length=10,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #6
def main(_: Any) -> None:

    # Environment.
    environment_factory = functools.partial(
        debugging_utils.make_environment,
        env_name=FLAGS.env_name,
        action_space=FLAGS.action_space,
    )

    # Networks.
    network_factory = lp_utils.partial_kwargs(maddpg.make_default_networks,
                                              shared_weights=False)

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Log every [log_every] seconds.
    log_every = 10
    logger_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=log_every,
    )

    # Distributed program.
    program = maddpg.MADDPG(
        environment_factory=environment_factory,
        network_factory=network_factory,
        logger_factory=logger_factory,
        num_executors=1,
        policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        max_gradient_norm=40.0,
        trainer_fn=maddpg.MADDPGNetworkedTrainer,
        architecture=architectures.NetworkedQValueCritic,
        connection_spec=custom_connected_network_spec,
        shared_weights=False,
    ).build()

    # Ensure that only the trainer runs on the GPU, while the other processes run on the CPU.
    gpu_id = -1
    env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=env_vars),
        "executor": PythonProcess(env=env_vars),
    }

    # Launch.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
Example #7
def main(_: Any) -> None:
    # Environment.
    environment_factory = functools.partial(
        pettingzoo_utils.make_environment,
        env_class=FLAGS.env_class,
        env_name=FLAGS.env_name,
    )

    # Networks.
    network_factory = lp_utils.partial_kwargs(
        maddpg.make_default_networks,
        archecture_type=ArchitectureType.recurrent)

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Log every [log_every] seconds.
    log_every = 10
    logger_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=log_every,
    )

    # Distributed program.
    program = maddpg.MADDPG(
        environment_factory=environment_factory,
        network_factory=network_factory,
        logger_factory=logger_factory,
        num_executors=1,
        policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        max_gradient_norm=40.0,
        trainer_fn=maddpg.training.MADDPGDecentralisedRecurrentTrainer,
        executor_fn=maddpg.execution.MADDPGRecurrentExecutor,
        batch_size=32,
    ).build()

    # Ensure that only the trainer runs on the GPU, while the other processes run on the CPU.
    gpu_id = -1
    env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=env_vars),
        "executor": PythonProcess(env=env_vars),
    }

    # Launch.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
Example #8
def main(_):
  # Define a program which describes the topology of communicating nodes and
  # edges. In more involved examples, several programs can be defined and
  # launched at once.
  program = make_program(num_producers=FLAGS.num_producers)

  # Note that at launch time, none of the producers has been instantiated.
  # Producers are instantiated only at runtime.
  lp.launch(program)  
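`make_program` is defined elsewhere in this example. A minimal sketch in the spirit of Launchpad's consumer/producers example, assuming a toy Producer/Consumer pair (the class bodies are illustrative, not the original ones):

import launchpad as lp


class Producer:
    """Toy worker: returns the value it is asked to work on."""

    def work(self, context):
        return context


class Consumer:
    """Collects one result from every producer, then stops the program."""

    def __init__(self, producers):
        self._producers = producers

    def run(self):
        # Issue asynchronous calls to all producers, then block on the results.
        futures = [producer.futures.work(i)
                   for i, producer in enumerate(self._producers)]
        results = [future.result() for future in futures]
        print('Results:', results)
        lp.stop()


def make_program(num_producers):
    """Builds the topology: `num_producers` producers feeding one consumer."""
    program = lp.Program('consumer_producers')
    with program.group('producer'):
        producers = [
            program.add_node(lp.CourierNode(Producer))
            for _ in range(num_producers)
        ]
    program.add_node(lp.CourierNode(Consumer, producers=producers),
                     label='consumer')
    return program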
Example #9
File: run_dqn.py Project: deepmind/acme
def main(_):
  config = build_experiment_config()
  # Evaluation is disabled for performance reasons. Set `num_eval_episodes` to
  # a positive number and remove `evaluator_factories=[]` to enable it.
  if FLAGS.run_distributed:
    program = experiments.make_distributed_experiment(
        experiment=config, num_actors=4)
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
  else:
    experiments.run_experiment(experiment=config, num_eval_episodes=0)
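`build_experiment_config()` lives elsewhere in run_dqn.py. A hedged sketch of what such a config can look like with Acme's JAX experiment API; `make_my_environment` and `make_my_networks` are hypothetical stand-ins for the real helpers, the builder construction details vary by Acme version, and the step budget is arbitrary:

# Sketch only: an ExperimentConfig for DQN with evaluation disabled.
from acme.agents.jax import dqn
from acme.jax import experiments


def build_experiment_config() -> experiments.ExperimentConfig:
  def environment_factory(seed: int):
    del seed
    return make_my_environment()  # hypothetical environment helper

  return experiments.ExperimentConfig(
      builder=dqn.DQNBuilder(dqn.DQNConfig()),
      environment_factory=environment_factory,
      network_factory=make_my_networks,  # hypothetical network helper
      evaluator_factories=[],  # evaluation disabled, as noted in the comment above
      seed=0,
      max_num_actor_steps=1_000_000)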
Example #10
def main(_):
    environment_factory = lp_utils.partial_kwargs(helpers.make_environment,
                                                  task=FLAGS.task)

    program = d4pg.DistributedD4PG(environment_factory=environment_factory,
                                   network_factory=lp_utils.partial_kwargs(
                                       helpers.make_networks),
                                   num_actors=2).build()

    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #11
def main(_):
    environment_factory = lp_utils.partial_kwargs(helpers.make_environment,
                                                  task=FLAGS.task)

    program = d4pg.DistributedD4PG(environment_factory=environment_factory,
                                   network_factory=lp_utils.partial_kwargs(
                                       helpers.make_networks),
                                   num_actors=2).build()

    lp.launch(program, lp.LaunchType.LOCAL_MULTI_PROCESSING)
Example #12
def main(_):
  config = build_experiment_config()
  if FLAGS.run_distributed:
    program = experiments.make_distributed_experiment(
        experiment=config, num_actors=4)
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
  else:
    experiments.run_experiment(
        experiment=config,
        eval_every=FLAGS.eval_every,
        num_eval_episodes=FLAGS.evaluation_episodes)
Example #13
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
    program = sac.DistributedSAC(
        environment_factory=environment_factory,
        network_factory=sac.make_networks,
        config=sac.SACConfig(num_sgd_steps_per_step=64),
        num_actors=4,
        seed=1,
        max_number_of_steps=100).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #14
def main(_: Any) -> None:
    # Environment.
    environment_factory = lp_utils.partial_kwargs(robocup_utils.make_environment)

    # Networks.
    network_factory = lp_utils.partial_kwargs(
        mad4pg.make_default_networks, archecture_type=ArchitectureType.recurrent
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Log every [log_every] seconds.
    log_every = 10
    logger_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=log_every,
    )

    program = mad4pg.MAD4PG(
        architecture=StateBasedQValueCritic,
        environment_factory=environment_factory,
        network_factory=network_factory,
        logger_factory=logger_factory,
        num_executors=int(FLAGS.num_executors),
        samples_per_insert=None,
        trainer_fn=MAD4PGStateBasedRecurrentTrainer,
        executor_fn=MAD4PGRecurrentExecutor,
        shared_weights=True,
        checkpoint_subpath=checkpoint_dir,
        batch_size=265,
    ).build()

    # launch
    gpu_id = -1
    env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=env_vars),
        "executor": PythonProcess(env=env_vars),
    }
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
Example #15
    def test_maddpg_on_debugging_env(self) -> None:
        """Tests that the system can run on the simple spread
        debugging environment without crashing."""

        # environment
        environment_factory = functools.partial(
            debugging_utils.make_environment,
            env_name="simple_spread",
            action_space="continuous",
        )

        # networks
        network_factory = lp_utils.partial_kwargs(make_networks)

        # system
        system = maddpg.MADDPG(
            environment_factory=environment_factory,
            network_factory=network_factory,
            num_executors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
            policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
            critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
            checkpoint=False,
        )
        program = system.build()

        (trainer_node,) = program.groups["trainer"]
        trainer_node.disable_run()

        # Launch GPU config: do not use the GPU.
        gpu_id = -1
        env_vars = {"CUDA_VISIBLE_DEVICES": str(gpu_id)}
        local_resources = {
            "trainer": PythonProcess(env=env_vars),
            "evaluator": PythonProcess(env=env_vars),
            "executor": PythonProcess(env=env_vars),
        }

        lp.launch(
            program,
            launch_type="test_mt",
            local_resources=local_resources,
        )

        trainer: mava.Trainer = trainer_node.create_handle().dereference()

        for _ in range(5):
            trainer.step()
Example #16
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
    sac_config = sac.SACConfig(num_sgd_steps_per_step=64)
    sac_builder = sac.SACBuilder(sac_config)

    ail_config = ail.AILConfig(direct_rl_batch_size=sac_config.batch_size *
                               sac_config.num_sgd_steps_per_step)

    def network_factory(spec: specs.EnvironmentSpec) -> ail.AILNetworks:
        def discriminator(*args, **kwargs) -> networks_lib.Logits:
            return ail.DiscriminatorModule(environment_spec=spec,
                                           use_action=True,
                                           use_next_obs=True,
                                           network_core=ail.DiscriminatorMLP(
                                               [4, 4], ))(*args, **kwargs)

        discriminator_transformed = hk.without_apply_rng(
            hk.transform_with_state(discriminator))

        return ail.AILNetworks(ail.make_discriminator(
            spec, discriminator_transformed),
                               imitation_reward_fn=ail.rewards.gail_reward(),
                               direct_rl_networks=sac.make_networks(spec))

    def policy_network(
            network: ail.AILNetworks,
            eval_mode: bool = False) -> actor_core_lib.FeedForwardPolicy:
        return sac.apply_policy_and_sample(network.direct_rl_networks,
                                           eval_mode=eval_mode)

    program = ail.DistributedAIL(
        environment_factory=environment_factory,
        rl_agent=sac_builder,
        config=ail_config,
        network_factory=network_factory,
        seed=0,
        batch_size=sac_config.batch_size * sac_config.num_sgd_steps_per_step,
        make_demonstrations=functools.partial(
            helpers.make_demonstration_iterator,
            dataset_name=FLAGS.dataset_name),
        policy_network=policy_network,
        evaluator_policy_network=(lambda n: policy_network(n, eval_mode=True)),
        num_actors=4,
        max_number_of_steps=100,
        discriminator_loss=ail.losses.gail_loss()).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #17
def main(_):
    # Configure the environment factory with requested task.
    make_environment = functools.partial(helpers.make_environment,
                                         domain_name=_DOMAIN.value,
                                         task_name=_TASK.value)

    # Construct the program.
    program_builder = mpo.DistributedMPO(
        make_environment,
        make_networks,
        target_policy_update_period=25,
        max_actor_steps=_MAX_ACTOR_STEPS.value,
        num_actors=4)

    lp.launch(programs=program_builder.build())
Example #18
def main(_):
  task = FLAGS.task
  env_factory = lambda seed: helpers.make_environment(task)

  environment_spec = specs.make_environment_spec(env_factory(True))
  program = td3.DistributedTD3(
      environment_factory=env_factory,
      environment_spec=environment_spec,
      network_factory=td3.make_networks,
      config=td3.TD3Config(),
      num_actors=4,
      seed=1,
      max_number_of_steps=100).build()

  lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #19
def main(_):
    # Configure the environment factory with requested task.
    make_environment = functools.partial(helpers.make_environment,
                                         domain_name=_DOMAIN.value,
                                         task_name=_TASK.value)

    # Construct the program.
    program_builder = d4pg.DistributedD4PG(
        make_environment,
        make_networks,
        max_actor_steps=_MAX_ACTOR_STEPS.value,
        num_actors=4)

    # Launch experiment.
    lp.launch(programs=program_builder.build())
Example #20
def main(_):
    task = FLAGS.task
    environment_factory = lambda seed: helpers.make_environment(task)
    config = ppo.PPOConfig(unroll_length=16,
                           num_minibatches=32,
                           num_epochs=10,
                           batch_size=2048 // 16)
    program = ppo.DistributedPPO(environment_factory=environment_factory,
                                 network_factory=ppo.make_continuous_networks,
                                 config=config,
                                 seed=FLAGS.seed,
                                 num_actors=4,
                                 max_number_of_steps=100).build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #21
def main(_):
    task = FLAGS.env_name
    environment_factory = lambda seed: helpers.make_environment(task)
    config = value_dice.ValueDiceConfig(num_sgd_steps_per_step=64)
    agent = value_dice.DistributedValueDice(
        environment_factory=environment_factory,
        network_factory=value_dice.make_networks,
        config=config,
        num_actors=4,
        log_to_bigtable=True,
        max_number_of_steps=100,
        seed=1,
        make_demonstrations=functools.partial(
            helpers.make_demonstration_iterator,
            dataset_name=FLAGS.dataset_name))
    program = agent.build()

    # Launch experiment.
    lp.launch(program, xm_resources=lp_utils.make_xm_docker_resources(program))
Example #22
    def test_agent(self):

        env_factory = lambda seed: fakes.fake_atari_wrapped(oar_wrapper=True)

        config = r2d2.R2D2Config(batch_size=1,
                                 trace_length=5,
                                 sequence_period=1,
                                 samples_per_insert=1.,
                                 min_replay_size=32,
                                 burn_in_length=1,
                                 prefetch_size=2,
                                 target_update_period=2500,
                                 max_replay_size=100_000,
                                 importance_sampling_exponent=0.6,
                                 priority_exponent=0.9,
                                 max_priority_weight=0.9,
                                 bootstrap_n=5,
                                 clip_rewards=False,
                                 variable_update_period=400)

        dummy_seed = 1
        agent = r2d2.DistributedR2D2FromConfig(
            environment_factory=env_factory,
            environment_spec=acme.make_environment_spec(
                env_factory(dummy_seed)),
            network_factory=functools.partial(r2d2.make_atari_networks,
                                              config.batch_size),
            config=config,
            seed=0,
            num_actors=1,
        )

        program = agent.build()
        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()  # pytype: disable=attribute-error

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #23
    def test_distributed_sac_fd(self):
        def make_env(seed):
            del seed
            return fakes.ContinuousEnvironment(episode_length=10,
                                               action_dim=3,
                                               observation_dim=5,
                                               bounded=True)

        spec = specs.make_environment_spec(make_env(seed=0))

        batch_size = 10
        sac_config = sac.SACConfig(
            batch_size=batch_size,
            target_entropy=sac.target_entropy_from_env_spec(spec),
            min_replay_size=16,
            samples_per_insert=2)
        lfd_config = config.LfdConfig(initial_insert_count=0,
                                      demonstration_ratio=0.2)
        sac_fd_config = sacfd_agents.SACfDConfig(lfd_config=lfd_config,
                                                 sac_config=sac_config)

        agent = sacfd_agents.DistributedSACfD(
            environment_factory=make_env,
            network_factory=sac.make_networks,
            sac_fd_config=sac_fd_config,
            lfd_iterator_fn=fake_demonstration_iterator,
            seed=0,
            num_actors=2)

        program = agent.build()
        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()  # pytype: disable=attribute-error

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #24
def main(_):
  # Configure the environment factory with requested task.
  make_environment = functools.partial(
      helpers.make_environment,
      domain_name=_DOMAIN.value,
      task_name=_TASK.value,
      from_pixels=True,
      frames_to_stack=3,
      num_action_repeats=2)

  # Construct the program.
  program_builder = dmpo.DistributedDistributionalMPO(
      make_environment,
      make_networks,
      n_step=3,  # Reduce the n-step to account for action-repeat.
      max_actor_steps=_MAX_ACTOR_STEPS.value,
      num_actors=4)

  # Launch experiment.
  lp.launch(
      programs=program_builder.build()
  )
Example #25
    def test_control_suite(self):
        """Tests that the agent can run on the control suite without crashing."""

        agent = svg0_prior.DistributedSVG0(
            environment_factory=lambda x: fakes.ContinuousEnvironment(),
            network_factory=make_networks,
            num_actors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #26
    def test_agent(self):

        agent = mpo.DistributedMPO(
            environment_factory=lambda x: fakes.ContinuousEnvironment(
                bounded=True),
            network_factory=make_networks,
            num_actors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #27
    def test_consumer_steps(self):
        """Runs the program and makes sure the consumer can run 10 steps."""
        program = launch.make_program(num_producers=2)

        # Retrieve the consumer node from the program. Nodes are organized as a
        # mapping of label->nodes, stored as a dict in `program.groups`
        (consumer_node, ) = program.groups['consumer']
        # Disable the automatic execution of its `run()` method.
        consumer_node.disable_run()  # pytype: disable=attribute-error

        # Launch all workers declared by the program. Remember to set the launch
        # type here (test & multithreaded).
        lp.launch(program, launch_type='test_mt')

        # Dereference `consumer_node`'s courier handle explicitly to obtain courier
        # client of it.
        consumer = consumer_node.create_handle().dereference()

        # Success criteria for this integration test defined as consumer being
        # able to take 10 steps.
        for _ in range(10):
            consumer.step()
Example #28
    def test_atari(self):
        """Tests that the agent can run for some steps without crashing."""
        env_factory = lambda x: fakes.fake_atari_wrapped(oar_wrapper=True)
        net_factory = lambda spec: networks.IMPALAAtariNetwork(spec.num_values)

        agent = impala.DistributedIMPALA(
            environment_factory=env_factory,
            network_factory=net_factory,
            num_actors=2,
            batch_size=32,
            sequence_length=5,
            sequence_period=1,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #29
    def test_atari(self):
        """Tests that the agent can run for some steps without crashing."""
        env_factory = lambda x: fakes.fake_atari_wrapped()
        net_factory = lambda spec: networks.DQNAtariNetwork(spec.num_values)

        agent = dqn.DistributedDQN(
            environment_factory=env_factory,
            network_factory=net_factory,
            num_actors=2,
            batch_size=32,
            min_replay_size=32,
            max_replay_size=1000,
        )
        program = agent.build()

        (learner_node, ) = program.groups['learner']
        learner_node.disable_run()

        lp.launch(program, launch_type='test_mt')

        learner: acme.Learner = learner_node.create_handle().dereference()

        for _ in range(5):
            learner.step()
Example #30
def main(_):
    # Configure the environment factory with requested task.
    make_environment = functools.partial(helpers.make_environment,
                                         domain_name=_DOMAIN.value,
                                         task_name=_TASK.value,
                                         from_pixels=True,
                                         frames_to_stack=3,
                                         flatten_stack=True,
                                         num_action_repeats=2)

    # Construct the program.
    program_builder = dmpo.DistributedDistributionalMPO(
        make_environment,
        make_networks,
        target_policy_update_period=100,
        max_actor_steps=_MAX_ACTOR_STEPS.value,
        num_actors=4,
        samples_per_insert=256,
        n_step=3,  # Reduce the n-step to account for action-repeat.
        observation_augmentation=image_augmentation.pad_and_crop,
    )

    # Launch experiment.
    lp.launch(programs=program_builder.build())