Example #1

# NOTE: The top of this example is truncated in the source; the imports and the
# helpers rogue_config, RogueEnvExt, and EXPAND are defined in the omitted part.
def config() -> Config:
    c = Config()
    c.save_freq = None
    c.eval_freq = None
    c.eval_env = RogueEnvExt(
        RogueEnv(
            config_dict=rogue_config(2),
            max_steps=500,
            stair_reward=50.0,
            image_setting=EXPAND,
        ))
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
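    # Conv layers with the Nature-DQN kernel sizes (8, 4, 3) but stride 1
    # throughout, presumably to fit the small Rogue screen.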
    CNN_PARAM = [(8, 1), (4, 1), (3, 1)]
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.ac_conv(
            kernel_and_strides=CNN_PARAM,
            output_dim=256,
        ),
    )
    c.nworkers = 32
    c.nsteps = 125
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
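    # One rollout is nworkers * nsteps = 4000 transitions, split into 4 PPO minibatches.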
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    return c


if __name__ == "__main__":
    cli.run_cli(config,
                rainy.agents.PpoAgent,
                script_path=os.path.realpath(__file__))
Example #2

import os

from torch.optim import RMSprop

from rainy import Config, net
from rainy.agents import A2cAgent
from rainy.envs import Atari, atari_parallel
import rainy.utils.cli as cli


def config() -> Config:
    c = Config()
    env_use = "Pong"
    c.set_env(lambda: Atari(env_use, frame_stack=False))
    c.set_optimizer(
        lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
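    # No frame stacking here, so the GRU-based recurrent policy carries the
    # temporal context instead.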
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    #c.set_net_fn('actor-critic', net.actor_critic.ac_conv())
    c.nworkers = 16
    c.nsteps = 5
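    # Classic A2C rollout: 16 workers x 5 steps = 80 transitions per update.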
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 0.5
    c.use_gae = False
    c.max_steps = int(2e7)
    c.eval_env = Atari(env_use)
    c.use_reward_monitor = True
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    print("GRU on Pong!")
    return c


if __name__ == '__main__':
    cli.run_cli(config(), A2cAgent, script_path=os.path.realpath(__file__))
Example #3

import os
from rainy import Config
from rainy.agents import PpoAgent
import rainy.utils.cli as cli
from rogue_gym.envs import ImageSetting, StatusFlag, DungeonType
from torch.optim import Adam
from env import set_env
from net import a2c_conv

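# EXPAND renders the dungeon as a symbol image and omits the status line.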
EXPAND = ImageSetting(dungeon=DungeonType.SYMBOL, status=StatusFlag.EMPTY)
AGENT = PpoAgent


def config() -> Config:
    c = Config()
    set_env(c, EXPAND)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv())
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c


if __name__ == '__main__':
    cli.run_cli(config(), AGENT, script_path=os.path.realpath(__file__))
Example #4

import os
import rainy.utils.cli as cli
from rainy.envs import Atari, atari_parallel
from int_rew import rnd


def config(envname: str = "MontezumaRevenge") -> rnd.RNDConfig:
    c = rnd.RNDConfig()
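    # Random Network Distillation: an exploration bonus from the error of
    # predicting a fixed, randomly initialized network's features.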
    c.set_env(lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.nworkers = 64
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
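    # Use only ~32 workers' worth of samples for the RND auxiliary (predictor)
    # loss, keeping its effective batch size fixed as nworkers grows.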
    c.auxloss_use_ratio = min(1.0, 32.0 / c.nworkers)
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=rnd.atari_config())
    c.episode_log_freq = 1000
    c.eval_freq = None
    c.save_freq = int(1e8)
    return c


if __name__ == "__main__":
    cli.run_cli(config, rnd.RNDAgent, script_path=os.path.realpath(__file__))
Example #5

from os.path import realpath
import ppo_atari
import rainy
from rainy.envs import Atari, atari_parallel
import rainy.utils.cli as cli


def config() -> rainy.Config:
    c = ppo_atari.config()
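    # Flickering frames (observations may be randomly blanked) make the game
    # partially observable; the GRU policy below compensates.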
    c.set_env(lambda: Atari('Breakout', flicker_frame=True, frame_stack=False))
    c.set_parallel_env(atari_parallel(frame_stack=False))
    c.set_net_fn('actor-critic',
                 rainy.net.actor_critic.ac_conv(rnn=rainy.net.GruBlock))
    c.eval_env = Atari('Breakout', frame_stack=False)
    return c


if __name__ == '__main__':
    cli.run_cli(config(),
                rainy.agents.PpoAgent,
                script_path=realpath(__file__))
Example #6

import os
import rainy
from rainy.utils.cli import run_cli
from rainy.envs import MultiProcEnv
from torch.optim import Adam


def config() -> rainy.Config:
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.1
    c.grad_clip = 0.1
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
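    # One rollout is 8 workers x 32 steps = 256 transitions, i.e. 4 minibatches of 64.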
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = None
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c


if __name__ == '__main__':
    run_cli(config(),
            rainy.agents.PpoAgent,
            script_path=os.path.realpath(__file__))
Example #7

# NOTE: The top of this example is truncated in the source. The opening below is
# reconstructed by analogy with Example #4; the envname default is an assumption.
import os

import click
from torch.optim import Adam

import rainy.utils.cli as cli
from rainy.envs import Atari, atari_parallel
from int_rew import rnd


def config(
    envname: str = "MontezumaRevenge", rnd_lr: float = 5.0e-5
) -> rnd.RNDConfig:
    c = rnd.RNDConfig()
    c.set_env(
        lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    c.set_optimizer(lambda params: Adam(params, lr=rnd_lr),
                    key="rnd_separated")
    c.set_parallel_env(atari_parallel())
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.nworkers = 64
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    c.auxloss_use_ratio = min(1.0, 32.0 / c.nworkers)
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=rnd.atari_config())
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = int(1e8)
    return c


if __name__ == "__main__":
    options = [click.Option(["--rnd-lr"], type=float, default=5.0e-5)]
    cli.run_cli(
        config,
        rnd.TunedRNDAgent,
        os.path.realpath(__file__),
        options,
    )
Example #8

import os

from rainy import Config, net
from rainy.agents import DoubleDqnAgent
from rainy.envs import Atari
from rainy.lib.explore import EpsGreedy, LinearCooler
import rainy.utils.cli as cli
from torch.optim import RMSprop


def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout'))
    c.set_optimizer(lambda params: RMSprop(
        params, lr=0.00025, alpha=0.95, eps=0.01, centered=True))
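    # Epsilon-greedy exploration: epsilon anneals linearly from 1.0 to 0.1
    # over the first 1e6 steps.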
    c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6))))
    c.set_net_fn('value', net.value.dqn_conv())
    c.replay_size = int(1e6)
    c.batch_size = 32
    c.train_start = 50000
    c.sync_freq = 10000
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout', episodic_life=False)
    c.eval_freq = None
    c.seed = 1
    c.use_reward_monitor = True
    return c


if __name__ == '__main__':
    cli.run_cli(config(),
                DoubleDqnAgent,
                script_path=os.path.realpath(__file__))