Example #1
0
def main():
    """Entry point for the learner process of a distributed RL setup.

    Parses CLI options, builds the environment, the model/target-model
    pair and the optimizer for the chosen algorithm (Ape-X or R2D2),
    then runs the learner's optimization loop indefinitely.
    """
    parser = argparse.ArgumentParser(
        description='Learner process for distributed reinforcement.')
    parser.add_argument('-e',
                        '--env',
                        type=str,
                        default='MultiFrameBreakout-v0',
                        help='Environment name.')
    parser.add_argument('-a',
                        '--algorithm',
                        type=str,
                        default='ape_x',
                        choices=['ape_x', 'r2d2'],
                        help='Select an algorithm.')
    parser.add_argument('-r',
                        '--redisserver',
                        type=str,
                        default='localhost',
                        help="Redis's server name.")
    parser.add_argument('-v',
                        '--visdomserver',
                        type=str,
                        default='localhost',
                        help="Visdom's server name.")
    parser.add_argument('-d',
                        '--actordevice',
                        type=str,
                        default='',
                        help="Actor's device.")
    parser.add_argument('-s',
                        '--replaysize',
                        type=int,
                        default=100000,
                        help="Replay memory size.")
    args = parser.parse_args()
    env = gym.make(args.env)
    vis = visdom.Visdom(server='http://' + args.visdomserver)
    # BUG FIX: `device` was used below (`.to(device)`) but never defined in
    # this function, causing a NameError. Define it the same way the sibling
    # actor entry points do: CUDA when available, otherwise CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # The actor's device defaults to auto-detection unless overridden on the
    # command line with -d/--actordevice.
    actordevice = ("cuda" if torch.cuda.is_available() else
                   "cpu") if args.actordevice == '' else args.actordevice
    if args.algorithm == 'ape_x':
        nstep_return = 3
        model = models.DuelingDQN(env.action_space.n).to(device)
        # Learner gets the online model, a fresh target model, and an
        # RMSprop optimizer with the Ape-X paper's hyperparameters.
        learner = Learner(model,
                          models.DuelingDQN(env.action_space.n).to(device),
                          optim.RMSprop(model.parameters(),
                                        lr=0.00025 / 4,
                                        alpha=0.95,
                                        eps=1.5e-7),
                          vis,
                          replay_size=args.replaysize,
                          hostname=args.redisserver,
                          use_memory_compress=True)
        # gamma is raised to nstep_return because returns are n-step.
        learner.optimize_loop(gamma=0.999**nstep_return,
                              actor_device=torch.device(actordevice))
    elif args.algorithm == 'r2d2':
        batch_size = 64
        nstep_return = 5
        model = models.DuelingLSTMDQN(env.action_space.n,
                                      batch_size,
                                      nstep_return=nstep_return).to(device)
        learner = Learner(model,
                          models.DuelingLSTMDQN(
                              env.action_space.n,
                              batch_size,
                              nstep_return=nstep_return).to(device),
                          optim.Adam(model.parameters(),
                                     lr=0.00048,
                                     eps=1.0e-3),
                          vis,
                          replay_size=args.replaysize,
                          hostname=args.redisserver,
                          use_memory_compress=True)
        learner.optimize_loop(batch_size=batch_size,
                              gamma=0.997**nstep_return,
                              beta0=0.6,
                              target_update=2500,
                              actor_device=torch.device(actordevice))
    else:
        # Unreachable with argparse `choices`, kept as a defensive guard.
        raise ValueError('Unknown the algorithm: %s.' % args.algorithm)
Example #2
0
def main():
    """Entry point for a numbered actor process in a distributed RL setup.

    Parses CLI options, constructs the environment and the per-algorithm
    actor (Ape-X or R2D2), then runs its acting loop.
    """
    parser = argparse.ArgumentParser(
        description='Actor process for distributed reinforcement.')
    # Table-driven argument registration: (flags, keyword options) pairs.
    option_table = [
        (('-n', '--no'),
         dict(type=int, default=1, help='Actor number.')),
        (('-e', '--env'),
         dict(type=str, default='MultiFrameBreakout-v0',
              help='Environment name.')),
        (('-a', '--algorithm'),
         dict(type=str, default='ape_x', choices=['ape_x', 'r2d2'],
              help='Select an algorithm.')),
        (('-t', '--num_total_actors'),
         dict(type=int, default=4, help='Total number of actors.')),
        (('-r', '--redisserver'),
         dict(type=str, default='localhost', help="Redis's server name.")),
        (('-v', '--visdomserver'),
         dict(type=str, default='localhost', help="Visdom's server name.")),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    vis = visdom.Visdom(server='http://' + args.visdomserver)
    env = gym.make(args.env)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Guard clause: reject unsupported algorithms up front (defensive;
    # argparse `choices` already restricts the value).
    if args.algorithm not in ('ape_x', 'r2d2'):
        raise ValueError('Unknown the algorithm: %s.' % args.algorithm)

    if args.algorithm == 'ape_x':
        from distributed_rl.ape_x.actor import Actor
        net = models.DuelingDQN(env.action_space.n).to(device)
        actor = Actor(args.no,
                      env,
                      net,
                      vis,
                      hostname=args.redisserver,
                      num_total_actors=args.num_total_actors,
                      device=device)
    else:  # r2d2
        from distributed_rl.r2d2.actor import Actor
        nstep_return = 5

        def _make_net():
            # R2D2 actors use batch size 1; learner-side batching differs.
            return models.DuelingLSTMDQN(env.action_space.n,
                                         1,
                                         nstep_return=nstep_return).to(device)

        actor = Actor(args.no,
                      env,
                      _make_net(),
                      _make_net(),
                      vis,
                      hostname=args.redisserver,
                      num_total_actors=args.num_total_actors,
                      device=device)
    actor.run()
Example #3
0
def main():
    """Entry point for a named actor process in a distributed RL setup.

    Parses CLI options, constructs the environment and the per-algorithm
    actor (Ape-X, R2D2 or R2D3), then runs its acting loop.
    """
    parser = argparse.ArgumentParser(
        description='Actor process for distributed reinforcement.')
    # Data-driven argument registration keeps the option list compact.
    option_table = [
        (('-n', '--name'),
         dict(type=str, default='actor1', help='Actor name.')),
        (('-e', '--env'),
         dict(type=str, default='MineRLTreechop-v0',
              help='Environment name.')),
        (('-a', '--algorithm'),
         dict(type=str, default='r2d3', choices=['ape_x', 'r2d2', 'r2d3'],
              help='Select an algorithm.')),
        (('-d', '--eps_decay'),
         dict(type=int, default=10000000,
              help='Decay of random action rate in e-greedy.')),
        (('-r', '--redisserver'),
         dict(type=str, default='localhost', help="Redis's server name.")),
        (('-v', '--visdomserver'),
         dict(type=str, default='localhost', help="Visdom's server name.")),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    vis = visdom.Visdom(server='http://' + args.visdomserver)
    env = gym.make(args.env)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Keyword arguments shared by every algorithm's Actor constructor.
    common_kwargs = dict(hostname=args.redisserver,
                         eps_decay=args.eps_decay,
                         device=device)

    if args.algorithm == 'ape_x':
        from distributed_rl.ape_x.actor import Actor
        actor = Actor(args.name,
                      env,
                      models.DuelingDQN(env.action_space.n).to(device),
                      vis,
                      **common_kwargs)
    elif args.algorithm == 'r2d2':
        from distributed_rl.r2d2.actor import Actor
        nstep_return = 5

        def _make_net():
            # Actor-side networks always run with batch size 1.
            return models.DuelingLSTMDQN(env.action_space.n,
                                         1,
                                         nstep_return=nstep_return).to(device)

        actor = Actor(args.name,
                      env,
                      _make_net(),
                      _make_net(),
                      vis,
                      **common_kwargs)
    elif args.algorithm == 'r2d3':
        from distributed_rl.r2d3.actor import Actor
        nstep_return = 5
        # NOTE(review): the hard-coded 7 / 10 presumably match the MineRL
        # action and feature dimensions used elsewhere — confirm in models.
        actor = Actor(args.name,
                      env,
                      models.DuelingLSTMDQN2(7, 1,
                                             nstep_return=nstep_return).to(device),
                      models.DuelingLSTMDQN2(7, 1,
                                             nstep_return=nstep_return).to(device),
                      models.GNetwork(10).to(device),
                      models.FNetwork(7, 10).to(device),
                      vis,
                      **common_kwargs)
    else:
        # Unreachable with argparse `choices`, kept as a defensive guard.
        raise ValueError('Unknown the algorithm: %s.' % args.algorithm)
    actor.run()