Example #1
def main():
    gym_id = 'CartPole-v0'
    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGymEnvironment(gym_id, monitor=False, monitor_video=False)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'exploration': 'constant',
        'exploration_args': [0.1]
    })

    agent = SimpleQAgent(config, "simpleq")

    runner = Runner(agent, env)

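    # Called by the Runner after each episode; returning True continues training,
    # while returning False would stop the run early.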
    def episode_finished(r):
        if r.episode % 10 == 0:
            print("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            print("Episode reward: {}".format(r.episode_rewards[-1]))
            print("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    print("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                            env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    print("Learning finished. Total episodes: {ep}".format(ep=runner.episode +
                                                           1))
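
The episode_finished hook relies on a simple contract with the Runner: the Runner plays up to max_episodes episodes of at most max_timesteps steps each, records each episode's total reward in episode_rewards, and invokes the callback after every episode; a callback that returns False stops training early. A rough sketch of that loop, purely for illustration (this is not TensorForce's actual implementation; the agent and environment method names act, observe, reset and execute are assumptions):

from types import SimpleNamespace

def run_sketch(agent, env, max_episodes, max_timesteps, episode_finished=None):
    episode_rewards = []
    for episode in range(max_episodes):
        state = env.reset()
        episode_reward = 0.0
        for timestep in range(max_timesteps):
            action = agent.act(state)                      # assumed agent API
            state, reward, terminal = env.execute(action)  # assumed env API
            agent.observe(reward, terminal)
            episode_reward += reward
            if terminal:
                break
        episode_rewards.append(episode_reward)
        # Expose the same fields the callbacks above read: episode, timestep,
        # episode_rewards. Returning False from the callback ends training.
        report = SimpleNamespace(episode=episode, timestep=timestep,
                                 episode_rewards=episode_rewards)
        if episode_finished is not None and not episode_finished(report):
            break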
Example #2
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIGymEnvironment(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'max_episode_length': args.max_timesteps
    })

    if args.agent_config:
        config.read_json(args.agent_config)

    if args.network_config:
        config.read_json(args.network_config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config.loglevel])

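    # If the config defines a preprocessing stack, build it and update the state
    # shape that the agent will see after preprocessing.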
    preprocessing_config = config.get('preprocessing')
    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    if args.debug:
        logger.info("-" * 16)
        logger.info("File configuration:")
        logger.info(config)

    agent = create_agent(args.agent, config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent, env, preprocessor=stack, repeat_actions=config.repeat_actions)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                raise OSError("Cannot save agent to dir {}: {}".format(save_dir, e))
        runner.save_model(args.save, args.save_episodes)

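    # Log progress roughly 1000 times over the run (every 10 episodes in debug mode).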
    report_episodes = max(args.episodes // 1000, 1)
    if args.debug:
        report_episodes = 10

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    if args.monitor:
        env.gym.monitor.close()
    env.close()
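
Because everything above is driven by argparse, the script can be started directly from the command line; for instance (the file name and config paths here are hypothetical):

    python openai_gym.py CartPole-v0 -a DQNAgent -c configs/dqn_agent.json -n configs/dqn_network.json -e 50000 -t 2000 -m results -D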
Example #3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    # Currently does not do anything since we don't have the distributed API for all models yet
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file",
                        default='examples/configs/dqn_agent.json')
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file",
                        default='examples/configs/dqn_network.json')
    parser.add_argument('-e',
                        '--global-steps',
                        type=int,
                        default=1000000,
                        help="Total number of steps")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-l',
                        '--local-steps',
                        type=int,
                        default=20,
                        help="Maximum number of local steps before update")
    parser.add_argument('-w',
                        '--num-workers',
                        type=int,
                        default=1,
                        help="Number of worker agents")
    parser.add_argument('-r',
                        '--repeat-actions',
                        type=int,
                        default=1,
                        help="???")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M',
                        '--mode',
                        choices=['tmux', 'child'],
                        default='tmux',
                        help="Starter mode")
    parser.add_argument('-L',
                        '--logdir',
                        default='logs_async',
                        help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true', default=False)
    parser.add_argument('-i',
                        '--task-index',
                        type=int,
                        default=0,
                        help="Task index")
    parser.add_argument('-p',
                        '--is-ps',
                        type=int,
                        default=0,
                        help="Is param server")
    parser.add_argument('-K',
                        '--kill',
                        action='store_true',
                        default=False,
                        help="Kill runners")

    args = parser.parse_args()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(
            12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

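        # Wrap a command for the chosen starter mode: in 'tmux' mode it is typed
        # into a tmux window via send-keys; in 'child' mode it is run as a
        # background process, its output redirected to a log file and its kill
        # command appended to kill.sh.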
        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(
                    session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

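        # Build the argument list for one child process: re-invoke this script
        # with --is-child, the shared config files, the worker's task index and
        # whether it should act as the parameter server.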
        def build_cmd(index, parameter_server):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id, '--is-child', '--agent-config',
                os.path.join(os.getcwd(),
                             args.agent_config), '--network-config',
                os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers, '--task-index', index,
                '--is-ps', parameter_server
            ]

            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + [
                'tmux new-session -d -s {} -n ps'.format(session_name)
            ]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(0, 1)))

        for i in range(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(
                    session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i, 0)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0

    env = OpenAIGymEnvironment(args.gym_id)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape
    })

    if args.agent_config:
        config.read_json(args.agent_config)
    if args.network_config:
        config.read_json(args.network_config)

    preprocessing_config = config.get('preprocessing')

    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(
        gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(config)

    runner = DistributedRunner(agent_type=args.agent,
                               agent_config=config,
                               n_agents=args.num_workers,
                               n_param_servers=1,
                               environment=env,
                               global_steps=args.global_steps,
                               max_episode_steps=args.max_timesteps,
                               preprocessor=stack,
                               repeat_actions=args.repeat_actions,
                               local_steps=args.local_steps,
                               task_index=args.task_index,
                               is_ps=(args.is_ps == 1))
    runner.run()
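
This last script acts as its own launcher: run without --is-child, it only builds and executes the tmux (or background-child) commands that re-invoke it once as a parameter server and once per worker, and each of those child invocations then skips the launcher block and reaches the DistributedRunner at the bottom. A typical top-level invocation might look like this (file name hypothetical):

    python openai_gym_async.py CartPole-v0 -w 4 -M tmux -e 1000000

Passing -K afterwards runs the kill commands, freeing the worker ports and killing the openai_async tmux session; in child mode the generated kill.sh in the log directory serves the same purpose.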