Example No. 1
def get_default_config(agent_type):
    """
    Get default configuration from agent by providing type as a string parameter.

    :param agent_type: String parameter containing agent type
    :return: Default configuration dict
    """
    agent_class = agents.get(agent_type)

    if not agent_class:
        raise TensorForceValueError("No such agent: {}".format(agent_type))

    return Config(agent_class.default_config), Config(
        agent_class.model_ref.default_config)
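
A minimal usage sketch (added for illustration, not part of the original listing); it assumes agents is the registry dict queried above and that 'DQNAgent' is one of its keys:

agent_config, model_config = get_default_config('DQNAgent')
agent_config.batch_size = 32  # override a default before constructing the agent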
Example No. 2
def main():
    gym_id = 'CartPole-v0'
    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGymEnvironment(gym_id, monitor=False, monitor_video=False)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'exploration': 'constant',
        'exploration_args': [0.1]
    })

    agent = SimpleQAgent(config, "simpleq")

    runner = Runner(agent, env)

    def episode_finished(r):
        if r.episode % 10 == 0:
            print("Finished episode {ep} after {ts} timesteps".format(
                ep=r.episode + 1, ts=r.timestep + 1))
            print("Episode reward: {}".format(r.episode_rewards[-1]))
            print("Average of last 10 rewards: {}".format(
                np.mean(r.episode_rewards[-10:])))
        return True

    print("Starting {agent} for Environment '{env}'".format(agent=agent,
                                                            env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    print("Learning finished. Total episodes: {ep}".format(ep=runner.episode +
                                                           1))
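
Note (added): the boolean returned by the episode_finished callback appears to control whether the Runner keeps running, so it can double as an early-stopping hook. A sketched variant, with an arbitrary illustrative threshold of 195.0 over the last 100 episodes:

def stop_when_solved(r):
    # stop training once the recent average reward crosses the threshold
    if len(r.episode_rewards) >= 100 and np.mean(r.episode_rewards[-100:]) >= 195.0:
        return False  # returning False ends runner.run() early
    return True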
Example No. 3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIGymEnvironment(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'max_episode_length': args.max_timesteps
    })

    if args.agent_config:
        config.read_json(args.agent_config)

    if args.network_config:
        config.read_json(args.network_config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config.loglevel])

    preprocessing_config = config.get('preprocessing')
    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    if args.debug:
        logger.info("-" * 16)
        logger.info("File configuration:")
        logger.info(config)

    agent = create_agent(args.agent, config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent, env, preprocessor=stack, repeat_actions=config.repeat_actions)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent: could not create directory {}.".format(save_dir))
        runner.save_model(args.save, args.save_episodes)

    report_episodes = args.episodes // 1000
    if args.debug:
        report_episodes = 10

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    if args.monitor:
        env.gym.monitor.close()
    env.close()
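
For reference, an invocation of this example could look as follows (added; the script name and file paths are illustrative, the flags come from the argparse definitions above):

# python openai_gym_runner.py CartPole-v0 -a DQNAgent \
#     -c configs/dqn_agent.json -n configs/dqn_network.json \
#     -e 50000 -t 2000 -s ./saved/ -se 100 -D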
Example No. 4
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    # Currently does not do anything since we don't have the distributed API for all models yet
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file",
                        default='examples/configs/dqn_agent.json')
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file",
                        default='examples/configs/dqn_network.json')
    parser.add_argument('-e',
                        '--global-steps',
                        type=int,
                        default=1000000,
                        help="Total number of steps")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-l',
                        '--local-steps',
                        type=int,
                        default=20,
                        help="Maximum number of local steps before update")
    parser.add_argument('-w',
                        '--num-workers',
                        type=int,
                        default=1,
                        help="Number of worker agents")
    parser.add_argument('-r',
                        '--repeat-actions',
                        type=int,
                        default=1,
                        help="???")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M',
                        '--mode',
                        choices=['tmux', 'child'],
                        default='tmux',
                        help="Starter mode")
    parser.add_argument('-L',
                        '--logdir',
                        default='logs_async',
                        help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true', default=False)
    parser.add_argument('-i',
                        '--task-index',
                        type=int,
                        default=0,
                        help="Task index")
    parser.add_argument('-p',
                        '--is-ps',
                        type=int,
                        default=0,
                        help="Is param server")
    parser.add_argument('-K',
                        '--kill',
                        action='store_true',
                        default=False,
                        help="Kill runners")

    args = parser.parse_args()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(
            12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(
                    session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index, parameter_server):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id, '--is-child',
                '--agent-config', os.path.join(os.getcwd(), args.agent_config),
                '--network-config', os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers,
                '--task-index', index,
                '--is-ps', parameter_server
            ]

            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + [
                'tmux new-session -d -s {} -n ps'.format(session_name)
            ]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]
        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(0, 1)))

        for i in xrange(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(
                    session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i, 0)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))

        os.system("\n".join(cmds))

        return 0

    env = OpenAIGymEnvironment(args.gym_id)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape
    })

    if args.agent_config:
        config.read_json(args.agent_config)
    if args.network_config:
        config.read_json(args.network_config)

    preprocessing_config = config.get('preprocessing')

    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(
        gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(config)

    runner = DistributedRunner(agent_type=args.agent,
                               agent_config=config,
                               n_agents=args.num_workers,
                               n_param_servers=1,
                               environment=env,
                               global_steps=args.global_steps,
                               max_episode_steps=args.max_timesteps,
                               preprocessor=stack,
                               repeat_actions=args.repeat_actions,
                               local_steps=args.local_steps,
                               task_index=args.task_index,
                               is_ps=(args.is_ps == 1))
    runner.run()
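
Invocation sketch for the launcher above (added; the script name is illustrative). Run without -C/--is-child, it re-invokes itself once as a parameter server (--is-ps 1) and once per worker (--task-index 0..N-1), either inside a tmux session or as child processes:

# python openai_gym_async.py CartPole-v0 -w 4 -M tmux -L logs_async
#     -> tmux session 'openai_async' with windows ps, w_0 .. w_3
# python openai_gym_async.py CartPole-v0 -w 4 -K
#     -> kills the session and the processes listening on ports 12222-12226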
Example No. 5
def test_memoryagent_update_frequency():
    """
    Test MemoryAgent update frequency for SGD and value function updates.

    """
    update_steps = np.random.randint(1, 10)
    target_update_steps = np.random.randint(20, 200)

    state_shape = list(np.random.randint(2, 8, size=3))
    min_replay_size = np.random.randint(int(1e2), int(2e2))

    memory_capacity = np.random.randint(int(5e2), int(1e3))

    config = Config({
        'loglevel': 'debug',
        'actions': np.random.randint(2, 10),
        'batch_size': np.random.randint(2, 32),
        'update_rate': 1.0 / update_steps,
        'target_network_update_rate': 1.0 / target_update_steps,
        'min_replay_size': min_replay_size,
        'deterministic_mode': False,
        'use_target_network': True,
        'memory_capacity': memory_capacity,
        'state_shape': state_shape,
        'action_shape': []
    })

    agent = MemoryAgent(config, scope="memoryagent")
    model = MemoryAgentTestModel(config)

    # Set value function manually
    agent.model = model

    # Assert config values
    assert agent.batch_size == config['batch_size']
    assert agent.update_steps == update_steps
    assert agent.target_update_steps == target_update_steps
    assert agent.min_replay_size == config['min_replay_size']
    assert agent.use_target_network == config['use_target_network']

    max_steps = np.random.randint(int(5e3), int(1e4))

    print("Testing MemoryAgent for {} steps.".format(max_steps))
    print("Memory capacity: {}".format(config['memory_capacity']))
    print("Min replay size: {}".format(config['min_replay_size']))
    print("Batch size:      {}".format(config['batch_size']))
    print("Update steps:    {}".format(update_steps))
    print("Target steps:    {}".format(target_update_steps))
    print("State shape:     {}".format(state_shape))
    print("Actions:         {}".format(config['actions']))

    print("-" * 16)

    step_count = 0
    history = []
    history_sums = []
    for step_count in xrange(max_steps):
        while True:
            state = np.random.randint(0, 255, size=state_shape)
            action = agent.get_action(state)
            reward = float(np.random.randint(0, 100) // 80)  # p = .2 for reward = 1
            terminal = bool(np.random.randint(0, 100) // 95)  # p = .05 for terminal

            sumsq = np.sum(np.square(state))

            # avoid duplicate experiences
            if sumsq not in history_sums:
                break

        agent.add_observation(state, action, reward, terminal)
        history.append((state.astype(np.float32), action, reward, terminal))
        history_sums.append(sumsq)

    # Expected updates: all steps taken, minus the steps before the replay memory
    # reached min_replay_size, plus one if min_replay_size itself falls on an update step.

    expected_updates = (step_count + 1) // update_steps\
                       - min_replay_size // update_steps \
                       + int(min_replay_size % update_steps == 0)

    expected_target_updates = (step_count + 1) // target_update_steps \
                              - min_replay_size // target_update_steps \
                              + int(min_replay_size % target_update_steps == 0)

    print("Took {} steps.".format(step_count + 1))
    print("Observed {} updates (expected {})".format(model.count_updates, expected_updates))
    print("Observed {} target updates (expected {})".format(model.count_target_updates, expected_target_updates))
    print("Memory has size {}".format(agent.memory.size))

    assert model.count_updates == expected_updates
    assert model.count_target_updates == expected_target_updates

    assert memory_capacity == agent.memory.size
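
A worked instance of the expected-update arithmetic above (added; the numbers are illustrative):

# update_steps = 4, min_replay_size = 100, step_count + 1 = 1000:
#   expected_updates = 1000 // 4 - 100 // 4 + int(100 % 4 == 0)
#                    = 250 - 25 + 1
#                    = 226
# i.e. one SGD update every update_steps environment steps, but only after the
# replay memory has collected at least min_replay_size observations.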
Example No. 6
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('env_id',
                        help="ID of the universe environment",
                        default='HarvestDay-v0')
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c',
                        '--agent-config',
                        help="Agent configuration file",
                        default='examples/configs/dqn_agent.json')
    parser.add_argument('-n',
                        '--network-config',
                        help="Network configuration file",
                        default='examples/configs/dqn_network.json')
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=10000,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--max-timesteps',
                        type=int,
                        default=2000,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this file")

    args = parser.parse_args()

    env_id = args.env_id

    episodes = args.episodes
    report_episodes = episodes // 100

    max_timesteps = args.max_timesteps

    environment = OpenAIUniverseEnvironment(env_id)

    config = Config({
        'actions': environment.actions,
        'action_shape': environment.action_shape,
        'state_shape': environment.state_shape
    })

    if args.agent_config:
        config.read_json(args.agent_config)

    if args.network_config:
        config.read_json(args.network_config)

    state_wrapper = None
    if config.state_wrapper:
        state_wrapper = create_wrapper(config.state_wrapper,
                                       config.state_wrapper_param)
        config.state_shape = state_wrapper.state_shape(config.state_shape)

    agent = create_agent(args.agent, config)

    if args.monitor:
        environment.env.monitor.start(args.monitor)
        environment.env.monitor.configure(
            video_callable=lambda count: False)  # count % 500 == 0)

    print("Starting {agent_type} for OpenAI Universe environment '{env_id}'".
          format(agent_type=args.agent, env_id=env_id))
    total_states = 0
    repeat_actions = config.get('repeat_actions', 4)
    episode_rewards = []
    for i in xrange(episodes):
        state = environment.reset()
        episode_reward = 0
        repeat_action_count = 0
        for j in xrange(max_timesteps):
            if state_wrapper:
                full_state = state_wrapper.get_full_state(state)
            else:
                full_state = state
            if repeat_action_count <= 0:
                action = agent.get_action(full_state, i, total_states)
                repeat_action_count = repeat_actions - 1
            else:
                repeat_action_count -= 1
            result = environment.execute_action(action)
            episode_reward += result['reward']
            agent.add_observation(full_state, action, result['reward'],
                                  result['terminal_state'])

            state = result['state']
            total_states += 1
            if result['terminal_state']:
                break

        episode_rewards.append(episode_reward)

        if i % report_episodes == 0:
            print("Finished episode {ep} after {ts} timesteps".format(
                ep=i + 1, ts=j + 1))
            print("Total reward: {}".format(episode_reward))
            print("Average of last 500 rewards: {}".format(
                np.mean(episode_rewards[-500:])))
            print("Average of last 100 rewards: {}".format(
                np.mean(episode_rewards[-100:])))

    if args.monitor:
        environment.env.monitor.close()

    print("Learning finished. Total episodes: {ep}".format(ep=i + 1))