Example #1
def agent_factory(name, role, kind, clients, max_episodes, max_actions, logdir,
                  quit):
    assert len(
        clients
    ) >= 2, 'There are not enough Malmo clients in the pool (need at least 2)'

    clients = parse_clients_args(clients)
    visualizer = ConsoleVisualizer(prefix='Agent %d' % role)

    if role == 0:
        # Role 0: the automated challenge agent; the environment's agent type
        # depends on which sub-agent the challenge agent is currently using.
        env = PigChaseEnvironment(clients,
                                  PigChaseSymbolicStateBuilder(),
                                  actions=ENV_ACTIONS,
                                  role=role,
                                  human_speed=True,
                                  randomize_positions=True)
        agent = PigChaseChallengeAgent(name)

        if isinstance(agent.current_agent, RandomAgent):
            agent_type = PigChaseEnvironment.AGENT_TYPE_1
        else:
            agent_type = PigChaseEnvironment.AGENT_TYPE_2
        obs = env.reset(agent_type)
        reward = 0
        rewards = []
        done = False
        episode = 0

        while True:

            # select an action
            action = agent.act(obs, reward, done, True)

            if done:
                visualizer << (episode + 1, 'Reward', sum(rewards))
                rewards = []
                episode += 1

                if isinstance(agent.current_agent, RandomAgent):
                    agent_type = PigChaseEnvironment.AGENT_TYPE_1
                else:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_2
                obs = env.reset(agent_type)

            # take a step
            obs, reward, done = env.do(action)
            rewards.append(reward)

    else:
        # Any other role: the human-controlled agent, driven via the arrow keys.
        env = PigChaseEnvironment(clients,
                                  PigChaseSymbolicStateBuilder(),
                                  actions=list(ARROW_KEYS_MAPPING.values()),
                                  role=role,
                                  randomize_positions=True)
        env.reset(PigChaseEnvironment.AGENT_TYPE_3)

        agent = PigChaseHumanAgent(name, env, list(ARROW_KEYS_MAPPING.keys()),
                                   max_episodes, max_actions, visualizer, quit)
        agent.show()
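
# parse_clients_args is referenced above but not defined in this snippet. A
# minimal sketch (an assumption, not the original helper) would simply split
# each "ip:port" endpoint string, mirroring the inline split used in
# Examples #2 and #3:
def parse_clients_args(clients):
    # Hypothetical helper for illustration: turn ['ip:port', ...] into
    # [['ip', 'port'], ...]; endpoints without a port stay as single entries.
    return [str(client).split(':') for client in clients]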
Example #2
def run_maze_learner(mission, clients, logdir=None):

    # Use Tensorboard when it is available and a log directory was given,
    # otherwise fall back to console output.
    if logdir is not None and 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()


    env = MazeEnvironment(mission,
                          [str.split(client, ':') for client in clients])
    env.recording = False

    # A random agent stands in for the learner in this snippet. The DQN setup
    # it was derived from is kept below for reference:
    #   explorer = LinearEpsilonGreedyExplorer(1, 0.1, 10000)
    #   model = DeepQNeuralNetwork((4, 84, 84), (env.available_actions,),
    #                              momentum=0, visualizer=visualizer)
    #   memory = TemporalMemory(50000, model.input_shape[1:], model.input_shape[0], False)
    #   agent = DQNAgent("Maze DQN Agent", env.available_actions, model, memory,
    #                    explorer=explorer, visualizer=visualizer)
    #   exp = SingleAgentExperiment("Malmo Cliff Walking", agent, env, 500000,
    #                               warm_up_timesteps=500, visualizer=visualizer)
    #   exp.episode_end += on_episode_end
    #   visualizer.initialize(MALMO_MAZE_FOLDER, model, CntkConverter())
    #   with Popen(['tensorboard',
    #               '--logdir=%s' % path.join(MALMO_MAZE_FOLDER, path.pardir),
    #               '--port=6006']):
    agent = RandomAgent("rand", 3)
    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        # check whether the environment needs a reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        # Periodic checkpointing only applies to the commented-out DQN model;
        # `model`, `backend` and `environment` are not defined in this snippet.
        # if (step % EPOCH_SIZE) == 0:
        #     model.save('%s-%s-dqn_%d.model' %
        #                (backend, environment, step // EPOCH_SIZE))
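
# visualize_training is likewise assumed rather than defined in this snippet.
# A plausible sketch, reusing the `<<` reporting protocol shown in Example #1
# (not necessarily the original implementation):
def visualize_training(visualizer, step, rewards):
    # Hypothetical helper: push per-episode reward statistics to the visualizer.
    if rewards:
        visualizer << (step, 'Reward per episode', sum(rewards))
        visualizer << (step, 'Mean step reward', sum(rewards) / len(rewards))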
Example #3
def run_maze_learner(mission, clients, logdir=None):

    # Use Tensorboard when it is available and a log directory was given,
    # otherwise fall back to console output.
    if logdir is not None and 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission,
                          [str.split(client, ':') for client in clients])
    env.recording = False

    # Tabular Q-learning agent instead of the random baseline
    agent = TabularQLearnerAgent("rand", 3)

    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(step, state, is_training=True)
        if isinstance(action, int):
            print('ACTION BEING TAKEN: ', action)
        else:
            # np.asscalar is deprecated; .item() extracts the Python scalar
            print('ACTION BEING TAKEN: ', action.item())

        # take a step
        old = state
        state, reward, agent_done = env.do(action)
        agent.observe(old, action, state, reward, env.done)
        viz_rewards.append(reward)

        # Periodic checkpointing only applies to a DQN-style model; neither
        # `model`, `backend` nor `environment` is defined for the tabular agent.
        # if (step % EPOCH_SIZE) == 0:
        #     model.save('%s-%s-dqn_%d.model' %
        #                (backend, environment, step // EPOCH_SIZE))
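
# TabularQLearnerAgent is not included in the snippet. As an illustration of
# the act/observe split it assumes, here is a plain tabular Q-learner
# (standard TD(0) update, hashable states assumed; not the original class):
from collections import defaultdict
import random

class SimpleTabularQLearner:
    # Hypothetical epsilon-greedy tabular Q-learner for illustration only.
    def __init__(self, nb_actions, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.q = defaultdict(lambda: [0.0] * nb_actions)
        self.nb_actions = nb_actions
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon

    def act(self, state):
        # explore with probability epsilon, otherwise pick the greedy action
        if random.random() < self.epsilon:
            return random.randrange(self.nb_actions)
        values = self.q[state]
        return values.index(max(values))

    def observe(self, old_state, action, new_state, reward, done):
        # one-step Q-learning update: Q(s,a) += alpha * (target - Q(s,a))
        target = reward if done else reward + self.gamma * max(self.q[new_state])
        self.q[old_state][action] += self.alpha * (target - self.q[old_state][action])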
Example #4
                            help='The type of baseline to run.')
    arg_parser.add_argument('-e',
                            '--epochs',
                            type=int,
                            default=5,
                            help='Number of epochs to run.')
    arg_parser.add_argument('clients',
                            nargs='*',
                            default=['127.0.0.1:10000', '127.0.0.1:10001'],
                            help='Minecraft clients endpoints (ip(:port)?)+')
    args = arg_parser.parse_args()

    logdir = BASELINES_FOLDER % ('bayes_agent', datetime.utcnow().isoformat())
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    agents = [{
        'name': agent,
        'role': role,
        'type': args.type,
        'clients': args.clients,
        'max_epochs': args.epochs,
        'logdir': logdir,
        'visualizer': visualizer
    } for role, agent in enumerate(ENV_AGENT_NAMES)]

    run_experiment(agents)
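
# run_experiment itself is not part of this snippet. One common pattern (a
# sketch under that assumption, with keyword names that would have to match
# the real agent factory, e.g. the one in Example #1) is to start one process
# per agent definition:
from multiprocessing import Process

def run_experiment(agents_def):
    # Hypothetical runner: one OS process per agent, each driving its own
    # Malmo client through an agent_factory(**definition) call
    # (assumes agent_factory is defined or importable in this module).
    assert len(agents_def) == 2, 'Pig Chase expects exactly two agents'
    processes = []
    for agent_def in agents_def:
        p = Process(target=agent_factory, kwargs=agent_def)
        p.daemon = True
        p.start()
        processes.append(p)
    for p in processes:
        p.join()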
Example #5

    def restartGame():
        obs = env.reset()
        # One reward/done slot per environment in the batch; `dtype` is assumed
        # to be a tensor type (e.g. torch.FloatTensor), hence .type(dtype)
        # rather than .type_as(), which expects a tensor argument.
        reward = torch.zeros(config.general.batch_size).type(dtype)
        done = torch.zeros(config.general.batch_size).type(dtype)

        for agent in agents:
            agent.reset()

        return obs, reward, done

    obs, reward, done = restartGame()

    if config.envs.visualize:
        visualizer = ConsoleVisualizer(prefix='Agent %d' % 0)
        ag2 = PigChaseHumanAgent("Agent_2", env,
                                 list(ARROW_KEYS_MAPPING.keys()),
                                 10, 25, visualizer, quit)
        ag2.show()

    crt_agent = 0
    it = [0, 0]
    episode = 0

    # Action batch
    rew_1 = 0
    done_1 = 0
    all_rewards = 0

    while episode < EVAL_EPISODES: