Example #1
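# Worker entry point for one evaluation thread. It relies on module-level
# configuration from the original script: agent_type, episodes, world_size,
# mcts_c, mcts_k, mcts_n, bsize, esize and results_folder.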
def run(q, threadid):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3),
                             episodes - 1,
                             3,
                             mcts_c=mcts_c,
                             mcts_k=mcts_k,
                             mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            timesteps = max(1, timesteps)
            bmodelmetric.append(
                sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(
                sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(
                sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)
    finally:
        np.save(str(results_folder / 'results_{}'.format(threadid)),
                np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)),
                np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)),
                np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)),
                np.array(emodelmetric_prey))
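Example #2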
def run(progress_q, results_q, threadid, adhoc_filename, episodes,
        results_folder, world_size):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)

    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
    #                    environment_model_size=esize)
    # load_run(dataset_folder / dataset_name, adhoc, episodes, fit=False, compute_metrics=False)
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(
        PursuitState.random_state(num_agents, world_size, random_instance),
        agents, transition_f, reward_f)
    timesteps, reward = world.run(0, 500)
    progress_q.put(1)

    results_q.put(
        (str(results_folder / 'results_eps{}'.format(episodes)), timesteps))
    results_q.put((str(results_folder / 'eaccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.e_model.metric)))
    results_q.put((str(results_folder / 'baccuracy_eps{}'.format(episodes)),
                   np.average(adhoc.b_model.metric)))
    results_q.put(
        (str(results_folder / 'eaccuracyprey_eps{}'.format(episodes)),
         np.average(adhoc.e_model.metric_prey)))
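A minimal sketch of how this worker could be launched, assuming the standard multiprocessing module; launch_workers, the worker count, and the queue handling are illustrative and not part of the original script:

# Hypothetical launcher for the worker above.
from multiprocessing import Process, Queue

def launch_workers(n_workers, adhoc_filename, episodes, results_folder, world_size):
    progress_q, results_q = Queue(), Queue()
    workers = [Process(target=run,
                       args=(progress_q, results_q, tid, adhoc_filename,
                             episodes, results_folder, world_size))
               for tid in range(n_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    # Each worker pushes (output_path, value) pairs; drain them after joining.
    collected = []
    while not results_q.empty():
        collected.append(results_q.get())
    return collected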
Example #3
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))

    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        _, _ = world.run(0, 1000)

    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
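A possible invocation of save_run; the output path is a placeholder and the GreedyAgent team is borrowed from Example #7:

# Hypothetical usage: record 100 episodes of a four-agent greedy team.
agents = [GreedyAgent(i) for i in range(4)]
save_run('datasets/5x5_greedy_100.pkl', 100, agents, world_size=(5, 5), seed=100)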
Example #4
def init(episodes, world_q):
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n, behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)

    world = World(PursuitState.random_state(num_agents, world_size, random_instance), agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)

    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc
Example #5
def run(q, threadid, world_q):
    # Each worker starts from a copy of the pre-trained world queued by init() (Example #4).
    world = deepcopy(world_q.get())
    random_instance = random.Random(100+threadid)
    num_agents = 4
    try:
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 200)

        # `adhoc` is expected to be defined at module level
        # (e.g. the agent returned by init() in Example #4).
        bacc = sum(adhoc.b_model.metric[-timesteps:]) / timesteps
        eacc = sum(adhoc.e_model.metric[-timesteps:]) / timesteps
        eacc_prey = sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps
        q.put((timesteps, bacc, eacc, eacc_prey))
    except Exception as e:
        print(e)
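A sketch of how the (timesteps, bacc, eacc, eacc_prey) tuples pushed to q might be collected; collect() and the dictionary keys are illustrative, not part of the original script:

# Hypothetical aggregation of per-run results from the queue.
def collect(q, n_runs):
    rows = [q.get() for _ in range(n_runs)]
    timesteps, bacc, eacc, eacc_prey = zip(*rows)
    return {
        'avg_timesteps': sum(timesteps) / len(timesteps),
        'avg_behavior_acc': sum(bacc) / len(bacc),
        'avg_env_acc': sum(eacc) / len(eacc),
        'avg_env_acc_prey': sum(eacc_prey) / len(eacc_prey),
    }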
Example #6
    def transition(state, actions):
        assert len(actions) == num_agents
        directions = [(1, 0), (-1, 0), (0, 1), (0, -1), (0, 0)]

        def choose_prey_move():
            if not prey_moves:
                return random_instance.choice(directions)
            else:
                result = prey_moves[0]
                if len(prey_moves) > 1:
                    prey_moves.pop(0)
                return result

        occupied_positions = set(state.prey_positions) | set(
            state.agent_positions)

        num_preys = len(state.prey_positions)

        apos_array = [None] * num_agents
        ppos_array = [None] * num_preys
        agent_indices = [(i, True) for i in range(num_agents)] + \
                        [(i, False) for i in range(num_preys)]
        random_instance.shuffle(agent_indices)

        for i, is_agent in agent_indices:
            if is_agent:
                position = state.agent_positions[i]
                action = actions[i]
            else:
                position = state.prey_positions[i]
                action = choose_prey_move()
            new_position = move(position, action, world_size)

            # if collision is detected, just go to the original position
            if new_position in occupied_positions:
                new_position = position

            occupied_positions.remove(position)
            occupied_positions.add(new_position)

            if is_agent:
                apos_array[i] = new_position
            else:
                ppos_array[i] = new_position

        return PursuitState(prey_positions=tuple(ppos_array),
                            agent_positions=tuple(apos_array),
                            world_size=tuple(world_size))
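The transition closure above is presumably built inside get_transition_function (imported from pursuit.transition elsewhere in these examples). A sketch of the enclosing factory, consistent with the call sites in the other snippets; the exact signature and defaults are assumptions:

# Hypothetical factory around the closure above; the real implementation
# lives in pursuit.transition.
import random

def get_transition_function(num_agents, world_size, random_instance=None,
                            prey_moves=None):
    random_instance = random_instance or random.Random()
    prey_moves = list(prey_moves) if prey_moves else []

    def transition(state, actions):
        ...  # closure body as shown above

    return transition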
Example #7
        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action


for k in (10, 100, 1000):
    for n in (1000, ):
        for c in (100, ):
            agents = [GreedyAgent(i) for i in range(4)]
            random.seed(100)
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            for i in range(1):
                world = World(
                    PursuitState.random_state(len(agents), world_size), agents,
                    get_transition_function(len(agents), world_size),
                    get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)

            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))

print(rollouts)
print(rewards)
Example #8
    def _get_new_state(self):
        return PursuitState.random_state(4, self.world_size)
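Example #9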
from pursuit.transition import get_transition_function
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
# PursuitState, AdhocAgent, TeammateAwareAgent and ACTIONS come from the
# pursuit package; their exact import paths are not shown in this snippet.

agent = TeammateAwareAgent(0)
world_size = (10, 10)
adhoc_filename = 'adhoc_dataset/10x10ta_random_200'
adhoc = AdhocAgent.load(adhoc_filename)
positions = [(3, 3), (3, 7), (7, 3)]
prey = (5, 5)
result = np.zeros(world_size)

for x in range(world_size[0]):
    for y in range(world_size[1]):
        if (x, y) in positions:
            continue
        initial_state = PursuitState(tuple([(x, y)] + positions), (prey, ),
                                     world_size)
        adhoc.b_model.predict(initial_state)
        predicted_action_dist = adhoc.b_model.cache[initial_state][0]

        true_action = agent.act(initial_state)

        result[x, y] = predicted_action_dist[ACTIONS.index(true_action)]

fig, ax = plt.subplots(1)
im = ax.imshow(result, interpolation='nearest')
fig.colorbar(im)
for x, y in positions:
    rect = patches.Rectangle((x - 0.5, y - 0.5),
                             0.95,
                             0.95,
                             linewidth=1,
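                             # The original snippet is truncated above; the
                             # remaining style arguments and the final plotting
                             # calls below are an assumed completion.
                             edgecolor='white',
                             facecolor='none')
    ax.add_patch(rect)
plt.show()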
Example #10
num_agents = 4
world_size = (5, 5)
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents,
                                       world_size,
                                       prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)
agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200,
                              200,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ),
                             world_size)

world = World(initial_state,
              agents,
              transition_f,
              reward_f,
              visualizers=visualizers)
print(world.run(1, 100))

# expected actions
# RIGHT LEFT UP DOWN NOOP
# 4, 2, 2, 4 DOWN LEFT LEFT DOWN
# 4, 2, 2, 1 DOWN LEFT LEFT RIGH
# 4, 3, 2, 1 DOWN UUUP LEFT RIGH
# 1, 3, 2, 3 RIGH UUUP LEFT UUUP
# 1, 3, 2, 1 RIGH UUUP LEFT RIGH
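Example #11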
adhoc = AdhocAgent(3,
                   mcts_c=mcts_c,
                   mcts_k=mcts_k,
                   mcts_n=mcts_n,
                   behavior_model_size=bsize,
                   environment_model_size=esize,
                   eps=1.0,
                   fit=None)
# adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200')
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
transition_f = get_transition_function(num_agents, world_size,
                                       random.Random(100))
reward_f = get_reward_function(num_agents, world_size)

world = World(
    PursuitState.random_state(num_agents, world_size, random_instance), agents,
    transition_f, reward_f)
save_episodes = (1, 5, 10, 20, 50, 100, 150, 200)
current_episode = 0
# Run episodes in chunks up to each checkpoint in save_episodes.
for episodes in save_episodes:
    for current_episode in range(current_episode, episodes):

        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 100)
        print(timesteps)

        print("acc average " + str(np.average(adhoc.e_model.metric)))
        print("acc prey average " + str(np.average(adhoc.e_model.metric_prey)))
        print("behavior average " + str(np.average(adhoc.b_model.metric)))
Example #12
results = []

agent_colors = [(random.randint(0, 255), random.randint(0, 50),
                 random.randint(0, 255)) for _ in range(num_agents)]
visualizer = PygameVisualizer(400,
                              400,
                              agent_colors=agent_colors,
                              agents=agents)
visualizers = (visualizer, )

for i in range(iters):
    transition_f = get_transition_function(num_agents, world_size,
                                           random.Random(100 + i))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(
        PursuitState.random_state(num_agents, world_size,
                                  random.Random(100 + i)),
        agents,
        transition_f,
        reward_f,
    )
    timesteps, reward = world.run(0., 5000)
    results.append(timesteps)
    print(timesteps)

plt.plot(results)
plt.plot([np.average(results[:i]) for i in range(1, len(results) + 1)],
         label='average')
plt.legend()
plt.show()
# print(results)
# print(world_size)
# print(k)