def run(q, threadid):
    # Each worker thread gets its own seeds so runs are reproducible but distinct.
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)
    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
    #                    behavior_model_size=bsize, environment_model_size=esize)
    adhoc = AdhocAfterNAgent(agent_type(3), episodes - 1, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize, environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)

    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for i in range(episodes):
            world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
            timesteps, reward = world.run(0, 200)
            results.append(timesteps)
            # Average the per-step model accuracies over the episode that just ran.
            timesteps = max(1, timesteps)
            bmodelmetric.append(sum(adhoc.b_model.metric[-timesteps:]) / timesteps)
            emodelmetric.append(sum(adhoc.e_model.metric[-timesteps:]) / timesteps)
            emodelmetric_prey.append(sum(adhoc.e_model.metric_prey[-timesteps:]) / timesteps)
            q.put(1)
    finally:
        # Persist whatever was collected, even if the run was interrupted.
        np.save(str(results_folder / 'results_{}'.format(threadid)), np.array(results))
        np.save(str(results_folder / 'eaccuracy_{}'.format(threadid)), np.array(emodelmetric))
        np.save(str(results_folder / 'baccuracy_{}'.format(threadid)), np.array(bmodelmetric))
        np.save(str(results_folder / 'eaccuracyprey_{}'.format(threadid)), np.array(emodelmetric_prey))
def run(progress_q, results_q, threadid, adhoc_filename, episodes, results_folder, world_size):
    random_instance = random.Random(100 + threadid)
    random.seed(100 + threadid)
    np.random.seed(100 + threadid)
    num_agents = 4
    # adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
    #                    behavior_model_size=bsize, environment_model_size=esize)
    # load_run(dataset_folder / dataset_name, adhoc, episodes, fit=False, compute_metrics=False)
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)

    # Run a single evaluation episode with the pre-trained ad hoc agent and report
    # the episode length plus the average model accuracies as (path, value) pairs.
    timesteps, reward = world.run(0, 500)
    progress_q.put(1)
    results_q.put((str(results_folder / 'results_eps{}'.format(episodes)), timesteps))
    results_q.put((str(results_folder / 'eaccuracy_eps{}'.format(episodes)), np.average(adhoc.e_model.metric)))
    results_q.put((str(results_folder / 'baccuracy_eps{}'.format(episodes)), np.average(adhoc.b_model.metric)))
    results_q.put((str(results_folder / 'eaccuracyprey_eps{}'.format(episodes)), np.average(adhoc.e_model.metric_prey)))
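# Hedged sketch, not from the original source: one way the parent process could
# drain results_q after all workers finish, grouping the (path, value) pairs by
# path and persisting each group with np.save. collect_results is a hypothetical
# helper; note that Queue.empty() can be unreliable across processes, so a
# sentinel-based loop may be preferable in practice.
def collect_results(results_q):
    grouped = {}
    while not results_q.empty():
        path, value = results_q.get()
        grouped.setdefault(path, []).append(value)
    for path, values in grouped.items():
        np.save(path, np.array(values))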
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    # Plays `number_episodes` episodes with the given agents and pickles every
    # recorded transition so the trajectory can be replayed later.
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f, visualizers=(transition_recorder,))

    for i in range(number_episodes):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        _, _ = world.run(0, 1000)

    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
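# Minimal usage sketch, assuming the GreedyAgent class used elsewhere in this
# repository is importable here; the output filename is illustrative only.
# Records 200 episodes of four greedy agents on a 5x5 grid for later replay.
if __name__ == '__main__':
    dataset_agents = [GreedyAgent(i) for i in range(4)]
    save_run('adhoc_dataset/5x5greedy_200', 200, dataset_agents, world_size=(5, 5), seed=100)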
def init(episodes, world_q):
    # Trains the ad hoc agent for `episodes` warm-up episodes, then hands the
    # resulting world to each worker thread via `world_q`.
    random_instance = random.Random(100)
    random.seed(100)
    np.random.seed(100)
    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize, environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        world.run(0, 200)

    for _ in range(n_threads):
        world_q.put(world)
    return world, adhoc
def run(q, threadid, world_q):
    # Each worker evaluates its own deep copy of the warmed-up world.
    world = deepcopy(world_q.get())
    random_instance = random.Random(100 + threadid)
    num_agents = 4
    try:
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 200)
        # Average the per-step model accuracies over this evaluation episode,
        # guarding against zero-length episodes as in the other runner.
        window = max(1, timesteps)
        bacc = sum(adhoc.b_model.metric[-window:]) / window
        eacc = sum(adhoc.e_model.metric[-window:]) / window
        eacc_prey = sum(adhoc.e_model.metric_prey[-window:]) / window
        q.put((timesteps, bacc, eacc, eacc_prey))
    except Exception as e:
        print(e)
def transition(state, actions):
    assert len(actions) == num_agents
    directions = [(1, 0), (-1, 0), (0, 1), (0, -1), (0, 0)]

    def choose_prey_move():
        # With no scripted moves the prey moves randomly; otherwise the scripted
        # moves are consumed one per step and the last one is repeated forever.
        if not prey_moves:
            return random_instance.choice(directions)
        result = prey_moves[0]
        if len(prey_moves) > 1:
            prey_moves.pop(0)
        return result

    occupied_positions = set(state.prey_positions) | set(state.agent_positions)
    num_preys = len(state.prey_positions)
    apos_array = [None] * num_agents
    ppos_array = [None] * num_preys

    # Move agents and preys in a random order so no entity is systematically favored.
    agents_indexs = [(i, True) for i in range(num_agents)] + \
                    [(i, False) for i in range(num_preys)]
    random_instance.shuffle(agents_indexs)
    for i, is_agent in agents_indexs:
        if is_agent:
            position = state.agent_positions[i]
            action = actions[i]
        else:
            position = state.prey_positions[i]
            action = choose_prey_move()
        new_position = move(position, action, world_size)

        # if collision is detected, just go to the original position
        if new_position in occupied_positions:
            new_position = position
        occupied_positions.remove(position)
        occupied_positions.add(new_position)

        if is_agent:
            apos_array[i] = new_position
        else:
            ppos_array[i] = new_position

    return PursuitState(prey_positions=tuple(ppos_array),
                        agent_positions=tuple(apos_array),
                        world_size=tuple(world_size))
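# Hedged usage sketch: the closure above is presumably what get_transition_function
# returns. With a scripted prey, prey_moves is consumed one entry per step and the
# last entry repeats once only one element remains (see choose_prey_move). Only
# names already used in this repository appear below; treating (1, 0) as a move to
# the right follows the directions list above and is an assumption.
transition_f = get_transition_function(4, (5, 5), prey_moves=[(0, 1), (0, -1)])
state = PursuitState.random_state(4, (5, 5), random.Random(0))
next_state = transition_f(state, [(1, 0)] * 4)  # all four agents try to step right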
        # Fragment, presumably from MCTSAgent.act: build the search tree with a
        # UCB1 tree policy and a k-step random rollout, then return the best action.
        tree = MCTS(tree_policy=UCB1(c=self.mcts_c),
                    default_policy=RandomKStepRollOut2(self.mcts_k),
                    backup=monte_carlo)
        self.prev_action = tree(self.root, n=n)
        # print([[y.n for y in x.children.values()] for x in self.root.children.values()])
        return self.prev_action


# Grid search over the MCTS hyperparameters: rollout length k, number of
# iterations n, and the exploration constant (passed to the agent as c * k).
for k in (10, 100, 1000):
    for n in (1000, ):
        for c in (100, ):
            agents = [GreedyAgent(i) for i in range(4)]
            random.seed(100)
            agents[-1] = MCTSAgent(3, n, k, c * k)
            results = []
            for i in range(1):
                world = World(PursuitState.random_state(len(agents), world_size),
                              agents,
                              get_transition_function(len(agents), world_size),
                              get_reward_function(len(agents), world_size))
                timesteps, reward = world.run(0, 1000)
                results.append(timesteps)
            print("k: " + str(k))
            print("n: " + str(n))
            print("c: " + str(c))
            print("avg: " + str(sum(results) / len(results)))
            print(rollouts)
            print(rewards)
    def _get_new_state(self):
        return PursuitState.random_state(4, self.world_size)
from pursuit.transition import get_transition_function
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np

agent = TeammateAwareAgent(0)
world_size = (10, 10)
adhoc_filename = 'adhoc_dataset/10x10ta_random_200'
adhoc = AdhocAgent.load(adhoc_filename)
positions = [(3, 3), (3, 7), (7, 3)]
prey = (5, 5)
result = np.zeros(world_size)

# For every possible position of agent 0, compare the behavior model's predicted
# action distribution with the action the true TeammateAwareAgent would take.
for x in range(world_size[0]):
    for y in range(world_size[1]):
        if (x, y) in positions:
            continue
        initial_state = PursuitState(tuple([(x, y)] + positions), (prey, ), world_size)
        adhoc.b_model.predict(initial_state)
        predicted_action_dist = adhoc.b_model.cache[initial_state][0]
        true_action = agent.act(initial_state)
        result[x, y] = predicted_action_dist[ACTIONS.index(true_action)]

# Heat map of the probability the behavior model assigns to the true action.
fig, ax = plt.subplots(1)
im = ax.imshow(result, interpolation='nearest')
fig.colorbar(im)
for x, y in positions:
    # Plausible completion (edgecolor, facecolor and add_patch are assumptions):
    # outline the fixed teammate cells on the heat map.
    rect = patches.Rectangle((x - 0.5, y - 0.5), 0.95, 0.95, linewidth=1,
                             edgecolor='red', facecolor='none')
    ax.add_patch(rect)
plt.show()
num_agents = 4
world_size = (5, 5)
agents = [TeammateAwareAgent(i) for i in range(num_agents)]
prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(num_agents, world_size, prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)

agent_colors = [(255, 0, 0), (175, 0, 75), (75, 0, 175), (0, 0, 255)]
visualizer = PygameVisualizer(200, 200, agent_colors=agent_colors, agents=agents)
visualizers = (visualizer, )

initial_state = PursuitState(((0, 1), (1, 0), (0, 3), (1, 2)), ((0, 0), ), world_size)
world = World(initial_state, agents, transition_f, reward_f, visualizers=visualizers)
print(world.run(1, 100))

# expected actions (1=RIGHT, 2=LEFT, 3=UP, 4=DOWN, 5=NOOP)
# 4, 2, 2, 4    DOWN LEFT LEFT DOWN
# 4, 2, 2, 1    DOWN LEFT LEFT RIGH
# 4, 3, 2, 1    DOWN UUUP LEFT RIGH
# 1, 3, 2, 3    RIGH UUUP LEFT UUUP
# 1, 3, 2, 1    RIGH UUUP LEFT RIGH
adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                   behavior_model_size=bsize, environment_model_size=esize,
                   eps=1.0, fit=None)
# adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200')
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
transition_f = get_transition_function(num_agents, world_size, random.Random(100))
reward_f = get_reward_function(num_agents, world_size)
world = World(PursuitState.random_state(num_agents, world_size, random_instance),
              agents, transition_f, reward_f)

# Run the ad hoc agent up to each checkpoint in save_episodes and report the
# average model accuracies at every checkpoint.
save_episodes = (1, 5, 10, 20, 50, 100, 150, 200)
current_episode = 0
for episodes in save_episodes:
    while current_episode < episodes:
        world.initial_state = PursuitState.random_state(num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 100)
        print(timesteps)
        current_episode += 1
    print("acc average " + str(np.average(adhoc.e_model.metric)))
    print("acc prey average " + str(np.average(adhoc.e_model.metric_prey)))
    print("behavior average " + str(np.average(adhoc.b_model.metric)))
results = []
agent_colors = [(random.randint(0, 255), random.randint(0, 50), random.randint(0, 255))
                for _ in range(num_agents)]
visualizer = PygameVisualizer(400, 400, agent_colors=agent_colors, agents=agents)
visualizers = (visualizer, )

for i in range(iters):
    transition_f = get_transition_function(num_agents, world_size, random.Random(100 + i))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(PursuitState.random_state(num_agents, world_size, random.Random(100 + i)),
                  agents, transition_f, reward_f)
    timesteps, reward = world.run(0., 5000)
    results.append(timesteps)
    print(timesteps)

# Episode lengths per iteration, with a running average overlaid.
plt.plot(results)
plt.plot([np.average(results[:i]) for i in range(1, len(results))], label='average')
plt.show()
# print(results)
# print(world_size)
# print(k)