def run(progress_q, results_q, threadid, adhoc_filename, episodes, results_folder, world_size):
    """Run one evaluation episode with a stored ad hoc agent and queue its metrics.

    The agent is loaded from ``adhoc_filename`` and placed in a team with
    three ``agent_type`` teammates.  After a single ``world.run(0, 500)``
    call, ``1`` is pushed onto ``progress_q`` and four ``(path, value)``
    tuples are pushed onto ``results_q``.

    Args:
        progress_q: queue that receives ``1`` once the episode has finished.
        results_q: queue that receives the ``(path, value)`` result tuples.
        threadid: worker index; ``100 + threadid`` seeds the RNGs.
        adhoc_filename: location passed to ``AdhocAgent.load``.
        episodes: value substituted into the result path names.
        results_folder: path-like object joined (``/``) with the result names.
        world_size: world dimensions forwarded to the environment helpers.
    """
    seed = 100 + threadid
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    # An earlier, now commented-out variant constructed an AdhocAgent directly
    # and called load_run on a dataset; the agent is loaded from disk instead.
    adhoc = AdhocAgent.load(adhoc_filename)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]

    # The transition function always gets its own generator seeded with 100,
    # independent of threadid.
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    start_state = PursuitState.random_state(num_agents, world_size, random_instance)
    world = World(start_state, agents, transition_f, reward_f)

    timesteps, _ = world.run(0, 500)
    progress_q.put(1)

    def _publish(name_template, value):
        # Queue one (destination path, value) pair for the collecting process.
        results_q.put((str(results_folder / name_template.format(episodes)), value))

    _publish('results_eps{}', timesteps)
    _publish('eaccuracy_eps{}', np.average(adhoc.e_model.metric))
    _publish('baccuracy_eps{}', np.average(adhoc.b_model.metric))
    _publish('eaccuracyprey_eps{}', np.average(adhoc.e_model.metric_prey))
def __call__(self, state_node):
    """Run the parent rollout and record it next to an all-greedy baseline.

    The parent class's ``__call__`` result is returned unchanged.  In
    addition, a world starting from ``state_node.state`` is played with four
    ``GreedyAgent`` instances (``world.run(0, 1000)``); the parent result is
    appended to ``rollouts[self.k]`` and the baseline reward to
    ``rewards[self.k]``.  ``world_size``, ``rollouts`` and ``rewards`` are
    read from the enclosing module.
    """
    result = super().__call__(state_node)

    num_agents = 4
    greedy_team = [GreedyAgent(i) for i in range(num_agents)]
    baseline_world = World(
        state_node.state,
        greedy_team,
        get_transition_function(num_agents, world_size),
        get_reward_function(num_agents, world_size),
    )
    # Only the reward of the baseline run is kept; the step count is unused.
    _, reward = baseline_world.run(0, 1000)

    rollouts[self.k].append(result)
    rewards[self.k].append(reward)
    return result
def __init__(self):
    """Create the capture source, threshold, preprocessor and GUI.

    Reads ``self.rawSize``, which is not assigned here (presumably a class
    attribute -- confirm against the class definition).
    """
    logging.info('Initialising vision')

    # Per the original note, DICE requires MPlayerCapture(self.rawSize)
    # instead of the plain Capture used here.
    self.capture = Capture(self.rawSize)

    gui_world = World('blue')  # arbitrary colour
    gui_world.pointer = None

    self.threshold = vision.threshold.AltRaw()
    self.pre = Preprocessor(self.rawSize, self.threshold, None)
    self.gui = GUI(gui_world, self.rawSize, self.threshold)

    logging.debug('Vision initialised')
def run(q, threadid):
    """Run ``episodes`` episodes in one worker and save per-episode metrics.

    Builds a team of three ``agent_type`` agents plus an ``AdhocAfterNAgent``
    and plays ``episodes`` episodes of ``world.run(0, 200)``.  For every
    episode the step count and the mean of the last ``timesteps`` entries of
    the behaviour-model, environment-model and prey metrics are recorded.
    Whatever was collected is written with ``np.save`` in a ``finally``
    clause, so partial results survive an exception.

    ``episodes``, ``agent_type``, ``world_size``, ``results_folder`` and the
    ``mcts_*`` / ``bsize`` / ``esize`` settings are read from module scope.

    Args:
        q: progress queue; receives ``1`` after each episode.
        threadid: worker index; seeds the RNGs (``100 + threadid``) and names
            the output files.
    """
    seed = 100 + threadid
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes - 1, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    first_state = PursuitState.random_state(num_agents, world_size, random_instance)
    world = World(first_state, agents, transition_f, reward_f)

    def _tail_mean(values, count):
        # Mean of the last `count` entries (sum divided by count).
        return sum(values[-count:]) / count

    results = []
    bmodelmetric = []
    emodelmetric = []
    emodelmetric_prey = []
    try:
        for _ in range(episodes):
            world.initial_state = PursuitState.random_state(
                num_agents, world_size, random_instance)
            timesteps, _ = world.run(0, 200)
            results.append(timesteps)

            # Guard against a zero-step episode before dividing.
            window = max(1, timesteps)
            bmodelmetric.append(_tail_mean(adhoc.b_model.metric, window))
            emodelmetric.append(_tail_mean(adhoc.e_model.metric, window))
            emodelmetric_prey.append(_tail_mean(adhoc.e_model.metric_prey, window))
            # NOTE(review): original indentation was lost; reporting progress
            # once per episode is assumed.
            q.put(1)
    finally:
        outputs = (
            ('results_{}', results),
            ('eaccuracy_{}', emodelmetric),
            ('baccuracy_{}', bmodelmetric),
            ('eaccuracyprey_{}', emodelmetric_prey),
        )
        for name_template, values in outputs:
            np.save(str(results_folder / name_template.format(threadid)),
                    np.array(values))
def __init__(self, agents, world_size=(5, 5), max_steps=1000):
    """Wrap a four-agent pursuit world whose last agent is a ``DummyAgent``.

    Args:
        agents: list of the other agents; ``[self.agent]`` is appended to it
            with ``+``, so it must be a list.
        world_size: world dimensions, also used for the observation bound.
        max_steps: stored as ``self.max_steps``.
    """
    num_agents = 4

    self.world_size = world_size
    self.agent = DummyAgent(3)

    # The starting state is requested only after world_size and agent are set.
    start_state = self._get_new_state()
    team = agents + [self.agent]
    self.world = World(
        start_state,
        team,
        get_transition_function(num_agents, world_size),
        get_reward_function(num_agents, world_size),
    )

    self.reward_range = (-1, 0)
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Box(
        low=0, high=max(world_size), shape=(8, ))

    self.max_steps = max_steps
    self.i = 0
def save_run(filename, number_episodes, agents, world_size=(5, 5), seed=100):
    """Play ``number_episodes`` episodes and pickle the recorded transitions.

    A ``TransitionRecorder`` is attached to the world as a visualizer; after
    all episodes have run, its ``transitions`` attribute is pickled to
    ``filename``.

    Args:
        filename: path of the pickle file to write (opened in ``'wb'`` mode).
        number_episodes: how many times ``world.run(0, 1000)`` is called.
        agents: the agents controlling the world; its length sets the number
            of agents.
        world_size: world dimensions forwarded to the environment helpers.
        seed: seed for both the initial-state generator and the generator
            handed to the transition function (two separate
            ``random.Random`` instances).
    """
    random_instance = random.Random(seed)
    num_agents = len(agents)
    transition_f = get_transition_function(num_agents, world_size, random.Random(seed))
    reward_f = get_reward_function(num_agents, world_size)
    transition_recorder = TransitionRecorder()

    # The state drawn here is replaced before the first episode, but drawing
    # it keeps the random stream identical to earlier runs.
    world = World(PursuitState.random_state(num_agents, world_size, random_instance),
                  agents, transition_f, reward_f,
                  visualizers=(transition_recorder, ))

    for _ in range(number_episodes):
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        world.run(0, 1000)

    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as output_file:
        pickle.dump(transition_recorder.transitions, output_file)
def init(episodes, world_q):
    """Play ``episodes`` warm-up episodes and hand the world to the workers.

    All RNGs are seeded with 100.  The team is three ``agent_type`` agents
    plus an ``AdhocAfterNAgent``; ``agent_type``, ``world_size``,
    ``n_threads`` and the ``mcts_*`` / ``bsize`` / ``esize`` settings come
    from module scope.

    Args:
        episodes: number of ``world.run(0, 200)`` calls (shown with a tqdm
            progress bar); also passed to ``AdhocAfterNAgent``.
        world_q: queue that receives the world object ``n_threads`` times.

    Returns:
        The ``(world, adhoc)`` pair.
    """
    seed = 100
    random_instance = random.Random(seed)
    random.seed(seed)
    np.random.seed(seed)

    num_agents = 4
    adhoc = AdhocAfterNAgent(agent_type(3), episodes, 3,
                             mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                             behavior_model_size=bsize,
                             environment_model_size=esize)
    agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
    transition_f = get_transition_function(num_agents, world_size, random.Random(100))
    reward_f = get_reward_function(num_agents, world_size)
    first_state = PursuitState.random_state(num_agents, world_size, random_instance)
    world = World(first_state, agents, transition_f, reward_f)

    for _ in tqdm.tqdm(range(episodes)):
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        world.run(0, 200)

    # One queue entry per worker.
    for _ in range(n_threads):
        world_q.put(world)

    return world, adhoc
# NOTE(review): the line below holds two fragments whose line breaks and
# indentation were lost; as written, everything after the first '#' on it is
# parsed as a comment.
#   1. Tail of a method whose `def` is not visible here: it builds an MCTS
#      search (UCB1 tree policy with c=self.mcts_c, RandomKStepRollOut2 default
#      policy with self.mcts_k, monte_carlo backup), calls it on self.root with
#      n=n, stores the result in self.prev_action and returns it.
#   2. A module-level sweep over k in (10, 100, 1000), with n fixed to 1000 and
#      c fixed to 100: four GreedyAgent instances are created, random is seeded
#      with 100, the last agent is replaced by MCTSAgent(3, n, k, c * k), one
#      world.run(0, 1000) is executed (range(1)) and k, n, c and the average
#      step count are printed.  `rollouts` and `rewards` (defined elsewhere)
#      are printed as well.
tree = MCTS(tree_policy=UCB1(c=self.mcts_c), default_policy=RandomKStepRollOut2(self.mcts_k), backup=monte_carlo) self.prev_action = tree(self.root, n=n) # print([[y.n for y in x.children.values()] for x in self.root.children.values()]) return self.prev_action for k in (10, 100, 1000): for n in (1000, ): for c in (100, ): agents = [GreedyAgent(i) for i in range(4)] random.seed(100) agents[-1] = MCTSAgent(3, n, k, c * k) results = [] for i in range(1): world = World( PursuitState.random_state(len(agents), world_size), agents, get_transition_function(len(agents), world_size), get_reward_function(len(agents), world_size)) timesteps, reward = world.run(0, 1000) results.append(timesteps) print("k: " + str(k)) print("n: " + str(n)) print("c: " + str(c)) print("avg: " + str(sum(results) / len(results))) print(rollouts) print(rewards)
# Fixed scenario: every agent is a TeammateAwareAgent, the transition function
# is restricted to the moves in prey_moves, and the run is drawn with pygame.
# num_agents and world_size are defined before this fragment.
agents = [TeammateAwareAgent(agent_id) for agent_id in range(num_agents)]

prey_moves = [(-1, 0), (1, 0), (0, 0)]
transition_f = get_transition_function(
    num_agents, world_size, prey_moves=prey_moves)
reward_f = get_reward_function(num_agents, world_size)

agent_colors = [
    (255, 0, 0),
    (175, 0, 75),
    (75, 0, 175),
    (0, 0, 255),
]
visualizer = PygameVisualizer(
    200, 200, agent_colors=agent_colors, agents=agents)
visualizers = (visualizer, )

initial_state = PursuitState(
    ((0, 1), (1, 0), (0, 3), (1, 2)),
    ((0, 0), ),
    world_size)
world = World(
    initial_state, agents, transition_f, reward_f, visualizers=visualizers)
print(world.run(1, 100))

# expected actions
# RIGHT LEFT UP DOWN NOOP
# 4, 2, 2, 4    DOWN LEFT LEFT DOWN
# 4, 2, 2, 1    DOWN LEFT LEFT RIGH
# 4, 3, 2, 1    DOWN UUUP LEFT RIGH
# 1, 3, 2, 3    RIGH UUUP LEFT UUUP
# 1, 3, 2, 1    RIGH UUUP LEFT RIGH
# 1, 3, 2, 1    RIGH UUUP LEFT RIGH
# Train a fresh ad hoc agent alongside (num_agents - 1) agent_type teammates,
# stopping at the episode counts listed in save_episodes.  num_agents,
# world_size, random_instance, agent_type and the mcts_* / bsize / esize
# settings are defined before this fragment.
adhoc = AdhocAgent(3, mcts_c=mcts_c, mcts_k=mcts_k, mcts_n=mcts_n,
                   behavior_model_size=bsize, environment_model_size=esize,
                   eps=1.0, fit=None)
# adhoc = AdhocAgent.load('adhoc_dataset/10x10greedy_random_200')
agents = [agent_type(i) for i in range(num_agents - 1)] + [adhoc]
transition_f = get_transition_function(num_agents, world_size, random.Random(100))
reward_f = get_reward_function(num_agents, world_size)
world = World(
    PursuitState.random_state(num_agents, world_size, random_instance),
    agents, transition_f, reward_f)

save_episodes = (1, 5, 10, 20, 50, 100, 150, 200)
current_episode = 0
# Each checkpoint must resume exactly where the previous one ended.  Reusing
# the loop variable as the next range start does not do that: after a `for`
# loop the target holds the LAST value (episodes - 1), so every later
# checkpoint would replay one extra episode.  Pair each checkpoint with the
# previous one (0 for the first) instead.
checkpoint_starts = (0, ) + save_episodes[:-1]
for first_episode, episodes in zip(checkpoint_starts, save_episodes):
    for current_episode in range(first_episode, episodes):
        world.initial_state = PursuitState.random_state(
            num_agents, world_size, random_instance)
        timesteps, reward = world.run(0, 100)
        # NOTE(review): original indentation was lost; printing the running
        # metric averages after every episode is assumed.
        print(timesteps)
        print("acc average " + str(np.average(adhoc.e_model.metric)))
        print("acc prey average " + str(np.average(adhoc.e_model.metric_prey)))
        print("behavior average " + str(np.average(adhoc.b_model.metric)))
# Benchmark: run `iters` independently seeded episodes, collect the step
# counts in `results`, then plot them together with their running average.
# num_agents, world_size, agents, iters and results are defined before this
# fragment.
agent_colors = [(random.randint(0, 255), random.randint(0, 50), random.randint(0, 255))
                for _ in range(num_agents)]
visualizer = PygameVisualizer(400, 400, agent_colors=agent_colors, agents=agents)
# NOTE(review): `visualizers` is built but not passed to World below, so
# nothing is drawn during the runs -- presumably intentional for benchmarking.
visualizers = (visualizer, )

for i in range(iters):
    # Transition function and initial state each get their own generator,
    # both seeded with 100 + i.
    transition_f = get_transition_function(num_agents, world_size, random.Random(100 + i))
    reward_f = get_reward_function(num_agents, world_size)
    world = World(
        PursuitState.random_state(num_agents, world_size, random.Random(100 + i)),
        agents,
        transition_f,
        reward_f,
    )
    timesteps, reward = world.run(0., 5000)
    results.append(timesteps)
    print(timesteps)

plt.plot(results)
# Running average over the first 1..len(results) runs; the upper bound is
# len(results) + 1 so the curve has one point per run, including the last.
plt.plot([np.average(results[:count]) for count in range(1, len(results) + 1)],
         label='average')
# Without a legend the label above would never be displayed.
plt.legend()
plt.show()
# print(results)
# print(world_size)
# print(k)
print(np.average(results))
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Launch the vision system, optionally on the files named on the command line.

Usage: vision.py [filename]
"""
from vision2.vision import Vision
from common.world import World
import sys
import logging

#logging.basicConfig(level=logging.DEBUG)

args = len(sys.argv)
# sys.argv normally holds at least the script name, so this guard only fires
# when the interpreter was started without one.
if args < 1:
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("Usage: vision.py [filename]")
    sys.exit(2)

world = World()
if args == 1:
    # No file arguments given.
    v = Vision(world)
elif args > 1:
    # Every argument after the script name is passed on as a file.
    files = sys.argv[1:]
    v = Vision(world, files)

v.run()