def __init__(self, fn='depthFirstSearch', prob='PositionSearchProblem', heuristic='nullHeuristic'):
    # Warning: some advanced Python magic is employed below to find the right
    # functions and problems.

    # Get the search function from the name and heuristic
    Agent.__init__(self)
    if fn not in dir(search):
        raise AttributeError(fn + ' is not a search function in search.py.')
    func = getattr(search, fn)
    if 'heuristic' not in func.__code__.co_varnames:
        print('[SearchAgent] using function ' + fn)
        self.searchFunction = func
    else:
        if heuristic in globals().keys():
            heur = globals()[heuristic]
        elif heuristic in dir(search):
            heur = getattr(search, heuristic)
        else:
            raise AttributeError(heuristic + ' is not a function in searchAgents.py or search.py.')
        print('[SearchAgent] using function %s and heuristic %s' % (fn, heuristic))
        # Note: this bit of Python trickery combines the search algorithm and the heuristic
        self.searchFunction = lambda x: func(x, heuristic=heur)

    # Get the search problem type from the name
    if prob not in globals().keys() or not prob.endswith('Problem'):
        raise AttributeError(prob + ' is not a search problem type in SearchAgents.py.')
    self.searchType = globals()[prob]
    print('[SearchAgent] using problem type ' + prob)
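# Minimal sketch of the name-lookup-plus-lambda pattern used above, assuming the
# project's `search` module defines e.g. aStarSearch(problem, heuristic=...).
# The helper name `make_search_function` is illustrative and not part of the project.
import search

def make_search_function(fn, heur):
    func = getattr(search, fn)                    # resolve the algorithm by name
    if 'heuristic' in func.__code__.co_varnames:  # bind the heuristic only if accepted
        return lambda problem: func(problem, heuristic=heur)
    return func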
def __init__(self, **args):
    """ Initialize the neural network etc."""

    """ DON'T CHANGE THIS PART"""
    Agent.__init__(self, **args)
    self.verbose = False

    self.a_dict = NN_util.TwoWayDict()
    self.a_dict["North"] = 0
    self.a_dict["East"] = 1
    self.a_dict["South"] = 2
    self.a_dict["West"] = 3
    # self.a_dict["Stop"] = 4
    self.num_actions = len(self.a_dict)

    self.prev_state = None
    self.prev_action = None
    self.prev_score = 0.0
    self.exp = []

    """ PLAY AROUND AND CHANGE THIS PART"""
    self.eps = 0.1     # For epsilon greedy action selection.
    self.alpha = 1e-9  # learning rate
    self.gamma = 0.99  # discount factor
    self.layers = [2, 64, 64, 64, self.num_actions]
    self.activation_fns = [NN_util.ReLU, NN_util.ReLU, NN_util.ReLU, NN_util.Linear]
    assert len(self.layers) == len(self.activation_fns) + 1, \
        "Number of layers and activation functions don't match!"

    """ DON'T CHANGE THIS PART"""
    self.NN = init_NN_Glorot(self.layers, self.activation_fns)
    self.TNN = init_NN_Glorot(self.layers, self.activation_fns)
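# Illustrative sketch (not part of the class above): epsilon-greedy selection over the
# action indices stored in a_dict, given a vector of Q-values from the network.
# `q_values`, `legal_indices`, and the helper name are assumptions for illustration.
import random

def epsilon_greedy_action(q_values, legal_indices, eps=0.1):
    """With probability eps pick a random legal action index, otherwise the greedy one."""
    if random.random() < eps:
        return random.choice(legal_indices)
    return max(legal_indices, key=lambda i: q_values[i])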
def __init__(self, epsilon, alpha):
    Agent.__init__(self)
    self.epsilon = epsilon
    self.cardsWeights = {}
    self.cardsFeat = {}
    self.discount = 1
    self.alpha = alpha
def __init__(self, epsilon, alpha):
    Agent.__init__(self)
    self.epsilon = epsilon
    self.cardsWeights = {}
    self.callWeights = {}
    self.discount = 1
    self.alpha = alpha
    self.legalCalls = [True, False]
def __init__(self):
    Agent.__init__(self)
    func = getattr(search, 'aStarSearch')
    heur = globals()['manhattanHeuristic']
    # heur = getattr(search, heuristic)
    self.searchFunction = lambda x: func(x, heuristic=heur)
    # Get the search problem type from the name.
    # Note: `prob` must be the name of a search problem class available in globals().
    self.searchType = globals()[prob]
def __init__(self, command_buffer, index, server, display):
    Agent.__init__(self, index)
    self.buffer = command_buffer
    self.lastMove = Directions.STOP
    self.recvDirection = ""
    self.index = index
    self.server = server
    self.keys = []
    self.display = display
    self.state = None
    self.ready = False
    self.life_map = self.server.life_map
    # Spawn the background receiver loop (Python 2 `thread` module).
    thread.start_new_thread(self.constantReceiver, ())
def __init__(self, alpha=1.0, epsilon=0.05, gamma=0.8, numTraining=10):
    """
    Sets options, which can be passed in via the Pacman command line using -a alpha=0.5,...
    alpha       - learning rate
    epsilon     - exploration rate
    gamma       - discount factor
    numTraining - number of training episodes, i.e. no learning after these many episodes
    """
    Agent.__init__(self)
    self.alpha = float(alpha)
    self.epsilon = float(epsilon)
    self.discount = float(gamma)
    self.numTraining = int(numTraining)
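# Usage note: the options arrive from the Pacman command line as strings (hence the
# float()/int() casts above). A typical invocation might look like the line below;
# the agent name `PacmanQAgent` is an assumption for illustration.
#
#   python pacman.py -p PacmanQAgent -a alpha=0.5,epsilon=0.1,gamma=0.8,numTraining=100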
def __init__(self):
    Agent.__init__(self)
    self.ghost_fare_level = 2
    self.has_scared_ghosts = False
    self.recompute_delay = 0
    self.visited = []
    self.actions = []
    self.map = []
    self.map_height = 0
    self.map_width = 0
    self.ghost_cost = []
    self.food_heuristic = []
    self.f = dict()
    self.current_target = (1, 1)
def evaluation(best, N, test_runs, output_dir, random=False):
    print('TEST RUN')
    results = []
    position = []
    for _ in range(test_runs):
        agents = [Agent([-1 for _ in range(IND_SIZE)]) for _ in range(NUM_IND)]
        if random:
            best = Agent([-1] * 32)
        agents[0] = best
        game = Game(agents, N)
        game.play()
        results.append(best.score)
        position.append(sorted([a.score for a in agents], reverse=True).index(best.score))

    # Save results
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    plt.figure()
    plt.hist(results)
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    plt.savefig(os.path.join(output_dir, 'scores.pdf'))

    plt.figure()
    plt.hist(position)
    plt.xlabel('Rank')
    plt.ylabel('Frequency')
    plt.savefig(os.path.join(output_dir, 'ranks.pdf'))

    with open(os.path.join(output_dir, 'scores.csv'), 'w') as f:
        w = writer(f)
        w.writerow(results)
    with open(os.path.join(output_dir, 'ranks.csv'), 'w') as f:
        w = writer(f)
        w.writerow(position)
    with open(os.path.join(output_dir, 'strategy.txt'), 'w') as f:
        f.write(str(best.strategy))

    return results, position
def __init__(self, args):
    Agent.__init__(self)
    info("Initializing DQN Agent...")

    tf.reset_default_graph()
    self.session = tf.Session()
    self.params = _init_dqn_params(args)
    self.replay_memory = _init_replay_memory(args)
    self.frame_stack = FrameStack(self.params[FRAME_STACK_SIZE],
                                  self.params[FRAME_WIDTH],
                                  self.params[FRAME_HEIGHT])
    self.dqn = DeepQNetwork(self.params, self.session, 'online')
    self.target_dqn = self.dqn
    if not self.params[NO_TRAIN]:
        self.target_dqn = DeepQNetwork(self.params, self.session, 'target', False)
        self.target_dqn.assign(self.dqn)

    self.run_id = get_time()
    self.first_move = True
    self.current_state = None
    self.last_state = None
    self.last_action = None
    self.last_score = None
    self.last_reward = None
    self.ep_reward = None
    self.terminal_state = None
    self.won = None
    self.best_q = np.nan
    self.last_100_wins_avg = CappedMovingAverage(100)
    self.last_100_reward_avg = CappedMovingAverage(100)
    self.wins_save_threshold = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]

    info("Done initializing DQN Agent.")
def singleAgentEvolution(CXPB, MUTPB, NGEN, N):
    pop = toolbox.population(n=NUM_IND)
    agents = [Agent(ind) for ind in pop]
    game = Game(agents, N)

    # Evaluate the entire population
    fitnesses = game.play()
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    for g in range(NGEN):
        # Select individual to be evolved
        index = random.randint(0, len(pop) - 1)

        # Select the new generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # Apply crossover and mutation on the offspring
        child1 = offspring[index]
        for child2 in [element for idx, element in enumerate(pop) if idx != index]:
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
                continue

        mutant = offspring[index]
        if random.random() < MUTPB:
            toolbox.mutate(mutant)

        # Set new (evolved) genotype
        agents[index].setStrategy(pop[index])
        # The chosen individual is replaced by the offspring
        pop[index] = offspring[index]

        # Evaluate the individuals
        fitnesses = game.play()
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

    # get best agent
    best = agents[0]
    for agent in agents:
        if best.score < agent.score:
            best = agent
    return best
def multiAgentEvolution(CXPB, MUTPB, NGEN, N):
    pop = toolbox.population(n=NUM_IND)
    agents = [Agent(ind) for ind in pop]
    game = Game(agents, N)

    # Evaluate the entire population
    fitnesses = game.play()
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    for g in range(NGEN):
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))

        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < CXPB:
                toolbox.mate(child1, child2)
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)

        # Set new (evolved) genotypes
        for agent, ind in zip(agents, offspring):
            agent.setStrategy(ind)

        # Evaluate the individuals
        fitnesses = game.play()
        for ind, fit in zip(offspring, fitnesses):
            ind.fitness.values = fit

        # The population is entirely replaced by the offspring
        pop[:] = offspring

    # get best agent
    best = agents[0]
    for agent in agents:
        if best.score < agent.score:
            best = agent
    return best
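# Sketch of the DEAP toolbox setup that singleAgentEvolution/multiAgentEvolution assume.
# The operator choices and parameters below are placeholders (the real registrations live
# elsewhere in the project); only the registered names match the calls above.
import random
from deap import base, creator, tools

IND_SIZE = 32  # assumed from the baseline Agent([-1] * 32)

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr", random.randint, -1, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr, IND_SIZE)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=-1, up=1, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)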
def __init__(self, index=0):
    Agent.__init__(self, index)
    self.policy = None
if __name__ == '__main__':
    CXPB, MUTPB, NGEN, N, TEST_RUNS = 0.5, 0.2, 10000, 1000, 1000

    NUM_IND = 20
    # Baseline agent who never changes society
    best = Agent([-1] * 32)
    evaluation(best, N, TEST_RUNS, 'baseline_20', random=True)
    # All agents evolving at the same time
    best = multiAgentEvolution(CXPB, MUTPB, NGEN, N)
    evaluation(best, N, TEST_RUNS, 'multi_evolution_20')
    # One agent evolving at a time
    best = singleAgentEvolution(CXPB, MUTPB, NGEN, N)
    evaluation(best, N, TEST_RUNS, 'single_evolution_20')

    NUM_IND = 100
    # Baseline agent who never changes society
    best = Agent([-1] * 32)
    evaluation(best, N, TEST_RUNS, 'baseline_100', random=True)
def __init__(self):
    self.initialPos = None
    Agent.__init__(self)
def __init__(self):
    Agent.__init__(self)
def __init__(self, **args):
    Agent.__init__(self, **args)
def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
    Agent.__init__(self)
    self.index = 0  # Pacman is always agent index 0
    self.evaluationFunction = lookup(evalFn, globals())
    self.depth = int(depth)
    self.NO_ACTION = "NoAction"
def game_setup(num_agents):
    game = Game()
    agents = [Agent() for i in range(num_agents)]
    ratings = [Rating() for i in range(num_agents)]
    return game, agents, ratings
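# Usage sketch, assuming Rating comes from the `trueskill` package and the game reports
# a finishing order; `finish_order` and the trueskill.rate call are illustrative only.
import trueskill

game, agents, ratings = game_setup(num_agents=4)
finish_order = [2, 0, 3, 1]                   # hypothetical ranks, 0 = best finish
rating_groups = [(r,) for r in ratings]       # one single-player "team" per agent
new_groups = trueskill.rate(rating_groups, ranks=finish_order)
ratings = [group[0] for group in new_groups]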
def __init__(self):
    Agent.__init__(self)
    self.next_action = None
def __init__(self, action_event, done_event):
    Agent.__init__(self)
    self.next_action = None
    self.action_event = action_event
    self.done_event = done_event
    self.kill = False
def __init__(self, index=0):
    Agent.__init__(self, index)
    self.lastAction = None
def __init__(self, sys1=Agent(), sys2=Agent()):
    self.system_1_model = System1Agent()
    self.system_2_model = System2Agent()
    self.count = 0
def __init__(self, index=0):
    Agent.__init__(self, index=0)
    self.actionIndex = 0
    self.actions = []
def __init__(self, index=0):
    Agent.__init__(self, index=0)
def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
    Agent.__init__(self)
    self.index = 0  # Pacman is always agent index 0
    self.evaluationFunction = util.lookup(evalFn, globals())
    self.depth = int(depth)
def __init__(self, *args):
    Agent.__init__(self, *args)
    self.lastStop = 1