import numpy as np

# Grid, Predator and Prey are assumed to come from the project's own simulation modules.


def measure_efficiency(evolution, n_preys, n_tests, confusion):
    total_preys, total_visible_preys, total_successes, attempts = [], [], [], []
    if confusion:
        for t in range(n_tests):
            grid = Grid(512, 512)
            predator = evolution.predator_pool[0]
            Predator(grid, brain=predator.brain, mask=predator.mask, confusion=True)
            prey = evolution.prey_pool[0]
            for _ in range(n_preys):
                Prey(grid, brain=prey.brain)
            print("\nRunning test", t + 1, "out of", n_tests)
            visible_preys, successes = [], []
            # Run the simulation for 2000 time steps.
            for _ in range(2000):
                for animal in grid.inhabitants:
                    result = animal.look_around()
                    if result:
                        visible_preys.append(result[0])
                        successes.append(result[1])
                for animal in grid.inhabitants:
                    animal.pick_move()
            # print(grid.n_preys(), 'preys left')
            # print('Average number of visible preys in attack:', np.mean(visible_preys))
            # print('Proportion of successes:', len(np.where(successes)[0]) / len(successes))
            total_preys.append(grid.n_preys())
            total_visible_preys.append(np.mean(visible_preys))
            total_successes.append(len(np.where(successes)[0]) / len(successes))
            attempts.append(len(successes))
        print('\nAverage number of preys left:', np.mean(total_preys))
        print('\nAverage number of visible preys in attacks:', np.mean(total_visible_preys))
        print('\nProportion of successes:', np.mean(total_successes))
        print('\nAverage number of attacks:', np.mean(attempts))
    else:
        preys_left = []
        for t in range(n_tests):
            grid = Grid(512, 512)
            predator = evolution.predator_pool[0]
            Predator(grid, brain=predator.brain, mask=predator.mask, confusion=False)
            prey = evolution.prey_pool[0]
            for _ in range(n_preys):
                Prey(grid, brain=prey.brain)
            print("\nRunning test", t + 1, "out of", n_tests)
            for _ in range(2000):
                for animal in grid.inhabitants:
                    animal.look_around()
                for animal in grid.inhabitants:
                    animal.pick_move()
            preys_left.append(grid.n_preys())
        print('\nAverage number of preys left:', np.mean(preys_left))
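# Minimal usage sketch (hypothetical argument values; assumes an `evolution`
# object whose predator_pool and prey_pool are already populated):
#
#     measure_efficiency(evolution, n_preys=50, n_tests=5, confusion=True)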
def __init__(self, Environment, location):
    self.Environment = Environment
    # Initialize Q
    alpha = 0.3
    gamma = 0.7
    epsilon = 0.1
    self.Prey = Prey(Environment, location)
    self.actions = Prey.actions
    self.TeamQLearning = TeamQLearning(self, alpha, gamma, epsilon)
def update_prey_pool(self, ranked_preys):
    # Same logic as update_predator_pool, but with preys.
    kids, p1, p2 = self.make_up_next_prey_generation(ranked_preys)
    self.prey_pool = [
        Prey(Grid(*self.grid_size), brain=p1),
        Prey(Grid(*self.grid_size), brain=p2),
    ]
    for kid in kids:
        self.prey_pool.append(kid)
def move(self, directionX, directionY):
    oldXPosition = self.gridX
    oldYPosition = self.gridY
    nextXPosition = self.gridX + directionX
    nextYPosition = self.gridY + directionY
    if self.isMovementPossible(nextXPosition, nextYPosition):
        # Only move if no other PreyAdult already occupies the next position.
        if (nextXPosition, nextYPosition) not in PreyAdult.dictionaryOfPreyAdults:
            Prey.move(self, directionX, directionY)
            PreyAdult.dictionaryOfPreyAdults.pop((oldXPosition, oldYPosition))
            PreyAdult.dictionaryOfPreyAdults[(self.gridX, self.gridY)] = self
def test_one_on_one(predator, prey, return_dict, num_test, pr):
    # Note: this function references `self`, so it is expected to be defined
    # inside a class (e.g. as a nested helper of an Evolution method).
    grid = Grid(512, 512)
    # Copy the predator.
    test_predator = Predator(grid, brain=predator.brain,
                             confusion=self.confusion, mask=self.mask)
    # Copy and duplicate the prey (n_preys instances).
    swarm = [Prey(grid, brain=prey.brain) for _ in range(self.n_preys)]
    for i in range(self.n_iters):
        # The commented-out block below printed the % of progress in the simulation.
        # if i % (self.n_iters / 10) == 0:
        #     if pr[int(i / (self.n_iters / 10))] < self.card_prey_pool * self.card_predator_pool - 10:
        #         pr[int(i / (self.n_iters / 10))] += 1
        #     else:
        #         t = np.round((time.time() - start) / 60, 2)
        #         t = str(t).split('.')
        #         t = [t[0], str(int(int(t[1]) * .6))]
        #         print(int(i * 100 / self.n_iters), '%\t' + t[0] + 'm' + t[1] + 's')
        #         pr[int(i / (self.n_iters / 10))] = -self.card_prey_pool * self.card_predator_pool
        self.next_timeStep(grid)
    return_dict[num_test] = {
        'predator': test_predator.fitness,
        'prey': swarm[0].fitness,
    }
def make_up_next_prey_generation(self, ranked_preys):
    # Same logic as make_up_next_predator_generations, but with preys.
    p = [ranked_preys[i][0].brain for i in range(3)]
    p += [ranked_preys[4][0].brain]
    kids = []
    for i in range(self.card_prey_pool - 2):
        w = np.zeros(p[0].coef_.shape)
        p1, p2 = np.random.choice(p, 2, replace=False)
        # Uniform crossover: each weight comes from parent 1 (45%), parent 2 (45%),
        # or a fresh random value in [-1, 1) (10%).
        for row in range(w.shape[0]):
            for col in range(w.shape[1]):
                r = np.random.random()
                if r < .45:
                    w[row, col] = p1.coef_[row, col]
                elif r < .9:
                    w[row, col] = p2.coef_[row, col]
                else:
                    w[row, col] = np.random.random() * 2 - 1
        kid = Prey(Grid(*self.grid_size))
        kid.brain.coef_ = w
        kids.append(kid)
    return kids, p[0], p[1]
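# A vectorized sketch of the same uniform crossover used above (hypothetical
# helper, not part of the project; assumes the 45% / 45% / 10% split between
# parent 1, parent 2 and a fresh random weight drawn from [-1, 1)):
import numpy as np


def crossover(coef1, coef2):
    r = np.random.random(coef1.shape)
    w = np.where(r < .45, coef1, coef2)                               # inherit from parent 1 or 2
    w = np.where(r >= .9, np.random.random(coef1.shape) * 2 - 1, w)   # random mutation
    return w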
def __init__(self, width=11, height=11, preyLocation=(5, 5), predatorLocation=(0, 0)):
    self.width = width
    self.height = height
    self.S, self.terminal_states = self.getStates()
    self.Prey = Prey(self, preyLocation)
    self.Predators = Predator(self, predatorLocation)
def __init__(self, width=11, height=11, preyLocation=(5, 5),
             predatorLocation=(0, 0), numberOfPredators=1):
    assert numberOfPredators > 0
    assert numberOfPredators < 5
    assert type(numberOfPredators) == int
    self.width = width
    self.height = height
    self.numberOfPredators = numberOfPredators
    self.Prey = Prey(self, preyLocation)
    self.PredatorLocations = [(0, 0), (10, 0), (0, 10), (10, 10)]
    self.Predators = [Predator(self, self.PredatorLocations[i])
                      for i in range(self.numberOfPredators)]
    self.Agents = [self.Prey] + self.Predators
def __init__(self, width, height, color, grid):
    Prey.__init__(self, width, height, color, grid)
    self.offspringsProtected = 0
    PreyAdult.dictionaryOfPreyAdults[(self.gridX, self.gridY)] = self
import random
from itertools import product

import gurobipy as grb

# Prey, TeamQLearning and argmax are assumed to come from the project's own modules.


class TeamPrey():

    def __init__(self, Environment, location):
        self.Environment = Environment
        # Initialize Q
        alpha = 0.3
        gamma = 0.7
        epsilon = 0.1
        self.Prey = Prey(Environment, location)
        self.actions = Prey.actions
        self.TeamQLearning = TeamQLearning(self, alpha, gamma, epsilon)

    def updateQ(self, s, a, o, s_prime, r):
        '''
        Update this team's Q and V.
        '''
        O = self.actions
        A = self.actions
        # Use linear programming to obtain the optimal policy for this state.
        try:
            # Create a new model
            m = grb.Model("MultiAgentMinimax")
            m.setParam("OutputFlag", 0)
            # Create variables (loop names are kept distinct from the arguments
            # a and o so the (a, o) pair passed to this method is not overwritten).
            pi = dict()
            for ai in A:
                pi[ai] = m.addVar(0.0, 1.0, vtype=grb.GRB.CONTINUOUS, name=str(ai))
            # Integrate new variables
            m.update()
            # Set objective
            m.setObjective(
                grb.LinExpr([(self.TeamQLearning.Q[s][(ai, oj)], pi[ai])
                             for oj in O for ai in A]),
                grb.GRB.MAXIMIZE)
            # Add constraint: Sum_a pi(a) = 1
            expr = grb.quicksum(m.getVars())
            m.addConstr(expr == 1, "Total probability")
            # Add a constraint on the expected payoff against every opponent action
            for oj in O:
                expr = grb.LinExpr([(self.TeamQLearning.Q[s][(ai, oj)], pi[ai])
                                    for ai in A])
                m.addConstr(expr >= 0)
            m.optimize()
            for ai in A:
                self.TeamQLearning.policy[s][ai] = pi[ai].x
        except grb.GurobiError:
            print('Error reported')
        # Update Q and V
        self.TeamQLearning.updateQ(s, a, o, s_prime, r)

    def getActionEpsilonGreedy(self, s):
        # Find the (joint) action that maximizes Q[(s, a)]
        prob_actions = dict()
        uniform_epsilon = self.TeamQLearning.epsilon / len(self.actions)
        for possible_a in self.actions:
            # Set probabilities of all actions uniformly
            prob_actions[possible_a] = uniform_epsilon
        best_a = argmax(self.TeamQLearning.policy[s])
        prob_actions[best_a] += 1 - self.TeamQLearning.epsilon
        # For every action, check if the cumulative probability exceeds a
        # random number.
        random_number = random.random()
        cumulative_prob = 0.0
        for a in self.actions:
            cumulative_prob += prob_actions[a]
            if cumulative_prob >= random_number:
                return a

    def performAction(self, a):
        self.Prey.performAction(a)

    def permutations(self, iterable, r=None):
        '''
        iterator <- permutations(iterable, r)

        Finds permutations of iterable of length r, with duplicate entries.
        '''
        pool = tuple(iterable)
        n = len(pool)
        r = n if r is None else r
        for indices in product(range(n), repeat=r):
            if len(indices) == r:
                yield tuple(pool[i] for i in indices)
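# The Gurobi model above maximizes a simplified objective over the policy
# variables. For reference, a minimal sketch of the textbook maximin LP for a
# zero-sum matrix game, written with scipy.optimize.linprog instead of Gurobi
# (an alternative formulation and hypothetical helper, not the project's solver):
import numpy as np
from scipy.optimize import linprog


def maximin_policy(Q_matrix):
    # Q_matrix[i, j]: payoff to the row player for action i vs. opponent action j.
    # Maximize v subject to: for every opponent action o,
    #   sum_a pi[a] * Q[a, o] >= v,   sum_a pi[a] = 1,   pi >= 0.
    n_a, n_o = Q_matrix.shape
    c = np.zeros(n_a + 1)
    c[-1] = -1.0                                          # linprog minimizes, so minimize -v
    A_ub = np.hstack([-Q_matrix.T, np.ones((n_o, 1))])    # v - pi @ Q[:, o] <= 0
    b_ub = np.zeros(n_o)
    A_eq = np.zeros((1, n_a + 1))
    A_eq[0, :n_a] = 1.0                                   # probabilities sum to one
    b_eq = np.array([1.0])
    bounds = [(0.0, 1.0)] * n_a + [(None, None)]          # pi in [0, 1], v free
    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds)
    return res.x[:n_a]                                    # the mixed strategy pi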
def generate_random_preys(self, n):
    return [Prey(Grid(*self.grid_size)) for _ in range(n)]
def visualize(evolution, n_predator, n_prey, prey_card):
    predator = evolution.predator_pool[n_predator]
    prey = evolution.prey_pool[n_prey]
    preys = [Prey(predator.grid, brain=prey.brain) for _ in range(prey_card)]
    predator.grid.visualize()
    print('There are', predator.grid.n_preys(), 'preys left alive.\n')
def __init__(self, width, height, color, grid):
    Prey.__init__(self, width, height, color, grid)
    PreyOffspring.dictionaryOfOffsprings[(self.gridX, self.gridY)] = self
sizeOfScreen = numOfGridsInARow * sizeOfGrid
surface = pygame.display.set_mode((numOfGridsInARow * sizeOfGrid,
                                   numOfGridsInARow * sizeOfGrid))
numOfPreys = 1
numOfPredators = 1
numOfObstacles = 1
numOfFood = 1
worldMap = Map(numOfGridsInARow, sizeOfGrid, WHITE, surface, pygame)

# Fixed items should be placed first, since our grid is implemented in a stack manner.
obstacles = [Obstacle(worldMap, random.randrange(1, 4), random.randrange(1, 4), BROWN)
             for i in range(numOfObstacles)]
# obstacles = [Obstacle(worldMap, 3, 1, BROWN) for i in range(numOfObstacles)]
foods = [Food(worldMap, 1, 1, GREEN) for i in range(numOfFood)]
predators = [Predator(worldMap, 3, 1, 1, RED) for i in range(numOfPredators)]
preys = [Prey(worldMap, 2, 1, 1, YELLOW) for i in range(numOfPreys)]

age = 1
learningAges = 1
preysEaten = []
foodEaten = []

while True:
    event = pygame.event.poll()
    if event.type == pygame.QUIT:
        pygame.quit()
        break
    surface.fill(WHITE)
    # Update for every animat in the world
    # eatF = 0