import random


class Environment:
    '''
    Creates an instance of the environment. By default an eleven-by-eleven
    grid is used and the prey starts at (5, 5).
    '''
    def __init__(self,
                 width=11,
                 height=11,
                 preyLocation=(5, 5),
                 predatorLocation=(0, 0),
                 numberOfPredators=1):

        # Between one and four predators are supported.
        assert isinstance(numberOfPredators, int)
        assert 0 < numberOfPredators < 5

        self.width = width
        self.height = height
        self.numberOfPredators = numberOfPredators

        self.TeamPrey = TeamPrey(self, preyLocation)
        self.TeamPredator = TeamPredator(self, predatorLocation)

    def minimaxQLearning(self, episodes=1000):
        '''
        minimaxQLearning(episodes=1000)

        Train both teams with minimax-Q. Each episode runs until an
        absorbing state is reached, after which the agents are reset.
        '''
        done = False
        self.resetAgents()
        s = self.gameState()
        episode = 0

        while not done:
            # In state s take action a and opponent action o to get reward r
            # and successor state s_prime.

            # Find each team's action, epsilon-greedily
            a_teamPred = self.TeamPredator.getJointActionEpsilonGreedy(s)
            a_teamPrey = self.TeamPrey.getActionEpsilonGreedy(s)

            # Each team performs the chosen action
            self.TeamPredator.performAction(a_teamPred)
            self.TeamPrey.performAction(a_teamPrey)

            s_prime = self.gameState()
            reward, game_over = self.reward(s_prime)

            # Update Q: state, own action, opponent action
            self.TeamPredator.updateQ(s, a_teamPred, a_teamPrey, s_prime,
                                      reward)
            self.TeamPrey.updateQ(s, a_teamPrey, a_teamPred, s_prime, reward)

            s = s_prime
            if game_over:
                if episode < episodes:
                    episode += 1
                    self.resetAgents()
                    # Re-derive the state after the reset; s_prime is stale.
                    s = self.gameState()
                else:
                    done = True

    def reward(self, s):
        '''
        r, game_over <- reward(s)

        Returns the predators' reward for the given state, which holds the
        positions of the predators relative to the prey. Because this is a
        zero-sum game, the prey's reward is simply the negation of the
        predators' reward. The boolean game_over indicates whether the state
        is absorbing.
        '''
        # NOTE: s is assumed to be a sequence of per-predator relative
        # positions, one (x, y) tuple per predator.

        # Two predators on the same cell confuse each other; this is
        # penalized before the capture check.
        if len(s) != len(set(s)):
            return -10, True
        # A predator shares the prey's cell: the prey is caught.
        elif (0, 0) in s:
            return 10, True
        else:
            return 0, False
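
    # A worked example of the reward cases above (illustrative only; it
    # assumes the multi-predator state layout of one (x, y) tuple per
    # predator):
    #   s = ((1, 0), (1, 0))  ->  (-10, True)   two predators collide
    #   s = ((0, 0), (2, 1))  ->  ( 10, True)   a predator caught the prey
    #   s = ((1, 0), (2, 1))  ->  (  0, False)  the hunt continues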

    def deriveState(self, locations):
        '''
        state <- deriveState(locations)

        Derive the state from the locations of the prey and the predators:
        the predator's position relative to the prey on the toroidal grid,
        with each coordinate wrapped into [-width // 2, width // 2].
        '''
        prey_x, prey_y = locations[0]
        predator_x, predator_y = locations[1]

        # Wrap the relative distance around the torus so it is centered on
        # the prey (the half-width is 5 for the default 11x11 grid).
        half_w = self.width // 2
        half_h = self.height // 2
        x = ((half_w + prey_x - predator_x) % self.width) - half_w
        y = ((half_h + prey_y - predator_y) % self.height) - half_h

        return (x, y)
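
    # Example of the toroidal wrapping above: on the default 11x11 grid a
    # prey at (0, 0) and a predator at (10, 10) are adjacent across the
    # border, so deriveState([(0, 0), (10, 10)]) yields (1, 1) rather than
    # (-10, -10).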

    def gameState(self):
        '''
        state <- gameState()

        Derives the game state from the agents' current locations.
        '''
        return self.deriveState([self.TeamPrey.Prey.location,
                                 self.TeamPredator.Predator.location])

    def resetAgents(self):
        '''
        Reset the positions of the prey and the predator in this environment.
        '''
        # Prey restarts at the center of the grid, the predator at a random
        # cell; locations are only ever compared modulo the grid size.
        self.TeamPrey.Prey.location = (self.width // 2, self.height // 2)
        self.TeamPredator.Predator.location = (random.randint(0, self.width - 1),
                                               random.randint(0, self.height - 1))

    def simulateEnvironment(self):
        '''
        simulateEnvironment()

        Simulate the environment for one step: each team picks an
        epsilon-greedy action and its agent's location is updated.
        Returns a list of agent locations.
        '''
        s = self.gameState()

        # Mirror the action selection used during learning.
        a_teamPred = self.TeamPredator.getJointActionEpsilonGreedy(s)
        a_teamPrey = self.TeamPrey.getActionEpsilonGreedy(s)

        # Update the locations
        self.TeamPredator.performAction(a_teamPred)
        self.TeamPrey.performAction(a_teamPrey)

        return [self.TeamPrey.Prey.location,
                self.TeamPredator.Predator.location]
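

# A minimal usage sketch (it assumes TeamPrey and TeamPredator are defined
# in this module and expose the methods called above; parameter values are
# illustrative, not tuned):
if __name__ == '__main__':
    env = Environment(width=11, height=11, numberOfPredators=1)

    # Train both teams with minimax-Q for 1000 episodes.
    env.minimaxQLearning(episodes=1000)

    # Roll out a few epsilon-greedy steps and print the agents' locations.
    for step in range(5):
        locations = env.simulateEnvironment()
        print(step, locations)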