Exemplo n.º 1
0
 def setupAdvising(self,agentIndex,allAgents):
     """Create the advice helper and register a single advisor.

     agentIndex - position of this agent inside allAgents
     allAgents - indexable collection holding every agent
     The advisor is the agent stored right after this one; the modulo
     makes the last agent wrap around to the first entry.
     """
     self.adviceObject = AdviceUtil()
     successor = allAgents[(agentIndex + 1) % len(allAgents)]
     self.adviceObject.setupAdvisors([successor])
Exemplo n.º 2
0
 def setupAdvising(self, agentIndex, allAgents):
     """Create the advice helper and register every other agent as advisor.

     agentIndex - position of this agent inside allAgents
     allAgents - iterable holding every agent
     """
     self.adviceObject = AdviceUtil()
     peers = []
     for position, candidate in enumerate(allAgents):
         # Skip the entry that corresponds to this agent itself
         if position != agentIndex:
             peers.append(candidate)
     self.adviceObject.setupAdvisors(peers)
Exemplo n.º 3
0
class AdHoc(SARSATile):
    """Advising agent built on SARSATile with ask and advise budgets.

    The agent may ask its advisors for an action (limited by budgetAsk) and
    may give advice to other agents (limited by budgetAdvise). Subclasses
    are expected to implement check_ask, check_advise and midpoint, and to
    set informAction.
    """
    # Value learningEpisodes must reach before the agent starts asking
    startAdvice = None
    # Episodes finished while exploring (incremented in step)
    learningEpisodes = None
    # Maximum number of times advice can be received / given
    budgetAsk = 0
    budgetAdvise = 0
    # Budget already consumed
    spentBudgetAsk = 0
    spentBudgetAdvise = 0

    # Offset added to the visit count inside the log of state_importance
    scalingVisits = math.exp(10)

    # Status returned by the most recent call to step
    lastStatus = agent.IN_GAME

    #Enum for importance metrics
    VISIT_IMPORTANCE, Q_IMPORTANCE = range(2)

    stateImportanceMetric = None

    # Helper used to reach the advisors; created in setupAdvising
    adviceObject = None

    #Enum for the kind of probability computed by calc_prob_adv
    ASK, ADVISE = range(2)
    # Maps quantized state -> number of visits while exploring
    visitTable = None

    # Quantized states in which advice was already used this episode
    advisedState = None
    informAction = None  #must be informed in subclass

    def __init__(self,
                 budgetAsk,
                 budgetAdvise,
                 stateImportanceMetric,
                 seed=12345,
                 port=12345,
                 epsilon=0.1,
                 alpha=0.1,
                 gamma=0.9,
                 decayRate=0.9,
                 serverPath="/home/leno/HFO/bin/"):
        """Initializes budgets and bookkeeping tables.

        budgetAsk - maximum number of times advice can be received
        budgetAdvise - maximum number of times advice can be given
        stateImportanceMetric - VISIT_IMPORTANCE or Q_IMPORTANCE
        seed, port, serverPath - forwarded to the parent constructor
        epsilon, alpha, gamma, decayRate - accepted for interface
        compatibility; they are not forwarded to the parent constructor here
        """
        super(AdHoc, self).__init__(seed=seed,
                                    port=port,
                                    serverPath=serverPath)
        self.name = "AdHoc"
        self.visitTable = {}
        self.advisedState = {}
        self.budgetAsk = budgetAsk
        self.budgetAdvise = budgetAdvise
        self.stateImportanceMetric = stateImportanceMetric
        self.startAdvice = 1
        self.learningEpisodes = 0

    def select_action(self, stateFeatures, state, noAdvice=False):
        """Changes the exploration strategy to allow asking for advice.

        stateFeatures - features indexed by ABLE_KICK and sent to advisors
        state - state representation given to quantize_features
        noAdvice - when True the agent never asks for advice

        Advice is requested only while exploring, with ask budget left, when
        the ABLE_KICK feature equals 1, after startAdvice learning episodes
        and when check_ask agrees. Otherwise (or when no usable advice
        arrives) the parent action selection is used.
        """
        mayAsk = (self.exploring
                  and self.spentBudgetAsk < self.budgetAsk
                  and stateFeatures[self.ABLE_KICK] == 1
                  and not noAdvice
                  and self.learningEpisodes >= self.startAdvice)
        if mayAsk and self.check_ask(state):
            #In case the agent will communicate its intended action
            if self.informAction:
                normalAction = super(AdHoc, self).select_action(
                    stateFeatures, state)
            else:
                normalAction = None

            advised = self.adviceObject.ask_advice(self.get_Unum(),
                                                   stateFeatures,
                                                   normalAction)

            if advised:
                try:
                    stateKey = self.quantize_features(state)
                    action = self.combineAdvice(advised)
                except Exception:
                    # Best effort: report and fall back to the own policy.
                    # Single-argument print() behaves the same on Python 2.
                    print("Exception when combining the advice " +
                          str(advised))
                else:
                    # Bookkeeping happens only after the advice was
                    # successfully combined, so a failure does not consume
                    # budget nor mark the state as advised.
                    self.advisedState[stateKey] = True
                    self.spentBudgetAsk = self.spentBudgetAsk + 1
                    return action
            #No need to compute two times the intended action
            if self.informAction:
                return normalAction

        return super(AdHoc, self).select_action(stateFeatures, state, noAdvice)

    @abc.abstractmethod
    def check_advise(self, stateFeatures, state):
        """Returns if the agent should advice in this state.
        The advised action is also returned in the positive case

        advise_action unpacks the result as (advise, advisedAction), so
        implementations must return a pair such as (True, action) or
        (False, None). The reference sketch previously kept here as
        comments computed state_importance with stateImportanceMetric, turned
        it into a probability through calc_prob_adv using midpoint(ADVISE),
        and advised with select_action(stateFeatures, state, True) when
        random.random() < prob and prob > 0.1.
        """

    def combineAdvice(self, advised):
        """Returns the most frequent advised action converted to int.

        advised - list with the actions suggested by the advisors
        """
        return int(max(set(advised), key=advised.count))

    def state_importance(self, state, typeProb):
        """Calculates the state importance
        state - the state
        typeProb - is the state importance being calculated in regard to
        the number of visits or also by Q-table values?

        Returns 0.0 for states never visited. For VISIT_IMPORTANCE returns
        visits / (visits + log(scalingVisits + visits)). For Q_IMPORTANCE
        returns |maxQ - minQ| / (1 - visitImportance), or 0.0 when no
        Q-value is stored for the state. Returns None for any other typeProb.
        """
        processedState = self.quantize_features(state)
        numberVisits = self.number_visits(processedState)

        if numberVisits == 0:
            return 0.0

        visitImportance = numberVisits / (
            numberVisits + math.log(self.scalingVisits + numberVisits))

        if typeProb == self.VISIT_IMPORTANCE:
            return visitImportance

        if typeProb == self.Q_IMPORTANCE:
            maxQ = -float("inf")
            minQ = float("inf")
            #Get max and min Q value among the actions present in the table
            for act in (self.DRIBBLE, self.SHOOT, self.PASSfar,
                        self.PASSnear):
                key = (processedState, act)
                if key in self.qTable:
                    actQ = self.qTable.get(key)
                    if actQ > maxQ:
                        maxQ = actQ
                    if actQ < minQ:
                        minQ = actQ

            qImportance = math.fabs(maxQ - minQ)
            # Still infinite when no action had a stored Q-value
            if qImportance == float('inf'):
                return 0.0
            return qImportance / (1 - visitImportance)

        #If the agent got here, it is an error
        return None

    def step(self, state, action):
        """Modifies the default step action just to include a state visit counter

        Also stores the returned status in lastStatus and, when the status
        differs from IN_GAME, clears advisedState and (while exploring)
        counts one more learning episode.
        """
        if self.exploring:
            processedState = self.quantize_features(state)
            self.visitTable[processedState] = self.visitTable.get(
                processedState, 0.0) + 1
        status, statePrime, actionPrime = super(AdHoc,
                                                self).step(state, action)
        self.lastStatus = status

        if self.lastStatus != self.IN_GAME:
            self.advisedState = {}
            if self.exploring:
                self.learningEpisodes += 1

        return status, statePrime, actionPrime

    @abc.abstractmethod
    def check_ask(self, state):
        """Returns if the agent should ask for advise in this state

        select_action treats the result as a boolean. The reference sketch
        previously kept here as comments only asked while exploring in
        states not yet in advisedState: it computed
        state_importance(state, VISIT_IMPORTANCE), converted it with
        calc_prob_adv using midpoint(ASK), and returned True when
        random.random() < prob and prob > 0.1.
        """

    def calc_prob_adv(self, importance, midpoint, typeProb):
        """Calculates the probability of giving/receiving advice
        importance - the current state importance
        midpoint - the midpoint for the logistic function
        typeProb - ASK or ADVISE

        Returns 1 / (1 + exp(signal * 10 * (importance - midpoint))), where
        signal is 1 for ASK and -1 otherwise.
        """
        signal = 1 if typeProb == self.ASK else -1
        k = 10

        try:
            return 1 / (1 + math.exp(signal * k * (importance - midpoint)))
        except OverflowError:
            # exp() overflowed, so the logistic value is at its limit of 0
            return 0.0

    def advise_action(self, uNum, state, adviseeAction=None):
        """Verifies if the agent can advice a friend, and return the action if possible

        uNum - identifier of the asking agent (not used here)
        state - forwarded to check_advise as its stateFeatures argument;
                get_transformed_features(state) is forwarded as its state
        adviseeAction - action the advisee intends to take, if informed

        Returns the advised action, or None when the budget is exhausted,
        check_advise declines, or the advice equals adviseeAction. Budget
        is only spent when an action is returned.
        """
        if self.spentBudgetAdvise < self.budgetAdvise:
            #Check if the agent should advise
            advise, advisedAction = self.check_advise(
                state, self.get_transformed_features(state))
            if advise:
                if adviseeAction is None or advisedAction != adviseeAction:
                    self.spentBudgetAdvise = self.spentBudgetAdvise + 1
                    return advisedAction
        return None

    def setupAdvising(self, agentIndex, allAgents):
        """ This method is called in preparation for advising

        agentIndex - position of this agent inside allAgents
        allAgents - every agent; all but this one become advisors
        """
        self.adviceObject = AdviceUtil()
        advisors = [x for i, x in enumerate(allAgents) if i != agentIndex]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns the advise budget the agent already used"""
        return self.spentBudgetAdvise

    @abc.abstractmethod
    def midpoint(self, typeMid):
        """Calculates the midpoint"""
        pass

    def number_visits(self, state):
        """Returns the visit count stored for a quantized state (0.0 if unseen)"""
        return self.visitTable.get(state, 0.0)
Exemplo n.º 4
0
class Torrey(SARSATile):
    """Advising agent built on SARSATile that advises above a threshold.

    The agent asks for advice in every eligible state and gives advice when
    the spread between its stored Q-values exceeds threshold, while the
    advise budget lasts.
    """

    # Maximum number of times advice can be given, and amount already used
    budget = 0
    spentBudget = 0
    # Status returned by the most recent call to step
    lastStatus = agent.IN_GAME
    # Helper used to reach the advisors; created in setupAdvising
    adviceObject = None
    # Quantized states in which advice was already used this episode
    advisedState = None
    # Whether the intended action is sent along with the advice request
    informAction = None

    def __init__(self, budget=1000,threshold = 0.01,seed=12345, port=12345, serverPath = "/home/leno/HFO/bin/"):
        """Initializes the budget and bookkeeping.

        budget - maximum number of times advice can be given
        threshold - minimum state importance required to give advice
        seed, port, serverPath - forwarded to the parent constructor
        """
        super(Torrey, self).__init__(seed=seed,port=port,serverPath=serverPath)
        self.name = "Torrey"
        self.advisedState = {}
        self.budget = budget
        self.threshold = threshold
        self.informAction = False

    def step(self, state, action):
        """Runs the default step, records the status and resets advisedState
        whenever the status differs from IN_GAME"""
        status, statePrime, actionPrime = super(Torrey, self).step(state,action)
        self.lastStatus = status
        if self.lastStatus != self.IN_GAME:
            self.advisedState = {}
        return status, statePrime, actionPrime

    def select_action(self, stateFeatures, state, noAdvice = False):
        """Changes the exploration strategy to allow asking for advice.

        stateFeatures - features indexed by ABLE_KICK and sent to advisors
        state - state representation given to quantize_features
        noAdvice - when True the agent never asks for advice

        Advice is requested while exploring, when the ABLE_KICK feature
        equals 1 and the quantized state was not advised yet. Otherwise (or
        when no usable advice arrives) the parent action selection is used.
        """
        if self.exploring and stateFeatures[self.ABLE_KICK] == 1 and not noAdvice:
            stateKey = self.quantize_features(state)
            if stateKey not in self.advisedState:
                #Ask for advice
                if self.informAction:
                    normalAction = super(Torrey, self).select_action(stateFeatures,state,True)
                else:
                    normalAction = None
                advised = self.adviceObject.ask_advice(self.get_Unum(),stateFeatures,normalAction)
                if advised:
                    try:
                        action = self.combineAdvice(advised)
                    except Exception:
                        # Best effort: report and fall back to own policy.
                        # Single-argument print() works the same on Python 2.
                        print("Exception when combining the advice " + str(advised))
                    else:
                        # Only mark the state once the advice is usable
                        self.advisedState[stateKey] = True
                        return action
                #No need to compute again the intended action
                if self.informAction:
                    return normalAction
        return super(Torrey, self).select_action(stateFeatures,state,noAdvice)

    def combineAdvice(self,advised):
        """Returns the most frequent advised action converted to int.

        advised - list with the actions suggested by the advisors
        """
        return int(max(set(advised), key=advised.count))

    def advise_action(self,uNum,state,intendedAction=None):
        """Verifies if the agent can advice a friend, and return the action if possible

        uNum - identifier of the asking agent (not used here)
        state - forwarded to check_advise as its stateFeatures argument;
                get_transformed_features(state) is forwarded as its state
        intendedAction - action the advisee intends to take, if informed

        Returns the advised action, or None when the budget is exhausted,
        check_advise declines, or the advice equals intendedAction. Budget
        is only spent when an action is returned.
        """
        if self.spentBudget < self.budget:
            #Check if the agent should advise
            advise,advisedAction = self.check_advise(state,self.get_transformed_features(state))
            if advise:
                if intendedAction is None or advisedAction!=intendedAction:
                    self.spentBudget = self.spentBudget + 1
                    return advisedAction
        return None

    def check_advise(self,stateFeatures,state):
        """Returns if the agent should advice in this state.
        The advised action is also returned in the positive case

        Returns (True, action) when state_importance(state) is above
        threshold, (False, None) otherwise. The action comes from
        select_action with noAdvice=True so the advisor never asks itself.
        """
        importance = self.state_importance(state)

        if importance > self.threshold:
            advisedAction = self.select_action(stateFeatures,state,True)
            return True,advisedAction

        return False,None

    def state_importance(self,state):
        """Calculates the state importance
        state - the state

        Returns |maxQ - minQ| over the actions that have a stored Q-value
        for the quantized state, or 0.0 when none of them has one.
        """
        processedState = self.quantize_features(state)

        maxQ = -float("inf")
        minQ = float("inf")
        #Get max and min Q value among the actions present in the table
        for act in (self.DRIBBLE, self.SHOOT, self.PASSfar, self.PASSnear):
            key = (processedState, act)
            if key in self.qTable:
                actQ = self.qTable.get(key, 0)
                if actQ > maxQ:
                    maxQ = actQ
                if actQ < minQ:
                    minQ = actQ

        qImportance = math.fabs(maxQ - minQ)
        # Still infinite when no action had a stored Q-value; such a state
        # must not look maximally important to check_advise.
        if qImportance == float('inf'):
            return 0.0

        return qImportance

    def get_used_budget(self):
        """Returns the advise budget the agent already used"""
        return self.spentBudget

    def setupAdvising(self,agentIndex,allAgents):
        """ This method is called in preparation for advising

        agentIndex - position of this agent inside allAgents
        allAgents - every agent; the one stored after this agent (wrapping
                    around at the end) becomes the only advisor
        """
        self.adviceObject = AdviceUtil()
        #Get the next agent
        index = (agentIndex+1)%len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)
Exemplo n.º 5
0
Arquivo: adhoc.py Projeto: cowhi/HFO
 def setup_advising(self,agentIndex,allAgents):
     """Create the advice helper and register every other agent as advisor.

     agentIndex - position of this agent inside allAgents
     allAgents - iterable holding every agent
     """
     self.adviceObject = AdviceUtil()
     # Pair each agent with its position and drop this agent's own entry
     others = [member
               for position, member in enumerate(allAgents)
               if not position == agentIndex]
     self.adviceObject.setupAdvisors(others)
Exemplo n.º 6
0
class AdHoc(QLearning):
    """Advising agent built on QLearning with ask and advise budgets.

    Subclasses are expected to implement check_ask and check_advise.
    """

    # Budget already consumed for asking / advising
    spentBudgetAsk = None
    spentBudgetAdv = None
    # Maximum number of times advice can be received / given
    budgetAsk = None
    budgetAdv = None
    # Maps state -> number of times action() saw it while exploring
    visitedNumber = None
    # States in which advice was already used (cleared when reward == 1)
    advisedState = None
    # Helper used to reach the advisors; created in setup_advising
    adviceObject = None

    def __init__(self,agentIndex,alpha=0.2,gamma=0.9,T=0.4,budgetAsk = 350,budgetAdv = 350):
        """Initializes budgets and bookkeeping tables.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budgetAsk - maximum number of times advice can be received
        budgetAdv - maximum number of times advice can be given
        """
        super(AdHoc, self).__init__(agentIndex,alpha=alpha,gamma=gamma,T=T)

        self.budgetAsk = budgetAsk
        self.budgetAdv = budgetAdv
        self.spentBudgetAsk = 0
        self.spentBudgetAdv = 0
        self.fellowAgents = []
        self.visitedNumber = {}
        self.advisedState = {}

    def setup_advising(self,agentIndex,allAgents):
        """ This method is called in preparation for advising

        agentIndex - position of this agent inside allAgents
        allAgents - every agent; all but this one become advisors
        """
        self.adviceObject = AdviceUtil()
        fellows = [x for i,x in enumerate(allAgents) if i!=agentIndex]
        self.adviceObject.setupAdvisors(fellows)

    def get_used_budget(self):
        """Returns the advise budget the agent already used"""
        return self.spentBudgetAdv

    def advise_action(self,uNum,state):
        """Verifies if the agent can advice a friend, and return the action if possible

        uNum - identifier of the asking agent (not used here)
        state - state in which the friend asked for advice

        Returns the advised action, or None when the budget is exhausted or
        check_advise declines. Budget is only spent when advice is given.
        """
        if self.spentBudgetAdv < self.budgetAdv:
            #Check if the agent should advise
            advise,advisedAction = self.check_advise(state)
            if advise:
                self.spentBudgetAdv = self.spentBudgetAdv + 1
                return advisedAction
        return None

    @abc.abstractmethod
    def check_advise(self,state):
        """Returns if the agent should advice in this state.
        The advised action is also returned in the positive case

        advise_action unpacks the result as (advise, advisedAction).
        """
        pass

    @abc.abstractmethod
    def check_ask(self,state):
        """Returns if the agent should ask for advice in this state.

        action() treats the result as a boolean.
        """
        pass

    def observe_reward(self,state,action,statePrime,reward) :
        """Does the necessary updates (Q-table, etc)

        After the parent update, a reward of 1 is treated as a terminal
        state and the set of advised states is cleared.
        """
        super(AdHoc, self).observe_reward(state,action,statePrime,reward)
        if reward==1: #terminal state
            self.advisedState = {}

    def combineAdvice(self,advised):
        """Returns the most frequent advised action converted to int.

        advised - list with the actions suggested by the advisors
        """
        return int(max(set(advised), key=advised.count))

    def action(self,state,noAdvice = False):
        """Returns the action for the current state

        state - current state (used as a dictionary key)
        noAdvice - when True the agent never asks for advice

        While exploring the visit counter of the state is increased and, if
        check_ask agrees, the advisors are queried. When no usable advice
        arrives the parent action selection is used.
        """
        if self.exploring and not noAdvice:
            self.visitedNumber[state] = self.visitedNumber.get(state,0) + 1
            if self.check_ask(state):
                #Ask for advice
                advised = self.adviceObject.ask_advice(self.agentIndex,state)
                if advised:
                    try:
                        action = self.combineAdvice(advised)
                    except Exception:
                        # Best effort: report and fall back to own policy.
                        # Single-argument print() works the same on Python 2.
                        print("Exception when combining the advice " + str(advised))
                    else:
                        # Charge the budget and mark the state together,
                        # and only when the advice is really used.
                        self.spentBudgetAsk = self.spentBudgetAsk + 1
                        self.advisedState[state] = True
                        return action
        return super(AdHoc, self).action(state,noAdvice)
Exemplo n.º 7
0
class Torrey(QLearning):
    """Advising agent built on QLearning that advises above a threshold.

    The agent asks for advice whenever it explores and gives advice when the
    spread between its Q-values exceeds threshold, while the budget lasts.
    """

    fellowAgents = None
    # Advise budget already consumed / maximum advise budget
    spentBudget = None
    budget = None
    episodeUpdateTrace = None
    # Minimum state importance required to give advice
    threshold = None
    # Helper used to reach the advisor; created in setup_advising
    adviceObject = None

    def __init__(self,
                 agentIndex,
                 alpha=0.2,
                 gamma=0.9,
                 T=0.4,
                 budget=350,
                 threshold=0.001):
        """Initializes the budget and the importance threshold.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budget - maximum number of times advice can be given
        threshold - minimum state importance required to give advice
        """
        super(Torrey, self).__init__(agentIndex, alpha=alpha, gamma=gamma, T=T)

        self.budget = budget
        self.spentBudget = 0
        self.fellowAgents = []
        self.threshold = threshold

    def setup_advising(self, agentIndex, allAgents):
        """ This method is called in preparation for advising

        agentIndex - position of this agent inside allAgents
        allAgents - every agent; the one stored after this agent (wrapping
                    around at the end) becomes the only advisor
        """
        self.adviceObject = AdviceUtil()
        #Get the next agent
        index = (agentIndex + 1) % len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns the advise budget the agent already used"""
        return self.spentBudget

    def advise_action(self, uNum, state):
        """Verifies if the agent can advice a friend, and return the action if possible

        uNum - identifier of the asking agent (not used here)
        state - state in which the friend asked for advice

        Returns the advised action, or None when the budget is exhausted or
        check_advise declines. Budget is only spent when advice is given.
        """
        if self.spentBudget < self.budget:
            #Check if the agent should advise
            advise, advisedAction = self.check_advise(state)
            if advise:
                self.spentBudget = self.spentBudget + 1
                return advisedAction
        return None

    def check_advise(self, state):
        """Returns if the agent should advice in this state.
        The advised action is also returned in the positive case

        Returns (True, action) when state_importance(state) is above
        threshold, (False, None) otherwise. The action comes from
        action(state, True) so the advisor never asks for advice itself.
        """
        importance = self.state_importance(state)

        if importance > self.threshold:
            advisedAction = self.action(state, True)
            return True, advisedAction

        return False, None

    def state_importance(self, state):
        """Calculates the state importance
        state - the state

        Returns the absolute difference between the two values that
        get_max_min_q_value yields for the four movement actions.
        """
        allActions = [actions.NORTH, actions.SOUTH, actions.WEST, actions.EAST]
        maxQ, minQ = self.get_max_min_q_value(state, allActions)

        qImportance = math.fabs(maxQ - minQ)

        return qImportance

    def observe_reward(self, state, action, statePrime, reward):
        """Does the necessary updates (Q-table, etc)"""
        super(Torrey, self).observe_reward(state, action, statePrime, reward)

    def combineAdvice(self, advised):
        """Returns the most frequent advised action converted to int.

        advised - list with the actions suggested by the advisors
        """
        return int(max(set(advised), key=advised.count))

    def action(self, state, noAdvice=False):
        """Returns the action for the current state

        state - current state; advice is skipped when state[0] is infinite
        noAdvice - when True the agent never asks for advice

        When no usable advice arrives the parent action selection is used.
        """
        if self.exploring and not noAdvice and state[0] != float('inf'):
            #Ask for advice
            advised = self.adviceObject.ask_advice(self.agentIndex, state)
            if advised:
                try:
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: report and fall back to the own policy.
                    # Single-argument print() works the same on Python 2.
                    print("Exception when combining the advice " + str(advised))
        return super(Torrey, self).action(state, noAdvice)
Exemplo n.º 8
0
Arquivo: torrey.py Projeto: cowhi/HFO
class Torrey(QLearning):
    """Advising agent built on QLearning that advises above a threshold.

    The agent asks for advice whenever it explores and gives advice when the
    spread between its Q-values exceeds threshold, while the budget lasts.
    """

    fellowAgents = None
    # Advise budget already consumed / maximum advise budget
    spentBudget = None
    budget = None
    episodeUpdateTrace = None
    # Minimum state importance required to give advice
    threshold = None
    # Helper used to reach the advisor; created in setup_advising
    adviceObject = None

    def __init__(self,agentIndex,alpha=0.2,gamma=0.9,T=0.4,budget = 350,threshold = 0.05):
        """Initializes the budget and the importance threshold.

        agentIndex, alpha, gamma, T - forwarded to the parent constructor
        budget - maximum number of times advice can be given
        threshold - minimum state importance required to give advice
        """
        super(Torrey, self).__init__(agentIndex,alpha=alpha,gamma=gamma,T=T)

        self.budget = budget
        self.spentBudget = 0
        self.fellowAgents = []
        self.threshold = threshold

    def setup_advising(self,agentIndex,allAgents):
        """ This method is called in preparation for advising

        agentIndex - position of this agent inside allAgents
        allAgents - every agent; the one stored after this agent (wrapping
                    around at the end) becomes the only advisor
        """
        self.adviceObject = AdviceUtil()
        #Get the next agent
        index = (agentIndex+1)%len(allAgents)
        advisors = [allAgents[index]]
        self.adviceObject.setupAdvisors(advisors)

    def get_used_budget(self):
        """Returns the advise budget the agent already used"""
        return self.spentBudget

    def advise_action(self,uNum,state):
        """Verifies if the agent can advice a friend, and return the action if possible

        uNum - identifier of the asking agent (not used here)
        state - state in which the friend asked for advice

        Returns the advised action, or None when the budget is exhausted or
        check_advise declines. Budget is only spent when advice is given.
        """
        if self.spentBudget < self.budget:
            #Check if the agent should advise
            advise,advisedAction = self.check_advise(state)
            if advise:
                self.spentBudget = self.spentBudget + 1
                return advisedAction
        return None

    def check_advise(self,state):
        """Returns if the agent should advice in this state.
        The advised action is also returned in the positive case

        Returns (True, action) when state_importance(state) is above
        threshold, (False, None) otherwise. The action comes from
        action(state, True) so the advisor never asks for advice itself.
        """
        importance = self.state_importance(state)

        if importance > self.threshold:
            advisedAction = self.action(state,True)
            return True,advisedAction

        return False,None

    def state_importance(self,state):
        """Calculates the state importance
        state - the state

        Returns the absolute difference between the two values that
        get_max_min_q_value yields for the four movement actions.
        """
        allActions = [actions.NORTH, actions.SOUTH, actions.WEST, actions.EAST]
        maxQ,minQ = self.get_max_min_q_value(state,allActions)

        qImportance = math.fabs(maxQ - minQ)

        return qImportance

    def observe_reward(self,state,action,statePrime,reward) :
        """Does the necessary updates (Q-table, etc)"""
        super(Torrey, self).observe_reward(state,action,statePrime,reward)

    def combineAdvice(self,advised):
        """Returns the most frequent advised action converted to int.

        advised - list with the actions suggested by the advisors
        """
        return int(max(set(advised), key=advised.count))

    def action(self,state,noAdvice = False):
        """Returns the action for the current state

        state - current state; advice is skipped when state[0] is infinite
        noAdvice - when True the agent never asks for advice

        When no usable advice arrives the parent action selection is used.
        """
        if self.exploring and not noAdvice and state[0] != float('inf'):
            #Ask for advice
            advised = self.adviceObject.ask_advice(self.agentIndex,state)
            if advised:
                try:
                    return self.combineAdvice(advised)
                except Exception:
                    # Best effort: report and fall back to the own policy.
                    # Single-argument print() works the same on Python 2.
                    print("Exception when combining the advice " + str(advised))
        return super(Torrey, self).action(state,noAdvice)