Example #1
def agent_init(self,taskSpec):
    super(ALESarsaAgent,self).agent_init(taskSpec)
    #project raw ALE observations onto binary tile-coded features
    self.state_projector = TileCoding(limits=self.limits)
    #alternative projector:
    #BasicALEFeatures(num_tiles=np.array([14,16]),
    #                 background_file='../data/space_invaders/background.pkl')
    #one weight column per action
    self.theta = np.zeros((self.state_projector.num_features(),
                           self.num_actions()))
    self.sparse = True
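The snippet above only builds the tile-coding projector and the weight matrix theta (one column per action). Purely as an illustration of how they fit together, assuming TileCoding exposes a phi_idx method that returns the indices of the active tiles (as in Example #2 below), the action values can be read from theta like this:

#illustrative sketch only; names and shapes follow the snippet above
active = state_projector.phi_idx(observation)  #indices of the active tiles
q_values = theta[active,:].sum(axis=0)         #one value per action, shape (num_actions,)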
Example #2
import numpy as np

#ALEAgent, TileCoding and BasicALEFeatures are provided by the surrounding project
class ALESarsaAgent(ALEAgent):
    
    def __init__(self,alpha=0.1,lambda_=0.9,gamma=.999,eps=0.0,
                 agent_id=0,save_path='.'):
        #use full images and color mode
        super(ALESarsaAgent,self).__init__(np.array([0,1,2]),agent_id,save_path)
        
        self.eps = eps
        self.name='SARSA'
        self.alpha0 = alpha
        self.alpha = alpha
        self.lambda_ = lambda_
        self.gamma = gamma

    def agent_start(self,observation):
        super(ALESarsaAgent,self).agent_start(observation)
        #reset trace
        self.trace = np.zeros_like(self.theta)
        #action selection
        phi = self.get_phi(observation)
        vals = self.get_all_values(phi,self.sparse)
        a = self.select_action(vals)
        #log state and action
        self.phi = phi
        self.a = a
        return self.create_action(self.actions[a])
        
    
    def agent_init(self,taskSpec):
        super(ALESarsaAgent,self).agent_init(taskSpec)
        #project raw ALE observations onto binary tile-coded features
        self.state_projector = TileCoding(limits=self.limits)
        #alternative projector:
        #BasicALEFeatures(num_tiles=np.array([14,16]),
        #                 background_file='../data/space_invaders/background.pkl')
        #one weight column per action
        self.theta = np.zeros((self.state_projector.num_features(),
                               self.num_actions()))
        self.sparse = True

    def get_phi(self,obs):
        im = np.array(obs.doubleArray)
        #alternative: self.get_frame_data(obs).reshape((210,160))
        if self.sparse:
            #returns only active tiles indices
            return self.state_projector.phi_idx(im)
        else:
            #returns full binary features vector
            return self.state_projector.phi(im)
    
    def update_alpha(self):
        #no annealing: the learning rate stays constant
        pass
        
    def get_value(self,phi,a,sparse=False):
        if sparse:
            return np.sum(self.theta[phi,a])
        else:
            return np.dot(phi,self.theta[:,a])
            
    def get_all_values(self,phi,sparse=False):
        if sparse:
            return np.sum(self.theta[phi,:],axis=0)
        else:
            return np.dot(phi,self.theta)
            
    def select_action(self,values): 
        #epsilon-greedy: explore with probability eps, otherwise act greedily,
        #breaking ties between maximal values at random
        acts = np.arange(values.size)
        if np.random.rand()< self.eps:
            return np.random.choice(acts)
        else:
            max_acts = acts[values == np.max(values)]
            return np.random.choice(max_acts)
            
    def update_trace(self,phi,a):
        self.trace *= self.gamma*self.lambda_
        if self.sparse:
            self.trace[phi,a] += 1
        else:      
            self.trace[:,a] += phi
        #optionally clip the traces: self.trace = np.clip(self.trace,0.,5.)
        
    def step(self,reward,phi_ns = None):
        n_rew = self.normalize_reward(reward)
        self.update_trace(self.phi,self.a)
        #TD error: delta = r + gamma*Q(s',a') - Q(s,a); the bootstrap term is
        #only added when a next state is available (i.e. a non-terminal step)
        delta = n_rew - self.get_value(self.phi,self.a,self.sparse)
        a_ns = None
        if phi_ns is not None:
            ns_values = self.get_all_values(phi_ns,self.sparse)
            a_ns = self.select_action(ns_values)
            delta += self.gamma*ns_values[a_ns]
        #normalize alpha by the number of active features
        if self.sparse:
            n_active = self.phi.size  #phi holds the active tile indices
        else:
            n_active = np.sum(self.phi != 0.)
        alpha = self.alpha / float(n_active)
        self.theta += alpha*delta*self.trace
        return a_ns  #a_ns is action index (not action value)

    def agent_step(self,reward, observation):
        super(ALESarsaAgent,self).agent_step(reward, observation)
        phi_ns = self.get_phi(observation)
        a_ns = self.step(reward,phi_ns)
        #log state data
        self.phi = phi_ns
        self.a = a_ns 
        
        return self.create_action(self.actions[a_ns]) #wrap into an RL-Glue action

    def agent_end(self,reward):
        super(ALESarsaAgent,self).agent_end(reward)
        self.step(reward)
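For reference, a minimal launcher sketch, assuming the RL-Glue Python codec is installed and the project's own ALEAgent/TileCoding modules are importable; the hyperparameter values below are arbitrary examples rather than the project's settings:

from rlglue.agent import AgentLoader

if __name__ == '__main__':
    #connect the agent to a running rl_glue core
    AgentLoader.loadAgent(ALESarsaAgent(alpha=0.1,lambda_=0.9,
                                        gamma=0.999,eps=0.05))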