Example #1
    def get_action(self, o_t):
        if self.model is None:
            raise RuntimeError('model was not initialized with the update_model method')
        H = self.H
        Ua, Sa, Va, wa, w_ter = self.model

        # Condition the first step of the plan on the current observation.
        self.betas[0,:] = o_t

        ### Initialize the expected rewards for the plan ###
        ### (small random initialization; the terminal step gets w_ter) ###
        self.alphas[:,:] = self.random_stream.normal(0, 1e-1, self.alphas.shape)
        self.alphas[-1,:] = w_ter

        # Start the gradient improvement from an all-zero plan parameterization.
        self.plan_param[:,:] = 0.0
        self.stoch_plan, plan_val, values, alphas, betas = c_gradient_improve_plan(self.plan_param, 
                                                self.alphas, 
                                                self.betas, 
                                                self.model, 
                                                self.H, 
                                                self.discount, 
                                                learn_rate = self.learn_rate)
        print(plan_val)  # diagnostic: value of the improved plan
        a = self.discrete_actions[sampled_argmax(self.stoch_plan[0], self.random_stream)]
        return a
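Every example resolves the final action with sampled_argmax, which these snippets do not define. A minimal sketch, assuming it performs an argmax that breaks ties uniformly at random using the supplied NumPy RandomState:

import numpy as np

def sampled_argmax(values, random_stream):
    # Index of a maximal entry; ties are broken uniformly at random.
    # `random_stream` is assumed to be a numpy.random.RandomState.
    values = np.asarray(values)
    ties = np.flatnonzero(values == values.max())
    return ties[random_stream.randint(len(ties))]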
Example #2
    def get_action(self, o_t):
        if self.embedded_models is None:
            raise RuntimeError('model was not initialized with the update_model method')
        H = self.H
        Kab, Da, wa, Ua, Va, imp_a, w_ter = self.embedded_models

        # Embed the current observation into the first-step beta.
        self.betas[0] = np.tensordot(Va, o_t, (1,0))

        ### Initialize the expected rewards for the plan ###
        ### Initialize to the immediate reward function ###
        self.alphas[:,:,:] = wa[None,:,:]
        self.alphas[-1,:,:] += w_ter

        self.alphas *= Da[None,:,:]

        # Start from a uniform stochastic plan over the discrete actions.
        self.stoch_plan[:,:] = 1.0/self.stoch_plan.shape[1]
        self.stoch_plan, plan_val, alphas, betas, _ = lr_improve_plan(self.stoch_plan, 
                                                self.alphas, 
                                                self.betas, 
                                                self.embedded_models, 
                                                self.H, 
                                                self.discount, 
                                                start_temp = self.start_temp)
        a = self.discrete_actions[sampled_argmax(self.stoch_plan[0], self.random_stream)]
        return a
Example #3
    def get_action(self, o_t):
        if o_t.ndim > 1:
            # Batched observations: recurse per row and stack the chosen actions.
            return np.vstack([self.get_action(o) for o in o_t])
        else:
            qa = self.evaluate_actions(o_t)
            return self.discrete_actions[sampled_argmax(qa, self.random_stream)]
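The recursion in Example #3 lets one call serve both a single observation and a batch of observations. A hypothetical usage sketch, reusing the sampled_argmax sketch above; DemoAgent and its toy evaluate_actions are illustrative stand-ins, not part of the original code:

import numpy as np

class DemoAgent:
    def __init__(self):
        self.discrete_actions = np.array([-1.0, 0.0, 1.0])
        self.random_stream = np.random.RandomState(0)

    def evaluate_actions(self, o_t):
        # Toy scoring; a real agent would query its learned model here.
        return np.array([o_t.sum(), 0.0, -o_t.sum()])

    def get_action(self, o_t):
        # Same dispatch as Example #3: recurse over rows when batched.
        if o_t.ndim > 1:
            return np.vstack([self.get_action(o) for o in o_t])
        qa = self.evaluate_actions(o_t)
        return self.discrete_actions[sampled_argmax(qa, self.random_stream)]

agent = DemoAgent()
print(agent.get_action(np.array([0.5, -0.2])))  # single observation -> one action
print(agent.get_action(np.ones((3, 2))))        # batch -> one action per row, shape (3, 1)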
Example #4
    def get_action(self, o_t):
        if self.lem is None:
            raise RuntimeError('model was not initialized with the update_model method')
        qa = lr_evaluate_actions(o_t, self.lem, self.thetas)
        a = self.discrete_actions[sampled_argmax(qa, self.random_stream)]
        return a
Example #5
    def get_action(self, o_t):
        stoch_plan, plan_val, alphas, betas = self.plan(o_t)
        a = self.discrete_actions[sampled_argmax(stoch_plan[0], self.random_stream)]
        return a
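Examples #1, #2, and #5 share a receding-horizon pattern: re-plan from the current observation, then execute only the first action of the fresh stochastic plan. A minimal sketch of the surrounding control loop, assuming a hypothetical Gym-style env; none of these names come from the snippets above:

def receding_horizon_rollout(agent, env, max_steps=1000):
    # Re-plan at every step; act on the first action of each new plan.
    o_t = env.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        a = agent.get_action(o_t)            # re-plans internally from o_t
        o_t, reward, done, info = env.step(a)
        total_reward += reward
        if done:
            break
    return total_reward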