示例#1
0
文件: policies.py 项目: oidelima/ppo
    def _evaluate(self, variables, observation, **extra_feed):
        """Run ``variables`` in the session with the observation and extras fed.

        ``observation`` is passed through ``adjust_shape`` and fed to
        ``self.X``. A keyword in ``extra_feed`` is fed only when an instance
        attribute with that name exists and that attribute is a ``tf.Tensor``
        produced by a ``Placeholder`` op; any other keyword is ignored.

        Returns:
            The result of ``self.sess.run(variables, feed_dict)``.
        """
        session = self.sess
        feeds = {self.X: adjust_shape(self.X, observation)}
        attributes = self.__dict__
        for name, value in extra_feed.items():
            if name not in attributes:
                continue
            candidate = attributes[name]
            # Only genuine placeholders can be fed; skip everything else.
            if not isinstance(candidate, tf.Tensor):
                continue
            if candidate._op.type != "Placeholder":
                continue
            feeds[candidate] = adjust_shape(candidate, value)

        return session.run(variables, feeds)
示例#2
0
 def _lf_(self, observation_, **extra_feed):
     """Evaluate ``self.lf_`` in the session for the given observation.

     ``observation_`` is passed through ``adjust_shape`` and fed to
     ``self.X_``. A keyword in ``extra_feed`` is fed only when an instance
     attribute with that name exists and is a ``tf.Tensor`` produced by a
     ``Placeholder`` op; any other keyword is ignored.

     Returns:
         The result of ``self.sess.run(self.lf_, feed_dict)``.
     """
     session = self.sess
     feeds = {self.X_: adjust_shape(self.X_, observation_)}
     attributes = self.__dict__
     for name, value in extra_feed.items():
         if name not in attributes:
             continue
         candidate = attributes[name]
         # Only genuine placeholders can be fed; skip everything else.
         if not isinstance(candidate, tf.Tensor):
             continue
         if candidate._op.type != 'Placeholder':
             continue
         feeds[candidate] = adjust_shape(candidate, value)
     return session.run(self.lf_, feeds)
示例#3
0
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Compute an action (and optionally a Q value) for one observation.

        Args:
            obs: Observation; wrapped in a one-element list before being
                passed through ``U.adjust_shape`` and fed to ``self.obs0``.
            apply_noise: When true, the perturbed actor is used if
                ``self.param_noise`` is set, and ``self.action_noise()`` is
                added to the action if ``self.action_noise`` is set.
            compute_Q: When true, ``self.critic_with_actor_tf`` is evaluated
                in the same session call as the actor.

        Returns:
            A 4-tuple ``(action, q, None, None)`` where ``action`` is clipped
            to ``self.action_range`` and ``q`` is ``None`` when ``compute_Q``
            is false.
        """
        use_perturbed = self.param_noise is not None and apply_noise
        policy_output = self.perturbed_actor_tf if use_perturbed else self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        if compute_Q:
            fetches = [policy_output, self.critic_with_actor_tf]
            action, q = self.sess.run(fetches, feed_dict=feeds)
        else:
            action = self.sess.run(policy_output, feed_dict=feeds)
            q = None

        if self.action_noise is not None and apply_noise:
            # Added in place; per the original author's note a (3,) noise
            # vector broadcasts onto a (1, 3) action, so no shape assert.
            action += self.action_noise()

        lower, upper = self.action_range[0], self.action_range[1]
        action = np.clip(action, lower, upper)
        return action, q, None, None
示例#4
0
 def step(self, obs, noise_factor=1., apply_noise=True, compute_Q=True):
     """Compute the base action and the residual action for one observation.

     Args:
         obs: Observation; wrapped in a one-element list before being
             passed through ``U.adjust_shape`` and fed to ``self.obs0``.
         noise_factor: Multiplier applied to the action noise before it is
             added to the residual action.
         apply_noise: When true, the perturbed residual actor is used if
             ``self.param_noise`` is set, and scaled action noise is added
             to the residual action if ``self.action_noise`` is set.
         compute_Q: When true, ``self.critic_with_actor_tf`` is evaluated
             in the same session call as the actors.

     Returns:
         A 5-tuple ``(action, action_res, q, None, None)``; both actions
         are clipped to ``self.action_range`` and ``q`` is ``None`` when
         ``compute_Q`` is false.
     """
     use_perturbed = self.param_noise is not None and apply_noise
     residual_output = (self.perturbed_res_actor_tf
                        if use_perturbed else self.res_actor_tf)

     feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
     if compute_Q:
         fetches = [self.actor_tf, residual_output, self.critic_with_actor_tf]
         action, action_res, q = self.sess.run(fetches, feed_dict=feeds)
     else:
         fetches = [self.actor_tf, residual_output]
         action, action_res = self.sess.run(fetches, feed_dict=feeds)
         q = None

     # Diagnostic output of the raw residual action, before noise/clipping.
     print('action res: ', action_res)

     if self.action_noise is not None and apply_noise:
         # Only the residual action receives noise; added in place.
         action_res += noise_factor * self.action_noise()

     lower, upper = self.action_range[0], self.action_range[1]
     action_res = np.clip(action_res, lower, upper)
     action = np.clip(action, lower, upper)
     return action, action_res, q, None, None
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Compute a binarized action list (and optionally Q) for one observation.

        The actor output is optionally perturbed with action noise, clipped
        to ``self.action_range``, and then each component of the first row
        is thresholded at 0.5 into 0 or 1.

        Args:
            obs: Observation; wrapped in a one-element list before being
                passed through ``U.adjust_shape`` and fed to ``self.obs0``.
            apply_noise: When true, the perturbed actor is used if
                ``self.param_noise`` is set, and ``self.action_noise()`` is
                added to the action if ``self.action_noise`` is set.
            compute_Q: When true, ``self.critic_with_actor_tf`` is evaluated
                in the same session call as the actor.

        Returns:
            A 4-tuple ``(action_set, q, None, None)`` where ``action_set``
            is a list of 0/1 ints and ``q`` is ``None`` when ``compute_Q``
            is false.
        """
        use_perturbed = self.param_noise is not None and apply_noise
        policy_output = self.perturbed_actor_tf if use_perturbed else self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        if compute_Q:
            fetches = [policy_output, self.critic_with_actor_tf]
            action, q = self.sess.run(fetches, feed_dict=feeds)
        else:
            action = self.sess.run(policy_output, feed_dict=feeds)
            q = None

        if self.action_noise is not None and apply_noise:
            action += self.action_noise()

        lower, upper = self.action_range[0], self.action_range[1]
        action = np.clip(action, lower, upper)

        # Binarize each component. The 0.5 threshold follows the original
        # author's note that the actor's output activation is a sigmoid
        # (range 0-1). Unlike DQN, no argmax is taken over the outputs.
        action_set = [1 if component > 0.5 else 0 for component in action[0]]

        return action_set, q, None, None
    def step(self, obs, apply_noise=True, compute_Q=True):
        """Compute a binarized action list (and optionally Q) for one observation.

        The actor output is optionally perturbed with action noise and then,
        without any clipping, each component of the first row is thresholded
        at 0.5 into 0 or 1. The pre-binarization row is printed.

        Args:
            obs: Observation; wrapped in a one-element list before being
                passed through ``U.adjust_shape`` and fed to ``self.obs0``.
            apply_noise: When true, the perturbed actor is used if
                ``self.param_noise`` is set, and ``self.action_noise()`` is
                added to the action if ``self.action_noise`` is set.
            compute_Q: When true, ``self.critic_with_actor_tf`` is evaluated
                in the same session call as the actor.

        Returns:
            A 4-tuple ``(action_set, q, None, None)`` where ``action_set``
            is a list of 0/1 ints and ``q`` is ``None`` when ``compute_Q``
            is false.
        """
        use_perturbed = self.param_noise is not None and apply_noise
        policy_output = self.perturbed_actor_tf if use_perturbed else self.actor_tf

        feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}
        if compute_Q:
            fetches = [policy_output, self.critic_with_actor_tf]
            action, q = self.sess.run(fetches, feed_dict=feeds)
        else:
            action = self.sess.run(policy_output, feed_dict=feeds)
            q = None

        if self.action_noise is not None and apply_noise:
            action += self.action_noise()

        # The action is deliberately not clipped to self.action_range here.
        raw_row = action[0]
        print('action_before_binarization: ', raw_row)

        # Binarize each component. The 0.5 threshold follows the original
        # author's note for a sigmoid output (a tanh output would use 0).
        # Unlike DQN, no argmax is taken over the outputs.
        action_set = [1 if component > 0.5 else 0 for component in raw_row]

        return action_set, q, None, None
示例#7
0
 def make_feed_dict(self, data):
     """Return a one-entry feed dict for this object's placeholder.

     The single key is ``self._placeholder``; its value is ``data`` after
     being passed through ``adjust_shape`` for that placeholder.
     """
     placeholder = self._placeholder
     feed_value = adjust_shape(placeholder, data)
     return {placeholder: feed_value}