def _evaluate(self, variables, observation, **extra_feed):
    """Run ``variables`` in ``self.sess`` with ``observation`` fed to ``self.X``.

    Each keyword in ``extra_feed`` is looked up by name in the instance
    ``__dict__``. It is added to the feed only when the attribute found there
    is a ``tf.Tensor`` whose producing op has type ``"Placeholder"``; every
    other keyword is silently ignored.

    Args:
        variables: Fetches handed unchanged to ``self.sess.run``.
        observation: Value fed to ``self.X`` after passing through
            ``adjust_shape``.
        **extra_feed: Optional ``attribute_name=data`` pairs for additional
            placeholders stored on the instance.

    Returns:
        Whatever ``self.sess.run(variables, feed_dict)`` returns.
    """
    session = self.sess
    obs_input = self.X
    feeds = {obs_input: adjust_shape(obs_input, observation)}

    instance_attrs = self.__dict__
    for attr_name, value in extra_feed.items():
        candidate = instance_attrs.get(attr_name)
        # Only graph placeholders can be fed; skip anything else.
        if not isinstance(candidate, tf.Tensor):
            continue
        if candidate._op.type != "Placeholder":
            continue
        feeds[candidate] = adjust_shape(candidate, value)

    return session.run(variables, feeds)
def _lf_(self, observation_, **extra_feed):
    """Evaluate ``self.lf_`` in ``self.sess`` for the given observation.

    ``observation_`` is fed to ``self.X_`` after ``adjust_shape``. Extra
    keyword arguments are matched by name against the instance ``__dict__``
    and fed only when the matching attribute is a ``tf.Tensor`` produced by
    an op of type ``'Placeholder'``; unmatched keywords are ignored.

    Args:
        observation_: Value fed to the ``self.X_`` input.
        **extra_feed: Optional ``attribute_name=data`` pairs for additional
            placeholders stored on the instance.

    Returns:
        The result of ``self.sess.run(self.lf_, feed_dict)``.
    """
    session = self.sess
    feeds = {self.X_: adjust_shape(self.X_, observation_)}

    def _is_placeholder(obj):
        # True only for tensors that come straight from a Placeholder op.
        return isinstance(obj, tf.Tensor) and obj._op.type == 'Placeholder'

    own_attrs = self.__dict__
    for key, payload in extra_feed.items():
        if key not in own_attrs:
            continue
        tensor = own_attrs[key]
        if _is_placeholder(tensor):
            feeds[tensor] = adjust_shape(tensor, payload)

    return session.run(self.lf_, feeds)
def step(self, obs, apply_noise=True, compute_Q=True):
    """Compute an action, and optionally its Q-value, for one observation.

    Args:
        obs: Observation; wrapped in a list and passed through
            ``U.adjust_shape`` before being fed to ``self.obs0``.
        apply_noise: When true, the perturbed actor is used if
            ``self.param_noise`` is set, and ``self.action_noise()`` is added
            to the action if ``self.action_noise`` is set.
        compute_Q: When true, ``self.critic_with_actor_tf`` is fetched in the
            same session call as the actor output.

    Returns:
        A 4-tuple ``(action, q, None, None)``. ``action`` is clipped to
        ``self.action_range``; ``q`` is ``None`` when ``compute_Q`` is false.
    """
    use_perturbed = self.param_noise is not None and apply_noise
    policy = self.perturbed_actor_tf if use_perturbed else self.actor_tf
    feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

    if compute_Q:
        fetches = [policy, self.critic_with_actor_tf]
        action, q = self.sess.run(fetches, feed_dict=feeds)
    else:
        action, q = self.sess.run(policy, feed_dict=feeds), None

    if self.action_noise is not None and apply_noise:
        # The noise sample is added in place and relies on broadcasting; the
        # original author noted (1, 3) + (3,) as the shapes seen here.
        action += self.action_noise()

    lower, upper = self.action_range[0], self.action_range[1]
    action = np.clip(action, lower, upper)
    return action, q, None, None
def step(self, obs, noise_factor=1., apply_noise=True, compute_Q=True):
    """Compute the base action and the residual action for one observation.

    Args:
        obs: Observation; wrapped in a list and passed through
            ``U.adjust_shape`` before being fed to ``self.obs0``.
        noise_factor: Multiplier applied to the sampled action noise before
            it is added to the residual action.
        apply_noise: When true, the perturbed residual actor is used if
            ``self.param_noise`` is set, and scaled ``self.action_noise()``
            is added to the residual action if ``self.action_noise`` is set.
        compute_Q: When true, ``self.critic_with_actor_tf`` is fetched too.

    Returns:
        A 5-tuple ``(action, action_res, q, None, None)``. Both actions are
        clipped to ``self.action_range``; ``q`` is ``None`` when
        ``compute_Q`` is false.
    """
    use_perturbed = self.param_noise is not None and apply_noise
    residual_policy = (
        self.perturbed_res_actor_tf if use_perturbed else self.res_actor_tf
    )
    feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

    if compute_Q:
        action, action_res, q = self.sess.run(
            [self.actor_tf, residual_policy, self.critic_with_actor_tf],
            feed_dict=feeds,
        )
    else:
        action, action_res = self.sess.run(
            [self.actor_tf, residual_policy], feed_dict=feeds
        )
        q = None

    # NOTE(review): this debug output runs on every call; kept as-is to
    # preserve existing behavior.
    print('action res: ', action_res)

    if self.action_noise is not None and apply_noise:
        # Only the residual action is perturbed; the base action is untouched.
        sampled = self.action_noise()
        action_res += noise_factor * sampled

    lower, upper = self.action_range[0], self.action_range[1]
    action_res = np.clip(action_res, lower, upper)
    action = np.clip(action, lower, upper)
    return action, action_res, q, None, None
def step(self, obs, apply_noise=True, compute_Q=True):
    """Compute a binarized action, and optionally its Q-value, for one observation.

    The actor output is optionally perturbed with action noise, clipped to
    ``self.action_range``, and then each entry of its first row is mapped to
    1 when it is greater than 0.5 and to 0 otherwise.

    Args:
        obs: Observation; wrapped in a list and passed through
            ``U.adjust_shape`` before being fed to ``self.obs0``.
        apply_noise: When true, the perturbed actor is used if
            ``self.param_noise`` is set, and ``self.action_noise()`` is added
            to the action if ``self.action_noise`` is set.
        compute_Q: When true, ``self.critic_with_actor_tf`` is fetched too.

    Returns:
        A 4-tuple ``(action_set, q, None, None)`` where ``action_set`` is a
        list of 0/1 ints and ``q`` is ``None`` when ``compute_Q`` is false.
    """
    use_perturbed = self.param_noise is not None and apply_noise
    policy = self.perturbed_actor_tf if use_perturbed else self.actor_tf
    feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

    if compute_Q:
        fetches = [policy, self.critic_with_actor_tf]
        action, q = self.sess.run(fetches, feed_dict=feeds)
    else:
        action, q = self.sess.run(policy, feed_dict=feeds), None

    if self.action_noise is not None and apply_noise:
        action += self.action_noise()

    lower, upper = self.action_range[0], self.action_range[1]
    action = np.clip(action, lower, upper)

    # Binarize the action. The 0.5 threshold follows the original author's
    # note that the actor's output activation is a sigmoid (0-1). Per the
    # same notes, DDPG does not pick actions by argmax the way DQN does.
    action_set = [1 if component > 0.5 else 0 for component in action[0]]
    return action_set, q, None, None
def step(self, obs, apply_noise=True, compute_Q=True):
    """Compute a binarized action, and optionally its Q-value, for one observation.

    The actor output is optionally perturbed with action noise (no clipping
    is applied in this variant), and then each entry of its first row is
    mapped to 1 when it is greater than 0.5 and to 0 otherwise.

    Args:
        obs: Observation; wrapped in a list and passed through
            ``U.adjust_shape`` before being fed to ``self.obs0``.
        apply_noise: When true, the perturbed actor is used if
            ``self.param_noise`` is set, and ``self.action_noise()`` is added
            to the action if ``self.action_noise`` is set.
        compute_Q: When true, ``self.critic_with_actor_tf`` is fetched too.

    Returns:
        A 4-tuple ``(action_set, q, None, None)`` where ``action_set`` is a
        list of 0/1 ints and ``q`` is ``None`` when ``compute_Q`` is false.
    """
    use_perturbed = self.param_noise is not None and apply_noise
    policy = self.perturbed_actor_tf if use_perturbed else self.actor_tf
    feeds = {self.obs0: U.adjust_shape(self.obs0, [obs])}

    if compute_Q:
        fetches = [policy, self.critic_with_actor_tf]
        action, q = self.sess.run(fetches, feed_dict=feeds)
    else:
        action, q = self.sess.run(policy, feed_dict=feeds), None

    if self.action_noise is not None and apply_noise:
        # The original author marked clipping as unnecessary at this point.
        action += self.action_noise()

    # NOTE(review): this debug output runs on every call; kept as-is to
    # preserve existing behavior.
    print('action_before_binarization: ', action[0])

    # Binarize with a 0.5 threshold, which the original notes associate with
    # a sigmoid output (a tanh output would use 0 instead). Per the same
    # notes, DDPG does not pick actions by argmax the way DQN does.
    action_set = [1 if component > 0.5 else 0 for component in action[0]]
    return action_set, q, None, None
def make_feed_dict(self, data):
    """Build a one-entry feed dict for this object's placeholder.

    Args:
        data: Value to feed; it is passed through ``adjust_shape`` together
            with ``self._placeholder`` before being stored.

    Returns:
        A dict mapping ``self._placeholder`` to the adjusted ``data``.
    """
    placeholder = self._placeholder
    fed_value = adjust_shape(placeholder, data)
    return {placeholder: fed_value}