def predict_std(self, state, deterministic_=True):
    """Query the policy's action standard deviation for ``state``.

    The state is normalized with ``norm_state`` against
    ``self._state_bounds``, converted to a NumPy array of the dtype named
    by ``self._settings['float_type']``, and pushed into the model through
    ``self._model.setStates`` before ``self._q_action_std()`` is called.

    Unless the ``'disable_parameter_scaling'`` setting is present and
    truthy, the result is multiplied by
    ``action_bound_std(self._action_bounds)``.

    ``deterministic_`` is accepted but not used by this implementation.

    NOTE(review): the full output of ``self._q_action_std()`` is returned
    (no ``[0]`` indexing) -- presumably one row per state; confirm with
    callers.
    """
    normalized = norm_state(state, self._state_bounds)
    normalized = np.array(normalized, dtype=self._settings['float_type'])
    # _q_action_std() takes no arguments; it reads the states stored here.
    self._model.setStates(normalized)

    scaling_disabled = ('disable_parameter_scaling' in self._settings
                        and self._settings['disable_parameter_scaling'])
    raw_std = self._q_action_std()
    if scaling_disabled:
        return raw_std
    return raw_std * action_bound_std(self._action_bounds)
def predict_std(self, state, deterministic_=True):
    """Query the policy's action standard deviation for ``state``.

    The state is normalized with ``norm_state`` against
    ``self._state_bounds`` and converted to a NumPy array of the dtype
    named by ``self._settings['float_type']``. It is then passed, wrapped
    in a one-element list, to ``self._q_action_std``; the first element of
    that call's result is multiplied by
    ``action_bound_std(self._action_bounds)`` and returned.

    ``deterministic_`` is accepted but not used by this implementation.
    """
    normalized = norm_state(state, self._state_bounds)
    normalized = np.array(normalized, dtype=self._settings['float_type'])
    # The callable takes a list of inputs and returns a sequence of
    # outputs; only the first output is used.
    raw_std = self._q_action_std([normalized])[0]
    std_scale = action_bound_std(self._action_bounds)
    return raw_std * std_scale
def predict_std(self, state, deterministic_=True):
    """Query the policy's action standard deviation for ``state``.

    The state is normalized with ``norm_state`` against
    ``self._state_bounds``, converted to a NumPy array of the dtype named
    by ``self._settings['float_type']``, and pushed into the model through
    ``self._model.setStates``. The first element of
    ``self._q_action_std()`` is then taken.

    Unless the ``'disable_parameter_scaling'`` setting is present and
    truthy, that value is multiplied by
    ``action_bound_std(self._action_bounds)`` before being returned.

    ``deterministic_`` is accepted but not used by this implementation.
    """
    normalized = norm_state(state, self._state_bounds)
    normalized = np.array(normalized, dtype=self._settings['float_type'])
    # _q_action_std() takes no arguments; it reads the states stored here.
    self._model.setStates(normalized)

    scaling_disabled = ('disable_parameter_scaling' in self._settings
                        and self._settings['disable_parameter_scaling'])
    first_std = self._q_action_std()[0]
    if scaling_disabled:
        return first_std
    return first_std * action_bound_std(self._action_bounds)
def predict_std(self, state, deterministic_=True):
    """Query the policy's action standard deviation for ``state``.

    The state is normalized with ``norm_state`` against
    ``self._state_bounds``, converted to a NumPy array of the dtype named
    by ``self._settings['float_type']``, and pushed into the model through
    ``self._model.setStates``. The actor network is then run with
    ``predict(..., batch_size=1)`` and the columns from index
    ``self._action_length`` onward are kept.

    Unless the ``'disable_parameter_scaling'`` setting is present and
    truthy, those columns are multiplied by
    ``action_bound_std(self._action_bounds)`` before being returned.

    ``deterministic_`` is accepted but not used by this implementation.

    NOTE(review): presumably the network output is laid out as
    [action means | action stds] along axis 1 -- confirm against the
    actor network definition.
    """
    normalized = norm_state(state, self._state_bounds)
    normalized = np.array(normalized, dtype=self._settings['float_type'])
    self._model.setStates(normalized)

    scaling_disabled = ('disable_parameter_scaling' in self._settings
                        and self._settings['disable_parameter_scaling'])
    network_output = self._model.getActorNetwork().predict(
        normalized, batch_size=1)
    std_columns = network_output[:, self._action_length:]
    if scaling_disabled:
        return std_columns
    return std_columns * action_bound_std(self._action_bounds)