    def __init__(self,
                 env_specs,
                 policy,
                 qf,
                 replay_buffer,
                 policy_optimizer=tf.optimizers.Adam(),
                 qf_optimizer=tf.optimizers.Adam(learning_rate=0.1),
                 exploration_strategy=None,
                 exploration_interval=10,
                 target_update_tau=0.01,
                 target_update_period=10,
                 td_errors_loss_fn=None,
                 gamma=0.99,
                 reward_scale=1.0,
                 gradient_clipping=None,
                 train_sequence_length=None,
                 name='Bilevel_leader',
                 agent_id=-1):
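        """DQN-style leader agent for bilevel (leader/follower) training.

        Args:
            env_specs: Environment specs; in the multi-agent case the
                observation/action spaces are indexed by agent id.
            policy: Policy network for this agent.
            qf: Q-function network; a clone of it serves as the target network.
            replay_buffer: Buffer providing the `required_experiences` fields.
            policy_optimizer: Optimizer for the policy.
            qf_optimizer: Optimizer for the Q-function.
            exploration_strategy: Optional exploration strategy.
            exploration_interval: Train steps between exploration updates.
            target_update_tau: Polyak averaging coefficient for target updates.
            target_update_period: Train steps between target-network updates.
            td_errors_loss_fn: Loss applied to TD errors (defaults to Huber).
            gamma: Discount factor.
            reward_scale: Multiplier applied to rewards.
            gradient_clipping: Optional gradient-clipping bound.
            train_sequence_length: Optional sequence length for training.
            name: Name of this agent.
            agent_id: Index of this agent, or -1 in the single-agent case.
        """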
        # Record constructor arguments so Serializable can rebuild the agent.
        self._Serializable__initialize(locals())
        self._agent_id = agent_id
        self._env_specs = env_specs
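        # agent_id >= 0 means env_specs holds per-agent spaces; select this
        # agent's own observation/action space.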
        if self._agent_id >= 0:
            observation_space = self._env_specs.observation_space[
                self._agent_id]
            action_space = self._env_specs.action_space[self._agent_id]
        else:
            observation_space = self._env_specs.observation_space
            action_space = self._env_specs.action_space

        self._exploration_strategy = exploration_strategy

        self._qf_optimizer = qf_optimizer

        # The target Q-network starts as a clone of the online Q-network and
        # is updated toward it (see target_update_tau/target_update_period).
        self._target_qf = Serializable.clone(qf,
                                             name='target_qf_agent_{}'.format(
                                                 self._agent_id))

        self._target_update_tau = target_update_tau
        self._target_update_period = target_update_period
        # Instantiate the default loss: `tf.losses.Huber` is a class, and the
        # stored value must be callable as loss_fn(y_true, y_pred).
        self._td_errors_loss_fn = (td_errors_loss_fn or tf.losses.Huber())
        self._gamma = gamma
        self._reward_scale = reward_scale
        self._gradient_clipping = gradient_clipping
        self._train_step = 0
        self._exploration_interval = exploration_interval
        self._exploration_status = False

        self.required_experiences = [
            'observation', 'actions', 'rewards', 'next_observations',
            'opponent_actions', 'target_actions'
        ]
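        # Batch keys this agent expects from the replay buffer;
        # `opponent_actions`/`target_actions` come from the other agent in the
        # bilevel (leader/follower) setting.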

        self._observation_space = observation_space
        self._action_space = action_space
        self._policy = policy
        self._qf = qf
        self._replay_buffer = replay_buffer
        self._name = name
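
    # --- Illustrative sketches (not part of the original source) ------------
    # A minimal sketch of how the fields set above are typically consumed,
    # assuming Keras-style networks exposing `trainable_variables` and a loss
    # callable as `self._td_errors_loss_fn(y_true, y_pred)`. The method names
    # `_update_target` and `_td_loss` are hypothetical.
    def _update_target(self):
        # Every `target_update_period` train steps, move the target Q-network
        # toward the online one by Polyak averaging with coefficient tau.
        if self._train_step % self._target_update_period == 0:
            tau = self._target_update_tau
            for target_var, var in zip(self._target_qf.trainable_variables,
                                       self._qf.trainable_variables):
                target_var.assign((1.0 - tau) * target_var + tau * var)

    def _td_loss(self, rewards, dones, q_pred, target_q_next):
        # One-step TD target: scaled reward plus the discounted target-network
        # value, with the bootstrap term masked on terminal transitions.
        targets = (self._reward_scale * rewards +
                   (1.0 - dones) * self._gamma * target_q_next)
        return self._td_errors_loss_fn(tf.stop_gradient(targets), q_pred)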

    def __setstate__(self, state):
        Serializable.__setstate__(self, state)
        self.set_weights(state['pickled_weights'])

    def __getstate__(self):
        state = Serializable.__getstate__(self)
        state['pickled_weights'] = self.get_weights()
        return state
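
    # --- Illustrative usage (not part of the original source) ---------------
    # `__getstate__`/`__setstate__` make the agent pickleable: Serializable
    # re-runs the constructor from the recorded arguments, then the saved
    # network weights are re-applied via `set_weights`. `build_agent()` below
    # is a hypothetical factory standing in for normal construction.
    #
    #     import pickle
    #     agent = build_agent()
    #     restored = pickle.loads(pickle.dumps(agent))
    #     # `restored` has freshly built networks carrying `agent`'s weights.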