def _build_test_policies(self):
     """Populate ``self._test_policies`` with the 'main' and 'max_q' entries.

     Both policies are built on the agent module's ``p_net``; the 'max_q'
     policy is additionally handed ``q_nets[0][0]`` as its ``q_network``.
     """
     agent_module = self._agent_module
     actor_net = agent_module.p_net
     # 'main' is stored before q_nets is touched, so it is registered even
     # if the q-network lookup below fails.
     self._test_policies['main'] = policies.DeterministicSoftPolicy(
         a_network=actor_net)
     self._test_policies['max_q'] = policies.MaxQSoftPolicy(
         a_network=actor_net,
         q_network=agent_module.q_nets[0][0],
     )
 def _build_test_policies(self):
   """Populate ``self._test_policies`` with a single 'main' entry.

   The entry is a ``DeterministicSoftPolicy`` built on the agent module's
   ``p_net``.
   """
   # NOTE(review): a method with this same name is defined directly above
   # this one; if both belong to the same class, this definition is the one
   # that takes effect -- confirm that is intended.
   actor_net = self._agent_module.p_net
   self._test_policies['main'] = policies.DeterministicSoftPolicy(
       a_network=actor_net)