示例#1
0
def wrap_policy(a_net, wrapper):
    """Wraps actor network with desired randomization.

    Args:
        a_net: Actor network handed to the selected policy constructor.
        wrapper: Indexable sequence describing the randomization.
            ``wrapper[0]`` selects the kind; later entries carry its
            parameters:
              * ``('none',)`` -> ``policies.RandomSoftPolicy(a_net)``
              * ``('eps', eps)`` ->
                ``policies.EpsilonGreedyRandomSoftPolicy(a_net, eps)``
              * ``('gaussian', std)`` ->
                ``policies.GaussianRandomSoftPolicy(a_net, std=std)``
              * ``('gaussianeps', std, eps)`` ->
                ``policies.GaussianEpsilonGreedySoftPolicy(a_net, std=std,
                eps=eps)``

    Returns:
        The policy object built by the selected constructor.

    Raises:
        ValueError: If ``wrapper[0]`` is not one of the supported kinds.
    """
    kind = wrapper[0]
    if kind == 'none':
        return policies.RandomSoftPolicy(a_net)
    if kind == 'eps':
        return policies.EpsilonGreedyRandomSoftPolicy(a_net, wrapper[1])
    if kind == 'gaussian':
        return policies.GaussianRandomSoftPolicy(a_net, std=wrapper[1])
    if kind == 'gaussianeps':
        return policies.GaussianEpsilonGreedySoftPolicy(a_net,
                                                        std=wrapper[1],
                                                        eps=wrapper[2])
    # Fail loudly with a descriptive error instead of falling through to an
    # unbound local variable.
    raise ValueError(
        "Unknown policy wrapper {!r}; expected one of 'none', 'eps', "
        "'gaussian', 'gaussianeps'.".format(kind))
 def _build_online_policy(self):
   """Returns a `policies.RandomSoftPolicy` built on the agent's `p_net`.

   The network is read from `self._agent_module.p_net` and passed to the
   policy constructor as the `a_network` keyword argument.
   """
   actor_network = self._agent_module.p_net
   return policies.RandomSoftPolicy(a_network=actor_network)