Exemplo n.º 1
0
 def __init__(self, *args, **kwargs):
     """Wrap an environment and expose spaces built by ``create_space``.

     All positional and keyword arguments are forwarded unchanged to
     ``ProxyEnv.__init__``. The spaces present after that call are kept on
     ``original_observation_space`` / ``original_action_space`` and the
     public ``observation_space`` / ``action_space`` are then replaced by
     the output of ``create_space``.
     """
     ProxyEnv.__init__(self, *args, **kwargs)

     # Keep the untouched spaces around before the public ones are replaced.
     raw_observation_spaces = self.observation_space.spaces
     self.original_observation_space = raw_observation_spaces
     raw_action_space = self.action_space
     self.original_action_space = raw_action_space

     # ``create_space`` is applied through ``ml.nested_apply`` for the
     # observation spaces and directly for the action space.
     self.observation_space = ml.nested_apply(
         create_space, raw_observation_spaces)
     self.action_space = create_space(raw_action_space)
Exemplo n.º 2
0
 def __init__(
     self,
     wrapped_env,
     vae,
     selector=(lambda x: x),
     assigner=(lambda x, y: y),
     **kwargs
 ):
     """Wrap an environment and advertise a VAE-latent observation space.

     Args:
         wrapped_env: environment handed to ``ProxyEnv.__init__``.
         vae: object exposing ``latent_size``; stored on ``self.vae``.
         selector: callable stored on ``self.selector``; it is not called
             in the constructor. Defaults to the identity.
         assigner: callable ``(structure, value)``; called here with the
             wrapped observation space(s) and the latent ``Box`` to build
             ``self.observation_space``, then stored on ``self.assigner``.
             Defaults to returning ``value``.
         **kwargs: forwarded to both ``ProxyEnv.__init__`` and
             ``Cloneable.__init__``.
     """
     ProxyEnv.__init__(self, wrapped_env, **kwargs)
     # Hand Cloneable the caller's actual selector/assigner. Passing fresh
     # default lambdas here silently dropped any custom callables.
     # NOTE(review): presumably Cloneable records these arguments to rebuild
     # the environment when cloning -- confirm against Cloneable.
     Cloneable.__init__(
         self,
         wrapped_env,
         vae,
         selector=selector,
         assigner=assigner,
         **kwargs)

     observation_space = self.wrapped_env.observation_space
     if isinstance(observation_space, (Dict, Tuple)):
         # Container spaces are replaced by their ``spaces`` attribute so
         # the assigner receives the underlying collection.
         observation_space = observation_space.spaces

     # The latent space is a box bounded by -1 and 1 with one entry per
     # latent dimension.
     latent_high = np.ones([vae.latent_size])
     self.observation_space = assigner(
         observation_space,
         Box(-1.0 * latent_high, latent_high))
     self.action_space = self.wrapped_env.action_space
     self.vae = vae
     self.selector = selector
     self.assigner = assigner
Exemplo n.º 3
0
 def copy_to(
     self,
     clone
 ):
     """Copy this wrapper's state onto ``clone``.

     Delegates to ``ProxyEnv.copy_to`` first and then assigns this
     instance's ``vae``, ``selector`` and ``assigner`` to ``clone``. The
     objects are shared by reference, not copied.

     Args:
         clone: the object receiving the attributes.
     """
     # Pass ``self`` explicitly: the method is looked up on the class, so
     # without it ``clone`` was bound as ``self`` and the real ``clone``
     # argument was missing.
     # NOTE(review): assumes ProxyEnv.copy_to has the signature
     # (self, clone) -- confirm against ProxyEnv.
     ProxyEnv.copy_to(self, clone)
     clone.vae = self.vae
     clone.selector = self.selector
     clone.assigner = self.assigner
Exemplo n.º 4
0
 def reset(self, **kwargs):
     """Reset the wrapped environment and return the transformed observation.

     The observation returned by ``ProxyEnv.reset`` is passed, together
     with ``self.original_observation_space``, through ``normalize`` via
     ``ml.nested_apply``; the result is then cast to ``np.float32``, again
     via ``ml.nested_apply``.

     Args:
         **kwargs: forwarded to ``ProxyEnv.reset``.
     """
     raw_observation = ProxyEnv.reset(self, **kwargs)
     normalized = ml.nested_apply(
         normalize, raw_observation, self.original_observation_space)

     def to_float32(value):
         return value.astype(np.float32)

     return ml.nested_apply(to_float32, normalized)
Exemplo n.º 5
0
 def reset(
     self,
     **kwargs
 ):
     """Reset the wrapped environment and return a latent-encoded observation.

     The part of the observation picked by ``self.selector`` is given a
     leading axis (``[None, ...]``), passed to
     ``self.vae.encoder.get_expected_value``, and the first entry of the
     result is taken as the encoding. ``self.assigner`` then receives the
     full observation and the encoding.

     Args:
         **kwargs: forwarded to ``ProxyEnv.reset``.
     """
     observation = ProxyEnv.reset(self, **kwargs)
     # Only the encoder input goes through the selector. The assigner gets
     # the complete observation, matching how the constructor applies it
     # to the complete observation space. With the default identity
     # selector this is identical to the previous behavior.
     selected = self.selector(observation)
     encoding = self.vae.encoder.get_expected_value(selected[None, ...])[0]
     return self.assigner(observation, encoding)
Exemplo n.º 6
0
 def step(
     self,
     action
 ):
     """Step the wrapped environment and return a latent-encoded observation.

     The part of the observation picked by ``self.selector`` is given a
     leading axis (``[None, ...]``), passed to
     ``self.vae.encoder.get_expected_value``, and the first entry of the
     result is taken as the encoding. ``self.assigner`` then receives the
     full observation and the encoding.

     Args:
         action: passed unchanged to ``ProxyEnv.step``.

     Returns:
         The tuple ``(observation, reward, done, info)`` where the
         observation is the assigner's output and the remaining items are
         returned as received from ``ProxyEnv.step``.
     """
     observation, reward, done, info = ProxyEnv.step(
         self, action)
     # Apply the selector before encoding, as ``reset`` does; indexing the
     # raw observation directly ignored a custom selector. With the default
     # identity selector the result is unchanged.
     selected = self.selector(observation)
     encoding = self.vae.encoder.get_expected_value(selected[None, ...])[0]
     return self.assigner(observation, encoding), reward, done, info
Exemplo n.º 7
0
 def step(self, action):
     """Step the wrapped environment with a denormalized action.

     ``action`` is converted with ``denormalize`` against
     ``self.original_action_space`` before being passed to
     ``ProxyEnv.step``. The returned observation is passed, together with
     ``self.original_observation_space``, through ``normalize`` via
     ``ml.nested_apply`` and then cast to ``np.float32``.

     Returns:
         The tuple ``(observation, reward, done, info)``; everything but
         the observation is returned as received from ``ProxyEnv.step``.
     """
     raw_action = denormalize(action, self.original_action_space)
     raw_observation, reward, done, info = ProxyEnv.step(self, raw_action)

     normalized = ml.nested_apply(
         normalize, raw_observation, self.original_observation_space)

     def to_float32(value):
         return value.astype(np.float32)

     return ml.nested_apply(to_float32, normalized), reward, done, info