예제 #1
0
class CarRacingMDNRNN(CarRacingWrapper):
  """CarRacing environment that emits VAE/MDN-RNN features as observations.

  Every step encodes the frame produced by the wrapped environment with the
  VAE and concatenates that latent with the RNN state held *before* the
  frame is fed to the RNN. The RNN state is advanced afterwards.
  """

  def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """Build the VAE and MDN-RNN and, optionally, load their saved weights.

    Args:
      args: configuration object handed to ``CVAE`` and ``MDNRNN``. This
        class itself reads ``exp_name``, ``env_name``, ``z_size``,
        ``rnn_size`` and ``state_space`` from it.
      load_model: when true, copy the weights stored under
        ``results/<exp_name>/<env_name>/tf_vae`` and ``.../tf_rnn`` into the
        freshly built models.
      full_episode: forwarded to the ``CarRacingWrapper`` constructor.
      with_obs: when true, ``reset``/``_step`` return ``[z_state, obs]``
        instead of only ``z_state``.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      model_dir = 'results/{}/{}'.format(args.exp_name, args.env_name)
      # VAE first, then RNN -- same order as the explicit statements this
      # loop replaces.
      for model, name in ((self.vae, 'tf_vae'), (self.rnn, 'tf_rnn')):
        saved = tf.saved_model.load('{}/{}'.format(model_dir, name))
        model.set_weights([param_i.numpy() for param_i in saved.variables])
    self.rnn_states = rnn_init_state(self.rnn)

    # NOTE(review): this overwrites the attribute regardless of the
    # ``full_episode`` argument passed to the base class above. Kept as in
    # the original -- confirm it is intentional.
    self.full_episode = False

    # ``shape`` must be a tuple; ``(n)`` is just the integer ``n``.
    # ``-np.inf``/``np.inf`` replace ``np.NINF``/``np.Inf``, which were
    # removed in NumPy 2.0.
    feature_size = args.z_size + args.rnn_size * args.state_space
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(feature_size,))

  def encode_obs(self, obs):
    """Encode one frame with the VAE.

    The frame is divided by 255, reshaped to ``(1, 64, 64, 3)`` and passed
    to ``self.vae.encode``; the first entry of the result is returned.
    """
    # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the same
    # dtype the alias used to resolve to.
    result = np.copy(obs).astype(np.float64) / 255.0
    result = result.reshape(1, 64, 64, 3)
    return self.vae.encode(result)[0]

  def reset(self):
    """Reset the RNN state and the wrapped environment.

    Returns whatever the base class ``reset`` returns: ``[z_state, obs]``
    when ``with_obs`` is set, otherwise ``z_state`` (per the original
    comment, the base ``reset`` goes through ``step``). Also records the
    number of track tiles in ``self.N_tiles``.
    """
    self.rnn_states = rnn_init_state(self.rnn)
    if self.with_obs:
      [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
      self.N_tiles = len(self.track)
      return [z_state, obs]

    z_state = super(CarRacingMDNRNN, self).reset()  # calls step
    self.N_tiles = len(self.track)
    return z_state

  def _step(self, action):
    """Step the wrapped environment and build the feature observation.

    Returns:
      ``(observation, reward, done, {})`` where ``observation`` is
      ``z_state`` or ``[z_state, obs]`` depending on ``with_obs``.
      ``z_state`` is ``concat([z, c, h])`` when ``state_space == 2`` and
      ``concat([z, h])`` otherwise.
    """
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z = tf.squeeze(self.encode_obs(obs))
    h = tf.squeeze(self.rnn_states[0])
    c = tf.squeeze(self.rnn_states[1])
    if self.rnn.args.state_space == 2:
      z_state = tf.concat([z, c, h], axis=-1)
    else:
      z_state = tf.concat([z, h], axis=-1)

    # The state is advanced only after ``z_state`` was assembled, and not
    # at all when no action was supplied (the reset path).
    if action is not None:
      self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)

    if self.with_obs:
      return [z_state, obs], reward, done, {}
    return z_state, reward, done, {}

  def close(self):
    """Close the wrapped environment and release Keras/TensorFlow state."""
    super(CarRacingMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()
예제 #2
0
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    """DoomTakeCover environment that emits VAE/MDN-RNN features.

    The wrapper keeps the latent ``z`` of the most recent frame together
    with the RNN state and exposes their concatenation as the observation
    (see ``_current_state``).
    """

    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        """Build the VAE and MDN-RNN, optionally load weights, then reset.

        Args:
            args: configuration object handed to ``CVAE`` and ``MDNRNN``;
                ``exp_name`` and ``env_name`` are read from it to locate
                the saved models.
            render_mode: when truthy, ``self.no_render`` is set to False.
            load_model: when true, copy the weights stored under
                ``results/<exp_name>/<env_name>/tf_vae`` and ``.../tf_rnn``
                into the freshly built models.
            with_obs: when true, ``_reset``/``_step`` also return the
                downscaled frame next to the feature vector.
        """
        super(DoomTakeCoverMDNRNN, self).__init__()

        self.with_obs = with_obs
        self.no_render = not render_mode
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            model_dir = 'results/{}/{}'.format(args.exp_name, args.env_name)
            # VAE first, then RNN -- same order as the explicit statements
            # this loop replaces.
            for model, name in ((self.vae, 'tf_vae'), (self.rnn, 'tf_rnn')):
                saved = tf.saved_model.load('{}/{}'.format(model_dir, name))
                model.set_weights(
                    [param_i.numpy() for param_i in saved.variables])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        rnn_args = self.rnn.args
        self.obs_size = (rnn_args.z_size +
                         rnn_args.rnn_size * rnn_args.state_space)

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # ``shape`` must be a tuple; ``(n)`` is just the integer ``n``.
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size,))

        self._seed()

        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        """Close the wrapped environment and release Keras/TF state."""
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):
        """Advance the RNN, act in the wrapped environment, encode the frame.

        ``action`` is compared against +/-0.3333: below the negative
        threshold sets entry 11 of the 43-entry action vector, above the
        positive threshold sets entry 10, otherwise the vector stays zero.

        Returns:
            ``(observation, reward, done, {})`` where ``observation`` is
            the feature vector, or ``[features, frame]`` when ``with_obs``
            is set.
        """
        self.frame_count += 1

        # The RNN is updated with the latent of the frame the action was
        # chosen for, i.e. before the environment moves on.
        self.rnn_states = rnn_next_state(self.rnn, self.z, action,
                                         self.rnn_states)

        threshold = 0.3333
        full_action = [0] * 43
        if action < -threshold:
            full_action[11] = 1
        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverMDNRNN,
                                     self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        self.restart = 1 if done else 0

        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        return self._current_state(), reward, done, {}

    def _encode(self, img):
        """Encode one 64x64x3 frame with the VAE.

        The frame is divided by 255, reshaped to ``(1, 64, 64, 3)`` and
        passed to ``self.vae.encode``; the first entry of the result is
        returned.
        """
        # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
        # same dtype the alias used to resolve to.
        simple_obs = np.copy(img).astype(np.float64) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        return self.vae.encode(simple_obs)[0]

    def _reset(self):
        """Reset the wrapped environment, the RNN state and the counters.

        Returns the feature vector, or ``[features, frame]`` when
        ``with_obs`` is set.
        """
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs]
        return self._current_state()

    def _process_frame(self, frame):
        """Keep the first 400 rows of ``frame`` and resize to 64x64 RGB."""
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        return np.array(obs)

    def _current_state(self):
        """Concatenate the current latent with the flattened RNN state.

        With ``state_space == 2`` the layout is
        ``[z, rnn_states[1], rnn_states[0]]`` (described by the original
        author as cell state first, then hidden state); otherwise it is
        ``[z, rnn_states[0]]``.
        """
        flatten = tf.keras.backend.flatten
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                flatten(self.rnn_states[1]),
                flatten(self.rnn_states[0]),
            ], axis=0)
        return np.concatenate([self.z, flatten(self.rnn_states[0])], axis=0)

    def _seed(self, seed=None):
        """Seed TensorFlow (when a seed is given) and the NumPy generator.

        Returns a one-element list with the seed reported by
        ``seeding.np_random``.
        """
        # ``is not None`` rather than truthiness, so that ``seed=0`` also
        # seeds TensorFlow instead of being silently skipped.
        if seed is not None:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
예제 #3
0
# Pick one file from DATA_DIR at random, read its "obs" entry and convert it
# to float32 divided by 255.
filelist = os.listdir(DATA_DIR)
sample_path = os.path.join(DATA_DIR, random.choice(filelist))
obs = np.load(sample_path)["obs"]
obs = obs.astype(np.float32) / 255.0


def resize(img, factor):
    """Return ``img`` rescaled to a square of ``64 * factor`` pixels.

    ``img`` is handed to ``Image.fromarray`` with mode "RGB"; the resized
    image is returned as a NumPy array.
    """
    side = 64 * factor
    scaled = Image.fromarray(img, mode="RGB").resize((side, side))
    return np.array(scaled)


# Endless demo: play episodes with random actions and show each frame next to
# its VAE reconstruction.
while True:
    state = env.reset(start_evaluation=True)
    done = False

    while not done:
        # Encode the current frame (divided by 255) and decode it again.
        vae_input = state.reshape(1, 64, 64, 3) / 255
        batch_z = vae.encode(vae_input)
        reconstruct = vae.decode(batch_z)

        screen.fill((0, 0, 0))

        # Original frame, drawn at the left edge.
        frame_surface = pygame.surfarray.make_surface(resize(state, 3))
        screen.blit(frame_surface, (0, 0))

        # Reconstruction, drawn 300 pixels to the right.
        # NOTE(review): ``resize`` builds an "RGB" image from whatever it
        # receives; confirm that ``reconstruct[0]`` has a dtype/range that
        # suits it.
        decoded_surface = pygame.surfarray.make_surface(
            resize(reconstruct[0], 3))
        screen.blit(decoded_surface, (300, 0))

        pygame.display.flip()

        # For debugging, ``state`` and ``reconstruct[0]`` can also be
        # inspected with ``plt.imshow(...)`` followed by ``plt.show()``.

        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
예제 #4
0
class VaeCarWrapper(gym.ObservationWrapper):
    """Observation wrapper combining VAE features with dashboard readings.

    Each frame is turned into the VAE encoding of its top 84 rows followed
    by eight scalars read from fixed pixel positions: speed, green,
    abs1..abs4, steering and rotation.
    """

    def __init__(self, env, silent=False):
        """Wrap ``env`` and load the VAE weights from ``vae_weights.npy``.

        Args:
            env: environment to wrap.
            silent: when false, ``observation`` prints the extracted
                scalars for every frame.
        """
        super().__init__(env)
        self.silent = silent

        # Imported here rather than at module level, as in the original.
        from vae.vae import CVAE
        from utils import PARSER

        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
        model_path_name = "models/tf_vae"

        self.vae = CVAE(args)

        # Alternative that is currently disabled: load the weights from the
        # saved Keras model at ``model_path_name`` via
        # tf.keras.models.load_model(model_path_name, compile=False).get_weights()

        # NOTE(review): allow_pickle=True unpickles the file's contents; only
        # load weight files from a trusted source.
        weights = np.load("vae_weights.npy", allow_pickle=True)
        self.vae.set_weights(weights)

        # Presumably 32 latent values plus the 8 scalars appended in
        # ``observation`` -- confirm against the VAE configuration.
        self.observation_space = Box(low=float("-inf"),
                                     high=float("inf"),
                                     shape=(40,))

    def _process_frame(self, frame):
        """Return the VAE encoding of the top 84 rows of ``frame``.

        The crop is multiplied by 255, cast to uint8, resized to 64x64 RGB,
        divided by 255 again and passed to the VAE as a batch of one.
        """
        cropped = frame[0:84, :, :]
        pixels = (cropped * 255).astype(np.uint8)
        small = np.array(Image.fromarray(pixels, mode="RGB").resize((64, 64)))
        batch = small.reshape(1, 64, 64, 3) / 255
        return np.array(self.vae.encode(batch)[0])

    def observation(self, frame):
        """Build the feature vector for ``frame``.

        Stores the extracted scalars on ``self`` and returns the VAE
        features followed by
        ``[speed, green, abs1, abs2, abs3, abs4, steering, rotation]``.
        """

        def lit_pixels(rows, cols):
            # Number of green-channel values above 0.5 in the given region.
            return np.sum((frame[rows, cols, 1] > 0.5).flatten())

        # NOTE(review): the far-front spike is counted but the result is
        # never added to ``car_body`` (the original overwrote it right
        # away). Kept as is so the ``green`` feature does not change --
        # confirm whether it was meant to be included.
        _unused_spike = lit_pixels(slice(56, 59), 47)

        # Main headlights.
        car_body = lit_pixels(slice(59, 74), slice(46, 49))
        # Rear wheels (columns 44 and 50), then the sides (columns 45, 49).
        for rows, col in (
            (slice(72, 76), 44),
            (slice(72, 76), 50),
            (slice(67, 77), 45),
            (slice(67, 77), 49),
        ):
            car_body += lit_pixels(rows, col)

        self.green = car_body / 55.0

        # The remaining readings come from rows 85 and below.
        self.speed = sum(frame[85:, 2, 0]) / 5

        self.abs1, self.abs2, self.abs3, self.abs4 = (
            sum(frame[85:, col, 2]) for col in (9, 14, 19, 24)
        )

        indicator_row = frame[90]
        steering_input_left = sum(indicator_row[37:48, 1])
        steering_input_right = sum(indicator_row[47:58, 1])
        self.steering = steering_input_right - steering_input_left

        rotation_left = sum(indicator_row[59:72, 0])
        rotation_right = sum(indicator_row[72:85, 0])
        self.rotation = rotation_right - rotation_left

        if not self.silent:
            print(f"green:{self.green}\tspeed:{self.speed}\tabs:\t{self.abs1}\t{self.abs2}\t{self.abs3}\t{self.abs4}\tsteering:{self.steering}\trotation:{self.rotation}")

        features = self._process_frame(frame)
        scalars = [
            self.speed, self.green, self.abs1, self.abs2, self.abs3,
            self.abs4, self.steering, self.rotation
        ]
        return np.concatenate([features, scalars])
예제 #5
0
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing environment that emits VAE/MDN-RNN features.

    Observations are the VAE latent of the current frame concatenated with
    the RNN hidden state held *before* that frame is fed to the RNN.
    """

    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        """Build the VAE and MDN-RNN and, optionally, load saved weights.

        Args:
            args: configuration object handed to ``CVAE`` and ``MDNRNN``;
                ``exp_name`` and ``env_name`` are read from it to locate
                the saved models.
            load_model: when true, copy the weights of the Keras models
                stored under ``results/<exp_name>/<env_name>/tf_vae`` and
                ``.../tf_rnn`` into the freshly built models.
            full_episode: forwarded to the ``CarRacingWrapper`` constructor.
            with_obs: when true, ``reset``/``_step`` return ``[z_h, obs]``.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            model_dir = 'results/{}/{}'.format(args.exp_name, args.env_name)
            # VAE first, then RNN -- same order as the explicit statements
            # this loop replaces.
            for model, name in ((self.vae, 'tf_vae'), (self.rnn, 'tf_rnn')):
                saved = tf.keras.models.load_model(
                    '{}/{}'.format(model_dir, name), compile=False)
                model.set_weights(saved.get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        # NOTE(review): this overwrites the attribute regardless of the
        # ``full_episode`` argument passed to the base class above. Kept as
        # in the original -- confirm it is intentional.
        self.full_episode = False

        # ``shape`` must be a tuple; ``(32 + 256)`` is just the integer 288.
        # ``-np.inf``/``np.inf`` replace ``np.NINF``/``np.Inf``, which were
        # removed in NumPy 2.0. The sizes are hard-coded here -- presumably
        # latent size 32 and RNN size 256; confirm against ``args``.
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(32 + 256,))

    def encode_obs(self, obs):
        """Encode one frame with the VAE.

        The frame goes through ``self._process_frame``, is divided by 255,
        reshaped to ``(1, 64, 64, 4)`` and passed to ``self.vae.encode``;
        the first entry of the result is returned.
        """
        obs = self._process_frame(obs)
        # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
        # same dtype the alias used to resolve to.
        result = np.copy(obs).astype(np.float64) / 255.0
        result = result.reshape(1, 64, 64, 4)
        return self.vae.encode(result)[0]

    def reset(self):
        """Reset the RNN state and the wrapped environment.

        Returns ``[z_h, obs]`` when ``with_obs`` is set; otherwise returns
        the result of a second base-class ``reset`` (see the note below).
        """
        self.rnn_states = rnn_init_state(self.rnn)
        obs = super(CarRacingMDNRNN, self).reset()
        # NOTE(review): ``encode_obs`` runs ``_process_frame`` again on this
        # already processed frame -- confirm the double pass is harmless.
        obs = self._process_frame(obs)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)

        if self.with_obs:
            return [z_h, obs]

        # NOTE(review): this branch discards the ``z_h`` computed above and
        # resets the wrapped environment a second time, returning whatever
        # the base class returns. The original comment says that call goes
        # through ``step``; the base class is not visible here, so the
        # behaviour is kept unchanged -- confirm whether ``z_h`` should be
        # returned directly.
        return super(CarRacingMDNRNN, self).reset()  # calls step

    def _step(self, action):
        """Step the wrapped environment and build the feature observation.

        Returns:
            ``(observation, reward, done, {})`` where ``observation`` is
            ``z_h`` or ``[z_h, obs]`` depending on ``with_obs``.
        """
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)

        # The state is advanced only after ``z_h`` was assembled, and not
        # at all when no action was supplied (the reset path).
        if action is not None:
            self.rnn_states = rnn_next_state(self.rnn, z, action,
                                             self.rnn_states)

        if self.with_obs:
            return [z_h, obs], reward, done, {}
        return z_h, reward, done, {}

    def close(self):
        """Close the wrapped environment and release Keras/TF state."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()