class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing wrapper whose observation is the VAE latent z concatenated
    with the MDN-RNN hidden state (and cell state when state_space == 2)."""

    def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
        """Build the VAE and MDN-RNN and optionally restore saved weights.

        Args:
            args: config with z_size, rnn_size, state_space, exp_name, env_name.
            load_model: restore weights from results/<exp_name>/<env_name>/tf_{vae,rnn}.
            full_episode: forwarded to CarRacingWrapper.
            with_obs: if True, also return the raw frame next to the encoding.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load(
                    'results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load(
                    'results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): unconditionally overrides the `full_episode` argument;
        # preserved from the original — confirm this is intended.
        self.full_episode = False
        # BUG FIX: Box's `shape` must be a sequence — `(expr)` is just a
        # parenthesized int, not a 1-tuple.  Also use -np.inf/np.inf; the
        # np.NINF/np.Inf aliases were removed in NumPy 2.0.
        self.observation_space = Box(
            low=-np.inf, high=np.inf,
            shape=(args.z_size + args.rnn_size * args.state_space,))

    def encode_obs(self, obs):
        """Convert a raw 64x64x3 frame to its VAE latent vector z."""
        # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin float (float64, same semantics) is the replacement.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        """Reset the env and the RNN state; return the encoded initial state."""
        self.rnn_states = rnn_init_state(self.rnn)
        if self.with_obs:
            [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return [z_state, obs]
        else:
            z_state = super(CarRacingMDNRNN, self).reset()  # calls step
            self.N_tiles = len(self.track)
            return z_state

    def _step(self, action):
        """Step the wrapped env; return (encoded state, reward, done, info)."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = tf.squeeze(self.encode_obs(obs))
        h = tf.squeeze(self.rnn_states[0])
        c = tf.squeeze(self.rnn_states[1])
        # state_space == 2 exposes both the LSTM cell and hidden state.
        if self.rnn.args.state_space == 2:
            z_state = tf.concat([z, c, h], axis=-1)
        else:
            z_state = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        if self.with_obs:
            return [z_state, obs], reward, done, {}
        else:
            return z_state, reward, done, {}

    def close(self):
        """Close the env and release TF graph/session memory."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    """DoomTakeCover wrapper whose agent-facing state is the VAE latent z
    concatenated with the MDN-RNN hidden state (and cell state when
    state_space == 2)."""

    def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
        """Build the VAE and MDN-RNN and optionally restore saved weights.

        Args:
            args: config with z_size, rnn_size, state_space, exp_name, env_name.
            render_mode: if True, allow on-screen rendering.
            load_model: restore weights from results/<exp_name>/<env_name>/tf_{vae,rnn}.
            with_obs: if True, also return the 64x64 frame next to the encoding.
        """
        super(DoomTakeCoverMDNRNN, self).__init__()
        self.with_obs = with_obs
        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy()
                for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])
        # Scalar steering action in [-1, 1].
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # BUG FIX: Box's `shape` must be a sequence — `(self.obs_size)` is a
        # plain int, not a 1-tuple.
        self.actual_observation_space = Box(low=-50., high=50., shape=(self.obs_size,))
        self._seed()
        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        """Close the env and release TF graph/session memory."""
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):
        """Advance one frame; `action` is a scalar steering value in [-1, 1]."""
        # update states of rnn
        self.frame_count += 1
        self.rnn_states = rnn_next_state(self.rnn, self.z, action, self.rnn_states)
        # Translate the scalar into the wrapped env's 43-way one-hot action:
        # index 11 = move one way, index 10 = the other; dead zone in between.
        threshold = 0.3333
        full_action = [0] * 43
        if action < -threshold:
            full_action[11] = 1
        if action > threshold:
            full_action[10] = 1
        obs, reward, done, _ = super(DoomTakeCoverMDNRNN, self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)
        if done:
            self.restart = 1
        else:
            self.restart = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        else:
            return self._current_state(), reward, done, {}

    def _encode(self, img):
        """Encode a 64x64x3 frame to its VAE latent vector z."""
        # BUG FIX: np.float was removed in NumPy 1.24; builtin float is the
        # documented replacement (same float64 semantics).
        simple_obs = np.copy(img).astype(float) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        z = self.vae.encode(simple_obs)[0]
        return z

    def _reset(self):
        """Reset env, RNN state and bookkeeping; return the initial state."""
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()

    def _process_frame(self, frame):
        """Crop the HUD off the raw frame and downscale to 64x64 RGB."""
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        obs = np.array(obs)
        return obs

    def _current_state(self):
        """Return z concatenated with the flattened RNN state."""
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                tf.keras.backend.flatten(self.rnn_states[1]),
                tf.keras.backend.flatten(self.rnn_states[0])
            ], axis=0)  # cell then hidden for some reason
        return np.concatenate(
            [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
            axis=0)  # only the hidden state

    def _seed(self, seed=None):
        """Seed TF and the gym RNG; returns the seed list per gym convention."""
        # BUG FIX: the truthiness test `if seed:` silently skipped seeding TF
        # for the valid seed value 0.
        if seed is not None:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
# Visual sanity check for the VAE: run a random policy and render each live
# frame next to its VAE reconstruction, side by side, in a pygame window.
filelist = os.listdir(DATA_DIR)
obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = obs.astype(np.float32) / 255.0


def resize(img, factor):
    """Upscale a 64x64 RGB array by an integer factor for display."""
    upscaled = Image.fromarray(img, mode="RGB").resize((64 * factor, 64 * factor))
    return np.array(upscaled)


while True:
    state = env.reset(start_evaluation=True)
    done = False
    while not done:
        # Encode the current frame and decode it straight back.
        latent = vae.encode(state.reshape(1, 64, 64, 3) / 255)
        decoded = vae.decode(latent)
        # Original on the left, reconstruction on the right.
        screen.fill((0, 0, 0))
        screen.blit(pygame.surfarray.make_surface(resize(state, 3)), (0, 0))
        screen.blit(pygame.surfarray.make_surface(resize(decoded[0], 3)), (300, 0))
        pygame.display.flip()
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
class VaeCarWrapper(gym.ObservationWrapper):
    """Observation wrapper that replaces the CarRacing frame with a 32-d VAE
    latent plus 8 scalar features read directly off the rendered frame
    (car-body pixel count, speed, ABS sensors, steering, rotation) — 40 values."""

    def __init__(self, env, silent=False):
        """Load the pretrained VAE weights and declare the 40-d observation space.

        Args:
            env: the wrapped CarRacing environment.
            silent: if False, print the extracted scalar features every frame.
        """
        super().__init__(env)
        from vae.vae import CVAE
        from utils import PARSER
        args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
        self.vae = CVAE(args)
        # Weights are loaded from a pickled numpy dump rather than a keras
        # SavedModel; the file must sit in the working directory.
        self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))
        self.observation_space = Box(low=float("-inf"), high=float("inf"), shape=(40,))
        self.silent = silent

    def _process_frame(self, frame):
        """Crop the HUD, downscale to 64x64 and return the VAE latent."""
        obs = (frame[0:84, :, :] * 255).astype(np.uint8)
        obs = Image.fromarray(obs, mode="RGB").resize((64, 64))
        obs = np.array(obs)
        return np.array(self.vae.encode(obs.reshape(1, 64, 64, 3) / 255)[0])

    def observation(self, frame):
        """Map a raw frame to [32 VAE features, speed, green, abs1..4, steering, rotation]."""
        # Count the car-body pixels (green channel) at fixed sprite positions.
        # far-front spike
        car_body = np.sum((frame[56:59, 47, 1] > 0.5).flatten())
        # main headlights
        # BUG FIX: this line used `=` instead of `+=`, silently discarding
        # the far-front spike count computed just above.
        car_body += np.sum((frame[59:74, 46:49, 1] > 0.5).flatten())
        # rear wheels
        car_body += np.sum((frame[72:76, 44, 1] > 0.5).flatten())
        car_body += np.sum((frame[72:76, 50, 1] > 0.5).flatten())
        # sides
        car_body += np.sum((frame[67:77, 45, 1] > 0.5).flatten())
        car_body += np.sum((frame[67:77, 49, 1] > 0.5).flatten())
        # NOTE(review): 55.0 normalizer predates the += fix above and matches
        # neither the old (73) nor the new (76) maximum pixel count — confirm.
        self.green = car_body / 55.0
        # HUD readouts: fixed pixel columns at the bottom of the frame.
        self.speed = sum(frame[85:, 2, 0]) / 5
        self.abs1 = sum(frame[85:, 9, 2])
        self.abs2 = sum(frame[85:, 14, 2])
        self.abs3 = sum(frame[85:, 19, 2])
        self.abs4 = sum(frame[85:, 24, 2])
        steering_input_left = sum(frame[90, 37:48, 1])
        steering_input_right = sum(frame[90, 47:58, 1])
        self.steering = steering_input_right - steering_input_left
        rotation_left = sum(frame[90, 59:72, 0])
        rotation_right = sum(frame[90, 72:85, 0])
        self.rotation = rotation_right - rotation_left
        if not self.silent:
            print(f"green:{self.green}\tspeed:{self.speed}\tabs:\t{self.abs1}\t{self.abs2}\t{self.abs3}\t{self.abs4}\tsteering:{self.steering}\trotation:{self.rotation}")
        features = self._process_frame(frame)
        return np.concatenate([features, [self.speed, self.green, self.abs1,
                                          self.abs2, self.abs3, self.abs4,
                                          self.steering, self.rotation]])
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing wrapper returning [z ++ h]: the 32-d VAE latent concatenated
    with the 256-d MDN-RNN hidden state (keras SavedModel loading variant)."""

    def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
        """Build the VAE and MDN-RNN and optionally restore saved weights.

        Args:
            args: config with exp_name and env_name attributes.
            load_model: restore weights from results/<exp_name>/<env_name>/tf_{vae,rnn}.
            full_episode: forwarded to CarRacingWrapper.
            with_obs: if True, also return the processed frame next to the encoding.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)
        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name), compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name), compile=False).get_weights())
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): unconditionally overrides the `full_episode` argument;
        # preserved from the original — confirm this is intended.
        self.full_episode = False
        # BUG FIX: Box's `shape` must be a sequence — `(32 + 256)` is a plain
        # int.  np.NINF/np.Inf were removed in NumPy 2.0; use -np.inf/np.inf.
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

    def encode_obs(self, obs):
        """Crop/resize the raw frame and encode it to the VAE latent z."""
        obs = self._process_frame(obs)
        # BUG FIX: np.float was removed in NumPy 1.24; builtin float replaces it.
        result = np.copy(obs).astype(float) / 255.0
        # BUG FIX: the frame is RGB (3 channels, as in the sibling wrappers);
        # reshaping a 64*64*3 buffer to (1, 64, 64, 4) raises a ValueError.
        result = result.reshape(1, 64, 64, 3)
        z = self.vae.encode(result)[0]
        return z

    def reset(self):
        """Reset env and RNN state; return the initial [z ++ h] encoding."""
        self.rnn_states = rnn_init_state(self.rnn)
        obs = super(CarRacingMDNRNN, self).reset()
        # BUG FIX: encode_obs already runs _process_frame internally, so the
        # original processed the frame twice before encoding.
        z = self.encode_obs(obs)
        obs = self._process_frame(obs)  # small frame for the with_obs return
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if self.with_obs:
            return [z_h, obs]
        # BUG FIX: the original else-branch reset the env a second time and
        # returned the raw observation instead of the computed encoding.
        return z_h

    def _step(self, action):
        """Step the wrapped env; return ([z ++ h], reward, done, info)."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        if self.with_obs:
            return [z_h, obs], reward, done, {}
        else:
            return z_h, reward, done, {}

    def close(self):
        """Close the env and release TF graph/session memory."""
        super(CarRacingMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()