class DoomTakeCoverWrapper(DoomTakeCoverEnv):
    def __init__(self, render_mode=False, load_model=True):
        super(DoomTakeCoverWrapper, self).__init__()
        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        reset_graph()

        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=tf.AUTO_REUSE)
        self.rnn = Model(hps_sample, gpu_mode=False)

        if load_model:
            self.vae.load_json(os.path.join(model_path_name, 'vae.json'))
            self.rnn.load_json(os.path.join(model_path_name, 'rnn.json'))

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=())
        self.outwidth = self.rnn.hps.seq_width
        self.obs_size = self.outwidth + model_rnn_size * model_state_space

        self.observation_space = spaces.Box(low=0, high=255, shape=(SCREEN_Y, SCREEN_X, 3))
        # shape must be a tuple; (self.obs_size) without the comma is just an int
        self.actual_observation_space = spaces.Box(low=-50., high=50., shape=(self.obs_size,))

        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)
        self._seed()

        self.rnn_state = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def _step(self, action):
        # update states of rnn
        self.frame_count += 1

        prev_z = np.zeros((1, 1, self.outwidth))
        prev_z[0][0] = self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn
        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }
        self.rnn_state = s_model.sess.run(s_model.final_state, feed)

        # actual action in wrapped env: map the scalar action onto the
        # discrete left/right buttons of the underlying Doom action vector
        threshold = 0.3333
        full_action = [0] * 43
        if action < -threshold:
            full_action[11] = 1
        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverWrapper, self)._step(full_action)
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        if done:
            self.restart = 1
        else:
            self.restart = 0

        return self._current_state(), reward, done, {}

    def _encode(self, img):
        # np.float is a deprecated alias for the builtin float
        simple_obs = np.copy(img).astype(float) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(simple_obs)
        return (mu + np.exp(logvar / 2.0) * self.np_random.randn(*logvar.shape))[0]

    def _decode(self, z):
        # decode the latent vector back to a 64x64 RGB frame
        img = self.vae.decode(z.reshape(1, 64)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def _reset(self):
        obs = super(DoomTakeCoverWrapper, self)._reset()
        small_obs = _process_frame(obs)
        self.current_obs = small_obs
        self.rnn_state = self.zero_state
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0
        return self._current_state()

    def _current_state(self):
        if model_state_space == 2:
            return np.concatenate([self.z, self.rnn_state.c.flatten(), self.rnn_state.h.flatten()], axis=0)
        return np.concatenate([self.z, self.rnn_state.h.flatten()], axis=0)

    def _seed(self, seed=None):
        if seed is not None:  # 'if seed:' would silently ignore seed=0
            tf.set_random_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            state = self.game.get_state()
            img = state.image_buffer
            small_img = self.current_obs
            if img is None:
                img = np.zeros(shape=(480, 640, 3), dtype=np.uint8)
            if small_img is None:
                small_img = np.zeros(shape=(SCREEN_Y, SCREEN_X, 3), dtype=np.uint8)
            small_img = resize(small_img, (img.shape[0], img.shape[0]))
            vae_img = self._decode(self.z)
            vae_img = resize(vae_img, (img.shape[0], img.shape[0]))
            # side-by-side panel: raw frame | downscaled frame | VAE reconstruction
            all_img = np.concatenate((img, small_img, vae_img), axis=1)
            img = all_img
            if mode == 'rgb_array':
                return img
            elif mode == 'human':  # 'is' comparison on string literals is unreliable; use ==
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except doom_py.vizdoom.ViZDoomIsNotRunningException:
            pass  # Doom has been closed
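A minimal usage sketch, not part of the original listing: it assumes the module-level names above (model_path_name, hps_sample, etc.) are configured, and that the old gym (<0.9) API is in use, where the public reset()/step() dispatch to _reset()/_step().

# Hypothetical rollout under a random policy (illustration only).
env = DoomTakeCoverWrapper(render_mode=False, load_model=True)
obs = env.reset()  # concatenated [z, h] (or [z, c, h]) feature vector
total_reward = 0.0
for _ in range(100):
    action = np.random.uniform(-1.0, 1.0)  # scalar in [-1, 1], thresholded inside _step
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        break
print("rollout reward:", total_reward)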
def main(args):
    print("Train RNN begin")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    np.set_printoptions(precision=4, edgeitems=6, linewidth=100, suppress=True)

    model_save_path = args.output_file_name
    model_rnn_size = 512
    model_restart_factor = 10.
    Z_VECTOR_SIZE = 64  # KOEChange
    DATA_DIR = "series"

    initial_z_save_path = "tf_initial_z"
    if not os.path.exists(initial_z_save_path):
        os.makedirs(initial_z_save_path)

    model_num_mixture = args.num_mixtures
    epochs = args.epochs
    model_save_path += "_" + str(model_num_mixture) + "mixtures"
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    def default_hps():
        return HyperParams(max_seq_len=100,  # KOEChange. Was 500.
                           seq_width=Z_VECTOR_SIZE,  # KOEChange. Was 64.
                           rnn_size=model_rnn_size,  # number of rnn cells
                           batch_size=100,  # minibatch sizes
                           grad_clip=1.0,
                           num_mixture=int(model_num_mixture),  # number of mixtures in MDN
                           restart_factor=model_restart_factor,  # factor of importance for restart=1 rare case for loss.
                           learning_rate=0.001,
                           decay_rate=0.99999,
                           min_learning_rate=0.00001,
                           use_layer_norm=0,  # set this to 1 to get more stable results (less chance of NaN), but slower
                           use_recurrent_dropout=0,
                           recurrent_dropout_prob=0.90,
                           use_input_dropout=0,
                           input_dropout_prob=0.90,
                           use_output_dropout=0,
                           output_dropout_prob=0.90,
                           is_training=1)

    hps_model = default_hps()
    hps_sample = hps_model._replace(batch_size=1, max_seq_len=2, use_recurrent_dropout=0, is_training=0)

    # load preprocessed data
    raw_data = np.load(os.path.join(DATA_DIR, "series.npz"))
    raw_data_mu = raw_data["mu"]
    raw_data_logvar = raw_data["logvar"]
    raw_data_action = raw_data["action"]

    def load_series_data():
        all_data = []
        for i in range(len(raw_data_mu)):
            action = raw_data_action[i]
            mu = raw_data_mu[i]
            logvar = raw_data_logvar[i]
            all_data.append([mu, logvar, action])
        return all_data

    def get_frame_count(all_data):
        frame_count = []
        for data in all_data:
            frame_count.append(len(data[0]))
        return np.sum(frame_count)

    def create_batches(all_data, batch_size=100, seq_length=100):
        num_frames = get_frame_count(all_data)
        num_batches = int(num_frames / (batch_size * seq_length))
        num_frames_adjusted = num_batches * batch_size * seq_length
        random.shuffle(all_data)

        data_mu = np.zeros((num_frames, N_z), dtype=np.float16)
        data_logvar = np.zeros((num_frames, N_z), dtype=np.float16)
        data_action = np.zeros(num_frames, dtype=np.float16)
        data_restart = np.zeros(num_frames, dtype=np.uint8)

        idx = 0
        for data in all_data:
            mu, logvar, action = data
            N = len(action)
            data_mu[idx:idx + N] = mu.reshape(N, Z_VECTOR_SIZE)
            data_logvar[idx:idx + N] = logvar.reshape(N, Z_VECTOR_SIZE)
            data_action[idx:idx + N] = action.reshape(N)
            data_restart[idx] = 1  # mark the first frame of each episode
            idx += N

        # trim to a whole number of batches, then split along the time axis
        data_mu = data_mu[0:num_frames_adjusted]
        data_logvar = data_logvar[0:num_frames_adjusted]
        data_action = data_action[0:num_frames_adjusted]
        data_restart = data_restart[0:num_frames_adjusted]

        data_mu = np.split(data_mu.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
        data_logvar = np.split(data_logvar.reshape(batch_size, -1, Z_VECTOR_SIZE), num_batches, 1)
        data_action = np.split(data_action.reshape(batch_size, -1), num_batches, 1)
        data_restart = np.split(data_restart.reshape(batch_size, -1), num_batches, 1)

        return data_mu, data_logvar, data_action, data_restart

    def get_batch(batch_idx, data_mu, data_logvar, data_action, data_restart):
        batch_mu = data_mu[batch_idx]
        batch_logvar = data_logvar[batch_idx]
        batch_action = data_action[batch_idx]
        batch_restart = data_restart[batch_idx]
        batch_s = batch_logvar.shape
        # sample z from the stored (mu, logvar) via the reparameterization trick
        batch_z = batch_mu + np.exp(batch_logvar / 2.0) * np.random.randn(*batch_s)
        return batch_z, batch_action, batch_restart

    # process data
    all_data = load_series_data()
    max_seq_len = hps_model.max_seq_len
    N_z = hps_model.seq_width  # referenced by create_batches via closure

    # save 1000 initial mu and logvars:
    initial_mu = []
    initial_logvar = []
    for i in range(1000):
        # np.int is a deprecated alias for the builtin int
        mu = np.copy(raw_data_mu[i][0, :] * 10000).astype(int).tolist()
        logvar = np.copy(raw_data_logvar[i][0, :] * 10000).astype(int).tolist()
        initial_mu.append(mu)
        initial_logvar.append(logvar)
    with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
        json.dump([initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))

    reset_graph()
    model = Model(hps_model)
    hps = hps_model

    start = time.time()
    print("Starting first epoch of total", epochs)
    for epoch in range(1, epochs + 1):  # range(1, epochs) would skip the last epoch
        print('preparing data for epoch', epoch)
        # drop references to last epoch's batches before building new ones
        data_mu, data_logvar, data_action, data_restart = 0, 0, 0, 0
        data_mu, data_logvar, data_action, data_restart = create_batches(all_data)
        num_batches = len(data_mu)
        print('number of batches', num_batches)
        end = time.time()
        time_taken = end - start
        print('time taken to create batches', time_taken)

        batch_state = model.sess.run(model.initial_state)

        for local_step in range(num_batches):
            batch_z, batch_action, batch_restart = get_batch(local_step, data_mu, data_logvar, data_action, data_restart)
            step = model.sess.run(model.global_step)
            curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * (hps.decay_rate) ** step + hps.min_learning_rate

            feed = {model.batch_z: batch_z,
                    model.batch_action: batch_action,
                    model.batch_restart: batch_restart,
                    model.initial_state: batch_state,
                    model.lr: curr_learning_rate}

            (train_cost, z_cost, r_cost, batch_state, train_step, _) = model.sess.run(
                [model.cost, model.z_cost, model.r_cost, model.final_state, model.global_step, model.train_op], feed)

            if (step % 20 == 0 and step > 0):
                end = time.time()
                time_taken = end - start
                start = time.time()
                output_log = "step: %d, lr: %.6f, cost: %.4f, z_cost: %.4f, r_cost: %.4f, train_time_taken: %.4f" % (
                    step, curr_learning_rate, train_cost, z_cost, r_cost, time_taken)
                print(output_log)

        # save the model after each epoch (don't bother with tf checkpoints, json all the way ...)
        model.save_json(os.path.join(model_save_path, "rnn.json"))
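A sketch of an entry point, assuming nothing beyond what main(args) actually reads: it only relies on args.output_file_name, args.num_mixtures, and args.epochs. The flag names and defaults below are assumptions; the original argument parser is not shown.

# Hypothetical entry point (flag names are assumptions, not from the source).
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="Train the MDN-RNN world model.")
    parser.add_argument("--output_file_name", type=str, default="tf_rnn",
                        help="prefix for the model save directory")
    parser.add_argument("--num_mixtures", type=int, default=5,
                        help="number of mixture components in the MDN head")
    parser.add_argument("--epochs", type=int, default=400,
                        help="number of training epochs")
    main(parser.parse_args())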
class RNNAnalyzer:
    def __init__(self, rnn_load_path, num_mixtures, temperature):
        # RNN parameters - modelled after hps_sample in doomrnn.py
        self.vae = VAE(z_size=LATENT_SPACE_DIMENSIONALITY, batch_size=1, is_training=False, reuse=False, gpu_mode=False)
        self.vae.load_json(os.path.join(VAE_PATH, 'vae.json'))

        hps = default_prediction_hps(num_mixtures)
        self.rnn = RNN(hps, gpu_mode=False)
        self.rnn.load_json(os.path.join(rnn_load_path, 'rnn.json'))

        self.frame_count = 0
        self.temperature = temperature
        self.zero_state = self.rnn.sess.run(self.rnn.zero_state)
        self.outwidth = self.rnn.hps.seq_width
        self.restart = 1
        self.rnn_state = self.zero_state

    def _reset(self, initial_z):
        # Resets the RNN, with an initial z.
        self.rnn_state = self.zero_state
        self.z = initial_z
        self.restart = 1
        self.frame_count = 0

    def decode_with_vae(self, latent_vector_sequence):
        reconstructions = self.vae.decode(np.array(latent_vector_sequence))
        return reconstructions

    def predict_one_step(self, action, previous_z=None):
        # Predicts one step ahead from the previous state.
        # If previous_z is given, we predict with that as input.
        # Otherwise, we dream from the previous output we generated.
        self.frame_count += 1

        prev_z = np.zeros((1, 1, self.outwidth))
        if previous_z is not None and len(previous_z) > 0:
            prev_z[0][0] = previous_z
        else:
            prev_z[0][0] = self.z

        prev_action = np.zeros((1, 1))
        prev_action[0] = action

        prev_restart = np.ones((1, 1))
        prev_restart[0] = self.restart

        s_model = self.rnn
        feed = {
            s_model.input_z: prev_z,
            s_model.input_action: prev_action,
            s_model.input_restart: prev_restart,
            s_model.initial_state: self.rnn_state
        }
        [logmix, mean, logstd, logrestart, next_state] = s_model.sess.run([
            s_model.out_logmix,
            s_model.out_mean,
            s_model.out_logstd,
            s_model.out_restart_logits,
            s_model.final_state
        ], feed)

        OUTWIDTH = self.outwidth

        # adjust temperature of the mixture weights, then renormalize
        logmix2 = np.copy(logmix) / self.temperature
        logmix2 -= logmix2.max()
        logmix2 = np.exp(logmix2)
        logmix2 /= logmix2.sum(axis=1).reshape(OUTWIDTH, 1)

        # sample one mixture component per latent dimension
        mixture_idx = np.zeros(OUTWIDTH)
        chosen_mean = np.zeros(OUTWIDTH)
        chosen_logstd = np.zeros(OUTWIDTH)
        for j in range(OUTWIDTH):
            idx = get_pi_idx(np_random.rand(), logmix2[j])
            mixture_idx[j] = idx
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]

        rand_gaussian = np_random.randn(OUTWIDTH) * np.sqrt(self.temperature)
        next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian

        next_restart = 0  # Never telling it that we got a restart.
        # if logrestart[0] > 0:
        #     next_restart = 1

        self.z = next_z
        self.restart = next_restart
        self.rnn_state = next_state

        return next_z, logmix2
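A dream-rollout sketch, assuming a trained model directory and that the VAE's decode accepts a batch of latent vectors; the path, step count, and zero initial z below are illustrative placeholders, not from the source.

# Hypothetical dream rollout: seed the analyzer with an initial z, let the
# MDN-RNN hallucinate a short trajectory, then decode it back to pixels.
analyzer = RNNAnalyzer(rnn_load_path="tf_rnn_5mixtures", num_mixtures=5, temperature=1.0)
initial_z = np.zeros(analyzer.outwidth)  # in practice: a z encoded from a real frame
analyzer._reset(initial_z)
dreamed_latents = []
for step in range(60):
    action = 0.0  # stand still; any value in [-1, 1] works
    next_z, mixture_weights = analyzer.predict_one_step(action)
    dreamed_latents.append(next_z)
frames = analyzer.decode_with_vae(dreamed_latents)  # decoded dream frames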