def sample_vae2(args):
    """Sample random latent vectors and display the decoded images.

    Uses the VAE from https://github.com/hardmaru/WorldModelsExperiments.git
    """
    z_size = 32
    model_path_name = "tf_vae"

    reset_graph()
    # Inference-only VAE on CPU; batch size equals the number of samples
    # requested.  (A GPU with a batch size of 1000 would be much faster.)
    vae = ConvVAE(
        z_size=z_size,
        batch_size=args.count,
        learning_rate=0.0001,
        kl_tolerance=0.5,
        is_training=False,
        reuse=False,
        gpu_mode=False)
    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    # Draw latents from the standard normal prior and decode them to images.
    latents = np.random.normal(size=(args.count, z_size))
    decoded = vae.decode(latents)
    img_shape = decoded.shape[1:]

    plt.figure(figsize=(20, 4))
    plt.title('VAE samples')
    for idx in range(args.count):
        axis = plt.subplot(2, args.count, idx + 1)
        plt.imshow(decoded[idx].reshape(img_shape[0], img_shape[1], img_shape[2]))
        axis.get_xaxis().set_visible(False)
        axis.get_yaxis().set_visible(False)
    plt.show()
class Model:
    """Simple one-layer controller model for car racing.

    Wraps a pretrained ConvVAE (vision) and MDN-RNN (memory) and maps their
    combined features to a 3-dimensional action, either linearly or through
    a single tanh hidden layer depending on EXP_MODE.
    """

    def __init__(self):
        self.env_name = "carracing"
        # Pretrained world-model components, inference only on CPU.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the gym environment via the module-level make_env helper."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the RNN hidden state at the start of an episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame; return (sampled z, mu, logvar)."""
        # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in
        # 1.24 -- use the explicit np.float64 alias instead.
        result = np.copy(obs).astype(np.float64) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode a latent vector back into a 64x64x3 uint8 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z):
        """Compute an action from latent z, then advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        # Gas is remapped from [-1, 1] to [0, 1]; brake is clipped.
        action[1] = (action[1] + 1.0) / 2.0
        action[2] = clip(action[2])

        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the policy weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Load policy parameters from a JSON file produced by training."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a Gaussian random parameter vector of length param_count."""
        return np.random.randn(self.param_count)*stdev
# Reconstruct one random recorded episode through the VAE and save
# original / reconstruction image pairs for visual inspection.
output_dir = "vae_test_result"
z_size = 32

# Pick a random .npz episode file from the data directory.
filelist = os.listdir(DATA_DIR)
filelist = [f for f in filelist if '.npz' in f]
obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = np.expand_dims(obs, axis=-1)  # add a channel axis -> (n, 64, 64, 1)
obs = obs.astype(np.float32)/255.0
n = len(obs)

# Inference-only VAE, one frame at a time on CPU.
vae = ConvVAE(z_size=z_size, batch_size=1, is_training=False, reuse=False, gpu_mode=False)
vae.load_json(os.path.join(model_path_name, 'vae.json'))

# FIX: makedirs(exist_ok=True) replaces the exists()+mkdir() pair, which
# is racy (fails if the directory appears between the check and the call).
os.makedirs(output_dir, exist_ok=True)

print(n, "images loaded")

for i in range(n):
    frame = obs[i].reshape(1, 64, 64, 1)
    batch_z = vae.encode(frame)
    reconstruct = vae.decode(batch_z)
    imsave(output_dir+'/%s.png' % pad_num(i), 255.*frame[0].reshape(64, 64))
    imsave(output_dir+'/%s_vae.png' % pad_num(i), 255.*reconstruct[0].reshape(64, 64))
def sample_vae2(args):
    """Visualize the VAE three ways: prior samples, real frames, reconstructions.

    Uses the VAE from https://github.com/hardmaru/WorldModelsExperiments.git
    Saves the combined figure to samples_vae.png and also shows it.
    """
    z_size = 64  # This needs to match the size of the trained vae
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster
    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    # Row 1: decode latents drawn from the standard normal prior.
    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    # Rows 2-3: real frames from the data generator and their reconstructions.
    gen = DriveDataGenerator(args.dirs, image_size=(64, 64), batch_size=args.count,
                             shuffle=True, max_load=10000, images_only=True)
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # use the explicit np.float64 alias instead.
    orig = gen[0].astype(np.float64) / 255.0
    recon = vae.decode(vae.encode(orig))

    n = args.count
    plt.figure(figsize=(20, 6), tight_layout=False)
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(3, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Random")
    for i in range(n):
        ax = plt.subplot(3, n, n + i + 1)
        plt.imshow(orig[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Real")

        ax = plt.subplot(3, n, (2 * n) + i + 1)
        plt.imshow(recon[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Reconstructed")

    plt.savefig("samples_vae.png")
    plt.show()
class Model:
    """Simple one-layer controller model for car racing.

    Wraps a pretrained ConvVAE (vision) and MDN-RNN (memory) and maps their
    combined features to a 2-dimensional action, either linearly or through
    a single tanh hidden layer depending on EXP_MODE.
    """

    def __init__(self, arglist):
        self.env_name = arglist.game
        # Pretrained world-model components, inference only on CPU.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the gym environment via the module-level make_env helper."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the RNN hidden state at the start of an episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame; return (sampled z, mu, logvar)."""
        # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in
        # 1.24 -- use the explicit np.float64 alias instead.
        result = np.copy(obs).astype(np.float64) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode a latent vector back into a 64x64x3 uint8 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        """Compute an action from latent z, step the environment, and
        advance the RNN state.  Returns the action taken."""
        h = rnn_output(self.state, z, EXP_MODE)

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        # In competitive mode only our steering (action[0]) is sent; the
        # opponent is driven by the built-in 'script' policy.
        if arglist.competitive:
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)

        extra_reward = 0.0  # penalize for turning too frequently
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
                reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward
        # NOTE(review): reward/obs/done computed above are never returned or
        # stored -- presumably a leftover from a refactor; verify callers.

        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the policy weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        """Load policy parameters from a JSON file produced by training."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a Gaussian random parameter vector of length param_count."""
        return np.random.randn(self.param_count) * stdev
class VAERacingStack(CarRacing):
    """CarRacing variant whose observations are VAE latents computed from a
    stack of FRAME_STACK recent (green-channel-processed) frames."""

    def __init__(self, full_episode=False, discrete_mode=False):
        super(VAERacingStack, self).__init__()
        self._internal_counter = 0  # step count, used against TIME_LIMIT
        self.z_size = games['vae_racing_stack'].input_size
        # Inference-only VAE whose input channels are the stacked frames.
        self.vae = ConvVAE(batch_size=1, z_size=self.z_size, num_channel=FRAME_STACK, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae_stack_' + str(FRAME_STACK) + '.json')
        self.full_episode = full_episode
        # Observation space is the unbounded latent vector.
        high = np.array([np.inf] * self.z_size)
        self.observation_space = Box(-high, high)
        self.cumulative_frames = None  # rolling list of the last FRAME_STACK frames
        self._has_rendered = False
        self.discrete_mode = discrete_mode

    def _get_image(self, z, cumulative_frames):
        """Build a debug image: decoded latent channels (bottom row) above
        the raw stacked frames (top row), as a (128, 64*FRAME_STACK) uint8 array."""
        large_img = np.zeros((64 * 2, 64 * FRAME_STACK))
        # decode the latent vector
        if z is not None:
            img = self.vae.decode(z.reshape(1, self.z_size)) * 255.0
            img = np.round(img).astype(np.uint8)
            img = img.reshape(64, 64, FRAME_STACK)
            for i in range(FRAME_STACK):
                large_img[64:, i * 64:(i + 1) * 64] = img[:, :, i]
        if len(cumulative_frames) == FRAME_STACK:
            for i in range(FRAME_STACK):
                large_img[:64, i * 64:(i + 1) * 64] = cumulative_frames[i]
        large_img = large_img.astype(np.uint8)
        return large_img

    def _reset(self):
        """Reset counters and the frame stack, then defer to CarRacing."""
        self._internal_counter = 0
        self.cumulative_frames = None
        self._has_rendered = False
        return super(VAERacingStack, self)._reset()

    def _render(self, mode='human', close=False):
        """Track that a render happened so _step can skip its forced render."""
        if mode == 'human' or mode == 'rgb_array':
            self._has_rendered = True
        return super(VAERacingStack, self)._render(mode=mode, close=close)

    def _step(self, action):
        """Step the underlying env, update the frame stack, and return the
        VAE latent of the stack as the observation."""
        # Force an offscreen render if the caller never rendered this step.
        if not self._has_rendered:
            self._render("rgb_array")
            self._has_rendered = False
        if action is not None:
            if not self.discrete_mode:
                # Continuous mode: clip steering/gas/brake; gas is remapped
                # from [-1, 1] to [0, 1].
                action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
                action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
                action[1] = (action[1] + 1.0) / 2.0
                action[2] = _clip(action[2])
            else:
                '''
                in discrete setting:
                if action[0] is the highest, then agent does nothing
                if action[1] is the highest, then agent hits the pedal
                if -action[1] is the highest, then agent hits the brakes
                if action[2] is the highest, then agent turns left
                if action[3] is the highest, then agent turns right
                '''
                # NOTE(review): the note above says action[3] for "right",
                # but the code uses -action[2] -- presumably the note is
                # stale; verify against the caller's action layout.
                logits = [
                    _clip((action[0] + 1.0), hi=+2.0),
                    _clip(action[1]),
                    _clip(-action[1]),
                    _clip(action[2]),
                    _clip(-action[2])
                ]
                probs = softmax(logits)
                #chosen_action = np.argmax(logits)
                chosen_action = sample(probs)  # stochastic choice over the 5 options
                a = np.array([0.0, 0.0, 0.0])
                if chosen_action == 1:
                    a[1] = +1.0  # up
                if chosen_action == 2:
                    a[2] = +0.8  # down: 0.8 as recommended by the environment's built-in demo
                if chosen_action == 3:
                    a[0] = -1.0  # left
                if chosen_action == 4:
                    a[0] = +1.0  # right
                action = a
                #print("chosen_action", chosen_action, action)

        obs, reward, done, _ = super(VAERacingStack, self)._step(action)

        # Maintain a rolling window of the last FRAME_STACK processed frames;
        # on the first step the window is filled with copies of frame 0.
        if self.cumulative_frames is not None:
            self.cumulative_frames.pop(0)
            self.cumulative_frames.append(_process_frame_green(obs))
        else:
            self.cumulative_frames = [_process_frame_green(obs)] * FRAME_STACK

        # The observation handed to the agent is the VAE latent of the stack.
        self.z = z = _compress_frames(self.cumulative_frames, self.vae)

        if self.full_episode:
            # Never signal done early in full-episode mode.
            return z, reward, False, {}

        self._internal_counter += 1
        if self._internal_counter > TIME_LIMIT:
            done = True

        #img = self._get_image(self.z, self.cumulative_frames)
        #imageio.imwrite('dump/'+('%0*d' % (4, self._internal_counter))+'.png', img)

        return z, reward, done, {}
for i in range(steps): ob = obs[i:i+1] # (1, 64, 64, 1) action = oh_actions[i:i+1] # (1, n) z = vae.encode(ob) # (1, 32) VAE done! rnn_z = np.expand_dims(z, axis=0) # (1, 1, 32) action = np.expand_dims(action, axis=0) # (1, 1, n) input_x = np.concatenate([rnn_z, action], axis=2) # (1, 1, 32+n) feed = {rnn.input_x: input_x, rnn.initial_state: state} # predict the next state and next z. if pz is not None: # decode from the z frame = vae.decode(pz[None]) frame2 = vae.decode(z) #neglogp = neg_likelihood(logmix, mean, logstd, z.reshape(32,1)) #imsave(output_dir + '/%s_origin_%.2f.png' % (pad_num(i), np.exp(-neglogp)), 255.*ob.reshape(64, 64)) #imsave(output_dir + '/%s_reconstruct.png' % pad_num(i), 255. * frame[0].reshape(64, 64)) img = concat_img(255.*ob, 255*frame2, 255.*frame) imsave(output_dir + '/%s.png' % pad_num(i), img) (logmix, mean, logstd, state) = rnn.sess.run([rnn.out_logmix, rnn.out_mean, rnn.out_logstd, rnn.final_state], feed) # Sample the next frame's state. pz = sample_z(logmix, mean, logstd, OUTWIDTH, T)