def __init__(self, arglist):
    # World-Models controller: pretrained VAE (vision) + MDN-RNN (memory)
    # feeding a tiny evolved policy with 2 action outputs.
    self.env_name = arglist.game
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json(arglist.vae_file)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json(arglist.rnn_file)
    # Hidden state carried between steps.
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    # Controller input width depends on which RNN features EXP_MODE selects.
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 2)
        self.bias_output = np.random.randn(2)
        # total evolvable parameters: both layers' weights + biases
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
    else:
        # Single linear layer: input -> 2 actions.
        self.weight = np.random.randn(self.input_size, 2)
        self.bias = np.random.randn(2)
        self.param_count = (self.input_size) * 2 + 2
    self.render_mode = False
def __init__(self):
    # Car-racing controller: pretrained VAE + MDN-RNN loaded from fixed
    # json paths, driving a policy with 3 action outputs (steer/gas/brake).
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    # RNN hidden state carried across steps.
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        # evolvable parameter count over both layers
        self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size)*3+3
    self.render_mode = False
def __init__(self, load_model=True):
    # PushBlock (Unity) controller: VAE + MDN-RNN with optional weight
    # restore; `load_model=False` keeps the randomly initialized nets.
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        # parameter budget for the evolution strategy
        self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size)*3+3
    self.render_mode = False
def __init__(self, load_model=True): self.env_name = './VisualPushBlock_withBlock_z_info.x86_64' #'./VisualPushBlock.x86_64' self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True) if load_model: self.vae.load_json('vae/vae.json') self.rnn.load_json('rnn/rnn.json') self.state = rnn_init_state(self.rnn) self.rnn_mode = True self.input_size = rnn_output_size(EXP_MODE) self.z_size = z_size if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer ###CHANGE is made here self.hidden_size = 40 self.weight_hidden = np.random.randn(self.input_size, self.hidden_size) self.bias_hidden = np.random.randn(self.hidden_size) self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE) self.bias_output = np.random.randn(ACTION_SIZE) self.param_count = ((self.input_size + 1) * self.hidden_size) + ( self.hidden_size * ACTION_SIZE + ACTION_SIZE) else: self.weight = np.random.randn(self.input_size, ACTION_SIZE) self.bias = np.random.randn(ACTION_SIZE) self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE self.render_mode = False
def __init__(self, arglist, action_space, scope, load_model=True):
    """Multi-agent controller with opponent modeling.

    Wires a pretrained VAE + MDN-RNN world model to a linear (or
    one-hidden-layer) policy over the RNN features, plus an opponent
    action-prediction model.

    Args:
        arglist: experiment options (model dirs, agent_num, timestep,
            action_space, inference flag, ...).
        action_space: dimensionality of this agent's action vector.
        scope: suffix for the opponent-model variable scope.
        load_model: restore pretrained VAE/RNN weights when True.
    """
    self.action_space = action_space
    self.arglist = arglist
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    # Inference-time RNN hyperparameters: batch of 1, one step at a time;
    # input is z (32) + own action + all opponents' action histories.
    hps_sample = hps_model._replace(
        batch_size=1,
        input_seq_width=32 + arglist.action_space +
        (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
        max_seq_len=1,
        use_recurrent_dropout=0,
        is_training=0)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json(arglist.vae_model_dir)
        self.rnn.load_json(arglist.rnn_model_dir)
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    if arglist.inference:
        # Inferred opponent actions are appended to the controller input.
        self.input_size = rnn_output_size(
            EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
    else:
        self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    # One *independent* action-history deque per opponent.  The original
    # used list multiplication, which replicates references to a single
    # deque so every opponent shared (and overwrote) the same buffer.
    self.act_traj = [
        collections.deque(np.zeros((arglist.timestep, arglist.action_space)),
                          maxlen=arglist.timestep)
        for _ in range(arglist.agent_num - 1)
    ]
    self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                 arglist.action_space, arglist.action_space,
                                 "oppo_model_{}".format(scope))
    self.inference = arglist.inference
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, self.action_space)
        self.bias_output = np.random.randn(self.action_space)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (
            self.hidden_size * self.action_space + self.action_space)
    else:
        self.weight = np.random.randn(self.input_size, self.action_space)
        self.bias = np.random.randn(self.action_space)
        self.param_count = (
            self.input_size) * self.action_space + self.action_space
class ModelMCTS(Model):
    # Car-racing model that picks actions by Monte-Carlo tree search over
    # sampled candidate actions instead of a learned policy head.

    def __init__(self, load_model=True):
        self.env_name = "carracing"
        self.env = make_env(self.env_name, seed=SEED, render_mode=render_mode, full_episode=False)
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('../vae/vae.json')
            self.rnn.load_json('../rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False
        # Persisted search tree, reused across get_action calls.
        self.mct = None

    def get_action(self, z):
        # Sample candidate values per action dimension (steer in [-1,1],
        # gas/brake in [0,1]) and let MCTS pick among their combinations.
        a = random_linear_sample(-1, 1)
        b = random_linear_sample(0, 1)
        c = random_linear_sample(0, 1)
        actions = dp(a, b, c)
        # Reuse the previous tree (old_tree) to warm-start the search.
        action, self.mct = mcts.mcts(z, self.env, actions, old_tree=self.mct,
                                     tree_depth=6, simulate_depth=200)
        # Advance the world-model hidden state with the chosen action.
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action
def __init__(self, full_episode=False):
    # VAE-observation racing env: observations are the z latent vector
    # from a pretrained VAE rather than raw pixels.
    super(VAERacing, self).__init__()
    self._internal_counter = 0
    # Latent width comes from the game registry entry.
    self.z_size = games['vae_racing'].input_size
    self.vae = ConvVAE(batch_size=1, z_size=self.z_size, gpu_mode=False,
                       is_training=False, reuse=True)
    # Weights file is named by latent size, e.g. vae/vae_16.json.
    self.vae.load_json('vae/vae_'+str(self.z_size)+'.json')
    self.full_episode = full_episode
    # Unbounded observation space over the z vector.
    high = np.array([np.inf] * self.z_size)
    self.observation_space = Box(-high, high)
    self._has_rendered = False
    self.real_frame = None
def __init__(self,
             env,
             batchsize=64,
             input_size=(64, 64),
             num_frame_stack=4,
             gamma=0.95,
             frame_skip=1,
             train_freq=4,
             initial_epsilon=1.0,
             min_epsilon=0.1,
             render=True,
             epsilon_decay_steps=int(1e6),
             min_experience_size=int(1e3),
             experience_capacity=int(1e5),
             network_update_freq=5000,
             regularization=1e-6,
             optimizer_params=None,
             action_map=None):
    # DQN-style agent over VAE-encoded frames: epsilon-greedy exploration,
    # frame stacking, experience replay sized by the *_experience params.
    self.vae = ConvVAE(batch_size=batchsize, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    # Discrete action set: either an explicit map or the env's own space.
    if action_map is not None:
        self.dim_actions = len(action_map)
    else:
        self.dim_actions = env.action_space.n
    self.network_update_freq = network_update_freq
    self.action_map = action_map
    self.env = env
    self.batchsize = batchsize
    self.num_frame_stack = num_frame_stack
    self.gamma = gamma
    self.frame_skip = frame_skip
    self.train_freq = train_freq
    self.initial_epsilon = initial_epsilon
    self.min_epsilon = min_epsilon
    self.epsilon_decay_steps = epsilon_decay_steps
    self.render = render
    self.min_experience_size = min_experience_size
    self.input_size = input_size
    self.regularization = regularization
    # Default Adam-style hyperparameters when none are supplied.
    self.optimizer_params = optimizer_params or dict(learning_rate=0.0004, epsilon=1e-7)
    self.do_training = True
    # Epsilon used when evaluating (no exploration).
    self.playing_epsilon = 0.0
    self.session = None
    # State tensor: stacked frames of the configured input size.
    self.state_size = (self.num_frame_stack,) + self.input_size
    self.global_counter = 0
    self.episode_counter = 0
def __init__(self, full_episode=False, discrete_mode=False):
    # Frame-stacked VAE racing env: the VAE consumes FRAME_STACK channels
    # (stacked consecutive frames) instead of a single RGB image.
    super(VAERacingStack, self).__init__()
    self._internal_counter = 0
    self.z_size = games['vae_racing_stack'].input_size
    self.vae = ConvVAE(batch_size=1, z_size=self.z_size,
                       num_channel=FRAME_STACK, gpu_mode=False,
                       is_training=False, reuse=True)
    # Weights file is named by stack depth, e.g. vae/vae_stack_4.json.
    self.vae.load_json('vae/vae_stack_' + str(FRAME_STACK) + '.json')
    self.full_episode = full_episode
    high = np.array([np.inf] * self.z_size)
    self.observation_space = Box(-high, high)
    # Rolling buffer of recent frames; filled lazily on reset/step.
    self.cumulative_frames = None
    self._has_rendered = False
    self.discrete_mode = discrete_mode
def __init__(self, model_name='', load_model=True, load_full_model=False, full_model_path=''):
    # Car-racing controller with two loading modes: a full pretrained
    # bundle (load_full_model) or per-model-name files under vae_path /
    # rnn_path (load_model).
    self.model_name = model_name
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_full_model:
        self.vae.load_json(os.path.join(full_model_path, 'vae.json'))
        self.rnn.load_json(os.path.join(full_model_path, 'rnn.json'))
    elif load_model:
        self.vae.load_json(
            os.path.join(vae_path, self.model_name + '_vae.json'))
        self.rnn.load_json(
            os.path.join(rnn_path, self.model_name + '_rnn.json'))
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, 3)
        self.bias_output = np.random.randn(3)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
    else:
        self.weight = np.random.randn(self.input_size, 3)
        self.bias = np.random.randn(3)
        self.param_count = (self.input_size) * 3 + 3
    self.render_mode = False
def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
    # CarRacing wrapper with frame-history state, a fixed discrete action
    # dictionary, and pretrained VAE/RNN loaded for feature extraction.
    self.name = type + str(time.time())
    # NOTE(review): the RNG seed is hard-coded to 30 while the `seed`
    # parameter is only stored, never used for seeding — confirm intended.
    random.seed(30)
    self.env = make_env('CarRacing-v0', random.randint(1, 10000000), render_mode=False, full_episode=True)
    self.image_dimension = [64, 64]
    self.history_pick = history_pick
    # Flat state size: history_pick stacked 64x64 frames.
    self.state_space_size = history_pick * np.prod(self.image_dimension)
    self.action_space_size = 5
    self.state_shape = [None, self.history_pick] + list(self.image_dimension)
    self.history = []
    # Discrete index -> [steer, gas, brake] mapping.
    self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 0.8], 4: [0, 0, 0]}
    self.seed = seed
    self.detect_edges = detect_edges
    self.detect_grass = detect_grass
    self.flip = flip
    self.flip_episode = False
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn.load_json('rnn/rnn.json')
def __init__(self, sess=None, summary_writer=None, rl_training=False,
             reuse=False, cluster=None, index=0, device='/gpu:0',
             ppo_load_path=None, ppo_save_path=None, load_worldmodel=True,
             ntype='worldmodel'):
    """PPO policy backed by a pretrained world model (VAE + MDN-RNN).

    Fix: the original signature used
    ``summary_writer=tf.summary.FileWriter("logs/")`` as a default
    argument.  Default arguments are evaluated once at import time, so
    that opened the log writer as a module side effect and shared a
    single writer across every instance.  The writer is now created
    lazily per call; passing an explicit writer behaves as before.
    """
    if summary_writer is None:
        summary_writer = tf.summary.FileWriter("logs/")
    # Checkpoint paths are suffixed by the network type.
    self.policy_model_path_load = ppo_load_path + ntype
    self.policy_model_path_save = ppo_save_path + ntype
    self.rl_training = rl_training
    self.use_norm = True
    self.reuse = reuse
    self.sess = sess
    self.cluster = cluster
    self.index = index
    self.device = device
    # Pretrained world-model components (vision + memory).
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    if load_worldmodel:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.input_size = rnn_output_size(EXP_MODE)
    self._create_graph()
    self.rl_saver = tf.train.Saver()
    self.summary_writer = summary_writer
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing env whose observations are the VAE latent z (32)
    concatenated with the MDN-RNN hidden state h (256)."""

    def __init__(self, load_model=True, full_episode=False):
        """Build the VAE/RNN and define the 288-d observation space."""
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.vae = CVAE(batch_size=1)
        self.rnn = MDNRNN(hps_sample)
        if load_model:
            self.vae.load_json('tf_vae/vae.json')
            self.rnn.load_json('tf_rnn/rnn.json')
        self.rnn_states = rnn_init_state(self.rnn)
        self.full_episode = False
        # Fixes: `shape` must be a tuple — the original `(32+256)` is just
        # the int 288.  np.NINF/np.Inf were removed in NumPy 2.0, so use
        # the inf constants directly (identical values).
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 uint8 frame into (z, mu, logvar)."""
        # np.float was removed in NumPy 1.24; builtin float is the same
        # float64 dtype.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def reset(self):
        """Reset RNN state and return the initial (z, h) observation."""
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): super(CarRacingWrapper, self) deliberately skips
        # CarRacingWrapper in the MRO ("calls step" per the original
        # comment) — confirm before changing.
        z_h = super(CarRacingWrapper, self).reset()  # calls step
        return z_h

    def _step(self, action):
        """Step the base env and return (z_h, reward, done, info)."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z, _, _ = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        return z_h, reward, done, {}
def __init__(self, load_model=True):
    # Atari Pong controller: action count comes from the env, so the RNN
    # hyperparameters are patched with num_actions before construction.
    self.env_name = "Pong"
    self._make_env()
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
    self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32
    if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
        # Hidden-layer mode was never ported to the atari action head.
        raise Exception("not ported for atari")
        self.hidden_size = 40
        self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.weight_output = np.random.randn(self.hidden_size, self.num_actions)
        self.bias_output = np.random.randn(self.num_actions)
        self.param_count = ((self.input_size + 1) * self.hidden_size) + (
            (self.hidden_size + 1) * self.num_actions)
    else:
        # TODO: Not known until env.action_space is queried...
        self.weight = np.random.randn(self.input_size, self.num_actions)
        self.bias = np.random.randn(self.num_actions)
        self.param_count = (self.input_size + 1) * self.num_actions
    self.render_mode = False
def __init__(self, load_model=True, full_episode=False):
    """Build the VAE + MDN-RNN wrappers around the car-racing env.

    Args:
        load_model: restore pretrained VAE/RNN weights from json.
        full_episode: forwarded to the base wrapper's constructor.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
    if load_model:
        self.vae.load_json('tf_vae/vae.json')
        self.rnn.load_json('tf_rnn/rnn.json')
    self.rnn_states = rnn_init_state(self.rnn)
    self.full_episode = False
    # Observation is z (32) concatenated with h (256).  Fixes: `shape`
    # must be a tuple — the original `(32+256)` is just the int 288.
    # np.NINF/np.Inf were removed in NumPy 2.0; use the inf constants.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))
def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
    # Generic atari controller: RNN input width is z_size plus the number
    # of actions (self.na, set by make_env from the env's action space).
    self.env_name = env_name
    self.make_env()
    self.z_size = 32
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
    self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)
    if load_model:
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True
    self.input_size = rnn_output_size(EXP_MODE)
    # Policy weights are built separately once sizes are known.
    self.init_controller()
    self.render_mode = False
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git

    Loads a pretrained 32-d ConvVAE from tf_vae/vae.json, decodes
    `args.count` random latent vectors, and shows them in a matplotlib
    grid (the savefig call is left commented out).
    """
    z_size = 32
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster
    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    # Decode samples drawn from the unit-normal prior.
    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    n = args.count
    plt.figure(figsize=(20, 4))
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    #plt.savefig( image_path )
    plt.show()
def __init__(self, full_episode=False, pure_world=False):
    # VAE racing env augmented with a learned world-model output: the
    # observation is either the 10-d world features alone (pure_world)
    # or z concatenated with them.
    super(VAERacingWorld, self).__init__()
    self._internal_counter = 0
    self.z_size = games['vae_racing'].input_size
    self.vae = ConvVAE(batch_size=1, z_size=self.z_size, gpu_mode=False,
                       is_training=False, reuse=True)
    self.vae.load_json('vae/vae_' + str(self.z_size) + '.json')
    self.full_episode = full_episode
    if pure_world:
        high = np.array([np.inf] * 10)
    else:
        high = np.array([np.inf] * (self.z_size + 10))
    self.observation_space = Box(-high, high)
    self._has_rendered = False
    self.real_frame = None
    # Small pretrained forward model producing the 10-d world features.
    self.world_model = SimpleWorldModel(obs_size=16, action_size=3, hidden_size=10)
    world_model_path = "./log/learn_vae_racing.cma.4.64.best.json"
    self.world_model.load_model(world_model_path)
    self.pure_world_mode = pure_world
class Model:
    ''' simple one layer model for car racing '''

    def __init__(self):
        """Load pretrained VAE + MDN-RNN and init the policy weights."""
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size)*3+3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the underlying gym environment."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert a raw 64x64x3 uint8 frame to (z, mu, logvar)."""
        # np.float was removed in NumPy 1.24; builtin float is the same
        # float64 dtype the old alias resolved to.
        result = np.copy(obs).astype(float)/255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization: z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode a latent vector back to a 64x64x3 uint8 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z):
        """Map (z, h) features to a [steer, gas, brake] action and step the RNN."""
        h = rnn_output(self.state, z, EXP_MODE)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # Rescale gas to [0,1]; clip brake.
        action[1] = (action[1]+1.0) / 2.0
        action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the policy weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size+1)*self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Load controller parameters from a json file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Gaussian random parameter vector for ES initialization."""
        return np.random.randn(self.param_count)*stdev
if not os.path.exists(arglist.series_dir): os.makedirs(arglist.series_dir) filelist = os.listdir(arglist.data_dir) filelist.sort() filelist = filelist[0:10000] dataset, action_dataset, oppo_action_dataset = load_raw_data_list( filelist, arglist) reset_graph() if arglist.use_vae: vae = ConvVAE( z_size=arglist.z_size, batch_size=arglist.batch_size, learning_rate=arglist.lr, kl_tolerance=arglist.kl_tolerance, is_training=False, reuse=False, gpu_mode=True) # use GPU on batchsize of 1000 -> much faster vae.load_json(os.path.join(arglist.vae_path, 'vae.json')) mu_dataset = [] logvar_dataset = [] action_dataset_real = [] oppo_action_dataset_real = [] for i in range(len(dataset)): data_batch = dataset[i] if len(data_batch) <= arglist.batch_size: continue else:
# Build the training set either from raw images or preprocessed records.
if arglist.use_image:
    dataset = create_dataset_with_image(filelist, arglist.data_dir)
else:
    dataset = create_dataset(filelist, arglist)

# split into batches:
total_length = len(dataset)
num_batches = int(np.floor(total_length/arglist.batch_size))
print("num_batches", num_batches)

reset_graph()

vae = ConvVAE(z_size=arglist.z_size,
              batch_size=arglist.batch_size,
              learning_rate=arglist.lr,
              kl_tolerance=arglist.kl_tolerance,
              is_training=True,
              reuse=False,
              gpu_mode=True)

# train loop:
print("train", "step", "loss", "recon_loss", "kl_loss")
for epoch in range(arglist.epoch):
    np.random.shuffle(dataset)
    for idx in range(num_batches):
        batch = dataset[idx*arglist.batch_size:(idx+1)*arglist.batch_size]
        # Fix: np.float was removed in NumPy 1.24; builtin float is the
        # same float64 dtype.  Pixels are scaled to [0, 1].
        obs = np.array(batch).astype(float)/255.0
        feed = {vae.x: obs,}
filelist.sort()
filelist = filelist[0:10000]
#print("check total number of images:", count_length_of_filelist(filelist))
dataset = create_dataset(filelist)

# split into batches:
total_length = len(dataset)
num_batches = int(np.floor(total_length/batch_size))
print("num_batches", num_batches)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=True,
              reuse=False,
              gpu_mode=True)

# train loop:
print("train", "step", "loss", "recon_loss", "kl_loss")
for epoch in range(NUM_EPOCH):
    np.random.shuffle(dataset)
    for idx in range(num_batches):
        batch = dataset[idx*batch_size:(idx+1)*batch_size]
        # Fix: np.float was removed in NumPy 1.24; builtin float keeps the
        # same float64 dtype.  NOTE(review): unlike sibling train scripts,
        # this one does not divide by 255 — confirm create_dataset already
        # normalizes the pixels.
        obs = batch.astype(float)
        feed = {vae.x: obs,}
batch_size = 1000 # treat every episode as a batch of 1000! learning_rate = 0.0001 kl_tolerance = 0.5 filelist = os.listdir(DATA_DIR) filelist.sort() filelist = filelist[0:1000] dataset, action_dataset = load_raw_data_list(filelist) reset_graph() vae = ConvVAE(z_size=z_size, batch_size=batch_size, learning_rate=learning_rate, kl_tolerance=kl_tolerance, is_training=False, reuse=False, gpu_mode=True) # use GPU on batchsize of 1000 -> much faster vae.load_json(os.path.join(model_path_name, 'vae.json')) mu_dataset = [] logvar_dataset = [] for i in range(len(dataset)): data_batch = dataset[i] mu, logvar, z = encode_batch(data_batch) mu_dataset.append(mu.astype(np.float16)) logvar_dataset.append(logvar.astype(np.float16)) if ((i + 1) % 100 == 0): print(i + 1)
filelist.sort() filelist = filelist[:NUM_DATA] #print("check total number of images:", count_length_of_filelist(filelist)) dataset = create_dataset(filelist, N=NUM_DATA, M=NUM_FRAMES) # split into batches: total_length = len(dataset) num_batches = int(np.floor(total_length / batch_size)) print("num_batches", num_batches) reset_graph() vae = ConvVAE(z_size=z_size, batch_size=batch_size, learning_rate=learning_rate, kl_tolerance=kl_tolerance, beta=beta, is_training=True, reuse=False, gpu_mode=True) print('Training beta-VAE with beta={:.1f}'.format(beta)) # train loop: print("train", "step", "loss", "recon_loss", "kl_loss") for epoch in range(NUM_EPOCH): np.random.shuffle(dataset) for idx in range(num_batches): batch = dataset[idx * batch_size:(idx + 1) * batch_size] obs = batch.astype(np.float) / 255.0 feed = {
def main(
        dirs,
        z_size=32,
        batch_size=100,
        learning_rate=0.0001,
        kl_tolerance=0.5,
        epochs=100,
        save_model=False,
        verbose=True,
        optimizer="Adam"
):
    """Train a ConvVAE on images streamed from DriveDataGenerator.

    Args:
        dirs: data directories passed to the generator.
        z_size / batch_size / learning_rate / kl_tolerance / optimizer:
            ConvVAE hyperparameters.
        epochs: maximum training epochs (early stopping on loss).
        save_model: periodically write tf_vae/vae.json checkpoints.
        verbose: print a per-step training log.

    Returns:
        The best (lowest) training loss observed.
    """
    if save_model:
        model_save_path = "tf_vae"
        if not os.path.exists(model_save_path):
            os.makedirs(model_save_path)

    gen = DriveDataGenerator(dirs, image_size=(64, 64), batch_size=batch_size,
                             shuffle=True, max_load=10000, images_only=True)
    num_batches = len(gen)

    reset_graph()
    vae = ConvVAE(z_size=z_size,
                  batch_size=batch_size,
                  learning_rate=learning_rate,
                  kl_tolerance=kl_tolerance,
                  is_training=True,
                  reuse=False,
                  gpu_mode=True,
                  optimizer=optimizer)

    early = EarlyStopping(monitor='loss', min_delta=0.1, patience=5,
                          verbose=verbose, mode='auto')
    early.set_model(vae)
    early.on_train_begin()

    best_loss = sys.maxsize

    if verbose:
        print("epoch\tstep\tloss\trecon_loss\tkl_loss")
    for epoch in range(epochs):
        for idx in range(num_batches):
            batch = gen[idx]
            # Fix: np.float was removed in NumPy 1.24; builtin float is
            # the same float64 dtype.  Scale pixels to [0, 1].
            obs = batch.astype(float)/255.0
            feed = {vae.x: obs,}
            (train_loss, r_loss, kl_loss, train_step, _) = vae.sess.run([
                vae.loss, vae.r_loss, vae.kl_loss, vae.global_step, vae.train_op
            ], feed)
            if train_loss < best_loss:
                best_loss = train_loss
            # Periodic checkpoint every 5000 optimizer steps.
            if save_model:
                if ((train_step+1) % 5000 == 0):
                    vae.save_json("tf_vae/vae.json")
        if verbose:
            print("{} of {}\t{}\t{:.2f}\t{:.2f}\t{:.2f}".format(
                epoch, epochs, (train_step+1), train_loss, r_loss, kl_loss))
        gen.on_epoch_end()
        early.on_epoch_end(epoch, logs={"loss": train_loss})
        if vae.stop_training:
            break
    early.on_train_end()

    # finished, final model:
    if save_model:
        vae.save_json("tf_vae/vae.json")

    return best_loss
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git

    Plots three rows: random prior samples, real frames from the data
    generator, and their VAE reconstructions; saves to samples_vae.png.
    """
    z_size = 64  # This needs to match the size of the trained vae
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster
    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    # Random samples from the unit-normal prior.
    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    gen = DriveDataGenerator(args.dirs, image_size=(64, 64),
                             batch_size=args.count, shuffle=True,
                             max_load=10000, images_only=True)
    # Fix: np.float was removed in NumPy 1.24; builtin float is the same
    # float64 dtype.  Pixels scaled to [0, 1].
    orig = gen[0].astype(float) / 255.0
    #mu, logvar = vae.encode_mu_logvar(orig)
    #recon = vae.decode( mu )
    recon = vae.decode(vae.encode(orig))

    n = args.count
    plt.figure(figsize=(20, 6), tight_layout=False)
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(3, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Random")
    for i in range(n):
        ax = plt.subplot(3, n, n + i + 1)
        plt.imshow(orig[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Real")
        ax = plt.subplot(3, n, (2 * n) + i + 1)
        plt.imshow(recon[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Reconstructed")
    plt.savefig("samples_vae.png")
    plt.show()
class Model:
    ''' simple one layer model for translating game state to actions'''

    def __init__(self, load_model=True):
        """Pong controller: VAE + MDN-RNN plus a greedy linear action head."""
        self.env_name = "Pong"
        self._make_env()
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        # Patch the RNN hyperparameters with the env's action count.
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions
        self.render_mode = False

    def _make_env(self):
        """Create the env and record its discrete action count."""
        # NOTE(review): `render_mode` here reads a module-level global,
        # not an attribute — confirm it is defined at import time.
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        pass  #TODO (Chazzz): eventually remove

    def reset(self):
        """Reset the RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert a raw 64x64x1 uint8 frame to (z, mu, logvar)."""
        # np.float was removed in NumPy 1.24; builtin float is the same
        # float64 dtype the old alias resolved to.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization: z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Pick the argmax discrete action and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        # print(len(h), " h:", h)
        #TODO: 256+32 (the 32 comes first)
        # So we could have 288*2*18 params, or 288*2*environment.action_space.n (6 for Pong)
        '''
        action = np.dot(h, self.weight) + self.bias
        action[0] = np.tanh(action[0])
        action[1] = sigmoid(action[1])
        action[2] = clip(np.tanh(action[2]))
        '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
            # h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            # action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # could probabilistically sample from softmax, but greedy
            action = np.argmax(np.matmul(h, self.weight) + self.bias)
        # action[1] = (action[1]+1.0) / 2.0
        # action[2] = clip(action[2])
        # print("Action:", action)
        # The RNN expects the action one-hot encoded.
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        # print("Action hot:", action_one_hot)
        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the policy weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(
                self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(
                self.input_size, self.num_actions)

    def load_model(self, filename):
        """Load controller parameters from a json file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed random parameter vector for ES initialization."""
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE and RNN weights (for ablations)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    '''Simple one-layer controller model for the PushBlock environment.

    Wraps a ConvVAE encoder and an MDN-RNN world model; the controller is a
    single linear (or optionally one-hidden-layer) map from the RNN output to
    a continuous action vector of size ACTION_SIZE.
    '''

    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False, worker_id=0):
        """Create the Unity environment for this model."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name,
                            seed=seed,
                            render_mode=render_mode,
                            full_episode=full_episode,
                            worker_id=worker_id)

    def reset(self):
        """Reset the MDN-RNN hidden state at the start of an episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Convert a raw RGB observation to a sampled VAE latent.

        Returns (z, mu, logvar); z is sampled as mu + exp(logvar/2) * eps.
        NOTE(review): unlike the other variants, the frame is NOT divided by
        255 here — presumably the env already yields floats; confirm upstream.
        """
        # result = np.copy(obs).astype(np.float)/255.0
        # np.float was removed in NumPy 1.24; builtin float is equivalent (float64)
        result = np.copy(obs).astype(float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Compute a tanh-squashed continuous action and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        # print('h', h.shape, h)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # actions stay in [-1, 1]; rescaling to [0, 1] was deliberately disabled
        self.state = rnn_next_state(self.rnn, z, action, self.state)  # update weights of MDN-RNN
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector into the controller weights/biases.

        Layout matches param_count: biases first, then the row-major weights.
        """
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(
                self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(
                self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (data[0] holds the vector)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Sample a random parameter vector (heavy-tailed Cauchy, scaled by stdev)."""
        # return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE and RNN parameters (debugging / random baseline)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class VAERacing(CarRacing):
    """CarRacing variant whose observations are VAE latent vectors.

    Wraps the pixel observation of the parent env: every frame is resized to
    64x64x3, encoded by a pre-trained ConvVAE, and the sampled latent z (or
    the mean mu when MU_MODE is set) is returned as the observation.
    """

    def __init__(self, full_episode=False):
        super(VAERacing, self).__init__()
        self._internal_counter = 0
        self.z_size = games['vae_racing'].input_size
        # print("vae_racing.py z", self.z_size)
        self.vae = ConvVAE(batch_size=1,
                           z_size=self.z_size,
                           gpu_mode=True,
                           is_training=False,
                           reuse=True)
        # print("vae_racing.py vae", self.vae)
        self.vae.load_json('vae/vae_' + str(self.z_size) + '.json')
        self.full_episode = full_episode
        # observation space is the unbounded latent vector
        high = np.array([np.inf] * self.z_size)
        self.observation_space = Box(-high, high)
        self._has_rendered = False
        self.real_frame = None

    def reset(self):
        """Reset the wrapped env and return the encoded first observation."""
        self._internal_counter = 0
        self._has_rendered = False
        self.real_frame = None
        obs = super(VAERacing, self).reset()
        # np.float was removed in NumPy 1.24; builtin float is equivalent (float64)
        result = np.copy(_process_frame(obs)).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        self.real_frame = result
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        if MU_MODE:
            return mu
        return z

    def render(self, mode='human', close=False):
        """Render via the parent env, remembering that a render happened."""
        if mode == 'human' or mode == 'rgb_array':
            self._has_rendered = True
        return super(VAERacing, self).render(mode=mode)

    def step(self, action):
        """Clip/rescale the action, step the env, and encode the new frame."""
        # print("action", action)
        if not self._has_rendered:
            # force a render so the parent env's pipeline stays warm
            self.render("rgb_array")
            self._has_rendered = False
        if action is not None:
            action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
            action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
            action[1] = (action[1] + 1.0) / 2.0  # gas: map [-1,1] -> [0,1]
            action[2] = _clip(action[2])
        obs, reward, done, _ = super(VAERacing, self).step(action)
        # np.float was removed in NumPy 1.24; builtin float is equivalent (float64)
        result = np.copy(_process_frame(obs)).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        self.real_frame = result
        # z = self.vae.encode(result).flatten()
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        if self.full_episode:
            # never signal done in full-episode mode
            if MU_MODE:
                return mu, reward, False, {}
            else:
                return z, reward, False, {}
        self._internal_counter += 1
        if self._internal_counter > TIME_LIMIT:
            done = True
        if MU_MODE:
            # print("mu", mu)
            return mu, reward, done, {}
        return z, reward, done, {}
output_dir = "vae_test_result" z_size=32 filelist = os.listdir(DATA_DIR) filelist = [f for f in filelist if '.npz' in f] obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"] obs = np.expand_dims(obs, axis=-1) obs = obs.astype(np.float32)/255.0 n = len(obs) vae = ConvVAE(z_size=z_size, batch_size=1, is_training=False, reuse=False, gpu_mode=False) vae.load_json(os.path.join(model_path_name, 'vae.json')) if not os.path.exists(output_dir): os.mkdir(output_dir) print(n, "images loaded") for i in range(n): frame = obs[i].reshape(1, 64, 64, 1) batch_z = vae.encode(frame) reconstruct = vae.decode(batch_z) imsave(output_dir+'/%s.png' % pad_num(i), 255.*frame[0].reshape(64, 64)) imsave(output_dir+'/%s_vae.png' % pad_num(i), 255.*reconstruct[0].reshape(64, 64))