class ModelMCTS(Model):
    """World-model car-racing agent that chooses actions with MCTS
    instead of the learned linear controller of the parent class.
    """

    def __init__(self, load_model=True):
        self.env_name = "carracing"
        # Real environment handle used by the MCTS rollouts below.
        self.env = make_env(self.env_name, seed=SEED,
                            render_mode=render_mode, full_episode=False)
        # Frozen world model: VAE encoder + MDN-RNN dynamics.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False,
                           is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('../vae/vae.json')
            self.rnn.load_json('../rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # controller with one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size
                                + (self.hidden_size * 3 + 3))
        else:  # plain linear controller
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = self.input_size * 3 + 3
        self.render_mode = False
        # Search tree is kept between calls so MCTS can reuse sub-trees.
        self.mct = None

    def get_action(self, z):
        """Run MCTS from latent state ``z`` and return the chosen action."""
        steer = random_linear_sample(-1, 1)
        gas = random_linear_sample(0, 1)
        brake = random_linear_sample(0, 1)
        candidates = dp(steer, gas, brake)
        action, self.mct = mcts.mcts(z, self.env, candidates,
                                     old_tree=self.mct,
                                     tree_depth=6, simulate_depth=200)
        # Advance the MDN-RNN belief state with the executed action.
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action
class CarRacingMDNRNN(CarRacingWrapper):
    """CarRacing wrapper whose observations are the world-model latent:
    the VAE code z (32) concatenated with the MDN-RNN hidden state h (256).
    """

    def __init__(self, load_model=True, full_episode=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.vae = CVAE(batch_size=1)
        self.rnn = MDNRNN(hps_sample)
        if load_model:
            self.vae.load_json('tf_vae/vae.json')
            self.rnn.load_json('tf_rnn/rnn.json')
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): hard-coded False ignores the `full_episode` argument
        # (still forwarded to the superclass above) — confirm this is intended.
        self.full_episode = False
        # Fix: Box.shape must be a sequence — (32+256) is just the int 288,
        # so pass a 1-tuple. np.NINF/np.Inf were removed in NumPy 2.0;
        # use -np.inf / np.inf instead.
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame into (z, mu, logvar) via the VAE."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def reset(self):
        """Reset RNN state and the underlying env; return the latent obs."""
        self.rnn_states = rnn_init_state(self.rnn)
        # NOTE(review): super(CarRacingWrapper, self) deliberately skips
        # CarRacingWrapper.reset and invokes the grandparent ("calls step"
        # per the original comment) — confirm before changing.
        z_h = super(CarRacingWrapper, self).reset()  # calls step
        return z_h

    def _step(self, action):
        """Step the wrapped env, returning (z_h, reward, done, info)."""
        obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
        z, _, _ = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)
        if action is not None:  # don't compute state on reset
            self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
        return z_h, reward, done, {}
class Model:
    ''' simple one layer model for car racing '''

    def __init__(self):
        self.env_name = "carracing"
        # Frozen world model: VAE encoder + MDN-RNN dynamics.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the gym environment this controller will be evaluated in."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame to (z, mu, logvar) with the VAE."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode latent vector z back to a uint8 64x64x3 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z):
        """Map latent z (plus RNN state) to a [steer, gas, brake] action."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # Rescale gas from [-1, 1] to [0, 1]; clip brake.
        action[1] = (action[1] + 1.0) / 2.0
        action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a random flat parameter vector ~ N(0, stdev^2)."""
        return np.random.randn(self.param_count) * stdev
class Model:
    ''' simple one layer model for translating game state to actions'''

    def __init__(self, load_model=True):
        self.env_name = "Pong"
        self._make_env()  # sets self.env and self.num_actions
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        # RNN input width depends on the env's discrete action count.
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            # Hidden-layer controller is not implemented for Atari; the
            # original dead initializers after this raise were removed.
            raise Exception("not ported for atari")
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions
        self.render_mode = False

    def _make_env(self):
        # NOTE(review): reads a module-level `render_mode` global — confirm
        # it is defined in this module.
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        """No-op: env is created in __init__ via _make_env."""
        pass  # TODO (Chazzz): eventually remove

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x1 frame to (z, mu, logvar) with the VAE."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Greedily pick a discrete action index from the controller logits."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
        else:
            # Could probabilistically sample from softmax, but greedy.
            action = np.argmax(np.matmul(h, self.weight) + self.bias)
        # The RNN consumes the action one-hot encoded.
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(
                self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(
                self.input_size, self.num_actions)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return random parameters drawn from a scaled Cauchy distribution."""
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE and RNN parameters (for baselines)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    ''' simple one layer model for car racing '''

    def __init__(self, load_model=True):
        # Unity ML-Agents binary path, not a gym env id.
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False, worker_id=0):
        """Create the Unity environment (worker_id distinguishes parallel envs)."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode, worker_id=worker_id)

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw IMAGE_W x IMAGE_H x 3 frame to (z, mu, logvar)."""
        # Deliberately NOT divided by 255 here (original had it commented out).
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Map latent z (plus RNN state) to a tanh-squashed action vector."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        self.state = rnn_next_state(self.rnn, z, action, self.state)  # update MDN-RNN state
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(
                self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(
                self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return random parameters drawn from a scaled Cauchy distribution."""
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE and RNN parameters (for baselines)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class CarRacing:
    """CarRacing environment wrapper producing world-model (h, z) states.

    Parameters
    - type: Name of environment. Default is classic Car Racing game, but can
      be changed to introduce perturbations in environment
    - history_pick: Size of history
    - seed: List of seeds to sample from during training. Default is none
      (random games)
    """

    def __init__(self, type="CarRacing", history_pick=4, seed=None,
                 detect_edges=False, detect_grass=False, flip=False):
        self.name = type + str(time.time())
        # NOTE(review): seeds the *global* random module — affects callers too.
        random.seed(30)
        self.env = make_env('CarRacing-v0', random.randint(1, 10000000),
                            render_mode=False, full_episode=True)
        self.image_dimension = [64, 64]
        self.history_pick = history_pick
        self.state_space_size = history_pick * np.prod(self.image_dimension)
        self.action_space_size = 5
        self.state_shape = [None, self.history_pick] + list(self.image_dimension)
        self.history = []
        # Discrete index -> [steer, gas, brake] for the underlying env.
        self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0],
                            3: [0, 0, 0.8], 4: [0, 0, 0]}
        self.seed = seed
        self.detect_edges = detect_edges
        self.detect_grass = detect_grass
        self.flip = flip
        self.flip_episode = False
        # Frozen world model: VAE encoder + MDN-RNN dynamics.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')

    def sample_action_space(self):
        """Return a uniformly random discrete action index."""
        return np.random.randint(self.action_space_size)

    def map_action(self, action):
        """Translate a discrete action index to the continuous env action,
        mirroring left/right steering on flipped episodes."""
        if self.flip_episode and action <= 1:
            action = 1 - action
        return self.action_dict[action]

    def reset(self, test=False):
        """Reset env + RNN state; return (initial_state, 1)."""
        self.state_rnn = rnn_init_state(self.rnn)
        if self.seed:
            self.env.seed(random.choice(self.seed))
        self.flip_episode = random.random() > 0.5 and not test and self.flip
        state, self.state_rnn = self.encode_obs(
            self.env.reset(), self.state_rnn, np.array([0.5, 0.2, 0.8]))
        return state, 1

    def step(self, action, test=False):
        """Apply the action for a random number of frames (frame-skip);
        return (next_state, total_reward, done, info, 1)."""
        action = self.map_action(action)
        total_reward = 0
        # No frame-skip at test time; 2-4 repeated frames during training.
        n = 1 if test else random.choice([2, 3, 4])
        for i in range(n):
            next_state, reward, done, info = self.env.step(action)
            next_state, self.state_rnn = self.encode_obs(
                next_state, self.state_rnn, action)
            total_reward += reward
            info = {'true_done': done}
            if done:
                break
        return next_state, total_reward, done, info, 1

    def render(self):
        self.env.render()

    def process(self, state):
        """Append state to history and return (stacked_history, in_grass)."""
        self.add_history(state)
        in_grass = utils.in_grass(state)
        if len(self.history) < self.history_pick:
            # Left-pad with zero frames until the history buffer is full.
            zeros = np.zeros(self.image_dimension)
            result = np.tile(zeros, ((self.history_pick - len(self.history)), 1, 1))
            result = np.concatenate((result, np.array(self.history)))
        else:
            result = np.array(self.history)
        return result, in_grass

    def add_history(self, state):
        """Push a frame onto the bounded history buffer (FIFO)."""
        if len(self.history) >= self.history_pick:
            self.history.pop(0)
        self.history.append(state)

    def __str__(self):
        return self.name + '\nseed: {0}\nactions: {1}'.format(self.seed, self.action_dict)

    def encode_obs(self, obs, prev_state, action):
        """Encode raw obs to z with the VAE, then return the concatenated
        (h, z) feature and the advanced RNN state."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        h = rnn_output(prev_state, z, 4)
        next_state = rnn_next_state(self.rnn, z, np.array(action), prev_state)
        return np.concatenate([h, z]), next_state
class Model:
    ''' simple one layer model for Atari (discrete-action) games '''

    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()  # sets self.env and self.na (action count)
        self.z_size = 32
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        # RNN input width = latent size + one-hot action size.
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        # Controller is initialized after env creation since it needs self.na.
        self.init_controller()
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the env and record its discrete action count."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)
        self.na = self.env.action_space.n  # discrete by default.

    def init_controller(self):
        """Allocate controller weights sized by input_size and action count."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.na)
            self.bias_output = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.hidden_size + (
                self.hidden_size + 1) * self.na
        else:
            self.weight = np.random.randn(self.input_size, self.na)
            self.bias = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.na

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x1 frame to (z, mu, logvar) with the VAE."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z, epsilon=0.0):
        """Epsilon-greedy discrete action from the controller logits."""
        h = rnn_output(self.state, z, EXP_MODE)
        if np.random.rand() < epsilon:
            action = np.random.randint(0, self.na)
        else:
            if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer (ReLU)
                h = np.maximum(np.dot(h, self.weight_hidden) + self.bias_hidden, 0)
                action = np.argmax(np.dot(h, self.weight_output) + self.bias_output)
            else:
                action = np.argmax(np.dot(h, self.weight) + self.bias)
        # The RNN consumes the action one-hot encoded.
        oh_action = np.zeros(self.na)
        oh_action[action] = 1
        self.state = rnn_next_state(self.rnn, z, oh_action, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.na]
            self.weight_output = params_2[self.na:].reshape(
                self.hidden_size, self.na)
        else:
            self.bias = np.array(model_params[:self.na])
            self.weight = np.array(model_params[self.na:]).reshape(
                self.input_size, self.na)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return random parameters drawn from a scaled Cauchy distribution."""
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE and RNN parameters (for baselines)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    """Multi-agent world-model controller: MDN-RNN features plus (optionally
    inferred) opponent intents feed a small policy network."""

    def __init__(self, arglist, action_space, scope, load_model=True):
        self.action_space = action_space
        self.arglist = arglist
        # RNN input: own obs + own action + opponents' action trajectories.
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=arglist.obs_size + arglist.action_space
                + (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0,
            obs_size=arglist.obs_size)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.rnn.load_json(arglist.rnn_model_dir)
        self.state = self.rnn.rnn_init_state()
        self.rnn_mode = True
        print(arglist.inference)
        # Controller input size depends on whether opponent intents are
        # inferred (one action each) or raw trajectories are used.
        if arglist.inference == True:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) \
                + (arglist.agent_num - 1) * arglist.action_space
        elif arglist.inference == False:
            self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) \
                + (arglist.timestep) * (arglist.agent_num - 1) * arglist.action_space
        # Action-trajectory buffers, one per opponent.
        # BUG FIX: the original `[deque(...)] * (agent_num - 1)` replicated a
        # single deque object shared by every opponent; build an independent
        # deque per opponent instead.
        self.act_traj = [
            collections.deque(np.zeros((arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.inference = arglist.inference
        if arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) \
                + (self.hidden_size * self.action_space + self.action_space)
        else:
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (self.input_size) * self.action_space + self.action_space

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = self.rnn.rnn_init_state()

    def get_action(self, obs, act_traj):
        """Compute an action from obs + opponent trajectories; advances the
        RNN state and applies 20% epsilon exploration."""
        h = self.rnn.rnn_output(self.state, obs, act_traj, self.arglist.exp_mode)
        if self.arglist.inference:
            oppo_intents = []
            for i in range(self.arglist.agent_num - 1):
                # NOTE(review): shadows the `act_traj` parameter — confirm
                # this is intended (value is unused while the opponent
                # model is disabled).
                act_traj = self.act_traj[i]
                # Opponent model disabled; placeholder intent.
                intent = [0, 0]
                oppo_intents.append(intent)
            # Flatten to (agent_num-1) * action_space for the controller.
            oppo_intents = np.reshape(
                oppo_intents,
                ((self.arglist.agent_num - 1) * self.arglist.action_space))
            controller_input = np.concatenate((h, oppo_intents))
        else:
            controller_input = h
        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            x = np.tanh(np.dot(controller_input, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(x, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(controller_input, self.weight) + self.bias)
        for i in range(self.action_space):
            action[i] = clip(action[i])
        self.state = self.rnn.rnn_next_state(obs, action, self.act_traj, self.state)
        # Epsilon exploration.
        # NOTE(review): the same sampled value is repeated for every action
        # dimension — confirm this is intended.
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-3, 3)] * len(action)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if self.arglist.exp_mode == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.action_space]
            self.weight_output = params_2[self.action_space:].reshape(
                self.hidden_size, self.action_space)
        else:
            self.bias = np.array(model_params[:self.action_space])
            self.weight = np.array(model_params[self.action_space:]).reshape(
                self.input_size, self.action_space)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return random parameters drawn from a scaled Cauchy distribution."""
        return np.random.standard_cauchy(self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller and RNN parameters (for baselines)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    ''' simple one layer model for a 2-action game (steer-style control) '''

    def __init__(self, arglist):
        self.env_name = arglist.game
        # Frozen world model: VAE encoder + MDN-RNN dynamics.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the gym environment this controller will be evaluated in."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the MDN-RNN hidden state at episode start."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame to (z, mu, logvar) with the VAE."""
        # np.float was removed in NumPy 1.24; the builtin float is equivalent.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Sample z ~ N(mu, exp(logvar)) via the reparameterization trick.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        """Decode latent vector z back to a uint8 64x64x3 image."""
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        """Compute a 2-dim action, step the env with it (side effect), and
        advance the RNN state. Requires make_env() to have been called.
        NOTE(review): the locally computed `reward` is never returned or
        stored — confirm whether that is intended."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        if arglist.competitive:
            # Opponent is scripted; only our steering component is sent.
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)
        extra_reward = 0.0  # penalize for turning too frequently
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
            reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into controller weights/biases."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        """Load controller parameters from a JSON file (first element)."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Return a random flat parameter vector ~ N(0, stdev^2)."""
        return np.random.randn(self.param_count) * stdev
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise from baselines.common import set_global_seeds import baselines.common.tf_util as U from baselines import logger import numpy as np try: from mpi4py import MPI except ImportError: MPI = None vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True) rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True) vae.load_json('vae/vae.json') rnn.load_json('rnn/rnn.json') def learn(network, env, seed=None, total_timesteps=None, nb_epochs=None, # with default settings, perform 1M steps total nb_epoch_cycles=20, nb_rollout_steps=100, reward_scale=1.0, render=False, render_eval=False, noise_type='adaptive-param_0.2', normalize_returns=False, normalize_observations=True, critic_l2_reg=1e-2,
class Model:
    """Simple one-layer (optionally one-hidden-layer) controller for a TORCS
    driving environment, on top of a pretrained ConvVAE + MDN-RNN world model.

    The controller maps the RNN feature vector to a 3-dim action:
    steering in [-1, 1], acceleration rescaled to [0, 1], brake clipped.
    """

    def __init__(self, load_model=True):
        self.env_name = 'Carracing'
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 16
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False

    def make_env(self, client, seed=-1, render_mode=False, full_episode=False):
        """Attach a TORCS environment (the generic make_env call is bypassed)."""
        self.client = client
        self.render_mode = render_mode
        self.env = TorcsEnv(
            vision=False, throttle=True, gear_change=False
        )  # make_env(self.env_name, seed=seed, render_mode=render_mode, full_episode=full_episode)

    def reset(self):
        # Only the recurrent state is episodic; controller weights persist.
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Flatten the TORCS sensor dict and encode it to (z, mu, logvar)."""
        dictlist = []
        for key, value in obs.items():
            # These sensors are already lists; splice them in flat.
            if key == 'opponents' or key == 'track' or key == 'wheelSpinVel' or key == 'focus':
                dictlist = dictlist + value
            else:
                dictlist.append(value)
        obs = dictlist
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is the
        # documented drop-in replacement (same float64 semantics).
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 79, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: z = mu + sigma * eps.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Return a 3-dim action for latent z and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # action[0] (steering) already lies in [-1, 1] from tanh.
        action[1] = (action[1] + 1.0) / 2.0  # rescale to [0, 1]
        action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector (biases first, then weights)."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(
                self.input_size, 3)

    def load_model(self, filename):
        """Read controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters (for ablations)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    """Simple one-layer (optionally one-hidden-layer) controller for a Unity
    PushBlock environment, on top of a pretrained ConvVAE + MDN-RNN.
    """

    def __init__(self, load_model=True):
        # For Mac
        # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
        # For linux
        self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size)*3+3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False, workerid=1):
        """Create the wrapped Unity environment for this worker."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode, workerid=workerid)

    def reset(self):
        # Only the recurrent state is episodic; controller weights persist.
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is the
        # documented drop-in replacement (same float64 semantics).
        result = np.copy(obs).astype(float)/255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: z = mu + sigma * eps.
        z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Return a 3-dim tanh action for latent z and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector (biases first, then weights)."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size+1)*self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Read controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(self.param_count)*stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters (for ablations)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
# Third, Build the VAE vae = ConvVAE(z_size=z_size, batch_size=1, is_training=False, reuse=False, gpu_mode=False) vae.load_json(os.path.join('vae', 'vae.json')) # Fourth, build the RNN hps_atari_sample = hps_sample._replace(input_seq_width=z_size+na) OUTWIDTH = hps_atari_sample.output_seq_width rnn = MDNRNN(hps_atari_sample, gpu_mode=False) rnn.load_json(os.path.join('rnn', 'rnn.json')) print("All model loaded.") # Fifth, run the evaluation. -> We have no predictions about the first frame. start = time.time() state = rnn_init_state(rnn) # initialize the state. pz = None for i in range(steps): ob = obs[i:i+1] # (1, 64, 64, 1) action = oh_actions[i:i+1] # (1, n)
class MiniNetwork(object):
    """PPO policy wrapper that shares a pretrained VAE + MDN-RNN world model.

    Builds the policy/old-policy pair and bookkeeping variables on a
    distributed TF1 worker device, and exposes save/restore and summary
    helpers for training loops.
    """

    # NOTE(review): the FileWriter default is evaluated once at class-definition
    # time and shared across instances that rely on the default — confirm this
    # sharing is intended.
    def __init__(self, sess=None, summary_writer=tf.summary.FileWriter("logs/"), rl_training=False,
                 reuse=False, cluster=None, index=0, device='/gpu:0',
                 ppo_load_path=None, ppo_save_path=None, load_worldmodel=True, ntype='worldmodel'):
        # Checkpoint paths are prefix + network type (raises TypeError if the
        # prefixes are left as None).
        self.policy_model_path_load = ppo_load_path + ntype
        self.policy_model_path_save = ppo_save_path + ntype
        self.rl_training = rl_training
        self.use_norm = True
        self.reuse = reuse
        self.sess = sess
        self.cluster = cluster
        self.index = index
        self.device = device
        # World model components (weights restored from JSON when requested).
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_worldmodel:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.input_size = rnn_output_size(EXP_MODE)
        self._create_graph()
        self.rl_saver = tf.train.Saver()
        self.summary_writer = summary_writer

    def initialize(self):
        """Run global variable initialization in the attached session."""
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

    def reset_old_network(self):
        """Sync old policy from current policy and zero the win/game counters."""
        self.policy_ppo.assign_policy_parameters()
        self.policy_ppo.reset_mean_returns()
        self.sess.run(self.results_sum.assign(0))
        self.sess.run(self.game_num.assign(0))

    def _create_graph(self):
        """Build counters, summaries, and the PPO policy pair on this worker."""
        if self.reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse
        worker_device = "/job:worker/task:%d" % self.index + self.device
        with tf.device(
                tf.train.replica_device_setter(worker_device=worker_device,
                                               cluster=self.cluster)):
            self.results_sum = tf.get_variable(
                name="results_sum", shape=[], initializer=tf.zeros_initializer)
            self.game_num = tf.get_variable(name="game_num", shape=[],
                                            initializer=tf.zeros_initializer)
            self.global_steps = tf.get_variable(
                name="global_steps", shape=[], initializer=tf.zeros_initializer)
            # Win rate = wins / games (division by zero if no games recorded).
            self.win_rate = self.results_sum / self.game_num
            self.mean_win_rate = tf.summary.scalar(
                'mean_win_rate_dis', self.results_sum / self.game_num)
            self.merged = tf.summary.merge([self.mean_win_rate])
            mini_scope = "MiniPolicyNN"
            with tf.variable_scope(mini_scope):
                ob_space = self.input_size
                act_space_array = _SIZE_MINI_ACTIONS
                self.policy = Policy_net('policy', self.sess, ob_space, act_space_array)
                self.policy_old = Policy_net('old_policy', self.sess, ob_space, act_space_array)
                self.policy_ppo = PPOTrain('PPO', self.sess, self.policy, self.policy_old,
                                           lr=P.mini_lr, epoch_num=P.mini_epoch_num)
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.policy_saver = tf.train.Saver(var_list=var_list)

    def Update_result(self, result_list):
        """Accumulate wins (result > 0) and games played into the counters."""
        win = 0
        for i in result_list:
            if i > 0:
                win += 1
        self.sess.run(self.results_sum.assign_add(win))
        self.sess.run(self.game_num.assign_add(len(result_list)))

    def Update_summary(self, counter):
        """Write policy and win-rate summaries; return (steps, win_rate)."""
        print("Update summary........")
        policy_summary = self.policy_ppo.get_summary_dis()
        self.summary_writer.add_summary(policy_summary, counter)
        summary = self.sess.run(self.merged)
        self.summary_writer.add_summary(summary, counter)
        self.sess.run(self.global_steps.assign(counter))
        print("Update summary finished!")
        steps = int(self.sess.run(self.global_steps))
        win_game = int(self.sess.run(self.results_sum))
        all_game = int(self.sess.run(self.game_num))
        # ZeroDivisionError if no games have been recorded yet.
        win_rate = win_game / float(all_game)
        return steps, win_rate

    def get_win_rate(self):
        return float(self.sess.run(self.win_rate))

    def Update_policy(self, buffer):
        """Run a PPO training update from a rollout buffer."""
        self.policy_ppo.ppo_train_dis(buffer.observations, buffer.tech_actions,
                                      buffer.rewards, buffer.values,
                                      buffer.values_next, buffer.gaes,
                                      buffer.returns, verbose=False)

    def get_global_steps(self):
        return int(self.sess.run(self.global_steps))

    def save_policy(self):
        self.policy_saver.save(self.sess, self.policy_model_path_save)
        print("policy has been saved in", self.policy_model_path_save)

    def restore_policy(self):
        self.policy_saver.restore(self.sess, self.policy_model_path_load)
        print("Restore policy from", self.policy_model_path_load)
class Model:
    """Simple one-layer (optionally one-hidden-layer) 2-action controller on
    top of a pretrained ConvVAE + MDN-RNN, with 20% epsilon-random exploration.
    """

    def __init__(self, load_model=True):
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            # BUGFIX: bias_output was randn(3) while the output layer is
            # 2-wide everywhere else (weight_output, param_count, and
            # set_model_params all use 2) — broadcasting against a 3-vector
            # would fail in get_action.
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

    def reset(self):
        # Only the recurrent state is episodic; controller weights persist.
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is the
        # documented drop-in replacement (same float64 semantics).
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: z = mu + sigma * eps.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Return a clipped 2-dim action for latent z; 20% of the time a
        uniform random action in [-2, 2]^2 is substituted for exploration."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        action[0] = clip(action[0])
        action[1] = clip(action[1])
        # The RNN state is advanced with the policy action even when the
        # random exploration action is returned below.
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-2, 2), np.random.uniform(-2, 2)]
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector (biases first, then weights)."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        """Read controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters (for ablations)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
class Model:
    """Simple one-layer (optionally one-hidden-layer) controller for the
    CarRacing environment, on top of a pretrained ConvVAE + MDN-RNN.
    """

    def __init__(self, load_model=True):
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False):
        """Create the wrapped CarRacing environment."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode,
                            full_episode=full_episode)

    def reset(self):
        # Only the recurrent state is episodic; controller weights persist.
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        """Encode a raw 64x64x3 frame to (z, mu, logvar)."""
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is the
        # documented drop-in replacement (same float64 semantics).
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: z = mu + sigma * eps.
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Return [steer, gas, brake] for latent z and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        action[1] = (action[1] + 1.0) / 2.0  # gas rescaled to [0, 1]
        action[2] = clip(action[2])          # brake clipped
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        """Load a flat parameter vector (biases first, then weights)."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        """Read controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        # Heavy-tailed Cauchy initialization.
        return np.random.standard_cauchy(self.param_count) * stdev

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters (for ablations)."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)