    all_data.append([
        arrays["mus"],
        arrays["logvars"],
        arrays["robotstates"],
        arrays["actions"],
        arrays["dones"],
        arrays["rewards"],
    ])
n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _, _ in all_data])
chunksize = hps.batch_size * hps.max_seq_len  # frames per batch (100'000)
print("total frames: ", n_total_frames)
if n_total_frames < chunksize:
    raise ValueError("Not enough data: {} frames, but a single batch requires {}".format(
        n_total_frames, chunksize))

reset_graph()
model = MDNRNN(hps)
model.print_trainable_params()
vae = None
viewer = None
values_logs = None
start = time.time()
for epoch in range(1, N_EPOCHS + 1):
    # print('preparing data for epoch', epoch)
    batches_start = time.time()
    # flatten all sequences into one
    mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    robotstate_sequence = np.zeros((n_total_frames, 5), dtype=np.float32)
    action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
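# --- Illustrative sketch, not part of the original training script. ---
# Shows how the flattened frame arrays above could be sliced into
# (batch_size, max_seq_len) batches, which is why at least
# `hps.batch_size * hps.max_seq_len` frames are required. The names
# `hps`, `mu_sequence`, `action_sequence` are taken from the surrounding
# code; the actual navrep batching may differ in detail.
def example_z_batches(mu_sequence, action_sequence, hps):
    n_frames = mu_sequence.shape[0]
    chunksize = hps.batch_size * hps.max_seq_len  # frames per batch
    for b in range(n_frames // chunksize):
        chunk = slice(b * chunksize, (b + 1) * chunksize)
        # a contiguous run of frames, reshaped into parallel sequences
        z_batch = mu_sequence[chunk].reshape(hps.batch_size, hps.max_seq_len, -1)
        a_batch = action_sequence[chunk].reshape(hps.batch_size, hps.max_seq_len, -1)
        yield z_batch, a_batch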
    from navrep.scripts.train_rnn import _H
elif backend == "VAE1D_LSTM":
    from navrep.scripts.train_vae1d import _Z
    from navrep.scripts.train_rnn import _H

# load W / M model
model = None
if backend == "VAE_LSTM":
    vae_model_path = os.path.join(MODELDIR, "V", environment + "vae.json")
    reset_graph()
    vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
    vae.load_json(vae_model_path)
    hps = default_hps()
    hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
    rnn = MDNRNN(hps, gpu_mode=gpu)
    rnn.load_json(path)
elif backend == "VAE1D_LSTM":
    vae_model_path = os.path.join(MODELDIR, "V", environment + "vae1d.json")
    reset_graph()
    vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
    vae.load_json(vae_model_path)
    hps = default_hps()
    hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
    rnn = MDNRNN(hps, gpu_mode=gpu)
    rnn.load_json(path)
elif backend == "GPT":
    mconf = GPTConfig(BLOCK_SIZE, _H)
    model = GPT(mconf, gpu=gpu)
    arrays = np.load(path)
    all_data.append(
        [
            arrays["mus"],
            arrays["logvars"],
            arrays["actions"],
            arrays["dones"],
            arrays["rewards"],
        ]
    )
n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _ in all_data])
print("total frames: ", n_total_frames)

reset_graph()
model = MDNRNN(hps)

viewer = None
values_logs = None
for epoch in range(1, N_EPOCHS + 1):
    # print('preparing data for epoch', epoch)
    start = time.time()
    # flatten all sequences into one
    mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
    done_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    reward_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    i = 0
    random.shuffle(all_data)
from navrep.models.rnn import reset_graph, sample_hps_params, MDNRNN, get_pi_idx
from navrep.models.vae2d import ConvVAE

# parameters
TEMPERATURE = 0.5
_Z = 32

sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_actions_rewards_dones.npz"
)
rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")

reset_graph()
imrnn = MDNRNN(sample_hps_params, gpu_mode=False)
imvae = ConvVAE(batch_size=1, is_training=False, channels=3)
imvae.load_json(vae_model_path)
imrnn.load_json(rnn_model_path)

# load sequence image encoding
arrays = np.load(sequence_z_path)
sequence_action = arrays["actions"]
sequence_mu = arrays["mus"]
sequence_logvar = arrays["logvars"]
sequence_z = sequence_mu + np.exp(
    sequence_logvar / 2.0) * np.random.randn(*(sequence_mu.shape))
SEQUENCE_LENGTH = len(sequence_mu)

prev_z = sequence_z[0]
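# --- Illustrative sketch, not part of the original script body. ---
# The replay loop this setup enables: at each step the recorded action is
# fed to the RNN together with the previous latent, and the next latent is
# sampled from the MDN output (the full temperature-sampling logic appears
# in DreamEnv.step below). Initializing `rnn_state` from `zero_state`
# mirrors the dream environments; this is a sketch, not the exact script.
rnn_state = imrnn.sess.run(imrnn.zero_state)
for t in range(1, SEQUENCE_LENGTH):
    feed = {
        imrnn.input_z: np.reshape(prev_z, (1, 1, _Z)),
        imrnn.input_action: np.reshape(sequence_action[t - 1], (1, 1, 3)),
        imrnn.input_restart: np.zeros((1, 1)),
        imrnn.initial_state: rnn_state,
    }
    logmix, mean, logstd, rnn_state = imrnn.sess.run(
        [imrnn.out_logmix, imrnn.out_mean, imrnn.out_logstd, imrnn.final_state],
        feed,
    )
    # here one would sample next_z from (logmix, mean, logstd) at TEMPERATURE,
    # decode it with imvae.decode(next_z.reshape(1, _Z)), and set prev_z = next_z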
class DreamEnv(object):
    def __init__(
        self,
        temperature=0.25,
        initial_z_path=os.path.expanduser(
            "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
        ),
        rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
        vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
    ):
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.5  # should be the same as data rnn was trained with
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        initial_robotstate = arrays["robotstates"][0]
        ini_lidar_z = initial_mu + np.exp(
            initial_logvar / 2.0) * np.random.randn(*(initial_mu.shape))
        ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
        self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state
        for i in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z + _G)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }
        [logmix, mean, logstd, logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )
        OUTWIDTH = _Z + _G

        if self.TEMPERATURE == 0:
            # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            chosen_mean = mean[(range(OUTWIDTH), mixture_idx)]
            chosen_logstd = logstd[(range(OUTWIDTH), mixture_idx)]
            next_z = chosen_mean
        else:
            # sample from modelled MDN distribution
            mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
            mixprob -= mixprob.max()
            mixprob = np.exp(mixprob)
            mixprob /= mixprob.sum(axis=1).reshape(OUTWIDTH, 1)

            mixture_idx = np.zeros(OUTWIDTH)
            chosen_mean = np.zeros(OUTWIDTH)
            chosen_logstd = np.zeros(OUTWIDTH)
            for j in range(OUTWIDTH):
                idx = get_pi_idx(np.random.rand(), mixprob[j])
                mixture_idx[j] = idx
                chosen_mean[j] = mean[j][idx]
                chosen_logstd[j] = logstd[j][idx]
            rand_gaussian = np.random.randn(OUTWIDTH) * np.sqrt(self.TEMPERATURE)
            next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian
        if sample_hps_params.differential_z:
            next_z = self.prev_z + next_z

        next_restart = 0
        # if logrestart[0] > 0:
        #     next_restart = 1

        self.prev_z = next_z
        if override_next_z is not None:
            self.prev_z = override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def reset(self):
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
            return
        # get last z decoding
        rings_pred = (
            self.vae.decode(self.prev_z.reshape(1, _Z + _G)[:, :_Z])
            * self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)
        goal_pred = self.prev_z.reshape((_Z + _G,))[_Z:] * MAX_GOAL_DIST

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                # self.transform = rendering.Transform()
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()

            # Render in pyglet
            def make_circle(c, r, res=10):
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # LIDAR
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                lidar_angles = lidar_angles + np.pi / 2.  # make robot face up
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for ray_idx in range(len(scan)):
                    end_i = i_ray_ends[ray_idx]
                    end_j = j_ray_ends[ray_idx]
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if is_in_fov[ray_idx]:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1], lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                r = 0.3 / M_PER_PX
                angle = np.pi / 2.0
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Goal
                goalcolor = np.array([1., 1., 0.3])
                px_goal = goal_pred / M_PER_PX
                igoal = i - px_goal[1]  # rotate 90deg to face up
                jgoal = j + px_goal[0]
                # Goal line
                gl.glBegin(gl.GL_LINE_LOOP)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                gl.glVertex3f(i, j, 0)
                gl.glVertex3f(igoal, jgoal, 0)
                gl.glEnd()
                # Goal markers
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                triangle = make_circle((igoal, jgoal), r / 3., res=3)
                for vert in triangle:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen

    def close(self):
        self.render(close=True)

    def _get_dt(self):
        return self.DT

    def _get_viewer(self):
        return self.viewer
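# --- Illustrative usage sketch, not part of the original file. ---
# Rolls out the dream environment with small random actions and renders
# each predicted step. The 3-dim action follows the action arrays used
# above; nothing here is a confirmed navrep API beyond the DreamEnv
# methods defined in this file.
if __name__ == "__main__":
    env = DreamEnv(temperature=0.25)
    for _ in range(100):
        action = np.random.uniform(-1.0, 1.0, size=3)
        next_z, _, next_restart, _ = env.step(action)
        if not env.render(mode="human"):
            break  # window was closed
    env.close()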
class ImDreamEnv(object):
    def __init__(self, temperature=0.25):
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * np.random.randn(
            *(initial_mu.shape)
        )
        # other tools
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state
        for i in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }
        [logmix, mean, logstd, logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )
        OUTWIDTH = _Z

        if self.TEMPERATURE == 0:
            # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            chosen_mean = mean[(range(OUTWIDTH), mixture_idx)]
            chosen_logstd = logstd[(range(OUTWIDTH), mixture_idx)]
            next_z = chosen_mean
        else:
            # sample from modelled MDN distribution
            mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
            mixprob -= mixprob.max()
            mixprob = np.exp(mixprob)
            mixprob /= mixprob.sum(axis=1).reshape(OUTWIDTH, 1)

            mixture_idx = np.zeros(OUTWIDTH)
            chosen_mean = np.zeros(OUTWIDTH)
            chosen_logstd = np.zeros(OUTWIDTH)
            for j in range(OUTWIDTH):
                idx = get_pi_idx(np.random.rand(), mixprob[j])
                mixture_idx[j] = idx
                chosen_mean[j] = mean[j][idx]
                chosen_logstd[j] = logstd[j][idx]
            rand_gaussian = np.random.randn(OUTWIDTH) * np.sqrt(self.TEMPERATURE)
            next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian
        if sample_hps_params.differential_z:
            next_z = self.prev_z + next_z

        next_restart = 0
        # if logrestart[0] > 0:
        #     next_restart = 1

        self.prev_z = next_z
        if override_next_z is not None:
            self.prev_z = override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def reset(self):
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        img_pred = (self.vae.decode(self.prev_z.reshape(1, _Z)) * 255).astype(np.uint8)
        img_pred = img_pred.reshape(_64, _64, 3)
        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create pyglet image
            # pixels = [
            #     255, 0, 0,    0, 255, 0,    0, 0, 255,   # RGB values range from
            #     255, 0, 0,    255, 0, 0,    255, 0, 0,   # 0 to 255 for each color
            #     255, 0, 0,    255, 0, 0,    255, 0, 0,   # component.
            # ]
            from pyglet.gl.gl import GLubyte
            pixels = img_pred.flatten()
            rawData = (GLubyte * len(pixels))(*pixels)
            image_data = pyglet.image.ImageData(_64, _64, 'RGB', rawData)
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                # self.transform = rendering.Transform()
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()
            # Render in pyglet
            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # Image
                image_data.blit(96, 96)
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen
all_data = []
for path in files:
    arrays = np.load(path)
    all_data.append([
        arrays["mus"],
        arrays["logvars"],
        arrays["robotstates"],
        arrays["actions"],
        arrays["dones"],
        arrays["rewards"],
    ])
n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _, _ in all_data])
print("total frames: ", n_total_frames)

reset_graph()
model = MDNRNN(hps)
model.load_json(model_path)

for epoch in range(1):
    epoch_z_costs = []
    epoch_wrongaction_z_costs = []
    # print('preparing data for epoch', epoch)
    start = time.time()
    # flatten all sequences into one
    mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    robotstate_sequence = np.zeros((n_total_frames, 5), dtype=np.float32)
    action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
    done_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    reward_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    i = 0
class EnvEncoder(object):
    """ Generic class to encode the observations of an environment,
    look at EncodedEnv to see how it is typically used """
    def __init__(self,
                 backend, encoding,
                 rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
                 rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
                 vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
                 vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
                 gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
                 gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
                 vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
                 vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
                 gpu=False,
                 encoder_to_share_model_with=None,  # another EnvEncoder
                 ):
        LIDAR_NORM_FACTOR = None
        if backend == "GPT":
            from navrep.scripts.train_gpt import _Z, _H
        elif backend == "GPT1D":
            from navrep.scripts.train_gpt1d import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAE1DLSTM":
            from navrep.scripts.train_vae1dlstm import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAELSTM":
            from navrep.scripts.train_vaelstm import _Z, _H
        elif backend == "VAE_LSTM":
            from navrep.scripts.train_vae import _Z
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H
            from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
        self._Z = _Z
        self._H = _H
        self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
        self.encoding = encoding
        self.backend = backend
        if self.encoding == "V_ONLY":
            self.encoding_dim = _Z + _RS
        elif self.encoding == "VM":
            self.encoding_dim = _Z + _H + _RS
        elif self.encoding == "M_ONLY":
            self.encoding_dim = _H + _RS
        else:
            raise NotImplementedError
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.encoding_dim,), dtype=np.float32)
        # V + M Models
        if encoder_to_share_model_with is not None:
            self.vae = encoder_to_share_model_with.vae
            self.rnn = encoder_to_share_model_with.rnn
        else:
            # load world model
            if self.backend == "VAE_LSTM":
                reset_graph()
                self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(
                        seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn_model_path)
            elif self.backend == "VAE1D_LSTM":
                reset_graph()
                self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae1d_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(
                        seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn1d_model_path)
            elif self.backend == "GPT":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT(mconf, gpu=gpu)
                load_checkpoint(model, gpt_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "GPT1D":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT1D(mconf, gpu=gpu)
                load_checkpoint(model, gpt1d_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAELSTM":
                mconf = VAELSTMConfig(_Z, _H)
                model = VAELSTM(mconf, gpu=gpu)
                load_checkpoint(model, vaelstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAE1DLSTM":
                mconf = VAE1DLSTMConfig(_Z, _H)
                model = VAE1DLSTM(mconf, gpu=gpu)
                load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            else:
                raise NotImplementedError
        # other tools
        self.rings_def = generate_rings(_64, _64)
        self.viewer = None
        # environment state variables
        self.reset()

    def reset(self):
        if self.encoding in ["VM", "M_ONLY"]:
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                self.state = rnn_init_state(self.rnn)
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence = []
        self.lidar_z = np.zeros(self._Z)

    def close(self):
        if self.viewer is not None:
            self.viewer.close()

    def _get_last_decoded_scan(self):
        obs_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z)))
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            decoded_scan = (obs_pred * self.LIDAR_NORM_FACTOR).reshape((_L))
        else:
            rings_pred = obs_pred * self.rings_def["rings_to_bool"]
            decoded_scan = self.rings_def["rings_to_lidar"](rings_pred, _L).reshape((_L))
        return decoded_scan

    def _encode_obs(self, obs, action):
        """
        obs is (lidar, other_obs)
        where lidar is (time_samples, ray, channel)
        and other_obs is (5,) - [goal_x, goal_y, vel_x, vel_y, vel_theta] all in robot frame

        h is (32+2+512,), i.e. concat[lidar_z, robotstate, h rnn state]
        lidar_z is -inf, inf
        h rnn state is ?
        other_obs is -inf, inf
        """
        # convert lidar scan to obs
        lidar_scan = obs[0]  # latest scan only obs (buffer, ray, channel)
        lidar_scan = lidar_scan.reshape(1, _L).astype(np.float32)
        lidar_mode = "scans" if "1D" in self.backend else "rings"
        lidar_obs = scans_to_lidar_obs(lidar_scan, lidar_mode, self.rings_def, channel_first=False)
        self.last_lidar_obs = lidar_obs  # for rendering purposes
        # obs to z, mu, logvar
        mu, logvar = self.vae.encode_mu_logvar(lidar_obs)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        if NO_VAE_VAR:
            lidar_z = mu * 1.
        else:
            lidar_z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        # encode obs through V + M
        self.lidar_z = lidar_z
        if self.encoding == "V_ONLY":
            encoded_obs = np.concatenate([self.lidar_z, obs[1]], axis=0)
        elif self.encoding in ["VM", "M_ONLY"]:
            # get h
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                goal_z = obs[1][:2] / MAX_GOAL_DIST
                rnn_z = np.concatenate([lidar_z, goal_z], axis=-1)
                self.state = rnn_next_state(self.rnn, rnn_z, action, self.state)
                h = self.state.h[0]
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence.append(dict(obs=lidar_obs[0], state=obs[1][:2], action=action))
                self.gpt_sequence = self.gpt_sequence[:BLOCK_SIZE]
                h = self.rnn.get_h(self.gpt_sequence)
            # encoded obs
            if self.encoding == "VM":
                encoded_obs = np.concatenate([self.lidar_z, obs[1], h], axis=0)
            elif self.encoding == "M_ONLY":
                encoded_obs = np.concatenate([obs[1], h], axis=0)
        return encoded_obs

    def _render_rings_polar(self, close, save_to_file=False):
        if close:
            self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64, 1))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z))).reshape((_64, _64, 1))
            import matplotlib.pyplot as plt
            plt.ion()
            fig, (ax1, ax2) = plt.subplots(
                1, 2, subplot_kw=dict(projection="polar"), num="rings"
            )
            ax1.clear()
            ax2.clear()
            if self.viewer is None:
                self.rendering_iteration = 0
                self.viewer = fig
            self.rings_def["visualize_rings"](last_rings_obs, scan=None, fig=fig, ax=ax1)
            self.rings_def["visualize_rings"](last_rings_pred, scan=None, fig=fig, ax=ax2)
            ax1.set_ylim([0, 10])
            ax1.set_title("ground truth")
            ax2.set_ylim([0, 10])
            ax2.set_title("lidar reconstruction")
            # rings box viz
            fig2, (ax1, ax2) = plt.subplots(1, 2, num="2d")
            ax1.clear()
            ax2.clear()
            ax1.imshow(np.squeeze(last_rings_obs), cmap=plt.cm.Greys)
            ax2.imshow(np.squeeze(last_rings_pred), cmap=plt.cm.Greys)
            ax1.set_title("ground truth")
            ax2.set_title("lidar reconstruction")
            # update
            plt.pause(0.01)
            self.rendering_iteration += 1
            if save_to_file:
                fig.savefig(
                    "/tmp/encodedenv_polar{:04d}.png".format(self.rendering_iteration))
                fig2.savefig(
                    "/tmp/encodedenv_box{:04d}.png".format(self.rendering_iteration))

    def _render_rings(self, close, save_to_file=False):
        if close:
            self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z))).reshape((_64, _64))
            # Window and viewport size
            ring_size = _64  # grid cells
            padding = 4  # grid cells
            grid_size = 1  # px per grid cell
            WINDOW_W = (2 * ring_size + 3 * padding) * grid_size
            WINDOW_H = (1 * ring_size + 2 * padding) * grid_size
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.rendering_iteration = 0
            # Render in pyglet
            win = self.viewer.window
            win.switch_to()
            win.dispatch_events()
            win.clear()
            gl.glViewport(0, 0, VP_W, VP_H)
            # colors
            bgcolor = np.array([0.4, 0.8, 0.4])
            # Green background
            gl.glBegin(gl.GL_QUADS)
            gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
            gl.glVertex3f(0, VP_H, 0)
            gl.glVertex3f(VP_W, VP_H, 0)
            gl.glVertex3f(VP_W, 0, 0)
            gl.glVertex3f(0, 0, 0)
            gl.glEnd()
            # rings - observation
            w_offset = 0
            for rings in [last_rings_obs, last_rings_pred]:
                for i in range(ring_size):
                    for j in range(ring_size):
                        cell_color = 1 - rings[i, j]
                        cell_y = (padding + i) * grid_size  # px
                        cell_x = (padding + j + w_offset) * grid_size  # px
                        gl.glBegin(gl.GL_QUADS)
                        gl.glColor4f(cell_color, cell_color, cell_color, 1.0)
                        gl.glVertex3f(cell_x,             cell_y + grid_size, 0)
                        gl.glVertex3f(cell_x + grid_size, cell_y + grid_size, 0)
                        gl.glVertex3f(cell_x + grid_size, cell_y,             0)
                        gl.glVertex3f(cell_x,             cell_y,             0)
                        gl.glEnd()
                w_offset += ring_size + padding
            if save_to_file:
                pyglet.image.get_buffer_manager().get_color_buffer().save(
                    "/tmp/encodeder_rings{:04d}.png".format(self.rendering_iteration))
            # actualize
            win.flip()
            self.rendering_iteration += 1
            return self.viewer.isopen
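# --- Illustrative usage sketch, not part of the original file. ---
# Encodes one fake lidar observation with the V (+ M) models. The
# (scan, robotstate) tuple and the 5-dim robotstate layout follow the
# docstring of _encode_obs above; the constant scan values are placeholders.
if __name__ == "__main__":
    encoder = EnvEncoder(backend="VAE_LSTM", encoding="VM")
    fake_scan = np.full((1, _L, 1), 5.0, dtype=np.float32)  # flat 5 m readings
    robotstate = np.zeros(5, dtype=np.float32)  # [goal_x, goal_y, vel_x, vel_y, vel_theta]
    encoded = encoder._encode_obs((fake_scan, robotstate), np.zeros(3))
    print(encoded.shape)  # (_Z + 5 + _H,) for the "VM" encoding
    encoder.close()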