Пример #1
0
 def __init__(self, temperature=0.25):
     """Set up the image dream environment and warm up the RNN state.

     Loads the image VAE and the MDN-RNN from fixed locations under
     ``~/navrep``, samples an initial latent from the first recorded frame
     (``mu + exp(logvar / 2) * noise``), resets the environment and then
     runs 20 zero-action steps while pinning the latent to that initial
     sample.

     Args:
         temperature: stored as ``self.TEMPERATURE``.
     """
     # --- constants ---
     self.TEMPERATURE = temperature
     self.DT = 0.2  # must match the timestep of the data the rnn was trained with

     # --- file locations ---
     dataset_file = os.path.expanduser(
         "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
     )
     rnn_json = os.path.expanduser("~/navrep/models/M/imrnn.json")
     vae_json = os.path.expanduser("~/navrep/models/V/imvae.json")

     # --- V + M models (both are built before either set of weights is loaded) ---
     reset_graph()
     self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
     self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
     self.vae.load_json(vae_json)
     self.rnn.load_json(rnn_json)

     # --- initial latent: one sample from the encoding of the first frame ---
     recorded = np.load(dataset_file)
     mu0 = recorded["mus"][0]
     logvar0 = recorded["logvars"][0]
     sigma0 = np.exp(logvar0 / 2.0)
     self.initial_z = mu0 + sigma0 * np.random.randn(*mu0.shape)

     # --- rendering handle, created lazily ---
     self.viewer = None

     # --- environment state variables ---
     self.reset()

     # --- hot-start the rnn state ---
     warmup_steps = 20
     for _ in range(warmup_steps):
         self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
Пример #2
0
 def __init__(
         self,
         temperature=0.25,
         initial_z_path=os.path.expanduser(
             "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
         ),
         rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
         vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
 ):
     """Set up the LiDAR dream environment and warm up the RNN state.

     Args:
         temperature: stored as ``self.TEMPERATURE``.
         initial_z_path: ``.npz`` file providing ``mus``, ``logvars`` and
             ``robotstates``; only index 0 of each is used.
         rnn_model_path: JSON weights handed to ``self.rnn.load_json``.
         vae_model_path: JSON weights handed to ``self.vae.load_json``.
     """
     # --- constants ---
     self.TEMPERATURE = temperature
     self.DT = 0.5  # must match the timestep of the data the rnn was trained with

     # --- V + M models (both are built before either set of weights is loaded) ---
     reset_graph()
     self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
     self.vae = ConvVAE(batch_size=1, is_training=False)
     self.vae.load_json(vae_model_path)
     self.rnn.load_json(rnn_model_path)

     # --- initial latent = [sampled lidar encoding, normalised goal] ---
     recorded = np.load(initial_z_path)
     mu0 = recorded["mus"][0]
     logvar0 = recorded["logvars"][0]
     robotstate0 = recorded["robotstates"][0]
     sigma0 = np.exp(logvar0 / 2.0)
     lidar_z0 = mu0 + sigma0 * np.random.randn(*mu0.shape)
     # first two robot-state entries, scaled down by MAX_GOAL_DIST
     goal_z0 = robotstate0[:2] / MAX_GOAL_DIST
     self.initial_z = np.concatenate([lidar_z0, goal_z0], axis=-1)

     # --- other tools ---
     self.rings_def = generate_rings(64, 64)
     self.viewer = None

     # --- environment state variables ---
     self.reset()

     # --- hot-start the rnn state ---
     warmup_steps = 20
     for _ in range(warmup_steps):
         self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
Пример #3
0
        # Keep the six arrays of this file together as one entry.
        # Order matters: later code unpacks positionally, and "dones"
        # comes BEFORE "rewards" here.
        all_data.append([
            arrays["mus"],
            arrays["logvars"],
            arrays["robotstates"],
            arrays["actions"],
            arrays["dones"],
            arrays["rewards"],
        ])
    # Total number of frames = sum of the first-axis lengths of every "mus" array.
    n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _, _ in all_data])
    chunksize = hps.batch_size * hps.max_seq_len  # frames per batch (100'000)
    print("total frames: ", n_total_frames)
    # Refuse to train when the data cannot fill even a single batch.
    # NOTE(review): the exception carries no message; consider reporting
    # n_total_frames and chunksize here.
    if n_total_frames < chunksize:
        raise ValueError()

    # Build the MDN-RNN in a freshly reset graph.
    reset_graph()
    model = MDNRNN(hps)
    model.print_trainable_params()
    vae = None

    viewer = None
    values_logs = None

    start = time.time()
    for epoch in range(1, N_EPOCHS + 1):
        #     print('preparing data for epoch', epoch)
        batches_start = time.time()
        # flatten all sequences into one
        # (buffers are re-allocated, zero-filled, at the start of every epoch)
        mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
        logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
        robotstate_sequence = np.zeros((n_total_frames, 5), dtype=np.float32)
        action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
Пример #4
0
            # _Z / _H are imported lazily, per backend, from the matching
            # training script.
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H

        # load W / M model
        # `model` is only assigned by the GPT branch below; the VAE+LSTM
        # branches populate `vae` and `rnn` instead.
        model = None
        if backend == "VAE_LSTM":
            # The VAE weights live under MODELDIR/V, file name prefixed with
            # `environment`; the RNN weights are loaded from `path`.
            vae_model_path = os.path.join(MODELDIR, "V",
                                          environment + "vae.json")
            reset_graph()
            vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
            vae.load_json(vae_model_path)
            # Start from the default hyper-parameters and override the sizes
            # that depend on the chosen backend.
            hps = default_hps()
            hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
            rnn = MDNRNN(hps, gpu_mode=gpu)
            rnn.load_json(path)
        elif backend == "VAE1D_LSTM":
            # Same recipe as above with the 1D VAE and its own weight file.
            vae_model_path = os.path.join(MODELDIR, "V",
                                          environment + "vae1d.json")
            reset_graph()
            # NOTE(review): reset_graph() is called twice in a row here; the
            # second call looks redundant (the VAE_LSTM branch calls it once)
            # -- confirm and remove.
            reset_graph()
            vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
            vae.load_json(vae_model_path)
            hps = default_hps()
            hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
            rnn = MDNRNN(hps, gpu_mode=gpu)
            rnn.load_json(path)
        elif backend == "GPT":
            # GPT backend: a single model object configured with BLOCK_SIZE and _H.
            mconf = GPTConfig(BLOCK_SIZE, _H)
            model = GPT(mconf, gpu=gpu)
Пример #5
0
    # Load one dataset archive and keep its five arrays together as one entry.
    # Order matters: later code unpacks positionally, and "dones" comes
    # BEFORE "rewards" here. (No "robotstates" in this variant.)
    arrays = np.load(path)
    all_data.append(
        [
            arrays["mus"],
            arrays["logvars"],
            arrays["actions"],
            arrays["dones"],
            arrays["rewards"],
        ]
    )
# Total number of frames = sum of the first-axis lengths of every "mus" array.
n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _ in all_data])
print("total frames: ", n_total_frames)


# Build the MDN-RNN in a freshly reset graph.
reset_graph()
model = MDNRNN(hps)

viewer = None
values_logs = None

for epoch in range(1, N_EPOCHS + 1):
    #     print('preparing data for epoch', epoch)
    start = time.time()
    # flatten all sequences into one
    # (buffers are re-allocated, zero-filled, at the start of every epoch)
    mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
    done_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    reward_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    i = 0
    # Shuffle the per-file entries in place so each epoch sees the files in
    # a different order.
    random.shuffle(all_data)
Пример #6
0
from navrep.models.rnn import reset_graph, sample_hps_params, MDNRNN, get_pi_idx
from navrep.models.vae2d import ConvVAE

# parameters
TEMPERATURE = 0.5  # not used in the lines visible here -- presumably the MDN sampling temperature; TODO confirm
_Z = 32  # presumably the latent size of the image VAE; verify against the trained model

# Locations of the recorded latent sequence and of the trained model weights.
sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_actions_rewards_dones.npz"
)
rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")

# Build both networks in a freshly reset graph, then load their weights.
reset_graph()
imrnn = MDNRNN(sample_hps_params, gpu_mode=False)
imvae = ConvVAE(batch_size=1, is_training=False, channels=3)

imvae.load_json(vae_model_path)
imrnn.load_json(rnn_model_path)

# load sequence image encoding
arrays = np.load(sequence_z_path)
sequence_action = arrays["actions"]
sequence_mu = arrays["mus"]
sequence_logvar = arrays["logvars"]
# Draw one latent sample per frame: z = mu + exp(logvar / 2) * noise,
# with noise of the same shape as sequence_mu.
sequence_z = sequence_mu + np.exp(
    sequence_logvar / 2.0) * np.random.randn(*(sequence_mu.shape))
SEQUENCE_LENGTH = len(sequence_mu)

# Start from the latent of the first recorded frame.
prev_z = sequence_z[0]
Пример #7
0
class DreamEnv(object):
    """Gym-like environment that rolls out ("dreams") a learned V + M model.

    No simulator is involved: every ``step`` feeds the previous latent, the
    action and the restart flag to an MDN-RNN and picks the next latent from
    the predicted mixture. A latent has ``_Z + _G`` entries; ``render``
    decodes the first ``_Z`` with the VAE into a LiDAR scan and rescales the
    remaining ``_G`` by ``MAX_GOAL_DIST`` to draw the goal.
    """

    def __init__(
            self,
            temperature=0.25,
            initial_z_path=os.path.expanduser(
                "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
            ),
            rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
            vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
    ):
        """Load the models, sample the initial latent and warm up the RNN.

        Args:
            temperature: sampling temperature used by ``step``; ``0`` selects
                the deterministic branch.
            initial_z_path: ``.npz`` file providing ``mus``, ``logvars`` and
                ``robotstates``; only index 0 of each is used.
            rnn_model_path: JSON weights handed to ``self.rnn.load_json``.
            vae_model_path: JSON weights handed to ``self.vae.load_json``.
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.5  # should be the same as data rnn was trained with
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        initial_robotstate = arrays["robotstates"][0]
        # one sample from the encoding of the first frame
        ini_lidar_z = initial_mu + np.exp(
            initial_logvar / 2.0) * np.random.randn(*(initial_mu.shape))
        # first two robot-state entries, scaled down by MAX_GOAL_DIST
        ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
        self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state: run zero-action steps while pinning the
        # latent to the initial one
        for _ in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: array-like that can be reshaped to ``(1, 1, 3)``.
            override_next_z: when not None, this latent (instead of the
                predicted one) is fed to the RNN on the next step. The
                predicted latent is still the one returned.

        Returns:
            Tuple ``(next_z, None, next_restart, {})``: the predicted latent,
            no reward, the restart flag (always ``0``) and an empty info dict.
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z + _G)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }

        [logmix, mean, logstd, _logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )

        next_z = self._sample_next_z(logmix, mean, logstd)
        if sample_hps_params.differential_z:
            # the network predicted a delta relative to the previous latent
            next_z = self.prev_z + next_z

        # The restart logits are fetched but currently ignored: the dream
        # never restarts on its own.
        next_restart = 0

        self.prev_z = next_z if override_next_z is None else override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def _sample_next_z(self, logmix, mean, logstd):
        """Pick one value per output dimension from the MDN prediction.

        ``logmix``, ``mean`` and ``logstd`` are indexed as
        ``[output dimension, mixture component]``.
        """
        out_width = _Z + _G

        if self.TEMPERATURE == 0:  # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            return mean[(range(out_width), mixture_idx)]

        # sample from modelled MDN distribution
        mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
        mixprob -= mixprob.max()  # shift before exp for numerical stability
        mixprob = np.exp(mixprob)
        mixprob /= mixprob.sum(axis=1).reshape(out_width, 1)

        chosen_mean = np.zeros(out_width)
        chosen_logstd = np.zeros(out_width)
        for j in range(out_width):
            idx = get_pi_idx(np.random.rand(), mixprob[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]
        rand_gaussian = np.random.randn(out_width) * np.sqrt(self.TEMPERATURE)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def reset(self):
        """Return to the initial latent with a zeroed RNN state."""
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Draw the decoded current latent in a pyglet window.

        Args:
            mode: ``"human"`` draws to a window; ``"rgb_array"`` is not
                implemented.
            close: when True, close the window (if any) and return.

        Returns:
            ``self.viewer.isopen`` in human mode; ``None`` when ``close`` is
            True or ``mode`` is not recognised.

        Raises:
            NotImplementedError: if ``mode == "rgb_array"``.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # Drop the reference so that a later render() opens a fresh
                # window instead of drawing into the closed one.
                self.viewer = None
            return

        # get last z decoding
        rings_pred = (
            self.vae.decode(self.prev_z.reshape(1, _Z + _G)[:, :_Z]) *
            self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)
        goal_pred = self.prev_z.reshape((_Z + _G, ))[_Z:] * MAX_GOAL_DIST

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            # imported lazily, only when a window is actually requested
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()

            # Render in pyglet
            def make_circle(c, r, res=10):
                """Return ``res`` vertices evenly spaced on a circle."""
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # The robot sits at the window centre, facing up. These
                # three values are shared by all drawing sections below.
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                # LIDAR
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                lidar_angles = lidar_angles + np.pi / 2.  # make robot face up
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                # rays close to the heading direction get a different colour
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for ray_idx in range(len(scan)):
                    end_i = i_ray_ends[ray_idx]
                    end_j = j_ray_ends[ray_idx]
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if is_in_fov[ray_idx]:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1],
                                     lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body
                r = 0.3 / M_PER_PX
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Goal
                goalcolor = np.array([1., 1., 0.3])
                px_goal = goal_pred / M_PER_PX
                igoal = i - px_goal[1]  # rotate 90deg to face up
                jgoal = j + px_goal[0]
                # Goal line
                gl.glBegin(gl.GL_LINE_LOOP)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                gl.glVertex3f(i, j, 0)
                gl.glVertex3f(igoal, jgoal, 0)
                gl.glEnd()
                # Goal markers
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                triangle = make_circle((igoal, jgoal), r / 3., res=3)
                for vert in triangle:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen

    def close(self):
        """Close the rendering window, if one is open."""
        self.render(close=True)

    def _get_dt(self):
        """Return the timestep ``self.DT``."""
        return self.DT

    def _get_viewer(self):
        """Return the current viewer (``None`` if no window is open)."""
        return self.viewer
Пример #8
0
class ImDreamEnv(object):
    """Gym-like environment that rolls out ("dreams") an image V + M model.

    No simulator is involved: every ``step`` feeds the previous latent, the
    action and the restart flag to an MDN-RNN and picks the next latent from
    the predicted mixture. A latent has ``_Z`` entries; ``render`` decodes it
    with the VAE into a ``_64 x _64`` RGB image.
    """

    def __init__(self, temperature=0.25):
        """Load the models, sample the initial latent and warm up the RNN.

        Args:
            temperature: sampling temperature used by ``step``; ``0`` selects
                the deterministic branch.
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding: one sample from the first frame
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * np.random.randn(
            *(initial_mu.shape)
        )
        # other tools
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state: run zero-action steps while pinning the
        # latent to the initial one
        for _ in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: array-like that can be reshaped to ``(1, 1, 3)``.
            override_next_z: when not None, this latent (instead of the
                predicted one) is fed to the RNN on the next step. The
                predicted latent is still the one returned.

        Returns:
            Tuple ``(next_z, None, next_restart, {})``: the predicted latent,
            no reward, the restart flag (always ``0``) and an empty info dict.
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }

        [logmix, mean, logstd, _logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )

        next_z = self._sample_next_z(logmix, mean, logstd)
        if sample_hps_params.differential_z:
            # the network predicted a delta relative to the previous latent
            next_z = self.prev_z + next_z

        # The restart logits are fetched but currently ignored: the dream
        # never restarts on its own.
        next_restart = 0

        self.prev_z = next_z if override_next_z is None else override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def _sample_next_z(self, logmix, mean, logstd):
        """Pick one value per output dimension from the MDN prediction.

        ``logmix``, ``mean`` and ``logstd`` are indexed as
        ``[output dimension, mixture component]``.
        """
        out_width = _Z

        if self.TEMPERATURE == 0:  # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            return mean[(range(out_width), mixture_idx)]

        # sample from modelled MDN distribution
        mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
        mixprob -= mixprob.max()  # shift before exp for numerical stability
        mixprob = np.exp(mixprob)
        mixprob /= mixprob.sum(axis=1).reshape(out_width, 1)

        chosen_mean = np.zeros(out_width)
        chosen_logstd = np.zeros(out_width)
        for j in range(out_width):
            idx = get_pi_idx(np.random.rand(), mixprob[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]
        rand_gaussian = np.random.randn(out_width) * np.sqrt(self.TEMPERATURE)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def reset(self):
        """Return to the initial latent with a zeroed RNN state."""
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Draw the decoded current latent in a pyglet window.

        Args:
            mode: ``"human"`` draws to a window; ``"rgb_array"`` is not
                implemented.
            close: when True, close the window (if any) and return without
                decoding or drawing anything.

        Returns:
            ``self.viewer.isopen`` in human mode; ``None`` when ``close`` is
            True or ``mode`` is not recognised.

        Raises:
            NotImplementedError: if ``mode == "rgb_array"``.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # Drop the reference so that a later render() opens a fresh
                # window instead of drawing into the closed one.
                self.viewer = None
            return

        # decode the current latent into an 8-bit RGB image
        img_pred = (self.vae.decode(self.prev_z.reshape(1, _Z)) * 255).astype(np.uint8)
        img_pred = img_pred.reshape(_64, _64, 3)

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            # imported lazily, only when a window is actually requested
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            from pyglet.gl.gl import GLubyte

            # Create pyglet image from the flat sequence of RGB byte values
            pixels = img_pred.flatten()
            rawData = (GLubyte * len(pixels))(*pixels)
            image_data = pyglet.image.ImageData(_64, _64, 'RGB', rawData)

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()
            # Render in pyglet
            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # Image
                image_data.blit(96, 96)
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen

    def close(self):
        """Close the rendering window, if one is open."""
        self.render(close=True)
Пример #9
0
# Gather the arrays of every dataset file; one list entry per file.
all_data = []
for path in files:
    arrays = np.load(path)
    # Order matters: later code unpacks positionally, and "dones" comes
    # BEFORE "rewards" here.
    all_data.append([
        arrays["mus"],
        arrays["logvars"],
        arrays["robotstates"],
        arrays["actions"],
        arrays["dones"],
        arrays["rewards"],
    ])
# Total number of frames = sum of the first-axis lengths of every "mus" array.
n_total_frames = np.sum([mu.shape[0] for mu, _, _, _, _, _ in all_data])
print("total frames: ", n_total_frames)

# Build the MDN-RNN in a freshly reset graph and load already-trained weights.
reset_graph()
model = MDNRNN(hps)
model.load_json(model_path)

# A single pass over the data.
for epoch in range(1):
    # per-epoch accumulators, filled by code beyond this excerpt
    epoch_z_costs = []
    epoch_wrongaction_z_costs = []
    #     print('preparing data for epoch', epoch)
    start = time.time()
    # flatten all sequences into one
    mu_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    logvar_sequence = np.zeros((n_total_frames, _Z), dtype=np.float32)
    robotstate_sequence = np.zeros((n_total_frames, 5), dtype=np.float32)
    action_sequence = np.zeros((n_total_frames, 3), dtype=np.float32)
    done_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    reward_sequence = np.zeros((n_total_frames, 1), dtype=np.float32)
    i = 0
Пример #10
0
 def __init__(self,
              backend, encoding,
              rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
              rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
              vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
              vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
              gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
              gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
              vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
              vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
              gpu=False,
              encoder_to_share_model_with=None,  # another EnvEncoder
              ):
     """Pick backend constants, size the observation space and load the models.

     Args:
         backend: one of ``"GPT"``, ``"GPT1D"``, ``"VAE1DLSTM"``,
             ``"VAELSTM"``, ``"VAE_LSTM"`` or ``"VAE1D_LSTM"``.
         encoding: ``"V_ONLY"`` (dimension ``_Z + _RS``), ``"VM"``
             (``_Z + _H + _RS``) or ``"M_ONLY"`` (``_H + _RS``).
         rnn_model_path, rnn1d_model_path, vae_model_path, vae1d_model_path,
         gpt_model_path, gpt1d_model_path, vae1dlstm_model_path,
         vaelstm_model_path: weight locations; only the ones matching
             ``backend`` are read.
         gpu: forwarded to the RNN / GPT-family model constructors and to
             ``load_checkpoint``.
         encoder_to_share_model_with: another encoder whose ``vae`` and
             ``rnn`` are reused instead of loading new models.

     Raises:
         NotImplementedError: if ``backend`` or ``encoding`` is not one of
             the supported values.
     """
     # _Z and _H are imported lazily from the training script that matches
     # the backend, so that only that backend's modules get imported.
     LIDAR_NORM_FACTOR = None
     if backend == "GPT":
         from navrep.scripts.train_gpt import _Z, _H
     elif backend == "GPT1D":
         from navrep.scripts.train_gpt1d import _Z, _H
         from navrep.tools.wdataset import LIDAR_NORM_FACTOR
     elif backend == "VAE1DLSTM":
         from navrep.scripts.train_vae1dlstm import _Z, _H
         from navrep.tools.wdataset import LIDAR_NORM_FACTOR
     elif backend == "VAELSTM":
         from navrep.scripts.train_vaelstm import _Z, _H
     elif backend == "VAE_LSTM":
         from navrep.scripts.train_vae import _Z
         from navrep.scripts.train_rnn import _H
     elif backend == "VAE1D_LSTM":
         from navrep.scripts.train_vae1d import _Z
         from navrep.scripts.train_rnn import _H
         from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
     else:
         # Without this branch _Z / _H stay unbound and the assignments
         # below fail with an obscure UnboundLocalError.
         raise NotImplementedError("unknown backend: {!r}".format(backend))
     self._Z = _Z
     self._H = _H
     self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
     self.encoding = encoding
     self.backend = backend
     if self.encoding == "V_ONLY":
         self.encoding_dim = _Z + _RS
     elif self.encoding == "VM":
         self.encoding_dim = _Z + _H + _RS
     elif self.encoding == "M_ONLY":
         self.encoding_dim = _H + _RS
     else:
         raise NotImplementedError
     self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                         shape=(self.encoding_dim,), dtype=np.float32)
     # V + M Models
     if encoder_to_share_model_with is not None:
         self.vae = encoder_to_share_model_with.vae
         self.rnn = encoder_to_share_model_with.rnn
     else:
         # load world model
         # NOTE(review): for the two VAE+LSTM backends with encoding
         # "V_ONLY", self.rnn is never assigned here -- confirm that no
         # caller reads it in that configuration.
         if self.backend == "VAE_LSTM":
             reset_graph()
             self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
             self.vae.load_json(vae_model_path)
             if self.encoding in ["VM", "M_ONLY"]:
                 hps = sample_hps_params._replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                 self.rnn = MDNRNN(hps, gpu_mode=gpu)
                 self.rnn.load_json(rnn_model_path)
         elif self.backend == "VAE1D_LSTM":
             reset_graph()
             self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
             self.vae.load_json(vae1d_model_path)
             if self.encoding in ["VM", "M_ONLY"]:
                 hps = sample_hps_params._replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                 self.rnn = MDNRNN(hps, gpu_mode=gpu)
                 self.rnn.load_json(rnn1d_model_path)
         elif self.backend == "GPT":
             # The remaining backends are single models that play both the
             # V and the M role, hence vae and rnn point to the same object.
             mconf = GPTConfig(BLOCK_SIZE, _H)
             model = GPT(mconf, gpu=gpu)
             load_checkpoint(model, gpt_model_path, gpu=gpu)
             self.vae = model
             self.rnn = model
         elif self.backend == "GPT1D":
             mconf = GPTConfig(BLOCK_SIZE, _H)
             model = GPT1D(mconf, gpu=gpu)
             load_checkpoint(model, gpt1d_model_path, gpu=gpu)
             self.vae = model
             self.rnn = model
         elif self.backend == "VAELSTM":
             mconf = VAELSTMConfig(_Z, _H)
             model = VAELSTM(mconf, gpu=gpu)
             load_checkpoint(model, vaelstm_model_path, gpu=gpu)
             self.vae = model
             self.rnn = model
         elif self.backend == "VAE1DLSTM":
             mconf = VAE1DLSTMConfig(_Z, _H)
             model = VAE1DLSTM(mconf, gpu=gpu)
             load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
             self.vae = model
             self.rnn = model
         else:
             raise NotImplementedError
     # other tools
     self.rings_def = generate_rings(_64, _64)
     self.viewer = None
     # environment state variables
     self.reset()
Пример #11
0
class EnvEncoder(object):
    """ Generic class to encode the observations of an environment,
    look at EncodedEnv to see how it is typically used """
    def __init__(self,
                 backend, encoding,
                 rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
                 rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
                 vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
                 vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
                 gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
                 gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
                 vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
                 vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
                 gpu=False,
                 encoder_to_share_model_with=None,  # another EnvEncoder
                 ):
        LIDAR_NORM_FACTOR = None
        if backend == "GPT":
            from navrep.scripts.train_gpt import _Z, _H
        elif backend == "GPT1D":
            from navrep.scripts.train_gpt1d import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAE1DLSTM":
            from navrep.scripts.train_vae1dlstm import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAELSTM":
            from navrep.scripts.train_vaelstm import _Z, _H
        elif backend == "VAE_LSTM":
            from navrep.scripts.train_vae import _Z
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H
            from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
        self._Z = _Z
        self._H = _H
        self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
        self.encoding = encoding
        self.backend = backend
        if self.encoding == "V_ONLY":
            self.encoding_dim = _Z + _RS
        elif self.encoding == "VM":
            self.encoding_dim = _Z + _H + _RS
        elif self.encoding == "M_ONLY":
            self.encoding_dim = _H + _RS
        else:
            raise NotImplementedError
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.encoding_dim,), dtype=np.float32)
        # V + M Models
        if encoder_to_share_model_with is not None:
            self.vae = encoder_to_share_model_with.vae
            self.rnn = encoder_to_share_model_with.rnn
        else:
            # load world model
            if self.backend == "VAE_LSTM":
                reset_graph()
                self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params. _replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn_model_path)
            elif self.backend == "VAE1D_LSTM":
                reset_graph()
                self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae1d_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params. _replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn1d_model_path)
            elif self.backend == "GPT":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT(mconf, gpu=gpu)
                load_checkpoint(model, gpt_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "GPT1D":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT1D(mconf, gpu=gpu)
                load_checkpoint(model, gpt1d_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAELSTM":
                mconf = VAELSTMConfig(_Z, _H)
                model = VAELSTM(mconf, gpu=gpu)
                load_checkpoint(model, vaelstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAE1DLSTM":
                mconf = VAE1DLSTMConfig(_Z, _H)
                model = VAE1DLSTM(mconf, gpu=gpu)
                load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            else:
                raise NotImplementedError
        # other tools
        self.rings_def = generate_rings(_64, _64)
        self.viewer = None
        # environment state variables
        self.reset()

    def reset(self):
        if self.encoding in ["VM", "M_ONLY"]:
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                self.state = rnn_init_state(self.rnn)
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence = []
        self.lidar_z = np.zeros(self._Z)

    def close(self):
        if self.viewer is not None:
            self.viewer.close()

    def _get_last_decoded_scan(self):
        """ Decode the latest latent (self.lidar_z) back into a flat scan of length _L. """
        z_batch = self.lidar_z.reshape((1, self._Z))
        obs_pred = self.vae.decode(z_batch)
        is_scan_backend = self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]
        if not is_scan_backend:
            # rings backends: mask the prediction, then convert rings back to lidar ranges
            rings_pred = obs_pred * self.rings_def["rings_to_bool"]
            return self.rings_def["rings_to_lidar"](rings_pred, _L).reshape((_L))
        # 1D backends: the decoder output only needs rescaling by LIDAR_NORM_FACTOR
        return (obs_pred * self.LIDAR_NORM_FACTOR).reshape((_L))

    def _encode_obs(self, obs, action):
        """ Encode one environment observation through V (and M, if the encoding uses it).

        obs is (lidar, other_obs)
        where lidar is (time_samples, ray, channel)
        and other_obs is (5,) - [goal_x, goal_y, vel_x, vel_y, vel_theta] all in robot frame
        action is the action passed along to the memory model for this step

        returns a 1D array, depending on self.encoding:
          V_ONLY: concat[lidar_z, other_obs]
          VM:     concat[lidar_z, other_obs, h]
          M_ONLY: concat[other_obs, h]
        lidar_z is -inf, inf
        h rnn state is ?
        other_obs is -inf, inf

        side effects: updates self.lidar_z, self.last_lidar_obs and the memory state
        (self.state or self.gpt_sequence).
        """
        # convert lidar scan to obs
        lidar_scan = obs[0]  # latest scan only obs (buffer, ray, channel)
        lidar_scan = lidar_scan.reshape(1, _L).astype(np.float32)
        lidar_mode = "scans" if "1D" in self.backend else "rings"
        lidar_obs = scans_to_lidar_obs(lidar_scan, lidar_mode, self.rings_def, channel_first=False)
        self.last_lidar_obs = lidar_obs  # for rendering purposes

        # obs to z, mu, logvar
        mu, logvar = self.vae.encode_mu_logvar(lidar_obs)
        mu = mu[0]
        logvar = logvar[0]
        if NO_VAE_VAR:
            # deterministic encoding: use the mean (multiplying by 1. makes a copy)
            lidar_z = mu * 1.
        else:
            # sample z from N(mu, exp(logvar))
            lidar_z = mu + np.exp(logvar / 2.0) * np.random.randn(*logvar.shape)

        # encode obs through V + M
        self.lidar_z = lidar_z
        if self.encoding == "V_ONLY":
            encoded_obs = np.concatenate([self.lidar_z, obs[1]], axis=0)
        elif self.encoding in ["VM", "M_ONLY"]:
            # get h
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                goal_z = obs[1][:2] / MAX_GOAL_DIST
                rnn_z = np.concatenate([lidar_z, goal_z], axis=-1)
                self.state = rnn_next_state(self.rnn, rnn_z, action, self.state)
                h = self.state.h[0]
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence.append(dict(obs=lidar_obs[0], state=obs[1][:2], action=action))
                # sliding window: keep the most recent BLOCK_SIZE steps. Slicing from
                # the front would drop every new step once the window is full, so h
                # would stop reflecting new observations.
                self.gpt_sequence = self.gpt_sequence[-BLOCK_SIZE:]
                h = self.rnn.get_h(self.gpt_sequence)
            # encoded obs
            if self.encoding == "VM":
                encoded_obs = np.concatenate([self.lidar_z, obs[1], h], axis=0)
            elif self.encoding == "M_ONLY":
                encoded_obs = np.concatenate([obs[1], h], axis=0)
        return encoded_obs

    def _render_rings_polar(self, close, save_to_file=False):
        if close:
            self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64, 1))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1,self._Z))).reshape((_64, _64, 1))
            import matplotlib.pyplot as plt
            plt.ion()
            fig, (ax1, ax2) = plt.subplots(
                1, 2, subplot_kw=dict(projection="polar"), num="rings"
            )
            ax1.clear()
            ax2.clear()
            if self.viewer is None:
                self.rendering_iteration = 0
            self.viewer = fig
            self.rings_def["visualize_rings"](last_rings_obs, scan=None, fig=fig, ax=ax1)
            self.rings_def["visualize_rings"](last_rings_pred, scan=None, fig=fig, ax=ax2)
            ax1.set_ylim([0, 10])
            ax1.set_title("ground truth")
            ax2.set_ylim([0, 10])
            ax2.set_title("lidar reconstruction")
            # rings box viz
            fig2, (ax1, ax2) = plt.subplots(1, 2, num="2d")
            ax1.clear()
            ax2.clear()
            ax1.imshow(np.squeeze(last_rings_obs), cmap=plt.cm.Greys)
            ax2.imshow(np.squeeze(last_rings_pred), cmap=plt.cm.Greys)
            ax1.set_title("ground truth")
            ax2.set_title("lidar reconstruction")
            # update
            plt.pause(0.01)
            self.rendering_iteration += 1
            if save_to_file:
                fig.savefig(
                    "/tmp/encodedenv_polar{:04d}.png".format(self.rendering_iteration))
                fig2.savefig(
                    "/tmp/encodedenv_box{:04d}.png".format(self.rendering_iteration))

    def _render_rings(self, close, save_to_file=False):
        if close:
            self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1,self._Z))).reshape((_64, _64))
            # Window and viewport size
            ring_size = _64  # grid cells
            padding = 4  # grid cells
            grid_size = 1  # px per grid cell
            WINDOW_W = (2 * ring_size + 3 * padding) * grid_size
            WINDOW_H = (1 * ring_size + 2 * padding) * grid_size
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.rendering_iteration = 0
            # Render in pyglet
            win = self.viewer.window
            win.switch_to()
            win.dispatch_events()
            win.clear()
            gl.glViewport(0, 0, VP_W, VP_H)
            # colors
            bgcolor = np.array([0.4, 0.8, 0.4])
            # Green background
            gl.glBegin(gl.GL_QUADS)
            gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
            gl.glVertex3f(0, VP_H, 0)
            gl.glVertex3f(VP_W, VP_H, 0)
            gl.glVertex3f(VP_W, 0, 0)
            gl.glVertex3f(0, 0, 0)
            gl.glEnd()
            # rings - observation
            w_offset = 0
            for rings in [last_rings_obs, last_rings_pred]:
                for i in range(ring_size):
                    for j in range(ring_size):
                        cell_color = 1 - rings[i, j]
                        cell_y = (padding + i) * grid_size  # px
                        cell_x = (padding + j + w_offset) * grid_size  # px
                        gl.glBegin(gl.GL_QUADS)
                        gl.glColor4f(cell_color, cell_color, cell_color, 1.0)
                        gl.glVertex3f(cell_x+       0,  cell_y+grid_size, 0)  # noqa
                        gl.glVertex3f(cell_x+grid_size, cell_y+grid_size, 0)  # noqa
                        gl.glVertex3f(cell_x+grid_size, cell_y+        0, 0)  # noqa
                        gl.glVertex3f(cell_x+        0, cell_y+        0, 0)  # noqa
                        gl.glEnd()
                w_offset += ring_size + padding
            if save_to_file:
                pyglet.image.get_buffer_manager().get_color_buffer().save(
                    "/tmp/encodeder_rings{:04d}.png".format(self.rendering_iteration))
            # actualize
            win.flip()
            self.rendering_iteration += 1
            return self.viewer.isopen