Exemplo n.º 1
0
def compare_at_positions(args, args_real, env):
    r1 = np.array([[1.115, 0.825], [1.189, 0.825], [1.263, 0.825],
                   [1.337, 0.825], [1.411, 0.825], [1.485, 0.825]])
    r1 += np.array([0., 0.002])
    r2 = np.array([[1.485, 0.7875], [1.485, 0.75], [1.485, 0.7125],
                   [1.485, 0.675]])
    r3 = np.array([[1.411, 0.675], [1.337, 0.675], [1.263, 0.675],
                   [1.189, 0.675], [1.115, 0.675]])
    r3 -= np.array([0., 0.002])
    r4 = np.array([[1.115, 0.7125], [1.115, 0.75], [1.115, 0.7875],
                   [1.115, 0.825]])
    routes = np.concatenate([r1, r2, r3, r4], axis=0)

    obs = []
    env_images = []
    env.reset()
    for px, py in routes:
        env.env.env._move_object(position=[px, py, 0.425])
        for timestep in range(10):
            '''o = env.get_obs()
			obs.append(o)
			env_images.append(take_env_image(env, args.img_size))'''
            action = [0., 0., 0., 0.]
            o, _, _, info = env.step(action)
            obs.append(o)
            env_images.append(take_env_image(env, args.img_size))

    save_results_from_obs(obs, args, args_real, 'around_obstacle')
    create_rollout_video(env_images, args=args, filename='around_obstacle')

    pos_x = [1.1, 1.2, 1.3, 1.5]
    pos_y = np.linspace(0.5, 1.0, num=10, endpoint=True)
    for i, px in enumerate(pos_x):
        obs = []
        env_images = []
        env.reset()
        for py in pos_y:
            env.env.env._move_object(position=[px, py, 0.425])
            for timestep in range(3):
                action = [0., 0., 0., 0.]
                o, _, _, info = env.step(action)
                obs.append(o)
                env_images.append(take_env_image(env, args.img_size))

        save_results_from_obs(obs, args, args_real, 'on_row_{}'.format(i))
        create_rollout_video(env_images,
                             args=args,
                             filename='on_row_{}'.format(i))
Exemplo n.º 2
0
def create_heatmap_qvalues(args, env, player, player_imaginary):
    nx = 80  #todo at least 100
    ny = 80

    pos_x = np.linspace(args.real_field_center[0] - args.real_field_size[0],
                        args.real_field_center[0] + args.real_field_size[0],
                        num=nx,
                        endpoint=True)
    pos_y = np.linspace(args.real_field_center[1] - args.real_field_size[1],
                        args.real_field_center[1] + args.real_field_size[1],
                        num=ny,
                        endpoint=True)

    first_ob = env.reset()
    puck_first_pos = first_ob['achieved_goal']
    grip_pos = env.env.env.sim.data.get_site_xpos('robot0:grip')
    dt = env.env.env.sim.nsubsteps * env.env.env.sim.model.opt.timestep
    grip_velp = env.env.env.sim.data.get_site_xvelp('robot0:grip') * dt
    robot_qpos, robot_qvel = utils.robot_get_obs(env.env.env.sim)
    for timestep in range(17):
        obs_orginal = []
        obs = []
        data_v_vals = np.zeros(shape=(nx, ny))
        data_v_vals_imaginary = np.zeros(shape=(nx, ny))

        #pass some steps so that moving obstacles are in other part
        #move puck and robot back to beginning
        #show image

        do_steps = 3
        while do_steps > 0:
            do_steps -= 1
            action = [0., 0., 0., 0.]
            env.step(action)

        env_image = take_env_image(env, 500)
        im_current = Image.fromarray(env_image.astype(np.uint8))
        im_current.save('log/{}_env.png'.format(args.env))

        for i, px in enumerate(pos_x):
            for j, py in enumerate(pos_y):
                env.env.env._move_gripper_to(position=[px, py + 0.1, 0.61])
                '''try:
                    env.env.env._move_gripper_to(position=[px, py + 0.15, 0.52])
                except Exception:
                    print('no work with x={}, y={} i={}, j={}'.format(px,py,i, j) )
                    exit()'''
                env.env.env._move_object(position=[px, py, 0.425
                                                   ])  #todo use height of env

                # move puck and robot back to beginning
                # show image
                env_image = take_env_image(env, 500)
                im_current = Image.fromarray(env_image.astype(np.uint8))
                #im_current.save('log/{}_env.png'.format(args.env))
                if (i > 0) and (i % 10 == 0) and (j % 10 == 0):
                    im_current.save('log/{}_env_from_{}_{}.png'.format(
                        args.env, i, j))
                    print('d')
                o = env.get_obs()
                obs_orginal.append(o)
                obs.append(goal_based_process(o))
                q_val = player.get_q_pi([goal_based_process(o)])[0]
                q_val_imaginary = player_imaginary.get_q_pi(
                    [goal_based_process(o)])[0]

                data_v_vals[i, j] = q_val
                data_v_vals_imaginary[i, j] = q_val_imaginary
            fwef = 4234
        rq4r4 = 43231

        ax = plt.gca()
        im = ax.imshow(data_v_vals, cmap='cool', interpolation='nearest')
        # Create colorbar
        cbar_kw = {}
        cbarlabel = ""
        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
        cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
        plt.title('heatmap')
        plt.savefig('log/{}_valuemap_{}.png'.format(args.env, timestep))
        plt.clf()
        plt.close()

        #for imaginary values
        ax = plt.gca()
        im = ax.imshow(data_v_vals_imaginary,
                       cmap='cool',
                       interpolation='nearest')
        # Create colorbar
        cbar_kw = {}
        cbarlabel = ""
        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
        cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
        plt.title('heatmap with imaginary')
        plt.savefig('log/{}_valuemap_{}_imaginary.png'.format(
            args.env, timestep))
        plt.clf()
        plt.close()
Exemplo n.º 3
0
	def test_acc(self, key, env, agent):
		if self.args.goal in self.args.colls_test_check_envs:
			# +1 since when it becomes 0 then not considered as success
			# coll toll is modified externally for every iteraiton
			envs_collision = [self.coll_tol + 1 for _ in range(len(self.env_List))]
		if self.calls % 40 == 0 or self.calls in [0, 1, 2, 5, 8, 10] or self.after_train_test:
			if self.after_train_test:
				eps_idx = [i for i in range(0, self.test_rollouts-1, 20)] + [self.test_rollouts-1]
			else:
				eps_idx = [0, 5, 8, 10, 15, 20, self.test_rollouts-1]
			acc_sum, obs = 0.0, []
			prev_obs = []
			env_images = [[] for _ in eps_idx]
			if self.args.vae_dist_help:
				latent_points = [[] for _ in eps_idx]
			for i in range(self.test_rollouts):
				o = env[i].reset()
				obs.append(goal_based_process(o))
				prev_obs.append(o)
				if i in eps_idx:
					t = eps_idx.index(i)
					env_images[t].append(take_env_image(env[i], self.args.img_vid_size))
					if self.args.vae_dist_help:
						latent_points[t].append(o['achieved_goal_latent'])
			for timestep in range(self.args.timesteps):
				actions = agent.step_batch(obs)
				obs, infos = [], []
				for i in range(self.test_rollouts):
					ob, _, _, info = env[i].step(actions[i])
					obs.append(goal_based_process(ob))
					#this should be used just for testing after training
					if self.args.goal in self.args.colls_test_check_envs:
						if ob['collision_check']:
							envs_collision[i] -= 1
						if envs_collision[i] <= 0:
							info['Success'] = 0.0
					infos.append(info)


					if i in eps_idx:
						t = eps_idx.index(i)
						env_images[t].append(take_env_image(env[i], self.args.img_vid_size))
						if self.args.vae_dist_help:
							latent_points[t].append(ob['achieved_goal_latent'])
					prev_obs[i] = copy.deepcopy(ob)
			for i, t in enumerate(eps_idx):
				create_rollout_video(np.array(env_images[i]), args=self.args,
									 filename='rollout_it_{}_ep_{}_test'.format(self.calls, t))
				name = "{}rollout_latent_coords_it_{}_ep_{}_test".format(self.args.logger.my_log_dir, self.calls, t)
				if self.args.vae_dist_help:
					show_points(points_list=np.array(latent_points[i]), save_file=name, space_of='latent')
			
		else:
			acc_sum, obs = 0.0, []
			for i in range(self.test_rollouts):
				obs.append(goal_based_process(env[i].reset()))
			for timestep in range(self.args.timesteps):
				actions = agent.step_batch(obs)
				obs, infos = [], []
				for i in range(self.test_rollouts):
					ob, _, _, info = env[i].step(actions[i])
					obs.append(goal_based_process(ob))
					if self.args.goal in self.args.colls_test_check_envs:
						if ob['collision_check']:
							envs_collision[i] -= 1
						if envs_collision[i] <= 0:
							info['Success'] = 0.0
					infos.append(info)
		minDist = infos[0]['Distance']
		maxDist = infos[0]['Distance']
		for i in range(self.test_rollouts):
			acc_sum += infos[i]['Success']
			if infos[i]['Distance'] < minDist:
				minDist = infos[i]['Distance']
			if infos[i]['Distance'] > maxDist:
				maxDist = infos[i]['Distance']

		steps = self.args.buffer.counter
		acc = acc_sum/self.test_rollouts
		self.acc_record[key].append((steps, acc, minDist, maxDist))
		self.args.logger.add_record('Success', acc)
		self.args.logger.add_record('MaxDistance', maxDist)
		self.args.logger.add_record('MinDistance', minDist)
		self.calls += 1
Exemplo n.º 4
0
def create_heatmap_distances(args, env):
    nx = 100
    ny = 100

    pos_x = np.linspace(1.05, 1.55, num=nx, endpoint=True)
    pos_y = np.linspace(0.5, 1.0, num=ny, endpoint=True)
    env.reset()
    for timestep in range(10):
        data = np.zeros(shape=(nx, ny))
        #pass some steps so that moving obstacles are in other part
        do_steps = 7
        while do_steps > 0:
            do_steps -= 1
            action = [0., 0., 0., 0.]
            env.step(action)

        env_image = take_env_image(env, 500)
        im_current = Image.fromarray(env_image.astype(np.uint8))
        im_current.save('log/{}_distance_env_at_timestep_{}.png'.format(
            args.env, timestep))

        for i, px in enumerate(pos_x):
            for j, py in enumerate(pos_y):
                env.env.env._move_object(position=[px, py, 0.425])
                o = env.get_obs()
                if args.vae_dist_help:
                    # have to render here since get obs wont actualize the image position (internally just updated on step)
                    image = take_image_objects(env, args.img_size)
                    if args.vae_type == 'space' or args.vae_type == 'bbox' or args.vae_type == 'faster_rcnn':
                        lg, lg_s, lo, lo_s = latents_from_images(
                            np.array([image]), args)
                        #achieved_goal_size_latent = lg_s[0].copy()
                    else:
                        lg, lo, lo_s = latents_from_images(
                            np.array([image]), args)
                    achieved_goal_latent = lg[0].copy()
                    #obstacle_latent = lo[0].copy()
                    #obstacle_size_latent = lo_s[0].copy()

                    if achieved_goal_latent[0] == 100:
                        env_image = take_env_image(env, 500)
                        im_current = Image.fromarray(env_image.astype(
                            np.uint8))
                        im_current.save('log/dummy_im.png'.format(
                            args.env, timestep))
                        dummy_var = 2
                    distance_to_goal = args.dist_estimator.calculate_distance_batch(
                        goal_pos=o['desired_goal_latent'].copy(),
                        current_pos_batch=np.array([achieved_goal_latent]))[0]
                    #clipped to 100 since estimator puts regions inside obstacle too far away and
                    #then heatmap does not represent any important info (these clipping depend of env)
                    distance_to_goal = np.clip(distance_to_goal,
                                               a_min=None,
                                               a_max=3.2)
                else:

                    distance_to_goal = args.dist_estimator.calculate_distance_batch(
                        goal_pos=o['desired_goal'].copy(),
                        current_pos_batch=np.array([o['achieved_goal']]))[0]
                    # clipped to 0.8 since estimator puts regions inside obstacle too far away and
                    # then heatmap does not represent any important info
                    distance_to_goal = np.clip(distance_to_goal,
                                               a_min=None,
                                               a_max=0.8)

                data[i, j] = distance_to_goal

        ax = plt.gca()
        im = ax.imshow(data, cmap='viridis', interpolation='nearest')
        # Create colorbar
        cbar_kw = {}
        cbarlabel = ""
        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
        cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
        plt.title('heatmap')
        plt.savefig('log/{}_distance_map_at_timestep_{}.png'.format(
            args.env, timestep))
        plt.clf()
Exemplo n.º 5
0
def calculate_comparison_player(args, args_real, env, player):
    o = env.reset()
    if player is None:
        name = 'none'
    else:
        name = 'player_{}'.format(player.play_epoch)
    n = np.random.randint(100)

    for timestep in range(n):
        if player is None:
            action = [0., 0., 0., 0.]
        else:
            action = player.my_step_batch([goal_based_process(o)])[0]
        o, _, _, info = env.step(action)
    image_objects_current = take_image_objects(env, args.img_size)
    image_env_current = take_env_image(env, args.img_vid_size)
    lg, lg_s, lo, lo_s = [], [], [], []
    for _ in range(10):
        slg, slg_s, slo, slo_s = latents_from_images(
            np.array([image_objects_current.copy()]), args)
        lg.append(slg[0])
        lg_s.append(slg_s[0])
        lo.append(slo[0])
        lo_s.append(slo_s[0])
    lg = np.array(lg)
    lg_s = np.array(lg_s)
    lo = np.array(lo)
    lo_s = np.array(lo_s)

    im_current = Image.fromarray(image_env_current.copy().astype(np.uint8))
    im_current.save('log/space_tests/current_image_{}.png'.format(name))
    im_current_objects = Image.fromarray(image_objects_current.copy().astype(
        np.uint8))
    im_current_objects.save(
        'log/space_tests/current_image_objects_{}.png'.format(name))

    real_coords_obstacle = np.array([o['real_obstacle_info'][:2]] * 10)
    real_size_obstacle = np.array([o['real_obstacle_info'][3:5]] * 10)
    real_coords_puck = np.array([o['achieved_goal'][:2]] * 10)
    real_size_puck = np.array([o['real_size_goal'][:2]] * 10)

    latent_coords_obstacle = lo[:, 0, :]
    latent_size_obstacle = lo_s[:, 0, :]
    latent_coords_puck = lg
    latent_size_puck = lg_s

    latent_coords_obstacle = map_coords(latent_coords_obstacle)
    latent_size_obstacle = map_sizes(latent_size_obstacle)
    latent_coords_puck = map_coords(latent_coords_puck)
    latent_size_puck = map_sizes(latent_size_puck)

    its = np.arange(len(real_coords_obstacle))
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_obstacle[:, 0], label='real')
    ax.plot(its, latent_coords_obstacle[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis')
    plt.savefig('log/space_tests/static_coords_obstacle_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_obstacle[:, 1], label='real')
    ax.plot(its, latent_coords_obstacle[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis')
    plt.savefig('log/space_tests/static_coords_obstacle_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_size_obstacle[:, 0], label='real')
    ax.plot(its, latent_size_obstacle[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis size')
    plt.savefig('log/space_tests/static_size_obstacle_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_size_obstacle[:, 1], label='real')
    ax.plot(its, latent_size_obstacle[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis size')
    plt.savefig('log/space_tests/static_size_obstacle_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_coords_puck[:, 0], label='real')
    ax.plot(its, latent_coords_puck[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis')
    plt.savefig('log/space_tests/static_coords_puck_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_puck[:, 1], label='real')
    ax.plot(its, latent_coords_puck[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis')
    plt.savefig('log/space_tests/static_coords_puck_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_size_puck[:, 0], label='real')
    ax.plot(its, latent_size_puck[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis size')
    plt.savefig('log/space_tests/static_size_puck_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_size_puck[:, 1], label='real')
    ax.plot(its, latent_size_puck[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis size')
    plt.savefig('log/space_tests/static_size_puck_Y_{}.png'.format(name))
    plt.clf()

    obs = []
    o = env.reset()
    env_images = []
    obs.append(o)
    env_images.append(take_env_image(env, args.img_vid_size))

    for timestep in range(100):
        if player is None:
            action = [0., 0., 0., 0.]
        else:
            action = player.my_step_batch([goal_based_process(o)])[0]
        o, _, _, info = env.step(action)
        obs.append(o)
        env_images.append(take_env_image(env, args.img_vid_size))
    create_rollout_video(env_images,
                         args=args,
                         filename='episode_{}'.format(name))

    save_results_from_obs(obs, args, args_real, name)

    obs = []
    o = env.reset()
    obs.append(o)
    for ep in range(10):
        for timestep in range(100):
            if player is None:
                action = [0., 0., 0., 0.]
            else:
                action = player.my_step_batch([goal_based_process(o)])[0]
            o, _, _, info = env.step(action)
            obs.append(o)

    real_coords_obstacle = np.array(
        [obs[i]['real_obstacle_info'][:2] for i in range(len(obs))])
    real_size_obstacle = np.array(
        [obs[i]['real_obstacle_info'][3:5] for i in range(len(obs))])
    real_coords_puck = np.array(
        [obs[i]['achieved_goal'][:2] for i in range(len(obs))])
    real_size_puck = np.array(
        [obs[i]['real_size_goal'][:2] for i in range(len(obs))])

    #real_distance_to_goal = args_real.dist_estimator.calculate_distance_batch(obs[0]['desired_goal'], real_coords_puck)

    latent_coords_obstacle = np.array(
        [obs[i]['obstacle_latent'][0] for i in range(len(obs))])
    latent_size_obstacle = np.array(
        [obs[i]['obstacle_size_latent'][0] for i in range(len(obs))])
    latent_coords_puck = np.array(
        [obs[i]['achieved_goal_latent'] for i in range(len(obs))])
    latent_size_puck = np.array(
        [obs[i]['achieved_goal_size_latent'] for i in range(len(obs))])

    #latent_distance_to_goal = args.dist_estimator.calculate_distance_batch(obs[0]['desired_goal_latent'],
    #																	   latent_coords_puck)

    latent_coords_obstacle = map_coords(latent_coords_obstacle)
    latent_size_obstacle = map_sizes(latent_size_obstacle)
    latent_coords_puck = map_coords(latent_coords_puck)
    latent_size_puck = map_sizes(latent_size_puck)

    mse_coords_obstacle = ((real_coords_obstacle -
                            latent_coords_obstacle)**2).mean()
    mse_size_obstacle = ((real_size_obstacle - latent_size_obstacle)**2).mean()
    mse_coords_puck = ((real_coords_puck - latent_coords_puck)**2).mean()
    mse_size_puck = ((real_size_puck - latent_size_puck)**2).mean()

    print(
        ' mse_coords_obstacle: {}\n mse_size_obstacle: {}\n mse_coords_puck: {}\n mse_size_puck: {}\n'
        .format(mse_coords_obstacle, mse_size_obstacle, mse_coords_puck,
                mse_size_puck))
Exemplo n.º 6
0
    def play(self):
        # play policy on env
        env = self.env
        acc_sum, obs = 0.0, []

        for t in range(self.test_rollouts):
            rand_steps_wait = np.random.randint(low=12, high=18)
            '''acs = [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] + [
                np.array([-1., 0., 0., 0.]) for _ in range(3)] + [np.array([0., -1., 0., 0.]) for _ in range(7)] + [
                np.array([1., 1., 0., 0.]) for _ in range(100)]'''
            '''rand_steps_wait = np.random.randint(low=6, high=12)
            acs = [np.array([0., 0., -.5, 0.]) for _ in range(3)] + [np.array([0.672, -0.8, 0, 0.]) for _ in range(3)] + \
                  [np.array([0., 0., -.4, 0.]) for _ in range(2)]+ \
                  [np.array([0., 0.5, 0., 0.]) for _ in range(10)] + [np.array([-0.1, 0., 0., 0.]) for _ in range(2)]+ \
                  [np.array([0.2, -0.8, 0., 0.]) for _ in range(3)] + [np.array([0.4, 0., 0., 0.]) for _ in range(3)] + \
                  [np.array([0., 1., 0., 0.]) for _ in range(4)] + [np.array([-0.6, -0.2, 0., 0.]) for _ in range(4)] + \
                  [np.array([0., 0.89, 0., 0.]) for _ in range(4)] + [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] + \
                  [np.array([1., 0., 0., 0.]) for _ in range(5)] + [np.array([0.5, -0.5, 0., 0.]) for _ in range(8)] + \
                  [np.array([0., 0., 0., 0.]) for _ in range(110)]'''
            '''acs = [np.array([1., 0., 0., 0.]) for _ in range(3)] + \
                  [np.array([0., -.5, 0., 0.]) for _ in range(6)] + \
                  [np.array([0., 0., -.6, 0.]) for _ in range(2)] + \
                  [np.array([-0.9, 0., 0, 0.]) for _ in range(10)] + \
                  [np.array([0., 1., 0., 0.]) for _ in range(4)] + \
                  [np.array([-0.8, 0., 0., 0.]) for _ in range(2)] + \
                  [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] +\
                  [np.array([0., -1., 0., 0.]) for _ in range(15)]+ \
                  [np.array([0., 0., 0., 0.]) for _ in range(110)]'''
            acs = [np.array([0., 0., -0.1, 0.]) for _ in range(3)]+\
                  [np.array([0., -1., 0., 0.]) for _ in range(10)]+ \
                  [np.array([0., 0., 0., 0.]) for _ in range(100)]
            #env.env.env._move_object(position=[1.13, 0.75, 0.425])
            #acs = [np.array([0., 0., 0., 0.]) for _ in range(100)]
            ob = env.reset()
            obs.append(goal_based_process(ob))
            trajectory_goals = [ob['achieved_goal'].copy()]
            #trajectory_goals_latents = [ob['achieved_goal_latent'].copy()]

            trajectory_obstacles = [ob['real_obstacle_info'].copy()]
            #trajectory_obstacles_latents = [ob['obstacle_latent'].copy()]
            #trajectory_obstacles_latents_sizes = [ob['obstacle_size_latent'].copy()]

            tr_env_images = [take_env_image(self.env, args.img_vid_size)]

            for timestep in range(self.args.timesteps):
                #actions = self.my_step_batch(obs)
                #actions = [env.action_space.sample() for _ in range(len(obs))]
                actions = [acs[timestep]]
                obs, infos = [], []
                ob, _, _, info = env.step(actions[0])
                obs.append(goal_based_process(ob))
                trajectory_goals.append(ob['achieved_goal'].copy())
                #trajectory_goals_latents.append(ob['achieved_goal_latent'].copy())

                trajectory_obstacles.append(ob['real_obstacle_info'].copy())
                #trajectory_obstacles_latents.append(ob['obstacle_latent'].copy())
                #trajectory_obstacles_latents_sizes.append(ob['obstacle_size_latent'].copy())

                tr_env_images.append(take_env_image(self.env, args.img_vid_size))
                infos.append(info)
                sim = env.sim
                '''print('________ contacts at step {} -------'.format(timestep))
                for i in range(sim.data.ncon):
                    # Note that the contact array has more than `ncon` entries,
                    # so be careful to only read the valid entries.

                    contact = sim.data.contact[i]
                    geom1_name = sim.model.geom_id2name(contact.geom1)
                    geom2_name = sim.model.geom_id2name(contact.geom2)
                    if geom1_name == 'object0' or geom2_name == 'object0':

                        print('contact', i)
                        print('dist', contact.dist)
                        print('geom1', contact.geom1, sim.model.geom_id2name(contact.geom1))
                        print('geom2', contact.geom2, sim.model.geom_id2name(contact.geom2))
                        # There's more stuff in the data structure
                        # See the mujoco documentation for more info!
                        geom2_body = sim.model.geom_bodyid[sim.data.contact[i].geom2]
                        print(' Contact force on geom2 body', sim.data.cfrc_ext[geom2_body])
                        print('norm', np.sqrt(np.sum(np.square(sim.data.cfrc_ext[geom2_body]))))
                        # Use internal functions to read out mj_contactForce
                        c_array = np.zeros(6, dtype=np.float64)
                        print('c_array', c_array)
                        functions.mj_contactForce(sim.model, sim.data, i, c_array)
                        print('c_array', c_array)

                print('done')'''


            if t % 1 == 0 or t == self.test_rollouts -1:
                steps = np.arange(len(tr_env_images))
                #latent_ind_x = 1
                #latent_ind_y = 0
                latent_ind_x = 0
                latent_ind_y = 1

                plt.plot(steps, np.array(trajectory_goals)[:, 0], label='real')
                #plt.plot(steps, map_x_table(np.array(trajectory_goals_latents)[:, latent_ind_x]), label='latent')
                plt.title('positions_x_goals')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_x_goals.png".format(args.logger.my_log_dir, t))
                plt.close()

                plt.plot(steps, np.array(trajectory_goals)[:, 1], label='real')
                #plt.plot(steps, map_y_table(np.array(trajectory_goals_latents)[:, latent_ind_y]), label='latent')
                plt.title('positions_y_goals')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_y_goals.png".format(args.logger.my_log_dir, t))
                plt.close()

                '''plt.plot(steps, np.array(trajectory_obstacles)[:, 0], label='real')
                plt.plot(steps, map_x_table(np.array(trajectory_obstacles_latents)[:, 0, latent_ind_x]), label='latent')
                plt.title('positions_x_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_x_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()



                plt.plot(steps, np.array(trajectory_obstacles)[:, 1], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents)[:, 0, latent_ind_y]), label='latent')
                plt.title('positions_y_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_y_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()

                plt.plot(steps, np.array(trajectory_obstacles)[:, 3], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents_sizes)[:, 0, latent_ind_x]), label='latent')
                plt.title('sizes_x_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_sizes_x_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()

                plt.plot(steps, np.array(trajectory_obstacles)[:, 4], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents_sizes)[:, 0, latent_ind_y]), label='latent')
                plt.title('sizes_y_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_sizes_y_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()'''

                create_rollout_video(tr_env_images, args=self.args, filename='play_it_{}'.format(t))
        if hasattr(env.env.env, 'reset_sim_counter'):
            print(env.env.env.reset_sim_counter)

if __name__ == '__main__':
    #read args and init env
    args = get_args()
    #load_vaes(args)
    env = make_env(args)
    #dist_estimator = DistMovEst()

    #run env
    acc_sum, obs = 0.0, []
    prev_obs = []
    env_images = []
    start_time = time.time()

    image_real = take_env_image(env, 500)
    img = Image.fromarray(image_real.astype(np.uint8))
    img.save('image_of.png'.format(args.env))


    for vid in range(5):
        o = env.reset()
        obs.append(o)
        prev_obs.append(o)

        image_real = take_env_image(env, 500)
        img = Image.fromarray(image_real.astype(np.uint8))
        img.save('{}_task.png'.format(args.env))
        '''image_real = take_env_image(env, 500)
        img = Image.fromarray(image_real.astype(np.uint8))
        img.save('{}_real.png'.format(args.env))'''