Example #1
File: play.py Project: Jason93415/C-HGG
 def demoRecordPickAndPlaceObstacle(
         self, raw_path="videos/KukaPickAndPlaceObstacle"):
     env = self.env
     test_rollouts = 5
     goals = [[0.80948876, -0.24847823, 0.85],
              [0.90204398, -0.24176245, 0.85],
              [0.72934716, -0.19637749, 0.85],
              [0.6970663, -0.25643907, 0.85],
              [0.7029464, -0.18765762, 0.85]]
     recorder = VideoRecorder(env.env.env, base_path=raw_path)
     acc_sum, obs = 0.0, []
     for i in range(test_rollouts):
         env.reset()
         env.set_goal(np.array(goals[i]))
         obs.append(goal_based_process(env.get_obs()))
         print("Rollout {}/{} ...".format(i + 1, test_rollouts))
         for timestep in range(200):
             actions = self.my_step_batch(obs)
             obs, infos = [], []
             ob, _, _, info = env.step(actions[0])
             obs.append(goal_based_process(ob))
             infos.append(info)
             recorder.capture_frame()
     recorder.close()
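
Note: every example in this listing flattens the environment's dict observation with goal_based_process before feeding it to the policy. A minimal sketch of what such a helper typically looks like in goal-conditioned HER/HGG code (an illustrative assumption, not necessarily this project's exact implementation):

import numpy as np

def goal_based_process(obs):
    # flatten the dict observation into one vector by concatenating
    # the raw observation with the desired goal
    return np.concatenate([obs['observation'], obs['desired_goal']])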
Example #2
File: play.py Project: Jason93415/C-HGG
 def play(self):
     # play policy on env
     env = self.env
     acc_sum, obs = 0.0, []
     for i in range(self.test_rollouts):
         obs.append(goal_based_process(env.reset()))
         for timestep in range(self.args.timesteps):
             actions = self.my_step_batch(obs)
             obs, infos = [], []
             ob, _, _, info = env.step(actions[0])
             obs.append(goal_based_process(ob))
             infos.append(info)
             env.render()
Example #3
def experiment_setup_test(args):

    if args.vae_dist_help:
        load_vaes(args)
        if args.vae_type == 'bbox':
            file_index_object = 'data/' + args.env + '/' + args.vae_type + '_obj_i.npy'
            file_indices_obstacle = 'data/' + args.env + '/' + args.vae_type + '_obstacles_indices.npy'
            args.obj_index = np.load(file_index_object)
            args.obstacles_indices = np.load(file_indices_obstacle)

    load_field_parameters(args)
    #if args.dist_estimator_type is not None:
    #	temp_env = make_temp_env(args)
    #	load_dist_estimator(args, temp_env)
    #	del temp_env
    env = make_env(args)

    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance

    from play import Player
    args.agent = agent = Player(args)
    # fix the number of test rollouts to 100
    args.tester = tester = Tester(args,
                                  test_rollouts=100,
                                  after_train_test=True)
    args.timesteps = env.env.env.spec.max_episode_steps

    return env, agent, tester
Example #4
def experiment_setup(args):
    if args.vae_dist_help:
        load_vaes(args)

    # since some extensions of the envs use the dist estimator, this load is used together with the interval wrapper  # todo: use another approach?
    load_field_parameters(args)
    if args.dist_estimator_type is not None:
        temp_env = make_temp_env(args)
        load_dist_estimator(args, temp_env)
        del temp_env

    env = make_env(args)
    env_test = make_env(args)

    if args.goal_based:

        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance

    if args.imaginary_obstacle_transitions:
        # relatively small buffer size so it always holds the most recent collisions
        args.imaginary_buffer = ReplayBuffer_Imaginary(
            args, buffer_size=args.im_buffer_size)
    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')
    args.timesteps = env.env.env.spec.max_episode_steps

    return env, env_test, agent, buffer, learner, tester
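
For orientation, a hypothetical outline of how the objects returned by experiment_setup() are typically wired together in a training script; the epoch/cycle structure and the learner.learn signature are assumptions for illustration, not taken from this listing:

env, env_test, agent, buffer, learner, tester = experiment_setup(args)
for epoch in range(args.epochs):
    for cycle in range(args.cycles):
        # collect episodes with the agent and update it from the replay buffer
        # (method name and signature are assumed)
        learner.learn(args, env, env_test, agent, buffer)
    # periodically evaluate, e.g. via tester.test_acc(...) as in Examples #5 and #12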
Example #5
    def test_acc(self, key, env, agent):
        acc_sum, obs = 0.0, []
        for i in range(self.test_rollouts):
            obs.append(goal_based_process(env[i].reset()))
        for timestep in range(self.args.timesteps):
            actions = agent.step_batch(obs)
            obs, infos = [], []
            for i in range(self.test_rollouts):
                ob, _, _, info = env[i].step(actions[i])
                obs.append(goal_based_process(ob))
                infos.append(info)
        for i in range(self.test_rollouts):
            acc_sum += infos[i]['Success']

        steps = self.args.buffer.counter
        acc = acc_sum / self.test_rollouts
        self.acc_record[key].append((steps, acc))
        self.args.logger.add_record('Success/' + key, acc)
Example #6
File: play.py Project: Jason93415/C-HGG
 def record_video(self, raw_path="myrecord"):
     env = self.env
     test_rollouts = 5
     # play policy on env
     recorder = VideoRecorder(env.env.env, base_path=raw_path)
     acc_sum, obs = 0.0, []
     for i in range(test_rollouts):
         obs.append(goal_based_process(env.reset()))
         if hasattr(env.env.env, "set_camera_pos"):
             env.env.env.set_camera_pos(i)
         print("Rollout {}/{} ...".format(i + 1, test_rollouts))
         for timestep in range(self.args.timesteps):
             actions = self.my_step_batch(obs)
             obs, infos = [], []
             ob, _, _, info = env.step(actions[0])
             obs.append(goal_based_process(ob))
             infos.append(info)
             recorder.capture_frame()
         print("... done.")
     recorder.close()
Example #7
File: play.py Project: Jason93415/C-HGG
 def demoRecordPush(self, raw_path="videos/KukaPush"):
     env = self.env
     test_rollouts = 5
     goals = [[0.68, -0.18, 0.85], [0.60, -0.3, 0.85], [0.72, -0.28, 0.85],
              [0.58, -0.3, 0.85], [0.62, -0.25, 0.85]]
     recorder = VideoRecorder(env.env.env, base_path=raw_path)
     acc_sum, obs = 0.0, []
     for i in range(test_rollouts):
         env.reset()
         env.set_goal(np.array(goals[i]))
         obs.append(goal_based_process(env.get_obs()))
         print("Rollout {}/{} ...".format(i + 1, test_rollouts))
         for timestep in range(self.args.timesteps):
             actions = self.my_step_batch(obs)
             obs, infos = [], []
             ob, _, _, info = env.step(actions[0])
             obs.append(goal_based_process(ob))
             infos.append(info)
             recorder.capture_frame()
     recorder.close()
Example #8
 def demoRecord(self, raw_path="videos/KukaReach"):
     env = self.env
     goals = [[0.84604588, 0.14732964, 1.35766576], [0.79483348, -0.14184732,  1.20930532],
              [0.919015, -0.15907337, 1.18060975], [7.11554270e-01, 1.51756884e-03, 1.34433537e+00],
              [0.70905836, 0.13042637, 1.19320888]]
     recorder = VideoRecorder(env.env.env, base_path=raw_path)
     acc_sum, obs = 0.0, []
     test_rollouts = 5
     for i in range(test_rollouts):
         env.reset()
         env.set_goal(np.array(goals[i]))
         obs.append(goal_based_process(env.get_obs()))
         print("Rollout {}/{} ...".format(i + 1, test_rollouts))
         for timestep in range(self.args.timesteps):
             actions = self.my_step_batch(obs)
             obs, infos = [], []
             ob, _, _, info = env.step(actions[0])
             obs.append(goal_based_process(ob))
             infos.append(info)
             recorder.capture_frame()
     recorder.close()
Example #9
def experiment_setup(args):
    env = make_env(args)
    env_test = make_env(args)
    if args.goal_based:
        args.obs_dims = list(goal_based_process(env.reset()).shape)
        args.acts_dims = [env.action_space.shape[0]]
        args.compute_reward = env.compute_reward
        args.compute_distance = env.compute_distance

    args.buffer = buffer = ReplayBuffer_Episodic(args)
    args.learner = learner = create_learner(args)
    args.agent = agent = create_agent(args)
    args.logger.info('*** network initialization complete ***')
    args.tester = tester = Tester(args)
    args.logger.info('*** tester initialization complete ***')

    return env, env_test, agent, buffer, learner, tester
Example #10
	def step(self, obs, explore=False, test_info=False):
		if (not test_info) and (self.args.buffer.steps_counter<self.args.warmup):
			return np.random.uniform(-1, 1, size=self.args.acts_dims)
		if self.args.goal_based: obs = goal_based_process(obs)

		# eps-greedy exploration
		if explore and np.random.uniform()<=self.args.eps_act:
			return np.random.uniform(-1, 1, size=self.args.acts_dims)

		feed_dict = {
			self.raw_obs_ph: [obs]
		}
		action, info = self.sess.run([self.pi, self.step_info], feed_dict)
		action = action[0]

		# uncorrelated Gaussian exploration
		if explore: action += np.random.normal(0, self.args.std_act, size=self.args.acts_dims)
		action = np.clip(action, -1, 1)

		if test_info: return action, info
		return action
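
A minimal usage sketch of the step() method above (hypothetical driver loop; env, agent, and args are assumed to exist): during warmup or with probability eps_act it returns a uniform random action, otherwise the policy action, with Gaussian noise added when explore=True.

ob = env.reset()
for _ in range(args.timesteps):
    action = agent.step(ob, explore=True)   # noisy action for data collection
    ob, reward, done, info = env.step(action)
    if done:
        break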
Example #11
def create_heatmap_qvalues(args, env, player, player_imaginary):
    nx = 80  #todo at least 100
    ny = 80

    pos_x = np.linspace(args.real_field_center[0] - args.real_field_size[0],
                        args.real_field_center[0] + args.real_field_size[0],
                        num=nx,
                        endpoint=True)
    pos_y = np.linspace(args.real_field_center[1] - args.real_field_size[1],
                        args.real_field_center[1] + args.real_field_size[1],
                        num=ny,
                        endpoint=True)

    first_ob = env.reset()
    puck_first_pos = first_ob['achieved_goal']
    grip_pos = env.env.env.sim.data.get_site_xpos('robot0:grip')
    dt = env.env.env.sim.nsubsteps * env.env.env.sim.model.opt.timestep
    grip_velp = env.env.env.sim.data.get_site_xvelp('robot0:grip') * dt
    robot_qpos, robot_qvel = utils.robot_get_obs(env.env.env.sim)
    for timestep in range(17):
        obs_original = []
        obs = []
        data_v_vals = np.zeros(shape=(nx, ny))
        data_v_vals_imaginary = np.zeros(shape=(nx, ny))

        # advance a few steps so that the moving obstacles are in a different part of the field
        # move the puck and robot back to the beginning
        # show the image

        do_steps = 3
        while do_steps > 0:
            do_steps -= 1
            action = [0., 0., 0., 0.]
            env.step(action)

        env_image = take_env_image(env, 500)
        im_current = Image.fromarray(env_image.astype(np.uint8))
        im_current.save('log/{}_env.png'.format(args.env))

        for i, px in enumerate(pos_x):
            for j, py in enumerate(pos_y):
                env.env.env._move_gripper_to(position=[px, py + 0.1, 0.61])
                '''try:
                    env.env.env._move_gripper_to(position=[px, py + 0.15, 0.52])
                except Exception:
                    print('no work with x={}, y={} i={}, j={}'.format(px,py,i, j) )
                    exit()'''
                env.env.env._move_object(position=[px, py, 0.425])  # todo: use the env's height

                # move puck and robot back to beginning
                # show image
                env_image = take_env_image(env, 500)
                im_current = Image.fromarray(env_image.astype(np.uint8))
                #im_current.save('log/{}_env.png'.format(args.env))
                if (i > 0) and (i % 10 == 0) and (j % 10 == 0):
                    im_current.save('log/{}_env_from_{}_{}.png'.format(
                        args.env, i, j))
                o = env.get_obs()
                obs_original.append(o)
                obs.append(goal_based_process(o))
                q_val = player.get_q_pi([goal_based_process(o)])[0]
                q_val_imaginary = player_imaginary.get_q_pi(
                    [goal_based_process(o)])[0]

                data_v_vals[i, j] = q_val
                data_v_vals_imaginary[i, j] = q_val_imaginary

        ax = plt.gca()
        im = ax.imshow(data_v_vals, cmap='cool', interpolation='nearest')
        # Create colorbar
        cbar_kw = {}
        cbarlabel = ""
        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
        cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
        plt.title('heatmap')
        plt.savefig('log/{}_valuemap_{}.png'.format(args.env, timestep))
        plt.clf()
        plt.close()

        #for imaginary values
        ax = plt.gca()
        im = ax.imshow(data_v_vals_imaginary,
                       cmap='cool',
                       interpolation='nearest')
        # Create colorbar
        cbar_kw = {}
        cbarlabel = ""
        cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)
        cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")
        plt.title('heatmap with imaginary')
        plt.savefig('log/{}_valuemap_{}_imaginary.png'.format(
            args.env, timestep))
        plt.clf()
        plt.close()
Example #12
	def test_acc(self, key, env, agent):
		if self.args.goal in self.args.colls_test_check_envs:
			# +1 because once the counter reaches 0 the rollout is no longer counted as a success
			# coll_tol is modified externally for every iteration
			envs_collision = [self.coll_tol + 1 for _ in range(len(self.env_List))]
		if self.calls % 40 == 0 or self.calls in [0, 1, 2, 5, 8, 10] or self.after_train_test:
			if self.after_train_test:
				eps_idx = [i for i in range(0, self.test_rollouts-1, 20)] + [self.test_rollouts-1]
			else:
				eps_idx = [0, 5, 8, 10, 15, 20, self.test_rollouts-1]
			acc_sum, obs = 0.0, []
			prev_obs = []
			env_images = [[] for _ in eps_idx]
			if self.args.vae_dist_help:
				latent_points = [[] for _ in eps_idx]
			for i in range(self.test_rollouts):
				o = env[i].reset()
				obs.append(goal_based_process(o))
				prev_obs.append(o)
				if i in eps_idx:
					t = eps_idx.index(i)
					env_images[t].append(take_env_image(env[i], self.args.img_vid_size))
					if self.args.vae_dist_help:
						latent_points[t].append(o['achieved_goal_latent'])
			for timestep in range(self.args.timesteps):
				actions = agent.step_batch(obs)
				obs, infos = [], []
				for i in range(self.test_rollouts):
					ob, _, _, info = env[i].step(actions[i])
					obs.append(goal_based_process(ob))
					# this check should only be used when testing after training
					if self.args.goal in self.args.colls_test_check_envs:
						if ob['collision_check']:
							envs_collision[i] -= 1
						if envs_collision[i] <= 0:
							info['Success'] = 0.0
					infos.append(info)


					if i in eps_idx:
						t = eps_idx.index(i)
						env_images[t].append(take_env_image(env[i], self.args.img_vid_size))
						if self.args.vae_dist_help:
							latent_points[t].append(ob['achieved_goal_latent'])
					prev_obs[i] = copy.deepcopy(ob)
			for i, t in enumerate(eps_idx):
				create_rollout_video(np.array(env_images[i]), args=self.args,
									 filename='rollout_it_{}_ep_{}_test'.format(self.calls, t))
				name = "{}rollout_latent_coords_it_{}_ep_{}_test".format(self.args.logger.my_log_dir, self.calls, t)
				if self.args.vae_dist_help:
					show_points(points_list=np.array(latent_points[i]), save_file=name, space_of='latent')
			
		else:
			acc_sum, obs = 0.0, []
			for i in range(self.test_rollouts):
				obs.append(goal_based_process(env[i].reset()))
			for timestep in range(self.args.timesteps):
				actions = agent.step_batch(obs)
				obs, infos = [], []
				for i in range(self.test_rollouts):
					ob, _, _, info = env[i].step(actions[i])
					obs.append(goal_based_process(ob))
					if self.args.goal in self.args.colls_test_check_envs:
						if ob['collision_check']:
							envs_collision[i] -= 1
						if envs_collision[i] <= 0:
							info['Success'] = 0.0
					infos.append(info)
		minDist = infos[0]['Distance']
		maxDist = infos[0]['Distance']
		for i in range(self.test_rollouts):
			acc_sum += infos[i]['Success']
			if infos[i]['Distance'] < minDist:
				minDist = infos[i]['Distance']
			if infos[i]['Distance'] > maxDist:
				maxDist = infos[i]['Distance']

		steps = self.args.buffer.counter
		acc = acc_sum/self.test_rollouts
		self.acc_record[key].append((steps, acc, minDist, maxDist))
		self.args.logger.add_record('Success', acc)
		self.args.logger.add_record('MaxDistance', maxDist)
		self.args.logger.add_record('MinDistance', minDist)
		self.calls += 1
Example #13
def calculate_comparison_player(args, args_real, env, player):
    o = env.reset()
    if player is None:
        name = 'none'
    else:
        name = 'player_{}'.format(player.play_epoch)
    n = np.random.randint(100)

    for timestep in range(n):
        if player is None:
            action = [0., 0., 0., 0.]
        else:
            action = player.my_step_batch([goal_based_process(o)])[0]
        o, _, _, info = env.step(action)
    image_objects_current = take_image_objects(env, args.img_size)
    image_env_current = take_env_image(env, args.img_vid_size)
    lg, lg_s, lo, lo_s = [], [], [], []
    for _ in range(10):
        slg, slg_s, slo, slo_s = latents_from_images(
            np.array([image_objects_current.copy()]), args)
        lg.append(slg[0])
        lg_s.append(slg_s[0])
        lo.append(slo[0])
        lo_s.append(slo_s[0])
    lg = np.array(lg)
    lg_s = np.array(lg_s)
    lo = np.array(lo)
    lo_s = np.array(lo_s)

    im_current = Image.fromarray(image_env_current.copy().astype(np.uint8))
    im_current.save('log/space_tests/current_image_{}.png'.format(name))
    im_current_objects = Image.fromarray(image_objects_current.copy().astype(
        np.uint8))
    im_current_objects.save(
        'log/space_tests/current_image_objects_{}.png'.format(name))

    real_coords_obstacle = np.array([o['real_obstacle_info'][:2]] * 10)
    real_size_obstacle = np.array([o['real_obstacle_info'][3:5]] * 10)
    real_coords_puck = np.array([o['achieved_goal'][:2]] * 10)
    real_size_puck = np.array([o['real_size_goal'][:2]] * 10)

    latent_coords_obstacle = lo[:, 0, :]
    latent_size_obstacle = lo_s[:, 0, :]
    latent_coords_puck = lg
    latent_size_puck = lg_s

    latent_coords_obstacle = map_coords(latent_coords_obstacle)
    latent_size_obstacle = map_sizes(latent_size_obstacle)
    latent_coords_puck = map_coords(latent_coords_puck)
    latent_size_puck = map_sizes(latent_size_puck)

    its = np.arange(len(real_coords_obstacle))
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_obstacle[:, 0], label='real')
    ax.plot(its, latent_coords_obstacle[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis')
    plt.savefig('log/space_tests/static_coords_obstacle_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_obstacle[:, 1], label='real')
    ax.plot(its, latent_coords_obstacle[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis')
    plt.savefig('log/space_tests/static_coords_obstacle_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_size_obstacle[:, 0], label='real')
    ax.plot(its, latent_size_obstacle[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis size')
    plt.savefig('log/space_tests/static_size_obstacle_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_size_obstacle[:, 1], label='real')
    ax.plot(its, latent_size_obstacle[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis size')
    plt.savefig('log/space_tests/static_size_obstacle_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_coords_puck[:, 0], label='real')
    ax.plot(its, latent_coords_puck[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis')
    plt.savefig('log/space_tests/static_coords_puck_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_coords_puck[:, 1], label='real')
    ax.plot(its, latent_coords_puck[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis')
    plt.savefig('log/space_tests/static_coords_puck_Y_{}.png'.format(name))
    plt.clf()

    fig, ax = plt.subplots()
    ax.plot(its, real_size_puck[:, 0], label='real')
    ax.plot(its, latent_size_puck[:, 0], label='latent_mapped')
    plt.legend()
    plt.title('x-axis size')
    plt.savefig('log/space_tests/static_size_puck_X_{}.png'.format(name))
    plt.clf()
    fig, ax = plt.subplots()
    ax.plot(its, real_size_puck[:, 1], label='real')
    ax.plot(its, latent_size_puck[:, 1], label='latent_mapped')
    plt.legend()
    plt.title('y-axis size')
    plt.savefig('log/space_tests/static_size_puck_Y_{}.png'.format(name))
    plt.clf()

    obs = []
    o = env.reset()
    env_images = []
    obs.append(o)
    env_images.append(take_env_image(env, args.img_vid_size))

    for timestep in range(100):
        if player is None:
            action = [0., 0., 0., 0.]
        else:
            action = player.my_step_batch([goal_based_process(o)])[0]
        o, _, _, info = env.step(action)
        obs.append(o)
        env_images.append(take_env_image(env, args.img_vid_size))
    create_rollout_video(env_images,
                         args=args,
                         filename='episode_{}'.format(name))

    save_results_from_obs(obs, args, args_real, name)

    obs = []
    o = env.reset()
    obs.append(o)
    for ep in range(10):
        for timestep in range(100):
            if player is None:
                action = [0., 0., 0., 0.]
            else:
                action = player.my_step_batch([goal_based_process(o)])[0]
            o, _, _, info = env.step(action)
            obs.append(o)

    real_coords_obstacle = np.array(
        [obs[i]['real_obstacle_info'][:2] for i in range(len(obs))])
    real_size_obstacle = np.array(
        [obs[i]['real_obstacle_info'][3:5] for i in range(len(obs))])
    real_coords_puck = np.array(
        [obs[i]['achieved_goal'][:2] for i in range(len(obs))])
    real_size_puck = np.array(
        [obs[i]['real_size_goal'][:2] for i in range(len(obs))])

    #real_distance_to_goal = args_real.dist_estimator.calculate_distance_batch(obs[0]['desired_goal'], real_coords_puck)

    latent_coords_obstacle = np.array(
        [obs[i]['obstacle_latent'][0] for i in range(len(obs))])
    latent_size_obstacle = np.array(
        [obs[i]['obstacle_size_latent'][0] for i in range(len(obs))])
    latent_coords_puck = np.array(
        [obs[i]['achieved_goal_latent'] for i in range(len(obs))])
    latent_size_puck = np.array(
        [obs[i]['achieved_goal_size_latent'] for i in range(len(obs))])

    #latent_distance_to_goal = args.dist_estimator.calculate_distance_batch(obs[0]['desired_goal_latent'],
    #																	   latent_coords_puck)

    latent_coords_obstacle = map_coords(latent_coords_obstacle)
    latent_size_obstacle = map_sizes(latent_size_obstacle)
    latent_coords_puck = map_coords(latent_coords_puck)
    latent_size_puck = map_sizes(latent_size_puck)

    mse_coords_obstacle = ((real_coords_obstacle -
                            latent_coords_obstacle)**2).mean()
    mse_size_obstacle = ((real_size_obstacle - latent_size_obstacle)**2).mean()
    mse_coords_puck = ((real_coords_puck - latent_coords_puck)**2).mean()
    mse_size_puck = ((real_size_puck - latent_size_puck)**2).mean()

    print(
        ' mse_coords_obstacle: {}\n mse_size_obstacle: {}\n mse_coords_puck: {}\n mse_size_puck: {}\n'
        .format(mse_coords_obstacle, mse_size_obstacle, mse_coords_puck,
                mse_size_puck))
Example #14
    def play(self):
        # play policy on env
        env = self.env
        acc_sum, obs = 0.0, []

        for t in range(self.test_rollouts):
            rand_steps_wait = np.random.randint(low=12, high=18)
            '''acs = [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] + [
                np.array([-1., 0., 0., 0.]) for _ in range(3)] + [np.array([0., -1., 0., 0.]) for _ in range(7)] + [
                np.array([1., 1., 0., 0.]) for _ in range(100)]'''
            '''rand_steps_wait = np.random.randint(low=6, high=12)
            acs = [np.array([0., 0., -.5, 0.]) for _ in range(3)] + [np.array([0.672, -0.8, 0, 0.]) for _ in range(3)] + \
                  [np.array([0., 0., -.4, 0.]) for _ in range(2)]+ \
                  [np.array([0., 0.5, 0., 0.]) for _ in range(10)] + [np.array([-0.1, 0., 0., 0.]) for _ in range(2)]+ \
                  [np.array([0.2, -0.8, 0., 0.]) for _ in range(3)] + [np.array([0.4, 0., 0., 0.]) for _ in range(3)] + \
                  [np.array([0., 1., 0., 0.]) for _ in range(4)] + [np.array([-0.6, -0.2, 0., 0.]) for _ in range(4)] + \
                  [np.array([0., 0.89, 0., 0.]) for _ in range(4)] + [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] + \
                  [np.array([1., 0., 0., 0.]) for _ in range(5)] + [np.array([0.5, -0.5, 0., 0.]) for _ in range(8)] + \
                  [np.array([0., 0., 0., 0.]) for _ in range(110)]'''
            '''acs = [np.array([1., 0., 0., 0.]) for _ in range(3)] + \
                  [np.array([0., -.5, 0., 0.]) for _ in range(6)] + \
                  [np.array([0., 0., -.6, 0.]) for _ in range(2)] + \
                  [np.array([-0.9, 0., 0, 0.]) for _ in range(10)] + \
                  [np.array([0., 1., 0., 0.]) for _ in range(4)] + \
                  [np.array([-0.8, 0., 0., 0.]) for _ in range(2)] + \
                  [np.array([0., 0., 0., 0.]) for _ in range(rand_steps_wait)] +\
                  [np.array([0., -1., 0., 0.]) for _ in range(15)]+ \
                  [np.array([0., 0., 0., 0.]) for _ in range(110)]'''
            acs = [np.array([0., 0., -0.1, 0.]) for _ in range(3)]+\
                  [np.array([0., -1., 0., 0.]) for _ in range(10)]+ \
                  [np.array([0., 0., 0., 0.]) for _ in range(100)]
            #env.env.env._move_object(position=[1.13, 0.75, 0.425])
            #acs = [np.array([0., 0., 0., 0.]) for _ in range(100)]
            ob = env.reset()
            obs.append(goal_based_process(ob))
            trajectory_goals = [ob['achieved_goal'].copy()]
            #trajectory_goals_latents = [ob['achieved_goal_latent'].copy()]

            trajectory_obstacles = [ob['real_obstacle_info'].copy()]
            #trajectory_obstacles_latents = [ob['obstacle_latent'].copy()]
            #trajectory_obstacles_latents_sizes = [ob['obstacle_size_latent'].copy()]

            tr_env_images = [take_env_image(self.env, args.img_vid_size)]

            for timestep in range(self.args.timesteps):
                #actions = self.my_step_batch(obs)
                #actions = [env.action_space.sample() for _ in range(len(obs))]
                actions = [acs[timestep]]
                obs, infos = [], []
                ob, _, _, info = env.step(actions[0])
                obs.append(goal_based_process(ob))
                trajectory_goals.append(ob['achieved_goal'].copy())
                #trajectory_goals_latents.append(ob['achieved_goal_latent'].copy())

                trajectory_obstacles.append(ob['real_obstacle_info'].copy())
                #trajectory_obstacles_latents.append(ob['obstacle_latent'].copy())
                #trajectory_obstacles_latents_sizes.append(ob['obstacle_size_latent'].copy())

                tr_env_images.append(take_env_image(self.env, args.img_vid_size))
                infos.append(info)
                sim = env.sim
                '''print('________ contacts at step {} -------'.format(timestep))
                for i in range(sim.data.ncon):
                    # Note that the contact array has more than `ncon` entries,
                    # so be careful to only read the valid entries.

                    contact = sim.data.contact[i]
                    geom1_name = sim.model.geom_id2name(contact.geom1)
                    geom2_name = sim.model.geom_id2name(contact.geom2)
                    if geom1_name == 'object0' or geom2_name == 'object0':

                        print('contact', i)
                        print('dist', contact.dist)
                        print('geom1', contact.geom1, sim.model.geom_id2name(contact.geom1))
                        print('geom2', contact.geom2, sim.model.geom_id2name(contact.geom2))
                        # There's more stuff in the data structure
                        # See the mujoco documentation for more info!
                        geom2_body = sim.model.geom_bodyid[sim.data.contact[i].geom2]
                        print(' Contact force on geom2 body', sim.data.cfrc_ext[geom2_body])
                        print('norm', np.sqrt(np.sum(np.square(sim.data.cfrc_ext[geom2_body]))))
                        # Use internal functions to read out mj_contactForce
                        c_array = np.zeros(6, dtype=np.float64)
                        print('c_array', c_array)
                        functions.mj_contactForce(sim.model, sim.data, i, c_array)
                        print('c_array', c_array)

                print('done')'''


            if t % 1 == 0 or t == self.test_rollouts -1:
                steps = np.arange(len(tr_env_images))
                #latent_ind_x = 1
                #latent_ind_y = 0
                latent_ind_x = 0
                latent_ind_y = 1

                plt.plot(steps, np.array(trajectory_goals)[:, 0], label='real')
                #plt.plot(steps, map_x_table(np.array(trajectory_goals_latents)[:, latent_ind_x]), label='latent')
                plt.title('positions_x_goals')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_x_goals.png".format(args.logger.my_log_dir, t))
                plt.close()

                plt.plot(steps, np.array(trajectory_goals)[:, 1], label='real')
                #plt.plot(steps, map_y_table(np.array(trajectory_goals_latents)[:, latent_ind_y]), label='latent')
                plt.title('positions_y_goals')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_y_goals.png".format(args.logger.my_log_dir, t))
                plt.close()

                '''plt.plot(steps, np.array(trajectory_obstacles)[:, 0], label='real')
                plt.plot(steps, map_x_table(np.array(trajectory_obstacles_latents)[:, 0, latent_ind_x]), label='latent')
                plt.title('positions_x_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_x_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()



                plt.plot(steps, np.array(trajectory_obstacles)[:, 1], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents)[:, 0, latent_ind_y]), label='latent')
                plt.title('positions_y_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_positions_y_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()

                plt.plot(steps, np.array(trajectory_obstacles)[:, 3], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents_sizes)[:, 0, latent_ind_x]), label='latent')
                plt.title('sizes_x_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_sizes_x_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()

                plt.plot(steps, np.array(trajectory_obstacles)[:, 4], label='real')
                plt.plot(steps, map_y_table(np.array(trajectory_obstacles_latents_sizes)[:, 0, latent_ind_y]), label='latent')
                plt.title('sizes_y_obstacles')
                plt.legend(loc=4)
                plt.savefig("{}it_{}_sizes_y_obstacles.png".format(args.logger.my_log_dir, i))
                plt.close()'''

                create_rollout_video(tr_env_images, args=self.args, filename='play_it_{}'.format(t))
        if hasattr(env.env.env, 'reset_sim_counter'):
            print(env.env.env.reset_sim_counter)