import pybullet_multigoal_gym as pmg
import matplotlib.pyplot as plt

# pick_and_place with image observations and goal images rendered from a
# second camera. The camera_setup list is truncated in the source; the first
# entry below is a placeholder (goal_cam_id=1 implies at least two cameras).
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],     # placeholder values
        'cameraTargetPosition': [-0.6, 0.05, 0.2],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],
        'cameraTargetPosition': [-0.6, -0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    task='pick_and_place',
    gripper='parallel_jaw',
    render=True,
    binary_reward=True,
    max_episode_steps=5,
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
)

obs = env.reset()
f, axarr = plt.subplots(1, 2)  # restored: axarr is used below but undefined in the source
t = 0
while True:
    t += 1
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    axarr[0].imshow(obs['desired_goal_img'])
    axarr[1].imshow(obs['achieved_goal_img'])
    plt.pause(0.00001)
    if done:  # restored: reset once the short episode times out
        obs = env.reset()
import pybullet_multigoal_gym as pmg
import matplotlib.pyplot as plt

# insertion task with RGB-D observations and goal images. The camera_setup
# list is truncated in the source; the first entry below is a placeholder
# (observation_cam_id=[1] implies at least two cameras).
camera_setup = [
    {
        'cameraEyePosition': [-0.9, 0.3, 0.4],      # placeholder values
        'cameraTargetPosition': [-0.45, 0.0, 0.0],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-0.9, -0.0, 0.4],
        'cameraTargetPosition': [-0.45, -0.0, 0.0],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    }
]

env = pmg.make_env(task='insertion',
                   render=True,
                   binary_reward=True,
                   distance_threshold=0.05,
                   image_observation=True,
                   depth_image=True,
                   goal_image=True,
                   point_cloud=False,
                   state_noise=True,
                   visualize_target=False,
                   camera_setup=camera_setup,
                   observation_cam_id=[1],
                   goal_cam_id=-1,
                   gripper='parallel_jaw',
                   max_episode_steps=50)

obs = env.reset()
time_done = False
f, axarr = plt.subplots(2, 2)
while True:
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    axarr[0][0].imshow(obs['desired_goal_img'][:, :, :3])
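    # The source cuts off inside this loop. A plausible completion for the
    # remaining subplot panels, assuming depth is stored as the fourth
    # channel of the goal images (consistent with the [:, :, :3] RGB slice
    # above); the original example may lay the panels out differently:
    axarr[0][1].imshow(obs['desired_goal_img'][:, :, 3])
    axarr[1][0].imshow(obs['achieved_goal_img'][:, :, :3])
    axarr[1][1].imshow(obs['achieved_goal_img'][:, :, 3])
    plt.pause(0.00001)
    if time_done:
        obs = env.reset()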
import pybullet_multigoal_gym as pmg

# block_stack with task decomposition. The camera_setup list is truncated in
# the source; the entries below carry placeholder view parameters apart from
# the values that survive in the fragment (goal_cam_id=1 implies at least
# two cameras).
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],     # placeholder values
        'cameraTargetPosition': [-0.6, 0.05, 0.2],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],     # placeholder values
        'cameraTargetPosition': [-0.6, -0.05, 0.2],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    # task args
    task='block_stack',
    gripper='parallel_jaw',
    grip_informed_goal=False,
    num_block=5,  # only meaningful for multi-block tasks
    render=True,
    binary_reward=True,
    max_episode_steps=25,
    # image observation args
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
    # task decomposition
    task_decomposition=True)

"""The desired goal changes as the sub-goal is being set"""
obs = env.reset()
time_done = False
env.set_sub_goal(0)
t = 0
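# The source truncates here. A minimal sketch of the stepping loop, assuming
# random actions and a sub-goal advanced via env.set_sub_goal() whenever an
# episode times out; the valid sub-goal indices and the schedule used by the
# original example may differ:
sub_goal_ind = 0
while True:
    t += 1
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    if time_done:
        obs = env.reset()
        sub_goal_ind = (sub_goal_ind + 1) % 5  # upper bound assumed from num_block=5
        env.set_sub_goal(sub_goal_ind)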
# (the preamble of this training script is truncated in the source: imports
# and the definitions of args, params, seeds, num_total_episodes and
# max_episode_steps)

# directory for storing data
path = os.path.dirname(os.path.realpath(__file__))
directory_name = args['task'] + '_' + str(args['num_blocks'])
params['curriculum'] = args['crcl']
if args['crcl']:
    directory_name += '_crcl'
path = os.path.join(path, directory_name)

seed_returns = []
seed_success_rates = []
for seed in seeds:
    # make env instance
    env = pmg.make_env(task=args['task'],
                       gripper='parallel_jaw',
                       num_block=args['num_blocks'],
                       render=args['render'],
                       binary_reward=True,
                       image_observation=False,
                       use_curriculum=args['crcl'],
                       num_goals_to_generate=num_total_episodes,
                       max_episode_steps=max_episode_steps)
    seed_path = path + '/seed' + str(seed)
    agent = GoalConditionedDDPG(algo_params=params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    seed_returns.append(agent.statistic_dict['epoch_test_return'])
    seed_success_rates.append(agent.statistic_dict['epoch_test_success_rate'])
    del env, agent
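# The source truncates after the seed loop. A hedged sketch of aggregating
# and saving the per-seed statistics with plain numpy (assuming numpy is
# imported as np in the truncated preamble; the original script may plot or
# post-process these results differently):
seed_returns = np.array(seed_returns)
seed_success_rates = np.array(seed_success_rates)
np.save(os.path.join(path, 'seed_returns.npy'), seed_returns)
np.save(os.path.join(path, 'seed_success_rates.npy'), seed_success_rates)
print('mean epoch test return across seeds:', seed_returns.mean(axis=0))
print('mean epoch test success rate across seeds:', seed_success_rates.mean(axis=0))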
# benchmark: average per-episode runtime of the original mujoco envs vs.
# pybullet_multigoal_gym. The top of this script is truncated in the source;
# the code below runs once per seed inside a loop that also constructs
# env_mujoco, starts the start_mujoco timer and initialises the costs lists.
for i in range(num_episodes):  # loop header restored, mirroring the pmg branch below
    env_mujoco.reset()
    done_mujoco = False
    while not done_mujoco:
        action = env_mujoco.action_space.sample()
        _, _, done_mujoco, _ = env_mujoco.step(action)
cost = timeit.default_timer() - start_mujoco
costs_mujoco.append(cost / num_episodes)
print("Seed {}, mujoco average runtime over 100 episodes: {}".format(
    seed, costs_mujoco[-1]))

# pmg loop
env_pmg = pmg.make_env(task='reach',
                       gripper='parallel_jaw',
                       render=False,
                       binary_reward=True,
                       max_episode_steps=50,
                       image_observation=False,
                       depth_image=False,
                       goal_image=False)
env_pmg.seed(seed)
start_pmg = timeit.default_timer()
for i in range(num_episodes):
    env_pmg.reset()
    done_pmg = False
    while not done_pmg:
        action = env_pmg.action_space.sample()
        _, _, done_pmg, _ = env_pmg.step(action)
cost = timeit.default_timer() - start_pmg
costs_pmg.append(cost / num_episodes)
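# the source cuts off here; by symmetry with the mujoco branch above, a
# per-seed report presumably follows:
print("Seed {}, pmg average runtime over 100 episodes: {}".format(
    seed, costs_pmg[-1]))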
import pybullet_multigoal_gym as pmg

# block_rearrange with a goal-generation curriculum. The camera_setup list is
# truncated in the source; the entries below carry placeholder view
# parameters apart from the values that survive in the fragment.
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],     # placeholder values
        'cameraTargetPosition': [-0.6, 0.05, 0.2],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],     # placeholder values
        'cameraTargetPosition': [-0.6, -0.05, 0.2],  # placeholder values
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    # task args
    task='block_rearrange',
    gripper='parallel_jaw',
    grip_informed_goal=False,
    num_block=4,  # only meaningful for multi-block tasks
    render=True,
    binary_reward=True,
    max_episode_steps=5,
    # image observation args
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
    # curriculum args
    use_curriculum=True,
    num_goals_to_generate=20)

"""You can expect the desired goal to change every once in a while based on
the current curriculum level, and to settle at the hardest level eventually."""
obs = env.reset()
env.activate_curriculum_update()
time_done = False
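# The source truncates here. A minimal sketch of the stepping loop, assuming
# random actions and a reset on every timeout so that the curriculum keeps
# generating (progressively harder) goals:
while True:
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    if time_done:
        obs = env.reset()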
# (the preamble of this training script is truncated in the source: imports
# and the definitions of args, params and seeds)

# directory for storing data
path = os.path.dirname(os.path.realpath(__file__))
directory_name = args['task']
if args['joint_ctrl']:
    directory_name += '_joint_ctrl'
if args['hindsight']:
    directory_name += '_her'
path = os.path.join(path, directory_name)

seed_returns = []
seed_success_rates = []
for seed in seeds:
    # make env instance
    env = pmg.make_env(task=args['task'],
                       gripper='parallel_jaw',
                       joint_control=args['joint_ctrl'],
                       render=args['render'],
                       binary_reward=True,
                       max_episode_steps=50)
    seed_path = path + '/seed' + str(seed)
    agent = GoalConditionedDDPG(algo_params=params, env=env, path=seed_path, seed=seed)
    agent.run(test=False)
    seed_returns.append(agent.statistic_dict['epoch_test_return'])
    seed_success_rates.append(agent.statistic_dict['epoch_test_success_rate'])
    del env, agent
import os
import numpy as np
import time
import pybullet_multigoal_gym as pmg

num_episodes = 32
env = pmg.make_env(task='block_rearrange',
                   gripper='parallel_jaw',
                   grip_informed_goal=False,
                   num_block=4,
                   render=True,
                   visualize_target=True,
                   binary_reward=True,
                   joint_control=False,
                   max_episode_steps=10000,
                   image_observation=False,
                   use_curriculum=True,
                   task_decomposition=False,
                   num_goals_to_generate=num_episodes)
env.activate_curriculum_update()
obs = env.reset(test=False)
time_done = False
while not time_done:
    # the original loop body is commented out from here on in the source
    # time.sleep(0.1)
    # action = env.action_space.sample() * 0
    # action[-1] = -1
    # obs, reward, time_done, info = env.step(action)
    # if time_done:
    #     env.reset(test=False)
    #     time_done = False
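    # The loop above has no active body in the source, which would be a
    # syntax error; a minimal active body mirroring the commented-out lines
    # (random actions, with a non-test reset on timeout so the curriculum
    # keeps generating goals):
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    if time_done:
        env.reset(test=False)
        time_done = False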