Example #1: pick-and-place with RGB goal images rendered from a custom camera setup.

import matplotlib.pyplot as plt
import pybullet_multigoal_gym as pmg

# Only the second camera survived in the original snippet; the first entry
# below is an assumed placeholder so that observation_cam_id=[0] and
# goal_cam_id=1 both resolve to a camera.
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],    # assumed placeholder pose
        'cameraTargetPosition': [-0.6, 0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],
        'cameraTargetPosition': [-0.6, -0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    task='pick_and_place',
    gripper='parallel_jaw',
    render=True,
    binary_reward=True,
    max_episode_steps=5,
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
)
obs = env.reset()
f, axarr = plt.subplots(1, 2)  # was missing in the original snippet; axarr is used below
t = 0
while True:
    t += 1
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    axarr[0].imshow(obs['desired_goal_img'])
    axarr[1].imshow(obs['achieved_goal_img'])
    plt.pause(0.00001)
    if done:
        obs = env.reset()
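"""If you are unsure which entries the observation dictionary carries for a
given flag combination, a quick inspection loop helps. A minimal sketch; the
exact key names depend on the image/depth/goal flags passed to make_env."""
for key, value in obs.items():
    print(key, getattr(value, 'shape', value))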
Example #2: insertion task with RGB-D (4-channel) observations and goal images.

import matplotlib.pyplot as plt
import pybullet_multigoal_gym as pmg

# Only the second camera survived in the original snippet; the first entry
# below is an assumed placeholder so that observation_cam_id=[1] resolves.
camera_setup = [
    {
        'cameraEyePosition': [-1.2, -0.0, 0.6],    # assumed placeholder pose
        'cameraTargetPosition': [-0.45, -0.0, 0.0],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-0.9, -0.0, 0.4],
        'cameraTargetPosition': [-0.45, -0.0, 0.0],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    }
]

env = pmg.make_env(task='insertion',
                   render=True,
                   binary_reward=True,
                   distance_threshold=0.05,
                   image_observation=True,
                   depth_image=True,
                   goal_image=True,
                   point_cloud=False,
                   state_noise=True,
                   visualize_target=False,
                   camera_setup=camera_setup,
                   observation_cam_id=[1],
                   goal_cam_id=-1,
                   gripper='parallel_jaw',
                   max_episode_steps=50)

obs = env.reset()
time_done = False
f, axarr = plt.subplots(2, 2)
while True:
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    axarr[0][0].imshow(obs['desired_goal_img'][:, :, :3])   # goal RGB
    # The remaining panels are an assumed completion; the original snippet
    # was cut off after the first imshow call.
    axarr[0][1].imshow(obs['desired_goal_img'][:, :, 3])    # goal depth
    axarr[1][0].imshow(obs['achieved_goal_img'][:, :, :3])  # achieved RGB
    axarr[1][1].imshow(obs['achieved_goal_img'][:, :, 3])   # achieved depth
    plt.pause(0.00001)
    if time_done:
        obs = env.reset()
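"""With depth_image=True the rendered arrays carry the depth map in the fourth
channel. For display it often helps to normalise it; a sketch (the raw value
range depends on the camera's near/far clipping planes):"""
import numpy as np

depth = obs['desired_goal_img'][:, :, 3]
depth_vis = (depth - depth.min()) / np.maximum(depth.ptp(), 1e-8)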
Example #3: block stacking with the task decomposed into sub-goals.

import pybullet_multigoal_gym as pmg

# Only the tail of the camera setup survived in the original snippet; both
# poses below are assumed placeholders.
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],    # assumed placeholder pose
        'cameraTargetPosition': [-0.6, 0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],   # assumed placeholder pose
        'cameraTargetPosition': [-0.6, -0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    # task args
    task='block_stack',
    gripper='parallel_jaw',
    grip_informed_goal=False,
    num_block=5,  # only meaningful for multi-block tasks
    render=True,
    binary_reward=True,
    max_episode_steps=25,
    # image observation args
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
    # task decomposition
    task_decomposition=True)
"""The desired goal changes as the subgoal is being set"""

obs = env.reset()
time_done = False
env.set_sub_goal(0)
t = 0
sub_goal_ind = 0
while True:
    t += 1
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    if time_done:
        # Cycle through the sub-goals of the decomposed stacking task. This is
        # an assumed completion (the original snippet was cut off here), and
        # the sub-goal count of 4 is illustrative only.
        obs = env.reset()
        sub_goal_ind = (sub_goal_ind + 1) % 4
        env.set_sub_goal(sub_goal_ind)
        time_done = False
Example #4: training a GoalConditionedDDPG agent over several seeds, with an
optional goal-generation curriculum ('crcl'). This fragment starts mid-script:
it assumes the imports and the args, params, seeds, num_total_episodes and
max_episode_steps variables are defined in the elided part above it.

    path = os.path.dirname(os.path.realpath(__file__))
    directory_name = args['task'] + '_' + str(args['num_blocks'])
    params['curriculum'] = args['crcl']
    if args['crcl']:
        directory_name += '_crcl'
    path = os.path.join(path, directory_name)

    seed_returns = []
    seed_success_rates = []
    for seed in seeds:
        # make env instance
        env = pmg.make_env(task=args['task'],
                           gripper='parallel_jaw',
                           num_block=args['num_blocks'],
                           render=args['render'],
                           binary_reward=True,
                           image_observation=False,
                           use_curriculum=args['crcl'],
                           num_goals_to_generate=num_total_episodes,
                           max_episode_steps=max_episode_steps)

        seed_path = path + '/seed' + str(seed)
        agent = GoalConditionedDDPG(algo_params=params,
                                    env=env,
                                    path=seed_path,
                                    seed=seed)
        agent.run(test=False)
        seed_returns.append(agent.statistic_dict['epoch_test_return'])
        seed_success_rates.append(
            agent.statistic_dict['epoch_test_success_rate'])
        del env, agent
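"""Once all seeds have finished, the per-seed statistics can be aggregated.
A minimal sketch, assuming each seed produced equal-length arrays:"""
import numpy as np

success = np.array(seed_success_rates)   # shape: (num_seeds, num_epochs)
print('mean success rate per epoch:', success.mean(axis=0))
print('std per epoch:', success.std(axis=0))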
Example #5: runtime comparison between a mujoco-based reach task and the pmg
reach task. This fragment also starts mid-script; the setup below is an
assumed reconstruction around the surviving loop bodies (the baseline env id
and the seed list are illustrative, not from the original).

import timeit
import gym
import pybullet_multigoal_gym as pmg

num_episodes = 100   # the prints below report averages over 100 episodes
costs_mujoco, costs_pmg = [], []
for seed in [11, 22, 33]:                    # assumed seed list
    # mujoco loop
    env_mujoco = gym.make('FetchReach-v1')   # assumed mujoco baseline env
    env_mujoco.seed(seed)
    start_mujoco = timeit.default_timer()
    for i in range(num_episodes):
        env_mujoco.reset()
        done_mujoco = False
        while not done_mujoco:
            action = env_mujoco.action_space.sample()
            _, _, done_mujoco, _ = env_mujoco.step(action)

    cost = timeit.default_timer() - start_mujoco
    costs_mujoco.append(cost / num_episodes)
    print("Seed {}, mujoco average runtime over 100 episodes: {}".format(
        seed, costs_mujoco[-1]))

    # pmg loop
    env_pmg = pmg.make_env(task='reach',
                           gripper='parallel_jaw',
                           render=False,
                           binary_reward=True,
                           max_episode_steps=50,
                           image_observation=False,
                           depth_image=False,
                           goal_image=False)
    env_pmg.seed(seed)

    start_pmg = timeit.default_timer()
    for i in range(num_episodes):
        env_pmg.reset()
        done_pmg = False
        while not done_pmg:
            action = env_pmg.action_space.sample()
            _, _, done_pmg, _ = env_pmg.step(action)

    cost = timeit.default_timer() - start_pmg
    costs_pmg.append(cost / num_episodes)
    # Mirrors the mujoco print above (assumed completion; the original
    # snippet was cut off here).
    print("Seed {}, pmg average runtime over 100 episodes: {}".format(
        seed, costs_pmg[-1]))
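"""A short summary across seeds once the benchmark finishes; a sketch:"""
print('mujoco mean episode runtime: {:.4f}s'.format(
    sum(costs_mujoco) / len(costs_mujoco)))
print('pmg mean episode runtime: {:.4f}s'.format(
    sum(costs_pmg) / len(costs_pmg)))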
Example #6: block rearrangement with a goal-generation curriculum.

import matplotlib.pyplot as plt
import pybullet_multigoal_gym as pmg

# Only the tail of the camera setup survived in the original snippet; both
# poses below are assumed placeholders.
camera_setup = [
    {
        'cameraEyePosition': [-1.0, 0.25, 0.6],    # assumed placeholder pose
        'cameraTargetPosition': [-0.6, 0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
    {
        'cameraEyePosition': [-1.0, -0.25, 0.6],   # assumed placeholder pose
        'cameraTargetPosition': [-0.6, -0.05, 0.2],
        'cameraUpVector': [0, 0, 1],
        'render_width': 224,
        'render_height': 224
    },
]

env = pmg.make_env(
    # task args
    task='block_rearrange',
    gripper='parallel_jaw',
    grip_informed_goal=False,
    num_block=4,  # only meaningful for multi-block tasks
    render=True,
    binary_reward=True,
    max_episode_steps=5,
    # image observation args
    image_observation=True,
    depth_image=False,
    goal_image=True,
    visualize_target=True,
    camera_setup=camera_setup,
    observation_cam_id=[0],
    goal_cam_id=1,
    # curriculum args
    use_curriculum=True,
    num_goals_to_generate=20)
"""You can expect the desired goal to change every once a while based on the current curriculum level,
and settle down at the hardest one finally."""

obs = env.reset()
env.activate_curriculum_update()
time_done = False
f, axarr = plt.subplots(1, 2)
while True:
    action = env.action_space.sample()
    obs, reward, time_done, info = env.step(action)
    axarr[0].imshow(obs['desired_goal_img'])
    axarr[1].imshow(obs['achieved_goal_img'])
    plt.pause(0.00001)
    if time_done:
        # Each reset draws a goal at the current curriculum level; an assumed
        # completion (the original snippet was cut off here).
        obs = env.reset()
        time_done = False
Example #7: training a GoalConditionedDDPG agent with optional joint-space
control and hindsight experience replay. This fragment starts mid-script: it
assumes the imports and the args, params and seeds variables are defined in
the elided part above it.

    # directory for storing data
    path = os.path.dirname(os.path.realpath(__file__))
    directory_name = args['task']
    if args['joint_ctrl']:
        directory_name += '_joint_ctrl'
    if args['hindsight']:
        directory_name += '_her'
    path = os.path.join(path, directory_name)

    seed_returns = []
    seed_success_rates = []
    for seed in seeds:
        # make env instance
        env = pmg.make_env(task=args['task'],
                           gripper='parallel_jaw',
                           joint_control=args['joint_ctrl'],
                           render=args['render'],
                           binary_reward=True,
                           max_episode_steps=50)

        seed_path = path + '/seed' + str(seed)

        agent = GoalConditionedDDPG(algo_params=params,
                                    env=env,
                                    path=seed_path,
                                    seed=seed)
        agent.run(test=False)
        seed_returns.append(agent.statistic_dict['epoch_test_return'])
        seed_success_rates.append(
            agent.statistic_dict['epoch_test_success_rate'])
        del env, agent
Example #8: smoke-testing curriculum goal generation on block_rearrange.

import time
import pybullet_multigoal_gym as pmg

num_episodes = 32
env = pmg.make_env(task='block_rearrange',
                   gripper='parallel_jaw',
                   grip_informed_goal=False,
                   num_block=4,
                   render=True,
                   visualize_target=True,
                   binary_reward=True,
                   joint_control=False,
                   max_episode_steps=10000,
                   image_observation=False,
                   use_curriculum=True,
                   task_decomposition=False,
                   num_goals_to_generate=num_episodes)

env.activate_curriculum_update()
obs = env.reset(test=False)
time_done = False
while not time_done:
    time.sleep(0.1)
    # Zero action with the last (gripper) command held at -1, so the arm stays
    # still while goals are generated. The original body was commented out,
    # which would leave the while-loop empty and raise a SyntaxError.
    action = env.action_space.sample() * 0
    action[-1] = -1
    obs, reward, time_done, info = env.step(action)
    if time_done:
        env.reset(test=False)
        time_done = False