def main():
    """Demo loop: run a scripted YumiReachAgent in ENV forever, rendering each frame."""
    env = gym.make(ENV)
    unwrapped = env.unwrapped
    mj_sim = unwrapped.sim
    env.render()  # create the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, mj_sim)
    # env.reset()
    # sim.data.qpos[:] = 0.0
    # sim.data.qvel[:] = 0.0
    # sim.step()
    agent = YumiReachAgent(env)
    obs, done = None, True
    while True:
        if done:
            obs = env.reset()
        action = agent.predict(obs)
        # print(u)
        obs, reward, done, _ = env.step(action)
        env.render()
        print(reward)
def main():
    """Manually drive HandPickAndPlace-v0 using the shared `selected_action` vector."""
    # env = gym.make('HandPickAndPlaceDense-v0')
    env = gym.make(
        'HandPickAndPlace-v0',
        ignore_rotation_ctrl=True,
        ignore_target_rotation=True,
        success_on_grasp_only=True,
        randomize_initial_arm_pos=True,
        randomize_initial_object_pos=True,
        # grasp_state=True,
        # grasp_state_reset_p=0.5,
        object_cage=True,
        object_id='teapot'
    )
    obs = env.reset()
    env.render()
    sim = env.unwrapped.sim
    add_selection_logger(env.unwrapped.viewer, sim)
    print('nconmax:', sim.model.nconmax)
    print('obs.shape:', obs['observation'].shape)

    global selected_action
    input_thread = Thread(target=action_thread)
    # input_thread.start()
    selected_action = np.zeros(27)

    while True:
        for _ in range(6):
            env.reset()
            # for val in np.linspace(-1, 1, 60):
            while True:
                env.render()
                action = selected_action.copy()
                # last 7 entries presumably the arm command — scaled down, then
                # zeroed in the shared vector so each input acts once (verify)
                action[-7:] *= 0.2
                selected_action[-7:] *= 0.0
                render_pose(env.unwrapped.get_table_surface_pose(),
                            env.unwrapped.viewer, unique_id=535)
                reward, done = env.step(action)[1:3]
def main():
    """Drive YumiBar-v1 with a scripted YumiBarAgent, resetting when episodes end."""
    env = gym.make('YumiBar-v1', randomize_initial_object_pos=True)
    unwrapped = env.unwrapped
    env.render()  # instantiate the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, unwrapped.sim)
    agent = YumiBarAgent(env, check_joint_limits=False, use_qp_solver=False)
    obs, done = None, True
    while True:
        if done:
            obs = env.reset()
        obs, reward, done, _ = env.step(agent.predict(obs))
        env.render()
        print(reward)
def main():
    """Drive YumiLift-v1 with a scripted YumiLiftAgent; agent state resets per episode."""
    env = gym.make('YumiLift-v1', randomize_initial_object_pos=True)
    unwrapped = env.unwrapped
    env.render()  # instantiate the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, unwrapped.sim)
    agent = YumiLiftAgent(env)
    obs, done = None, True
    while True:
        if done:
            agent.reset()  # this agent keeps internal state across steps
            obs = env.reset()
        obs, reward, done, _ = env.step(agent.predict(obs))
        env.render()
        print(reward)
def main():
    """Run a FetchPushAgent on FetchPickAndPlaceDense-v1, 100 steps per episode."""
    env = gym.make(
        'FetchPickAndPlaceDense-v1',
        reward_params=dict(stepped=True),
        # explicit_goal_distance=True,
        has_rotating_platform=False,
        has_button=False,
        object_id='sphere',
        has_object_box=False,
    )
    unwrapped = env.unwrapped  # type: FetchEnv
    env.render()  # create the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, unwrapped.sim)

    global selected_action
    # p = Thread(target=action_thread)
    # p.start()
    selected_action = np.zeros(4)

    # agent = FetchPickAndPlaceAgent(env)
    agent = FetchPushAgent(env)
    while True:
        obs = env.reset()
        for _ in range(100):
            env.render()
            # debugging toggles: the manual and random actions below are both
            # overwritten by the agent's prediction before stepping
            action = selected_action.copy()
            action = env.action_space.sample()
            action = agent.predict(obs)
            obs, reward, done, info = env.step(action)
            selected_action[:3] *= 0.0
            sleep(1 / 60)  # roughly real-time playback
            print('rew', reward)
def main():
    """Reset ENV repeatedly, accumulating goal-marker spheres (at most 300) in the viewer."""
    env = gym.make(ENV)
    unwrapped = env.unwrapped
    env.render()  # create the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, unwrapped.sim)
    env.reset()
    markers = deque(maxlen=300)

    def goal_marker(pos, rgba):
        # Small sphere marker at a goal position for the MuJoCo viewer.
        return dict(pos=pos, label="", type=mj_const.GEOM_SPHERE,
                    size=np.ones(3) * 0.05, rgba=rgba, specular=0.)

    while True:
        obs = env.reset()
        goal = obs['desired_goal']
        if len(markers) < markers.maxlen:
            # goal layout: first 3 entries left arm, next 3 right arm —
            # presumably, given the slicing below; verify against the env
            if unwrapped.has_left_arm:
                markers.append(goal_marker(goal[:3], np.r_[0.2, 0., 1., 1]))
            if unwrapped.has_right_arm:
                markers.append(goal_marker(goal[3:], np.r_[0.2, .7, 1., 1]))
        for marker in markers:
            unwrapped.viewer.add_marker(**marker)
        env.render()
def main():
    """Feed a fixed mirrored command to YumiStepped-v1 forever, rendering sub-steps."""
    env = gym.make('YumiStepped-v1', render_substeps=True)
    unwrapped = env.unwrapped
    env.render()  # create the viewer before attaching the selection logger
    add_selection_logger(unwrapped.viewer, unwrapped.sim)
    obs, done = None, True
    phase = 0.3  # constant direction angle for both arms
    while True:
        if done:
            obs = env.reset()
        # one arm commanded along (sin p, cos p), the other along the opposite direction
        command = np.r_[
            np.sin(phase), np.cos(phase), 0., 10.,
            np.sin(phase + np.pi), np.cos(phase + np.pi), 0., -10.,
        ]
        obs, reward, done, _ = env.step(command)
        env.render()
        print(reward)
def main():
    """Interactively drive HandPickAndPlaceStepped-v0.

    Mouse clicks/scrolls in the viewer set a shared 3D direction (`selected_point`);
    each step points all five fingertips along that direction, closes the hand with
    hard-coded distal-joint commands, and (for 18-dim actions) steers the arm toward
    the goal normalized into the forearm bounds.
    """
    env = gym.make(
        'HandPickAndPlaceStepped-v0',
        render_substeps=True,
    )
    obs = env.reset()
    global selected_action, selected_point
    p = Thread(target=action_thread)
    # p.start()
    selected_action = np.zeros(27)
    selected_point = np.zeros(3)

    def handle_click(selected_pt: np.ndarray, x, y):
        # Map viewer click coords (x, y in [0, 1] — presumably; verify) to [-1, 1].
        global selected_point
        selected_point[:2] = (x-.5)*2, (y-.5)*2
        print(x, y)

    def handle_scroll(pos):
        # Scroll position drives the z component of the shared direction.
        global selected_point
        selected_point[2] = pos
        print(pos)

    env.render()  # create the viewer before attaching input callbacks
    sim = env.unwrapped.sim_env.sim
    add_selection_logger(env.unwrapped.sim_env.viewer, sim, callback=handle_click)
    add_scroll_callback(env.unwrapped.sim_env.viewer, callback=handle_scroll)
    print('nconmax:', sim.model.nconmax)
    print('obs.shape:', obs['observation'].shape)
    for i in it.count():
        for j in range(6):
            obs = env.reset()
            # for val in np.linspace(-1, 1, 60):
            while True:
                env.render()
                action = np.zeros(env.action_space.shape)
                if action.size == 18:
                    # Normalize the goal position into [-1, 1] per axis using the
                    # forearm bounds, and write it to the arm part of the action.
                    goal = obs['desired_goal'][:3].copy()
                    arm_bounds = np.array(env.unwrapped.sim_env.forearm_bounds).T
                    goal -= arm_bounds.mean(axis=1)
                    goal /= np.abs(arm_bounds[:, 1] - arm_bounds[:, 0]) / 2.0
                    action[15:] = goal
                # lfdistal: 3, rfdistal: 2, mfdistal: 1, ffdistal: 0, thdistal: 4
                # Unit direction from the user's selection; +1e-10 avoids a zero norm
                # (note: the epsilon is added per-component before taking the norm).
                sel_dir = selected_point / np.linalg.norm(selected_point + 1e-10, ord=2)
                for idx in range(5):
                    # Each finger gets the same 3D direction (5 fingers x 3 dims).
                    action[idx*3:(idx+1)*3] = sel_dir
                closed_pos = 0.5  # distal-joint magnitude used to close the grip
                # thdistal
                action[12] = -closed_pos
                action[13] = closed_pos
                # ffdistal
                action[0] = closed_pos
                action[1] = closed_pos
                # mfdistal
                action[3] = closed_pos
                action[4] = 0.0
                # lfdistal
                action[9] = -closed_pos
                action[10] = -closed_pos
                # rfdistal
                action[6] = closed_pos
                action[7] = -closed_pos
                obs, rew, done = env.step(action)[:3]
                # action = selected_action.copy()
                # action[-7:] *= 0.2
                # selected_action[-7:] *= 0.0
                # env.unwrapped.sim_env.step(action)
                print(rew)
                if done:
                    obs = env.reset()
def main():
    """Run a scripted YumiConstrainedAgent on YumiConstrained-v2 forever.

    Tracks steps-to-success per episode and the per-axis min/max of goals that
    were actually reached (`reachability`), plus a count of episodes that ended
    without success. Most of the reporting prints are currently commented out.
    """
    env = gym.make('YumiConstrained-v2',
                   reward_type='sparse',
                   render_poses=False,
                   has_rotating_platform=False,
                   has_button=False,
                   has_object_box=False,
                   object_id="fetch_sphere")
    raw_env = env.unwrapped
    sim = raw_env.sim
    env.render()  # create the viewer before attaching the selection logger
    add_selection_logger(raw_env.viewer, sim)
    agent = YumiConstrainedAgent(env)
    done = True
    obs = None
    steps_to_success = []
    n_steps = 0
    # rows: [per-axis minimum, per-axis maximum] of reached goals
    reachability = np.zeros((2, 3))
    reachability[0] = np.inf
    reachability[1] = -np.inf
    unreachable_eps = 0
    tot_eps = 0
    for i in it.count():
        if done:
            obs = env.reset()
            n_steps = 0
            tot_eps += 1
        # center_pos = obs['observation'][:3]
        # achieved_goal = obs['achieved_goal']
        # err = achieved_goal - center_pos
        #
        # u = np.zeros(env.action_space.shape)
        # # u[1:4] = err * 10.0
        # Debug toggles: the sampled/square-wave action below is overwritten by
        # the agent's prediction before stepping.
        u = env.action_space.sample()
        u[0] = -1 if i // 8 % 2 == 0 else 1
        u[1:] = 0.
        u = agent.predict(obs)
        render_pose(env.unwrapped.get_table_surface_pose(), env.unwrapped.viewer, unique_id=535)
        obs, rew, done, _ = env.step(u)
        # done = False
        n_steps += 1
        env.render()
        if rew == 0.0:  # sparse reward: zero signals success
            steps_to_success.append(n_steps)
            arr = np.asarray(steps_to_success)
            # print('min', arr.min(), 'max', arr.max(), 'avg', arr.mean(), 'std', arr.std())
            done = True  # force a reset on the next iteration
            goal = obs['desired_goal'].copy()
            reachability[0] = np.minimum(reachability[0], goal)
            reachability[1] = np.maximum(reachability[1], goal)
            # print(reachability)
            # print(unreachable_eps / tot_eps)
            # print()
        elif done:
            # Episode ended (env-side) without the reward ever reaching zero.
            unreachable_eps += 1
def test_reachability(render=False):
    """Empirically probe the reachable goal/start box of YumiConstrainedLong-v2.

    Runs the scripted agent indefinitely, tracking per-axis min/max of goals
    reached in at least 10 steps and of the matching initial object positions,
    plus the fraction of episodes that ended without success. Stats are printed
    at the end of every episode.
    """
    env = gym.make('YumiConstrainedLong-v2', reward_type='sparse', render_poses=False,
                   has_rotating_platform=False, has_button=False, extended_bounds=True)
    if render:
        unwrapped = env.unwrapped
        env.render()  # the viewer must exist before the selection logger attaches
        add_selection_logger(unwrapped.viewer, unwrapped.sim)
    agent = YumiConstrainedAgent(env)

    # rows: [per-axis minimum, per-axis maximum]
    goal_bounds = np.zeros((2, 3))
    goal_bounds[0], goal_bounds[1] = np.inf, -np.inf
    init_bounds = goal_bounds.copy()

    obs = init_pos = None
    done = True
    step_count = failed_episodes = episode_count = 0
    while True:
        if done:
            obs = env.reset()
            init_pos = obs['achieved_goal'].copy()
            step_count = 0
            episode_count += 1
        obs, reward, done, _ = env.step(agent.predict(obs))
        step_count += 1
        if render:
            env.render()
        if reward == 0.0:  # sparse reward: zero signals success
            done = True
            if step_count >= 10:  # skip trivially-close goals
                goal = obs['desired_goal'].copy()
                goal_bounds[0] = np.minimum(goal_bounds[0], goal)
                goal_bounds[1] = np.maximum(goal_bounds[1], goal)
                init_bounds[0] = np.minimum(init_bounds[0], init_pos)
                init_bounds[1] = np.maximum(init_bounds[1], init_pos)
        elif done:
            failed_episodes += 1
        if done:
            print(goal_bounds)
            print(init_bounds)
            print(failed_episodes / episode_count)
            print()