def _init_env(goal_pose_json, difficulty):
    """Build a visualised, simulated training env for the given difficulty.

    The goal pose arrives JSON-encoded; the action space is looked up in
    ``difficulty2config``.  Global RNGs are seeded to 0 so evaluation runs
    are reproducible.
    """
    from code.utils import set_seed

    # Fixed evaluation settings — only the action space varies by difficulty.
    config = dict(
        action_space=difficulty2config[difficulty]['action_space'],
        frameskip=3,
        reward_fn='competition_reward',
        termination_fn='no_termination',
        initializer='random_init',
        monitor=False,
        visualization=True,
        sim=True,
        rank=0,
    )
    set_seed(0)
    return make_training_env(json.loads(goal_pose_json), difficulty, **config)
# NOTE(review): the statements below are the tail of a method whose ``def``
# lies earlier in the file (it reads ``self.obs`` and a local ``actual_rot``);
# the indentation here is a best-effort reconstruction — confirm against the
# full file.
        goal_rot = Rotation.from_quat(self.obs['goal_object_orientation'])
        y_axis = [0, 1, 0]
        # Rotate the y-axis into the goal frame and into the actual frame,
        # then find the quaternion mapping the actual direction onto the goal
        # direction.
        goal_direction_vector = goal_rot.apply(y_axis)
        actual_direction_vector = actual_rot.apply(y_axis)
        quat = get_rotation_between_vecs(actual_direction_vector,
                                         goal_direction_vector)
        # Compose that corrective rotation with the actual orientation.
        return (Rotation.from_quat(quat) * actual_rot).as_quat()


if __name__ == '__main__':
    # Manual smoke test: run the state machine in a visualised simulation.
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    import dl

    seed = 0
    dl.rng.seed(seed)
    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            4,
                            reward_fn='competition_reward',
                            termination_fn='position_close_to_goal',
                            initializer='centered_init',
                            action_space='torque_and_position',
                            sim=True,
                            visualization=True,
                            episode_length=10000,
                            rank=seed)
    state_machine = TriFingerStateMachine(env)
    state_machine.run()
# NOTE(review): the statements below are the tail of a planner method
# (presumably ``WholeBodyPlanner.plan`` — it reads ``self.env``) whose
# ``def`` lies earlier in the file; indentation is reconstructed — confirm
# against the full file.
        path = Path(cube_path, joint_conf_path, tip_path, grasp)
        # Clamp the path so it never dips below path_min_height.
        path.set_min_height(self.env, path_min_height)
        return path


if __name__ == '__main__':
    # Manual smoke test: plan a whole-body path for a sampled goal.
    from trifinger_simulation.tasks import move_cube
    from code.make_env import make_training_env

    reward_fn = 'competition_reward'
    termination_fn = 'position_close_to_goal'
    initializer = 'small_rot_init'
    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            4,
                            reward_fn=reward_fn,
                            termination_fn=termination_fn,
                            initializer=initializer,
                            action_space='position',
                            sim=True,
                            visualization=True)
    for i in range(1):
        obs = env.reset()
        pos = obs["object_position"]
        quat = obs["object_orientation"]
        goal_pos = obs["goal_object_position"]
        goal_quat = obs["goal_object_orientation"]
        planner = WholeBodyPlanner(env)
        # NOTE(review): the call below is truncated in this chunk — its
        # remaining arguments continue past the end of the visible source.
        path = planner.plan(pos, quat, goal_pos,
# NOTE(review): the statements below are the tail of a pre-grasp computation
# function whose ``def`` lies earlier in the file; indentation is
# reconstructed — confirm against the full file.
    pregrasp_tip_pos.append(tip_pos)
    pregrasp_jconfs.append(qs[0])
    # Return the most recently appended joint configuration and tip position.
    return pregrasp_jconfs[-1], pregrasp_tip_pos[-1]


if __name__ == '__main__':
    # Manual smoke test: compute a heuristic grasp and execute the approach
    # in a visualised simulation.
    import pybullet as p
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    from code.grasping.grasp_functions import get_heuristic_grasp

    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            3,
                            reward_fn='competition_reward',
                            termination_fn='position_close_to_goal',
                            initializer='training_init',
                            action_space='torque',
                            sim=True,
                            visualization=True,
                            rank=1)
    obs = env.reset()
    # Frame the PyBullet GUI camera on the arena.
    p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                 cameraYaw=0,
                                 cameraPitch=-40,
                                 cameraTargetPosition=[0, 0, 0])
    grasp = get_heuristic_grasp(env,
                                obs['object_position'],
                                obs['object_orientation'])
    obs, done = execute_grasp_approach(env, obs, grasp)
if __name__ == '__main__':
    # Manual smoke test: repeatedly plan a grasp at difficulty 4 in a
    # visualised simulation.
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    from code.grasping import get_planned_grasp, execute_grasp_approach
    from code.utils import set_seed
    # NOTE(review): a relative import will fail when this file is run as a
    # plain script — confirm it is executed via ``python -m``.
    from .fc import ForceControlPolicy

    set_seed(0)
    viz = True
    difficulty = 4
    is_level_4 = difficulty == 4
    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            difficulty,
                            reward_fn='competition_reward',
                            termination_fn='no_termination',
                            initializer='training_init',
                            action_space='torque_and_position',
                            sim=True,
                            visualization=viz,
                            rank=0)
    env.reset()
    for _ in range(5):
        obs = env.reset()
        # Hide the GUI side panels and frame the camera on the arena.
        p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
        p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                     cameraYaw=0,
                                     cameraPitch=-40,
                                     cameraTargetPosition=[0, 0, 0])
        # NOTE(review): the call below is truncated in this chunk — its
        # remaining arguments continue past the end of the visible source.
        grasp, path = get_planned_grasp(env,
def main(args):
    """Run ``args.num_episodes`` zero-action episodes and report statistics.

    A simulated training env is created for ``args.difficulty``; each episode
    steps a zeroed action (so only the initializer/alignment phase and the
    passive dynamics matter) and accumulates the competition reward.  Per-
    episode and aggregate statistics are printed to stdout.

    Expected ``args`` attributes: ``difficulty``, ``policy``, ``seed``,
    ``num_episodes``.
    """
    goal = move_cube.sample_goal(args.difficulty)
    goal_dict = {'position': goal.position, 'orientation': goal.orientation}
    eval_config = {
        'cube_goal_pose': goal_dict,
        'goal_difficulty': args.difficulty,
        # 'fc' (force control) uses pure torques; anything else uses the
        # combined torque-and-position action space.
        'action_space': 'torque' if args.policy == 'fc' else 'torque_and_position',
        'frameskip': 3,
        'reward_fn': 'competition_reward',
        'termination_fn': 'no_termination',
        'initializer': 'training_init',
        'sim': True,
        'monitor': False,
        'rank': args.seed,
        'training': True
    }
    env = make_training_env(visualization=True, **eval_config)

    acc_rewards = []
    wallclock_times = []
    aligning_steps = []
    env_steps = []
    avg_rewards = []
    for i in range(args.num_episodes):
        start = time.time()
        is_done = False
        observation = env.reset()
        accumulated_reward = 0
        # Steps consumed before control starts (cube alignment during reset).
        aligning_steps.append(env.unwrapped.step_count)

        # Clear some windows in GUI.
        p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
        # Change camera parameters; you can also rotate the camera by
        # CTRL + drag.
        p.resetDebugVisualizerCamera(cameraDistance=0.6, cameraYaw=0,
                                     cameraPitch=-40,
                                     cameraTargetPosition=[0, 0, 0])

        step = 0
        while not is_done and step < 2000:
            step += 1
            # Sample then zero the action: keeps it well-formed for either
            # action space while applying no control.
            action = env.action_space.sample()
            if isinstance(action, dict):
                action['torque'] *= 0
                action['position'] *= 0
            else:
                action *= 0
            observation, reward, is_done, info = env.step(action)
            accumulated_reward += reward

        acc_rewards.append(accumulated_reward)
        env_steps.append(env.unwrapped.step_count - aligning_steps[-1])
        avg_rewards.append(accumulated_reward / env_steps[-1])
        print("Episode {}\tAccumulated reward: {}".format(
            i, accumulated_reward))
        print("Episode {}\tAlinging steps: {}".format(i, aligning_steps[-1]))
        print("Episode {}\tEnv steps: {}".format(i, env_steps[-1]))
        print("Episode {}\tAvg reward: {}".format(i, avg_rewards[-1]))
        end = time.time()
        print('Elapsed:', end - start)
        wallclock_times.append(end - start)
    env.close()

    def _print_stats(name, data):
        # Summary statistics for one per-episode metric series.
        # FIX: the 'Max {name}' f-string was split by a stray newline in the
        # original, and the median used ':2f' (field width 2) instead of
        # ':.2f' (two decimals) like every other line here.
        print('======================================')
        print(f'Mean {name}\t{np.mean(data):.2f}')
        print(f'Max {name}\t{max(data):.2f}')
        print(f'Min {name}\t{min(data):.2f}')
        print(f'Median {name}\t{statistics.median(data):.2f}')

    print('Total elapsed time\t{:.2f}'.format(sum(wallclock_times)))
    print('Mean elapsed time\t{:.2f}'.format(
        sum(wallclock_times) / len(wallclock_times)))
    _print_stats('acc reward', acc_rewards)
    _print_stats('aligning steps', aligning_steps)
    _print_stats('step reward', avg_rewards)