def test_model(sampler, args, gripper=None):
    """Evaluate the GP's best policy on the true BusyBox and measure regret.

    :param sampler: A GP fit to the current BusyBox.
    :param args: Parsed command-line arguments (kept for call-site parity).
    :param gripper: Optional gripper to reuse; a fresh env is created when None.
    :return: Tuple of (regret, start_x, stop_x, policy_type).
    """
    # Maximize the GP mean (ucb disabled) to pick the most promising policy.
    use_ucb = False
    stop_x, stop_policy, start_x = sampler.optim.optimize_gp(use_ucb)

    # Roll the policy out in simulation, reusing the caller's gripper if given.
    show_viz, show_debug = False, False
    if gripper is None:
        _, gripper = setup_env(sampler.bb, show_viz, show_debug)
    else:
        gripper.reset(sampler.mech)
    handle_base_pose = sampler.mech.get_pose_handle_base_world()
    trajectory = stop_policy.generate_trajectory(handle_base_pose, debug=show_debug)
    _, net_motion, _ = gripper.execute_trajectory(
        trajectory, sampler.mech, stop_policy.type, debug=show_debug)

    # Regret = fraction of the achievable motion the policy failed to produce.
    achievable = sampler.mech.get_max_net_motion()
    regret = (achievable - net_motion) / achievable

    return regret, start_x, stop_x, stop_policy.type
def main(args):
    """Run the GP-UCB method or the random baseline over a BusyBox dataset.

    For each BusyBox, records how many interactions were needed to reach
    SUCCESS_REGRET, then writes the list of step counts to args.fname.
    """
    busybox_data = get_bb_dataset(args.bb_fname, args.N, args.mech_types, 1, args.urdf_num)

    all_steps = []
    for ix, bb_results in enumerate(busybox_data):
        if args.type == 'gpucb':
            # GP-UCB: the dataset builder loops until the success regret is hit
            # and reports the interaction count as its third return value.
            _, _, steps = create_single_bb_gpucb_dataset(
                bb_results[0], '', args.plot, args, ix, success_regret=SUCCESS_REGRET)
        elif args.type == 'random':
            steps = _random_steps_to_success(bb_results[0], args)
        else:
            # Unknown type: nothing recorded for this BusyBox (matches prior behavior).
            continue
        all_steps.append(steps)
        print('steps', steps)

    # Save the dataset.
    util.write_to_file(args.fname, all_steps)


def _random_steps_to_success(bb_result, args):
    """Interact with one BusyBox using random policies until success.

    Returns the number of interactions needed before regret drops to
    SUCCESS_REGRET or below.
    """
    bb = BusyBox.bb_from_result(bb_result)
    image_data, gripper = setup_env(bb, args.viz, args.debug)
    regret, steps = float("inf"), 0
    while regret > SUCCESS_REGRET:
        mech = bb._mechanisms[0]
        # Draw a random policy and roll it out on the mechanism.
        policy = policies.generate_policy(mech)
        pose_handle_world_init = mech.get_handle_pose()
        pose_handle_base_world = mech.get_pose_handle_base_world()
        traj = policy.generate_trajectory(pose_handle_base_world, args.debug, color=[0, 0, 1])
        cumu_motion, net_motion, pose_handle_world_final = \
            gripper.execute_trajectory(traj, mech, policy.type, args.debug)
        # Regret relative to the mechanism's maximum achievable net motion.
        max_dist = mech.get_max_net_motion()
        regret = (max_dist - net_motion) / max_dist
        steps += 1
        gripper.reset(mech)
    p.disconnect()
    return steps
def replay_result(result):
    """Re-execute a stored interaction in a visualized env for inspection.

    Rebuilds the BusyBox and policy from *result*, runs the trajectory with
    viz/debug on, then drops into pdb so the final state can be examined.
    """
    # Local imports keep this debugging helper self-contained.
    from actions.policies import get_policy_from_tuple
    from gen.generator_busybox import BusyBox
    from utils.setup_pybullet import setup_env

    busybox = BusyBox.bb_from_result(result)
    image_data, gripper = setup_env(busybox, True, True)
    mechanism = busybox._mechanisms[0]
    replayed_policy = get_policy_from_tuple(result.policy_params)

    handle_base_pose = mechanism.get_pose_handle_base_world()
    trajectory = replayed_policy.generate_trajectory(handle_base_pose, True)
    cumu_motion, net_motion, pose_handle_world_final = gripper.execute_trajectory(
        trajectory, mechanism, replayed_policy.type, True)

    # Intentional breakpoint: pause here to inspect the replayed simulation.
    import pdb
    pdb.set_trace()
    p.disconnect()
def get_true_ys(X_pred, mech, policy_params):
    """Simulate each policy parameterization in X_pred and return true motions.

    :param X_pred: Array of policy parameter vectors, one row per policy.
    :param mech: The mechanism to actuate (shared across all rollouts).
    :param policy_params: Policy parameterization passed to get_policy_from_x.
    :return: np.ndarray of net motions, one per row of X_pred.
    """
    Y_pred = np.zeros((X_pred.shape[0]))
    viz = False
    debug = False
    # Loop-invariant BusyBox dimensions (previously recreated every iteration).
    width, height = 0.6, 0.6
    for i, x in enumerate(X_pred):
        policy = policies.get_policy_from_x(mech, x, policy_params)
        # Fresh BusyBox/env per rollout so each policy starts from a clean state.
        bb = BusyBox.get_busybox(width, height, [mech])
        _, gripper = setup_env(bb, viz, debug)
        # calculate trajectory
        pose_handle_base_world = mech.get_pose_handle_base_world()
        traj = policy.generate_trajectory(pose_handle_base_world, debug)
        # execute trajectory (fix: pass the shared debug flag instead of a
        # hard-coded False, consistent with generate_trajectory above)
        _, net_motion, _ = gripper.execute_trajectory(traj, mech, policy.type, debug)
        Y_pred[i] = net_motion
        gripper.reset(mech)
    return Y_pred
def generate_dataset(args, git_hash):
    """Generate interaction data by executing random policies on BusyBoxes.

    :param args: Parsed CLI args (bb_fname, n_bbs, n_samples, viz, debug, ...).
    :param git_hash: Hash recorded in each Result for provenance.
    :return: The raw BusyBox dataset if n_samples == 0, else a list (one entry
             per BusyBox) of lists of util.Result interaction records.
    """
    bb_dataset = get_bb_dataset(args.bb_fname, args.n_bbs, args.mech_types,
                                args.max_mech, args.urdf_num)
    if args.n_samples == 0:
        return bb_dataset

    results = []
    for (i, bb_results) in enumerate(bb_dataset):
        bb = BusyBox.bb_from_result(bb_results[0])
        image_data, gripper = setup_env(bb, args.viz, args.debug)
        # fix: renamed from `bb_results`, which shadowed the loop variable above
        sample_results = []
        for j in range(args.n_samples):
            sys.stdout.write("\rProcessing sample %i/%i for busybox %i/%i"
                             % (j+1, args.n_samples, i+1, args.n_bbs))
            # fix: flush so the carriage-return progress line actually renders
            sys.stdout.flush()
            for mech in bb._mechanisms:
                # generate either a random or model-based policy and goal configuration
                policy = policies.generate_policy(mech)
                pose_handle_world_init = mech.get_handle_pose()
                # calculate trajectory
                pose_handle_base_world = mech.get_pose_handle_base_world()
                traj = policy.generate_trajectory(pose_handle_base_world,
                                                  args.debug, color=[0, 0, 1])
                # execute trajectory
                cumu_motion, net_motion, pose_handle_world_final = \
                    gripper.execute_trajectory(traj, mech, policy.type, args.debug)
                # save result data
                policy_params = policy.get_policy_tuple()
                mechanism_params = mech.get_mechanism_tuple()
                result = util.Result(policy_params, mechanism_params, net_motion, \
                            cumu_motion, pose_handle_world_init, pose_handle_world_final, \
                            image_data, git_hash)
                sample_results.append(result)
                gripper.reset(mech)
        results.append(sample_results)
        p.disconnect()
    print()
    return results
def get_bb_dataset(bb_fname, n_bbs, mech_types, max_mech, urdf_num):
    """Load BusyBoxes from a file, or generate *n_bbs* random ones.

    :param bb_fname: Path to a pre-generated BusyBox file; '' or None to generate.
    :param n_bbs: Number of BusyBoxes to generate or load.
    :param mech_types: Iterable of mechanism type names ('slider', 'door').
    :param max_mech: Maximum mechanisms per generated BusyBox.
    :param urdf_num: Tag passed through to the URDF generator.
    :return: List of single-element lists of util.Result records.
    """
    if bb_fname == '' or bb_fname is None:
        # No file given: build random BusyBoxes from scratch.
        print('Creating Busyboxes.')
        # Map type names to classes; unrecognized names are silently skipped
        # (matches prior behavior).
        type_map = {'slider': Slider, 'door': Door}
        mech_classes = [type_map[t] for t in mech_types if t in type_map]

        bb_dataset = []
        for _ in range(n_bbs):
            # TODO: i think there is a bug here...
            bb = BusyBox.generate_random_busybox(max_mech=max_mech,
                                                 mech_types=mech_classes,
                                                 urdf_tag=urdf_num)
            mechanism_params = bb._mechanisms[0].get_mechanism_tuple()
            image_data, gripper = setup_env(bb, False, False)
            bb_dataset.append([util.Result(None, mechanism_params, None, None,
                                           None, None, image_data, None)])
        print('BusyBoxes created.')
        return bb_dataset

    # Load in a file with predetermined BusyBoxes, truncated to n_bbs.
    return read_from_file(bb_fname)[:n_bbs]
def create_single_bb_gpucb_dataset(bb_result, nn_fname, plot, args, bb_i,
                                   n_interactions=None, plot_dir_prefix='',
                                   ret_regret=False, success_regret=None):
    """Run GP-UCB interactions on a single BusyBox and collect a dataset.

    Exactly one of two stopping modes is expected:
      - n_interactions: run that many interactions, then return
        (dataset, gps) or (dataset, gps, regret) when ret_regret is True.
      - success_regret: run until regret drops below this threshold or 100
        interactions elapse, then return (dataset, gps, ix) where ix is the
        interaction count.

    :param bb_result: util.Result describing the BusyBox to reconstruct.
    :param nn_fname: Filename of a neural-net prior for UCB_Interaction ('' for none).
    :param plot: Whether to visualize the GP with viz_circles when finishing.
    :param args: Parsed CLI args (urdf_num and debug are read here).
    :param bb_i: Index of this BusyBox (used for plot labeling).
    :param n_interactions: Fixed interaction budget (mode 1), or None.
    :param plot_dir_prefix: Directory prefix forwarded to viz_circles.
    :param ret_regret: In mode 1, also return the final regret.
    :param success_regret: Regret threshold for mode 2, or None.
    """
    use_cuda = False  # NOTE(review): unused here — presumably consumed elsewhere; confirm
    dataset = []
    viz = False
    debug = False
    # interact with BB
    bb = BusyBox.bb_from_result(bb_result, urdf_num=args.urdf_num)
    mech = bb._mechanisms[0]
    image_data, gripper = setup_env(bb, viz, debug)
    pose_handle_base_world = mech.get_pose_handle_base_world()
    sampler = UCB_Interaction(bb, image_data, plot, args, nn_fname=nn_fname)
    # Infinite interaction loop; all exits are via the return statements below.
    for ix in itertools.count():
        gripper.reset(mech)
        if args.debug:
            sys.stdout.write('\rProcessing sample %i' % ix)
        # if we are done sampling n_interactions OR we need to get regret after
        # each interaction
        if ((not n_interactions is None) and ix==n_interactions) or \
           (not success_regret is None):
            # Evaluate the current GP's best policy on the true mechanism.
            regret, start_x, stop_x, policy_type = test_model(sampler, args,
                                                              gripper=gripper)
            gripper.reset(mech)
            #print('Current regret', regret)
            # Plot annotations: optimizer start (green) / stop (red) points,
            # plus every sampled x (black) grouped by policy type.
            opt_points = (policy_type, [(start_x, 'g'), (stop_x, 'r')])
            sample_points = {'Prismatic': [(sample, 'k') for sample in sampler.xs['Prismatic']],
                             'Revolute': [(sample, 'k') for sample in sampler.xs['Revolute']]}
            # if done sampling n_interactions
            if (not n_interactions is None) and ix==n_interactions:
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                    plt.show()
                    input('Enter to close')
                    plt.close()
                if ret_regret:
                    return dataset, sampler.gps, regret
                else:
                    return dataset, sampler.gps
            # if got successful interaction or timeout
            elif (not success_regret is None) and \
                 ((regret < success_regret) or (ix >= 100)):
                if ix >= 100:
                    print('timeout interactions')
                elif regret < success_regret:
                    print('succcessful interaction!')
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                # Mode 2 return: interaction count at success/timeout.
                return dataset, sampler.gps, ix

        # sample a policy
        # image_data, gripper = setup_env(bb, False, debug)
        gripper.reset(mech)
        x, policy = sampler.sample()

        # execute
        traj = policy.generate_trajectory(pose_handle_base_world, debug=debug)
        c_motion, motion, handle_pose_final = gripper.execute_trajectory(traj, mech,
                                                                         policy.type,
                                                                         False)
        # NOTE(review): handle_pose_final is passed for BOTH the initial- and
        # final-pose slots of util.Result (compare generate_dataset, which
        # records pose_handle_world_init separately) — looks suspicious; confirm.
        result = util.Result(policy.get_policy_tuple(), mech.get_mechanism_tuple(),
                             motion, c_motion, handle_pose_final, handle_pose_final,
                             image_data, None)
        dataset.append(result)

        # update GP
        sampler.update(result, x)