def main(args): busybox_data = get_bb_dataset(args.bb_fname, args.N, args.mech_types, 1, args.urdf_num) all_steps = [] for ix, bb_results in enumerate(busybox_data): if args.type == 'gpucb': single_dataset, _, steps = create_single_bb_gpucb_dataset( bb_results[0], '', args.plot, args, ix, success_regret=SUCCESS_REGRET) all_steps.append(steps) print('steps', steps) elif args.type == 'random': bb = BusyBox.bb_from_result(bb_results[0]) image_data, gripper = setup_env(bb, args.viz, args.debug) regret = float("inf") steps = 0 while regret > SUCCESS_REGRET: mech = bb._mechanisms[0] # generate either a random or model-based policy and goal configuration policy = policies.generate_policy(mech) pose_handle_world_init = mech.get_handle_pose() # calculate trajectory pose_handle_base_world = mech.get_pose_handle_base_world() traj = policy.generate_trajectory(pose_handle_base_world, args.debug, color=[0, 0, 1]) # execute trajectory cumu_motion, net_motion, pose_handle_world_final = \ gripper.execute_trajectory(traj, mech, policy.type, args.debug) # calc regret max_dist = mech.get_max_net_motion() regret = (max_dist - net_motion) / max_dist steps += 1 # reset gripper.reset(mech) p.disconnect() all_steps.append(steps) print('steps', steps) # Save the dataset. util.write_to_file(args.fname, all_steps)
def replay_result(result): from gen.generator_busybox import BusyBox from utils.setup_pybullet import setup_env from actions.policies import get_policy_from_tuple bb = BusyBox.bb_from_result(result) image_data, gripper = setup_env(bb, True, True) mech = bb._mechanisms[0] policy = get_policy_from_tuple(result.policy_params) pose_handle_base_world = mech.get_pose_handle_base_world() traj = policy.generate_trajectory(pose_handle_base_world, True) cumu_motion, net_motion, pose_handle_world_final = gripper.execute_trajectory( traj, mech, policy.type, True) import pdb pdb.set_trace() p.disconnect()
def plot_test_set(): # plot test data angles and positions for (i, point) in enumerate(test_data[:10]): bb = BusyBox.bb_from_result(point) if p.getConnectionInfo()['isConnected']: p.disconnect() setup_pybullet.setup_env(bb, False, False) mech = bb._mechanisms[0] true_policy = policies.generate_policy(mech, False) pos = (true_policy.rigid_position[0], true_policy.rigid_position[2]) pos_ax.plot(*pos, 'm.') pos_ax.annotate(i, pos) if i == len(test_data): angle_ax.plot(true_policy.pitch, 2.0, 'k.', label='test data') else: angle_ax.plot(true_policy.pitch, 2.0, 'k.') angle_ax.annotate(i, (true_policy.pitch, 2.0))
def generate_dataset(args, git_hash): bb_dataset = get_bb_dataset(args.bb_fname, args.n_bbs, args.mech_types, args.max_mech, args.urdf_num) if args.n_samples == 0: return bb_dataset results = [] for (i, bb_results) in enumerate(bb_dataset): bb = BusyBox.bb_from_result(bb_results[0]) image_data, gripper = setup_env(bb, args.viz, args.debug) bb_results = [] for j in range(args.n_samples): sys.stdout.write("\rProcessing sample %i/%i for busybox %i/%i" % (j+1, args.n_samples, i+1, args.n_bbs)) for mech in bb._mechanisms: # generate either a random or model-based policy and goal configuration policy = policies.generate_policy(mech) pose_handle_world_init = mech.get_handle_pose() # calculate trajectory pose_handle_base_world = mech.get_pose_handle_base_world() traj = policy.generate_trajectory(pose_handle_base_world, args.debug, color=[0, 0, 1]) # execute trajectory cumu_motion, net_motion, pose_handle_world_final = \ gripper.execute_trajectory(traj, mech, policy.type, args.debug) # save result data policy_params = policy.get_policy_tuple() mechanism_params = mech.get_mechanism_tuple() result = util.Result(policy_params, mechanism_params, net_motion, \ cumu_motion, pose_handle_world_init, pose_handle_world_final, \ image_data, git_hash) bb_results.append(result) gripper.reset(mech) results.append(bb_results) p.disconnect() print() return results
def create_single_bb_gpucb_dataset(bb_result, nn_fname, plot, args, bb_i, n_interactions=None, plot_dir_prefix='', ret_regret=False, success_regret=None): use_cuda = False dataset = [] viz = False debug = False # interact with BB bb = BusyBox.bb_from_result(bb_result, urdf_num=args.urdf_num) mech = bb._mechanisms[0] image_data, gripper = setup_env(bb, viz, debug) pose_handle_base_world = mech.get_pose_handle_base_world() sampler = UCB_Interaction(bb, image_data, plot, args, nn_fname=nn_fname) for ix in itertools.count(): gripper.reset(mech) if args.debug: sys.stdout.write('\rProcessing sample %i' % ix) # if we are done sampling n_interactions OR we need to get regret after # each interaction if ((not n_interactions is None) and ix==n_interactions) or \ (not success_regret is None): regret, start_x, stop_x, policy_type = test_model(sampler, args, gripper=gripper) gripper.reset(mech) #print('Current regret', regret) opt_points = (policy_type, [(start_x, 'g'), (stop_x, 'r')]) sample_points = {'Prismatic': [(sample, 'k') for sample in sampler.xs['Prismatic']], 'Revolute': [(sample, 'k') for sample in sampler.xs['Revolute']]} # if done sampling n_interactions if (not n_interactions is None) and ix==n_interactions: if plot: viz_circles(util.GP_PLOT, image_data, mech, BETA, sample_points=sample_points, opt_points=opt_points, gps=sampler.gps, nn=sampler.nn, bb_i=bb_i, plot_dir_prefix=plot_dir_prefix) plt.show() input('Enter to close') plt.close() if ret_regret: return dataset, sampler.gps, regret else: return dataset, sampler.gps # if got successful interaction or timeout elif (not success_regret is None) and \ ((regret < success_regret) or (ix >= 100)): if ix >= 100: print('timeout interactions') elif regret < success_regret: print('succcessful interaction!') if plot: viz_circles(util.GP_PLOT, image_data, mech, BETA, sample_points=sample_points, opt_points=opt_points, gps=sampler.gps, nn=sampler.nn, bb_i=bb_i, plot_dir_prefix=plot_dir_prefix) return dataset, sampler.gps, ix # sample a policy # image_data, gripper = setup_env(bb, False, debug) gripper.reset(mech) x, policy = sampler.sample() # execute traj = policy.generate_trajectory(pose_handle_base_world, debug=debug) c_motion, motion, handle_pose_final = gripper.execute_trajectory(traj, mech, policy.type, False) result = util.Result(policy.get_policy_tuple(), mech.get_mechanism_tuple(), motion, c_motion, handle_pose_final, handle_pose_final, image_data, None) dataset.append(result) # update GP sampler.update(result, x)
def viz_train_test_data(train_data, test_data): import matplotlib.pyplot as plt from gen.generator_busybox import BusyBox n_inter_per_bb = 100 n_bbs = int(len(train_data) / n_inter_per_bb) training_dataset_size = n_bbs #10 plt.ion() angle_fig, angle_ax = plt.subplots() pos_fig, pos_ax = plt.subplots() def plot_test_set(): # plot test data angles and positions for (i, point) in enumerate(test_data[:10]): bb = BusyBox.bb_from_result(point) if p.getConnectionInfo()['isConnected']: p.disconnect() setup_pybullet.setup_env(bb, False, False) mech = bb._mechanisms[0] true_policy = policies.generate_policy(mech, False) pos = (true_policy.rigid_position[0], true_policy.rigid_position[2]) pos_ax.plot(*pos, 'm.') pos_ax.annotate(i, pos) if i == len(test_data): angle_ax.plot(true_policy.pitch, 2.0, 'k.', label='test data') else: angle_ax.plot(true_policy.pitch, 2.0, 'k.') angle_ax.annotate(i, (true_policy.pitch, 2.0)) dataset_sizes = list( range(training_dataset_size, n_bbs + 1, training_dataset_size)) pitches = [] # plot training data angles and positions for n in range(1, n_bbs + 1): point = train_data[(n - 1) * n_inter_per_bb] if p.getConnectionInfo()['isConnected']: p.disconnect() bb = BusyBox.bb_from_result(point) setup_pybullet.setup_env(bb, False, False) mech = bb._mechanisms[0] true_policy = policies.generate_policy(mech, False) pitches += [true_policy.pitch] pos = (true_policy.rigid_position[0], true_policy.rigid_position[2]) pos_ax.plot(*pos, 'c.') if n in dataset_sizes: angle_ax.clear() pos_ax.set_title('Positions of Training and Test Data, n_bbs=' + str(n)) pos_fig.savefig('dataset_imgs/poss_n_bbs' + str(n)) angle_ax.set_title('Historgram of Training Data Angle, n_bbs=' + str(n)) angle_ax.hist(pitches, 30) plot_test_set() #angle_fig.savefig('dataset_imgs/pitches_n_bbs'+str(n)) plt.show() input('enter to close') angle_ax.set_title('Historgram of Training Data Angle, n_bbs=' + str(n)) angle_ax.hist(pitches, 30) plot_test_set() #angle_fig.savefig('dataset_imgs/pitches_n_bbs'+str(n)) plt.show() input('enter to close')