def get_goal_flags(self): n_key_configs = self.collision_vector.shape[1] is_goal_obj = utils.convert_binary_vec_to_one_hot( np.array([self.obj in self.goal_entities])) is_goal_obj = np.tile(is_goal_obj, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) is_goal_region = utils.convert_binary_vec_to_one_hot( np.array([self.region in self.goal_entities])) is_goal_region = np.tile(is_goal_region, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) return np.concatenate([is_goal_obj, is_goal_region], axis=2)
def create_state_vec(key_config_obstacles, action, goal_entities): obj = action.discrete_parameters['object'] region = action.discrete_parameters['region'] one_hot = utils.convert_binary_vec_to_one_hot(key_config_obstacles) is_goal_obj = utils.convert_binary_vec_to_one_hot( np.array([obj in goal_entities])) is_goal_region = utils.convert_binary_vec_to_one_hot( np.array([region in goal_entities])) state_vec = np.vstack([one_hot, is_goal_obj, is_goal_region]) state_vec = state_vec.reshape((1, len(state_vec), 2, 1)) return state_vec
def sample_placements(self, pose_ids, collisions, n_smpls): if self.v_manip is None: v_manip = compute_v_manip(self.abstract_state, self.abstract_state.goal_entities[:-1], self.key_configs) v_manip = utils.convert_binary_vec_to_one_hot( v_manip.squeeze()).reshape((1, len(self.key_configs), 2, 1)) v_manip = np.tile(v_manip, (n_smpls, 1, 1, 1)) self.v_manip = v_manip state_vec = np.concatenate([collisions, self.v_manip], axis=2) if 'center_shelf' in self.region: chosen_sampler = self.samplers['place_obj_region'] place_samples = chosen_sampler.generate(state_vec, pose_ids) place_samples = np.array([ utils.decode_pose_with_sin_and_cos_angle(s) for s in place_samples ]) elif 'rectangular_packing_box1_region' in self.region: chosen_sampler = self.samplers['place_goal_region'] place_samples = np.array( [chosen_sampler.sample() for _ in range(n_smpls)]) else: raise NotImplementedError return place_samples
def get_konf_obstacles_from_key_configs(self): collision_vec = [] for k in self.key_configs: utils.set_rightarm_torso(k, self.abstract_state.problem_env.robot) col = self.problem_env.env.CheckCollision(self.problem_env.robot) collision_vec.append(col) collision_vec = utils.convert_binary_vec_to_one_hot(np.array(collision_vec)) return collision_vec.reshape((1, len(collision_vec), 2, 1))
def get_konf_obstacles(self, obj_pose_and_obj_name_to_prm_indices_in_collision): prm_indices_in_collision = list(set.union(*obj_pose_and_obj_name_to_prm_indices_in_collision.values())) key_configs = self.key_configs collision_vector = np.zeros((len(key_configs))) collision_vector[prm_indices_in_collision] = 1 collision_vector = utils.convert_binary_vec_to_one_hot(collision_vector) collision_vector = collision_vector.reshape((1, len(collision_vector), 2, 1)) return collision_vector
def compute_collision_and_make_predictions(qg, key_configs, net): collisions = utils.compute_occ_vec(key_configs) col = utils.convert_binary_vec_to_one_hot(collisions.squeeze()) vertex = make_vertices(qg, key_configs, col, net) output = net(vertex) # print output, (output > 0.5).numpy()[0, 0] # print output return (output > 0.5).numpy()[0, 0]
def process_abstract_state_collisions_into_key_config_obstacles(self, abstract_state): n_vtxs = len(abstract_state.prm_vertices) collision_vector = np.zeros((n_vtxs)) colliding_vtx_idxs = self.get_colliding_prm_idxs(abstract_state) collision_vector[colliding_vtx_idxs] = 1 collision_vector = utils.convert_binary_vec_to_one_hot(collision_vector) collision_vector = collision_vector.reshape((len(collision_vector), 2)) return collision_vector
def get_augmented_state_vec_and_poses(obj, state_vec, smpler_state): n_key_configs = 615 utils.set_color(obj, [1, 0, 0]) is_goal_obj = utils.convert_binary_vec_to_one_hot( np.array([obj in smpler_state.goal_entities])) is_goal_obj = np.tile(is_goal_obj, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) is_goal_region = utils.convert_binary_vec_to_one_hot( np.array([smpler_state.region in smpler_state.goal_entities])) is_goal_region = np.tile(is_goal_region, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) state_vec = np.concatenate([state_vec, is_goal_obj, is_goal_region], axis=2) poses = np.hstack([ utils.encode_pose_with_sin_and_cos_angle( utils.get_body_xytheta(obj).squeeze()), 0, 0, 0, 0 ]).reshape((1, 8)) return state_vec, poses
def get_collison_vector(self, given_collision_vector): if given_collision_vector is None: collision_vector = utils.compute_occ_vec(self.key_configs) else: collision_vector = given_collision_vector collision_vector = utils.convert_binary_vec_to_one_hot( collision_vector) collision_vector = collision_vector.reshape( (1, len(collision_vector), 2, 1)) return collision_vector
def sample_new_points(self, n_smpls): stime11 = time.time() stime = time.time() poses = data_processing_utils.get_processed_poses_from_state( self.smpler_state, None)[None, :] if 'rectangular' in self.obj: object_id = [1, 0] else: object_id = [0, 1] object_id = np.array(object_id)[None, :] pose_ids = np.hstack([poses, object_id]) pose_ids = np.tile(pose_ids, (n_smpls, 1)) collisions = self.smpler_state.pick_collision_vector if self.state_vec is None: stime = time.time() v_manip = compute_v_manip(self.abstract_state, self.abstract_state.goal_entities[:-1]) v_manip = utils.convert_binary_vec_to_one_hot( v_manip.squeeze()).reshape((1, 618, 2, 1)) self.state_vec = np.concatenate([collisions, v_manip], axis=2) self.state_vec = np.tile(self.state_vec, (n_smpls, 1, 1, 1)) print 'state_vec creation time', time.time() - stime collisions = np.tile(collisions, (n_smpls, 1, 1, 1)) print "input processing time", time.time() - stime stime = time.time() pick_samples = self.sample_picks(pose_ids, collisions) print "Pick sampling time", time.time() - stime stime = time.time() place_samples = self.sample_placements(pose_ids, collisions, pick_samples, n_smpls) print "Place sampling time", time.time() - stime samples = np.hstack([pick_samples, place_samples]) print "Total sampling time", time.time() - stime11 return samples
def predict(self, op_parameters, abstract_state, abstract_action): collisions = self.process_abstract_state_collisions_into_key_config_obstacles(abstract_state) key_configs = abstract_state.prm_vertices pick_qg = op_parameters['pick']['q_goal'] v = self.make_vertices(pick_qg, key_configs, collisions) is_pick_reachable = ((self.pick_net(v) > 0.5).cpu().numpy() == True)[0, 0] if is_pick_reachable: # I have to hold the object and check collisions orig_config = utils.get_robot_xytheta() target_obj = abstract_action.discrete_parameters['object'] utils.two_arm_pick_object(target_obj, {'q_goal': pick_qg}) collisions = utils.compute_occ_vec(key_configs) collisions = utils.convert_binary_vec_to_one_hot(collisions) utils.two_arm_place_object({'q_goal': pick_qg}) utils.set_robot_config(orig_config) place_qg = op_parameters['place']['q_goal'] v = self.make_vertices(place_qg, key_configs, collisions) pred = self.place_net(v) is_place_reachable = ((pred > 0.5).cpu().numpy() == True)[0, 0] return is_place_reachable else: return False
def load_data(traj_dir): traj_files = os.listdir(traj_dir) cache_file_name = 'cache_state_data_mode_%s_action_data_mode_%s.pkl' % ( state_data_mode, action_data_mode) if os.path.isfile(traj_dir + cache_file_name): print "Loading the cache file", traj_dir + cache_file_name return pickle.load(open(traj_dir + cache_file_name, 'r')) print 'caching file...' all_states = [] all_actions = [] all_sum_rewards = [] all_poses = [] all_rel_konfs = [] key_configs = pickle.load(open('prm.pkl', 'r'))[0] key_configs = np.delete(key_configs, [415, 586, 615, 618, 619], axis=0) for traj_file in traj_files: if 'pidx' not in traj_file: continue traj = pickle.load(open(traj_dir + traj_file, 'r')) if len(traj.states) == 0: continue # states = np.array([s.state_vec for s in traj.states]) # collision vectors states = [] for s in traj.states: # state_vec = np.delete(s.state_vec, [415, 586, 615, 618, 619], axis=1) state_vec = s.collision_vector n_key_configs = state_vec.shape[1] is_goal_obj = utils.convert_binary_vec_to_one_hot( np.array([s.obj in s.goal_entities])) is_goal_obj = np.tile(is_goal_obj, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) is_goal_region = utils.convert_binary_vec_to_one_hot( np.array([s.region in s.goal_entities])) is_goal_region = np.tile(is_goal_region, (n_key_configs, 1)).reshape( (1, n_key_configs, 2, 1)) state_vec = np.concatenate( [state_vec, is_goal_obj, is_goal_region], axis=2) states.append(state_vec) states = np.array(states) poses = np.array( [get_processed_poses_from_state(s) for s in traj.states]) actions = np.array([ get_processed_poses_from_action(s, a) for s, a in zip(traj.states, traj.actions) ]) for s in traj.states: rel_konfs = make_konfs_relative_to_pose(s.abs_obj_pose, key_configs) all_rel_konfs.append(np.array(rel_konfs).reshape((1, 615, 4, 1))) rewards = traj.rewards sum_rewards = np.array( [np.sum(traj.rewards[t:]) for t in range(len(rewards))]) all_poses.append(poses) all_states.append(states) all_actions.append(actions) all_sum_rewards.append(sum_rewards) all_rel_konfs = np.vstack(all_rel_konfs) all_states = np.vstack(all_states).squeeze(axis=1) all_actions = np.vstack(all_actions) all_sum_rewards = np.hstack( np.array(all_sum_rewards))[:, None] # keras requires n_data x 1 all_poses = np.vstack(all_poses).squeeze() pickle.dump( (all_states, all_poses, all_rel_konfs, all_actions, all_sum_rewards), open(traj_dir + cache_file_name, 'wb')) return all_states, all_poses, all_rel_konfs, all_actions, all_sum_rewards[:, None]
def load_pos_neu_data(self, action_data_mode): traj_dir = self.get_data_dir() traj_files = os.listdir(traj_dir) cache_file_name = self.get_cache_file_name(action_data_mode) if os.path.isfile(traj_dir + cache_file_name): print "Loading the cache file", traj_dir + cache_file_name f = pickle.load(open(traj_dir + cache_file_name, 'r')) print "Cache data loaded" return f n_episodes = 0 all_states = [] all_actions = [] all_poses_ids = [] all_labels = [] for traj_file_idx, traj_file in enumerate(traj_files): if 'pidx' not in traj_file: print 'not pkl file' continue traj = pickle.load(open(traj_dir + traj_file, 'r')) if len(traj['positive_data'] + traj['neutral_data']) == 0: continue states = [] poses_ids = [] actions = [] data = traj['positive_data'] + traj['neutral_data'] temp_labels = [1] * len(traj['positive_data']) + [0] * len( traj['neutral_data']) labels = [] assert 'two_arm' in data[0][ 'action'].type, 'change the reward condition on region for one_arm' for node, temp_label in zip(data, temp_labels): is_neutral_data = temp_label == 0 reward = is_neutral_data or\ (node['parent_n_in_way'] - node['n_in_way'] > 0) or \ (node['parent_n_in_way']==0 and node['n_in_way']==0 and\ node['action'].discrete_parameters['place_region'] == 'home_region') s = node['concrete_state'] if self.we_should_skip_this_state_and_action(s, reward): continue else: labels.append(temp_label) collision_vec = s.pick_collision_vector v_manip_goal = node['parent_v_manip'] if 'two_arm' in self.config.domain: v_manip_vec = utils.convert_binary_vec_to_one_hot( v_manip_goal.squeeze()).reshape((1, 618, 2, 1)) else: v_manip_vec = utils.convert_binary_vec_to_one_hot( v_manip_goal.squeeze()).reshape((1, 355, 2, 1)) state_vec = np.concatenate([collision_vec, v_manip_vec], axis=2) states.append(state_vec) poses_from_state = get_processed_poses_from_state( s, 'absolute') a = node['action_info'] if 'rectangular' in a['object_name']: object_id = [1, 0] else: object_id = [0, 1] poses_from_state_and_id = np.hstack( [poses_from_state, object_id]) poses_ids.append(poses_from_state_and_id) actions.append( get_processed_poses_from_action( s, a, action_data_mode)) if len(poses_ids) == 0: continue all_poses_ids.append(poses_ids) all_states.append(states) all_actions.append(actions) all_labels.append(labels) assert len(np.hstack(all_labels)) == len(np.vstack(all_poses_ids)) try: print 'n_data %d progress %d/%d n_pos %d n_neutral %d ' \ % (len(np.vstack(all_actions)), traj_file_idx, len(traj_files), np.sum(np.hstack(all_labels)), np.sum(np.hstack(all_labels) == 0)) except: import pdb pdb.set_trace() if traj_file_idx > self.config.num_episode: break all_states = np.vstack(all_states).squeeze(axis=1) all_actions = np.vstack(all_actions).squeeze() all_poses_ids = np.vstack(all_poses_ids).squeeze() all_labels = np.hstack(all_labels).squeeze() pickle.dump((all_states, all_poses_ids, all_actions, all_labels), open(traj_dir + cache_file_name, 'wb')) return all_states, all_poses_ids, all_actions, all_labels
def load_data_from_files(self, action_data_mode): traj_dir = self.get_data_dir() print "Loading data from", traj_dir traj_files = os.listdir(traj_dir) cache_file_name = self.get_cache_file_name(action_data_mode) if os.path.isfile(traj_dir + cache_file_name): print "Loading the cache file", traj_dir + cache_file_name f = pickle.load(open(traj_dir + cache_file_name, 'r')) print "Cache data loaded" return f print 'caching file...%s' % cache_file_name all_states = [] all_actions = [] all_sum_rewards = [] all_poses_ids = [] all_konf_relevance = [] n_episodes = 0 for traj_file_idx, traj_file in enumerate(traj_files): if 'pidx' not in traj_file: print 'not pkl file' continue try: traj = pickle.load(open(traj_dir + traj_file, 'r')) except: print traj_file continue if len(traj.states) == 0: print 'failed instance' continue states = [] poses_ids = [] actions = [] konf_relevance = [] if self.use_filter: rewards = np.array(traj.prev_n_in_way) - np.array( traj.n_in_way) > 0 else: rewards = 1 for s, a, reward, v_manip_goal in zip(traj.states, traj.actions, rewards, traj.prev_v_manip_goal): if self.we_should_skip_this_state_and_action(s, reward): continue collision_vec = s.pick_collision_vector if 'two_arm' in self.config.domain: v_manip_vec = utils.convert_binary_vec_to_one_hot( v_manip_goal.squeeze()).reshape((1, 618, 2, 1)) else: v_manip_vec = utils.convert_binary_vec_to_one_hot( v_manip_goal.squeeze()).reshape((1, 355, 2, 1)) state_vec = np.concatenate([collision_vec, v_manip_vec], axis=2) states.append(state_vec) # note that this is not used in one arm domain if 'rectangular' in a['object_name']: object_id = [1, 0] else: object_id = [0, 1] poses_from_state = get_processed_poses_from_state( s, 'absolute') poses_from_state_and_id = np.hstack( [poses_from_state, object_id]) poses_ids.append(poses_from_state_and_id) actions.append( get_processed_poses_from_action(s, a, action_data_mode)) states = np.array(states) poses_ids = np.array(poses_ids) actions = np.array(actions) rewards = traj.rewards sum_rewards = np.array( [np.sum(traj.rewards[t:]) for t in range(len(rewards))]) if len(states) == 0: print "no state" continue all_poses_ids.append(poses_ids) all_states.append(states) all_actions.append(actions) all_sum_rewards.append(sum_rewards) all_konf_relevance.append(konf_relevance) n_episodes += 1 print 'n_data %d progress %d/%d' % (len( np.vstack(all_actions)), traj_file_idx, len(traj_files)) print "N episodes", n_episodes n_data = len(np.vstack(all_actions)) assert len(np.vstack(all_states)) == n_data all_states = np.vstack(all_states).squeeze(axis=1) all_actions = np.vstack(all_actions).squeeze() all_poses_ids = np.vstack(all_poses_ids).squeeze() pickle.dump((all_states, all_poses_ids, all_actions), open(traj_dir + cache_file_name, 'wb')) return all_states, all_poses_ids, all_actions
def get_data(self, action_type): plan_exp_dir = './planning_experience/processed/motion_plans/' cache_file_name = plan_exp_dir + '/' + action_type + '_cached_data.pkl' if os.path.isfile(cache_file_name): q0s, qgs, collisions, labels = pickle.load( open(cache_file_name, 'r')) return q0s, qgs, collisions, labels plan_exp_files = os.listdir(plan_exp_dir) np.random.shuffle(plan_exp_files) q0s = [] qgs = [] collisions = [] labels = [] n_episodes = 0 for plan_exp_file in plan_exp_files: if 'cached' in plan_exp_file: continue plan = pickle.load(open(plan_exp_dir + plan_exp_file, 'r')) if len(plan[action_type + '_q0s']) == 0: continue file_q0s = plan[action_type + '_q0s'] #for idx, q0 in enumerate(file_q0s): # file_q0s[idx] = utils.encode_pose_with_sin_and_cos_angle(q0) file_qgs = plan[action_type + '_qgs'] #for idx, qg in enumerate(file_qgs): # file_qgs[idx] = utils.encode_pose_with_sin_and_cos_angle(qg) q0s.append(np.array(file_q0s, dtype=np.float32)) qgs.append(np.array(plan[action_type + '_qgs'], dtype=np.float32)) cols = [] for c in plan[action_type + '_collisions']: col = utils.convert_binary_vec_to_one_hot(c.squeeze()) col = col.reshape((1, 618, 2)) cols.append(col) collisions.append(np.array(cols, dtype=np.float32)) labels.append( np.array(plan[action_type + '_labels'], dtype=np.float32)) n_episodes += 1 #if n_episodes == 5000: # break q0s = np.vstack(q0s) qgs = np.vstack(qgs) collisions = np.vstack(collisions) try: labels = np.vstack(labels) except ValueError: labels = np.hstack(labels) q0s = Variable(torch.from_numpy(q0s)) qgs = Variable(torch.from_numpy(qgs)) collisions = Variable(torch.from_numpy(collisions)) labels = Variable(torch.from_numpy(labels)) pickle.dump((q0s, qgs, collisions, labels), open(cache_file_name, 'wb')) return q0s, qgs, collisions, labels
def main(): device = torch.device("cpu") edges = pickle.load(open('prm_edges_for_reachability_gnn.pkl', 'r')) n_msg_passing = 0 net = ReachabilityNet(edges, n_key_configs=618, device=device, n_msg_passing=n_msg_passing) action_type = 'place' load_weights(net, 35, action_type, 1, n_msg_passing, device) get_data_test_acc = False if get_data_test_acc: seed = 0 np.random.seed(seed) torch.cuda.manual_seed_all(seed) dataset = GNNReachabilityDataset(action_type) n_train = int(len(dataset) * 0.9) trainset, testset = torch.utils.data.random_split( dataset, [n_train, len(dataset) - n_train]) testloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=20, pin_memory=True) print "Getting test accuracy for n_test %d..." % len(testset) test_acc = get_test_acc(testloader, net, device, len(testset)) print test_acc import pdb pdb.set_trace() problem_seed = 1 problem_env, openrave_env = create_environment(problem_seed) if 'Encoded' in net.__class__.__name__: tmp, _ = pickle.load(open('prm.pkl', 'r')) key_configs = np.zeros((618, 4)) for pidx, p in enumerate(tmp): key_configs[pidx] = utils.encode_pose_with_sin_and_cos_angle(p) else: key_configs, _ = pickle.load(open('prm.pkl', 'r')) compute_clf_acc = False if compute_clf_acc: compute_clf_rates_from_diff_pinstances(problem_env, key_configs, net, action_type) utils.viewer() if action_type == 'place': status = "NoSolution" while status != "HasSolution": sampler = UniformSampler(problem_env.regions['home_region']) checker = TwoArmPaPFeasibilityCheckerWithoutSavingFeasiblePick( problem_env) pick_param = sampler.sample() target_obj = problem_env.objects[np.random.randint(8)] abstract_action = Operator( operator_type='two_arm_pick_two_arm_place', discrete_parameters={ 'object': target_obj, 'place_region': 'home_region' }) op_parameters, status = checker.check_feasibility( abstract_action, pick_param) utils.two_arm_pick_object(target_obj, op_parameters['pick']) collisions = utils.compute_occ_vec(key_configs) col = utils.convert_binary_vec_to_one_hot(collisions.squeeze()) qg = sample_qg(problem_env, 'home_region') utils.visualize_path([qg]) vertex = make_vertices(qg, key_configs, col, net) vertex_outputs = net.get_vertex_activations(vertex).data.numpy().squeeze() """ top_activations = np.max(vertex_outputs, axis=0) top_k_args = np.argsort(abs(top_activations))[-10:] """ top_k_args = np.argsort(abs(vertex_outputs))[-30:] top_k_key_configs = key_configs[top_k_args, :] import pdb pdb.set_trace() utils.visualize_path(top_k_key_configs) # todo visualize the activations import pdb pdb.set_trace()