def get_pap_gnn_model(mover, config):
    is_use_gnn = 'qlearned' in config.h_option
    if is_use_gnn:
        mconfig_type = collections.namedtuple('mconfig_type',
                                              'operator n_msg_passing n_layers num_fc_layers n_hidden no_goal_nodes '
                                              'top_k optimizer lr use_mse batch_size seed num_train val_portion '
                                              'mse_weight diff_weight_msg_passing same_vertex_model '
                                              'weight_initializer loss use_region_agnostic')
        pap_mconfig = mconfig_type(
            operator='two_arm_pick_two_arm_place',
            n_msg_passing=1,
            n_layers=2,
            num_fc_layers=2,
            n_hidden=32,
            no_goal_nodes=False,
            top_k=1,
            optimizer='adam',
            lr=1e-4,
            use_mse=True,
            batch_size='32',
            seed=config.absq_seed,
            num_train=config.num_train,
            val_portion=.1,
            mse_weight=config.mse_weight,
            diff_weight_msg_passing=False,
            same_vertex_model=False,
            weight_initializer='glorot_uniform',
            loss=config.loss,
            use_region_agnostic=config.use_region_agnostic
        )
        if config.domain == 'two_arm_mover':
            num_entities = 11  # 8
            n_regions = 2
        elif config.domain == 'one_arm_mover':
            num_entities = 12
            n_regions = 2
        else:
            raise NotImplementedError
        # Note: earlier feature sizes (20 node / 28 edge) were dead assignments that were
        # immediately overridden; the current featurization uses 10 node and 44 edge features.
        num_node_features = 10
        num_edge_features = 44
        entity_names = mover.entity_names

        with tf.variable_scope('pap'):
            pap_model = PaPGNN(num_entities, num_node_features, num_edge_features, pap_mconfig, entity_names,
                               n_regions)
        pap_model.load_weights()
    else:
        pap_model = None

    return pap_model
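# Illustrative usage sketch (not part of the original module): get_pap_gnn_model is meant
# to be called once the problem environment and the argparse-style config exist. The
# constructors named below are hypothetical placeholders; only the attributes referenced
# inside the function above (h_option, absq_seed, num_train, mse_weight, loss,
# use_region_agnostic, domain) are known to be required on config.
#
#   config = parse_arguments()              # hypothetical argparse helper
#   mover = make_mover_env(config.domain)   # hypothetical problem-environment constructor
#   pap_model = get_pap_gnn_model(mover, config)
#   if pap_model is None:
#       # config.h_option did not request a learned Q; fall back to a hand-coded heuristic
#       pass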
def create_gnn_model(config, nodes, edges):
    num_entities = nodes.shape[1]
    m = PaPGNN(num_entities, nodes.shape[-1], edges.shape[-1], config)
    if os.path.isfile(m.weight_file_name) and not config.donttrain and not config.f:
        print "Quitting because we've already trained with the given configuration"
        sys.exit(-1)
    return m
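# Shape assumption inferred from the indexing above (not stated elsewhere in this file):
# nodes is (n_data, num_entities, num_node_features) and edges is
# (n_data, num_entities, num_entities, num_edge_features), so nodes.shape[1] supplies the
# entity count and the trailing dimensions supply the per-node / per-edge feature sizes
# passed to PaPGNN. A minimal call sketch with hypothetical dimensions:
#
#   nodes = np.zeros((1000, 11, 10))
#   edges = np.zeros((1000, 11, 11, 44))
#   model = create_gnn_model(config, nodes, edges)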
def load_learned_q(config, problem_env):
    mconfig_type = collections.namedtuple('mconfig_type',
                                          'operator n_msg_passing n_layers num_fc_layers n_hidden no_goal_nodes '
                                          'top_k optimizer lr use_mse batch_size seed num_train val_portion '
                                          'mse_weight diff_weight_msg_passing same_vertex_model '
                                          'weight_initializer loss use_region_agnostic')
    pap_mconfig = mconfig_type(
        operator='two_arm_pick_two_arm_place',
        n_msg_passing=1,
        n_layers=2,
        num_fc_layers=2,
        n_hidden=32,
        no_goal_nodes=False,
        top_k=1,
        optimizer='adam',
        lr=1e-4,
        use_mse=True,
        batch_size='32',
        seed=config.train_seed,
        num_train=config.num_train,
        val_portion=.1,
        mse_weight=0.0,
        diff_weight_msg_passing=False,
        same_vertex_model=False,
        weight_initializer='glorot_uniform',
        loss=config.loss,
        use_region_agnostic=False
    )
    if config.domain == 'two_arm_mover':
        num_entities = 10
        n_regions = 2
    elif config.domain == 'one_arm_mover':
        num_entities = 12
        n_regions = 2
    else:
        raise NotImplementedError
    num_node_features = 10
    num_edge_features = 44
    entity_names = problem_env.entity_names

    with tf.variable_scope('pap'):
        pap_model = PaPGNN(num_entities, num_node_features, num_edge_features, pap_mconfig, entity_names, n_regions)
    pap_model.load_weights()

    return pap_model
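# How load_learned_q differs from get_pap_gnn_model above: it always builds the GNN (no
# h_option check), seeds from config.train_seed rather than config.absq_seed, and hard-codes
# mse_weight=0.0 and use_region_agnostic=False instead of reading them from the config.
# Example call, assuming the same config / problem_env objects used elsewhere in this file:
#
#   pap_model = load_learned_q(config, problem_env)
#   hval = compute_heuristic(state, action, pap_model, problem_env)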
def get_problem(mover):
    tt = time.time()
    obj_names = [obj.GetName() for obj in mover.objects]
    n_objs_pack = config.n_objs_pack

    if config.domain == 'two_arm_mover':
        statecls = ShortestPathPaPState
        goal = ['home_region'] + [obj.GetName() for obj in mover.objects[:n_objs_pack]]
    elif config.domain == 'one_arm_mover':
        def create_one_arm_pap_state(*args, **kwargs):
            while True:
                state = OneArmPaPState(*args, **kwargs)
                if len(state.nocollision_place_op) > 0:
                    return state
                else:
                    print('failed to find any paps, trying again')

        statecls = create_one_arm_pap_state
        goal = ['rectangular_packing_box1_region'] + [obj.GetName() for obj in mover.objects[:n_objs_pack]]
    else:
        raise NotImplementedError

    if config.visualize_plan:
        mover.env.SetViewer('qtcoin')
        set_viewer_options(mover.env)

    pr = cProfile.Profile()
    pr.enable()
    state = statecls(mover, goal)
    pr.disable()
    pstats.Stats(pr).sort_stats('tottime').print_stats(30)
    pstats.Stats(pr).sort_stats('cumtime').print_stats(30)
    # state.make_pklable()

    mconfig_type = collections.namedtuple('mconfig_type',
                                          'operator n_msg_passing n_layers num_fc_layers n_hidden no_goal_nodes '
                                          'top_k optimizer lr use_mse batch_size seed num_train val_portion num_test '
                                          'mse_weight diff_weight_msg_passing same_vertex_model '
                                          'weight_initializer loss use_region_agnostic')
    assert config.num_train <= 5000
    pap_mconfig = mconfig_type(
        operator='two_arm_pick_two_arm_place',
        n_msg_passing=1,
        n_layers=2,
        num_fc_layers=2,
        n_hidden=32,
        no_goal_nodes=False,
        top_k=1,
        optimizer='adam',
        lr=1e-4,
        use_mse=True,
        batch_size='32',
        seed=config.train_seed,
        num_train=config.num_train,
        val_portion=.1,
        num_test=1882,
        mse_weight=1.0,
        diff_weight_msg_passing=False,
        same_vertex_model=False,
        weight_initializer='glorot_uniform',
        loss=config.loss,
        use_region_agnostic=False
    )
    if config.domain == 'two_arm_mover':
        num_entities = 11
        n_regions = 2
    elif config.domain == 'one_arm_mover':
        num_entities = 12
        n_regions = 2
    else:
        raise NotImplementedError
    num_node_features = 10
    num_edge_features = 44
    entity_names = mover.entity_names

    with tf.variable_scope('pap'):
        pap_model = PaPGNN(num_entities, num_node_features, num_edge_features, pap_mconfig, entity_names, n_regions)
    pap_model.load_weights()

    mover.reset_to_init_state_stripstream()
    depth_limit = 60

    class Node(object):
        def __init__(self, parent, action, state, reward=0):
            self.parent = parent  # parent.state is initial state
            self.action = action
            self.state = state  # resulting state
            self.reward = reward  # resulting reward
            if parent is None:
                self.depth = 1
            else:
                self.depth = parent.depth + 1

        def backtrack(self):
            node = self
            while node is not None:
                yield node
                node = node.parent

    action_queue = Queue.PriorityQueue()  # (heuristic, nan, operator skeleton, state, trajectory)
    initnode = Node(None, None, state)
    initial_state = state
    actions = get_actions(mover, goal, config)
    for a in actions:
        hval = compute_heuristic(state, a, pap_model, mover)
        action_queue.put((hval, float('nan'), a, initnode))  # initial q

    iter = 0
    # beginning of the planner
    while True:
        if time.time() - tt > config.timelimit:
            return None, iter

        iter += 1
        # if 'one_arm' in mover.name:
        #     time.sleep(3.5)  # gauged using max_ik_attempts = 20

        if iter > 3000:
            print('failed to find plan: iteration limit')
            return None, iter

        if action_queue.empty():
            actions = get_actions(mover, goal, config)
            for a in actions:
                action_queue.put((compute_heuristic(initial_state, a, pap_model, mover),
                                  float('nan'), a, initnode))  # initial q

        # import pdb;pdb.set_trace()
        curr_hval, _, action, node = action_queue.get()
        state = node.state

        print('\n'.join([str(parent.action.discrete_parameters.values())
                         for parent in list(node.backtrack())[-2::-1]]))
        print("{}".format(action.discrete_parameters.values()))

        if node.depth >= 2 and action.type == 'two_arm_pick' and \
                node.parent.action.discrete_parameters['object'] == action.discrete_parameters['object']:
            # and plan[-1][1] == r:
            print('skipping because repeat', action.discrete_parameters['object'])
            continue

        if node.depth > depth_limit:
            print('skipping because depth limit', node.action.discrete_parameters.values())

        # reset to state
        state.restore(mover)
        # utils.set_color(action.discrete_parameters['object'], [1, 0, 0])  # visualization purpose

        if action.type == 'two_arm_pick_two_arm_place':
            smpler = PaPUniformGenerator(action, mover, None)
            smpled_param = smpler.sample_next_point(action, n_iter=200, n_parameters_to_try_motion_planning=3,
                                                    cached_collisions=state.collides,
                                                    cached_holding_collisions=None)
            if smpled_param['is_feasible']:
                action.continuous_parameters = smpled_param
                action.execute()
                print "Action executed"
            else:
                print "Failed to sample an action"
                # utils.set_color(action.discrete_parameters['object'], [0, 1, 0])  # visualization purpose
                continue

            is_goal_achieved = \
                np.all([mover.regions['home_region'].contains(mover.env.GetKinBody(o).ComputeAABB())
                        for o in obj_names[:n_objs_pack]])
            if is_goal_achieved:
                print("found successful plan: {}".format(n_objs_pack))
                print iter, time.time() - tt
                import pdb;pdb.set_trace()
                """
                trajectory = Trajectory(mover.seed, mover.seed)
                plan = list(node.backtrack())[::-1]  # plan of length 0 is possible I think
                trajectory.states = [nd.state for nd in plan]
                trajectory.actions = [nd.action for nd in plan[1:]] + [action]
                trajectory.rewards = [nd.reward for nd in plan[1:]] + [0]
                trajectory.state_prime = [nd.state for nd in plan[1:]]
                trajectory.seed = mover.seed
                print(trajectory)
                return trajectory, iter
                """
            else:
                newstate = statecls(mover, goal, node.state, action)
                print "New state computed"
                newstate.make_pklable()
                newnode = Node(node, action, newstate)
                newactions = get_actions(mover, goal, config)
                print "Old h value", curr_hval
                for newaction in newactions:
                    hval = compute_heuristic(newstate, newaction, pap_model, mover) - 1. * newnode.depth
                    print "New state h value %.4f for %s %s" % (
                        hval, newaction.discrete_parameters['object'], newaction.discrete_parameters['region'])
                    action_queue.put((hval, float('nan'), newaction, newnode))
                import pdb;pdb.set_trace()
                # utils.set_color(action.discrete_parameters['object'], [0, 1, 0])  # visualization purpose

        elif action.type == 'one_arm_pick_one_arm_place':
            success = False
            obj = action.discrete_parameters['object']
            region = action.discrete_parameters['region']
            o = obj.GetName()
            r = region.name

            if (o, r) in state.nocollision_place_op:
                pick_op, place_op = node.state.nocollision_place_op[(o, r)]
                pap_params = pick_op.continuous_parameters, place_op.continuous_parameters
            else:
                mover.enable_objects()
                current_region = mover.get_region_containing(obj).name
                papg = OneArmPaPUniformGenerator(action, mover,
                                                 cached_picks=(node.state.iksolutions[current_region],
                                                               node.state.iksolutions[r]))
                pick_params, place_params, status = papg.sample_next_point(500)
                if status == 'HasSolution':
                    pap_params = pick_params, place_params
                else:
                    pap_params = None

            if pap_params is not None:
                pick_params, place_params = pap_params
                action = Operator(
                    operator_type='one_arm_pick_one_arm_place',
                    discrete_parameters={
                        'object': obj,
                        'region': mover.regions[r],
                    },
                    continuous_parameters={
                        'pick': pick_params,
                        'place': place_params,
                    }
                )
                action.execute()
                success = True

                is_goal_achieved = \
                    np.all([mover.regions['rectangular_packing_box1_region'].contains(
                        mover.env.GetKinBody(o).ComputeAABB()) for o in obj_names[:n_objs_pack]])

                if is_goal_achieved:
                    print("found successful plan: {}".format(n_objs_pack))
                    trajectory = Trajectory(mover.seed, mover.seed)
                    plan = list(node.backtrack())[::-1]
                    trajectory.states = [nd.state for nd in plan]
                    for s in trajectory.states:
                        s.pap_params = None
                        s.pick_params = None
                        s.place_params = None
                        s.nocollision_pick_op = None
                        s.collision_pick_op = None
                        s.nocollision_place_op = None
                        s.collision_place_op = None
                    trajectory.actions = [nd.action for nd in plan[1:]] + [action]
                    for op in trajectory.actions:
                        op.discrete_parameters = {
                            key: value.name if 'region' in key else value.GetName()
                            for key, value in op.discrete_parameters.items()
                        }
                    trajectory.rewards = [nd.reward for nd in plan[1:]]
                    trajectory.state_prime = [nd.state for nd in plan[1:]]
                    trajectory.seed = mover.seed
                    print(trajectory)
                    return trajectory, iter
                else:
                    pr = cProfile.Profile()
                    pr.enable()
                    newstate = statecls(mover, goal, node.state, action)
                    pr.disable()
                    pstats.Stats(pr).sort_stats('tottime').print_stats(30)
                    pstats.Stats(pr).sort_stats('cumtime').print_stats(30)
                    print "New state computed"
                    newnode = Node(node, action, newstate)
                    newactions = get_actions(mover, goal, config)
                    print "Old h value", curr_hval
                    for newaction in newactions:
                        hval = compute_heuristic(newstate, newaction, pap_model, mover) - 1. * newnode.depth
                        action_queue.put((hval, float('nan'), newaction, newnode))

            if not success:
                print('failed to execute action')
            else:
                print('action successful')

        else:
            raise NotImplementedError
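# Summary of get_problem's search loop, for reference (a paraphrase of the code above, not
# new behavior): it runs a greedy best-first search over operator skeletons. Skeletons are
# popped from the PriorityQueue in increasing order of compute_heuristic(state, action,
# pap_model, mover); continuous parameters are sampled with PaPUniformGenerator (two-arm)
# or OneArmPaPUniformGenerator (one-arm); feasible actions are executed, a successor state
# is built with statecls, and the successor's actions are pushed with priority
# hval = heuristic - 1.0 * depth, so deeper nodes win ties. The loop ends on goal
# achievement (returning a Trajectory and the iteration count), on config.timelimit, or
# after 3000 iterations (returning None).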