Example #1
    def call(self,req_type,params):
        """
        Calls CLARK with different types of inputs and parameters.
        """
        req_type = req_type.lower()
        if req_type == 'pddl':
            parser = pddlc.get_argument_parser()
            call_func = pddlc.call_clark
        elif req_type in ['crmpl','rmpyl']:
            parser = rmpylc.get_argument_parser()
            call_func = rmpylc.call_clark
        elif req_type == 'ccpomdp':
            parser = ccpomdpc.get_argument_parser()
            call_func = ccpomdpc.call_clark
        else:
            print(req_type+' is an invalid request type.')
            _print_clark_usage()
            return False,None #Returns early, since no parser was selected

        _add_common_parsing(parser) #Adds common arguments

        try:
            args = parser.parse_args(params.split())
            success,output_dict = call_func(args)

            #If requested, writes the policy in graphical SVG format.
            if args.svg:
                policy_file = args.output[:args.output.rfind('.')]
                dot_policy = policy_to_dot(output_dict['explicit'],output_dict['policy'])
                dot_policy.write(policy_file+'.svg',format='svg')

            #Writes the output TPN file
            output_dict['rmpyl'].to_ptpn(filename=args.output)

        except Exception:
            success,output_dict = False,None
            print('\n##### ERROR: could not process request\n\tType: %s\n\tParams: %s\n'%(req_type,params))
            if self.debug:
                import ipdb; ipdb.set_trace()
            raise

        return success,output_dict
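A minimal usage sketch for the method above; the wrapper class name and the parameter flags are assumptions for illustration (the flags actually accepted depend on the parser returned by pddlc.get_argument_parser and on _add_common_parsing):

#Hypothetical wrapper instance and flags; only the call() signature comes from the example above
clark = ClarkServer(debug=False)
success,output_dict = clark.call('pddl','--domain domain.pddl --problem problem.pddl --output plan.tpn')
if success:
    print('Policy computed with %d nodes'%len(output_dict['policy']))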
Example #2
init_tcs.append(TemporalConstraint(start=cc_fp.global_start_event,
                                   end=cc_fp.global_end_event,
                                   ctype='controllable',lb=0.0,ub=1000.0))

b0 = cc_fp.get_initial_belief(prior=prior,initial_site='start_point',goal_site='end_point',
                              init_tcs=init_tcs)

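#RAO* planner enforcing a 1% (cc=0.01) chance constraint over the whole policy ('overall')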
planner = RAOStar(cc_fp,node_name='id',cc=0.01,cc_type='overall',
                  terminal_prob=1.0,randomization=0.0,propagate_risk=True,
                  halt_on_violation=False,verbose=1)

#Searches for the optimal policy
policy,explicit,performance = planner.search(b0)

#Converts policy to graphical SVG format
dot_policy = policy_to_dot(explicit,policy)
dot_policy.write('flightgear_policy.svg',format='svg')

#Converts optimal exploration policy into an RMPyL program
exploration_policy = policy_to_rmpyl(explicit,policy)

#The flight policy has the additional actions of taking off and landing.
flight_policy = RMPyL(name='run()')
flight_policy *= flight_policy.sequence(Episode(action='(takeoff plane)'),
                                        exploration_policy,
                                        Episode(action='(land plane)'))

#Eliminates probabilistic choices from the policy, since Pike (in fact, the
#Lisp tpn package) cannot properly handle them.
for obs in flight_policy.observations:
    if obs.type=='probabilistic':
        #The original snippet is truncated here; one plausible completion,
        #consistent with the comment above, is to demote the probabilistic
        #choice to an uncontrollable observation.
        obs.type = 'uncontrollable'
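With the probabilistic choices removed, the program can be exported like the other examples in this listing; a minimal sketch, assuming the same RMPyL to_ptpn interface used in Examples #1 and #3 (the filename is an assumption):

#Writes the flight program as a TPN for execution
flight_policy.to_ptpn(filename='flightgear_policy.tpn')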
Example #3

# Diagnostic demo, where the robot has to figure out if it is fit for the task

# With this setting of parameters, the robot tries an action a couple of times
# before bugging the human for help.
param_dict = {'p_fail_fit':0.2,        #Prob. of failing a task while fit
              'p_fail_unfit':0.999,    #Prob. of failing a task while unfit
              'p_fit_fit':1.0,         #Prob. of remaining fit, if fit before
              'p_fit_unfit':0.0,       #Prob. of becoming fit, if unfit before
              'goal_drop_penalty':100.0, #Penalty for not achieving a goal
              'robot_action_cost':1.0,   #Cost of the robot performing an action
              'human_action_cost':5.0}   #Cost of the human performing an action

mitsu_model = DiagnosticMitsubishi(domain_file=dom_file,prob_file=prob_file,
                                   time_limit=5,verbose=1,param_dict=param_dict)

b0 = mitsu_model.get_initial_belief()

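#RAO* planner allowing up to 30% (cc=0.3) overall execution risk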
planner = RAOStar(mitsu_model,node_name='id',cc=0.3,cc_type='overall',
                  terminal_prob=1.0,randomization=0.0,propagate_risk=True,
                  verbose=1,log=False)

policy,explicit,performance = planner.search(b0)
dot_policy = policy_to_dot(explicit,policy)
rmpyl_policy = policy_to_rmpyl(explicit,policy)

dot_policy.write('mitsubishi_policy.svg',format='svg')
rmpyl_policy.to_ptpn(filename='mitsubishi_policy_rmpyl_ptpn.tpn')
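Besides the policy itself, planner.search also populates a performance dictionary; a minimal sketch reading some of the keys that the search() implementation in Example #4 fills in:

#Reports planning statistics; key names are taken from search() in Example #4
print('Optimal value: %f'%performance['optimal_value'])
print('Execution risk: %f'%performance['exec_risk_for_optimal_value'])
print('Total elapsed time: %f s'%performance['total_elapsed_time'])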
Example #4
    def search(self, b0, time_limit=np.infty, iter_limit=np.infty):
        """Searches for the optimal path from start to goal on the hypergraph."""

        if self._verbose >= 1:
            print('\n##### Starting RAO* search!\n')

        self._start_time = time.time()
        self._init_search(b0)
        count = 0
        root = self._explicit.root

        #Initial objective at the root, which is the best possible (it can
        #only degrade with an admissible heuristic).
        prev_root_value = np.infty if self._model.is_maximization else -np.infty

        if self._log:  #Creates a log file, if necessary
            filename = time.strftime("%d_%m_%Y_%H_%M_%S") + '_log_rao.txt'

            if not os.path.exists('./log'):
                os.makedirs('./log')
            self._log_f = open('./log/' + filename, 'w')
            print("\nLogging into " + filename)

        if self._animation:
            if not os.path.exists('./animation'):
                os.makedirs('./animation')
            print("\nCreated animaition directory")

        interrupted = False
        try:
            while len(self._opennodes) > 0 and (count <= iter_limit) and (
                    time.time() - self._start_time <= time_limit):
                count += 1

                ########### CORE FUNCTIONS
                #Expands the current best solution
                expanded_nodes = self._expand_best_partial_solution()

                #Updates the value estimates and policy
                self._update_values_and_best_actions(expanded_nodes)

                #NOTE: the value inconsistency here was coming from nodes that
                #were not contained in the best partial policy graph, but contained
                #children in it.
                #debug_all_hypegraph_values(self,'After value update')

                #Updates the mapping of ancestors on the best policy graph and also
                #the list of open nodes to be expanded.
                self._update_policy_open_nodes()
                #debug_policy_values(self,'Policy values after policy update')

                #debug_all_hypegraph_values(self,'After policy update')
                #######################################################

                #Performance measures and info
                root_value = root.value
                self.performance['root_value_series'].append(root_value)
                self.performance['root_exec_risk_series'].append(
                    root.exec_risk)

                #Root node changed from its best value
                if not np.isclose(root_value, prev_root_value):
                    #If the heuristic is really admissible, the root value can
                    #only degrade (decrease for maximization, or increase for
                    #minimization).
                    if self._is_better(root_value, prev_root_value):
                        print(
                            'WARNING: root value improved, which might indicate inadmissibility.'
                        )
                    else:
                        self.performance['time_to_best_value'] = time.time(
                        ) - self._start_time
                        self.performance['iter_to_best_value'] = count
                        prev_root_value = root_value

                if self._verbose >= 2:
                    print("Expanded nodes [%s]" %
                          (' '.join([str(n.name) for n in expanded_nodes])))

                if self._verbose >= 1:
                    total_states = sum(
                        [len(node.state.belief) for node in expanded_nodes])
                    print(
                        "Iter: %d, Open nodes: %d, States evaluted: %d, Root value: %.4f, Root ER: %.4f"
                        % (count, len(self._opennodes), total_states,
                           root.value, root.exec_risk))
                if self._animation:  #If an animation should be generated
                    partial_policy = self._extract_policy(partial=True)
                    dot_policy = policy_to_dot(self._explicit, partial_policy)
                    dot_policy.write('./animation/%d_rao.svg' % (count),
                                     format='svg')

        except KeyboardInterrupt:
            interrupted = True
            print(
                "\n\n***** EXECUTION TERMINATED BY USER AT ITERATION %d. *****"
                % (count))

        self.performance['total_elapsed_time'] = time.time() - self._start_time
        self.performance['optimal_value'] = root.value
        self.performance['exec_risk_for_optimal_value'] = root.exec_risk

        if self._log:  #Closes the log file, if one has been created
            self._log_f.close()
            print("\nClosed " + filename)

        print("\nTotal elapsed time: %f s" %
              (self.performance['total_elapsed_time']))
        print("Time to optimal value: %f s" %
              (self.performance['time_to_best_value']))
        print("Iterations till optimal value: %d" %
              (self.performance['iter_to_best_value']))
        print("Number of expanded nodes: %d" %
              (self.performance['expanded_nodes']))
        print("Number of evaluated particles: %d" %
              (self.performance['evaluated_particles']))
        print("Optimal value: %f" % (self.performance['optimal_value']))
        print("Execution risk for optimal value: %f" %
              (self.performance['exec_risk_for_optimal_value']))

        policy = self._extract_policy(partial=interrupted)

        if len(policy) == 0:
            print('\n##### Failed to find policy (it is empty)...\n')
        elif self.performance['optimal_value'] == -float('inf'):
            print(
                '\n##### Failed to find policy (probably due to chance constraint violation)...\n'
            )
        else:
            print('\n##### Policy found!\n')

        return policy, self._explicit, self.performance
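A minimal driver sketch for search above, assuming a planner and initial belief constructed as in Examples #2 and #3; both limits default to infinity when omitted:

#Caps the search at 60 seconds of wall-clock time or 1000 iterations, whichever comes first
policy,explicit,performance = planner.search(b0,time_limit=60.0,iter_limit=1000)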