def determine_pomdp_dict( self, action_code, use_confidence_scores=False ):
    """Validate the incoming action code(s) and run one POMDP update.

    When ``use_confidence_scores`` compares equal to ``False``,
    ``action_code`` is treated as a single code; otherwise it is iterated
    as a list of codes.  Every code rejected by
    ``tbh_action_code_manager.verify_action_code`` is replaced by
    ``['null', ['yes_record']]`` before the input is handed to
    ``tbh_pomdp_manager.process_result``.

    Side effects: ``self.machine_action``, ``self.belief_for_policy`` and
    ``self.pomdp.belief`` are overwritten from the result's
    'new_machine_action', 'belief_for_policy' and 'final_belief' entries,
    and ``self.belief_for_policy`` is printed to stdout.

    Returns the dict produced by ``tbh_pomdp_manager.process_result``.
    """
    # The comparison is deliberately `== False` rather than `not ...`, so
    # that values such as None still take the list branch.
    if use_confidence_scores == False:
        confidence_flag = False
        if tbh_action_code_manager.verify_action_code( action_code ):
            checked_input = action_code
        else:
            checked_input = ['null', ['yes_record']]
    else:
        # action_code is actually a list of codes on this path.
        confidence_flag = True
        checked_input = [
            code
            if tbh_action_code_manager.verify_action_code( code )
            else ['null', ['yes_record']]
            for code in action_code ]

    # Both paths go through the same belief update entry point; only the
    # observation payload and the confidence flag differ.
    pomdp_dict = tbh_pomdp_manager.process_result(
        self.pomdp, checked_input, self.machine_action, self.Q,
        use_confidence_scores=confidence_flag,
        skype_contacts_dict=self.skype_contacts_dict )

    # Kept on the instance for the next belief update.
    self.machine_action = pomdp_dict['new_machine_action']
    self.belief_for_policy = pomdp_dict['belief_for_policy']

    print( "\n=================" )
    print( "CURRENT POLICY" )
    print( self.belief_for_policy )
    print( "=================" )

    # Update the belief as appropriate.
    self.pomdp.belief = pomdp_dict['final_belief']

    return pomdp_dict
def determine_pomdp_dict_confidence( self, action_code_list ):
    """Validate a list of action codes and run one POMDP update on it.

    Each entry of ``action_code_list`` is checked with
    ``tbh_action_code_manager.verify_action_code``; entries that fail the
    check are replaced by ``['null', ['yes_record']]``.  The resulting
    list is passed to ``tbh_pomdp_manager.process_result_confidence``.
    The caller's list is not modified.

    Side effects: ``self.machine_action``, ``self.belief_for_policy`` and
    ``self.pomdp.belief`` are overwritten from the result's
    'new_machine_action', 'belief_for_policy' and 'final_belief' entries,
    and ``self.belief_for_policy`` is printed to stdout.

    Returns the dict produced by
    ``tbh_pomdp_manager.process_result_confidence``.
    """
    # Fix: the previous version computed the substitute code but appended
    # the original one, and then passed the unvalidated input list on, so
    # unknown codes were never replaced.
    modified_action_code_list = [
        action_code
        if tbh_action_code_manager.verify_action_code( action_code )
        else ['null', ['yes_record']]
        for action_code in action_code_list ]

    # For POMDP dialog manager, pass to a POMDP belief update algorithm.
    # NOTE(review): use_confidence_scores=False is kept exactly as the
    # original call had it -- confirm this is intended for the
    # confidence variant.
    pomdp_dict = tbh_pomdp_manager.process_result_confidence(
        self.pomdp, modified_action_code_list, self.machine_action,
        self.Q, use_confidence_scores=False,
        skype_contacts_dict=self.skype_contacts_dict )

    # Kept on the instance for the next belief update.
    self.machine_action = pomdp_dict['new_machine_action']
    self.belief_for_policy = pomdp_dict['belief_for_policy']

    print( "\n=================" )
    print( "CURRENT POLICY" )
    print( self.belief_for_policy )
    print( "=================" )

    # Update the belief as appropriate.
    self.pomdp.belief = pomdp_dict['final_belief']

    return pomdp_dict