def act_on(self, domainInControl, state):
    '''
    Provides the next system action based on the domain in control and the belief state.

    The belief state is mapped to an abstract representation which is used for all committee members.
    Every member contributes its Q statistics for the abstract executable actions; these are combined
    by ``self._bayes_committee_calculator`` to select an abstract action, which is then converted back
    into a summary action and finally into a master (system) action of the domain in control.

    If a member returns something that is not a dict of Q statistics, that value is used directly as
    the summary action (the "bye" short-circuit) and the committee calculation is skipped.

    :param domainInControl: the domain unique identifier string of the domain in control
    :type domainInControl: str
    :param state: the belief state to act on
    :type state: :class:`~utils.DialogueState.DialogueState`
    :returns: the next system action, wrapped in ``DiaAct.DiaAct``
    '''
    belief = state.getDomainState(domainInControl)
    domainPolicies = self.manager.domainPolicies

    # The policy of the domain in control is dereferenced below, so it has to be booted already.
    controlPolicy = domainPolicies[domainInControl]
    if not isinstance(controlPolicy, CommitteeMember):
        logger.error(
            "Committee member is not of type CommitteeMember: {}".format(domainInControl))

    # 1. Abstract the belief state based on domainInControl:
    abstracted_state = controlPolicy.get_State(belief)

    # 2. Determine the domain's non-executable actions and then abstract them:
    nonExecutableActions = controlPolicy.actions.getNonExecutable(
        belief, controlPolicy.lastSystemAction)
    nonExecutableActions = controlPolicy.abstract_actions(nonExecutableActions)

    # 3. Get the statistics needed for the BCM decision:
    # 3.1 - Q(b,*) for all abstract executable actions for each committee member
    domainQs = {}
    just_return_bye = False
    # Bound up front: in the short-circuit case no abstract action is ever selected, and the
    # bookkeeping in step 7 used to fail with an UnboundLocalError.
    abstractAct = None
    for dstring in self.members:
        if domainPolicies[dstring] is None:
            self.manager.bootup(dstring)
        member = domainPolicies[dstring]
        if not isinstance(member, CommitteeMember):
            logger.warning(
                'Skipping policy committee member %s as policy is not a GP' % dstring)
            continue

        # Method exists as the type must be GPPolicy or derived.
        padded_state = member.get_State(abstracted_state)
        domain_QmeanVar = member.getMeanVar_for_executable_actions(
            belief, padded_state, nonExecutableActions)
        if not isinstance(domain_QmeanVar, dict):
            # The member returned the bye action instead of Q statistics -- use it as is.
            just_return_bye = True
            summaryAct = domain_QmeanVar
            break
        domainQs[dstring] = domain_QmeanVar

    if not just_return_bye:
        # 3.2 - get domain priors over acts:
        priors = {
            abs_act: controlPolicy.getPriorVar(belief, abs_act)
            for abs_act in domainQs[domainInControl]
        }

        # 4. Form the BCM decision -- get the BCM abstract summary action:
        abstractAct = self._bayes_committee_calculator(
            domainQs, priors, domainInControl, controlPolicy.learner._scale)
        logger.info('BCM: abstract action: %s' % abstractAct)

        # 5. Convert abstract action back to real action:
        summaryAct = controlPolicy.unabstract_action(abstractAct)

    logger.info('BCM: summary action: %s' % summaryAct)

    # 6. Finally convert to master action:
    systemAct = controlPolicy.actions.Convert(
        belief, summaryAct, controlPolicy.lastSystemAction)

    # 7. Bookkeeping:
    controlPolicy.lastSystemAction = systemAct
    controlPolicy.summaryAct = summaryAct  # TODO -check- not sure this is correct
    controlPolicy.prevbelief = belief
    # NOTE(review): this is None in the short-circuit case -- confirm that the code consuming
    # actToBeRecorded treats None as "no abstract action selected".
    controlPolicy.actToBeRecorded = abstractAct

    # 8. Multiagent learning
    if self.learning_method == "multiagent":
        abstract_GPAction = controlPolicy.get_Action(summaryAct)
        self.domainInControl = domainInControl
        self.multiagent_abstract_state = abstracted_state
        self.multiagent_abstract_action = abstract_GPAction

    return DiaAct.DiaAct(systemAct)
def transform(self, sysAct):
    '''
    Transforms the sysAct from a semantic utterance form to a text form using the rules in the generator.

    Every rule in ``self.rules`` is run on the system act. A rule is only a candidate if it reports a
    positive type match; among the candidates the one with the fewest missing items wins, ties being
    broken first by the larger type match and then by the larger number of matched items. The search
    stops early once a rule has type match 1, nothing missing and matches every item of the act.

    :param sysAct: input system action (semantic form).
    :type sysAct: str
    :returns: (str) natural language
    '''
    input_utt = DiaAct.DiaAct(sysAct)

    # FIXME hack to transform system acts with slot op "!=" to "=" and add slot-value pair
    # other=true which is needed by NLG rule base
    # assumption: "!=" only appears if there are no further alternatives,
    # ie, inform(name=none, name!=place!, ...)
    negated_items = [item for item in input_utt.items if item.op == "!="]
    for item in negated_items:
        item.op = u"="
    if negated_items:
        input_utt.items.append(dact.DactItem(u'other', u'=', u'true'))

    # Iterate over BasicTemplateRule rules, remembering the best candidate seen so far.
    best_rule = None
    best = None
    best_non_term_map = None
    best_missing, best_type_match, best_matches = 1000, 0, 0
    for rule in self.rules:
        logger.debug('Checking Rule %s' % str(rule))
        out, matches, missing, type_match, non_term_map = rule.generate(input_utt)

        # Rules without a positive type match are never candidates.
        if not (type_match > 0):
            continue
        logger.debug(
            'Checking Rule %s: type_match=%d, missing=%d, matches=%d, output=%s'
            % (str(rule), type_match, missing, matches, ' '.join(out)))

        # Pick up the best rule: fewer missing, then higher type match, then more matches.
        is_better = (
            missing < best_missing
            or (missing == best_missing
                and (type_match > best_type_match
                     or (type_match == best_type_match and matches > best_matches))))
        if not is_better:
            continue

        best_rule, best, best_non_term_map = rule, out, non_term_map
        best_missing, best_type_match, best_matches = missing, type_match, matches

        # Nothing can beat this candidate, so stop searching.
        if best_type_match == 1 and best_missing == 0 and best_matches == len(input_utt.items):
            break

    if best_rule is None:
        logger.debug('No rule used.')
    elif best_missing > 0:
        logger.warning('While transforming %s, there were missing items.' % sysAct)

    best = self.compute_ftn(best, best_non_term_map)
    return ' '.join(best)
def _getTurnReward(self, turnInfo):
    '''
    Computes the turn reward regarding turnInfo.

    The base reward of every turn is ``-self.penalise_all_turns``. If turnInfo is a dict containing
    both ``'usermodel'`` and ``'sys_act'``, the reward is adjusted as follows:

    * ``-self.wrong_venue_penalty`` if the system names a venue which does not satisfy the goal
      constraints, or informs ``name=none`` while the act conflicts with the goal constraints;
    * ``-self.not_mentioned_value_penalty`` if a ``select``/``confirm`` act uses a slot value which
      has not been mentioned before;
    * ``+self.reward_venue_recommended`` if a correct venue has been recommended and none of the
      user's requests is still ``None``.

    Side effects: updates ``self.user_goal``, ``self.last_venue_recomended``,
    ``self.venue_recommended`` and the mentioned values, and appends the system act to
    ``self.DM_history`` when ``self.using_tasks`` is set.

    :param turnInfo: parameters necessary for computing the turn reward, eg., system act or
                     model of the simulated user.
    :type turnInfo: dict
    :return: int -- the turn reward.
    '''
    # Immediate reward for each turn.
    reward = -self.penalise_all_turns

    # isinstance() is already False for None, so no separate None check is needed.
    if not isinstance(turnInfo, dict):
        return reward

    if 'usermodel' in turnInfo and 'sys_act' in turnInfo:
        # Unpack input user model um.
        um = turnInfo['usermodel']
        self.user_goal = um.goal.constraints

        # Work on a copy of the goal constraints so that masking the venue name below does not
        # alter the goal of the user model itself.
        prev_consts = copy.deepcopy(um.goal.constraints)
        for item in prev_consts:
            if item.slot == 'name' and item.op == '=':
                item.val = 'dontcare'
        requests = um.goal.requests
        sys_act = DiaAct.DiaAct(turnInfo['sys_act'])
        user_act = um.lastUserAct

        # Check if the most recent venue satisfies constraints.
        name = sys_act.get_value('name', negate=False)
        if name not in ['none', None]:
            # Venue is recommended.
            self.last_venue_recomended = name
            if self._isValidVenue(name, prev_consts):
                # Success except if the next user action is reqalts.
                if user_act.act != 'reqalts':
                    logger.debug('Correct venue is recommended.')
                    self.venue_recommended = True
                else:
                    logger.debug(
                        'Correct venue is recommended but the user has changed his mind.')
            else:
                # Previous venue did not match.
                logger.debug('Venue is not correct.')
                self.venue_recommended = False
                logger.debug('Goal constraints: {}'.format(prev_consts))
                reward -= self.wrong_venue_penalty

        # If system inform(name=none) but it was not right decision based on wrong values.
        if name == 'none' and sys_act.has_conflicting_value(prev_consts):
            reward -= self.wrong_venue_penalty

        # Check if the system used slot values previously not mentioned for 'select' and 'confirm'.
        if sys_act.act in ['select', 'confirm']:
            requestable_slots = Ontology.global_ontology.get_system_requestable_slots(
                self.domainString)
            # A non-empty set difference means the act uses a value nobody has mentioned yet.
            if any(set(sys_act.get_values(slot)) - self.mentioned_values[slot]
                   for slot in requestable_slots):
                reward -= self.not_mentioned_value_penalty

        # If the correct venue has been recommended and all requested slots are filled,
        # check if this dialogue is successful.
        if self.venue_recommended and None not in requests.values():
            reward += self.reward_venue_recommended

        # Update mentioned values.
        self._update_mentioned_value(sys_act)
        self._update_mentioned_value(user_act)

    if 'sys_act' in turnInfo and self.using_tasks:
        self.DM_history.append(turnInfo['sys_act'])

    return reward
def read_from_stream(self, scanner):
    '''
    Reads a dialogue act from the token stream.

    The text of consecutive tokens is concatenated, starting at the scanner's current token, until
    a ``;`` or ``:`` token or the tokenizer's end marker is reached. The terminating token is left
    unconsumed. ``scanner.cur`` is indexed as (token type, token text).

    :param scanner: token scanner providing ``cur`` and ``next()``
    :returns: the ``DiaAct.DiaAct`` built from the concatenated token text
    '''
    pieces = []
    while True:
        token_type, token_text = scanner.cur[0], scanner.cur[1]
        if token_text in (';', ':') or token_type == tokenize.ENDMARKER:
            break
        pieces.append(token_text)
        scanner.next()
    return DiaAct.DiaAct(''.join(pieces))