def convertStateAction(self, state, action):
    '''
    Convert a state/action pair into their GP representations.

    A value that already is a GP object is passed through untouched, a
    terminal marker is replaced by a fresh terminal GP object, and anything
    else is converted with self.get_State / self.get_Action.

    :param state: state to convert (GPState, Policy.TerminalState or any
                  value accepted by self.get_State)
    :param action: action to convert (gp.GPAction, Policy.TerminalAction or
                   any value accepted by self.get_Action)
    :returns: tuple (converted state, converted action)
    '''
    # State: pass-through, terminal marker, or regular conversion.
    if isinstance(state, GPState):
        gp_state = state
    elif isinstance(state, Policy.TerminalState):
        gp_state = gp.TerminalGPState()
    else:
        gp_state = self.get_State(state)

    # Action: same three cases as for the state.
    if isinstance(action, gp.GPAction):
        gp_action = action
    elif isinstance(action, Policy.TerminalAction):
        gp_action = gp.TerminalGPAction()
    else:
        gp_action = self.get_Action(action)

    return gp_state, gp_action
def _load_domains_policy(self, domainString=None):
    '''
    Loads and instantiates the respective policy as configured in config file.
    The new object is added to the internal dictionary self.domainPolicies
    under the key domainString.

    Settings are resolved in three layers, later ones overriding earlier
    ones: built-in defaults (policy type 'hdc'), the generic [policy]
    section, and the domain specific [policy_<domainString>] section.
    Domains listed in self.SPECIAL_DOMAINS get their dedicated policy
    regardless of the configured policy type.

    .. Note:
        A policy type that is not one of the built-in names is treated as a
        complete dotted path to a class. To be loadable that way, the class
        __init__() must accept two arguments: domainString, learning

    :param domainString: the domain the policy will work on. The signature
                         default is None, but a string is required because
                         the config section name is built by concatenation.
    :type domainString: str
    :returns: None -- the policy object is stored in self.domainPolicies
    '''
    # 1. get type and files: defaults first, then the config sections.
    policy_type = 'hdc'  # domain+resource independent default
    in_policy_file = ''
    out_policy_file = ''
    learning = False
    # NOTE: useconfreq is read from the config but only the policy types
    # listed in the TODO at the end of this method would consume it.
    useconfreq = False

    config = Settings.config
    domain_section = 'policy_' + domainString

    if not config.has_section(domain_section):
        if not config.has_section('policy'):
            logger.warning("No policy section specified for domain: " +
                           domainString + " - defaulting to HDC")
        else:
            logger.info("No policy section specified for domain: " +
                        domainString +
                        " - using values from 'policy' section")

    # Generic section first so that the domain section can override it.
    for section in ('policy', domain_section):
        if config.has_option(section, 'policytype'):
            policy_type = config.get(section, 'policytype')
        if config.has_option(section, 'learning'):
            learning = config.getboolean(section, 'learning')
        if config.has_option(section, 'useconfreq'):
            useconfreq = config.getboolean(section, 'useconfreq')
        if config.has_option(section, 'inpolicyfile'):
            in_policy_file = config.get(section, 'inpolicyfile')
        if config.has_option(section, 'outpolicyfile'):
            out_policy_file = config.get(section, 'outpolicyfile')

    # 2. instantiate. Imports stay local so that only the module of the
    # selected policy is loaded.
    if domainString in self.SPECIAL_DOMAINS:
        if domainString == 'topicmanager':
            from policy import HDCTopicManager
            self.domainPolicies[domainString] = \
                HDCTopicManager.HDCTopicManagerPolicy()
        elif domainString == "wikipedia":
            import WikipediaTools
            self.domainPolicies[domainString] = WikipediaTools.WikipediaDM()
        return

    # Constructor arguments shared by all file based policies.
    file_args = (in_policy_file, out_policy_file, domainString, learning)

    if policy_type == 'hdc':
        from policy import HDCPolicy
        self.domainPolicies[domainString] = HDCPolicy.HDCPolicy(domainString)
    elif policy_type == 'gp':
        from policy import GPPolicy
        self.domainPolicies[domainString] = GPPolicy.GPPolicy(
            domainString, learning, self.shared_params)
    elif policy_type == 'dipgp':
        from policy import DIPGPPolicy
        self.domainPolicies[domainString] = DIPGPPolicy.GPPolicy(
            domainString, learning, self.shared_params)
    elif policy_type == 'dqn':
        from policy import DQNPolicy
        self.domainPolicies[domainString] = DQNPolicy.DQNPolicy(*file_args)
    elif policy_type == 'a2c':
        from policy import A2CPolicy
        self.domainPolicies[domainString] = A2CPolicy.A2CPolicy(*file_args)
    elif policy_type == 'enac':
        from policy import ENACPolicy
        self.domainPolicies[domainString] = ENACPolicy.ENACPolicy(*file_args)
    elif policy_type == 'bdqn':
        from policy import BDQNPolicy
        self.domainPolicies[domainString] = BDQNPolicy.BDQNPolicy(*file_args)
    elif policy_type == 'acer':
        from policy import ACERPolicy
        self.domainPolicies[domainString] = ACERPolicy.ACERPolicy(*file_args)
    elif policy_type == 'tracer':
        from policy import TRACERPolicy
        self.domainPolicies[domainString] = TRACERPolicy.TRACERPolicy(
            *file_args)
    elif policy_type == 'concrete':
        from policy import ConcreteDQNPolicy
        self.domainPolicies[domainString] = \
            ConcreteDQNPolicy.ConcreteDQNPolicy(*file_args)
    elif policy_type == 'bootstrapped':
        from policy import BootstrappedDQNPolicy
        self.domainPolicies[domainString] = \
            BootstrappedDQNPolicy.BootstrappedDQNPolicy(*file_args)
    elif policy_type == 'dropout':
        from policy import DropoutDQNPolicy
        self.domainPolicies[domainString] = \
            DropoutDQNPolicy.DropoutDQNPolicy(*file_args)
    elif policy_type == 'feudal':
        from policy import FeudalPolicy
        self.domainPolicies[domainString] = FeudalPolicy.FeudalPolicy(
            *file_args)
    elif policy_type == 'feudalAC':
        from policy import FeudalACPolicy
        self.domainPolicies[domainString] = FeudalACPolicy.FeudalACPolicy(
            *file_args)
    else:
        # try to view the config string as a complete module path to the
        # class to be instantiated
        packageString, _, classString = policy_type.rpartition('.')
        error_template = \
            'Invalid policy type "{}" for domain "{}" raising error {}'
        try:
            mod = __import__(packageString, fromlist=[classString])
            klass = getattr(mod, classString)
        except (ImportError, AttributeError, ValueError) as e:
            # ValueError: the type has no dot, so the module name is empty.
            # AttributeError: the module exists but lacks the class.
            logger.error(
                error_template.format(policy_type, domainString, e))
            return
        try:
            self.domainPolicies[domainString] = klass(domainString, learning)
        except ImportError as e:
            # Imports performed while constructing the policy are reported
            # the same way as a failing module import.
            logger.error(
                error_template.format(policy_type, domainString, e))

    # TODO - the policy types 'type' (TypePolicy), 'select' (SelectPolicy,
    # takes use_confreq) and 'nn' (NNPolicy, takes use_confreq and a training
    # flag that now lives in the config file) are not implemented here as
    # they are not currently in use.
    return
def __init__(self, domainUtil):
    """
    Constructor for Dialogue manager: has a belief state tracker and a policy.

    All domain specific options are read from the config section
    ``policy_<domainString>``; only ``bcm`` comes from the generic
    ``policy`` section. Options found in the domain section that were not
    consumed here (and are not config defaults) are reported as invalid.

    Without an explicit ``policytype`` the policy type is 'hdc' when no
    ``inpolicyfile`` is configured and 'gp' otherwise. Only 'hdc', 'gp' and
    'mcc' are implemented; any other type terminates via exit().

    :param domainUtil: (instance) of :class:`DomainUtils`
    :return: None
    """
    config = Settings.config
    section = 'policy_' + domainUtil.domainString

    # Names of the options consumed below; used at the end to flag unknown
    # entries in the domain section.
    configlist = ['policytype']

    def read_option(name, getter, default):
        # Value of `name` in the domain section (recorded as a known option),
        # or `default` when the option is absent.
        if config.has_option(section, name):
            configlist.append(name)
            return getter(section, name)
        return default

    self.actions = SummaryAction.SummaryAction(domainUtil)
    #TODO adding domainUtil instance to class - for conditional tracking -- may not really be required
    self.domainUtil = domainUtil

    # General [policy] config options. (just bcm at present, rest use a domain tag as well)
    self.bcm = False
    if config.has_option('policy', 'bcm'):
        configlist.append('bcm')
        self.bcm = config.getboolean('policy', 'bcm')

    if not config.has_section(section):
        logger.warning("No policy section specified for domain: " +
                       domainUtil.domainString + " - defaulting to HDC")

    self.learning = read_option('learning', config.getboolean, False)
    self.useconfreq = read_option('useconfreq', config.getboolean, False)
    self.curr_policy = read_option('currpolicy', config.getint, -1)
    in_policy_file = read_option('inpolicyfile', config.get, None)
    out_policy_file = read_option('outpolicyfile', config.get, None)

    # Policy type: implied by the presence of an input policy file unless
    # the config names one explicitly.
    self.pol_type = 'hdc' if in_policy_file is None else 'gp'
    if config.has_option(section, 'policytype'):
        self.pol_type = config.get(section, 'policytype')
    if self.pol_type == 'hdc' and in_policy_file is not None:
        # The handcrafted policy does not take a policy file.
        logger.warning(
            'Policy file is given: %s, but policy type is set to hdc.'
            % in_policy_file)
        logger.warning(
            'Ignoring the given policy file and using hdc policy.')

    if self.pol_type == 'hdc':
        from policy import HDCPolicy
        self.policy = HDCPolicy.HDCPolicy(use_confreq=self.useconfreq,
                                          domainUtil=domainUtil)
    elif self.pol_type == 'gp':
        from policy import GPPolicy
        action_names = self.actions.action_names
        if self.bcm:
            # With bcm one policy per available policy file is kept in
            # self.policies; self.policy is not set in this mode.
            # TODO - delete - deprecated -- policy_file.split(";")
            policy_files = DomainUtils.get_all_policies()
            self.policies = [
                GPPolicy.GPPolicy(pf, len(action_names), action_names)
                for pf in policy_files
            ]
        else:
            self.policy = GPPolicy.GPPolicy(in_policy_file, out_policy_file,
                                            len(action_names), action_names,
                                            domainUtil, self.learning)
    elif self.pol_type == 'mcc':
        from policy import MCCPolicy
        self.policy = MCCPolicy.MCCPolicy(in_policy_file, out_policy_file,
                                          self.useconfreq, self.learning,
                                          domainUtil)
    else:
        # TODO - the policy types 'type' (TypePolicy), 'select' (SelectPolicy)
        # and 'nn' (NNPolicy) still need to receive the DomainUtils instance
        # that Policy.Policy() requires and are not implemented; like any
        # unknown type they end the program here.
        exit('NOT IMPLEMENTED... see msg at this point in code')

    # can alternatively use 'focus' as the default
    belief_type = read_option('belieftype', config.get, 'baseline')
    self.startwithhello = read_option('startwithhello', config.getboolean,
                                      False)

    # Report options of the domain section that nothing above consumed.
    if config.has_section(section):
        for opt in config.options(section):
            if opt not in configlist and opt not in config.defaults():
                logger.error('Invalid config: ' + opt)

    if belief_type == 'focus':
        self.beliefs = BeliefTracker.FocusTracker(domainUtil)
    elif belief_type == 'baseline':
        self.beliefs = BeliefTracker.BaselineTracker(domainUtil)
    elif belief_type == 'rnn':
        self.beliefs = BeliefTracker.RNNTracker()
    else:
        logger.error('Invalid belief tracker: ' + belief_type)