Пример #1
0
    def convertStateAction(self, state, action):
        '''
        Converts a state/action pair into the GP specific representations.

        A state that already is a ``GPState`` is passed through untouched; a
        ``Policy.TerminalState`` becomes a fresh ``gp.TerminalGPState`` and anything
        else is handed to ``self.get_State``. Actions are treated the same way using
        ``gp.GPAction``, ``Policy.TerminalAction``/``gp.TerminalGPAction`` and
        ``self.get_Action``.

        :param state: the state to convert
        :param action: the action to convert
        :returns: tuple (converted state, converted action)
        '''
        # State first, then action -- same order in which the helpers get called.
        if isinstance(state, GPState):
            cState = state
        elif isinstance(state, Policy.TerminalState):
            cState = gp.TerminalGPState()
        else:
            cState = self.get_State(state)

        if isinstance(action, gp.GPAction):
            cAction = action
        elif isinstance(action, Policy.TerminalAction):
            cAction = gp.TerminalGPAction()
        else:
            cAction = self.get_Action(action)

        return cState, cAction
Пример #2
0
    def _load_domains_policy(self, domainString=None):
        '''
        Loads and instantiates the policy configured for the given domain and stores it in
        ``self.domainPolicies[domainString]``.

        Options are read from the generic ``policy`` config section first and are then overridden by
        any values present in the domain specific ``policy_<domainString>`` section.

        Default policy type is 'hdc'.

        .. Note:
            A policy type that is not one of the built-in names is interpreted as a complete dotted
            path ``package.module.Class``. To dynamically load such a class, its __init__() must take
            two arguments: domainString, learning

        :param domainString: the domain the policy will work on. The signature default is None, but a
                             string has to be passed as it is concatenated into the config section name.
        :type domainString: str
        :returns: None -- the new policy object is only stored in ``self.domainPolicies``. Nothing is
                  stored if the policy type cannot be resolved (an error is logged instead) or if a
                  special domain is not handled here.
        '''
        config = Settings.config
        domain_section = 'policy_' + domainString

        # 1. get type:
        policy_type = 'hdc'  # domain+resource independent default
        in_policy_file = ''
        out_policy_file = ''
        learning = False
        # Parsed from the config but not consumed by any policy type handled below.
        useconfreq = False

        if not config.has_section(domain_section):
            if not config.has_section('policy'):
                logger.warning("No policy section specified for domain: " +
                               domainString + " - defaulting to HDC")
            else:
                logger.info("No policy section specified for domain: " +
                            domainString +
                            " - using values from 'policy' section")

        # Generic section first so that the domain specific section can override it.
        for section in ('policy', domain_section):
            if config.has_option(section, 'policytype'):
                policy_type = config.get(section, 'policytype')
            if config.has_option(section, 'learning'):
                learning = config.getboolean(section, 'learning')
            if config.has_option(section, 'useconfreq'):
                useconfreq = config.getboolean(section, 'useconfreq')
            if config.has_option(section, 'inpolicyfile'):
                in_policy_file = config.get(section, 'inpolicyfile')
            if config.has_option(section, 'outpolicyfile'):
                out_policy_file = config.get(section, 'outpolicyfile')

        # 2. special domains come with their own fixed policy; the configured type is ignored.
        if domainString in self.SPECIAL_DOMAINS:
            if domainString == 'topicmanager':
                from policy import HDCTopicManager
                self.domainPolicies[
                    domainString] = HDCTopicManager.HDCTopicManagerPolicy()
            elif domainString == "wikipedia":
                import WikipediaTools
                self.domainPolicies[domainString] = WikipediaTools.WikipediaDM(
                )
            return

        # 3. regular domains: the modules are imported lazily so that only the
        #    selected policy implementation gets loaded.
        file_args = (in_policy_file, out_policy_file, domainString, learning)
        policy = None

        if policy_type == 'hdc':
            from policy import HDCPolicy
            policy = HDCPolicy.HDCPolicy(domainString)
        elif policy_type == 'gp':
            from policy import GPPolicy
            policy = GPPolicy.GPPolicy(domainString, learning,
                                       self.shared_params)
        elif policy_type == 'dipgp':
            from policy import DIPGPPolicy
            policy = DIPGPPolicy.GPPolicy(domainString, learning,
                                          self.shared_params)
        elif policy_type == 'dqn':
            from policy import DQNPolicy
            policy = DQNPolicy.DQNPolicy(*file_args)
        elif policy_type == 'a2c':
            from policy import A2CPolicy
            policy = A2CPolicy.A2CPolicy(*file_args)
        elif policy_type == 'enac':
            from policy import ENACPolicy
            policy = ENACPolicy.ENACPolicy(*file_args)
        elif policy_type == 'bdqn':
            from policy import BDQNPolicy
            policy = BDQNPolicy.BDQNPolicy(*file_args)
        elif policy_type == 'acer':
            from policy import ACERPolicy
            policy = ACERPolicy.ACERPolicy(*file_args)
        elif policy_type == 'tracer':
            from policy import TRACERPolicy
            policy = TRACERPolicy.TRACERPolicy(*file_args)
        elif policy_type == 'concrete':
            from policy import ConcreteDQNPolicy
            policy = ConcreteDQNPolicy.ConcreteDQNPolicy(*file_args)
        elif policy_type == 'bootstrapped':
            from policy import BootstrappedDQNPolicy
            policy = BootstrappedDQNPolicy.BootstrappedDQNPolicy(*file_args)
        elif policy_type == 'dropout':
            from policy import DropoutDQNPolicy
            policy = DropoutDQNPolicy.DropoutDQNPolicy(*file_args)
        elif policy_type == 'feudal':
            from policy import FeudalPolicy
            policy = FeudalPolicy.FeudalPolicy(*file_args)
        elif policy_type == 'feudalAC':
            from policy import FeudalACPolicy
            policy = FeudalACPolicy.FeudalACPolicy(*file_args)
        else:
            try:
                # try to view the config string as a complete module path to the class to be instantiated
                packageString, _, classString = policy_type.rpartition('.')
                if not packageString:
                    # __import__('') would raise ValueError; report it like any other bad type.
                    raise ImportError(
                        'not a built-in policy type and not a dotted module path')
                mod = __import__(packageString, fromlist=[classString])
                klass = getattr(mod, classString, None)
                if klass is None:
                    raise ImportError('module "{}" has no attribute "{}"'.format(
                        packageString, classString))
                policy = klass(domainString, learning)
            except ImportError as e:
                logger.error(
                    'Invalid policy type "{}" for domain "{}" raising error {}'
                    .format(policy_type, domainString, e))

        # TODO - the policy types 'type', 'select' and 'nn' are not implemented here
        #        as they are not currently used (they would be the consumers of useconfreq).

        if policy is not None:
            self.domainPolicies[domainString] = policy
Пример #3
0
    def __init__(self, domainUtil):
        """
        Constructor for Dialogue manager: has a belief state tracker and a policy.

        All options except ``bcm`` (read from the generic ``policy`` section) are read from the
        domain specific ``policy_<domainString>`` config section. Without an ``inpolicyfile`` the
        hand-crafted 'hdc' policy is used; with one, the type is taken from ``policytype``
        (default 'gp'). In bcm mode with a 'gp' policy a list ``self.policies`` is built and
        ``self.policy`` is not set. For an invalid ``belieftype`` an error is logged and
        ``self.beliefs`` is not set.

        :param domainUtil: (instance) of :class:`DomainUtils`
        :return: None
        :raises SystemExit: if the resulting policy type is anything other than 'hdc', 'gp' or 'mcc'
        """
        config = Settings.config
        section = 'policy_' + domainUtil.domainString
        # Names of all options this constructor understands; every other option found
        # in the domain section is reported as invalid further down.
        configlist = ['policytype']

        def read_option(sect, name, getter, default):
            # Returns the configured value (and records the option as known) or the default.
            if not config.has_option(sect, name):
                return default
            configlist.append(name)
            return getter(sect, name)

        self.actions = SummaryAction.SummaryAction(domainUtil)
        #TODO adding domainUtil instance to class - for conditional tracking -- may not really be required
        self.domainUtil = domainUtil

        # General [policy] config options. (just bcm at present, rest use a domain tag as well)
        self.bcm = read_option('policy', 'bcm', config.getboolean, False)

        if not config.has_section(section):
            # No section means no inpolicyfile, so the type resolves to 'hdc' below.
            logger.warning("No policy section specified for domain: " +
                           domainUtil.domainString + " - defaulting to HDC")

        self.learning = read_option(section, 'learning', config.getboolean,
                                    False)
        self.useconfreq = read_option(section, 'useconfreq',
                                      config.getboolean, False)
        self.curr_policy = read_option(section, 'currpolicy', config.getint,
                                       -1)
        in_policy_file = read_option(section, 'inpolicyfile', config.get, None)
        out_policy_file = read_option(section, 'outpolicyfile', config.get,
                                      None)

        if in_policy_file is None:
            self.pol_type = 'hdc'
        else:
            self.pol_type = "gp"
            # 'policytype' is already part of configlist, hence no read_option here.
            if config.has_option(section, 'policytype'):
                self.pol_type = config.get(section, 'policytype')
            if self.pol_type == 'hdc':
                logger.warning(
                    'Policy file is given: %s, but policy type is set to hdc.'
                    % in_policy_file)
                logger.warning(
                    'Ignoring the given policy file and using hdc policy.')

        if self.pol_type == 'hdc':
            from policy import HDCPolicy
            self.policy = HDCPolicy.HDCPolicy(use_confreq=self.useconfreq,
                                              domainUtil=domainUtil)
        elif self.pol_type == 'gp':
            from policy import GPPolicy
            action_names = self.actions.action_names
            if self.bcm:
                policy_files = DomainUtils.get_all_policies(
                )  # TODO - delete - deprecated -- policy_file.split(";")
                self.policies = [
                    GPPolicy.GPPolicy(pf, len(action_names), action_names)
                    for pf in policy_files
                ]
            else:
                self.policy = GPPolicy.GPPolicy(in_policy_file,
                                                out_policy_file,
                                                len(action_names),
                                                action_names, domainUtil,
                                                self.learning)
        elif self.pol_type == 'mcc':
            from policy import MCCPolicy
            self.policy = MCCPolicy.MCCPolicy(in_policy_file, out_policy_file,
                                              self.useconfreq, self.learning,
                                              domainUtil)
        else:
            # TODO - the policy types 'type', 'select' and 'nn' need to receive the DomainUtils
            # instance that Policy.Policy() requires. Not currently implemented as we aren't
            # currently using these policy types, so every other type terminates here.
            raise SystemExit(
                'NOT IMPLEMENTED... see msg at this point in code')

        belief_type = read_option(
            section, 'belieftype', config.get,
            'baseline')  # can alternatively use 'focus' as the default
        self.startwithhello = read_option(section, 'startwithhello',
                                          config.getboolean, False)

        if config.has_section(section):
            defaults = config.defaults()
            for opt in config.options(section):
                if opt not in configlist and opt not in defaults:
                    logger.error('Invalid config: ' + opt)

        if belief_type == 'focus':
            self.beliefs = BeliefTracker.FocusTracker(domainUtil)
        elif belief_type == 'baseline':
            self.beliefs = BeliefTracker.BaselineTracker(domainUtil)
        elif belief_type == 'rnn':
            self.beliefs = BeliefTracker.RNNTracker()
        else:
            logger.error('Invalid belief tracker: ' + belief_type)