示例#1
0
class UserSimulation(object):
    def __init__(self):
        self.config = GetConfig()
        assert (not self.config==None), 'Config file required'
        assert (self.config.has_option('LGus','LOGIN_PAGE')),'LGus section missing field LOGIN_PAGE'
        self.login_page = self.config.get('LGus','LOGIN_PAGE')
        assert (self.config.has_option('LGus','URL')),'LGus section missing field URL'
        self.url = self.config.get('LGus','URL')
        assert (self.config.has_option('LGus','ID')),'LGus section missing field ID'
        self.id = {'username':self.config.get('LGus','ID')}
        assert (self.config.has_option('LGus','PASSWD')),'LGus section missing field PASSWD'
        self.id['password'] = self.config.get('LGus','PASSWD')
        try:
            data = urllib.urlencode(self.id)
            req = urllib2.Request(self.login_page, data)
            cj = cookielib.CookieJar()
            self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            response = self.opener.open(req)
            the_page = response.read()
#            print the_page
        except Exception, detail:
            print "Err ", detail
示例#2
0
class tokenizer(object):
    '''
    Sentence tokenizer with three selectable back ends.

    The back end is chosen by `mode`:
      'STANFORD' -- nltk's StanfordTokenizer
      'NLTK'     -- nltk.word_tokenize
      'MINE'     -- regular expressions: most punctuation becomes a space,
                    periods are dropped, then the text is split on whitespace
    '''
    MY_ID = 'TOKENIZER'

    def __init__(self,mode=None):
        '''
        Selects the tokenizer back end.

        mode: back end name; when empty/None the 'mode' option of the
              TOKENIZER config section is used, falling back to 'NLTK'.

        Raises Exception for an unknown mode.
        '''
        self.config = GetConfig()
        if mode:
            self.mode = mode
        elif self.config.has_option(self.MY_ID,'mode'):
            self.mode = self.config.get(self.MY_ID,'mode')
        else:
            self.mode = 'NLTK'
        if self.mode == 'STANFORD':
            # Imported lazily so the Stanford tooling is only needed in this mode.
            from nltk.tokenize.stanford import StanfordTokenizer as Tokenizer
            self.tokenizer = Tokenizer()
        elif self.mode == 'NLTK':
            pass
        elif self.mode == 'MINE':
            self._compile_mine_patterns()
        else:
            raise Exception('Error: tokenizer, Unknown mode %s!' %(self.mode))

    def _compile_mine_patterns(self):
        '''
        Compiles the two patterns used by the 'MINE' back end.
        '''
        # Plain raw strings instead of ur'' literals: the patterns are pure
        # ASCII, so they match the same text, and ur'' does not parse on
        # Python 3.
        self.spacePunct = re.compile(r'[`~!@#\$%\^&\*\(\)\[\]{}_\+\-=\|\\:;\"\'<>,\?/]')
        self.removePunct = re.compile(r'\.')

    def tokenize(self, sent):
        '''
        Splits `sent` into a list of token strings.

        Before tokenizing, a '~' or '-' that is followed by a space (or ends
        the sentence) is set off with spaces on both sides.  Afterwards the
        space following a '%' is removed and the quote markers `` and '' are
        replaced by a double quote character.

        Returns [] for an empty or whitespace-only sentence.
        Raises Exception if self.mode is not one of the supported modes.
        '''
        # A trailing '-' or '~' gets a space so the replacements below apply.
        if sent.endswith('-') or sent.endswith('~'):
            sent += ' '
        sent = sent.replace('~ ', ' ~ ')
        sent = sent.replace('- ', ' - ')
        if self.mode == 'STANFORD':
            tokens = self.tokenizer.tokenize(sent.strip())
        elif self.mode == 'NLTK':
            tokens = nltk.word_tokenize(sent.strip())
        elif self.mode == 'MINE':
            new_sent = sent.strip()
            new_sent = self.spacePunct.sub(' ', new_sent)
            new_sent = self.removePunct.sub('', new_sent)
            tokens = new_sent.split()
        else:
            # mode was changed after construction; fail with a clear message
            # instead of an unbound local further down.
            raise Exception('Error: tokenizer, Unknown mode %s!' %(self.mode))
        p_sent = ' '.join(tokens)
        p_sent = p_sent.replace('% ', '%')
        p_sent = p_sent.replace('``', '\"')
        p_sent = p_sent.replace('\'\'', '\"')
        if not p_sent:
            # ''.split(' ') would yield [''], i.e. one bogus empty token.
            return []
        return p_sent.split(' ')
示例#3
0
class Partition(object):
    '''
    Tracks a partition of listings.

    This class tracks a partition of listings.
    '''
    def __init__(self,existingPartition=None,fieldToSplit=None,value=None):
        '''
        Constructor, and copy constructor.

        If called as Partition(), returns a new root partition.

        If called as Partition(existingPartition,fieldToSplit,value), this creates
        a new child partition of existingPartition, split on fieldToSplit=value.  Does
        not modify existingPartition.

        This constructor is not meant to be called directly by application code.
        Application code should use the BeliefState wrapper.

        Raises RuntimeError if the learned user model is enabled and the
        configured offListBeliefUpdateMethod is not recognised, and
        AssertionError if required config options or arguments are missing.
        '''
        self.appLogger = logging.getLogger('Learning')
        self.config = GetConfig()
        self.useLearnedUserModel = self.config.getboolean(MY_ID,'useLearnedUserModel')
        self.confirmUnlikelyDiscountFactor = self.config.getfloat(MY_ID,'confirmUnlikelyDiscountFactor')
        self.ignoreNonunderstandingFactor = self.config.getboolean(MY_ID,'ignoreNonunderstandingFactor')
        self.num_route = self.config.getint(MY_ID,'numberOfRoute')
        self.num_place = self.config.getint(MY_ID,'numberOfPlace')
        self.num_time = self.config.getint(MY_ID,'numberOfTime')
        self.offListBeliefUpdateMethod = self.config.get('PartitionDistribution','offListBeliefUpdateMethod')

        # NOTE(review): the handle is never used below; the call is kept in
        # case GetDB() has initialisation side effects -- confirm and drop.
        db = GetDB()
        if existingPartition is None:
            # Root partition: covers every combination of field values.
            self.fieldList = ['route','departure_place','arrival_place','travel_time']
            self.fieldCount = len(self.fieldList)
            # Size of the full space, from the configured per-field counts.
            self.totalCount = self.num_route * self.num_place * self.num_place * self.num_time
            self.fields = {}
            for field in self.fieldList:
                self.fields[field] = _FieldEntry()
            self.count = self.totalCount
            self.prior = 1.0
            self.priorOfField = {'route':1.0,'departure_place':1.0,'arrival_place':1.0,'travel_time':1.0}
            self.countOfField = {'route':self.num_route,'departure_place':self.num_place,'arrival_place':self.num_place,'travel_time':self.num_time}

            if not self.useLearnedUserModel:
                # Hand-crafted user model: read its parameters from the config.
                umFields = ['request_nonUnderstandingProb',
                            'request_directAnswerProb',
                            'request_allOverCompleteProb',
                            'request_oogProb',
                            'request_irrelevantAnswerProb',
                            'confirm_directAnswerProb',
                            'confirm_nonUnderstandingProb',
                            'confirm_oogProb']
                assert (self.config is not None), 'Config file required (UserModel parameters)'
                self.umParams = {}
                for key in umFields:
                    assert (self.config.has_option('UserModel', key)),'UserModel section missing field %s' % (key)
                    self.umParams[key] = self.config.getfloat('UserModel',key)
                # Sum of Combination(fieldCount-1, i) for i = 1 .. fieldCount-1;
                # the two derived probabilities below are spread evenly over it.
                overCompleteActionCount = 0
                for i in range(1,self.fieldCount):
                    overCompleteActionCount += Combination(self.fieldCount-1,i)
                self.appLogger.info('fieldCount = %d; overCompleteActionCount = %d' % (self.fieldCount,overCompleteActionCount))
                self.umParams['request_overCompleteProb'] = \
                  1.0 * self.umParams['request_allOverCompleteProb'] / overCompleteActionCount
                self.umParams['open_answerProb'] = \
                  (1.0 - self.umParams['request_nonUnderstandingProb'] - self.umParams['request_oogProb']) / \
                  overCompleteActionCount
            else:
                # Learned user model: unpickle it from modelPath/userModelPath.
                modelPath = self.config.get('Global','modelPath')
                self.userModelPath = self.config.get(MY_ID,'userModelPath')
                # NOTE: unpickling can execute arbitrary code, so the model
                # file named in the config must come from a trusted source.
                modelFile = open(os.path.join(modelPath,self.userModelPath),'rb')
                try:
                    self.userModel = pickle.load(modelFile)
                finally:
                    # Close the handle even if unpickling fails.
                    modelFile.close()
                if self.offListBeliefUpdateMethod == 'heuristicUsingPrior':
                    self.irrelevantUserActProb = self.config.getfloat(MY_ID,'irrelevantUserActProb_HeuristicUsingPrior')
                    self.minRelevantUserActProb = self.config.getfloat(MY_ID,'minRelevantUserActProb_HeuristicUsingPrior')
                elif self.offListBeliefUpdateMethod in ['plain','heuristicPossibleActions']:
                    self.irrelevantUserActProb = self.config.getfloat(MY_ID,'irrelevantUserActProb')
                    self.minRelevantUserActProb = self.config.getfloat(MY_ID,'minRelevantUserActProb')
                else:
                    raise RuntimeError('Unknown offListBeliefUpdateMethod = %s'%self.offListBeliefUpdateMethod)
        else:
            # Child partition: share the parent's model and totals, copy its
            # field constraints and pin fieldToSplit to value.
            assert fieldToSplit is not None,'arg not defined'
            assert value is not None,'arg not defined'
            self.fieldList = existingPartition.fieldList
            self.fieldCount = existingPartition.fieldCount
            if not self.useLearnedUserModel:
                self.umParams = existingPartition.umParams
            else:
                self.userModel = existingPartition.userModel
                self.irrelevantUserActProb = existingPartition.irrelevantUserActProb
                self.minRelevantUserActProb = existingPartition.minRelevantUserActProb
            self.totalCount = existingPartition.totalCount
            self.countOfField = existingPartition.countOfField
            self.priorOfField = {}
            self.fields = {}
            self.count = 1
            for field in self.fieldList:
                if (field == fieldToSplit):
                    self.fields[field] = _FieldEntry(type='equals', equals=value)
                else:
                    self.fields[field] = existingPartition.fields[field].Copy()

                if self.fields[field].type == 'equals':
                    # A pinned field contributes a factor of 1 to the count.
                    self.priorOfField[field] = 1.0/self.countOfField[field]
                else:
                    # An open field contributes every value not yet excluded.
                    excludedCount = len(self.fields[field].excludes.keys())
                    self.count *= (self.countOfField[field] - excludedCount)
                    self.priorOfField[field] = 1.0 - 1.0 * excludedCount/self.countOfField[field]

            self.prior = 1.0 * self.count / self.totalCount
    def Split(self,userAction):
        '''
        Attempts to split the partition on userAction.  Returns a list of zero
        or more child partitions, modifying this partition as appropriate.
        '''
        newPartitions = []
        if (userAction.type == 'non-understanding'):
                # silent doesn't split
            pass
        else:
            for field in userAction.content.keys():
                if (field == 'confirm'):
                    continue
                val = userAction.content[field]
                if (self.fields[field].type == 'equals'):
                    # Cant split this partition -- field already equals something
                    pass
                elif (val in self.fields[field].excludes):
                    # Cant split this partition -- field exludes this value already
                    pass
                else:
                    newPartition = Partition(existingPartition=self,fieldToSplit=field,value=val)
                    if (newPartition.count > 0):
                        self.fields[field].excludes[val] = True
                        self.count -= newPartition.count
                        self.prior = 1.0 * self.count / self.totalCount
                        self.priorOfField[field] = 1.0 - 1.0 * len(self.fields[field].excludes.keys())/self.countOfField[field]
                        newPartitions.append(newPartition)
        return newPartitions

    # This will only be called on a child with no children
    def Recombine(self,child):
        '''
        Attempts to recombine child partition with this (parent) partition.  If
        possible, does the recombination and returns True.  If not possible,
        makes no changes and returns False.
        '''
        fieldsToRecombine = []
        for field in self.fields:
            if (self.fields[field].type == 'excludes'):
                if (child.fields[field].type == 'equals'):
                # parent excludes, child equals
                    value = child.fields[field].equals
                    if (value in self.fields[field].excludes):
                        fieldsToRecombine.append((field,value))
                    else:
                        raise RuntimeError, 'Error: field %s: child equals %s but parent doesnt exclude it' % (field,value)
                else:
                    # parent excludes, child excludes
                    # ensure they exclude the same things
                    if (not len(self.fields[field].excludes) == len(child.fields[field].excludes)):
                        return False
                    for val in self.fields[field].excludes:
                        if (val not in child.fields[field].excludes):
                            return False
                    pass
            else:
                if (child.fields[field].type == 'equals'):
                    # parent equals, child equals (must be equal)
                    pass
                else:
                    raise RuntimeError,'Error: field %s: parent equals %s but child excludes this field' % (field,value)
        if (len(fieldsToRecombine) == 0):
            raise RuntimeError,'Error: parent and child are identical'
        if (len(fieldsToRecombine) > 1):
            raise RuntimeError,'Error: parent and child differ by more than 1 field: %s' % (fieldsToRecombine)
        self.count += child.count
        self.prior = 1.0 * self.count / self.totalCount
        del self.fields[fieldsToRecombine[0][0]].excludes[ fieldsToRecombine[0][1] ]
        return True

    def __str__(self):
        '''
        Renders this partition as a string.  Example:

          city x();state x();last x(WILLIAMS);first=JASON;count=386

        This is the partition of 386 listings which have first name
        JASON, and do NOT have last name WILLIAMS (located in any city
        and any state).
        '''
        s = ''
        if (len(self.fields) > 0):
            elems = []
            for conceptName in self.fieldList:
                if (self.fields[conceptName].type == 'equals') :
                    elems.append('%s=%s' % (conceptName,self.fields[conceptName].equals))
                elif (len(self.fields[conceptName].excludes) <= 2):
                    elems.append('%s x(%s)' % (conceptName,','.join(self.fields[conceptName].excludes.keys())))
                else:
                    elems.append('%s x([%d entries])' % (conceptName,len(self.fields[conceptName].excludes)))
            elems.append('count=%d' % (self.count))
            s = ';'.join(elems)
        else:
            s = "(all)"
        return s

    def _getClosestUserAct(self,userAction):
        if userAction.type == 'non-understanding':
            return 'non-understanding'
      
        acts = [['I:ap','I:bn','I:dp','I:tt'],\
                      ['I:ap','I:bn','I:dp'],\
                      ['I:ap','I:dp','I:tt'],\
                      ['I:bn','I:dp','I:tt'],\
                      ['I:ap','I:dp'],\
                      ['I:bn','I:tt'],\
                      ['I:bn'],\
                      ['I:dp'],\
                      ['I:ap'],\
                      ['I:tt'],\
                      ['yes'],\
                      ['no']]
        ua = []
        for field in userAction.content:
            if field == 'confirm':
                ua.append('yes' if userAction.content[field] == 'YES' else 'no')
            elif field == 'route':
                ua.append('I:bn')
            elif field == 'departure_place':
                ua.append('I:dp')
            elif field == 'arrival_place':
                ua.append('I:ap')
            elif field == 'travel_time':
                ua.append('I:tt')
        
        score = [float(len(set(act).intersection(set(ua))))/len(set(act).union(set(ua))) for act in acts] 
        closestUserAct = ','.join(acts[score.index(max(score))])
#        self.appLogger.info('Closest user action %s'%closestUserAct) 
        return closestUserAct

    def UserActionLikelihood(self, userAction, history, sysAction):
        '''
        Returns the probability of the user taking userAction given dialog
        history, sysAction, and that their goal is within this partition.

        userAction: object with a `type` and a `content` mapping of field
                    names (and optionally 'confirm') to values.
        history:    not used by this method.
        sysAction:  object with `type`, `force` ('request' or 'confirm') and
                    `content`.  For 'request' the content is compared with a
                    field name or 'all'; for 'confirm' it is indexed as a
                    mapping of field name to value.

        Depending on useLearnedUserModel the value comes either from the
        hand-crafted parameters in umParams or from the learned userModel
        tables.

        Raises RuntimeError when sysAction.type is not 'ask'.
        '''
        if not self.useLearnedUserModel:
            # ---- hand-crafted user model ----
            result = 0.0
            if (sysAction.type == 'ask'):
                if (userAction.type == 'non-understanding'):
                    if (sysAction.force == 'confirm'):
                        result = self.umParams['confirm_nonUnderstandingProb']
                    else: 
                        result = self.umParams['request_nonUnderstandingProb']
                else:
                    targetFieldIncludedFlag = False
                    overCompleteFlag = False
                    allFieldsMatchGoalFlag = True
                    # NOTE(review): for a confirm, sysAction.content is indexed
                    # like a mapping below, so `field == askedField` further
                    # down presumably never holds in that case -- confirm.
                    askedField = sysAction.content
                    for field in userAction.content:
                        if field == 'confirm':
                            if sysAction.force == 'request':
                                # A yes/no in reply to a request never matches.
                                allFieldsMatchGoalFlag = False
                                continue
                            # Does this partition agree with everything the
                            # system asked to confirm?  (This inner loop
                            # rebinds the outer loop variable `field`.)
                            for field in sysAction.content:
                                val = sysAction.content[field]
                                if (self.fields[field].type == 'excludes' or not self.fields[field].equals == val):
                                    allFieldsMatchGoalFlag = False
                            if (allFieldsMatchGoalFlag):
                                if (userAction.content['confirm'] == 'YES'):
                                    result = self.umParams['confirm_directAnswerProb']
                                    targetFieldIncludedFlag = True
                                else:
                                    result = self.umParams['request_irrelevantAnswerProb']
                            else:
                                if (userAction.content['confirm'] == 'NO'):
                                    result = self.umParams['confirm_directAnswerProb']
                                    targetFieldIncludedFlag = True
                                else:
                                    result = self.umParams['request_irrelevantAnswerProb']
                        else:
                            val = userAction.content[field]
                            if (self.fields[field].type == 'equals' and self.fields[field].equals == val):
                                if (field == askedField):
                                    targetFieldIncludedFlag = True
                                else:
                                    overCompleteFlag = True
                            else:
                                allFieldsMatchGoalFlag = False
                    # NOTE(review): every branch of this chain except the last
                    # overwrites `result`.  In particular a 'NO' to a confirm
                    # that does not match this partition was assigned
                    # confirm_directAnswerProb above but is replaced here,
                    # because allFieldsMatchGoalFlag is False -- confirm that
                    # this is intended.
                    if (not allFieldsMatchGoalFlag):
                        # This action doesn't agree with this partition
                        result = self.umParams['request_irrelevantAnswerProb']
                    elif (askedField == 'all'):
                        # A response to the open question
                        result = self.umParams['open_answerProb']
                    elif (not targetFieldIncludedFlag):
                        # This action doesn't include the information that was asked for
                        # This user model doesn't ever do this
                        result = self.umParams['request_irrelevantAnswerProb']
                    elif (overCompleteFlag):
                        # This action include extra information - this happens
                        # request_overCompleteProb amount of the time
                        result = self.umParams['request_overCompleteProb']
                    else:
                        # This action just answers the question that was asked;
                        # a positive value set in the confirm branch is kept.
                        result = result if result > 0 else self.umParams['request_directAnswerProb']
            else:
                raise RuntimeError, 'Dont know sysAction.type = %s' % (sysAction.type)
        else:
            # ---- learned user model ----
            if sysAction.type != 'ask':
                raise RuntimeError, 'Cannot handle sysAction %s'%str(sysAction)
            # Default for actions that do not agree with this partition.
            result = self.irrelevantUserActProb
            allFieldsMatchGoalFlag = True
            directAnswer = False
            if sysAction.force == 'confirm':
                # Only the first key of the confirm content is considered.
                askedField = sysAction.content.keys()[0]
                if userAction.type != 'non-understanding':
                    for ua_field in userAction.content:
                        self.appLogger.info('User action field: %s:%s'%(ua_field,userAction.content[ua_field]))
                        if ua_field == 'confirm' and userAction.content[ua_field] == 'YES':
                            # YES only fits if the partition pins the confirmed value.
                            val = sysAction.content[askedField]
                            if self.fields[askedField].type == 'excludes' or not self.fields[askedField].equals == val:
                                self.appLogger.info('Mismatched YES')
                                allFieldsMatchGoalFlag = False
                        elif ua_field == 'confirm' and userAction.content[ua_field] == 'NO':
                            # NO does not fit if the partition pins the confirmed
                            # value, or is open on the field without excluding it.
                            val = sysAction.content[askedField]
                            if (self.fields[askedField].type == 'equals' and self.fields[askedField].equals == val) or\
                            (self.fields[askedField].type == 'excludes' and val not in self.fields[askedField].excludes):
                                self.appLogger.info('Mismatched NO')
                                allFieldsMatchGoalFlag = False
                        elif askedField == ua_field:
                            # The user restated a value for the confirmed field.
                            directAnswer = True
                            if self.fields[askedField].type == 'excludes' or \
                            self.fields[askedField].equals != userAction.content[askedField]:
                                self.appLogger.info('Mismatched %s'%userAction.content[askedField])
                                allFieldsMatchGoalFlag = False
                        else:
                            val = userAction.content[ua_field]
                            if self.fields[ua_field].type == 'excludes' or not self.fields[ua_field].equals == val:
                                # A mismatch is tolerated when departure and
                                # arrival carry the same value in the user
                                # action and the other of the two fields is
                                # pinned to that value in this partition.
                                if not ((ua_field == 'arrival_place' and 'departure_place' in userAction.content and \
                                userAction.content['departure_place'] == userAction.content['arrival_place'] and \
                                self.fields['departure_place'].type == 'equals' and \
                                self.fields['departure_place'].equals == userAction.content['departure_place']) or\
                                (ua_field == 'departure_place' and 'arrival_place' in userAction.content and \
                                userAction.content['departure_place'] == userAction.content['arrival_place'] and \
                                self.fields['arrival_place'].type == 'equals' and \
                                self.fields['arrival_place'].equals == userAction.content['arrival_place'])):
                                    self.appLogger.info('Mismatched %s in field %s'%(val,ua_field))
                                    allFieldsMatchGoalFlag = False
                elif self.ignoreNonunderstandingFactor:
                    # Non-understandings keep the default probability.
                    allFieldsMatchGoalFlag = False
                if allFieldsMatchGoalFlag:
                    self.appLogger.info('All fields matched')
                    # 'C-o' is used for a YES or a restated value, 'C-x' otherwise.
                    if (userAction.type != 'non-understanding' and 'confirm' in userAction.content and userAction.content['confirm'] == 'YES') or\
                    directAnswer:
                        result = self.userModel['C-o'][self._getClosestUserAct(userAction)]
                    else:
                        # NOTE(review): this branch is only reached when
                        # directAnswer is False, so the condition below cannot
                        # hold and the `del` never runs.
                        if userAction.type != 'non-understanding' and 'confirm' in userAction.content and directAnswer:
                            del userAction.content['confirm']
                        if 'departure_place' in userAction.content and 'arrival_place' in userAction.content and \
                        userAction.content['departure_place'] == userAction.content['arrival_place']:
                            # Same value for both places: look the act up
                            # without the arrival place, on a copy.
                            tempUserAction = deepcopy(userAction)
                            del tempUserAction.content['arrival_place']
                            result = self.userModel['C-x'][self._getClosestUserAct(tempUserAction)]
                        else:
                            result = self.userModel['C-x'][self._getClosestUserAct(userAction)]
                    self.appLogger.info('User action likelihood %g'%result)
                    # Never go below the configured floor for matching actions.
                    result = self.minRelevantUserActProb if result < self.minRelevantUserActProb else result
                    self.appLogger.info('Set minimum user action likelihood %g'%result)
            elif sysAction.force == 'request':
                askedField = sysAction.content
                if userAction.type != 'non-understanding':
                    for ua_field in userAction.content:
                        if ua_field != 'confirm':
                            val = userAction.content[ua_field]
                            if self.fields[ua_field].type == 'excludes' or not self.fields[ua_field].equals == val:
                                # Same departure/arrival tolerance as in the
                                # confirm case above.
                                if not ((ua_field == 'arrival_place' and 'departure_place' in userAction.content and \
                                userAction.content['departure_place'] == userAction.content['arrival_place'] and \
                                self.fields['departure_place'].type == 'equals' and \
                                self.fields['departure_place'].equals == userAction.content['departure_place']) or\
                                (ua_field == 'departure_place' and 'arrival_place' in userAction.content and \
                                userAction.content['departure_place'] == userAction.content['arrival_place'] and \
                                self.fields['arrival_place'].type == 'equals' and \
                                self.fields['arrival_place'].equals == userAction.content['arrival_place'])):
                                    self.appLogger.info('Mismatched %s in field %s'%(val,ua_field))
                                    allFieldsMatchGoalFlag = False
                elif self.ignoreNonunderstandingFactor:
                    allFieldsMatchGoalFlag = False
                if allFieldsMatchGoalFlag:
                    # Pick the model table for the requested field.  An
                    # askedField matching none of the cases keeps the default.
                    if askedField == 'route':
                        result = self.userModel['R-bn'][self._getClosestUserAct(userAction)]
                    elif askedField == 'departure_place':
                        result = self.userModel['R-dp'][self._getClosestUserAct(userAction)]
                    elif askedField == 'arrival_place':
                        result = self.userModel['R-ap'][self._getClosestUserAct(userAction)]
                    elif askedField == 'travel_time':
                        if 'departure_place' in userAction.content and 'arrival_place' in userAction.content and \
                        userAction.content['departure_place'] == userAction.content['arrival_place']:
                            tempUserAction = deepcopy(userAction)
                            del tempUserAction.content['arrival_place']
                            result = self.userModel['R-tt'][self._getClosestUserAct(tempUserAction)]
                        else:
                            result = self.userModel['R-tt'][self._getClosestUserAct(userAction)]
                    elif askedField == 'all':
                        if 'departure_place' in userAction.content and 'arrival_place' in userAction.content and \
                        userAction.content['departure_place'] == userAction.content['arrival_place']:
                            tempUserAction = deepcopy(userAction)
                            del tempUserAction.content['arrival_place']
                            result = self.userModel['R-open'][self._getClosestUserAct(tempUserAction)]
                        else:
                            result = self.userModel['R-open'][self._getClosestUserAct(userAction)]
                    self.appLogger.info('User action likelihood %g'%result)
                    result = self.minRelevantUserActProb if result < self.minRelevantUserActProb else result
                    self.appLogger.info('Set minimum user action likelihood %g'%result)
        return result
    
    def UserActionUnlikelihood(self, userAction, history, sysAction):
        '''
        Returns the probability of the user not taking userAction given dialog
        history, sysAction, and that their goal is within this partition.
        '''
        if sysAction.type != 'ask':
            raise RuntimeError, 'Dont know sysAction.type = %s' % (sysAction.type)

#        self.appLogger.info('Apply confirmUnlikelyDiscountFactor %f'%self.confirmUnlikelyDiscountFactor)
        if sysAction.force == 'request':
            result = self.prior
            reason = 'request'
        elif sysAction.force == 'confirm':
            result = self.confirmUnlikelyDiscountFactor * self.prior
            reason = 'confirm'
#        self.appLogger.info('UserActionUnlikelihood by (%s): %g'%(reason,result))
        return result
示例#4
0
文件: DB.py 项目: junion/LGrl
class DB(object):
    '''
    Wraps a sqlite3 database of listings.

    Besides the listings table (_TABLE), the queries below rely on
    pre-computed count tables: _TABLE_COUNTS holds the total row count in the
    row with value='all', and "<_TABLE_COUNTS>_<field>" holds one
    (value, count) row per distinct value of each field.

    Values are embedded in SQL as quoted string literals with embedded single
    quotes doubled, so a value such as O'BRIEN does not break the statement.
    '''
    def __init__(self):
        '''
        Creates a DB instance.

        Opens "<dbStem>.sqlite" (dbStem is read from the configuration),
        discovers the field names from the listings table, and caches the row
        count and the number of distinct values per field.

        Raises RuntimeError if the listings table has no columns (i.e. the
        table could not be found in the file).
        '''
        self.appLogger = logging.getLogger(MY_ID)
        self.config = GetConfig()
        self.dbStem = self.config.get(MY_ID,'dbStem')
        self.dbFile = '%s.sqlite' % (self.dbStem)
        self.dbHitCounter = 0
        self.conn = sqlite.connect(self.dbFile)
        self.conn.text_factory = str
        self.cur = self.conn.cursor()
        tableInfo = self._ExecuteSQL("PRAGMA table_info(%s)" % (_TABLE),'all')
        if not tableInfo:
            raise RuntimeError('Could not connect to DB %s' % (self.dbFile))
        # Index 1 of each table_info row is the column name.
        self.fieldNames = [colInfo[1] for colInfo in tableInfo if colInfo[1] != 'rowid']
        self.appLogger.info('DB has fields: %s' % (self.fieldNames))
        self.rowCount = self._ExecuteSQLOneItem("SELECT count FROM %s WHERE value='all'" % (_TABLE_COUNTS))
        self.fieldSize = {}
        for field in self.fieldNames:
            self.fieldSize[field] = self.GetFieldSize(field)
        self.appLogger.info('Loaded db with %d rows' % (self.rowCount))

    def GetRandomListing(self):
        '''
        Returns a random listing.

        A rowid is drawn uniformly from 1..rowCount; if no row exists at that
        rowid another one is drawn.
        '''
        listing = None
        while listing is None:
            rowid = random.randint(1,self.rowCount)
            # noneOK lets a gap in the rowid sequence trigger a retry instead
            # of aborting with RuntimeError.
            listing = self.GetListingByRowID(rowid,noneOK=True)
        self.appLogger.info('listing=%s' % (listing))
        return listing

    def GetListingByRowID(self,rowid,noneOK=False):
        '''
        Returns the listing at rowid (an integer) as a dict keyed by field
        name.

        If no row exists at rowid: raises RuntimeError by default, or returns
        None when noneOK is True.
        '''
        row = self._ExecuteSQL('SELECT %s FROM %s WHERE rowid=%d LIMIT 1' % (','.join(self.fieldNames),_TABLE,rowid),noneOK=noneOK)
        if row is None:
            return None
        return self._RowToListing(row)

    def GetListingsByQuery(self,query):
        '''
        Returns an array of all the listings that match query.  Each listing is
        a dict.

        A query whose constraints are all empty excludes matches every row.
        '''
        where = self._BuildWhereClause(query)
        stmt = 'SELECT %s FROM %s' % (','.join(self.fieldNames),_TABLE)
        if where:
            # An empty clause must not be emitted: "... WHERE " is invalid SQL.
            stmt += ' WHERE %s' % (where)
        rows = self._ExecuteSQL(stmt,fetch='all')
        listings = []
        for row in rows:
            if row is None:
                raise RuntimeError('row == None')
            listings.append(self._RowToListing(row))
        return listings

    def GetListingCount(self,query):
        '''
        Returns the number of listings that match query.

        Queries with no effective constraint, or with a single constraint on a
        known field, are answered from the pre-computed count tables; anything
        else runs a COUNT(*) over the listings table.
        '''
        fields = []
        for field in query:
            if (query[field].type == 'excludes' and len(query[field].excludes)==0):
                continue
            fields.append(field)
        if not fields:
            count = self.rowCount
        elif (len(fields) == 1 and fields[0] in self.fieldNames):
            # use pre-computed count
            field = fields[0]
            if (query[field].type == 'equals'):
                val = query[field].equals
                count = self._ExecuteSQLOneItem("SELECT count FROM %s_%s WHERE value=%s" % (_TABLE_COUNTS,field,self._QuoteValue(val)))
            else:
                excludes = [self._QuoteValue(item) for item in query[field].excludes]
                minusCount = self._ExecuteSQLOneItem("SELECT SUM(count) FROM %s_%s WHERE value IN (%s)" % (_TABLE_COUNTS,field,','.join(excludes)))
                # SUM() yields NULL when none of the excluded values occur.
                count = self.rowCount - (minusCount or 0)
        else:
            # do normal count
            where = self._BuildWhereClause(query)
            count = self._ExecuteSQLOneItem('SELECT COUNT(*) FROM %s WHERE %s' % (_TABLE,where))
        return count

    def GetFieldSize(self,field):
        '''
        Returns the number of distinct values in field.
        '''
        result = int(self._ExecuteSQLOneItem("SELECT count(*) FROM %s_%s" % (_TABLE_COUNTS,field)))
        return result

    def GetFieldElementByIndex(self,field,rowid):
        '''
        Returns the rowid-th value of field, where rowid>=1 and
        rowid <= self.GetFieldSize(field).
        '''
        result = self._ExecuteSQLOneItem("SELECT value FROM %s_%s WHERE rowid=%d LIMIT 1" % (_TABLE_COUNTS,field,rowid))
        return result

    def GetFields(self):
        '''
        Returns the list of fields in the DB (a copy; callers may modify it).
        '''
        return list(self.fieldNames)

    def GetDBStem(self):
        '''
        Returns the DB stem.  DB file names are of the form
        "dbStem.sqlite"; here the DB stem is "dbStem".
        '''
        return self.dbStem

    def GetDBFile(self):
        '''
        Returns the DB filename.  DB file names are of the form
        "dbStem.sqlite".
        '''
        return self.dbFile

    def RowIterator(self):
        '''
        Return an iterator over all the listings.  Each result
        is a dict holding 'rowid' (an int) plus one entry per field.

        The iteration runs on the instance's shared cursor (self.cur).
        '''
        stmt = "SELECT rowid,%s FROM %s" % (','.join(self.fieldNames),_TABLE)
        self.appLogger.info('Query (RowIterator): %s [results omitted for space]' % (stmt))
        self.cur.execute(stmt)
        for row in self.cur:
            result = {'rowid': int(row[0])}
            result.update(zip(self.fieldNames,row[1:]))
            yield result

    def _ExecuteSQL(self,stmt,fetch='oneRow',noneOK=False):
        '''
        Executes stmt and returns all rows (fetch='all') or the first row
        (any other fetch value).  Every call increments self.dbHitCounter.

        Raises RuntimeError when the result is None unless noneOK is True.
        '''
        self.cur.execute(stmt)
        self.dbHitCounter += 1
        if (fetch == 'all'):
            result = self.cur.fetchall()
        else:
            result = self.cur.fetchone()
        if (not noneOK and result is None):
            raise RuntimeError('row == None')
        self.appLogger.info('Query: %s [%s]' % (stmt,result))
        return result

    def _ExecuteSQLOneItem(self,stmt):
        '''
        Executes stmt and returns the first column of its first row.
        '''
        row = self._ExecuteSQL(stmt, fetch='oneRow')
        return row[0]

    def _RowToListing(self,row):
        '''
        Pairs a result row with self.fieldNames, giving a listing dict.
        '''
        return dict(zip(self.fieldNames,row))

    @staticmethod
    def _QuoteValue(value):
        '''
        Returns value as a single-quoted SQL string literal, doubling any
        embedded single quote.
        '''
        return "'%s'" % (('%s' % (value,)).replace("'","''"))

    def _BuildWhereClause(self,query):
        '''
        Builds the text that follows WHERE for query: one condition per
        field, joined with AND.  Fields whose constraint is an empty
        'excludes' contribute nothing, so the result may be an empty string.
        '''
        whereItems = []
        for field in query:
            spec = query[field]
            if (spec.type == 'excludes' and len(spec.excludes)==0):
                continue
            if (spec.type == 'equals'):
                whereItems.append("%s = %s" % (field,self._QuoteValue(spec.equals)))
            elif (len(spec.excludes) == 1):
                whereItems.append("%s != %s" % (field,self._QuoteValue(next(iter(spec.excludes)))))
            else:
                excludeItems = [self._QuoteValue(item) for item in spec.excludes]
                whereItems.append("%s NOT IN (%s)" % (field,','.join(excludeItems)))
        return ' AND '.join(whereItems)

    def RunTest(self,testSpec,N):
        '''
        Runs N tests of the DB using a test specified by testSpec.  N must be
        a positive integer (the averages divide by N).

        testSpec is a dict like:

        spec = {
                'first' : 10,
                'last' : 10,
                'city' : 10,
                'state' : None,
                }

        where values indicate:

            None : equals a randomly sampled item
            0 = excludes nothing
            1 = excludes 1 value, etc.

        In each iteration, a random target row is sampled.  Then random values to exclude are
        sampled.  Then the query is run.

        Returns:

            (avRandTime,avQueryTime,longestQueryTime,avReturnedCallees)
        '''
        randomTime = 0.0
        queryTime = 0.0
        longestCountQueryTime = 0.0
        listingCount = 0
        for _ in range(N):
            startCPU = CPU()
            randomListing = self.GetRandomListing()
            randomTime += (CPU()-startCPU)
            query = {}
            for field in testSpec:
                query[field] = _QueryClass()
                if testSpec[field] is None:
                    query[field].type = 'equals'
                    query[field].equals = randomListing[field]
                else:
                    query[field].type = 'excludes'
                    # Field elements are addressed by 1-based rowid.
                    indexes = random.sample(range(self.fieldSize[field]), testSpec[field])
                    query[field].excludes = dict(("%s" % self.GetFieldElementByIndex(field,index+1),True) for index in indexes)
            startCPU = CPU()
            count = self.GetListingCount(query)
            elapsed = CPU()-startCPU
            queryTime += elapsed
            if (elapsed > longestCountQueryTime):
                longestCountQueryTime = elapsed
            listingCount += count
        return (float(randomTime / N),float(queryTime / N),float(longestCountQueryTime),float(1.0 * listingCount / N))
示例#5
0
文件: Utils.py 项目: liangkai/DSTC4
class Tuple_Extractor(object):
    '''
    Converts between frame labels and flat "slot:value" tuples.

    A slot configuration (loaded from a JSON file) records, for each known
    slot, whether it is enumerable.  extract_tuple() flattens a frame label
    into tuples; generate_frame() rebuilds a frame from tuples and their
    probabilities.
    '''
    MY_ID = 'Tuple_Extractor'

    def __init__(self, slot_config_file = None):
        '''
        slot_config_file tells which slot is enumerable and which is not.

        When it is not given, the file name is read from the configuration
        (option 'slot_config_file' of section MY_ID) and resolved relative to
        the "../config/" directory next to this module.
        '''
        self.config = GetConfig()
        self.appLogger = logging.getLogger(self.MY_ID)

        if not slot_config_file:
            self.appLogger.debug('Slot config file is not assigned, so use the default config file')
            slot_config_file = self.config.get(self.MY_ID,'slot_config_file')
            slot_config_file = os.path.join(os.path.dirname(__file__),'../config/', slot_config_file)
        self.appLogger.debug('Slot config file: %s' %(slot_config_file))

        # The context manager closes the file even if json.load raises.
        with codecs.open(slot_config_file, 'r', 'utf-8') as config_stream:
            self.slot_config = json.load(config_stream)

    def enumerable(self, slot):
        '''
        Returns the configuration entry for slot (truthy when the slot is
        enumerable).  Logs and raises Exception for an unknown slot.
        '''
        if slot not in self.slot_config:
            self.appLogger.error('Error: Unknown slot: %s' %(slot))
            raise Exception('Error: Unknown slot: %s' %(slot))
        return self.slot_config[slot]

    def extract_tuple(self, frame_label):
        '''
        Flattens frame_label (a mapping of slot -> values) into a list of
        unique tuples: 'root:<slot>' for every slot, plus '<slot>:<value>'
        for every value of an enumerable slot.  The order of the returned
        list is unspecified.
        '''
        output_tuple = set()
        for slot in frame_label:
            output_tuple.add('root:%s' %(slot))
            if self.enumerable(slot):
                for value in frame_label[slot]:
                    output_tuple.add('%s:%s' %(slot, value))
        return list(output_tuple)

    def generate_frame(self, tuples, t_probs, mode = 'hr'):
        '''
        generate frame based on tuples
        there are two generate modes:
        high-precision mode: 'hp'
        high-recall mode: 'hr'

        tuples are 'slot:value' strings ('root:<slot>' announces a slot) and
        t_probs holds one probability per tuple.  The result maps each slot
        to {'prob': ..., 'values': {value: prob}}, keeping the highest
        probability seen for a slot or value.

        A value tuple is attached only if its slot was announced by a root
        tuple.  In 'hp' mode the remaining value tuples are dropped; in 'hr'
        mode their slots are added with prob -1.

        Only the first ':' separates slot from value, so values that
        themselves contain ':' are accepted.
        '''
        if mode != 'hp' and mode != 'hr':
            self.appLogger.error('Error: Unknown generate mode: %s' %(mode))
            raise Exception('Error: Unknown generate mode: %s' %(mode))

        pending = []
        for t, prob in zip(tuples, list(t_probs)):
            tokens = t.split(':', 1)
            assert len(tokens) == 2, 'Malformed tuple (expected "slot:value"): %s' % (t,)
            pending.append((tuple(tokens), prob))

        frame_label = {}

        # Repeat until a pass attaches nothing new: a value tuple listed
        # before its root tuple is only attachable on a later pass.
        while pending:
            deferred = []
            for item in pending:
                (slot, value), prob = item
                if slot == 'root':
                    if value not in frame_label:
                        frame_label[value] = {'prob': prob, 'values':{}}
                    elif prob > frame_label[value]['prob']:
                        frame_label[value]['prob'] = prob
                elif slot in frame_label:
                    values = frame_label[slot]['values']
                    if value not in values or prob > values[value]:
                        values[value] = prob
                else:
                    deferred.append(item)
            if len(deferred) == len(pending):
                break
            pending = deferred

        if mode == 'hp':
            return frame_label

        # High recall: keep value tuples whose slot was never announced.
        for (slot, value), prob in pending:
            if slot not in frame_label:
                frame_label[slot] = {'prob': -1, 'values':{}}
            values = frame_label[slot]['values']
            if value not in values or prob > values[value]:
                values[value] = prob
        return frame_label
示例#6
0
class ASRResult:
    '''
    Represents an ASR result.

    Two constructors:

      ASRResult.FromWatson(watsonResult,grammar)
      ASRResult.Simulated(grammar,userActions,probs,isTerminal,correctPosition)

    '''
    MY_ID = 'ASRResult'
    def __init__(self):
        '''
        Not intended to be called directly.  Use one of the two
        constructors ASRResult.FromWatson(...) or
        ASRResult.Simulated(...).
        '''
        self.applogger = logging.getLogger(self.MY_ID)
        self.config = GetConfig()
        self.probTotal = 0.0
        self.correctPosition = None
#        self.watsonResult = None
        self.offListBeliefUpdateMethod = self.config.get('PartitionDistribution','offListBeliefUpdateMethod')
        self.numberOfRoute = self.config.getfloat('BeliefState','numberOfRoute')
        self.numberOfPlace = self.config.getfloat('BeliefState','numberOfPlace')
        self.numberOfTime = self.config.getfloat('BeliefState','numberOfTime')
        self.totalCount = self.numberOfRoute * self.numberOfPlace * self.numberOfPlace * self.numberOfTime
        self.fixedASRConfusionProbability = self.config.getfloat('BeliefState','fixedASRConfusionProbability')

#    @classmethod
#    def FromWatson(cls,watsonResult,grammar):
#        '''
#        Constructor for creating an ASRResult object from a real speech recognition
#        output.
#
#        watsonResult is JSON in the form:
#
#        {
#          'nbest': [
#            { ... },
#            { ... },
#            ...
#          ],
#          'nlu-sisr' : [
#            { 'interp' : {
#                'first' : 'JASON',
#                'last' : 'WILLIAMS'
#                ...
#               },
#            },
#            { 'interp' : {
#                'first' : 'JAMISON',
#                'last' : 'WILLIAMS'
#                ...
#               },
#            },
#            ...
#          ],
#        }
#
#        and grammar is a Grammar object.
#
#        Based on the features in the recognition result, probabilities are estimated
#        for each of the N-Best list entries.
#        '''
#        self = cls()
#        self.grammar = grammar
#        self.isTerminal = False
#        self.userActions = []
#        self.probs = []
#        self.watsonResult = watsonResult
#        db = GetDB()
#        self.fields = ['route','departure_place','arrival_place','travel_time']#db.GetFields()
#        self.fields.append('confirm')
#        if ('nlu-sisr' in watsonResult):
#            for result in watsonResult['nlu-sisr']:
#                content = {}
#                if ('interp' in result):
#                    for field in self.fields:
#                        if (field in result['interp']):
#                            content[field] = result['interp'][field]
#                if (len(content)>0):
#                    self.userActions.append(UserAction('ig',content))
#        if (len(self.userActions) == 0):
#            return self
#        fullGrammarName = self.grammar.GetFullName()
#        fullSectionName = '%s_%s' % (self.MY_ID,fullGrammarName)
#        wildcardSectionName = '%s_*' % (self.MY_ID)
#        if (self.config.has_section(fullSectionName)):
#            sectionName = fullSectionName
#        elif (self.config.has_section(wildcardSectionName)):
#            sectionName = wildcardSectionName
#        else:
#            raise RuntimeError,'Configuration file has neither %s nor %s defined' % (fullSectionName,wildcardSectionName)
#        self.params = ConfigSectionToDict(self.config,sectionName)
#        self.applogger.debug('Params = %s' % (self.params))
#        turn = { 'recoResults': watsonResult, }
#        self.features = [1]
##        asrFeatures = ExtractFeatures(turn)
#        asrFeatures = {}
#        if (None in asrFeatures):
#            self.userActions = []
#            return
#        self.features.extend(asrFeatures)
#        partial = {}
#        if (len(self.userActions) == 1):
#            types = ['correct','offList']
#        else:
#            types = ['correct','onList','offList']
#        for type in types:
#            exponent = 0.0
#            for (i,feature) in enumerate(self.features):
#                exponent += feature * self.params['regression'][type][str(i)]
#            partial[type] = math.exp(exponent)
#        rawProbs = {}
#        sum = 0.0
#        for type in types:
#            sum += partial[type]
#        for type in types:
#            rawProbs[type] = partial[type] / sum
#        self.probs = [ rawProbs['correct'] ]
#        N = len(self.userActions)
#        alpha = self.params['onListFraction']['alpha']
#        beta = self.params['onListFraction']['beta']
#        for n in range(1,len(self.userActions)):
#            bucketLeftEdge = 1.0*(n-1)/N
#            bucketRightEdge = 1.0*n/N
#            betaRight = lbetai(alpha,beta,bucketRightEdge) / lbetai(alpha,beta,1.0)
#            betaLeft = lbetai(alpha,beta,bucketLeftEdge) / lbetai(alpha,beta,1.0)
#            betaPart = betaRight - betaLeft
#            self.probs.append( 1.0 * rawProbs['onList'] * betaPart )
#        self.probTotal = 0.0
#        for prob in self.probs:
#            self.probTotal += prob
#        assert (self.probTotal <= 1.0),'Total probability exceeds 1.0: %f' % (self.probTotal)
#        return self

    @classmethod
    def FromHelios(cls,userActions,probs,isTerminal=False,correctPosition=None):
        '''
        Creates an ASRResult object for use in a simulated environment.

        grammar is a Grammar object.

        userActions is a list of UserAction objects on the N-Best list.  Up to one 'silent'
        userAction can be included.  Do not include an 'oog' action.

        probs is the list of probabilities indicating the ASR probabilities of
        each of the userActions.

        isTerminal indicates if the user hung up.  If not provided, defaults to False.

        correctPosition indicates the position of the correct N-Best list entry.
          None: unknown
          -1: not anywhere on the list
          0: first entry on the list
          1: second entry on the list, etc.
        if not provided, defaults to None
        '''
        self = cls()
        assert (len(userActions) == len(probs)),'In ASRResult, length of userActions (%d) not equal to length of probs (%d)' % (len(userActions),len(probs))
        for userAction in userActions:
            assert (not userAction.type == 'oog'),'userAction type for ASR result cannot be oog -- oog is implicit in left-over mass'
        self.userActions = userActions
        self.probs = probs
        for prob in self.probs:
            self.probTotal += prob
        assert (self.probTotal <= 1.0),'Total probability exceeds 1.0: %f' % (self.probTotal)
        return self

    @classmethod
    def Simulated(cls,grammar,userActions,probs,isTerminal=False,correctPosition=None):
        '''
        Creates an ASRResult object for use in a simulated environment.

        grammar is a Grammar object.

        userActions is a list of UserAction objects on the N-Best list.  Up to one 'silent'
        userAction can be included.  Do not include an 'oog' action.

        probs is the list of probabilities indicating the ASR probabilities of
        each of the userActions.

        isTerminal indicates if the user hung up.  If not provided, defaults to False.

        correctPosition indicates the position of the correct N-Best list entry.
          None: unknown
          -1: not anywhere on the list
          0: first entry on the list
          1: second entry on the list, etc.
        if not provided, defaults to None
        '''
        self = cls()
        assert (len(userActions) == len(probs)),'In ASRResult, length of userActions (%d) not equal to length of probs (%d)' % (len(userActions),len(probs))
        for userAction in userActions:
            assert (not userAction.type == 'oog'),'userAction type for ASR result cannot be oog -- oog is implicit in left-over mass'
        self.grammar = grammar
        self.userActions = userActions
        self.probs = probs
        self.isTerminal = isTerminal
        self.correctPosition=correctPosition
        for prob in self.probs:
            self.probTotal += prob
        assert (self.probTotal <= 1.0),'Total probability exceeds 1.0: %f' % (self.probTotal)
        return self

    def GetTopResult(self):
        '''
        Returns the top user action, or None if the N-Best list is empty.
        '''
        if (len(self.userActions) == 0):
            return None
        else:
            return self.userActions[0]

    def GetProbs(self):
        '''
        Returns an array with ASR probs of the N-Best list
        '''
        return deepcopy(self.probs)

    def __str__(self):
        s = self._GetTranscript(maxShow=5)
        return s

    def _GetTranscript(self,maxShow=1):
        items = []
        for i in range(min(maxShow,len(self.userActions))):
            items.append('%s (%f)' % (self.userActions[i],self.probs[i]))
        if (maxShow < len(self.userActions)):
            items[-1] += ' + %d more' % (len(self.userActions) - maxShow)
        items.append('[rest] (%f)' % (1.0 - self.probTotal))
        s = '\n'.join(items)
        return s

    def __iter__(self):
        '''
        Iterates over the N-Best list; for each entry, outputs a tuple:

          (userAction,prob,offListProb)

        where

          - userAction: userAction object for this entry
          - prob: ASR prob of this entry
          - offListProb: the ASR probability of a userAction which has not (yet)
            been observed on the N-Best list (including 'silence' and 'oog')

        For example, if the grammar cardinality is 11, and 3 entries have been observed
        on the N-Best list so far with probabilities 0.4, 0.2 and 0.1, then offListProb would
        be:

           Mass remaining / remaining number of unseen user actions
           (1.0 - (0.4 + 0.2 + 0.1)) / (11 + 2 - 3) = 0.03

        '''
        self.releasedProb = 0.0
        self.releasedActions = 0
        i = 0
        while (i < len(self.userActions)):
            userAction = self.userActions[i]
            prob = self.probs[i]
            self.releasedProb += prob
            self.releasedActions += 1
#            offListProb = 1.0 * (1.0 - self.releasedProb) / (self.grammar.cardinality + 2 - self.releasedActions)
#            offListProb = 1.0 * (1.0 - self.releasedProb) / (3000000 + 2 - self.releasedActions)
            if self.offListBeliefUpdateMethod in ['plain','heuristicUsingPrior']:
                if self.fixedASRConfusionProbability > 0:
                    offListProb = self.fixedASRConfusionProbability / self.totalCount
                else:
                    offListProb = 1.0 * (1.0 - self.releasedProb) / (self.totalCount + 2 - self.releasedActions)
            elif self.offListBeliefUpdateMethod == 'heuristicPossibleActions':
                if self.fixedASRConfusionProbability > 0:
                    offListProb = self.fixedASRConfusionProbability
                else:
                    offListProb = 1.0 - self.releasedProb
            else:
                raise RuntimeError,'Unknown offListBeliefUpdateMethod = %s'%self.offListBeliefUpdateMethod
            yield (userAction,prob,offListProb)
            i += 1