class UserSimulation(object):
    '''
    Logs a simulated user in to the service described by the 'LGus' config
    section.

    After construction the instance exposes:
      config      the object returned by GetConfig()
      login_page  value of LGus/LOGIN_PAGE
      url         value of LGus/URL
      id          dict with 'username' (LGus/ID) and 'password' (LGus/PASSWD)
      opener      urllib2 opener carrying the cookie jar used for the login
                  request (only set if the opener could be built)
    '''

    def __init__(self):
        '''
        Reads the login settings and posts the credentials to the login page.

        Raises AssertionError if the config or any required LGus field is
        missing.  Errors during the login request itself are reported on
        stdout and otherwise ignored (best effort), exactly as before.
        '''
        self.config = GetConfig()
        assert self.config is not None, 'Config file required'
        assert (self.config.has_option('LGus','LOGIN_PAGE')),'LGus section missing field LOGIN_PAGE'
        self.login_page = self.config.get('LGus','LOGIN_PAGE')
        assert (self.config.has_option('LGus','URL')),'LGus section missing field URL'
        self.url = self.config.get('LGus','URL')
        assert (self.config.has_option('LGus','ID')),'LGus section missing field ID'
        self.id = {'username':self.config.get('LGus','ID')}
        assert (self.config.has_option('LGus','PASSWD')),'LGus section missing field PASSWD'
        self.id['password'] = self.config.get('LGus','PASSWD')
        try:
            data = urllib.urlencode(self.id)
            req = urllib2.Request(self.login_page, data)
            cj = cookielib.CookieJar()
            self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
            response = self.opener.open(req)
            try:
                # The body is read (and discarded) so the request completes.
                response.read()
            finally:
                # Previously the response was never closed.
                response.close()
        except Exception as detail:
            # Same text the old `print "Err ", detail` statement produced.
            print("Err  %s" % (detail,))
class tokenizer(object):
    '''
    Sentence tokenizer with three interchangeable back ends.

    mode 'STANFORD'  uses nltk.tokenize.stanford.StanfordTokenizer
    mode 'NLTK'      uses nltk.word_tokenize
    mode 'MINE'      replaces punctuation with spaces, deletes '.', and
                     splits on whitespace
    '''
    MY_ID = 'TOKENIZER'

    # Plain raw literals: the former ur'' prefix is a syntax error on
    # Python 3, and both patterns are pure ASCII so matching is unchanged.
    _SPACE_PUNCT = re.compile(r'[`~!@#\$%\^&\*\(\)\[\]{}_\+\-=\|\\:;\"\'<>,\?/]')
    _REMOVE_PUNCT = re.compile(r'\.')

    def __init__(self,mode=None):
        '''
        mode: 'STANFORD', 'NLTK' or 'MINE'.  When omitted (or falsy) the mode
        is read from option 'mode' of config section MY_ID, defaulting to
        'NLTK' if that option is absent.

        Raises ValueError (a subclass of the Exception raised previously) for
        any other mode.
        '''
        self.config = GetConfig()
        if mode:
            self.mode = mode
        elif self.config.has_option(self.MY_ID,'mode'):
            self.mode = self.config.get(self.MY_ID,'mode')
        else:
            self.mode = 'NLTK'

        if self.mode == 'STANFORD':
            # Imported lazily so the Stanford dependency is only needed when
            # this mode is selected.
            from nltk.tokenize.stanford import StanfordTokenizer as Tokenizer
            self.tokenizer = Tokenizer()
        elif self.mode == 'NLTK':
            pass
        elif self.mode == 'MINE':
            self.spacePunct = self._SPACE_PUNCT
            self.removePunct = self._REMOVE_PUNCT
        else:
            raise ValueError('Error: tokenizer, Unknown mode %s!' %(self.mode))

    def tokenize(self, sent):
        '''
        Returns the list of tokens of sent.

        A trailing '-' or '~' is first separated from the preceding text, and
        every '~ ' / '- ' is padded with a leading space.  After tokenizing,
        the tokens are joined with single spaces, '% ' is collapsed to '%',
        the quote tokens `` and '' are both rewritten to a double quote, and
        the result is split on single spaces.  Note that an empty sentence
        therefore yields [''] rather than [].

        Raises ValueError if self.mode was changed to an unsupported value
        after construction (this used to surface as an UnboundLocalError).
        '''
        if sent.endswith(('-', '~')):
            sent += ' '
        sent = sent.replace('~ ', ' ~ ')
        sent = sent.replace('- ', ' - ')

        if self.mode == 'STANFORD':
            tokens = self.tokenizer.tokenize(sent.strip())
        elif self.mode == 'NLTK':
            tokens = nltk.word_tokenize(sent.strip())
        elif self.mode == 'MINE':
            new_sent = self.spacePunct.sub(' ', sent.strip())
            new_sent = self.removePunct.sub('', new_sent)
            tokens = new_sent.split()
        else:
            raise ValueError('Error: tokenizer, Unknown mode %s!' %(self.mode))

        p_sent = ' '.join(tokens)
        p_sent = p_sent.replace('% ', '%')
        p_sent = p_sent.replace('``', '\"')
        p_sent = p_sent.replace('\'\'', '\"')
        return p_sent.split(' ')
class Partition(object):
    '''
    Tracks a partition of listings.

    Each field of the partition has an entry in self.fields whose type is
    either 'equals' (the field is pinned to entry.equals) or 'excludes' (the
    field may take any value not present in the entry.excludes dict).
    '''

    # Candidate user acts, in the priority order used to break ties in
    # _getClosestUserAct.
    _USER_ACTS = [['I:ap','I:bn','I:dp','I:tt'],
                  ['I:ap','I:bn','I:dp'],
                  ['I:ap','I:dp','I:tt'],
                  ['I:bn','I:dp','I:tt'],
                  ['I:ap','I:dp'],
                  ['I:bn','I:tt'],
                  ['I:bn'],
                  ['I:dp'],
                  ['I:ap'],
                  ['I:tt'],
                  ['yes'],
                  ['no']]

    # Field name -> tag used in the user-act names above.
    _FIELD_TAGS = {'route':'I:bn',
                   'departure_place':'I:dp',
                   'arrival_place':'I:ap',
                   'travel_time':'I:tt'}

    def __init__(self,existingPartition=None,fieldToSplit=None,value=None):
        '''
        Constructor, and copy constructor.

        If called as Partition(), returns a new root partition.

        If called as Partition(existingPartition,fieldToSplit,value), this
        creates a new child partition of existingPartition, split on
        fieldToSplit=value.  Does not modify existingPartition.

        This constructor is not meant to be called directly by application
        code.  Application code should use the BeliefState wrapper.

        Raises AssertionError for missing UserModel options or missing split
        arguments, and RuntimeError for an unknown offListBeliefUpdateMethod.
        '''
        self.appLogger = logging.getLogger('Learning')
        self.config = GetConfig()
        self.useLearnedUserModel = self.config.getboolean(MY_ID,'useLearnedUserModel')
        self.confirmUnlikelyDiscountFactor = self.config.getfloat(MY_ID,'confirmUnlikelyDiscountFactor')
        self.ignoreNonunderstandingFactor = self.config.getboolean(MY_ID,'ignoreNonunderstandingFactor')
        self.num_route = self.config.getint(MY_ID,'numberOfRoute')
        self.num_place = self.config.getint(MY_ID,'numberOfPlace')
        self.num_time = self.config.getint(MY_ID,'numberOfTime')
        self.offListBeliefUpdateMethod = self.config.get('PartitionDistribution','offListBeliefUpdateMethod')
        # The result is not used here; the call is kept in case GetDB() has
        # side effects (e.g. initialising a shared DB) -- TODO confirm.
        GetDB()
        if existingPartition is None:
            self._initRoot()
        else:
            self._initChild(existingPartition,fieldToSplit,value)

    def _initRoot(self):
        ''' Sets up the root partition, which covers every listing. '''
        self.fieldList = ['route','departure_place','arrival_place','travel_time']
        self.fieldCount = len(self.fieldList)
        # Size of the full cross product; places are counted twice
        # (departure and arrival).
        self.totalCount = self.num_route * self.num_place * self.num_place * self.num_time
        self.fields = {}
        for field in self.fieldList:
            self.fields[field] = _FieldEntry()
        self.count = self.totalCount
        self.prior = 1.0
        self.priorOfField = {'route':1.0,'departure_place':1.0,'arrival_place':1.0,'travel_time':1.0}
        self.countOfField = {'route':self.num_route,'departure_place':self.num_place,'arrival_place':self.num_place,'travel_time':self.num_time}
        if not self.useLearnedUserModel:
            self._loadHandcraftedUserModel()
        else:
            self._loadLearnedUserModel()

    def _loadHandcraftedUserModel(self):
        ''' Fills self.umParams from the 'UserModel' config section. '''
        umFields = ['request_nonUnderstandingProb',
                    'request_directAnswerProb',
                    'request_allOverCompleteProb',
                    'request_oogProb',
                    'request_irrelevantAnswerProb',
                    'confirm_directAnswerProb',
                    'confirm_nonUnderstandingProb',
                    'confirm_oogProb']
        assert self.config is not None, 'Config file required (UserModel parameters)'
        self.umParams = {}
        for key in umFields:
            assert (self.config.has_option('UserModel', key)),'UserModel section missing field %s' % (key)
            self.umParams[key] = self.config.getfloat('UserModel',key)
        # Number of ways to add one or more of the other fields to an answer.
        overCompleteActionCount = 0
        for i in range(1,self.fieldCount):
            overCompleteActionCount += Combination(self.fieldCount-1,i)
        self.appLogger.info('fieldCount = %d; overCompleteActionCount = %d' % (self.fieldCount,overCompleteActionCount))
        self.umParams['request_overCompleteProb'] = \
            1.0 * self.umParams['request_allOverCompleteProb'] / overCompleteActionCount
        self.umParams['open_answerProb'] = \
            (1.0 - self.umParams['request_nonUnderstandingProb'] - self.umParams['request_oogProb']) / \
            overCompleteActionCount

    def _loadLearnedUserModel(self):
        ''' Unpickles the learned user model and reads its floor/irrelevant probabilities. '''
        modelPath = self.config.get('Global','modelPath')
        self.userModelPath = self.config.get(MY_ID,'userModelPath')
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point userModelPath at trusted model files.
        with open(os.path.join(modelPath,self.userModelPath),'rb') as modelFile:
            self.userModel = pickle.load(modelFile)
        if self.offListBeliefUpdateMethod == 'heuristicUsingPrior':
            self.irrelevantUserActProb = self.config.getfloat(MY_ID,'irrelevantUserActProb_HeuristicUsingPrior')
            self.minRelevantUserActProb = self.config.getfloat(MY_ID,'minRelevantUserActProb_HeuristicUsingPrior')
        elif self.offListBeliefUpdateMethod in ['plain','heuristicPossibleActions']:
            self.irrelevantUserActProb = self.config.getfloat(MY_ID,'irrelevantUserActProb')
            self.minRelevantUserActProb = self.config.getfloat(MY_ID,'minRelevantUserActProb')
        else:
            raise RuntimeError('Unknown offListBeliefUpdateMethod = %s'%self.offListBeliefUpdateMethod)

    def _initChild(self,existingPartition,fieldToSplit,value):
        ''' Sets up a child of existingPartition in which fieldToSplit equals value. '''
        assert fieldToSplit is not None,'arg not defined'
        assert value is not None,'arg not defined'
        self.fieldList = existingPartition.fieldList
        self.fieldCount = existingPartition.fieldCount
        # The user model is shared with the parent, not copied.
        if not self.useLearnedUserModel:
            self.umParams = existingPartition.umParams
        else:
            self.userModel = existingPartition.userModel
            self.irrelevantUserActProb = existingPartition.irrelevantUserActProb
            self.minRelevantUserActProb = existingPartition.minRelevantUserActProb
        self.totalCount = existingPartition.totalCount
        self.countOfField = existingPartition.countOfField
        self.priorOfField = {}
        self.fields = {}
        self.count = 1
        for field in self.fieldList:
            if (field == fieldToSplit):
                self.fields[field] = _FieldEntry(type='equals', equals=value)
            else:
                self.fields[field] = existingPartition.fields[field].Copy()
            if self.fields[field].type == 'equals':
                # A pinned field contributes exactly one value to the count.
                self.priorOfField[field] = 1.0/self.countOfField[field]
            else:
                excluded = len(self.fields[field].excludes.keys())
                self.count *= (self.countOfField[field] - excluded)
                self.priorOfField[field] = 1.0 - 1.0 * excluded/self.countOfField[field]
        self.prior = 1.0 * self.count / self.totalCount

    def Split(self,userAction):
        '''
        Attempts to split the partition on userAction.

        Returns a list of zero or more child partitions, modifying this
        partition as appropriate: for every child created, the child's value
        is added to this partition's excludes and count, prior and
        priorOfField are reduced accordingly.
        '''
        newPartitions = []
        if (userAction.type == 'non-understanding'):
            # silent doesn't split
            return newPartitions
        for field in userAction.content.keys():
            if (field == 'confirm'):
                continue
            val = userAction.content[field]
            if (self.fields[field].type == 'equals'):
                # Cant split this partition -- field already equals something
                continue
            if (val in self.fields[field].excludes):
                # Cant split this partition -- field excludes this value already
                continue
            newPartition = Partition(existingPartition=self,fieldToSplit=field,value=val)
            if (newPartition.count > 0):
                self.fields[field].excludes[val] = True
                self.count -= newPartition.count
                self.prior = 1.0 * self.count / self.totalCount
                self.priorOfField[field] = 1.0 - 1.0 * len(self.fields[field].excludes.keys())/self.countOfField[field]
                newPartitions.append(newPartition)
        return newPartitions

    # This will only be called on a child with no children
    def Recombine(self,child):
        '''
        Attempts to recombine child partition with this (parent) partition.

        If possible, does the recombination and returns True.  If not
        possible, makes no changes and returns False.

        Raises RuntimeError if parent and child are structurally inconsistent
        (child equals a value the parent does not exclude, parent equals but
        child excludes, no differing field, or more than one differing field).
        '''
        fieldsToRecombine = []
        for field in self.fields:
            parentEntry = self.fields[field]
            childEntry = child.fields[field]
            if (parentEntry.type == 'excludes'):
                if (childEntry.type == 'equals'):
                    # parent excludes, child equals
                    value = childEntry.equals
                    if (value not in parentEntry.excludes):
                        raise RuntimeError('Error: field %s: child equals %s but parent doesnt exclude it' % (field,value))
                    fieldsToRecombine.append((field,value))
                else:
                    # parent excludes, child excludes
                    # ensure they exclude the same things
                    if (not len(parentEntry.excludes) == len(childEntry.excludes)):
                        return False
                    for val in parentEntry.excludes:
                        if (val not in childEntry.excludes):
                            return False
            elif (childEntry.type != 'equals'):
                # Fixed: this message used to format an undefined (or stale)
                # local `value`; report the parent's own value instead.
                raise RuntimeError('Error: field %s: parent equals %s but child excludes this field' % (field,parentEntry.equals))
            # else: parent equals, child equals (must be equal)
        if (len(fieldsToRecombine) == 0):
            raise RuntimeError('Error: parent and child are identical')
        if (len(fieldsToRecombine) > 1):
            raise RuntimeError('Error: parent and child differ by more than 1 field: %s' % (fieldsToRecombine))
        (field,value) = fieldsToRecombine[0]
        self.count += child.count
        self.prior = 1.0 * self.count / self.totalCount
        del self.fields[field].excludes[value]
        # Keep the per-field prior in step with the excludes, mirroring what
        # Split does when it adds an exclusion.
        self.priorOfField[field] = 1.0 - 1.0 * len(self.fields[field].excludes.keys())/self.countOfField[field]
        return True

    def __str__(self):
        '''
        Renders this partition as a string.

        Example:

          city x();state x();last x(WILLIAMS);first=JASON;count=386

        This is the partition of 386 listings which have first name JASON,
        and do NOT have last name WILLIAMS (located in any city and any
        state).  Fields with more than two exclusions are abbreviated as
        "x([N entries])".
        '''
        if (len(self.fields) == 0):
            return "(all)"
        elems = []
        for conceptName in self.fieldList:
            entry = self.fields[conceptName]
            if (entry.type == 'equals'):
                elems.append('%s=%s' % (conceptName,entry.equals))
            elif (len(entry.excludes) <= 2):
                elems.append('%s x(%s)' % (conceptName,','.join(entry.excludes.keys())))
            else:
                elems.append('%s x([%d entries])' % (conceptName,len(entry.excludes)))
        elems.append('count=%d' % (self.count))
        return ';'.join(elems)

    def _getClosestUserAct(self,userAction):
        '''
        Maps userAction onto the name of the most similar act in _USER_ACTS.

        Similarity is the Jaccard overlap between the tags present in
        userAction.content and each candidate act; the first candidate with
        the best score wins.  Returns 'non-understanding' unchanged for a
        non-understanding action.
        '''
        if userAction.type == 'non-understanding':
            return 'non-understanding'
        ua = set()
        for field in userAction.content:
            if field == 'confirm':
                ua.add('yes' if userAction.content[field] == 'YES' else 'no')
            elif field in self._FIELD_TAGS:
                ua.add(self._FIELD_TAGS[field])
        score = [float(len(set(act) & ua))/len(set(act) | ua) for act in self._USER_ACTS]
        return ','.join(self._USER_ACTS[score.index(max(score))])

    def UserActionLikelihood(self, userAction, history, sysAction):
        '''
        Returns the probability of the user taking userAction given dialog
        history, sysAction, and that their goal is within this partition.

        history is accepted for interface compatibility and is not used.
        Raises RuntimeError if sysAction.type is not 'ask'.
        '''
        if not self.useLearnedUserModel:
            return self._handcraftedLikelihood(userAction, sysAction)
        return self._learnedLikelihood(userAction, sysAction)

    def _handcraftedLikelihood(self, userAction, sysAction):
        ''' Likelihood under the hand-set parameters in self.umParams. '''
        if (sysAction.type != 'ask'):
            raise RuntimeError('Dont know sysAction.type = %s' % (sysAction.type))
        if (userAction.type == 'non-understanding'):
            if (sysAction.force == 'confirm'):
                return self.umParams['confirm_nonUnderstandingProb']
            return self.umParams['request_nonUnderstandingProb']

        result = 0.0
        targetFieldIncludedFlag = False
        overCompleteFlag = False
        allFieldsMatchGoalFlag = True
        askedField = sysAction.content
        for field in userAction.content:
            if field == 'confirm':
                if sysAction.force == 'request':
                    # A yes/no makes no sense as an answer to a request.
                    allFieldsMatchGoalFlag = False
                    continue
                for sysField in sysAction.content:
                    val = sysAction.content[sysField]
                    if (self.fields[sysField].type == 'excludes' or not self.fields[sysField].equals == val):
                        allFieldsMatchGoalFlag = False
                if (allFieldsMatchGoalFlag):
                    if (userAction.content['confirm'] == 'YES'):
                        result = self.umParams['confirm_directAnswerProb']
                        targetFieldIncludedFlag = True
                    else:
                        result = self.umParams['request_irrelevantAnswerProb']
                else:
                    if (userAction.content['confirm'] == 'NO'):
                        result = self.umParams['confirm_directAnswerProb']
                        targetFieldIncludedFlag = True
                    else:
                        result = self.umParams['request_irrelevantAnswerProb']
            else:
                val = userAction.content[field]
                if (self.fields[field].type == 'equals' and self.fields[field].equals == val):
                    if (field == askedField):
                        targetFieldIncludedFlag = True
                    else:
                        overCompleteFlag = True
                else:
                    allFieldsMatchGoalFlag = False

        # NOTE(review): a correct 'NO' to a mismatching confirmation leaves
        # allFieldsMatchGoalFlag False, so the first branch below overrides
        # the confirm_directAnswerProb set above -- confirm this is intended.
        if (not allFieldsMatchGoalFlag):
            # This action doesn't agree with this partition
            result = self.umParams['request_irrelevantAnswerProb']
        elif (askedField == 'all'):
            # A response to the open question
            result = self.umParams['open_answerProb']
        elif (not targetFieldIncludedFlag):
            # This action doesn't include the information that was asked for
            result = self.umParams['request_irrelevantAnswerProb']
        elif (overCompleteFlag):
            # This action includes extra information - this happens
            # request_overCompleteProb amount of the time
            result = self.umParams['request_overCompleteProb']
        else:
            # This action just answers the question that was asked; keep a
            # probability already chosen by the confirm handling above.
            result = result if result > 0 else self.umParams['request_directAnswerProb']
        return result

    def _samePlaceExcusesMismatch(self, userAction, ua_field):
        '''
        True when a mismatch on a place field is tolerated: the user gave the
        same value for departure and arrival, and this partition pins the
        *other* place field to that value.
        '''
        if ua_field == 'arrival_place':
            other = 'departure_place'
        elif ua_field == 'departure_place':
            other = 'arrival_place'
        else:
            return False
        content = userAction.content
        return (other in content and
                content['departure_place'] == content['arrival_place'] and
                self.fields[other].type == 'equals' and
                self.fields[other].equals == content[other])

    def _genericFieldMismatch(self, userAction, ua_field):
        ''' Logs and returns True if ua_field's value disagrees with this partition. '''
        val = userAction.content[ua_field]
        if self.fields[ua_field].type == 'excludes' or not self.fields[ua_field].equals == val:
            if not self._samePlaceExcusesMismatch(userAction, ua_field):
                self.appLogger.info('Mismatched %s in field %s'%(val,ua_field))
                return True
        return False

    def _lookupMergingSamePlace(self, table, userAction):
        '''
        Looks up the closest user act in table; when departure and arrival
        carry the same value, arrival_place is dropped from a copy of the
        action first.
        '''
        content = userAction.content
        if 'departure_place' in content and 'arrival_place' in content and \
           content['departure_place'] == content['arrival_place']:
            tempUserAction = deepcopy(userAction)
            del tempUserAction.content['arrival_place']
            return table[self._getClosestUserAct(tempUserAction)]
        return table[self._getClosestUserAct(userAction)]

    def _applyMinRelevantProb(self, result):
        ''' Logs result and raises it to self.minRelevantUserActProb if below. '''
        self.appLogger.info('User action likelihood %g'%result)
        result = self.minRelevantUserActProb if result < self.minRelevantUserActProb else result
        self.appLogger.info('Set minimum user action likelihood %g'%result)
        return result

    def _learnedLikelihood(self, userAction, sysAction):
        '''
        Likelihood under the learned model in self.userModel.

        Starts from self.irrelevantUserActProb; if every field of the user
        action agrees with this partition, the probability is looked up in
        the model table for the system act and floored at
        self.minRelevantUserActProb.
        '''
        if sysAction.type != 'ask':
            raise RuntimeError('Cannot handle sysAction %s'%str(sysAction))
        result = self.irrelevantUserActProb
        understood = (userAction.type != 'non-understanding')
        allFieldsMatchGoalFlag = True
        directAnswer = False

        if sysAction.force == 'confirm':
            # Portable spelling of the former sysAction.content.keys()[0].
            askedField = list(sysAction.content.keys())[0]
            if understood:
                for ua_field in userAction.content:
                    self.appLogger.info('User action field: %s:%s'%(ua_field,userAction.content[ua_field]))
                    if ua_field == 'confirm' and userAction.content[ua_field] == 'YES':
                        val = sysAction.content[askedField]
                        if self.fields[askedField].type == 'excludes' or not self.fields[askedField].equals == val:
                            self.appLogger.info('Mismatched YES')
                            allFieldsMatchGoalFlag = False
                    elif ua_field == 'confirm' and userAction.content[ua_field] == 'NO':
                        val = sysAction.content[askedField]
                        if (self.fields[askedField].type == 'equals' and self.fields[askedField].equals == val) or\
                           (self.fields[askedField].type == 'excludes' and val not in self.fields[askedField].excludes):
                            self.appLogger.info('Mismatched NO')
                            allFieldsMatchGoalFlag = False
                    elif askedField == ua_field:
                        # The user restated the field being confirmed.
                        directAnswer = True
                        if self.fields[askedField].type == 'excludes' or \
                           self.fields[askedField].equals != userAction.content[askedField]:
                            self.appLogger.info('Mismatched %s'%userAction.content[askedField])
                            allFieldsMatchGoalFlag = False
                    elif self._genericFieldMismatch(userAction, ua_field):
                        allFieldsMatchGoalFlag = False
            elif self.ignoreNonunderstandingFactor:
                allFieldsMatchGoalFlag = False

            if allFieldsMatchGoalFlag:
                self.appLogger.info('All fields matched')
                if (understood and 'confirm' in userAction.content and userAction.content['confirm'] == 'YES') or\
                   directAnswer:
                    result = self.userModel['C-o'][self._getClosestUserAct(userAction)]
                else:
                    # directAnswer is always False on this path, so this
                    # guard cannot fire; kept for parity with the original.
                    if understood and 'confirm' in userAction.content and directAnswer:
                        del userAction.content['confirm']
                    result = self._lookupMergingSamePlace(self.userModel['C-x'], userAction)
                result = self._applyMinRelevantProb(result)

        elif sysAction.force == 'request':
            askedField = sysAction.content
            if understood:
                for ua_field in userAction.content:
                    if ua_field != 'confirm' and self._genericFieldMismatch(userAction, ua_field):
                        allFieldsMatchGoalFlag = False
            elif self.ignoreNonunderstandingFactor:
                allFieldsMatchGoalFlag = False

            if allFieldsMatchGoalFlag:
                if askedField == 'route':
                    result = self.userModel['R-bn'][self._getClosestUserAct(userAction)]
                elif askedField == 'departure_place':
                    result = self.userModel['R-dp'][self._getClosestUserAct(userAction)]
                elif askedField == 'arrival_place':
                    result = self.userModel['R-ap'][self._getClosestUserAct(userAction)]
                elif askedField == 'travel_time':
                    result = self._lookupMergingSamePlace(self.userModel['R-tt'], userAction)
                elif askedField == 'all':
                    result = self._lookupMergingSamePlace(self.userModel['R-open'], userAction)
                result = self._applyMinRelevantProb(result)
        return result

    def UserActionUnlikelihood(self, userAction, history, sysAction):
        '''
        Returns the probability of the user not taking userAction given
        dialog history, sysAction, and that their goal is within this
        partition.

        For a 'request' this is self.prior; for a 'confirm' it is self.prior
        scaled by self.confirmUnlikelyDiscountFactor.  userAction and history
        are not used.

        Raises RuntimeError if sysAction.type is not 'ask' or sysAction.force
        is neither 'request' nor 'confirm' (the latter used to fail with an
        UnboundLocalError).
        '''
        if sysAction.type != 'ask':
            raise RuntimeError('Dont know sysAction.type = %s' % (sysAction.type))
        if sysAction.force == 'request':
            return self.prior
        if sysAction.force == 'confirm':
            return self.confirmUnlikelyDiscountFactor * self.prior
        raise RuntimeError('Dont know sysAction.force = %s' % (sysAction.force))
class DB(object):
    '''
    Wraps a sqlite3 database of listings.

    Queries are dicts mapping a field name to an object with attributes
    type ('equals' or 'excludes'), equals (a value) and excludes (a dict
    whose keys are the excluded values).
    '''

    def __init__(self):
        '''
        Creates a DB instance.

        Opens "<dbStem>.sqlite" (dbStem comes from config section MY_ID),
        reads the column names of the listings table and the pre-computed
        row and per-field value counts.

        Raises RuntimeError if the listings table has no columns.
        '''
        self.appLogger = logging.getLogger(MY_ID)
        self.config = GetConfig()
        self.dbStem = self.config.get(MY_ID,'dbStem')
        self.dbFile = '%s.sqlite' % (self.dbStem)
        self.dbHitCounter = 0
        self.conn = sqlite.connect(self.dbFile)
        self.conn.text_factory = str
        self.cur = self.conn.cursor()
        tableInfo = self._ExecuteSQL("PRAGMA table_info(%s)" % (_TABLE),'all')
        if (len(tableInfo)==0):
            raise RuntimeError('Could not connect to DB %s' % (self.dbFile))
        # Column 1 of each table_info row is the column name.
        self.fieldNames = [colInfo[1] for colInfo in tableInfo if colInfo[1] != 'rowid']
        self.appLogger.info('DB has fields: %s' % (self.fieldNames,))
        self.rowCount = self._ExecuteSQLOneItem("SELECT count FROM %s WHERE value='all'" % (_TABLE_COUNTS))
        self.fieldSize = {}
        for field in self.fieldNames:
            self.fieldSize[field] = int(self._ExecuteSQLOneItem("SELECT count(*) FROM %s_%s" % (_TABLE_COUNTS,field)))
        self.appLogger.info('Loaded db with %d rows' % (self.rowCount))

    def _Quote(self,value):
        '''
        Renders value as a single-quoted SQL string literal, doubling any
        embedded single quote so values such as O'HARA cannot break (or
        alter) the statement.
        '''
        return "'%s'" % (('%s' % (value,)).replace("'", "''"))

    def _FetchListing(self,rowid,noneOK):
        ''' Returns the listing dict at rowid, or None if absent and noneOK. '''
        row = self._ExecuteSQL('SELECT %s FROM %s WHERE rowid=%d LIMIT 1' % (','.join(self.fieldNames),_TABLE,rowid),
                               noneOK=noneOK)
        if row is None:
            return None
        return dict((field,row[i]) for (i,field) in enumerate(self.fieldNames))

    def GetRandomListing(self):
        '''
        Returns a random listing.

        Row ids are drawn uniformly from 1..rowCount; an id with no row is
        skipped and another one is drawn (previously the missing row raised
        before the retry loop could act).
        '''
        listing = None
        while listing is None:
            rowid = random.randint(1,self.rowCount)
            listing = self._FetchListing(rowid,noneOK=True)
        self.appLogger.info('listing=%s' % (listing,))
        return listing

    def GetListingByRowID(self,rowid):
        '''
        Returns the listing at rowid (an integer).

        Raises RuntimeError if there is no such row.
        '''
        return self._FetchListing(rowid,noneOK=False)

    def GetListingsByQuery(self,query):
        '''
        Returns an array of all the listings that match query.  Each listing
        is a dict.
        '''
        where = self._BuildWhereClause(query)
        rows = self._ExecuteSQL('SELECT %s FROM %s WHERE %s' % (','.join(self.fieldNames),_TABLE,where),fetch='all')
        listings = []
        for row in rows:
            if row is None:
                raise RuntimeError('row == None')
            listings.append(dict((field,row[i]) for (i,field) in enumerate(self.fieldNames)))
        return listings

    def GetListingCount(self,query):
        '''
        Returns the number of listings that match query.

        Queries constraining no field, or exactly one known field, are
        answered from the pre-computed count tables; anything else runs a
        COUNT(*) over the listings table.
        '''
        # Fields that exclude nothing do not constrain the result.
        fields = [field for field in query
                  if not (query[field].type == 'excludes' and len(query[field].excludes)==0)]
        if (len(fields) == 0):
            return self.rowCount
        if (len(fields) == 1 and fields[0] in self.fieldNames):
            # use pre-computed count
            field = fields[0]
            if (query[field].type == 'equals'):
                return self._ExecuteSQLOneItem("SELECT count FROM %s_%s WHERE value=%s" % (_TABLE_COUNTS,field,self._Quote(query[field].equals)))
            excludes = [self._Quote(item) for item in query[field].excludes]
            minusCount = self._ExecuteSQLOneItem("SELECT SUM(count) FROM %s_%s WHERE value IN (%s)" % (_TABLE_COUNTS,field,','.join(excludes)))
            # SUM() is NULL when none of the excluded values occurs.
            return self.GetListingCount({}) - (minusCount or 0)
        # do normal count
        where = self._BuildWhereClause(query)
        return self._ExecuteSQLOneItem('SELECT COUNT(*) FROM %s WHERE %s' % (_TABLE,where))

    def GetFieldSize(self,field):
        '''
        Returns the number of distinct values in field.
        '''
        return int(self._ExecuteSQLOneItem("SELECT count(*) FROM %s_%s" % (_TABLE_COUNTS,field)))

    def GetFieldElementByIndex(self,field,rowid):
        '''
        Returns the rowid-th value of field, where rowid>=1 and
        rowid <= self.GetFieldSize(field).
        '''
        return self._ExecuteSQLOneItem("SELECT value FROM %s_%s WHERE rowid=%d LIMIT 1" % (_TABLE_COUNTS,field,rowid))

    def GetFields(self):
        '''
        Returns (a copy of) the list of fields in the DB.
        '''
        return deepcopy(self.fieldNames)

    def GetDBStem(self):
        '''
        Returns the DB stem.  DB file names are of the form "dbStem.sqlite";
        here the DB stem is "dbStem".
        '''
        return self.dbStem

    def GetDBFile(self):
        '''
        Returns the DB filename.  DB file names are of the form
        "dbStem.sqlite".
        '''
        return self.dbFile

    def RowIterator(self):
        '''
        Return an iterator over all the listings.  Each result is a dict
        that also carries the integer 'rowid'.
        '''
        stmt = "SELECT rowid,%s FROM %s" % (','.join(self.fieldNames),_TABLE)
        self.appLogger.info('Query (RowIterator): %s [results omitted for space]' % (stmt))
        # A dedicated cursor: iterating the shared self.cur would be cut
        # short by any other query issued while the caller is still looping.
        cursor = self.conn.cursor()
        cursor.execute(stmt)
        for row in cursor:
            result = {'rowid': int(row[0])}
            for (i,item) in enumerate(row[1:]):
                result[ self.fieldNames[i] ] = item
            yield result

    def _ExecuteSQL(self,stmt,fetch='oneRow',noneOK=False):
        '''
        Executes stmt on the shared cursor and returns all rows
        (fetch='all') or the first row (any other fetch value).

        Raises RuntimeError when the result is None unless noneOK is set.
        '''
        self.cur.execute(stmt)
        self.dbHitCounter += 1
        if (fetch == 'all'):
            result = self.cur.fetchall()
        else:
            result = self.cur.fetchone()
        if (not noneOK and result is None):
            raise RuntimeError('row == None')
        self.appLogger.info('Query: %s [%s]' % (stmt,result))
        return result

    def _ExecuteSQLOneItem(self,stmt):
        ''' Executes stmt and returns the first column of its first row. '''
        row = self._ExecuteSQL(stmt, fetch='oneRow')
        return row[0]

    def _BuildWhereClause(self,query):
        '''
        Returns the SQL condition (without the WHERE keyword) for query;
        fields that exclude nothing contribute no term.
        '''
        whereItems = []
        for field in query:
            entry = query[field]
            if (entry.type == 'equals'):
                whereItems.append("%s = %s" % (field,self._Quote(entry.equals)))
            elif (len(entry.excludes) == 0):
                continue
            elif (len(entry.excludes) == 1):
                whereItems.append("%s != %s" % (field,self._Quote(list(entry.excludes.keys())[0])))
            else:
                excludeItems = [self._Quote(item) for item in entry.excludes]
                whereItems.append("%s NOT IN (%s)" % (field,','.join(excludeItems)))
        return ' AND '.join(whereItems)

    def RunTest(self,testSpec,N):
        '''
        Runs N tests of the DB using a test specified by testSpec

        testSpec is a dict like:

          spec = {
            'first' : 10,
            'last' : 10,
            'city' : 10,
            'state' : None,
          }

        where values indicate:

          None : equals a randomly sampled item
          0 = excludes nothing
          1 = excludes 1 value, etc.

        In each iteration, a random target row is sampled.  Then random
        values to exclude are sampled.  Then the query is run.

        Returns:
          (avRandTime,avQueryTime,longestQueryTime,avReturnedCallees)
        '''
        randomTime = 0.0
        queryTime = 0.0
        longestCountQueryTime = 0.0
        listingCount = 0
        for _ in range(N):
            startCPU = CPU()
            randomListing = self.GetRandomListing()
            randomTime += (CPU()-startCPU)

            query = {}
            for field in testSpec:
                query[field] = _QueryClass()
                if testSpec[field] is None:
                    query[field].type = 'equals'
                    query[field].equals = randomListing[field]
                else:
                    query[field].type = 'excludes'
                    # Indexes are 0-based; count-table rowids start at 1.
                    indexes = random.sample(range(self.fieldSize[field]), testSpec[field])
                    query[field].excludes = dict(
                        ("%s" % self.GetFieldElementByIndex(field,index+1), True) for index in indexes)

            startCPU = CPU()
            count = self.GetListingCount(query)
            elapsed = CPU()-startCPU
            queryTime += elapsed
            if (elapsed > longestCountQueryTime):
                longestCountQueryTime = elapsed
            listingCount += count
        return (float(randomTime / N),float(queryTime / N),float(longestCountQueryTime),float(1.0 * listingCount / N))
class Tuple_Extractor(object):
    '''
    Reads a slot config file to learn which slots are enumerable and which
    are non-enumerable, then converts between frame labels and flat
    'slot:value' tuples.
    '''
    MY_ID = 'Tuple_Extractor'

    def __init__(self, slot_config_file=None):
        '''
        slot_config_file tells which slot is enumerable and which is not.

        When it is not given, the name is read from the 'slot_config_file'
        option of the Tuple_Extractor config section.  In both cases the name
        is resolved relative to the '../config/' directory next to this
        module and parsed as UTF-8 JSON into self.slot_config.
        '''
        self.config = GetConfig()
        self.appLogger = logging.getLogger(self.MY_ID)
        if not slot_config_file:
            self.appLogger.debug('Slot config file is not assigned, so use the default config file')
            slot_config_file = self.config.get(self.MY_ID, 'slot_config_file')
        slot_config_file = os.path.join(os.path.dirname(__file__), '../config/', slot_config_file)
        self.appLogger.debug('Slot config file: %s', slot_config_file)
        # Context manager closes the file even when json.load raises.
        with codecs.open(slot_config_file, 'r', 'utf-8') as config_stream:
            self.slot_config = json.load(config_stream)

    def enumerable(self, slot):
        '''
        Returns the slot_config entry for slot (truthy when the slot's values
        are expanded into tuples by extract_tuple).

        Raises Exception when the slot is not present in the slot config.
        '''
        if slot not in self.slot_config:
            self.appLogger.error('Error: Unknown slot: %s', slot)
            raise Exception('Error: Unknown slot: %s' % (slot))
        return self.slot_config[slot]

    def extract_tuple(self, frame_label):
        '''
        Flattens frame_label (a dict slot -> iterable of values) into a list
        of unique tuple strings: 'root:<slot>' for every slot, plus
        '<slot>:<value>' for every value of an enumerable slot.

        The order of the returned list is unspecified.
        '''
        output_tuple = set()
        for slot in frame_label:
            output_tuple.add('root:%s' % (slot))
            if self.enumerable(slot):
                for value in frame_label[slot]:
                    output_tuple.add('%s:%s' % (slot, value))
        return list(output_tuple)

    @staticmethod
    def _keep_max(table, key, prob):
        ''' Stores prob under key unless a larger-or-equal prob is already there. '''
        if key not in table or prob > table[key]:
            table[key] = prob

    def generate_frame(self, tuples, t_probs, mode='hr'):
        '''
        generate frame based on tuples

        tuples  : iterable of 'root:<slot>' / '<slot>:<value>' strings
        t_probs : probabilities, parallel to tuples
        mode    : there are two generate modes:
                    high-precision mode: 'hp' - a value is kept only when its
                      slot also appears as a 'root:<slot>' tuple
                    high-recall mode: 'hr' - values whose slot has no root
                      tuple are kept too, under a slot with 'prob' -1

        Returns a dict: slot -> {'prob': p, 'values': {value: p}}; duplicated
        tuples keep their highest probability.

        Raises Exception for an unknown mode and AssertionError for a tuple
        string without a ':' separator.
        '''
        if mode != 'hp' and mode != 'hr':
            self.appLogger.error('Error: Unknown generate mode: %s', mode)
            raise Exception('Error: Unknown generate mode: %s' % (mode))

        roots = []
        values = []
        for text, prob in zip(tuples, t_probs):
            # Split on the first ':' only, so values that themselves contain
            # a colon (e.g. a time such as '10:30') stay intact.
            tokens = text.split(':', 1)
            assert len(tokens) == 2, 'Malformed tuple: %s' % (text)
            if tokens[0] == 'root':
                roots.append((tokens[1], prob))
            else:
                values.append((tokens[0], tokens[1], prob))

        frame_label = {}
        # Phase 1: every root tuple creates its slot or raises its prob.
        for slot, prob in roots:
            if slot not in frame_label:
                frame_label[slot] = {'prob': prob, 'values': {}}
            elif prob > frame_label[slot]['prob']:
                frame_label[slot]['prob'] = prob

        # Phase 2: attach values to slots that have a root; keep the rest.
        orphans = []
        for slot, value, prob in values:
            if slot in frame_label:
                self._keep_max(frame_label[slot]['values'], value, prob)
            else:
                orphans.append((slot, value, prob))

        if mode == 'hp':
            return frame_label

        # High-recall: keep orphan values under a slot flagged with prob -1.
        for slot, value, prob in orphans:
            if slot not in frame_label:
                frame_label[slot] = {'prob': -1, 'values': {}}
            self._keep_max(frame_label[slot]['values'], value, prob)
        return frame_label
class ASRResult:
    '''
    Represents an ASR result: an N-Best list of user actions together with
    the ASR probability of each entry.

    Two constructors:

      ASRResult.FromHelios(userActions,probs,isTerminal,correctPosition)
      ASRResult.Simulated(grammar,userActions,probs,isTerminal,correctPosition)
    '''
    MY_ID = 'ASRResult'

    # NOTE: a FromWatson(watsonResult, grammar) constructor, which built an
    # ASRResult from a real recognizer's JSON output, used to exist here but
    # was disabled (fully commented out); recover it from version control
    # if it is needed again.

    def __init__(self):
        '''
        Not intended to be called directly.  Use one of the two constructors
        ASRResult.FromHelios(...) or ASRResult.Simulated(...).

        Reads the off-list update method and the domain sizes from the
        'PartitionDistribution' and 'BeliefState' config sections.
        '''
        self.applogger = logging.getLogger(self.MY_ID)
        self.config = GetConfig()
        self.probTotal = 0.0
        self.correctPosition = None
        self.offListBeliefUpdateMethod = self.config.get('PartitionDistribution', 'offListBeliefUpdateMethod')
        self.numberOfRoute = self.config.getfloat('BeliefState', 'numberOfRoute')
        self.numberOfPlace = self.config.getfloat('BeliefState', 'numberOfPlace')
        self.numberOfTime = self.config.getfloat('BeliefState', 'numberOfTime')
        # numberOfPlace enters twice -- presumably once for the departure
        # place and once for the arrival place (TODO confirm).
        self.totalCount = self.numberOfRoute * self.numberOfPlace * self.numberOfPlace * self.numberOfTime
        self.fixedASRConfusionProbability = self.config.getfloat('BeliefState', 'fixedASRConfusionProbability')

    def _SetNBest(self, userActions, probs, isTerminal, correctPosition):
        '''
        Shared tail of the constructors: validates the N-Best list, stores it
        on self and accumulates self.probTotal.
        '''
        assert (len(userActions) == len(probs)), 'In ASRResult, length of userActions (%d) not equal to length of probs (%d)' % (len(userActions), len(probs))
        for userAction in userActions:
            assert (not userAction.type == 'oog'), 'userAction type for ASR result cannot be oog -- oog is implicit in left-over mass'
        self.userActions = userActions
        self.probs = probs
        self.isTerminal = isTerminal
        self.correctPosition = correctPosition
        for prob in self.probs:
            self.probTotal += prob
        assert (self.probTotal <= 1.0), 'Total probability exceeds 1.0: %f' % (self.probTotal)

    @classmethod
    def FromHelios(cls, userActions, probs, isTerminal=False, correctPosition=None):
        '''
        Creates an ASRResult object from an N-Best list (no grammar attached).

        userActions is a list of UserAction objects on the N-Best list.  Up to
        one 'silent' userAction can be included.  Do not include an 'oog'
        action.

        probs is the list of probabilities indicating the ASR probabilities of
        each of the userActions.

        isTerminal indicates if the user hung up.  If not provided, defaults
        to False.

        correctPosition indicates the position of the correct N-Best list
        entry.
          None: unknown
          -1: not anywhere on the list
          0: first entry on the list
          1: second entry on the list, etc.
        if not provided, defaults to None

        Raises AssertionError when the lists differ in length, an action has
        type 'oog', or the probabilities sum to more than 1.0.
        '''
        self = cls()
        # isTerminal / correctPosition are stored on the result; previously
        # they were accepted but silently dropped.
        self._SetNBest(userActions, probs, isTerminal, correctPosition)
        return self

    @classmethod
    def Simulated(cls, grammar, userActions, probs, isTerminal=False, correctPosition=None):
        '''
        Creates an ASRResult object for use in a simulated environment.

        grammar is a Grammar object.

        userActions is a list of UserAction objects on the N-Best list.  Up to
        one 'silent' userAction can be included.  Do not include an 'oog'
        action.

        probs is the list of probabilities indicating the ASR probabilities of
        each of the userActions.

        isTerminal indicates if the user hung up.  If not provided, defaults
        to False.

        correctPosition indicates the position of the correct N-Best list
        entry.
          None: unknown
          -1: not anywhere on the list
          0: first entry on the list
          1: second entry on the list, etc.
        if not provided, defaults to None

        Raises AssertionError when the lists differ in length, an action has
        type 'oog', or the probabilities sum to more than 1.0.
        '''
        self = cls()
        self.grammar = grammar
        self._SetNBest(userActions, probs, isTerminal, correctPosition)
        return self

    def GetTopResult(self):
        '''
        Returns the top user action, or None if the N-Best list is empty.
        '''
        if len(self.userActions) == 0:
            return None
        return self.userActions[0]

    def GetProbs(self):
        '''
        Returns an array with ASR probs of the N-Best list (a copy, so the
        caller may modify it freely).
        '''
        return deepcopy(self.probs)

    def __str__(self):
        ''' Transcript of (at most) the first five N-Best entries. '''
        return self._GetTranscript(maxShow=5)

    def _GetTranscript(self, maxShow=1):
        '''
        Returns a newline-separated listing of the first maxShow N-Best
        entries as 'action (prob)', a ' + K more' marker when entries were
        left out, and a final '[rest] (prob)' line with the left-over mass.
        '''
        items = []
        for i in range(min(maxShow, len(self.userActions))):
            items.append('%s (%f)' % (self.userActions[i], self.probs[i]))
        hidden = len(self.userActions) - len(items)
        if hidden > 0:
            if items:
                items[-1] += ' + %d more' % (hidden)
            else:
                # Nothing shown (maxShow <= 0): there is no last entry to
                # annotate, so the marker gets a line of its own.
                items.append('+ %d more' % (hidden))
        items.append('[rest] (%f)' % (1.0 - self.probTotal))
        return '\n'.join(items)

    def __iter__(self):
        '''
        Iterates over the N-Best list; for each entry, outputs a tuple:

          (userAction,prob,offListProb)

        where
          - userAction: userAction object for this entry
          - prob: ASR prob of this entry
          - offListProb: the ASR probability of a userAction which has not
            (yet) been observed on the N-Best list (including 'silence' and
            'oog')

        offListProb depends on self.offListBeliefUpdateMethod:

          'plain' / 'heuristicUsingPrior':
             fixedASRConfusionProbability / totalCount when the fixed
             probability is positive, otherwise
             mass remaining / remaining number of unseen user actions, e.g.
             with totalCount 11 and 3 entries released so far with
             probabilities 0.4, 0.2 and 0.1:
               (1.0 - (0.4 + 0.2 + 0.1)) / (11 + 2 - 3) = 0.03
          'heuristicPossibleActions':
             fixedASRConfusionProbability when positive, otherwise the mass
             remaining.

        Raises RuntimeError for any other offListBeliefUpdateMethod.

        The running totals are kept on self (releasedProb, releasedActions)
        and are reset whenever a new iteration starts.
        '''
        self.releasedProb = 0.0
        self.releasedActions = 0
        for i, userAction in enumerate(self.userActions):
            prob = self.probs[i]
            self.releasedProb += prob
            self.releasedActions += 1
            if self.offListBeliefUpdateMethod in ['plain', 'heuristicUsingPrior']:
                if self.fixedASRConfusionProbability > 0:
                    offListProb = self.fixedASRConfusionProbability / self.totalCount
                else:
                    offListProb = 1.0 * (1.0 - self.releasedProb) / (self.totalCount + 2 - self.releasedActions)
            elif self.offListBeliefUpdateMethod == 'heuristicPossibleActions':
                if self.fixedASRConfusionProbability > 0:
                    offListProb = self.fixedASRConfusionProbability
                else:
                    offListProb = 1.0 - self.releasedProb
            else:
                raise RuntimeError('Unknown offListBeliefUpdateMethod = %s' % self.offListBeliefUpdateMethod)
            yield (userAction, prob, offListProb)