def preprocess(moDict, moSequence, minlen, maxlen):
    """Find, for every start index of the sequence, the dictionary words there.

    For each start index ``i`` and each window length ``j`` in
    ``minlen..maxlen`` that still fits inside ``moSequence``, the slice
    ``moSequence[i:i + j]`` is stripped of surrounding whitespace, turned
    into a key with ``util.orderASCII`` and looked up in ``moDict``.  When
    the key is present, the first word stored under it is recorded.

    Args:
        moDict: Mapping from ``util.orderASCII`` keys to indexable, non-empty
            collections of words (the dictionary returned by ``buildDict``
            has this shape).
        moSequence: The string to scan.
        minlen: Smallest window length to try.
        maxlen: Largest window length to try.

    Returns:
        A list with one entry per index of ``moSequence``.  An entry is
        ``None`` when no window starting at that index matched, otherwise a
        ``collections.deque`` of matched words with the word from the longest
        window first.
    """
    total = len(moSequence)
    candidates = [None] * total
    for start in range(total):
        # Windows longer than the remaining tail cannot fit, so cap the
        # length here instead of testing every length against the end.
        longest = min(maxlen, total - start)
        for size in range(minlen, longest + 1):
            chunk = moSequence[start:start + size]
            words = moDict.get(util.orderASCII(chunk.strip()))
            if words is None:
                continue
            if candidates[start] is None:
                candidates[start] = collections.deque()
            # Lengths are visited in increasing order, so pushing on the left
            # keeps the word from the longest window at the front.
            candidates[start].appendleft(words[0])  # pick the first word
    return candidates
def buildDict(filepath):
    """Read data from file to build the keyword search dictionary.

    Every line of the file is stripped of surrounding whitespace; blank
    lines are skipped.  Each remaining word is appended to the deque stored
    under the key ``util.orderASCII(word)``, so words sharing a key are kept
    together in file order.

    Args:
        filepath: Path of the word file to read, one word per line.

    Returns:
        A tuple ``(d, minlen, maxlen)``.  ``d`` maps each key to a
        ``collections.deque`` of words; ``minlen`` and ``maxlen`` are the
        lengths of the shortest and longest word read.  When the file
        contains no words, ``minlen`` is the largest-integer sentinel and
        ``maxlen`` is its negation.
    """
    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # function also runs on interpreters that no longer provide it.
    largest = getattr(sys, 'maxint', sys.maxsize)
    d = dict()
    minlen = largest
    maxlen = -largest
    # The context manager closes the file even if reading or key
    # construction raises part-way through.
    with open(filepath, 'r') as datafile:
        for line in datafile:
            word = line.strip()
            if not word:
                continue
            length = len(word)
            minlen = min(minlen, length)  # track minimum word length
            maxlen = max(maxlen, length)  # track maximum word length
            key = util.orderASCII(word)
            d.setdefault(key, collections.deque()).append(word)
    return (d, minlen, maxlen)
def test_orderASCII(self):
    """Check that util.orderASCII returns the expected reordering of a word."""
    # assertEqual throughout: assertEquals is a deprecated alias that newer
    # unittest versions no longer provide.
    self.assertEqual(util.orderASCII('char'), 'achr')
    self.assertEqual(util.orderASCII('slight'), 'ghilst')