示例#1
0
文件: tarcms.py 项目: yasusii/fooling
 def assertCMSIdx(self, cms, fname, keys):
     r"""Assert that the decoded keys of an index file equal ``keys``.

     Opens ``<cms.basedir>/idx/<fname>`` with ``CDBReader``, walks its keys
     in iteration order and decodes each one according to its first byte:

     * ``\x00`` -- unpacked with ``'>xll'`` into a ``(docid, sentid)`` tuple.
     * ``\xfd`` / ``\xfe`` prefix, or the exact key ``\xff`` -- skipped.
     * ``\x10`` .. ``\x13`` -- the remainder is decoded as UTF-8 text.
     * ``\x20`` -- each remaining byte is mapped to code point
       ``0x3000 + ord(byte)``.
     * ``\xf0`` -- a 2/3/4-byte remainder is rendered as ``'%04d'``,
       ``'%04d/%02d'`` or ``'%04d/%02d/%02d'`` (presumably a date; confirm
       against the index writer). Other lengths stay as the raw remainder.
     * anything else -- the remainder is kept as the raw string.

     The collected list is compared to ``keys`` with ``assertEqual``.
     """
     # Remainder length -> (output format, struct layout) for '\xf0' keys.
     numeric_layouts = {
         2: ('%04d', '>h'),
         3: ('%04d/%02d', '>hb'),
         4: ('%04d/%02d/%02d', '>hbb'),
     }
     reader = CDBReader(os.path.join(cms.basedir, 'idx', fname))
     found = []
     for key in reader.iterkeys():
         tag = key[0]
         if tag == '\x00':
             (docid, sentid) = struct.unpack('>xll', key)
             found.append((docid, sentid))
             continue
         # Two prefixes are ignored; '\xff' is ignored only as an exact key.
         if tag == '\xfd' or tag == '\xfe' or key == '\xff':
             continue
         payload = key[1:]
         word = payload
         if '\x10' <= tag <= '\x13':
             word = unicode(payload, 'utf-8')
         elif tag == '\x20':
             word = u''.join(unichr(0x3000 + ord(ch)) for ch in payload)
         elif tag == '\xf0':
             layout = numeric_layouts.get(len(payload))
             if layout is not None:
                 (template, packing) = layout
                 word = template % struct.unpack(packing, payload)
         found.append(word)
     self.assertEqual(found, keys)
示例#2
0
文件: mwcdb.py 项目: fagan2888/pymwp
 def __init__(self, path, ext='', codec='utf-8'):
     """Wrap a ``CDBReader`` opened on ``path``.

     ``ext`` and ``codec`` are stored unchanged on the instance as
     ``self.ext`` and ``self.codec``; this method does not interpret them.
     """
     # Build the reader first so a failure leaves no other attributes set.
     self._reader = CDBReader(path)
     (self.ext, self.codec) = (ext, codec)