Exemplo n.º 1
0
 def readData(self, dictFile):
     """
         Reads the dictionary sections from an open binary file object.

         Layout consumed, in order: a 4-byte magic id (must be b'JDC0'),
         a 32-byte charset name buffer, three unsigned ints holding the
         sizes of the following sections, and then the three sections
         themselves.  The results are stored in self.charset,
         self.lookupDict, self.entryOffsetBlob and self.entryBlob.

         Raises RuntimeError when the magic id does not match.
     """
     if dictFile.read(4) != b'JDC0':
         raise RuntimeError('File is not JDIC file')

     rawCharset = struct.unpack('32s', dictFile.read(32))[0]
     self.charset = extractString(rawCharset).lower()

     # No byte-order prefix: the three sizes are decoded using the
     # native byte order of the machine running this code.
     sizesFormat = 'III'
     sizesHeader = dictFile.read(struct.calcsize(sizesFormat))
     trieSize, offsetsSize, entriesSize = struct.unpack(sizesFormat, sizesHeader)

     # The sections are stored back to back in this exact order.
     self.lookupDict = DoubleArray(dictFile.read(trieSize))
     self.entryOffsetBlob = dictFile.read(offsetsSize)
     self.entryBlob = dictFile.read(entriesSize)
Exemplo n.º 2
0
 def loadFromBinary(self, dictFile):
     """
         Populates this dictionary from an open binary file object.

         The stream starts with ten little-endian unsigned 32-bit
         integers, followed by a 32-byte charset name buffer and three
         blobs whose sizes are taken from the header.  Only version 102
         is accepted; any other version raises RuntimeError.
     """
     headerFormat = str('<IIIIIIIIII')
     headerFields = unpack(headerFormat, dictFile.read(calcsize(headerFormat)))

     # Header order: magic, version, dictType, lexSize, leftSize,
     # rightSize, dataSize, tokenPartSize, featurePartSize, dummy.
     # Only the version and the three blob sizes are used here.
     version = headerFields[1]
     trieSize, tokenSize, featureSize = headerFields[6:9]

     if version != 102:
         raise RuntimeError('Incompatible dictionary version: {0}'.format(version))

     rawCharset = unpack(str('32s'), dictFile.read(32))[0]
     self.charset = extractString(rawCharset).lower()

     # The blobs follow the charset buffer in this exact order.
     self.doubleArray = DoubleArray(dictFile.read(trieSize))
     self.tokenBlob = dictFile.read(tokenSize)
     self.featureBlob = dictFile.read(featureSize)
Exemplo n.º 3
0
 def loadFromBinary(self, inFile, encoding):
     """
         Loads the category names and the per-character info table
         from an open binary file object.

         Layout consumed, in order: a little-endian unsigned 32-bit
         category count, that many 32-byte category name buffers, and
         0xffff little-endian unsigned 32-bit packed records.

         inFile   -- binary file object positioned at the start of the data
         encoding -- passed through to utils.extractString when decoding
                     the category names

         Decoded names are appended to self.__categories and one
         CharInfo per packed record is appended to self.__map.
         A truncated stream makes unpack raise struct.error.
     """
     uintSize = 4
     categoryBuffer = 32
     # range(0xffff) in the original reader: 65535 records, i.e. one
     # fewer than 0x10000.
     mapEntries = 0xffff

     categoryNum, = unpack('<I', inFile.read(uintSize))

     # Build the format once instead of on every iteration.
     categoryFormat = str(categoryBuffer) + 's'
     for _ in range(categoryNum):
         categoryStr, = unpack(categoryFormat, inFile.read(categoryBuffer))
         self.__categories.append(utils.extractString(categoryStr, encoding))

     # Read and decode the whole table in one go rather than issuing
     # 65535 separate 4-byte reads.
     packedTable = unpack('<' + str(mapEntries) + 'I',
                          inFile.read(uintSize * mapEntries))
     for packedCharInfo in packedTable:
         # Bit layout of a record: bits 0-17, 18-25, 26-29, 30 and 31.
         # NOTE(review): presumably mirrors MeCab's CharInfo bit field
         # (type, default type, length, group, invoke) -- confirm
         # against the CharInfo definition.
         self.__map.append(CharInfo((packedCharInfo      ) & 0x3FFFF,
                                    (packedCharInfo >> 18) & 0xFF,
                                    (packedCharInfo >> 26) & 0xF,
                                    (packedCharInfo >> 30) & 0x1,
                                    (packedCharInfo >> 31) & 0x1))
Exemplo n.º 4
0
 def testExtractString(self):
     # A buffer padded with trailing NUL bytes must come back as the
     # bare text in front of the padding.
     paddedBuffer = b'sample\x00\x00\x00'
     self.assertEqual('sample', utils.extractString(paddedBuffer))