Exemplo n.º 1
0
 def generate():
     """Build the character-type run table, write it to disk and load it back.

     Every code point from 0 to 65535 is classified with
     ``TextUtility.charType``. Consecutive code points sharing a type are
     collapsed into ``(first, last, type)`` runs, the runs are serialised to
     ``Config.CharTypePath`` through the ``Convert`` helpers, and the file is
     then read back with ``ByteArray.createByteArray``.

     :return: the result of ``ByteArray.createByteArray(Config.CharTypePath)``
     """
     preType = 5
     preChar = 0
     typeList = []
     # 65536 (not 65535) so the last code point is classified as well.
     for i in range(65536):
         charType = TextUtility.charType(i)
         if charType != preType:
             # The type changed at i: close the run that ended at i - 1.
             # Skipped for i == 0, where the run would be the empty [0, -1].
             if i > preChar:
                 typeList.append((preChar, i - 1, preType))
             preChar = i
         # Track the current type so the next comparison is against the
         # previous code point rather than the initial value.
         preType = charType
     # Close the final run, which always extends to the last code point.
     typeList.append((preChar, 65535, preType))

     # NOTE(review): text mode 'w+' is kept from the original; if the Convert
     # helpers return bytes this needs to be 'wb' - confirm against Convert.
     with open(Config.CharTypePath, 'w+') as out:
         for first, last, kind in typeList:
             out.writelines(Convert.convert_char(first))
             out.writelines(Convert.convert_char(last))
             out.writelines(Convert.convert_byte(kind))
     return ByteArray.createByteArray(Config.CharTypePath)
Exemplo n.º 2
0
 def init(self):
     """Populate ``CharType.type`` from the character-type table.

     The table is looked up in this order:

     1. the pickle cache at ``Config.CharTypePath + Predefine.PIC_EXT``;
     2. ``ByteArray.createByteArray(Config.CharTypePath)`` (the result is then
        written to the pickle cache);
     3. ``CharType.generate()``.

     If none of these yields a table, an error is logged and the process
     exits with status 1. Otherwise the ``(begin, end, type)`` records are
     expanded into ``self.type``.
     """
     CharType.type = [bytes()] * 65536
     self.logger.info("字符类型对应表开始加载" + Config.CharTypePath)
     start = time()
     # Read and write the cache under the same name; reading from the bare
     # CharTypePath would never see the pickle written below.
     cachePath = Config.CharTypePath + Predefine.PIC_EXT
     try:
         with open(cachePath, 'rb') as cacheFile:
             byteArray = pickle.load(cacheFile)
     except Exception:
         # Best effort: any problem with the cache falls back to the raw file.
         byteArray = ByteArray.createByteArray(Config.CharTypePath)
         if byteArray is not None:
             with open(cachePath, 'wb') as out:
                 pickle.dump(byteArray, out)
     if byteArray is None:
         try:
             byteArray = CharType.generate()
         except Exception:
             byteArray = None
         if byteArray is None:
             self.logger.error("字符类型对应表%s加载失败" % Config.CharTypePath)
             sys.exit(1)
     while byteArray.hasMore():
         begin = byteArray.nextChar()
         end = byteArray.nextChar()
         kind = byteArray.nextByte()
         for i in range(begin, end + 1):
             self.type[i] = kind
     elapsed = (time() - start) * 1000
     # The parentheses matter: `fmt % x * 1000` would format first and then
     # repeat the resulting string 1000 times.
     self.logger.info("字符类型对应表加载成功,耗时%fms" % elapsed)
     print("字符类型对应表%s加载成功,耗时%fms" % (Config.CharTypePath, elapsed))
Exemplo n.º 3
0
    def loadBaseAndCheckByFileChannel(self, path):
        """Load ``self.size``, ``self.base`` and ``self.check`` from *path*.

        The byte array is taken from the pickle cache at
        ``path + Predefine.PIC_EXT`` when it can be read; otherwise it is
        built with ``ByteArray.createByteArray(path)`` and the cache is
        written. The first 4 bytes are decoded as the size, followed by
        ``size`` pairs of 4-byte base/check values.

        :param path: path of the binary file (without the pickle extension)
        :return: True on success, False if anything goes wrong
        """
        try:
            cachePath = path + Predefine.PIC_EXT
            try:
                with open(cachePath, 'rb') as cacheFile:
                    byte_Arr = pickle.load(cacheFile)
            except Exception:
                # Cache missing or unreadable: rebuild it from the raw file.
                byte_Arr = ByteArray.createByteArray(path)
                if byte_Arr is None:
                    return False
                with open(cachePath, 'wb') as out:
                    pickle.dump(byte_Arr, out)
            byte_arr = byte_Arr.bytes
            index = 0
            self.size = ByteUtil.bytesHighFirstToInt(byte_arr, index)
            index += 4

            # Over-allocate so lookups slightly past `size` stay in bounds.
            self.base = [0] * (self.size + 65535)
            self.check = [0] * (self.size + 65535)

            for i in range(self.size):
                self.base[i] = ByteUtil.bytesHighFirstToInt(byte_arr, index)
                index += 4
                self.check[i] = ByteUtil.bytesHighFirstToInt(byte_arr, index)
                index += 4
        except Exception:
            return False

        return True
Exemplo n.º 4
0
    def loadDat(path):
        """Load the double array and its attributes from disk.

        The byte array is read from the pickle cache at
        ``path + Predefine.PIC_EXT``; if that fails it is built from
        ``path + Predefine.BIN_EXT`` and the cache is written. The attribute
        records are then decoded and handed to ``CustomDictionary.dat.load``.

        :param path: dictionary path without extension
        :return: True on success; False if no byte array could be obtained or
            ``CustomDictionary.dat.load`` reports failure
        """
        cachePath = path + Predefine.PIC_EXT
        try:
            with open(cachePath, 'rb') as cacheFile:
                byteArray = pickle.load(cacheFile)
        except Exception:
            # Cache missing or unreadable: rebuild it from the binary file.
            byteArray = ByteArray.createByteArray(path + Predefine.BIN_EXT)
            if byteArray is not None:
                with open(cachePath, 'wb') as out:
                    pickle.dump(byteArray, out)

        if byteArray is None:
            return False
        size = byteArray.nextInt()
        # Compatibility note carried over from the original comment: a
        # negative size means the file header stores -size user-defined
        # natures. That case is not handled here; a negative size simply
        # yields an empty attribute list.
        attributes = [None] * size
        natureIndexArray = list(Nature)
        for i in range(size):
            # Each record: total frequency, then the number of natures.
            currentTotalFrequency = byteArray.nextInt()
            length = byteArray.nextInt()
            attribute = CoreDictionary.Attribute().init1(length)
            attribute.totalFrequency = currentTotalFrequency
            attributes[i] = attribute
            for j in range(length):
                attribute.nature[j] = natureIndexArray[byteArray.nextInt()]
                attribute.frequency[j] = byteArray.nextInt()
        if not CustomDictionary.dat.load(byteArray, attributes):
            return False

        return True
Exemplo n.º 5
0
 def loadDat(path):
     """Load the double array from disk, refreshing the pickle cache.

     Tries the pickle cache at ``path + Predefine.PIC_EXT`` first; if that
     fails, builds the byte array from ``path + Predefine.BIN_EXT`` and writes
     it to the cache.

     :param path: dictionary path without extension
     :return: None (the visible code does not return the byte array)
     """
     cachePath = path + Predefine.PIC_EXT
     try:
         with open(cachePath, 'rb') as cacheFile:
             byteArray = cPickle.load(cacheFile)
     except Exception:
         # Cache missing or unreadable: rebuild it from the binary file.
         byteArray = ByteArray.createByteArray(path + Predefine.BIN_EXT)
         if byteArray is not None:
             with open(cachePath, 'wb') as out:
                 cPickle.dump(byteArray, out)
Exemplo n.º 6
0
 def loadDat(self):
     """Load the value byte array, refreshing the pickle cache.

     Tries the pickle cache at
     ``JapanesePersonDictionary.path + Predefine.VALUE_EXT + Predefine.PIC_EXT``
     first; if that fails, builds the byte array from
     ``JapanesePersonDictionary.path + Predefine.VALUE_EXT`` and writes it to
     the cache.

     :return: None (the visible code does not return the byte array)
     """
     valuePath = JapanesePersonDictionary.path + Predefine.VALUE_EXT
     cachePath = valuePath + Predefine.PIC_EXT
     try:
         with open(cachePath, 'rb') as cacheFile:
             byteArray = cPickle.load(cacheFile)
     except Exception:
         # Cache missing or unreadable: rebuild it from the value file.
         byteArray = ByteArray.createByteArray(valuePath)
         if byteArray is not None:
             with open(cachePath, 'wb') as out:
                 cPickle.dump(byteArray, out)
    def loadDat(self):
        """Load the value array and hand it to the trie loader.

        The byte array is read from the pickle cache at
        ``JapanesePersonDictionary.path + Predefine.VALUE_EXT + Predefine.PIC_EXT``;
        if that fails it is built from the ``VALUE_EXT`` file and the cache is
        written. The first int is the number of values; each value is one
        char converted with ``chr``.

        :return: False if no byte array could be obtained, otherwise the
            result of ``JapanesePersonDictionary.trie.load1``
        """
        valuePath = JapanesePersonDictionary.path + Predefine.VALUE_EXT
        cachePath = valuePath + Predefine.PIC_EXT
        try:
            with open(cachePath, 'rb') as cacheFile:
                byteArray = pickle.load(cacheFile)
        except Exception:
            # Cache missing or unreadable: rebuild it from the value file.
            byteArray = ByteArray.createByteArray(valuePath)
            if byteArray is not None:
                with open(cachePath, 'wb') as out:
                    pickle.dump(byteArray, out)

        if byteArray is None:
            return False
        size = byteArray.nextInt()
        valueArray = [chr(byteArray.nextChar()) for _ in range(size)]
        return JapanesePersonDictionary.trie.load1(
            JapanesePersonDictionary.path + Predefine.TRIE_EXT, valueArray)
Exemplo n.º 8
0
 def loadDat(self, path):
     """Load the double array and its attributes from disk.

     The byte array is read from the pickle cache at
     ``path + Predefine.PIC_EXT``; if that fails it is built from
     ``path + Predefine.BIN_EXT`` and the cache is written. The attribute
     records are decoded and passed to ``self.trie.load``. Loading counts as
     failed if the trie rejects the data or bytes are left over afterwards.

     :param path: dictionary path without extension
     :return: True on success, False on any failure (exceptions are logged
         as warnings)
     """
     start = time()
     try:
         cachePath = path + Predefine.PIC_EXT
         try:
             with open(cachePath, 'rb') as cacheFile:
                 byteArray = pickle.load(cacheFile)
         except Exception:
             # Cache missing or unreadable: rebuild it from the binary file.
             byteArray = ByteArray().createByteArray(path + Predefine.BIN_EXT)
             if byteArray is not None:
                 with open(cachePath, 'wb') as out:
                     pickle.dump(byteArray, out)
         if byteArray is None:
             return False
         size = byteArray.nextInt()
         # One Attribute object per entry.
         attributes = [None] * size
         natureIndexArray = list(Nature)
         for i in range(size):
             # Each record: total frequency, then the number of natures.
             currentTotalFrequency = byteArray.nextInt()
             length = byteArray.nextInt()
             attribute = CoreDictionary.Attribute().init1(length)
             attribute.totalFrequency = currentTotalFrequency
             attributes[i] = attribute
             for j in range(length):
                 attribute.nature[j] = natureIndexArray[byteArray.nextInt()]
                 attribute.frequency[j] = byteArray.nextInt()
         if not self.trie.load(byteArray, attributes) or byteArray.hasMore():
             return False
     except Exception as e:
         self.logger.warning("读取失败,问题发生在%s" % (str(e)))
         return False
     print ("核心词典加载成功%s,耗时%fms" % (path + Predefine.BIN_EXT, (time() - start) * 1000))
     return True
Exemplo n.º 9
0
 def __init__(self):
     """Run the ``ByteArray`` initialiser and reset ``bufferSize``."""
     # Explicit call to the ByteArray initialiser on this instance.
     ByteArray.__init__(self)
     # int() evaluates to 0.
     self.bufferSize = int()