Exemplo n.º 1
0
 def roleTag(vertexList, wordNetAll):
     """Collect the candidate place-name (NS) tags for every vertex.

     One entry is appended to the result per vertex, in input order:

     * a vertex whose nature equals ``Nature.ns`` and whose total frequency
       is at most 1000 gets tags built by ``EnumItem.init2`` from the labels
       ``NS.H`` and ``NS.G`` (when ``realword`` has fewer than three
       characters) or from ``NS.G`` alone;
     * any other vertex gets its ``PlaceDictionary.dictionary`` entry, sorted
       in descending order by the second element of each pair, or an
       ``NS.Z`` fallback entry when the dictionary has nothing for the word.

     :param vertexList: iterable of vertices to tag
     :param wordNetAll: not used by this function
     :return: list holding one tag collection per vertex
     """
     roles = []
     for node in vertexList:
         isRarePlace = (Nature.ns == node.getNature()
                        and node.getAttribute().totalFrequency <= 1000)
         if isRarePlace:
             if len(node.realword) < 3:
                 # Two-character place name: assume it can still take one
                 # more suffix or prefix.
                 ruleItem = EnumItem().init2(NS.H, NS.G)
             else:
                 # Otherwise only a suffix may follow.
                 ruleItem = EnumItem().init2(NS.G)
             roles.append(ruleItem.labelMap.items())
             continue

         # The equivalent word is used for the lookup; it is more precise.
         candidates = PlaceDictionary.dictionary.get(node.word)
         if candidates is None:
             fallback = EnumItem().init1(
                 NS.Z,
                 PlaceDictionary.transformMatrixDictionary.getTotalFrequency(
                     NS.Z))
             candidates = fallback.labelMap.items()
         else:
             candidates = sorted(candidates, key=itemgetter(1), reverse=True)
         roles.append(candidates)
     return roles
Exemplo n.º 2
0
    def onLoadValue(self, path):
        """Load the value table of the dictionary stored at ``path``.

        The cache ``path + '.value.dat'`` is tried first through
        ``self.loadDat1``.  When that yields ``None`` the text dictionary at
        ``path`` is parsed line by line (parsing stops at the first line that
        is empty after stripping, or at end of file), the parsed table is
        handed to ``self.onSaveValue`` and then returned.

        :param path: path of the text dictionary
        :return: the cached table, or a list with one label/frequency item
            collection per parsed line; ``None`` when reading or parsing
            fails (the failure is logged through ``self.logger``)
        """
        valueArray = self.loadDat1(path + '.value.dat')
        if valueArray is not None:
            return valueArray

        valueList = []
        line = None  # kept outside the loop so the error log can quote it

        try:
            # ``with`` closes the handle even when a line is malformed.
            with open(path, 'r') as br:
                for rawLine in br:
                    line = rawLine.strip(' \n\t\r')
                    if not line:
                        break
                    args = EnumItem.create(line)
                    nrEnumItem = EnumItem()
                    # list(...) keeps positional access valid for dict views
                    # as well as for plain lists.
                    for entry in list(args.values())[0]:
                        label = NS.valueOf(list(entry.keys())[0])
                        frequency = int(list(entry.values())[0])
                        nrEnumItem = nrEnumItem.init1(label, frequency)
                    valueList.append(nrEnumItem.labelMap.items())
            self.onSaveValue(valueList, path)
        except Exception as e:
            self.logger.error("读取%s失败[%s]\n该词典这一行格式不对:%s" %
                              (path, str(e), line))
            return None
        # Hand the parsed table back, mirroring what the cached path returns.
        return valueList
Exemplo n.º 3
0
 def loadDat1(self, path):
     """Decode a value table from the binary dictionary at ``path``.

     A pickled copy of the raw bytes is looked for at
     ``path + Predefine.PIC_EXT`` first.  If it cannot be loaded for any
     reason, the bytes are obtained from ``IOUtil().readBytes(path)`` and,
     when that succeeds, written to the pickle file for the next call.

     Decoding reads integers with ``ByteUtil.bytesHighFirstToInt`` while
     advancing the offset by 4 after each read: first the number of entries,
     then for every entry its number of pairs followed by that many
     (label index, frequency) pairs.  Label indexes select from
     ``list(NT)``.

     NOTE(security): ``pickle.load`` can execute arbitrary code while
     loading; the pickle file must only ever come from a trusted location.

     :param path: path of the binary dictionary
     :return: list with one ``labelMap.items()`` collection per entry, or
         ``None`` when no bytes could be obtained
     """
     cachePath = path + Predefine.PIC_EXT
     try:
         with open(cachePath, 'rb') as cacheIn:
             data = pickle.load(cacheIn)
     except Exception:
         # Best effort: whatever went wrong with the pickle, fall back to the
         # raw file.
         data = IOUtil().readBytes(path)
         if data is not None:
             # Never cache a failed read; a pickled None would load without
             # raising on later calls and the raw file would not be retried.
             with open(cachePath, 'wb') as cacheOut:
                 pickle.dump(data, cacheOut)
     if data is None:
         return None

     labels = list(NT)
     offset = 0
     size = ByteUtil.bytesHighFirstToInt(data, offset)
     offset += 4
     valueArray = [None] * size
     for i in range(size):
         pairCount = ByteUtil.bytesHighFirstToInt(data, offset)
         offset += 4
         item = EnumItem()
         for _ in range(pairCount):
             label = labels[ByteUtil.bytesHighFirstToInt(data, offset)]
             offset += 4
             frequency = ByteUtil.bytesHighFirstToInt(data, offset)
             offset += 4
             # init1 returns the item to keep using, so rebind on every pair.
             item = item.init1(label, frequency)
         valueArray[i] = item.labelMap.items()
     return valueArray
Exemplo n.º 4
0
    def roleTag(vertexList, wordNetAll):
        """Collect the candidate organisation-name (NT) tags for every vertex.

        One entry is appended to the result per vertex, in input order.  The
        nature returned by ``vertex.guessNature()`` selects a fixed entry:

        * ``Nature.nrf`` with a total frequency of at most 1000: ``NT.F``
          with frequency 1000;
        * ``Nature.ni``, ``Nature.nic``, ``Nature.nis`` or ``Nature.nit``:
          ``NT.K`` and ``NT.D``, each with frequency 1000;
        * ``Nature.m``: ``NT.M`` with frequency 1000.

        Every other vertex (including ``Nature.nrf`` above the frequency
        limit) gets its ``OrganizationDictionary.dictionary`` entry, sorted
        in descending order by the second element of each pair, or an
        ``NT.Z`` fallback entry when the dictionary has nothing for the word.

        :param vertexList: iterable of vertices to tag
        :param wordNetAll: not used by this function
        :return: list holding one tag collection per vertex
        """
        roles = []

        for node in vertexList:
            guessed = node.guessNature()

            fixedTags = None
            if guessed == Nature.nrf:
                if node.getAttribute().totalFrequency <= 1000:
                    fixedTags = EnumItem().init1(NT.F, 1000).labelMap.items()
            elif guessed in (Nature.ni, Nature.nic, Nature.nis, Nature.nit):
                fixedTags = EnumItem().init4(
                    {NT.K: 1000, NT.D: 1000}).labelMap.items()
            elif guessed == Nature.m:
                fixedTags = EnumItem().init1(NT.M, 1000).labelMap.items()

            if fixedTags is not None:
                roles.append(fixedTags)
                continue

            # The equivalent word is used for the lookup; it is more precise.
            candidates = OrganizationDictionary.dictionary.get(node.word)
            if candidates is None:
                fallback = EnumItem().init1(
                    NT.Z,
                    OrganizationDictionary.transformMatrixDictionary.
                    getTotalFrequency(NT.Z))
                candidates = fallback.labelMap.items()
            else:
                candidates = sorted(candidates,
                                    key=itemgetter(1),
                                    reverse=True)
            roles.append(candidates)
        return roles
Exemplo n.º 5
0
 def isBadCase(name):
     """Tell whether ``name`` is registered as a known bad case.

     No algorithm solves 100% of the inputs, so there are always some bad
     cases; they are added to the dictionary in a form such as
     "盖公章 A 1".  This function reports whether ``name`` is one of them.

     :param name: word looked up in ``PlaceDictionary.dictionary``
     :return: ``False`` when the dictionary has no entry for ``name``;
         otherwise whether the entry contains the label ``NS.Z``
     """
     entries = PlaceDictionary.dictionary.get(name)
     if entries is None:
         return False

     labelCounts = dict(entries)
     labelled = EnumItem().init3(labelCounts)
     if labelled is None:
         return False
     return labelled.containsLabel(NS.Z)
Exemplo n.º 6
0
    def roleObserve(wordSegResult):
        """Observe the person-name (NR) roles of every vertex.

        Roles are taken from ``PersonDictionary.dictionary`` (keyed by the
        vertex's ``realword``); when the dictionary has no entry, rules based
        on ``vertex.guessNature()`` supply the roles instead.

        :param wordSegResult: coarse segmentation result, an iterable of
            vertices
        :return: list holding one role collection per vertex, in input order
        """

        def defaultRoles():
            # NR.A weighted with its total frequency from the transform matrix.
            return EnumItem().init1(
                NR.A,
                PersonDictionary.transformMatrixDictionary.
                getTotalFrequency(NR.A)).labelMap.items()

        observed = []
        for node in wordSegResult:
            roles = PersonDictionary.dictionary.get(node.realword)
            if roles is not None:
                observed.append(roles)
                continue

            for case in Switch(node.guessNature()):
                if case(Nature.nr):
                    isRare = node.getAttribute().totalFrequency <= 1000
                    if isRare and len(node.realword) == 2:
                        # Some two-character names can actually be part of
                        # a longer three-character name.
                        roles = EnumItem().init2(NR.X,
                                                 NR.G).labelMap.items()
                    else:
                        roles = defaultRoles()
                    break
                if case(Nature.nnt):
                    # Surname + job title.
                    roles = EnumItem().init2(NR.G, NR.K).labelMap.items()
                    break
                if case():
                    roles = defaultRoles()
                    break
            observed.append(roles)
        return observed
Exemplo n.º 7
0
 def insert(vertexList, tagList, wordNetAll, line, ns):
     """Append the first vertex of a word-net line together with a fixed tag.

     :param vertexList: list that receives the vertex (mutated in place)
     :param tagList: list that receives ``EnumItem().init1(ns, 1000)``
         (mutated in place)
     :param wordNetAll: object whose ``getFirst(line)`` supplies the vertex
     :param line: argument passed on to ``wordNetAll.getFirst``
     :param ns: label handed to ``EnumItem.init1`` with frequency 1000
     :raises AssertionError: when ``wordNetAll.getFirst(line)`` returns
         ``None``; neither list is modified in that case
     """
     vertex = wordNetAll.getFirst(line)
     if vertex is None:
         # Raised explicitly instead of via ``assert`` so the check is not
         # stripped under ``python -O``; callers still see AssertionError.
         raise AssertionError(
             'wordNetAll.getFirst(%r) returned None' % (line,))
     vertexList.append(vertex)
     tagList.append(EnumItem().init1(ns, 1000))
Exemplo n.º 8
0
        except Exception, e:
            bytes = IOUtil().readBytes(path)
            out = file(path + Predefine.PIC_EXT, 'wb')
            cPickle.dump(bytes, out)
        if bytes is None:
            return None
        nsArray = list(NS)
        index = 0
        size = ByteUtil.bytesHighFirstToInt(bytes, index)
        index += 4
        valueArray = [None] * size
        item = None
        for i in range(size):
            currentSize = ByteUtil.bytesHighFirstToInt(bytes, index)
            index += 4
            item = EnumItem()
            tm_dict = {}
            for j in range(currentSize):
                ns = nsArray[ByteUtil.bytesHighFirstToInt(bytes, index)]
                index += 4
                frequency = ByteUtil.bytesHighFirstToInt(bytes, index)
                index += 4
                item = item.init1(ns, frequency)
            valueArray[i] = item.labelMap.items()
        return valueArray

    def onSaveValue(self, valueArray, path):
        """Store ``valueArray`` next to the dictionary at ``path``.

        :param valueArray: value table passed on to ``self.saveDat``
        :param path: dictionary path; ``'.value.dat'`` is appended to it
        :return: whatever ``self.saveDat`` returns
        """
        datPath = path + '.value.dat'
        outcome = self.saveDat(datPath, valueArray)
        return outcome

    def saveDat(self, path, valueArray):
        try: