Пример #1
0
class NRConstant(object):
    """
    Constants tied to the ``Predefine.TAG_PEOPLE`` tag word.

    Both values are looked up in ``CoreDictionary`` once, when the class
    body is executed.
    """
    # ID of the word this dictionary focuses on
    WORD_ID = CoreDictionary.getWordID(Predefine.TAG_PEOPLE)
    # Attribute of the word this dictionary focuses on (looked up by WORD_ID)
    ATTRIBUTE = CoreDictionary.get2(WORD_ID)
Пример #2
0
 def newE():
     """
     Create the vertex for the ``Predefine.TAG_END`` marker.

     The vertex is initialised with the end tag, a single space as its
     second argument, an attribute built from ``Nature.end`` with a
     frequency of ``Predefine.MAX_FREQUENCY / 10``, and the word ID that
     ``CoreDictionary`` reports for the end tag.

     :return: the result of ``Vertex().initVertex(...)``
     """
     end_vertex = Vertex()
     end_frequency = Predefine.MAX_FREQUENCY / 10
     end_attribute = CoreDictionary.Attribute().init3(Nature.end,
                                                      end_frequency)
     end_word_id = CoreDictionary.getWordID(Predefine.TAG_END)
     return end_vertex.initVertex(Predefine.TAG_END, ' ', end_attribute,
                                  end_word_id)
Пример #3
0
class PlaceDictionary(object):
    """
    Place-name dictionary together with its role-transition matrix and
    the pattern trie used for place-name recognition.

    All resources are stored on the class itself, so they are shared by
    every instance. Creating an instance calls :meth:`load`, which
    (re)loads those shared resources.
    """
    # Place-name dictionary
    dictionary = NSDictionary()
    # Transition-matrix dictionary
    transformMatrixDictionary = TransformMatrixDictionary()
    # Trie used by the Aho-Corasick algorithm
    trie = AhoCorasickDoubleArrayTrie()
    # ID of the word this dictionary focuses on
    WORD_ID = CoreDictionary.getWordID(Predefine.TAG_PLACE)
    # Attribute of the word this dictionary focuses on (looked up by WORD_ID)
    ATTRIBUTE = CoreDictionary.get2(WORD_ID)

    def __init__(self):
        """Load the shared dictionary resources."""
        self.load()

    def load(self):
        """
        Load the place dictionary and the transition matrix, then build
        the pattern trie.

        The load time of the place dictionary is reported both through
        ``Predefine.logger`` and on standard output.
        """
        start = time()
        PlaceDictionary.dictionary.load(Config.PlaceDictionaryPath)
        # Message reads "<path> loaded successfully, took <n> ms". It is
        # formatted once so that the log and stdout report the same figure.
        message = "%s加载成功,耗时%fms" % (Config.PlaceDictionaryPath,
                                     (time() - start) * 1000)
        Predefine.logger.info(message)
        # Single-argument call form: prints identically under Python 2 and
        # is valid syntax under Python 3.
        print(message)

        # init1() returns the matrix object that is then loaded from disk.
        PlaceDictionary.transformMatrixDictionary = \
            PlaceDictionary.transformMatrixDictionary.init1(NS)
        PlaceDictionary.transformMatrixDictionary.load(
            Config.PlaceDictionaryTrPath)

        # Role patterns fed to the trie; every pattern maps to itself.
        patterns = ("CDEH", "CDH", "CH", "GH")
        PlaceDictionary.trie.build(TreeMap({p: p for p in patterns}))

    @staticmethod
    def parsePattern(nsList, vertexList, wordNetOptimum, wordNetAll, pld_obj):
        """
        Pattern matching: run the role pattern through the trie.

        :param nsList: the decided tag sequence; every item is stringified
            and the results are concatenated into the pattern string
        :param vertexList: vertices whose ``realword`` values form the word
            array handed to the trie (presumably the original sequence
            without role tags -- confirm against the caller)
        :param wordNetOptimum: forwarded unchanged to ``trie.parseText1``
        :param wordNetAll: forwarded unchanged to ``trie.parseText1``
            (described in the original notes as the graph to be optimised)
        :param pld_obj: forwarded unchanged to ``trie.parseText1``
        :return: None
        """
        pattern = "".join(str(ns) for ns in nsList)
        wordArray = np.array([vertex.realword for vertex in vertexList])
        PlaceDictionary.trie.parseText1(pattern, wordArray, pld_obj,
                                        wordNetOptimum, wordNetAll)

    @staticmethod
    def isBadCase(name):
        """
        Tell whether ``name`` is a known bad case.

        No algorithm solves 100% of the cases, so known bad cases are added
        to the dictionary in the form "盖公章 A 1". A name counts as a bad
        case when its dictionary entry carries the ``NS.Z`` label.

        :param name: the candidate name to look up in the place dictionary
        :return: False when the name has no dictionary entry, otherwise the
            result of checking the entry for the ``NS.Z`` label
        """
        place_list = PlaceDictionary.dictionary.get(name)
        if place_list is None:
            return False
        enum_item = EnumItem().init3(dict(place_list))
        if enum_item is None:
            return False
        return enum_item.containsLabel(NS.Z)
Пример #4
0
 def newB():
     """
     Create the vertex for the ``Predefine.TAG_BIGIN`` marker.

     The vertex is initialised with the begin tag, a single space as its
     second argument, an attribute built from ``Nature.begin`` with a
     frequency of ``Predefine.MAX_FREQUENCY / 10``, and the word ID that
     ``CoreDictionary`` reports for the begin tag.

     :return: the result of ``Vertex().initVertex(...)``
     """
     begin_vertex = Vertex()
     begin_frequency = Predefine.MAX_FREQUENCY / 10
     begin_attribute = CoreDictionary.Attribute().init3(Nature.begin,
                                                        begin_frequency)
     begin_word_id = CoreDictionary.getWordID(Predefine.TAG_BIGIN)
     return begin_vertex.initVertex(Predefine.TAG_BIGIN, ' ',
                                    begin_attribute, begin_word_id)