Example #1
    def test_LexiconLookup(self):
        import ProcessSentence

        LoadLexicon(dir_path + '/../../../fsa/X/defLexX.txt', lookupSource=LexiconLookupSource.defLex)
        LoadLexicon(dir_path + '/../../../fsa/X/defPlus.txt', lookupSource=LexiconLookupSource.defLex)

        # "喝不惯" should collapse into a single lexicon node; the test
        # expects 3 nodes in total after PrepareJSandJM and the lookup.
        Sentence = "喝不惯"
        NodeList = Tokenization.Tokenize(Sentence)
        ProcessSentence.PrepareJSandJM(NodeList)
        LexiconLookup(NodeList, LexiconLookupSource.defLex)
        self.assertEqual(NodeList.size, 3)

        # "李四" is a known name, so after the lookup its node must not
        # carry the out-of-vocabulary feature.
        Sentence = "李四"
        NodeList = Tokenization.Tokenize(Sentence)
        ProcessSentence.PrepareJSandJM(NodeList)
        LexiconLookup(NodeList, LexiconLookupSource.defLex)
        self.assertEqual(NodeList.size, 3)
        self.assertNotIn(utils.FeatureID_OOV, NodeList.head.features)
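A minimal, self-contained sketch of the longest-match idea this test exercises: adjacent tokens are merged into the longest entry found in a lexicon, which is why the three characters of 喝不惯 end up as one node (the other two nodes are presumably the sentence-boundary markers added by PrepareJSandJM). The lexicon contents and the merge loop below are illustrative assumptions, not the project's actual defLex handling.

def longest_match_merge(tokens, lexicon):
    """Greedily merge adjacent tokens into the longest lexicon entry."""
    merged, i = [], 0
    while i < len(tokens):
        # Try the widest span first; a single token always matches itself.
        for j in range(len(tokens), i, -1):
            candidate = "".join(tokens[i:j])
            if candidate in lexicon or j == i + 1:
                merged.append(candidate)
                i = j
                break
    return merged

# Hypothetical lexicon entries; 喝不惯 collapses into a single node.
assert longest_match_merge(list("喝不惯"), {"喝不惯", "李四"}) == ["喝不惯"]
assert longest_match_merge(list("喝不了"), {"喝不惯"}) == ["喝", "不", "了"]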
Example #2
def LexicalAnalyzeTask(SubSentence, schema):
    # Tokenize the sub-sentence; reject empty input early.
    NodeList = Tokenization.Tokenize(SubSentence)
    if not NodeList or NodeList.size == 0:
        return None, None, None

    # Attach lexicon entries and the JS/JM sentence-boundary information,
    # then run the dynamic rule pipeline defined by the schema.
    Lexicon.ApplyLexiconToNodes(NodeList)
    PrepareJSandJM(NodeList)
    NodeList, Dag, WinningRules = DynamicPipeline(NodeList, schema)

    return NodeList, Dag, WinningRules
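A hedged usage sketch for the task above: split a paragraph into sub-sentences and run each through LexicalAnalyzeTask, skipping anything the empty-input guard rejects. The punctuation-based splitting and the result shape are assumptions for illustration, not part of the project's API.

import re

def analyze_paragraph(paragraph, schema=None):
    """Run LexicalAnalyzeTask on each punctuation-delimited sub-sentence."""
    results = []
    for sub in re.split(r"[。！？!?]", paragraph):
        if not sub.strip():
            continue
        NodeList, Dag, WinningRules = LexicalAnalyzeTask(sub, schema)
        if NodeList is None:  # the guard rejected an empty tokenization
            continue
        results.append((sub, NodeList, Dag, WinningRules))
    return results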
Example #3
    def test_LogicCombined(self):
        """Combined logic: OR-block separation and token matching."""

        # Plain alternation splits on every "|".
        blocks = SeparateOrBlocks("a|b|c")
        self.assertEqual(len(blocks), 3)

        blocks = SeparateOrBlocks("a")
        self.assertEqual(len(blocks), 1)

        # A quoted "|" is not a separator: 'a|b'|c yields two blocks.
        blocks = SeparateOrBlocks("'a|b'|c")
        self.assertEqual(len(blocks), 2)

        strtokenlist = Tokenization.Tokenize('d')
        RuleTokenList = [Rules.RuleToken()]

        # A bare literal matches the token directly, and so does an OR
        # expression that contains the quoted literal 'd'.
        self.assertTrue(LogicMatch(strtokenlist, 0, 'd', RuleTokenList, 0))
        self.assertTrue(LogicMatch(strtokenlist, 0, "notfeature|'d'|notfeature2", RuleTokenList, 0))
Example #4
    def test_ApplyWordLengthFeature(self):
        Sentence = "李四abc456,sab98中文"
        NodeList = Tokenization.Tokenize(Sentence)
        ApplyLexiconToNodes(NodeList)
        # The lexicon pass attaches word-length features to each node.
        self.assertIn(C1ID, NodeList.head.features)
        self.assertIn(D1ID, NodeList.get(1).features)
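A speculative sketch of what a word-length feature pass might compute: a tag combining a token's character class and its length (e.g. C1 for a one-character Chinese token, D1 for a one-character digit run). The feature names and the classification rule below are guesses for illustration; only the real ApplyLexiconToNodes defines the actual C1ID/D1ID semantics.

def word_length_feature(token):
    """Return a class+length tag such as 'C2' or 'D3' for a uniform token."""
    if all('\u4e00' <= ch <= '\u9fff' for ch in token):
        prefix = "C"  # CJK ideographs
    elif token.isdigit():
        prefix = "D"  # digits
    else:
        prefix = "A"  # anything else (ASCII letters, punctuation, mixed)
    return prefix + str(len(token))

assert word_length_feature("李四") == "C2"
assert word_length_feature("456") == "D3"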