def test_LexiconLookup(self):
    """defLex lexicon lookup: tokenized sentences resolve to the expected
    node count, and known names are not flagged as out-of-vocabulary."""
    LoadLexicon(dir_path + '/../../../fsa/X/defLexX.txt', lookupSource=LexiconLookupSource.defLex)
    LoadLexicon(dir_path + '/../../../fsa/X/defPlus.txt', lookupSource=LexiconLookupSource.defLex)

    # Imported lazily in the original test; kept function-local here.
    import ProcessSentence

    node_list = Tokenization.Tokenize("喝不惯")
    ProcessSentence.PrepareJSandJM(node_list)
    LexiconLookup(node_list, LexiconLookupSource.defLex)
    self.assertEqual(node_list.size, 3)

    node_list = Tokenization.Tokenize("李四")
    ProcessSentence.PrepareJSandJM(node_list)
    LexiconLookup(node_list, LexiconLookupSource.defLex)
    self.assertEqual(node_list.size, 3)
    # "李四" is in the lexicon, so the head node must not carry the OOV feature.
    self.assertFalse(utils.FeatureID_OOV in node_list.head.features)
def LexicalAnalyzeTask( SubSentence, schema):
    """Tokenize one sub-sentence, attach lexicon features, and run the
    dynamic rule pipeline.

    Args:
        SubSentence: raw text of the sub-sentence to analyze.
        schema: pipeline schema passed through to DynamicPipeline.

    Returns:
        (NodeList, Dag, WinningRules) on success;
        (None, None, None) when tokenization produces no nodes.
    """
    NodeList = Tokenization.Tokenize(SubSentence)
    if not NodeList or NodeList.size == 0:
        # Bug fix: the success path returns THREE values, but this early
        # exit previously returned only two (None, None) — callers that
        # unpack three values raised ValueError on empty input.
        return None, None, None

    Lexicon.ApplyLexiconToNodes(NodeList)
    PrepareJSandJM(NodeList)

    NodeList, Dag, WinningRules = DynamicPipeline(NodeList, schema)

    return NodeList, Dag, WinningRules
def test_LogicCombined(self):
    """Logic Combined"""
    # SeparateOrBlocks must split on top-level '|' only, respecting quotes.
    for expression, expected_count in (("a|b|c", 3), ("a", 1), ("'a|b'|c", 2)):
        self.assertEqual(len(SeparateOrBlocks(expression)), expected_count)

    str_token_list = Tokenization.Tokenize('d')
    rule_token_list = [Rules.RuleToken()]
    # A bare literal matches, and so does an or-expression containing it.
    self.assertTrue(LogicMatch(str_token_list, 0, 'd', rule_token_list, 0))
    self.assertTrue(LogicMatch(str_token_list, 0, "notfeature|'d'|notfeature2", rule_token_list, 0))
def test_ApplyWordLengthFeature(self):
    """Lexicon application should stamp word-length features onto nodes."""
    node_list = Tokenization.Tokenize("李四abc456,sab98中文")
    ApplyLexiconToNodes(node_list)
    # Head node carries C1; the next node carries D1.
    self.assertTrue(C1ID in node_list.head.features)
    self.assertTrue(D1ID in node_list.get(1).features)