def __init__(self, lexicon, uniGrams, maxAffixLength, Treshold, minWordFreq, mode, dbg):
    """Build the affix trees from ``lexicon`` and prune them.

    Five ``AffixTree`` objects are created with identical settings:
    suffix and prefix trees for words whose first character is upper
    case, the same pair for all other words, and one tree for words
    matching ``self.cardinalPattern``.

    Parameters:
        lexicon: mapping iterated for its word keys; ``lexicon[word]`` is
            itself iterated for tag keys whose values are summed to get
            the word's total count.
        uniGrams: forwarded unchanged to every ``AffixTree``.
        maxAffixLength: forwarded unchanged to every ``AffixTree``.
        Treshold: forwarded unchanged to every ``AffixTree``.
        minWordFreq: a word is added to a tree only when its total count
            is strictly greater than this value.
        mode: 1 or 2 fills the suffix trees, 0 or 2 fills the prefix
            trees; cardinal words ignore the mode.
        dbg: forwarded to ``WordProb.__init__`` as ``debug``.
    """
    WordProb.__init__(self, debug=dbg)
    self.mode = mode

    self.UPsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.UPprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.LOWsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.LOWprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.CARprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)

    # The mode does not change while the lexicon is scanned.
    fillSuffixTrees = self.mode in (1, 2)
    fillPrefixTrees = self.mode in (0, 2)

    for entry in lexicon:
        token = entry
        # Empty keys and the sentence-boundary markers are never indexed.
        if len(token) == 0 or token in ("<STARTTAG>", "<ENDTAG>"):
            continue

        tagCounts = lexicon[entry]
        totalCount = sum(tagCounts[tag] for tag in tagCounts)

        sufTree = None
        preTree = None
        if self.cardinalPattern.match(token) is None:
            startsUpper = token[0].isupper()
            if fillSuffixTrees:
                sufTree = java2python_runtime.ternary(
                    startsUpper, self.UPsuffixTree, self.LOWsuffixTree)
            if fillPrefixTrees:
                preTree = java2python_runtime.ternary(
                    startsUpper, self.UPprefixTree, self.LOWprefixTree)
        else:
            # Cardinal words only ever go into the dedicated tree.
            preTree = self.CARprefixTree

        if not (totalCount > minWordFreq):
            continue

        if sufTree is not None:
            sufTree.addWord(token, tagCounts)
        if preTree is not None:
            # Prefix trees are fed the reversed word.
            preTree.addWord(self.reverseWord(token), tagCounts)

    for tree in (self.UPsuffixTree, self.UPprefixTree,
                 self.LOWsuffixTree, self.LOWprefixTree,
                 self.CARprefixTree):
        if tree is not None:
            tree.Pruning()
def __init__(self, lexicon, uniGrams, maxAffixLength, Treshold, minWordFreq, mode, dbg):
    """Build the affix trees from ``lexicon`` and prune them.

    Five ``AffixTree`` objects are created with identical settings:
    suffix and prefix trees for words whose first character is upper
    case, the same pair for all other words, and one tree for words
    matching ``self.cardinalPattern``.

    Parameters:
        lexicon: mapping iterated for its word keys; ``lexicon[word]`` is
            itself iterated for tag keys whose values are summed to get
            the word's total count.
        uniGrams: forwarded unchanged to every ``AffixTree``.
        maxAffixLength: forwarded unchanged to every ``AffixTree``.
        Treshold: forwarded unchanged to every ``AffixTree``.
        minWordFreq: a word is added to a tree only when its total count
            is strictly greater than this value.
        mode: 1 or 2 fills the suffix trees, 0 or 2 fills the prefix
            trees; cardinal words ignore the mode.
        dbg: forwarded to ``WordProb.__init__`` as ``debug``.
    """
    WordProb.__init__(self, debug=dbg)
    self.mode = mode

    self.UPsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.UPprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.LOWsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.LOWprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
    self.CARprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)

    # The mode does not change while the lexicon is scanned.
    fillSuffixTrees = self.mode in (1, 2)
    fillPrefixTrees = self.mode in (0, 2)

    for entry in lexicon:
        token = entry
        # Empty keys and the sentence-boundary markers are never indexed.
        if len(token) == 0 or token in ("<STARTTAG>", "<ENDTAG>"):
            continue

        tagCounts = lexicon[entry]
        totalCount = sum(tagCounts[tag] for tag in tagCounts)

        sufTree = None
        preTree = None
        if self.cardinalPattern.match(token) is None:
            startsUpper = token[0].isupper()
            if fillSuffixTrees:
                sufTree = java2python_runtime.ternary(
                    startsUpper, self.UPsuffixTree, self.LOWsuffixTree)
            if fillPrefixTrees:
                preTree = java2python_runtime.ternary(
                    startsUpper, self.UPprefixTree, self.LOWprefixTree)
        else:
            # Cardinal words only ever go into the dedicated tree.
            preTree = self.CARprefixTree

        if not (totalCount > minWordFreq):
            continue

        if sufTree is not None:
            sufTree.addWord(token, tagCounts)
        if preTree is not None:
            # Prefix trees are fed the reversed word.
            preTree.addWord(self.reverseWord(token), tagCounts)

    for tree in (self.UPsuffixTree, self.UPprefixTree,
                 self.LOWsuffixTree, self.LOWprefixTree,
                 self.CARprefixTree):
        if tree is not None:
            tree.Pruning()
def tagProbs(self, word):
    """Look up tag probabilities for ``word`` in the affix trees.

    A word matching ``self.cardinalPattern`` is looked up in
    ``self.CARprefixTree`` only. Any other word is looked up in the
    upper- or lower-case trees, chosen by its first character; ``self.mode``
    decides which trees are consulted (1 or 2: suffix, 0 or 2: prefix).
    For any other mode value no tree is consulted and ``ChangeIntoLog``
    receives ``None``.

    Parameters:
        word: String. A non-cardinal word must be non-empty, because its
            first character is inspected (an empty string raises
            ``IndexError``).

    Returns:
        Map<Integer, Double>: the result of ``self.CombinePreSuff`` when
        both a suffix and a prefix tree were consulted, otherwise the
        result of ``self.ChangeIntoLog`` on the single vector obtained.
    """
    if self.cardinalPattern.match(word) is not None:
        # NOTE(review): __init__ inserts cardinal words into CARprefixTree
        # via reverseWord(word), but this lookup passes the word
        # unreversed. Behaviour kept as-is; confirm which is intended.
        return self.ChangeIntoLog(self.CARprefixTree.affixTagProbs(word))

    suffixtree = None
    prefixtree = None
    preProbVec = None
    sufProbVec = None

    isUpper = word[0].isupper()
    if self.mode == 1 or self.mode == 2:
        suffixtree = java2python_runtime.ternary(
            isUpper, self.UPsuffixTree, self.LOWsuffixTree)
        sufProbVec = suffixtree.affixTagProbs(word)
    if self.mode == 0 or self.mode == 2:
        prefixtree = java2python_runtime.ternary(
            isUpper, self.UPprefixTree, self.LOWprefixTree)
        # Prefix trees are queried with the reversed word.
        preProbVec = prefixtree.affixTagProbs(self.reverseWord(word))

    if suffixtree is not None and prefixtree is not None:
        return self.CombinePreSuff(preProbVec, sufProbVec)
    # Identity test: equality against None would go through any __eq__
    # the tree class defines.
    if suffixtree is None:
        return self.ChangeIntoLog(preProbVec)
    return self.ChangeIntoLog(sufProbVec)
def tagProbs(self, word):
    """Look up tag probabilities for ``word`` in the affix trees.

    A word matching ``self.cardinalPattern`` is looked up in
    ``self.CARprefixTree`` only. Any other word is looked up in the
    upper- or lower-case trees, chosen by its first character; ``self.mode``
    decides which trees are consulted (1 or 2: suffix, 0 or 2: prefix).
    For any other mode value no tree is consulted and ``ChangeIntoLog``
    receives ``None``.

    Parameters:
        word: String. A non-cardinal word must be non-empty, because its
            first character is inspected (an empty string raises
            ``IndexError``).

    Returns:
        Map<Integer, Double>: the result of ``self.CombinePreSuff`` when
        both a suffix and a prefix tree were consulted, otherwise the
        result of ``self.ChangeIntoLog`` on the single vector obtained.
    """
    if self.cardinalPattern.match(word) is not None:
        # NOTE(review): __init__ inserts cardinal words into CARprefixTree
        # via reverseWord(word), but this lookup passes the word
        # unreversed. Behaviour kept as-is; confirm which is intended.
        return self.ChangeIntoLog(self.CARprefixTree.affixTagProbs(word))

    suffixtree = None
    prefixtree = None
    preProbVec = None
    sufProbVec = None

    isUpper = word[0].isupper()
    if self.mode == 1 or self.mode == 2:
        suffixtree = java2python_runtime.ternary(
            isUpper, self.UPsuffixTree, self.LOWsuffixTree)
        sufProbVec = suffixtree.affixTagProbs(word)
    if self.mode == 0 or self.mode == 2:
        prefixtree = java2python_runtime.ternary(
            isUpper, self.UPprefixTree, self.LOWprefixTree)
        # Prefix trees are queried with the reversed word.
        preProbVec = prefixtree.affixTagProbs(self.reverseWord(word))

    if suffixtree is not None and prefixtree is not None:
        return self.CombinePreSuff(preProbVec, sufProbVec)
    # Identity test: equality against None would go through any __eq__
    # the tree class defines.
    if suffixtree is None:
        return self.ChangeIntoLog(preProbVec)
    return self.ChangeIntoLog(sufProbVec)