Пример #1
0
    def __init__(self, lexicon, uniGrams, maxAffixLength, Treshold,
                 minWordFreq, mode, dbg):
        """
        Build the upper-case, lower-case and cardinal affix trees.

        Every sufficiently frequent lexicon word is inserted into the trees
        selected by its shape (cardinal / upper-case / lower-case first
        character) and by ``mode``; afterwards every tree is pruned.

        Parameters:
            lexicon: mapping word -> (mapping tag -> frequency)
            uniGrams: passed unchanged to every AffixTree
            maxAffixLength: passed unchanged to every AffixTree
            Treshold: passed unchanged to every AffixTree (spelling kept
                so that existing callers keep working)
            minWordFreq: a word is used only when its summed tag frequency
                is strictly greater than this value
            mode: 0 = prefix trees only, 1 = suffix trees only, 2 = both;
                with any other value only cardinal words are inserted
            dbg: forwarded to WordProb.__init__ as ``debug``
        """
        WordProb.__init__(self, debug=dbg)
        self.mode = mode
        self.UPsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.UPprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.LOWsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.LOWprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.CARprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)

        # The mode does not change while the lexicon is scanned.
        useSuffix = self.mode in (1, 2)
        usePrefix = self.mode in (0, 2)

        for word in lexicon:
            # Empty entries and the sentence boundary markers carry no
            # affix information.
            if not word or word in ("<STARTTAG>", "<ENDTAG>"):
                continue

            tagCounts = lexicon[word]
            wordFreq = sum(tagCounts[tag] for tag in tagCounts)
            if wordFreq <= minWordFreq:
                # Too rare: skip before doing any tree selection work.
                continue

            suffixtree = None
            prefixtree = None
            if self.cardinalPattern.match(word) is not None:
                # Cardinals only ever go into their dedicated tree.
                prefixtree = self.CARprefixTree
            else:
                isUpper = word[0].isupper()
                if useSuffix:
                    suffixtree = (self.UPsuffixTree if isUpper
                                  else self.LOWsuffixTree)
                if usePrefix:
                    prefixtree = (self.UPprefixTree if isUpper
                                  else self.LOWprefixTree)

            if suffixtree is not None:
                suffixtree.addWord(word, tagCounts)
            if prefixtree is not None:
                # Prefix trees receive the reversed word.
                prefixtree.addWord(self.reverseWord(word), tagCounts)

        # All five trees were created above, so none of them can be None.
        for tree in (self.UPsuffixTree, self.UPprefixTree,
                     self.LOWsuffixTree, self.LOWprefixTree,
                     self.CARprefixTree):
            tree.Pruning()
Пример #2
0
    def __init__(self, lexicon, uniGrams, maxAffixLength, Treshold, minWordFreq, mode, dbg):
        """
        Create the five affix trees, fill them from the lexicon and prune them.

        Parameters:
            lexicon: mapping word -> (mapping tag -> frequency)
            uniGrams: handed to each AffixTree constructor
            maxAffixLength: handed to each AffixTree constructor
            Treshold: handed to each AffixTree constructor (name kept as-is
                for backward compatibility)
            minWordFreq: words whose summed tag frequency is not strictly
                greater than this value are ignored
            mode: 0 = prefix trees only, 1 = suffix trees only, 2 = both;
                for any other value only cardinal words are inserted
            dbg: forwarded to WordProb.__init__ as ``debug``
        """
        WordProb.__init__(self, debug=dbg)
        self.mode = mode
        self.UPsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.UPprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.LOWsuffixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.LOWprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)
        self.CARprefixTree = AffixTree(uniGrams, Treshold, maxAffixLength)

        # Loop-invariant: which kinds of tree the chosen mode fills.
        fillSuffix = self.mode in (1, 2)
        fillPrefix = self.mode in (0, 2)

        for word in lexicon:
            # Skip empty entries and the sentence boundary markers.
            if not word or word in ("<STARTTAG>", "<ENDTAG>"):
                continue

            tagCounts = lexicon[word]
            wordFreq = sum(tagCounts[tag] for tag in tagCounts)
            if wordFreq <= minWordFreq:
                # Rare word: nothing is inserted, so no tree needs choosing.
                continue

            suffixtree = None
            prefixtree = None
            if self.cardinalPattern.match(word) is not None:
                # Cardinal words are collected in their own tree only.
                prefixtree = self.CARprefixTree
            else:
                isUpper = word[0].isupper()
                if fillSuffix:
                    suffixtree = self.UPsuffixTree if isUpper else self.LOWsuffixTree
                if fillPrefix:
                    prefixtree = self.UPprefixTree if isUpper else self.LOWprefixTree

            if suffixtree is not None:
                suffixtree.addWord(word, tagCounts)
            if prefixtree is not None:
                # The word is reversed before it goes into a prefix tree.
                prefixtree.addWord(self.reverseWord(word), tagCounts)

        # Every tree was constructed unconditionally above, so each one is pruned.
        for tree in (self.UPsuffixTree, self.UPprefixTree, self.LOWsuffixTree,
                     self.LOWprefixTree, self.CARprefixTree):
            tree.Pruning()
Пример #3
0
    def tagProbs(self, word):
        """
        Look up the tag probabilities of a word in the affix trees.

        A word matching ``self.cardinalPattern`` is looked up in the
        cardinal tree only. Any other word uses the upper- or lower-case
        trees, chosen by the case of its first character; ``self.mode``
        selects the suffix tree (1), the prefix tree (0) or both (2).

        Returns Map<Integer, Double>: ``CombinePreSuff(prefix, suffix)``
            when both a prefix and a suffix tree were consulted, otherwise
            ``ChangeIntoLog`` of the single vector obtained (of ``None``
            when the mode selected no tree at all).
        Parameters:
            word: String
        Raises:
            IndexError: word is empty and does not match the cardinal
                pattern.
        """
        suffixtree = None
        prefixtree = None
        preProbVec = None
        sufProbVec = None

        if self.cardinalPattern.match(word) is not None:
            prefixtree = self.CARprefixTree
            # NOTE(review): the cardinal tree is queried with the word
            # as-is, while the prefix trees below are queried with the
            # reversed word - confirm this matches how CARprefixTree is
            # populated.
            preProbVec = prefixtree.affixTagProbs(word)
        else:
            isUpper = word[0].isupper()
            if self.mode in (1, 2):
                suffixtree = (self.UPsuffixTree if isUpper
                              else self.LOWsuffixTree)
                sufProbVec = suffixtree.affixTagProbs(word)
            if self.mode in (0, 2):
                prefixtree = (self.UPprefixTree if isUpper
                              else self.LOWprefixTree)
                preProbVec = prefixtree.affixTagProbs(self.reverseWord(word))

        if suffixtree is not None and prefixtree is not None:
            return self.CombinePreSuff(preProbVec, sufProbVec)
        # Identity test: "== None" would go through a custom __eq__.
        if suffixtree is None:
            return self.ChangeIntoLog(preProbVec)
        return self.ChangeIntoLog(sufProbVec)
Пример #4
0
    def tagProbs(self, word):
        """
        Compute the tag probabilities the affix trees assign to a word.

        Words matching ``self.cardinalPattern`` are looked up in the cardinal
        tree only. For all other words the case of the first character picks
        the upper- or lower-case trees and ``self.mode`` decides whether the
        suffix tree (1), the prefix tree (0) or both (2) are consulted.

        Returns Map<Integer, Double>: ``CombinePreSuff(prefix, suffix)`` when
            both kinds of tree were consulted, otherwise ``ChangeIntoLog`` of
            the one vector obtained (of ``None`` if the mode selected no tree).
        Parameters:
            word: String
        Raises:
            IndexError: word is empty and does not match the cardinal pattern.
        """
        suffixtree = None
        prefixtree = None
        preProbVec = None
        sufProbVec = None

        if self.cardinalPattern.match(word) is not None:
            prefixtree = self.CARprefixTree
            # NOTE(review): unlike the prefix lookup below, the cardinal tree
            # is queried with the unreversed word - confirm this matches how
            # CARprefixTree is populated.
            preProbVec = prefixtree.affixTagProbs(word)
        else:
            isUpper = word[0].isupper()
            if self.mode in (1, 2):
                suffixtree = self.UPsuffixTree if isUpper else self.LOWsuffixTree
                sufProbVec = suffixtree.affixTagProbs(word)
            if self.mode in (0, 2):
                prefixtree = self.UPprefixTree if isUpper else self.LOWprefixTree
                preProbVec = prefixtree.affixTagProbs(self.reverseWord(word))

        if suffixtree is not None and prefixtree is not None:
            return self.CombinePreSuff(preProbVec, sufProbVec)
        # "is None" instead of "== None": identity cannot be overridden by __eq__.
        if suffixtree is None:
            return self.ChangeIntoLog(preProbVec)
        return self.ChangeIntoLog(sufProbVec)