示例#1
0
    def get(self, symbols):
        """
        Estimate the perplexity of a sequence of symbols.

        Each n-gram drawn from ``symbols`` is looked up in ``self.model``
        (falling back to ``self.epsilon`` for unseen n-grams), its
        self-information is weighted by the model probability and the
        n-gram's occurrence count, and the result is 2 raised to that
        accumulated entropy.

        @return float value
        """
        ngram_counts = symbols_to_items(symbols, self.ngram)
        accumulated = 0

        for ngram_key, count in ngram_counts.items():
            lookup_key = " ".join(ngram_key).strip()
            # Unseen n-grams get the smoothing value self.epsilon.
            p = self.model.get(lookup_key, self.epsilon)
            info = log2(1.0 / p)
            accumulated += p * info * count

        return pow(2, accumulated)
示例#2
0
    def get(self):
        """
        Estimate the Shannon entropy of ``self.symbols``.

        Shannon's entropy measures the information contained in a message
        as opposed to the portion of the message that is determined
        (or predictable). Each n-gram's empirical probability is its
        occurrence count divided by the number of n-gram windows in the
        sequence.

        @return float value
        """
        ngram_counts = symbols_to_items(self.symbols, self.ngram)
        # Number of sliding n-gram windows over the symbol sequence.
        window_total = len(self.symbols) - self.ngram + 1
        entropy_sum = 0

        for count in ngram_counts.values():
            p = 1.0 * count / window_total
            entropy_sum += p * log2(1.0 / p)

        return entropy_sum