from math import log2  # required by both estimators below; assumed to sit at module top

def get(self, symbols):
    """
    Estimates the perplexity of a vector of symbols.
    @return float value
    """
    # symbols_to_items is a module helper (defined elsewhere) that maps each
    # n-gram of `symbols` to its number of occurrences.
    exr = symbols_to_items(symbols, self.ngram)
    total = len(symbols) - self.ngram + 1
    entropy = 0.0
    for symbol, occurrences in exr.items():
        realsymbol = " ".join(symbol).strip()
        # Model probability of the n-gram; epsilon is the floor for unseen n-grams.
        probability = self.model.get(realsymbol, self.epsilon)
        self_information = log2(1.0 / probability)
        # Weight by the empirical frequency (occurrences / total), not by the
        # model probability, so the sum is the cross-entropy of the model on
        # this sequence; 2 ** cross-entropy is the perplexity.
        entropy += (1.0 * occurrences / total) * self_information
    return pow(2, entropy)
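
# A standalone sketch of the same computation, useful for checking the formula
# in isolation: perplexity = 2 ** H(p_emp, q), where p_emp is the empirical
# n-gram distribution of the input and q is the model probability (with
# `epsilon` as the floor for unseen n-grams). The plain-dict `model` argument
# and the `epsilon` default below are assumptions mirroring self.model and
# self.epsilon above.
from collections import Counter

def perplexity_sketch(symbols, model, ngram=1, epsilon=1e-10):
    grams = [" ".join(symbols[i:i + ngram]) for i in range(len(symbols) - ngram + 1)]
    total = len(grams)
    cross_entropy = 0.0
    for gram, occurrences in Counter(grams).items():
        q = model.get(gram, epsilon)  # model probability, floored at epsilon
        cross_entropy += (occurrences / total) * log2(1.0 / q)
    return 2 ** cross_entropy

# A model assigning 1/2 to each observed unigram yields perplexity 2.0:
# perplexity_sketch(["a", "b"], {"a": 0.5, "b": 0.5}) -> 2.0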
def get(self):
    """
    Estimates the Shannon entropy of a vector of symbols.
    Shannon's entropy measures the information contained in a message,
    as opposed to the portion of the message that is determined
    (or predictable).
    @return float value
    """
    exr = symbols_to_items(self.symbols, self.ngram)
    # Number of n-grams in the sequence.
    total = len(self.symbols) - self.ngram + 1
    entropy = 0.0
    for symbol, occurrences in exr.items():
        # Empirical probability of the n-gram.
        probability = 1.0 * occurrences / total
        self_information = log2(1.0 / probability)
        entropy += probability * self_information
    return entropy
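
# A self-contained check of the entropy formula above, using
# collections.Counter in place of the module's symbols_to_items helper
# (whose exact return shape is an assumption here). Note that
# log2(total / c) == log2(1 / (c / total)), i.e. the self-information term.
from collections import Counter

def shannon_entropy_sketch(symbols, ngram=1):
    grams = [tuple(symbols[i:i + ngram]) for i in range(len(symbols) - ngram + 1)]
    total = len(grams)
    return sum((c / total) * log2(total / c) for c in Counter(grams).values())

# Four symbols, two equally likely values -> exactly 1.0 bit per symbol:
# shannon_entropy_sketch(["a", "b", "a", "b"]) -> 1.0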