def parse(self, text):
    """Count every n-gram of order 1..self.max_order found in *text*.

    The text is split on runs of whitespace. For each token position,
    every n-gram starting there (up to ``self.max_order`` tokens long and
    not running past the end of the text) is looked up in
    ``self.storage_``; a missing entry is created, an existing entry has
    its ``count`` incremented, and the entry is written back with
    ``set_n_gram``.

    Args:
        text: The string to tokenize and count n-grams from.

    Returns:
        None. The results are recorded in ``self.storage_``.
    """
    # Splitting on whitespace yields '' at the edges when the text has
    # leading/trailing whitespace (or is empty); those are not real words.
    tokens = [token for token in re.split(r'\s+', text) if token]
    total = len(tokens)

    for start in range(total):
        # Inclusive upper bound: n-grams that end exactly on the last
        # token must be counted too (previously they were skipped).
        longest = min(self.max_order, total - start)
        for order in range(1, longest + 1):
            words_tuple = tuple(tokens[start:start + order])
            ngram = self.storage_.get_n_gram(words_tuple)
            if ngram is None:
                # NOTE(review): presumably Ngram(1) means "initial count
                # of 1" - confirm against the Ngram constructor.
                ngram = Ngram(1)
            else:
                ngram.count += 1
            self.storage_.set_n_gram(words_tuple, ngram)
def mock_ngram(self, string, count, frequency, sig_score):
    """Build an Ngram from *string* with the given attribute values.

    Args:
        string: Value passed to the ``Ngram`` constructor.
        count: Value assigned to the ``count`` attribute.
        frequency: Value assigned to the ``frequency`` attribute.
        sig_score: Value assigned to the ``sig_score`` attribute.

    Returns:
        The newly constructed ``Ngram`` with the three attributes set.
    """
    fixture = Ngram(string)
    # Applied in a fixed order: count, frequency, sig_score.
    overrides = (
        ('count', count),
        ('frequency', frequency),
        ('sig_score', sig_score),
    )
    for attr_name, attr_value in overrides:
        setattr(fixture, attr_name, attr_value)
    return fixture