def ngram_tuples(n, string, minlen=3, maxlen=25):
    """Run ``string`` through the word-cleaning pipeline, then window it.

    The input is threaded, in order, through ``utils.lower``,
    ``utils.splitter_of_words``, ``utils.filter_whitespace``,
    ``utils.filter_shorter_than(minlen)``,
    ``utils.filter_longer_than(maxlen)``, ``utils.filter_stopwords`` and
    finally ``sliding_window_c(n)``.

    Args:
        n: Argument handed to ``sliding_window_c``; presumably the window
            (n-gram) size -- confirm against that helper.
        string: The value fed into the first pipeline stage.
        minlen: Argument for ``utils.filter_shorter_than`` (default 3).
        maxlen: Argument for ``utils.filter_longer_than`` (default 25).

    Returns:
        Whatever the last stage, ``sliding_window_c(n)``, produces.
    """
    # Stages are built up front, in the same order they are applied.
    stages = (
        utils.lower,
        utils.splitter_of_words,
        utils.filter_whitespace,
        utils.filter_shorter_than(minlen),
        utils.filter_longer_than(maxlen),
        utils.filter_stopwords,
        sliding_window_c(n),
    )
    return tlz.pipe(string, *stages)
def test__filter_longer_than(tokenset, mintokenlen, count):
    """Check how many tokens remain after ``utils.filter_longer_than``.

    Applies ``utils.filter_longer_than(mintokenlen)`` to ``tokenset``,
    materialises the result as a list, and asserts that its length
    equals ``count``.

    NOTE(review): the threshold parameter is called ``mintokenlen`` even
    though it is passed to ``filter_longer_than`` -- confirm the name is
    intentional before relying on it.
    """
    length_filter = utils.filter_longer_than(mintokenlen)
    remaining = list(length_filter(tokenset))
    assert len(remaining) == count
def test__filter_longer_than(tokenset, mintokenlen, count):
    """Assert the number of items left by ``utils.filter_longer_than``.

    ``tokenset`` is piped through ``utils.filter_longer_than(mintokenlen)``,
    then ``list``, then ``len``; the resulting length must equal ``count``.

    NOTE(review): a function with this same name is defined on the line
    directly above; if both live in one module the later definition
    replaces the earlier one -- confirm whether both are meant to exist.
    """
    stages = (utils.filter_longer_than(mintokenlen), list, len)
    assert tlz.pipe(tokenset, *stages) == count