示例#1
0
 def test_sequence_length_two_returns_distinct_observed_ngrams(self):
     # The four length-2 tuples expected from self.short_corpus.
     expected_pairs = {
         ('alpha', 'beta'),
         ('beta', 'gamma'),
         ('alef', 'bet'),
         ('bet', 'gimel'),
     }
     observed_pairs = create_sequence_list(self.short_corpus, 2)
     # Compared as sets, so ordering and repeated entries in the result
     # do not affect the outcome of this check.
     self.assertEqual(set(observed_pairs), expected_pairs)
示例#2
0
 def test_sequence_length_one_returns_distinct_words(self):
     unigrams = create_sequence_list(self.corpus, 1)
     # Every item of every corpus entry, wrapped as a 1-tuple and
     # deduplicated by the set.
     expected_unigrams = {(word,) for entry in self.corpus for word in entry}
     # The result must contain exactly the expected members ...
     self.assertEqual(set(unigrams), expected_unigrams)
     # ... and have the same length, which rules out repeated entries.
     self.assertEqual(len(unigrams), len(expected_unigrams))