def test_iter_windows_include_below_window_size(self):
    """Check how ``iter_windows`` treats texts shorter than the window size.

    With ``ignore_below_size=False`` every text is expected back as a single
    window; with the default arguments only the text that reaches the window
    size is expected.
    """
    texts = [['this', 'is', 'a'], ['test', 'document']]

    # Filter disabled: the two-token text must still be yielded.
    unfiltered = utils.iter_windows(texts, 3, ignore_below_size=False)
    self.assertEqual(texts, [list(window) for window in unfiltered])

    # Default call: only the three-token text is expected to survive.
    filtered = utils.iter_windows(texts, 3)
    self.assertEqual([texts[0]], [list(window) for window in filtered])
def test_iter_windows_uses_views(self):
    """Check that windows over a numpy text alias the underlying array.

    The window contents are verified first; then a write through the first
    window must be visible in the source array.
    """
    array_doc = np.array(['this', 'is', 'a'], dtype='object')
    texts = [array_doc, ['test', 'document']]
    expected = [['this', 'is'], ['is', 'a'], ['test', 'document']]

    windows = list(utils.iter_windows(texts, 2))
    self.assertListEqual(list(map(list, windows)), expected)

    # Mutating the window should write through to the original text.
    windows[0][0] = 'modified'
    self.assertEqual('modified', texts[0][0])
def accumulate(self, texts, window_size):
    """Analyze every window of ``window_size`` tokens taken from ``texts``.

    The texts are first passed through ``self._iter_texts``; each resulting
    window is handed to ``self.analyze_text`` together with its document
    number, and ``self.num_docs`` is incremented once per window.

    Returns ``self`` so that calls can be chained.
    """
    doc_windows = utils.iter_windows(
        self._iter_texts(texts),
        window_size,
        ignore_below_size=False,
        include_doc_num=True,
    )
    # Every window is treated as its own (virtual) document.
    for source_doc_num, window in doc_windows:
        self.analyze_text(window, source_doc_num)
        self.num_docs += 1
    return self
def test_iter_windows_with_copy(self):
    """Check that ``copy=True`` yields windows independent of the source texts.

    Writing into a returned window must leave the original arrays untouched.
    """
    first_doc = np.array(['this', 'is', 'a'], dtype='object')
    second_doc = np.array(['test', 'document'], dtype='object')
    texts = [first_doc, second_doc]

    windows = list(utils.iter_windows(texts, 2, copy=True))

    # Write into the first window; the first text must be unchanged.
    windows[0][0] = 'modified'
    self.assertEqual('this', texts[0][0])

    # Write into the third window; the second text must be unchanged.
    windows[2][0] = 'modified'
    self.assertEqual('test', texts[1][0])
def test_iter_windows_list_texts(self):
    """Check the windows of size 2 produced from plain list texts."""
    expected = [['this', 'is'], ['is', 'a'], ['test', 'document']]
    texts = [['this', 'is', 'a'], ['test', 'document']]

    produced = list(utils.iter_windows(texts, 2))
    as_lists = [list(window) for window in produced]

    self.assertListEqual(as_lists, expected)