def test_keyword_in_context_ignore_case():
    """Check that a case-sensitive search only yields exact-case keyword hits."""
    options = dict(ignore_case=False, window_width=50, print_only=False)
    for keyword in ('All', 'all'):
        hits = list(text_utils.keyword_in_context(TEXT, keyword, **options))
        for _, kw, _ in hits:
            assert kw == keyword
    # a lowercase keyword must find nothing when case is respected
    no_hits = list(text_utils.keyword_in_context(TEXT, 'clinton', **options))
    assert no_hits == []
def test_keyword_in_context_unicode(self):
    """Check that a keyword containing non-ASCII characters is found in context.

    The observed and expected lists are compared as a whole: pairing them
    with ``zip`` would stop at the shorter list, letting the test pass even
    when no match (or too few matches) is returned.
    """
    text = 'No llores porque ya se terminó, sonríe porque sucedió.'
    observed = list(text_utils.keyword_in_context(
        text, 'terminó', print_only=False))
    expected = [('No llores porque ya se ', 'terminó', ', sonríe porque sucedió.')]
    self.assertEqual(observed, expected)
def test_keyword_in_context_unicode():
    """Check that every hit for a non-ASCII keyword reports that keyword."""
    keyword = 'terminó'
    sentence = 'No llores porque ya se terminó, sonríe porque sucedió.'
    kwic_iter = text_utils.keyword_in_context(sentence, keyword, print_only=False)
    results = list(kwic_iter)
    for _, kw, _ in results:
        assert kw == keyword
def test_keyword_in_context_width(self):
    """Check the context windows produced with ``window_width=10``.

    The whole result list is compared at once; iterating over
    ``zip(observed, expected)`` would silently ignore missing or extra
    matches because ``zip`` stops at the shorter input.
    """
    observed = list(text_utils.keyword_in_context(
        TEXT, 'clinton', ignore_case=True, window_width=10, print_only=False))
    expected = [
        ('when Bill ', 'Clinton', ' was elect'),
        ('d Hillary ', 'Clinton', ' have pled'),
    ]
    self.assertEqual(observed, expected)
def test_keyword_in_context(self):
    """Check the case-insensitive matches for 'clinton' with ``window_width=50``.

    The whole result list is compared at once; iterating over
    ``zip(observed, expected)`` would silently ignore missing or extra
    matches because ``zip`` stops at the shorter input.
    """
    observed = list(text_utils.keyword_in_context(
        TEXT, 'clinton', ignore_case=True, window_width=50, print_only=False))
    expected = [
        ('rtunes has been stark. Two decades ago, when Bill ',
         'Clinton',
         ' was elected president, the 400 highest-earning ta'),
        (' While Democrats like Bernie Sanders and Hillary ',
         'Clinton',
         ' have pledged to raise taxes on these voters, virt'),
    ]
    self.assertEqual(observed, expected)
def test_keyword_in_context_window_width():
    """Check that neither context side exceeds the requested window width."""
    for window_width in (10, 20):
        kwic_iter = text_utils.keyword_in_context(
            TEXT,
            'clinton',
            ignore_case=True,
            print_only=False,
            window_width=window_width,
        )
        results = list(kwic_iter)
        for pre, _, post in results:
            for side in (pre, post):
                assert len(side) <= window_width
def test_keyword_in_context_ignore_case(self):
    """Check that a case-sensitive search only yields exact-case keyword hits."""
    options = dict(ignore_case=False, window_width=50, print_only=False)
    for keyword in ('All', 'all'):
        hits = list(text_utils.keyword_in_context(TEXT, keyword, **options))
        for _, kw, _ in hits:
            self.assertEqual(kw, keyword)
    # a lowercase keyword must find nothing when case is respected
    no_hits = list(text_utils.keyword_in_context(TEXT, 'clinton', **options))
    self.assertEqual(no_hits, [])
def test_keyword_in_context_case(self):
    """Check that a case-sensitive search for lowercase 'clinton' finds nothing.

    The result is compared to the empty list directly. Looping over
    ``zip(observed, [])`` never executes its body, so that form asserted
    nothing and could not fail.
    """
    observed = list(
        text_utils.keyword_in_context(
            TEXT, 'clinton', ignore_case=False, window_width=50, print_only=False))
    self.assertEqual(observed, [])
def test_keyword_in_context_keyword():
    """Check that case-insensitive hits equal the keyword once lowercased."""
    options = dict(ignore_case=True, window_width=50, print_only=False)
    for keyword in ("clinton", "all"):
        hits = list(text_utils.keyword_in_context(TEXT, keyword, **options))
        for _, kw, _ in hits:
            assert kw.lower() == keyword
def test_keyword_in_context_keyword(self):
    """Check that case-insensitive hits equal the keyword once lowercased."""
    options = dict(ignore_case=True, window_width=50, print_only=False)
    for keyword in ('clinton', 'all'):
        hits = list(text_utils.keyword_in_context(TEXT, keyword, **options))
        for _, kw, _ in hits:
            self.assertEqual(kw.lower(), keyword)
def test_plaintext_functionality(text):
    """Smoke-test text preprocessing and keyword-in-context on plain text."""
    # only the first 100 characters of the preprocessed text are inspected
    sample = preprocess_text(text, lowercase=True, no_punct=True)[:100]
    for ch in sample:
        if ch.isalpha():
            assert ch.islower()
    for ch in sample:
        assert ch.isalnum() or ch.isspace()

    keyword = 'America'
    kwics = text_utils.keyword_in_context(
        text, keyword, window_width=35, print_only=False)
    for pre, kw, post in kwics:
        assert kw == keyword
        for side in (pre, post):
            assert isinstance(side, compat.unicode_)
def test_plaintext_functionality(text):
    """Smoke-test text preprocessing and keyword-in-context on plain text.

    The preprocessing steps are chained, each one consuming the previous
    step's output, so the whitespace-normalized text is what gets its
    punctuation removed and is then lowercased.
    """
    preprocessed_text = preprocessing.normalize_whitespace(text)
    # feed the normalized text forward instead of restarting from ``text``,
    # which would discard the whitespace normalization
    preprocessed_text = preprocessing.remove_punctuation(preprocessed_text)
    preprocessed_text = preprocessed_text.lower()
    assert all(char.islower() for char in preprocessed_text if char.isalpha())
    assert all(char.isalnum() or char.isspace() for char in preprocessed_text)

    keyword = "America"
    kwics = text_utils.keyword_in_context(
        text, keyword, window_width=35, print_only=False)
    for pre, kw, post in kwics:
        assert kw == keyword
        assert isinstance(pre, compat.unicode_)
        assert isinstance(post, compat.unicode_)
def test_keyword_in_context(text, keyword, ignore_case, window_width, has_results):
    """Check presence of results, keyword casing, and context window widths."""
    kwic_iter = text_utils.keyword_in_context(
        text,
        keyword,
        ignore_case=ignore_case,
        window_width=window_width,
        print_only=False,
    )
    results = list(kwic_iter)

    # results must be non-empty exactly when the case expects matches
    assert bool(results) == bool(has_results)

    for pre, kw, post in results:
        # compare the keyword case-insensitively only when asked to
        if ignore_case is True:
            found, wanted = kw.lower(), keyword.lower()
        else:
            found, wanted = kw, keyword
        assert found == wanted
        # neither side of the context may exceed the window
        for side in (pre, post):
            assert len(side) <= window_width
def test_keyword_in_context_case(self):
    """Check that a case-sensitive search for lowercase 'clinton' finds nothing.

    The result is compared to the empty list directly. Looping over
    ``zip(observed, [])`` never executes its body, so that form asserted
    nothing and could not fail.
    """
    observed = list(text_utils.keyword_in_context(
        TEXT, 'clinton', ignore_case=False, window_width=50, print_only=False))
    self.assertEqual(observed, [])