def test_segment_all_articles(self):
    """Check the first article produced by ``segment_all_articles``.

    Takes one item from the generator (called with
    ``include_interlinks=True``), unpacks it into title, sections and
    interlinks, and verifies:

    * the title equals ``self.expected_title``;
    * the first element of every section equals the matching entry of
      ``self.expected_section_titles``;
    * the second element of the first section contains the expected
      opening sentence;
    * three interlink entries, looked up by string key, hold the expected
      string values.
    """
    title, sections, interlinks = next(segment_all_articles(self.fname, include_interlinks=True))

    # Check title
    self.assertEqual(title, self.expected_title)

    # Check section titles
    section_titles = [s[0] for s in sections]
    self.assertEqual(section_titles, self.expected_section_titles)

    # Check text
    first_section_text = sections[0][1]
    first_sentence = "'''Anarchism''' is a political philosophy that advocates self-governed societies"
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn(first_sentence, first_section_text)

    # Check interlinks
    # NOTE(review): keys look like link targets and values like the displayed
    # link text -- confirm against segment_all_articles.
    self.assertEqual(interlinks['self-governance'], 'self-governed')
    self.assertEqual(interlinks['Hierarchy'], 'hierarchical')
    self.assertEqual(interlinks['Pierre-Joseph Proudhon'], 'Proudhon')
def test_segment_all_articles(self):
    """Check the first article produced by ``segment_all_articles``.

    Takes one item from the generator (called with
    ``include_interlinks=True``), unpacks it into title, sections and
    interlinks, and verifies:

    * the title equals ``self.expected_title``;
    * the first element of every section equals the matching entry of
      ``self.expected_section_titles``;
    * the second element of the first section contains the expected
      opening sentence;
    * ``interlinks`` has 685 entries and its first three entries are the
      expected 2-tuples of strings.
    """
    title, sections, interlinks = next(segment_all_articles(self.fname, include_interlinks=True))

    # Check title
    self.assertEqual(title, self.expected_title)

    # Check section titles
    section_titles = [s[0] for s in sections]
    self.assertEqual(section_titles, self.expected_section_titles)

    # Check text
    first_section_text = sections[0][1]
    first_sentence = "'''Anarchism''' is a political philosophy that advocates self-governed societies"
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn(first_sentence, first_section_text)

    # Check interlinks
    # NOTE(review): each pair looks like (link target, displayed link text)
    # -- confirm against segment_all_articles.
    self.assertEqual(len(interlinks), 685)
    self.assertEqual(interlinks[0], ("political philosophy", "political philosophy"))
    self.assertEqual(interlinks[1], ("self-governance", "self-governed"))
    self.assertEqual(interlinks[2], ("stateless society", "stateless societies"))
def test_generator_len(self):
    """Check how many items ``segment_all_articles`` yields for ``self.fname``.

    Exhausts the generator, counting one per yielded item, and compares the
    total with the expected article count of 106.
    """
    article_count = 0
    # Only the number of yielded items matters, so each item is discarded.
    for _ in segment_all_articles(self.fname):
        article_count += 1

    self.assertEqual(article_count, 106)