def setUp(self):
    """Run the inherited fixture setup, then create ``self.visitor``.

    The parent ``setUp`` is called first; afterwards a new ``PageVisitor``
    instance is stored on the test case as ``self.visitor``.
    """
    super(TestPageExtractor, self).setUp()
    self.visitor = PageVisitor()
class TestPageExtractor(BaseTestCase):
    """Check what PageExtractor extracts from parsed page markup.

    Each test builds a page string, runs it through ``assert_extract`` and
    states the expected ``specs``, ``compat``, ``footnotes`` and ``issues``.
    """

    def setUp(self):
        """Run the inherited fixture setup, then create ``self.visitor``."""
        super(TestPageExtractor, self).setUp()
        self.visitor = PageVisitor()

    def assert_extract(
            self, page, specs=None, compat=None, footnotes=None, issues=None):
        """Parse and extract ``page``, then compare each extracted section.

        ``page`` is parsed with ``kumascript_grammar``, visited with
        ``self.visitor`` and handed to a ``PageExtractor`` together with
        ``self.feature``.

        ``specs``, ``compat`` and ``issues`` are compared against an empty
        list when omitted (or otherwise falsy).  ``footnotes`` is compared
        exactly as passed, so its default expectation is ``None``.
        """
        tree = kumascript_grammar.parse(page)
        extractor = PageExtractor(
            elements=self.visitor.visit(tree), feature=self.feature)
        actual = extractor.extract()
        # Checked in this fixed order so the first reported mismatch is
        # always for the earliest differing section.
        expectations = (
            ('specs', specs or []),
            ('compat', compat or []),
            ('footnotes', footnotes),
            ('issues', issues or []),
        )
        for section, expected in expectations:
            self.assertEqual(expected, actual[section])

    def test_valid_spec_section(self):
        spec_html, spec_data = self.get_sample_specs()
        self.assert_extract(spec_html, specs=spec_data)

    def test_invalid_spec_section(self):
        # The expected issue spans the paragraph that follows the heading
        # (characters 23-40 of the page).
        skipped = ('skipped_content', 23, 40, {})
        self.assert_extract(
            '<h2>Specifications</h2><p>Incomplete</p>', issues=[skipped])

    def test_valid_compat_section(self):
        compat_html, compat_data = self.get_sample_compat()
        self.assert_extract(compat_html, compat=compat_data)

    def test_invalid_compat_section(self):
        # The expected issue spans the paragraph that follows the heading
        # (characters 30-48 of the page).
        skipped = ('skipped_content', 30, 48, {})
        self.assert_extract(
            '<h2>Browser Compatibility</h2><p>Not present</p>',
            issues=[skipped])

    def test_full_page(self):
        # Specification section first, compatibility section second,
        # surrounded by unrelated content.
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (spec_html, compat_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_full_page_reversed_sections(self):
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1175177
        # https://developer.mozilla.org/en-US/docs/Web/API/Blob/slice
        # Same as test_full_page, but the compatibility section comes before
        # the specification section and an <h3> appears outside of it.
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
<h3>More Detail</h3>
<p>Trigger <h3> check not in a compat section.</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (compat_html, spec_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_compat_h3(self):
        # https://developer.mozilla.org/en-US/docs/Web/API/MozContact/key
        # An <h3> appended after the compat section is expected to be
        # reported as a 'skipped_h3' issue while the compat data is kept.
        compat_html, compat_data = self.get_sample_compat()
        trailing_h3 = '<h3 id="Gecko Gecko">Gecko Note</h3>\n<p>A note</p>'
        skipped = ('skipped_h3', 354, 390, {'h3': 'Gecko Note'})
        self.assert_extract(
            compat_html + trailing_h3, compat=compat_data, issues=[skipped])

    def test_div_wrapped(self):
        # https://developer.mozilla.org/en-US/docs/Web/API/Document/execCommand
        # Both sections sit inside a <div>: no specs or compat data are
        # expected, only a 'no_data' issue spanning the lead paragraph.
        spec_html, _unused_specs = self.get_sample_specs()
        compat_html, _unused_compat = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<div>
%s
%s
</div>
"""
        page = template % (spec_html, compat_html)
        no_data = ('no_data', 0, 24, {})
        self.assert_extract(page, issues=[no_data])
class TestPageExtractor(BaseTestCase):
    """Check what PageExtractor extracts from parsed page markup.

    Each test builds a page string, runs it through ``assert_extract`` and
    states the expected ``specs``, ``compat``, ``footnotes`` and ``issues``.
    """

    def setUp(self):
        """Run the inherited fixture setup, then create ``self.visitor``."""
        super(TestPageExtractor, self).setUp()
        self.visitor = PageVisitor()

    def assert_extract(
            self, page, specs=None, compat=None, footnotes=None, issues=None):
        """Parse and extract ``page``, then compare each extracted section.

        ``page`` is parsed with ``kumascript_grammar``, visited with
        ``self.visitor`` and handed to a ``PageExtractor`` together with
        ``self.feature``.

        ``specs``, ``compat`` and ``issues`` are compared against an empty
        list when omitted (or otherwise falsy).  ``footnotes`` is compared
        exactly as passed, so its default expectation is ``None``.
        """
        tree = kumascript_grammar.parse(page)
        extractor = PageExtractor(
            elements=self.visitor.visit(tree), feature=self.feature)
        actual = extractor.extract()
        # Checked in this fixed order so the first reported mismatch is
        # always for the earliest differing section.
        expectations = (
            ('specs', specs or []),
            ('compat', compat or []),
            ('footnotes', footnotes),
            ('issues', issues or []),
        )
        for section, expected in expectations:
            self.assertEqual(expected, actual[section])

    def test_valid_spec_section(self):
        spec_html, spec_data = self.get_sample_specs()
        self.assert_extract(spec_html, specs=spec_data)

    def test_invalid_spec_section(self):
        # The expected issue spans the paragraph that follows the heading
        # (characters 23-40 of the page).
        skipped = ('skipped_content', 23, 40, {})
        self.assert_extract(
            '<h2>Specifications</h2><p>Incomplete</p>', issues=[skipped])

    def test_valid_compat_section(self):
        compat_html, compat_data = self.get_sample_compat()
        self.assert_extract(compat_html, compat=compat_data)

    def test_invalid_compat_section(self):
        # The expected issue spans the paragraph that follows the heading
        # (characters 30-48 of the page).
        skipped = ('skipped_content', 30, 48, {})
        self.assert_extract(
            '<h2>Browser Compatibility</h2><p>Not present</p>',
            issues=[skipped])

    def test_full_page(self):
        # Specification section first, compatibility section second,
        # surrounded by unrelated content.
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (spec_html, compat_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_full_page_reversed_sections(self):
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1175177
        # https://developer.mozilla.org/en-US/docs/Web/API/Blob/slice
        # Same as test_full_page, but the compatibility section comes before
        # the specification section and an <h3> appears outside of it.
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
<h3>More Detail</h3>
<p>Trigger <h3> check not in a compat section.</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (compat_html, spec_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_compat_h3(self):
        # https://developer.mozilla.org/en-US/docs/Web/API/MozContact/key
        # An <h3> appended after the compat section is expected to be
        # reported as a 'skipped_h3' issue while the compat data is kept.
        compat_html, compat_data = self.get_sample_compat()
        trailing_h3 = '<h3 id="Gecko Gecko">Gecko Note</h3>\n<p>A note</p>'
        skipped = ('skipped_h3', 354, 390, {'h3': 'Gecko Note'})
        self.assert_extract(
            compat_html + trailing_h3, compat=compat_data, issues=[skipped])

    def test_div_wrapped(self):
        # https://developer.mozilla.org/en-US/docs/Web/API/Document/execCommand
        # Both sections sit inside a <div>: no specs or compat data are
        # expected, only a 'no_data' issue spanning the lead paragraph.
        spec_html, _unused_specs = self.get_sample_specs()
        compat_html, _unused_compat = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<div>
%s
%s
</div>
"""
        page = template % (spec_html, compat_html)
        no_data = ('no_data', 0, 24, {})
        self.assert_extract(page, issues=[no_data])