def test_site_pages(self):
    """
    Check the template parser against annotations recorded from real pages.

    Sample pairs are read from ``samples/samples_pageparsing_<n>.html`` and
    ``samples/samples_pageparsing_<n>.json`` (relative to ``path``),
    starting at ``n = 0`` and stopping at the first index whose JSON file
    does not exist. For each pair the HTML is fed to a
    ``TemplatePageParser`` and every resulting annotation is compared,
    slot by slot, with the next entry of the JSON list.

    Real pages are more reliable and easier to build than hand-written
    fixtures for more complicated structures.
    """
    SAMPLES_FILE_PREFIX = os.path.join(path, "samples/samples_pageparsing")
    count = 0
    fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
    # NOTE: if no sample file is found the loop body never runs and the
    # test passes without checking anything.
    while os.path.exists(fname):
        # Read both files inside context managers so the handles are
        # closed deterministically instead of being left to the GC.
        with open("%s_%d.html" % (SAMPLES_FILE_PREFIX, count), "rb") as html_file:
            source = html_file.read().decode('utf-8')
        with open(fname, "rb") as json_file:
            annotations = json.loads(json_file.read().decode('utf-8'))

        template = HtmlPage(body=source)
        parser = TemplatePageParser(TokenDict())
        parser.feed(template)

        for annotation in parser.annotations:
            # Expected annotations are consumed in order; running out of
            # them raises IndexError and fails the test.
            test_annotation = annotations.pop(0)
            for s in annotation.__slots__:
                if s == "tag_attributes":
                    # Each parsed pair is converted to a list before being
                    # compared with the next expected entry.
                    for pair in getattr(annotation, s):
                        self.assertEqual(list(pair), test_annotation[s].pop(0))
                else:
                    self.assertEqual(getattr(annotation, s), test_annotation[s])
        # Every expected annotation must have been matched by the parser.
        self.assertEqual(annotations, [])

        count += 1
        fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
def test_site_pages(self):
    """
    Run the template parser over the 'pageparsing' samples from real pages.

    Every annotation produced by the parser is compared, slot by slot,
    against the next expected annotation of the sample; once the parser's
    annotations are exhausted no expected annotation may remain.
    """
    for page_source, expected in iter_samples('pageparsing'):
        page = HtmlPage(body=page_source)
        page_parser = TemplatePageParser(TokenDict())
        page_parser.feed(page)

        for parsed in page_parser.annotations:
            # Expected annotations are consumed front to back.
            reference = expected.pop(0)
            for slot in parsed.__slots__:
                actual = getattr(parsed, slot)
                if slot != "tag_attributes":
                    self.assertEqual(actual, reference[slot])
                    continue
                # Attribute pairs are compared one at a time, each
                # converted to a list first.
                for attribute_pair in actual:
                    self.assertEqual(list(attribute_pair), reference[slot].pop(0))

        # Nothing expected may be left unmatched for this sample.
        self.assertEqual(expected, [])