def main():
    """Run the Document extraction calls against the bundled sample page.

    Reads ``./samples/21853124_0.shtml``, builds a ``Document`` from its
    contents, then calls ``transform()``, ``get_publish_date()``,
    ``short_title()`` and ``text_content()`` in that order. The return
    values are discarded; this function only exercises the calls.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open('./samples/21853124_0.shtml') as sample_file:
        html = sample_file.read()

    doc = Document(html)
    doc.transform()
    doc.get_publish_date()
    doc.short_title()
    doc.text_content()
def test_pub_date(self):
    # Checks the publish date and author that Document reports for the
    # fixture HTML held in self.html, after transform() has been applied.
    document = Document(self.html)
    document.transform()

    expected_date = datetime.datetime(2013, 2, 1, 11, 16)
    actual_date = document.get_publish_date()
    self.assertEqual(expected_date, actual_date)

    # The author is only queried once the date assertion has passed.
    actual_author = document.get_author()
    self.assertEqual('PN039', actual_author)