Пример #1
0
    def test_from_url(self):
        """Check that ``from_url`` finds the date written in several URL layouts.

        For every sample URL the sorted results must be non-empty and their
        first element must equal the date that appears in the URL.
        """
        dateminer = DateMiner()

        # (url, expected first result) pairs, checked in order with one miner.
        cases = (
            # standard bunched together date
            ('http://businessweek.com/news/2010-10-04/germany-steps-up-bonus-curbs-at-commerzbank-hypo-real-estate.html',
             datetime.date(year=2010, month=10, day=4)),
            # test the behavior of CNN-like urls which split the year from the rest of the date
            ('http://www.cnn.com/2010/US/05/20/gulf.oil.spill/index.html?hpt=T2',
             datetime.date(year=2010, month=5, day=20)),
            # date without separators
            ('http://www.latimes.com/news/nationworld/nation/la-na-texas-death-20110916,0,3367730.story',
             datetime.date(year=2011, month=9, day=16)),
        )

        for url, expected in cases:
            results = list(dateminer.from_url(url).sorted())
            # ``results`` doubles as the failure message so the mined values are shown.
            self.assertGreater(len(results), 0, results)
            # assertEqual, not assertEquals: the alias was removed in Python 3.12.
            self.assertEqual(results[0], expected)
Пример #2
0
 def test_from_html(self):
     """Check that ``from_html`` on the ``cnn.html`` fixture yields 2010-05-20 first.

     The sorted results must be non-empty and their first element must equal
     the expected date.
     """
     dateminer = DateMiner()
     results = list(dateminer.from_html(get_fixture_data('cnn.html')).sorted())
     # ``results`` doubles as the failure message so the mined values are shown.
     self.assertGreater(len(results), 0, results)
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(results[0], datetime.date(year=2010, month=5, day=20))