def testCrawlJobsScheduledChecked(self):
    """ tests the mark_job_scheduled and mark_job_checked logic """
    urls = [u"http://feeds.feedburner.com/43folders",
            u"http://advocacy.python.org/podcasts/littlebit.rss",
            u"http://friendfeed.com/alawrence?format=atom",
            u"http://feeds.feedburner.com/antiwar"]

    # indicate each url is scheduled to be crawled
    with transaction.manager:
        for url in urls:
            rec = scheduler.mark_job_scheduled(url)
            self.assert_(rec, "no rec for url %s" % url)

    # one crawl job record should now exist per url;
    # query(...).all() already returns a list, no comprehension needed
    recs = meta.Session().query(scheduler.CrawlJobModel).all()
    self.assert_(len(recs) == len(urls), (len(recs), len(urls)))

    # pretend we crawled each url and update its record
    with transaction.manager:
        for url in urls:
            etag = str(uuid.uuid4())
            last_modified = datetime.datetime.now()
            rec = scheduler.mark_job_checked(url, etag=etag,
                                             last_modified=last_modified)
            self.assert_(rec, "no rec for url %s" % url)
            self.assert_(etag == rec.etag)
            self.assert_(last_modified == rec.last_modified)
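# NOTE: these test methods rely on module-level imports not shown in this
# fragment. A minimal sketch of the header they assume; the package paths
# for `meta` and `scheduler` are hypothetical, the rest is the standard
# library plus the `transaction` package that provides transaction.manager:
#
#     import datetime
#     import unittest
#     import uuid
#
#     import transaction
#
#     from myapp.model import meta     # hypothetical module path
#     from myapp import scheduler      # hypothetical module path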
def make_data(self):
    """ set up data: schedule a crawl job for each url """
    urls = [u"http://feeds.feedburner.com/43folders",
            u"http://advocacy.python.org/podcasts/littlebit.rss",
            u"http://friendfeed.com/alawrence?format=atom",
            u"http://feeds.feedburner.com/antiwar"]
    recs = []

    # indicate each url is scheduled
    with transaction.manager:
        for url in urls:
            recs.append(scheduler.mark_job_scheduled(url))

    return recs
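# The assertions above read `etag` and `last_modified` off the returned
# record, and testScheduleExisting below backdates scheduled/checked
# timestamps, so the model presumably carries at least those columns.
# A minimal sketch of what scheduler.CrawlJobModel might look like; only
# `url`, `etag`, and `last_modified` are confirmed by these tests, the
# other column names are assumptions:
#
#     from sqlalchemy import Column, DateTime, Integer, Unicode
#     from sqlalchemy.ext.declarative import declarative_base
#
#     Base = declarative_base()
#
#     class CrawlJobModel(Base):
#         __tablename__ = 'crawl_jobs'
#
#         id = Column(Integer, primary_key=True)
#         url = Column(Unicode(512), unique=True, nullable=False)
#         etag = Column(Unicode(128))        # HTTP ETag from the last fetch
#         last_modified = Column(DateTime)   # HTTP Last-Modified value
#         date_scheduled = Column(DateTime)  # assumed: when the job was queued
#         date_checked = Column(DateTime)    # assumed: when last crawled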
def testScheduleExisting(self):
    """ coverage: schedule a url that already exists in the db, as if
    it had already been scheduled/checked once """
    recs = self.make_data()
    self.scheduled_backdate_recs(recs, 60)
    self.checked_backdate_recs(recs, 50)

    with transaction.manager:
        # re-attach the detached record to the current session
        rec = meta.Session().merge(recs[0])
        url = rec.url
        # scheduling the same url again should reuse the existing row
        newrec = scheduler.mark_job_scheduled(url)
        meta.Session().add(newrec)
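# scheduled_backdate_recs and checked_backdate_recs are helpers used by
# testScheduleExisting but not shown in this fragment. A minimal sketch of
# what they might do, assuming the second argument is a number of minutes
# and the timestamp columns are named date_scheduled / date_checked (both
# assumptions):

def scheduled_backdate_recs(self, recs, minutes):
    """ push each record's scheduled timestamp into the past """
    with transaction.manager:
        for rec in recs:
            rec = meta.Session().merge(rec)
            rec.date_scheduled = (datetime.datetime.now() -
                                  datetime.timedelta(minutes=minutes))

def checked_backdate_recs(self, recs, minutes):
    """ push each record's checked timestamp into the past """
    with transaction.manager:
        for rec in recs:
            rec = meta.Session().merge(rec)
            rec.date_checked = (datetime.datetime.now() -
                                datetime.timedelta(minutes=minutes))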