Example #1
    def testCrawlJobsScheduledChecked(self):
        """
        tests out the mark_job_scheduled and mark_job_checked logic
        """
        urls = [u"http://feeds.feedburner.com/43folders",
                u"http://advocacy.python.org/podcasts/littlebit.rss",
                u"http://friendfeed.com/alawrence?format=atom",
                u"http://feeds.feedburner.com/antiwar"]

        # indicate url is scheduled to be crawled
        with transaction.manager:
            for url in urls:
                rec = scheduler.mark_job_scheduled(url)
                self.assert_(rec, "no rec for url %s" % url)

        recs = meta.Session().query(scheduler.CrawlJobModel).all()
        self.assert_(len(recs) == len(urls), (len(recs), len(urls)))

        # pretend we crawled the last url and update its record
        with transaction.manager:
            url = urls[-1]
            etag = str(uuid.uuid4())
            last_modified = datetime.datetime.now()
            rec = scheduler.mark_job_checked(url,
                                             etag=etag,
                                             last_modified=last_modified)
            self.assert_(rec, "no rec for url %s" % url)
            self.assert_(etag == rec.etag)
            self.assert_(last_modified == rec.last_modified)
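
The tests above assume import uuid, import datetime, import transaction, plus the project's scheduler and meta modules. For context, a minimal sketch of the scheduler helpers exercised in Example #1 follows; the table name, the column names, and the get-or-create logic are assumptions, not the project's actual implementation.

# Hypothetical sketch of the scheduler module exercised above. Column names
# and meta.Base being a declarative base are assumptions.
import datetime

from sqlalchemy import Column, DateTime, Integer, Unicode

from myproject.model import meta  # assumed project layout


class CrawlJobModel(meta.Base):
    __tablename__ = "crawl_jobs"

    id = Column(Integer, primary_key=True)
    url = Column(Unicode(512), unique=True, nullable=False)
    etag = Column(Unicode(128))
    last_modified = Column(DateTime)
    scheduled_at = Column(DateTime)
    checked_at = Column(DateTime)


def mark_job_scheduled(url):
    """get or create the crawl job row for url and stamp it as scheduled"""
    session = meta.Session()
    rec = session.query(CrawlJobModel).filter_by(url=url).first()
    if rec is None:
        rec = CrawlJobModel(url=url)
        session.add(rec)
    rec.scheduled_at = datetime.datetime.now()
    return rec


def mark_job_checked(url, etag=None, last_modified=None):
    """record the outcome of a crawl: the feed's etag and last-modified value"""
    session = meta.Session()
    rec = session.query(CrawlJobModel).filter_by(url=url).first()
    if rec is not None:
        rec.etag = etag
        rec.last_modified = last_modified
        rec.checked_at = datetime.datetime.now()
    return rec
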
Example #2
    def make_data(self):
        """
        set up crawl job records for a fixed set of feed urls
        """
        urls = [u"http://feeds.feedburner.com/43folders",
                u"http://advocacy.python.org/podcasts/littlebit.rss",
                u"http://friendfeed.com/alawrence?format=atom",
                u"http://feeds.feedburner.com/antiwar"]
        recs = []
        # indicate urls are scheduled
        with transaction.manager:
            for url in urls:
                recs.append(scheduler.mark_job_scheduled(url))

        return recs
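
Because transaction.manager commits when its with-block exits, the records returned by make_data are typically detached from the session by the time a test touches them, which is why Example #3 below merges one back first. A hypothetical usage sketch:

    def testMakeDataDetached(self):
        """
        hypothetical usage sketch: records returned by make_data are
        detached once transaction.manager commits, so merge them back
        into the session before updating them in a new transaction
        """
        recs = self.make_data()
        with transaction.manager:
            rec = meta.Session().merge(recs[0])
            rec.etag = u"deadbeef"  # hypothetical field update
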
Example #3
    def testScheduleExisting(self):
        """
        coverage: schedule a url that already exists in the db, as if it
        had already been scheduled and checked once before
        """
        recs = self.make_data()
        self.scheduled_backdate_recs(recs, 60)
        self.checked_backdate_recs(recs, 50)

        with transaction.manager:
            rec = meta.Session().merge(recs[0])
            url = rec.url
            newrec = scheduler.mark_job_scheduled(url)
            meta.Session().add(newrec)
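
The scheduled_backdate_recs and checked_backdate_recs helpers called above are not shown in these examples. A hypothetical sketch of what they might do follows; the column names (scheduled_at, checked_at) and the time unit (minutes) are assumptions.

    # hypothetical versions of the backdate helpers used in Example #3
    def scheduled_backdate_recs(self, recs, minutes):
        """push each record's scheduled timestamp back by the given minutes"""
        delta = datetime.timedelta(minutes=minutes)
        with transaction.manager:
            for rec in recs:
                rec = meta.Session().merge(rec)
                rec.scheduled_at = datetime.datetime.now() - delta

    def checked_backdate_recs(self, recs, minutes):
        """push each record's checked timestamp back by the given minutes"""
        delta = datetime.timedelta(minutes=minutes)
        with transaction.manager:
            for rec in recs:
                rec = meta.Session().merge(rec)
                rec.checked_at = datetime.datetime.now() - delta
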