def test_delay(self):
    """Crawl with DOWNLOAD_DELAY=1 and verify every gap between
    consecutive downloads is comfortably above zero (>0.45s)."""
    spider = FollowAllSpider()
    yield docrawl(spider, {"DOWNLOAD_DELAY": 1})
    # Walk the recorded timestamps pairwise and check each spacing.
    for earlier, later in zip(spider.times, spider.times[1:]):
        gap = later - earlier
        self.assertTrue(gap > 0.45, "download delay too small: %s" % gap)
def test_closespider_pagecount(self):
    """CLOSESPIDER_PAGECOUNT must stop the crawl with the right close
    reason once enough responses have been received."""
    close_on = 5
    spider = FollowAllSpider()
    yield docrawl(spider, {'CLOSESPIDER_PAGECOUNT': close_on})
    self.assertEqual(spider.meta['close_reason'], 'closespider_pagecount')
    # The spider may receive a few extra responses already in flight
    # when the extension fires, hence >= rather than ==.
    received = spider.crawler.stats.get_value('response_received_count')
    self.assertTrue(received >= close_on)
def _test_delay(self, delay, randomize):
    """Crawl with the given delay settings and assert the average
    spacing between downloads is close enough to the requested delay.

    A wider tolerance is allowed when RANDOMIZE_DOWNLOAD_DELAY is on,
    since actual delays then vary around the configured value.
    """
    settings = {"DOWNLOAD_DELAY": delay,
                'RANDOMIZE_DOWNLOAD_DELAY': randomize}
    spider = FollowAllSpider(maxlatency=delay * 2)
    yield docrawl(spider, settings)
    times = spider.times
    # Average gap = total elapsed time over number of intervals.
    average = (times[-1] - times[0]) / (len(times) - 1)
    tolerance = 0.6 if randomize else 0.2
    self.assertTrue(average > delay * (1 - tolerance),
                    "download delay too small: %s" % average)
def test_closespider_timeout(self):
    """CLOSESPIDER_TIMEOUT must close the spider with the right reason
    after at least `close_on` seconds have elapsed."""
    spider = FollowAllSpider(total=1000000)
    close_on = 0.1
    yield docrawl(spider, {'CLOSESPIDER_TIMEOUT': close_on})
    reason = spider.meta['close_reason']
    self.assertEqual(reason, 'closespider_timeout')
    stats = spider.crawler.stats
    start = stats.get_value('start_time')
    stop = stats.get_value('finish_time')
    diff = stop - start
    # BUG FIX: the previous `diff.seconds + diff.microseconds` added a
    # raw microsecond count (an int up to 999999) directly to whole
    # seconds — wildly inflating the elapsed time and making the
    # assertion trivially pass — and it also ignored `diff.days`.
    # timedelta.total_seconds() yields the true duration in seconds.
    total_seconds = diff.total_seconds()
    self.assertTrue(total_seconds >= close_on)
def test_follow_all(self):
    """A full crawl should visit the start url plus the 10 links it links to."""
    spider = FollowAllSpider()
    yield docrawl(spider)
    self.assertEqual(len(spider.urls_visited), 11)  # 10 + start_url
def run(self, args, opts):
    """Start a long-running FollowAllSpider crawl against the local
    mock server using this command's crawler."""
    with MockServer():
        self.crawler.crawl(FollowAllSpider(total=100000))
        self.crawler.start()
def run(self, args, opts):
    """Start a long-running FollowAllSpider crawl against the local
    mock server, on a crawler freshly created from the crawler process."""
    with MockServer():
        new_crawler = self.crawler_process.create_crawler()
        new_crawler.crawl(FollowAllSpider(total=100000))
        self.crawler_process.start()