def run(self, args, opts):
    """Run contract checks for the given spiders (or all spiders).

    With ``--list`` only print the contract-tested methods per spider;
    otherwise crawl the contract requests and print a unittest-style
    summary, setting ``self.exitcode`` to 1 on any failure/error.
    """
    # Resolve contract classes from base + project settings and load them.
    contract_paths = build_component_list(
        self.settings.getwithbase('SPIDER_CONTRACTS'))
    contracts_manager = ContractsManager(
        load_object(path) for path in contract_paths)

    # unittest machinery used only for its reporting facilities.
    text_runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    check_result = TextTestResult(
        text_runner.stream, text_runner.descriptions, text_runner.verbosity)

    # spider name -> contract-tested method names (only used by --list).
    contract_reqs = defaultdict(list)

    loader = self.crawler_process.spider_loader
    for spider_name in args or loader.list():
        spidercls = loader.load(spider_name)
        # Replace start_requests so the crawl issues contract requests.
        spidercls.start_requests = lambda s: contracts_manager.from_spider(
            s, check_result)

        methods = contracts_manager.tested_methods_from_spidercls(spidercls)
        if opts.list:
            for method in methods:
                contract_reqs[spidercls.name].append(method)
        elif methods:
            # Only schedule spiders that actually define contracts.
            self.crawler_process.crawl(spidercls)

    # Either list the tested methods, or run the checks and report.
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        check_result.printErrors()
        check_result.printSummary(start, stop)
        self.exitcode = int(not check_result.wasSuccessful())
def run(self, args, opts):
    """Run contract checks for the given spiders (or all spiders).

    With ``--list`` only print the contract-tested methods per spider;
    otherwise crawl the contract requests and print a unittest-style
    summary, setting ``self.exitcode`` to 1 on any failure/error.
    """
    # Resolve contract classes from base + project settings and load them.
    contract_paths = build_component_list(
        self.settings.getwithbase('SPIDER_CONTRACTS'))
    contracts_manager = ContractsManager(
        load_object(path) for path in contract_paths)

    # unittest machinery used only for its reporting facilities.
    text_runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    check_result = TextTestResult(
        text_runner.stream, text_runner.descriptions, text_runner.verbosity)

    # spider name -> contract-tested method names (only used by --list).
    contract_reqs = defaultdict(list)

    loader = self.crawler_process.spider_loader

    # Expose SCRAPY_CHECK in the environment while spiders are loaded,
    # so spider code can detect it is running under `scrapy check`.
    with set_environ(SCRAPY_CHECK='true'):
        for spider_name in args or loader.list():
            spidercls = loader.load(spider_name)
            # Replace start_requests so the crawl issues contract requests.
            spidercls.start_requests = lambda s: contracts_manager.from_spider(
                s, check_result)

            methods = contracts_manager.tested_methods_from_spidercls(
                spidercls)
            if opts.list:
                for method in methods:
                    contract_reqs[spidercls.name].append(method)
            elif methods:
                # Only schedule spiders that actually define contracts.
                self.crawler_process.crawl(spidercls)

    # Either list the tested methods, or run the checks and report.
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        check_result.printErrors()
        check_result.printSummary(start, stop)
        self.exitcode = int(not check_result.wasSuccessful())
def run(self, args, opts):
    """Run contract checks for the given spiders (or all spiders).

    With ``--list`` only print the contract-tested methods per spider;
    otherwise crawl the contract requests and print a unittest-style
    summary, setting ``self.exitcode`` to 1 on any failure/error.
    """
    # Resolve contract classes from the base and user setting entries.
    contract_paths = build_component_list(
        self.settings['SPIDER_CONTRACTS_BASE'],
        self.settings['SPIDER_CONTRACTS'],
    )
    contracts_manager = ContractsManager(
        [load_object(path) for path in contract_paths])

    # unittest machinery used only for its reporting facilities.
    text_runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
    check_result = TextTestResult(
        text_runner.stream, text_runner.descriptions, text_runner.verbosity)

    # spider name -> contract-tested method names (only used by --list).
    contract_reqs = defaultdict(list)

    spider_manager = self.crawler_process.spiders
    for spider_name in args or spider_manager.list():
        spidercls = spider_manager.load(spider_name)
        # Replace start_requests so the crawl issues contract requests.
        spidercls.start_requests = lambda s: contracts_manager.from_spider(
            s, check_result)

        methods = contracts_manager.tested_methods_from_spidercls(spidercls)
        if opts.list:
            for method in methods:
                contract_reqs[spidercls.name].append(method)
        elif methods:
            # Only schedule spiders that actually define contracts.
            self.crawler_process.crawl(spidercls)

    # Either list the tested methods, or run the checks and report.
    if opts.list:
        for spider, methods in sorted(contract_reqs.items()):
            if not methods and not opts.verbose:
                continue
            print(spider)
            for method in sorted(methods):
                print(' * %s' % method)
    else:
        start = time.time()
        self.crawler_process.start()
        stop = time.time()

        check_result.printErrors()
        check_result.printSummary(start, stop)
        self.exitcode = int(not check_result.wasSuccessful())
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager: contract extraction, request building,
    and how callback/errback outcomes are recorded on a TextTestResult."""

    # Contract classes handed to the manager under test.
    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        """Create a fresh manager and a silent, in-memory result collector."""
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        # Nothing recorded so far: no failures and no errors.
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        # At least one contract failure, but no hard errors.
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        test_spider = TestSpider()

        # Contracts are extracted from the method docstring.
        extracted = self.conman.extract_contracts(test_spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(frozenset(type(c) for c in extracted),
                         frozenset([UrlContract, ReturnsContract]))

        # A request is built for a method carrying a @url contract...
        req = self.conman.from_method(test_spider.returns_request,
                                      self.results)
        self.assertNotEqual(req, None)

        # ...but not for a method without one.
        req = self.conman.from_method(test_spider.parse_no_url, self.results)
        self.assertEqual(req, None)

    def test_returns(self):
        test_spider = TestSpider()
        fake_response = ResponseMock()

        # Callbacks honouring their @returns contract record no failure.
        for method in (test_spider.returns_item,
                       test_spider.returns_dict_item,
                       test_spider.returns_request):
            req = self.conman.from_method(method, self.results)
            req.callback(fake_response)
            self.should_succeed()

        # Callbacks violating the contract record a failure.
        for method in (test_spider.returns_fail,
                       test_spider.returns_dict_fail):
            req = self.conman.from_method(method, self.results)
            req.callback(fake_response)
            self.should_fail()

    def test_scrapes(self):
        test_spider = TestSpider()
        fake_response = ResponseMock()

        # Items with all @scrapes fields present pass.
        for method in (test_spider.scrapes_item_ok,
                       test_spider.scrapes_dict_item_ok):
            req = self.conman.from_method(method, self.results)
            req.callback(fake_response)
            self.should_succeed()

        # Items missing @scrapes fields fail.
        for method in (test_spider.scrapes_item_fail,
                       test_spider.scrapes_dict_item_fail):
            req = self.conman.from_method(method, self.results)
            req.callback(fake_response)
            self.should_fail()

    def test_errback(self):
        test_spider = TestSpider()
        fake_response = ResponseMock()

        # Build a real twisted Failure from an actually-raised HttpError.
        try:
            raise HttpError(fake_response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        req = self.conman.from_method(test_spider.returns_request,
                                      self.results)
        req.errback(failure_mock)

        # Errbacks are reported as errors, never as contract failures.
        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    def test_inherited_contracts(self):
        # Contracts declared on a parent spider apply to the subclass too.
        test_spider = InheritsTestSpider()
        self.assertTrue(self.conman.from_spider(test_spider, self.results))
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager: contract extraction from docstrings,
    request building, cb_kwargs handling, custom contracts, and how
    callback/errback outcomes are recorded on a TextTestResult.

    NOTE: the should_* helpers inspect the shared ``self.results``
    accumulator, so the call order within each test method matters
    (success checks must precede any expected failure/error).
    """

    # Contract classes handed to the manager under test in setUp.
    contracts = [
        UrlContract,
        CallbackKeywordArgumentsContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        """Create a fresh manager and a silent, in-memory result collector."""
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        # Nothing recorded so far: no failures and no errors.
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        # At least one contract failure, but no hard errors.
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        # At least one hard error recorded (failures not checked).
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly from the method docstring
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, ReturnsContract]))

        # returns a request for a method with a valid @url contract
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request is built when the @url contract is missing
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_cb_kwargs(self):
        spider = TestSpider()
        response = ResponseMock()

        # extract contracts correctly (including @cb_kwargs)
        contracts = self.conman.extract_contracts(
            spider.returns_request_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, CallbackKeywordArgumentsContract,
                       ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, CallbackKeywordArgumentsContract,
                       ReturnsContract]))

        contracts = self.conman.extract_contracts(
            spider.returns_item_cb_kwargs_error_unexpected_keyword)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, CallbackKeywordArgumentsContract,
                       ReturnsContract]))

        contracts = self.conman.extract_contracts(
            spider.returns_item_cb_kwargs_error_missing_argument)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, ReturnsContract]))

        # returns_request: cb_kwargs forwarded to the callback succeed
        request = self.conman.from_method(spider.returns_request_cb_kwargs,
                                          self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item: cb_kwargs forwarded to the callback succeed
        request = self.conman.from_method(spider.returns_item_cb_kwargs,
                                          self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item (error, callback doesn't take keyword arguments)
        request = self.conman.from_method(
            spider.returns_item_cb_kwargs_error_unexpected_keyword,
            self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

        # returns_item (error, contract doesn't provide keyword arguments)
        request = self.conman.from_method(
            spider.returns_item_cb_kwargs_error_missing_argument,
            self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item: satisfies its @returns contract
        request = self.conman.from_method(spider.returns_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_dict_item: plain dicts count as items too
        request = self.conman.from_method(spider.returns_dict_item,
                                          self.results)
        request.callback(response)
        self.should_succeed()

        # returns_request: returning a Request also satisfies @returns
        request = self.conman.from_method(spider.returns_request, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_fail: violates the @returns contract
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

        # returns_dict_fail: dict variant of the violation
        request = self.conman.from_method(spider.returns_dict_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok: all @scrapes fields present
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_dict_item_ok: dict variant
        request = self.conman.from_method(spider.scrapes_dict_item_ok,
                                          self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_item_fail: a required field is missing
        request = self.conman.from_method(spider.scrapes_item_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_dict_item_fail: dict variant of the failure
        request = self.conman.from_method(spider.scrapes_dict_item_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_multiple_missing_fields: failure message lists every
        # missing field name
        request = self.conman.from_method(spider.scrapes_multiple_missing_fields,
                                          self.results)
        request.callback(response)
        self.should_fail()
        message = 'ContractFail: Missing fields: name, url'
        assert message in self.results.failures[-1][-1]

    def test_custom_contracts(self):
        # User-defined contract classes are honoured by from_spider.
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()
        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        # Build a real twisted Failure from an actually-raised HttpError.
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        # Errbacks are reported as errors, never as contract failures.
        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):
        """Two methods with identical @url contracts both get crawled
        (duplicate filtering must not drop the second request)."""

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                self.visited = 0

            # Deliberately named ``s``: the closed-over ``self`` is the
            # enclosing TestCase, not the spider instance.
            def start_requests(s):
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # Point both contracts at the same live mock-server URL.
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            TestSameUrlSpider.parse_first.__doc__ = contract_doc
            TestSameUrlSpider.parse_second.__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        # Both callbacks must have run despite sharing the URL.
        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        # A custom contract may build a FormRequest instead of a GET.
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        # Contracts declared on a parent spider apply to the subclass too.
        spider = InheritsTestSpider()

        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager: contract extraction from docstrings,
    request building, custom contracts, and how callback/errback
    outcomes are recorded on a TextTestResult.

    NOTE: the should_* helpers inspect the shared ``self.results``
    accumulator, so the call order within each test method matters
    (success checks must precede any expected failure/error).
    """

    # Contract classes handed to the manager under test in setUp.
    contracts = [
        UrlContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        """Create a fresh manager and a silent, in-memory result collector."""
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        # Nothing recorded so far: no failures and no errors.
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        # At least one contract failure, but no hard errors.
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        # At least one hard error recorded (failures not checked).
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly from the method docstring
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, ReturnsContract]))

        # returns a request for a method with a valid @url contract
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request is built when the @url contract is missing
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item: satisfies its @returns contract
        request = self.conman.from_method(spider.returns_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_dict_item: plain dicts count as items too
        request = self.conman.from_method(spider.returns_dict_item,
                                          self.results)
        request.callback(response)
        self.should_succeed()

        # returns_request: returning a Request also satisfies @returns
        request = self.conman.from_method(spider.returns_request, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_fail: violates the @returns contract
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

        # returns_dict_fail: dict variant of the violation
        request = self.conman.from_method(spider.returns_dict_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok: all @scrapes fields present
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_dict_item_ok: dict variant
        request = self.conman.from_method(spider.scrapes_dict_item_ok,
                                          self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_item_fail: a required field is missing
        request = self.conman.from_method(spider.scrapes_item_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_dict_item_fail: dict variant of the failure
        request = self.conman.from_method(spider.scrapes_dict_item_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

    def test_custom_contracts(self):
        # User-defined contract classes are honoured by from_spider.
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()
        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        # Build a real twisted Failure from an actually-raised HttpError.
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        # Errbacks are reported as errors, never as contract failures.
        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):
        """Two methods with identical @url contracts both get crawled
        (duplicate filtering must not drop the second request)."""

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                self.visited = 0

            # Deliberately named ``s``: the closed-over ``self`` is the
            # enclosing TestCase, not the spider instance.
            def start_requests(s):
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # Point both contracts at the same live mock-server URL.
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            # get_unbound_function keeps this working on Python 2, where
            # accessing the method on the class yields an unbound method.
            get_unbound_function(TestSameUrlSpider.parse_first).__doc__ = contract_doc
            get_unbound_function(TestSameUrlSpider.parse_second).__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        # Both callbacks must have run despite sharing the URL.
        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        # A custom contract may build a FormRequest instead of a GET.
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        # Contracts declared on a parent spider apply to the subclass too.
        spider = InheritsTestSpider()

        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)