示例#1
0
文件: check.py 项目: xacprod/ve1
    def run(self, args, opts):
        """Run contract checks for the given spiders (or all spiders).

        With --list, only print the contract callback names per spider;
        otherwise schedule one crawl per spider and print unittest-style
        errors at the end.
        """
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        spiders = spman_cls.from_settings(self.settings)

        for spider in args or spiders.list():
            spider = spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # Python 3 fix: dict.iteritems() was removed and print is a
            # function; items()/print() also work on Python 2.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            self.crawler_process.start()
            self.results.printErrors()
示例#2
0
    def run(self, args, opts):
        """Run contract checks (or just list them with -l) using the
        command's single crawler."""
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # pretend the engine always has capacity so every request is scheduled
        self.crawler.engine.has_capacity = lambda: True

        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # Python 3 fix: iteritems() -> items(); print as a function.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            self.crawler.start()
示例#3
0
    def test_scrapes(self):
        """@scrapes contract: ok method yields a TestItem; fail method raises
        ContractFail."""
        conman = ContractsManager(self.contracts)

        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = conman.from_method(spider.scrapes_item_ok, fail=True)
        output = request.callback(response)
        # Python 3 fix: map() returns a lazy iterator which never compares
        # equal to a list; materialize the types before comparing.
        self.assertEqual([type(x) for x in output], [TestItem])

        # scrapes_item_fail
        request = conman.from_method(spider.scrapes_item_fail, fail=True)
        self.assertRaises(ContractFail, request.callback, response)
示例#4
0
    def test_scrapes(self):
        """@scrapes contract: ok method yields a TestItem; fail method raises
        ContractFail."""
        conman = ContractsManager(self.contracts)

        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = conman.from_method(spider.scrapes_item_ok, fail=True)
        output = request.callback(response)
        # Python 3 fix: map() is lazy and never equals a list — materialize.
        self.assertEqual([type(x) for x in output], [TestItem])

        # scrapes_item_fail
        request = conman.from_method(spider.scrapes_item_fail, fail=True)
        self.assertRaises(ContractFail, request.callback, response)
示例#5
0
文件: check.py 项目: amferraz/scrapy
    def run(self, args, opts):
        """Run (or list, with -l) contract checks for the given spiders."""
        # load contracts
        contracts = build_component_list(self.settings["SPIDER_CONTRACTS_BASE"], self.settings["SPIDER_CONTRACTS"])
        self.conman = ContractsManager([load_object(c) for c in contracts])
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings["SPIDER_MANAGER_CLASS"])
        spiders = spman_cls.from_settings(self.settings)

        for spider in args or spiders.list():
            spider = spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # Python 3 fix: iteritems() -> items(); print statement -> print()
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print("  * %s" % method)
        else:
            self.crawler_process.start()
            self.results.printErrors()
示例#6
0
    def run(self, args, opts):
        """Run contract checks (or just list them with -l) using the
        command's single crawler."""
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # pretend the engine always has capacity so every request is scheduled
        self.crawler.engine.has_capacity = lambda: True

        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # Python 3 fix: iteritems() -> items(); print as a function.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            self.crawler.start()
示例#7
0
    def test_contracts(self):
        """Contract extraction and request construction from method docstrings."""
        conman = ContractsManager(self.contracts)

        # the two contracts on returns_request are both extracted
        extracted = conman.extract_contracts(TestSpider.returns_request)
        self.assertEqual(len(extracted), 2)
        extracted_types = frozenset(type(c) for c in extracted)
        self.assertEqual(extracted_types,
                         frozenset([UrlContract, ReturnsContract]))

        # a method with a valid @url contract yields a request
        valid_request = conman.from_method(TestSpider.returns_request)
        self.assertNotEqual(valid_request, None)

        # a method lacking @url yields no request
        missing_url_request = conman.from_method(TestSpider.parse_no_url)
        self.assertEqual(missing_url_request, None)
示例#8
0
    def test_contracts(self):
        """Contract extraction and request construction from method docstrings."""
        conman = ContractsManager(self.contracts)

        # extraction: returns_request carries exactly two contracts
        found = conman.extract_contracts(TestSpider.returns_request)
        self.assertEqual(len(found), 2)
        self.assertEqual(frozenset(type(c) for c in found),
                         frozenset([UrlContract, ReturnsContract]))

        # construction: a valid method gives a non-None request
        built = conman.from_method(TestSpider.returns_request)
        self.assertNotEqual(built, None)

        # construction: a method without @url gives None
        built = conman.from_method(TestSpider.parse_no_url)
        self.assertEqual(built, None)
示例#9
0
    def run(self, args, opts):
        """Run `scrapy check`: execute spider contracts, or list them with -l.

        Sets self.exitcode to 1 when any contract check fails.
        """
        # load contracts
        # resolve the contract class paths from the SPIDER_CONTRACTS
        # settings (project values merged over the _BASE defaults)
        contracts = build_component_list(
            self.settings.getwithbase('SPIDER_CONTRACTS'))
        # load the contract classes through the ContractsManager
        conman = ContractsManager(load_object(c) for c in contracts)
        # runner only supplies stream/descriptions/verbosity for the result
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        # collects the per-contract successes/failures/errors
        result = TextTestResult(runner.stream, runner.descriptions,
                                runner.verbosity)

        # contract requests
        # callback names to display per spider (only used with --list)
        contract_reqs = defaultdict(list)
        # spider loader taken from the crawler process
        spider_loader = self.crawler_process.spider_loader

        for spidername in args or spider_loader.list():
            spidercls = spider_loader.load(spidername)
            # contract-generated requests replace the spider's own ones
            spidercls.start_requests = lambda s: conman.from_spider(s, result)
            # collect the methods that carry contracts
            tested_methods = conman.tested_methods_from_spidercls(spidercls)
            if opts.list:
                for method in tested_methods:
                    contract_reqs[spidercls.name].append(method)
            elif tested_methods:  # crawl only spiders that have tested methods
                self.crawler_process.crawl(spidercls)

        # start checks
        # run the checks (or print the listing)
        if opts.list:
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            self.exitcode = int(not result.wasSuccessful())
示例#10
0
class Command(ScrapyCommand):
    """`scrapy check` command: run spider contracts, or list them with -l."""

    requires_project = True
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
            help="only list contracts, without checking them")

    def run(self, args, opts):
        """Build contract requests per spider and crawl (or list) them."""
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests
        contract_reqs = defaultdict(list)
        # pretend the engine always has capacity so every request is scheduled
        self.crawler.engine.has_capacity = lambda: True

        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            # Python 3 fix: iteritems() -> items(); print as a function.
            for spider, methods in sorted(contract_reqs.items()):
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            self.crawler.start()

    def get_requests(self, spider):
        """Build one contract request for every documented callable on the
        spider; callbacks are wrapped by _generate so contract post-hooks run."""
        requests = []

        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                # bind the unbound function to this spider instance
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method)

                if request:
                    request.callback = _generate(request.callback)
                    requests.append(request)

        return requests
示例#11
0
    def run(self, args, opts):
        """Entry point for `scrapy check`: list or run spider contracts.

        Each spider class's start_requests is replaced with contract-derived
        requests; SCRAPY_CHECK is set in the environment so spiders can
        detect check mode. Sets self.exitcode to 1 on any failure.
        """
        # load contracts
        contracts = build_component_list(
            self.settings.getwithbase('SPIDER_CONTRACTS'))
        conman = ContractsManager(load_object(c) for c in contracts)
        # the runner only supplies stream/descriptions/verbosity for result
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        result = TextTestResult(runner.stream, runner.descriptions,
                                runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        spider_loader = self.crawler_process.spider_loader

        with set_environ(SCRAPY_CHECK='true'):
            for spidername in args or spider_loader.list():
                spidercls = spider_loader.load(spidername)
                # contract-generated requests replace the spider's own ones
                spidercls.start_requests = lambda s: conman.from_spider(
                    s, result)

                tested_methods = conman.tested_methods_from_spidercls(
                    spidercls)
                if opts.list:
                    for method in tested_methods:
                        contract_reqs[spidercls.name].append(method)
                elif tested_methods:
                    # only crawl spiders that actually have contracts
                    self.crawler_process.crawl(spidercls)

        # start checks
        if opts.list:
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            # non-zero exit code when any contract failed or errored
            self.exitcode = int(not result.wasSuccessful())
示例#12
0
    def test_returns(self):
        """@returns contract on item/request callbacks, including a failing one."""
        conman = ContractsManager(self.contracts)

        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = conman.from_method(spider.returns_item, fail=True)
        output = request.callback(response)
        # Python 3 fix: map() is lazy and never equals a list — materialize.
        self.assertEqual([type(x) for x in output], [TestItem])

        # returns_request
        request = conman.from_method(spider.returns_request, fail=True)
        output = request.callback(response)
        self.assertEqual([type(x) for x in output], [Request])

        # returns_fail
        request = conman.from_method(spider.returns_fail, fail=True)
        self.assertRaises(ContractFail, request.callback, response)
示例#13
0
    def test_returns(self):
        """@returns contract on item/request callbacks, including a failing one."""
        conman = ContractsManager(self.contracts)

        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = conman.from_method(spider.returns_item, fail=True)
        output = request.callback(response)
        # Python 3 fix: map() is lazy and never equals a list — materialize.
        self.assertEqual([type(x) for x in output], [TestItem])

        # returns_request
        request = conman.from_method(spider.returns_request, fail=True)
        output = request.callback(response)
        self.assertEqual([type(x) for x in output], [Request])

        # returns_fail
        request = conman.from_method(spider.returns_fail, fail=True)
        self.assertRaises(ContractFail, request.callback, response)
示例#14
0
文件: check.py 项目: jtwaleson/scrapy
    def run(self, args, opts):
        """Run `scrapy check`: list contract-tested methods with -l, or crawl
        the spiders and print a unittest-style summary; sets self.exitcode."""
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        conman = ContractsManager([load_object(c) for c in contracts])
        # the runner only supplies stream/descriptions/verbosity for result
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        result = TextTestResult(runner.stream, runner.descriptions, runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        spiders = self.crawler_process.spiders

        for spidername in args or spiders.list():
            spidercls = spiders.load(spidername)
            # contract-generated requests replace the spider's own ones
            spidercls.start_requests = lambda s: conman.from_spider(s, result)

            tested_methods = conman.tested_methods_from_spidercls(spidercls)
            if opts.list:
                for method in tested_methods:
                    contract_reqs[spidercls.name].append(method)
            elif tested_methods:
                # only crawl spiders that actually have contracts
                self.crawler_process.crawl(spidercls)

        # start checks
        if opts.list:
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            # non-zero exit code when any contract failed or errored
            self.exitcode = int(not result.wasSuccessful())
示例#15
0
    def run(self, args, opts):
        """Check spider contracts: with --list just print tested callback
        names per spider, otherwise crawl and print a unittest-style summary.
        Sets self.exitcode to 1 when any contract check fails."""
        # load contracts
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        conman = ContractsManager([load_object(c) for c in contracts])
        # the runner only supplies stream/descriptions/verbosity for result
        runner = TextTestRunner(verbosity=2 if opts.verbose else 1)
        result = TextTestResult(runner.stream, runner.descriptions,
                                runner.verbosity)

        # contract requests
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        spiders = spman_cls.from_settings(self.settings)

        for spider in args or spiders.list():
            spider = spiders.create(spider)
            requests = self.get_requests(spider, conman, result)
            # pre-create the entry so spiders with no contract requests still
            # appear in the --list output (when verbose)
            contract_reqs[spider.name] = []

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks
        if opts.list:
            for spider, methods in sorted(contract_reqs.items()):
                if not methods and not opts.verbose:
                    continue
                print(spider)
                for method in sorted(methods):
                    print('  * %s' % method)
        else:
            start = time.time()
            self.crawler_process.start()
            stop = time.time()

            result.printErrors()
            result.printSummary(start, stop)
            # non-zero exit code when any contract failed or errored
            self.exitcode = int(not result.wasSuccessful())
示例#16
0
class ContractsManagerTest(unittest.TestCase):
    """Tests for ContractsManager: contract extraction, request building,
    cb_kwargs handling, returns/scrapes checks, errbacks, and custom,
    form and inherited contracts."""

    # contract classes injected into the manager in setUp
    contracts = [
        UrlContract,
        CallbackKeywordArgumentsContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        # fresh manager and a silent result collector for every test
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None, descriptions=False, verbosity=0)

    def should_succeed(self):
        """Assert that no contract failure or error was recorded."""
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        """Assert a contract failure (not an error) was recorded."""
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_error(self):
        """Assert at least one error was recorded."""
        self.assertTrue(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_cb_kwargs(self):
        spider = TestSpider()
        response = ResponseMock()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs_error_unexpected_keyword)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, CallbackKeywordArgumentsContract, ReturnsContract]))

        contracts = self.conman.extract_contracts(spider.returns_item_cb_kwargs_error_missing_argument)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(frozenset(type(x) for x in contracts),
                         frozenset([UrlContract, ReturnsContract]))

        # returns_request
        request = self.conman.from_method(spider.returns_request_cb_kwargs, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item
        request = self.conman.from_method(spider.returns_item_cb_kwargs, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_succeed()

        # returns_item (error, callback doesn't take keyword arguments)
        request = self.conman.from_method(spider.returns_item_cb_kwargs_error_unexpected_keyword, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

        # returns_item (error, contract doesn't provide keyword arguments)
        request = self.conman.from_method(spider.returns_item_cb_kwargs_error_missing_argument, self.results)
        request.callback(response, **request.cb_kwargs)
        self.should_error()

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(spider.returns_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_dict_item
        request = self.conman.from_method(spider.returns_dict_item, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_request
        request = self.conman.from_method(spider.returns_request, self.results)
        request.callback(response)
        self.should_succeed()

        # returns_fail
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

        # returns_dict_fail
        request = self.conman.from_method(spider.returns_dict_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_dict_item_ok
        request = self.conman.from_method(spider.scrapes_dict_item_ok, self.results)
        request.callback(response)
        self.should_succeed()

        # scrapes_item_fail
        request = self.conman.from_method(spider.scrapes_item_fail, self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_dict_item_fail
        request = self.conman.from_method(spider.scrapes_dict_item_fail, self.results)
        request.callback(response)
        self.should_fail()

        # scrapes_multiple_missing_fields
        request = self.conman.from_method(spider.scrapes_multiple_missing_fields, self.results)
        request.callback(response)
        self.should_fail()
        # the failure traceback text should name both missing fields
        message = 'ContractFail: Missing fields: name, url'
        assert message in self.results.failures[-1][-1]

    def test_custom_contracts(self):
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()

        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        spider = TestSpider()
        response = ResponseMock()

        # build a twisted Failure carrying a real HttpError, as an errback
        # would receive it
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        request = self.conman.from_method(spider.returns_request, self.results)
        request.errback(failure_mock)

        self.assertFalse(self.results.failures)
        self.assertTrue(self.results.errors)

    @defer.inlineCallbacks
    def test_same_url(self):

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                self.visited = 0

            def start_requests(s):
                # NOTE: 's' is the spider instance; 'self' is the enclosing
                # test case captured via closure (intentional, not a typo).
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # both callbacks get the same @url contract, so both must still
            # be visited despite the duplicate URL
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            TestSameUrlSpider.parse_first.__doc__ = contract_doc
            TestSameUrlSpider.parse_second.__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        spider = TestSpider()
        request = self.conman.from_method(spider.custom_form, self.results)
        self.assertEqual(request.method, 'POST')
        self.assertIsInstance(request, FormRequest)

    def test_inherited_contracts(self):
        # contracts defined on a parent spider class apply to the subclass
        spider = InheritsTestSpider()

        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)
示例#17
0
 def setUp(self):
     """Create the test spider, a fresh ContractsManager and a silent
     (stream-less) TextTestResult for each test."""
     self.spider = TestBeibeiSpider()
     self.conman = ContractsManager(self.contracts)
     self.results = TextTestResult(stream=None,
                                   descriptions=False,
                                   verbosity=0)
示例#18
0
class CustomContractsTest(unittest.TestCase):
    """Tests for custom contracts (MetaContract, SpecificReturnsContract)
    against TestBeibeiSpider callbacks."""

    # contract classes made available to the manager
    contracts = [
        UrlContract, MetaContract, ReturnsContract, SpecificReturnsContract,
        ScrapesContract
    ]

    def setUp(self):
        # fresh spider, manager and silent result collector per test
        self.spider = TestBeibeiSpider()
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestResult(stream=None,
                                      descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        """Assert no failures and no errors were recorded."""
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        """Assert a failure (not an error) was recorded."""
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        # test if MetaContract and SpecificReturnsContract extracted properly
        # (4 contract instances but 3 distinct types — presumably one contract
        # type appears twice in set_meta's docstring; TODO confirm)
        contracts = self.conman.extract_contracts(self.spider.set_meta)
        self.assertEqual(len(contracts), 4)
        self.assertEqual(
            frozenset(type(x) for x in contracts),
            frozenset([UrlContract, MetaContract, SpecificReturnsContract]))

        # test if the meta is set properly
        request = self.conman.from_method(self.spider.set_meta, self.results)
        self.assertEqual(request.meta, {
            "metakey1": "metaval1",
            "metakey2": "metaval2"
        })

    def test_returns_details(self):
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(self.spider.returns_detail_ok,
                                          self.results)
        request.callback(response)
        self.should_succeed()

        request = self.conman.from_method(self.spider.returns_detail_fail,
                                          self.results)
        request.callback(response)
        self.should_fail()

    def test_returns_category_pages(self):
        response = ResponseMock()

        request = self.conman.from_method(
            self.spider.returns_detail_category_ok, self.results)
        request.callback(response)
        self.should_succeed()

        request = self.conman.from_method(
            self.spider.returns_detail_category_fail, self.results)
        request.callback(response)
        self.should_fail()
示例#19
0
 def setUp(self):
     """Build a fresh ContractsManager and a stream-less result collector."""
     self.conman = ContractsManager(self.contracts)
     self.results = TextTestRunner()._makeResult()
     # silence output: the result only accumulates failures/errors
     self.results.stream = None
示例#20
0
class ContractsManagerTest(unittest.TestCase):
    """ContractsManager tests using the three built-in contract types."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        # fresh manager and a stream-less result object per test
        self.conman = ContractsManager(self.contracts)
        self.results = TextTestRunner()._makeResult()
        self.results.stream = None

    def should_succeed(self):
        """Assert no failures and no errors were recorded."""
        self.assertFalse(self.results.failures)
        self.assertFalse(self.results.errors)

    def should_fail(self):
        """Assert a failure (not an error) was recorded."""
        self.assertTrue(self.results.failures)
        self.assertFalse(self.results.errors)

    def test_contracts(self):
        spider = TestSpider()

        # extract contracts correctly
        contracts = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(contracts), 2)
        self.assertEqual(frozenset(type(x) for x in contracts),
            frozenset([UrlContract, ReturnsContract]))

        # returns request for valid method
        request = self.conman.from_method(spider.returns_request, self.results)
        self.assertNotEqual(request, None)

        # no request for missing url
        request = self.conman.from_method(spider.parse_no_url, self.results)
        self.assertEqual(request, None)

    def test_returns(self):
        spider = TestSpider()
        response = ResponseMock()

        # returns_item
        request = self.conman.from_method(spider.returns_item, self.results)
        output = request.callback(response)
        self.assertEqual([type(x) for x in output], [TestItem])
        self.should_succeed()

        # returns_request
        request = self.conman.from_method(spider.returns_request, self.results)
        output = request.callback(response)
        self.assertEqual([type(x) for x in output], [Request])
        self.should_succeed()

        # returns_fail
        request = self.conman.from_method(spider.returns_fail, self.results)
        request.callback(response)
        self.should_fail()

    def test_scrapes(self):
        spider = TestSpider()
        response = ResponseMock()

        # scrapes_item_ok
        request = self.conman.from_method(spider.scrapes_item_ok, self.results)
        output = request.callback(response)
        self.assertEqual([type(x) for x in output], [TestItem])
        self.should_succeed()

        # scrapes_item_fail
        request = self.conman.from_method(spider.scrapes_item_fail,
                self.results)
        request.callback(response)
        self.should_fail()
示例#21
0
 def setUp(self):
     """Build a fresh ContractsManager and a stream-less result collector."""
     self.conman = ContractsManager(self.contracts)
     self.results = TextTestRunner()._makeResult()
     # silence output: the result only accumulates failures/errors
     self.results.stream = None
示例#22
0
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager request construction and contract checks."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        """Build a fresh manager and a silenced result collector per test."""
        self.conman = ContractsManager(self.contracts)
        # _makeResult() yields a TextTestResult; stream=None mutes output
        self.results = TextTestRunner()._makeResult()
        self.results.stream = None

    def should_succeed(self):
        """Assert that neither failures nor errors were recorded."""
        self.assertEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def should_fail(self):
        """Assert that a failure (but no error) was recorded."""
        self.assertNotEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def test_contracts(self):
        """Contracts are extracted from docstrings and turned into requests."""
        spider = TestSpider()

        # returns_request declares exactly a @url and a @returns contract
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(frozenset(type(c) for c in extracted),
                         frozenset([UrlContract, ReturnsContract]))

        # a method with a valid @url yields a request
        self.assertNotEqual(
            self.conman.from_method(spider.returns_request, self.results),
            None)

        # without a @url no request can be built
        self.assertEqual(
            self.conman.from_method(spider.parse_no_url, self.results),
            None)

    def test_returns(self):
        """@returns contracts pass or fail with the callback's output."""
        spider = TestSpider()
        response = ResponseMock()

        # callbacks whose output satisfies their @returns contract
        for method, expected_type in ((spider.returns_item, TestItem),
                                      (spider.returns_request, Request)):
            req = self.conman.from_method(method, self.results)
            produced = req.callback(response)
            self.assertEqual([type(x) for x in produced], [expected_type])
            self.should_succeed()

        # a violating callback records a failure
        req = self.conman.from_method(spider.returns_fail, self.results)
        req.callback(response)
        self.should_fail()

    def test_scrapes(self):
        """@scrapes contracts check the fields present on scraped items."""
        spider = TestSpider()
        response = ResponseMock()

        # item carrying every declared field -> success
        req = self.conman.from_method(spider.scrapes_item_ok, self.results)
        scraped = req.callback(response)
        self.assertEqual([type(x) for x in scraped], [TestItem])
        self.should_succeed()

        # item missing declared fields -> failure
        req = self.conman.from_method(spider.scrapes_item_fail, self.results)
        req.callback(response)
        self.should_fail()
示例#23
0
文件: check.py 项目: xacprod/ve1
class Command(ScrapyCommand):
    """Scrapy ``check`` command: run or list spider contract tests.

    NOTE(review): legacy Python 2 code (print statements, dict.iteritems);
    it will not run under Python 3.
    """

    requires_project = True
    # contract checking prints its own report, so normal logging is disabled
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        """Add -l/--list and -v/--verbose on top of the common options."""
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
            help="only list contracts, without checking them")
        parser.add_option("-v", "--verbose", dest="verbose", default=1, action="count",
            help="print all contract hooks")

    def run(self, args, opts):
        """Build contract requests per spider, then crawl them or list them."""
        # load contracts: base settings merged with user-configured ones
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])
        # unittest-style result collector; verbosity comes from the -v count
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests grouped by spider name (only used with --list)
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        spiders = spman_cls.from_settings(self.settings)

        # no spider arguments means: check every known spider
        for spider in args or spiders.list():
            spider = spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                # schedule the contract requests on a dedicated crawler
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks (or just print the collected callback names)
        if opts.list:
            for spider, methods in sorted(contract_reqs.iteritems()):
                print spider
                for method in sorted(methods):
                    print '  * %s' % method
        else:
            self.crawler_process.start()
            self.results.printErrors()

    def get_requests(self, spider):
        """Return contract requests for every docstring'd callable on *spider*."""
        requests = []

        # only methods carrying a docstring can declare contracts
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method, self.results)

                if request:
                    # _generate is defined elsewhere in this module; presumably
                    # wraps the callback to normalize its output -- confirm
                    request.callback = _generate(request.callback)
                    requests.append(request)

        return requests
示例#24
0
 def setUp(self):
     """Create a ContractsManager and a silent TextTestResult for each test."""
     self.conman = ContractsManager(self.contracts)
     # stream=None / verbosity=0 keep the result collector completely quiet
     self.results = TextTestResult(stream=None, descriptions=False, verbosity=0)
示例#25
0
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager contract extraction and verification."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        """Build a fresh manager plus a silent result sink for every test."""
        self.conman = ContractsManager(self.contracts)
        # stream=None / verbosity=0 keep the collector completely quiet
        self.results = TextTestResult(stream=None, descriptions=False, verbosity=0)

    def should_succeed(self):
        """Assert that neither failures nor errors were recorded."""
        self.assertEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def should_fail(self):
        """Assert that a failure (but no error) was recorded."""
        self.assertNotEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def test_contracts(self):
        """Contracts are extracted from docstrings and turned into requests."""
        spider = TestSpider()

        # returns_request declares exactly a @url and a @returns contract
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(frozenset(type(c) for c in extracted),
                         frozenset([UrlContract, ReturnsContract]))

        # a valid @url yields a request
        self.assertNotEqual(
            self.conman.from_method(spider.returns_request, self.results),
            None)

        # no @url, no request
        self.assertEqual(
            self.conman.from_method(spider.parse_no_url, self.results),
            None)

    def test_returns(self):
        """@returns contracts pass or fail with the callback's output."""
        spider = TestSpider()
        response = ResponseMock()

        # callbacks whose output satisfies their @returns contract
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # callbacks whose output violates their @returns contract
        for method in (spider.returns_fail, spider.returns_dict_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_scrapes(self):
        """@scrapes contracts check the fields on scraped items and dicts."""
        spider = TestSpider()
        response = ResponseMock()

        # outputs carrying every declared field
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # outputs missing declared fields
        for method in (spider.scrapes_item_fail, spider.scrapes_dict_item_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_errback(self):
        """A download error routed to the errback is recorded as an error."""
        spider = TestSpider()
        response = ResponseMock()

        # build a genuine twisted Failure by raising and capturing HttpError
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        req = self.conman.from_method(spider.returns_request, self.results)
        req.errback(failure_mock)

        self.assertEqual(self.results.failures, [])
        self.assertNotEqual(self.results.errors, [])

    def test_inherited_contracts(self):
        """Contracts declared on a base spider apply to its subclasses."""
        requests = self.conman.from_spider(InheritsTestSpider(), self.results)
        self.assertTrue(requests)
示例#26
0
class Command(ScrapyCommand):
    """Scrapy ``check`` command (older single-crawler API variant).

    NOTE(review): legacy Python 2 code (print statements, dict.iteritems);
    it will not run under Python 3.
    """

    requires_project = True
    # contract checking prints its own report, so normal logging is disabled
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check contracts for given spider"

    def add_options(self, parser):
        """Add the -l/--list flag on top of the common Scrapy options."""
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l",
                          "--list",
                          dest="list",
                          action="store_true",
                          help="only list contracts, without checking them")

    def run(self, args, opts):
        """Queue contract requests on the crawler, or just list them."""
        # load contracts: base settings merged with user-configured ones
        contracts = build_component_list(
            self.settings['SPIDER_CONTRACTS_BASE'],
            self.settings['SPIDER_CONTRACTS'],
        )
        self.conman = ContractsManager([load_object(c) for c in contracts])

        # contract requests grouped by spider name (only used with --list)
        contract_reqs = defaultdict(list)
        # force the engine to accept every spider scheduled below
        self.crawler.engine.has_capacity = lambda: True

        # no spider arguments means: check every known spider
        for spider in args or self.crawler.spiders.list():
            spider = self.crawler.spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            else:
                self.crawler.crawl(spider, requests)

        # start checks (or just print the collected callback names)
        if opts.list:
            for spider, methods in sorted(contract_reqs.iteritems()):
                print spider
                for method in sorted(methods):
                    print '  * %s' % method
        else:
            self.crawler.start()

    def get_requests(self, spider):
        """Return contract requests for every docstring'd callable on *spider*."""
        requests = []

        # only methods carrying a docstring can declare contracts
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method)

                if request:
                    # _generate is defined elsewhere in this module; presumably
                    # wraps the callback to normalize its output -- confirm
                    request.callback = _generate(request.callback)
                    requests.append(request)

        return requests
示例#27
0
class ContractsManagerTest(unittest.TestCase):
    """Exercise ContractsManager contract extraction and verification."""

    contracts = [UrlContract, ReturnsContract, ScrapesContract]

    def setUp(self):
        """Build a fresh manager plus a silent result sink for every test."""
        self.conman = ContractsManager(self.contracts)
        # stream=None / verbosity=0 keep the collector completely quiet
        self.results = TextTestResult(stream=None,
                                      descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        """Assert that neither failures nor errors were recorded."""
        self.assertEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def should_fail(self):
        """Assert that a failure (but no error) was recorded."""
        self.assertNotEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def test_contracts(self):
        """Contracts are extracted from docstrings and turned into requests."""
        spider = TestSpider()

        # returns_request declares exactly a @url and a @returns contract
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(frozenset(type(c) for c in extracted),
                         frozenset([UrlContract, ReturnsContract]))

        # a valid @url yields a request
        self.assertNotEqual(
            self.conman.from_method(spider.returns_request, self.results),
            None)

        # no @url, no request
        self.assertEqual(
            self.conman.from_method(spider.parse_no_url, self.results),
            None)

    def test_returns(self):
        """@returns contracts pass or fail with the callback's output."""
        spider = TestSpider()
        response = ResponseMock()

        # callbacks whose output satisfies their @returns contract
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # callbacks whose output violates their @returns contract
        for method in (spider.returns_fail, spider.returns_dict_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_scrapes(self):
        """@scrapes contracts check the fields on scraped items and dicts."""
        spider = TestSpider()
        response = ResponseMock()

        # outputs carrying every declared field
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # outputs missing declared fields
        for method in (spider.scrapes_item_fail, spider.scrapes_dict_item_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_errback(self):
        """A download error routed to the errback is recorded as an error."""
        spider = TestSpider()
        response = ResponseMock()

        # build a genuine twisted Failure by raising and capturing HttpError
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        req = self.conman.from_method(spider.returns_request, self.results)
        req.errback(failure_mock)

        self.assertEqual(self.results.failures, [])
        self.assertNotEqual(self.results.errors, [])
示例#28
0
文件: check.py 项目: amferraz/scrapy
class Command(ScrapyCommand):
    """Scrapy ``check`` command: run or list spider contract tests.

    NOTE(review): legacy Python 2 code (print statements, dict.iteritems);
    it will not run under Python 3.
    """

    requires_project = True
    # contract checking prints its own report, so normal logging is disabled
    default_settings = {"LOG_ENABLED": False}

    def syntax(self):
        return "[options] <spider>"

    def short_desc(self):
        return "Check spider contracts"

    def add_options(self, parser):
        """Add -l/--list and -v/--verbose on top of the common options."""
        ScrapyCommand.add_options(self, parser)
        parser.add_option(
            "-l", "--list", dest="list", action="store_true", help="only list contracts, without checking them"
        )
        parser.add_option("-v", "--verbose", dest="verbose", default=1, action="count", help="print all contract hooks")

    def run(self, args, opts):
        """Build contract requests per spider, then crawl them or list them."""
        # load contracts: base settings merged with user-configured ones
        contracts = build_component_list(self.settings["SPIDER_CONTRACTS_BASE"], self.settings["SPIDER_CONTRACTS"])
        self.conman = ContractsManager([load_object(c) for c in contracts])
        # unittest-style result collector; verbosity comes from the -v count
        self.results = TextTestRunner(verbosity=opts.verbose)._makeResult()

        # contract requests grouped by spider name (only used with --list)
        contract_reqs = defaultdict(list)

        spman_cls = load_object(self.settings["SPIDER_MANAGER_CLASS"])
        spiders = spman_cls.from_settings(self.settings)

        # no spider arguments means: check every known spider
        for spider in args or spiders.list():
            spider = spiders.create(spider)
            requests = self.get_requests(spider)

            if opts.list:
                for req in requests:
                    contract_reqs[spider.name].append(req.callback.__name__)
            elif requests:
                # schedule the contract requests on a dedicated crawler
                crawler = self.crawler_process.create_crawler(spider.name)
                crawler.crawl(spider, requests)

        # start checks (or just print the collected callback names)
        if opts.list:
            for spider, methods in sorted(contract_reqs.iteritems()):
                print spider
                for method in sorted(methods):
                    print "  * %s" % method
        else:
            self.crawler_process.start()
            self.results.printErrors()

    def get_requests(self, spider):
        """Return contract requests for every docstring'd callable on *spider*."""
        requests = []

        # only methods carrying a docstring can declare contracts
        for key, value in vars(type(spider)).items():
            if callable(value) and value.__doc__:
                bound_method = value.__get__(spider, type(spider))
                request = self.conman.from_method(bound_method, self.results)

                if request:
                    # _generate is defined elsewhere in this module; presumably
                    # wraps the callback to normalize its output -- confirm
                    request.callback = _generate(request.callback)
                    requests.append(request)

        return requests
示例#29
0
class ContractsManagerTest(unittest.TestCase):
    contracts = [
        UrlContract,
        ReturnsContract,
        ScrapesContract,
        CustomFormContract,
        CustomSuccessContract,
        CustomFailContract,
    ]

    def setUp(self):
        """Create a fresh ContractsManager and a silent result collector."""
        self.conman = ContractsManager(self.contracts)
        # stream=None / verbosity=0 keep the collector completely quiet
        self.results = TextTestResult(stream=None,
                                      descriptions=False,
                                      verbosity=0)

    def should_succeed(self):
        """Assert that neither failures nor errors were recorded."""
        self.assertEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def should_fail(self):
        """Assert that a failure (but no error) was recorded."""
        self.assertNotEqual(self.results.failures, [])
        self.assertEqual(self.results.errors, [])

    def should_error(self):
        """Assert that at least one error was recorded."""
        self.assertNotEqual(self.results.errors, [])

    def test_contracts(self):
        """Contracts are extracted from docstrings and turned into requests."""
        spider = TestSpider()

        # returns_request declares exactly a @url and a @returns contract
        extracted = self.conman.extract_contracts(spider.returns_request)
        self.assertEqual(len(extracted), 2)
        self.assertEqual(frozenset(type(c) for c in extracted),
                         frozenset([UrlContract, ReturnsContract]))

        # a valid @url yields a request
        self.assertNotEqual(
            self.conman.from_method(spider.returns_request, self.results),
            None)

        # no @url, no request
        self.assertEqual(
            self.conman.from_method(spider.parse_no_url, self.results),
            None)

    def test_returns(self):
        """@returns contracts pass or fail with the callback's output."""
        spider = TestSpider()
        response = ResponseMock()

        # callbacks whose output satisfies their @returns contract
        for method in (spider.returns_item, spider.returns_dict_item,
                       spider.returns_request):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # callbacks whose output violates their @returns contract
        for method in (spider.returns_fail, spider.returns_dict_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_scrapes(self):
        """@scrapes contracts check the fields on scraped items and dicts."""
        spider = TestSpider()
        response = ResponseMock()

        # outputs carrying every declared field
        for method in (spider.scrapes_item_ok, spider.scrapes_dict_item_ok):
            self.conman.from_method(method, self.results).callback(response)
            self.should_succeed()

        # outputs missing declared fields
        for method in (spider.scrapes_item_fail, spider.scrapes_dict_item_fail):
            self.conman.from_method(method, self.results).callback(response)
            self.should_fail()

    def test_custom_contracts(self):
        """User-defined contracts may succeed or raise during checking."""
        self.conman.from_spider(CustomContractSuccessSpider(), self.results)
        self.should_succeed()

        # a contract that raises is reported as an error, not a failure
        self.conman.from_spider(CustomContractFailSpider(), self.results)
        self.should_error()

    def test_errback(self):
        """A download error routed to the errback is recorded as an error."""
        spider = TestSpider()
        response = ResponseMock()

        # build a genuine twisted Failure by raising and capturing HttpError
        try:
            raise HttpError(response, 'Ignoring non-200 response')
        except HttpError:
            failure_mock = failure.Failure()

        req = self.conman.from_method(spider.returns_request, self.results)
        req.errback(failure_mock)

        self.assertEqual(self.results.failures, [])
        self.assertNotEqual(self.results.errors, [])

    @defer.inlineCallbacks
    def test_same_url(self):
        """Contract requests sharing one URL must all be crawled (not dupe-filtered)."""

        class TestSameUrlSpider(Spider):
            name = 'test_same_url'

            def __init__(self, *args, **kwargs):
                super(TestSameUrlSpider, self).__init__(*args, **kwargs)
                # counts how many contract callbacks actually ran
                self.visited = 0

            # parameter is deliberately 's' so that 'self' in the body still
            # refers to the enclosing test case (its conman and results)
            def start_requests(s):
                return self.conman.from_spider(s, self.results)

            def parse_first(self, response):
                self.visited += 1
                return TestItem()

            def parse_second(self, response):
                self.visited += 1
                return TestItem()

        with MockServer() as mockserver:
            # both callbacks get an identical @url contract docstring
            contract_doc = '@url {}'.format(mockserver.url('/status?n=200'))

            get_unbound_function(TestSameUrlSpider.parse_first).__doc__ = contract_doc
            get_unbound_function(TestSameUrlSpider.parse_second).__doc__ = contract_doc

            crawler = CrawlerRunner().create_crawler(TestSameUrlSpider)
            yield crawler.crawl()

        # both callbacks must have run despite targeting the same URL
        self.assertEqual(crawler.spider.visited, 2)

    def test_form_contract(self):
        """A form contract produces a POST FormRequest."""
        req = self.conman.from_method(TestSpider().custom_form, self.results)
        self.assertEqual(req.method, 'POST')
        self.assertIsInstance(req, FormRequest)

    def test_inherited_contracts(self):
        spider = InheritsTestSpider()

        requests = self.conman.from_spider(spider, self.results)
        self.assertTrue(requests)