def test_follow_links_attribute_population(self):
    crawler = get_crawler()
    spider = self.spider_class.from_crawler(crawler, 'example.com')
    self.assertTrue(hasattr(spider, '_follow_links'))
    self.assertTrue(spider._follow_links)

    settings_dict = {'CRAWLSPIDER_FOLLOW_LINKS': False}
    crawler = get_crawler(settings_dict=settings_dict)
    spider = self.spider_class.from_crawler(crawler, 'example.com')
    self.assertTrue(hasattr(spider, '_follow_links'))
    self.assertFalse(spider._follow_links)

def test_follow_links_attribute_deprecated_population(self):
    spider = self.spider_class('example.com')
    self.assertFalse(hasattr(spider, '_follow_links'))

    spider.set_crawler(get_crawler())
    self.assertTrue(hasattr(spider, '_follow_links'))
    self.assertTrue(spider._follow_links)

    spider = self.spider_class('example.com')
    settings_dict = {'CRAWLSPIDER_FOLLOW_LINKS': False}
    spider.set_crawler(get_crawler(settings_dict=settings_dict))
    self.assertTrue(hasattr(spider, '_follow_links'))
    self.assertFalse(spider._follow_links)

def test_https_noconnect(self):
    os.environ['https_proxy'] = 'http://*****:*****@localhost:8888?noconnect'
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as l:
        yield crawler.crawl("https://localhost:8999/status?n=200")
    self._assert_got_response_code(200, l)
    os.environ['https_proxy'] = 'http://*****:*****@localhost:8888'

def setUp(self):
    self.logger = logging.getLogger("test")
    self.logger.setLevel(logging.NOTSET)
    self.logger.propagate = False
    self.crawler = get_crawler(settings_dict={"LOG_LEVEL": "WARNING"})
    self.handler = LogCounterHandler(self.crawler)
    self.logger.addHandler(self.handler)

def test_https_tunnel_without_leak_proxy_authorization_header(self):
    request = Request("https://localhost:8999/echo")
    crawler = get_crawler(SingleRequestSpider)
    yield crawler.crawl(seed=request)
    self._assert_got_response_code(200)
    echo = json.loads(crawler.spider.meta['responses'][0].body)
    self.assertTrue('Proxy-Authorization' not in echo['headers'])

def run(self):
    self.port = start_test_site()
    self.portno = self.port.getHost().port

    start_urls = [self.geturl("/"), self.geturl("/redirect")]
    self.spider = TestSpider(start_urls=start_urls)

    for name, signal in vars(signals).items():
        if not name.startswith('_'):
            dispatcher.connect(self.record_signal, signal)

    self.crawler = get_crawler()
    self.crawler.install()
    self.crawler.configure()
    self.crawler.signals.connect(self.item_scraped, signals.item_scraped)
    self.crawler.signals.connect(self.request_scheduled, signals.request_scheduled)
    self.crawler.signals.connect(self.response_downloaded, signals.response_downloaded)
    self.crawler.crawl(self.spider)
    self.crawler.start()

    self.deferred = defer.Deferred()
    dispatcher.connect(self.stop, signals.engine_stopped)
    return self.deferred

def test_setting_true_cookies_enabled(self):
    self.assertIsInstance(
        CookiesMiddleware.from_crawler(
            get_crawler(settings_dict={'COOKIES_ENABLED': True})
        ),
        CookiesMiddleware
    )

def test_df_from_settings_scheduler(self):
    settings = {'DUPEFILTER_DEBUG': True,
                'DUPEFILTER_CLASS': __name__ + '.FromSettingsRFPDupeFilter'}
    crawler = get_crawler(settings_dict=settings)
    scheduler = Scheduler.from_crawler(crawler)
    self.assertTrue(scheduler.df.debug)
    self.assertEqual(scheduler.df.method, 'from_settings')

def test_log_debug(self):
    with LogCapture() as l:
        settings = {'DUPEFILTER_DEBUG': True,
                    'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
        crawler = get_crawler(SimpleSpider, settings_dict=settings)
        scheduler = Scheduler.from_crawler(crawler)
        spider = SimpleSpider.from_crawler(crawler)

        dupefilter = scheduler.df
        dupefilter.open()

        r1 = Request('http://scrapytest.org/index.html')
        r2 = Request('http://scrapytest.org/index.html',
                     headers={'Referer': 'http://scrapytest.org/INDEX.html'})

        dupefilter.log(r1, spider)
        dupefilter.log(r2, spider)

        assert crawler.stats.get_value('dupefilter/filtered') == 2
        l.check_present(('scrapy.dupefilters', 'DEBUG',
                         ('Filtered duplicate request: <GET http://scrapytest.org/index.html>'
                          ' (referer: None)')))
        l.check_present(('scrapy.dupefilters', 'DEBUG',
                         ('Filtered duplicate request: <GET http://scrapytest.org/index.html>'
                          ' (referer: http://scrapytest.org/INDEX.html)')))

        dupefilter.close('finished')

def setUp(self):
    self.f = BytesIO()
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler.spider = self.crawler._create_spider('test')
    self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO, 'utf-8', self.crawler)
    self.log_observer.start()

def setUp(self):
    self.crawler = get_crawler(Spider, self.settings_dict)
    self.spider = self.crawler._create_spider('foo')
    self.mwman = DownloaderMiddlewareManager.from_crawler(self.crawler)
    # some mw depends on stats collector
    self.crawler.stats.open_spider(self.spider)
    return self.mwman.open_spider(self.spider)

def test_download_gzip_response(self):
    if six.PY2 and twisted_version > (12, 3, 0):
        crawler = get_crawler(SingleRequestSpider)
        body = '1' * 100  # PayloadResource requires body length to be 100
        request = Request('http://localhost:8998/payload', method='POST',
                          body=body, meta={'download_maxsize': 50})
        yield crawler.crawl(seed=request)
        failure = crawler.spider.meta['failure']
        # download_maxsize < 100, hence the CancelledError
        self.assertIsInstance(failure.value, defer.CancelledError)

        request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
        request = request.replace(url='http://localhost:8998/xpayload')
        yield crawler.crawl(seed=request)
        # download_maxsize = 50 is enough for the gzipped response
        failure = crawler.spider.meta.get('failure')
        self.assertIsNone(failure)
        reason = crawler.spider.meta['close_reason']
        self.assertEqual(reason, 'finished')
    else:
        raise unittest.SkipTest(
            "xpayload and payload endpoint only enabled for twisted > 12.3.0 and python 2.x")

def setUp(self):
    self.spider = Spider('scrapytest.org')
    self.stats = StatsCollector(get_crawler())
    self.stats.open_spider(self.spider)
    self.mw = DepthMiddleware(1, self.stats, True)

def test_referer_header(self):
    """Referer header is set by RefererMiddleware unless it is already set"""
    req0 = Request('http://localhost:8998/echo?headers=1&body=0', dont_filter=1)
    req1 = req0.replace()
    req2 = req0.replace(headers={'Referer': None})
    req3 = req0.replace(headers={'Referer': 'http://example.com'})
    req0.meta['next'] = req1
    req1.meta['next'] = req2
    req2.meta['next'] = req3
    crawler = get_crawler(SingleRequestSpider)
    yield crawler.crawl(seed=req0)
    # basic asserts in case of weird communication errors
    self.assertIn('responses', crawler.spider.meta)
    self.assertNotIn('failures', crawler.spider.meta)
    # start requests doesn't set Referer header
    echo0 = json.loads(crawler.spider.meta['responses'][0].body)
    self.assertNotIn('Referer', echo0['headers'])
    # following request sets Referer to start request url
    echo1 = json.loads(crawler.spider.meta['responses'][1].body)
    self.assertEqual(echo1['headers'].get('Referer'), [req0.url])
    # next request avoids Referer header
    echo2 = json.loads(crawler.spider.meta['responses'][2].body)
    self.assertNotIn('Referer', echo2['headers'])
    # last request explicitly sets a Referer header
    echo3 = json.loads(crawler.spider.meta['responses'][3].body)
    self.assertEqual(echo3['headers'].get('Referer'), ['http://example.com'])

def test_unbounded_response(self):
    # Completeness of responses without Content-Length or Transfer-Encoding
    # can not be determined, we treat them as valid but flagged as "partial"
    from six.moves.urllib.parse import urlencode
    query = urlencode({'raw': '''\
HTTP/1.1 200 OK
Server: Apache-Coyote/1.1
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
Pragma: no-cache
Expires: Thu, 01 Jan 1970 00:00:00 GMT
Cache-Control: no-cache
Cache-Control: no-store
Content-Type: text/html;charset=UTF-8
Content-Language: en
Date: Tue, 27 Aug 2013 13:05:05 GMT
Connection: close

foo body
with multiples lines
'''})
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as l:
        yield crawler.crawl("http://localhost:8998/raw?{0}".format(query))
    self.assertEqual(str(l).count("Got response 200"), 1)

def get_defaults_spider_mw(self):
    crawler = get_crawler()
    spider = BaseSpider('foo')
    spider.set_crawler(crawler)
    defaults = dict([(k, [v]) for k, v in
                     crawler.settings.get('DEFAULT_REQUEST_HEADERS').iteritems()])
    return defaults, spider, DefaultHeadersMiddleware()

def test_retry_dns_error(self):
    with mock.patch('socket.gethostbyname',
                    side_effect=socket.gaierror(-5, 'No address associated with hostname')):
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            yield crawler.crawl("http://example.com/")
        self._assert_retried(l)

def test_parse_credentials(self):
    try:
        import boto
    except ImportError:
        raise unittest.SkipTest("S3FeedStorage requires boto")
    aws_credentials = {'AWS_ACCESS_KEY_ID': 'settings_key',
                       'AWS_SECRET_ACCESS_KEY': 'settings_secret'}
    crawler = get_crawler(settings_dict=aws_credentials)
    # Instantiate with crawler
    storage = S3FeedStorage.from_crawler(crawler, 's3://mybucket/export.csv')
    self.assertEqual(storage.access_key, 'settings_key')
    self.assertEqual(storage.secret_key, 'settings_secret')
    # Instantiate directly
    storage = S3FeedStorage('s3://mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, 'settings_key')
    self.assertEqual(storage.secret_key, 'settings_secret')
    # URI priority > settings priority
    storage = S3FeedStorage('s3://uri_key:uri_secret@mybucket/export.csv',
                            aws_credentials['AWS_ACCESS_KEY_ID'],
                            aws_credentials['AWS_SECRET_ACCESS_KEY'])
    self.assertEqual(storage.access_key, 'uri_key')
    self.assertEqual(storage.secret_key, 'uri_secret')
    # Backwards compatibility for initialising without settings
    with warnings.catch_warnings(record=True) as w:
        storage = S3FeedStorage('s3://mybucket/export.csv')
        self.assertEqual(storage.access_key, 'conf_key')
        self.assertEqual(storage.secret_key, 'conf_secret')
        self.assertTrue('without AWS keys' in str(w[-1].message))

def test_download_gzip_response(self):
    if twisted_version > (12, 3, 0):
        crawler = get_crawler(SingleRequestSpider)
        body = b"1" * 100  # PayloadResource requires body length to be 100
        request = Request("http://localhost:8998/payload", method="POST",
                          body=body, meta={"download_maxsize": 50})
        yield crawler.crawl(seed=request)
        failure = crawler.spider.meta["failure"]
        # download_maxsize < 100, hence the CancelledError
        self.assertIsInstance(failure.value, defer.CancelledError)

        if six.PY2:
            request.headers.setdefault(b"Accept-Encoding", b"gzip,deflate")
            request = request.replace(url="http://localhost:8998/xpayload")
            yield crawler.crawl(seed=request)
            # download_maxsize = 50 is enough for the gzipped response
            failure = crawler.spider.meta.get("failure")
            self.assertIsNone(failure)
            reason = crawler.spider.meta["close_reason"]
            self.assertEqual(reason, "finished")
        else:
            # See issue https://twistedmatrix.com/trac/ticket/8175
            raise unittest.SkipTest("xpayload only enabled for PY2")
    else:
        raise unittest.SkipTest("xpayload and payload endpoint only enabled for twisted > 12.3.0")

def test_not_configured_handler(self):
    handlers = {'scheme': 'tests.test_downloader_handlers.OffDH'}
    crawler = get_crawler(settings_dict={'DOWNLOAD_HANDLERS': handlers})
    dh = DownloadHandlers(crawler)
    self.assertIn('scheme', dh._schemes)
    self.assertNotIn('scheme', dh._handlers)
    self.assertIn('scheme', dh._notconfigured)

def test_download(self):
    crawler = get_crawler(SingleRequestSpider)
    yield crawler.crawl(seed=Request(url=self.mockserver.url('')))
    failure = crawler.spider.meta.get('failure')
    self.assertIsNone(failure)
    reason = crawler.spider.meta['close_reason']
    self.assertEqual(reason, 'finished')

def test_download(self):
    crawler = get_crawler(SingleRequestSpider)
    yield crawler.crawl(seed=Request(url='http://localhost:8998'))
    failure = crawler.spider.meta.get('failure')
    self.assertIsNone(failure)
    reason = crawler.spider.meta['close_reason']
    self.assertEqual(reason, 'finished')

def test_variants_dict_split(self):
    """ Checks if dict with "variants" is split as expected """
    settings = {"SPLITVARIANTS_ENABLED": True}
    crawler = get_crawler(settings_dict=settings)
    mware = SplitVariantsMiddleware.from_crawler(crawler)

    # Define item with variants
    item = {"id": 12,
            "name": "Big chair",
            "variants": [{"size": "XL", "price": 200},
                         {"size": "L", "price": 220}]}

    # Define how split items should look
    expected = [
        {"id": 12, "name": "Big chair", "size": "XL", "price": 200},
        {"id": 12, "name": "Big chair", "size": "L", "price": 220}]

    # Calling middleware for given result as a Python dict
    result = [item]
    result = mware.process_spider_output(self.response, result, self.spider)
    self.assertEqual(list(result), expected)

def setUp(self):
    self.crawler = get_crawler()
    self.spider = BaseSpider('example.com')
    self.tmpdir = tempfile.mkdtemp()
    self.request = Request('http://www.example.com',
                           headers={'User-Agent': 'test'})
    self.response = Response('http://www.example.com',
                             headers={'Content-Type': 'text/html'},
                             body='test body',
                             status=202)
    self.crawler.stats.open_spider(self.spider)

def test_from_crawler_crawler_and_settings_population(self):
    crawler = get_crawler()
    spider = self.spider_class.from_crawler(crawler, 'example.com')
    self.assertTrue(hasattr(spider, 'crawler'))
    self.assertIs(spider.crawler, crawler)
    self.assertTrue(hasattr(spider, 'settings'))
    self.assertIs(spider.settings, crawler.settings)

def test_download_with_content_length(self):
    crawler = get_crawler(SingleRequestSpider)
    # http://localhost:8998/partial sets Content-Length to 1024; use
    # download_maxsize=1000 to avoid downloading it
    yield crawler.crawl(seed=Request(url="http://localhost:8998/partial",
                                     meta={"download_maxsize": 1000}))
    failure = crawler.spider.meta["failure"]
    self.assertIsInstance(failure.value, defer.CancelledError)

def test_https_noconnect(self):
    proxy = os.environ['https_proxy']
    os.environ['https_proxy'] = proxy + '?noconnect'
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as l:
        yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
    self._assert_got_response_code(200, l)

def test_download(self):
    crawler = get_crawler(SingleRequestSpider)
    yield crawler.crawl(seed=Request(url="http://localhost:8998"))
    failure = crawler.spider.meta.get("failure")
    self.assertIsNone(failure)
    reason = crawler.spider.meta["close_reason"]
    self.assertEqual(reason, "finished")

def test_closespider_timeout(self):
    close_on = 0.1
    crawler = get_crawler(FollowAllSpider, {'CLOSESPIDER_TIMEOUT': close_on})
    yield crawler.crawl(total=1000000, mockserver=self.mockserver)
    reason = crawler.spider.meta['close_reason']
    self.assertEqual(reason, 'closespider_timeout')
    total_seconds = crawler.stats.get_value('elapsed_time_seconds')
    self.assertTrue(total_seconds >= close_on)

def test_closespider_pagecount(self):
    close_on = 5
    crawler = get_crawler(FollowAllSpider, {'CLOSESPIDER_PAGECOUNT': close_on})
    yield crawler.crawl(mockserver=self.mockserver)
    reason = crawler.spider.meta['close_reason']
    self.assertEqual(reason, 'closespider_pagecount')
    pagecount = crawler.stats.get_value('response_received_count')
    self.assertTrue(pagecount >= close_on)

def get_spider(*args, **kwargs):
    crawler = get_crawler(spidercls=kwargs.pop('spidercls', None),
                          settings_dict=kwargs.pop('settings_dict', None))
    return crawler._create_spider(*args, **kwargs)

def test_not_configured_handler(self):
    handlers = {'scheme': 'tests.test_downloader_handlers.OffDH'}
    dh = DownloadHandlers(get_crawler({'DOWNLOAD_HANDLERS': handlers}))
    self.assertNotIn('scheme', dh._handlers)
    self.assertIn('scheme', dh._notconfigured)

def setUp(self):
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler._create_spider('foo')
    self.mw = RetryMiddleware.from_crawler(self.crawler)
    self.mw.max_retry_times = 2

def docrawl(spider, settings=None):
    crawler = get_crawler(settings)
    crawler.configure()
    crawler.crawl(spider)
    return crawler.start()

def setUp(self):
    crawler = get_crawler()
    self.download_handler = create_instance(DataURIDownloadHandler, crawler.settings, crawler)
    self.download_request = self.download_handler.download_request
    self.spider = Spider('foo')

def setUp(self):
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler._create_spider('foo')
    self.mw = RedirectMiddleware.from_crawler(self.crawler)

def get_spider_and_mw(self, default_useragents):
    crawler = get_crawler(spidercls=Spider,
                          settings_dict={'USER_AGENTS': default_useragents})
    spider = crawler._create_spider('foo')
    return spider, UserAgentsMiddleware.from_crawler(crawler)

def test_https_connect_tunnel_error(self):
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as l:
        yield crawler.crawl("https://localhost:99999/status?n=200")
    self._assert_got_tunnel_error(l)

def test_retry_conn_failed(self):
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as log:
        yield crawler.crawl("http://localhost:65432/status?n=503",
                            mockserver=self.mockserver)
    self._assert_retried(log)

def test_retry_503(self):
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as log:
        yield crawler.crawl(self.mockserver.url("/status?n=503"),
                            mockserver=self.mockserver)
    self._assert_retried(log)

def test_timeout_success(self):
    crawler = get_crawler(DelaySpider)
    yield crawler.crawl(n=0.5, mockserver=self.mockserver)
    self.assertTrue(crawler.spider.t1 > 0)
    self.assertTrue(crawler.spider.t2 > 0)
    self.assertTrue(crawler.spider.t2 > crawler.spider.t1)

def test_middleware_works(self):
    crawler = get_crawler(_HttpErrorSpider)
    yield crawler.crawl()
    assert not crawler.spider.skipped, crawler.spider.skipped
    self.assertEqual(crawler.spider.parsed, {'200'})
    self.assertEqual(crawler.spider.failed, {'404', '402', '500'})

def test_close_downloader(self):
    e = ExecutionEngine(get_crawler(TestSpider), lambda _: None)
    yield e.close()

def test_dns_server_ip_address_none(self):
    crawler = get_crawler(SingleRequestSpider)
    url = self.mockserver.url('/status?n=200')
    yield crawler.crawl(seed=url, mockserver=self.mockserver)
    ip_address = crawler.spider.meta['responses'][0].ip_address
    self.assertIsNone(ip_address)

def hs_ext(monkeypatch):
    monkeypatch.setattr('sh_scrapy.extension.pipe_writer', mock.Mock())
    monkeypatch.setattr('sh_scrapy.extension.hsref', mock.Mock())
    crawler = get_crawler(Spider)
    return HubstorageExtension.from_crawler(crawler)

def test_follow_all(self):
    crawler = get_crawler(FollowAllSpider)
    yield crawler.crawl(mockserver=self.mockserver)
    self.assertEqual(len(crawler.spider.urls_visited), 11)  # 10 + start_url

def setUp(self):
    crawler = get_crawler(Spider)
    self.spider = crawler._create_spider('foo')
    self.mw = MetaRefreshMiddleware.from_crawler(crawler)

def setUp(self):
    self.spider = self._get_spider()
    crawler = get_crawler()
    self.mw = OffsiteMiddleware.from_crawler(crawler)
    self.mw.spider_opened(self.spider)

def setUp(self):
    crawler = get_crawler()
    self.spider = Spider('foo')
    self.mw = RedirectMiddleware.from_crawler(crawler)

def get_spider_and_mw(self, default_useragent):
    crawler = get_crawler({'USER_AGENT': default_useragent})
    spider = Spider('foo')
    spider.set_crawler(crawler)
    return spider, UserAgentMiddleware.from_crawler(crawler)

def setUp(self):
    self.tmpname = self.mktemp()
    with open(self.tmpname + '^', 'w') as f:
        f.write('0123456789')
    handler = create_instance(FileDownloadHandler, None, get_crawler())
    self.download_request = handler.download_request

def setUp(self):
    crawler = get_crawler()
    self.spider = BaseSpider('foo')
    self.mw = RetryMiddleware.from_crawler(crawler)
    self.mw.max_retry_times = 2

def describe_swagger_spider_1():
    to_test = ApiSwaggerSpider.from_crawler(get_crawler(), config_file=config_file)

    def describe_docs_page_1():
        resp = response_from("confluence.html")
        results = to_test.swagger_app.parse_paths(resp)

        def should_collect_number_of_paths():
            assert len(results) == 47

        def should_contain_apis():
            assert '/content/{id}/restriction/byOperation' in results
            assert '/user/watch/content/{contentId}' in results
            assert '/space/{spaceKey}/property/{key}' in results
            assert '/accessmode' in results
            assert '/content' in results

        def should_not_exists_in_apis():
            assert '/content/{id}/riction/byOperation' not in results

        def should_update_existsing_api_with_operation():
            assert 'get' in results['/audit/retention']
            assert 'put' in results['/audit/retention']
            assert 'get' in results['/content/{id}/property/{key}']
            assert 'put' in results['/content/{id}/property/{key}']
            assert 'delete' in results['/content/{id}/property/{key}']
            assert 'post' in results['/content/{id}/property/{key}']

        def should_return_swagger_path_with_bracket_and_with_many_methods_and_responses():
            post_item = results['/user/watch/content/{contentId}']['post']
            assert post_item['responses']['404']['description'] == u'Returned if no content exists for the specified content id or the calling user does not have permission to perform the operation'
            assert post_item['responses']['204']['description'] == u'application/json Returned if the watcher was successfully created'
            assert post_item['parameters'] == [{
                'in': 'formData',
                'type': u'string',
                'name': u'key',
                'description': u'userkey of the user to create the new watcher for'
            }, {
                'in': 'formData',
                'type': u'string',
                'name': u'username',
                'description': u'username of the user to create the new watcher for'
            }, {
                'required': True,
                'in': 'path',
                'name': 'contentId',
                'description': '',
                'type': 'string'
            }]

            get_item = results['/user/watch/content/{contentId}']['get']
            assert get_item['responses']['200'] == {
                'description': u'application/json Returns a JSON representation containing the watching state'
            }
            assert get_item['responses']['404'] == {
                'description': u'Returned if no content exists for the specified content id or calling user does not have permission to perform the operation'
            }
            assert get_item['parameters'] == [{
                'in': 'query',
                'type': u'string',
                'name': u'key',
                'description': u'userkey of the user to check for watching state'
            }, {
                'in': 'query',
                'type': u'string',
                'name': u'username',
                'description': u'username of the user to check for watching state'
            }, {
                'required': True,
                'in': 'path',
                'name': 'contentId',
                'description': '',
                'type': 'string'
            }]

            del_item = results['/user/watch/content/{contentId}']['delete']
            assert del_item['responses']['204'] == {
                'description': u'application/json Returned if the watcher was successfully deleted'
            }
            assert del_item['responses']['404'] == {
                'description': u'Returned if no content exists for the specified content id or the calling user does not have permission to perform the operation'
            }
            assert del_item['parameters'] == [{
                'in': 'query',
                'type': u'string',
                'name': u'key',
                'description': u'userkey of the user to delete the watcher for'
            }, {
                'in': 'query',
                'type': u'string',
                'name': u'username',
                'description': u'username of the user to delete the watcher for'
            }, {
                'required': True,
                'in': 'path',
                'name': 'contentId',
                'description': '',
                'type': 'string'
            }]

        def should_return_path_with_no_bracket_and_no_responses():
            item = results['/audit']['post']
            assert item['responses'] == {'200': {'description': 'Unknown'}}

            item = results['/audit']['get']
            assert item['responses'] == {'200': {'description': 'Unknown'}}
            assert item['parameters'] == [{
                'in': 'query',
                'type': u'string',
                'name': u'startDate',
                'description': ''
            }, {
                'in': 'query',
                'type': u'string',
                'name': u'endDate',
                'description': ''
            }, {
                'in': 'query',
                'type': u'integer',
                'name': u'start',
                'description': u'where to start within results set'
            }, {
                'in': 'query',
                'type': u'integer',
                'name': u'limit',
                'description': u'the maximum results to fetch'
            }, {
                'in': 'query',
                'type': u'string',
                'name': u'searchString',
                'description': ''
            }]

        def describe_parameters():

            def should_convert_int_to_integer():
                endpoint = results['/content/{id}/property']
                operation = endpoint['get']
                assert operation['parameters'][1]["type"] == "integer"

            def should_add_in_parameters_that_has_no_set_parameters():
                endpoint = results['/content/{id}/history/{version}/macro/hash/{hash}']
                operation = endpoint['get']
                assert len(operation['parameters']) == 3
                assert operation['parameters'][0]['name'] == 'id'
                assert operation['parameters'][0]['in'] == 'path'
                assert operation['parameters'][0]['required'] == True
                assert operation['parameters'][1]['name'] == 'version'
                assert operation['parameters'][1]['in'] == 'path'
                assert operation['parameters'][1]['required'] == True
                assert operation['parameters'][2]['name'] == 'hash'
                assert operation['parameters'][2]['in'] == 'path'
                assert operation['parameters'][2]['required'] == True

        def describe_schema():

            def should_have_security_definition():
                to_test.swagger_app.parse_apis_info(resp)
                assert to_test.swagger_app.swagger['securityDefinitions']['HTTP Basic'] == {
                    'type': 'basic'
                }

            def should_extract_definitions_from_response():
                endpoint = results['/space/{spaceKey}/property']
                operation = endpoint['post']
                assert 'definitions' not in operation['responses']["200"]['schema']
                assert to_test.swagger_app.definitions
                assert 'html-string' in to_test.swagger_app.definitions
                assert 'person' in to_test.swagger_app.definitions
                assert 'content-representation' in to_test.swagger_app.definitions
                assert 'unknown-user' in to_test.swagger_app.definitions
                assert 'space' in to_test.swagger_app.definitions
                assert 'content' in to_test.swagger_app.definitions
                assert 'known-user' in to_test.swagger_app.definitions
                assert 'web-resource-dependencies' in to_test.swagger_app.definitions
                assert 'version' in to_test.swagger_app.definitions
                assert 'operation-key' in to_test.swagger_app.definitions
                assert 'user' in to_test.swagger_app.definitions
                assert 'anonymous' in to_test.swagger_app.definitions
                assert 'icon' in to_test.swagger_app.definitions

            def should_extract_definitions_from_parameters():
                endpoint = results['/space/{spaceKey}/property']
                operation = endpoint['post']
                assert 'definitions' not in operation['parameters'][0]
                assert to_test.swagger_app.definitions

            def should_remove_id():
                endpoint = results['/space/{spaceKey}/property']
                operation = endpoint['post']
                assert 'id' not in operation['responses']["200"]['schema']

            def should_remove_id_from_parameters():
                endpoint = results['/space/{spaceKey}/property']
                operation = endpoint['post']
                assert 'id' not in operation['parameters'][0]

            def should_not_contain_xscope_key():
                post_item = results['/space/_private']['post']
                assert post_item['responses']['200']['description'] == "application/json Returns a full JSON representation of a space"
                assert post_item['responses']['200']['schema']['properties']['type'] == {
                    u'$ref': u'#/definitions/space-type'
                }

            def describe_fix_schema_definitions():

                def should_convert_type_to_string_for_missing_object_properties():
                    assert to_test.swagger_app.definitions['html-string']['type'] == "string"
                    assert to_test.swagger_app.definitions['person']['type'] == "string"
                    assert to_test.swagger_app.definitions['content-representation']['type'] == "string"
                    assert to_test.swagger_app.definitions['operation-key']['type'] == "string"

                def should_not_convert_type_to_string_for_existing_object_properties():
                    assert to_test.swagger_app.definitions['unknown-user']['type'] == 'object'
                    assert to_test.swagger_app.definitions['space']['type'] == 'object'
                    assert to_test.swagger_app.definitions['content']['type'] == 'object'
                    assert to_test.swagger_app.definitions['known-user']['type'] == 'object'
                    assert to_test.swagger_app.definitions['user']['type'] == 'object'

def test_setting_false_cookies_enabled(self):
    self.assertRaises(
        NotConfigured,
        CookiesMiddleware.from_crawler,
        get_crawler(settings_dict={'COOKIES_ENABLED': False}))

def test_disabled_handler(self):
    handlers = {'scheme': None}
    dh = DownloadHandlers(get_crawler({'DOWNLOAD_HANDLERS': handlers}))
    self.assertNotIn('scheme', dh._handlers)
    self.assertNotIn('scheme', dh._notconfigured)

def test_close_spiders_downloader(self):
    e = ExecutionEngine(get_crawler(TestSpider), lambda _: None)
    yield e.open_spider(TestSpider(), [])
    self.assertEqual(len(e.open_spiders), 1)
    yield e.close()
    self.assertEqual(len(e.open_spiders), 0)

def test_async_def_parse(self):
    crawler = get_crawler(AsyncDefSpider)
    with LogCapture() as log:
        yield crawler.crawl(self.mockserver.url("/status?n=200"),
                            mockserver=self.mockserver)
    self.assertIn("Got response 200", str(log))

def setUp(self):
    self.spider = Spider('foo')
    crawler = get_crawler({'AJAXCRAWL_ENABLED': True})
    self.mw = AjaxCrawlMiddleware.from_crawler(crawler)

def test_response_ssl_certificate_none(self):
    crawler = get_crawler(SingleRequestSpider)
    url = self.mockserver.url("/echo?body=test", is_secure=False)
    yield crawler.crawl(seed=url, mockserver=self.mockserver)
    self.assertIsNone(crawler.spider.meta['responses'][0].certificate)

def test_https_connect_tunnel(self):
    crawler = get_crawler(SimpleSpider)
    with LogCapture() as l:
        yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
    self._assert_got_response_code(200, l)