async def _download_request_page(self, request: Request, spider: Spider, page: Page) -> Response:
    """Navigate *page* to ``request.url``, run any requested page coroutines
    and wrap the result in a Scrapy ``Response``.

    The page is closed afterwards unless the request's callback declares a
    ``pyppeteer.page.Page``-annotated argument, in which case the live page is
    injected into ``request.cb_kwargs`` and left open for the callback.

    Stats recorded under the ``pyppeteer/`` prefix; ``download_latency`` is
    stored in ``request.meta``.
    """
    self.stats.inc_value("pyppeteer/page_count")
    if self.navigation_timeout is not None:
        page.setDefaultNavigationTimeout(self.navigation_timeout)
    # Intercept requests/responses so the handlers can update stats and
    # mirror Scrapy's request attributes onto the browser request.
    await page.setRequestInterception(True)
    page.on("request", partial(_request_handler, scrapy_request=request, stats=self.stats))
    page.on("response", partial(_response_handler, stats=self.stats))

    start_time = time()
    response = await page.goto(request.url)

    page_coroutines = request.meta.get("pyppeteer_page_coroutines") or ()
    if isinstance(page_coroutines, dict):
        page_coroutines = page_coroutines.values()
    for pc in page_coroutines:
        if isinstance(pc, PageCoroutine):
            method = getattr(page, pc.method)
            # Apply the default PageCoroutine timeout only when the caller did
            # not provide one. Checking key presence (rather than truthiness,
            # as before) keeps an explicit ``timeout=0`` — pyppeteer's
            # "disable timeout" — from being clobbered by the default.
            if self.page_coroutine_timeout is not None and "timeout" not in pc.kwargs:
                pc.kwargs["timeout"] = self.page_coroutine_timeout
            if isinstance(pc, NavigationPageCoroutine):
                # Run the action and the navigation wait concurrently so the
                # navigation triggered by the action is not missed.
                await asyncio.gather(page.waitForNavigation(), method(*pc.args, **pc.kwargs))
            else:
                pc.result = await method(*pc.args, **pc.kwargs)

    body = (await page.content()).encode("utf8")
    request.meta["download_latency"] = time() - start_time

    # Inject the page into the callback if it declares a Page-annotated
    # argument; otherwise we own the page and must close it here.
    callback = request.callback or spider.parse
    annotations = getattr(callback, "__annotations__", {})
    for key, value in annotations.items():
        if value is pyppeteer.page.Page:
            request.cb_kwargs[key] = page
            self.stats.inc_value("pyppeteer/page_count/injected_callback")
            break
    else:
        await page.close()
        self.stats.inc_value("pyppeteer/page_count/closed")

    headers = Headers(response.headers)
    # Drop Content-Encoding: the browser already decompressed the body, so
    # keeping it would make Scrapy try to decode it again.
    headers.pop("Content-Encoding", None)
    respcls = responsetypes.from_args(headers=headers, url=page.url, body=body)
    return respcls(
        url=page.url,
        status=response.status,
        headers=headers,
        body=body,
        request=request,
        flags=["pyppeteer"],
    )
async def _download_request_with_page(self, request: Request, spider: Spider, page: Page) -> Response:
    """Fetch ``request.url`` in an existing Playwright *page* and build a
    Scrapy ``Response`` from the rendered document.

    Page coroutines listed in ``request.meta["playwright_page_coroutines"]``
    are executed in order, waiting for the page load state after each one.
    The page stays open (and is exposed via ``request.meta["playwright_page"]``)
    when ``playwright_include_page`` is set; otherwise it is closed here.
    """
    start_time = time()
    response = await page.goto(request.url)

    coroutines = request.meta.get("playwright_page_coroutines") or ()
    if isinstance(coroutines, dict):
        coroutines = coroutines.values()
    for coro in coroutines:
        if not isinstance(coro, PageCoroutine):
            continue
        coro.result = await getattr(page, coro.method)(*coro.args, **coro.kwargs)
        await page.wait_for_load_state(timeout=self.default_navigation_timeout)

    body = (await page.content()).encode("utf8")
    request.meta["download_latency"] = time() - start_time

    if request.meta.get("playwright_include_page"):
        # Caller asked to keep the page; hand it over via meta.
        request.meta["playwright_page"] = page
    else:
        await page.close()
        self.stats.inc_value("playwright/page_count/closed")

    headers = Headers(response.headers)
    # The browser already decoded the body, so the encoding header no longer
    # describes the payload.
    headers.pop("Content-Encoding", None)
    respcls = responsetypes.from_args(headers=headers, url=page.url, body=body)
    return respcls(
        url=page.url,
        status=response.status,
        headers=headers,
        body=body,
        request=request,
        flags=["playwright"],
    )