async def __add_page_settings(self, page: Page) -> None:
    """Apply this instance's custom settings to ``page``.

    Depending on the instance configuration this:

    * sets the default navigation timeout (``self.default_nav_timeout``),
    * blocks the URLs in ``self.blocked_urls``,
    * disables the cache when ``self.disable_cache`` is truthy,
    * registers every script in ``self.js_injection_scripts`` to be
      evaluated on each new document,
    * registers every ``*.js`` file found in the ``automation_detection``
      directory next to this module (and, when ``self.headless`` is truthy,
      in ``headless_detection`` as well) to be evaluated on each new
      document,
    * enables request interception and aborts every request whose
      ``resourceType`` is in ``self.request_abort_types``.

    All awaitable settings are collected and awaited together with
    ``asyncio.gather``.

    Args:
        page: The page to configure. Presumably a pyppeteer ``Page`` -- the
            methods called here match that API; confirm against the file's
            imports.
    """
    # Change the default maximum navigation timeout.
    if self.default_nav_timeout:
        page.setDefaultNavigationTimeout(self.default_nav_timeout)

    tasks = []

    # Block URLs from loading.
    if self.blocked_urls:
        self.logger.info(f"Adding {len(self.blocked_urls)} blocked urls")
        tasks.append(
            page._client.send('Network.setBlockedURLs',
                              {'urls': self.blocked_urls}))

    # Disable cache for each request.
    if self.disable_cache:
        self.logger.info("Setting cache disabled.")
        tasks.append(page.setCacheEnabled(False))

    # Add JavaScript function(s) that will be invoked whenever the page is
    # navigated.
    if self.js_injection_scripts:
        self.logger.info(
            f"Adding {len(self.js_injection_scripts)} JavaScript injection scripts"
        )
        tasks.extend(
            page.evaluateOnNewDocument(script)
            for script in self.js_injection_scripts)

    # Add the bundled detection-prevention scripts. Automation-detection
    # scripts are always added; headless-detection scripts only in headless
    # mode.
    script_dirs = [('automation_detection', 'automation')]
    if self.headless:
        script_dirs.append(('headless_detection', 'headless'))
    module_dir = Path(__file__).parent
    for dir_name, label in script_dirs:
        # glob() yields files in arbitrary, filesystem-dependent order; sort
        # so the injection order is the same on every run and platform.
        for script_path in sorted(module_dir.joinpath(dir_name).glob("*.js")):
            self.logger.info(
                f"(page {page}) Adding {label} detection prevention script: {script_path.name}"
            )
            tasks.append(page.evaluateOnNewDocument(script_path.read_text()))

    # Intercept all requests and only allow requests for types not in
    # self.request_abort_types.
    if self.request_abort_types:
        self.logger.info(
            f"Setting request interception for {self.request_abort_types}")
        tasks.append(page.setRequestInterception(True))

        async def block_type(request):
            if request.resourceType in self.request_abort_types:
                await request.abort()
            else:
                await request.continue_()

        # The event loop only keeps weak references to tasks, so a
        # fire-and-forget task can be garbage-collected before it finishes.
        # Hold a strong reference until each task is done.
        in_flight = set()

        def handle_request(request):
            task = asyncio.create_task(block_type(request))
            in_flight.add(task)
            task.add_done_callback(in_flight.discard)
            # Return the task, as the original lambda did, in case the event
            # emitter tracks returned futures.
            return task

        page.on('request', handle_request)

    await asyncio.gather(*tasks)
async def _add_page_settings(self, page: Page) -> None:
    """Apply the owning browser's launch options to ``page``.

    The launch options are looked up via
    ``self.browsers[page.browser]['launch_options']``. The following keys
    are read:

    * ``defaultNavigationTimeout`` -- passed to
      ``page.setDefaultNavigationTimeout``.
    * ``blockedURLs`` -- passed to ``self.set_blocked_urls``.
    * ``setCacheEnabled`` -- passed to ``page.setCacheEnabled``.
    * ``evaluateOnNewDocument`` -- iterable of scripts, each registered with
      ``page.evaluateOnNewDocument``.
    * ``requestAbortTypes`` -- requests whose ``resourceType`` is in this
      collection are aborted.
    * ``blockRedirects`` -- when truthy, navigation requests that have a
      non-empty redirect chain are aborted.

    ``self.set_stealth(page)`` is always applied. All awaitable settings are
    awaited together with ``asyncio.gather``.

    Args:
        page: The page to configure. Presumably a pyppeteer ``Page`` -- the
            methods called here match that API; confirm against the file's
            imports.

    Raises:
        KeyError: If ``page.browser`` has no entry in ``self.browsers`` or
            the entry has no ``'launch_options'`` key.
    """
    # launch options for this page.
    launch_options = self.browsers[page.browser]['launch_options']

    # set the default maximum navigation time.
    if 'defaultNavigationTimeout' in launch_options:
        page.setDefaultNavigationTimeout(
            launch_options['defaultNavigationTimeout'])

    tasks = [self.set_stealth(page)]

    # blocks URLs from loading.
    if 'blockedURLs' in launch_options:
        tasks.append(
            self.set_blocked_urls(page, launch_options['blockedURLs']))

    # enable or disable the cache, as configured.
    if 'setCacheEnabled' in launch_options:
        tasks.append(
            page.setCacheEnabled(launch_options['setCacheEnabled']))

    # add JavaScript function(s) that will be invoked whenever the page is
    # navigated.
    tasks.extend(
        page.evaluateOnNewDocument(script)
        for script in launch_options.get('evaluateOnNewDocument', []))

    # intercept all requests; abort those with a blocked resource type and,
    # optionally, redirected navigation requests.
    request_abort_types = launch_options.get('requestAbortTypes') or ()
    block_redirects = launch_options.get('blockRedirects', False)
    # Interception is needed for either feature. Previously 'blockRedirects'
    # was silently ignored unless 'requestAbortTypes' was also non-empty.
    if request_abort_types or block_redirects:
        # enable request interception.
        tasks.append(page.setRequestInterception(True))

        async def block_type(request: Request):
            # condition(s) where requests should be aborted.
            is_blocked_redirect = (block_redirects
                                   and request.isNavigationRequest()
                                   and request.redirectChain)
            if request.resourceType in request_abort_types or is_blocked_redirect:
                await request.abort()
            else:
                await request.continue_()

        # The event loop only keeps weak references to tasks, so a
        # fire-and-forget task can be garbage-collected before it finishes.
        # Hold a strong reference until each task is done.
        in_flight = set()

        def handle_request(request):
            task = asyncio.create_task(block_type(request))
            in_flight.add(task)
            task.add_done_callback(in_flight.discard)
            # Return the task, as the original lambda did, in case the event
            # emitter tracks returned futures.
            return task

        page.on('request', handle_request)

    await asyncio.gather(*tasks)
async def _add_page_settings(self, page: Page) -> None:
    """Apply the owning browser's launch options to ``page``.

    The contents of ``stealth.min.js`` (located next to this module) are
    always registered, wrapped as ``() => {...}``, to be evaluated on each
    new document. The launch options are looked up via
    ``self.browsers[page.browser]['launch_options']`` and the following keys
    are read:

    * ``defaultNavigationTimeout`` -- passed to
      ``page.setDefaultNavigationTimeout``.
    * ``blockedURLs`` -- sent as ``Network.setBlockedURLs`` and awaited
      before the remaining settings are applied.
    * ``setCacheEnabled`` -- passed to ``page.setCacheEnabled``.
    * ``evaluateOnNewDocument`` -- iterable of scripts, each registered with
      ``page.evaluateOnNewDocument``.
    * ``requestAbortTypes`` -- when non-empty, request interception is
      enabled and requests whose ``resourceType`` is in this collection are
      aborted.

    The remaining awaitable settings are awaited together with
    ``asyncio.gather``.

    Args:
        page: The page to configure. Presumably a pyppeteer ``Page`` -- the
            methods called here match that API; confirm against the file's
            imports.

    Raises:
        KeyError: If ``page.browser`` has no entry in ``self.browsers`` or
            the entry has no ``'launch_options'`` key.
        OSError: If ``stealth.min.js`` cannot be read.
    """
    # Do everything that can fail or must be awaited eagerly *before*
    # creating any coroutine objects. Otherwise an exception here would leave
    # already-created coroutines un-awaited ("coroutine ... was never
    # awaited").

    # launch options for this page.
    launch_options = self.browsers[page.browser]['launch_options']

    # JavaScript used to prevent automation detection.
    stealth_source = Path(__file__).parent.joinpath(
        'stealth.min.js').read_text()

    # set the default maximum navigation time.
    if 'defaultNavigationTimeout' in launch_options:
        page.setDefaultNavigationTimeout(
            launch_options['defaultNavigationTimeout'])

    # blocks URLs from loading.
    if 'blockedURLs' in launch_options:
        await page._client.send('Network.setBlockedURLs',
                                {'urls': launch_options['blockedURLs']})

    # add JavaScript functions to prevent automation detection.
    tasks = [page.evaluateOnNewDocument(f"() => {{{stealth_source}}}")]

    # enable or disable the cache, as configured.
    if 'setCacheEnabled' in launch_options:
        tasks.append(
            page.setCacheEnabled(launch_options['setCacheEnabled']))

    # add JavaScript function(s) that will be invoked whenever the page is
    # navigated.
    tasks.extend(
        page.evaluateOnNewDocument(script)
        for script in launch_options.get('evaluateOnNewDocument', []))

    # intercept all requests and only allow requests for types not in
    # request_abort_types.
    request_abort_types = launch_options.get('requestAbortTypes')
    if request_abort_types:
        tasks.append(page.setRequestInterception(True))

        async def block_type(request):
            if request.resourceType in request_abort_types:
                await request.abort()
            else:
                await request.continue_()

        # The event loop only keeps weak references to tasks, so a
        # fire-and-forget task can be garbage-collected before it finishes.
        # Hold a strong reference until each task is done.
        in_flight = set()

        def handle_request(request):
            task = asyncio.create_task(block_type(request))
            in_flight.add(task)
            task.add_done_callback(in_flight.discard)
            # Return the task, as the original lambda did, in case the event
            # emitter tracks returned futures.
            return task

        page.on('request', handle_request)

    await asyncio.gather(*tasks)