else: browser_params[i]['save_content'] = SAVE_CONTENT browser_params[i]['headless'] = True # Manager configuration manager_params['data_directory'] = '~/Desktop/%s/' % CRAWL_DIRECTORY manager_params['log_directory'] = '~/Desktop/%s/' % CRAWL_DIRECTORY manager_params['output_format'] = 's3' manager_params['s3_bucket'] = S3_BUCKET manager_params['s3_directory'] = CRAWL_DIRECTORY # Allow the use of localstack's mock s3 service S3_ENDPOINT = os.getenv('S3_ENDPOINT') if S3_ENDPOINT: boto3.DEFAULT_SESSION = LocalS3Session(endpoint_url=S3_ENDPOINT) manager_params['s3_bucket'] = local_s3_bucket(boto3.resource('s3'), name=S3_BUCKET) # Instantiates the measurement platform # Commands time out by default after 60 seconds manager = TaskManager.TaskManager(manager_params, browser_params, logger_kwargs=LOGGER_SETTINGS) # At this point, Sentry should be initiated if SENTRY_DSN: # Add crawler.py-specific context with sentry_sdk.configure_scope() as scope: # tags generate breakdown charts and search filters scope.set_tag('NUM_BROWSERS', NUM_BROWSERS) scope.set_tag('CRAWL_DIRECTORY', CRAWL_DIRECTORY) scope.set_tag('S3_BUCKET', S3_BUCKET)
# NOTE(review): `i` is presumably the index of an enclosing per-browser loop
# whose header (and possibly a conditional around the first assignment) is
# above this fragment -- confirm and re-indent these statements accordingly.
browser_params[i]["save_content"] = SAVE_CONTENT
if PREFS:
    # PREFS is parsed as JSON; json.loads raises on malformed input.
    browser_params[i]["prefs"] = json.loads(PREFS)

# Manager configuration
# Data and logs both go to ~/Desktop/<CRAWL_DIRECTORY>/; results are
# written in the "s3" output format to S3_BUCKET under CRAWL_DIRECTORY.
manager_params["data_directory"] = "~/Desktop/%s/" % CRAWL_DIRECTORY
manager_params["log_directory"] = "~/Desktop/%s/" % CRAWL_DIRECTORY
manager_params["output_format"] = "s3"
manager_params["s3_bucket"] = S3_BUCKET
manager_params["s3_directory"] = CRAWL_DIRECTORY

# Allow the use of localstack's mock s3 service
# When the S3_ENDPOINT environment variable is set, the default boto3
# session is replaced and the bucket setting is overridden with whatever
# local_s3_bucket() returns for S3_BUCKET.
S3_ENDPOINT = os.getenv("S3_ENDPOINT")
if S3_ENDPOINT:
    boto3.DEFAULT_SESSION = LocalS3Session(endpoint_url=S3_ENDPOINT)
    manager_params["s3_bucket"] = local_s3_bucket(
        boto3.resource("s3"), name=S3_BUCKET
    )

# Instantiates the measurement platform
# Commands time out by default after 60 seconds
manager = TaskManager.TaskManager(
    manager_params, browser_params, logger_kwargs=LOGGER_SETTINGS
)

# At this point, Sentry should be initiated
if SENTRY_DSN:
    # Add crawler.py-specific context
    with sentry_sdk.configure_scope() as scope:
        # tags generate breakdown charts and search filters
        scope.set_tag("CRAWL_DIRECTORY", CRAWL_DIRECTORY)
        scope.set_tag("S3_BUCKET", S3_BUCKET)
        scope.set_tag("DISPLAY_MODE", DISPLAY_MODE)