def test_logging_retry(caplog, integrated_ff):
    """A log that misses ES on the first attempt should be retried and land after the delay."""
    # es_client connection info comes from the portal health page
    es_url = ff_utils.get_health_page(key=integrated_ff['ff_key'])['elasticsearch']
    log_utils.set_logging(env='fourfront-mastertest', es_server=es_url, in_prod=True)
    logger = structlog.getLogger(__name__)
    logger.warning('test_retry', _test_log_utils=True)
    assert len(caplog.records) == 1
    record_msg = caplog.records[0].__dict__['msg']
    assert record_msg['event'] == 'test_retry'
    log_uuid = record_msg['log_uuid']
    # the retry takes 5 sec, so the log shouldn't have reached ES yet
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    found = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                     key=integrated_ff['ff_key'])
    assert len(found) == 0
    # give the retry time to fire
    time.sleep(7)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    found = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                     key=integrated_ff['ff_key'])
    assert len(found) == 1
    assert found[0]['log_uuid'] == log_uuid
    assert found[0]['event'] == 'test_retry'
def get_s3_encrypt_key_id_from_health_page(auth):
    """Return the S3 encrypt key id advertised on the health page for *auth*, or None."""
    try:
        return get_health_page(key=auth).get(HealthPageKey.S3_ENCRYPT_KEY_ID)
    except Exception:  # pragma: no cover
        # Not unit-tested: get_health_page realistically always returns a dictionary,
        # so the .get(...) succeeds (possibly yielding None), which is already tested.
        # Returning None here amounts to the same outcome; this clause is largely
        # pro forma and probably not really needed.
        return None
def test_set_logging_level(caplog, integrated_ff):
    """ Provides log_dir, log_name and level args to set_logging """
    health = ff_utils.get_health_page(key=integrated_ff['ff_key'])
    log_utils.set_logging(es_server=health['elasticsearch'], level=logging.ERROR,
                          log_name='Errors', log_dir='.')
    logger = structlog.getLogger('Errors')
    logger.error('oh no an error!', foo='faux')
    # the error-level message should have been captured
    assert len(caplog.records) == 1
def test_set_logging_in_prod(caplog, integrated_ff):
    """With in_prod=True, logs are shipped to ES unless _skip_es=True is passed."""
    # es_client connection info comes from the portal health page
    health = ff_utils.get_health_page(key=integrated_ff['ff_key'])
    es_url = health['elasticsearch']
    log_utils.set_logging(env='fourfront-mastertest', es_server=es_url, in_prod=True)
    logger = structlog.getLogger(__name__)
    logger.warning('meh', foo='bar')
    assert len(caplog.records) == 1
    first = caplog.records[0]
    # the ES handler should be attached to the underlying logger
    assert len(first._logger.handlers) == 1
    first_msg = first.__dict__['msg']
    assert 'log_uuid' in first_msg
    assert first_msg['event'] == 'meh'
    assert first_msg['foo'] == 'bar'
    assert first_msg['level'] == 'warning'
    log_uuid = first_msg['log_uuid']
    # confirm the log was written to mastertest ES
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    found = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                     key=integrated_ff['ff_key'])
    assert len(found) == 1
    assert found[0]['event'] == 'meh'
    assert found[0]['foo'] == 'bar'
    assert found[0]['log_uuid'] == log_uuid
    assert found[0]['level'] == 'warning'
    # setting _skip_es=True should keep the log out of ES
    logger.warning('test_skip', _skip_es=True)
    assert len(caplog.records) == 2  # two logs now
    second = caplog.records[1]
    # the ES handler is still present even though shipping was skipped
    assert len(second._logger.handlers) == 1
    second_msg = second.__dict__['msg']
    assert 'log_uuid' in second_msg
    assert second_msg['event'] == 'test_skip'
    log_uuid = second_msg['log_uuid']
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    found = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                     key=integrated_ff['ff_key'])
    assert len(found) == 0  # log is not in ES, as anticipated
def main():
    """
    Simple command to adjust settings on the Kibana index in ES, so that
    searches against all indices do not create issues due to default config.

    Requires exactly one of --es-endpoint or --env; logs an error and
    returns (no exception) on bad arguments or a missing .kibana index.
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)
    parser = argparse.ArgumentParser(  # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here.
        description="Configure Kibana Index",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--es-endpoint', help='Elasticsearch endpoint, including port')
    parser.add_argument('--env', help='Use the Elasticsearch associated with this EB environment')
    args = parser.parse_args()

    # require either --es-endpoint or --env (not both)
    # BUG FIX: was `if not args.es_endpoint or args.env:`, which (a) rejected the
    # valid --env-only case as "gave neither" and (b) made the "gave both" branch
    # unreachable. Also added the missing spaces in the implicitly-concatenated
    # error-message literals ("--es-endpointor", "forendpoints").
    if not args.es_endpoint and not args.env:
        log.error('configure_kibana_index: must provide either --es-endpoint'
                  ' or --env to this command! You gave neither.')
        return
    elif args.es_endpoint and args.env:
        log.error('configure_kibana_index: must provide either --es-endpoint'
                  ' or --env to this command! You gave both.')
        return
    elif args.es_endpoint:
        use_es = args.es_endpoint
    else:  # args.env
        use_es = get_health_page(ff_env=args.env)['elasticsearch']

    # create client and ensure kibana index exists
    client = create_es_client(use_es, use_aws_auth=True)
    if not client.indices.exists(index='.kibana'):
        log.error('configure_kibana_index: .kibana index does not exist for'
                  ' endpoints %s' % use_es)
        return
    # raise max_result_window so broad searches don't hit the default cap
    kibana_settings = {'max_result_window': 100000}
    client.indices.put_settings(index='.kibana', body=kibana_settings)
    log.info('configure_kibana_index: successfully changed settings %s'
             % list(kibana_settings.keys()))
def _discover_es_url_from_boto3_eb_metadata(integrated_envname):
    """
    Discover the Elasticsearch URL for *integrated_envname* via EB metadata,
    cross-checking it against the portal health page. Raises RuntimeError if
    discovery fails (e.g. wrong credentials).
    """
    try:
        eb_health = _discover_es_health_from_boto3_eb_metadata(integrated_envname)
        assert eb_health, f"No health page for {integrated_envname} was discovered."
        PRINT(f"In _discover_es_url_from_boto3_eb_metadata,"
              f"discovered_health_json_from_eb={json.dumps(eb_health, indent=2)}")
        time.sleep(1)  # Reduce throttling risk
        ff_health = get_health_page(ff_env=integrated_envname)
        # Consistency check that both utilities are returning the same info.
        assert eb_health['beanstalk_env'] == ff_health['beanstalk_env']
        assert eb_health['elasticsearch'] == ff_health['elasticsearch']
        assert eb_health['namespace'] == ff_health['namespace']
        # Not all health pages have a namespace. Production ones may not.
        # But they are not good environments for us to use for testing.
        discovered_namespace = eb_health['namespace']
        # We _think_ these are always the same, but maybe not.
        # Perhaps worth noting if/when they diverge.
        assert discovered_namespace == integrated_envname, (
            f"While doing ES URL discovery for integrated envname {integrated_envname},"
            f" the namespace, {discovered_namespace}, discovered on the health page"
            f" does not match the integrated envname.")
        # This should be all we actually need:
        return eb_health['elasticsearch']
    except Exception as exc:
        # Errors sometimes happen when running tests with the orchestration credentials.
        PRINT("********************************************")
        PRINT("** ERROR DURING ELASTICSEARCH DISCOVERY **")
        PRINT("** Make sure you have legacy credentials **")
        PRINT("** enabled while running these tests. **")
        PRINT("********************************************")
        PRINT(f"{exc.__class__.__name__}: {exc}")
        raise RuntimeError(f"Failed to discover ES URL for {integrated_envname}.")
def main(global_config, **local_config):
    """Build and return the Pyramid WSGI application for this portal."""
    settings = global_config
    settings.update(local_config)

    # BEGIN PART THAT'S NOT IN FOURFRONT
    # Quiet some chatty third-party loggers by bumping them to WARNING.
    noisy_fragments = ['boto', 'urllib', 'elasticsearch', 'dcicutils']
    for logger_name in logging.Logger.manager.loggerDict:
        if any(fragment in logger_name for fragment in noisy_fragments):
            logging.getLogger(logger_name).setLevel(logging.WARNING)
    # END PART THAT'S NOT IN FOURFRONT

    set_logging(in_prod=settings.get('production'))
    # source environment variables on elastic beanstalk
    source_beanstalk_env_vars()

    settings['snovault.jsonld.terms_prefix'] = 'encode'
    # auth0 and google reCAPTCHA keys come from the environment
    settings['auth0.secret'] = os.environ.get("Auth0Secret")
    settings['auth0.client'] = os.environ.get("Auth0Client")
    settings['g.recaptcha.key'] = os.environ.get('reCaptchaKey')
    settings['g.recaptcha.secret'] = os.environ.get('reCaptchaSecret')
    # enable invalidation scope
    settings[INVALIDATION_SCOPE_ENABLED] = True

    # mirrored Elasticsearch location (for staging and production servers);
    # does not exist for CGAP currently
    mirror = get_mirror_env_from_context(settings)
    if mirror is not None:
        settings['mirror.env.name'] = mirror
        settings['mirror_health'] = get_health_page(ff_env=mirror)

    config = Configurator(settings=settings)
    config.registry[APP_FACTORY] = main  # used by mp_indexer
    config.include(app_version)
    config.include('pyramid_multiauth')  # must be before calling set_authorization_policy
    # Override default authz policy set by pyramid_multiauth
    config.set_authorization_policy(LocalRolesAuthorizationPolicy())
    config.include(session)
    config.include('pyramid_retry')  # must include, as tm.attempts was removed from pyramid_tm

    # for CGAP, always enable type=nested mapping
    # NOTE: this MUST occur prior to including Snovault, otherwise it will not work
    config.add_settings({'mappings.use_nested': True})
    config.include(configure_dbsession)
    config.include('snovault')
    config.commit()  # commit so search can override listing

    # Render an HTML page to browsers and a JSON document for API clients
    config.include('.renderers')
    config.include('.authentication')
    config.include('.server_defaults')
    config.include('.root')
    config.include('.types')
    config.include('.loadxl')
    config.include('.visualization')
    config.include('.ingestion_listener')
    config.include('.custom_embed')

    if 'elasticsearch.server' in config.registry.settings:
        config.include('snovault.elasticsearch')
        config.include('.search.search')
        config.include('.search.compound_search')  # could make enabling configurable

    # this contains fall back url, so make sure it comes just before static_resources
    config.include('.types.page')
    config.include(static_resources)
    config.include(changelogs)

    # restrict-by-origin support: collect AMAZON prefixes from the published IP ranges
    aws_ip_ranges = json_from_path(settings.get('aws_ip_ranges_path'), {'prefixes': []})
    config.registry['aws_ipset'] = netaddr.IPSet(
        record['ip_prefix']
        for record in aws_ip_ranges['prefixes']
        if record['service'] == 'AMAZON')

    if asbool(settings.get('testing', False)):
        config.include('.tests.testing_views')

    # Load upgrades last so that all views (including testing views) are registered.
    config.include('.upgrade')

    # initialize sentry reporting
    init_sentry(settings.get('sentry_dsn', None))

    app = config.make_wsgi_app()

    # optionally pre-load a workbook of data into the freshly built app
    workbook_filename = settings.get('load_workbook', '')
    load_test_only = asbool(settings.get('load_test_only', False))  # noqa: F841 - kept for parity; unused here
    docsdir = settings.get('load_docsdir', None)
    if docsdir is not None:
        docsdir = [path.strip() for path in docsdir.strip().split('\n')]
    if workbook_filename:
        load_workbook(app, workbook_filename, docsdir)
    return app