def test_logging_retry(caplog, integrated_ff):
    # get es_client info from the health page
    es_url = ff_utils.get_health_page(key=integrated_ff['ff_key'])['elasticsearch']
    log_utils.set_logging(env='fourfront-mastertest', es_server=es_url, in_prod=True)
    log = structlog.getLogger(__name__)
    log.warning('test_retry', _test_log_utils=True)
    assert len(caplog.records) == 1
    assert caplog.records[0].__dict__['msg']['event'] == 'test_retry'
    log_uuid = caplog.records[0].__dict__['msg']['log_uuid']
    # retrying will take 5 sec, so the log shouldn't be in ES yet
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    es_res = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                      key=integrated_ff['ff_key'])
    assert len(es_res) == 0
    # wait to allow logs to retry
    time.sleep(7)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    es_res = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                      key=integrated_ff['ff_key'])
    assert len(es_res) == 1
    assert es_res[0]['log_uuid'] == log_uuid
    assert es_res[0]['event'] == 'test_retry'

def includeme(config):
    settings = config.registry.settings
    config.add_request_method(datastore, 'datastore', reify=True)
    address = settings['elasticsearch.server']
    use_aws_auth = settings.get('elasticsearch.aws_auth')
    # make sure use_aws_auth is a bool
    if not isinstance(use_aws_auth, bool):
        use_aws_auth = True if use_aws_auth == 'true' else False
    # snovault-specific ES options
    # this previously-used option was causing problems (?)
    # 'connection_class': TimedUrllib3HttpConnection
    es_options = {'serializer': PyramidJSONSerializer(json_renderer),
                  'connection_class': TimedRequestsHttpConnection}
    config.registry[ELASTIC_SEARCH] = create_es_client(address,
                                                       use_aws_auth=use_aws_auth,
                                                       **es_options)
    config.include('.cached_views')
    config.include('.esstorage')
    config.include('.indexer_queue')
    config.include('.indexer')
    if asbool(settings.get('mpindexer')) and not PY2:
        config.include('.mpindexer')

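# A minimal sketch of the registry settings the includeme above reads (keys taken from
# the lookups in that function); in practice they come from the Pyramid .ini file, and
# the host shown here is a placeholder, not a real endpoint:
_example_settings = {
    'elasticsearch.server': 'search-my-env.us-east-1.es.amazonaws.com:443',  # placeholder host
    'elasticsearch.aws_auth': 'true',  # may arrive as a string; coerced to bool above
    'mpindexer': 'true',               # multiprocessing indexer toggle, parsed with asbool
}
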
def elastic_search_space(connection, **kwargs):
    """ Checks that our ES nodes all have a certain amount of space remaining """
    check = CheckResult(connection, 'elastic_search_space')
    full_output = {}
    client = es_utils.create_es_client(connection.ff_es, True)
    # use cat.nodes to get id,diskAvail for all nodes, filter out empties
    node_space_entries = filter(None, [data.split() for data in
                                       client.cat.nodes(h='id,diskAvail').split('\n')])
    check.summary = check.description = None
    full_output['nodes'] = {}
    for _id, remaining_space in node_space_entries:
        if 'gb' not in remaining_space:
            if 'mb' not in remaining_space:
                check.status = 'FAIL'
                check.summary = check.description = 'At least one of the nodes in this env has no space remaining'
            else:
                check.status = 'WARN'
                check.summary = check.description = 'At least one of the nodes in this env is low on space'
        full_output['nodes'][_id.strip()] = {'remaining_space': remaining_space}
    if check.summary is None:
        check.status = 'PASS'
        check.summary = check.description = 'All nodes have >1gb remaining disk space'
    check.full_output = full_output
    return check

def wipe_build_indices(es_url, check):
    """ Wipes all number-prefixed indices on the given es_url. Be careful not to run
        while builds are running, as this will cause them to fail.
    """
    check.status = 'PASS'
    check.summary = check.description = 'Wiped all test indices on url: %s' % es_url
    client = es_utils.create_es_client(es_url, True)
    full_output = []
    _, indices = cat_indices(client)  # index name is index 2 in row
    for index in indices:
        try:
            index_name = index[2]
        except IndexError:  # empty [] sometimes returned by API call
            continue
        if re.match(BUILD_INDICES_REGEX, index_name) is not None:
            try:
                resp = Retry.retrying(client.indices.delete, retries_allowed=3)(index=index_name)
            except Exception as e:
                # record the failure so the acknowledged check below catches it
                full_output.append({'acknowledged': False, 'error': str(e)})
            else:
                full_output.append(resp)
    if any(output['acknowledged'] is not True for output in full_output):
        check.status = 'FAIL'
        check.summary = check.description = 'Failed to wipe all test indices, see full output'
    check.full_output = full_output
    return check

def status_of_elasticsearch_indices(connection, **kwargs):
    check = CheckResult(connection, 'status_of_elasticsearch_indices')
    ### the check
    client = es_utils.create_es_client(connection.ff_es, True)
    indices = client.cat.indices(v=True).split('\n')
    split_indices = [ind.split() for ind in indices]
    headers = split_indices.pop(0)
    index_info = {}  # for full output
    warn_index_info = {}  # for brief output
    for index in split_indices:
        if len(index) == 0:
            continue
        index_info[index[2]] = {header: index[idx] for idx, header in enumerate(headers)}
        if index_info[index[2]]['health'] != 'green' or index_info[index[2]]['status'] != 'open':
            warn_index_info[index[2]] = index_info[index[2]]
    # set fields, store result
    if not index_info:
        check.status = 'FAIL'
        check.summary = 'Error reading status of ES indices'
        check.description = 'Error reading status of ES indices'
    elif warn_index_info:
        check.status = 'WARN'
        check.summary = 'ES indices may not be healthy'
        check.description = 'One or more ES indices have health != green or status != open.'
        check.brief_output = warn_index_info
    else:
        check.status = 'PASS'
        check.summary = 'ES indices seem healthy'
    check.full_output = index_info
    return check

def purge_uuid(self, rid, item_type=None, registry=None):
    """
    Purge a uuid from the write storage (Elasticsearch).
    If there is a mirror environment set up for the indexer, also attempt to
    remove the uuid from the mirror Elasticsearch
    """
    if not item_type:
        model = self.get_by_uuid(rid)
        item_type = model.item_type
    try:
        self.es.delete(id=rid, index=item_type, doc_type=item_type)
    except elasticsearch.exceptions.NotFoundError:
        # Case: Not yet indexed
        log.error('PURGE: Couldn\'t find %s in ElasticSearch. Continuing.' % rid)
    except Exception as exc:
        log.error('PURGE: Cannot delete %s in ElasticSearch. Error: %s Continuing.' % (item_type, str(exc)))
    if not registry:
        log.error('PURGE: Registry not available for ESStorage purge_uuid')
        return
    # if configured, delete the item from the mirrored ES as well
    if registry.settings.get('mirror.env.es'):
        mirror_es = registry.settings['mirror.env.es']
        use_aws_auth = registry.settings.get('elasticsearch.aws_auth')
        # make sure use_aws_auth is a bool
        if not isinstance(use_aws_auth, bool):
            use_aws_auth = True if use_aws_auth == 'true' else False
        mirror_client = es_utils.create_es_client(mirror_es, use_aws_auth=use_aws_auth)
        try:
            mirror_client.delete(id=rid, index=item_type, doc_type=item_type)
        except elasticsearch.exceptions.NotFoundError:
            # Case: Not yet indexed
            log.error('PURGE: Couldn\'t find %s in mirrored ElasticSearch (%s). Continuing.' % (rid, mirror_es))
        except Exception as exc:
            log.error('PURGE: Cannot delete %s in mirrored ElasticSearch (%s). Error: %s Continuing.' % (item_type, mirror_es, str(exc)))

def indexing_records(connection, **kwargs):
    check = CheckResult(connection, 'indexing_records')
    client = es_utils.create_es_client(connection.ff_es, True)
    namespaced_index = connection.ff_env + 'indexing'
    # make sure we have the index and items within it
    if (not client.indices.exists(namespaced_index)
            or client.count(index=namespaced_index).get('count', 0) < 1):
        check.summary = check.description = 'No indexing records found'
        check.status = 'PASS'
        return check

    res = client.search(index=namespaced_index, doc_type='indexing', sort='uuid:desc', size=1000,
                        body={'query': {'query_string': {'query': '_exists_:indexing_status'}}})
    delta_days = datetime.timedelta(days=3)
    all_records = res.get('hits', {}).get('hits', [])
    recent_records = []
    warn_records = []
    for rec in all_records:
        if rec['_id'] == 'latest_indexing':
            continue
        time_diff = (datetime.datetime.utcnow() -
                     datetime.datetime.strptime(rec['_id'], "%Y-%m-%dT%H:%M:%S.%f"))
        if time_diff < delta_days:
            body = rec['_source']
            # needed to handle transition to queue. can use 'indexing_started'
            body['timestamp'] = rec['_id']
            if body.get('errors') or body.get('indexing_status') != 'finished':
                warn_records.append(body)
            recent_records.append(body)
    del all_records
    # sort so most recent records are first
    sort_records = sorted(recent_records,
                          key=lambda rec: datetime.datetime.strptime(rec['timestamp'], "%Y-%m-%dT%H:%M:%S.%f"),
                          reverse=True)
    check.full_output = sort_records
    if warn_records:
        sort_warn_records = sorted(warn_records,
                                   key=lambda rec: datetime.datetime.strptime(rec['timestamp'], "%Y-%m-%dT%H:%M:%S.%f"),
                                   reverse=True)
        check.summary = check.description = 'Indexing runs in the past three days may require attention'
        check.status = 'WARN'
        check.brief_output = sort_warn_records
    else:
        check.summary = check.description = 'Indexing runs from the past three days seem normal'
        check.status = 'PASS'
    return check

def test_set_logging_in_prod(caplog, integrated_ff):
    # get es_client info from the health page
    health = ff_utils.get_health_page(key=integrated_ff['ff_key'])
    es_url = health['elasticsearch']
    log_utils.set_logging(env='fourfront-mastertest', es_server=es_url, in_prod=True)
    log = structlog.getLogger(__name__)
    log.warning('meh', foo='bar')
    assert len(caplog.records) == 1
    log_record = caplog.records[0]
    # make sure the ES handler is present
    assert len(log_record._logger.handlers) == 1
    assert 'log_uuid' in caplog.records[0].__dict__['msg']
    assert log_record.__dict__['msg']['event'] == 'meh'
    assert log_record.__dict__['msg']['foo'] == 'bar'
    assert log_record.__dict__['msg']['level'] == 'warning'
    log_uuid = log_record.__dict__['msg']['log_uuid']
    # make sure the log was written successfully to mastertest ES
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    es_res = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                      key=integrated_ff['ff_key'])
    assert len(es_res) == 1
    assert es_res[0]['event'] == 'meh'
    assert es_res[0]['foo'] == 'bar'
    assert es_res[0]['log_uuid'] == log_uuid
    assert es_res[0]['level'] == 'warning'
    # setting _skip_es=True will cause the log not to be shipped to ES
    log.warning('test_skip', _skip_es=True)
    assert len(caplog.records) == 2  # two logs now
    log_record2 = caplog.records[1]
    # make sure the ES handler is present
    assert len(log_record2._logger.handlers) == 1
    assert 'log_uuid' in log_record2.__dict__['msg']
    assert log_record2.__dict__['msg']['event'] == 'test_skip'
    log_uuid = log_record2.__dict__['msg']['log_uuid']
    time.sleep(1)
    es_client = es_utils.create_es_client(es_url, use_aws_auth=True)
    es_res = ff_utils.get_es_metadata([log_uuid], es_client=es_client,
                                      key=integrated_ff['ff_key'])
    assert len(es_res) == 0  # log is not in ES, as anticipated

def __init__(self, index=None, doc_type='result', host=None):
    if not host:
        raise ElasticsearchException("ESConnection error: Host must be specified")
    self.es = es_utils.create_es_client(host, use_aws_auth=True)
    self.index = index
    if index and not self.index_exists(index):
        self.create_index(index)
    self.doc_type = doc_type

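# A minimal usage sketch, assuming the __init__ above belongs to a class named
# ESConnection (as its exception message suggests); the index name and host below
# are placeholders, not real values:
def example_es_connection_usage():
    conn = ESConnection(index='example-results',
                        host='search-my-env.us-east-1.es.amazonaws.com:443')
    # the underlying elasticsearch-py client is exposed as conn.es
    return conn.es.ping()
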
def __init__(self, env, es_server):
    """ Must be given a string es_server url to work.
        Calls __init__ of parent Handler
    """
    self.resend_timer = None
    self.messages_to_resend = []
    self.retry_limit = 2
    self.namespace = self.get_namespace(env)
    self.es_client = es_utils.create_es_client(es_server, use_aws_auth=True)
    logging.Handler.__init__(self)

def main():
    """ Simple command to adjust settings on the Kibana index in ES, so that searches
        against all indices do not create issues due to default config
    """
    logging.basicConfig()
    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('encoded').setLevel(logging.INFO)

    parser = argparse.ArgumentParser(  # noqa - PyCharm wrongly thinks the formatter_class is specified wrong here.
        description="Configure Kibana Index",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--es-endpoint', help='Elasticsearch endpoint, including port')
    parser.add_argument('--env', help='Use the Elasticsearch associated with this EB environment')
    args = parser.parse_args()

    # require either --es-endpoint or --env (not both)
    if not args.es_endpoint and not args.env:
        log.error('configure_kibana_index: must provide either --es-endpoint '
                  'or --env to this command! You gave neither.')
        return
    elif args.es_endpoint and args.env:
        log.error('configure_kibana_index: must provide either --es-endpoint '
                  'or --env to this command! You gave both.')
        return
    elif args.es_endpoint:
        use_es = args.es_endpoint
    elif args.env:
        use_es = get_health_page(ff_env=args.env)['elasticsearch']

    # create client and ensure kibana index exists
    client = create_es_client(use_es, use_aws_auth=True)
    if not client.indices.exists(index='.kibana'):
        log.error('configure_kibana_index: .kibana index does not exist for '
                  'endpoint %s' % use_es)
        return
    kibana_settings = {'max_result_window': 100000}
    client.indices.put_settings(index='.kibana', body=kibana_settings)
    log.info('configure_kibana_index: successfully changed settings %s'
             % list(kibana_settings.keys()))

def es_client_fixture(integrated_ff):
    """ Fixture that creates an es client to mastertest """
    return create_es_client(integrated_ff['es_url'])

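# A minimal usage sketch (hypothetical test), assuming the function above is registered
# with pytest as a fixture under the name es_client_fixture:
def test_es_client_fixture_connects(es_client_fixture):
    # ping() returns True when the client can reach the cluster
    assert es_client_fixture.ping()
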
def purge_download_tracking_items(connection, **kwargs):
    """
    This check was originally created to take in any search through kwargs.
    Changed to hardcode a search for tracking items, but it can easily be adapted;
    as it is, it already handles recording for any number of item types.
    Ensure the search includes limit, field=uuid, and status=deleted
    """
    check = CheckResult(connection, 'purge_download_tracking_items')

    # Don't run if the staging deployment is running
    # Only need to check if our env is data
    # XXX: Removing for now as we find the check can never run without this
    # if the staging deploy takes long enough or errors
    # if connection.fs_env == 'data':
    #     from ..app_utils import AppUtils
    #     staging_conn = AppUtils().init_connection('staging')
    #     staging_deploy = CheckResult(staging_conn, 'staging_deployment').get_primary_result()
    #     if staging_deploy['status'] != 'PASS':
    #         check.summary = 'Staging deployment is running - skipping'
    #         return check

    if Stage.is_stage_prod() is False:
        check.summary = check.description = 'This check only runs on Foursight prod'
        return check

    time_limit = 270  # 4.5 minutes
    t0 = time.time()
    check.full_output = {}  # purged items by item type
    search = '/search/?type=TrackingItem&tracking_type=download_tracking&status=deleted&field=uuid&limit=300'
    search_res = ff_utils.search_metadata(search, key=connection.ff_keys)
    search_uuids = [res['uuid'] for res in search_res]
    client = es_utils.create_es_client(connection.ff_es, True)
    # a bit convoluted, but we want frame=raw, which does not include uuid;
    # use get_es_metadata to handle this, used here as a generator
    for to_purge in ff_utils.get_es_metadata(search_uuids, es_client=client,
                                             is_generator=True, key=connection.ff_keys):
        if round(time.time() - t0, 2) > time_limit:
            break
        purge_properties = to_purge['properties']
        purge_properties['uuid'] = to_purge['uuid']  # add uuid to frame=raw
        try:
            purge_res = ff_utils.purge_metadata(to_purge['uuid'], key=connection.ff_keys)
        except Exception as exc:
            purge_status = 'error'
            purge_detail = str(exc)
        else:
            purge_status = purge_res['status']
            purge_detail = purge_properties if purge_status == 'success' else purge_res
        purge_record = {'uuid': to_purge['uuid'], 'result': purge_detail}
        if to_purge['item_type'] not in check.full_output:
            check.full_output[to_purge['item_type']] = {}
        if purge_status not in check.full_output[to_purge['item_type']]:
            check.full_output[to_purge['item_type']][purge_status] = []
        check.full_output[to_purge['item_type']][purge_status].append(purge_record)

    purge_out_str = '. '.join(['%s: %s' % (it, len(check.full_output[it]['success']))
                               for it in check.full_output
                               if check.full_output[it].get('success')])
    check.description = 'Purged: ' + purge_out_str + '. Search used: %s' % search
    if any([it for it in check.full_output if check.full_output[it].get('error')]):
        check.status = 'WARN'
        check.summary = 'Some items failed to purge. See full output'
    else:
        check.status = 'PASS'
        check.summary = 'Items purged successfully'
    return check