def load_es_template(apps, schema_editor):
    es = Elasticsearch(hosts=[settings.ES_URL], verify_certs=False)
    es.put_template(id="climate_data_template", body=json.dumps({
        "template": "climate_data",
        "mappings": {
            "*": {
                "properties": {
                    "measurement": {"type": "double"},
                    "tmax": {"type": "double"},
                    "tmin": {"type": "double"},
                    "tmean": {"type": "double"},
                    "tdev": {"type": "double"},
                    "rainfall": {"type": "double"},
                    "sunshine": {"type": "double"},
                    "region": {"type": "keyword"}
                }
            }
        }
    }))
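# A minimal wiring sketch (assumptions: this sits in a Django migration
# module, and the dependency below is hypothetical) showing how
# load_es_template above would be registered so it runs on `migrate`.
from django.db import migrations


class Migration(migrations.Migration):
    dependencies = [('climate', '0001_initial')]  # hypothetical previous migration

    operations = [
        migrations.RunPython(load_es_template, migrations.RunPython.noop),
    ]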
def main():
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    send_get_body_as = raw_input("Method for querying Elasticsearch[GET]: ") or 'GET'
    es = Elasticsearch(host=es_host, port=es_port, send_get_body_as=send_get_body_as)
    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int', doc_type='dashboard', body=query,
                    _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()
    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)
    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))
def configure_client(self):
    """Instantiate and configure the ElasticSearch client.

    It simply takes the given HOSTS list and uses PARAMS as the keyword
    arguments of the ElasticSearch class.

    The client's transport_class is given by the class attribute
    ``transport_class``, and the connection class used by the transport
    class is given by the class attribute ``connection_class``.

    An ``ImproperlyConfigured`` exception is raised if any of these
    elements is undefined.
    """
    hosts = self.server['HOSTS']
    params = self.server['PARAMS']
    if not self.transport_class:
        raise ImproperlyConfigured(
            'Djangoes backend %r is not properly configured: '
            'no transport class provided' % self.__class__)
    if not self.connection_class:
        raise ImproperlyConfigured(
            'Djangoes backend %r is not properly configured: '
            'no connection class provided' % self.__class__)
    # pylint: disable=star-args
    self.client = Elasticsearch(hosts,
                                transport_class=self.transport_class,
                                connection_class=self.connection_class,
                                **params)
def __init__(self, args):
    self.parse_args(args)
    self.conf = load_rules(self.args.config, use_rule=self.args.rule)
    self.max_query_size = self.conf['max_query_size']
    self.rules = self.conf['rules']
    self.debug = self.args.debug
    self.verbose = self.args.verbose
    self.writeback_index = self.conf['writeback_index']
    self.es_host = self.conf['es_host']
    self.es_port = self.conf['es_port']
    self.run_every = self.conf['run_every']
    self.alert_time_limit = self.conf['alert_time_limit']
    self.old_query_limit = self.conf['old_query_limit']
    self.alerts_sent = 0
    self.num_hits = 0
    self.current_es = None
    self.current_es_addr = None
    self.buffer_time = self.conf['buffer_time']
    self.silence_cache = {}
    self.rule_hashes = get_rule_hashes(self.conf)
    self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)

    if self.debug:
        self.verbose = True
    if self.verbose:
        logging.getLogger().setLevel(logging.INFO)

    for rule in self.rules:
        rule = self.init_rule(rule)

    if self.args.silence:
        self.silence()
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'],
                            port=self.rules['es_port'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        # For composite keys, we will need to perform sub-aggregations
        if type(field) == list:
            level = query_template['aggs']
            # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
            for i, sub_field in enumerate(field):
                level['values']['terms']['field'] = sub_field
                if i < len(field) - 1:
                    # If we have more fields after the current one, then set up the next nested structure
                    level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                    level = level['values']['aggs']
        else:
            # For non-composite keys, only a single agg is needed
            field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            if type(field) == list:
                # For composite keys, make the lookup based on all fields
                # Make it a tuple since it can be hashed and used in dictionary lookups
                self.seen_values[tuple(field)] = []
                for bucket in buckets:
                    # We need to walk down the hierarchy and obtain the value at each level
                    self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                # If we don't have any results, it could either be because of the absence of any baseline data
                # OR it may be because the composite key contained a non-primitive type. Either way, give the
                # end-users a heads up to help them debug what might be going on.
                if not self.seen_values[tuple(field)]:
                    elastalert_logger.warning((
                        'No results were found from all sub-aggregations. This can either indicate that there is '
                        'no baseline data OR that a non-primitive field was used in a composite key.'
                    ))
            else:
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
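# Illustration of what the loop above generates for a composite key such as
# ['src_ip', 'dest_port'] (field and timestamp names are hypothetical): each
# part of the key becomes one level of a nested terms aggregation under a
# filter aggregation.
query = {
    'aggs': {'filtered': {
        'filter': {'bool': {'must': [{'range': {'@timestamp': {'gte': '...', 'lte': '...'}}}]}},
        'aggs': {'values': {
            'terms': {'field': 'src_ip', 'size': 2147483647},
            'aggs': {'values': {'terms': {'field': 'dest_port', 'size': 2147483647}}},
        }},
    }},
}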
def __init__(self, index_name, index_type, ip="127.0.0.1"):
    '''
    @param index_name: index name
    @param index_type: index (doc) type
    '''
    self.index_name = index_name
    self.index_type = index_type
    self.es = Elasticsearch([ip])
def search_fuzzy(request=None, project_id=None):
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])
    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')
    st = request.GET.get('st', None)
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    query = {
        "query": {
            "query_string": {
                "fields": ["title"],
                "query": "*" + st + "*",
            }
        },
    }
    indexes = [index_name, ebs_index_name, elb_index_name, eip_index_name,
               vpc_index_name, subnet_index_name, security_group_index_name]
    doc_types = ["instance_id", "name_title", "prip_title", "puip_title",
                 "ebs", "eip", "elb", "vpc", "subnet",
                 "security_group_id", "security_group_name"]
    total = client.search(index=indexes, doc_type=doc_types, body=query,
                          ignore_unavailable=True)['hits']['total']
    # Get the total search result count and set the size parameter equal to
    # it, to fetch all results.
    # ToDo Discuss and Optimize
    query['size'] = total
    search_results = client.search(index=indexes, doc_type=doc_types,
                                   body=query, ignore_unavailable=True)
    return search_results
def count(flt):
    '''
    Given a filter, return the number of users matching that filter.

    Example:
        flt: {'gender': 'male'}
        return value: int, the number of users with 'male' as gender
    '''
    req = init_query()
    write_filters_in_request(req, flt)
    es = Elasticsearch(ES_NODES)
    res = es.count(index=RIOT_USERS_INDEX, body=req)['count']
    return res
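# Hedged usage sketch for count(), assuming the module-level ES_NODES and
# RIOT_USERS_INDEX configuration shown above is in place:
male_users = count({'gender': 'male'})
print("male users: %d" % male_users)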
def handle_error(self, message, data=None):
    ''' Logs message at error level and writes message, data and traceback to Elasticsearch. '''
    if not self.writeback_es:
        self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)
    logging.error(message)
    body = {'message': message}
    tb = traceback.format_exc()
    body['traceback'] = tb.strip().split('\n')
    if data:
        body['data'] = data
    self.writeback('elastalert_error', body)
def store_vacancy_record(es: Elasticsearch, index_name: str, record: dict,
                         parent_id: str) -> str:
    # Build a deterministic document id by hashing the record's key/value
    # pairs together with the parent id.
    hash_string = ''
    for k, v in record.items():
        hash_string += "{}{}".format(k, v)
    hash_string += parent_id
    hash_object = hashlib.md5(hash_string.encode())
    es.index(index=index_name, doc_type='vacancies', id=hash_object.hexdigest(),
             body=record, parent=parent_id)
    # Note: the raw concatenation is returned, not the md5 hexdigest that was
    # used as the document id.
    return hash_string
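# Usage sketch (index name and record values are made up; assumes an
# Elasticsearch client `es` as in the signature above). Because the id is an
# md5 of the record plus parent id, indexing the same record twice overwrites
# the same document instead of creating a duplicate.
record = {'title': 'Backend engineer', 'city': 'Berlin'}
doc_key = store_vacancy_record(es, 'vacancies', record, parent_id='company-42')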
def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
    '''
    Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
    if it has already been initialized. Otherwise, creates a new one.

    If no parameters are passed, everything is determined from the Django settings.

    :param urls: A list of URLs, or a single string of URL (without leading `http://`), or
        None to read from settings.
    :param idx: A list of indices or a single string representing an index name. Optional.
        Will be merged with `idx_alias`.
    :param idx_alias: A list of index aliases or a single string representing an index
        alias, as defined in the settings. Will be merged with `idx`.
    :param timeout: Timeout used in the connection.
    :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will
        aggressively use any connection with the exact same settings.
    :param **kwargs: Additional settings to pass to the low level elasticsearch client and
        to elasticsearch-dsl-py's Search.
    '''
    Bungiesearch.__load_settings__()

    urls = urls or Bungiesearch.BUNGIE['URLS']
    if not timeout:
        timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)

    search_keys = ['using', 'index', 'doc_type', 'extra']
    search_settings, es_settings = {}, {}
    for k, v in iteritems(kwargs):
        if k in search_keys:
            search_settings[k] = v
        else:
            es_settings[k] = v

    if not es_settings:
        # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
        es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

    # Build a caching key to cache the es_instance for later use (and to retrieve a
    # previously cached es_instance).
    cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
    es_instance = None
    if not force_new:
        if cache_key in Bungiesearch._cached_es_instances:
            es_instance = Bungiesearch._cached_es_instances[cache_key]

    if not es_instance:
        es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
        Bungiesearch._cached_es_instances[cache_key] = es_instance

    if 'using' not in search_settings:
        search_settings['using'] = es_instance

    super(Bungiesearch, self).__init__(**search_settings)

    # Creating instance attributes.
    self._only = []  # Stores the exact fields to fetch from the database when mapping.
    self.results = []  # Store the mapped and unmapped results.
    self._raw_results_only = raw_results
def connect():
    global _es
    if _es is None:
        server = config.get('elasticsearch_host', 'localhost') + ':9200'
        auth = config.get('elastic_search_basic_auth', None)
        # Only pass http_auth when auth is configured: the comprehension
        # yields an empty dict when auth is falsy.
        _es = Elasticsearch(server, **{'http_auth': a for a in (auth,) if a})
    return _es
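# The conditional-kwarg idiom used above, shown standalone (values made up):
# the comprehension yields an empty dict when auth is unset, so http_auth is
# only passed to Elasticsearch when it is actually configured.
assert {'http_auth': a for a in ('user:secret',) if a} == {'http_auth': 'user:secret'}
assert {'http_auth': a for a in (None,) if a} == {}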
def get_list_of_indexes_to_reindex(full_reindex=False):
    db_names = all_db_names()
    try:
        list_of_indexes_out_of_sync = []
        total_submissions = 0
        es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST,
                                   "port": ELASTIC_SEARCH_PORT}])
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue
                form_model_doc = FormModelDocument.wrap(row["value"])
                if full_reindex or is_mapping_out_of_sync(form_model_doc, dbm):
                    search = Search(using=es, index=dbm.database_name,
                                    doc_type=form_model_doc.id)
                    no_of_submissions = search.count()
                    questionnaire_info = dict(db_name=database_name,
                                              questionnaire_id=form_model_doc.id,
                                              name=form_model_doc.name,
                                              no_of_submissions=no_of_submissions)
                    total_submissions += no_of_submissions
                    list_of_indexes_out_of_sync.append(questionnaire_info)
        return list_of_indexes_out_of_sync, total_submissions
    except Exception:
        # Errors are swallowed here and the function implicitly returns None,
        # so callers must handle a missing result.
        pass
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
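# The two timeouts in play above are different knobs (host and values here
# are illustrative): `timeout` is the server-side query timeout and takes a
# duration string such as '50s', while `request_timeout` bounds the client's
# HTTP round trip in seconds.
from elasticsearch import Elasticsearch

es = Elasticsearch(host='localhost', port=9200)
res = es.search(index='logstash-*', body={'query': {'match_all': {}}},
                ignore_unavailable=True,
                timeout='50s',       # query timeout, evaluated by Elasticsearch
                request_timeout=60)  # client-side socket timeout, in seconds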
def get(self, request):
    database_name = get_database_name(request.user)
    search_text = lower(request.GET["term"] or "")
    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST,
                               "port": ELASTIC_SEARCH_PORT}])
    search = Search(using=es, index=database_name, doc_type="reporter")
    search = search.extra(**{"size": "10"})
    resp = []
    if search_text:
        query_text_escaped = ElasticUtilsHelper().replace_special_chars(search_text)
        query_fields = ["name", "name_value", "name_exact",
                        "short_code", "short_code_exact", "short_code_value"]
        search = search.query("query_string", query=query_text_escaped, fields=query_fields)
        search_results = search.execute()
        resp = [{"id": result.short_code, "label": self.get_label(result)}
                for result in search_results.hits]
    return HttpResponse(json.dumps(resp))
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
    window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get("use_strftime_index"):
        index = format_index(self.rules["index"], start, end)
    else:
        index = self.rules["index"]
    time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
    query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
    query = {"aggs": {"filtered": query_template}}

    for field in self.fields:
        field_name["field"] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout="50s")
        if "aggregations" in res:
            buckets = res["aggregations"]["filtered"]["values"]["buckets"]
            keys = [bucket["key"] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info("Found no values for %s" % (field))
def __init__(self, hostName, postNum):
    self.host = hostName
    self.post = postNum
    # Probe the HTTP endpoint first. Note: requests.get raises a
    # ConnectionError when the server is down, so the else branch is only
    # reached on an empty response body.
    if requests.get('http://' + self.host + ':' + self.post).content:
        # Connect to cluster
        self.es = Elasticsearch([{'host': self.host, 'port': self.post}])
    else:
        print("Please turn on elasticsearch")
def get_instance():
    # Double-checked locking: skip the lock once the client already exists.
    if ESLowLevelClient.__es is None:
        with ESLowLevelClient.__es_lock:
            if ESLowLevelClient.__es is None:
                ESLowLevelClient.__es = Elasticsearch(['localhost'], port=9200, maxsize=25)
    return ESLowLevelClient.__es
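# Sketch of the enclosing class that get_instance assumes (only the pieces
# visible in the snippet; everything else is hypothetical): the client and
# the lock are class-level, get_instance is a staticmethod so the name
# mangling of __es resolves, and the double-checked locking keeps the common
# path lock-free once the singleton exists.
import threading

from elasticsearch import Elasticsearch


class ESLowLevelClient(object):
    __es = None
    __es_lock = threading.Lock()

    @staticmethod
    def get_instance():
        if ESLowLevelClient.__es is None:
            with ESLowLevelClient.__es_lock:
                if ESLowLevelClient.__es is None:
                    ESLowLevelClient.__es = Elasticsearch(['localhost'], port=9200, maxsize=25)
        return ESLowLevelClient.__es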
class ElasticClient:
    def __init__(self, index_name, index_type, ip="127.0.0.1"):
        '''
        @param index_name: index name
        @param index_type: index (doc) type
        '''
        self.index_name = index_name
        self.index_type = index_type
        self.es = Elasticsearch([ip])

    def create_index(self, index_name="teacher_resume", index_type="tr_type"):
        # Create the index.
        # Note: Elasticsearch has no "array" mapping type; any field may hold
        # multiple values, so "research_direction" would normally be mapped
        # as "text" or "keyword".
        _index_mappings = {
            "mappings": {
                self.index_type: {
                    "properties": {
                        "teachername": {"type": "keyword"},
                        "telephone": {"type": "text"},
                        "email": {"type": "keyword"},
                        "research_direction": {"type": "array"},
                        "personal_profile": {"type": "text"},
                        "teaching_results": {"type": "text"},
                        "research_results": {"type": "text"},
                        "lab_introduction": {"type": "text"},
                    }
                }
            }
        }
        self.es.indices.create(index=self.index_name, body=_index_mappings, ignore=400)

    def load_index(self):
        with open(os.path.join(BASE_DIR, 'static', 'files', 'test_json.json')) as f:
            result = json.load(f)
        for item in result:
            res = self.es.index(index=self.index_name, doc_type=self.index_type, body=item)
            print(res)
def _engine():
    ":rtype elasticsearch.Elasticsearch"
    global _elastic
    if not _elastic:
        _elastic = Elasticsearch([{
            "host": settings.ELASTIC_HOST,
            "port": int(settings.ELASTIC_PORT)
        }])
    return _elastic
def get_dashboard(self, rule, db_name):
    """ Download the dashboard which matches use_kibana_dashboard from elasticsearch. """
    es = Elasticsearch(host=rule['es_host'], port=rule['es_port'])
    if not db_name:
        raise EAException("use_kibana_dashboard undefined")
    query = {'query': {'term': {'_id': db_name}}}
    try:
        res = es.search(index='kibana-int', doc_type='dashboard', body=query,
                        _source_include=['dashboard'])
    except ElasticsearchException as e:
        raise EAException("Error querying for dashboard: %s" % (e))
    if res['hits']['hits']:
        return json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    else:
        raise EAException("Could not find dashboard named %s" % (db_name))
class TestReindexer(unittest.TestCase):
    def setUp(self):
        self.source_index = "reindex"
        self.target_index = "reindex-a"
        self.client = Elasticsearch()
        self.reindexer = Reindexer(self.client)
        self.schema_manager = SchemaManager(self.client)
        # try:
        #     read_only_setting = {"index": {"blocks": {"read_only": False}}}
        #     self.client.indices.put_settings(index=self.source_index, body=read_only_setting)
        # except:
        #     pass
        self.client.indices.create(index=self.source_index)

    def tearDown(self):
        for index in [self.source_index, self.target_index]:
            try:
                self.client.indices.delete(index=index)
            except:
                pass

    def test_reindex(self):
        create = []
        for i in ['a', 'b', 'c', 'd', 'e']:
            doc = {
                '_op_type': 'create',
                '_index': self.source_index,
                '_type': 'document',
                'doc': {'name': i}
            }
            create.append(doc)
        bulk(self.client, create, refresh=True)
        docs = self.client.search(index=self.source_index)
        self.assertEqual(len(docs['hits']['hits']), 5)
        self.reindexer.do_reindex(self.source_index, self.target_index, 3)
        self.client.indices.refresh(','.join([self.source_index, self.target_index]))
        docs = self.client.search(index=self.source_index)
        self.assertEqual(len(docs['hits']['hits']), 5)
        docs = self.client.search(index=self.target_index)
        self.assertEqual(len(docs['hits']['hits']), 5)
def populate_elastic_search(request=None, project_id=None):
    # 1. Create tag from "project_id" + "type" + "tag".
    # 2. Get instances from the all-region cache.
    # 3. Generate an index for each project.
    # 4. List the tag in the respective project index and doc type.
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])
    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    try:
        # First try to delete the indexes for this project if they already exist
        client.indices.delete(index=[
            index_name, ebs_index_name, elb_index_name, eip_index_name,
            vpc_index_name, security_group_index_name, subnet_index_name
        ])
    except TransportError as e:
        LOG.error("Error while deleting the index {0} error : "
                  "{1}".format(index_name, e))
    try:
        obj_list = []
        obj_list.extend(populate_ec2_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_ebs_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_elb_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_eip_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_vpc_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_subnet_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_security_group_indexes(request=request, project_id=project_id))
        if obj_list:
            # Index into elastic search in bulk
            elastic_index_res = helpers.bulk(client, obj_list, stats_only=True)
            LOG.info("Indexed {0} items Failed {1} items".format(
                elastic_index_res[0], elastic_index_res[1]))
    except Exception as e:
        LOG.error("Error while indexing project {0} error {1}".format(project_id, e))
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'],
                            port=self.rules['es_port'],
                            use_ssl=self.rules['use_ssl'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
            elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))
class Mysql2Es():
    config = {
        "db": {
            "host": "192.168.0.196",
            "user": "******",
            "passwd": "xsycommercial123",
            "db": "prism1",
            "charset": "utf8"
        },
        "max_query": "select max(id) from company",
        "query": "select id,name,company_org_type,reg_status from company",
        "index": {
            "host": ["http://192.168.0.196:9200", "http://192.168.0.197:9200", "http://192.168.0.198:9200"],
            "_index": "company0606",
            "_type": "company"
        },
        "action": "index",
        "_id": "id"
    }

    def __init__(self, start_id=0, max_id=100000, step=10000, id_file=None, config=None):
        if config is not None:
            self.config = json.loads(open(config).read())
        #===================================================================
        # connect to mysql
        #===================================================================
        self.db = None
        try:
            self.db = MySQLdb.connect(**self.config["db"])
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
            sys.exit(1)
        #===================================================================
        # query select from table
        #===================================================================
        self.cursor = self.db.cursor()
        # self.cursor.execute(self.config["max_query"])
        self.start_id = start_id
        self.max_id = max_id
        self.step = step
        self.id_file = id_file
        self.limit = 50000
        self.action = self.config['action']
        self.metadata = {"_index": self.config["index"]["_index"],
                         "_type": self.config["index"]["_type"]}
        self.es = Elasticsearch(self.config["index"]["host"])
        self.mutex = threading.Lock()
        self.thread_num = 0
        self.db_data = []
        self.complete = False
class SoSoImp(object):
    '''
    classdocs
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.es = Elasticsearch(['192.168.2.129', '192.168.2.130'])

    def addSoso(self, Content):
        ''' Add a search record. '''
        title = ""
        if Content.title is not None:
            title = Content.title
        txt = ""
        if Content.txt is not None:
            txt = Content.txt
        # Extract the sentiment.
        source = fenci.mm(title, txt)
        body = {"title": Content.title, "summary": Content.summary, "context": Content.txt,
                "site_cls": Content.site_cls, "domaintype": Content.domaintype,
                "countryid": Content.countryid, "province": Content.province,
                "city": Content.city, "area": Content.area, "url": Content.url,
                "publictime": Content.pubdate, "createtime": Content.created,
                "sitename": Content.site_name, "domain1": Content.domain_1,
                "domain2": Content.domain_2, "sentiment": source,
                "subname": Content.subname}
        self.es.index(index="yuqing", doc_type="yuqing_type", body=body, id=Content.rowkey)
def get_es(self):
    if self.es is None:
        ssl_url = self.es_url.startswith('https')
        if ssl_url:
            # TODO add valid cert in ES setup
            logger.warning('ES does not use cert validation.')
        self.es = Elasticsearch([self.es_url], verify_certs=False)
    return self.es
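# A sketch of what resolving the TODO above might look like once a CA bundle
# is available (URL and bundle path are hypothetical): enable verification
# instead of warning.
from elasticsearch import Elasticsearch

es = Elasticsearch(
    ['https://es.example.com:9200'],
    verify_certs=True,
    ca_certs='/etc/ssl/certs/es-ca.pem',  # hypothetical CA bundle path
)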
def run_rule(self, rule):
    """ Run a rule including querying and alerting on results.

    :param rule: The rule configuration.
    :return: The number of matches that the rule produced.
    """
    elastalert_logger.info('Start to run rule: %s', rule.get('name'))
    # Run the rule. If querying over a large time period, split it up into segments
    self.num_hits = 0
    rule_request = rule.get("input").get("search").get("request")
    if rule_request.get("elastic_host", None) is not None and \
            rule_request.get("elastic_port", None) is not None:
        self.current_es = Elasticsearch(host=rule_request.get("elastic_host"),
                                        port=rule_request.get("elastic_port"))
    else:
        self.current_es = self.new_elasticsearch(self.global_config)

    self.run_query(rule)

    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)

        # if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
        #     elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
        #     continue

        if rule.get('realert'):
            # Key the realert timer by the query_key value when one is present.
            qk = rule.get('query_key')
            key = '.' + str(match[qk]) if qk and qk in match else ''
            next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
            self.set_realert(rule['name'] + key, next_alert, exponent)

        # If no aggregation, alert immediately
        # if not rule['aggregation']:
        #     self.alert([match], rule)
        #     continue
        self.alert([match], rule)

        # Add it as an aggregated match
        # self.add_aggregated_alert(match, rule)

    # Mark this endtime for next run's start
    # rule['previous_endtime'] = endtime
    # time_taken = time.time() - run_start
    return num_matches
def main():
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    es = Elasticsearch(host=es_host, port=es_port)
    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int', doc_type='dashboard', body=query,
                    _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()
    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)
    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))
def main(host, port, index, type, chunk_size, geojson_file):
    def _charge_doc():
        for feature in load_geojson(geojson_file):
            yield {
                '_index': index,
                '_type': type,
                '_source': feature
            }
    es = Elasticsearch(host=host, port=port)
    helpers.bulk(es, _charge_doc(), chunk_size=chunk_size, request_timeout=6000)
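# A plausible load_geojson for the loader above (hypothetical helper, not
# shown in the source): it reads a GeoJSON FeatureCollection and yields one
# feature dict at a time so helpers.bulk can stream them.
import json

def load_geojson(path):
    with open(path) as fh:
        collection = json.load(fh)
    for feature in collection.get('features', []):
        yield feature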
def elasticsearch_client(conf):
    """ returns an Elasticsearch instance configured using an es_conn_config """
    es_conn_conf = build_es_conn_config(conf)
    return Elasticsearch(host=es_conn_conf['es_host'],
                         port=es_conn_conf['es_port'],
                         url_prefix=es_conn_conf['es_url_prefix'],
                         use_ssl=es_conn_conf['use_ssl'],
                         verify_certs=es_conn_conf['verify_certs'],
                         connection_class=RequestsHttpConnection,
                         timeout=es_conn_conf['es_conn_timeout'],
                         send_get_body_as=es_conn_conf['send_get_body_as'])
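# Illustrative shape of the es_conn_config dict consumed above (the keys are
# taken from the call site; the values are made up):
es_conn_conf = {
    'es_host': 'localhost',
    'es_port': 9200,
    'es_url_prefix': '',
    'use_ssl': False,
    'verify_certs': True,
    'es_conn_timeout': 20,
    'send_get_body_as': 'GET',
}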
def setUp(self):
    super(TestESTermAggregationWeightProvider, self).setUp()
    self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
    self.ic = IndicesClient(self.es)
    self.index = 'es_term_weight_provider_test'
    self.doc_type = 'test-doc'
    self.field = 'text'
    if self.ic.exists(self.index):
        self.ic.delete(self.index)
    self.ic.create(self.index)
    self.es.create(self.index, self.doc_type, {self.field: 'foo'})
    self.es.create(self.index, self.doc_type, {self.field: 'knark'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'})
    self.es.create(self.index, self.doc_type, {self.field: 'knark '})
    self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)
def main():
    parser = ArgumentParser()
    parser.add_argument('-e', '--elasticsearch-server', default='localhost:9200')
    parser.add_argument('-d', '--dataset')
    parser.add_argument('-s', '--sections')
    opts = parser.parse_args()

    es_hosts = [opts.elasticsearch_server]
    dataset_name = opts.dataset
    dataset_sections = opts.sections

    es = Elasticsearch(hosts=es_hosts, timeout=120)

    if dataset_name == 'newsgroups':
        dataset = NewsgroupsDataset()
    elif dataset_name == 'aviskorpus':
        sections = None
        sources = None
        if dataset_sections:
            try:
                sections, sources = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                sources = [s for s in sources.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' % dataset_sections)
                sys.exit(1)
        dataset = AviskorpusDataset(sections=sections, sources=sources)
    elif dataset_name == 'ndt':
        sections = None
        lang = None
        if dataset_sections:
            try:
                sections, lang = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                lang = [s for s in lang.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' % dataset_sections)
                sys.exit(1)
        dataset = NDTDataset(lang=lang, sections=sections)
    else:
        logging.error('Unknown dataset %s ...' % dataset_name)
        sys.exit(1)

    dataset.install(es)
def filter(self, qs, value):
    client = Elasticsearch([settings.ELASTICSEARCH_HOST])
    value = value.lower()
    search_query = {
        "bool": {
            "must_not": [
                # Excludes is_published=False from the results.
                {"term": {"is_published": False}}
            ],
            "should": [
                {
                    "simple_query_string": {
                        "fields": ["category_name"],
                        "quote_field_suffix": ".exact",
                        "query": value
                    }
                },
            ]
        }
    }
    s = Search(using=client, index='category') \
        .query(search_query) \
        .sort("_score", "-views") \
        .extra(size=self.max_result, from_=0)
    hits_list = []
    items = s.execute()
    if items:
        for item in items:
            hits_list.append(item.meta.id)
        # Preserve the Elasticsearch relevance order in the Django queryset.
        hits_order = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hits_list)])
        qs = qs.filter(id__in=hits_list).order_by(hits_order)
    else:
        qs = qs.none()
        # TODO: fallback?
        # bits = value.split(' ')
        # search_clauses = reduce(operator.and_,
        #                         [Q(title__icontains=v) for v in bits])
        # unpublished = Category.objects.get_queryset_descendants(
        #     Category.objects.filter(is_published=False), include_self=True)
        # qs = (qs
        #       .exclude(pk__in=unpublished)
        #       .filter(search_clauses)
        #       .order_by('-views'))
    return qs[:self.max_result]
def _connect(self):
    """ connect to a member of the ElasticSearch cluster """
    try:
        if self.local_env:
            self.es = Elasticsearch([{'host': self.host, 'port': self.port}])
        else:
            self.es = Elasticsearch([{'host': self.host, 'port': self.port}],
                                    sniff_on_start=True,
                                    sniff_on_connection_fail=True,
                                    sniffer_timeout=self.timeout)
        self.idx = IndicesClient(self.es)
        return
    except ConnectionError as e:
        return ElasticSearchError.no_host_available(self.host, self.port)
    except Exception as e:
        (type_e, value, traceback_prev) = exc_info()
        backtrace = extract_tb(traceback_prev)
        return ElasticSearchError.unknown_exception(backtrace, str(e))
def setUp(self):
    self.source_index = "reindex"
    self.target_index = "reindex-a"
    self.client = Elasticsearch()
    self.reindexer = Reindexer(self.client)
    self.schema_manager = SchemaManager(self.client)
    # try:
    #     read_only_setting = {"index": {"blocks": {"read_only": False}}}
    #     self.client.indices.put_settings(index=self.source_index, body=read_only_setting)
    # except:
    #     pass
    self.client.indices.create(index=self.source_index)
def get_all_terms(self):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if self.rules.get('use_strftime_index'):
        end = ts_now()
        start = end - window_size
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']

    for field in self.fields:
        field_name['field'] = field
        res = self.es.search(body=query_template, index=index,
                             ignore_unavailable=True, timeout='50s')
        buckets = res['aggregations']['values']['buckets']
        keys = [bucket['key'] for bucket in buckets]
        self.seen_values[field] = keys
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, rule, args=None):
        super(NewTermsRule, self).__init__(rule, args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and (
                len(self.fields) != 1 or len(self.fields) == 1 and type(self.fields[0]) == list):
            raise EAException("use_terms_query can only be used with a single non-composite field")
        try:
            self.get_all_terms(args)
        except Exception as e:
            # Refuse to start if we cannot get existing terms
            raise EAException('Error searching for existing terms: %s' % (repr(e)))

    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(
            host=self.rules['es_host'],
            port=self.rules['es_port'],
            timeout=self.rules.get('es_conn_timeout', 50),
            send_get_body_as=self.rules.get('send_get_body_as', 'GET')
        )
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        step = datetime.timedelta(**self.rules.get('window_step_size', {'days': 1}))

        for field in self.fields:
            tmp_start = start
            tmp_end = min(start + step, end)

            time_filter = {self.rules['timestamp_field']: {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}}
            query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
            query = {'aggs': {'filtered': query_template}}

            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                self.seen_values.setdefault(tuple(field), [])
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = add_raw_postfix(sub_field)
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                self.seen_values.setdefault(field, [])
                # For non-composite keys, only a single agg is needed
                field_name['field'] = add_raw_postfix(field)

            # Query the entire time range in small chunks
            while tmp_start < end:
                if self.rules.get('use_strftime_index'):
                    index = format_index(self.rules['index'], tmp_start, tmp_end)
                else:
                    index = self.rules['index']
                res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
                if 'aggregations' in res:
                    buckets = res['aggregations']['filtered']['values']['buckets']
                    if type(field) == list:
                        # For composite keys, make the lookup based on all fields
                        # Make it a tuple since it can be hashed and used in dictionary lookups
                        for bucket in buckets:
                            # We need to walk down the hierarchy and obtain the value at each level
                            self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    else:
                        keys = [bucket['key'] for bucket in buckets]
                        self.seen_values[field] += keys
                else:
                    self.seen_values.setdefault(field, [])
                if tmp_start == tmp_end:
                    break
                tmp_start = tmp_end
                tmp_end = min(tmp_start + step, end)
                time_filter[self.rules['timestamp_field']] = {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}

        for key, values in self.seen_values.iteritems():
            if not values:
                if type(key) == tuple:
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type. Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    elastalert_logger.warning((
                        'No results were found from all sub-aggregations. This can either indicate that there is '
                        'no baseline data OR that a non-primitive field was used in a composite key.'
                    ))
                else:
                    elastalert_logger.info('Found no values for %s' % (key))
                continue
            self.seen_values[key] = list(set(values))
            elastalert_logger.info('Found %s unique values for %s' % (len(values), key))

    def flatten_aggregation_hierarchy(self, root, hierarchy_tuple=()):
        """ For nested aggregations, the results come back in the following format:
            {
              "aggregations": {
                "filtered": {
                  "doc_count": 37,
                  "values": {
                    "doc_count_error_upper_bound": 0,
                    "sum_other_doc_count": 0,
                    "buckets": [
                      {
                        "key": "1.1.1.1",  # IP address (root)
                        "doc_count": 13,
                        "values": {
                          "buckets": [
                            {
                              "key": "80",  # Port (sub-aggregation)
                              "doc_count": 3,
                              "values": {
                                "buckets": [
                                  {"key": "ack", "doc_count": 3},  # Reason (sub-aggregation, leaf-node)
                                  {"key": "syn", "doc_count": 1}   # Reason (sub-aggregation, leaf-node)
                                ]
                              }
                            },
                            {
                              "key": "82",  # Port (sub-aggregation)
                              "doc_count": 3,
                              "values": {
                                "buckets": [
                                  {"key": "ack", "doc_count": 3},  # Reason (sub-aggregation, leaf-node)
                                  {"key": "syn", "doc_count": 3}   # Reason (sub-aggregation, leaf-node)
                                ]
                              }
                            }
                          ]
                        }
                      },
                      {
                        "key": "2.2.2.2",  # IP address (root)
                        "doc_count": 4,
                        "values": {
                          "buckets": [
                            {
                              "key": "443",  # Port (sub-aggregation)
                              "doc_count": 3,
                              "values": {
                                "buckets": [
                                  {"key": "ack", "doc_count": 3},  # Reason (sub-aggregation, leaf-node)
                                  {"key": "syn", "doc_count": 3}   # Reason (sub-aggregation, leaf-node)
                                ]
                              }
                            }
                          ]
                        }
                      }
                    ]
                  }
                }
              }
            }

            Each level will either have more values and buckets, or it will be a leaf node.
            We'll ultimately return a flattened list with the hierarchies appended as strings,
            e.g. the above snippet would yield a list with:
            [
              ('1.1.1.1', '80', 'ack'),
              ('1.1.1.1', '80', 'syn'),
              ('1.1.1.1', '82', 'ack'),
              ('1.1.1.1', '82', 'syn'),
              ('2.2.2.2', '443', 'ack'),
              ('2.2.2.2', '443', 'syn')
            ]
            A similar formatting will be performed in the add_data method and used as the basis for comparison.
        """
        results = []
        # There are more aggregation hierarchies left. Traverse them.
        if 'values' in root:
            results += self.flatten_aggregation_hierarchy(root['values']['buckets'],
                                                          hierarchy_tuple + (root['key'],))
        else:
            # We've gotten to a sub-aggregation, which may have further sub-aggregations
            # See if we need to traverse further
            for node in root:
                if 'values' in node:
                    results += self.flatten_aggregation_hierarchy(node, hierarchy_tuple)
                else:
                    results.append(hierarchy_tuple + (node['key'],))
        return results

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = ()
                lookup_field = field
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    lookup_field = tuple(field)
                    for sub_field in field:
                        lookup_result = lookup_es_key(document, sub_field)
                        if not lookup_result:
                            value = None
                            break
                        value += (lookup_result,)
                else:
                    value = lookup_es_key(document, field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = lookup_field
                    self.add_match(copy.deepcopy(document))
                elif value:
                    if value not in self.seen_values[lookup_field]:
                        document['new_field'] = lookup_field
                        self.add_match(copy.deepcopy(document))
                        self.seen_values[lookup_field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1 and the 0'th entry is always a string
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
                        self.seen_values[field].append(bucket['key'])
def main():
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        use_ssl = data.get('use_ssl')
    else:
        host = raw_input("Enter elasticsearch host: ")
        port = int(raw_input("Enter elasticsearch port: "))
        while use_ssl is None:
            resp = raw_input("Use SSL? t/f: ").lower()
            use_ssl = True if resp in ('t', 'true') else (False if resp in ('f', 'false') else None)
        username = raw_input("Enter optional basic-auth username: ")
        password = raw_input("Enter optional basic-auth password: ")

    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = raw_input("New index name? (Default elastalert_status) ")
    if not index:
        index = 'elastalert_status'
    old_index = raw_input("Name of existing index to copy? (Default None) ")

    res = None
    if old_index:
        print("Downloading existing data...")
        res = es.search(index=old_index, body={}, size=500000)
        print("Got %s documents" % (len(res['hits']['hits'])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    print("New index %s created" % (index))

    if res:
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source']))
                        for doc in res['hits']['hits']])
        print("Uploading data...")
        es.bulk(body=bulk, index=index)
    print("Done!")
def setUp(self):
    self.client = Elasticsearch()
    self.manager = SchemaManager(self.client, schema_index=self.test_schema_index)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=None, help='Use SSL')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    args = parser.parse_args()

    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = data.get('es_url_prefix', '')
        use_ssl = data.get('use_ssl')
    else:
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = raw_input('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix: '))

    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth,
                       url_prefix=url_prefix)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                       'match_body': {'enabled': False, 'type': 'object'},
                                                       '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                       'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'
    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
    print('New index %s created' % (index))

    if res:
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source']))
                        for doc in res['hits']['hits']])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)
    print('Done!')
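# Shape of the hand-built bulk payload above (values made up): one action
# line followed by one source line per document, each newline-terminated.
bulk_body = (
    '{"create": {"_type": "elastalert", "_index": "elastalert_status"}}\n'
    '{"rule_name": "example_rule", "@timestamp": "2015-01-01T00:00:00Z"}\n'
)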
class TestSchemaManager(unittest.TestCase):
    test_schema_index = 'test_pseudonym'

    def setUp(self):
        self.client = Elasticsearch()
        self.manager = SchemaManager(self.client, schema_index=self.test_schema_index)

    def tearDown(self):
        try:
            self.client.indices.delete(self.test_schema_index)
        except:
            pass

    def test_schema_compiling(self):
        cfg = {'aliases': [{'name': 'alias1',
                            'strategy': {'date': {'indexes': {'201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 1)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))
        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401'})

        cfg['aliases'][0]['strategy']['date']['indexes']['201402'] = datetime.date(2014, 2, 1).isoformat()
        self.manager.update(cfg)
        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 2)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))
        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401', '201402'})

        cfg['aliases'].append({'name': 'alias2',
                               'strategy': {'date': {'indexes': {'201501': datetime.date(2015, 1, 1).isoformat()}}}})
        self.manager.update(cfg)
        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 3)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))
        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1', 'alias2'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401', '201402', '201501'})

    def test_add_index(self):
        cfg = {'aliases': [{'name': 'alias1',
                            'strategy': {'date': {'indexes': {'201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        self.manager.add_index('alias1', '201402', datetime.date(2014, 1, 2).isoformat())
        schema = self.client.get(index=self.test_schema_index, id='master')
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))
        for alias in schema_doc['aliases']:
            if alias['name'] == 'alias1':
                break
        self.assertIn('201402', alias['indexes'])
        self.assertIn('201402', [i['name'] for i in schema_doc['indexes']])

    def test_remove_index(self):
        cfg = {'aliases': [{'name': 'alias1',
                            'strategy': {'date': {'indexes': {'201501': datetime.date(2015, 1, 1).isoformat(),
                                                              '201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        self.manager.remove_index('201401')
        schema = self.client.get(index=self.test_schema_index, id='master')['_source']
        schema_doc = json.loads(schema.get('schema'))
        self.assertEqual(len(schema_doc['indexes']), 1)
        self.assertEqual(schema_doc['indexes'][0]['name'], '201501')
        self.assertEqual(len(schema_doc['aliases']), 1)
        self.assertEqual(schema_doc['aliases'][0]['indexes'], ['201501'])

    def test_reindex_cutover(self):
        source_index = "reindex_2017_01"
        # Add both indexes to aliases before cutover
        target_index = '%s-a' % source_index
        alias1 = 'cutover1'
        cfg = {'aliases': [{'name': alias1,
                            'strategy': {'date': {'indexes': {source_index: datetime.date(2017, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        _, schema = self.manager.get_current_schema(True)
        self.assertEquals(schema['aliases'][0]['name'], alias1)
        source_routing = None
        for index in schema['indexes']:
            if index['name'] == source_index:
                source_routing = index.get('routing')

        self.manager.reindex_cutover(source_index)
        _, schema = self.manager.get_current_schema(True)
        # Compare alias names by equality, not identity.
        aliases = [alias for alias in schema['aliases'] if alias['name'] == alias1]
        for alias in aliases:
            self.assertTrue(target_index in alias['indexes'])
            self.assertTrue(source_index not in alias['indexes'])
        target_routing = None
        for index in schema['indexes']:
            if index['name'] == target_index:
                target_routing = index.get('routing')
        self.assertIsNotNone(target_routing)
        self.assertEquals(source_routing, target_routing)

    def test_get_target_index(self):
        source_name = 'assets_2017_01'
        target = self.manager._get_target_index(source_name)
        self.assertEquals(target, 'assets_2017_01-a')
        target = self.manager._get_target_index(target)
        self.assertEquals(target, 'assets_2017_01-b')
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, *args):
        super(NewTermsRule, self).__init__(*args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and len(self.fields) != 1:
            raise EAException("use_terms_query can only be used with one field at a time")
        self.get_all_terms()

    def get_all_terms(self):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if self.rules.get('use_strftime_index'):
            end = ts_now()
            start = end - window_size
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query_template, index=index,
                                 ignore_unavailable=True, timeout='50s')
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = document.get(field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = field
                    self.add_match(document)
                elif value:
                    if value not in self.seen_values[field]:
                        document['new_field'] = field
                        self.add_match(document)
                        self.seen_values[field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, rule, args=None):
        super(NewTermsRule, self).__init__(rule, args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and len(self.fields) != 1:
            raise EAException("use_terms_query can only be used with one field at a time")
        try:
            self.get_all_terms(args)
        except Exception as e:
            # Refuse to start if we cannot get existing terms
            raise EAException('Error searching for existing terms: %s' % (e))

    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = document.get(field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = field
                    self.add_match(document)
                elif value:
                    if value not in self.seen_values[field]:
                        document['new_field'] = field
                        self.add_match(document)
                        self.seen_values[field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
class BaseElasticsearchBackend(Base):
    """Base connection wrapper based on the ElasticSearch official library.

    It uses two entry points to configure the underlying connection:

    * ``transport_class``: the transport class from ``elasticsearch``. By
      default ``elasticsearch.transport.Transport``.
    * ``connection_class``: the connection class used by the transport class.
      It's undefined by default, as it is up to the subclasses to provide one.

    If any of these elements is not defined, an ``ImproperlyConfigured`` error
    will be raised when the backend tries to configure the client.
    """
    #: ElasticSearch transport class used by the client class to perform
    #: requests.
    transport_class = Transport
    #: ElasticSearch connection class used by the transport class to perform
    #: requests.
    connection_class = None

    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']
        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)
        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)
        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)

    # Server methods
    # ==============
    # The underlying client does not require index names to perform server
    # related queries, such as "ping" or "info". The connection wrapper acts
    # for them as a proxy.

    def ping(self, **kwargs):
        return self.client.ping(**kwargs)

    def info(self, **kwargs):
        return self.client.info(**kwargs)

    def put_script(self, lang, script_id, body, **kwargs):
        return self.client.put_script(lang, script_id, body, **kwargs)

    def get_script(self, lang, script_id, **kwargs):
        return self.client.get_script(lang, script_id, **kwargs)

    def delete_script(self, lang, script_id, **kwargs):
        return self.client.delete_script(lang, script_id, **kwargs)

    def put_template(self, template_id, body, **kwargs):
        return self.client.put_template(template_id, body, **kwargs)

    def get_template(self, template_id, body=None, **kwargs):
        return self.client.get_template(template_id, body, **kwargs)

    def delete_template(self, template_id=None, **kwargs):
        return self.client.delete_template(template_id, **kwargs)

    # Bulk methods
    # ============
    # The underlying client does not require index names, but they can be
    # used. As it makes sense to not give an index, developers are free to use
    # these methods as they want, as long as they are careful.
    def mget(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mget(body, index, doc_type, **kwargs)

    def bulk(self, body, index=None, doc_type=None, **kwargs):
        return self.client.bulk(body, index, doc_type, **kwargs)

    def msearch(self, body, index=None, doc_type=None, **kwargs):
        return self.client.msearch(body, index, doc_type, **kwargs)

    def mpercolate(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mpercolate(body, index, doc_type, **kwargs)

    # Scroll methods
    # ==============
    # The underlying client does not require an index to perform scroll.

    def scroll(self, scroll_id, **kwargs):
        return self.client.scroll(scroll_id, **kwargs)

    def clear_scroll(self, scroll_id, body=None, **kwargs):
        return self.client.clear_scroll(scroll_id, body, **kwargs)

    # Query methods
    # =============
    # The underlying client requires index names (or alias names) to perform
    # queries. The connection wrapper overrides these client methods to
    # automatically use the configured names (indices and/or aliases).

    def create(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.create(
            self.indices, doc_type, body, doc_id, **kwargs)

    def index(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.index(
            self.indices, doc_type, body, doc_id, **kwargs)

    def exists(self, doc_id, doc_type='_all', **kwargs):
        return self.client.exists(self.indices, doc_id, doc_type, **kwargs)

    def get(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get(self.indices, doc_id, doc_type, **kwargs)

    def get_source(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get_source(self.indices, doc_id, doc_type, **kwargs)

    def update(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.update(
            self.indices, doc_type, doc_id, body, **kwargs)

    def search(self, doc_type=None, body=None, **kwargs):
        return self.client.search(self.indices, doc_type, body, **kwargs)

    def search_shards(self, doc_type=None, **kwargs):
        return self.client.search_shards(self.indices, doc_type, **kwargs)

    def search_template(self, doc_type=None, body=None, **kwargs):
        return self.client.search_template(
            self.indices, doc_type, body, **kwargs)

    def explain(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.explain(
            self.indices, doc_type, doc_id, body, **kwargs)

    def delete(self, doc_type, doc_id, **kwargs):
        return self.client.delete(self.indices, doc_type, doc_id, **kwargs)

    def count(self, doc_type=None, body=None, **kwargs):
        return self.client.count(self.indices, doc_type, body, **kwargs)

    def delete_by_query(self, doc_type=None, body=None, **kwargs):
        return self.client.delete_by_query(
            self.indices, doc_type, body, **kwargs)

    def suggest(self, body, **kwargs):
        return self.client.suggest(body, self.indices, **kwargs)

    def percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def count_percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.count_percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mlt(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.mlt(self.indices, doc_type, doc_id, body, **kwargs)

    def termvector(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.termvector(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mtermvectors(self, doc_type=None, body=None, **kwargs):
        return self.client.mtermvectors(self.indices, doc_type, body, **kwargs)

    def benchmark(self, doc_type=None, body=None, **kwargs):
        return self.client.benchmark(self.indices, doc_type, body, **kwargs)

    def abort_benchmark(self, name=None, **kwargs):
        return self.client.abort_benchmark(name, **kwargs)

    def list_benchmarks(self, doc_type=None, **kwargs):
        return self.client.list_benchmarks(self.indices, doc_type, **kwargs)
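# A minimal sketch of a concrete backend built on the wrapper above. It only
# has to supply a connection_class; RequestsHttpConnection is the
# requests-based connection shipped with the official elasticsearch-py
# library (it requires the ``requests`` package). The subclass name is a
# hypothetical example, and ``self.server`` is assumed to be populated from
# settings by the djangoes machinery as described in the docstrings above.
from elasticsearch.connection import RequestsHttpConnection


class RequestsElasticsearchBackend(BaseElasticsearchBackend):
    """Hypothetical backend using the requests-based HTTP connection."""
    connection_class = RequestsHttpConnection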
class TestESTermAggregationWeightProvider(TestCase): def setUp(self): super(TestESTermAggregationWeightProvider, self).setUp() self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port]) self.ic = IndicesClient(self.es) self.index = 'es_term_weight_provider_test' self.doc_type = 'test-doc' self.field = 'text' if self.ic.exists(self.index): self.ic.delete(self.index) self.ic.create(self.index) self.es.create(self.index, self.doc_type, {self.field: 'foo'}) self.es.create(self.index, self.doc_type, {self.field: 'knark'}) self.es.create(self.index, self.doc_type, {self.field: 'ba'}) self.es.create(self.index, self.doc_type, {self.field: 'knirk'}) self.es.create(self.index, self.doc_type, {self.field: 'ba'}) self.es.create(self.index, self.doc_type, {self.field: 'ba'}) self.es.create(self.index, self.doc_type, {self.field: 'knark '}) self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True) def tearDown(self): super(TestESTermAggregationWeightProvider, self).tearDown() self.ic.delete(self.index) def test_getitem_single(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=False, sublinear=False) term, w = provider['ba'] self.assertEqual('ba', term) self.assertAlmostEqual(.5, w) term, w = provider['knark'] self.assertEqual('knark', term) self.assertAlmostEqual(.25, w) term, w = provider['knirk'] self.assertEqual('knirk', term) self.assertAlmostEqual(.125, w) term, w = provider['foo'] self.assertEqual('foo', term) self.assertAlmostEqual(.125, w) def test_inverse(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=True, sublinear=False) term, w = provider['ba'] self.assertEqual('ba', term) self.assertAlmostEqual(2., w) term, w = provider['knark'] self.assertEqual('knark', term) self.assertAlmostEqual(4., w) term, w = provider['knirk'] self.assertEqual('knirk', term) self.assertAlmostEqual(8., w) term, w = provider['foo'] self.assertEqual('foo', term) self.assertAlmostEqual(8., w) def test_sublinear(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=False, sublinear=True) term, w = provider['ba'] self.assertEqual('ba', term) self.assertAlmostEqual(-0.693147, w, places=4) term, w = provider['knark'] self.assertEqual('knark', term) self.assertAlmostEqual(-1.386294, w, places=4) term, w = provider['knirk'] self.assertEqual('knirk', term) self.assertAlmostEqual(-2.079442, w, places=4) term, w = provider['foo'] self.assertEqual('foo', term) self.assertAlmostEqual(-2.079442, w, places=4) def test_inverse_sublinear(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=True, sublinear=True) term, w = provider['ba'] self.assertEqual('ba', term) self.assertAlmostEqual(0.693147, w, places=4) term, w = provider['knark'] self.assertEqual('knark', term) self.assertAlmostEqual(1.386294, w, places=4) term, w = provider['knirk'] self.assertEqual('knirk', term) self.assertAlmostEqual(2.079442, w, places=4) term, w = provider['foo'] self.assertEqual('foo', term) self.assertAlmostEqual(2.079442, w, places=4) def test_getitem_multiple(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=False, sublinear=False) weights = dict(provider[['ba', 'foo', 'knark', 'knirk']]) self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys())) self.assertAlmostEqual(weights['ba'], .5) self.assertAlmostEqual(weights['knark'], .25) 
self.assertAlmostEqual(weights['knirk'], .125) self.assertAlmostEqual(weights['foo'], .125) weights = dict(provider['ba', 'foo', 'knark', 'knirk']) self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys())) self.assertAlmostEqual(weights['ba'], .5) self.assertAlmostEqual(weights['knark'], .25) self.assertAlmostEqual(weights['knirk'], .125) self.assertAlmostEqual(weights['foo'], .125) def test_getitem_missing(self): provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=False, sublinear=False) self.assertRaises(KeyError, lambda: provider['notfound']) self.assertRaises(KeyError, lambda: provider['ba', 'notfound']) provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field, inverse=False, sublinear=False, missing='ignore') self.assertIsNone(provider['notfound']) self.assertEqual([('ba', .5)], list(provider['ba', 'notfound']))
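# The expected values in these tests are consistent with a simple
# relative-frequency weighting; this is an inference from the assertions,
# not a quote of ESTermAggregationWeightProvider's implementation:
#   weight(t) = count(t) / total, inverse = total / count(t),
#   and sublinear applies a natural log on top.
import math

counts = {'ba': 4, 'knark': 2, 'knirk': 1, 'foo': 1}
total = sum(counts.values())  # 8 documents in the fixture

for term, n in sorted(counts.items()):
    w = float(n) / total
    print(term, w, total / float(n), math.log(w), math.log(total / float(n)))
# ba    -> 0.5,  2.0, -0.6931, 0.6931   (matches the assertAlmostEqual values)
# knark -> 0.25, 4.0, -1.3863, 1.3863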
import argparse
import getpass
import json
import os

import yaml
from elasticsearch import Elasticsearch


def main(in_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", help="Elasticsearch host")
    parser.add_argument("--port", type=int, help="Elasticsearch port")
    parser.add_argument("--url-prefix", help="Elasticsearch URL prefix")
    parser.add_argument("--no-auth", action="store_const", const=True,
                        help="Suppress prompt for basic auth")
    parser.add_argument("--ssl", action="store_true", default=None, help="Use SSL")
    parser.add_argument("--no-ssl", dest="ssl", action="store_false",
                        help="Do not use SSL")
    parser.add_argument("--index", help="Index name to create")
    parser.add_argument("--old-index", help="Old index name to copy")
    parser.add_argument("--config", help="Config file name")
    args = parser.parse_args(in_args)

    if args.config:
        filename = args.config
    elif os.path.isfile("../config.yaml"):
        filename = "../config.yaml"
    elif os.path.isfile("config.yaml"):
        filename = "config.yaml"
    else:
        filename = ""

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.safe_load(config_file)
        host = data.get("es_host")
        port = data.get("es_port")
        username = data.get("es_username")
        password = data.get("es_password")
        url_prefix = data.get("es_url_prefix", "")
        use_ssl = data.get("use_ssl")
    else:
        host = args.host if args.host else raw_input("Enter elasticsearch host: ")
        port = args.port if args.port else int(raw_input("Enter elasticsearch port: "))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input("Use SSL? t/f: ").lower() in ("t", "true"))
        if args.no_auth is None:
            username = raw_input("Enter optional basic-auth username: ")
            # getpass keeps the password from being echoed to the terminal
            password = getpass.getpass("Enter optional basic-auth password: ")
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input("Enter optional Elasticsearch URL prefix: "))

    if username and password:
        http_auth = username + ":" + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl,
                       http_auth=http_auth, url_prefix=url_prefix)

    silence_mapping = {
        "silence": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "until": {"type": "date", "format": "dateOptionalTime"},
            }
        }
    }
    ess_mapping = {
        "elastalert_status": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "@timestamp": {"format": "dateOptionalTime", "type": "date"},
            }
        }
    }
    es_mapping = {
        "elastalert": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "match_body": {"enabled": False, "type": "object"},
                "aggregate_id": {"index": "not_analyzed", "type": "string"},
            }
        }
    }
    error_mapping = {
        "elastalert_error": {
            "properties": {"data": {"type": "object", "enabled": False}}
        }
    }

    index = args.index if args.index is not None else raw_input(
        "New index name? (Default elastalert_status) ")
    if not index:
        index = "elastalert_status"

    res = None
    if args.old_index:
        print("Downloading existing data...")
        res = es.search(index=args.old_index, body={}, size=500000)
        print("Got %s documents" % (len(res["hits"]["hits"])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type="elastalert", body=es_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_status", body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type="silence", body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_error", body=error_mapping)
    print("New index %s created" % (index))

    if res:
        bulk = "".join(
            "%s\n%s\n" % (json.dumps({"create": {"_type": doc["_type"], "_index": index}}),
                          json.dumps(doc["_source"]))
            for doc in res["hits"]["hits"]
        )
        print("Uploading data...")
        es.bulk(body=bulk, index=index)
        print("Done!")
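# The bulk body assembled at the end of main() is newline-delimited JSON: one
# action line followed by one source line per document. A minimal sketch of a
# single action/source pair (field values are illustrative):
import json

action = {"create": {"_type": "elastalert_status", "_index": "elastalert_status"}}
source = {"rule_name": "example-rule", "@timestamp": "2015-01-01T00:00:00Z"}
pair = "%s\n%s\n" % (json.dumps(action), json.dumps(source))
# pair == '{"create": {...}}\n{"rule_name": "example-rule", ...}\n'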
import logging

from elasticsearch import Elasticsearch, NotFoundError

logger = logging.getLogger(__name__)


def reindex(from_hosts, from_index, to_hosts, to_index, to_type,
            source='{"query":{"match_all":{}}}', max_docs=0, page_size=10,
            logging_per_docs=1000, es_scroll='5m', request_timeout=60):
    if from_index is None:
        logger.warn('from_index is empty.')
        return
    from_es = Elasticsearch(hosts=from_hosts)
    to_es = Elasticsearch(hosts=to_hosts)
    scroll_id = None
    counter = 0
    running = True
    bulk_data = []
    while running:
        try:
            if scroll_id is None:
                # First page: open the scroll context.
                response = from_es.search(index=from_index, body=source,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll,
                                                  "size": page_size})
            else:
                # Subsequent pages: continue the scroll.
                response = from_es.scroll(scroll_id=scroll_id,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll})
            if len(response['hits']['hits']) == 0:
                running = False
                break
            scroll_id = response['_scroll_id']
            for hit in response['hits']['hits']:
                if '_source' in hit:
                    counter += 1
                    if counter % logging_per_docs == 0:
                        logger.info(u'Loaded {0} docs.'.format(counter))
                    if max_docs > 0 and counter >= max_docs:
                        logger.info(u'{0} docs are loaded, but it exceeded {1} docs.'.format(counter, max_docs))
                        running = False
                        break
                    # Fall back to the source index/type when no target is given.
                    op_index = to_index if to_index is not None else hit['_index']
                    op_type = to_type if to_type is not None else hit['_type']
                    bulk_data.append({"index": {"_index": op_index,
                                                "_type": op_type,
                                                "_id": hit['_id']}})
                    bulk_data.append(hit['_source'])
            if len(bulk_data) != 0:
                to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})
                bulk_data = []
        except NotFoundError:
            break
        except Exception:
            logger.exception(u"Failed to load documents from Elasticsearch (loaded {0} docs).".format(counter))
            break
    if len(bulk_data) != 0:
        to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})
    logger.info('Loaded {0} documents.'.format(counter))
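# A hypothetical invocation of reindex() above: copy everything from a v1
# index on one cluster to a v2 index on another, 500 documents per scroll
# page. The host names and index/type names are illustrative only.
reindex(from_hosts=['old-es.example.com:9200'],
        from_index='logs-v1',
        to_hosts=['new-es.example.com:9200'],
        to_index='logs-v2',
        to_type='log',
        page_size=500)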