Example No. 1
def load_es_template(apps, schema_editor):
    es = Elasticsearch(hosts=[settings.ES_URL], verify_certs=False)
    es.put_template(id="climate_data_template", body=json.dumps({
        "template": "climate_data",
        "mappings": {
            "*": {
                "properties": {
                    "measurement": {
                        "type": "double"
                    },
                    "tmax": {
                        "type": "double"
                    },
                    "tmin": {
                        "type": "double"
                    },
                    "tmean": {
                        "type": "double"
                    },
                    "tdev": {
                        "type": "double"
                    },
                    "rainfall": {
                        "type": "double"
                    },
                    "sunshine": {
                        "type": "double"
                    },
                    "region": {
                        "type": "keyword"
                    }
                }
            }
        }
    }))
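A minimal sketch of how a loader like this is typically hooked into a Django migration with RunPython; the app label and dependency below are assumptions, not part of the original snippet.

from django.db import migrations


class Migration(migrations.Migration):

    # Hypothetical previous migration; replace with the app's real one.
    dependencies = [('climate', '0001_initial')]

    operations = [
        # Load the Elasticsearch template on migrate; reverse is a no-op.
        migrations.RunPython(load_es_template, migrations.RunPython.noop),
    ]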
Example No. 2
def main():
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    send_get_body_as = raw_input(
        "Method for querying Elasticsearch[GET]: ") or 'GET'
    es = Elasticsearch(host=es_host,
                       port=es_port,
                       send_get_body_as=send_get_body_as)
    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int',
                    doc_type='dashboard',
                    body=query,
                    _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()

    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)

    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))
Example No. 3
    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)
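For reference, a hedged sketch of the server entry this method expects: the HOSTS and PARAMS keys come from the code above, while the concrete values are illustrative assumptions (PARAMS is forwarded verbatim as Elasticsearch keyword arguments).

SERVER = {
    'HOSTS': ['localhost:9200'],  # passed as the first argument to Elasticsearch()
    'PARAMS': {'timeout': 30, 'max_retries': 2, 'retry_on_timeout': True},
}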
Example No. 4
    def __init__(self, args):
        self.parse_args(args)
        self.conf = load_rules(self.args.config, use_rule=self.args.rule)
        self.max_query_size = self.conf['max_query_size']
        self.rules = self.conf['rules']
        self.debug = self.args.debug
        self.verbose = self.args.verbose
        self.writeback_index = self.conf['writeback_index']
        self.es_host = self.conf['es_host']
        self.es_port = self.conf['es_port']
        self.run_every = self.conf['run_every']
        self.alert_time_limit = self.conf['alert_time_limit']
        self.old_query_limit = self.conf['old_query_limit']
        self.alerts_sent = 0
        self.num_hits = 0
        self.current_es = None
        self.current_es_addr = None
        self.buffer_time = self.conf['buffer_time']
        self.silence_cache = {}
        self.rule_hashes = get_rule_hashes(self.conf)

        self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)

        if self.debug:
            self.verbose = True

        if self.verbose:
            logging.getLogger().setLevel(logging.INFO)

        for rule in self.rules:
            rule = self.init_rule(rule)

        if self.args.silence:
            self.silence()
Example No. 5
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example No. 6
    def __init__(self, index_name, index_type, ip="127.0.0.1"):
        '''
        @param index_name: index name
        @param index_type: index type
        '''
        self.index_name = index_name
        self.index_type = index_type

        self.es = Elasticsearch([ip])
Example No. 7
def search_fuzzy(request=None, project_id=None):
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])

    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')

    st = request.GET.get('st', None)
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)

    query = {
        "query": {
            "query_string": {
                "fields": ["title"],
                "query": "*" + st + "*",
            }
        },
    }

    total = client.search(index=[
        index_name, ebs_index_name, elb_index_name, eip_index_name,
        vpc_index_name, subnet_index_name, security_group_index_name
    ],
                          doc_type=[
                              "instance_id", "name_title", "prip_title",
                              "puip_title", "ebs", "eip", "elb", "vpc",
                              "subnet", "security_group_id",
                              "security_group_name"
                          ],
                          body=query,
                          ignore_unavailable=True)['hits']['total']

    # Get the total number of hits and set the size parameter to that value,
    # so the second query returns every result.
    # TODO: discuss and optimize
    query['size'] = total

    search_results = client.search(index=[
        index_name, ebs_index_name, elb_index_name, eip_index_name,
        vpc_index_name, subnet_index_name, security_group_index_name
    ],
                                   doc_type=[
                                       "instance_id", "name_title",
                                       "prip_title", "puip_title", "ebs",
                                       "eip", "elb", "vpc", "subnet",
                                       "security_group_id",
                                       "security_group_name"
                                   ],
                                   body=query,
                                   ignore_unavailable=True)
    return search_results
Example No. 8
def count(flt):
    '''
    Given a filter, count return the number of users fitting that filter
    Examples:
    flt : {'gender':'male'}
    return value : int representing the number of users with 'male' as gender
    '''
    req = init_query()
    write_filters_in_request(req, flt)
    es = Elasticsearch(ES_NODES)
    res = es.count(index=RIOT_USERS_INDEX, body=req)['count']
    return res
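A hedged usage sketch, assuming ES_NODES, RIOT_USERS_INDEX, init_query and write_filters_in_request are already defined in this module:

male_users = count({'gender': 'male'})
print("users with gender=male: %d" % male_users)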
Example No. 9
 def handle_error(self, message, data=None):
     ''' Logs message at error level and writes message, data and traceback to Elasticsearch. '''
     if not self.writeback_es:
         self.writeback_es = Elasticsearch(host=self.es_host,
                                           port=self.es_port)
     logging.error(message)
     body = {'message': message}
     tb = traceback.format_exc()
     body['traceback'] = tb.strip().split('\n')
     if data:
         body['data'] = data
     self.writeback('elastalert_error', body)
Example No. 10
def store_vacancy_record(es: Elasticsearch, index_name: str, record: dict,
                         parent_id: str) -> str:
    hash_string = ''
    for k, v in record.items():
        hash_string += "{}{}".format(k, v)
    hash_string += parent_id
    hash_object = hashlib.md5(hash_string.encode())
    es.index(index=index_name,
             doc_type='vacancies',
             id=hash_object.hexdigest(),
             body=record,
             parent=parent_id)
    return hash_string
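A hedged usage sketch; the host, index name, record fields and parent id are illustrative, and the target index is assumed to define a parent/child mapping for the 'vacancies' type:

from elasticsearch import Elasticsearch

es = Elasticsearch(['localhost:9200'])
record = {'title': 'Backend developer', 'salary': 50000}  # illustrative fields
key = store_vacancy_record(es, 'vacancies-2017', record, parent_id='employer-42')
print(key)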
Example No. 11
    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)
Example No. 12
    def __init__(self,
                 urls=None,
                 timeout=None,
                 force_new=False,
                 raw_results=False,
                 **kwargs):
        '''
        Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
        if it has already been initialized. Otherwise, creates a new one.

        If no parameters are passed, everything is determined from the Django settings.

        :param urls: A list of URLs, a single URL string (without leading `http://`), or None to read from settings.
        :param idx: A list of indices or a single string representing an index name. Optional; will be merged with `idx_alias`.
        :param idx_alias: A list of index aliases or a single string representing an index alias, as defined in the settings. Will be merged with `idx`.
        :param timeout: Timeout used in the connection.
        :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise any existing connection with the exact same settings will be reused.
        :param **kwargs: Additional settings to pass to the low level elasticsearch client and to elasticsearch-dsl-py's Search.
        '''

        Bungiesearch.__load_settings__()

        urls = urls or Bungiesearch.BUNGIE['URLS']
        if not timeout:
            timeout = Bungiesearch.BUNGIE.get('TIMEOUT',
                                              Bungiesearch.DEFAULT_TIMEOUT)

        search_keys = ['using', 'index', 'doc_type', 'extra']
        search_settings, es_settings = {}, {}
        for k, v in iteritems(kwargs):
            if k in search_keys:
                search_settings[k] = v
            else:
                es_settings[k] = v

        if not es_settings:
            # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
            es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

        # Building a caching key to cache the es_instance for later use (and retrieved a previously cached es_instance).
        cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
        es_instance = None
        if not force_new:
            if cache_key in Bungiesearch._cached_es_instances:
                es_instance = Bungiesearch._cached_es_instances[cache_key]

        if not es_instance:
            es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
            Bungiesearch._cached_es_instances[cache_key] = es_instance

        if 'using' not in search_settings:
            search_settings['using'] = es_instance

        super(Bungiesearch, self).__init__(**search_settings)

        # Creating instance attributes.
        self._only = [
        ]  # Stores the exact fields to fetch from the database when mapping.
        self.results = []  # Store the mapped and unmapped results.
        self._raw_results_only = raw_results
Example No. 13
def connect():
    global _es
    if _es is None:
        server = config.get('elasticsearch_host', 'localhost') + ':9200'
        auth = config.get('elastic_search_basic_auth', None)
        # Only pass http_auth when basic auth is actually configured.
        http_kwargs = {'http_auth': auth} if auth else {}
        _es = Elasticsearch(server, **http_kwargs)
    return _es
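A hedged usage sketch, assuming the module-level `config` mapping is already populated:

es = connect()
print(es.info())        # basic cluster info confirms the connection works
assert connect() is es  # the cached module-level client is reused on later calls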
Example No. 14
def get_list_of_indexes_to_reindex(full_reindex=False):
    db_names = all_db_names()
    try:
        list_of_indexes_out_of_sync = []
        total_submissions = 0
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue

                form_model_doc = FormModelDocument.wrap(row["value"])
                if full_reindex or is_mapping_out_of_sync(form_model_doc, dbm):
                    es = Elasticsearch(hosts=[{
                        "host": ELASTIC_SEARCH_HOST,
                        "port": ELASTIC_SEARCH_PORT
                    }])
                    search = Search(using=es,
                                    index=dbm.database_name,
                                    doc_type=form_model_doc.id)
                    no_of_submissions = search.count()
                    questionnaire_info = dict(
                        db_name=database_name,
                        questionnaire_id=form_model_doc.id,
                        name=form_model_doc.name,
                        no_of_submissions=no_of_submissions)
                    total_submissions += no_of_submissions
                    list_of_indexes_out_of_sync.append(questionnaire_info)
        return list_of_indexes_out_of_sync, total_submissions
    except Exception:
        # Swallowing the error would make callers unpack None; return an empty
        # result set so the (list, total) contract still holds.
        return [], 0
Example No. 15
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example No. 16
 def get(self, request):
     database_name = get_database_name(request.user)
     search_text = lower(request.GET["term"] or "")
     es = Elasticsearch(hosts=[{
         "host": ELASTIC_SEARCH_HOST,
         "port": ELASTIC_SEARCH_PORT
     }])
     search = Search(using=es, index=database_name, doc_type="reporter")
     search = search.extra(**{"size": "10"})
     resp = []
     if search_text:
         query_text_escaped = ElasticUtilsHelper().replace_special_chars(
             search_text)
         query_fields = [
             "name", "name_value", "name_exact", "short_code",
             "short_code_exact", "short_code_value"
         ]
         search = search.query("query_string",
                               query=query_text_escaped,
                               fields=query_fields)
         search_results = search.execute()
         resp = [{
             "id": result.short_code,
             "label": self.get_label(result)
         } for result in search_results.hits]
     return HttpResponse(json.dumps(resp))
Example No. 17
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules["es_host"], port=self.rules["es_port"])
        window_size = datetime.timedelta(**self.rules.get("terms_window_size", {"days": 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get("use_strftime_index"):
            index = format_index(self.rules["index"], start, end)
        else:
            index = self.rules["index"]
        time_filter = {self.rules["timestamp_field"]: {"lte": dt_to_ts(end), "gte": dt_to_ts(start)}}
        query_template["filter"] = {"bool": {"must": [{"range": time_filter}]}}
        query = {"aggs": {"filtered": query_template}}

        for field in self.fields:
            field_name["field"] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if "aggregations" in res:
                buckets = res["aggregations"]["filtered"]["values"]["buckets"]
                keys = [bucket["key"] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info("Found %s unique values for %s" % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info("Found no values for %s" % (field))
Example No. 18
	def __init__(self, hostName, postNum):
		self.host = hostName
		self.post = postNum
		if (requests.get('http://' + self.host + ':' + self.post).content):
			# Connect to cluster
			self.es = Elasticsearch([{'host': self.host, 'port': self.post}])
		else:
			print("Please turn on elasticsearch")
Example No. 19
 def get_instance():
     if ESLowLevelClient.__es is None:
         with ESLowLevelClient.__es_lock:
             if ESLowLevelClient.__es is None:
                 ESLowLevelClient.__es = Elasticsearch(['localhost'],
                                                       port=9200,
                                                       maxsize=25)
     return ESLowLevelClient.__es
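get_instance relies on class-level state that the snippet does not show; a minimal sketch of that scaffolding, assuming the names used above (double-checked locking around a shared client):

import threading

from elasticsearch import Elasticsearch


class ESLowLevelClient:
    # Shared singleton client and the lock that guards its lazy creation.
    __es = None
    __es_lock = threading.Lock()

    # get_instance() from the snippet above would live here as a @staticmethod.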
Example No. 20
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example No. 21
class ElasticClient:
    def __init__(self, index_name, index_type, ip="127.0.0.1"):
        '''
        @param index_name: index name
        @param index_type: index type
        '''
        self.index_name = index_name
        self.index_type = index_type

        self.es = Elasticsearch([ip])

    def create_index(self, index_name="teacher_resume", index_type="tr_type"):
        # Create the index
        _index_mappings = {
            "mappings": {
                self.index_type: {
                    "properties": {
                        "teachername": {
                            "type": "keyword"
                        },
                        "telephone": {
                            "type": "text"
                        },
                        "email": {
                            "type": "keyword"
                        },
                        "research_direction": {
                            "type": "array"
                        },
                        "personal_profile": {
                            "type": "text"
                        },
                        "teaching_results": {
                            "type": "text"
                        },
                        "research_results": {
                            "type": "text"
                        },
                        "lab_introduction": {
                            "type": "text"
                        },
                    }
                }
            }
        }
        self.es.indices.create(index=self.index_name,
                               body=_index_mappings,
                               ignore=400)

    def load_index(self):
        with open(os.path.join(BASE_DIR, 'static', 'files',
                               'test_json.json')) as f:
            result = json.load(f)
            for item in result:
                res = self.es.index(index=self.index_name,
                                    doc_type=self.index_type,
                                    body=item)
                print(res)
Example No. 22
def _engine():
    ":rtype elasticsearch.Elasticsearch"
    global _elastic
    if (not _elastic):
        _elastic = Elasticsearch([{
            "host": settings.ELASTIC_HOST,
            "port": int(settings.ELASTIC_PORT)
        }])
    return _elastic
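A hedged usage sketch, assuming settings.ELASTIC_HOST and settings.ELASTIC_PORT are configured:

es = _engine()
print(es.cluster.health())   # e.g. cluster status
assert _engine() is es       # the lazily created client is reused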
Example No. 23
    def get_dashboard(self, rule, db_name):
        """ Download dashboard which matches use_kibana_dashboard from elasticsearch. """
        es = Elasticsearch(host=rule['es_host'], port=rule['es_port'])
        if not db_name:
            raise EAException("use_kibana_dashboard undefined")
        query = {'query': {'term': {'_id': db_name}}}
        try:
            res = es.search(index='kibana-int',
                            doc_type='dashboard',
                            body=query,
                            _source_include=['dashboard'])
        except ElasticsearchException as e:
            raise EAException("Error querying for dashboard: %s" % (e))

        if res['hits']['hits']:
            return json.loads(res['hits']['hits'][0]['_source']['dashboard'])
        else:
            raise EAException("Could not find dashboard named %s" % (db_name))
Example No. 24
class TestReindexer(unittest.TestCase):
    def setUp(self):
        self.source_index = "reindex"
        self.target_index = "reindex-a"
        self.client = Elasticsearch()
        self.reindexer = Reindexer(self.client)
        self.schema_manager = SchemaManager(self.client)

        # try:
        #     read_only_setting = {"index": {"blocks": {"read_only": False}}}
        #     self.client.indices.put_settings(index=self.source_index, body=read_only_setting)
        # except:
        #     pass

        self.client.indices.create(index=self.source_index)

    def tearDown(self):
        for index in [self.source_index, self.target_index]:
            try:
                self.client.indices.delete(index=index)
            except:
                pass

    def test_reindex(self):
        create = []
        for i in ['a', 'b', 'c', 'd', 'e']:
            doc = {
                '_op_type': 'create',
                '_index': self.source_index,
                '_type': 'document',
                'doc': {'name': i}
            }
            create.append(doc)
        bulk(self.client, create, refresh=True)
        docs = self.client.search(index=self.source_index)
        self.assertEqual(len(docs['hits']['hits']), 5)

        self.reindexer.do_reindex(self.source_index, self.target_index, 3)

        self.client.indices.refresh(','.join([self.source_index, self.target_index]))
        docs = self.client.search(index=self.source_index)
        self.assertEqual(len(docs['hits']['hits']), 5)
        docs = self.client.search(index=self.target_index)
        self.assertEqual(len(docs['hits']['hits']), 5)
Example No. 25
def populate_elastic_search(request=None, project_id=None):
    # 1. Create tag from "project_id" + "type" + "tag".
    # 2. Get instances from all region caches.
    # 3. Generate an index for each project.
    # 4. List the tag in the respective project index and doc type.
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])

    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')

    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)

    try:
        # First try to delete the index for this project if already exists
        client.indices.delete(index=[
            index_name, ebs_index_name, elb_index_name, eip_index_name,
            vpc_index_name, security_group_index_name, subnet_index_name
        ])
    except TransportError as e:
        LOG.error("Error while deleting the index {0} error : "
                  "{1}".format(index_name, e))

    try:
        obj_list = []
        obj_list.extend(
            populate_ec2_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_ebs_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_elb_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_eip_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_vpc_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_subnet_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_security_group_indexes(request=request,
                                            project_id=project_id))

        if obj_list:
            elastic_index_res = helpers.bulk(
                client, obj_list,
                stats_only=True)  # Index elastic search in bulk
            LOG.info("Indexed {0} items Failed {1} items".format(
                elastic_index_res[0], elastic_index_res[1]))

    except Exception as e:
        LOG.error("Error while indexing project {0} error {1}".format(
            project_id, e))
Example No. 26
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'],
                                port=self.rules['es_port'],
                                use_ssl=self.rules['use_ssl'],
                                timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(
            **self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {
            self.rules['timestamp_field']: {
                'lte': dt_to_ts(end),
                'gte': dt_to_ts(start)
            }
        }
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query,
                                 index=index,
                                 ignore_unavailable=True,
                                 timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' %
                                       (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example No. 27
class Mysql2Es():
    config = {
        "db":{
            "host":"192.168.0.196",
            "user":"******",
            "passwd":"xsycommercial123",
            "db":"prism1",
            "charset":"utf8"
            },
        "max_query":"select max(id) from company",
        "query":"select id,name,company_org_type,reg_status from company",
        "index":{
            "host":["http://192.168.0.196:9200","http://192.168.0.197:9200","http://192.168.0.198:9200"],
            "_index":"company0606",
            "_type":"company"
            },    
        "action":"index",
        "_id":"id"
        }


    def __init__(self,start_id=0,max_id=100000,step=10000,id_file=None,config=None):
        if config != None:
            self.config = json.loads(open(config).read())
        #===================================================================
        # connect to mysql
        #===================================================================
        self.db = None 
        try:
            self.db = MySQLdb.connect(**self.config["db"])
        except MySQLdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
            sys.exit (1)
        
        #===================================================================
        # query select from table
        #===================================================================
        
        self.cursor = self.db.cursor()   
        #self.cursor.execute(self.config["max_query"])
        self.start_id = start_id
        self.max_id = max_id
        self.step = step
        self.id_file = id_file
        self.limit = 50000
        
        self.action = self.config['action']
        self.metadata = {"_index":self.config["index"]["_index"],"_type":self.config["index"]["_type"]}
        
        self.es = Elasticsearch(self.config["index"]["host"])
        
        self.mutex = threading.Lock()
        self.thread_num = 0
        self.db_data=[]
        self.complete = False
Example No. 28
class SoSoImp(object):
    '''
    classdocs
    '''

    def __init__(self):
        self.es = Elasticsearch(['192.168.2.129', '192.168.2.130'])

    def addSoso(self, Content):
        '''
        Add a search record for the given content.
        '''
        title = ""
        if Content.title is not None:
            title = Content.title

        txt = ""
        if Content.txt is not None:
            txt = Content.txt

        # Get the sentiment of the text
        source = fenci.mm(title, txt)

        body = {"title": Content.title, "summary": Content.summary, "context": Content.txt, "site_cls": Content.site_cls, "domaintype": Content.domaintype,
                "countryid": Content.countryid, "province": Content.province, "city": Content.city, "area": Content.area, "url": Content.url, "publictime": Content.pubdate,
                "createtime": Content.created, "sitename": Content.site_name, "domain1": Content.domain_1, "domain2": Content.domain_2, "sentiment": source,
                "subname": Content.subname}

        self.es.index(index="yuqing", doc_type="yuqing_type", body=body, id=Content.rowkey)
Example No. 29
    def get_es(self):
        if self.es is None:
            ssl_url = self.es_url.startswith('https')

            if ssl_url:
                # TODO add valid cert in ES setup
                logger.warning('ES does not use cert validation.')

            self.es = Elasticsearch([self.es_url], verify_certs=False)

        return self.es
Example No. 30
    def run_rule(self, rule):
        """ Run a rule including querying and alerting on results.

        :param rule: The rule configuration.
        :return: The number of matches that the rule produced.
        """

        elastalert_logger.info('Start to run rule: %s', rule.get('name'))
        # Run the rule. If querying over a large time period, split it up into segments
        self.num_hits = 0
        rule_request = rule.get("input").get("search").get("request")
        if rule_request.get("elastic_host",
                            None) is not None and rule_request.get(
                                "elastic_port", None) is not None:
            self.current_es = Elasticsearch(
                host=rule.get("input").get("search").get("request").get(
                    "elastic_host"),
                port=rule.get("input").get("search").get("request").get(
                    "elastic_port"))
        else:
            self.current_es = self.new_elasticsearch(self.global_config)

        self.run_query(rule)

        # Process any new matches
        num_matches = len(rule['type'].matches)

        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            #    continue

            if rule.get('realert'):
                # `key` belonged to the query-key based silencing that is commented
                # out above; with that disabled, fall back to an empty key.
                key = ''
                next_alert, exponent = self.next_alert_time(
                    rule, rule['name'] + key, ts_now())
                self.set_realert(rule['name'] + key, next_alert, exponent)

            # If no aggregation, alert immediately
            #if not rule['aggregation']:
            #    self.alert([match], rule)
            #    continue
            self.alert([match], rule)

            # Add it as an aggregated match
            #self.add_aggregated_alert(match, rule)

        # Mark this endtime for next run's start
        #rule['previous_endtime'] = endtime

        #time_taken = time.time() - run_start

        return num_matches
Example No. 31
def main():
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    es = Elasticsearch(host=es_host, port=es_port)
    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int', doc_type='dashboard', body=query, _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()

    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)

    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))
Example No. 32
def main(host, port, index, type, chunk_size, geojson_file):

    def _charge_doc():
        for feature in load_geojson(geojson_file):
            yield {
                '_index': index,
                '_type': type,
                '_source': feature
            }

    es = Elasticsearch(host=host, port=port)
    helpers.bulk(es, _charge_doc(), chunk_size=chunk_size, request_timeout=6000)
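A hedged invocation sketch; every argument value below is illustrative, and it assumes main is a plain function (if it is wrapped by a CLI decorator, invoke it from the command line instead):

main(host='localhost', port=9200, index='places', type='feature',
     chunk_size=500, geojson_file='data/places.geojson')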
Example No. 33
def elasticsearch_client(conf):
    """ returns an Elasticsearch instance configured using an es_conn_config """
    es_conn_conf = build_es_conn_config(conf)

    return Elasticsearch(host=es_conn_conf['es_host'],
                         port=es_conn_conf['es_port'],
                         url_prefix=es_conn_conf['es_url_prefix'],
                         use_ssl=es_conn_conf['use_ssl'],
                         verify_certs=es_conn_conf['verify_certs'],
                         connection_class=RequestsHttpConnection,
                         timeout=es_conn_conf['es_conn_timeout'],
                         send_get_body_as=es_conn_conf['send_get_body_as'])
Example No. 34
    def setUp(self):
        super(TestESTermAggregationWeightProvider, self).setUp()

        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'es_term_weight_provider_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type, {self.field: 'foo'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark '})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)
Example No. 35
def main():
    parser = ArgumentParser()
    parser.add_argument('-e',
                        '--elasticsearch-server',
                        default='localhost:9200')
    parser.add_argument('-d', '--dataset')
    parser.add_argument('-s', '--sections')
    opts = parser.parse_args()

    es_hosts = [opts.elasticsearch_server]
    dataset_name = opts.dataset
    dataset_sections = opts.sections

    es = Elasticsearch(hosts=es_hosts, timeout=120)

    if dataset_name == 'newsgroups':
        dataset = NewsgroupsDataset()
    elif dataset_name == 'aviskorpus':
        sections = None
        sources = None

        if dataset_sections:
            try:
                sections, sources = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                sources = [s for s in sources.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' %
                              dataset_sections)
                sys.exit(1)

        dataset = AviskorpusDataset(sections=sections, sources=sources)
    elif dataset_name == 'ndt':
        sections = None
        lang = None

        if dataset_sections:
            try:
                sections, lang = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                lang = [s for s in lang.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' %
                              dataset_sections)
                sys.exit(1)

        dataset = NDTDataset(lang=lang, sections=sections)
    else:
        logging.error('Unknown dataset %s ...' % dataset_name)
        sys.exit(1)

    dataset.install(es)
Example No. 36
    def filter(self, qs, value):
        client = Elasticsearch([settings.ELASTICSEARCH_HOST])
        value = value.lower()

        search_query = {
            "bool": {
                "must_not": [  # исключает из выдачи is_published=False
                    {
                        "term": {
                            "is_published": False
                        }
                    }
                ],
                "should": [
                    {
                        "simple_query_string": {
                            "fields": ["category_name"],
                            "quote_field_suffix": ".exact",
                            "query": value
                        }
                    },
                ]
            }
        }

        s = Search(using=client, index='category') \
            .query(search_query)\
            .sort("_score", "-views")\
            .extra(size=self.max_result, from_=0)

        hits_list = []
        items = s.execute()
        if items:
            for item in items:
                hits_list.append(item.meta.id)
            hits_order = Case(
                *[When(pk=pk, then=pos) for pos, pk in enumerate(hits_list)])
            qs = qs.filter(id__in=hits_list).order_by(hits_order)
        else:
            qs = qs.none()

            # TODO: fallback?
            # bits = value.split(' ')
            # search_clauses = reduce(operator.and_,
            #                         [Q(title__icontains=v) for v in bits])
            # unpublished = Category.objects.get_queryset_descendants(
            #     Category.objects.filter(is_published=False), include_self=True)
            # qs = (qs
            #       .exclude(pk__in=unpublished)
            #       .filter(search_clauses)
            #       .order_by('-views'))
        return qs[:self.max_result]
Example No. 37
 def _connect(self):
     """
     connect to a member of the ElasticSearch cluster
     """
     try:
         if self.local_env:
             self.es = Elasticsearch([{'host': self.host,
                                       'port': self.port}])
         else:
             self.es = Elasticsearch([{'host': self.host,
                                       'port': self.port}],
                                     sniff_on_start=True,
                                     sniff_on_connection_fail=True,
                                     sniffer_timeout=self.timeout)
         self.idx = IndicesClient(self.es)
         return
     except ConnectionError as e:
         return ElasticSearchError.no_host_available(self.host, self.port)
     except Exception as e:
         (type_e, value, traceback_prev) = exc_info()
         backtrace = extract_tb(traceback_prev)
         return ElasticSearchError.unknown_exception(backtrace, str(e))
Example No. 38
    def setUp(self):
        self.source_index = "reindex"
        self.target_index = "reindex-a"
        self.client = Elasticsearch()
        self.reindexer = Reindexer(self.client)
        self.schema_manager = SchemaManager(self.client)

        # try:
        #     read_only_setting = {"index": {"blocks": {"read_only": False}}}
        #     self.client.indices.put_settings(index=self.source_index, body=read_only_setting)
        # except:
        #     pass

        self.client.indices.create(index=self.source_index)
Example No. 39
    def get_all_terms(self):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if self.rules.get('use_strftime_index'):
            end = ts_now()
            start = end - window_size
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query_template, index=index, ignore_unavailable=True, timeout=50)
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys
Example No. 40
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, rule, args=None):
        super(NewTermsRule, self).__init__(rule, args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and (
            len(self.fields) != 1 or len(self.fields) == 1 and type(self.fields[0]) == list
        ):
            raise EAException("use_terms_query can only be used with a single non-composite field")
        try:
            self.get_all_terms(args)
        except Exception as e:
            # Refuse to start if we cannot get existing terms
            raise EAException('Error searching for existing terms: %s' % (repr(e)))

    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(
            host=self.rules['es_host'],
            port=self.rules['es_port'],
            timeout=self.rules.get('es_conn_timeout', 50),
            send_get_body_as=self.rules.get('send_get_body_as', 'GET')
        )
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        step = datetime.timedelta(**self.rules.get('window_step_size', {'days': 1}))

        for field in self.fields:
            tmp_start = start
            tmp_end = min(start + step, end)

            time_filter = {self.rules['timestamp_field']: {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}}
            query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
            query = {'aggs': {'filtered': query_template}}
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                self.seen_values.setdefault(tuple(field), [])
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = add_raw_postfix(sub_field)
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                self.seen_values.setdefault(field, [])
                # For non-composite keys, only a single agg is needed
                field_name['field'] = add_raw_postfix(field)

            # Query the entire time range in small chunks
            while tmp_start < end:
                if self.rules.get('use_strftime_index'):
                    index = format_index(self.rules['index'], tmp_start, tmp_end)
                else:
                    index = self.rules['index']
                res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
                if 'aggregations' in res:
                    buckets = res['aggregations']['filtered']['values']['buckets']
                    if type(field) == list:
                        # For composite keys, make the lookup based on all fields
                        # Make it a tuple since it can be hashed and used in dictionary lookups
                        for bucket in buckets:
                            # We need to walk down the hierarchy and obtain the value at each level
                            self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    else:
                        keys = [bucket['key'] for bucket in buckets]
                        self.seen_values[field] += keys
                else:
                    self.seen_values.setdefault(field, [])
                if tmp_start == tmp_end:
                    break
                tmp_start = tmp_end
                tmp_end = min(tmp_start + step, end)
                time_filter[self.rules['timestamp_field']] = {'lt': dt_to_ts(tmp_end), 'gte': dt_to_ts(tmp_start)}

            for key, values in self.seen_values.iteritems():
                if not values:
                    if type(key) == tuple:
                        # If we don't have any results, it could either be because of the absence of any baseline data
                        # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                        # end-users a heads up to help them debug what might be going on.
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                    else:
                        elastalert_logger.info('Found no values for %s' % (key))
                    continue
                self.seen_values[key] = list(set(values))
                elastalert_logger.info('Found %s unique values for %s' % (len(values), key))

    def flatten_aggregation_hierarchy(self, root, hierarchy_tuple=()):
        """ For nested aggregations, the results come back in the following format:
            {
            "aggregations" : {
                "filtered" : {
                  "doc_count" : 37,
                  "values" : {
                    "doc_count_error_upper_bound" : 0,
                    "sum_other_doc_count" : 0,
                    "buckets" : [ {
                      "key" : "1.1.1.1", # IP address (root)
                      "doc_count" : 13,
                      "values" : {
                        "doc_count_error_upper_bound" : 0,
                        "sum_other_doc_count" : 0,
                        "buckets" : [ {
                          "key" : "80",    # Port (sub-aggregation)
                          "doc_count" : 3,
                          "values" : {
                            "doc_count_error_upper_bound" : 0,
                            "sum_other_doc_count" : 0,
                            "buckets" : [ {
                              "key" : "ack",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 3
                            }, {
                              "key" : "syn",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 1
                            } ]
                          }
                        }, {
                          "key" : "82",    # Port (sub-aggregation)
                          "doc_count" : 3,
                          "values" : {
                            "doc_count_error_upper_bound" : 0,
                            "sum_other_doc_count" : 0,
                            "buckets" : [ {
                              "key" : "ack",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 3
                            }, {
                              "key" : "syn",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 3
                            } ]
                          }
                        } ]
                      }
                    }, {
                      "key" : "2.2.2.2", # IP address (root)
                      "doc_count" : 4,
                      "values" : {
                        "doc_count_error_upper_bound" : 0,
                        "sum_other_doc_count" : 0,
                        "buckets" : [ {
                          "key" : "443",    # Port (sub-aggregation)
                          "doc_count" : 3,
                          "values" : {
                            "doc_count_error_upper_bound" : 0,
                            "sum_other_doc_count" : 0,
                            "buckets" : [ {
                              "key" : "ack",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 3
                            }, {
                              "key" : "syn",  # Reason (sub-aggregation, leaf-node)
                              "doc_count" : 3
                            } ]
                          }
                        } ]
                      }
                    } ]
                  }
                }
              }
            }

            Each level will either have more values and buckets, or it will be a leaf node
            We'll ultimately return a flattened list with the hierarchies appended as strings,
            e.g the above snippet would yield a list with:

            [
             ('1.1.1.1', '80', 'ack'),
             ('1.1.1.1', '80', 'syn'),
             ('1.1.1.1', '82', 'ack'),
             ('1.1.1.1', '82', 'syn'),
             ('2.2.2.2', '443', 'ack'),
             ('2.2.2.2', '443', 'syn')
            ]

            A similar formatting will be performed in the add_data method and used as the basis for comparison

        """
        results = []
        # There are more aggregation hierarchies left.  Traverse them.
        if 'values' in root:
            results += self.flatten_aggregation_hierarchy(root['values']['buckets'], hierarchy_tuple + (root['key'],))
        else:
            # We've gotten to a sub-aggregation, which may have further sub-aggregations
            # See if we need to traverse further
            for node in root:
                if 'values' in node:
                    results += self.flatten_aggregation_hierarchy(node, hierarchy_tuple)
                else:
                    results.append(hierarchy_tuple + (node['key'],))
        return results

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = ()
                lookup_field = field
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    lookup_field = tuple(field)
                    for sub_field in field:
                        lookup_result = lookup_es_key(document, sub_field)
                        if not lookup_result:
                            value = None
                            break
                        value += (lookup_result,)
                else:
                    value = lookup_es_key(document, field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = lookup_field
                    self.add_match(copy.deepcopy(document))
                elif value:
                    if value not in self.seen_values[lookup_field]:
                        document['new_field'] = lookup_field
                        self.add_match(copy.deepcopy(document))
                        self.seen_values[lookup_field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1 and the 0'th entry is always a string
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
                        self.seen_values[field].append(bucket['key'])
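
To make the recursive walk in flatten_aggregation_hierarchy easier to follow, here is a minimal standalone sketch (not part of the rule class) that applies the same flattening to a hand-built two-level bucket structure; the bucket keys below are made up purely for illustration.

# Standalone sketch of the flattening walk used above.  The nested
# "values"/"buckets" layout mirrors the docstring example; the concrete
# keys (IP addresses, ports) are illustrative placeholders only.
def flatten(root, prefix=()):
    results = []
    if isinstance(root, dict):
        if 'values' in root:
            # Non-leaf node: descend into its sub-buckets, extending the hierarchy tuple
            results += flatten(root['values']['buckets'], prefix + (root['key'],))
        else:
            # Leaf node: record the full hierarchy ending in this key
            results.append(prefix + (root['key'],))
    else:
        # A list of sibling buckets: flatten each one at the same level
        for node in root:
            results += flatten(node, prefix)
    return results

sample_buckets = [
    {'key': '1.1.1.1', 'values': {'buckets': [
        {'key': '80', 'values': {'buckets': [{'key': 'ack'}, {'key': 'syn'}]}},
    ]}},
    {'key': '2.2.2.2', 'values': {'buckets': [{'key': '443'}]}},
]

print(flatten(sample_buckets))
# [('1.1.1.1', '80', 'ack'), ('1.1.1.1', '80', 'syn'), ('2.2.2.2', '443')]
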
Exemplo n.º 41
0
def main():
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        use_ssl = data.get('use_ssl')
    else:
        host = raw_input("Enter elasticsearch host: ")
        port = int(raw_input("Enter elasticsearch port: "))
        while use_ssl is None:
            resp = raw_input("Use SSL? t/f: ").lower()
            use_ssl = True if resp in ('t', 'true') else (False if resp in ('f', 'false') else None)
        username = raw_input("Enter optional basic-auth username: "******"Enter optional basic-auth password: "******"Downloading existing data...")
        res = es.search(index=old_index, body={}, size=500000)
        print("Got %s documents" % (len(res['hits']['hits'])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    print("New index %s created" % (index))

    if res:
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print("Uploading data...")
        es.bulk(body=bulk, index=index)

    print("Done!")
Exemplo n.º 42
0
    def setUp(self):
        self.client = Elasticsearch()
        self.manager = SchemaManager(self.client, schema_index=self.test_schema_index)
Exemplo n.º 43
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=None, help='Use SSL')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    args = parser.parse_args()

    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = data.get('es_url_prefix', '')
        use_ssl = data.get('use_ssl')
    else:
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = raw_input('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix: '))

    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth, url_prefix=url_prefix)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                       'match_body': {'enabled': False, 'type': 'object'},
                                                       '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                       'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
    print('New index %s created' % (index))

    if res:
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)

    print('Done!')
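
The copy step above uses the Elasticsearch bulk API, whose body is newline-delimited JSON: one action line followed by one source line per document. A minimal sketch of the string built for a single copied document (the index name and document content are placeholders):

import json

# Hypothetical copied document, for illustration only
doc = {'_type': 'elastalert_status', '_source': {'rule_name': 'example_rule', 'time_taken': 1.5}}
index = 'elastalert_status_v2'

bulk = '%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                     json.dumps(doc['_source']))
# Prints two JSON lines: the "create" action metadata, then the document source
print(bulk)
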
Exemplo n.º 44
0
class TestSchemaManager(unittest.TestCase):
    test_schema_index = 'test_pseudonym'

    def setUp(self):
        self.client = Elasticsearch()
        self.manager = SchemaManager(self.client, schema_index=self.test_schema_index)

    def tearDown(self):
        try:
            self.client.indices.delete(self.test_schema_index)
        except Exception:
            # The index may not exist if the test never created it
            pass

    def test_schema_compiling(self):
        cfg = {'aliases': [{'name': 'alias1', 'strategy': {'date': {'indexes': {'201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)

        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 1)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))

        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401'})

        cfg['aliases'][0]['strategy']['date']['indexes']['201402'] = datetime.date(2014, 2, 1).isoformat()
        self.manager.update(cfg)
        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 2)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))

        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401', '201402'})

        cfg['aliases'].append({'name': 'alias2', 'strategy': {'date': {'indexes': {'201501': datetime.date(2015, 1, 1).isoformat()}}}})

        self.manager.update(cfg)
        schema = self.client.get(index=self.test_schema_index, id='master')
        self.assertEqual(schema['_version'], 3)
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))

        self.assertEqual({a['name'] for a in schema_doc['aliases']}, {'alias1', 'alias2'})
        self.assertEqual({i['name'] for i in schema_doc['indexes']}, {'201401', '201402', '201501'})

    def test_add_index(self):
        cfg = {'aliases': [{'name': 'alias1', 'strategy': {'date': {'indexes': {'201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        self.manager.add_index('alias1', '201402', datetime.date(2014, 1, 2).isoformat())
        schema = self.client.get(index=self.test_schema_index, id='master')
        source = schema.pop('_source')
        schema_doc = json.loads(source.get('schema'))

        for alias in schema_doc['aliases']:
            if alias['name'] == 'alias1':
                break
        self.assertIn('201402', alias['indexes'])
        self.assertIn('201402', [i['name'] for i in schema_doc['indexes']])

    def test_remove_index(self):
        cfg = {'aliases': [{'name': 'alias1', 'strategy': {'date': {'indexes': {'201501': datetime.date(2015, 1, 1).isoformat(), '201401': datetime.date(2014, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)
        self.manager.remove_index('201401')
        schema = self.client.get(index=self.test_schema_index, id='master')['_source']
        schema_doc = json.loads(schema.get('schema'))

        self.assertEqual(len(schema_doc['indexes']), 1)
        self.assertEqual(schema_doc['indexes'][0]['name'], '201501')
        self.assertEqual(len(schema_doc['aliases']), 1)
        self.assertEqual(schema_doc['aliases'][0]['indexes'], ['201501'])

    def test_reindex_cutover(self):
        source_index = "reindex_2017_01"
        # Add both indexes to aliases before cutover
        target_index = '%s-a' % source_index
        alias1 = 'cutover1'

        cfg = {'aliases': [{'name': alias1, 'strategy': {'date': {'indexes': {source_index: datetime.date(2017, 1, 1).isoformat()}}}}]}
        self.manager.update(cfg)

        _, schema = self.manager.get_current_schema(True)
        self.assertEquals(schema['aliases'][0]['name'], alias1)

        source_routing = None
        for index in schema['indexes']:
            if index['name'] == source_index:
                source_routing = index.get('routing')

        self.manager.reindex_cutover(source_index)

        _, schema = self.manager.get_current_schema(True)
        aliases = [alias for alias in schema['aliases'] if alias['name'] == alias1]
        for alias in aliases:
            self.assertTrue(target_index in alias['indexes'])
            self.assertTrue(source_index not in alias['indexes'])

        target_routing = None
        for index in schema['indexes']:
            if index['name'] == target_index:
                target_routing = index.get('routing')
        self.assertIsNotNone(target_routing)
        self.assertEquals(source_routing, target_routing)

    def test_get_target_index(self):
        source_name = 'assets_2017_01'
        target = self.manager._get_target_index(source_name)
        self.assertEquals(target, 'assets_2017_01-a')

        target = self.manager._get_target_index(target)
        self.assertEquals(target, 'assets_2017_01-b')
Exemplo n.º 45
0
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, *args):
        super(NewTermsRule, self).__init__(*args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and len(self.fields) != 1:
            raise EAException("use_terms_query can only be used with one field at a time")
        self.get_all_terms()

    def get_all_terms(self):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))

        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if self.rules.get('use_strftime_index'):
            end = ts_now()
            start = end - window_size
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query_template, index=index, ignore_unavailable=True, timeout=50)
            buckets = res['aggregations']['values']['buckets']
            keys = [bucket['key'] for bucket in buckets]
            self.seen_values[field] = keys

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = document.get(field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = field
                    self.add_match(document)
                elif value:
                    if value not in self.seen_values[field]:
                        document['new_field'] = field
                        self.add_match(document)
                        self.seen_values[field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
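
The rule options this class consumes can all be seen in the code above; a hypothetical minimal rules dictionary wiring them together might look like the sketch below (values are placeholders only).

# Hypothetical rule configuration; every key below is read somewhere in the
# NewTermsRule code above, and the values are illustrative placeholders.
rules = {
    'es_host': 'localhost',
    'es_port': 9200,
    'index': 'logstash-*',
    'timestamp_field': '@timestamp',
    'fields': ['username'],              # or set 'query_key' instead of 'fields'
    'terms_window_size': {'days': 30},   # how far back to collect existing terms
    'alert_on_missing_field': False,
}
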
Exemplo n.º 46
0
class NewTermsRule(RuleType):
    """ Alerts on a new value in a list of fields. """

    def __init__(self, rule, args=None):
        super(NewTermsRule, self).__init__(rule, args)
        self.seen_values = {}
        # Allow the use of query_key or fields
        if 'fields' not in self.rules:
            if 'query_key' not in self.rules:
                raise EAException("fields or query_key must be specified")
            self.fields = self.rules['query_key']
        else:
            self.fields = self.rules['fields']
        if not self.fields:
            raise EAException("fields must not be an empty list")
        if type(self.fields) != list:
            self.fields = [self.fields]
        if self.rules.get('use_terms_query') and len(self.fields) != 1:
            raise EAException("use_terms_query can only be used with one field at a time")
        try:
            self.get_all_terms(args)
        except Exception as e:
            # Refuse to start if we cannot get existing terms
            raise EAException('Error searching for existing terms: %s' % (e))

    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'])
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout=50)
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))

    def add_data(self, data):
        for document in data:
            for field in self.fields:
                value = document.get(field)
                if not value and self.rules.get('alert_on_missing_field'):
                    document['missing_field'] = field
                    self.add_match(document)
                elif value:
                    if value not in self.seen_values[field]:
                        document['new_field'] = field
                        self.add_match(document)
                        self.seen_values[field].append(value)

    def add_terms_data(self, terms):
        # With terms query, len(self.fields) is always 1
        field = self.fields[0]
        for timestamp, buckets in terms.iteritems():
            for bucket in buckets:
                if bucket['doc_count']:
                    if bucket['key'] not in self.seen_values[field]:
                        match = {field: bucket['key'],
                                 self.rules['timestamp_field']: timestamp,
                                 'new_field': field}
                        self.add_match(match)
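
For reference, the request body that this version of get_all_terms sends for a single field is a filter aggregation wrapped around a plain terms aggregation, roughly as sketched below; the field name and timestamp bounds are placeholders.

# Sketch of the query body built by get_all_terms for one field; the field
# name and range bounds below are placeholder values.
query = {
    'aggs': {
        'filtered': {
            'filter': {'bool': {'must': [{'range': {
                '@timestamp': {'gte': '2015-01-01T00:00:00Z', 'lte': '2015-01-31T00:00:00Z'}
            }}]}},
            'aggs': {'values': {'terms': {'field': 'username', 'size': 2147483647}}},
        }
    }
}
# This dict is what ends up being passed as body= to es.search() above.
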
Exemplo n.º 47
0
class BaseElasticsearchBackend(Base):
    """Base connection wrapper based on the ElasticSearch official library.

    It uses two entry points to configure the underlying connection:

    * ``transport_class``: the transport class from ``elasticsearch``. By
      default ``elasticsearch.transport.Transport``.
    * ``connection_class``: the connection class used by the transport class.
      It's undefined by default, as it is on the subclasses to provide one.

    If any of these elements is not defined, an ``ImproperlyConfigured`` error
    will be raised when the backend will try to configure the client.
    """
    #: ElasticSearch transport class used by the client class to perform
    #: requests.
    transport_class = Transport
    #: ElasticSearch connection class used by the transport class to perform
    #: requests.
    connection_class = None

    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)

    # Server methods
    # ==============
    # The underlying client does not require index names to perform server
    # related queries, such as "ping" or "info". The connection wrapper act
    # for them as a proxy.

    def ping(self, **kwargs):
        return self.client.ping(**kwargs)

    def info(self, **kwargs):
        return self.client.info(**kwargs)

    def put_script(self, lang, script_id, body, **kwargs):
        return self.client.put_script(lang, script_id, body, **kwargs)

    def get_script(self, lang, script_id, **kwargs):
        return self.client.get_script(lang, script_id, **kwargs)

    def delete_script(self, lang, script_id, **kwargs):
        return self.client.delete_script(lang, script_id, **kwargs)

    def put_template(self, template_id, body, **kwargs):
        return self.client.put_template(template_id, body, **kwargs)

    def get_template(self, template_id, body=None, **kwargs):
        return self.client.get_template(template_id, body, **kwargs)

    def delete_template(self, template_id=None, **kwargs):
        return self.client.delete_template(template_id, **kwargs)

    # Bulk methods
    # ============
    # The underlying client does not require index names, but it can be used.
    # As it makes sense to not give an index, developers are free to use these
    # as they want, as long as they are careful.

    def mget(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mget(body, index, doc_type, **kwargs)

    def bulk(self, body, index=None, doc_type=None, **kwargs):
        return self.client.bulk(body, index, doc_type, **kwargs)

    def msearch(self, body, index=None, doc_type=None, **kwargs):
        return self.client.msearch(body, index, doc_type, **kwargs)

    def mpercolate(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mpercolate(body, index, doc_type, **kwargs)

    # Scroll methods
    # ==============
    # The underlying client does not require an index to perform scroll.

    def scroll(self, scroll_id, **kwargs):
        return self.client.scroll(scroll_id, **kwargs)

    def clear_scroll(self, scroll_id, body=None, **kwargs):
        return self.client.clear_scroll(scroll_id, body, **kwargs)

    # Query methods
    # =============
    # The underlying client requires index names (or alias names) to perform
    # queries. The connection wrapper overrides these client methods to
    # automatically uses the configured names (indices and/or aliases).

    def create(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.create(
            self.indices, doc_type, body, doc_id, **kwargs)

    def index(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.index(
            self.indices, doc_type, body, doc_id, **kwargs)

    def exists(self, doc_id, doc_type='_all', **kwargs):
        return self.client.exists(self.indices, doc_id, doc_type, **kwargs)

    def get(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get(self.indices, doc_id, doc_type, **kwargs)

    def get_source(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get_source(self.indices, doc_id, doc_type, **kwargs)

    def update(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.update(
            self.indices, doc_type, doc_id, body, **kwargs)

    def search(self, doc_type=None, body=None, **kwargs):
        return self.client.search(self.indices, doc_type, body, **kwargs)

    def search_shards(self, doc_type=None, **kwargs):
        return self.client.search_shards(self.indices, doc_type, **kwargs)

    def search_template(self, doc_type=None, body=None, **kwargs):
        return self.client.search_template(
            self.indices, doc_type, body, **kwargs)

    def explain(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.explain(
            self.indices, doc_type, doc_id, body, **kwargs)

    def delete(self, doc_type, doc_id, **kwargs):
        return self.client.delete(self.indices, doc_type, doc_id, **kwargs)

    def count(self, doc_type=None, body=None, **kwargs):
        return self.client.count(self.indices, doc_type, body, **kwargs)

    def delete_by_query(self, doc_type=None, body=None, **kwargs):
        return self.client.delete_by_query(
            self.indices, doc_type, body, **kwargs)

    def suggest(self, body, **kwargs):
        return self.client.suggest(body, self.indices, **kwargs)

    def percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def count_percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.count_percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mlt(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.mlt(self.indices, doc_type, doc_id, body, **kwargs)

    def termvector(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.termvector(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mtermvectors(self, doc_type=None, body=None, **kwargs):
        return self.client.mtermvectors(self.indices, doc_type, body, **kwargs)

    def benchmark(self, doc_type=None, body=None, **kwargs):
        return self.client.benchmark(self.indices, doc_type, body, **kwargs)

    def abort_benchmark(self, name=None, **kwargs):
        return self.client.abort_benchmark(name, **kwargs)

    def list_benchmarks(self, doc_type=None, **kwargs):
        return self.client.list_benchmarks(self.indices, doc_type, **kwargs)
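
The class docstring above describes how the HOSTS list and PARAMS dict are forwarded to the Elasticsearch constructor together with the transport and connection classes. A minimal standalone sketch of that wiring, outside the djangoes backend machinery (the host address and parameters are placeholders, and RequestsHttpConnection is just one possible connection_class, available when the requests-based connection ships with the installed client):

from elasticsearch import Elasticsearch, RequestsHttpConnection
from elasticsearch.transport import Transport

# Stand-ins for self.server['HOSTS'] and self.server['PARAMS']
hosts = ['localhost:9200']
params = {'timeout': 30}

client = Elasticsearch(hosts,
                       transport_class=Transport,
                       connection_class=RequestsHttpConnection,
                       **params)
print(client.ping())
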
Exemplo n.º 48
0
class TestESTermAggregationWeightProvider(TestCase):

    def setUp(self):
        super(TestESTermAggregationWeightProvider, self).setUp()

        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'es_term_weight_provider_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type, {self.field: 'foo'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark '})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)

    def tearDown(self):
        super(TestESTermAggregationWeightProvider, self).tearDown()

        self.ic.delete(self.index)


    def test_getitem_single(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(.5, w)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(.25, w)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(.125, w)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(.125, w)

    def test_inverse(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=True, sublinear=False)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(2., w)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(4., w)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(8., w)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(8., w)

    def test_sublinear(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=True)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(-0.693147, w, places=4)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(-1.386294, w, places=4)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(-2.079442, w, places=4)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(-2.079442, w, places=4)

    def test_inverse_sublinear(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=True, sublinear=True)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(0.693147, w, places=4)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(1.386294, w, places=4)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(2.079442, w, places=4)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(2.079442, w, places=4)

    def test_getitem_multiple(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        weights = dict(provider[['ba', 'foo', 'knark', 'knirk']])
        self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys()))
        self.assertAlmostEqual(weights['ba'], .5)
        self.assertAlmostEqual(weights['knark'], .25)
        self.assertAlmostEqual(weights['knirk'], .125)
        self.assertAlmostEqual(weights['foo'], .125)

        weights = dict(provider['ba', 'foo', 'knark', 'knirk'])
        self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys()))
        self.assertAlmostEqual(weights['ba'], .5)
        self.assertAlmostEqual(weights['knark'], .25)
        self.assertAlmostEqual(weights['knirk'], .125)
        self.assertAlmostEqual(weights['foo'], .125)

    def test_getitem_missing(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        self.assertRaises(KeyError, lambda: provider['notfound'])
        self.assertRaises(KeyError, lambda: provider['ba', 'notfound'])

        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False, missing='ignore')

        self.assertIsNone(provider['notfound'])
        self.assertEqual([('ba', .5)], list(provider['ba', 'notfound']))
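
The expected weights in these tests follow directly from the document frequencies created in setUp: out of 8 documents, 'ba' occurs 4 times, 'knark' twice (the value 'knark ' with a trailing space analyzes to the same term), and 'knirk' and 'foo' once each. A small sketch of the arithmetic behind the asserted values (the provider itself obtains these counts from a terms aggregation):

import math

total_docs = 8
doc_counts = {'ba': 4, 'knark': 2, 'knirk': 1, 'foo': 1}

for term, count in sorted(doc_counts.items()):
    weight = count / float(total_docs)      # plain relative frequency, e.g. ba -> 0.5
    inverse = total_docs / float(count)     # inverse=True, e.g. ba -> 2.0
    sublinear = math.log(weight)            # sublinear=True, e.g. ba -> -0.693
    inverse_sublinear = math.log(inverse)   # inverse + sublinear, e.g. ba -> 0.693
    print('%s: %.3f %.3f %.6f %.6f' % (term, weight, inverse, sublinear, inverse_sublinear))
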
Exemplo n.º 49
0
def main(in_args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", help="Elasticsearch host")
    parser.add_argument("--port", type=int, help="Elasticsearch port")
    parser.add_argument("--url-prefix", help="Elasticsearch URL prefix")
    parser.add_argument("--no-auth", action="store_const", const=True, help="Suppress prompt for basic auth")
    parser.add_argument("--ssl", action="store_true", default=None, help="Use SSL")
    parser.add_argument("--no-ssl", dest="ssl", action="store_false", help="Do not use SSL")
    parser.add_argument("--index", help="Index name to create")
    parser.add_argument("--old-index", help="Old index name to copy")
    parser.add_argument("--config", help="Config file name")

    args = parser.parse_args(in_args)

    if args.config:
        filename = args.config
    elif os.path.isfile("../config.yaml"):
        filename = "../config.yaml"
    elif os.path.isfile("config.yaml"):
        filename = "config.yaml"
    else:
        filename = ""

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            data = yaml.load(config_file)
        host = data.get("es_host")
        port = data.get("es_port")
        username = data.get("es_username")
        password = data.get("es_password")
        url_prefix = data.get("es_url_prefix", "")
        use_ssl = data.get("use_ssl")
    else:
        host = args.host if args.host else raw_input("Enter elasticsearch host: ")
        port = args.port if args.port else int(raw_input("Enter elasticsearch port: "))
        use_ssl = args.ssl if args.ssl is not None else raw_input("Use SSL? t/f: ").lower() in ("t", "true")
        if args.no_auth is None:
            username = raw_input("Enter optional basic-auth username: "******"Enter optional basic-auth password: "******"Enter optional Elasticsearch URL prefix: ")
        )

    if username and password:
        http_auth = username + ":" + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth, url_prefix=url_prefix)

    silence_mapping = {
        "silence": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "until": {"type": "date", "format": "dateOptionalTime"},
            }
        }
    }
    ess_mapping = {
        "elastalert_status": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "@timestamp": {"format": "dateOptionalTime", "type": "date"},
            }
        }
    }
    es_mapping = {
        "elastalert": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "match_body": {"enabled": False, "type": "object"},
                "aggregate_id": {"index": "not_analyzed", "type": "string"},
            }
        }
    }
    error_mapping = {"elastalert_error": {"properties": {"data": {"type": "object", "enabled": False}}}}

    index = args.index if args.index is not None else raw_input("New index name? (Default elastalert_status) ")
    if not index:
        index = "elastalert_status"

    res = None
    if args.old_index:
        print("Downloading existing data...")
        res = es.search(index=args.old_index, body={}, size=500000)
        print("Got %s documents" % (len(res["hits"]["hits"])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type="elastalert", body=es_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_status", body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type="silence", body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_error", body=error_mapping)
    print("New index %s created" % (index))

    if res:
        bulk = "".join(
            [
                "%s\n%s\n"
                % (json.dumps({"create": {"_type": doc["_type"], "_index": index}}), json.dumps(doc["_source"]))
                for doc in res["hits"]["hits"]
            ]
        )
        print("Uploading data...")
        es.bulk(body=bulk, index=index)

    print("Done!")
Exemplo n.º 50
0
def reindex(from_hosts,
            from_index,
            to_hosts,
            to_index,
            to_type,
            source='{"query":{"match_all":{}}}',
            max_docs=0,
            page_size=10,
            logging_per_docs=1000,
            es_scroll='5m',
            request_timeout=60):

    if from_index is None:
        logger.warn('from_index is empty.')
        return

    from_es = Elasticsearch(hosts=from_hosts)
    to_es = Elasticsearch(hosts=to_hosts)

    scroll_id = None
    counter = 0
    running = True
    bulk_data = []
    while running:
        try:
            if scroll_id is None:
                response = from_es.search(index=from_index,
                                          body=source,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll,
                                                  "size": page_size})
            else:
                response = from_es.scroll(scroll_id=scroll_id,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll})
            if len(response['hits']['hits']) == 0:
                running = False
                break
            scroll_id = response['_scroll_id']
            for hit in response['hits']['hits']:
                if '_source' in hit:
                    counter += 1
                    if counter % logging_per_docs == 0:
                        logger.info(u'Loaded {0} docs.'.format(counter))
                    if max_docs > 0 and counter >= max_docs:
                        logger.info(u'{0} docs are loaded, but it exceeded {1} docs.'.format(counter, max_docs))
                        running = False
                        break
                    op_index = to_index if to_index is not None else hit['_index']
                    op_type = to_type if to_type is not None else hit['_type']
                    bulk_data.append({"index": {"_index": op_index,
                                                "_type": op_type,
                                                "_id": hit['_id']}
                                      })
                    bulk_data.append(hit['_source'])
            if len(bulk_data) != 0:
                to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})
                bulk_data = []
        except NotFoundError:
            break
        except Exception:
            logger.exception(u"Failed to load documents from Elasticsearch(Loaded {0} doc).".format(counter))
            break

    if len(bulk_data) != 0:
        to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})

    logger.info('Loaded {0} documents.'.format(counter))
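
A minimal usage sketch of the reindex helper above; the host addresses and index names are hypothetical placeholders, and passing to_type=None keeps each document's original type.

# Hypothetical invocation: copy every document from 'logs-2017' on one
# cluster into 'logs-2017-copy' on another, 500 documents per scroll page.
reindex(from_hosts=['http://localhost:9200'],
        from_index='logs-2017',
        to_hosts=['http://localhost:9201'],
        to_index='logs-2017-copy',
        to_type=None,
        page_size=500)
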