def find_es(idx_name, domain, status):
    body = {
        "query": {
            "bool": {
                "filter": {
                    "range": {
                        "@timestamp": {
                            "gt": 'now-2m'
                        }
                    }
                },
                "must": [
                    # {"match":{"http_host" : "flight.01zhuanche.com"}},
                    # {"match":{"status":"404"}}
                    {
                        "match": {
                            "http_host": domain
                        }
                    },
                    {
                        "match": {
                            "status": status
                        }
                    }
                ]
            }
        }
    }

    es = Elasticsearch(hosts='http://10.66.5.28:9200', timeout=300)
    Es_Data = es.search(index=idx_name, body=body)
    print(Es_Data)
    print(Es_Data["hits"]["hits"][0]['_source']['remote_addr'])
Example #2
def lambda_handler(event, context):
    # elasticsearch
    host = 'vpc-photos-djta6afabridi6a46k6zz6mq44.us-east-1.es.amazonaws.com'  # For example, my-test-domain.us-east-1.es.amazonaws.com
    service = 'es'
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       'us-east-1',
                       service,
                       session_token=credentials.token)
    es = Elasticsearch(hosts=[{
        'host': host,
        'port': 443
    }],
                       http_auth=awsauth,
                       use_ssl=True,
                       verify_certs=True,
                       connection_class=RequestsHttpConnection)
    res = es.search(index="photos",
                    doc_type="photos",
                    body={"query": {
                        "match_all": {}
                    }})

    print(json.dumps(res["hits"]["hits"], indent=2))

    return {'statusCode': 200, 'body': json.dumps('Hello from Lambda!')}
Example #3
def Get_Error(stime, dtime, idx_name, domain, status):
    # body = {
    #   "size": 10000,
    #   "sort": {
    #     "@timestamp": {
    #       "order": "desc",
    #       "unmapped_type": "boolean"
    #     }
    #   },
    #   "_source": {
    #     "excludes": []
    #   },
    #   "stored_fields": ["*"],
    #   "docvalue_fields": ["@timestamp"],
    #   "query" : {
    #     "constant_score" : {
    #       "filter" : {
    #           "bool": {
    #               # "must": {"exists": {"field": "%s" % field_name}},
    #               # "must_not": {"term": {"%s.keyword" % field_name: ""}},
    #               {"match": {"http_host": domain}},
    #               {"match": {"status": status}},
    #               "must": {"range":{"@timestamp":{"gte":stime,"lte":dtime}}}
    #           }
    #       }
    #     }
    #   }
    # }
    body = {
        "size": 10000,
        "query": {
            "bool": {
                "filter": {
                    "range": {
                        "@timestamp": {
                            "gte": stime,
                            "lte": dtime
                        }
                    }
                },
                "must": [
                    # {"match":{"http_host" : "flight.01zhuanche.com"}},
                    # {"match":{"status":"404"}}
                    {
                        "match_phrase": {
                            "http_host": domain
                        }
                    },
                    {
                        "match": {
                            "status": status
                        }
                    }
                ]
            }
        }
    }
    es = Elasticsearch(hosts='http://10.66.5.28:9200', timeout=300)
    Es_Data = es.search(index=idx_name, body=body)
    print(Es_Data)
def find_es(stime, dtime, idx_name, domain, status):
    body = {
        "size": 10000,
        "query": {
            "bool": {
                "filter": {
                    "range": {
                        "@timestamp": {
                            "gte": stime,
                            "lte": dtime
                        }
                    }
                },
                "must": [{
                    "match_phrase": {
                        "http_host": domain
                    }
                }, {
                    "match": {
                        "status": status
                    }
                }]
            }
        }
    }
    es = Elasticsearch(hosts='http://10.66.5.28:9200', timeout=300)
    Es_Data = es.search(index=idx_name, body=body)
    print(Es_Data)
    print('Type Es_Data: %s' % type(Es_Data))
    print(Es_Data["hits"]["hits"][0]['_source']['remote_addr'])
    return Es_Data
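
A sketch of how the find_es above might be invoked; the index pattern and time window are placeholders (the domain reuses the one from the commented-out example), and a reachable cluster at the hard-coded host is assumed:

if __name__ == '__main__':
    # Hypothetical call; index pattern and time window are placeholders.
    data = find_es(stime='now-5m', dtime='now',
                   idx_name='nginx-access-*',
                   domain='flight.01zhuanche.com',
                   status='404')
    print('total hits: %s' % data['hits']['total'])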
Example #5
class ESClient(DBClient):
    @property
    def parameter_definitions(self):
        return query_parameter_mappers

    def setup_connection(self):
        self.store = Elasticsearch(hosts=[self.endpoint], http_auth=(self.user, self.passwd))

    def get_content(self, mapped_params):
        query_body = get_content.build_query_body(**mapped_params)
        es_response = self.query(query_body, index='pips', doc_type='clip', scroll='1m')
        clips = map_hits_to_api_spec(es_response)
        return {'results': clips}

    def get_item(self, mapped_params):
        query_body = get_item.build_query_body(**mapped_params)
        es_response = self.query(query_body, index='pips', doc_type='clip', scroll='1m')
        clips = map_hits_to_api_spec(es_response)
        if len(clips) == 0:
            raise NoResultsFoundError(f'No results for URI: {mapped_params}')
        return clips[0]

    def get_similar(self, mapped_params):
        query_body = get_similar.build_query_body(**mapped_params)
        es_response = self.query(query_body, index='pips', doc_type='clip', scroll='1m')
        clips = map_hits_to_api_spec(es_response)
        return {'results': clips}

    def query(self, query, **params):
        return self.store.search(body=query, **params)

    def close_connection(self):
        # handled by garbage collection
        pass
Example #6
class ElasticHelper(object):
    def __init__(self):
        self.es = Elasticsearch(ElasticConfig.uri)

    def index(self, body, index, doc_type):
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        return helpers.scan(self.es,
                            query=body,
                            index=index,
                            doc_type=doc_type,
                            preserve_order=True)

    def search(self, body, index, doc_type):
        try:
            rsp = self.es.search(body=body,
                                 index=index,
                                 doc_type=doc_type,
                                 request_timeout=100)
            if rsp.get("error"):
                print("es search error")
                return
            return rsp
        except Exception as e:
            print("es search error: " + str(e))

    def count(self, body, index, doc_type):
        return self.es.count(index=index,
                             doc_type=doc_type,
                             body=body,
                             request_timeout=100)

    def delete_index(self, index):
        return self.es.indices.delete(index=index)
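
A minimal usage sketch for the ElasticHelper above, assuming ElasticConfig.uri points at a reachable cluster; the index and doc_type names are placeholders:

if __name__ == "__main__":
    helper = ElasticHelper()

    # Index one document (placeholder index/doc_type).
    helper.index(body={"message": "hello"}, index="test-index", doc_type="doc")

    # Run a match_all search; search() returns None on errors, so guard it.
    rsp = helper.search(body={"query": {"match_all": {}}, "size": 10},
                        index="test-index", doc_type="doc")
    if rsp:
        for hit in rsp["hits"]["hits"]:
            print(hit["_source"])

    # Stream every document via the scan helper.
    for doc in helper.scan(body={"query": {"match_all": {}}},
                           index="test-index", doc_type="doc"):
        print(doc["_id"])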
Example #7
            "excludes": []
        },
        "aggs": {
            "aggdata": {
                "terms": {
                    "field": "myxxid.keyword",
                    "size": 100,
                    "order": {
                        "_count": "desc"
                    }
                }
            }
        }
    }

    resp = es.search(index='logstash-*', body=query)
    aggs['views'] = resp['aggregations']['aggdata']['buckets']

    # get shares
    query = {
        "query": {
            "bool": {
                "must": [{
                    "query_string": {
                        "query": "share",
                        "analyze_wildcard": True
                    }
                }, {
                    "range": {
                        "@timestamp": {
                            "gte": epoch(date),
Example #8
File: es02.py Project: mrok88/es
class es02:
    def __init__(self):
        self.es = Elasticsearch([
            'https://search-el-dev-znz7hdtpcgghjcq4vatwtc3xiu.ap-northeast-2.es.amazonaws.com:443'
        ])
        self.set_service()
        pass

    def load(self, fname="es01.pkl"):
        self.data = pickle.load(open(fname, "rb"))

    def load_datas(self,
                   start_date=date(2017, 12, 1),
                   end_date=date(2018, 1, 9)):
        d = start_date
        delta = datetime.timedelta(days=1)
        while d <= end_date:
            es_date = d.strftime("%Y.%m.%d")
            print(es_date)
            try:
                self.load_data(es_date)
                self.parse()
                #print(es.dset)
            except Exception:
                print(traceback.format_exc())
            d += delta

    def load_data(self, dt="2018.01.08"):
        es_index = 'slowquery-' + dt
        page = self.es.search(index=es_index,
                              doc_type='elltdev',
                              body={'query': {
                                  'match_all': {}
                              }})
        self.data = page

    # print("test")

    def load_datas2(self,
                    start_date=date(2017, 12, 1),
                    end_date=date(2018, 1, 9)):
        d = start_date
        delta = datetime.timedelta(days=1)
        while d <= end_date:
            es_date = d.strftime("%Y.%m.%d")
            print(es_date)
            try:
                self.load_data2(es_date)
                self.parse()
                #print(es.dset)
            except Exception:
                print("cannot find data")
            d += delta

    def load_data2(self, dt):
        url = 'https://search-el-dev-znz7hdtpcgghjcq4vatwtc3xiu.ap-northeast-2.es.amazonaws.com:443/slowquery-' + dt + '/elltdev/_search'
        resp = requests.get(url=url)
        self.data = json.loads(resp.text)

    #data = {'took': 1, '_shards': {'total': 5, 'successful': 5, 'failed': 0}, 'timed_out': False, 'hits': {'max_score': 1.0, 'total': 1550, 'hits': [{'_source': {'host': 'omuser[omuser] @  [10.125.224.9]  Id: 1005635', 'Rows_examined': 514, 'query': '''SELECT \t/*+ [goods-api].GoodsDetailDAO.getGdItemInfo */\t\t\t\titemT.GOODS_NO\t\t        , GROUP_CONCAT(DISTINCT itemT.ITEM_NO separator ',') AS ITEM_NO\t\t        , itemT.OPT_NM\t\t        , itemT.OPT_VAL\t\t\t\t, optT.OPT_SEQ\t\t\t\t \t\t  FROM (\t\t\t\tSELECT /*+ [goods-api].GoodsDetailDAO.getGdItemInfo */\t\t\t\t\t\tgd_item_opt.ITEM_NO\t\t\t            , GOODS_NO\t\t\t\t\t\t, OPT_NM\t\t\t\t\t\t, OPT_VAL\t\t\t\t  FROM gd_item , gd_item_opt\t\t\t\t WHERE gd_item_opt.ITEM_NO = gd_item.ITEM_NO\t\t\t\t ) itemT\t\t INNER JOIN gd_goods_opt optT\t        ON itemT.GOODS_NO = optT.GOODS_NO\t\t   AND itemT.OPT_NM = optT.OPT_NM\t\t \t\t   AND optT.GOODS_NO = '1000000644'\t\t   \t \t\t    \t\t   AND optT.OPT_SEQ = '1'\t\t GROUP BY itemT.GOODS_NO, itemT.OPT_NM, itemT.OPT_VAL, optT.OPT_SEQ;'''}}] }}

    # Area for storing parsed data
    dset = []
    dtmp = {}

    def set_service(self, svc="goods"):
        self.svc = svc

    def get_dbio(self, sql):
        pat = re.compile(r"\[\w+\-api][\w|.]+")
        m = pat.findall(sql)
        if len(m) > 0:
            return (m[0]).strip()
        else:
            return None

    def get_tables(self, sql):
        pat = re.compile(
            r"(?<=\W)(?:GD|AT|CC|CH|DP|ET|MB|OM|PR|ST)\_[\_\w\.]+(?=\W)", re.I)
        tables = pat.findall(sql)
        if len(sql) > 0:
            return [x.upper() for x in tables if x.find(".") == -1]
        else:
            return None

    def print_kv(self, k, v):
        if (k in [
                'host', 'Rows_examined', 'Query_time', '@timestamp', 'service',
                'Lock_time'
        ]):
            #print(k,":",v)
            self.dtmp[k] = v
        elif (k in ['query']):
            #print("dbio :", get_dbio(v))
            self.dtmp['dbio'] = self.get_dbio(v)
            #print("tables :",get_tables(v))
            self.dtmp['tables'] = self.get_tables(v)
        elif (k in ['_source']):
            #print("="*80)
            self.print_data(v)
            if self.dtmp['dbio'] != None and len(self.dtmp['tables']) > 0:
                #self.dset[self.dtmp['dbio']] = self.dtmp['tables']
                if (self.dtmp['service'] == self.svc):
                    self.dset.append(self.dtmp['tables'])
            self.dtmp = {}
        else:
            #print(k,":")
            self.print_data(v)

    def print_data(self, d):
        if (type(d) == dict):
            for k, v in d.items():
                self.print_kv(k, v)
        elif (type(d) == list):
            for item in d:
                self.print_data(item)
        elif (type(d) in [str, int, bool, float]):
            pass
        else:
            print("=" * 80)
            print(type(d))

    # Helper that calls print_data without arguments.
    def parse(self):
        self.print_data(self.data)
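
A sketch of a driver for the es02 loader above; the date range and service name are placeholders, and the date/datetime imports from the original module are assumed:

if __name__ == "__main__":
    # Hypothetical driver: load a few days of slow-query logs and parse them.
    e = es02()
    e.set_service("goods")
    e.load_datas(start_date=date(2018, 1, 1), end_date=date(2018, 1, 3))
    print(len(e.dset), "slow queries matched the 'goods' service")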
Example #9
                'date': None,
                'item': None,
                'default_brand': None,
                'scraped_brand': None,
                'ingprod': None,
                'recipeimpressions': 0,
                'cartimpressions': 0,
                'favs': 0,
                'addToCarts': 0,
                'prints': 0,
                'shares': 0,
                'ingsToCarts': 0
            }

            for metric in metrics:
                resp = es.search(index='logstash-*',
                                 body=elastic_query(metric))
                aggs[metric] = resp['aggregations']['aggdata']['buckets']
                if metric == "ingsToCarts":
                    reportData['ingsToCarts'] = resp['hits']['total']
                else:
                    ids = [i['key'] for i in aggs[metric]]
                    mongo_query = {
                        "myxxid": {
                            "$in": ids
                        },
                        "ingredients_edited.mappedingredient": item
                    }
                    docs = list(coll.find(mongo_query))
                    df1 = pd.DataFrame(docs)
                    df2 = pd.DataFrame(aggs[metric])
Example #11
 #The rest of the lines have no special characters (we cut 4 slots for the tag)
 if counter == 0:
     tag = line[:5]
     tag = tag[1:4]
     line = line[5:]
     counter = 1
 else:
     tag = line[:4]
     tag = tag[:3]
     line = line[4:]
 #We need k+1 results for each test
 result20 = es.search(index='test',
                      doc_type='project',
                      body={
                          'query': {
                              'match': {
                                  'text': line
                              }
                          },
                          'size': 21
                      })
 result30 = es.search(index='test',
                      doc_type='project',
                      body={
                          'query': {
                              'match': {
                                  'text': line
                              }
                          },
                          'size': 31
                      })
 print(tag + " - 20:")
Example #12
esPort = os.environ['esport']
esPass = os.environ['espass']
esUser = os.environ['esuser']
rootOrg = os.environ['rootOrg']
org = os.environ['org']

esObj = Elasticsearch([{
    "host": esUrl,
    "port": esPort
}],
                      http_auth=(esUser, esPass))

response = esObj.search(index="mlsearch_*",
                        doc_type="searchresources",
                        body='''{
        "size":1000,
        "_source":["locale","keywords","catalogPaths","name","sourceName","sourceShortName"]
    }''',
                        scroll="5s")
result_pending = [response]
cnt = 1
indexDocs = []
stData = {}
while result_pending:
    print("OBJECT NUMBER " + str(cnt))
    cnt += 1
    curr_obj = result_pending.pop()
    scroll_id = curr_obj.get("_scroll_id")
    for hit in curr_obj["hits"]["hits"]:
        hitSource = hit["_source"]
        for key, val in hitSource.items():
Example #13
        return None
    term_dict = {}
    for term, val in term_vectors[0].items():
        for pos_info in val['tokens']:
            term_dict[pos_info['position']] = term
    sorted_terms = sorted(term_dict.items())
    sorted_terms = [tup[1] for tup in sorted_terms]
    return sorted_terms


if __name__ == '__main__':
    count_list = [x for x in range(0, count, 10000)]
    count_list.append(count)

    results = list()
    results.append(es.search(index='prd_review', size=10000, scroll='1m'))
    scroll_id = results[0]['_scroll_id']
    results = results[0]['hits']['hits']

    for _ in range(count // 10000):
        results.extend(es.scroll(scroll_id=scroll_id, scroll='1m')['hits']['hits'])

    results = [result['_source'] for result in results]

    data = []
    for result in results:
        data.append({})
        data[-1]['m_id'] = result['message_id']
        data[-1]['score'] = result['prd_satisfact']
        data[-1]['cus_grade'] = result['cus_grade']
        data[-1]['best_flag'] = result['best_flag']
Example #14
from elasticsearch5 import Elasticsearch

useIndex = 'tw_user_database_*'
TWEETSINDEX = "tweets_database*"
# host = "192.168.209.113"
# port = "9200"
host = "192.168。8.200"
port = "9201"
es_client = Elasticsearch([{"host": host, "port": port}])
info = es_client.info()

userid = "25073877"
body = {"query": {"match": {"user.id": userid}}}

rs = es_client.search(index=TWEETSINDEX, body=body)
print(rs)
print(type(rs))
Example #15
class ESStorage(Storage):
    """Elasticsearch storage backend."""

    NAME = "es"
    _MESSAGE_FIELD_NAME = "_source.message"

    def __init__(self, configuration):
        """Initialize Elasticsearch storage backend."""
        self.config = configuration
        self._connect()

    def _connect(self):
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        if len(self.config.ES_CERT_DIR) and os.path.isdir(
                self.config.ES_CERT_DIR):
            _LOGGER.warning(
                "Using cert and key in %s for connection to %s (verify_certs=%s)."
                % (
                    self.config.ES_CERT_DIR,
                    self.config.ES_ENDPOINT,
                    self.config.ES_VERIFY_CERTS,
                ))
            self.es = Elasticsearch(
                self.config.ES_ENDPOINT,
                use_ssl=self.config.ES_USE_SSL,
                verify_certs=self.config.ES_VERIFY_CERTS,
                client_cert=os.path.join(self.config.ES_CERT_DIR, "es.crt"),
                client_key=os.path.join(self.config.ES_CERT_DIR, "es.key"),
                timeout=60,
                max_retries=2,
            )
        else:
            _LOGGER.warn("Conecting to ElasticSearch without authentication.")
            print(self.config.ES_USE_SSL)
            self.es = Elasticsearch(
                self.config.ES_ENDPOINT,
                use_ssl=self.config.ES_USE_SSL,
                verify_certs=self.config.ES_VERIFY_CERTS,
                timeout=60,
                max_retries=2,
            )

    def _prep_index_name(self, prefix):
        # appends the correct date to the index prefix
        now = datetime.datetime.now()
        date = now.strftime("%Y.%m.%d")
        index = prefix + date
        return index

    def retrieve(self,
                 time_range: int,
                 number_of_entries: int,
                 false_data=None):
        """Retrieve data from ES."""
        index_in = self._prep_index_name(self.config.ES_INPUT_INDEX)

        query = {
            "sort": {
                "@timestamp": {
                    "order": "desc"
                }
            },
            "query": {
                "bool": {
                    "must": [
                        {
                            "query_string": {
                                "analyze_wildcard": True,
                                "query": ""
                            }
                        },
                        {
                            "range": {
                                "@timestamp": {
                                    "gte": "now-900s",
                                    "lte": "now"
                                }
                            }
                        },
                    ],
                    "must_not": [],
                }
            },
        }
        _LOGGER.info(
            "Reading in max %d log entries in last %d seconds from %s",
            number_of_entries,
            time_range,
            self.config.ES_ENDPOINT,
        )

        query["size"] = number_of_entries
        query["query"]["bool"]["must"][1]["range"]["@timestamp"][
            "gte"] = "now-%ds" % time_range
        query["query"]["bool"]["must"][0]["query_string"][
            "query"] = self.config.ES_QUERY

        es_data = self.es.search(index_in, body=json.dumps(query))
        if es_data["hits"]["total"] == 0:
            return pandas.DataFrame(), es_data
        # only use _source sub-dict
        es_data = [x["_source"] for x in es_data["hits"]["hits"]]
        es_data_normalized = pandas.DataFrame(
            json_normalize(es_data)["message"])

        _LOGGER.info("%d logs loaded in from last %d seconds",
                     len(es_data_normalized), time_range)

        self._preprocess(es_data_normalized)

        return es_data_normalized, es_data  # bad solution, this is how Entry objects could come in.

    def store_results(self, data):
        """Store results back to ES."""
        index_out = self._prep_index_name(self.config.ES_TARGET_INDEX)

        actions = [{
            "_index": index_out,
            "_type": "log",
            "_source": data[i]
        } for i in range(len(data))]

        helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
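
A sketch of the configuration object this backend reads; the attribute names mirror those used above, the values are placeholders, and Storage, _LOGGER and _preprocess come from the surrounding module:

from types import SimpleNamespace

# Hypothetical configuration; an empty ES_CERT_DIR selects the unauthenticated branch.
config = SimpleNamespace(
    ES_ENDPOINT="https://localhost:9200",   # placeholder endpoint
    ES_USE_SSL=True,
    ES_VERIFY_CERTS=False,
    ES_CERT_DIR="",
    ES_INPUT_INDEX="logstash-",             # date suffix is appended by _prep_index_name
    ES_TARGET_INDEX="anomaly-",
    ES_QUERY="*",
)

storage = ESStorage(config)
frame, raw = storage.retrieve(time_range=900, number_of_entries=100)
print(len(frame), "log messages loaded")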
Example #16
class Searcher():
    """Searches papers from elasticsearch database

    Longer class information....
    Longer class information....

    """
    def __init__(self, index_name='paperdb', doc_type='papers', host='10.1.114.114', port=9200):
        """Initialize a search engine

        Args:
            host: A host name of elasticsearch
            port: A port number of elasticsearch
            index_name: name of the index you want to search for
            doc_type: name of the doc_type under certain index

        """
        self.es = Elasticsearch([{'host': host, 'port': port}])
        self.index = index_name
        self.doc_type = doc_type

    def generate_dsl(self, search_info):
        """Generate DSL given query and search settings

        Args:
            search_info: a dict including a query and other settings
            Note that 'query_type' must be consistent with 'match'!
        Example:
            {
                'query_type': 'integrated_search',
                'query': 'attention network',
                'match': {
                    'title': True,
                    'abstract': True,
                    'paperContent': True,
                    'videoContent': True,
                },
                'filter': {
                    'yearfrom': 1000,
                    'yearbefore': 3000,
                },
                'sort': 'year',
                'is_filter': True,
                'is_rescore': True,
                'is_cited': False
            }
            or
            {
                'query_type': 'advanced_search',
                'match': {
                    'title': 'attention',
                    'abstract': 'attention',
                    'paperContent': 'attention',
                    'videoContent': None,
                },
                'filter': {
                    'yearfrom': 1000,
                    'yearbefore': 3000,
                },
                'sort': 'relevance',
                'is_filter': False,
                'is_rescore': True,
                'is_cited': False
            }
        Return:
            dsl: a dsl translated from search info
        """

        # check search_info
        if 'integrated' in search_info['query_type']:
            assert 'query' in search_info, "Integrated search must have query !"
            assert isinstance(search_info['match']['title'], bool), "Here needs bool type !"
        else:
            assert isinstance(search_info['match']['title'], (str, type(None))), "Here needs a string or None!"

        if search_info['is_cited'] is False:
            dsl = Vividict()
            dsl['query']['bool']['must'] = []
            dsl['query']['bool']['should'] = []
            dsl['rescore'] = []

            if 'integrated' in search_info['query_type']:
                match = self.get_integrated_match(search_info['query'], search_info['match'])
                dsl['query']['bool']['should'] = match
                if search_info['is_filter'] is True:
                    filter = self.get_filter_query(search_info['query'])
                    dsl['query']['bool']['must'].append(filter)
                if search_info['is_rescore'] is True:
                    rescore = self.get_rescore_query(match)
                    dsl['rescore'] = rescore

            else:  # 'advanced_search'
                match = self.get_advanced_match(search_info['match'])
                dsl['query']['bool']['must'] = match
                if search_info['is_rescore'] is True:
                    rescore = self.get_rescore_query(match)
                    dsl['rescore'] = rescore

            year_range = Vividict()
            year_range['range']['year']['gte'] = search_info['filter'].get('yearfrom', 1000)
            year_range['range']['year']['lte'] = search_info['filter'].get('yearbefore', 3000)
            dsl['query']['bool']['must'].append(year_range)

        else:  # cited-function_score
            dsl = Vividict()
            dsl['query']['function_score']['query']['bool']['must'] = []
            dsl['query']['function_score']['query']['bool']['should'] = []
            dsl['query']['function_score']['field_value_factor'] = []
            dsl['rescore'] = []

            if 'integrated' in search_info['query_type']:
                match = self.get_integrated_match(search_info['query'], search_info['match'])
                dsl['query']['function_score']['query']['bool']['should'] = match
                cited = self.get_function_factor()
                dsl['query']['function_score']['field_value_factor'] = cited
                if search_info['is_filter'] is True:
                    filter = self.get_filter_query(search_info['query'])
                    dsl['query']['function_score']['query']['bool']['must'].append(filter)
                if search_info['is_rescore'] is True:
                    rescore = self.get_rescore_query(match)
                    dsl['rescore'] = rescore

            else:  # 'advanced_search'
                match = self.get_advanced_match(search_info['match'])
                dsl['query']['bool']['must'] = match
                if search_info['is_rescore'] is True:
                    rescore = self.get_rescore_query(match)
                    dsl['rescore'] = rescore

            year_range = Vividict()
            year_range['range']['year']['gte'] = search_info['filter'].get('yearfrom', 1000)
            year_range['range']['year']['lte'] = search_info['filter'].get('yearbefore', 3000)
            dsl['query']['function_score']['query']['bool']['must'].append(year_range)

        if search_info['sort'] == 'year':
            dsl['sort']['year'] = 'desc'
        elif search_info['sort'] == 'cited':
            dsl['sort']['cited'] = 'asc'

        return dsl

    def get_integrated_match(self, query, match):
        """get match of intergrated search

        Args:
            query: query string from user
            match: A dict contained title, abstract...

        Return:
            res: A list of match
        """
        res = []

        if match['title'] or match['abstract']:
            tmp = Vividict()
            tmp['multi_match']['query'] = query

            fields = []
            if match['title']:
                fields.append('title^3')

            if match['abstract']:
                fields.append('abstract^2')

            tmp['multi_match']['fields'] = fields
            res.append(tmp)

        if match['paperContent']:
            nest = self.get_nested_query_paperContent(query)
            res.append(nest)

        if match['videoContent']:
            nest = self.get_nested_query_videoContent(query)
            res.append(nest)

        if match['authors']:
            nest = self.get_nested_query_authors(query)
            res.append(nest)

        return res

    def get_advanced_match(self, match):
        """get match of advanced search

        Args:
            match: A dict contained title, abstract, paper_content...

        Return:
            res: A list of match
        """
        res = []
        if match['title']:
            _match = {'match': {'title': match['title']}}
            res.append(_match)

        if match['abstract']:
            _match = {'match': {'abstract': match['abstract']}}
            res.append(_match)

        if match['paperContent']:
            nest = self.get_nested_query_paperContent(match['paperContent'])
            res.append(nest)

        if match['videoContent']:
            nest = self.get_nested_query_videoContent(match['videoContent'])
            res.append(nest)

        if match['authors']:
            nest = self.get_nested_query_authors(match['authors'])
            res.append(nest)

        return res

    def get_nested_query_authors(self, query):

        nest = Vividict()
        nest['nested']['path'] = 'authors'
        nest['nested']['score_mode'] = 'max'

        tmp = Vividict()
        fields = ['authors.firstName', 'authors.lastName']
        tmp['multi_match']['fields'] = fields
        tmp['multi_match']['query'] = query
        nest['nested']['query']['bool']['must'] = tmp

        return nest

    def get_nested_query_paperContent(self, query):

        nest = Vividict()
        nest['nested']['path'] = 'paperContent'
        nest['nested']['score_mode'] = 'max'

        tmp = Vividict()
        fields = ['paperContent.text', 'paperContent.subtitles^2', 'paperContent.subtexts']
        tmp['multi_match']['fields'] = fields
        tmp['multi_match']['query'] = query
        nest['nested']['query']['bool']['must'] = tmp

        return nest

    def get_nested_query_videoContent(self, query):

        nest = Vividict()
        nest['nested']['path'] = 'videoContent'
        nest['nested']['score_mode'] = 'max'

        tmp = Vividict()
        tmp['match']['videoContent.textEnglish'] = query
        nest['nested']['query']['bool']['must'] = tmp

        return nest

    def get_function_factor(self):
        cited = Vividict()
        cited['field'] = 'cited'
        cited['modifier'] = 'log1p'
        cited['factor'] = 0.5
        cited['missing'] = 0

        return cited

    def get_filter_query(self, query):
        filter = Vividict()
        tag_list = []
        word_list = query.split()
        for word in word_list:
            tag_list.append(word.capitalize())
            tag_list.append(word.lower())
        filter['terms']['abstract'] = tag_list

        return filter

    def get_rescore_query(self, match):
        rescore = Vividict()
        rescore['window_size'] = 100
        rescore['query']['rescore_query'] = match[0]
        rescore['query']['query_weight'] = 1.5
        rescore['query']['rescore_query_weight'] = 0.5

        return rescore

    def search_paper_by_name(self, search_info, only_top_k=True, size=100):
        """Search paper by name
        Args:
            search_info: the same as that in self.generate_dsl()

        Return:
            paper_list: A list of paper information
            paper_id  : A list of paper id
            paper_num : The number of returned paper
        """
        dsl = self.generate_dsl(search_info)
        result = self.es.search(index=self.index, doc_type=self.doc_type, body=dsl, size=size)
        return self.get_paper_info(result)

    def get_video_pos_by_paper_id(self, search_info, paper_id, threshold=0.8):
        """
        Args:
            search_info: the same as that in self.generate_dsl()
            paper_id: A string, given by es

        Return:
            a sorted video captions' list according to similarity between
            captions and query
        """

        assert isinstance(paper_id, str), "paper_id must be a string, here need only one id !"

        paper = self.es.get_source(index=self.index, doc_type=self.doc_type, id=paper_id)

        return self.get_video_pos_by_paper(search_info=search_info,
                                           paper=paper,
                                           threshold=threshold)

    def get_video_pos_by_paper(self, search_info, paper, threshold=0.8):
        """
        Args:
            paper: A dict containing title, abstract ...

        Return:
            a sorted video captions' list according to similarity between
            captions and query
        """

        assert isinstance(paper, dict), "paper must be a dict, here need only one paper !"

        if 'integrated' in search_info['query_type']:
            query = search_info['query']
        else:
            query = search_info['match']['videoContent']

        assert (query is not None)

        if 'videoContent' not in paper:
            return [None]

        pos = self.get_video_pos(query=query, videoContent=paper['videoContent'], threshold=threshold)
        return pos

    @staticmethod
    def get_paper_info(res):
        """Return raw paper info given es search result
        Args:
            res: A dict of result from es.search

        Return:
            paper_list: A list of dicts, each dict stores information of a paper
            num: total number of hits reported by ES
        """
        paper_list = []
        paper_id = []
        hits = res['hits']['hits']
        num = res['hits']['total']
        # import pdb; pdb.set_trace();
        for hit in hits:
            paper_list.append(hit['_source'])
            paper_id.append(hit['_id'])
        return paper_list, paper_id, num

    @staticmethod
    def remove_text_embedding(papers):
        """Remove textEmbedding in videoContent
        Args:
            papers: A list of paper
        """
        for paper in papers:
            if 'videoContent' in paper:
                for v in paper['videoContent']:
                    if 'textEmbedding' in v:
                        v.pop('textEmbedding')

    @staticmethod
    def get_video_pos(query, videoContent, threshold=0.8):
        """Return a list of video captions related to user's query

        Args:
            query: english query text
            videoContent: a list of video caption information
            threshold: captions whose similarity score is > threshold are returned

        Return:
            res_list: a sorted video captions' list according to similarity between
                    captions and query
        """

        emd_list = [v.pop('textEmbedding') for v in videoContent]
        sim_list = test_similarity(query, emd_list)
        if sim_list == '__ERROR__':
            return sim_list

        res_list = []
        for s, v in zip(sim_list, videoContent):
            v['score'] = s
            if v['score'] > threshold:
                res_list.append(v)
            elif query in v['textEnglish']:
                res_list.append(v)

        # print('query:' + query)
        # pprint(res_list)
        return res_list
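
A usage sketch for the Searcher above, built from the 'integrated_search' example in the generate_dsl docstring; an 'authors' flag is added because get_integrated_match reads it, and all values are placeholders:

searcher = Searcher()  # defaults: index 'paperdb', doc_type 'papers'

search_info = {
    'query_type': 'integrated_search',
    'query': 'attention network',
    'match': {
        'title': True,
        'abstract': True,
        'paperContent': False,
        'videoContent': False,
        'authors': False,
    },
    'filter': {'yearfrom': 2015, 'yearbefore': 2021},
    'sort': 'year',
    'is_filter': False,
    'is_rescore': False,
    'is_cited': False,
}

papers, paper_ids, total = searcher.search_paper_by_name(search_info)
print(total, 'papers matched')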
Example #17
#res = es.get(index="test-index", id=1)
#print(res['_source'])

#es.indices.refresh(index="test-index")
indexes = es.indices.get('*')
#print(indexes)
for j in range(0, 10):
    print("value of j is: ", j)
    for i in indexes:
        print(i)
        print(" ")

        res = es.search(index=i,
                        body={
                            "query": {
                                "match_all": {}
                            },
                            "size": 1000
                        })
        #res = es.search(index="fx-testsuite-responses", body={"query": {"match_all": {}}, "size": 1000})
        #print((res))
        #print("Got %d Hits:" % res['hits']['total']['value'])

        for hit in res['hits']['hits']:
            #print("Hello")
            #print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])
            #es.index(index="fx-testsuite-responses",body={hit})
            a = hit["_source"]
            e = es.index(index=i, doc_type="test", body=a)
            #e = es.index(index="fx-testsuite-responses",doc_type="test" ,body=a)
Example #18
     fd2 = open("es_results_20_b.txt", 'w', encoding='utf-8')
 line = fd.readline()
 # For each line-query it sends a search request to elasticsearch
 while line:
     # Removes the tag (Q#) from the line-query
     # We cut 4 slots for the tag
     tag = line[:4]
     tag = tag[:3]
     line = line[4:]
     # We need 21 results
     if part == "a":
         result = es.search(index='test',
                            doc_type='project',
                            body={
                                'query': {
                                    'match': {
                                        'text': line
                                    }
                                },
                                'size': 21
                            })
     elif part == "b":
         result = es.search(index='test2',
                            doc_type='project',
                            body={
                                'query': {
                                    'match': {
                                        'text': line
                                    }
                                },
                                'size': 21
                            })
Example #19
    sorted_terms = sorted(term_dict.items())
    sorted_terms = [tup[1] for tup in sorted_terms]
    return sorted_terms


df = pd.read_pickle(dataset_dir + 'df_product_dataset.pkl')
df.pid = df.pid.str.strip()
df['term_vectors'] = None

# df = dd.from_pandas(df, npartitions=50)
count_list = list(range(0, len(df), 10000)) + [len(df)]
sorted_term_vectors = list()
sorted_term_vectors.append(
    es.search(
        index='nori_with_adjv',
        size=10000,
        scroll='1m',
        filter_path=['_scroll_id',
                     'hits.hits._source.sorted_term',
                     'hits.hits._source.pid']))
scroll_id = sorted_term_vectors[0]['_scroll_id']

# def gen_bulk_2(pid, sorted_term):
#     _head = {"update": {"_id": pid, "_type": "_doc", "_index": conf.es_adjv_index, "retry_on_conflict": 3}}
# =======
#
# print(''' get_parsed_token and upload sorted term vectors ''')
# def get_mtermvectors(ids):
#     body = dict()
#     body['ids'] = ids
#     body['parameters'] = {"fields": ["product"]}
#     # TODO ES_INDEX : conf.es_nouns_index or conf.es_adjv_index
#     res = es.mtermvectors(index=conf.es_nouns_index, doc_type='_doc', body=body)['docs']
Example #20
class EsClientConnection:
    host = ''
    errorMessage = ''

    def __init__(self, host, index=None, type=None, body=None):
        '''
        Both index and type must be provided together at creation time
        :param host:
        :param index:
        :param type:
        :param body:
        '''
        self.host = host
        self.conn = Elasticsearch([self.host])
        # Initialize the mapping settings, i.e. create the index
        indexExists = self.conn.indices.exists(index=index)
        typeExists = self.conn.indices.exists_type(index=index, doc_type=type)
        if body is not None:
            if indexExists is not True:
                if typeExists is not True:
                    self.conn.indices.create(index=index, body=body)
                else:
                    self.errorMessage = 'index does not exist but type exists. that should not be possible!'
            else:
                if typeExists is not True:
                    self.errorMessage = 'index exists but type does not exist'
                else:
                    self.errorMessage = 'index and type both exist. no need to create them'

    def __del__(self):
        self.close()

    def check(self):
        '''
        Return basic info about the current ES cluster
        :return:
        '''
        return self.conn.info()

    def insertDocument(self, index, type, body, id=None):
        '''
        Insert one document (body) into the given index and type; an id may be supplied,
        otherwise ES generates one automatically
        :param index: target index
        :param type: target doc type
        :param body: document to insert -> dict
        :param id: custom document id
        :return:
        '''
        return self.conn.index(index=index, doc_type=type, body=body, id=id)

    def insertDataFrame(self, index, type, dataFrame):
        '''
        Bulk-insert interface.
        The bulk API expects the body as a list shaped like [{optionType: {Condition}}, {data}],
        where optionType is one of index, delete or update,
        Condition can set the index and type for each individual document,
        and data is the single document to insert/update.
        :param index: default index to insert into
        :param type: default doc type to insert into
        :param dataFrame: data set to insert
        :return:
        '''
        dataList = dataFrame.to_dict(orient='records')
        insertHeadInfoList = [{"index": {}} for i in range(len(dataList))]
        temp = [dict] * (len(dataList) * 2)
        temp[::2] = insertHeadInfoList
        temp[1::2] = dataList
        try:
            return self.conn.bulk(index=index, doc_type=type, body=temp)
        except Exception as e:
            return str(e)

    def deleteDocById(self, index, type, id):
        '''
        Delete the document identified by index, type and id
        :param index:
        :param type:
        :param id:
        :return:
        '''
        return self.conn.delete(index=index, doc_type=type, id=id)

    def deleteDocByQuery(self, index, query, type=None):
        '''
        Delete all documents in the index that match the query
        :param index:
        :param query: a query in DSL syntax
        :param type:
        :return:
        '''
        return self.conn.delete_by_query(index=index,
                                         body=query,
                                         doc_type=type)

    def deleteAllDocByIndex(self, index, type=None):
        '''
        Delete all documents under the given index
        :param index:
        :return:
        '''
        try:
            query = {'query': {'match_all': {}}}
            return self.conn.delete_by_query(index=index,
                                             body=query,
                                             doc_type=type)
        except Exception as e:
            return str(e) + ' -> ' + index

    def searchDoc(self, index=None, type=None, body=None):
        '''
        Search the index for all documents matching the given conditions
        :param index:
        :param type:
        :param body: filter statement in DSL syntax
        :return:
        '''
        return self.conn.search(index=index, doc_type=type, body=body)

    def getDocById(self, index, type, id):
        '''
        Fetch the document identified by index, type and id
        :param index:
        :param type:
        :param id:
        :return:
        '''
        return self.conn.get(index=index, doc_type=type, id=id)

    def updateDocById(self, index, type, id, body=None):
        '''
        Update the document identified by index, type and id
        :param index:
        :param type:
        :param id:
        :param body: fields/values to update
        :return:
        '''
        return self.conn.update(index=index, doc_type=type, id=id, body=body)

    def close(self):
        if self.conn is not None:
            try:
                self.conn.close()
            except Exception as e:
                pass
            finally:
                self.conn = None

    def mysqlToEs(self, mysqlData):
        doc = []
        for value in mysqlData:
            doc.append({"index": {}})
            doc.append(value)
        self.conn.bulk(index='product', doc_type='tour_product', body=doc)
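
A usage sketch for the EsClientConnection wrapper above; the host, index, type, mapping and document are all placeholders:

mapping = {
    "mappings": {
        "tour_product": {
            "properties": {
                "name": {"type": "text"},
                "price": {"type": "float"},
            }
        }
    }
}

client = EsClientConnection("127.0.0.1:9200", index="product",
                            type="tour_product", body=mapping)
print(client.check())   # cluster info
client.insertDocument("product", "tour_product", {"name": "demo", "price": 9.9})
result = client.searchDoc("product", "tour_product",
                          {"query": {"match_all": {}}})
print(result["hits"]["total"])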
Example #21
def db_es():
    es = Elasticsearch(
        ['113.107.166.14'],
        #        http_auth=('elastic', 'passwd'),
        port=19200)
    #platID=1&roomID=2384875205&startTime=1561482671&endTime=1561523909
    #query ={'query': {'match_all': {}}}

    #query = {
    #        "size": 10000,
    #    "query":{
    #        "match":{
    #            "platform_id":"2"
    ##            "timestamp":"1561735836"
    #        }
    #    }
    #}

    #query = {
    #        "size": 10000,
    #    "query":{
    #         "bool": {
    #        "must":[
    #        {"match":{
    #            "platform_id":"20"
    ##            "timestamp":"1561735836"
    #        }},
    #        {"match":{
    #            "room_id":"281147838"
    ##            "timestamp":"1561735836"
    #        }},
    #        {"match":{
    #            "gift_type":"0"
    ##            "timestamp":"1561735836"
    #        }},
    #        {"range":{
    #           "timestamp":{"lte":"1562638920",} #gte,lte
    #        }}
    ##        {"range":{
    ##           "timestamp":{"lte":"1562638920",} #gte,lte
    ##        }}
    ##        {"sort":{
    ##           "from_id":{"order by":"desc",} #gte,lte
    ##        }}
    #        ]
    #        }
    #    }
    #}
    #
    query = {
        "size": 10,
        "query": {
            "bool": {
                "must": [
                    {
                        "match": {
                            "platform_id": "1"
                        }
                    },
                    #            {"match":{
                    #                "from_id":"cc_1333_38802060_1477136798"
                    #            }}
                    {
                        "match": {
                            "gift_name": ""
                        }
                    },
                    {
                        "match": {
                            "gift_type": "1"
                        }
                    }
                    #            {"range":{
                    #               "count":{"gte":2} #gte,lte
                    #            }}
                    #            {"range":{
                    #               "timestamp":{"gte":"1563798600"} #gte,lte ,"lte":"1563728700"    "gte":'1562567226'
                    #            }}
                ]
            }
        }
    }
    #query = {
    #    "query":{
    #        "terms":{
    #            "room_id":[
    #                "432863","432863"
    #            ]
    #        }
    #    }
    #}
    #res = es.get(index="liveshow-2018-07-20",doc_type='gift',id='1')  # 获取所有数据
    #res = es.search(index='liveshow-online-page-2019-06-29', doc_type='page',body=query)
    res = es.search(index='xiaohulu-liveshow-2019-08-26',
                    doc_type='gift',
                    body=query)
    return res
Example #22
File: es01.py Project: mrok88/es
from elasticsearch5 import Elasticsearch
import chardet
import pickle
es = Elasticsearch([
    'https://search-el-dev-znz7hdtpcgghjcq4vatwtc3xiu.ap-northeast-2.es.amazonaws.com:443'
])
print(es.info())

page = es.search(index='slowquery-2018.01.09',
                 doc_type='elltdev',
                 body={'query': {
                     'match_all': {}
                 }})
pickle.dump(page, open('es01.pkl', 'wb'))
#f = open("es01.json",'w')
#data = str(page)
# print(chardet.detect(page))
# f.write(data)
# f.close()
Example #23
class ElasticHelper(object):
    def __init__(self):
        self.es = Elasticsearch(ElasticConfig.uri)
        self._multi_search_results = []
        self.bulk_task_queue = []
        self.bulk_last_time = datetime_now_obj()

    def delay_index(self, body, index, doc_type):
        self.bulk_task_queue.append(
            {"index": {
                "_index": index,
                "_type": doc_type
            }})
        self.bulk_task_queue.append(body)

        if self._can_do_bulk():
            self.bulk(body=self.bulk_task_queue,
                      index=index,
                      doc_type=doc_type)
            self.bulk_task_queue = []

        self.bulk_last_time = datetime_now_obj()

    def _can_do_bulk(self):
        # more than 100 entries queued in the task queue
        if len(self.bulk_task_queue) > 100:
            return True
        # more than one minute since the last bulk
        if get_n_min_ago(1) > self.bulk_last_time:
            return True
        return False

    def index(self, body, index, doc_type):
        self.es.index(body=body, index=index, doc_type=doc_type)

    def bulk(self, body, index, doc_type):
        self.es.bulk(body=body, index=index, doc_type=doc_type)

    def scan(self, body, index, doc_type):
        return helpers.scan(self.es,
                            query=body,
                            index=index,
                            doc_type=doc_type,
                            preserve_order=True)

    def search(self, body, index, doc_type):
        try:
            rsp = self.es.search(body=body,
                                 index=index,
                                 doc_type=doc_type,
                                 request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            print(body)
            logger.error("es search error: " + str(e) + index)

    def count(self, body, index, doc_type):
        return self.es.count(index=index,
                             doc_type=doc_type,
                             body=body,
                             request_timeout=100)

    def delete_index(self, index):
        return self.es.indices.delete(index=index)

    def put_template(self, name, body, **kwargs):
        return self.es.indices.put_template(name=name, body=body, **kwargs)

    def exists_template(self, name, **kwargs) -> bool:
        return self.es.indices.exists_template(name=name, **kwargs)

    def delete_template(self, name, **kwargs):
        return self.es.indices.delete_template(name=name, **kwargs)

    def get_template(self, name, **kwargs):
        return self.es.indices.get_template(name=name, **kwargs)

    def wait_log_in_database(self, computer_name, record_number):
        """
            Message-queue consumption and ES indexing run separately, so a log record may be consumed before it has been indexed into ES; poll until it shows up.
        """
        count = 0
        query = {
            "query":
            get_must_statement(
                get_term_statement("computer_name", computer_name),
                get_term_statement("record_number", record_number)),
            "_source":
            False,
            "size":
            1
        }
        while True:
            try:
                rsp = self.es.search(body=query,
                                     index=ElasticConfig.event_log_index,
                                     doc_type=ElasticConfig.event_log_doc_type,
                                     request_timeout=100)
                if rsp.get("error"):
                    logger.error(rsp.get("error").get("reason"))
                    break
                if len(rsp["hits"]["hits"]) > 0:
                    return rsp["hits"]["hits"][0]["_id"]
                time.sleep(2)
                # wait at most 10 rounds, i.e. about 2 * 10 = 20 seconds
                if count == 10:
                    break
                count += 1
            except Exception as e:
                logger.error("es wait_log_in_database search error: " + str(e))
                break

    def multi_search(self, body, index, doc_type):
        try:
            rsp = self.es.msearch(body=body,
                                  index=index,
                                  doc_type=doc_type,
                                  request_timeout=100)
            if rsp.get("error"):
                logger.error(rsp.get("error").get("reason"))
                return
            return rsp
        except Exception as e:
            logger.error("es msearch error: " + str(e))