Python scan示例，elasticsearch1.helpers.scan Python示例

示例#1

0

显示文件

文件： fwd.py 项目： Beaconsys/Beacon

def search(time_s, time_e, fwd_host_list, index):
    """Fetch every log entry of a ``logstash-<index>`` index in a time window.

    Args:
        time_s: Window start (exclusive). The first 10 characters are used as
            the date part and everything from offset 11 as the time part
            (presumably ``"YYYY-MM-DD HH:MM:SS"`` -- confirm with callers).
        time_e: Window end (inclusive), same layout as ``time_s``.
        fwd_host_list: Accepted for interface compatibility only.
            NOTE(review): the hosts are not part of the query, so results are
            not filtered by host -- confirm whether callers filter themselves.
        index: Suffix appended to ``"logstash-"`` to form the index name.

    Returns:
        A tuple ``(messages, timestamps, hosts)`` of three parallel lists of
        ``str``, one entry per document yielded by the scan.
    """
    time_start = time_s[:10] + 'T' + time_s[11:] + '.000Z'
    time_end = time_e[:10] + 'T' + time_e[11:] + '.000Z'

    es_client = ES.Elasticsearch(hosts=[{'host': ES_HOST, 'port': 9200}])

    # "lte" is the documented spelling of the inclusive upper bound; the
    # previous "le" was an undocumented alias.
    es_search_options = {
        "query": {
            "bool": {
                "must": [
                    {"range": {"@timestamp": {"gt": time_start,
                                              "lte": time_end}}}
                ]
            }
        }
    }

    es_result = helpers.scan(
        client=es_client,
        query=es_search_options,
        scroll='3m',
        index="logstash-" + index,
        doc_type='redis-input',
        timeout='1m'
    )

    final_result_message = []
    final_result_time = []
    final_result_host = []
    for item in es_result:
        source = item['_source']
        final_result_message.append(str(source['message']))
        final_result_host.append(str(source['host']))
        final_result_time.append(str(source['@timestamp']))

    return final_result_message, final_result_time, final_result_host

示例#2

0

显示文件

文件： es_search_fwd.py 项目： TianyuZhang1214/job_script_taihuIO

def get_search_result(es_search_options,
                      index,
                      scroll='3m',
                      raise_on_error=True,
                      preserve_order=False,
                      doc_type='redis-input',
                      timeout="1m"):
    """Run ``helpers.scan`` against ``index`` and hand back its result.

    Args:
        es_search_options: Query body passed to the scan as ``query``.
        index: Index name to scan.
        scroll: Scroll keep-alive passed through to the scan.
        raise_on_error: Accepted but not forwarded to ``helpers.scan``.
        preserve_order: Accepted but not forwarded to ``helpers.scan``.
        doc_type: Document type passed through to the scan.
        timeout: Timeout value passed through to the scan.

    Returns:
        Whatever ``helpers.scan`` returns for the given arguments.
    """
    # The client is the ``es_client`` name resolved from the enclosing scope.
    scan_arguments = {
        'client': es_client,
        'query': es_search_options,
        'scroll': scroll,
        'index': index,
        'doc_type': doc_type,
        'timeout': timeout,
    }
    return helpers.scan(**scan_arguments)

示例#3

0

显示文件

文件： cache_cn.py 项目： Beaconsys/Beacon

def search(es_stime, es_etime, ip_list, index):
    """Scan ``logstash-<index>`` for entries from the given hosts in a window.

    Args:
        es_stime: Lower timestamp bound (exclusive), passed to the range
            query unchanged.
        es_etime: Upper timestamp bound (exclusive), passed unchanged.
        ip_list: Host values; a document must match at least one of them.
        index: Suffix appended to ``'logstash-'`` to form the index name.

    Returns:
        A list with one entry per document. Each entry is the list obtained
        by splitting on single spaces the string
        ``"<timestamp[0:10]> <timestamp[11:19]> <message[24:]> <host>"``.
    """
    host_clauses = [{'match': {'host': ip}} for ip in ip_list]

    es_search_options = {'query': {'bool': {
        'must': [
            # The clause list is passed directly: wrapping it in another
            # list produced a nested array instead of an array of queries.
            {'bool': {'should': host_clauses}},
            {'range': {'@timestamp': {'gt': es_stime, 'lt': es_etime}}},
        ],
        # NOTE(review): next to a "must" clause these "should" clauses are
        # optional, so they presumably do not restrict the result set --
        # confirm whether a message filter was intended.
        'should': [
            {'match': {'message': 'OPEN'}},
            {'match': {'message': 'RELEASE'}},
            {'match': {'message': 'READ'}},
            {'match': {'message': 'WRITE'}},
        ],
    }}}

    es_client = ES.Elasticsearch(hosts=[{'host': ES_HOST, 'port': 9200}])
    es_result = helpers.scan(client=es_client,
                             query=es_search_options,
                             scroll='5m',
                             index='logstash-' + index,
                             doc_type='redis-input',
                             timeout='1m')

    res_message = []
    for item in es_result:
        source = item['_source']
        timestamp = source['@timestamp']
        line = (timestamp[0:10] + ' ' + timestamp[11:19] + ' ' +
                source['message'][24:] + ' ' + source['host'])
        res_message.append(line.split(' '))
    return res_message

示例#4

0

显示文件

文件： redis_test.py 项目： Beaconsys/Beacon

def search(time_s, time_e, index, host_t):
    """Fetch all log entries of one index within a time window.

    Args:
        time_s: Window start (exclusive). The first 10 characters are used as
            the date part and everything from offset 11 as the time part
            (presumably ``"YYYY-MM-DD HH:MM:SS"`` -- confirm with callers).
        time_e: Window end (exclusive), same layout as ``time_s``.
        index: Indexable value whose first element is appended to
            ``"logstash-"`` to form the index name.
        host_t: Value appended to the fixed prefix ``'20.0.8.'`` to form the
            Elasticsearch host address.

    Returns:
        A tuple ``(messages, timestamps)`` of two parallel lists of ``str``.

    Side effects:
        Prints the number of retrieved documents to standard output.
    """
    time_start = time_s[:10] + 'T' + time_s[11:] + '.000Z'
    time_end = time_e[:10] + 'T' + time_e[11:] + '.000Z'

    es_host = '20.0.8.' + str(host_t)
    es_client = ES.Elasticsearch(hosts=[{'host': es_host, 'port': 9200}])

    timestamp_range = {"gt": time_start, "lt": time_end}
    es_search_options = {
        "query": {
            "bool": {
                "must": [{"range": {"@timestamp": timestamp_range}}]
            }
        }
    }

    es_result = helpers.scan(client=es_client,
                             query=es_search_options,
                             scroll='3m',
                             index="logstash-" + index[0],
                             doc_type='redis-input',
                             timeout='1m')

    final_result_message = []
    final_result_time = []
    for item in es_result:
        source = item['_source']
        final_result_message.append(str(source['message']))
        final_result_time.append(str(source['@timestamp']))

    # Single-argument parenthesised form prints the same text on Python 2
    # and Python 3; the bare print statement was a SyntaxError on Python 3.
    print("RESULT LEN : " + str(len(final_result_message)))
    return final_result_message, final_result_time

示例#5

0

显示文件

    def __migrate_logstash(self):
        """Copy every (index, type) pair from the old cluster to the new one.

        Connection settings, batch sizes and timeouts are read from the
        ``DEFAULT`` section of ``self.config``. The (index, type) pairs are
        taken from the old cluster's ``/_all/_mapping`` response; each pair
        is streamed with ``scan`` and written to the new cluster with
        ``parallel_bulk`` using the ``create`` op type. Every copied document
        gets its original mapping type stored under the ``"type"`` key of its
        source.

        While the copy runs, a timer polls the new cluster's document count
        every ``PROGRESS_INTERVAL`` seconds to draw a progress bar and track
        throughput. Polling stops once progress reaches 100 percent or this
        method exits (normally or with an exception), so the timer cannot
        keep rescheduling itself forever.

        After the copy, the ``settings`` entry is removed from
        ``INDEX_TEMPLATE["template"]`` (the shared object is mutated in
        place), the template is re-uploaded and ``refresh_interval`` is set
        to ``1s`` for all indices.
        """
        old_address = self.config.get('DEFAULT', 'elastic_logstash_host_port')
        self.log.info("Connecting to the elastic cluster by address  %s",
                      old_address)
        es = ES1x([old_address],
                  timeout=int(self.config.get('DEFAULT', 'timeout')),
                  use_ssl=False,
                  retry_on_timeout=True)

        new_address = self.config.get('DEFAULT',
                                      'elastic_logstash_host_port_new')
        self.log.info("Connecting to the elastic cluster by address  %s",
                      new_address)
        es7 = ES7x([new_address], use_ssl=False)

        # The old client's perform_request result is unpacked as a pair whose
        # second element is the payload.
        _, mappings = es.transport.perform_request('GET', '/_all/_mapping')
        total_count = int(
            es.transport.perform_request('GET',
                                         '/_cat/count?h=count')[1].strip())

        pb_instance = ProgressBar(total=100,
                                  decimals=3,
                                  length=50,
                                  fill='X',
                                  zfill='-')

        indices_type = [(index, doc_type)
                        for index, value in mappings.items()
                        for doc_type in value.get("mappings")]

        batch_size = self.config.getint('DEFAULT', "query_batch_size")
        match_all = {"size": batch_size, "query": {"match_all": {}}}

        es7.indices.put_index_template("merge_tmp_1", body=INDEX_TEMPLATE)
        speed = {"min": 1000000, "max": 0, "current": 0, "prev_count": 0}

        # Set when the migration ends so the progress timer stops
        # rescheduling itself even if 100 percent was never observed.
        stop_progress = threading.Event()

        def progress_run():
            progress = 0
            try:
                current_count = int(
                    es7.transport.perform_request(
                        'GET', '/_cat/count?h=count').strip())
                if total_count > 0:
                    # Clamp so a target count above the source total still
                    # reads as complete.
                    progress = min(
                        100, int(100 * current_count / total_count))
                else:
                    # Nothing to copy: avoid dividing by zero on every tick.
                    progress = 100
                pb_instance.print_progress_bar(progress)
                speed["current"] = (current_count -
                                    speed["prev_count"]) / PROGRESS_INTERVAL
                if speed["current"] >= speed["max"]:
                    speed["max"] = speed["current"]
                if speed["current"] <= speed["min"]:
                    speed["min"] = speed["current"]
                speed["prev_count"] = current_count
                self.log.info(
                    ">>> Documents processed: %d/%d; current speed %d doc's/sec <<<",
                    current_count, total_count, speed["current"])
            finally:
                # Reschedule even after a failed poll, but never past
                # completion or the end of the migration.
                if progress < 100 and not stop_progress.is_set():
                    threading.Timer(PROGRESS_INTERVAL, progress_run).start()

        def _transfer_data(hits, index, doc_type):
            # Turn scan hits into bulk "create" actions for the same index
            # and id, recording the original mapping type in the source.
            for item in hits:
                item["_source"].update({"type": doc_type})
                yield {
                    '_op_type': 'create',
                    '_index': index,
                    '_id': item["_id"],
                    '_source': item["_source"]
                }

        threading.Timer(PROGRESS_INTERVAL, progress_run).start()

        try:
            for index, doc_type in indices_type:
                self.log.info("Starting fetching index: %s; type: %s", index,
                              doc_type)

                hits = scan(es,
                            query=match_all,
                            scroll="10m",
                            size=batch_size,
                            index=index,
                            doc_type=doc_type)

                pb = parallel_bulk(
                    es7,
                    _transfer_data(hits, index, doc_type),
                    thread_count=4,
                    queue_size=4,
                    chunk_size=int(self.config.get('DEFAULT', 'chunk_size')),
                    max_chunk_bytes=int(
                        self.config.get('DEFAULT', 'max_chunk_bytes')) * 1024 *
                    1024,
                    timeout="%ss" % self.config.get('DEFAULT', 'timeout'))
                # parallel_bulk is lazy; drain it without keeping results.
                deque(pb, maxlen=0)

                self.log.info("Finished transfer index: %s; type: %s", index,
                              doc_type)
                # The logged "avg" is the midpoint of the observed min and
                # max speeds, not a mean over all samples.
                self.log.info(
                    "Performance for current index avg: %d; min: %d; max: %d - doc's/sec",
                    (speed["max"] + speed["min"]) / 2, speed["min"],
                    speed["max"])

            # Mutates the shared INDEX_TEMPLATE object in place.
            del INDEX_TEMPLATE["template"]["settings"]
            self.log.info(
                "Remove bulk performance settings from index template")
            es7.indices.put_index_template("merge_tmp_1", body=INDEX_TEMPLATE)
            self.log.info("Restore refresh_interval to 1s for all indices")
            es7.transport.perform_request(
                'PUT', '/_settings', body={"index": {
                    "refresh_interval": "1s"
                }})

            self.log.info("Migration finished successfully")
            self.log.info(
                "Performance for all indices avg: %d; min: %d; max: %d - doc's/sec",
                (speed["max"] + speed["min"]) / 2, speed["min"], speed["max"])
        finally:
            stop_progress.set()