def __make_es(self):
    """Build an Elasticsearch client from ``Config.host``.

    Returns:
        ES: a client connected to the configured host(s).

    Raises:
        TypeError: if ``Config.host`` is neither a str nor a list/tuple.
    """
    if isinstance(Config.host, str):
        return ES([Config.host])
    elif isinstance(Config.host, (list, tuple)):
        # BUG FIX: the previous ES(*Config.host) unpacked each host into a
        # separate positional argument of the Elasticsearch constructor;
        # the whole sequence must be passed as the single `hosts` argument.
        return ES(list(Config.host))
    else:
        raise TypeError(
            "Query.HOST must be a str like '127.0.0.1:9200' or a list/tuple contains host str,"
            "but Your HOST is type %s" % type(Config.host))
def setup(self):
    """Connect to Elasticsearch using ``self.options`` and silence noisy loggers.

    Reads ``host``, ``port`` and ``connection_timeout`` from ``self.options``
    and stores the client on ``self.engine``. On a failed ping the method now
    logs the error and returns early; any other setup failure is logged.
    """
    try:
        self.logger.debug(
            "Connecting to elasticsearch at %s:%d"
            % (self.options.get("host"), self.options.get("port")))
        self.engine = ES(
            [{
                "host": self.options.get("host"),
                "port": self.options.get("port")
            }],
            # sniff_on_start=True,
            # sniff_on_connection_fail=True,
            # sniffer_timeout=60,
            timeout=self.options.get("connection_timeout"),
        )
        if not self.engine.ping():
            self.logger.error(
                "Error connecting to elasticsearch at %s:%d"
                % (self.options.get("host"), self.options.get("port")))
            # BUG FIX: previously execution fell through after a failed ping
            # and still logged "Connected to elasticsearch".
            return
        self.logger.debug("Connected to elasticsearch")
        # Disable elasticsearch logging @FIXME maybe a mistake
        disable_loggers = [
            "elasticsearch",
            "elasticsearch.trace",
            "urllib3"
        ]
        for dl in disable_loggers:
            dl_obj = logging.getLogger(dl)
            dl_obj.propagate = False
            dl_obj.setLevel(logging.CRITICAL)
    except Exception as e:
        self.logger.error("Error setting up Elasticsearch datastore: %s" % str(e))
def test_size(self):
    """Check that elastic_to_dataframe() honours the size=10 cap."""
    global server, pwd
    print("==> test_size")
    params = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
    client = ES([params], connection_class=RC,
                http_auth=("user", pwd),
                use_ssl=True, verify_certs=False)
    print(client.info())
    window_end = datetime.datetime.now()
    window_start = window_end - datetime.timedelta(hours=1)
    frame = es_helper.elastic_to_dataframe(client,
                                           index="docker_stats*",
                                           size=10,
                                           timestampfield="read",
                                           start=window_start,
                                           end=window_end)
    print(len(frame))
    self.assertTrue(frame is not None)
    # at most 10 rows may come back
    self.assertTrue(0 <= len(frame) <= 10)
def test_date_cols(self):
    """Verify elastic_to_dataframe() always materializes the requested
    datecolumns, whether or not the indexed documents contain them."""
    global server, pwd, user
    print("==> test_date_cols")
    try:
        host_params1 = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
        es = ES([host_params1], connection_class=RC, http_auth=(
            user, pwd), use_ssl=True, verify_certs=False)
        print(es.info())
        # start from a clean index; ignore "not found" on first run
        try:
            es.indices.delete('test_date_cols')
        except elasticsearch.NotFoundError:
            pass
        # case 1: document without the date field — column must still appear
        doc = {
            'attr1': 'test'
        }
        es.index(index="test_date_cols", id="t1", doc_type='_doc', body=doc)
        time.sleep(1)  # give ES time to refresh the index
        res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1"])
        print(len(res))
        print(res.columns)
        self.assertTrue("date1" in res.columns)
        # case 2: document that actually carries date1
        doc = {
            'attr1': 'test',
            'date1': datetime.now()
        }
        es.index(index="test_date_cols", id="t2", doc_type='_doc', body=doc)
        time.sleep(1)
        res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1"])
        self.assertTrue("date1" in res.columns)
        # case 3: two date columns requested, only one present per doc
        doc = {
            'attr1': 'test',
            'date2': datetime.now()
        }
        es.index(index="test_date_cols", id="t3", doc_type='_doc', body=doc)
        time.sleep(1)
        res = es_helper.elastic_to_dataframe(es, index="test_date_cols", datecolumns=["date1", "date2"])
        self.assertTrue("date1" in res.columns)
        self.assertTrue("date2" in res.columns)
        # clean up the scratch index
        es.indices.delete('test_date_cols')
    finally:
        pass
def test_elastic_to_panda(self):
    """ Send Receive """
    global server, pwd, user
    print("==> test_elastic_to_panda")
    params = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
    client = ES([params], connection_class=RC,
                http_auth=(user, pwd),
                use_ssl=True, verify_certs=False)
    print(client.info())
    now = datetime.now()
    frame = es_helper.elastic_to_dataframe(client,
                                           index="docker_stats*",
                                           scrollsize=1000,
                                           datecolumns=["read"],
                                           timestampfield="read",
                                           start=now - timedelta(hours=1),
                                           end=now)
    frame["read"].describe()
    print(len(frame))
    # the last hour of docker stats must not be empty
    self.assertTrue(frame is not None)
    self.assertTrue(len(frame) > 0)
def conn(self):
    """Open connection on Elasticsearch DataBase"""
    host_entry = {
        "host": self.conf.get('host'),
        "port": self.conf.get('port'),
        "url_prefix": self.conf.get('db'),
    }
    return ES([host_entry])
def __init__(self, es_host, es_port, es_index, doc_type):
    """Create an ES client and remember the target index/doc_type.

    Args:
        es_host: Elasticsearch host name or IP.
        es_port: Elasticsearch port.
        es_index: index this wrapper operates on (created if missing).
        doc_type: document type used for indexing.
    """
    # BUG FIX: the previous code wrapped the constructor in
    # ``try/except: return False`` — returning a non-None value from
    # __init__ raises TypeError at runtime, so the except path was itself
    # broken and hid the real connection error. Let exceptions propagate.
    self.es = ES([{"host": es_host, "port": es_port}])
    self._check_index_exist(es_index)
    self.index = es_index
    self.doc_type = doc_type
def es_connect():
    """Return an Elasticsearch client for the dev VM; exit the process on failure."""
    try:
        client = ES([{'host': '192.168.56.101', 'port': 9200}])
    except Exception as ex:
        print('Error: ', ex)
        sys.exit()
    else:
        return client
def main():
    """Parse the CLI flags and, in development mode, create the 'ithome' index."""
    parser = argparse.ArgumentParser()
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('-d', '--development', action='store_true')
    args = parser.parse_args()
    if not args.development:
        return
    client = ES([{'host': '127.0.0.1', 'port': '9200'}])
    client.indices.create(index='ithome')
def __init__(self):
    """Open connection on Elasticsearch DataBase"""
    super(Elasticsearch, self).__init__()
    self.search = True
    host_entry = {
        "host": self.conf.get('host'),
        "port": int(self.conf.get('port')),
    }
    self.conn = ES([host_entry], connection_class=RequestsHttpConnection)
    # remember the resolved base URL of the live connection
    self.base_url = self.conn.transport.get_connection().base_url
def autocomplete(request, string):
    """Return up to 8 movie titles matching the typed prefix, scored by rating."""
    if request.method != 'GET':
        return HttpResponseNotFound('<h1>Page not found</h1>')
    es = ES()
    query = {"query": {"function_score": {"query": {"match_phrase_prefix": {"title": string}},
                                          "script_score": {"script": " doc['rating'].value / 2 * _score * Math.log(1 + 3* doc['no_ratings'].value ) "}}},
             "_source": ["title", "_score"],
             "size": 8}
    hits = es.search(index='csfd', doc_type='movie', body=query)["hits"]['hits']
    titles = [hit['_source']['title'] for hit in hits]
    return JsonResponse(titles, safe=False)
def __init__(self, connection, es_index='covid_tweets'):
    """Connect to an AWS-hosted Elasticsearch domain over HTTPS.

    ``connection`` must provide ACCESS_KEY, SECRET_KEY and AWS_HOST.
    """
    auth = AWS4Auth(connection['ACCESS_KEY'],
                    connection['SECRET_KEY'],
                    'us-east-2', 'es')
    hosts = [{'host': connection['AWS_HOST'], 'port': 443}]
    self.es = ES(hosts=hosts,
                 http_auth=auth,
                 use_ssl=True,
                 verify_certs=True,
                 connection_class=RequestsHttpConnection,
                 timeout=60)
    self.es_index = es_index
def searchInEs(des, tar_index, tar_docType, keyName, num_res):
    """Match-search `des` against field `keyName` and return the raw hits."""
    client = ES([{"host": "127.0.0.1", "port": 9200}])
    # build the request body
    body = {"query": {"match": {keyName: des}}}
    # run the search against Elasticsearch
    response = client.search(index=tar_index, doc_type=tar_docType,
                             body=body, size=num_res)
    return response["hits"]["hits"]
def test_empty_attr(self):
    """Verify dataframe_to_elastic() does not write NaN placeholders:
    attributes absent from a row must be absent from the stored doc."""
    global server, pwd, user
    print("==> test_empty_attr")
    try:
        host_params1 = {'host': server, 'port': 9200, 'use_ssl': True}  # RPINUM
        es = ES([host_params1], connection_class=RC, http_auth=(
            user, pwd), use_ssl=True, verify_certs=False)
        print(es.info())
        # start from a clean index; ignore "not found" on first run
        try:
            es.indices.delete('test_empty_attr')
        except elasticsearch.NotFoundError:
            pass
        # three rows, each with a different single attribute -> the
        # DataFrame is sparse (NaN in the other columns)
        arr = [
            {
                '_id': 't1',
                'attr1': 'test'
            },
            {
                '_id': 't2',
                'attr2': 'test'
            },
            {
                '_id': 't3',
                'attr3': 'test'
            },
        ]
        df = pd.DataFrame(arr)
        INDEX_NAME = 'test_empty_attr'
        df['_index'] = INDEX_NAME
        es_helper.dataframe_to_elastic(es, df)
        time.sleep(1)  # give ES time to refresh the index
        rec = es.get(index = INDEX_NAME, id = 't1', doc_type = 'doc')
        # only attr1 was set on t1; the NaN columns must not be stored
        self.assertTrue("attr1" in rec['_source'])
        self.assertTrue("attr2" not in rec['_source'])
        self.assertTrue("attr3" not in rec['_source'])
    finally:
        pass
def send_to_ttp(district, today, ttp_ip, ttp_port, source, doc_type):
    """
    Sends the districts newly produced aggregation to the TTP.
    Returns the index used to store the documents
    """
    index = '{0}-aggrevents-{1}'.format(district, today)
    userlog.info('Sending the aggregated events to TTP\'s ES instance')
    userlog.info('TTP ip:port : {0}:{1}'.format(ttp_ip, ttp_port))
    userlog.info('Index: {0} \t doc_type: {1} \t '.format(index, doc_type))
    # one-shot client towards the TTP's Elasticsearch instance
    client = ES(hosts=ttp_ip, port=ttp_port, timeout=30)
    client.index(index=index, doc_type=doc_type, body=source)
    return index
def similar_action_no(actions, state_key, side, sqlite_cursor):
    """Choose an action index for ``state_key`` by full-text searching similar
    board states in Elasticsearch and replaying their best known Q-values.

    Args:
        actions: legal actions for the current state.
        state_key: compressed state key of the current board.
        side: 'b' (blue) or 'r'/'red' — selects the ES doc type and whether
            the similar state map must be reversed.
        sqlite_cursor: cursor over the t_quality table of learned Q-values.

    Returns:
        int: index into ``actions``; random when nothing useful is found.
    """
    # decomp key
    decomp_state_key = KoreanChess.decompress_state_key(state_key)
    # full text search for similar state key on elasticsearch
    es = ES('52.79.135.2:80')
    # BUG FIX: was `side is 'b'` — identity comparison against a string
    # literal is implementation-dependent; use equality.
    result = es.search('i_irelia_state',
                       't_blue_state' if side == 'b' else 't_red_state', {
                           "query": {"match": {
                               "state": decomp_state_key}}
                       })
    if not result or result['_shards']['failed'] > 0:
        return random.randint(0, len(actions) - 1)
    actions_map = {}
    for i, act in enumerate(actions):
        actions_map[KoreanChess.build_action_key(act)] = i
    for item in result['hits']['hits']:
        similar_state = KoreanChess.compress_state_key(item['_source']['state'])
        # SECURITY FIX: the key was concatenated straight into the SQL
        # string; use a parameterized query instead.
        sqlite_cursor.execute(
            "SELECT quality_json FROM t_quality WHERE state_key=?",
            (KoreanChess.compress_state_key(similar_state),))
        q_json = sqlite_cursor.fetchone()
        if not q_json or q_json[0] == '0':
            continue
        similar_state_map = KoreanChess.convert_state_map(similar_state)
        if side == 'r':
            # red plays on a mirrored board
            similar_state_map = KoreanChess.reverse_state_map(similar_state_map)
        similar_state_actions = KoreanChess.get_actions(similar_state_map, side)
        q_values = json.loads(q_json[0])
        # best Q-value first; ties broken by action number
        q_values = sorted(q_values.items(), key=lambda x: (-x[1], x[0]))
        for q_value_tuple in q_values:
            # get action no
            action_no = int(q_value_tuple[0])
            q_value = q_value_tuple[1]
            if q_value <= 0:
                break
            sim_action = similar_state_actions[action_no]
            sim_action_key = KoreanChess.build_action_key(sim_action)
            # only usable if the similar state's action is legal here too
            if sim_action_key in actions_map:
                return actions_map[sim_action_key]
    return random.randint(0, len(actions) - 1)
def searchInES(des, index, keyname, resNum):
    """Term-search `des` against field `keyname` and return the raw hits."""
    client = ES([{"host": "localhost", "port": 9200}])
    # query body
    queryBody = {
        "query": {
            "term": {
                keyname: des
            }
        }
    }
    print(queryBody)
    # search
    result = client.search(index=index, doc_type="_doc",
                           body=queryBody, size=resNum)
    print(result)
    return result['hits']['hits']
def search_entity(q, search_size=200):
    """Search sentences for `q` and return (entity, weighted_score) pairs,
    splitting each hit's score evenly across its entities."""
    from itertools import product
    from elasticsearch import Elasticsearch as ES
    es = ES('localhost:9200')
    hits = es.search(q='sentence:({})'.format(q), size=search_size)['hits']['hits']
    companies = []
    for hit in hits:
        entities = hit['_source']['entity']
        share = hit['_score'] / len(entities)
        companies.extend(product(entities, [share]))
    return companies
def hodnotenie(request, number, string, gte, lte):
    """Render page `number` of movies matching `string` with rating in [gte, lte]."""
    query = {"_source": ["title", "rating", "creators.Režie:", "_score", "content.plot"],
             "query": {"bool": {"must": [{"match": {"titles": string}},
                                         {"range": {"rating": {"gte": gte, "lte": lte}}}]}},
             "size": 10,
             "from": int(number) * 10
             }
    es = ES()
    hits_block = es.search(index='csfd', doc_type='movie', body=query)["hits"]
    # cap pagination at 10 pages
    pages = min(int(hits_block['total']) // 10, 10)
    movies = [Model(hit, 0) for hit in hits_block["hits"]]
    page_links = ['search@strana-' + str(x) + '@' + string + '@hodnotenie@' + str(gte) + '@' + str(lte)
                  for x in range(pages + 1)]
    return render(request, 'search.html', {'movies': movies, 'pages': page_links})
def __init__(self):
    """Configure the QA retrieval engine (index, ES client, thresholds)."""
    # editable: index to search in
    self._index="qa_data"
    # editable: ES server settings
    self.es=ES([{"host":"127.0.0.1","port":9200}])
    # document type to look up
    self.doc_type="qa"
    # dimensionality of the sentence-vector space
    self.embedSize=300
    # matching-score thresholds
    self.min_score=0.4
    self.min_sim=0.4
def searchInEs(des, tar_index, tar_docType, keyName, num_res):
    """Match-search `des` against field `keyName` and return the raw hits.

    Note: doc_type is pinned to "_doc" for ES 7.0 compatibility (doc types
    were removed); `tar_docType` is kept in the signature so callers need
    no changes.
    """
    client = ES([{"host": "localhost", "port": 9200}])
    # build the request body
    body = {"query": {"match": {keyName: des}}}
    print("{+} es搜索函数被调用\n {+} 原始结果如下:")
    response = client.search(index=tar_index, doc_type="_doc",
                             body=body, size=num_res)
    print(response)
    return response["hits"]["hits"]
def insert_state_key(state_key, is_red=False):
    """Store `state_key` for the given side unless it is already indexed.

    Returns True when the key already exists, otherwise the truthiness of
    the index operation's 'created' flag.
    """
    side = 'red' if is_red else 'blue'
    es = ES('52.79.135.2:80')
    exact_match = {
        "query": {
            "constant_score": {
                "filter": {
                    "term": {
                        "state.keyword": state_key}
                }
            }
        }
    }
    found = es.search('i_irelia_state', 't_%s_state' % side, exact_match)
    if found and 'hits' in found and found['hits']['total'] > 0:
        return True
    created = es.index('i_irelia_state', 't_%s_state' % side, {"state": state_key})
    return created and created['created'] == True
def __init__(self): '''初始化设置''' #可修改:定义索引名称 self._index = "news_case" #可修改,但一般不需要,定义es服务器设置 self.es = ES([{"host": "127.0.0.1", "port": 9200}]) #可修改:定义文档类型 self.doc_type = "case" #无需修改,链接mongodb self.MGclient = MG() #可修改,指定数据库名称 self.db = self.MGclient.spider_data #可修改,指定collection的名称 self.collect = self.db.tagged_case
def __init__(self): '''初始化设置''' #可修改:定义索引名称 self._index = "law_data" #可修改,但一般不需要,定义es服务器设置 self.es = ES([{"host": "127.0.0.1", "port": 9200}]) #可修改:定义文档类型 self.doc_type = "line" #无需修改,链接mongodb self.MGclient = MG("mongodb://*****:*****@localhost:27017") #可修改,指定数据库名称 self.db = self.MGclient.spider_data #可修改,指定collection的名称 self.collect = self.db.LAW
def __init__(self): '''初始化设置''' #可修改:定义索引名称 self._index = "baike_data_abstract" #可修改,但一般不需要,定义es服务器设置 self.es = ES([{"host": "localhost", "port": 9200}]) #可修改:定义文档类型 self.doc_type = "knowledge" #无需修改,链接mongodb self.MGclient = MG("mongodb://*****:*****@localhost:27017") #可修改,指定数据库名称 self.db = self.MGclient.spider_data #可修改,指定collection的名称 self.collect = self.db.baidu_baike_BIG
def __init__(self, args):
    """
    Sets up initial configuration
    :param args: argparse.Namespace object
    """
    # setup node
    self.es_node = ES([{"host": args.host, "port": args.port}])
    self.es_node_url = "http://%s:%d" % (args.host, args.port)
    # bail out immediately if the node is unreachable
    if not self.check_node_connection(self.es_node_url):
        sys.exit(1)
    # setup default configuration
    self.region = args.region
    self.region_url = self.countries_url + "?pRegion=%s" % args.region
    self.index_name = "countries_index"
    self.doc_type = '%s_countries' % self.region
def search(env, message, size, start, q):
    """Search Graylog/ES logs for `message` in environment `env` and put the
    (response, Event) pair on queue `q` (runs as a worker)."""
    ent = Event()
    host = '10.168.169.51'
    client = ES([{'host': host, 'port': 9200}])
    # NOTE: 'filtered' queries are a legacy ES (<5.x) construct
    resp = client.search(
        size=size,
        from_=start,
        sort="timestamp",
        fields=['timestamp', 'full_message', 'source'],
        body={
            'query': {
                'filtered': {
                    'query': [
                        {
                            'query_string': {
                                # quoted so the message is matched as a phrase
                                'query': '"' + message + '"'
                            }
                        },
                    ]
                }
            },
            'filter': {
                'and': [
                    {
                        'term': {
                            'environment': env
                        }
                    },  # restrict to the given environment
                    {
                        'range': {
                            'timestamp': {
                                'from': '2016-11-18 16:00:00.000',
                                'to': '2016-11-23 16:00:00.000'
                            }
                        }
                    },
                    # {'regexp':{'full_message':message}},  # regexp does not support Chinese; it would fail to match
                ]
            }
        })
    q.put((resp, ent))
def search(request, number, string, avg):
    """Render page `number` of movies matching `string`; when avg == '1' only
    movies rated at least the site average are returned."""
    avega = get_average() if avg == '1' else 0
    query = {"query": {"function_score": {
        "query": {"bool": {"must": [{"match": {"titles": string}},
                                    {"range": {"rating": {"gte": avega}}}]}},
        "field_value_factor": {"field": "rating",
                               "modifier": "log1p",
                               "factor": 0.1
                               }}},
        "_source": ["title", "rating", "creators.Režie:", "_score", "content.plot"],
        "size": 10,
        "from": int(number) * 10}
    es = ES()
    hits_block = es.search(index='csfd', doc_type='movie', body=query)["hits"]
    # cap pagination at 10 pages
    pages = min(int(hits_block['total']) // 10, 10)
    movies = [Model(hit, 0) for hit in hits_block["hits"]]
    page_links = ['search@strana-' + str(x) + '@' + string + '@' + avg
                  for x in range(pages + 1)]
    return render(request, 'search.html', {'movies': movies, 'pages': page_links})
def __init__(self): '''初始化设置''' #可修改:定义索引名称 self._index = "qa_data" #可修改,但一般不需要,定义es服务器设置 self.es = ES([{"host": "localhost", "port": 9200}]) #可修改:定义文档类型 self.doc_type = "qa" #无需修改,链接mongodb self.MGclient = MG("mongodb://*****:*****@localhost:27017") #可修改,指定数据库名称 self.db = self.MGclient.spider_data #self.db.authenticate("reader","reader") #可修改,指定collection的名称 self.collect = self.db.qa_byHand
def __connect(self):
    '''Private method used to connect to the ElasticSearch instance.

    Pings the server, POSTs to its /login endpoint, then verifies the
    configured index exists by fetching its mapping. TransportError args
    are rewritten in place to carry a friendlier message before re-raising.

    Returns:
        The connected Elasticsearch client.

    Raises:
        OSError: nothing answers at host:port.
        TransportError: credentials rejected (403) or index missing (404).
    '''
    es = ES(hosts=[{'host': self.host, 'port': self.port}])
    # checks if server exists
    if not es.ping():
        err = ('It appears that nothing is running at http://%s:%s'
               % (self.host, self.port))
        raise OSError(err)
    # load the credentials file (if possible)
    # with file(self.cred_path) as cf:
    #     username, password = [l.strip() for l in cf.readlines()][:2]
    # data = json.dumps({'username': username, 'password': password})
    url = 'http://%s:%s/login' % (self.host, self.port)
    # NOTE(review): response is parsed but the token handling below is
    # commented out, so resp is currently unused — confirm intent.
    resp = json.loads(requests.post(url).text)
    # if resp['status'] == 200:
    #     self.auth_token = resp['token']
    # else:
    #     self.auth_token = ''
    # checks if index exists
    try:
        es.indices.get_mapping(self.index_name)
    except TransportError as e:
        if e.args[0] == 403:
            # rewrite the exception message with context, keep the code
            err = list(e.args)
            err[1] = ('Credentials not valid for %s:%s/%s'
                      % (self.host, self.port, self.index_name))
            e.args = tuple(err)
        elif e.args[0] == 404:
            # index missing: tear down this object before re-raising
            self.__del__()
            err = list(e.args)
            err[1] = ('No index named "%s" is avaliable at %s:%s'
                      % (self.index_name, self.host, self.port))
            e.args = tuple(err)
        raise
    return es