def query(data):
    """Search Elasticsearch for each supported search term and return the hits.

    Connection settings default to the values supplied by ``conf`` and may be
    overridden per request through optional keys in ``data``:

    * ``protocol``    -- ``'http'`` or ``'https'``; anything other than
      ``'https'`` drops the configured credentials.
    * ``url``         -- ``host:port`` to query; supplying it also drops the
      configured credentials.
    * ``mrpn``        -- maximum number of results requested per search term.
    * ``index``       -- index to search.
    * ``credentials`` -- credentials embedded in the connection URL.

    ``data['search_terms']`` is required. Each term is a mapping with a
    ``type`` and an ``id``; terms of type ``selection`` are searched by their
    ``data`` value instead of their ``id``. Terms whose type is not supported
    are skipped.

    Returns a JSON string ``{"num": <sum of reported totals>, "hits": [...]}``.

    Raises ``KeyError`` if ``search_terms`` (or a required key of a term) is
    missing.
    """
    tangelo.log('domaindive:query')
    # NOTE(review): this logs the raw request, including any 'credentials'
    # value -- confirm that is acceptable for this log destination.
    tangelo.log(data)

    # Defaults from configuration.
    host = conf.get_es_host()
    port = conf.get_es_port()
    index_name = conf.get_es_index()
    url = '%s:%s' % (host, port)
    max_results_per_node = conf.get_es_mrpn()
    cred = conf.get_es_cred()
    protocol = 'https'

    # Per-request overrides.
    if data.get('protocol') in ('http', 'https'):
        protocol = data['protocol']
        if protocol != 'https':
            cred = None
    if data.get('url'):
        url = data['url']
        cred = None  # if the url changed reset the credentials
    if data.get('mrpn') is not None:
        max_results_per_node = data['mrpn']
    if data.get('index'):
        index_name = data['index']
    if data.get('credentials'):
        cred = data['credentials']

    tangelo.log('using elastic serach instance: ' + protocol + "://" + url)
    search_terms = data['search_terms']

    # NOTE(review): without credentials the client is built from the bare
    # host:port, so `protocol` is not applied to the connection even though
    # it is logged above. Left unchanged to avoid breaking existing setups.
    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        es = Elasticsearch([url])

    # Built once; the original rebuilt this set on every loop iteration.
    supported_types = {
        'selection', 'phone', 'email', 'person', 'organization', 'misc'
    }

    hits = []
    num = 0
    for term in search_terms:
        # Normalise once so the filter and the 'selection' check agree on
        # case; previously 'Selection' passed the filter but was searched
        # by id rather than by its selected text.
        term_type = term['type'].lower()
        if term_type not in supported_types:
            continue

        if term_type == 'selection':
            phrase = term['data']
        else:
            phrase = term['id']

        results = es.search(index=index_name, body={
            "size": max_results_per_node,
            "query": {"match_phrase": {"_all": phrase}},
        })

        total = results['hits']['total']
        # Newer Elasticsearch servers report the total as an object
        # ({'value': n, 'relation': ...}) instead of a bare integer.
        if isinstance(total, dict):
            total = total['value']
        num += total

        for hit in results['hits']['hits']:
            tangelo.log(hit)
            hits.append({
                'nid': term['id'],
                'search_term': phrase,
                'eid': hit['_id'],
                'itype': hit['_type'],
                'jindex': index_name,
                'url': url,
                '_source': hit['_source'],
            })

    result = dict(num=num, hits=hits)
    tangelo.log(result)
    return json.dumps(result)
def indices():
    """Return a JSON array of the index names on the configured Elasticsearch host.

    The host, port and credentials come from ``conf``. When credentials are
    configured the client connects over https with them embedded in the URL;
    otherwise it is built from the bare ``host:port``.

    Blank lines in the cat output (such as the one produced by a trailing
    newline) are dropped, so the result contains only index names.
    """
    host = conf.get_es_host()
    port = conf.get_es_port()
    url = '%s:%s' % (host, port)
    cred = conf.get_es_cred()
    protocol = 'https'

    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        es = Elasticsearch([url])

    # The cat API must be called on the client instance that was just built;
    # the original called it on the Elasticsearch class and never used `es`.
    listing = es.cat.indices(h='i')
    names = [line.strip() for line in listing.split('\n')]
    return json.dumps([name for name in names if name])
def indices():
    """Return a JSON array of the index names on the configured Elasticsearch host.

    The host, port and credentials come from ``conf``. When credentials are
    configured the client connects over https with them embedded in the URL;
    otherwise it is built from the bare ``host:port``.

    Blank lines in the cat output (such as the one produced by a trailing
    newline) are dropped, so the result contains only index names.
    """
    host = conf.get_es_host()
    port = conf.get_es_port()
    url = '%s:%s' % (host, port)
    cred = conf.get_es_cred()
    protocol = 'https'

    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        es = Elasticsearch([url])

    # The cat API must be called on the client instance that was just built;
    # the original called it on the Elasticsearch class and never used `es`.
    listing = es.cat.indices(h='i')
    names = [line.strip() for line in listing.split('\n')]
    return json.dumps([name for name in names if name])
def query(data):
    """Search Elasticsearch for each supported search term and return the hits.

    Connection settings default to the values supplied by ``conf`` and may be
    overridden per request through optional keys in ``data``:

    * ``protocol``    -- ``'http'`` or ``'https'``; anything other than
      ``'https'`` drops the configured credentials.
    * ``url``         -- ``host:port`` to query; supplying it also drops the
      configured credentials.
    * ``mrpn``        -- maximum number of results requested per search term.
    * ``index``       -- index to search.
    * ``credentials`` -- credentials embedded in the connection URL.

    ``data['search_terms']`` is required. Each term is a mapping with a
    ``type`` and an ``id``; terms of type ``selection`` are searched by their
    ``data`` value instead of their ``id``. Terms whose type is not supported
    are skipped.

    Returns a JSON string ``{"num": <sum of reported totals>, "hits": [...]}``.

    Raises ``KeyError`` if ``search_terms`` (or a required key of a term) is
    missing.
    """
    tangelo.log('domaindive:query')
    # NOTE(review): this logs the raw request, including any 'credentials'
    # value -- confirm that is acceptable for this log destination.
    tangelo.log(data)

    # Defaults from configuration.
    host = conf.get_es_host()
    port = conf.get_es_port()
    index_name = conf.get_es_index()
    url = '%s:%s' % (host, port)
    max_results_per_node = conf.get_es_mrpn()
    cred = conf.get_es_cred()
    protocol = 'https'

    # Per-request overrides.
    if data.get('protocol') in ('http', 'https'):
        protocol = data['protocol']
        if protocol != 'https':
            cred = None
    if data.get('url'):
        url = data['url']
        cred = None  # if the url changed reset the credentials
    if data.get('mrpn') is not None:
        max_results_per_node = data['mrpn']
    if data.get('index'):
        index_name = data['index']
    if data.get('credentials'):
        cred = data['credentials']

    tangelo.log('using elastic serach instance: ' + protocol + "://" + url)
    search_terms = data['search_terms']

    # NOTE(review): without credentials the client is built from the bare
    # host:port, so `protocol` is not applied to the connection even though
    # it is logged above. Left unchanged to avoid breaking existing setups.
    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        es = Elasticsearch([url])

    # Built once; the original rebuilt this set on every loop iteration.
    supported_types = {
        'selection', 'phone', 'email', 'person', 'organization', 'misc'
    }

    hits = []
    num = 0
    for term in search_terms:
        # Normalise once so the filter and the 'selection' check agree on
        # case; previously 'Selection' passed the filter but was searched
        # by id rather than by its selected text.
        term_type = term['type'].lower()
        if term_type not in supported_types:
            continue

        if term_type == 'selection':
            phrase = term['data']
        else:
            phrase = term['id']

        results = es.search(index=index_name, body={
            "size": max_results_per_node,
            "query": {"match_phrase": {"_all": phrase}},
        })

        total = results['hits']['total']
        # Newer Elasticsearch servers report the total as an object
        # ({'value': n, 'relation': ...}) instead of a bare integer.
        if isinstance(total, dict):
            total = total['value']
        num += total

        for hit in results['hits']['hits']:
            tangelo.log(hit)
            hits.append({
                'nid': term['id'],
                'search_term': phrase,
                'eid': hit['_id'],
                'itype': hit['_type'],
                'jindex': index_name,
                'url': url,
                '_source': hit['_source'],
            })

    result = dict(num=num, hits=hits)
    tangelo.log(result)
    return json.dumps(result)