示例#1
0
def query(data):
    """Run a phrase search in Elasticsearch for every supported search term.

    Connection settings come from ``conf`` and may be overridden per request
    through optional keys in ``data``:

    * ``protocol``    -- 'http' or 'https'; anything other than 'https'
                         discards the configured credentials.
    * ``url``         -- 'host:port' of another instance; also discards the
                         configured credentials.
    * ``mrpn``        -- maximum number of results requested per term.
    * ``index``       -- index to search.
    * ``credentials`` -- credentials placed before '@' in the connection URL.

    ``data['search_terms']`` is required and is an iterable of dicts with at
    least ``type`` and ``id`` keys ('selection' terms also need ``data``).

    Returns a JSON string of the form ``{"num": <total hits>, "hits": [...]}``
    where each hit carries ``nid``, ``search_term``, ``eid``, ``itype``,
    ``jindex``, ``url`` and ``_source``.

    Raises KeyError if ``search_terms`` or a required term key is missing.
    """
    tangelo.log('domaindive:query')
    tangelo.log(data)

    # set default es options
    host = conf.get_es_host()
    port = conf.get_es_port()
    indd = conf.get_es_index()
    url = '%s:%s' % (host, port)
    max_results_per_node = conf.get_es_mrpn()
    cred = conf.get_es_cred()
    protocol = 'https'

    # override defaults if supplied with query
    if data.get('protocol') in ('http', 'https'):
        protocol = data['protocol']
        if protocol != 'https':
            cred = None
    if data.get('url'):
        url = data['url']
        cred = None  # if the url changed reset the credentials
    if data.get('mrpn') is not None:
        max_results_per_node = data['mrpn']
    if data.get('index'):
        indd = data['index']
    if data.get('credentials'):
        cred = data['credentials']

    tangelo.log('using elastic serach instance: ' + protocol + "://" + url)

    search_terms = data['search_terms']
    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        # NOTE(review): the protocol is not part of the connection string when
        # no credentials are used, so the client falls back to its own default
        # scheme. Kept as-is for backward compatibility -- confirm intent.
        es = Elasticsearch([url])

    # Built once; membership is checked against the lower-cased term type.
    supported_types = {
        'selection', 'phone', 'email', 'person', 'organization', 'misc'
    }

    rr = []
    num = 0
    for t in search_terms:
        term_type = t['type'].lower()
        if term_type not in supported_types:
            continue

        # 'selection' terms are searched by their 'data' value, every other
        # type by its id. Compare the lower-cased type so that the same
        # normalisation is used here as in the filter above.
        if term_type == 'selection':
            num_to_search = t['data']
        else:
            num_to_search = t['id']

        results = es.search(
            index=indd,
            body={
                "size": max_results_per_node,
                "query": {"match_phrase": {"_all": num_to_search}},
            })

        # Older servers report the total as a plain number, newer ones as an
        # object with a 'value' field; accept both.
        total = results['hits']['total']
        if isinstance(total, dict):
            total = total.get('value', 0)
        num += total

        for hit in results['hits']['hits']:
            tangelo.log(hit)
            rr.append({
                'nid': t['id'],
                'search_term': num_to_search,
                'eid': hit['_id'],
                'itype': hit['_type'],
                'jindex': indd,
                'url': url,
                '_source': hit['_source'],
            })

    result = dict(num=num, hits=rr)
    tangelo.log(result)
    return json.dumps(result)
示例#2
0
def indices():
    """Return the index names of the configured Elasticsearch instance.

    Host, port and credentials are read from ``conf``. The result is a JSON
    string holding a list of index names, one per non-empty line of the cat
    API output.
    """
    host = conf.get_es_host()
    port = conf.get_es_port()
    url = '%s:%s' % (host, port)
    cred = conf.get_es_cred()

    protocol = 'https'
    if cred is not None and len(cred) > 0:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        # NOTE(review): without credentials the protocol is not applied to
        # the connection string; kept as-is for backward compatibility.
        es = Elasticsearch([url])

    # The cat client lives on the connected instance, not on the
    # Elasticsearch class itself. h='i' asks for the index-name column only.
    listing = es.cat.indices(h='i')

    # The cat output is newline-terminated; drop blank lines so the result
    # does not contain empty index names.
    names = [line.strip() for line in listing.splitlines()]
    return json.dumps([name for name in names if name])
示例#3
0
def indices():
    """List the indices available on the configured Elasticsearch instance.

    Connection details (host, port, credentials) come from ``conf``.

    Returns a JSON-encoded list of index names taken from the cat indices
    output, with surrounding whitespace and blank lines removed.
    """
    host = conf.get_es_host()
    port = conf.get_es_port()
    url = '%s:%s' % (host, port)
    cred = conf.get_es_cred()

    protocol = 'https'
    if cred is not None and len(cred) > 0:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        # NOTE(review): the protocol is only used together with credentials;
        # left unchanged here to stay backward compatible.
        es = Elasticsearch([url])

    # Use the client instance created above: the ``cat`` helper is an
    # attribute of the instance, so calling it on the class fails.
    # h='i' limits the output to the index-name column.
    raw_listing = es.cat.indices(h='i')

    index_names = []
    for line in raw_listing.splitlines():
        name = line.strip()
        if name:  # skip the empty entry produced by the trailing newline
            index_names.append(name)
    return json.dumps(index_names)
示例#4
0
def query(data):
    """Search Elasticsearch for each supported term in ``data['search_terms']``.

    Defaults (host, port, index, result size, credentials) are read from
    ``conf``; the request may override them with these optional keys:

    * ``protocol``    -- 'http' or 'https'. Choosing anything but 'https'
                         drops the configured credentials.
    * ``url``         -- alternative 'host:port'; drops the configured
                         credentials as well.
    * ``mrpn``        -- maximum results requested per search term.
    * ``index``       -- name of the index to search.
    * ``credentials`` -- credentials inserted before '@' in the client URL.

    Each search term is a dict with ``type`` and ``id``; terms of type
    'selection' are searched by their ``data`` value instead of their id.
    Terms whose type is not supported are skipped.

    Returns a JSON string ``{"num": total, "hits": [...]}``; every hit holds
    ``nid``, ``search_term``, ``eid``, ``itype``, ``jindex``, ``url`` and
    ``_source``.

    Raises KeyError when ``search_terms`` or a required term key is absent.
    """
    tangelo.log('domaindive:query')
    tangelo.log(data)

    # set default es options
    host = conf.get_es_host()
    port = conf.get_es_port()
    indd = conf.get_es_index()
    url = '%s:%s' % (host, port)
    max_results_per_node = conf.get_es_mrpn()
    cred = conf.get_es_cred()
    protocol = 'https'

    # override defaults if supplied with query
    if data.get('protocol') in ('http', 'https'):
        protocol = data['protocol']
        if protocol != 'https':
            cred = None
    if data.get('url'):
        url = data['url']
        cred = None  # if the url changed reset the credentials
    if data.get('mrpn') is not None:
        max_results_per_node = data['mrpn']
    if data.get('index'):
        indd = data['index']
    if data.get('credentials'):
        cred = data['credentials']

    tangelo.log('using elastic serach instance: ' + protocol + "://" + url)

    search_terms = data['search_terms']
    if cred:
        es = Elasticsearch([protocol + '://' + cred + '@' + url])
    else:
        # NOTE(review): when no credentials are in effect the protocol is not
        # passed to the client, which then uses its default scheme. Left
        # unchanged for backward compatibility -- confirm whether intended.
        es = Elasticsearch([url])

    # Hoisted out of the loop; compared against the lower-cased term type.
    supported_types = {
        'selection', 'phone', 'email', 'person', 'organization', 'misc'
    }

    rr = []
    num = 0
    for t in search_terms:
        term_type = t['type'].lower()
        if term_type not in supported_types:
            continue

        # Use the normalised type here too, so a term typed e.g. 'Selection'
        # is searched by its 'data' value rather than silently by its id.
        num_to_search = t['data'] if term_type == 'selection' else t['id']

        results = es.search(index=indd,
                            body={
                                "size": max_results_per_node,
                                "query": {
                                    "match_phrase": {
                                        "_all": num_to_search
                                    }
                                }
                            })

        # The total is a plain number on older servers and an object with a
        # 'value' field on newer ones; handle both shapes.
        total = results['hits']['total']
        if isinstance(total, dict):
            total = total.get('value', 0)
        num += total

        for hit in results['hits']['hits']:
            tangelo.log(hit)
            rr.append({
                'nid': t['id'],
                'search_term': num_to_search,
                'eid': hit['_id'],
                'itype': hit['_type'],
                'jindex': indd,
                'url': url,
                '_source': hit['_source']
            })

    result = dict(num=num, hits=rr)
    tangelo.log(result)
    return json.dumps(result)