def test_delete_article_index(app):
    """Deleting an Article should also remove it from the Elasticsearch index."""
    with app.app_context():
        db.create_all()
        new_article = ArticleModel(
            title="ES title",
            tags=[],
            categories=[],
            unique_id="unique_id2",
            citation="citation",
            cfr40_part280="cfr40_part280",
            legal_language="en",
        )
        db.session.add(new_article)
        db.session.commit()

        db.session.delete(new_article)
        db.session.commit()

        # After the delete is committed, the document must no longer be findable.
        resp = es.search(
            index=ARTICLE_INDEX,
            body={"query": {"term": {"title": "ES title"}}},
        )
        assert not resp["hits"]["total"]["value"]
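# Note: Elasticsearch makes writes visible to search only after an index refresh,
# so if the delete above reaches ES asynchronously (e.g. via a model event hook),
# the assertion can race the indexer. A minimal sketch of a more robust check,
# assuming the same `es` client and ARTICLE_INDEX as in the test above (the helper
# name is hypothetical):
def assert_article_absent_from_index(title):
    # Force a refresh so all pending writes are visible to search.
    es.indices.refresh(index=ARTICLE_INDEX)
    resp = es.search(
        index=ARTICLE_INDEX,
        body={"query": {"term": {"title": title}}},
    )
    assert not resp["hits"]["total"]["value"]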
def get_locations_by_name(location_string, region_id):
    """Returns location entries for a given name."""
    query_parts = []
    # One phrase-prefix match per whitespace-separated token of the input.
    for token in location_string.replace(',', '').split():
        query_parts.append({
            'multi_match': {
                'fields': ['name', 'bodyName', 'postalcode'],
                'type': 'phrase_prefix',
                'query': token
            }
        })
    # Restrict results to the bodies belonging to the requested region.
    query_parts.append({
        'terms': {
            'bodyId': app.config['regions'][region_id]['body'],
            'minimum_should_match': 1
        }
    })
    result = es.search(
        index=app.config['es_location_index'] + '-latest',
        doc_type='street',
        fields='name,bodyName,postalcode,point',
        body={
            'query': {
                'bool': {
                    'must': query_parts
                }
            }
        },
        size=10
    )
    locations = []
    if result['hits']['total']:
        for location in result['hits']['hits']:
            tmp_location = {
                'name': location['fields']['name'][0],
                'bodyName': location['fields']['bodyName'][0],
                'point': location['fields']['point'][0]
            }
            if 'postalcode' in location['fields']:
                tmp_location['postalcode'] = location['fields']['postalcode'][0]
            locations.append(tmp_location)
    return locations
def query_paper_num(region_id, q):
    result = es.search(
        index=app.config['es_paper_index'] + '-latest',
        doc_type='paper',
        fields='name,publishedDate',
        body={
            'query': {
                'bool': {
                    'must': [
                        {
                            'multi_match': {
                                'fields': ['file.fulltext', 'file.name', 'name'],
                                'type': 'phrase',
                                'query': q
                            }
                        },
                        {
                            'terms': {
                                'bodyId': app.config['regions'][region_id]['body'],
                                'minimum_should_match': 1
                            }
                        }
                    ]
                }
            }
        },
        size=1,
        sort='publishedDate:desc'
    )
    if result['hits']['total']:
        hit_fields = result['hits']['hits'][0]['fields']
        return {
            'num': result['hits']['total'],
            'name': hit_fields['name'][0],
            'publishedDate': hit_fields['publishedDate'][0] if 'publishedDate' in hit_fields else None
        }
    else:
        return {
            'num': result['hits']['total']
        }
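# Illustrative call (region id, query, and returned values are made up): the dict
# carries the total hit count, plus name and date of the newest matching paper
# when there is at least one hit.
#
#   query_paper_num('koeln', 'Haushalt')
#   # -> {'num': 42, 'name': 'Haushaltsplan 2014', 'publishedDate': '2014-02-03T00:00:00'}
#   # -> {'num': 0}   (when nothing matches)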
def region_search():
    start_time = time.time()
    result = []
    search_string = request.args.get('q', False)
    # generate fulltext search string
    if not search_string:
        search_results = []
    else:
        search_string = search_string.split()
        search_string_to_complete = search_string[-1]
        query_parts = []
        query_parts.append({
            'match_phrase_prefix': {
                'name': search_string_to_complete.lower()
            }
        })
        if len(search_string[0:-1]):
            query_parts.append({
                'query_string': {
                    'fields': ['name'],
                    'query': " ".join(search_string[0:-1]),
                    'default_operator': 'and'
                }
            })
        try:
            result = es.search(
                index="%s-latest" % app.config['REGION_ES'],
                doc_type='regions',
                fields='name,slug,postalcode,location',
                body={
                    'query': {
                        'bool': {
                            'must': query_parts
                        }
                    },
                    'aggs': {
                        'fragment': {
                            'terms': {
                                'field': 'name',
                                'include': {
                                    'pattern': search_string_to_complete.lower() + '.*',
                                    'flags': 'CANON_EQ|CASE_INSENSITIVE',
                                },
                                'min_doc_count': 0,
                                'size': 10
                            }
                        }
                    }
                },
                size=10
            )
        except elasticsearch.NotFoundError:
            abort(403)
        search_results = []
        for dataset in result['hits']['hits']:
            tmp_search_result = {
                'name': dataset['fields']['name'][0],
                'postalcode': dataset['fields']['postalcode'][0] if len(dataset['fields']['postalcode']) else None,
                'slug': dataset['fields']['slug'][0]
            }
            search_results.append(tmp_search_result)
    ret = {
        'status': 0,
        'duration': round((time.time() - start_time) * 1000),
        'response': search_results
    }
    json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
    response = make_response(json_output, 200)
    response.mimetype = 'application/json'
    response.headers['Expires'] = util.expires_date(hours=24)
    response.headers['Cache-Control'] = util.cache_max_age(hours=24)
    return response
def search_traffic_items_es():
    start_time = time.time()
    limits = request.form.get('l', None)
    traffic_item_type = request.form.get('traffic_item_type', None)
    construction_site_date = request.form.get('date', None)
    occupancy_rate = request.form.get('occupancy_rate', None)
    zoom = request.form.get('zoom', None)
    saved_request = {
        'limits': limits,
        'traffic_item_type': traffic_item_type,
        'construction_site_date': construction_site_date,
        'occupancy_rate': occupancy_rate,
        'zoom': zoom
    }
    if limits:
        limits = limits.split(';')
    query_parts_must = []
    query_parts_should = []
    if traffic_item_type:
        traffic_item_type = traffic_item_type.split(',')
        query_parts_must.append({
            'terms': {
                'traffic_item_type': traffic_item_type
            }
        })
        # Type 1 (construction sites): only items active on the given date.
        if '1' in traffic_item_type:
            query_parts_should.append({
                'bool': {
                    'must': [
                        {'range': {'start': {'lte': construction_site_date}}},
                        {'range': {'end': {'gte': construction_site_date}}},
                        {'term': {'traffic_item_type': 1}}
                    ]
                }
            })
        if '2' in traffic_item_type:
            query_parts_should.append({
                'bool': {
                    'must': [
                        # {'range': {'occupancy_rate': {'gte': occupancy_rate}}},
                        {'term': {'traffic_item_type': 2}}
                    ]
                }
            })
    if limits:
        # Parse range filters such as "field<=value" into ES range queries.
        # Two-character operators are checked first so '<=' is not split on '<'.
        limit_queries = {}
        for limit in limits:
            if limit.find('<=') >= 0:
                limit_split = limit.split('<=')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lte'] = limit_split[1]
            elif limit.find('>=') >= 0:
                limit_split = limit.split('>=')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gte'] = limit_split[1]
            elif limit.find('>') >= 0:
                limit_split = limit.split('>')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gt'] = limit_split[1]
            elif limit.find('<') >= 0:
                limit_split = limit.split('<')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lt'] = limit_split[1]
        for limit_query_key, limit_query_value in limit_queries.items():
            query_parts_must.append({
                'range': {
                    limit_query_key: limit_query_value
                }
            })
    query = {
        'query': {
            'constant_score': {
                'filter': {
                    'bool': {
                        'must': [{'match_all': {}}] + query_parts_must,
                        'should': query_parts_should
                    }
                }
            }
        }
    }
    es_result = es.search(
        index=app.config['TRAFFIC_ITEMS_ES'] + '-latest',
        doc_type='traffic_item',
        fields='id,location.lat,location.lon,traffic_item_type,area,start,end,occupancy_rate',
        body=query,
        size=10000
    )
    result = []
    for single in es_result['hits']['hits']:
        item = {
            'id': single['fields']['id'][0],
            'lat': single['fields']['location.lat'][0],
            'lon': single['fields']['location.lon'][0],
            'type': single['fields']['traffic_item_type'][0]
        }
        if 'area' in single['fields']:
            item['area'] = json.loads(single['fields']['area'][0])
        if 'start' in single['fields']:
            item['start'] = single['fields']['start'][0]
        if 'end' in single['fields']:
            item['end'] = single['fields']['end'][0]
        if 'occupancy_rate' in single['fields']:
            item['occupancy_rate'] = single['fields']['occupancy_rate'][0]
        result.append(item)
    ret = {
        'status': 0,
        'request': saved_request,
        'duration': round((time.time() - start_time) * 1000),
        'response': result
    }
    json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
    response = make_response(json_output, 200)
    response.mimetype = 'application/json'
    response.headers['Expires'] = util.expires_date(hours=24)
    response.headers['Cache-Control'] = util.cache_max_age(hours=24)
    return response
def search_sharing_stations():
    start_time = time.time()
    fq = request.form.get('fq', '')
    limits = request.form.get('l', None)
    vehicle_all = request.form.get('vehicle_all', None)
    if vehicle_all == "0,20":
        vehicle_all = None
    vehicle_type = request.form.get('vehicle_type', None)
    if vehicle_type == '' or vehicle_type == '1,2,3,4,5':
        vehicle_type = None
    sort = request.form.get('sort', 'name.sort:asc')
    start = int(request.form.get('start', '0'))
    per_page = int(request.form.get('pp', '50'))
    view_type = request.form.get('vt', 's')
    saved_request = {'sort': sort, 'start': start, 'per_page': per_page}
    if fq:
        saved_request['fq'] = fq
    if limits:
        limits = limits.split(';')
    (sort_field, sort_order) = sort.split(':')
    if sort_field == 'score':
        sort_field = '_score'
    sort = {sort_field: {'order': sort_order}}
    query_parts_must = []
    query_parts_filter = []
    query_parts_should = []
    # all_count
    if vehicle_all:
        vehicle_all = vehicle_all.split(',')
        query_parts_must.append({
            'range': {
                'vehicle_all': {
                    'gte': vehicle_all[0],
                    # The slider tops out at 20, meaning "20 or more"; map that
                    # to the actual maximum of 64.
                    'lte': 64 if vehicle_all[1] == '20' else vehicle_all[1]
                }
            }
        })
    # vehicle_type
    if vehicle_type:
        vehicle_type = vehicle_type.split(',')
        query_parts_filter.append({'terms': {'station_type': vehicle_type}})
    if limits:
        # Parse range filters such as "field<=value" into ES range queries.
        limit_queries = {}
        for limit in limits:
            if limit.find('<=') >= 0:
                limit_split = limit.split('<=')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lte'] = limit_split[1]
            elif limit.find('>=') >= 0:
                limit_split = limit.split('>=')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gte'] = limit_split[1]
            elif limit.find('>') >= 0:
                limit_split = limit.split('>')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gt'] = limit_split[1]
            elif limit.find('<') >= 0:
                limit_split = limit.split('<')
                if limit_split[0] not in limit_queries:
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lt'] = limit_split[1]
        for limit_query_key, limit_query_value in limit_queries.items():
            query_parts_must.append(
                {'range': {limit_query_key: limit_query_value}})
    query = {
        'query': {
            'bool': {
                'must': [{'match_all': {}}] + query_parts_must,
                'filter': query_parts_filter
            }
        }
    }
    result = []
    if view_type == 's':
        fields = 'name,station_type,vehicle_all,location.lat,location.lon,sharing_provider.name,sharing_provider.slug'
        es_result = es.search(index=app.config['SHARING_STATION_ES'] + '-latest',
                              doc_type='sharing_station',
                              fields=fields,
                              body=query,
                              from_=start,
                              size=per_page,
                              sort=sort_field + ':' + sort_order)
        for single in es_result['hits']['hits']:
            item = {
                'name': single['fields']['name'][0],
                'lat': single['fields']['location.lat'][0],
                'lon': single['fields']['location.lon'][0],
                'station_type': single['fields']['station_type'][0],
                'sharing_provider_slug': single['fields']['sharing_provider.slug'][0],
                'sharing_provider_name': single['fields']['sharing_provider.name'][0]
            }
            if 'vehicle_all' in single['fields']:
                item['vehicle_all'] = single['fields']['vehicle_all'][0]
            result.append(item)
    ret = {
        'status': 0,
        'request': saved_request,
        'duration': round((time.time() - start_time) * 1000),
        'response': result
    }
    json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
    response = make_response(json_output, 200)
    response.mimetype = 'application/json'
    response.headers['Expires'] = util.expires_date(hours=24)
    response.headers['Cache-Control'] = util.cache_max_age(hours=24)
    return response
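# The limit-string parsing above appears verbatim in both search_traffic_items_es()
# and search_sharing_stations(). A sketch of a shared helper both could call; the
# name parse_limit_queries is an assumption, not part of the codebase. It checks
# two-character operators first so '<=' is never split on '<', and maps '>' to
# 'gt' (the original inline versions mapped it to 'lt', which inverted the filter):
def parse_limit_queries(limits):
    """Turn limit strings like ['speed<=50', 'lanes>2'] into ES range query parts."""
    operators = [('<=', 'lte'), ('>=', 'gte'), ('>', 'gt'), ('<', 'lt')]
    limit_queries = {}
    for limit in limits:
        for token, es_op in operators:
            if token in limit:
                field, value = limit.split(token, 1)
                limit_queries.setdefault(field, {})[es_op] = value
                break
    return [{'range': {field: ranges}} for field, ranges in limit_queries.items()]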
def get_papers_live(search_string, region_id):
    search_string = search_string.split()
    if not len(search_string):
        return []
    # Complete the last word; all preceding words must match as typed.
    search_string_to_complete = search_string[-1]
    query_parts = []
    query_parts.append({
        'match_phrase_prefix': {
            'text_all': search_string_to_complete
        }
    })
    query_parts.append({
        'terms': {
            'bodyId': app.config['regions'][region_id]['body'],
            'minimum_should_match': 1
        }
    })
    if len(search_string[0:-1]):
        query_parts.append({
            'query_string': {
                'fields': ['text_all'],
                'query': " ".join(search_string[0:-1]),
                'default_operator': 'and'
            }
        })
    result = es.search(
        index=app.config['es_paper_index'] + '-latest',
        doc_type='paper',
        fields='name',
        body={
            'query': {
                'bool': {
                    'must': query_parts
                }
            },
            'aggs': {
                'fragment': {
                    'terms': {
                        'field': 'text_all',
                        'include': {
                            'pattern': search_string_to_complete + '.*',
                            'flags': 'CANON_EQ|CASE_INSENSITIVE'
                        },
                        'size': 10
                    }
                }
            }
        },
        size=0
    )
    search_results = []
    prefix = ""
    if len(search_string[0:-1]):
        prefix = " ".join(search_string[0:-1]) + " "
    for search_result in result['aggregations']['fragment']['buckets']:
        tmp_search_result = {
            'name': prefix + search_result['key'].capitalize(),
            'count': search_result['doc_count']
        }
        search_results.append(tmp_search_result)
    return search_results
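# Illustrative call (region id and counts are made up): the terms aggregation's
# 'include' regex keeps only terms that start with the word being typed, so the
# buckets serve directly as autocomplete suggestions.
#
#   get_papers_live('Haushalt Sat', 'koeln')
#   # -> [{'name': 'Haushalt Satzung', 'count': 17}, ...]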
def query_paper(region=None, q='', fq=None, sort='score:desc', start=0, papers_per_page=10, facets=None):
    (sort_field, sort_order) = sort.split(':')
    if sort_field == 'score':
        sort_field = '_score'
    sort = {sort_field: {'order': sort_order}}
    # Parse the facet query string fq into (field, value) tuples. Entries are
    # separated by ';'; values may be wrapped in HTML-encoded double quotes
    # ("&#34;", five characters) so they can themselves contain ':' or ';'.
    fq = fq or ''
    x = 0
    result = []
    while True:
        y = fq.find(":", x)
        if y == -1:
            break
        temp = fq[x:y]
        x = y + 1
        if fq[x:x+5] == "&#34;":
            y = fq.find("&#34;", x + 5)
            if y == -1:
                break
            result.append((temp, fq[x+5:y]))
            x = y + 6
            if x > len(fq):
                break
        else:
            y = fq.find(";", x)
            if y == -1:
                result.append((temp, fq[x:len(fq)]))
                break
            else:
                result.append((temp, fq[x:y]))
                x = y + 1
    facet_terms = []
    for sfq in result:
        if sfq[0] == 'publishedDate':
            # A month facet like '2014-03' becomes a range over that whole month.
            (year, month) = sfq[1].split('-')
            date_start = datetime.datetime(int(year), int(month), 1)
            date_end = date_start + dateutil.relativedelta.relativedelta(months=+1, seconds=-1)
            facet_terms.append({
                'range': {
                    'publishedDate': {
                        'gt': date_start.isoformat('T'),
                        'lt': date_end.isoformat('T')
                    }
                }
            })
        else:
            facet_terms.append({
                'term': {
                    sfq[0]: sfq[1]
                }
            })
    if region:
        facet_terms.append({
            'terms': {
                'bodyId': app.config['regions'][region]['body'],
                'minimum_should_match': 1
            }
        })
    # Quoted phrases in the search string (again HTML-encoded) become phrase queries.
    matches = re.findall("&#34;(.*?)&#34;", q, re.DOTALL)
    match_query = []
    for match in matches:
        if match.strip():
            match_query.append({
                'multi_match': {
                    'fields': ['file.fulltext', 'file.name', 'name'],
                    'type': 'phrase',
                    'query': match.strip()
                }
            })
        q = q.replace("&#34;" + match + "&#34;", "")
    q = q.replace("&#34;", "").strip()
    if q:
        simple_query = [{
            'query_string': {
                'fields': ['file.fulltext', 'file.name', 'name'],
                'query': q,
                'default_operator': 'and'
            }
        }]
    else:
        simple_query = []
    query = {
        'query': {
            'bool': {
                'must': simple_query + match_query + facet_terms
            }
        },
        'highlight': {
            'pre_tags': ['<strong>'],
            'post_tags': ['</strong>'],
            'fields': {
                'file.fulltext': {
                    'fragment_size': 200,
                    'number_of_fragments': 1
                }
            }
        },
        'aggs': {
            'publishedDate': {
                'date_histogram': {
                    'field': 'publishedDate',
                    'interval': 'month'
                }
            },
            'paperType': {
                'terms': {
                    'field': 'paperType'
                }
            },
            'bodyName': {
                'terms': {
                    'field': 'bodyName'
                }
            }
        }
    }
    result = es.search(
        index=app.config['es_paper_index'] + '-latest',
        doc_type='paper',
        fields='name,paperType,publishedDate,bodyId,bodyName,externalId,file.fulltext',
        body=query,
        from_=start,
        size=papers_per_page,
        sort=sort_field + ':' + sort_order
    )
    ret = {
        'numhits': result['hits']['total'],
        'maxscore': result['hits']['max_score'],
        'result': [],
        'facets': {}
    }
    for r in result['hits']['hits']:
        ret['result'].append({
            'id': r['_id'],
            'score': r['_score'],
            'bodyId': r['fields']['bodyId'][0],
            'bodyName': r['fields']['bodyName'][0],
            'name': r['fields']['name'][0] if 'name' in r['fields'] else '',
            'paperType': r['fields']['paperType'][0] if 'paperType' in r['fields'] else '',
            'publishedDate': r['fields']['publishedDate'][0] if 'publishedDate' in r['fields'] else '',
            'fileFulltext': r['highlight']['file.fulltext'][0].strip() if 'highlight' in r else None
        })
    for key in result['aggregations']:
        ret['facets'][key] = {}
        if key == 'publishedDate':
            # Date-histogram bucket keys are epoch milliseconds; render as 'YYYY-MM'.
            for subval in result['aggregations'][key]['buckets']:
                ret['facets'][key][datetime.datetime.fromtimestamp(int(subval['key']) / 1000).strftime('%Y-%m')] = subval['doc_count']
        if key in ['paperType', 'bodyName']:
            for subval in result['aggregations'][key]['buckets']:
                ret['facets'][key][subval['key']] = subval['doc_count']
    return ret
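# The fq format accepted by query_paper() chains 'field:value' pairs with ';' and
# allows values wrapped in HTML-encoded quotes (&#34;) so they may contain ':' or
# ';' themselves. Illustrative inputs (field values are made up):
#
#   fq = 'paperType:Beschlussvorlage;publishedDate:2014-03'
#   # -> facet tuples: [('paperType', 'Beschlussvorlage'), ('publishedDate', '2014-03')]
#   fq = 'bodyName:&#34;Stadt Bonn&#34;'
#   # -> [('bodyName', 'Stadt Bonn')]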