def do_search(search_object, params=None, size=100, start=0,
              index=primary_index, fields=None):
    # Avoid a mutable default argument; params is only read here.
    params = params or {}
    search_object.size = size
    search_object.start = start
    if fields:
        search_object.fields = fields
    try:
        sort = params.get('sort')
        if sort is not None:
            docs = conn.search(query=search_object, indexes=[index], sort=sort)
        else:
            docs = conn.search(query=search_object, indexes=[index])
    except pyes.exceptions.SearchPhaseExecutionException:
        # A malformed query (e.g. a bad sort field) raises here; treat it
        # as an empty result set instead of propagating a server error.
        return [], None
    udocs = []
    for doc in docs:
        data = {'_id': doc.get_id(), '_type': doc.get_meta()['type']}
        for k in doc:
            data[k] = doc[k]
        data = explode_doc(data)
        udocs.append(data)
    return udocs, docs.facets

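# Usage sketch for do_search (illustrative only; assumes `conn` and
# `primary_index` are configured at module level, as the code here expects):
#
#   q = pyes.query.StringQuery('helsinki', default_field='name_fi')
#   docs, facets = do_search(q, params={'sort': 'name_fi'}, size=20)
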
def export_full_library_addresses(request):
    q = pyes.query.TermQuery('organisation_type', 'library')
    conn.default_indices = ['documents']
    parents = conn.search(q, indices=settings.INDEX_NAME,
                          doc_types=['organisation'], sort='name_fi')
    rows = []
    for p in parents:
        # Fetch each library's child organisations, sorted by Finnish name.
        cq = pyes.query.TermQuery('parent_organisation', p.get_id())
        kids = conn.search(cq, indices=settings.INDEX_NAME,
                           doc_types=['organisation'], sort='name_fi')
        rows.append(export.extract_address_row(p))
        rows.append(export.extract_alternate_address_row(p))
        rows.append({'br': ''})
        for k in kids:
            rows.append(export.extract_address_row(k))
            rows.append(export.extract_alternate_address_row(k))
            rows.append({'br': ''})
    # Filter out erroneous (None) rows.
    rows = list(filter(None, rows))
    response = HttpResponse(export.dump_csv(rows))
    response['Content-Type'] = 'text/csv'
    response['Content-Disposition'] = \
        'attachment; filename=kirjastojen_osoitteet_full.csv'
    return response

def export_staff(request):
    q = pyes.query.MatchAllQuery()
    staff = conn.search(q, indices=settings.INDEX_NAME,
                        doc_types=['person'], sort='last_name')
    organisations = conn.search(q, indices=settings.INDEX_NAME,
                                doc_types=['organisation'], sort='name_fi')
    # CSV column headers (Finnish labels: name, email, title,
    # area of responsibility, head/director).
    labels = OrderedDict()
    labels[""] = ""
    labels["name"] = "Nimi"
    labels["email"] = "Email"
    labels["title"] = "Nimike"
    labels["responsibility"] = "Vastuualue"
    labels["head"] = "Johtaja"
    rows = [labels]
    rows.append({'br': ''})
    used = []
    # Group staff rows by their organisation id.
    cache = OrderedDict()
    for row in staff:
        oid = row['organisation']
        if oid not in cache:
            cache[oid] = []
        cache[oid].append(row)
    for organisation in organisations:
        oid = organisation._meta.id
        if oid not in cache:
            continue
        if oid not in used:
            used.append(oid)
        rows.append({'name': organisation['name_fi']})
        for row in cache[oid]:
            rows.append(export.extract_staff_row(row))
        rows.append({'br': ''})
    response = HttpResponse(export.dump_csv(rows))
    response['Content-Type'] = 'text/csv'
    response['Content-Disposition'] = \
        'attachment; filename=kirjastojen_henkilosto.csv'
    return response

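# Both export views above return CSV attachments. A sketch of wiring them up
# (URL patterns are hypothetical; the real routing lives in urls.py):
#
#   url(r'^export/addresses/$', export_full_library_addresses),
#   url(r'^export/staff/$', export_staff),
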
def search_family_tree(indexes=None, fields=FAMILY_FIELDS, user=None):
    # Avoid a mutable default argument for the index list.
    if indexes is None:
        indexes = [primary_index]
    query = authenticated_docs_query(user)
    query = pyes.Search(query=query, fields=fields)
    docs = conn.search(query=query, size=10000, indexes=indexes)
    processed_docs = []
    for doc in docs:
        data = {'_id': doc.get_id(), '_type': doc.get_meta()['type']}
        for k in doc:
            data[k] = doc[k]
        data = explode_doc(data)
        processed_docs.append(data)
    return family_tree(processed_docs)

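# Usage sketch (assumes a Django request context with an authenticated user):
#
#   tree = search_family_tree(user=request.user)
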
def get_choices_from_es(fields, query):
    """
    Fetches documents from Elasticsearch with the given query and builds,
    from the given fields, a list of (value, label) pairs for a choices
    dropdown.
    """
    # todo: get rid of the fixed size limit
    s = pyes.Search(query=pyes.TermQuery(*query), size=500)
    resp = conn.search(query=s, indexes=[primary_index])['hits'].get('hits')
    choices = []
    for r in resp:
        if fields[0] == '_id':
            # Use the document id as the value and join the remaining
            # fields into the visible label.
            choices.append((r['_id'],
                            u' '.join([r['_source'].get(key, "")
                                       for key in fields[1:]])))
        else:
            choices.append((r['_source'].get(fields[0], "no-field"),
                            r['_source'].get(fields[1], "not found"),))
    # 'Valitse arvo' is Finnish for 'Select a value'.
    return [['', _('Valitse arvo')]] + sorted(choices,
                                              key=lambda x: x[1].lower())

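# Usage sketch (field and form names are illustrative, not real mappings):
#
#   choices = get_choices_from_es(['_id', 'name_fi'],
#                                 ('organisation_type', 'library'))
#   form.fields['organisation'].choices = choices
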
def autocomplete(fields, term, doctype):
    """
    Does wildcard term searches over the given fields and doctype for a
    simple autocomplete. Only the last term is matched as a wildcard; the
    preceding terms are combined using boolean 'must'.
    """
    terms = term.split()
    last_term = terms.pop()
    queries = []
    for t in terms:
        # Completed terms are required matches. (The original assigned
        # instead of extending, discarding all but the last term.)
        queries.extend([pyes.TermQuery(field, t) for field in fields])
    # The term still being typed is matched as a prefix wildcard.
    queries.extend([pyes.WildcardQuery(field, last_term + "*")
                    for field in fields])
    query = pyes.BoolQuery(must=queries)
    docs = conn.search(query=query, doc_types=[doctype],
                       indexes=[primary_index])
    return docs

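# Usage sketch:
#
#   docs = autocomplete(['name_fi'], 'helsingin kaup', 'organisation')
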
def hours_view(request, querytype):
    eid = qfilter = None
    if 'time' not in request.GET or (
            'doc_id' not in request.GET and 'filter' not in request.GET):
        return HttpResponseBadRequest(
            "Required parameters: 'doc_id' or 'filter', and 'time'.")
    if 'doc_id' in request.GET:
        eid = request.GET['doc_id']
    elif 'filter' in request.GET:
        qfilter = request.GET.getlist('filter')
    time = request.GET['time']
    if time == 'now':
        today = datetime.datetime.now()
    else:
        today = parse_time(time)
        if today is None:
            return HttpResponseBadRequest("Time format should be YYYY-MM-DD.")
    if querytype != 'week':
        # Only the weekly query is implemented for now.
        return HttpResponseBadRequest("Query type must be 'week'")
    js_docs = []
    if eid:
        # Single document by id.
        try:
            js_doc = conn.get(primary_index, 'organisation', eid)
            js_doc = es_tools.convert_to_legacy_format(js_doc)
            js_doc['_source']['_id'] = js_doc['_id']
            js_doc['_source']['_type'] = js_doc['_type']
            js_docs.append(js_doc['_source'])
        except ElasticSearchException as e:
            if not e.result['exists']:
                raise Http404(
                    "Elastic Search can't find the requested document.")
            return HttpResponseServerError(
                "Elastic Search: error executing query.")
    elif qfilter:
        # Multiple documents by search filter.
        strings = parse_filters(qfilter)
        string_queries = [pyes.query.StringQuery(value, default_field=field)
                          for field, value in strings]
        if len(string_queries) == 1:
            query = string_queries[0]
        else:
            query = pyes.query.BoolQuery(must=string_queries)
        try:
            results = conn.search(query, size=1000, indexes=[primary_index],
                                  doc_types=['organisation'])
        except ElasticSearchException:
            return HttpResponseServerError(
                "Elastic Search: error executing query.")
        for d in results['hits']['hits']:
            d['_source']['_id'] = d['_id']
            d['_source']['_type'] = d['_type']
            js_docs.append(d['_source'])
    my_results = []
    for doc in js_docs:
        periods = doc.get('period', '')
        lib_name = doc.get('name_fi', '')
        lib_id = doc.get('_id')
        relevant_periods = find_periods_this_week(periods, today)
        aggregated_weekdays = aggregate_weekdays(relevant_periods, today)
        my_results.append({'id': lib_id, 'name_fi': lib_name,
                           'hours': aggregated_weekdays})
    if not my_results:
        raise Http404("Couldn't find documents.")
    if eid:
        # The contract is that an id-based match returns a single document.
        return HttpResponse(json.dumps(my_results[0]))
    # Filtering (searching) returns an array.
    return HttpResponse(json.dumps(my_results))
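
# Example requests handled by hours_view (URL paths are hypothetical and the
# filter syntax is assumed to be 'field:value' as consumed by parse_filters):
#
#   GET /hours/week/?doc_id=abc123&time=now
#   GET /hours/week/?filter=city_fi:Helsinki&time=2013-05-20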