def entity_documents(entity, state):
    """Build and run a query for documents that mention ``entity``.

    The query is a ``bool`` with ``minimum_should_match`` of 1 over these
    clauses:

    * a ``term`` clause on ``entities.id`` using ``entity.get('id')``;
    * for every entry of ``entity['names']``, a ``phrase_match`` against
      ``title`` and against ``summary``;
    * one clause produced by ``child_record`` that wraps a ``bool``/``should``
      of ``phrase_match`` clauses: each name against ``text`` and
      ``text_latin``, and each entry of ``entity['fingerprints']`` against
      ``text``.

    Side effects: every name and fingerprint is appended to
    ``state.highlight`` and the finished query is stored in
    ``state.raw_query``.

    Returns whatever ``documents_query(state)`` returns.
    """
    doc_clauses = [{"term": {"entities.id": entity.get('id')}}]
    record_clauses = []

    for name in entity.get('names', []):
        for field in ('text', 'text_latin'):
            record_clauses.append(phrase_match(name, field))
        for field in ('title', 'summary'):
            doc_clauses.append(phrase_match(name, field))
        state.highlight.append(name)

    for fingerprint in entity.get('fingerprints', []):
        record_clauses.append(phrase_match(fingerprint, 'text'))
        state.highlight.append(fingerprint)

    # NOTE: entity identifiers are deliberately not part of the query yet; a
    # multi_match on 'text' per identifier was tried and left disabled.

    records_clause = child_record({"bool": {"should": record_clauses}})
    doc_clauses.append(records_clause)

    # TODO: add in other entity info like phone numbers, addresses, etc. for
    # ranking.
    state.raw_query = {
        "bool": {
            "should": doc_clauses,
            "minimum_should_match": 1,
        }
    }
    return documents_query(state)
def text_query(text):
    """Return the query fragment that searches for ``text``.

    ``None`` or whitespace-only text yields ``match_all()``. Any other text
    yields a ``bool`` query with ``minimum_should_match`` of 1 over two
    clauses: ``meta_query_string(text)`` and ``text_query_string(text)``
    wrapped in a ``bool``/``should`` and passed through ``child_record``.
    """
    if text is None or not text.strip():
        return match_all()

    meta_clause = meta_query_string(text)
    record_clause = child_record(
        {"bool": {"should": [text_query_string(text)]}}
    )
    return {
        "bool": {
            "minimum_should_match": 1,
            "should": [meta_clause, record_clause],
        }
    }
def text_query(text):
    """Build the text-search portion of a query.

    Falls back to ``match_all()`` when ``text`` is ``None`` or contains only
    whitespace. Otherwise at least one of the following must match: the
    ``meta_query_string`` clause, or the ``text_query_string`` clause wrapped
    by ``child_record``.
    """
    has_content = text is not None and len(text.strip()) > 0
    if not has_content:
        return match_all()

    should = [meta_query_string(text)]
    should.append(
        child_record({"bool": {"should": [text_query_string(text)]}})
    )
    return {"bool": {"minimum_should_match": 1, "should": should}}
def text_query(text):
    """
    Construct the part of a query which is responsible for finding a piece
    of text in the selected documents.

    Missing (``None``) or blank text matches everything via ``match_all()``.
    Otherwise the fragment is a ``bool`` query that needs at least one hit
    from either ``meta_query_string(text)`` or the ``child_record`` wrapper
    around ``text_query_string(text)``.
    """
    if text is None:
        return match_all()
    if len(text.strip()) == 0:
        return match_all()

    clauses = []
    clauses.append(meta_query_string(text))

    nested = {"bool": {"should": [text_query_string(text)]}}
    clauses.append(child_record(nested))

    fragment = {"minimum_should_match": 1, "should": clauses}
    return {"bool": fragment}
def analyze_terms(terms, seen=None):
    """Dispatch analysis for every document matching any of ``terms``.

    For each term a ``bool`` query (``minimum_should_match`` of 1) is built
    from ``meta_query_string(term, literal=True)`` and a ``child_record``
    wrapper around ``text_query_string(term, literal=True)``. Each id yielded
    by ``query_doc_ids`` for that query is handed to
    ``analyze_document.delay`` at most once.

    ``seen`` is the set of document ids that have already been dispatched. A
    set passed in by the caller is mutated in place; when omitted, a fresh
    set is used for this call only.

    Returns ``None``.
    """
    seen = set() if seen is None else seen

    for term in terms:
        meta_clause = meta_query_string(term, literal=True)
        text_clause = text_query_string(term, literal=True)
        record_clause = child_record({"bool": {"should": [text_clause]}})
        query = {
            "bool": {
                "minimum_should_match": 1,
                "should": [meta_clause, record_clause],
            }
        }

        for doc_id in query_doc_ids(query):
            if doc_id in seen:
                # Already dispatched for an earlier term (or by the caller).
                continue
            analyze_document.delay(doc_id)
            seen.add(doc_id)
def analyze_terms(terms, seen=None):
    """Dispatch analysis for documents matching the normalized ``terms``.

    Every term is first passed through ``normalize_strong``. The normalized
    value feeds a ``bool`` query (``minimum_should_match`` of 1) made of
    ``meta_query_string(term)`` and a ``child_record`` wrapper around
    ``text_query_string(term)``. Ids returned by ``query_doc_ids`` that are
    not yet in ``seen`` are sent to ``analyze_document.delay`` and then
    recorded in ``seen``.

    ``seen`` defaults to a new empty set; a caller-supplied set is updated in
    place, so it can be shared across calls to avoid repeat dispatches.

    Returns ``None``.
    """
    if seen is None:
        seen = set()

    for raw_term in terms:
        term = normalize_strong(raw_term)

        should = [meta_query_string(term)]
        should.append(
            child_record({"bool": {"should": [text_query_string(term)]}})
        )
        query = {"bool": {"minimum_should_match": 1, "should": should}}

        # Lazy filter: membership is checked as each id is pulled, so an id
        # repeated within one result stream is still dispatched only once.
        fresh_ids = (
            doc_id for doc_id in query_doc_ids(query) if doc_id not in seen
        )
        for doc_id in fresh_ids:
            analyze_document.delay(doc_id)
            seen.add(doc_id)