def _xref_item(item, collection_id=None):
    """Cross-reference an entity or document, given as an indexed document.

    Looks up at most 10 candidate matches for ``item`` in the entities
    index, deletes the ``Match`` records previously stored for the item and
    adds one new ``Match`` per search hit before committing the session.

    :param item: the indexed document; read via ``.get()`` for the keys
        ``name``, ``title``, ``schemata``, ``id`` and ``collection_id``.
    :param collection_id: forwarded to ``entity_query``; when not ``None``
        it also narrows the deletion of existing matches to those whose
        ``match_collection_id`` equals it.
    :returns: ``None``. Returns early, without touching the database, when
        the generated query contains ``'match_none'``.
    """
    name = item.get('name') or item.get('title')

    match_query = entity_query(item, collection_id=collection_id)
    if 'match_none' in match_query:
        return

    search_body = {
        'query': match_query,
        'size': 10,
        '_source': ['collection_id', 'name'],
    }
    response = search_safe(index=entities_index(), body=search_body)
    hits = response.get('hits').get('hits')

    # Exactly one of the two ids is set, depending on whether the item
    # carries the document schema; the other one stays None.
    is_document = Document.SCHEMA in item.get('schemata')
    item_id = item.get('id')
    entity_id = None if is_document else item_id
    document_id = item_id if is_document else None

    # Drop the matches stored earlier for this item before re-adding.
    stale = (
        db.session.query(Match)
        .filter(Match.entity_id == entity_id)
        .filter(Match.document_id == document_id)
    )
    if collection_id is not None:
        stale = stale.filter(Match.match_collection_id == collection_id)
    stale.delete()

    for hit in hits:
        source = hit.get('_source', {})
        score = hit.get('_score')
        log.info("Xref [%.1f]: %s <=> %s", score, name, source.get('name'))

        match = Match()
        match.entity_id = entity_id
        match.document_id = document_id
        match.collection_id = item.get('collection_id')
        match.match_id = hit.get('_id')
        match.match_collection_id = source.get('collection_id')
        match.score = score
        db.session.add(match)

    db.session.commit()
def xref_item(item, collection_id=None):
    """Cross-reference an entity or document, given as an indexed document.

    Searches the entity type of the index for at most 10 candidates
    matching ``item``, deletes the ``Match`` records previously stored for
    the item and adds one new ``Match`` per search hit before committing
    the session.

    :param item: the indexed document; read via ``.get()`` for the keys
        ``name``, ``title``, ``$type``, ``id`` and ``collection_id``.
    :param collection_id: forwarded to ``entity_query``; when not ``None``
        it also narrows the deletion of existing matches to those whose
        ``match_collection_id`` equals it.
    :returns: ``None``.
    """
    name = item.get('name') or item.get('title')

    search_body = {
        'query': entity_query(item, collection_id),
        'size': 10,
        '_source': ['collection_id', 'name'],
    }
    response = es.search(index=es_index, doc_type=TYPE_ENTITY,
                         body=search_body)
    hits = response.get('hits').get('hits')

    # Exactly one of the two ids is set, depending on the item's type;
    # the other one stays None.
    is_document = item.get('$type') == TYPE_DOCUMENT
    item_id = item.get('id')
    entity_id = None if is_document else item_id
    document_id = item_id if is_document else None

    # Drop the matches stored earlier for this item before re-adding.
    stale = (
        db.session.query(Match)
        .filter(Match.entity_id == entity_id)
        .filter(Match.document_id == document_id)
    )
    if collection_id is not None:
        stale = stale.filter(Match.match_collection_id == collection_id)
    stale.delete()

    for hit in hits:
        source = hit.get('_source', {})
        score = hit.get('_score')
        log.info("Xref [%.1f]: %s <=> %s", score, name, source.get('name'))

        match = Match()
        match.entity_id = entity_id
        match.document_id = document_id
        match.collection_id = item.get('collection_id')
        match.match_id = hit.get('_id')
        match.match_collection_id = source.get('collection_id')
        match.score = score
        db.session.add(match)

    db.session.commit()
def get_query(self):
    """Return the query used to find entities similar to ``self.entity``.

    The query built by the parent class is handed to ``entity_query``
    together with ``self.entity``; whatever ``entity_query`` returns is
    passed back unchanged.
    """
    base_query = super(SimilarEntitiesQuery, self).get_query()
    return entity_query(self.entity, query=base_query)