def xref_collection(collection_id):
    """Cross-reference all matchable entities and documents in a collection.

    Every proxy of a matchable schema in ``collection_id`` is processed in
    turn: the ``Match`` rows previously stored for it are deleted, the
    results of ``xref_item`` are stored as new ``Match`` rows, and the
    session is committed before moving on to the next proxy.

    :param collection_id: ID of the collection whose items are matched.
    """
    schemata = [schema.name for schema in model if schema.matchable]
    for data in iter_proxies(collection_id=collection_id, schemata=schemata):
        proxy = model.get_proxy(data)

        # A match is keyed on either a document or an entity, never both;
        # the unused key stays None.
        is_document = Document.SCHEMA in proxy.schema.names
        document_id = proxy.id if is_document else None
        entity_id = None if is_document else proxy.id

        # Drop the matches recorded for this item by an earlier run.
        stale = (
            db.session.query(Match)
            .filter(Match.entity_id == entity_id)
            .filter(Match.document_id == document_id)
        )
        stale.delete()

        # Each result is unpacked as (score, matched collection id, match).
        for score, other_id, other in xref_item(proxy):
            log.info("Xref [%.1f]: %s <=> %s", score, proxy, other)
            match = Match()
            match.entity_id = entity_id
            match.document_id = document_id
            match.collection_id = collection_id
            match.match_id = other.id
            match.match_collection_id = other_id
            match.score = score
            db.session.add(match)

        # Persist the delete and the inserts for this item together.
        db.session.commit()
def _xref_item(item, collection_id=None):
    """Cross-reference one indexed entity or document and store the matches.

    Builds a match query for ``item``, searches the entities index for up
    to 10 hits, replaces the ``Match`` rows stored for the item with one
    row per hit, and commits the session.

    :param item: the indexed document, accessed as a mapping (``name``,
        ``title``, ``schemata``, ``id`` and ``collection_id`` are read).
    :param collection_id: optional collection ID; it is passed on to the
        query builder and, when given, only stale matches pointing into
        that collection are deleted.
    :returns: None. If the generated query contains ``'match_none'`` the
        function returns before searching or touching the database.
    """
    label = item.get('name') or item.get('title')
    matcher = entity_query(item, collection_id=collection_id)
    if 'match_none' in matcher:
        return None

    body = {
        'query': matcher,
        'size': 10,
        '_source': ['collection_id', 'name'],
    }
    response = search_safe(index=entities_index(), body=body)
    hits = response.get('hits').get('hits')

    # A match is keyed on either a document or an entity, never both;
    # the unused key stays None.
    is_document = Document.SCHEMA in item.get('schemata')
    document_id = item.get('id') if is_document else None
    entity_id = None if is_document else item.get('id')

    # Drop previously stored matches for this item, optionally limited to
    # those pointing into the requested collection.
    stale = (
        db.session.query(Match)
        .filter(Match.entity_id == entity_id)
        .filter(Match.document_id == document_id)
    )
    if collection_id is not None:
        stale = stale.filter(Match.match_collection_id == collection_id)
    stale.delete()

    for hit in hits:
        source = hit.get('_source', {})
        log.info("Xref [%.1f]: %s <=> %s",
                 hit.get('_score'), label, source.get('name'))
        match = Match()
        match.entity_id = entity_id
        match.document_id = document_id
        match.collection_id = item.get('collection_id')
        match.match_id = hit.get('_id')
        match.match_collection_id = source.get('collection_id')
        match.score = hit.get('_score')
        db.session.add(match)

    # A single commit covers both the delete and all inserts.
    db.session.commit()
def xref_item(item, collection_id=None):
    """Cross-reference one indexed entity or document and store the matches.

    Searches ``es_index`` (doc type ``TYPE_ENTITY``) for up to 10 hits
    using the query built by ``entity_query``, replaces the ``Match`` rows
    stored for the item with one row per hit, and commits the session.

    :param item: the indexed document, accessed as a mapping (``name``,
        ``title``, ``$type``, ``id`` and ``collection_id`` are read).
    :param collection_id: optional collection ID; it is passed on to the
        query builder and, when given, only stale matches pointing into
        that collection are deleted.
    :returns: None; results are written to the database.
    """
    label = item.get('name') or item.get('title')
    body = {
        'query': entity_query(item, collection_id),
        'size': 10,
        '_source': ['collection_id', 'name'],
    }
    response = es.search(index=es_index, doc_type=TYPE_ENTITY, body=body)
    hits = response.get('hits').get('hits')

    # A match is keyed on either a document or an entity, never both;
    # the unused key stays None.
    is_document = item.get('$type') == TYPE_DOCUMENT
    document_id = item.get('id') if is_document else None
    entity_id = None if is_document else item.get('id')

    # Drop previously stored matches for this item, optionally limited to
    # those pointing into the requested collection.
    stale = (
        db.session.query(Match)
        .filter(Match.entity_id == entity_id)
        .filter(Match.document_id == document_id)
    )
    if collection_id is not None:
        stale = stale.filter(Match.match_collection_id == collection_id)
    stale.delete()

    for hit in hits:
        source = hit.get('_source', {})
        log.info("Xref [%.1f]: %s <=> %s",
                 hit.get('_score'), label, source.get('name'))
        match = Match()
        match.entity_id = entity_id
        match.document_id = document_id
        match.collection_id = item.get('collection_id')
        match.match_id = hit.get('_id')
        match.match_collection_id = source.get('collection_id')
        match.score = hit.get('_score')
        db.session.add(match)

    # A single commit covers both the delete and all inserts.
    db.session.commit()