def _normalize_document_uris_window(session, window):
    """
    Normalize the ``DocumentURI`` rows whose ``updated`` value is in ``window``.

    Rows are visited in ascending ``updated`` order. For each row:

    * if more than one document is found for the row's ``uri``, those
      documents are passed to ``models.merge_documents``;
    * if another row with the same document, normalized claimant, normalized
      uri, type and content type exists, this row is deleted;
    * otherwise the row's ``_claimant_normalized`` and ``_uri_normalized``
      are recomputed from ``claimant`` and ``uri``.

    :param session: database session used to query, delete and flush
    :param window: object exposing ``start`` and ``end`` bounds that are
        compared against ``DocumentURI.updated``
    """
    DocumentURI = models.DocumentURI

    in_window = DocumentURI.updated.between(window.start, window.end)
    rows = (session.query(DocumentURI)
            .filter(in_window)
            .order_by(DocumentURI.updated.asc()))

    for row in rows:
        matching_docs = models.Document.find_by_uris(session, [row.uri])
        if matching_docs.count() > 1:
            models.merge_documents(session, matching_docs)

        # Any *other* row that would be indistinguishable from this one once
        # claimant and uri are normalized.
        duplicate_criteria = [
            DocumentURI.id != row.id,
            DocumentURI.document_id == row.document_id,
            DocumentURI.claimant_normalized == uri.normalize(row.claimant),
            DocumentURI.uri_normalized == uri.normalize(row.uri),
            DocumentURI.type == row.type,
            DocumentURI.content_type == row.content_type,
        ]
        duplicates = session.query(DocumentURI).filter(*duplicate_criteria)

        if duplicates.count() > 0:
            session.delete(row)
        else:
            row._claimant_normalized = uri.normalize(row.claimant)
            row._uri_normalized = uri.normalize(row.uri)

        # NOTE(review): the flattened source does not show whether this flush
        # runs per row or once after the loop; per row is assumed -- confirm.
        session.flush()
def _normalize_document_uris_window(session, window):
    """
    Re-normalize every ``DocumentURI`` updated within ``window``.

    The rows are processed oldest ``updated`` first. Per row, documents
    sharing the row's ``uri`` are merged when more than one is found; the row
    is then either deleted (when a different row already has the same
    document, normalized claimant, normalized uri, type and content type) or
    has its ``_claimant_normalized`` / ``_uri_normalized`` values recomputed.

    :param session: database session used to query, delete and flush
    :param window: object with ``start`` and ``end`` attributes bounding
        ``DocumentURI.updated``
    """
    docuri_cls = models.DocumentURI

    window_query = session.query(docuri_cls)
    window_query = window_query.filter(
        docuri_cls.updated.between(window.start, window.end))
    window_query = window_query.order_by(docuri_cls.updated.asc())

    for current in window_query:
        docs_for_uri = models.Document.find_by_uris(session, [current.uri])
        if docs_for_uri.count() > 1:
            models.merge_documents(session, docs_for_uri)

        # Look for a different row that collides with this one after
        # normalization of claimant and uri.
        clashing = session.query(docuri_cls).filter(
            docuri_cls.id != current.id,
            docuri_cls.document_id == current.document_id,
            docuri_cls.claimant_normalized == uri.normalize(current.claimant),
            docuri_cls.uri_normalized == uri.normalize(current.uri),
            docuri_cls.type == current.type,
            docuri_cls.content_type == current.content_type,
        )

        if clashing.count() > 0:
            session.delete(current)
        else:
            current._claimant_normalized = uri.normalize(current.claimant)
            current._uri_normalized = uri.normalize(current.uri)

        # NOTE(review): flush placement (inside vs. after the loop) is not
        # recoverable from the flattened source; inside is assumed -- confirm.
        session.flush()
def move_uri(ctx, old, new):
    """
    Move annotations and document equivalence data from one URL to another.

    This will **replace** the annotation's ``target_uri`` and all the
    document uri's ``claimant``, plus the matching ``uri`` for self-claim and
    canonical uris.
    """
    # NOTE(review): docstring wording is kept verbatim; if this function is
    # registered as a click command it would also be the --help text.
    request = ctx.obj['bootstrap']()
    db = request.db

    annotations = _fetch_annotations(db, old)
    docuris_claimant = _fetch_document_uri_claimants(db, old)
    docuris_uri = _fetch_document_uri_canonical_self_claim(db, old)

    # Adjacent literals build the same template the user has always seen.
    template = (
        'Changing all annotations and document data matching:\n'
        '"{old}"\nto:\n"{new}"\n'
        'This will affect {ann_count} annotations, {doc_claimant} '
        'document uri claimants, and {doc_uri} document uri self-claims '
        'or canonical uris.\n'
        'Are you sure? [y/N]'
    )
    question = template.format(
        old=old,
        new=new,
        ann_count=len(annotations),
        doc_claimant=len(docuris_claimant),
        doc_uri=len(docuris_uri),
    )

    # Only a literal lowercase "y" confirms; anything else aborts.
    answer = click.prompt(question, default='n', show_default=False)
    if answer != 'y':
        print('Aborted')
        return

    for annotation in annotations:
        annotation.target_uri = new

    for claimant_row in docuris_claimant:
        claimant_row.claimant = new

    for uri_row in docuris_uri:
        uri_row.uri = new

    if annotations:
        # Reindex the annotations whose target_uri was just rewritten.
        annotation_ids = [annotation.id for annotation in annotations]
        BatchIndexer(db, request.es, request).index(annotation_ids)

    # NOTE(review): assumed to run regardless of whether any annotations
    # matched; the flattened source does not show the nesting -- confirm.
    db.flush()

    # Moving URIs may leave several documents claiming ``new``; merge them.
    documents = models.Document.find_by_uris(db, [new])
    if documents.count() > 1:
        merge_documents(db, documents)

    request.tm.commit()
def move_uri(ctx, old, new):
    """
    Move annotations and document equivalence data from one URL to another.

    This will **replace** the annotation's ``target_uri`` and all the
    document uri's ``claimant``, plus the matching ``uri`` for self-claim and
    canonical uris.
    """
    # NOTE(review): docstring text is unchanged on purpose; a click command
    # would expose it as help output.
    request = ctx.obj['bootstrap']()

    annotations = _fetch_annotations(request.db, old)
    docuris_claimant = _fetch_document_uri_claimants(request.db, old)
    docuris_uri = _fetch_document_uri_canonical_self_claim(request.db, old)

    # Values substituted into the confirmation message below.
    summary = {
        'old': old,
        'new': new,
        'ann_count': len(annotations),
        'doc_claimant': len(docuris_claimant),
        'doc_uri': len(docuris_uri),
    }
    prompt = ''.join([
        'Changing all annotations and document data matching:\n',
        '"{old}"\nto:\n"{new}"\n',
        'This will affect {ann_count} annotations, {doc_claimant} ',
        'document uri claimants, and {doc_uri} document uri self-claims ',
        'or canonical uris.\n',
        'Are you sure? [y/N]',
    ]).format(**summary)

    # The default reply is "n", so pressing enter aborts.
    reply = click.prompt(prompt, default='n', show_default=False)
    if reply != 'y':
        print('Aborted')
        return

    for ann in annotations:
        ann.target_uri = new
    for by_claimant in docuris_claimant:
        by_claimant.claimant = new
    for by_uri in docuris_uri:
        by_uri.uri = new

    if annotations:
        # Push the rewritten annotations to the search index.
        reindexer = BatchIndexer(request.db, request.es, request)
        reindexer.index([ann.id for ann in annotations])

    # NOTE(review): flush is assumed to sit outside the ``if annotations``
    # branch; nesting is not recoverable from the flattened source -- confirm.
    request.db.flush()

    # More than one document may now be associated with ``new``.
    candidates = models.Document.find_by_uris(request.db, [new])
    if candidates.count() > 1:
        merge_documents(request.db, candidates)

    request.tm.commit()