Exemplo n.º 1
0
def reindex(session, es, request):
    """Reindex all annotations into a new index, and update the alias.

    A new index is created with ``configure_index``, its name is stored in
    the settings service under ``SETTING_NEW_INDEX`` while the reindex runs,
    all annotations are indexed into it, and the alias is then pointed at
    the new index.

    :param session: database session passed through to ``BatchIndexer``
    :param es: search client whose aliased index is being rebuilt
    :param request: request used to look up the ``settings`` service and to
        commit via ``request.tm``
    :raises RuntimeError: if the current index is not aliased
    """

    if get_aliased_index(es) is None:
        raise RuntimeError('cannot reindex if current index is not aliased')

    settings = request.find_service(name='settings')

    new_index = configure_index(es)

    try:
        # Persist the name of the in-progress index before indexing starts.
        settings.put(SETTING_NEW_INDEX, new_index)
        request.tm.commit()

        indexer = BatchIndexer(session,
                               es,
                               request,
                               target_index=new_index,
                               op_type='create')

        errored = indexer.index()
        if errored:
            # Give the annotations that failed the first pass one more try.
            log.debug('failed to index %d annotations, retrying...',
                      len(errored))
            errored = indexer.index(errored)
            if errored:
                log.warning('failed to index %d annotations: %r',
                            len(errored), errored)

        # The alias is switched even if some annotations could not be indexed.
        update_aliased_index(es, new_index)

    finally:
        # Always clear the marker setting, whether or not the reindex worked.
        settings.delete(SETTING_NEW_INDEX)
        request.tm.commit()
Exemplo n.º 2
0
def move_uri(ctx, old, new):
    """
    Re-point annotations and document equivalence data from ``old`` to ``new``.

    The ``target_uri`` of every matching annotation and the ``claimant`` of
    every matching document uri are **replaced** with ``new``, as is the
    ``uri`` of matching self-claim and canonical document uris. The operator
    is asked for confirmation first; any answer other than ``y`` aborts
    without changing anything.
    """

    bootstrap = ctx.obj["bootstrap"]
    request = bootstrap()
    db = request.db

    annotations = _fetch_annotations(db, old)
    claimant_rows = _fetch_document_uri_claimants(db, old)
    uri_rows = _fetch_document_uri_canonical_self_claim(db, old)

    template = (
        "Changing all annotations and document data matching:\n"
        '"{old}"\nto:\n"{new}"\n'
        "This will affect {ann_count} annotations, {doc_claimant} "
        "document uri claimants, and {doc_uri} document uri self-claims "
        "or canonical uris.\n"
        "Are you sure? [y/N]"
    )
    counts = {
        "ann_count": len(annotations),
        "doc_claimant": len(claimant_rows),
        "doc_uri": len(uri_rows),
    }
    answer = click.prompt(
        template.format(old=old, new=new, **counts),
        default="n",
        show_default=False,
    )

    # Only an exact lowercase "y" counts as consent.
    if answer != "y":
        print("Aborted")
        return

    for ann in annotations:
        ann.target_uri = new

    for row in claimant_rows:
        row.claimant = new

    for row in uri_rows:
        row.uri = new

    if annotations:
        # Push the moved annotations to the search index.
        BatchIndexer(db, request.es, request).index([ann.id for ann in annotations])

    db.flush()

    # After the move several documents may match the new URI; merge them.
    matching_docs = models.Document.find_by_uris(db, [new])
    if matching_docs.count() > 1:
        merge_documents(db, matching_docs)

    request.tm.commit()
Exemplo n.º 3
0
def move_uri(ctx, old, new):
    """
    Rewrite stored data so that everything pointing at ``old`` points at ``new``.

    This **replaces** the ``target_uri`` of matching annotations, the
    ``claimant`` of matching document uris, and the ``uri`` of matching
    self-claim and canonical document uris. A confirmation prompt showing
    the number of affected rows is displayed before anything is modified.
    """

    request = ctx.obj["bootstrap"]()
    session = request.db

    to_move = _fetch_annotations(session, old)
    by_claimant = _fetch_document_uri_claimants(session, old)
    by_uri = _fetch_document_uri_canonical_self_claim(session, old)

    question = (
        "Changing all annotations and document data matching:\n"
        '"{old}"\nto:\n"{new}"\n'
        "This will affect {ann_count} annotations, {doc_claimant} "
        "document uri claimants, and {doc_uri} document uri self-claims "
        "or canonical uris.\n"
        "Are you sure? [y/N]"
    ).format(
        old=old,
        new=new,
        ann_count=len(to_move),
        doc_claimant=len(by_claimant),
        doc_uri=len(by_uri),
    )
    reply = click.prompt(question, default="n", show_default=False)

    # Anything but a literal "y" leaves the data untouched.
    if reply != "y":
        print("Aborted")
        return

    for item in to_move:
        item.target_uri = new

    for item in by_claimant:
        item.claimant = new

    for item in by_uri:
        item.uri = new

    if to_move:
        # Re-index the annotations whose target changed.
        reindexer = BatchIndexer(session, request.es, request)
        reindexer.index([item.id for item in to_move])

    session.flush()

    # More than one document for the new URI means they need merging.
    found = models.Document.find_by_uris(session, [new])
    if found.count() > 1:
        merge_documents(session, found)

    request.tm.commit()
Exemplo n.º 4
0
def reindex_user_annotations(userid):
    """Re-index every annotation belonging to ``userid``.

    The annotations are always indexed through ``celery.request.es``; when
    the ``index_es6`` feature is enabled they are also indexed through
    ``celery.request.es6``. Failures are logged as warnings, not raised.
    """
    request = celery.request
    id_rows = request.db.query(models.Annotation.id).filter_by(userid=userid)
    ids = [row.id for row in id_rows]

    def _index_with(client, failure_message):
        # Index the collected ids through one client and report what failed.
        failed = BatchIndexer(request.db, client, request).index(ids)
        if failed:
            log.warning(failure_message, failed)

    _index_with(request.es, 'Failed to re-index annotations %s')

    if request.feature('index_es6'):
        _index_with(request.es6, 'Failed to re-index annotations into ES6 %s')
Exemplo n.º 5
0
def reindex(session, es, request):
    """Reindex all annotations into a new index, and update the alias.

    A new index is created with ``configure_index``, its name is stored in
    the settings service under ``reindex.new_index`` while the reindex runs,
    all annotations are indexed into it, the alias is pointed at the new
    index, and the previously aliased index is deleted.

    :param session: database session passed through to ``BatchIndexer``
    :param es: search client whose aliased index is being rebuilt
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit via ``request.tm``
    :raises RuntimeError: if the current index is not aliased
    """

    current_index = get_aliased_index(es)
    if current_index is None:
        raise RuntimeError('cannot reindex if current index is not aliased')

    settings = request.find_service(name='settings')

    # Preload userids of shadowbanned users.
    nipsa_svc = request.find_service(name='nipsa')
    nipsa_svc.fetch_all_flagged_userids()

    new_index = configure_index(es)
    log.info('configured new index %s', new_index)
    # The same setting name is used for every Elasticsearch version.
    setting_name = 'reindex.new_index'

    try:
        # Persist the name of the in-progress index before indexing starts.
        settings.put(setting_name, new_index)
        request.tm.commit()

        log.info('reindexing annotations into new index %s', new_index)
        indexer = BatchIndexer(session,
                               es,
                               request,
                               target_index=new_index,
                               op_type='create')

        errored = indexer.index()
        if errored:
            # Give the annotations that failed the first pass one more try.
            log.debug('failed to index %d annotations, retrying...',
                      len(errored))
            errored = indexer.index(errored)
            if errored:
                log.warning('failed to index %d annotations: %r',
                            len(errored), errored)

        log.info('making new index %s current', new_index)
        update_aliased_index(es, new_index)

        # The old index is only removed once the alias has been switched.
        log.info('removing previous index %s', current_index)
        delete_index(es, current_index)

    finally:
        # Always clear the marker setting, whether or not the reindex worked.
        settings.delete(setting_name)
        request.tm.commit()
Exemplo n.º 6
0
def reindex_user_annotations(userid):
    """Re-index every annotation belonging to ``userid``.

    Annotations that could not be indexed are logged as a warning rather
    than raised.
    """
    request = celery.request
    id_rows = request.db.query(models.Annotation.id).filter_by(userid=userid)
    annotation_ids = [row.id for row in id_rows]

    failed = BatchIndexer(request.db, request.es, request).index(annotation_ids)
    if failed:
        log.warning('Failed to re-index annotations %s', failed)
Exemplo n.º 7
0
def reindex_annotations_in_date_range(start_date, end_date, max_annotations=250000):
    """Re-index annotations from Postgres to Elasticsearch in a date range.

    Annotations are selected by their ``updated`` timestamp, with both ends
    of the range inclusive. Annotations that fail to index are logged as a
    warning rather than raised.

    :param start_date: Begin at this time (greater or equal)
    :param end_date: End at this time (less than or equal)
    :param max_annotations: Maximum number of items to process overall

    """
    # Lazy %-style arguments, consistent with the other log calls below.
    log.info("Re-indexing from %s to %s...", start_date, end_date)

    indexer = BatchIndexer(celery.request.db, celery.request.es, celery.request)
    # The ids are handed to the indexer as a generator, not a list.
    errored = indexer.index(
        annotation.id
        for annotation in celery.request.db.query(Annotation.id)
        .filter(Annotation.updated >= start_date)
        .filter(Annotation.updated <= end_date)
        .limit(max_annotations)
    )

    if errored:
        # NOTE(review): the message says "ES6" but the client used here is
        # ``celery.request.es`` -- confirm the wording is still accurate.
        log.warning("Failed to re-index annotations into ES6 %s", errored)

    log.info("Re-index from %s to %s complete.", start_date, end_date)
Exemplo n.º 8
0
def reindex(session, es, request):
    """Reindex all annotations into a new index, and update the alias.

    A new index is created with ``configure_index``, its name is stored in
    the settings service under ``reindex.new_index`` while the reindex runs,
    all annotations are indexed into it, the alias is pointed at the new
    index, and the previously aliased index is deleted.

    :param session: database session passed through to ``BatchIndexer``
    :param es: search client whose aliased index is being rebuilt
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit via ``request.tm``
    :raises RuntimeError: if the current index is not aliased
    """

    current_index = get_aliased_index(es)
    if current_index is None:
        raise RuntimeError("cannot reindex if current index is not aliased")

    settings = request.find_service(name="settings")

    # Preload userids of shadowbanned users.
    nipsa_svc = request.find_service(name="nipsa")
    nipsa_svc.fetch_all_flagged_userids()

    new_index = configure_index(es)
    log.info("configured new index %s", new_index)
    setting_name = "reindex.new_index"

    try:
        # Persist the name of the in-progress index before indexing starts.
        settings.put(setting_name, new_index)
        request.tm.commit()

        log.info("reindexing annotations into new index %s", new_index)
        indexer = BatchIndexer(
            session, es, request, target_index=new_index, op_type="create"
        )

        errored = indexer.index()
        if errored:
            # Give the annotations that failed the first pass one more try.
            log.debug("failed to index %d annotations, retrying...", len(errored))
            errored = indexer.index(errored)
            if errored:
                log.warning("failed to index %d annotations: %r", len(errored), errored)

        log.info("making new index %s current", new_index)
        update_aliased_index(es, new_index)

        # The old index is only removed once the alias has been switched.
        log.info("removing previous index %s", current_index)
        delete_index(es, current_index)

    finally:
        # Always clear the marker setting, whether or not the reindex worked.
        settings.delete(setting_name)
        request.tm.commit()
Exemplo n.º 9
0
def reindex(session, es, request):
    """Build a fresh index holding every annotation and make it the current one.

    The name of the index being built is kept in the settings service under
    ``reindex.new_index`` for the duration of the run. Once indexing is done
    the alias is moved to the new index and the previously aliased index is
    deleted.

    :param session: database session passed through to ``BatchIndexer``
    :param es: search client whose aliased index is being rebuilt
    :param request: request used to look up the ``settings`` and ``nipsa``
        services and to commit via ``request.tm``
    :raises RuntimeError: if the current index is not aliased
    """
    setting_name = "reindex.new_index"

    previous_index = get_aliased_index(es)
    if previous_index is None:
        raise RuntimeError("cannot reindex if current index is not aliased")

    settings_svc = request.find_service(name="settings")

    # Load the userids of shadowbanned users up front.
    request.find_service(name="nipsa").fetch_all_flagged_userids()

    fresh_index = configure_index(es)
    log.info("configured new index %s", fresh_index)

    try:
        # Store the name of the index under construction before indexing.
        settings_svc.put(setting_name, fresh_index)
        request.tm.commit()

        log.info("reindexing annotations into new index %s", fresh_index)
        batch = BatchIndexer(
            session,
            es,
            request,
            target_index=fresh_index,
            op_type="create",
        )

        failed = batch.index()
        if failed:
            # One retry, limited to what failed on the first pass.
            log.debug("failed to index %d annotations, retrying...", len(failed))
            failed = batch.index(failed)
        if failed:
            # Still non-empty here only if the retry failed as well.
            log.warning("failed to index %d annotations: %r", len(failed), failed)

        log.info("making new index %s current", fresh_index)
        update_aliased_index(es, fresh_index)

        log.info("removing previous index %s", previous_index)
        delete_index(es, previous_index)

    finally:
        # The marker setting is removed on success and on failure alike.
        settings_svc.delete(setting_name)
        request.tm.commit()