Example #1
def process_collection(stage,
                       collection,
                       ingest=True,
                       reset=False,
                       sync=False):
    """Trigger a full re-parse of all documents and re-build the
    search index from the aggregator."""
    ingest = ingest or reset
    if reset:
        reset_collection(collection, sync=True)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        for proxy in _collection_proxies(collection):
            writer.put(proxy, fragment='db')
            stage.report_finished(1)
        writer.flush()
        if ingest:
            for proxy in aggregator:
                ingest_entity(collection, proxy, job_id=stage.job.id)
        else:
            queue_task(collection,
                       OP_INDEX,
                       job_id=stage.job.id,
                       context={'sync': sync})
    finally:
        aggregator.close()
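The shape to note here is the try/finally around the aggregator: the bulk writer buffers fragments and must be flushed, and the aggregator must be closed even if ingestion raises. Below is a minimal self-contained sketch of that pattern, with stub classes standing in for aleph's aggregator; all names in the sketch are illustrative, not aleph's API:

class StubBulkWriter:
    """Buffers fragments; a real writer would batch them to storage."""
    def __init__(self):
        self.buffer = []

    def put(self, proxy, fragment=None):
        self.buffer.append((proxy, fragment))

    def flush(self):
        print("flushed %d fragments" % len(self.buffer))
        self.buffer.clear()

class StubAggregator:
    def bulk(self):
        return StubBulkWriter()

    def close(self):
        print("aggregator closed")

def reindex(proxies):
    aggregator = StubAggregator()
    try:
        writer = aggregator.bulk()
        for proxy in proxies:
            writer.put(proxy, fragment='db')
        writer.flush()
    finally:
        # Runs whether or not put()/flush() raised, as in Example #1.
        aggregator.close()

reindex(['entity-1', 'entity-2'])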
Example #2
File: manage.py Project: wdsn/aleph
def update(foreign_id=None, index=False, process=False, reset=False):
    """Re-index all the collections and entities."""
    update_roles()
    q = Collection.all(deleted=True)
    if foreign_id is not None:
        q = [get_collection(foreign_id)]
    for collection in q:
        if reset:
            reset_collection(collection, sync=True)
        refresh_collection(collection.id)
        index_collection(collection)
        if collection.deleted_at is not None:
            continue
        if index or process:
            payload = {'ingest': process}
            queue_task(collection, OP_PROCESS, payload=payload)
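A detail that is easy to miss: when foreign_id is given, the query object q is swapped for a one-element list, so the loop iterates uniformly over either source. A small sketch of that all-or-one selection pattern (the collection registry below is made up for illustration):

COLLECTIONS = {'kb': 'Knowledge Base', 'leaks': 'Leak Archive'}

def iter_collections(foreign_id=None):
    # Mirrors Example #2: a specific id narrows the run to one
    # collection, otherwise every collection is visited.
    if foreign_id is not None:
        return [COLLECTIONS[foreign_id]]
    return list(COLLECTIONS.values())

for label in iter_collections():
    print('updating', label)
for label in iter_collections('leaks'):
    print('updating only', label)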
Example #3
def process_collection(collection, ingest=True, reset=False):
    """Trigger a full re-parse of all documents and re-build the
    search index from the aggregator."""
    if reset:
        reset_collection(collection)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        for proxy in _collection_proxies(collection):
            writer.put(proxy, fragment='db')
            if ingest:
                ingest_entity(collection, proxy)
        writer.flush()
        if ingest:
            ingest_wait(collection)
        else:
            index_entities(collection, aggregator)
    finally:
        aggregator.close()
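Compared with Example #1, this variant triggers ingestion per proxy inside the write loop and then blocks on ingest_wait, instead of ingesting after the flush or queueing an index task. The two orderings, reduced to a schematic (plain prints, illustrative only):

def ordering_example_1(proxies):
    # Example #1: write everything, flush, then ingest.
    for p in proxies:
        print('put', p)
    print('flush')
    for p in proxies:
        print('ingest', p)

def ordering_example_3(proxies):
    # Example #3: ingest while writing, flush, then wait for ingest.
    for p in proxies:
        print('put', p)
        print('ingest', p)
    print('flush')
    print('wait for ingest to drain')

ordering_example_1(['a', 'b'])
ordering_example_3(['a', 'b'])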
Example #4
def handle(self, task):
    stage = task.stage
    payload = task.payload
    collection = Collection.by_foreign_id(task.job.dataset.name)
    if collection is None:
        log.error("Collection not found: %s", task.job.dataset)
        return
    sync = task.context.get('sync', False)
    if stage.stage == OP_INDEX:
        index_aggregate(stage, collection, sync=sync, **payload)
    if stage.stage == OP_BULKLOAD:
        bulk_load(stage, collection, payload)
    if stage.stage == OP_PROCESS:
        if payload.pop('reset', False):
            reset_collection(collection, sync=True)
        process_collection(stage, collection, sync=sync, **payload)
    if stage.stage == OP_XREF:
        xref_collection(stage, collection, **payload)
    if stage.stage == OP_XREF_ITEM:
        xref_item(stage, collection, **payload)
    log.info("Task [%s]: %s (done)", task.job.dataset, stage.stage)
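The handler walks a chain of independent if checks on stage.stage. One alternative worth considering is a dict dispatch, sketched below with stub operations; this restructuring is a suggestion, not how aleph's worker is written:

def op_index(collection, **payload):
    print('index', collection, payload)

def op_process(collection, **payload):
    print('process', collection, payload)

HANDLERS = {
    'index': op_index,
    'process': op_process,
}

def handle(stage_name, collection, payload):
    handler = HANDLERS.get(stage_name)
    if handler is None:
        print('unknown stage:', stage_name)
        return
    handler(collection, **payload)

handle('process', 'my-collection', {'ingest': True})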
Example #5
def reset(foreign_id, sync=False):
    """Clear the search index and entity cache or a collection."""
    collection = get_collection(foreign_id)
    reset_collection(collection, sync=sync)
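With the fix above, the caller's sync flag actually reaches reset_collection rather than being dropped. A quick sanity check with a stub (names illustrative):

def reset_collection(collection, sync=False):
    print('resetting %r (sync=%s)' % (collection, sync))

def reset(collection, sync=False):
    # Forward the flag instead of hard-coding sync=False.
    reset_collection(collection, sync=sync)

reset('my-collection', sync=True)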