示例#1
0
def delete_collection(collection_id, sync=False):
    """Remove the document stored under a collection's ID from the index.

    The ID is converted to ``str`` before use, a 404 response from the
    delete call is ignored, and ``sync`` is handed to ``refresh_sync`` to
    choose the refresh setting for the request.
    """
    index = collections_index()
    doc_id = str(collection_id)
    refresh = refresh_sync(sync)
    es.delete(
        index,
        doc_type='doc',
        id=doc_id,
        refresh=refresh,
        ignore=[404],
    )
示例#2
0
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index.

    Runs a delete-by-query against ``entities_read_index(exclude=exclude)``
    that matches on the entity's document ID (converted to ``str``).

    ``sync`` is used both as ``wait_for_completion`` and, via
    ``refresh_sync``, to choose the refresh setting of the request.
    """
    # The ``ids`` query documents ``values`` as an array of document IDs, so
    # always send a list rather than a bare string.
    query = {'query': {'ids': {'values': [str(entity_id)]}}}
    es.delete_by_query(index=entities_read_index(exclude=exclude),
                       body=query,
                       wait_for_completion=sync,
                       refresh=refresh_sync(sync))
示例#3
0
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. When ``get_collection`` returns ``None`` nothing is indexed and
    ``None`` is returned. Otherwise the collection data, extended with a
    ``text`` list built from its label, foreign ID and summary, is written
    through ``index_safe`` and that call's result is returned.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting, on
    both the index and the delete path.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so the delete path honours the caller's refresh
        # request the same way the index path below does.
        return delete_collection(collection.id, sync=sync)

    data = get_collection(collection.id)
    if data is None:
        return None

    log.info(
        "[%s] Index: %s (%s things)...",
        collection,
        data.get("label"),
        data.get("count"),
    )
    # Keep the raw label next to the normalized variants of each field.
    text = [
        data.get("label"),
        normalize(data.get("label")),
        normalize(data.get("foreign_id")),
        normalize(data.get("summary")),
    ]
    data["text"] = text
    # The ID is passed to index_safe separately, so drop it from the body.
    data.pop("id", None)
    return index_safe(collections_index(),
                      collection.id,
                      data,
                      refresh=refresh_sync(sync))
示例#4
0
文件: collections.py 项目: pudo/aleph
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. When ``get_collection`` returns ``None`` nothing is indexed and
    ``None`` is returned; otherwise the result of ``index_safe`` is returned.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting, on
    both the index and the delete path.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so the delete honours the caller's refresh request.
        return delete_collection(collection.id, sync=sync)

    data = get_collection(collection.id)
    if data is None:
        # Nothing to index; avoid an AttributeError on ``data.pop`` below.
        return None
    # The ID is passed to index_safe separately, so drop it from the body.
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
示例#5
0
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from every index it is found in.

    Each hit returned by ``entities_by_ids`` is deleted from the index named
    in its ``_index`` field. If ``exclude`` is given it is mapped through
    ``entities_write_index`` and hits located in that index are left alone.
    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting.
    """
    if exclude is not None:
        exclude = entities_write_index(exclude)
    for entity in entities_by_ids(entity_id, excludes='*'):
        index = entity.get('_index')
        if index == exclude:
            continue
        # The document may disappear between the lookup above and this call
        # (e.g. a concurrent delete). A 404 then means the work is already
        # done, so ignore it instead of raising.
        es.delete(index=index,
                  id=entity_id,
                  refresh=refresh_sync(sync),
                  ignore=[404])
示例#6
0
文件: entities.py 项目: pudo/aleph
def index_entity(entity, sync=False):
    """Index an entity, or remove it from the index if it is deleted.

    An entity whose ``deleted_at`` is set is handed to ``delete_entity``.
    Otherwise ``index_operation`` derives the ID, target index and body from
    ``entity.to_dict()`` and the result of ``index_safe`` is returned.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting, on
    both the index and the delete path.
    """
    if entity.deleted_at is not None:
        # Forward ``sync`` so the delete honours the caller's refresh request.
        return delete_entity(entity.id, sync=sync)

    entity_id, index, data = index_operation(entity.to_dict())
    refresh = refresh_sync(sync)
    # This is required if an entity changes its type (currently disabled;
    # note that ``proxy`` is not defined in this function):
    # delete_entity(entity_id, exclude=proxy.schema, sync=False)
    return index_safe(index, entity_id, data, refresh=refresh)
示例#7
0
文件: entities.py 项目: pudo/aleph
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index.

    Looks the entity up with ``entities_by_ids`` and deletes every hit from
    the index given by its ``_index`` field. When ``exclude`` is provided,
    the index returned by ``entities_write_index(exclude)`` is skipped.
    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting.
    """
    if exclude is not None:
        exclude = entities_write_index(exclude)
    for entity in entities_by_ids(entity_id, excludes='*'):
        index = entity.get('_index')
        if index == exclude:
            continue
        # Tolerate a 404: if the document was removed after the lookup (for
        # instance by a concurrent delete) there is nothing left to do, and
        # raising would abort the remaining deletions.
        es.delete(index=index, id=entity_id,
                  refresh=refresh_sync(sync),
                  ignore=[404])
示例#8
0
def index_entity(entity, sync=False):
    """Index an entity.

    If the entity's ``deleted_at`` is set, it is deleted from the index via
    ``delete_entity`` instead. Otherwise the ID, index and body produced by
    ``index_operation(entity.to_dict())`` are written with ``index_safe``,
    whose result is returned.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting and
    is also forwarded on the delete path.
    """
    if entity.deleted_at is not None:
        # Pass ``sync`` through; previously the delete path always used the
        # default and ignored the caller's request.
        return delete_entity(entity.id, sync=sync)

    entity_id, index, data = index_operation(entity.to_dict())
    refresh = refresh_sync(sync)
    # This is required if an entity changes its type (currently disabled;
    # note that ``proxy`` is not defined in this function):
    # delete_entity(entity_id, exclude=proxy.schema, sync=False)
    return index_safe(index, entity_id, data, refresh=refresh)
示例#9
0
def index_collection(collection, sync=False):
    """Index a collection.

    Deleted collections (``deleted_at`` set) are removed from the index
    instead. If ``get_collection`` returns ``None`` the function returns
    ``None`` without indexing; otherwise it returns the ``index_safe`` result.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting and
    is also forwarded on the delete path.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so the delete honours the caller's refresh request.
        return delete_collection(collection.id, sync=sync)

    data = get_collection(collection.id)
    if data is None:
        # No data to index; ``data.pop`` below would raise AttributeError.
        return None
    # The ID is passed to index_safe separately, so drop it from the body.
    data.pop('id', None)
    return index_safe(collections_index(),
                      collection.id,
                      data,
                      refresh=refresh_sync(sync))
示例#10
0
def index_single(obj, proxy, data, texts, sync=False):
    """Finalize and index one object; shared by entities and documents.

    The body returned by ``finalize_index`` is marked as non-bulk and given
    the object's collection ID and timestamps. When
    ``settings.ENTITIES_INDEX_SPLIT`` is set, ``delete_entity`` is called for
    the object with the proxy's schema as ``exclude`` before the write.
    Returns the result of ``index_safe``.
    """
    doc = finalize_index(proxy, data, texts)
    doc.update({
        'bulk': False,
        'collection_id': obj.collection_id,
        'created_at': obj.created_at,
        'updated_at': obj.updated_at,
    })
    target_index = entities_write_index(proxy.schema)
    refresh_mode = refresh_sync(sync)
    if settings.ENTITIES_INDEX_SPLIT:
        delete_entity(obj.id, exclude=proxy.schema, sync=False)
    return index_safe(target_index, obj.id, doc, refresh=refresh_mode)
示例#11
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Build a notification document and write it to the notifications index.

    Only parameters named in ``event.params`` are kept, each passed through
    ``get_entity_id``. ``None`` channels are dropped and duplicates removed.
    The document ID is ``hash_data`` of the actor, event name, channels and
    parameters. Returns the result of ``index_safe``.
    """
    supplied = params or {}
    params = {}
    for name in event.params.keys():
        params[name] = get_entity_id(supplied.get(name))
    # NOTE(review): the list order produced from a set is not guaranteed;
    # confirm hash_data is insensitive to list order, otherwise the ID below
    # may differ between runs for the same notification.
    channels = list({c for c in channels if c is not None})
    data = {
        'actor_id': actor_id,
        'params': params,
        'event': event.name,
        'channels': channels,
        'created_at': datetime.utcnow(),
    }
    target_index = notifications_index()
    doc_id = hash_data((actor_id, event.name, channels, params))
    refresh = refresh_sync(sync)
    return index_safe(target_index, doc_id, data, refresh=refresh)
示例#12
0
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is removed from the index
    instead. When ``get_collection`` returns ``None`` nothing is indexed and
    ``None`` is returned. Otherwise the collection data, extended with a
    ``text`` list built from its label, foreign ID and summary, is written
    through ``index_safe`` and that call's result is returned.

    ``sync`` is passed to ``refresh_sync`` to choose the refresh setting, on
    both the index and the delete path.
    """
    if collection.deleted_at is not None:
        # Forward ``sync`` so the delete honours the caller's refresh request.
        return delete_collection(collection.id, sync=sync)

    log.info("Index [%s]: %s", collection.id, collection.label)
    data = get_collection(collection.id)
    if data is None:
        # No data to index; ``data.get`` below would raise AttributeError.
        return None

    # Keep the raw label next to the normalized variants of each field.
    text = [
        data.get('label'),
        normalize(data.get('label')),
        normalize(data.get('foreign_id')),
        normalize(data.get('summary')),
    ]
    data['text'] = text
    # The ID is passed to index_safe separately, so drop it from the body.
    data.pop('id', None)
    return index_safe(collections_index(),
                      collection.id,
                      data,
                      refresh=refresh_sync(sync))
示例#13
0
def index_bulk(collection_id, entities):
    """Bulk-index a set of entities while holding the ``index_bulk`` lock.

    The actions come from ``_index_updates`` and are sent with a chunk size
    one larger than the number of actions. Returns the result of ``bulk``;
    a ``BulkIndexError`` is logged as a warning and ``None`` is returned.
    The lock is always released afterwards, and a failure to release it is
    logged rather than raised.
    """
    index_lock = cache.lock(cache.key('index_bulk'))
    index_lock.acquire(blocking=True)
    outcome = None
    try:
        updates = _index_updates(collection_id, entities)
        outcome = bulk(
            es,
            updates,
            chunk_size=len(updates) + 1,
            max_retries=10,
            initial_backoff=2,
            request_timeout=REQUEST_TIMEOUT,
            timeout=TIMEOUT,
            refresh=refresh_sync(True),
        )
    except BulkIndexError as exc:
        log.warning('Indexing error: %s', exc)
    finally:
        try:
            index_lock.release()
        except Exception:
            log.exception("Cannot release index lock.")
    return outcome
示例#14
0
def delete_entity(entity_id, exclude=None, sync=False):
    """Remove an entity, and documents that reference it, from the index.

    Every hit from ``entities_by_ids`` is deleted from the index named in its
    ``_index`` field, followed by a ``query_delete`` on
    ``entities_read_index()`` for documents whose ``entities`` field holds
    this ID. Hits in the index derived from ``exclude`` are skipped.
    """
    skip_index = exclude
    if skip_index is not None:
        skip_index = entities_write_index(skip_index)
    for hit in entities_by_ids(entity_id, excludes='*'):
        hit_index = hit.get('_index')
        if hit_index == skip_index:
            continue
        try:
            es.delete(index=hit_index,
                      id=entity_id,
                      refresh=refresh_sync(sync))
            references = {'term': {'entities': entity_id}}
            query_delete(entities_read_index(), references, sync=sync)
        except NotFoundError:
            # A 404 is expected in some cases: when two Things are linked by
            # an Interval and all three are deleted at once, the recursive
            # deletion of adjacent entities attempts to delete the Interval
            # three times. Skip both the delete and the follow-up
            # query_delete for this hit and move on.
            log.warning("Delete failed for entity %s - not found", entity_id)
示例#15
0
文件: collections.py 项目: pudo/aleph
def delete_collection(collection_id, sync=False):
    """Remove the document stored under a collection's ID from the index.

    The ID is converted to ``str`` before use, a 404 response from the
    delete call is ignored, and ``sync`` is handed to ``refresh_sync`` to
    choose the refresh setting for the request.
    """
    index = collections_index()
    doc_id = str(collection_id)
    refresh = refresh_sync(sync)
    es.delete(index, id=doc_id, refresh=refresh, ignore=[404])