def reingest_collection(collection, job_id=None, index=False, flush=True):
    """Trigger a re-ingest for all documents in the collection."""
    job_id = job_id or Job.random_id()
    if flush:
        ingest_flush(collection)
    for document in Document.by_collection(collection.id):
        proxy = document.to_proxy(ns=collection.ns)
        ingest_entity(collection, proxy, job_id=job_id, index=index)

def reingest_collection(collection, job_id=None, index=False):
    """Trigger a re-ingest for all documents in the collection."""
    job_id = job_id or Job.random_id()
    # Drop previously ingested and analysed fragments from the aggregator
    # before the documents are queued for processing again.
    aggregator = get_aggregator(collection)
    aggregator.delete(origin=OP_ANALYZE)
    aggregator.delete(origin=OP_INGEST)
    aggregator.close()
    for document in Document.by_collection(collection.id):
        proxy = document.to_proxy(ns=collection.ns)
        ingest_entity(collection, proxy, job_id=job_id, index=index)

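# Usage sketch, not part of the original module: kick off a re-ingest for one
# collection. The aleph.model import path and the Collection.by_foreign_id
# lookup are assumptions made for illustration only.
def _example_reingest(foreign_id):
    from aleph.model import Collection

    collection = Collection.by_foreign_id(foreign_id)
    if collection is not None:
        # A fresh random job id is generated; index=True is passed through
        # to ingest_entity above.
        reingest_collection(collection, index=True)
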
def generate_collection_docs(collection):
    """Yield indexable entries for all documents in the collection."""
    q = Document.by_collection(collection.id)
    q = q.order_by(Document.id.asc())
    for idx, document in enumerate(q.yield_per(BULK_PAGE)):
        try:
            log.info("Index [%s]: %s", document.id, document.name)
            yield from generate_document(document)
        except Exception:
            log.exception("Cannot index [%s]: %s", document.id, document.name)
        if idx % 1000 == 0:
            # Periodically detach loaded objects so the SQLAlchemy session
            # does not grow without bound on large collections.
            db.session.expunge_all()

def aggregate_model(collection, aggregator):
    """Sync up the aggregator from the Aleph domain model."""
    log.debug("[%s] Aggregating model...", collection)
    aggregator.delete(origin=MODEL_ORIGIN)
    writer = aggregator.bulk()
    for document in Document.by_collection(collection.id):
        proxy = document.to_proxy(ns=collection.ns)
        writer.put(proxy, fragment="db", origin=MODEL_ORIGIN)
    for entity in Entity.by_collection(collection.id):
        proxy = entity.to_proxy()
        aggregator.delete(entity_id=proxy.id)
        writer.put(proxy, fragment="db", origin=MODEL_ORIGIN)
    writer.flush()

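# Usage sketch, not part of the original module: rebuild a collection's
# aggregator from the database model. get_aggregator and aggregator.close()
# are assumed to behave as in reingest_collection above.
def _example_aggregate(collection):
    aggregator = get_aggregator(collection)
    try:
        aggregate_model(collection, aggregator)
    finally:
        aggregator.close()
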
def _export_balkhash_collection(collection, retries=0, backoff=30, offset=0):
    """Write the documents and records of a collection to a balkhash
    dataset, retrying with exponential backoff on database errors."""
    MAX_RETRIES = 5
    RETRY_BACKOFF_FACTOR = 2
    try:
        from followthemoney import model

        dataset = get_dataset(collection.foreign_id)
        writer = dataset.bulk()
        q = Document.by_collection(collection.id)
        q = q.order_by(Document.id.asc()).offset(offset)
        for doc in q.yield_per(5000):
            log.debug("Export [%s:%s]: %s", doc.id, doc.schema, doc.name)
            dproxy = doc.to_proxy()
            writer.put(dproxy)
            if doc.supports_records:
                # Emit each document record as its own proxy, plus a fragment
                # on the parent document carrying the record's text.
                rq = db.session.query(DocumentRecord)
                rq = rq.filter(DocumentRecord.document_id == doc.id)
                for record in rq.yield_per(100):
                    rproxy = record.to_proxy()
                    writer.put(rproxy)
                    dpart = model.make_entity(doc.schema)
                    dpart.id = dproxy.id
                    dpart.add('indexText', list(record.texts))
                    writer.put(dpart, fragment=str(record.id))
            offset += 1
        dataset.close()
    except DBAPIError as exc:
        if retries < MAX_RETRIES:
            log.debug("Error occurred: %s", exc)
            log.debug("Retrying in %s seconds", backoff)
            db.session.close()
            dataset.close()
            time.sleep(backoff)
            retries = retries + 1
            backoff = backoff * RETRY_BACKOFF_FACTOR
            # Resume from the current offset rather than starting over.
            return _export_balkhash_collection(collection, retries, backoff, offset)
        else:
            log.exception(exc)

def _proxies(collection):
    """Generate proxies for all entities and documents in the collection."""
    for entity in Entity.by_collection(collection.id).yield_per(5000):
        yield entity.to_proxy()
    for document in Document.by_collection(collection.id).yield_per(5000):
        yield document.to_proxy()

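# Usage sketch, not part of the original module: stream every proxy of a
# collection and tally them by schema. Only _proxies comes from the code
# above; the Counter logic and the proxy.schema.name attribute (from
# followthemoney's EntityProxy) are illustrative assumptions.
def _example_schema_counts(collection):
    from collections import Counter

    counts = Counter()
    for proxy in _proxies(collection):
        counts[proxy.schema.name] += 1
    return counts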