def load_fixtures(self, file_name):
    """Load a fixture file into the database, then index every document.

    Commits the session before indexing so indexers see persisted rows.
    """
    fixture_path = self.get_fixture_path(file_name)
    load_fixtures(db, loaders.load(fixture_path))
    db.session.commit()
    # Re-index everything the fixtures created:
    for doc in Document.all():
        index_document(doc)
    self.update_index()
def process_document(document):
    """Perform post-ingest tasks like analysis and indexing."""
    analyze_document(document)
    # Index the document itself, then its extracted records.
    for indexer in (index_document, index_records):
        indexer(document)
    collection = document.collection
    if collection.casefile:
        # Casefile collections are kept fresh in the index immediately.
        index_collection(collection)
def ingest(document_id, file_path=None, refresh=False):
    """Process a given document by extracting its contents.
    This may include creating or updating child documents.

    :param document_id: primary key of the Document to process.
    :param file_path: optional local path to the source data; when None,
        the file is fetched from the archive into a temporary work path.
    :param refresh: when True, skip publishing the ingest notification
        (used for re-processing existing documents).
    """
    document = Document.by_id(document_id)
    if document is None:
        log.error("Could not find document: %s", document_id)
        return

    # Work path will be used by storagelayer to cache a local
    # copy of data from an S3-based archive, and by ingestors
    # to perform processing and generate intermediary files.
    work_path = mkdtemp(prefix="aleph.ingest.")
    if file_path is None:
        file_path = archive.load_file(document.content_hash,
                                      file_name=document.safe_file_name,
                                      temp_path=work_path)
    try:
        manager = get_manager()
        result = DocumentResult(manager, document, file_path=file_path)
        # FIX: the original called get_manager() a second time here,
        # discarding the `manager` it had just obtained for the result.
        manager.ingest(file_path, result=result, work_path=work_path)
        document.status = Document.STATUS_SUCCESS
        log.debug('Ingested [%s:%s]: %s',
                  document.id, document.schema, document.name)

        if document.collection.casefile and not refresh:
            # Notify the uploader's followers about the new document.
            params = {'collection': document.collection,
                      'document': document}
            publish(Events.INGEST_DOCUMENT,
                    actor_id=document.uploader_id,
                    params=params)

        db.session.commit()
    except Exception:
        # Roll back and re-fetch the document so the failure status is
        # written against a clean session state.
        db.session.rollback()
        document = Document.by_id(document_id)
        log.exception("Ingest failed [%s]: %s", document.id, document.name)
        document.status = Document.STATUS_FAIL
        db.session.commit()
    finally:
        # Removing the temp_path given to storagelayer makes it redundant
        # to also call cleanup on the archive.
        remove_directory(work_path)

    # Post-processing runs regardless of ingest success, so failed
    # documents are still searchable with their failure status.
    extract_document_tags(document)
    # delete_entity(document.id, exclude=document.schema)
    index_document(document)
    refresh_entity(document)
def ingest_document(document, file_path, role_id=None):
    """Given a stub document and file path, extract information.
    This does not attempt to infer metadata such as a file name."""
    document.status = Document.STATUS_PENDING
    if role_id is not None:
        document.uploader_id = role_id

    # Directories cannot be archived first and then processed
    # later. So they are effectively sent into a short-cut here
    if file_path is not None and os.path.isdir(file_path):
        db.session.commit()
        return ingest(document.id, file_path=file_path)

    if file_path is not None:
        document.content_hash = archive.archive_file(file_path)

    db.session.commit()
    index_document(document)
    # Casefile uploads jump the queue ahead of bulk processing.
    priority = 5 if document.collection.casefile else 3
    ingest.apply_async(args=[document.id], priority=priority)
def update_document(document):
    """Queue analysis and re-index a document.

    These are operations that should be executed after each
    write to a document or its metadata.
    """
    analyze_document_id.apply_async([document.id],
                                    queue=USER_QUEUE,
                                    routing_key=USER_ROUTING_KEY)
    index.index_document(document)
def process_document(document):
    """Perform post-ingest tasks like analysis and indexing."""
    analyze_document(document)
    # Index the document, then its extracted records, in that order.
    for step in (index.index_document, index.index_records):
        step(document)
def update_document(document):
    """Kick off async processing and re-index a document.

    These are operations that should be executed after each
    write to a document or its metadata.
    """
    process_document_id.delay(document.id)
    index.index_document(document)
def process_document(document):
    """Perform post-ingest tasks like analysis and indexing."""
    # Run the pipeline stages in order: tag extraction, entity
    # refresh, then indexing.
    for stage in (extract_document_tags, refresh_entity, index_document):
        stage(document)
def update_document(document):
    """Re-index a document after it has been written."""
    # These are operations that should be executed after each
    # write to a document or its metadata.
    index.index_document(document)
def update_document(document, shallow=False, sync=False):
    """Index a document and refresh its collection.

    These are operations that should be executed after each
    write to a document or its metadata. Returns the indexed
    document data.
    """
    indexed = index.index_document(document, shallow=shallow, sync=sync)
    # Keep collection-level metadata (counts, etc.) in step with the index.
    refresh_collection(document.collection, sync=sync)
    return indexed
def index_document_id(document_id):
    """Index the document with the given id, if it still exists."""
    document = Document.by_id(document_id)
    if document is not None:
        index.index_document(document, shallow=False, sync=False)
    else:
        # The document may have been deleted since the task was queued.
        log.info("Could not find document: %r", document_id)
def update_document(document, shallow=False, sync=False):
    """Refresh and re-index a document.

    These are operations that should be executed after each
    write to a document or its metadata.
    """
    refresh_entity(document, sync=sync)
    result = index.index_document(document, shallow=shallow, sync=sync)
    return result
def process_document(document):
    """Perform post-ingest tasks like analysis and indexing."""
    for task in (extract_document_tags, index_document):
        task(document)
    # Invalidate cached statistics for the parent collection.
    flush_collection_stats(document.collection_id)