def process_collection(stage, collection, ingest=True, reset=False, sync=False):
    """Trigger a full re-parse of all documents and re-build the search
    index from the aggregator."""
    ingest = ingest or reset
    if reset:
        reset_collection(collection, sync=True)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        for proxy in _collection_proxies(collection):
            writer.put(proxy, fragment='db')
            stage.report_finished(1)
        writer.flush()
        if ingest:
            for proxy in aggregator:
                ingest_entity(collection, proxy, job_id=stage.job.id)
        else:
            queue_task(collection, OP_INDEX,
                       job_id=stage.job.id, context={'sync': sync})
    finally:
        aggregator.close()

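# Every snippet in this section funnels work through a `queue_task` helper
# whose definition is not shown. Judging purely from the call sites, a
# minimal sketch of its interface might look like this; the queue backend
# and job bookkeeping are assumptions, not part of the source:
def queue_task(collection, operation, job_id=None, context=None, payload=None):
    """Queue a background `operation`, optionally scoped to `collection`,
    passing `payload` data through to the worker."""
    ...  # hand off to the task queue (implementation not shown)

# Note: some call sites below pass payload fields as extra keyword arguments
# instead (e.g. `mapping_id=...`, `export_id=...`), which suggests a
# `**payload` variant of the same helper in other versions of the code.
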
def generate(collection_id):
    data = parse_request(XrefSchema)
    collection = get_db_collection(collection_id, request.authz.WRITE)
    against = ensure_list(data.get("against_collection_ids"))
    payload = {'against_collection_ids': against}
    queue_task(collection, OP_XREF, payload=payload)
    return jsonify({'status': 'accepted'}, status=202)

def export(collection_id):
    """
    ---
    post:
      summary: Download cross-reference results
      description: Download results of cross-referencing as an Excel file
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '202':
          description: Accepted
      tags:
      - Xref
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.READ)
    label = "%s - Crossreference results" % collection.label
    export = create_export(
        operation=OP_EXPORT_XREF_RESULTS,
        role_id=request.authz.id,
        label=label,
        collection=collection,
        mime_type=XLSX,
    )
    job_id = get_session_id()
    payload = {
        "collection_id": collection_id,
        "export_id": export.id,
    }
    queue_task(None, OP_EXPORT_XREF_RESULTS, job_id=job_id, payload=payload)
    return ("", 202)

def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    queue_task(collection, OP_FLUSH_MAPPING, job_id=get_session_id(),
               payload={'mapping_id': mapping.id})
    return ('', 202)

def generate(collection_id):
    """
    ---
    post:
      summary: Generate cross-reference matches
      description: >
        Generate cross-reference matches for entities in a collection.
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      responses:
        '202':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                type: object
          description: Accepted
      tags:
      - Xref
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    queue_task(collection, OP_XREF)
    return jsonify({"status": "accepted"}, status=202)

def reingest(collection_id):
    """
    ---
    post:
      summary: Re-ingest a collection
      description: >
        Trigger a process to re-parse the content of all documents stored
        in the collection with id `collection_id`.
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        name: index
        description: Index documents while they're being processed.
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    data = {"index": get_flag("index", False)}
    queue_task(collection, OP_REINGEST, job_id=job_id, payload=data)
    return ("", 202)

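# `get_flag` is used throughout these views to read boolean query
# parameters. Its definition is not shown; a plausible sketch, assuming
# banal's `as_bool` coercion helper, would be:
from banal import as_bool
from flask import request

def get_flag(name, default=False):
    """Read a boolean flag such as ?index=true from the query string."""
    return as_bool(request.args.get(name), default=default)
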
def reindex(collection_id):
    """
    ---
    post:
      summary: Re-index a collection
      description: >
        Re-index the entities in the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        description: Delete the index before re-generating it.
        name: flush
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    job_id = get_session_id()
    data = {"flush": get_flag("flush", False)}
    queue_task(collection, OP_REINDEX, job_id=job_id, payload=data)
    return ("", 202)

def export():
    """
    ---
    post:
      summary: Download the results of a search
      description: >-
        Downloads all the results of a search as a zip archive, up to a
        maximum of 10,000 results. The returned file will contain an Excel
        document with structured data as well as the binary files from all
        matching documents. Supports the same query parameters as the
        search API.
      responses:
        '202':
          description: Accepted
      tags:
      - Entity
    """
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    tag_request(query=parser.text, prefix=parser.prefix)
    query = EntitiesQuery(parser)
    label = gettext("Search: %s") % query.to_text()
    export = create_export(
        operation=OP_EXPORT_SEARCH,
        role_id=request.authz.id,
        label=label,
        mime_type=ZIP,
        meta={"query": query.get_full_query()},
    )
    job_id = get_session_id()
    queue_task(None, OP_EXPORT_SEARCH, job_id=job_id, export_id=export.id)
    return ("", 202)

def upsert_entity(data, collection, authz=None, sync=False, sign=False, job_id=None):
    """Create or update an entity in the database. This has a side effect
    of migrating entities created via the _bulk API or a mapper to a
    database entity in the event that it gets edited by the user.
    """
    from aleph.logic.profiles import profile_fragments

    entity = None
    entity_id = collection.ns.sign(data.get("id"))
    if entity_id is not None:
        entity = Entity.by_id(entity_id, collection=collection)
    if entity is None:
        role_id = authz.id if authz is not None else None
        entity = Entity.create(data, collection, sign=sign, role_id=role_id)
    else:
        entity.update(data, collection, sign=sign)
    collection.touch()

    proxy = entity.to_proxy()
    aggregator = get_aggregator(collection)
    aggregator.delete(entity_id=proxy.id)
    aggregator.put(proxy, origin=MODEL_ORIGIN)
    profile_fragments(collection, aggregator, entity_id=proxy.id)

    index.index_proxy(collection, proxy, sync=sync)
    refresh_entity(collection, proxy.id)
    queue_task(collection, OP_UPDATE_ENTITY, job_id=job_id, entity_id=proxy.id)
    return entity.id

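# A hedged usage sketch for upsert_entity, creating a simple Person. The
# dict shape follows FollowTheMoney's entity format; `collection` and the
# request-bound `authz` are assumed to come from the surrounding context.
data = {
    "schema": "Person",
    "properties": {"name": ["Jane Doe"]},
}
entity_id = upsert_entity(data, collection, authz=request.authz, sync=True)
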
def xref_collection(stage, collection):
    """Cross-reference all the entities and documents in a collection."""
    index.delete_xref(collection)
    matchable = [s.name for s in model if s.matchable]
    entities = iter_entities(collection_id=collection.id, schemata=matchable)
    for entity in entities:
        queue_task(collection, OP_XREF_ITEM, job_id=stage.job.id,
                   payload={'entity_id': entity.get('id')})

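# The `matchable` filter above restricts cross-referencing to schemata that
# the FollowTheMoney model flags as matchable. A quick way to inspect that
# set (the printed output is illustrative, not exhaustive):
from followthemoney import model

matchable = [s.name for s in model if s.matchable]
print(sorted(matchable))  # e.g. ['Company', 'LegalEntity', 'Person', ...]
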
def item_update(entityset_id):
    """Add an item to the entity set with id `entityset_id`, or change
    the item's judgement. To delete an item from the entity set, apply
    the judgement: `no_judgement`.
    ---
    post:
      summary: Add item to an entityset
      parameters:
      - description: The entityset id.
        in: path
        name: entityset_id
        required: true
        schema:
          type: string
        example: 3a0d91ece2dce88ad3259594c7b642485235a048
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EntitySetItemUpdate'
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EntitySetItem'
          description: OK
        '204':
          description: Item removed
      tags:
      - EntitySetItem
    """
    entityset = get_entityset(entityset_id, request.authz.WRITE)
    data = parse_request("EntitySetItemUpdate")
    entity = data.pop("entity", {})
    entity_id = data.pop("entity_id", entity.get("id"))
    entity = get_index_entity(entity_id, request.authz.READ)
    collection = get_db_collection(entity["collection_id"])
    data["added_by_id"] = request.authz.id
    data.pop("collection", None)
    item = save_entityset_item(entityset, collection, entity_id, **data)
    db.session.commit()
    job_id = get_session_id()
    queue_task(collection, OP_UPDATE_ENTITY, job_id=job_id, entity_id=entity_id)
    if item is not None:
        # The entityset is needed to check if the item is writeable
        # in the serializer:
        item = item.to_dict(entityset=entityset)
    else:
        item = {
            "id": "$".join((entityset_id, entity_id)),
            "entityset_id": entityset_id,
            "entityset_collection_id": entityset.collection_id,
            "entity_id": entity_id,
            "collection_id": entity["collection_id"],
            "judgement": Judgement.NO_JUDGEMENT,
        }
    return EntitySetItemSerializer.jsonify(item)

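# For reference, the Judgement values used above presumably come from an
# enum along these lines; the exact member list is an assumption based on
# the `no_judgement` value mentioned in the docstring:
import enum

class Judgement(enum.Enum):
    POSITIVE = "positive"        # assumed member
    NEGATIVE = "negative"        # assumed member
    UNSURE = "unsure"            # assumed member
    NO_JUDGEMENT = "no_judgement"
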
def process(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    payload = {
        'ingest': get_flag('ingest', True),
        'reset': get_flag('reset', True),
    }
    queue_task(collection, OP_PROCESS, payload=payload)
    return ('', 202)

def process(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    data = {'reset': get_flag('reset', True)}
    queue_task(collection, OP_PROCESS, job_id=get_session_id(), payload=data)
    collection.touch()
    db.session.commit()
    refresh_collection(collection_id)
    return ('', 202)

def bulkload(file_name):
    """Load entities from the specified mapping file."""
    log.info("Loading bulk data from: %s", file_name)
    config = load_mapping_file(file_name)
    for foreign_id, data in config.items():
        data['foreign_id'] = foreign_id
        data['label'] = data.get('label', foreign_id)
        create_collection(data)
        collection = Collection.by_foreign_id(foreign_id)
        queue_task(collection, OP_BULKLOAD, payload=data)

def load_mapping(stage, collection, mapping_id):
    """Flush and reload all entities generated by a mapping."""
    mapping = Mapping.by_id(mapping_id)
    if mapping is None:
        return log.error("Could not find mapping: %s", mapping_id)
    flush_mapping(stage, collection, mapping_id)
    publish(Events.LOAD_MAPPING,
            params={'collection': collection, 'table': mapping.table_id},
            channels=[collection, mapping.role],
            actor_id=mapping.role_id)
    mapper = make_mapper(collection, mapping)
    aggregator = get_aggregator(collection)
    try:
        writer = aggregator.bulk()
        entities_count = 0
        entity_ids = set()
        for idx, record in enumerate(mapper.source.records, 1):
            for entity in mapper.map(record).values():
                if entity.schema.is_a('Thing'):
                    entity.add('proof', mapping.table_id)
                entity = collection.ns.apply(entity)
                entity_ids.add(entity.id)
                entities_count += 1
                fragment = '%s-%s' % (mapping.id, idx)
                writer.put(entity, fragment=fragment)
            if idx > 0 and idx % 500 == 0:
                payload = {
                    'entity_ids': entity_ids,
                    'mapping_id': mapping.id,
                }
                queue_task(collection, OP_INDEX,
                           job_id=stage.job.id, payload=payload)
                entity_ids = set()
                stage.report_finished(500)
                log.info("[%s] Loaded %s records, %s entities...",
                         collection.foreign_id, idx, entities_count)
        writer.flush()
        payload = {
            'entity_ids': entity_ids,
            'mapping_id': mapping.id,
        }
        queue_task(collection, OP_INDEX,
                   job_id=stage.job.id, payload=payload)
        mapping.set_status(status=Mapping.SUCCESS)
        log.info("[%s] Mapping done (%s entities)",
                 mapping.id, entities_count)
    except Exception as exc:
        mapping.set_status(status=Mapping.FAILED, error=str(exc))
    finally:
        aggregator.close()

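# Worth noting in load_mapping above: entity ids are accumulated and an
# OP_INDEX task is queued every 500 source records, so indexing can begin
# while the mapping is still running. The same pattern in a generic,
# reusable form (a sketch, not part of the source):
def in_batches(items, size=500):
    """Yield lists of at most `size` items from any iterable."""
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) >= size:
            yield batch
            batch = []
    if batch:
        yield batch
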
def bulk_load(queue, collection, config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and
    links which can be understood by the entity index.
    """
    queries = keys_values(config, 'queries', 'query')
    for query in queries:
        bulk_load_query(queue, collection, hash_data(query), query)
    queue_task(collection, OP_INDEX)
    queue.remove()

def pairwise():
    """
    ---
    post:
      summary: Make a pairwise judgement between an entity and a match.
      description: >
        This lets a user decide if they think a given xref match is a true
        or false match. Implicitly, this might create or alter a profile in
        the collection containing the entity.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Pairwise'
      responses:
        '200':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                  profile_id:
                    description: profile_id for `entity`.
                    type: string
                type: object
          description: Accepted
      tags:
      - Profile
    """
    data = parse_request("Pairwise")
    entity = get_index_entity(data.get("entity_id"))
    collection = get_db_collection(entity["collection_id"], request.authz.WRITE)
    match = get_index_entity(data.get("match_id"))
    match_collection = get_db_collection(match["collection_id"])
    profile = decide_pairwise(
        collection,
        entity,
        match_collection,
        match,
        judgement=data.get("judgement"),
        authz=request.authz,
    )
    job_id = get_session_id()
    queue_task(collection, OP_UPDATE_ENTITY, job_id=job_id,
               entity_id=entity.get("id"))
    profile_id = profile.id if profile is not None else None
    return jsonify({"status": "ok", "profile_id": profile_id}, status=200)

def mapping(collection_id):
    collection = get_db_collection(collection_id, request.authz.WRITE)
    require(request.authz.can_bulk_import())
    if not request.is_json:
        raise BadRequest()
    data = request.get_json().get(collection.foreign_id)
    for query in keys_values(data, 'queries', 'query'):
        try:
            model.make_mapping(query)
        except InvalidMapping as invalid:
            raise BadRequest(invalid)
    queue_task(collection, OP_BULKLOAD, payload=data)
    return ('', 202)

def xref_collection(stage, collection, against_collection_ids=None):
    """Cross-reference all the entities and documents in a collection."""
    matchable = [s.name for s in model if s.matchable]
    entities = iter_entities(collection_id=collection.id, schemata=matchable)
    for entity in entities:
        payload = {
            'entity_id': entity.get('id'),
            'against_collection_ids': against_collection_ids,
        }
        queue_task(collection, OP_XREF_ITEM,
                   job_id=stage.job.id, payload=payload)

def process_collection(stage, collection, ingest=True, sync=False):
    """Trigger a full re-parse of all documents and re-build the search
    index from the aggregator."""
    aggregator = get_aggregator(collection)
    for proxy in _collection_proxies(collection):
        if ingest and proxy.schema.is_a(Document.SCHEMA):
            ingest_entity(collection, proxy, job_id=stage.job.id, sync=sync)
        else:
            aggregator.put(proxy, fragment='db')
            queue_task(collection, OP_INDEX,
                       job_id=stage.job.id,
                       payload={'entity_id': proxy.id},
                       context={'sync': sync})
    aggregator.close()

def update(foreign_id=None, index=False, process=False, reset=False):
    """Re-index all the collections and entities."""
    update_roles()
    q = Collection.all(deleted=True)
    if foreign_id is not None:
        q = [get_collection(foreign_id)]
    for collection in q:
        if reset:
            reset_collection(collection, sync=True)
        refresh_collection(collection.id)
        index_collection(collection)
        if collection.deleted_at is not None:
            continue
        if index or process:
            payload = {'ingest': process}
            queue_task(collection, OP_PROCESS, payload=payload)

def flush(collection_id, mapping_id):
    """Flush all entities loaded by mapping with id `mapping_id`.
    ---
    post:
      summary: Flush entities loaded by a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.disabled = True
    mapping.last_run_status = None
    mapping.last_run_err_msg = None
    db.session.add(mapping)
    db.session.commit()
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=get_session_id(),
        mapping_id=mapping_id,
    )
    return ("", 202)

def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.disabled = False
    mapping.set_status(Status.PENDING)
    db.session.commit()
    job_id = get_session_id()
    queue_task(collection, OP_LOAD_MAPPING, job_id=job_id, mapping_id=mapping.id)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    return MappingSerializer.jsonify(mapping, status=202)

def export():
    """
    ---
    post:
      summary: Download the results of a search
      description: >-
        Downloads all the results of a search as a zip archive, up to a
        maximum of 10,000 results. The returned file will contain an Excel
        document with structured data as well as the binary files from all
        matching documents. Supports the same query parameters as the
        search API.
      responses:
        '202':
          description: Accepted
      tags:
      - Entity
    """
    require(request.authz.logged_in)
    parser = SearchQueryParser(request.args, request.authz)
    parser.limit = MAX_PAGE
    tag_request(query=parser.text, prefix=parser.prefix)
    result = EntitiesQuery.handle(request, parser=parser)
    label = "Search results for query: %s" % parser.text
    export = create_export(
        operation=OP_EXPORT_SEARCH_RESULTS,
        role_id=request.authz.id,
        label=label,
        file_path=None,
        expires_after=Export.DEFAULT_EXPIRATION,
        collection=None,
        mime_type=ZIP,
    )
    job_id = get_session_id()
    payload = {
        "export_id": export.id,
        "result": result.to_dict(),
    }
    queue_task(None, OP_EXPORT_SEARCH_RESULTS, job_id=job_id, payload=payload)
    return ("", 202)

def delete(collection_id, mapping_id):
    """Delete a mapping.
    ---
    delete:
      summary: Delete a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '204':
          description: No Content
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    mapping.delete()
    db.session.commit()
    queue_task(
        collection,
        OP_FLUSH_MAPPING,
        job_id=get_session_id(),
        mapping_id=mapping_id,
    )
    return ("", 204)

def generate(collection_id):
    """
    ---
    post:
      summary: Generate cross-reference matches
      description: >
        Generate cross-reference matches for entities in a collection.
      parameters:
      - in: path
        name: collection_id
        required: true
        schema:
          type: integer
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/XrefGenerate'
      responses:
        '202':
          content:
            application/json:
              schema:
                properties:
                  status:
                    description: accepted
                    type: string
                type: object
          description: Accepted
      tags:
      - Xref
      - Collection
    """
    data = parse_request('XrefGenerate')
    collection = get_db_collection(collection_id, request.authz.WRITE)
    against = ensure_list(data.get("against_collection_ids"))
    payload = {'against_collection_ids': against}
    queue_task(collection, OP_XREF, payload=payload)
    return jsonify({'status': 'accepted'}, status=202)

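# A hedged usage sketch for the endpoint above, called over HTTP with the
# `requests` library. The URL path, host, and auth header format are
# assumptions for illustration; only the JSON body shape comes from the
# XrefGenerate schema used above.
import requests

resp = requests.post(
    "https://aleph.example.org/api/2/collections/42/xref",  # assumed route
    headers={"Authorization": "Token <api-key>"},
    json={"against_collection_ids": [7, 9]},
)
assert resp.status_code == 202  # body: {"status": "accepted"}
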
def trigger(collection_id, mapping_id):
    """Load entities by running the mapping with id `mapping_id`. Flushes
    previously loaded entities before loading new entities.
    ---
    post:
      summary: Load entities from a mapping
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      - description: The mapping id.
        in: path
        name: mapping_id
        required: true
        schema:
          minimum: 1
          type: integer
        example: 2
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    mapping = obj_or_404(Mapping.by_id(mapping_id))
    job_id = get_session_id()
    payload = {'mapping_id': mapping.id}
    queue_task(collection, OP_LOAD_MAPPING, job_id=job_id, payload=payload)
    collection.touch()
    db.session.commit()
    return ('', 202)

def process(collection_id):
    """
    ---
    post:
      summary: Process a collection
      description: Start processing the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - in: query
        name: ingest
        schema:
          type: boolean
      - in: query
        name: reset
        schema:
          type: boolean
      responses:
        '202':
          description: Accepted
      tags:
      - Collection
    """
    collection = get_db_collection(collection_id, request.authz.WRITE)
    # re-process the documents
    data = {'reset': get_flag('reset', True)}
    queue_task(collection, OP_PROCESS, job_id=get_session_id(), payload=data)
    collection.touch()
    db.session.commit()
    refresh_collection(collection_id)
    return ('', 202)

def xref(foreign_id, against=None):
    """Cross-reference all entities and documents in a collection."""
    collection = get_collection(foreign_id)
    against = [get_collection(c).id for c in ensure_list(against)]
    against = {'against_collection_ids': against}
    queue_task(collection, OP_XREF, payload=against)

def retry_exports():
    for export in Export.get_pending():
        queue_task(None, export.operation, payload={"export_id": export.id})
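
# retry_exports re-queues any export that never completed. How it gets
# invoked is not shown in the source; a deployment would presumably call it
# from a periodic worker. A purely illustrative driver loop, with an
# arbitrary interval:
import time

def run_export_retries(interval=300):
    """Hypothetical periodic hook; not part of the source."""
    while True:
        retry_exports()
        time.sleep(interval)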