Exemplo n.º 1
0
def view(collection_id):
    """
    ---
    get:
      summary: Get a collection
      description: Return the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CollectionFull'
      tags:
      - Collection
    """
    # NOTE(review): the docstring above is an OpenAPI operation fragment
    # (YAML after the `---` marker); presumably it is parsed by the API doc
    # tooling, so it is kept verbatim.

    # Start from the indexed representation, then load the database record.
    payload = get_index_collection(collection_id)
    collection = get_db_collection(collection_id)

    # Recompute the "schema" statistics first when the "refresh" flag is set.
    wants_refresh = get_flag("refresh", False)
    if wants_refresh:
        update_collection_stats(collection_id, ["schema"])

    # Enrich the payload with statistics and queue status, and mark it as a
    # full (non-shallow) representation.
    statistics = get_collection_stats(collection.id)
    queue_status = get_status(collection)
    payload.update(statistics=statistics, status=queue_status, shallow=False)
    return CollectionSerializer.jsonify(payload)
Exemplo n.º 2
0
def status(foreign_id=None):
    """Get the queue status (pending and finished tasks.)"""
    # Normalise both cases to a mapping of foreign_id -> dataset status so
    # the table is built the same way for one collection or for all of them.
    if foreign_id is not None:
        collection = get_collection(foreign_id)
        datasets = {foreign_id: get_status(collection)}
    else:
        # Fall back to an empty mapping if no "datasets" entry is reported,
        # instead of failing on `None.items()`.
        datasets = get_active_dataset_status().get("datasets") or {}

    headers = ["Collection", "Job", "Stage", "Pending", "Running", "Finished"]
    rows = []
    # A distinct loop name avoids rebinding the `foreign_id` parameter.
    for dataset_id, dataset in datasets.items():
        # Summary row for the whole dataset: job and stage columns stay blank.
        rows.append(
            [
                dataset_id,
                "",
                "",
                dataset["pending"],
                dataset["running"],
                dataset["finished"],
            ]
        )
        # One row per stage of every job; a missing or empty "jobs"/"stages"
        # entry simply contributes no rows rather than raising TypeError.
        for job in dataset.get("jobs") or []:
            for stage in job.get("stages") or []:
                rows.append(
                    [
                        dataset_id,
                        stage["job_id"],
                        stage["stage"],
                        stage["pending"],
                        stage["running"],
                        stage["finished"],
                    ]
                )
    print(tabulate(rows, headers))
Exemplo n.º 3
0
    def test_upload_csv_doc(self):
        """Upload the CSV fixture and check the document and queue status."""
        _, headers = self.login(is_admin=True)
        meta = {
            'countries': ['de', 'usa'],
            'languages': ['en'],
            'mime_type': 'text/csv',
            'source_url': 'http://pudo.org/experts.csv'
        }
        csv_path = self.get_fixture_path('experts.csv')
        # Hold the fixture open only for the duration of the request so the
        # handle is always released, even if the POST raises.
        with open(csv_path, 'rb') as fh:
            data = {
                'meta': json.dumps(meta),
                'foo': fh,
            }
            res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, (res, res.data)
        assert 'id' in res.json, res.json

        # The returned id is "<db id>.<suffix>"; only the first part is used
        # to look up the document.
        db_id, _ = res.json.get('id').split('.', 1)
        doc = Document.by_id(db_id)
        assert doc.schema == Document.SCHEMA, doc.schema
        # The stored metadata is expected in normalised form
        # ('usa' -> 'us', 'en' -> 'eng').
        assert doc.meta['countries'] == ['de', 'us'], doc.meta
        assert doc.meta['languages'] == ['eng'], doc.meta

        # Exactly one task should be pending: the ingest stage of the first job.
        status = get_status(self.col)
        assert status.get('pending') == 1, status
        job = status.get('jobs')[0]
        assert job.get('pending') == 1, job
        stage = job.get('stages')[0]
        assert stage.get('stage') == OP_INGEST, stage
        assert stage.get('pending') == 1, stage
Exemplo n.º 4
0
def status(collection_id):
    """
    ---
    get:
      summary: Check processing status of a collection
      description: >
        Return the task queue status for the collection with id `collection_id`
      parameters:
      - description: The collection ID.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CollectionStatus'
      tags:
      - Collection
    """
    # NOTE(review): the docstring above is an OpenAPI operation fragment and
    # is kept verbatim.

    # Load the collection, requesting READ access.
    target = get_db_collection(collection_id, request.authz.READ)
    # NOTE(review): presumably this exempts the status endpoint from rate
    # limiting; confirm against the request handling code.
    request.rate_limit = None
    queue_status = get_status(target)
    return jsonify(queue_status)
Exemplo n.º 5
0
    def test_upload_csv_doc(self):
        """Upload the CSV fixture and check the document and queue status."""
        _, headers = self.login(is_admin=True)
        meta = {
            'countries': ['de', 'us'],
            'languages': ['en'],
            'mime_type': 'text/csv',
            'source_url': 'http://pudo.org/experts.csv'
        }
        csv_path = self.get_fixture_path('experts.csv')
        # Hold the fixture open only for the duration of the request so the
        # handle is always released, even if the POST raises.
        with open(csv_path, 'rb') as fh:
            data = {
                'meta': json.dumps(meta),
                'foo': fh,
            }
            res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, (res, res.data)
        assert 'id' in res.json, res.json

        # The returned id is used directly to look up the document.
        doc = Document.by_id(res.json.get('id'))
        assert doc.schema == Document.SCHEMA, doc.schema

        # Exactly one task should be pending, and the first reported
        # operation should be the ingest operation.
        status = get_status(self.col)
        assert status.get('pending') == 1, status
        op = status.get('operations')[0]
        assert op.get('pending') == 1, op
        assert op.get('operation') == OP_INGEST, op
Exemplo n.º 6
0
    def test_upload_csv_doc(self):
        """Upload the CSV fixture and check the document and queue status."""
        _, headers = self.login(is_admin=True)
        meta = {
            "countries": ["de", "usa"],
            "languages": ["en"],
            "mime_type": "text/csv",
            "source_url": "http://pudo.org/experts.csv",
        }
        csv_path = self.get_fixture_path("experts.csv")
        # Hold the fixture open only for the duration of the request so the
        # handle is always released, even if the POST raises.
        with open(csv_path, "rb") as fh:
            data = {
                "meta": json.dumps(meta),
                "foo": fh,
            }
            res = self.client.post(self.url, data=data, headers=headers)
        assert res.status_code == 201, (res, res.data)
        assert "id" in res.json, res.json

        # The returned id is "<db id>.<suffix>"; only the first part is used
        # to look up the document.
        db_id, _ = res.json.get("id").split(".", 1)
        doc = Document.by_id(db_id)
        assert doc.schema == Document.SCHEMA, doc.schema
        # The stored metadata is expected in normalised form
        # ("usa" -> "us", "en" -> "eng").
        assert doc.meta["countries"] == ["de", "us"], doc.meta
        assert doc.meta["languages"] == ["eng"], doc.meta

        # Exactly one task should be pending: the ingest stage of the first job.
        status = get_status(self.col)
        assert status.get("pending") == 1, status
        job = status.get("jobs")[0]
        assert job.get("pending") == 1, job
        stage = job.get("stages")[0]
        assert stage.get("stage") == OP_INGEST, stage
        assert stage.get("pending") == 1, stage
Exemplo n.º 7
0
def status(foreign_id=None):
    """Get the queue status (pending and finished tasks.)"""
    # Without a foreign_id, report on all active collections; otherwise
    # report on the single collection that was named.
    if foreign_id is None:
        report = get_active_collection_status()
    else:
        report = get_status(get_collection(foreign_id))
    pprint(report)
Exemplo n.º 8
0
def get_deep_collection(collection):
    """Collect the non-shallow details of ``collection`` into a dict.

    The result carries the index statistics, the mapping and entity set
    counts, the queue status, and a ``shallow`` marker set to ``False``.
    """
    # Counts are gathered first, in the same order as they appear in the
    # nested "counts" entry.
    counts = {
        "mappings": Mapping.by_collection(collection.id).count(),
        "entitysets": EntitySet.type_counts(collection_id=collection.id),
    }
    details = {"statistics": index.get_collection_stats(collection.id)}
    details["counts"] = counts
    details["status"] = get_status(collection)
    details["shallow"] = False
    return details
Exemplo n.º 9
0
def cancel(collection_id):
    # Load the collection with WRITE access, cancel its queue, then answer
    # with the queue status as it stands after the cancellation call.
    target = get_db_collection(collection_id, request.authz.WRITE)
    cancel_queue(target)
    remaining = get_status(target)
    return jsonify(remaining)
Exemplo n.º 10
0
def status(collection_id):
    # Load the collection with READ access and return its queue status
    # as a JSON response.
    target = get_db_collection(collection_id, request.authz.READ)
    queue_status = get_status(target)
    return jsonify(queue_status)
Exemplo n.º 11
0
def status(foreign_id):
    """Get the queue status (pending and finished tasks.)"""
    # Resolve the collection by its foreign id and pretty-print its status.
    report = get_status(get_collection(foreign_id))
    pprint(report)