def test_control_number_update(app, client, user):
    """Check that updating a record keeps its control number.

    A record is created through ``POST /records/``, updated twice through
    ``PUT /record/<control_number>`` and finally deleted. After each update
    the response must still carry the control number assigned at creation
    time, together with the description that was just sent.

    The ``app`` and ``user`` fixtures are requested but not referenced in the
    body of the test.
    """
    body = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": "test_control_number_update",
            "description": "Not updated document"
        }
    }

    # Create test record
    resp = client.post('/records/',
                       headers=get_headers(),
                       data=json.dumps(body))

    assert resp.status_code == HTTPStatus.CREATED

    orig_record = resp.json['metadata']
    record_url = '/record/{control_number}'.format(
        control_number=orig_record['control_number'])

    # NOTE(review): the second description talks about a "wrong" control
    # number, but neither payload contains a control_number field, so both
    # requests exercise the same path. Confirm whether a mismatching value
    # was meant to be sent in the second update.
    descriptions = (
        'Update with no control number',
        'Update with wrong control number',
    )

    for description in descriptions:
        body["_data"]['description'] = description
        resp = client.put(record_url,
                          headers=get_headers(),
                          data=json.dumps(body))

        # Assert the status before touching the payload, so that a failed
        # request is reported as a status mismatch and not as a lookup error.
        assert resp.status_code == HTTPStatus.OK

        put_record = resp.json['metadata']
        assert put_record.get('control_number') is not None
        assert put_record.get(
            'control_number') == orig_record['control_number']
        assert put_record["_data"]['description'] == description

    # Delete test record
    resp = client.delete(record_url, headers=get_headers())

    assert resp.status_code == HTTPStatus.NO_CONTENT
def test_suggester(app, client, user):
    """Exercise ``suggest:`` queries against a handful of titled records.

    Five records are created, three ``suggest:`` queries are sent to
    ``/records/`` and the reported number of hits is checked for each of
    them. All created records are deleted at the end.
    """
    titles = (
        'The First Suggestion',
        'Documentation site title',
        'CERN Search Documentation',
        'Invenio docs site',
        'The final suggester',
    )
    control_numbers = [__create_record(client, title) for title in titles]

    # Pause before querying (presumably so the new records become
    # searchable; confirm against the indexing setup).
    time.sleep(2)

    # (query text, expected number of hits)
    expectations = (
        ('suggest:the f', 2),   # 1st and 5th record
        ('suggest:doc', 3),     # 2nd, 3rd and 4th record
        ('suggest:f sugg', 2),  # 1st and 5th record
    )

    for text, expected_total in expectations:
        resp = client.get('/records/',
                          query_string={"q": text},
                          headers=get_headers())

        assert resp.status_code == HTTPStatus.OK
        assert resp.json['hits'].get('total') == expected_total

    # Remove every record created above
    for number in control_numbers:
        record_url = '/record/{control_number}'.format(control_number=number)
        resp = client.delete(record_url, headers=get_headers())

        assert resp.status_code == HTTPStatus.NO_CONTENT
def __create_record(client, title):
    """Create a record with the given title and return its control number.

    The record is posted to ``/records/`` using the
    ``test/suggest_v0.0.2.json`` schema. The helper asserts that the record
    was created, that a control number was assigned and that the stored
    title matches ``title``.
    """
    payload = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": title
        },
        "$schema": get_schemas_endpoint("test/suggest_v0.0.2.json")
    }

    resp = client.post('/records/',
                       headers=get_headers(),
                       data=json.dumps(payload))
    assert resp.status_code == HTTPStatus.CREATED

    # The response metadata must expose the new identifier and echo the title
    metadata = resp.json['metadata']
    control_number = metadata.get('control_number')
    assert control_number is not None
    assert metadata.get("_data").get('title') == title

    return control_number
def test_data_field_existence(app, client, user):
    """Posting a record without a ``_data`` field must be rejected.

    The payload carries ``_access`` plus top-level ``title`` and
    ``description`` keys, but no ``_data``. The service is expected to answer
    with 400 and to list a "Missing field _data" schema error.
    """
    payload = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "title": "test_access_fields_existence",
        "description": "No _access field"
    }
    expected_error = dict(
        field="_schema",
        message="Missing field _data",
        parents=[],
    )

    resp = client.post('/records/',
                       headers=get_headers(),
                       data=json.dumps(payload))

    assert resp.status_code == HTTPStatus.BAD_REQUEST
    assert expected_error in resp.json['errors']
# Example #5
# 0
def test_file_ops(app, appctx, db, client, user, location):
    """Walk a record's file through upload, search, delete and re-upload.

    Steps, in order:

    1. Create a record using the ``test/file_v0.0.4.json`` schema.
    2. For each upload case: PUT the file, check the record metadata is
       unchanged, read the file back and find it through a search.
    3. Check that unauthenticated, wrongly authenticated, listing and
       unknown-file requests all answer 404.
    4. Delete the file, upload a new one and read it back.
    5. Update the record and check that the file is still served.
    6. Delete the record and check that the file answers 410.
    """
    import time

    headers = get_headers()

    body = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": "Test default search field",
        },
        "_url": "my-domain.com/my-file",
        "$schema": get_schemas_endpoint("test/file_v0.0.4.json")
    }

    # Create test record
    res = client.post('/records/',
                      headers=get_headers(),
                      data=json.dumps(body))
    created = get_json(res, code=HTTPStatus.CREATED)['metadata']

    control_number = created['control_number']
    bucket = created['_bucket']
    bucket_content = created['_bucket_content']

    record_url = f"/record/{control_number}"

    first_file = 'test.txt'
    second_file = 'test-another.pdf'

    # (file name, file content)
    uploads = (
        (first_file, b'test'),      # first upload
        (first_file, b'test 2'),    # update content
        (second_file, b'test 3'),   # update file
    )

    for filename, content in uploads:
        # Upload file content
        url = f"{record_url}/files/{filename}"
        res = client.put(url, data=content, headers=headers)
        assert res.status_code == HTTPStatus.OK

        # The record keeps its metadata after the upload
        res = client.get(record_url, headers=headers)
        metadata = get_json(res, code=HTTPStatus.OK)['metadata']
        assert bucket == metadata['_bucket']
        assert bucket_content == metadata['_bucket_content']
        assert body['_url'] == metadata['_url']

        # Get file content
        res = client.get(url, headers=headers)
        assert_file(res, content, HTTPStatus.OK)

        # Search file content; the pause is needed to allow ES to process
        # the file
        time.sleep(2)

        search_url = f'/records/?q={quote_plus(content)}'
        res = client.get(search_url, headers=get_headers())
        assert res.status_code == HTTPStatus.OK

        hits = res.json['hits']
        assert hits.get('total') == 1

        hit_metadata = hits['hits'][0]['metadata']
        assert filename == hit_metadata['_file']
        assert content.decode() == hit_metadata['_data']['_attachment'][
            '_content']

    file_url = f"{record_url}/files/{second_file}"

    # Get file - unauthenticated
    res = client.get(file_url)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # Get file - wrong auth
    salt_length = current_app.config.get(
        'OAUTH2SERVER_TOKEN_PERSONAL_SALT_LEN')
    invalid_token = gen_salt(salt_length)
    res = client.get(file_url, headers=get_headers(invalid_token))
    assert res.status_code == HTTPStatus.NOT_FOUND

    # List files - blocked action
    # NOTE(review): this URL starts with "/records/" while every other
    # per-record URL in this test starts with "/record/"; confirm that the
    # 404 comes from the blocked listing and not from an unknown route.
    res = client.get(f'/records/{control_number}/files', headers=headers)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # File does not exist / not processed yet
    res = client.get(f"{record_url}/files/invalid", headers=headers)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # Delete file
    res = client.delete(file_url, headers=headers)
    assert res.status_code == HTTPStatus.NO_CONTENT
    res = client.get(file_url, headers=headers)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # Re-upload file content under a new name
    third_file = 'one-more-test.pdf'
    third_file_content = b'test 4'
    file_url = f"{record_url}/files/{third_file}"

    res = client.put(file_url, data=third_file_content, headers=headers)
    assert res.status_code == HTTPStatus.OK

    # Get file content
    res = client.get(file_url, headers=headers)
    assert_file(res, third_file_content, HTTPStatus.OK)

    # Updating the record maintains the file content
    body['_url'] = "my-domain-changed.com/cdn/my-file"

    res = client.put(record_url,
                     headers=get_headers(),
                     data=json.dumps(body))
    metadata = get_json(res, code=HTTPStatus.OK)['metadata']
    assert bucket == metadata['_bucket']
    assert bucket_content == metadata['_bucket_content']
    assert body['_url'] == metadata['_url']

    res = client.get(file_url, headers=headers)
    assert_file(res, third_file_content, HTTPStatus.OK)

    # Delete record; its file must then be reported as gone
    res = client.delete(record_url, headers=headers)
    assert res.status_code == HTTPStatus.NO_CONTENT
    res = client.get(file_url, headers=headers)
    assert res.status_code == HTTPStatus.GONE
# Example #6
# 0
def test_testclient(app, client, user):
    """Create two records, search them and check the search query params.

    Two records that differ only in their description are created with the
    ``test/doc_v0.0.2.json`` schema. The test then checks that:

    * a search without a query reports both records,
    * a search for ``CernSearch`` reports only the record whose description
      contains that word,
    * a search with ``explain``, ``highlight`` and ``type`` parameters returns
      a first hit with non-empty ``explanation`` and ``highlight`` entries.

    Both records are deleted at the end.
    """
    import time

    title = 'Test default search field'
    descriptions = (
        'This contains CernSearch and should appear',
        'This does not contains the magic word and should not appear',
    )

    body = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": title,
            "description": descriptions[0]
        },
        "$schema": get_schemas_endpoint("test/doc_v0.0.2.json")
    }

    # Create one record per description and check what the service stored
    control_numbers = []
    for description in descriptions:
        body["_data"]['description'] = description

        resp = client.post('/records/',
                           headers=get_headers(),
                           data=json.dumps(body))

        assert resp.status_code == HTTPStatus.CREATED

        resp_body = resp.json['metadata']
        assert resp_body.get('control_number') is not None
        resp_data = resp_body.get("_data")
        assert resp_data.get('title') == title
        assert resp_data.get('description') == description

        control_numbers.append(resp_body.get("control_number"))

    # Needed to allow ES to process the records
    time.sleep(2)

    # Search records
    # Test search with no query
    resp = client.get('/records/', headers=get_headers())

    assert resp.status_code == HTTPStatus.OK

    resp_hits = resp.json['hits']
    assert resp_hits.get('total') == 2

    # Only the first record mentions the searched word
    resp = client.get('/records/?q=CernSearch', headers=get_headers())

    assert resp.status_code == HTTPStatus.OK

    resp_hits = resp.json['hits']
    assert resp_hits.get('total') == 1

    description = resp_hits['hits'][0]['metadata'].get("_data").get(
        'description')
    assert description is not None
    assert description == descriptions[0]

    # Test query params
    resp = client.get('/records/',
                      headers=get_headers(),
                      query_string={
                          'q': 'CernSearch',
                          'explain': 'true',
                          'highlight': '*',
                          'type': 'cross_fields'
                      })
    assert resp.status_code == HTTPStatus.OK

    first_hit = resp.json['hits']['hits'][0]
    assert first_hit.get('explanation')
    assert first_hit.get('highlight')

    # Clean the instance. Delete both records; no request body is sent,
    # matching the DELETE calls of the other tests in this file.
    for control_number in control_numbers:
        resp = client.delete(
            '/record/{control_number}'.format(control_number=control_number),
            headers=get_headers())

        assert resp.status_code == HTTPStatus.NO_CONTENT
def test_access_fields_existence(app, client, user):
    """Posting a record with an incomplete ``_access`` must be rejected.

    Four payloads are posted to ``/records/``: one without ``_access`` at all
    and one for each missing ``_access`` sub-field (``delete``, ``update``,
    ``owner``). Every request must answer 400 and list the matching schema
    error.

    POST and PUT should follow the same workflow, so only POST is checked.
    """
    # (``_access`` value or None when absent, description, expected message)
    scenarios = (
        (
            None,
            "No _access field",
            "Missing field _access",
        ),
        (
            {
                "owner": ["*****@*****.**"],
                "update": ["*****@*****.**"]
            },
            "No _access.delete field",
            "Missing or wrong type (not an array) in field _access.delete",
        ),
        (
            {
                "owner": ["*****@*****.**"],
                "delete": ["*****@*****.**"]
            },
            "No _access.update field",
            "Missing or wrong type (not an array) in field _access.update",
        ),
        (
            {
                "update": ["*****@*****.**"],
                "delete": ["*****@*****.**"]
            },
            "No _access.owner field",
            "Missing or wrong type (not an array) in field _access.owner",
        ),
    )

    for access, description, message in scenarios:
        # Insert "_access" first so the serialized key order stays
        # "_access" followed by "_data".
        payload = {}
        if access is not None:
            payload["_access"] = access
        payload["_data"] = {
            "title": "test_access_fields_existence",
            "description": description
        }

        resp = client.post('/records/',
                           headers=get_headers(),
                           data=json.dumps(payload))

        assert resp.status_code == HTTPStatus.BAD_REQUEST

        expected_error = {
            "field": "_schema",
            "message": message,
            'parents': []
        }
        assert expected_error in resp.json['errors']