def test_control_number_update(app, client, user):
    """Check that PUT updates keep the control number assigned at creation.

    A record is created, updated twice through ``PUT /record/<control_number>``
    with a different description each time, and finally deleted. After every
    update the returned ``control_number`` must be present and equal to the
    one returned by the initial POST, and the description must match the one
    that was sent.
    """
    payload = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": "test_control_number_update",
            "description": "Not updated document"
        }
    }

    # Create the record that the updates below operate on.
    created = client.post(
        '/records/', headers=get_headers(), data=json.dumps(payload))
    assert created.status_code == HTTPStatus.CREATED
    original = created.json['metadata']
    record_url = f"/record/{original['control_number']}"

    # NOTE(review): neither update payload carries a ``control_number`` key,
    # so the second ("wrong control number") round sends the same shape of
    # body as the first one - confirm whether a mismatching value was meant
    # to be included.
    descriptions = (
        'Update with no control number',
        'Update with wrong control number',
    )
    for description in descriptions:
        payload["_data"]['description'] = description
        updated = client.put(
            record_url, headers=get_headers(), data=json.dumps(payload))
        metadata = updated.json['metadata']

        assert updated.status_code == HTTPStatus.OK
        assert metadata.get('control_number') is not None
        assert metadata.get('control_number') == original['control_number']
        assert (metadata["_data"]['description']
                == payload["_data"]['description'])

    # Remove the record so later tests start from a clean state.
    deleted = client.delete(record_url, headers=get_headers())
    assert deleted.status_code == HTTPStatus.NO_CONTENT
def test_suggester(app, client, user):
    """Check hit counts returned for ``suggest:`` queries.

    Five records with distinct titles are created, three ``suggest:<text>``
    queries are issued against ``/records/`` and the reported total number
    of hits is compared with the expected count. All records are deleted at
    the end.
    """
    titles = (
        'The First Suggestion',
        'Documentation site title',
        'CERN Search Documentation',
        'Invenio docs site',
        'The final suggester',
    )
    created_ids = [__create_record(client, title) for title in titles]

    # NOTE(review): presumably gives the search backend time to index the
    # new records before querying - confirm.
    time.sleep(2)

    expected_totals = (
        # 'the f' should return 1st and 5th record
        ('suggest:the f', 2),
        # 'doc' should return 2nd, 3rd and 4th record
        ('suggest:doc', 3),
        # 'f sugg' should return 1st and 5th record
        ('suggest:f sugg', 2),
    )
    for text, expected_total in expected_totals:
        resp = client.get(
            '/records/', query_string={"q": text}, headers=get_headers())
        assert resp.status_code == HTTPStatus.OK
        assert resp.json['hits'].get('total') == expected_total

    # Clean up every record created above.
    for record_id in created_ids:
        resp = client.delete(f'/record/{record_id}', headers=get_headers())
        assert resp.status_code == HTTPStatus.NO_CONTENT
def __create_record(client, title):
    """Create a record with the given title and return its control number.

    The record is posted to ``/records/`` using the
    ``test/suggest_v0.0.2.json`` schema. The helper asserts that the API
    answers ``201 CREATED``, that a control number was assigned and that the
    stored title equals ``title``.

    :param client: test client used to issue the POST request.
    :param title: value stored under ``_data.title``.
    :returns: the ``control_number`` found in the response metadata.
    """
    payload = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": title
        },
        "$schema": get_schemas_endpoint("test/suggest_v0.0.2.json")
    }

    response = client.post(
        '/records/', headers=get_headers(), data=json.dumps(payload))
    assert response.status_code == HTTPStatus.CREATED

    # Verify the stored record before handing its identifier back.
    metadata = response.json['metadata']
    control_number = metadata.get("control_number")
    assert control_number is not None
    assert metadata.get("_data").get('title') == title

    return control_number
def test_data_field_existence(app, client, user):
    """Check that a record posted without ``_data`` is rejected.

    The payload carries ``_access`` and top-level ``title``/``description``
    keys but no ``_data`` object. The API is expected to answer
    ``400 BAD REQUEST`` and to list a "Missing field _data" schema error.
    """
    payload_without_data = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "title": "test_access_fields_existence",
        "description": "No _access field"
    }
    expected_error = {
        "field": "_schema",
        "message": "Missing field _data",
        'parents': []
    }

    response = client.post(
        '/records/',
        headers=get_headers(),
        data=json.dumps(payload_without_data))

    assert response.status_code == HTTPStatus.BAD_REQUEST
    assert expected_error in response.json['errors']
def test_file_ops(app, appctx, db, client, user, location):
    """Walk through upload, read, search, delete and re-upload of files.

    Steps, in order:

    1. Create a record using the ``test/file_v0.0.4.json`` schema.
    2. For each upload case: PUT the file, check that the record metadata
       (``_bucket``, ``_bucket_content``, ``_url``) is unchanged, read the
       file back and search for its content.
    3. Check that unauthenticated / wrongly authenticated reads, the file
       listing and an unknown file name all answer ``404``.
    4. Delete the file, upload a new one and read it back.
    5. Update the record and check that the metadata and file are kept.
    6. Delete the record and check that the file answers ``410 GONE``.
    """
    import time

    headers = get_headers()
    body = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": "Test default search field",
        },
        "_url": "my-domain.com/my-file",
        "$schema": get_schemas_endpoint("test/file_v0.0.4.json")
    }

    # Create test record
    res = client.post(
        '/records/', headers=get_headers(), data=json.dumps(body))
    created = get_json(res, code=HTTPStatus.CREATED)['metadata']
    control_number = created['control_number']
    bucket = created['_bucket']
    bucket_content = created['_bucket_content']

    first_file = 'test.txt'
    second_file = 'test-another.pdf'
    uploads = (
        (first_file, b'test'),      # first upload
        (first_file, b'test 2'),    # update content
        (second_file, b'test 3'),   # update file
    )

    for name, content in uploads:
        # Upload file content
        url = f"/record/{control_number}/files/{name}"
        res = client.put(url, data=content, headers=headers)
        assert res.status_code == HTTPStatus.OK

        # The record metadata must not change because of the upload.
        res = client.get(f"/record/{control_number}", headers=headers)
        metadata = get_json(res, code=HTTPStatus.OK)['metadata']
        assert bucket == metadata['_bucket']
        assert bucket_content == metadata['_bucket_content']
        assert body['_url'] == metadata['_url']

        # Get file content
        res = client.get(url, headers=headers)
        assert_file(res, content, HTTPStatus.OK)

        # Search file content
        # Needed to allow ES to process the file
        time.sleep(2)
        res = client.get(
            f'/records/?q={quote_plus(content)}', headers=get_headers())
        assert res.status_code == HTTPStatus.OK
        hits = res.json['hits']
        assert hits.get('total') == 1
        top_hit = hits['hits'][0]['metadata']
        assert name == top_hit['_file']
        assert content.decode() == top_hit['_data']['_attachment']['_content']

    file_url = f"/record/{control_number}/files/{second_file}"

    # Get file - unauthenticated
    res = client.get(file_url)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # Get file - wrong auth
    salt_length = current_app.config.get(
        'OAUTH2SERVER_TOKEN_PERSONAL_SALT_LEN')
    invalid_token = gen_salt(salt_length)
    res = client.get(file_url, headers=get_headers(invalid_token))
    assert res.status_code == HTTPStatus.NOT_FOUND

    # List files - blocked action
    # NOTE(review): this path starts with ``/records/`` while every other
    # file URL in this test starts with ``/record/`` - confirm that the
    # plural form is intended.
    res = client.get(f'/records/{control_number}/files', headers=headers)
    assert res.status_code == HTTPStatus.NOT_FOUND

    # File does not exists / not processed yet
    assert client.get(
        f"/record/{control_number}/files/invalid",
        headers=headers).status_code == HTTPStatus.NOT_FOUND

    # Delete file
    assert client.delete(
        file_url, headers=headers).status_code == HTTPStatus.NO_CONTENT
    assert client.get(
        file_url, headers=headers).status_code == HTTPStatus.NOT_FOUND

    # Re Upload file content
    third_file = 'one-more-test.pdf'
    third_file_content = b'test 4'
    file_url = f"/record/{control_number}/files/{third_file}"
    res = client.put(file_url, data=third_file_content, headers=headers)
    assert res.status_code == HTTPStatus.OK

    # Get file content
    res = client.get(file_url, headers=headers)
    assert_file(res, third_file_content, HTTPStatus.OK)

    # Updating the record must keep bucket metadata and file content.
    body['_url'] = "my-domain-changed.com/cdn/my-file"
    res = client.put(
        f'/record/{control_number}',
        headers=get_headers(),
        data=json.dumps(body))
    metadata = get_json(res, code=HTTPStatus.OK)['metadata']
    assert bucket == metadata['_bucket']
    assert bucket_content == metadata['_bucket_content']
    assert body['_url'] == metadata['_url']

    res = client.get(file_url, headers=headers)
    assert_file(res, third_file_content, HTTPStatus.OK)

    # Delete record
    assert client.delete(
        f"/record/{control_number}",
        headers=headers).status_code == HTTPStatus.NO_CONTENT
    assert client.get(
        file_url, headers=headers).status_code == HTTPStatus.GONE
def test_testclient(app, client, user):
    """Create two records and search them through the test client.

    Two records are created that differ only in their description; only the
    first description contains the word ``CernSearch``. The test then checks
    that an empty search returns both records, that searching for
    ``CernSearch`` returns only the first one, and that a search using the
    ``explain``, ``highlight`` and ``type`` query parameters yields a hit
    with non-empty ``explanation`` and ``highlight`` entries. Both records
    are deleted at the end.
    """
    import time

    matching = 'This contains CernSearch and should appear'
    non_matching = (
        'This does not contains the magic word and should not appear')

    body = {
        "_access": {
            "owner": ["*****@*****.**"],
            "update": ["*****@*****.**"],
            "delete": ["*****@*****.**"]
        },
        "_data": {
            "title": "Test default search field",
            "description": "This contains CernSearch and should appear"
        },
        "$schema": get_schemas_endpoint("test/doc_v0.0.2.json")
    }

    def _create_and_verify(expected_description):
        # POST the current ``body`` and check what the API stored.
        resp = client.post(
            '/records/', headers=get_headers(), data=json.dumps(body))
        assert resp.status_code == HTTPStatus.CREATED

        metadata = resp.json['metadata']
        assert metadata.get('control_number') is not None
        data = metadata.get("_data")
        assert data.get('title') == 'Test default search field'
        assert data.get('description') == expected_description
        return metadata.get("control_number")

    # Create first test record
    control_number_one = _create_and_verify(matching)

    # Create second test record
    body["_data"]['description'] = non_matching
    control_number_two = _create_and_verify(non_matching)

    # Needed to allow ES to process the file
    time.sleep(2)

    # Search with no query: both records are expected.
    resp = client.get('/records/', headers=get_headers())
    assert resp.status_code == HTTPStatus.OK
    assert resp.json['hits'].get('total') == 2

    # Search for the keyword: only the first record is expected.
    resp = client.get('/records/?q=CernSearch', headers=get_headers())
    assert resp.status_code == HTTPStatus.OK
    hits = resp.json['hits']
    assert hits.get('total') == 1
    description = hits['hits'][0]['metadata'].get("_data").get('description')
    assert description is not None
    assert description == 'This contains CernSearch and should appear'

    # Test query params
    search_params = {
        'q': 'CernSearch',
        'explain': 'true',
        'highlight': '*',
        'type': 'cross_fields'
    }
    resp = client.get(
        '/records/', headers=get_headers(), query_string=search_params)
    assert resp.status_code == HTTPStatus.OK
    first_hit = resp.json['hits']['hits'][0]
    explanation = first_hit.get('explanation')
    print(first_hit)
    assert explanation
    highlight = first_hit.get('highlight')
    assert highlight

    # Clean the instance. Delete both records.
    for control_number in (control_number_one, control_number_two):
        resp = client.delete(
            f'/record/{control_number}',
            headers=get_headers(),
            data=json.dumps(body))
        assert resp.status_code == HTTPStatus.NO_CONTENT
def test_access_fields_existence(app, client, user):
    """Check that records with an incomplete ``_access`` block are rejected.

    Four payloads are posted in turn: one without ``_access`` at all and one
    each lacking ``_access.delete``, ``_access.update`` and
    ``_access.owner``. Every request must answer ``400 BAD REQUEST`` and list
    the matching ``_schema`` error message.

    POST and PUT should follow the same workflow. Only checking POST.
    """
    cases = [
        # Without _access field
        (
            {
                "_data": {
                    "title": "test_access_fields_existence",
                    "description": "No _access field"
                }
            },
            "Missing field _access",
        ),
        # Without _access.delete field
        (
            {
                "_access": {
                    "owner": ["*****@*****.**"],
                    "update": ["*****@*****.**"]
                },
                "_data": {
                    "title": "test_access_fields_existence",
                    "description": "No _access.delete field"
                }
            },
            "Missing or wrong type (not an array) in field _access.delete",
        ),
        # Without _access.update field
        (
            {
                "_access": {
                    "owner": ["*****@*****.**"],
                    "delete": ["*****@*****.**"]
                },
                "_data": {
                    "title": "test_access_fields_existence",
                    "description": "No _access.update field"
                }
            },
            "Missing or wrong type (not an array) in field _access.update",
        ),
        # Without _access.owner field
        (
            {
                "_access": {
                    "update": ["*****@*****.**"],
                    "delete": ["*****@*****.**"]
                },
                "_data": {
                    "title": "test_access_fields_existence",
                    "description": "No _access.owner field"
                }
            },
            "Missing or wrong type (not an array) in field _access.owner",
        ),
    ]

    for payload, message in cases:
        resp = client.post(
            '/records/', headers=get_headers(), data=json.dumps(payload))
        assert resp.status_code == HTTPStatus.BAD_REQUEST
        assert {
            "field": "_schema",
            "message": message,
            'parents': []
        } in resp.json['errors']