def test_public_authz_and_acl_object_upload_file_with_failed_authz_check(
    client,
    indexd_client_accepting_record,
    mock_arborist_requests,
    user_client,
    rsa_private_key,
    kid,
):
    """
    Test `GET /data/upload/1` in which the `1` Indexd record has authz
    populated with the public value, but the user doesn't have the correct
    authz permission (the mocked Arborist denies the request).

    The response is expected to be a 401 with a non-empty HTML body and no
    JSON payload.
    """
    indexd_client_accepting_record(INDEXD_RECORD_WITH_PUBLIC_AUTHZ_AND_ACL_POPULATED)

    # Every Arborist auth request is answered with a denial.
    arborist_denial = ({"auth": False}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_denial}})

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get("/data/upload/1", headers=auth_headers)

    assert response.status_code == 401
    assert response.data
    assert response.mimetype == "text/html"
    assert not response.json
def test_public_authz_and_acl_object_upload_file(
    client,
    indexd_client_accepting_record,
    mock_arborist_requests,
    user_client,
    rsa_private_key,
    kid,
):
    """
    Test `GET /data/upload/1` in which the `1` Indexd record has both authz
    and acl populated with public values. In this case, authz takes
    precedence over acl.

    The mocked Arborist approves the request, so a 200 carrying a `url` is
    expected.
    """
    indexd_client_accepting_record(INDEXD_RECORD_WITH_PUBLIC_AUTHZ_AND_ACL_POPULATED)

    # Every Arborist auth request is answered with an approval.
    arborist_approval = ({"auth": True}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_approval}})

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get("/data/upload/1", headers=auth_headers)

    assert response.status_code == 200
    assert "url" in response.json
def test_indexd_download_file_no_protocol(
    client,
    oauth_client,
    user_client,
    indexd_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /data/download/1`` when no ``protocol`` query parameter is sent.

    A 200 response whose JSON body contains a ``url`` key is expected.
    """
    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get("/data/download/1", headers=auth_headers)

    assert response.status_code == 200
    assert "url" in response.json
def test_indexd_download_file(
    client,
    oauth_client,
    user_client,
    indexd_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /data/download/1`` requesting the protocol reported by the
    ``indexd_client`` fixture as the indexed file location.

    A 200 response with a ``url`` that carries a query string is expected.
    """
    requested_protocol = indexd_client["indexed_file_location"]

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get(
        "/data/download/1",
        headers=auth_headers,
        query_string={"protocol": requested_protocol},
    )

    assert response.status_code == 200
    assert "url" in response.json

    # defaults to signing url, check that it's not just raw url
    parsed_url = urllib.parse.urlparse(response.json["url"])
    assert parsed_url.query != ""
def test_get_presigned_url_no_access_id(
    client,
    user_client,
    indexd_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /ga4gh/drs/v1/objects/1/access/`` with the access ID left off
    the end of the path.

    The request is expected to be rejected with a 400.
    """
    test_guid = "1"
    user = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, user_client.user_id
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    # The path deliberately ends right after "/access/" (no access ID).
    res = client.get(
        "/ga4gh/drs/v1/objects/" + test_guid + "/access/",
        headers=user,
    )

    assert res.status_code == 400
def test_get_presigned_url_with_query_params(
    client,
    user_client,
    indexd_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /ga4gh/drs/v1/objects/<did>/access/<access_id>`` for a GUID
    that contains a slash (sent URL-encoded) with extra query parameters
    appended to the URL.

    A 200 response is expected.
    """
    access_id = indexd_client["indexed_file_location"]
    user = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, user_client.user_id
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    guid = "dg.TEST/ed8f4658-6acd-4f96-9dd8-3709890c959e"

    # NOTE(review): `data` is built but never passed to the request or to any
    # fixture in this test; the call is kept in case `get_doc` matters for
    # reasons not visible here -- confirm whether it can be removed.
    data = get_doc()
    data["did"] = guid

    # The slash in the GUID must be percent-encoded to stay a single path
    # segment.
    did = guid.replace("/", "%2F")

    res = client.get(
        "/ga4gh/drs/v1/objects/"
        + did
        + "/access/"
        + access_id
        + "?userProject=someproject&arbitrary_parameter=val",
        headers=user,
    )

    assert res.status_code == 200
def test_presigned_url_log_disabled(
    client,
    user_client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    kid,
    rsa_private_key,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
    monkeypatch,
):
    """
    Disable presigned URL logs, enable login logs, get a presigned URL from
    Fence and make sure no audit log was created.
    """
    mock_arborist_requests(
        {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}}
    )
    # NOTE(review): `test_presigned_url_log` in this file patches
    # "fence.resources.audit.client.requests" instead -- confirm which module
    # path is the current one.
    audit_service_mocker = mock.patch(
        "fence.resources.audit_service_client.requests", new_callable=mock.Mock
    )
    monkeypatch.setitem(
        config, "ENABLE_AUDIT_LOGS", {"presigned_url": False, "login": True}
    )

    protocol = "gs"
    guid = "dg.hello/abc"
    path = f"/data/download/{guid}?protocol={protocol}"
    resource_paths = ["/my/resource/path1", "/path2"]
    # Called for its effect on the mocked Indexd; the return value is not
    # needed by this test.
    indexd_client_with_arborist(resource_paths)

    headers = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, user_client.user_id
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    with audit_service_mocker as audit_service_requests:
        audit_service_requests.post.return_value = MockResponse(
            data={},
            status_code=201,
        )
        response = client.get(path, headers=headers)
        assert response.status_code == 200, response
        assert response.json.get("url")
        # Presigned URL logging is off, so nothing may be posted.
        audit_service_requests.post.assert_not_called()
def test_indexd_prometheus_presigned_url_counter(
    app,
    client,
    oauth_client,
    user_client,
    indexd_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test that when a user requests a presigned URL, the prometheus counter
    is increased.
    """
    indexed_file_location = indexd_client["indexed_file_location"]
    counter_name = "pre_signed_url_req_total"
    counter_labels = {"requested_protocol": indexed_file_location}

    # The sample may not exist yet, in which case the count starts at 0.
    before = app.prometheus_registry.get_sample_value(counter_name, counter_labels)
    if not before:
        before = 0

    # make a presigned URL request
    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}
    response = client.get(
        "/data/download/1",
        headers=auth_headers,
        query_string={"protocol": indexed_file_location},
    )
    assert response.status_code == 200

    # assert metrics have been processed successfully
    after = app.prometheus_registry.get_sample_value(counter_name, counter_labels)
    assert after, "Presigned URL requests should have been counted"
    assert 1 == (after - before), "1 presigned URL request should have been counted"

    # defaults to signing url, check that it's not just raw url
    parsed_url = urllib.parse.urlparse(response.json["url"])
    assert parsed_url.query != ""
def test_disabled_audit(
    client,
    user_client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    kid,
    rsa_private_key,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
    monkeypatch,
):
    """
    Disable all audit logs, get a presigned URL from Fence and make sure
    the logic to create audit logs did not run.
    """
    mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}})

    # Explicitly switch every audit category off so the test does not depend
    # on whatever the ambient test configuration happens to contain.
    monkeypatch.setitem(
        config, "ENABLE_AUDIT_LOGS", {"presigned_url": False, "login": False}
    )

    protocol = "gs"
    guid = "dg.hello/abc"
    path = f"/data/download/{guid}?protocol={protocol}"
    resource_paths = ["/my/resource/path1", "/path2"]
    indexd_client_with_arborist(resource_paths)

    headers = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, str(user_client.user_id)
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    audit_decorator_mocker = mock.patch(
        "fence.resources.audit.utils.create_audit_log_for_request",
        new_callable=mock.Mock,
    )
    with audit_decorator_mocker as audit_decorator:
        response = client.get(path, headers=headers)
        assert response.status_code == 200, response
        assert response.json.get("url")
        # With auditing disabled the log-creation helper must never be reached.
        audit_decorator.assert_not_called()
def test_indexd_download_with_uploader_unauthorized(
    client,
    oauth_client,
    user_client,
    kid,
    rsa_private_key,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /data/download/1`` with unauthorized user (user is not the
    uploader).

    The Indexd document is patched to one whose ``uploader`` is a different
    user; the request is expected to be rejected with a 401.
    """
    did = str(uuid.uuid4())
    index_document = {
        "did": did,
        "baseid": "",
        "uploader": "fake_uploader_123",
        "rev": "",
        "size": 10,
        "file_name": "file1",
        "urls": [f"s3://bucket1/key-{did[:8]}"],
        "acl": ["phs000178"],
        "hashes": {},
        "metadata": {},
        "form": "",
        "created_date": "",
        "updated_date": "",
    }

    indexed_file_location = "s3"
    path = "/data/download/1"
    query_string = {"protocol": indexed_file_location}
    headers = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, user_client.user_id
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    # Use the patch as a context manager so it is always undone; a bare
    # `.start()` without `.stop()` would leave the patched attribute in
    # place for every test that runs afterwards.
    with mock.patch(
        "fence.blueprints.data.indexd.IndexedFile.index_document", index_document
    ):
        response = client.get(path, headers=headers, query_string=query_string)

    assert response.status_code == 401
def test_download_fails_with_wrong_authz_and_public_acl(
    client,
    indexd_client_accepting_record,
    mock_arborist_requests,
    user_client,
    rsa_private_key,
    kid,
):
    """
    Test that generating a download url returns a 401 when acl is public,
    but authz is a permission the user doesn't have access to. Authz takes
    precedence.

    The 401 response is expected to have a non-empty HTML body and no JSON
    payload.
    """
    record = {
        "did": "1",
        "baseid": "",
        "rev": "",
        "size": 10,
        "file_name": "file1",
        "urls": ["s3://bucket1/key"],
        "hashes": {},
        "metadata": {},
        "authz": ["/foo"],
        "acl": ["*"],
        "form": "",
        "created_date": "",
        "updated_date": "",
    }
    indexd_client_accepting_record(record)

    # Every Arborist auth request is answered with a denial.
    arborist_denial = ({"auth": False}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_denial}})

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get("/data/download/1", headers=auth_headers)

    assert response.status_code == 401
    assert response.data
    assert response.mimetype == "text/html"
    assert not response.json
def test_non_public_authz_and_public_acl_object_upload_file(
    client,
    indexd_client_accepting_record,
    mock_arborist_requests,
    user_client,
    rsa_private_key,
    kid,
):
    """
    Test that a user can successfully generate an upload url for an Indexd
    record with a non-public authz field and a public acl field.

    The mocked Arborist approves the request, so a 200 carrying a `url` is
    expected.
    """
    record = {
        "did": "1",
        "baseid": "",
        "rev": "",
        "size": 10,
        "file_name": "file1",
        "urls": ["s3://bucket1/key"],
        "hashes": {},
        "metadata": {},
        "authz": ["/programs/DEV/projects/test"],
        "acl": ["*"],
        "form": "",
        "created_date": "",
        "updated_date": "",
    }
    indexd_client_accepting_record(record)

    # Every Arborist auth request is answered with an approval.
    arborist_approval = ({"auth": True}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_approval}})

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get("/data/upload/1", headers=auth_headers)

    assert response.status_code == 200
    assert "url" in response.json
def test_abac(
    app,
    client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    user_client,
    rsa_private_key,
    kid,
    google_proxy_group,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
):
    """
    Test ``GET /data/download/1`` twice against a mocked Arborist: first
    with the auth request mocked to return 200 (expects a 200 with a
    ``url``), then with it mocked to return 403 (expects a 403).
    """
    auth_endpoint = "arborist/auth/request"

    # First pass: Arborist mocked with a 200 response.
    mock_arborist_requests({auth_endpoint: {"POST": ('{"auth": "true"}', 200)}})
    indexd_info = indexd_client_with_arborist("test_abac")
    requested_protocol = indexd_info["indexed_file_location"]

    download_path = "/data/download/1"
    params = {"protocol": requested_protocol}

    claims = utils.authorized_download_context_claims(
        user_client.username, user_client.user_id
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get(download_path, headers=auth_headers, query_string=params)
    assert response.status_code == 200
    assert "url" in response.json

    # Second pass: same request, Arborist now mocked with a 403 response.
    mock_arborist_requests({auth_endpoint: {"POST": ('{"auth": "false"}', 403)}})
    response = client.get(download_path, headers=auth_headers, query_string=params)
    assert response.status_code == 403
def test_presigned_url_log_acl(
    client,
    user_client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    kid,
    rsa_private_key,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
    monkeypatch,
):
    """
    Same as `test_presigned_url_log`, but the record contains `acl` instead
    of `authz`. The ACL is ["phs000178", "phs000218"] as defined in conftest.
    """
    mock_arborist_requests(
        {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}}
    )
    # NOTE(review): `test_presigned_url_log` in this file patches
    # "fence.resources.audit.client.requests" instead -- confirm which module
    # path is the current one.
    audit_service_mocker = mock.patch(
        "fence.resources.audit_service_client.requests", new_callable=mock.Mock
    )
    monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

    protocol = "gs"
    guid = "dg.hello/abc"
    path = f"/data/download/{guid}?protocol={protocol}"
    # No resource paths are passed; the return value is not needed by this
    # test.
    indexd_client_with_arborist(None)

    headers = {
        "Authorization": "Bearer "
        + jwt.encode(
            utils.authorized_download_context_claims(
                user_client.username, user_client.user_id
            ),
            key=rsa_private_key,
            headers={"kid": kid},
            algorithm="RS256",
        ).decode("utf-8")
    }

    with audit_service_mocker as audit_service_requests:
        audit_service_requests.post.return_value = MockResponse(
            data={},
            status_code=201,
        )
        response = client.get(path, headers=headers)
        assert response.status_code == 200, response
        assert response.json.get("url")
        # The ACL entries are reported in place of authz resource paths.
        audit_service_requests.post.assert_called_once_with(
            "http://audit-service/log/presigned_url",
            json={
                "request_url": path,
                "status_code": 200,
                "username": user_client.username,
                "sub": user_client.user_id,
                "guid": guid,
                "resource_paths": ["phs000178", "phs000218"],
                "action": "download",
                "protocol": protocol,
            },
        )
def test_presigned_url_log_push_to_sqs(
    app,
    client,
    user_client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    kid,
    rsa_private_key,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
    monkeypatch,
):
    """
    Get a presigned URL from Fence and make sure an audit log was pushed
    to the configured SQS.
    """
    arborist_approval = ({"auth": True}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_approval}})
    monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})
    mocked_sqs = mock_audit_service_sqs(app)

    # get a presigned URL
    protocol = "gs"
    guid = "dg.hello/abc"
    path = f"/data/download/{guid}?protocol={protocol}"
    resource_paths = ["/my/resource/path1", "/path2"]
    indexd_client_with_arborist(resource_paths)

    claims = utils.authorized_download_context_claims(
        user_client.username, str(user_client.user_id)
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    response = client.get(path, headers=auth_headers)
    assert response.status_code == 200, response
    assert response.json.get("url")

    # Key order matters here: the dict is serialized with `json.dumps` and
    # compared against the exact message body.
    expected_audit_data = {
        "request_url": path,
        "status_code": 200,
        "username": user_client.username,
        "sub": user_client.user_id,
        "guid": guid,
        "resource_paths": resource_paths,
        "action": "download",
        "protocol": protocol,
        "category": "presigned_url",
    }
    expected_body = json.dumps(expected_audit_data)
    mocked_sqs.send_message.assert_called_once_with(
        MessageBody=expected_body, QueueUrl=mocked_sqs.url
    )
def test_presigned_url_log(
    protocol,
    client,
    user_client,
    mock_arborist_requests,
    indexd_client_with_arborist,
    kid,
    rsa_private_key,
    primary_google_service_account,
    cloud_manager,
    google_signed_url,
    monkeypatch,
):
    """
    Get a presigned URL from Fence and make sure a call to the Audit Service
    was made to create an audit log. Test with and without a requested
    protocol.

    NOTE(review): `protocol` is presumably supplied by parametrization or a
    fixture that is not visible in this function -- confirm.
    """
    arborist_approval = ({"auth": True}, 200)
    mock_arborist_requests({"arborist/auth/request": {"POST": arborist_approval}})
    audit_service_mocker = mock.patch(
        "fence.resources.audit.client.requests", new_callable=mock.Mock
    )
    monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

    guid = "dg.hello/abc"
    base_path = f"/data/download/{guid}"
    path = base_path + f"?protocol={protocol}" if protocol else base_path
    resource_paths = ["/my/resource/path1", "/path2"]
    indexd_client_with_arborist(resource_paths)

    # cast user_id to str because that's what we get back
    # from the DB, but audit-service expects an int.
    claims = utils.authorized_download_context_claims(
        user_client.username,
        str(user_client.user_id),
    )
    token = jwt.encode(
        claims,
        key=rsa_private_key,
        headers={"kid": kid},
        algorithm="RS256",
    ).decode("utf-8")
    auth_headers = {"Authorization": "Bearer " + token}

    # protocol=None should fall back to s3 (first indexed location):
    expected_protocol = protocol if protocol else "s3"

    expected_log = {
        "request_url": path,
        "status_code": 200,
        "username": user_client.username,
        "sub": user_client.user_id,  # it's an int now
        "guid": guid,
        "resource_paths": resource_paths,
        "action": "download",
        "protocol": expected_protocol,
    }

    with audit_service_mocker as audit_service_requests:
        audit_service_requests.post.return_value = MockResponse(
            data={},
            status_code=201,
        )
        response = client.get(path, headers=auth_headers)
        assert response.status_code == 200, response
        assert response.json.get("url")
        audit_service_requests.post.assert_called_once_with(
            "http://audit-service/log/presigned_url",
            json=expected_log,
        )