def test_permissions_revoke_all_iam_permissions(clean_db, monkeypatch):
    """
    Smoke test that Permissions.revoke_all_iam_permissions calls
    revoke_download_access with the right arguments.
    """
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    # Grant the user one permission per upload type
    upload_types = ["wes_bam", "ihc", "rna_fastq", "plasma"]
    for upload_type in upload_types:
        Permissions(
            granted_to_user=user.id,
            trial_id=trial.trial_id,
            upload_type=upload_type,
            granted_by_user=user.id,
        ).insert()

    # Revoking everything should revoke each granted (email, trial, type) triple
    Permissions.revoke_all_iam_permissions()
    gcloud_client.revoke_download_access.assert_has_calls(
        [call(user.email, trial.trial_id, upload_type) for upload_type in upload_types]
    )

    # not called on admins or nci biobank users
    gcloud_client.revoke_download_access.reset_mock()
    for role in [CIDCRole.ADMIN.value, CIDCRole.NCI_BIOBANK_USER.value]:
        user.role = role
        user.update()
        Permissions.revoke_all_iam_permissions()
        gcloud_client.revoke_download_access.assert_not_called()
def test_downloadable_files_additional_metadata_default(clean_db):
    """additional_metadata defaults to {} and nullish values are coerced to {}."""
    TrialMetadata.create(TRIAL_ID, METADATA)
    record = DownloadableFiles(
        trial_id=TRIAL_ID,
        upload_type="wes_bam",
        object_url="10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        file_size_bytes=1,
        md5_hash="hash1234",
        uploaded_timestamp=datetime.now(),
    )

    # Inserting with no value yields the empty-dict default
    record.insert()
    assert record.additional_metadata == {}

    # Every nullish value gets normalized to an empty dict on update
    for nullish in (None, "null", {}):
        record.additional_metadata = nullish
        record.update()
        assert record.additional_metadata == {}

    # A real value survives the update untouched
    real_value = {"foo": "bar"}
    record.additional_metadata = real_value
    record.update()
    assert record.additional_metadata == real_value
def test_create_downloadable_file_from_metadata(db, monkeypatch): """Try to create a downloadable file from artifact_core metadata""" # fake file metadata file_metadata = { "artifact_category": "Assay Artifact from CIMAC", "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq", "file_name": "wes_forward.fastq", "file_size_bytes": 1, "md5_hash": "hash1234", "uploaded_timestamp": datetime.now(), "foo": "bar", # unsupported column - should be filtered } # Create the trial (to avoid violating foreign-key constraint) TrialMetadata.patch_trial_metadata(TRIAL_ID, METADATA) # Create the file DownloadableFiles.create_from_metadata(TRIAL_ID, "wes", file_metadata) # Check that we created the file new_file = (db.query(DownloadableFiles).filter_by( file_name=file_metadata["file_name"]).first()) assert new_file del file_metadata["foo"] for k in file_metadata.keys(): assert getattr(new_file, k) == file_metadata[k]
def test_permissions_delete(clean_db, monkeypatch, caplog):
    """Exercise Permissions.delete: IAM revocation, audit logging, and edge cases."""
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()
    perm = Permissions(
        granted_to_user=user.id,
        trial_id=trial.trial_id,
        upload_type="wes_bam",
        granted_by_user=user.id,
    )
    perm.insert()

    # Deleting a record by a user that doesn't exist leads to an error
    gcloud_client.reset_mocks()
    with pytest.raises(NoResultFound, match="no user with id"):
        perm.delete(deleted_by=999999)

    # Deletion of an existing permission leads to no error
    gcloud_client.reset_mocks()
    with caplog.at_level(logging.DEBUG):
        perm.delete(deleted_by=user.id)
    gcloud_client.revoke_download_access.assert_called_once()
    gcloud_client.grant_download_access.assert_not_called()
    # An admin-action audit line must have been logged
    assert any(
        log_record.message.strip()
        == f"admin-action: {user.email} removed from {user.email} the permission wes_bam on {trial.trial_id}"
        for log_record in caplog.records
    )

    # Deleting an already-deleted record is idempotent
    # NOTE(review): deleted_by receives a Users instance here but an id above —
    # presumably delete() accepts both; confirm.
    gcloud_client.reset_mocks()
    perm.delete(deleted_by=user)
    gcloud_client.revoke_download_access.assert_called_once()
    gcloud_client.grant_download_access.assert_not_called()

    # Deleting a record whose user doesn't exist leads to an error
    gcloud_client.reset_mocks()
    with pytest.raises(NoResultFound, match="no user with id"):
        Permissions(granted_to_user=999999).delete(deleted_by=user)
    gcloud_client.revoke_download_access.assert_not_called()
    gcloud_client.grant_download_access.assert_not_called()

    # If revoking a permission from a "network-viewer", no GCS IAM actions are taken
    gcloud_client.revoke_download_access.reset_mock()
    user.role = CIDCRole.NETWORK_VIEWER.value
    user.update()
    perm = Permissions(
        granted_to_user=user.id,
        trial_id=trial.trial_id,
        upload_type="ihc",
        granted_by_user=user.id,
    )
    perm.insert()
    perm.delete(deleted_by=user)
    gcloud_client.revoke_download_access.assert_not_called()
def test_create_downloadable_file_from_metadata(clean_db, monkeypatch): """Try to create a downloadable file from artifact_core metadata""" # fake file metadata file_metadata = { "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq", "file_size_bytes": 1, "md5_hash": "hash1234", "uploaded_timestamp": datetime.now(), "foo": "bar", # unsupported column - should be filtered } additional_metadata = {"more": "info"} # Mock artifact upload publishing publisher = MagicMock() monkeypatch.setattr("cidc_api.models.models.publish_artifact_upload", publisher) # Create the trial (to avoid violating foreign-key constraint) TrialMetadata.create(TRIAL_ID, METADATA) # Create files with empty or "null" additional metadata for nullish_value in ["null", None, {}]: df = DownloadableFiles.create_from_metadata( TRIAL_ID, "wes_bam", file_metadata, additional_metadata=nullish_value ) clean_db.refresh(df) assert df.additional_metadata == {} # Create the file DownloadableFiles.create_from_metadata( TRIAL_ID, "wes_bam", file_metadata, additional_metadata=additional_metadata ) # Check that we created the file new_file = ( clean_db.query(DownloadableFiles) .filter_by(object_url=file_metadata["object_url"]) .first() ) assert new_file del file_metadata["foo"] for k in file_metadata.keys(): assert getattr(new_file, k) == file_metadata[k] assert new_file.additional_metadata == additional_metadata # Check that no artifact upload event was published publisher.assert_not_called() # Check that artifact upload publishes DownloadableFiles.create_from_metadata( TRIAL_ID, "wes_bam", file_metadata, additional_metadata=additional_metadata, alert_artifact_upload=True, ) publisher.assert_called_once_with(file_metadata["object_url"])
def test_user_get_data_access_report(clean_db, monkeypatch):
    """Test that user data access info is collected as expected"""
    mock_gcloud_client(monkeypatch)

    admin_user = Users(
        email="*****@*****.**",
        organization="CIDC",
        approval_date=datetime.now(),
        role=CIDCRole.ADMIN.value,
    )
    admin_user.insert()
    cimac_user = Users(
        email="*****@*****.**",
        organization="DFCI",
        approval_date=datetime.now(),
        role=CIDCRole.CIMAC_USER.value,
    )
    cimac_user.insert()

    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    upload_types = ["wes_bam", "ihc"]

    # Note that admins don't need permissions to view data,
    # so we're deliberately issuing unnecessary permissions here.
    for user in [admin_user, cimac_user]:
        for t in upload_types:
            Permissions(
                granted_to_user=user.id,
                granted_by_user=admin_user.id,
                trial_id=trial.trial_id,
                upload_type=t,
            ).insert()

    # The report is written to the stream and also returned as a dataframe
    bio = io.BytesIO()
    result_df = Users.get_data_access_report(bio)
    bio.seek(0)

    # Make sure bytes were written to the BytesIO instance
    assert bio.getbuffer().nbytes > 0

    # Make sure report data has expected info
    assert set(result_df.columns) == set(
        ["email", "role", "organization", "trial_id", "permissions"]
    )
    for user in [admin_user, cimac_user]:
        user_df = result_df[result_df.email == user.email]
        assert set([user.role]) == set(user_df.role)
        assert set([user.organization]) == set(user_df.organization)
        if user == admin_user:
            # Admins are reported with blanket "*" access
            assert set(["*"]) == set(user_df.permissions)
        else:
            # Permission strings are comma-joined in unspecified order
            assert set(user_df.permissions).issubset(["wes_bam,ihc", "ihc,wes_bam"])
def test_partial_patch_trial_metadata(clean_db):
    """A patch lacking required fields (no "participants") should merge cleanly."""
    # Seed the database with the initial trial record
    initial_trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    clean_db.add(initial_trial)
    clean_db.commit()

    # Build a partial patch that omits "participants" entirely
    partial_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID, "assays": {}}

    # Applying the partial patch should raise no error/exception
    TrialMetadata._patch_trial_metadata(TRIAL_ID, partial_patch)
def test_downloadable_files_get_related_files(clean_db):
    """Check that get_related_files returns the expected disjoint groups of files."""
    # Create a trial to avoid constraint errors
    TrialMetadata.create(trial_id=TRIAL_ID, metadata_json=METADATA)

    # Convenience function for building file records.
    # Fixed: the default for `additional_metadata` was a mutable `{}` shared
    # across calls (classic Python pitfall); use a None sentinel instead.
    def create_df(facet_group, additional_metadata=None) -> DownloadableFiles:
        df = DownloadableFiles(
            facet_group=facet_group,
            additional_metadata={}
            if additional_metadata is None
            else additional_metadata,
            trial_id=TRIAL_ID,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=0,
            object_url=facet_group,  # just filler, not relevant to the test
            upload_type="",
        )
        df.insert()
        clean_db.refresh(df)
        return df

    # Set up test data: two samples' worth of cytof files plus unrelated files
    cimac_id_1 = "CTTTPPP01.01"
    cimac_id_2 = "CTTTPPP02.01"
    files = [
        create_df(
            "/cytof/normalized_and_debarcoded.fcs", {"some.path.cimac_id": cimac_id_1}
        ),
        create_df(
            "/cytof_analysis/assignment.csv",
            # NOTE: this isn't realistic - assignment files aren't sample-specific - but
            # it serves the purpose of the test.
            {"path.cimac_id": cimac_id_1, "another.path.cimac_id": cimac_id_1},
        ),
        create_df("/cytof_analysis/source.fcs", {"path.to.cimac_id": cimac_id_2}),
        create_df("/cytof_analysis/reports.zip"),
        create_df("/cytof_analysis/analysis.zip"),
        create_df("/wes/r1_L.fastq.gz"),
    ]

    # Based on setup, we expect the following disjoint sets of related files:
    related_file_groups = [
        [files[0], files[1]],
        [files[2]],
        [files[3], files[4]],
        [files[5]],
    ]

    # Check that get_related_files returns exactly the other members of each group
    for file_group in related_file_groups:
        for file_record in file_group:
            other_ids = [f.id for f in file_group if f.id != file_record.id]
            related_files = file_record.get_related_files()
            assert set([f.id for f in related_files]) == set(other_ids)
            assert len(related_files) == len(other_ids)
def test_upload_job_no_file_map(clean_db):
    """An upload job created without a file map should yield no upload URIs."""
    Users.create(PROFILE)
    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"
    TrialMetadata.create(TRIAL_ID, METADATA)

    created = UploadJobs.create(
        prism.SUPPORTED_MANIFESTS[0], EMAIL, None, metadata_patch, xlsx_uri
    )
    assert list(created.upload_uris_with_data_uris_with_uuids()) == []

    # Re-fetch the persisted job and verify the same holds there
    fetched = UploadJobs.find_by_id_and_email(created.id, PROFILE["email"])
    assert list(fetched.upload_uris_with_data_uris_with_uuids()) == []
def test_assay_upload_ingestion_success(clean_db, monkeypatch, caplog):
    """Check that the ingestion success method works as expected"""
    caplog.set_level(logging.DEBUG)

    new_user = Users.create(PROFILE)
    trial = TrialMetadata.create(TRIAL_ID, METADATA)
    assay_upload = UploadJobs.create(
        upload_type="ihc",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={PROTOCOL_ID_FIELD_NAME: TRIAL_ID},
        gcs_xlsx_uri="",
        commit=False,
    )
    clean_db.commit()

    # Ensure that success can't be declared from a starting state
    with pytest.raises(Exception, match="current status"):
        assay_upload.ingestion_success(trial)

    # Update assay_upload status to simulate a completed but not ingested upload
    assay_upload.status = UploadJobStatus.UPLOAD_COMPLETED.value
    assay_upload.ingestion_success(trial)

    # Check that status was updated and email wasn't sent by default
    db_record = UploadJobs.find_by_id(assay_upload.id)
    assert db_record.status == UploadJobStatus.MERGE_COMPLETED.value
    assert "Would send email with subject '[UPLOAD SUCCESS]" not in caplog.text

    # Check that email gets sent when specified
    assay_upload.ingestion_success(trial, send_email=True)
    assert "Would send email with subject '[UPLOAD SUCCESS]" in caplog.text
def setup_trial_and_user(cidc_api, monkeypatch) -> int: """ Insert a trial and a cimac-user into the database, and set the user as the current user. """ # this is necessary for adding/removing permissions from this user # without trying to contact GCP mock_gcloud_client(monkeypatch) user = Users(email=user_email, role=CIDCRole.CIMAC_USER.value, approval_date=datetime.now()) mock_current_user(user, monkeypatch) with cidc_api.app_context(): TrialMetadata( trial_id="test_trial", metadata_json={ prism.PROTOCOL_ID_FIELD_NAME: trial_id, "participants": [], "allowed_cohort_names": ["Arm_Z"], "allowed_collection_event_names": [], }, ).insert() user.insert() return user.id
def setup_permissions(cidc_api, monkeypatch) -> Tuple[int, int]:
    """
    Create two users, one trial, and three permissions in `db`.

    Two permissions belong to the first user and the third to the second one.
    Returns the first and second user ids as a tuple.
    """
    current_user = Users(
        id=1,
        email="*****@*****.**",
        role=CIDCRole.CIMAC_USER.value,
        approval_date=datetime.now(),
    )
    other_user = Users(id=2, email="*****@*****.**")

    mock_current_user(current_user, monkeypatch)

    with cidc_api.app_context():
        # Persist both users
        current_user.insert()
        other_user.insert()

        # Persist the trial
        TrialMetadata.create(
            TRIAL_ID,
            {
                "protocol_identifier": TRIAL_ID,
                "allowed_collection_event_names": [],
                "allowed_cohort_names": [],
                "participants": [],
            },
        )

        # Grant each (user, assay) pair a permission on the trial
        def grant(uid, assay):
            Permissions(
                granted_by_user=uid,
                granted_to_user=uid,
                trial_id=TRIAL_ID,
                upload_type=assay,
            ).insert()

        for uid, assay in [
            (current_user.id, "ihc"),
            (current_user.id, "olink"),
            (other_user.id, "olink"),
        ]:
            grant(uid, assay)

        return current_user.id, other_user.id
def test_trial_metadata_insert(clean_db):
    """Metadata validation on insert: valid passes, invalid raises, opt-out skips."""
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)

    # Valid metadata inserts without error
    trial.insert()

    # Invalid metadata raises a validation error on insert...
    trial.metadata_json = {"foo": "bar"}
    with pytest.raises(ValidationMultiError):
        trial.insert()

    # ...unless validation is explicitly disabled
    trial.insert(validate_metadata=False)
def test_trial_metadata_patch_manifest(clean_db):
    """Update manifest data in a trial_metadata record"""
    # Build a metadata blob carrying a single participant
    participant = {
        "samples": [],
        "cimac_participant_id": "CTSTP01",
        "participant_id": "trial a",
        "cohort_name": "Arm_Z",
    }
    metadata_with_participant = METADATA.copy()
    metadata_with_participant["participants"] = [participant]

    # Patching before the trial exists should fail
    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # Once the trial exists, the same patch should succeed
    TrialMetadata.create(TRIAL_ID, METADATA)
    TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # The merged trial record should now contain the participant
    merged = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert (
        merged.metadata_json["participants"]
        == metadata_with_participant["participants"]
    )
def test_assay_upload_merge_extra_metadata(clean_db, monkeypatch):
    """Merge parsed extra-metadata files into an existing upload's metadata patch."""
    new_user = Users.create(PROFILE)
    TrialMetadata.create(TRIAL_ID, METADATA)

    assay_upload = UploadJobs.create(
        upload_type="assay_with_extra_md",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={
            PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            # placeholder records keyed by artifact uuid, for the parsed extra
            # metadata to be merged into
            "whatever": {
                "hierarchy": [
                    {"we just need a": "uuid-1", "to be able": "to merge"},
                    {"and": "uuid-2"},
                ]
            },
        },
        gcs_xlsx_uri="",
        commit=False,
    )
    # pin a known id so merge_extra_metadata can look the job up below
    assay_upload.id = 111
    clean_db.commit()

    # Fake parser: the "extra" value is just the decoded file contents
    custom_extra_md_parse = MagicMock()
    custom_extra_md_parse.side_effect = lambda f: {"extra": f.read().decode()}
    monkeypatch.setattr(
        "cidc_schemas.prism.merger.EXTRA_METADATA_PARSERS",
        {"assay_with_extra_md": custom_extra_md_parse},
    )

    UploadJobs.merge_extra_metadata(
        111,
        {
            "uuid-1": io.BytesIO(b"within extra md file 1"),
            "uuid-2": io.BytesIO(b"within extra md file 2"),
        },
        session=clean_db,
    )

    # Still exactly one upload job; its patch now carries the parsed "extra" keys
    assert 1 == clean_db.query(UploadJobs).count()
    au = clean_db.query(UploadJobs).first()
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][0]
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][1]
def test_permissions_broad_perms(clean_db, monkeypatch):
    """
    Broad (trial-level and upload-level) permissions: inserting one supersedes
    narrower perms, and lookups fall back to the broader perm.
    """
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()
    other_trial = TrialMetadata(
        trial_id="other-trial",
        metadata_json={**METADATA, "protocol_identifier": "other-trial"},
    )
    other_trial.insert()

    # Grant specific (trial x upload type) permissions
    for ut in ["wes_fastq", "olink"]:
        for tid in [trial.trial_id, other_trial.trial_id]:
            Permissions(
                granted_to_user=user.id,
                trial_id=tid,
                upload_type=ut,
                granted_by_user=user.id,
            ).insert()

    # Can't insert a permission for access to all trials and assays
    with pytest.raises(ValueError, match="must have a trial id or upload type"):
        Permissions(granted_to_user=user.id, granted_by_user=user.id).insert()

    # Inserting a trial-level permission should delete other more specific related perms.
    trial_query = clean_db.query(Permissions).filter(
        Permissions.trial_id == trial.trial_id
    )
    assert trial_query.count() == 2
    Permissions(
        trial_id=trial.trial_id, granted_to_user=user.id, granted_by_user=user.id
    ).insert()
    assert trial_query.count() == 1
    perm = trial_query.one()
    assert perm.trial_id == trial.trial_id
    assert perm.upload_type is None

    # Inserting an upload-level permission should delete other more specific related perms.
    olink_query = clean_db.query(Permissions).filter(Permissions.upload_type == "olink")
    assert olink_query.count() == 1
    assert olink_query.one().trial_id == other_trial.trial_id
    Permissions(
        upload_type="olink", granted_to_user=user.id, granted_by_user=user.id
    ).insert()
    assert olink_query.count() == 1
    perm = olink_query.one()
    assert perm.trial_id is None
    assert perm.upload_type == "olink"

    # Getting perms for a particular user-trial-type returns broader perms
    perm = Permissions.find_for_user_trial_type(user.id, trial.trial_id, "ihc")
    assert perm is not None and perm.upload_type is None
    perm = Permissions.find_for_user_trial_type(user.id, "some random trial", "olink")
    assert perm is not None and perm.trial_id is None
def setup_db_records(cidc_api):
    """Insert one record of each model type, all sharing the fixture ETag."""
    shared = {"_etag": ETAG}
    # Insertion order matters: later models reference earlier ones.
    fixtures = [
        (Users, users),
        (TrialMetadata, trial_metadata),
        (DownloadableFiles, downloadable_files),
        (Permissions, permissions),
        (UploadJobs, upload_jobs),
    ]
    with cidc_api.app_context():
        for model, fixture in fixtures:
            model(**fixture["json"], **shared).insert(compute_etag=False)
def create_trial(n, grant_perm=False):
    """
    Insert a test trial (with an empty participant list when n == 2) and,
    optionally, grant the enclosing-scope user olink and ihc permissions on it.

    Returns the inserted trial's database id.
    """
    trial_id = f"test-trial-{n}"
    metadata_json = {
        "protocol_identifier": trial_id,
        "participants": []
        if n == 2
        else [
            {
                "cimac_participant_id": "CTTTPP1",
                "participant_id": "x",
                "samples": [
                    {
                        # fixed: this literal had a stray f-string prefix with
                        # no placeholders (ruff F541)
                        "cimac_id": "CTTTPP1SS.01",
                        "sample_location": "",
                        "type_of_primary_container": "Other",
                        "type_of_sample": "Other",
                        "collection_event_name": "",
                        "parent_sample_id": "",
                    }
                ],
            }
        ],
        "allowed_collection_event_names": [""],
        "allowed_cohort_names": [],
        "assays": {},
        "analysis": {},
        "shipments": [],
    }
    trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata_json)
    trial.insert()

    # `user_id` comes from the enclosing scope; only grant when it's truthy
    if grant_perm and user_id:
        for upload_type in ("olink", "ihc"):
            Permissions(
                granted_to_user=user_id,
                trial_id=trial.trial_id,
                upload_type=upload_type,
                granted_by_user=user_id,
            ).insert()
    return trial.id
def test_create_assay_upload(clean_db):
    """Try to create an assay upload"""
    new_user = Users.create(PROFILE)

    gcs_file_map = {
        "my/first/wes/blob1/2019-08-30T15:51:38.450978": "test-uuid-1",
        "my/first/wes/blob2/2019-08-30T15:51:38.450978": "test-uuid-2",
    }
    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    # Should fail, since trial doesn't exist yet
    with pytest.raises(IntegrityError):
        UploadJobs.create("wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri)
    clean_db.rollback()

    TrialMetadata.create(TRIAL_ID, METADATA)

    new_job = UploadJobs.create(
        "wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri
    )
    job = UploadJobs.find_by_id_and_email(new_job.id, PROFILE["email"])

    # The persisted job should round-trip the file map and start as "started"
    assert len(new_job.gcs_file_map) == len(job.gcs_file_map)
    assert set(new_job.gcs_file_map) == set(job.gcs_file_map)
    assert job.status == "started"

    # Upload URIs split off the trailing timestamp and pair with their uuids
    assert list(job.upload_uris_with_data_uris_with_uuids()) == [
        (
            "my/first/wes/blob1/2019-08-30T15:51:38.450978",
            "my/first/wes/blob1",
            "test-uuid-1",
        ),
        (
            "my/first/wes/blob2/2019-08-30T15:51:38.450978",
            "my/first/wes/blob2",
            "test-uuid-2",
        ),
    ]
def test_permissions_grant_iam_permissions(clean_db, monkeypatch):
    """
    Smoke test that Permissions.grant_iam_permissions calls grant_download_access
    with the right arguments.
    """
    refresh_intake_access = MagicMock()
    monkeypatch.setattr(
        "cidc_api.models.models.refresh_intake_access", refresh_intake_access
    )
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**", role=CIDCRole.NETWORK_VIEWER.value)
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    # Grant the user one permission per upload type
    upload_types = ["wes_bam", "ihc", "rna_fastq", "plasma"]
    for upload_type in upload_types:
        Permissions(
            granted_to_user=user.id,
            trial_id=trial.trial_id,
            upload_type=upload_type,
            granted_by_user=user.id,
        ).insert()

    # IAM permissions not granted to network viewers
    Permissions.grant_iam_permissions(user=user)
    gcloud_client.grant_download_access.assert_not_called()

    # IAM permissions should be granted for any other role
    user.role = CIDCRole.CIMAC_USER.value
    Permissions.grant_iam_permissions(user=user)
    for upload_type in upload_types:
        assert (
            call(user.email, trial.trial_id, upload_type)
            in gcloud_client.grant_download_access.call_args_list
        )
    # intake bucket access is refreshed exactly once for the user
    refresh_intake_access.assert_called_once_with(user.email)
def test_update_trial_metadata(db): """Update an existing trial_metadata_record""" # Create the initial trial TrialMetadata.patch_trial_metadata(TRIAL_ID, METADATA) # Add metadata to the trial metadata_patch = METADATA.copy() metadata_patch["participants"] = [{ "samples": [], "cimac_participant_id": "b", "trial_participant_id": "trial a", "cohort_id": "cohort_id", "arm_id": "arm_id", }] TrialMetadata.patch_trial_metadata(TRIAL_ID, metadata_patch) # Look the trial up and check that it was merged as expected trial = TrialMetadata.find_by_trial_id(TRIAL_ID) sort = lambda participant_list: sorted( participant_list, key=lambda d: d["cimac_participant_id"]) expected_participants = METADATA["participants"] + metadata_patch[ "participants"] actual_participants = trial.metadata_json["participants"] assert sort(actual_participants) == sort(expected_participants)
def test_create_trial_metadata(clean_db):
    """Insert a trial metadata record if one doesn't exist"""
    TrialMetadata.create(TRIAL_ID, METADATA)
    fetched = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert fetched
    assert fetched.metadata_json == METADATA

    # Invalid metadata is rejected by create() ...
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata.create("foo", {"buzz": "bazz"})
    # ... and by a direct insert()
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata(trial_id="foo", metadata_json={"buzz": "bazz"}).insert()
def test_trial_metadata_patch_assay(clean_db):
    """Update assay data in a trial_metadata record"""
    # Add an assay to the trial
    metadata_with_assay = METADATA.copy()
    metadata_with_assay["assays"] = {"wes": []}

    # Patching before the trial exists should fail.
    # Fixed: this test called patch_manifest (a copy-paste from the manifest test
    # above); it should exercise the assay patch path instead.
    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Look the trial up and check that it has the assay in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial.metadata_json["assays"] == metadata_with_assay["assays"]
def test_create_trial(cidc_api, clean_db, monkeypatch):
    """Check that creating a new trial works as expected"""
    user_id = setup_user(cidc_api, monkeypatch)

    trial_id = "test-trial"
    trial_json = {
        "trial_id": trial_id,
        "metadata_json": {
            "protocol_identifier": trial_id,
            "participants": [],
            "allowed_collection_event_names": [],
            "allowed_cohort_names": [],
        },
    }

    client = cidc_api.test_client()

    # Non-admins can't create trials
    res = client.post("/trial_metadata", json=trial_json)
    assert res.status_code == 401

    # Allowed users can create trials
    for role in trial_modifier_roles:
        make_role(user_id, role, cidc_api)
        res = client.post("/trial_metadata", json=trial_json)
        assert res.status_code == 201
        # the response should contain everything we POSTed
        assert {**res.json, **trial_json} == res.json

        # No two trials can have the same trial_id
        res = client.post("/trial_metadata", json=trial_json)
        assert res.status_code == 400

        # No trial can be created with invalid metadata
        bad_trial_json = {"trial_id": "foo", "metadata_json": {"foo": "bar"}}
        res = client.post("/trial_metadata", json=bad_trial_json)
        assert res.status_code == 422
        assert res.json["_error"]["message"] == bad_trial_error_message

        # Clear created trial (so the next role's POST succeeds)
        with cidc_api.app_context():
            trial = TrialMetadata.find_by_trial_id(trial_id)
            trial.delete()
def setup_downloadable_files(cidc_api) -> Tuple[int, int]:
    """Insert two downloadable files into the database."""
    # NOTE: both trials share this metadata blob (protocol_identifier is
    # trial_id_1 in both cases, as in the original).
    base_metadata = {
        "protocol_identifier": trial_id_1,
        "allowed_collection_event_names": [],
        "allowed_cohort_names": [],
        "participants": [],
    }
    trial_1 = TrialMetadata(trial_id=trial_id_1, metadata_json=base_metadata)
    trial_2 = TrialMetadata(trial_id=trial_id_2, metadata_json=base_metadata)

    def build_file(trial_id, object_url, upload_type, facet_group) -> DownloadableFiles:
        # All files share the same size/timestamp; only identity fields vary.
        return DownloadableFiles(
            trial_id=trial_id,
            upload_type=upload_type,
            object_url=f"{trial_id}/{object_url}",
            facet_group=facet_group,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=int(51 * 1e6),  # 51MB
        )

    wes_file = build_file(
        trial_id_1, "wes/.../reads_123.bam", "wes_bam", "/wes/r1_L.fastq.gz"
    )
    cytof_file = build_file(
        trial_id_2,
        "cytof/.../analysis.zip",
        "cytof_10021_9204",
        "/cytof_analysis/analysis.zip",
    )

    with cidc_api.app_context():
        trial_1.insert()
        trial_2.insert()
        wes_file.insert()
        cytof_file.insert()

        return wes_file.id, cytof_file.id
def test_get_trial_by_trial_id(cidc_api, clean_db, monkeypatch):
    """Check that getting a single trial by trial id works as expected"""
    user_id = setup_user(cidc_api, monkeypatch)

    # Fixed: the ids were round-tripped through set(), whose iteration order is
    # arbitrary, so `trial_id` could nondeterministically receive either id.
    trial_id, _ = setup_trial_metadata(cidc_api)
    with cidc_api.app_context():
        trial = TrialMetadata.find_by_id(trial_id)

    client = cidc_api.test_client()

    # Non-admins can't get single trials
    res = client.get(f"/trial_metadata/{trial.trial_id}")
    assert res.status_code == 401

    # Allowed users can get single trials
    for role in trial_modifier_roles:
        make_role(user_id, role, cidc_api)
        res = client.get(f"/trial_metadata/{trial.trial_id}")
        assert res.status_code == 200
        assert res.json == TrialMetadataSchema().dump(trial)

    # Getting non-existent trials yields 404
    res = client.get(f"/trial_metadata/foobar")
    assert res.status_code == 404
def _derive_files_from_upload(trial_id: str, upload_type: str, session):
    """
    Derive downstream files for the given trial/upload type, persist each to
    GCS and the database, then update the trial's metadata blob.
    """
    # Get trial metadata JSON for the associated trial
    trial_record: TrialMetadata = TrialMetadata.find_by_trial_id(
        trial_id, session=session
    )

    # Run the file derivation
    derivation_context = unprism.DeriveFilesContext(
        trial_record.metadata_json, upload_type, fetch_artifact
    )
    derivation_result = unprism.derive_files(derivation_context)

    # TODO: consider parallelizing this step if necessary
    for artifact in derivation_result.artifacts:
        # Save to GCS
        blob = upload_to_data_bucket(artifact.object_url, artifact.data)

        # Build basic facet group
        facet_group = f"{artifact.data_format}|{artifact.file_type}"

        # Save to database
        df_record = DownloadableFiles.create_from_blob(
            trial_record.trial_id,
            artifact.file_type,
            artifact.data_format,
            facet_group,
            blob,
            session=session,
            alert_artifact_upload=True,
        )
        df_record.additional_metadata = artifact.metadata
        # Assume that a derived file will be directly useful for data analysis
        df_record.analysis_friendly = True

    # Update the trial metadata blob (in case the file derivation modified it)
    trial_record.metadata_json = derivation_result.trial_metadata

    session.commit()
def insert_trial(trial_id, num_participants, num_samples):
    """
    Insert a trial with `num_participants` participants, where participant p
    carries num_samples[p] samples.
    """

    def _sample(s):
        # Minimal valid sample record
        return {
            "cimac_id": f"CTTTPP1SS.0{s}",
            "sample_location": "",
            "type_of_primary_container": "Other",
            "type_of_sample": "Other",
            "collection_event_name": "",
            "parent_sample_id": "",
        }

    def _participant(p):
        # Minimal valid participant record with its generated samples
        return {
            "cimac_participant_id": f"CTTTPP{p}",
            "participant_id": "x",
            "samples": [_sample(s) for s in range(num_samples[p])],
        }

    TrialMetadata(
        trial_id=trial_id,
        metadata_json={
            prism.PROTOCOL_ID_FIELD_NAME: trial_id,
            "allowed_cohort_names": [""],
            "allowed_collection_event_names": [""],
            "participants": [_participant(p) for p in range(num_participants)],
        },
    ).insert()
def test_upload_data_files(cidc_api, monkeypatch):
    """
    Check that upload_data_files builds the expected response payload:
    url/gcs file mappings, extra metadata, optional files, and job info.
    """
    user = Users(email="*****@*****.**")
    trial = TrialMetadata(
        trial_id="test_trial",
        metadata_json={
            prism.PROTOCOL_ID_FIELD_NAME: trial_id,
            "participants": [],
            "allowed_cohort_names": ["Arm_Z"],
            "allowed_collection_event_names": [],
        },
    )
    template_type = "foo"
    xlsx_file = MagicMock()
    md_patch = {}
    # Four files covering each combination of metadata_availability x allow_empty
    file_infos = [
        finfo(
            "localfile1.ext",
            "test_trial/url/file1.ext",
            "uuid-1",
            metadata_availability=None,
            allow_empty=None,
        ),
        finfo(
            "localfile2.ext",
            "test_trial/url/file2.ext",
            "uuid-2",
            metadata_availability=True,
            allow_empty=None,
        ),
        finfo(
            "localfile3.ext",
            "test_trial/url/file3.ext",
            "uuid-3",
            metadata_availability=None,
            allow_empty=True,
        ),
        finfo(
            "localfile4.ext",
            "test_trial/url/file4.ext",
            "uuid-4",
            metadata_availability=True,
            allow_empty=True,
        ),
    ]

    # Stub out all GCS interactions
    gcloud_client = MagicMock()
    gcloud_client.grant_upload_access = MagicMock()
    gcloud_client.upload_xlsx_to_gcs = MagicMock()
    gcs_blob = MagicMock()
    gcs_blob.name = "blob"
    gcloud_client.upload_xlsx_to_gcs.return_value = gcs_blob
    monkeypatch.setattr("cidc_api.resources.upload_jobs.gcloud_client", gcloud_client)

    # Stub out job creation with a job exposing fixed id/etag/token
    create = MagicMock()
    job = MagicMock()
    job.id = "id"
    job._etag = "_etag"
    job.token = "token"
    create.return_value = job
    monkeypatch.setattr("cidc_api.resources.upload_jobs.UploadJobs.create", create)

    with cidc_api.app_context():
        response = upload_data_files(
            user, trial, template_type, xlsx_file, md_patch, file_infos
        )
        json = response.get_json()

        assert "job_id" in json and json["job_id"] == "id"
        assert "job_etag" in json and json["job_etag"] == "_etag"

        # url_mapping values carry a trailing timestamp; strip it before comparing
        assert "url_mapping" in json
        url_mapping = {k: v.rsplit("/", 1)[0] for k, v in json["url_mapping"].items()}
        assert url_mapping == {
            "localfile1.ext": "test_trial/url/file1.ext",
            "localfile2.ext": "test_trial/url/file2.ext",
            "localfile3.ext": "test_trial/url/file3.ext",
            "localfile4.ext": "test_trial/url/file4.ext",
        }

        assert "gcs_bucket" in json and json["gcs_bucket"] == "cidc-uploads-staging"

        # Only files with metadata_availability appear in extra_metadata
        assert "extra_metadata" in json and json["extra_metadata"] == {
            "localfile2.ext": "uuid-2",
            "localfile4.ext": "uuid-4",
        }

        # gcs_file_map keys also carry timestamps; strip and sort for comparison
        assert "gcs_file_map" in json
        gcs_file_map = sorted(
            [(k.rsplit("/", 1)[0], v) for k, v in json["gcs_file_map"].items()],
            key=lambda i: i[0],
        )
        assert gcs_file_map == [
            ("test_trial/url/file1.ext", "uuid-1"),
            ("test_trial/url/file2.ext", "uuid-2"),
            ("test_trial/url/file3.ext", "uuid-3"),
            ("test_trial/url/file4.ext", "uuid-4"),
        ]

        # Only files with allow_empty are reported as optional
        assert "optional_files" in json and json["optional_files"] == [
            "localfile3.ext",
            "localfile4.ext",
        ]
        assert "token" in json and json["token"] == "token"
def setup_data(cidc_api, clean_db):
    """
    Insert a user, a trial (with duplicate shipments and generated
    participants/samples), and a merge-completed pbmc upload job.

    Returns the refreshed (user, upload_job, trial) tuple.
    """
    user = Users(email="*****@*****.**", approval_date=datetime.now())
    shipment = {
        "courier": "FEDEX",
        "ship_to": "",
        "ship_from": "",
        "assay_type": assay_type,
        "manifest_id": manifest_id,
        "date_shipped": "2020-06-10 00:00:00",
        "date_received": "2020-06-11 00:00:00",
        "account_number": "",
        "assay_priority": "1",
        "receiving_party": "MSSM_Rahman",
        "tracking_number": "",
        "shipping_condition": "Frozen_Dry_Ice",
        "quality_of_shipment": "Specimen shipment received in good condition",
    }
    metadata = {
        "protocol_identifier": trial_id,
        "shipments": [
            # we get duplicate shipment uploads sometimes
            shipment,
            shipment,
        ],
        "participants": [
            {
                "cimac_participant_id": f"CTTTPP{p}",
                "participant_id": "x",
                "cohort_name": "",
                "samples": [
                    {
                        "cimac_id": f"CTTTPP{p}SS.0{s}",
                        "sample_location": "",
                        "type_of_primary_container": "Other",
                        "type_of_sample": "Other",
                        "collection_event_name": "",
                        "parent_sample_id": "",
                    }
                    for s in range(num_samples[p])
                ],
            }
            for p in range(num_participants)
        ],
        "allowed_cohort_names": [""],
        "allowed_collection_event_names": [""],
    }
    trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata)
    upload_job = UploadJobs(
        uploader_email=user.email,
        trial_id=trial.trial_id,
        upload_type="pbmc",
        gcs_xlsx_uri="",
        metadata_patch=metadata,
        multifile=False,
    )
    # bypass status-transition validation: jump straight to MERGE_COMPLETED
    upload_job._set_status_no_validation(UploadJobStatus.MERGE_COMPLETED.value)

    with cidc_api.app_context():
        user.insert()
        trial.insert()
        upload_job.insert()

        # re-load DB-generated fields (ids, etags) onto the in-memory objects
        clean_db.refresh(user)
        clean_db.refresh(upload_job)
        clean_db.refresh(trial)

    return user, upload_job, trial