def test_downloadable_files_additional_metadata_default(clean_db):
    TrialMetadata.create(TRIAL_ID, METADATA)
    df = DownloadableFiles(
        trial_id=TRIAL_ID,
        upload_type="wes_bam",
        object_url="10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        file_size_bytes=1,
        md5_hash="hash1234",
        uploaded_timestamp=datetime.now(),
    )

    # Check no value passed
    df.insert()
    assert df.additional_metadata == {}

    for nullish_value in [None, "null", {}]:
        df.additional_metadata = nullish_value
        df.update()
        assert df.additional_metadata == {}

    # Non-nullish value doesn't get overridden
    non_nullish_value = {"foo": "bar"}
    df.additional_metadata = non_nullish_value
    df.update()
    assert df.additional_metadata == non_nullish_value

def test_trial_metadata_patch_manifest(clean_db):
    """Update manifest data in a trial_metadata record"""
    # Add a participant to the trial
    metadata_with_participant = METADATA.copy()
    metadata_with_participant["participants"] = [
        {
            "samples": [],
            "cimac_participant_id": "CTSTP01",
            "participant_id": "trial a",
            "cohort_name": "Arm_Z",
        }
    ]

    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # Look the trial up and check that it has the participant in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert (
        trial.metadata_json["participants"]
        == metadata_with_participant["participants"]
    )

def test_create_downloadable_file_from_metadata(clean_db, monkeypatch):
    """Try to create a downloadable file from artifact_core metadata"""
    # fake file metadata
    file_metadata = {
        "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        "file_size_bytes": 1,
        "md5_hash": "hash1234",
        "uploaded_timestamp": datetime.now(),
        "foo": "bar",  # unsupported column - should be filtered
    }
    additional_metadata = {"more": "info"}

    # Mock artifact upload publishing
    publisher = MagicMock()
    monkeypatch.setattr("cidc_api.models.models.publish_artifact_upload", publisher)

    # Create the trial (to avoid violating foreign-key constraint)
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Create files with empty or "null" additional metadata
    for nullish_value in ["null", None, {}]:
        df = DownloadableFiles.create_from_metadata(
            TRIAL_ID, "wes_bam", file_metadata, additional_metadata=nullish_value
        )
        clean_db.refresh(df)
        assert df.additional_metadata == {}

    # Create the file
    DownloadableFiles.create_from_metadata(
        TRIAL_ID, "wes_bam", file_metadata, additional_metadata=additional_metadata
    )

    # Check that we created the file
    new_file = (
        clean_db.query(DownloadableFiles)
        .filter_by(object_url=file_metadata["object_url"])
        .first()
    )
    assert new_file
    del file_metadata["foo"]
    for k in file_metadata.keys():
        assert getattr(new_file, k) == file_metadata[k]
    assert new_file.additional_metadata == additional_metadata

    # Check that no artifact upload event was published
    publisher.assert_not_called()

    # Check that artifact upload publishes
    DownloadableFiles.create_from_metadata(
        TRIAL_ID,
        "wes_bam",
        file_metadata,
        additional_metadata=additional_metadata,
        alert_artifact_upload=True,
    )
    publisher.assert_called_once_with(file_metadata["object_url"])

def test_downloadable_files_get_related_files(clean_db):
    # Create a trial to avoid constraint errors
    TrialMetadata.create(trial_id=TRIAL_ID, metadata_json=METADATA)

    # Convenience function for building file records
    def create_df(facet_group, additional_metadata={}) -> DownloadableFiles:
        df = DownloadableFiles(
            facet_group=facet_group,
            additional_metadata=additional_metadata,
            trial_id=TRIAL_ID,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=0,
            object_url=facet_group,  # just filler, not relevant to the test
            upload_type="",
        )
        df.insert()
        clean_db.refresh(df)
        return df

    # Set up test data
    cimac_id_1 = "CTTTPPP01.01"
    cimac_id_2 = "CTTTPPP02.01"
    files = [
        create_df(
            "/cytof/normalized_and_debarcoded.fcs", {"some.path.cimac_id": cimac_id_1}
        ),
        create_df(
            "/cytof_analysis/assignment.csv",
            # NOTE: this isn't realistic - assignment files aren't sample-specific - but
            # it serves the purpose of the test.
            {"path.cimac_id": cimac_id_1, "another.path.cimac_id": cimac_id_1},
        ),
        create_df("/cytof_analysis/source.fcs", {"path.to.cimac_id": cimac_id_2}),
        create_df("/cytof_analysis/reports.zip"),
        create_df("/cytof_analysis/analysis.zip"),
        create_df("/wes/r1_L.fastq.gz"),
    ]

    # Based on setup, we expect the following disjoint sets of related files:
    related_file_groups = [
        [files[0], files[1]],
        [files[2]],
        [files[3], files[4]],
        [files[5]],
    ]

    # Check that get_related_files returns what we expect
    for file_group in related_file_groups:
        for file_record in file_group:
            other_ids = [f.id for f in file_group if f.id != file_record.id]
            related_files = file_record.get_related_files()
            assert set([f.id for f in related_files]) == set(other_ids)
            assert len(related_files) == len(other_ids)

def test_create_trial_metadata(clean_db):
    """Insert a trial metadata record if one doesn't exist"""
    TrialMetadata.create(TRIAL_ID, METADATA)
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial
    assert trial.metadata_json == METADATA

    # Check that you can't insert a trial with invalid metadata
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata.create("foo", {"buzz": "bazz"})

    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata(trial_id="foo", metadata_json={"buzz": "bazz"}).insert()

def test_assay_upload_ingestion_success(clean_db, monkeypatch, caplog):
    """Check that the ingestion success method works as expected"""
    caplog.set_level(logging.DEBUG)

    new_user = Users.create(PROFILE)
    trial = TrialMetadata.create(TRIAL_ID, METADATA)
    assay_upload = UploadJobs.create(
        upload_type="ihc",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={PROTOCOL_ID_FIELD_NAME: TRIAL_ID},
        gcs_xlsx_uri="",
        commit=False,
    )

    clean_db.commit()

    # Ensure that success can't be declared from a starting state
    with pytest.raises(Exception, match="current status"):
        assay_upload.ingestion_success(trial)

    # Update assay_upload status to simulate a completed but not ingested upload
    assay_upload.status = UploadJobStatus.UPLOAD_COMPLETED.value
    assay_upload.ingestion_success(trial)

    # Check that status was updated and email wasn't sent by default
    db_record = UploadJobs.find_by_id(assay_upload.id)
    assert db_record.status == UploadJobStatus.MERGE_COMPLETED.value
    assert "Would send email with subject '[UPLOAD SUCCESS]" not in caplog.text

    # Check that email gets sent when specified
    assay_upload.ingestion_success(trial, send_email=True)
    assert "Would send email with subject '[UPLOAD SUCCESS]" in caplog.text

def test_upload_job_no_file_map(clean_db):
    """Try to create an upload job with no file map"""
    new_user = Users.create(PROFILE)

    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    TrialMetadata.create(TRIAL_ID, METADATA)

    new_job = UploadJobs.create(
        prism.SUPPORTED_MANIFESTS[0], EMAIL, None, metadata_patch, gcs_xlsx_uri
    )
    assert list(new_job.upload_uris_with_data_uris_with_uuids()) == []

    job = UploadJobs.find_by_id_and_email(new_job.id, PROFILE["email"])
    assert list(job.upload_uris_with_data_uris_with_uuids()) == []

def setup_permissions(cidc_api, monkeypatch) -> Tuple[int, int]:
    """
    Create two users, one trial, and three permissions in `db`.
    Two permissions will belong to the first user, and the third will belong
    to the second one. Returns the first and second user ids as a tuple.
    """
    current_user = Users(
        id=1,
        email="*****@*****.**",
        role=CIDCRole.CIMAC_USER.value,
        approval_date=datetime.now(),
    )
    other_user = Users(id=2, email="*****@*****.**")

    mock_current_user(current_user, monkeypatch)

    with cidc_api.app_context():
        # Create users
        current_user.insert()
        other_user.insert()

        # Create trial
        TrialMetadata.create(
            TRIAL_ID,
            {
                "protocol_identifier": TRIAL_ID,
                "allowed_collection_event_names": [],
                "allowed_cohort_names": [],
                "participants": [],
            },
        )

        # Create permissions
        def create_permission(uid, assay):
            Permissions(
                granted_by_user=uid,
                granted_to_user=uid,
                trial_id=TRIAL_ID,
                upload_type=assay,
            ).insert()

        create_permission(current_user.id, "ihc")
        create_permission(current_user.id, "olink")
        create_permission(other_user.id, "olink")

        return current_user.id, other_user.id

def test_assay_upload_merge_extra_metadata(clean_db, monkeypatch):
    """Merge extra metadata files into an existing assay upload's metadata patch"""
    new_user = Users.create(PROFILE)

    TrialMetadata.create(TRIAL_ID, METADATA)

    assay_upload = UploadJobs.create(
        upload_type="assay_with_extra_md",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={
            PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            "whatever": {
                "hierarchy": [
                    {"we just need a": "uuid-1", "to be able": "to merge"},
                    {"and": "uuid-2"},
                ]
            },
        },
        gcs_xlsx_uri="",
        commit=False,
    )
    assay_upload.id = 111
    clean_db.commit()

    custom_extra_md_parse = MagicMock()
    custom_extra_md_parse.side_effect = lambda f: {"extra": f.read().decode()}
    monkeypatch.setattr(
        "cidc_schemas.prism.merger.EXTRA_METADATA_PARSERS",
        {"assay_with_extra_md": custom_extra_md_parse},
    )

    UploadJobs.merge_extra_metadata(
        111,
        {
            "uuid-1": io.BytesIO(b"within extra md file 1"),
            "uuid-2": io.BytesIO(b"within extra md file 2"),
        },
        session=clean_db,
    )

    assert 1 == clean_db.query(UploadJobs).count()
    au = clean_db.query(UploadJobs).first()
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][0]
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][1]

def test_trial_metadata_patch_assay(clean_db):
    """Update assay data in a trial_metadata record"""
    # Add an assay to the trial
    metadata_with_assay = METADATA.copy()
    metadata_with_assay["assays"] = {"wes": []}

    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Look the trial up and check that it has the assay in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial.metadata_json["assays"] == metadata_with_assay["assays"]

def test_create_assay_upload(clean_db):
    """Try to create an assay upload"""
    new_user = Users.create(PROFILE)

    gcs_file_map = {
        "my/first/wes/blob1/2019-08-30T15:51:38.450978": "test-uuid-1",
        "my/first/wes/blob2/2019-08-30T15:51:38.450978": "test-uuid-2",
    }
    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    # Should fail, since trial doesn't exist yet
    with pytest.raises(IntegrityError):
        UploadJobs.create("wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri)
    clean_db.rollback()

    TrialMetadata.create(TRIAL_ID, METADATA)

    new_job = UploadJobs.create(
        "wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri
    )
    job = UploadJobs.find_by_id_and_email(new_job.id, PROFILE["email"])

    assert len(new_job.gcs_file_map) == len(job.gcs_file_map)
    assert set(new_job.gcs_file_map) == set(job.gcs_file_map)
    assert job.status == "started"

    assert list(job.upload_uris_with_data_uris_with_uuids()) == [
        (
            "my/first/wes/blob1/2019-08-30T15:51:38.450978",
            "my/first/wes/blob1",
            "test-uuid-1",
        ),
        (
            "my/first/wes/blob2/2019-08-30T15:51:38.450978",
            "my/first/wes/blob2",
            "test-uuid-2",
        ),
    ]