Example #1
def test_downloadable_files_additional_metadata_default(clean_db):
    TrialMetadata.create(TRIAL_ID, METADATA)
    df = DownloadableFiles(
        trial_id=TRIAL_ID,
        upload_type="wes_bam",
        object_url="10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        file_size_bytes=1,
        md5_hash="hash1234",
        uploaded_timestamp=datetime.now(),
    )

    # Check the default when no value is passed
    df.insert()
    assert df.additional_metadata == {}

    for nullish_value in [None, "null", {}]:
        df.additional_metadata = nullish_value
        df.update()
        assert df.additional_metadata == {}

    # Non-nullish value doesn't get overridden
    non_nullish_value = {"foo": "bar"}
    df.additional_metadata = non_nullish_value
    df.update()
    assert df.additional_metadata == non_nullish_value
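
Note: these snippets all assume a shared pytest harness (plus suite-local helpers such as mock_current_user). A minimal sketch of the imports and module-level constants inferred from usage; the concrete values and import paths are assumptions, not taken from the source:

import io
import logging
from datetime import datetime
from typing import Tuple
from unittest.mock import MagicMock

import pytest
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import NoResultFound

# Import paths below are assumptions based on how the names are used.
from cidc_schemas import prism
from cidc_schemas.prism import PROTOCOL_ID_FIELD_NAME
from cidc_schemas.json_validation import ValidationMultiError
from cidc_api.models import (
    CIDCRole,
    DownloadableFiles,
    Permissions,
    TrialMetadata,
    UploadJobs,
    UploadJobStatus,
    Users,
)

# Hypothetical fixture values; the real suite defines its own.
TRIAL_ID = "10021"
EMAIL = "user@example.com"
PROFILE = {"email": EMAIL}
METADATA = {
    "protocol_identifier": TRIAL_ID,
    "allowed_collection_event_names": [],
    "allowed_cohort_names": [],
    "participants": [],
}

The clean_db fixture is assumed to yield a fresh SQLAlchemy session against an empty test database.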
Example #2
def test_trial_metadata_patch_manifest(clean_db):
    """Update manifest data in a trial_metadata record"""
    # Add a participant to the trial
    metadata_with_participant = METADATA.copy()
    metadata_with_participant["participants"] = [
        {
            "samples": [],
            "cimac_participant_id": "CTSTP01",
            "participant_id": "trial a",
            "cohort_name": "Arm_Z",
        }
    ]

    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_participant)

    # Look the trial up and check that it has the participant in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert (
        trial.metadata_json["participants"] == metadata_with_participant["participants"]
    )
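
The final assertion implies that patch_manifest merges manifest-derived sections (here, participants) into the stored metadata_json. A much-simplified sketch of that merge; the real method presumably validates and deep-merges via cidc_schemas, so treat this as illustration only:

def patch_manifest_json(stored: dict, patch: dict) -> dict:
    # Illustrative only: append the patch's participants to the stored
    # trial metadata, leaving all other keys untouched.
    merged = dict(stored)
    merged["participants"] = stored.get("participants", []) + patch.get("participants", [])
    return merged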
Example #3
def test_create_downloadable_file_from_metadata(clean_db, monkeypatch):
    """Try to create a downloadable file from artifact_core metadata"""
    # fake file metadata
    file_metadata = {
        "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        "file_size_bytes": 1,
        "md5_hash": "hash1234",
        "uploaded_timestamp": datetime.now(),
        "foo": "bar",  # unsupported column - should be filtered
    }
    additional_metadata = {"more": "info"}

    # Mock artifact upload publishing
    publisher = MagicMock()
    monkeypatch.setattr("cidc_api.models.models.publish_artifact_upload", publisher)

    # Create the trial (to avoid violating foreign-key constraint)
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Create files with empty or "null" additional metadata
    for nullish_value in ["null", None, {}]:
        df = DownloadableFiles.create_from_metadata(
            TRIAL_ID, "wes_bam", file_metadata, additional_metadata=nullish_value
        )
        clean_db.refresh(df)
        assert df.additional_metadata == {}

    # Create the file
    DownloadableFiles.create_from_metadata(
        TRIAL_ID, "wes_bam", file_metadata, additional_metadata=additional_metadata
    )

    # Check that we created the file
    new_file = (
        clean_db.query(DownloadableFiles)
        .filter_by(object_url=file_metadata["object_url"])
        .first()
    )
    assert new_file
    del file_metadata["foo"]
    for k, v in file_metadata.items():
        assert getattr(new_file, k) == v
    assert new_file.additional_metadata == additional_metadata

    # Check that no artifact upload event was published
    publisher.assert_not_called()

    # Check that artifact upload publishes
    DownloadableFiles.create_from_metadata(
        TRIAL_ID,
        "wes_bam",
        file_metadata,
        additional_metadata=additional_metadata,
        alert_artifact_upload=True,
    )
    publisher.assert_called_once_with(file_metadata["object_url"])
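
The unsupported "foo" key is silently dropped, which suggests create_from_metadata filters the incoming dict down to actual model columns before inserting. A sketch of that filtering step, assuming a SQLAlchemy model (the helper name is hypothetical):

from typing import Any, Dict


def filter_to_columns(model, metadata: Dict[str, Any]) -> Dict[str, Any]:
    # Keep only keys that correspond to real columns on the model, so stray
    # keys like "foo" never reach the INSERT statement.
    columns = {c.name for c in model.__table__.columns}
    return {k: v for k, v in metadata.items() if k in columns}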
Example #4
def test_downloadable_files_get_related_files(clean_db):
    # Create a trial to avoid constraint errors
    TrialMetadata.create(trial_id=TRIAL_ID, metadata_json=METADATA)

    # Convenience function for building file records
    def create_df(facet_group, additional_metadata=None) -> DownloadableFiles:
        # Guard against the mutable-default-argument pitfall
        if additional_metadata is None:
            additional_metadata = {}
        df = DownloadableFiles(
            facet_group=facet_group,
            additional_metadata=additional_metadata,
            trial_id=TRIAL_ID,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=0,
            object_url=facet_group,  # just filler, not relevant to the test
            upload_type="",
        )
        df.insert()
        clean_db.refresh(df)
        return df

    # Set up test data
    cimac_id_1 = "CTTTPPP01.01"
    cimac_id_2 = "CTTTPPP02.01"
    files = [
        create_df(
            "/cytof/normalized_and_debarcoded.fcs", {"some.path.cimac_id": cimac_id_1}
        ),
        create_df(
            "/cytof_analysis/assignment.csv",
            # NOTE: this isn't realistic - assignment files aren't sample-specific - but
            # it serves the purpose of the test.
            {"path.cimac_id": cimac_id_1, "another.path.cimac_id": cimac_id_1},
        ),
        create_df("/cytof_analysis/source.fcs", {"path.to.cimac_id": cimac_id_2}),
        create_df("/cytof_analysis/reports.zip"),
        create_df("/cytof_analysis/analysis.zip"),
        create_df("/wes/r1_L.fastq.gz"),
    ]

    # Based on setup, we expect the following disjoint sets of related files:
    related_file_groups = [
        [files[0], files[1]],
        [files[2]],
        [files[3], files[4]],
        [files[5]],
    ]

    # Check that get_related_files returns what we expect
    for file_group in related_file_groups:
        for file_record in file_group:
            other_ids = [f.id for f in file_group if f.id != file_record.id]
            related_files = file_record.get_related_files()
            assert set([f.id for f in related_files]) == set(other_ids)
            assert len(related_files) == len(other_ids)
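
The expected groupings imply a two-part rule: sample-level files (those with a cimac_id somewhere in additional_metadata) relate via a shared CIMAC ID, while trial-level files relate when they share an assay directory in facet_group. A hypothetical reconstruction of that rule; the real query logic may differ:

def find_cimac_id(additional_metadata: dict):
    # A file counts as sample-level if any metadata key ends in "cimac_id".
    for key, value in additional_metadata.items():
        if key.endswith("cimac_id"):
            return value
    return None


def are_related(a, b) -> bool:
    # Sample-level files relate via a shared cimac_id; trial-level files
    # relate when their facet_group paths share an assay directory.
    id_a = find_cimac_id(a.additional_metadata)
    id_b = find_cimac_id(b.additional_metadata)
    if id_a or id_b:
        return id_a == id_b
    return a.facet_group.split("/")[1] == b.facet_group.split("/")[1]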
Example #5
def test_create_trial_metadata(clean_db):
    """Insert a trial metadata record if one doesn't exist"""
    TrialMetadata.create(TRIAL_ID, METADATA)
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial
    assert trial.metadata_json == METADATA

    # Check that you can't insert a trial with invalid metadata
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata.create("foo", {"buzz": "bazz"})

    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata(trial_id="foo", metadata_json={"buzz": "bazz"}).insert()
Example #6
def test_assay_upload_ingestion_success(clean_db, monkeypatch, caplog):
    """Check that the ingestion success method works as expected"""
    caplog.set_level(logging.DEBUG)

    new_user = Users.create(PROFILE)
    trial = TrialMetadata.create(TRIAL_ID, METADATA)
    assay_upload = UploadJobs.create(
        upload_type="ihc",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={PROTOCOL_ID_FIELD_NAME: TRIAL_ID},
        gcs_xlsx_uri="",
        commit=False,
    )

    clean_db.commit()

    # Ensure that success can't be declared from the starting state
    with pytest.raises(Exception, match="current status"):
        assay_upload.ingestion_success(trial)

    # Update assay_upload status to simulate a completed but not ingested upload
    assay_upload.status = UploadJobStatus.UPLOAD_COMPLETED.value
    assay_upload.ingestion_success(trial)

    # Check that status was updated and email wasn't sent by default
    db_record = UploadJobs.find_by_id(assay_upload.id)
    assert db_record.status == UploadJobStatus.MERGE_COMPLETED.value
    assert "Would send email with subject '[UPLOAD SUCCESS]" not in caplog.text

    # Check that email gets sent when specified
    assay_upload.ingestion_success(trial, send_email=True)
    assert "Would send email with subject '[UPLOAD SUCCESS]" in caplog.text
Example #7
def test_upload_job_no_file_map(clean_db):
    """Try to create an assay upload"""
    new_user = Users.create(PROFILE)

    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    TrialMetadata.create(TRIAL_ID, METADATA)

    new_job = UploadJobs.create(
        prism.SUPPORTED_MANIFESTS[0], EMAIL, None, metadata_patch, gcs_xlsx_uri
    )
    assert list(new_job.upload_uris_with_data_uris_with_uuids()) == []

    job = UploadJobs.find_by_id_and_email(new_job.id, PROFILE["email"])
    assert list(job.upload_uris_with_data_uris_with_uuids()) == []
Example #8
def setup_permissions(cidc_api, monkeypatch) -> Tuple[int, int]:
    """
    Create two users, one trial, and three permissions in the database.
    Two permissions will belong to the first user, and the third will
    belong to the second. Returns the first and second user ids
    as a tuple.
    """
    current_user = Users(
        id=1,
        email="*****@*****.**",
        role=CIDCRole.CIMAC_USER.value,
        approval_date=datetime.now(),
    )
    other_user = Users(id=2, email="*****@*****.**")

    mock_current_user(current_user, monkeypatch)

    with cidc_api.app_context():
        # Create users
        current_user.insert()
        other_user.insert()

        # Create trial
        TrialMetadata.create(
            TRIAL_ID,
            {
                "protocol_identifier": TRIAL_ID,
                "allowed_collection_event_names": [],
                "allowed_cohort_names": [],
                "participants": [],
            },
        )

        # Create permissions
        def create_permission(uid, assay):
            Permissions(
                granted_by_user=uid,
                granted_to_user=uid,
                trial_id=TRIAL_ID,
                upload_type=assay,
            ).insert()

        create_permission(current_user.id, "ihc")
        create_permission(current_user.id, "olink")
        create_permission(other_user.id, "olink")

        return current_user.id, other_user.id
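
A sketch of a typical consumer of this helper, assuming Permissions.find_for_user exists as the lookup (a plausible but unverified name):

def test_permission_counts(cidc_api, clean_db, monkeypatch):
    # The first user was granted two permissions above, the second just one.
    current_user_id, other_user_id = setup_permissions(cidc_api, monkeypatch)
    with cidc_api.app_context():
        assert len(Permissions.find_for_user(current_user_id)) == 2
        assert len(Permissions.find_for_user(other_user_id)) == 1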
Example #9
def test_assay_upload_merge_extra_metadata(clean_db, monkeypatch):
    """Try to create an assay upload"""
    new_user = Users.create(PROFILE)

    TrialMetadata.create(TRIAL_ID, METADATA)

    assay_upload = UploadJobs.create(
        upload_type="assay_with_extra_md",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={
            PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            "whatever": {
                "hierarchy": [
                    {"we just need a": "uuid-1", "to be able": "to merge"},
                    {"and": "uuid-2"},
                ]
            },
        },
        gcs_xlsx_uri="",
        commit=False,
    )
    assay_upload.id = 111
    clean_db.commit()

    custom_extra_md_parse = MagicMock()
    custom_extra_md_parse.side_effect = lambda f: {"extra": f.read().decode()}
    monkeypatch.setattr(
        "cidc_schemas.prism.merger.EXTRA_METADATA_PARSERS",
        {"assay_with_extra_md": custom_extra_md_parse},
    )

    UploadJobs.merge_extra_metadata(
        111,
        {
            "uuid-1": io.BytesIO(b"within extra md file 1"),
            "uuid-2": io.BytesIO(b"within extra md file 2"),
        },
        session=clean_db,
    )

    assert 1 == clean_db.query(UploadJobs).count()
    au = clean_db.query(UploadJobs).first()
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][0]
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][1]
Example #10
def test_trial_metadata_patch_assay(clean_db):
    """Update assay data in a trial_metadata record"""
    # Add an assay to the trial
    metadata_with_assay = METADATA.copy()
    metadata_with_assay["assays"] = {"wes": []}

    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_assays(TRIAL_ID, metadata_with_assay)

    # Look the trial up and check that it has the assay in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial.metadata_json["assays"] == metadata_with_assay["assays"]
Example #11
def test_create_assay_upload(clean_db):
    """Try to create an assay upload"""
    new_user = Users.create(PROFILE)

    gcs_file_map = {
        "my/first/wes/blob1/2019-08-30T15:51:38.450978": "test-uuid-1",
        "my/first/wes/blob2/2019-08-30T15:51:38.450978": "test-uuid-2",
    }
    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    # Should fail, since trial doesn't exist yet
    with pytest.raises(IntegrityError):
        UploadJobs.create("wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri)
    clean_db.rollback()

    TrialMetadata.create(TRIAL_ID, METADATA)

    new_job = UploadJobs.create(
        "wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri
    )
    job = UploadJobs.find_by_id_and_email(new_job.id, PROFILE["email"])
    assert len(new_job.gcs_file_map) == len(job.gcs_file_map)
    assert set(new_job.gcs_file_map) == set(job.gcs_file_map)
    assert job.status == "started"

    assert list(job.upload_uris_with_data_uris_with_uuids()) == [
        (
            "my/first/wes/blob1/2019-08-30T15:51:38.450978",
            "my/first/wes/blob1",
            "test-uuid-1",
        ),
        (
            "my/first/wes/blob2/2019-08-30T15:51:38.450978",
            "my/first/wes/blob2",
            "test-uuid-2",
        ),
    ]
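
The asserted tuples show the shape of upload_uris_with_data_uris_with_uuids: each upload URI maps to a permanent data URI (the upload URI minus its trailing timestamp segment) plus the UUID from gcs_file_map. A standalone sketch of that derivation, not the library's actual implementation:

from typing import Dict, Iterator, Optional, Tuple


def upload_uris_with_data_uris_with_uuids(
    gcs_file_map: Optional[Dict[str, str]],
) -> Iterator[Tuple[str, str, str]]:
    # Strip the trailing "/<timestamp>" segment from each upload URI to
    # recover the permanent data URI; a missing file map yields nothing.
    for upload_uri, uuid in (gcs_file_map or {}).items():
        data_uri, _, _timestamp = upload_uri.rpartition("/")
        yield upload_uri, data_uri, uuid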