Пример #1
0
def test_create_instance(mock_datetime, patient_mapping,
                         fhir_concept_map_identifier):
    """Check the resource that ``transform.create_instance`` builds from one row.

    Three attributes are fed from the row and a fourth one only carries a
    static input. The ``id`` of the result is not pinned: the expectation
    simply reuses ``actual["id"]``.

    ``fhir_concept_map_identifier`` is requested but never referenced in the
    body (presumably it is only needed for its setup side effects - TODO
    confirm).
    """
    # Stub the clock; the "now" expected under meta.lastUpdated presumably
    # comes from this stub (mockdatetime is defined outside this block).
    mock_datetime.now.return_value = mockdatetime()

    identifier = Attribute("identifier[0].value",
                           columns=[SqlColumn("a", "b")])
    birth_date = Attribute("birthDate", columns=[SqlColumn("a", "c")])
    marital_status = Attribute("maritalStatus.coding[0].code",
                               columns=[SqlColumn("a", "d")])
    practitioner_type = Attribute("generalPractitioner[0].type",
                                  static_inputs=["Practitioner"])

    # The static-only attribute deliberately has no entry in the row.
    row = {marital_status: "D", birth_date: "2000-10-10", identifier: "A"}

    actual = transform.create_instance(
        row,
        patient_mapping,
        [identifier, birth_date, marital_status, practitioner_type],
    )

    expected_tags = [
        {
            "system": ARKHN_CODE_SYSTEMS.source,
            "code": patient_mapping["source"]["id"],
        },
        {
            "system": ARKHN_CODE_SYSTEMS.resource,
            "code": patient_mapping["id"],
        },
    ]
    expected = {
        "meta": {"lastUpdated": "now", "tag": expected_tags},
        "id": actual["id"],
        "identifier": [{"value": "A"}],
        "resourceType": "Patient",
        "birthDate": "2000-10-10",
        "maritalStatus": {"coding": [{"code": "D"}]},
        "generalPractitioner": [{"type": "Practitioner"}],
    }

    assert actual == expected
Пример #2
0
def test_create_static_instance(mock_datetime, fhir_concept_map_identifier):
    """Check the resource built by ``transform.create_static_instance``.

    All attributes carry static inputs only, so no data row is involved.
    The ``id`` of the result is not pinned: the expectation reuses
    ``actual["id"]``.

    ``fhir_concept_map_identifier`` is requested but not referenced in the
    body (presumably needed only for its setup side effects - TODO confirm).
    """
    # Stub the clock; the "now" expected under meta.lastUpdated presumably
    # comes from this stub (mockdatetime is defined outside this block).
    mock_datetime.now.return_value = mockdatetime()

    mapping = {
        "id": "resource_id",
        "source": {"id": "source_id"},
        "definition": {
            "type": "instance_type",
            "kind": "resource",
            "derivation": "resource",
        },
    }

    static_attributes = [
        Attribute("identifier[0].value", static_inputs=["static"]),
        Attribute("identifier[0].system", static_inputs=["identifier_sys"]),
        Attribute("path.to.attribute", static_inputs=["attribute_val"]),
    ]

    actual = transform.create_static_instance(mapping, static_attributes)

    expected_meta = {
        "lastUpdated": "now",
        "tag": [
            {
                "system": ARKHN_CODE_SYSTEMS.source,
                "code": mapping["source"]["id"],
            },
            {
                "system": ARKHN_CODE_SYSTEMS.resource,
                "code": mapping["id"],
            },
        ],
    }
    expected = {
        "meta": expected_meta,
        "id": actual["id"],
        # Both identifier[0].* attributes end up in the same list element.
        "identifier": [{"value": "static", "system": "identifier_sys"}],
        "path": {"to": {"attribute": "attribute_val"}},
        "resourceType": "instance_type",
    }

    assert actual == expected
Пример #3
0
def test_merge_dataframe(_):
    """Check that ``transform.merge_dataframe`` yields one column per attribute.

    The ``id`` attribute has two source columns, a static input and a merging
    script; its expected values are the two ids concatenated and followed by
    ``"merge"``. The other attributes are expected to pass through unchanged
    and the ``"pk"`` column is absent from the expected frame.

    ``_`` is an unused argument (presumably injected by a patch decorator
    located outside this block - TODO confirm).
    """
    name_attr = Attribute("name", columns=[SqlColumn("PATIENTS", "NAME")])
    id_attr = Attribute(
        "id",
        columns=[SqlColumn("PATIENTS", "ID"),
                 SqlColumn("PATIENTS", "ID2")],
        static_inputs=["unknown"],
        merging_script=MergingScript("merge"),
    )
    language_attr = Attribute(
        "language", columns=[SqlColumn("ADMISSIONS", "LANGUAGE")])
    admid_attr = Attribute("admid", columns=[SqlColumn("ADMISSIONS", "ID")])

    # Column keys are (attribute, (column name, table)) pairs, plus a "pk" one.
    column_index = pd.MultiIndex.from_tuples([
        (name_attr, ("PATIENTS_NAME", "PATIENTS")),
        (id_attr, ("PATIENTS_ID", "PATIENTS")),
        (id_attr, ("PATIENTS_ID2", "PATIENTS")),
        (language_attr, ("ADMISSIONS_LANGUAGE", "ADMISSIONS")),
        (admid_attr, ("ADMISSIONS_ID", "ADMISSIONS")),
        ("pk", ("PATIENTS_ID", "PATIENTS")),
    ])

    input_data = {
        column_index[0]: ["bob", "bob", "alice", "bob"],
        column_index[1]: ["id1", "id1", "id2", "id3"],
        column_index[2]: ["id21", "id21", "id22", "id23"],
        column_index[3]: ["lang1", "lang2", "lang3", "lang4"],
        column_index[4]: ["hadmid1", "hadmid2", "hadmid3", "hadmid4"],
        column_index[5]: ["id1", "id2", "id3", "id4"],
    }
    df = pd.DataFrame(input_data)

    merged = transform.merge_dataframe(
        df,
        [name_attr, id_attr, language_attr, admid_attr],
        SqlColumn("PATIENTS", "ID"),
    )

    expected_data = {
        name_attr: ["bob", "bob", "alice", "bob"],
        id_attr: [
            "id1id21merge", "id1id21merge", "id2id22merge", "id3id23merge"
        ],
        language_attr: ["lang1", "lang2", "lang3", "lang4"],
        admid_attr: ["hadmid1", "hadmid2", "hadmid3", "hadmid4"],
    }
    expected = pd.DataFrame(expected_data)

    assert merged.equals(expected)
Пример #4
0
def test_fetch_values_from_dataframe():
    """Check that the value stored under the requested attribute is returned.

    The row maps three attributes to values; asking for the ``birthDate``
    attribute must give back ``"2000-10-10"``.
    """
    identifier = Attribute("identifier[0].value",
                           columns=[SqlColumn("a", "b")])
    birth_date = Attribute("birthDate", columns=[SqlColumn("a", "c")])
    marital_status = Attribute("maritalStatus.coding[0].code",
                               columns=[SqlColumn("a", "d")])

    row = {marital_status: "D", birth_date: "2000-10-10", identifier: "A"}

    fetched = transform.fetch_values_from_dataframe(row, birth_date)

    assert fetched == "2000-10-10"
Пример #5
0
def test_squash_rows():
    """Check that ``transform.squash_rows`` groups ADMISSIONS values per patient.

    The input has four rows, two of which share the same PATIENTS values.
    The expected frame has three rows, with the ADMISSIONS columns holding
    tuples of the grouped values.
    """
    name_attr = Attribute("name", columns=[SqlColumn("PATIENTS", "NAME")])
    id_attr = Attribute("id", columns=[SqlColumn("PATIENTS", "ID")])
    language_attr = Attribute(
        "language", columns=[SqlColumn("ADMISSIONS", "LANGUAGE")])
    admid_attr = Attribute("admid", columns=[SqlColumn("ADMISSIONS", "ID")])

    column_index = pd.MultiIndex.from_tuples([
        (name_attr, ("PATIENTS_NAME", "PATIENTS")),
        (id_attr, ("PATIENTS_ID", "PATIENTS")),
        (language_attr, ("ADMISSIONS_LANGUAGE", "ADMISSIONS")),
        (admid_attr, ("ADMISSIONS_ID", "ADMISSIONS")),
    ])
    # Both frames are ordered on the patient id column before comparison.
    sort_key = column_index[1]

    input_data = {
        column_index[0]: ["bob", "bob", "alice", "bob"],
        column_index[1]: ["id1", "id1", "id2", "id3"],
        column_index[2]: ["lang1", "lang2", "lang3", "lang4"],
        column_index[3]: ["id1", "id2", "id3", "id4"],
    }
    df = pd.DataFrame(input_data)

    # Rule shape as given to squash_rows: a table name followed by a list of
    # child rules of the same shape.
    rules = ["PATIENTS", [["ADMISSIONS", []]]]

    squashed = transform.squash_rows(df, rules)
    # Row order of the result is not relied upon: normalise it first.
    squashed = squashed.sort_values(by=sort_key)
    squashed = squashed.reset_index(drop=True)

    expected_data = {
        column_index[0]: ["bob", "alice", "bob"],
        column_index[1]: ["id1", "id2", "id3"],
        column_index[2]: [("lang1", "lang2"), ("lang3", ), ("lang4", )],
        column_index[3]: [("id1", "id2"), ("id3", ), ("id4", )],
    }
    expected = pd.DataFrame(expected_data)
    # Apply the same normalisation to the expected frame.
    expected = expected.sort_values(by=sort_key)
    expected = expected.reset_index(drop=True)

    assert squashed.equals(expected)
Пример #6
0
def test_handle_array_attributes():
    """Check how ``transform.handle_array_attributes`` expands row values.

    First case: one attribute holds a 3-tuple and the other a plain string;
    the expected result is three dicts, with the string repeated in each.
    Second case: two tuples of different lengths must raise an
    ``AssertionError`` whose message matches "mismatch in array lengths".
    """
    first = Attribute("attr1", columns=[SqlColumn("a", "b")])
    second = Attribute("attr2", columns=[SqlColumn("a", "c")])
    attributes_in_array = {"path1": first, "path2": second}

    # Tuple value combined with a scalar value.
    scalar_row = {first: ("A1", "A2", "A3"), second: "B"}

    result = transform.handle_array_attributes(attributes_in_array,
                                               scalar_row)

    expected = [{"path1": item, "path2": "B"} for item in ("A1", "A2", "A3")]
    assert result == expected

    # Tuples of different lengths cannot be combined.
    mismatched_row = {first: ("A1", "A2", "A3"), second: ("B1", "B2")}
    with raises(AssertionError, match="mismatch in array lengths"):
        transform.handle_array_attributes(attributes_in_array,
                                          mismatched_row)
Пример #7
0
def test_analyze_mapping(patient_mapping, fhir_concept_map_gender,
                         fhir_concept_map_identifier):
    """Check the attributes, columns and joins found by ``Analyzer.analyze_mapping``.

    After analysing ``patient_mapping`` the test compares
    ``analyzer.analysis.attributes`` against an ordered list, and
    ``analyzer.analysis.columns`` / ``analyzer.analysis.joins`` against sets.

    The two concept-map fixtures are requested but not referenced in the body
    (presumably needed only for their setup side effects - TODO confirm).
    """

    def single_column_attribute(path, table, column):
        # Attribute fed by exactly one column, without static inputs or
        # merging script.
        return Attribute(
            path,
            columns=[SqlColumn(table, column)],
            static_inputs=[],
            merging_script=None,
        )

    analyzer = Analyzer()
    analyzer.analyze_mapping(patient_mapping)

    expected_attributes = [
        single_column_attribute("identifier[0].value", "patients", "row_id"),
        single_column_attribute("deceasedBoolean", "patients", "expire_flag"),
        single_column_attribute("generalPractitioner[0].identifier.value",
                                "icustays", "hadm_id"),
        single_column_attribute("birthDate", "patients", "dob"),
        single_column_attribute("deceasedDateTime", "patients", "dod"),
        # Column plus static input, combined through a merging script.
        Attribute(
            "gender",
            columns=[SqlColumn("patients", "gender")],
            static_inputs=["unknown"],
            merging_script=MergingScript("select_first_not_empty"),
        ),
        single_column_attribute("maritalStatus.coding[0].code", "admissions",
                                "marital_status"),
        # Static-only attribute: no column at all.
        Attribute(
            "generalPractitioner[0].type",
            columns=[],
            static_inputs=["Practitioner"],
            merging_script=None,
        ),
    ]
    assert analyzer.analysis.attributes == expected_attributes

    expected_columns = {
        SqlColumn("patients", "row_id"),
        SqlColumn("patients", "gender"),
        SqlColumn("patients", "dob"),
        SqlColumn("patients", "dod"),
        SqlColumn("patients", "expire_flag"),
        SqlColumn("admissions", "marital_status"),
        SqlColumn("icustays", "hadm_id"),
    }
    assert analyzer.analysis.columns == expected_columns

    expected_joins = {
        SqlJoin(SqlColumn("patients", "subject_id"),
                SqlColumn("admissions", "subject_id")),
        SqlJoin(SqlColumn("patients", "subject_id"),
                SqlColumn("icustays", "subject_id")),
    }
    assert analyzer.analysis.joins == expected_joins
Пример #8
0
def test_clean_data(_, fhir_concept_map_code, fhir_concept_map_gender):
    """Check the frame produced by ``transform.clean_dataframe``.

    The flat input frame is expected to come back with a MultiIndex of
    (attribute, (column name, table)) keys plus a ``"pk"`` column. The
    expected values show the name column suffixed with "cleaned", the language
    column mapped to "male"/"female" and the admission id lower-cased.

    ``_`` and the two concept-map fixtures are not referenced in the body
    (presumably ``_`` is injected by a patch decorator outside this block and
    the fixtures provide the scripts/concept maps - TODO confirm).
    """
    raw_data = {
        "PATIENTS_NAME": ["alice", "bob", "charlie"],
        "PATIENTS_ID": ["id1", "id2", "id3"],
        "PATIENTS_ID2": ["id21", "id22", "id23"],
        "ADMISSIONS_LANGUAGE": ["M", "F", "F"],
        "ADMISSIONS_ID": ["ABC", "DEF", "GHI"],
    }
    df = pd.DataFrame(raw_data)

    # Column with a cleaning script only.
    name_column = SqlColumn(
        "PATIENTS",
        "NAME",
        cleaning_script=CleaningScript("clean1"),
    )
    name_attr = Attribute("name", columns=[name_column])

    # Two plain columns and a static input.
    id_attr = Attribute(
        "id",
        columns=[SqlColumn("PATIENTS", "ID"),
                 SqlColumn("PATIENTS", "ID2")],
        static_inputs=["null"],
    )

    # Column with a concept map only.
    language_column = SqlColumn("ADMISSIONS",
                                "LANGUAGE",
                                concept_map=ConceptMap("id_cm_gender"))
    language_attr = Attribute(
        "language",
        columns=[language_column],
        static_inputs=["val"],
    )

    # Column with both a cleaning script and a concept map.
    admid_column = SqlColumn(
        "ADMISSIONS",
        "ID",
        cleaning_script=CleaningScript("clean2"),
        concept_map=ConceptMap("id_cm_code"),
    )
    admid_attr = Attribute("code", columns=[admid_column])

    cleaned = transform.clean_dataframe(
        df,
        [name_attr, id_attr, language_attr, admid_attr],
        SqlColumn("PATIENTS", "ID"),
    )

    column_index = pd.MultiIndex.from_tuples([
        (name_attr, ("PATIENTS_NAME", "PATIENTS")),
        (id_attr, ("PATIENTS_ID", "PATIENTS")),
        (id_attr, ("PATIENTS_ID2", "PATIENTS")),
        (language_attr, ("ADMISSIONS_LANGUAGE", "ADMISSIONS")),
        (admid_attr, ("ADMISSIONS_ID", "ADMISSIONS")),
        ("pk", ("PATIENTS_ID", "PATIENTS")),
    ])

    expected_data = {
        column_index[0]: ["alicecleaned", "bobcleaned", "charliecleaned"],
        column_index[1]: ["id1", "id2", "id3"],
        column_index[2]: ["id21", "id22", "id23"],
        column_index[3]: ["male", "female", "female"],
        column_index[4]: ["abc", "def", "ghi"],
        column_index[5]: ["id1", "id2", "id3"],
    }
    expected = pd.DataFrame(expected_data)

    assert cleaned.equals(expected)