Example #1
def test_merge_attributes(_):
    attr_name = Attribute("name", columns=[SqlColumn("PATIENTS", "NAME")])
    attr_id = Attribute(
        "id",
        columns=[SqlColumn("PATIENTS", "ID"), SqlColumn("PATIENTS", "ID2")],
        static_inputs=["unknown"],
        merging_script=MergingScript("merge"),
    )
    attr_language = Attribute("language", columns=[SqlColumn("ADMISSIONS", "LANGUAGE")])
    attr_admid = Attribute("admid", columns=[SqlColumn("ADMISSIONS", "ID")])

    data = {
        ("name", ("PATIENTS", "NAME")): "bob",
        ("id", ("PATIENTS", "ID")): "id1",
        ("id", ("PATIENTS", "ID2")): "id21",
        ("language", ("ADMISSIONS", "LANGUAGE")): ("lang1", "lang2", "lang3", "lang4"),
        ("admid", ("ADMISSIONS", "ID")): ("hadmid1", "hadmid2", "hadmid3", "hadmid4"),
    }

    attributes = [attr_name, attr_id, attr_language, attr_admid]

    actual = transform.merge_attributes(data, attributes, "pk")
    expected = {
        "name": "bob",
        "id": "id1id21merge",
        "language": ("lang1", "lang2", "lang3", "lang4"),
        "admid": ("hadmid1", "hadmid2", "hadmid3", "hadmid4"),
    }

    assert actual == expected
Example #2
def test_apply_filters():
    extractor = Extractor()
    resource_mapping = {
        "filters": [
            {
                "relation": "LIKE",
                "value": "'2150-08-29'",
                "sqlColumn": {"owner": None, "table": "admissions", "column": "admittime"},
            },
            {
                "relation": "<=",
                "value": "1000",
                "sqlColumn": {"owner": None, "table": "patients", "column": "row_id"},
            },
        ]
    }
    pk_column = SqlColumn("patients", "subject_id")
    pk_values = [123, 456]

    base_query = mock.MagicMock()

    extractor.apply_filters(base_query, resource_mapping, pk_column, pk_values)

    binary_expressions = [
        extractor.get_column(SqlColumn("patients", "subject_id")).in_(pk_values),
        extractor.get_column(SqlColumn("admissions", "admittime")).like("'2150-08-29'"),
        extractor.get_column(SqlColumn("patients", "row_id")) <= "1000",
    ]

    for call, binary_expression in zip(base_query.filter.call_args_list, binary_expressions):
        args, _ = call
        assert args[0].compare(binary_expression)
Example #3
def test_apply_joins():
    extractor = Extractor()
    joins = [
        SqlJoin(SqlColumn("patients", "subject_id"), SqlColumn("admissions", "subject_id")),
        SqlJoin(SqlColumn("admissions", "row_id"), SqlColumn("prescriptions", "row_id")),
    ]

    base_query = mock.MagicMock()

    extractor.apply_joins(base_query, joins)

    foreign_tables = [tables["admissions"], tables["prescriptions"]]
    binary_expressions = [
        mock_get_column("", SqlColumn("patients", "subject_id"))
        == mock_get_column("", SqlColumn("admissions", "subject_id")),
        mock_get_column("", SqlColumn("admissions", "row_id"))
        == mock_get_column("", SqlColumn("prescriptions", "row_id")),
    ]
    for call, foreign_table, binary_expression in zip(
        base_query.join.call_args_list, foreign_tables, binary_expressions
    ):
        args, kwargs = call
        assert args[0] == foreign_table
        assert args[1].compare(binary_expression)
        assert kwargs == {"isouter": True}
Example #4
def test_build_squash_rules():
    cols = [
        "ADMISSIONS.LANGUAGE",
        "PATIENTS.DOD",
        "PATIENTS.SUBJECT_ID",
    ]  # NOTE: I use a list instead of a set to keep the order of elements
    joins = {
        SqlJoin(SqlColumn("PATIENTS", "SUBJECT_ID"),
                SqlColumn("ADMISSIONS", "SUBJECT_ID"))
    }
    table = "PATIENTS"

    actual = mapping.build_squash_rules(cols, joins, table)

    assert actual == ["PATIENTS", [["ADMISSIONS", []]]]
Example #5
    def get_column(self, column: SqlColumn) -> AlchemyColumn:
        """ Get the SQLAlchemy column corresponding to the SqlColumn (custom type)
        from the analysis.
        """
        table = self.get_table(column)
        # Note that we label the column manually to avoid collisions and
        # SQLAlchemy's automatic labelling
        return table.c[column.column].label(column.dataframe_column_name())
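For context, this manual labelling is what produces the `patients_subject_id`-style aliases visible in the compiled SQL of Example #8. Below is a minimal, self-contained sketch in plain SQLAlchemy (1.4+ syntax; it does not use the project's Extractor or SqlColumn types) showing the same effect:

from sqlalchemy import Column, Integer, MetaData, Table, select

metadata = MetaData()
patients = Table("patients", metadata, Column("subject_id", Integer))

# Labelling the column explicitly yields a stable, collision-free alias
labelled = patients.c["subject_id"].label("patients_subject_id")
print(select(labelled))
# SELECT patients.subject_id AS patients_subject_id
# FROM patients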
Example #6
def test_fetch_values_from_dataframe():
    attr_identifier = Attribute("identifier[0].value",
                                columns=[SqlColumn("a", "b")])
    attr_birthDate = Attribute("birthDate", columns=[SqlColumn("a", "c")])
    attr_maritalStatus = Attribute("maritalStatus.coding[0].code",
                                   columns=[SqlColumn("a", "d")])

    attribute = attr_birthDate

    row = {
        attr_maritalStatus.path: "D",
        attr_birthDate.path: "2000-10-10",
        attr_identifier.path: "A",
    }

    value = transform.fetch_values_from_dataframe(row, attribute)

    assert value == "2000-10-10"
Example #7
def test_create_instance(mock_datetime):
    mock_datetime.now.return_value = mockdatetime()

    attr_identifier = Attribute("identifier[0].value",
                                columns=[SqlColumn("a", "b")])
    attr_birthDate = Attribute("birthDate", columns=[SqlColumn("a", "c")])
    attr_maritalStatus = Attribute("maritalStatus.coding[0].code",
                                   columns=[SqlColumn("a", "d")])
    attr_generalPractitioner = Attribute("generalPractitioner[0].type",
                                         static_inputs=["Practitioner"])

    path_attributes_map = {
        attr_identifier.path: attr_identifier,
        attr_birthDate.path: attr_birthDate,
        attr_maritalStatus.path: attr_maritalStatus,
        attr_generalPractitioner.path: attr_generalPractitioner,
    }

    row = {
        attr_maritalStatus.path: "D",
        attr_birthDate.path: "2000-10-10",
        attr_identifier.path: "A",
    }

    actual = transform.build_fhir_object(row, path_attributes_map)

    assert actual == {
        "id": actual["id"],
        "identifier": [{"value": "A"}],
        "birthDate": "2000-10-10",
        "maritalStatus": {"coding": [{"code": "D"}]},
        "generalPractitioner": [{"type": "Practitioner"}],
    }
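The expected object above illustrates how dotted paths with `[index]` segments (e.g. `identifier[0].value`) expand into nested FHIR JSON. The following is only a simplified, hypothetical illustration of that expansion, not the project's actual transform.build_fhir_object:

import re


def expand_path(obj: dict, path: str, value) -> dict:
    """Set `value` at `path` (e.g. "maritalStatus.coding[0].code") in nested dicts/lists."""
    current = obj
    parts = path.split(".")
    for i, part in enumerate(parts):
        # Split "coding[0]" into key "coding" and index 0; plain keys get index None
        match = re.fullmatch(r"(\w+)\[(\d+)\]", part)
        key, index = (match.group(1), int(match.group(2))) if match else (part, None)
        last = i == len(parts) - 1
        if index is None:
            if last:
                current[key] = value
            else:
                current = current.setdefault(key, {})
        else:
            array = current.setdefault(key, [])
            while len(array) <= index:
                array.append({})
            if last:
                array[index] = value
            else:
                current = array[index]
    return obj


obj = {}
expand_path(obj, "identifier[0].value", "A")
expand_path(obj, "maritalStatus.coding[0].code", "D")
# obj == {"identifier": [{"value": "A"}], "maritalStatus": {"coding": [{"code": "D"}]}}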
Example #8
def test_sqlalchemy_query():
    extractor = Extractor()
    extractor.session = mock.MagicMock()

    def mock_alchemy_query(*columns):
        return Query([*columns])

    extractor.session.query = mock_alchemy_query

    columns = [
        SqlColumn("patients", "subject_id"),
        SqlColumn("patients", "row_id"),
        SqlColumn("admissions", "admittime"),
    ]
    joins = [
        SqlJoin(columns[1], SqlColumn("admissions", "row_id")),
    ]
    pk_column = columns[0]
    resource_mapping = {
        "filters": [
            {
                "relation": "LIKE",
                "value": "'2150-08-29'",
                "sqlColumn": {"owner": None, "table": "admissions", "column": "admittime"},
            }
        ]
    }
    pk_values = None

    query = extractor.sqlalchemy_query(columns, joins, pk_column, resource_mapping, pk_values)

    assert str(query) == (
        "SELECT patients.subject_id AS patients_subject_id, patients.row_id AS patients_row_id, "
        "admissions.admittime AS admissions_admittime \n"
        "FROM patients LEFT OUTER JOIN admissions ON admissions.row_id = patients.row_id \n"
        "WHERE admissions.admittime LIKE :admittime_1"
    )
Example #9
def test_get_primary_key(mock_login):
    analyzer = Analyzer(PyrogClient())

    # With owner
    resource_mapping = {
        "primaryKeyOwner": "owner",
        "primaryKeyTable": "table",
        "primaryKeyColumn": "col",
    }
    primary_key = analyzer.get_primary_key(resource_mapping)

    assert primary_key == SqlColumn("table", "col", "owner")

    # Without owner
    resource_mapping = {
        "primaryKeyOwner": "",
        "primaryKeyTable": "table",
        "primaryKeyColumn": "col",
    }
    primary_key = analyzer.get_primary_key(resource_mapping)

    assert primary_key == SqlColumn("table", "col")

    # Raising error
    resource_mapping = {
        "primaryKeyOwner": "",
        "primaryKeyTable": "",
        "primaryKeyColumn": "col",
        "definitionId": "fhirtype",
    }
    with pytest.raises(
        ValueError, match="You need to provide a primary key table and column in the mapping"
    ):
        analyzer.get_primary_key(resource_mapping)
Example #10
def test_handle_array_attributes():
    attr1 = Attribute("attr1", columns=[SqlColumn("a", "b")])
    attr2 = Attribute("attr2", columns=[SqlColumn("a", "c")])
    row = {
        attr1.path: ("A1", "A2", "A3"),
        attr2.path: "B",
    }
    attributes_in_array = {
        "path1": attr1,
        "path2": attr2,
    }

    value = transform.handle_array_attributes(attributes_in_array, row)

    assert value == [
        {"path1": "A1", "path2": "B"},
        {"path1": "A2", "path2": "B"},
        {"path1": "A3", "path2": "B"},
    ]

    # With mismatch in lengths
    row = {
        attr1.path: ("A1", "A2", "A3"),
        attr2.path: ("B1", "B2"),
    }
    with raises(AssertionError, match="mismatch in array lengths"):
        transform.handle_array_attributes(attributes_in_array, row)
Example #11
def test_clean_data(_):
    data = {
        "PATIENTS_NAME": ["alice", "bob", "charlie"],
        "PATIENTS_ID": ["id1", "id2", "id3"],
        "PATIENTS_ID2": ["id21", "id22", "id23"],
        "ADMISSIONS_LANGUAGE": ["M", "F", "F"],
        "ADMISSIONS_ID": ["ABC", "DEF", "GHI"],
    }

    attr_name = Attribute(
        "name", columns=[SqlColumn("PATIENTS", "NAME", cleaning_script=CleaningScript("clean1"))]
    )
    attr_id = Attribute(
        "id",
        columns=[SqlColumn("PATIENTS", "ID"), SqlColumn("PATIENTS", "ID2")],
        static_inputs=["null"],
    )
    attr_language = Attribute(
        "language",
        columns=[SqlColumn("ADMISSIONS", "LANGUAGE", concept_map=ConceptMap("id_cm_gender"))],
        static_inputs=["val"],
    )
    attr_admid = Attribute(
        "code",
        columns=[
            SqlColumn(
                "ADMISSIONS",
                "ID",
                cleaning_script=CleaningScript("clean2"),
                concept_map=ConceptMap("id_cm_code"),
            )
        ],
    )
    attributes = [attr_name, attr_id, attr_language, attr_admid]
    primary_key_column = SqlColumn("PATIENTS", "ID")

    cleaned_data = transform.clean_data(data, attributes, primary_key_column)

    columns = [
        ("name", ("PATIENTS", "NAME")),
        ("id", ("PATIENTS", "ID")),
        ("id", ("PATIENTS", "ID2")),
        ("language", ("ADMISSIONS", "LANGUAGE")),
        ("code", ("ADMISSIONS", "ID")),
    ]

    expected = {
        columns[0]: ["alicecleaned", "bobcleaned", "charliecleaned"],
        columns[1]: ["id1", "id2", "id3"],
        columns[2]: ["id21", "id22", "id23"],
        columns[3]: ["male", "female", "female"],
        columns[4]: ["abc", "def", "ghi"],
    }

    assert cleaned_data == expected
Example #12
    def apply_filters(
        self, query: Query, resource_mapping, pk_column: SqlColumn, pk_values
    ) -> Query:
        """ Augment the sql alchemy query with filters from the analysis.
        """
        if pk_values is not None:
            query = query.filter(self.get_column(pk_column).in_(pk_values))

        if resource_mapping["filters"]:
            for filter in resource_mapping["filters"]:
                col = self.get_column(
                    SqlColumn(
                        filter["sqlColumn"]["table"],
                        filter["sqlColumn"]["column"],
                        filter["sqlColumn"]["owner"],
                    )
                )
                rel_method = SQL_RELATIONS_TO_METHOD[filter["relation"]]
                query = query.filter(getattr(col, rel_method)(filter["value"]))

        return query
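The `SQL_RELATIONS_TO_METHOD` dispatch table itself is not shown in these examples. Based on the filters exercised in Example #2 ("LIKE" compiling to `.like(...)` and "<=" to a `<=` comparison), a plausible sketch of such a mapping could look like this; entries other than "LIKE" and "<=" are assumptions:

# Hypothetical relation -> SQLAlchemy column-method mapping; only the "LIKE"
# and "<=" entries are backed by Example #2, the other keys are assumptions.
SQL_RELATIONS_TO_METHOD = {
    "LIKE": "like",
    "<=": "__le__",
    ">=": "__ge__",
    "=": "__eq__",
    "<>": "__ne__",
}

# getattr(col, SQL_RELATIONS_TO_METHOD["<="])("1000") builds the same
# BinaryExpression as col <= "1000"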
Example #13
def test_analyze_mapping(mock_login, patient_mapping):
    analyzer = Analyzer(PyrogClient())

    analysis = analyzer.analyze_mapping(patient_mapping)

    assert analysis.attributes == [
        Attribute(
            "identifier[0].value",
            columns=[SqlColumn("patients", "row_id")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "deceasedBoolean",
            columns=[SqlColumn("patients", "expire_flag")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "generalPractitioner[0].identifier.value",
            columns=[SqlColumn("icustays", "hadm_id")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "birthDate",
            columns=[SqlColumn("patients", "dob")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "deceasedDateTime",
            columns=[SqlColumn("patients", "dod")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "gender",
            columns=[SqlColumn("patients", "gender")],
            static_inputs=["unknown"],
            merging_script=MergingScript("select_first_not_empty"),
        ),
        Attribute(
            "maritalStatus.coding[0].code",
            columns=[SqlColumn("admissions", "marital_status")],
            static_inputs=[],
            merging_script=None,
        ),
        Attribute(
            "generalPractitioner[0].type",
            columns=[],
            static_inputs=["Practitioner"],
            merging_script=None,
        ),
    ]

    assert analysis.columns == {
        SqlColumn("patients", "row_id"),
        SqlColumn("patients", "gender"),
        SqlColumn("patients", "dob"),
        SqlColumn("patients", "dod"),
        SqlColumn("patients", "expire_flag"),
        SqlColumn("admissions", "marital_status"),
        SqlColumn("icustays", "hadm_id"),
    }
    assert analysis.joins == {
        SqlJoin(SqlColumn("patients", "subject_id"),
                SqlColumn("admissions", "subject_id")),
        SqlJoin(SqlColumn("patients", "subject_id"),
                SqlColumn("icustays", "subject_id")),
    }