def test_column_name_clashes_produce_errors():
    """Expect ``ValueError`` when a column name is defined twice.

    The study declares a top-level ``age`` column, and the ``status``
    expression declares its own ``age`` with a different reference date.
    """
    # Build every column inside the ``raises`` context so the error is
    # captured regardless of which call reports the clash.
    with pytest.raises(ValueError):
        columns = dict(
            population=patients.all(),
            age=patients.age_as_of("2020-01-01"),
            status=patients.satisfying(
                "age > 70 AND sex = 'M'",
                sex=patients.sex(),
                age=patients.age_as_of("2010-01-01"),
            ),
        )
        StudyDefinition(**columns)
def test_patient_characteristics_for_covid_status():
    """Check sex, death flag and age for the covid-positive population.

    Three patients are stored; the one without a ``CovidStatus`` is expected
    to be absent from the results because the population is restricted to
    patients with a positive covid test.
    """
    session = make_session()
    session.add_all(
        [
            # Old patient with covid, admitted to ITU.
            Patient(
                DateOfBirth="1900-01-01",
                CovidStatus=CovidStatus(Result="COVID19", AdmittedToITU=True),
                Sex="M",
            ),
            # Young patient with covid who died.
            Patient(
                DateOfBirth="2000-01-01",
                CovidStatus=CovidStatus(Result="COVID19", Died=True),
                Sex="F",
            ),
            # Young patient without any covid record.
            Patient(DateOfBirth="2001-01-01", Sex="F"),
        ]
    )
    session.commit()

    study = StudyDefinition(
        population=patients.with_positive_covid_test(),
        age=patients.age_as_of("2020-01-01"),
        sex=patients.sex(),
        died=patients.have_died_of_covid(),
    )
    rows = study.to_dicts()

    # Expected per-column values, in row order.
    expected_by_column = {
        "sex": ["M", "F"],
        "died": ["0", "1"],
        "age": ["120", "20"],
    }
    for column, expected in expected_by_column.items():
        assert [row[column] for row in rows] == expected
def test_age_dtype_generation():
    """Check the pandas CSV arguments generated for a study with an age column.

    After ``_converters_to_names`` is applied, ``age`` should be declared as
    an ``int`` dtype and there should be no date columns or converters.
    """
    expected_csv_args = {
        "dtype": {"age": "int"},
        "parse_dates": [],
        "date_col_for": {},
        "converters": {},
    }
    study = StudyDefinition(
        # This line defines the study population
        population=patients.all(),
        age=patients.age_as_of("2020-02-01"),
    )
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_patients_satisfying_with_hidden_columns():
    """Check ``patients.satisfying`` with columns declared inside the call.

    ``has_asthma`` and ``copd`` are defined only as keyword arguments to
    ``patients.satisfying``; the test asserts the ``at_risk`` flags and that
    ``has_asthma`` does not appear as a key in the output rows.
    """
    asthma_code = "ASTHMA"
    copd_code = "COPD"

    def event_for(code):
        # Every event in this test shares the same consultation date.
        return CodedEvent(CTV3Code=code, ConsultationDate="2010-01-01")

    session = make_session()
    cohort = [
        Patient(DateOfBirth="1940-01-01", Sex="M"),
        Patient(DateOfBirth="1940-01-01", Sex="F"),
        Patient(DateOfBirth="1990-01-01", Sex="M"),
        # Asthma only.
        Patient(
            DateOfBirth="1940-01-01",
            Sex="F",
            CodedEvents=[event_for(asthma_code)],
        ),
        # Asthma and COPD.
        Patient(
            DateOfBirth="1940-01-01",
            Sex="F",
            CodedEvents=[event_for(asthma_code), event_for(copd_code)],
        ),
    ]
    session.add_all(cohort)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(),
        age=patients.age_as_of("2020-01-01"),
        at_risk=patients.satisfying(
            """ (age > 70 AND sex = "M") OR (has_asthma AND NOT copd) """,
            has_asthma=patients.with_these_clinical_events(
                codelist([asthma_code], "ctv3")
            ),
            copd=patients.with_these_clinical_events(
                codelist([copd_code], "ctv3")
            ),
        ),
    )
    rows = study.to_dicts()

    at_risk_flags = [row["at_risk"] for row in rows]
    assert at_risk_flags == ["1", "0", "0", "1", "0"]
    # Columns declared inside ``satisfying`` must not be output.
    assert "has_asthma" not in rows[0].keys()
def test_patients_satisfying():
    """Check ``patients.satisfying`` against other columns of the same study.

    The ``at_risk`` expression refers to the ``age``, ``sex`` and
    ``has_asthma`` columns, all declared at the top level of the study.
    """
    asthma_code = "ASTHMA"
    session = make_session()
    cohort = [
        Patient(DateOfBirth="1940-01-01", Sex="M"),
        Patient(DateOfBirth="1940-01-01", Sex="F"),
        Patient(DateOfBirth="1990-01-01", Sex="M"),
        # The only patient with an asthma event.
        Patient(
            DateOfBirth="1940-01-01",
            Sex="F",
            CodedEvents=[
                CodedEvent(CTV3Code=asthma_code, ConsultationDate="2010-01-01")
            ],
        ),
    ]
    session.add_all(cohort)
    session.commit()

    asthma_events = patients.with_these_clinical_events(
        codelist([asthma_code], "ctv3")
    )
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(),
        age=patients.age_as_of("2020-01-01"),
        has_asthma=asthma_events,
        at_risk=patients.satisfying("(age > 70 AND sex = 'M') OR has_asthma"),
    )

    at_risk_flags = [row["at_risk"] for row in study.to_dicts()]
    assert at_risk_flags == ["1", "0", "0", "1"]
def test_using_expression_in_population_definition():
    """Check that the population itself can be a ``patients.satisfying`` expression.

    Of the four stored patients only the male with a ``foo1`` event matches
    the expression. The output rows are expected to contain just
    ``patient_id`` and ``age``.
    """

    def foo_event():
        # A fresh event instance per patient.
        return CodedEvent(CTV3Code="foo1", ConsultationDate="2000-01-01")

    male_with_code = Patient(
        Sex="M", DateOfBirth="1970-01-01", CodedEvents=[foo_event()]
    )
    male_without_code = Patient(Sex="M", DateOfBirth="1975-01-01")
    female_with_code = Patient(
        Sex="F", DateOfBirth="1980-01-01", CodedEvents=[foo_event()]
    )
    female_without_code = Patient(Sex="F", DateOfBirth="1985-01-01")

    session = make_session()
    session.add_all(
        [
            male_with_code,
            male_without_code,
            female_with_code,
            female_without_code,
        ]
    )
    session.commit()

    population = patients.satisfying(
        "has_foo_code AND sex = 'M'",
        has_foo_code=patients.with_these_clinical_events(
            codelist(["foo1"], "ctv3")
        ),
        sex=patients.sex(),
    )
    study = StudyDefinition(
        population=population,
        age=patients.age_as_of("2020-01-01"),
    )
    rows = study.to_dicts()

    assert rows[0].keys() == {"patient_id", "age"}
    assert [row["age"] for row in rows] == ["50"]
), died_date_ons=patients.died_from_any_cause( on_or_after="2020-03-01", returning="date_of_death", include_month=True, include_day=True, return_expectations={"date": { "earliest": "2020-03-01" }}, ), ## DEMOGRAPHIC INFORMATION age=patients.age_as_of( "2020-02-29", return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" }, }, ), sex=patients.sex(return_expectations={ "rate": "universal", "category": { "ratios": { "M": 0.49, "F": 0.51 } }, }), stp=patients.registered_practice_as_of( "2020-02-29",
died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate( covid_codelist, on_or_before="2020-06-01", match_only_underlying_cause=False ), died_ons_covid_flag_underlying=patients.with_these_codes_on_death_certificate( covid_codelist, on_or_before="2020-06-01", match_only_underlying_cause=True ), died_date_ons=patients.died_from_any_cause( on_or_before="2020-06-01", returning="date_of_death", include_month=True, include_day=True, ), # The rest of the lines define the covariates with associated GitHub issues # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33 age=patients.age_as_of("2020-02-01"), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46 sex=patients.sex(), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52 imd=patients.address_as_of( "2020-02-01", returning="index_of_multiple_deprivation", round_to_nearest=100 ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/37 rural_urban=patients.address_as_of( "2020-02-01", returning="rural_urban_classification" ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54