def test_make_df_from_expectations_doesnt_alter_defaults():
    """Check a per-column expectation override leaves ``sex_default`` fully populated.

    The study declares default expectations with an incidence of 1.0.
    ``sex_altered`` supplies its own incidence of 0.1, while ``sex_default``
    only supplies category ratios. The generated dummy data must contain no
    null values in the ``sex_default`` column.
    """
    # Each expectations dict is a separate literal on purpose: nothing is
    # shared between the study defaults and the individual columns.
    study_defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    overriding_expectations = {
        "incidence": 0.1,
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    ratios_only_expectations = {"category": {"ratios": {"M": 0.5, "F": 0.5}}}

    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        sex_altered=patients.sex(return_expectations=overriding_expectations),
        sex_default=patients.sex(return_expectations=ratios_only_expectations),
    )

    dummy_data = study.make_df_from_expectations(10000)

    rows_missing_sex = dummy_data[pd.isnull(dummy_data.sex_default)]
    assert len(rows_missing_sex) == 0
def test_make_df_from_expectations_with_categories_expression_validation():
    """Expect a ``ValueError`` when the ratios name an undefined category.

    The ``categorised_as`` definition only declares categories "A" and "B",
    but the expected ratios also mention "C".
    """
    category_rules = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.6, "C": 0.1}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }

    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_categories_expression():
    """Check dummy data for a ``categorised_as`` column follows the ratios.

    With ratios of 0.3 for "A" and 0.7 for "B", the generated ``category``
    column must contain fewer "A" values than "B" values.
    """
    category_rules = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }

    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)

    counts = dummy_data.category.value_counts()
    assert counts["A"] < counts["B"]
def test_patient_characteristics_for_covid_status():
    """Check sex, death flag and age for a covid-positive population.

    Three patients are stored; only the two with a ``CovidStatus`` record are
    expected in the results, in insertion order.
    """
    session = make_session()

    fixture_patients = [
        # Old patient with covid
        Patient(
            DateOfBirth="1900-01-01",
            CovidStatus=CovidStatus(Result="COVID19", AdmittedToITU=True),
            Sex="M",
        ),
        # Young patient with covid who died
        Patient(
            DateOfBirth="2000-01-01",
            CovidStatus=CovidStatus(Result="COVID19", Died=True),
            Sex="F",
        ),
        # Young patient without covid
        Patient(DateOfBirth="2001-01-01", Sex="F"),
    ]
    for fixture_patient in fixture_patients:
        session.add(fixture_patient)
    session.commit()

    study = StudyDefinition(
        population=patients.with_positive_covid_test(),
        age=patients.age_as_of("2020-01-01"),
        sex=patients.sex(),
        died=patients.have_died_of_covid(),
    )
    rows = study.to_dicts()

    expected_by_column = {
        "sex": ["M", "F"],
        "died": ["0", "1"],
        "age": ["120", "20"],
    }
    for column, expected_values in expected_by_column.items():
        assert [row[column] for row in rows] == expected_values
def test_patients_categorised_as():
    """Check ``categorised_as`` assigns categories and hides helper columns.

    Four patients with differing sex and coded events are expected to be
    categorised as "Y", "W", "Z" and "X" respectively. The helper columns
    used inside the category expressions must not appear in the output.
    """
    event_date = "2000-01-01"

    session = make_session()
    fixture_patients = [
        Patient(
            Sex="M",
            CodedEvents=[CodedEvent(CTV3Code="foo1", ConsultationDate=event_date)],
        ),
        Patient(
            Sex="F",
            CodedEvents=[
                CodedEvent(CTV3Code="foo2", ConsultationDate=event_date),
                CodedEvent(CTV3Code="bar1", ConsultationDate=event_date),
            ],
        ),
        Patient(
            Sex="M",
            CodedEvents=[CodedEvent(CTV3Code="foo2", ConsultationDate=event_date)],
        ),
        Patient(
            Sex="F",
            CodedEvents=[CodedEvent(CTV3Code="foo3", ConsultationDate=event_date)],
        ),
    ]
    session.add_all(fixture_patients)
    session.commit()

    foo_codes = codelist([("foo1", "A"), ("foo2", "B"), ("foo3", "C")], "ctv3")
    bar_codes = codelist(["bar1"], "ctv3")

    category_rules = {
        "W": "foo_category = 'B' AND female_with_bar",
        "X": "sex = 'F' AND (foo_category = 'B' OR foo_category = 'C')",
        "Y": "sex = 'M' AND foo_category = 'A'",
        "Z": "DEFAULT",
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            foo_category=patients.with_these_clinical_events(
                foo_codes, returning="category", find_last_match_in_period=True
            ),
            female_with_bar=patients.satisfying(
                "has_bar AND sex = 'F'",
                has_bar=patients.with_these_clinical_events(bar_codes),
            ),
        ),
    )
    rows = study.to_dicts()

    assert [row["category"] for row in rows] == ["Y", "W", "Z", "X"]

    # Columns defined only to feed the expressions must stay internal.
    first_row_columns = rows[0].keys()
    for internal_column in ("foo_category", "female_with_bar", "has_bar"):
        assert internal_column not in first_row_columns
def test_sex_dtype_generation():
    """Check the pandas CSV arguments generated for a lone ``sex`` column.

    After passing ``pandas_csv_args`` through ``_converters_to_names``, the
    ``sex`` column must be typed as "category" and the other entries empty.
    """
    study = StudyDefinition(population=patients.all(), sex=patients.sex())

    expected_csv_args = {
        "dtype": {"sex": "category"},
        "converters": {},
        "date_col_for": {},
        "parse_dates": [],
    }
    actual_csv_args = _converters_to_names(study.pandas_csv_args)

    assert actual_csv_args == expected_csv_args
def test_column_name_clashes_produce_errors():
    """Expect a ``ValueError`` when a column name is defined twice.

    ``age`` is defined both as a top-level column and as a helper column
    inside ``patients.satisfying``; building the study must raise.
    """
    with pytest.raises(ValueError):
        StudyDefinition(
            population=patients.all(),
            age=patients.age_as_of("2020-01-01"),
            status=patients.satisfying(
                "age > 70 AND sex = 'M'",
                sex=patients.sex(),
                # Same name as the top-level ``age`` column above.
                age=patients.age_as_of("2010-01-01"),
            ),
        )
def test_make_df_no_categories_validation_when_no_categories_in_definition():
    """Smoke test: a plain ``sex`` column with ratios generates dummy data.

    There is no assertion on the output; the test passes as long as
    ``make_df_from_expectations`` does not raise.
    """
    sex_expectations = {
        "rate": "universal",
        "category": {"ratios": {"M": 0.49, "F": 0.51}},
    }
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(return_expectations=sex_expectations),
    )

    # The call itself is the check: any exception fails the test.
    study.make_df_from_expectations(10000)
def test_minimal_study_to_csv():
    """Check ``to_csv`` writes one row per patient with id and sex columns.

    Two patients are stored, the study is exported to a temporary file, and
    the file is read back with ``csv.DictReader`` for comparison.
    """
    session = make_session()
    male_patient = Patient(DateOfBirth="1900-01-01", Sex="M")
    female_patient = Patient(DateOfBirth="1900-01-01", Sex="F")
    session.add_all([male_patient, female_patient])
    session.commit()

    study = StudyDefinition(population=patients.all(), sex=patients.sex())

    with tempfile.NamedTemporaryFile(mode="w+") as csv_file:
        # The study writes to the file by name; the rows are then read back
        # through the handle that is already open.
        study.to_csv(csv_file.name)
        rows = list(csv.DictReader(csv_file))

    expected_rows = [
        {"patient_id": str(patient.Patient_ID), "sex": sex}
        for patient, sex in ((male_patient, "M"), (female_patient, "F"))
    ]
    assert rows == expected_rows
def test_patients_satisfying_with_hidden_columns():
    """Check ``satisfying`` with helper columns defined inline.

    ``at_risk`` is expected to be "1" for the first and fourth patients only,
    and the inline ``has_asthma`` helper must not appear in the output.
    """
    asthma_code = "ASTHMA"
    copd_code = "COPD"
    event_date = "2010-01-01"

    session = make_session()
    older_male = Patient(DateOfBirth="1940-01-01", Sex="M")
    older_female = Patient(DateOfBirth="1940-01-01", Sex="F")
    younger_male = Patient(DateOfBirth="1990-01-01", Sex="M")

    female_with_asthma = Patient(DateOfBirth="1940-01-01", Sex="F")
    female_with_asthma.CodedEvents.append(
        CodedEvent(CTV3Code=asthma_code, ConsultationDate=event_date)
    )

    female_with_asthma_and_copd = Patient(DateOfBirth="1940-01-01", Sex="F")
    for code in (asthma_code, copd_code):
        female_with_asthma_and_copd.CodedEvents.append(
            CodedEvent(CTV3Code=code, ConsultationDate=event_date)
        )

    session.add_all(
        [
            older_male,
            older_female,
            younger_male,
            female_with_asthma,
            female_with_asthma_and_copd,
        ]
    )
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(),
        age=patients.age_as_of("2020-01-01"),
        at_risk=patients.satisfying(
            """ (age > 70 AND sex = "M") OR (has_asthma AND NOT copd) """,
            has_asthma=patients.with_these_clinical_events(
                codelist([asthma_code], "ctv3")
            ),
            copd=patients.with_these_clinical_events(
                codelist([copd_code], "ctv3")
            ),
        ),
    )
    rows = study.to_dicts()

    at_risk_flags = [row["at_risk"] for row in rows]
    assert at_risk_flags == ["1", "0", "0", "1", "0"]
    assert "has_asthma" not in rows[0].keys()
def test_patients_satisfying():
    """Check ``satisfying`` can reference other top-level study columns.

    ``at_risk`` combines the ``age``, ``sex`` and ``has_asthma`` columns and
    is expected to be "1" for the first and fourth patients only.
    """
    asthma_code = "ASTHMA"

    session = make_session()
    older_male = Patient(DateOfBirth="1940-01-01", Sex="M")
    older_female = Patient(DateOfBirth="1940-01-01", Sex="F")
    younger_male = Patient(DateOfBirth="1990-01-01", Sex="M")
    female_with_asthma = Patient(DateOfBirth="1940-01-01", Sex="F")
    female_with_asthma.CodedEvents.append(
        CodedEvent(CTV3Code=asthma_code, ConsultationDate="2010-01-01")
    )
    session.add_all([older_male, older_female, younger_male, female_with_asthma])
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(),
        age=patients.age_as_of("2020-01-01"),
        has_asthma=patients.with_these_clinical_events(
            codelist([asthma_code], "ctv3")
        ),
        at_risk=patients.satisfying("(age > 70 AND sex = 'M') OR has_asthma"),
    )
    rows = study.to_dicts()

    at_risk_flags = [row["at_risk"] for row in rows]
    assert at_risk_flags == ["1", "0", "0", "1"]
def test_using_expression_in_population_definition():
    """Check the population itself can be defined with ``satisfying``.

    Of four stored patients, only the male patient with a "foo1" event is
    expected in the results. The helper columns used by the population
    expression must not appear in the output.
    """
    session = make_session()
    fixture_patients = [
        Patient(
            Sex="M",
            DateOfBirth="1970-01-01",
            CodedEvents=[CodedEvent(CTV3Code="foo1", ConsultationDate="2000-01-01")],
        ),
        Patient(Sex="M", DateOfBirth="1975-01-01"),
        Patient(
            Sex="F",
            DateOfBirth="1980-01-01",
            CodedEvents=[CodedEvent(CTV3Code="foo1", ConsultationDate="2000-01-01")],
        ),
        Patient(Sex="F", DateOfBirth="1985-01-01"),
    ]
    session.add_all(fixture_patients)
    session.commit()

    study = StudyDefinition(
        population=patients.satisfying(
            "has_foo_code AND sex = 'M'",
            has_foo_code=patients.with_these_clinical_events(
                codelist(["foo1"], "ctv3")
            ),
            sex=patients.sex(),
        ),
        age=patients.age_as_of("2020-01-01"),
    )
    rows = study.to_dicts()

    # Only the patient id and the explicitly requested column are output.
    assert rows[0].keys() == {"patient_id", "age"}
    assert [row["age"] for row in rows] == ["50"]
), ## DEMOGRAPHIC INFORMATION age=patients.age_as_of( "2020-02-29", return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" }, }, ), sex=patients.sex(return_expectations={ "rate": "universal", "category": { "ratios": { "M": 0.49, "F": 0.51 } }, }), stp=patients.registered_practice_as_of( "2020-02-29", returning="stp_code", return_expectations={ "rate": "universal", "category": { "ratios": { "STP1": 0.1, "STP2": 0.1, "STP3": 0.1, "STP4": 0.1,
died_ons_covid_flag_underlying=patients.with_these_codes_on_death_certificate( covid_codelist, on_or_before="2020-06-01", match_only_underlying_cause=True ), died_date_ons=patients.died_from_any_cause( on_or_before="2020-06-01", returning="date_of_death", include_month=True, include_day=True, ), # The rest of the lines define the covariates with associated GitHub issues # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33 age=patients.age_as_of("2020-02-01"), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46 sex=patients.sex(), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52 imd=patients.address_as_of( "2020-02-01", returning="index_of_multiple_deprivation", round_to_nearest=100 ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/37 rural_urban=patients.address_as_of( "2020-02-01", returning="rural_urban_classification" ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54 stp=patients.registered_practice_as_of("2020-02-01", returning="stp_code"), # region - one of NHS England 9 regions