def test_make_df_from_expectations_with_categories_expression_validation():
    """Dummy-data generation must fail when the expected ratios don't fit.

    The category definitions only provide "A" and "B", while the expected
    ratios additionally list "C"; ``make_df_from_expectations`` is expected
    to answer that with a ``ValueError``.
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        # "C" has no counterpart in category_definitions.
        "category": {"ratios": {"A": 0.3, "B": 0.6, "C": 0.1}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    # Only the generation step is wrapped: building the study itself is
    # expected to succeed.
    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_categories_expression():
    """Dummy data for a categorised column should follow the given ratios.

    With expected ratios of 0.3 for "A" and 0.7 for "B", the generated
    ``category`` column must contain fewer "A" values than "B" values.
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)
    counts = dummy_data.category.value_counts()

    # "B" is weighted more than twice as heavily as "A".
    assert counts["B"] > counts["A"]
def test_patients_categorised_as():
    """Check ``patients.categorised_as`` against four hand-built patients.

    Each patient gets a sex and one or two coded events, all dated
    2000-01-01.  The study then assigns one of the labels W/X/Y/Z per
    patient, and the helper columns used inside the category expressions
    must not show up in the output rows.
    """
    event_date = "2000-01-01"
    # (sex, CTV3 codes) for each patient, in insertion order.
    patient_specs = [
        ("M", ["foo1"]),
        ("F", ["foo2", "bar1"]),
        ("M", ["foo2"]),
        ("F", ["foo3"]),
    ]

    session = make_session()
    session.add_all(
        [
            Patient(
                Sex=sex,
                CodedEvents=[
                    CodedEvent(CTV3Code=code, ConsultationDate=event_date)
                    for code in codes
                ],
            )
            for sex, codes in patient_specs
        ]
    )
    session.commit()

    foo_codelist = codelist([("foo1", "A"), ("foo2", "B"), ("foo3", "C")], "ctv3")
    bar_codelist = codelist(["bar1"], "ctv3")

    category_expressions = {
        "W": "foo_category = 'B' AND female_with_bar",
        "X": "sex = 'F' AND (foo_category = 'B' OR foo_category = 'C')",
        "Y": "sex = 'M' AND foo_category = 'A'",
        "Z": "DEFAULT",
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_expressions,
            sex=patients.sex(),
            foo_category=patients.with_these_clinical_events(
                foo_codelist, returning="category", find_last_match_in_period=True
            ),
            female_with_bar=patients.satisfying(
                "has_bar AND sex = 'F'",
                has_bar=patients.with_these_clinical_events(bar_codelist),
            ),
        ),
    )

    rows = study.to_dicts()

    # Patient 1: male with foo category A -> Y.
    # Patient 2: female with foo category B plus a bar event -> W (her data
    #            also satisfies X's expression, yet W is the expected label).
    # Patient 3: male with foo category B matches no expression -> Z.
    # Patient 4: female with foo category C -> X.
    assigned = [row["category"] for row in rows]
    assert assigned == ["Y", "W", "Z", "X"]

    # Assert that internal columns do not appear
    first_row_columns = rows[0].keys()
    for internal_column in ("foo_category", "female_with_bar", "has_bar"):
        assert internal_column not in first_row_columns
# Derive one smoking-status label per patient from coded smoking events.
# The first argument maps each output label to the expression that selects
# it; the expressions refer to the helper columns passed as keyword
# arguments further down (most_recent_smoking_code, ever_smoked).
smoking_status=patients.categorised_as(
    {
        "S": "most_recent_smoking_code = 'S'",
        # "E" also covers a most recent code of 'N' when ever_smoked is set.
        "E": """ most_recent_smoking_code = 'E' OR ( most_recent_smoking_code = 'N' AND ever_smoked ) """,
        "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
        # NOTE(review): "DEFAULT" presumably labels every patient that
        # matches none of the expressions above -- confirm.
        "M": "DEFAULT",
    },
    # Expected share of each label (the four ratios sum to 1.0).
    # NOTE(review): presumably only used when generating dummy data -- confirm.
    return_expectations={
        "category": {
            "ratios": {
                "S": 0.6,
                "E": 0.1,
                "N": 0.2,
                "M": 0.1
            }
        }
    },
    # Helper column: category of the last matching event from
    # clear_smoking_codes on or before 2020-02-29.
    most_recent_smoking_code=patients.with_these_clinical_events(
        clear_smoking_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="category",
    ),
    # Helper column: same cut-off date, but restricted to the codes whose
    # category is "S" or "E"; used as a boolean in the expressions above.
    ever_smoked=patients.with_these_clinical_events(
        filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
        on_or_before="2020-02-29",
    ),
),
include_measurement_date=True, include_month=True, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/6 smoking_status=patients.categorised_as( { "S": "most_recent_smoking_code = 'S'", "E": """ most_recent_smoking_code = 'E' OR ( most_recent_smoking_code = 'N' AND ever_smoked ) """, "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked", "M": "DEFAULT" }, most_recent_smoking_code=patients.with_these_clinical_events( clear_smoking_codes, find_last_match_in_period=True, on_or_before='2020-02-01', returning="category", ), ever_smoked=patients.with_these_clinical_events( filter_codes_by_category(clear_smoking_codes, include=['S', 'E']), on_or_before='2020-02-01' ), ), smoking_status_date=patients.with_these_clinical_events( clear_smoking_codes, on_or_before='2020-02-01', return_last_date_in_period=True, include_month=True,