def test_make_df_from_expectations_with_categories_expression_validation():
    """Reject expectation ratios that name a category the definition lacks.

    The ``categorised_as`` mapping only declares "A" and "B", while the
    expectation ratios also mention "C"; building the dummy dataframe is
    expected to raise ``ValueError``.
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        # "C" has no counterpart in category_definitions.
        "category": {"ratios": {"A": 0.3, "B": 0.6, "C": 0.1}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_categories_in_codelist_validation():
    """Reject expectation ratios that do not match a categorised codelist.

    The codelist's only entry is the pair ("X", "Y") and it is flagged as
    having categories, whereas the expectation ratios use "A" and "B";
    building the dummy dataframe is expected to raise ``ValueError``.
    """
    categorised_codelist = codelist([("X", "Y")], system="ctv3")
    categorised_codelist.has_categories = True
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            categorised_codelist,
            returning="category",
            return_expectations=expectations,
            find_last_match_in_period=True,
            include_date_of_match=False,
        ),
    )

    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_no_categories_validation_when_no_categories_in_definition():
    """Generate dummy data for a variable that declares no categories.

    ``patients.sex`` is given category ratios in its expectations but no
    category definitions of its own; the call is expected to complete
    without raising.
    """
    sex_expectations = {
        "rate": "universal",
        "category": {"ratios": {"M": 0.49, "F": 0.51}},
    }
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(return_expectations=sex_expectations),
    )

    # Just ensuring no exception is raised
    study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_distribution_and_date():
    """Check a float variable generated together with its measurement date.

    ``most_recent_bmi`` is requested with ``include_measurement_date=True``,
    so the dummy dataframe must contain exactly the ``bmi`` and
    ``bmi_date_measured`` columns, and a row must be null in one column
    exactly when it is null in the other.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            include_measurement_date=True,
            include_month=True,
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.6,
                "float": {"distribution": "normal", "mean": 35, "stddev": 10},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    assert sorted(result.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other.
    # Comparing the null masks directly makes a misalignment fail the
    # assertion itself rather than surfacing as a pandas label-mismatch
    # error from comparing differently-indexed series.
    bmi_is_null = pd.isnull(result["bmi"])
    date_is_null = pd.isnull(result["bmi_date_measured"])
    assert bmi_is_null.equals(date_is_null)
def test_make_df_from_expectations_with_date_filter():
    """Check that a ``between`` date filter caps the generated dates.

    The expectations allow dates up to "today", but the variable is limited
    to events between 2001-12-01 and 2002-06-01; no generated date may fall
    after the upper bound.
    """
    study = StudyDefinition(
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_first_match_in_period=True,
            include_month=True,
            include_day=True,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # Compare as a plain list: `Index == list` yields an element-wise array
    # whose truthiness is only defined for a single element.
    assert list(result.columns) == ["asthma_condition"]

    # Select the column by label; integer keys on a label-indexed Series
    # rely on a deprecated positional fallback.
    latest_generated = result["asthma_condition"].dropna().max()
    assert latest_generated <= "2002-06-01"
def test_make_df_from_expectations_with_categories_expression():
    """Check category ratios are honoured for a ``categorised_as`` variable.

    With ratios of 0.3 for "A" and 0.7 for "B", the generated data must
    contain fewer "A" rows than "B" rows.
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)
    counts_by_category = dummy_data.category.value_counts()
    assert counts_by_category["A"] < counts_by_category["B"]
def test_make_df_from_expectations_with_categories():
    """Check category ratios are honoured for a categorised codelist.

    The codelist carries the pairs ("1", "A") and ("2", "B") and is flagged
    as having categories. With ratios of 0.3 for "A" and 0.7 for "B", the
    single ``ethnicity`` column must contain fewer "A" rows than "B" rows.
    """
    categorised_codelist = codelist([("1", "A"), ("2", "B")], system="ctv3")
    categorised_codelist.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            categorised_codelist,
            returning="category",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "category": {"ratios": {"A": 0.3, "B": 0.7}},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_last_match_in_period=True,
            include_date_of_match=False,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # Compare as a plain list: `Index == list` yields an element-wise array
    # whose truthiness is only defined for a single element.
    assert list(result.columns) == ["ethnicity"]

    # value_counts() gives per-category row counts keyed by label, avoiding
    # integer-key positional lookups on a label-indexed Series.
    category_counts = result.ethnicity.value_counts()
    assert category_counts["A"] < category_counts["B"]
def test_make_df_from_expectations_doesnt_alter_date_defaults():
    """Check per-variable overrides do not leak into the study defaults.

    Two variables override part of the default expectations (incidence and
    earliest date respectively). A third passes an empty ``date`` override
    and must therefore still see the default date range and the default
    incidence of 1.0.
    """
    study_defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        with_different_incidence=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            include_day=True,
        ),
        with_different_date=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={
                "date": {"earliest": "2015-01-01", "latest": "today"}
            },
            include_day=True,
        ),
        with_defaults=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"date": {}},
            include_day=True,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)

    # Regression test: make sure defaults are respected even when they've
    # been overridden
    default_dates = dummy_data.with_defaults
    assert default_dates.min() < "2015-01-01"
    missing_rows = dummy_data[pd.isnull(default_dates)]
    assert len(missing_rows) == 0
def test_make_df_from_expectations_partial_default_overrides():
    """Check a partial ``date`` override is combined with the defaults.

    The variable only overrides ``latest`` (2000-01-01) and formats dates as
    "YYYY"; the largest generated year must equal 2000.
    """
    study_defaults = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
        "incidence": 0.2,
    }
    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY",
            return_expectations={"date": {"latest": "2000-01-01"}},
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)
    years = dummy_data.asthma_condition.astype("float")
    assert years.max() == 2000
def test_make_df_from_expectations_with_number_of_episodes():
    """Check dummy data is generated for a ``number_of_episodes`` variable.

    The variable uses an ``int`` expectation with a normal distribution; the
    resulting dataframe must contain exactly the ``episode_count`` column.
    """
    study = StudyDefinition(
        population=patients.all(),
        episode_count=patients.with_these_clinical_events(
            codelist(["A", "B", "C"], system="ctv3"),
            ignore_days_where_these_codes_occur=codelist(
                ["D", "E"], system="ctv3"
            ),
            returning="number_of_episodes",
            episode_defined_as="series of events each <= 14 days apart",
            return_expectations={
                "int": {"distribution": "normal", "mean": 4, "stddev": 2},
                "date": {"earliest": "1900-01-01", "latest": "today"},
                "incidence": 0.2,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # Compare as a plain list: `Index == list` yields an element-wise array
    # whose truthiness is only defined for a single element.
    assert list(result.columns) == ["episode_count"]
def test_make_df_from_expectations_doesnt_alter_defaults():
    """Check an incidence override on one variable does not affect another.

    ``sex_altered`` lowers the incidence to 0.1, while ``sex_default`` only
    restates the category ratios and so must keep the default incidence of
    1.0, i.e. contain no null rows.
    """
    study_defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    # Each variable gets its own dict literal on purpose: the test is about
    # one variable's overrides not bleeding into shared state.
    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        sex_altered=patients.sex(
            return_expectations={
                "incidence": 0.1,
                "category": {"ratios": {"M": 0.5, "F": 0.5}},
            }
        ),
        sex_default=patients.sex(
            return_expectations={
                "category": {"ratios": {"M": 0.5, "F": 0.5}},
            }
        ),
    )

    # Just ensuring no exception is raised
    dummy_data = study.make_df_from_expectations(10000)
    rows_missing_default = dummy_data[pd.isnull(dummy_data.sex_default)]
    assert len(rows_missing_default) == 0
def test_make_df_from_binary_default_outcome():
    """Check the incidence of a variable given only an ``incidence`` value.

    With an incidence of 0.1, the number of non-null ``died`` rows must
    equal 0.1 times the population size.
    """
    population_size = 10000
    study = StudyDefinition(
        population=patients.all(),
        died=patients.died_from_any_cause(
            return_expectations={"incidence": 0.1}
        ),
    )

    dummy_data = study.make_df_from_expectations(population_size)
    has_value = ~pd.isnull(dummy_data.died)
    populated_rows = dummy_data[has_value]
    assert len(populated_rows) == 0.1 * population_size
def test_make_df_from_expectations_with_mean_recorded_value():
    """Check the generated values of a ``mean_recorded_value`` variable.

    The expectation is a normal distribution with mean 35; the truncated
    mean of the generated column must lie within 5 of that value.
    """
    expected_mean = 35
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
    }
    study = StudyDefinition(
        population=patients.all(),
        drug_x=patients.mean_recorded_value(
            codelist(["X"], system="ctv3"),
            on_most_recent_day_of_measurement=True,
            return_expectations=expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)
    observed_mean = int(dummy_data["drug_x"].mean())
    assert abs(expected_mean - observed_mean) < 5
def test_make_df_from_expectations_returning_date_using_defaults():
    """Check a date variable picks up the study-level date defaults.

    The variable only supplies an incidence, so its date range comes from
    ``default_expectations`` (earliest 1900-01-01); the earliest generated
    date must therefore fall before 1960-01-01.
    """
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "1900-01-01", "latest": "today"},
            "rate": "exponential_increase",
        },
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # Select the column by label; integer keys on a label-indexed Series
    # rely on a deprecated positional fallback.
    earliest_generated = result["asthma_condition"].dropna().min()
    assert earliest_generated < "1960-01-01"
def test_make_df_from_expectations_with_care_home_status():
    """Check dummy data for boolean and categorised care-home variables.

    The categorised variable uses ratios of 0.1 for "PN" and 0.7 for "U",
    so the generated data must contain fewer "PN" rows than "U" rows.
    """
    as_of_date = "2020-01-01"
    flag_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.3,
        "date": {"earliest": "1900-01-01", "latest": "2020-01-01"},
        "bool": True,
    }
    type_definitions = {
        "PN": "IsPotentialCareHome AND LocationRequiresNursing='Y'",
        "PC": "IsPotentialCareHome",
        "U": "DEFAULT",
    }
    type_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"PN": 0.1, "PC": 0.2, "U": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        is_in_care_home=patients.care_home_status_as_of(
            as_of_date,
            return_expectations=flag_expectations,
        ),
        care_home_type=patients.care_home_status_as_of(
            as_of_date,
            categorised_as=type_definitions,
            return_expectations=type_expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)
    counts_by_type = dummy_data.care_home_type.value_counts()
    assert counts_by_type["PN"] < counts_by_type["U"]
def test_make_df_from_expectations_with_satisfying():
    """Check dummy data for a ``satisfying`` expression with sub-variables.

    ``condition_a`` and ``condition_b`` are only passed as inputs to the
    expression; the resulting dataframe must contain exactly the
    ``has_condition`` column.
    """
    study = StudyDefinition(
        population=patients.all(),
        has_condition=patients.satisfying(
            "condition_a OR condition_b",
            condition_a=patients.with_these_clinical_events(
                codelist(["A", "B", "C"], system="ctv3")
            ),
            condition_b=patients.with_these_clinical_events(
                codelist(["X", "Y", "Z"], system="ctv3")
            ),
            return_expectations={
                "date": {"earliest": "2001-01-01", "latest": "2020-03-01"},
                "incidence": 0.95,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # Compare as a plain list: `Index == list` yields an element-wise array
    # whose truthiness is only defined for a single element.
    assert list(result.columns) == ["has_condition"]