def test_make_df_from_binary_default_outcome():
    """Check that dummy data for ``died`` matches the requested incidence.

    A study is defined with a single ``died`` variable whose expectations
    specify an incidence of 0.1.  A dataframe is generated from those
    expectations and the number of rows with a non-null ``died`` value must
    equal 0.1 times the requested population size.
    """
    incidence = 0.1
    n_patients = 10000
    died_expectations = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "incidence": incidence,
    }
    study = StudyDefinition(
        population=patients.all(),
        died=patients.died_from_any_cause(
            return_expectations=died_expectations,
        ),
    )

    df = study.make_df_from_expectations(n_patients)

    # Rows where the outcome column actually holds a value.
    has_value = pd.notnull(df.died)
    assert len(df[has_value]) == incidence * n_patients
# Configure the expectations framework default_expectations={ "date": { "earliest": "2020-01-01", "latest": "today" }, "rate": "universal", }, # define the study index date index_date=index_date, # This line defines the study population population=patients.satisfying( "(NOT died) AND (registered) AND (pregnant) AND age >= 16", died=patients.died_from_any_cause(on_or_before=index_date, returning="binary_flag"), registered=patients.registered_as_of(index_date), pregnant=patients.with_these_clinical_events( pregnant_code, between=["index_date", "index_date + 1 month"], returning="binary_flag", return_expectations={"incidence": 0.6}, ), ), age=patients.age_as_of(index_date, return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" } }),
}, "rate": "uniform", }, index_date=index_date, # This line defines the study population population=patients.satisfying( """ (sex = 'F' OR sex = 'M') AND (age >= 18 AND age < 120) AND (NOT died) AND (registered) """, registered=patients.registered_as_of(index_date), died=patients.died_from_any_cause( on_or_before=index_date, returning="binary_flag", ), ), age=patients.age_as_of( index_date, return_expectations={ "int": { "distribution": "population_ages" }, "incidence": 1 }, ), sex=patients.sex(return_expectations={ "category": { "ratios": { "M": 0.49,
covid_identification, on_or_after="2020-03-01", match_only_underlying_cause=True, return_expectations={ "date": { "earliest": "2020-03-01" }, "incidence": 0.1 }, ), died_date_ons=patients.died_from_any_cause( on_or_after="2020-03-01", returning="date_of_death", include_month=True, include_day=True, return_expectations={ "date": { "earliest": "2020-03-01" }, "incidence": 0.1 }, ), # COVID-19 outcomes first_pos_test_sgss=patients.with_test_result_in_sgss( pathogen="SARS-CoV-2", test_result="positive", find_first_match_in_period=True, returning="date", date_format="YYYY-MM-DD", return_expectations={"date": { "earliest": "2020-01-01" }},
on_or_after="2020-02-01", date_format="YYYY-MM", ), # OUTCOMES worms=patients.with_these_clinical_events( worms_codes, return_first_date_in_period=True, include_month=True, return_expectations={"date": { "earliest": "2019-02-01" }}, ), died_date_ons=patients.died_from_any_cause( on_or_before="2020-08-01", returning="date_of_death", include_month=True, include_day=True, ), ## DEMOGRAPHIC COVARIATES # AGE age=patients.age_as_of( "2019-02-01", return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" }, }, ),
"rate": "exponential_increase", "incidence": 0.1, }, population=patients.satisfying( """ registered AND (NOT died) AND (sex = 'F' OR sex='M') AND (age != 'missing') """, registered=patients.registered_as_of( "index_date", return_expectations={"incidence": 0.9}, ), died=patients.died_from_any_cause( on_or_before="index_date", returning="binary_flag", return_expectations={"incidence": 0.1}), ), age=patients.age_as_of( "index_date", return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" }, }, ), age_band=patients.categorised_as( { "0-4": "age >= 0 AND age < 5", "5-9": "age >= 5 AND age < 10",
), died_ons_covid_flag_underlying=patients. with_these_codes_on_death_certificate( covid_codelist, on_or_after="2020-02-01", match_only_underlying_cause=True, return_expectations={"date": { "earliest": "2020-02-01" }}, ), died_date_ons=patients.died_from_any_cause( on_or_after="2020-02-01", returning="date_of_death", include_month=True, include_day=True, return_expectations={ "date": { "earliest": "2020-02-01" }, "rate": "exponential_increase" }, ), first_tested_for_covid=patients.with_test_result_in_sgss( pathogen="SARS-CoV-2", test_result="any", on_or_after="2020-02-01", find_first_match_in_period=True, returning="date", date_format="YYYY-MM-DD", return_expectations={ "date": { "earliest": "2020-02-01"
"earliest": from_date }}, ), died_ons_covid_underlying=patients.with_these_codes_on_death_certificate( codes_covid_death, returning="binary_flag", on_or_after=from_date, match_only_underlying_cause=True, return_expectations={"date": { "earliest": "2020-03-01" }}, ), died_ons=patients.died_from_any_cause( returning="binary_flag", on_or_after=from_date, return_expectations={"date": { "earliest": from_date }}, ), died_ons_noncovid=patients.satisfying( """(NOT died_ons_covid) AND died_ons""", return_expectations={"incidence": 0.15}, ), death_category=patients.categorised_as( { "alive": "NOT died_ons", "covid-death": "died_ons_covid", "non-covid-death": "died_ons_noncovid", "unknown": "DEFAULT", }, return_expectations={
), ketoacidosis=patients.satisfying( "ketoacidosis_hospital OR ketoacidosis_ons", ketoacidosis_hospital=patients.admitted_to_hospital( with_these_diagnoses=ketoacidosis_codes, between=["index_date", "last_day_of_month(index_date)"], return_expectations={"incidence": 0.05}, ), ketoacidosis_ons=patients.with_these_codes_on_death_certificate( ketoacidosis_codes, between=["index_date", "last_day_of_month(index_date)"], return_expectations={"incidence": 0.05}, ), ), died=patients.died_from_any_cause( between=["index_date", "last_day_of_month(index_date)"], return_expectations={"incidence": 0.1}, ), ) measures = [ Measure( id="died_rate", numerator="died", denominator="population", group_by=["covid_hospitalisation"], ), Measure( id="DVT_rate", numerator="DVT", denominator="population", group_by=["covid_hospitalisation"],
AND NOT has_died """), has_follow_up=patients.registered_with_one_practice_between( start_date="2019-12-01", end_date=campaign_start, return_expectations={"incidence": 0.90}, ), registered=patients.registered_as_of( campaign_start, # day before vaccination campaign starts - discuss with team if this should be "today" return_expectations={"incidence": 0.98}, ), has_died=patients.died_from_any_cause( on_or_before=campaign_start, returning="binary_flag", return_expectations={"incidence": 0.05}, ), # Demographic information # CAREHOME STATUS care_home_type=patients.care_home_status_as_of( campaign_start, categorised_as={ "PC": """ IsPotentialCareHome AND LocationDoesNotRequireNursing='Y' AND LocationRequiresNursing='N' """, "PN": """