def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy BMI values and their measurement dates must be missing together.

    Defines a study whose ``bmi`` column carries both a float and a date
    expectation, with ``bmi_date_measured`` derived from it through
    ``patients.date_of``. A dummy dataframe is then generated from those
    expectations and the two columns are compared row by row.
    """
    bmi_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            return_expectations=bmi_expectations,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )

    dummy_df = study.make_df_from_expectations(10000)

    # Only the two declared variables should appear as columns.
    assert sorted(dummy_df.columns) == ["bmi", "bmi_date_measured"]

    # This test treats a BMI of exactly 0.0 as the "no measurement" marker and
    # a null date as its counterpart; both must fall on the same rows.
    bmi_missing = dummy_df["bmi"] == 0.0
    date_missing = pd.isnull(dummy_df["bmi_date_measured"])
    assert (bmi_missing == date_missing).all()
def test_bmi_dtype_generation():
    """Check the pandas CSV arguments derived for a BMI column and its date.

    The study declares ``bmi`` via ``patients.most_recent_bmi`` and a linked
    ``bmi_date_measured`` column in ``YYYY-MM`` format. The expected result
    pins four things: the date column gets the ``add_day_to_date`` converter,
    ``bmi`` is typed as ``float``, ``bmi`` is mapped to its date column, and
    the date column is listed under ``parse_dates``.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )

    # NOTE(review): _converters_to_names presumably swaps converter callables
    # for their names so the dict can be compared by value -- confirm against
    # the helper's definition.
    result = _converters_to_names(study.pandas_csv_args)

    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
return_last_date_in_period=True, include_month=True, return_expectations={"date": { "latest": "2020-02-29" }}, ), ckd=first_diagnosis_in_period(ckd_codes), organ_transplant=first_diagnosis_in_period(organ_transplant_codes), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10 bmi=patients.most_recent_bmi( on_or_after="2010-02-01", minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6, "float": { "distribution": "normal", "mean": 35, "stddev": 10 }, }, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54 stp=patients.registered_practice_as_of( "2020-03-01", returning="stp_code", return_expectations={ "rate": "universal", "category": { "ratios": { "STP1": 0.5,
# define the study index date index_date = "2020-01-01", # define the study population population = patients.all(), # define the study variables age = patients.age_as_of("2020-02-01", return_expectations={"rate" : "universal", "int" : {"distribution" : "population_ages"}}), ## bmi bmi=patients.most_recent_bmi(between=["2010-02-01", "2020-01-31"], minimum_age_at_measurement=18, include_measurement_date=True, date_format="YYYY-MM", return_expectations={"date": {"earliest": "2010-02-01", "latest": "2020-01-31"}, "float": {"distribution": "normal", "mean": 28, "stddev": 8}, "incidence": 0.80,}), ## systolic blood pressure bp_sys=patients.mean_recorded_value(systolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, between=["2017-02-01", "2020-01-31"], include_measurement_date=True, date_format="YYYY-MM", return_expectations={"float": {"distribution": "normal", "mean": 80, "stddev": 10}, "date": {"earliest": "2019-02-01", "latest": "2020-01-31"}, "incidence": 0.95,}), ## ace inhibitor
with_complete_gp_consultation_history_between( "2019-02-01", "2020-01-31", return_expectations={"incidence": 0.9}, ), # CONTINUOUS MEASURED COVARIATES IN 12 MONTHS PRIOR TO BASELINE (10 years for BMI) bmi=patients.most_recent_bmi( between=["2010-02-01", "2020-01-31"], minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, return_expectations={ "date": { "earliest": "2010-02-01", "latest": "2020-01-31" }, "float": { "distribution": "normal", "mean": 35, "stddev": 10 }, "incidence": 0.95, }, ), # Blood pressure bp_sys=patients.mean_recorded_value( systolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, between=["2017-02-01", "2020-01-31"], include_measurement_date=True,
"5": 0.19, } }, }, ), # BMI bmi=patients.categorised_as( { "Not obese": "DEFAULT", "Obese I (30-34.9)": """ bmi_value >= 30 AND bmi_value < 35""", "Obese II (35-39.9)": """ bmi_value >= 35 AND bmi_value < 40""", "Obese III (40+)": """ bmi_value >= 40 AND bmi_value < 100""", # set maximum to avoid any impossibly extreme values being classified as obese }, bmi_value=patients.most_recent_bmi(on_or_after="2015-12-01", minimum_age_at_measurement=16), return_expectations={ "rate": "universal", "category": { "ratios": { "Not obese": 0.7, "Obese I (30-34.9)": 0.1, "Obese II (35-39.9)": 0.1, "Obese III (40+)": 0.1, } }, }, ), # CLINICAL CO-MORBIDITIES WORK IN PROGRESS IN COLLABORATION WITH NHSX # https://github.com/opensafely/vaccine-eligibility/blob/master/analysis/study_definition.py