def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy BMI data and its measurement-date column must be null together.

    Builds a study with a ``bmi`` column (carrying ``return_expectations``
    for both a float distribution and a date range) and a
    ``bmi_date_measured`` column derived from it via ``patients.date_of``,
    then checks the data frame produced by ``make_df_from_expectations``.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.6,
                "float": {"distribution": "normal", "mean": 35, "stddev": 10},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
        ),
        bmi_date_measured=patients.date_of(
            "bmi",
            date_format="YYYY-MM",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)

    # ``population`` is not expected among the generated columns.
    # sorted() already returns a new list, so no extra list() wrapper is needed.
    assert sorted(result.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other: this test
    # treats ``bmi == 0.0`` as the "missing" marker for the value column and
    # compares it row by row with a null measurement date.
    bmi_is_missing = result["bmi"] == 0.0
    date_is_missing = pd.isnull(result["bmi_date_measured"])
    assert (bmi_is_missing == date_is_missing).all()
def test_clinical_events_numeric_value_dtype_generation():
    """Check the CSV-reading arguments for a numeric clinical-event column.

    The study has a ``creatinine`` column returning ``numeric_value`` and a
    ``creatinine_date`` column derived from it with ``date_format="YYYY-MM"``.
    ``study.pandas_csv_args`` is passed through ``_converters_to_names``
    before being compared with the expected mapping.
    """
    creatinine_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        creatinine=patients.with_these_clinical_events(
            creatinine_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="numeric_value",
        ),
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )

    csv_args = _converters_to_names(study.pandas_csv_args)

    # The value column is read as a float; its companion date column is
    # parsed as a date and linked back to it through ``date_col_for``.
    expected_args = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    assert csv_args == expected_args
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check the CSV-reading arguments for a categorical clinical-event column.

    The study has an ``ethnicity`` column returning ``category`` from a
    codelist flagged with ``has_categories = True``, plus an
    ``ethnicity_date`` column derived from it. No ``date_format`` is passed
    to ``patients.date_of`` here, and the expected converter is
    ``add_month_and_day_to_date``.
    """
    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    ethnicity_codes.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            find_last_match_in_period=True,
        ),
        ethnicity_date=patients.date_of("ethnicity"),
    )

    csv_args = _converters_to_names(study.pandas_csv_args)

    # The value column is read with the "category" dtype; its companion date
    # column is parsed as a date and linked back through ``date_col_for``.
    expected_args = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }
    assert csv_args == expected_args
def test_bmi_dtype_generation():
    """Check the CSV-reading arguments for a BMI column and its date column.

    The study has a ``bmi`` column from ``patients.most_recent_bmi`` and a
    ``bmi_date_measured`` column derived from it with
    ``date_format="YYYY-MM"``. ``study.pandas_csv_args`` is passed through
    ``_converters_to_names`` before being compared with the expected mapping.

    No codelist is constructed: none of the columns in this study takes one,
    so the previously unused categorised codelist fixture has been dropped.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )
    result = _converters_to_names(study.pandas_csv_args)

    # The value column is read as a float; its companion date column is
    # parsed as a date and linked back to it through ``date_col_for``.
    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
def test_mean_recorded_value_dtype_generation():
    """Check the CSV-reading arguments for a mean-recorded-value column.

    The study has a ``bp_sys`` column from ``patients.mean_recorded_value``
    and a ``bp_sys_date_measured`` column derived from it with
    ``date_format="YYYY-MM"``. ``study.pandas_csv_args`` is passed through
    ``_converters_to_names`` before being compared with the expected mapping.
    """
    bp_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        bp_sys=patients.mean_recorded_value(
            bp_codes,
            on_most_recent_day_of_measurement=True,
            on_or_before="2020-02-01",
        ),
        bp_sys_date_measured=patients.date_of("bp_sys", date_format="YYYY-MM"),
    )

    csv_args = _converters_to_names(study.pandas_csv_args)

    # The value column is read as a float; its companion date column is
    # parsed as a date and linked back to it through ``date_col_for``.
    expected_args = {
        "converters": {"bp_sys_date_measured": "add_day_to_date"},
        "dtype": {"bp_sys": "float"},
        "date_col_for": {"bp_sys": "bp_sys_date_measured"},
        "parse_dates": ["bp_sys_date_measured"],
    }
    assert csv_args == expected_args