def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy data for a BMI column and its date column share their null rows.

    The ``bmi`` column declares ``return_expectations`` (a normal float
    distribution, an incidence of 0.6 and a date range) and is paired with a
    ``YYYY-MM`` ``date_of`` column.  A 10000-row frame is generated from those
    expectations, then the column names and the alignment of the null rows
    are checked.
    """
    bmi_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            return_expectations=bmi_expectations,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )
    frame = study.make_df_from_expectations(10000)
    assert sorted(frame.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other
    bmi_values = frame["bmi"]
    date_values = frame["bmi_date_measured"]
    missing_bmi = bmi_values[pd.isnull(bmi_values)].fillna(0)
    missing_dates = date_values[pd.isnull(date_values)].fillna(0)
    assert (missing_bmi == missing_dates).all()
def test_bmi_dtype_generation():
    """Check the CSV-loading arguments generated for a BMI column.

    A ``most_recent_bmi`` column paired with a ``YYYY-MM`` ``date_of`` column
    is expected to be described as a ``float`` column, with its date column
    listed in ``parse_dates`` and handled by the ``add_day_to_date``
    converter.
    """
    # NOTE: an unused categorised codelist used to be built here; it played
    # no part in the study definition or the assertion, so it was dropped.
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )

    result = _converters_to_names(study.pandas_csv_args)
    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
def test_clinical_events_numeric_value_dtype_generation():
    """Check the CSV-loading arguments for a numeric clinical-event column.

    A ``with_these_clinical_events`` column returning ``numeric_value`` and
    paired with a ``YYYY-MM`` ``date_of`` column is expected to be described
    as a ``float`` column whose date column uses ``add_day_to_date``.
    """
    expected_csv_args = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    creatinine_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        creatinine=patients.with_these_clinical_events(
            creatinine_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="numeric_value",
        ),
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )
    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected_csv_args
def test_explicit_bmi_fallback():
    """An explicitly coded BMI is reported when no height event exists.

    The patient has one event under ``weight_code`` and one under
    ``bmi_code`` but nothing recorded for height.  The study is expected to
    report the coded value ("99.0") together with the date of that event.
    """
    session = make_session()

    weight_code = "X76C7"
    bmi_code = "22K.."

    patient = Patient(DateOfBirth="1950-01-01")
    recorded_events = [
        (weight_code, 50, "2001-06-01"),
        (bmi_code, 99, "2001-10-01"),
    ]
    for ctv3_code, value, consulted_on in recorded_events:
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code, NumericValue=value, ConsultationDate=consulted_on
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01",
            on_or_before="2005-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_and_date = [(row["BMI"], row["BMI_date_measured"]) for row in rows]
    assert bmi_and_date == [("99.0", "2001-10-01")]
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check the CSV-loading arguments for a categorical clinical-event column.

    A ``with_these_clinical_events`` column returning ``category`` and paired
    with a default-format ``date_of`` column is expected to be described as
    a ``category`` column whose date column uses
    ``add_month_and_day_to_date``.
    """
    expected_csv_args = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }

    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    # Flag the codelist as categorised before it is used in the study.
    ethnicity_codes.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            find_last_match_in_period=True,
        ),
        ethnicity_date=patients.date_of("ethnicity"),
    )

    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected_csv_args
def test_bmi_when_only_some_measurements_of_child():
    """BMI comes from the in-window weight and height, not the older coded BMI.

    The patient (born 1990-01-01) has a coded BMI of 99 dated 1995-01-01,
    which lies before the 2005-2015 window, plus a weight and a height both
    dated 2010-01-01.  The expected result is "0.5" dated 2010-01-01.
    """
    session = make_session()

    bmi_code = "22K.."
    weight_code = "X76C7"
    height_code = "XM01E"

    patient = Patient(DateOfBirth="1990-01-01")
    recorded_events = [
        (bmi_code, 99, "1995-01-01"),
        (weight_code, 50, "2010-01-01"),
        (height_code, 10, "2010-01-01"),
    ]
    for ctv3_code, value, consulted_on in recorded_events:
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code, NumericValue=value, ConsultationDate=consulted_on
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="2005-01-01",
            on_or_before="2015-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_and_date = [(row["BMI"], row["BMI_date_measured"]) for row in rows]
    assert bmi_and_date == [("0.5", "2010-01-01")]
def test_bmi_rounded():
    """The reported BMI is rounded rather than given at full precision.

    A weight of 10.12345 and a height of 10 are recorded; the expected BMI
    string is "0.1", dated with the weight event (2001-06-01).
    """
    session = make_session()

    weight_code = "X76C7"
    height_code = "XM01E"

    patient = Patient(DateOfBirth="1950-01-01")
    recorded_events = [
        (weight_code, 10.12345, "2001-06-01"),
        (height_code, 10, "2000-02-01"),
    ]
    for ctv3_code, value, consulted_on in recorded_events:
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code, NumericValue=value, ConsultationDate=consulted_on
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi("2005-01-01"),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_and_date = [(row["BMI"], row["BMI_date_measured"]) for row in rows]
    assert bmi_and_date == [("0.1", "2001-06-01")]
def test_clinical_event_with_category():
    """The category of the last matching event is returned with its year.

    Three patients are stored: one with no events, one with ``foo1`` (2018)
    and ``foo2`` (2020), and one with ``foo3`` (2019).  With
    ``find_last_match_in_period=True`` the expected categories are "", "B"
    and "C", and the expected dates are "", "2020" and "2019".
    """
    session = make_session()

    patient_without_events = Patient()
    patient_with_two_events = Patient(
        CodedEvents=[
            CodedEvent(CTV3Code="foo1", ConsultationDate="2018-01-01"),
            CodedEvent(CTV3Code="foo2", ConsultationDate="2020-01-01"),
        ]
    )
    patient_with_one_event = Patient(
        CodedEvents=[CodedEvent(CTV3Code="foo3", ConsultationDate="2019-01-01")]
    )
    session.add_all(
        [patient_without_events, patient_with_two_events, patient_with_one_event]
    )
    session.commit()

    categorised_codes = codelist([("foo1", "A"), ("foo2", "B"), ("foo3", "C")], "ctv3")
    study = StudyDefinition(
        population=patients.all(),
        code_category=patients.with_these_clinical_events(
            categorised_codes,
            returning="category",
            find_last_match_in_period=True,
        ),
        code_category_date=patients.date_of("code_category"),
    )
    rows = study.to_dicts()
    categories = [row["code_category"] for row in rows]
    category_dates = [row["code_category_date"] for row in rows]
    assert categories == ["", "B", "C"]
    assert category_dates == ["", "2020", "2019"]
def test_clinical_event_with_numeric_value():
    """The first in-period numeric value is returned with its month.

    The study looks for the first match between 2001-12-01 and 2002-06-01.
    The expected values are "0.0", "2.0" and "0.0", and only the second
    patient is expected to have a date ("2002-01").
    """
    condition_code = "ASTHMA"
    events_per_patient = [
        None,
        # Include date before period starts, which should be ignored
        [
            ("2001-01-01", 1),
            ("2002-01-01", 2),
            ("2002-02-01", 3),
            ("2002-06-01", 4),
        ],
        [("2001-06-01", 7)],
    ]
    _make_clinical_events_selection(condition_code, patient_dates=events_per_patient)

    study = StudyDefinition(
        population=patients.all(),
        asthma_value=patients.with_these_clinical_events(
            codelist([condition_code], "ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="numeric_value",
            find_first_match_in_period=True,
        ),
        asthma_value_date=patients.date_of("asthma_value", date_format="YYYY-MM"),
    )
    rows = study.to_dicts()
    values = [row["asthma_value"] for row in rows]
    value_dates = [row["asthma_value_date"] for row in rows]
    assert values == ["0.0", "2.0", "0.0"]
    assert value_dates == ["", "2002-01", ""]
def test_simple_bmi(include_dates):
    """BMI is derived from weight and height, optionally with its date.

    Args:
        include_dates: One of ``"none"``, ``"year"``, ``"month"`` or
            ``"day"``.  It selects whether a ``BMI_date_measured`` column is
            requested and at which precision.  The value is supplied from
            outside this function (presumably pytest parametrization that is
            not visible here -- confirm).

    Raises:
        ValueError: If ``include_dates`` is not one of the supported values.
            Previously this case left the expected date and date query
            unbound and failed with an unrelated ``UnboundLocalError``.
    """
    # Expected formatted date and the ``date_of`` keyword arguments for each
    # mode; ``None`` kwargs mean that no date column is requested at all.
    date_modes = {
        "none": (None, None),
        "year": ("2002", {}),
        "month": ("2002-06", {"date_format": "YYYY-MM"}),
        "day": ("2002-06-01", {"date_format": "YYYY-MM-DD"}),
    }
    if include_dates not in date_modes:
        raise ValueError(
            "Unsupported include_dates value: {!r}".format(include_dates)
        )
    bmi_date, date_kwargs = date_modes[include_dates]

    session = make_session()

    weight_code = "X76C7"
    height_code = "XM01E"

    patient = Patient(DateOfBirth="1950-01-01")
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=weight_code, NumericValue=50, ConsultationDate="2002-06-01")
    )
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=height_code, NumericValue=10, ConsultationDate="2001-06-01")
    )
    session.add(patient)
    session.commit()

    # Only add the date column when a precision was asked for, using an
    # explicit None check instead of relying on the query's truthiness.
    extra_columns = {}
    if date_kwargs is not None:
        extra_columns["BMI_date_measured"] = patients.date_of("BMI", **date_kwargs)

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01"
        ),
        **extra_columns
    )
    results = study.to_dicts()
    assert [x["BMI"] for x in results] == ["0.5"]
    # ``.get`` yields None when the date column was not requested.
    assert [x.get("BMI_date_measured") for x in results] == [bmi_date]
# Example #11
# 0
def test_mean_recorded_value_dtype_generation():
    """Check the CSV-loading arguments for a mean-recorded-value column.

    A ``mean_recorded_value`` column paired with a ``YYYY-MM`` ``date_of``
    column is expected to be described as a ``float`` column whose date
    column uses the ``add_day_to_date`` converter.
    """
    expected_csv_args = {
        "converters": {"bp_sys_date_measured": "add_day_to_date"},
        "dtype": {"bp_sys": "float"},
        "date_col_for": {"bp_sys": "bp_sys_date_measured"},
        "parse_dates": ["bp_sys_date_measured"],
    }
    bp_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        bp_sys=patients.mean_recorded_value(
            bp_codes,
            on_most_recent_day_of_measurement=True,
            on_or_before="2020-02-01",
        ),
        bp_sys_date_measured=patients.date_of("bp_sys", date_format="YYYY-MM"),
    )
    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected_csv_args
def test_no_bmi_when_measurement_after_reference_date():
    """No BMI is reported when the only measurement is after the window.

    The patient's single coded BMI is dated 2001-01-01 while the study
    window ends on 2000-01-01.  The expected BMI is "0.0" with an empty
    date.
    """
    session = make_session()

    bmi_code = "22K.."

    late_measurement = CodedEvent(
        CTV3Code=bmi_code, NumericValue=99, ConsultationDate="2001-01-01"
    )
    patient = Patient(DateOfBirth="1900-01-01")
    patient.CodedEvents.append(late_measurement)
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1990-01-01",
            on_or_before="2000-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_and_date = [(row["BMI"], row["BMI_date_measured"]) for row in rows]
    assert bmi_and_date == [("0.0", "")]
def test_mean_recorded_value():
    """The mean of the readings on the most recent in-period day is returned.

    One patient has three readings on 2020-02-10 (90, 100, 98) and one on
    2020-04-01, which is after the 2018-01-01 to 2020-03-01 window.  A second
    patient only has a reading from 2010, and a third has none.  The
    expected results are ("96.0", "2020-02-10") for the first patient and
    ("0.0", "") for the other two.
    """
    code = "2469."
    session = make_session()

    patient = Patient()
    readings = [
        ("2020-02-10", 90),
        ("2020-02-10", 100),
        ("2020-02-10", 98),
        # This day is outside period and should be ignored
        ("2020-04-01", 110),
    ]
    for consulted_on, reading in readings:
        event = CodedEvent(
            CTV3Code=code, NumericValue=reading, ConsultationDate=consulted_on
        )
        patient.CodedEvents.append(event)

    patient_with_old_reading = Patient()
    old_reading = CodedEvent(
        CTV3Code=code, NumericValue=100, ConsultationDate="2010-01-01"
    )
    patient_with_old_reading.CodedEvents.append(old_reading)

    patient_with_no_reading = Patient()

    session.add_all([patient, patient_with_old_reading, patient_with_no_reading])
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        bp_systolic=patients.mean_recorded_value(
            codelist([code], system="ctv3"),
            on_most_recent_day_of_measurement=True,
            between=["2018-01-01", "2020-03-01"],
        ),
        bp_systolic_date_measured=patients.date_of(
            "bp_systolic", date_format="YYYY-MM-DD"
        ),
    )
    rows = study.to_dicts()
    value_and_date = [
        (row["bp_systolic"], row["bp_systolic_date_measured"]) for row in rows
    ]
    assert value_and_date == [("96.0", "2020-02-10"), ("0.0", ""), ("0.0", "")]