def test_make_df_from_expectations_with_categories_expression():
    """Check expectation ratios on a column built from category expressions.

    The ``category`` column is defined through ``patients.categorised_as``
    with ratios of 0.3 for "A" and 0.7 for "B".  In a frame generated for
    10000 patients, "A" must therefore occur less often than "B".
    """
    # Category label -> expression selecting it; "" is the DEFAULT bucket.
    category_rules = {
        "A": "sex = 'F'",
        "B": "sex = 'M'",
        "": "DEFAULT",
    }
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study_definition = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    dummy_df = study_definition.make_df_from_expectations(10000)

    counts = dummy_df.category.value_counts()
    assert counts["A"] < counts["B"]
def test_make_df_from_expectations_with_categories_expression_validation():
    """Check that mismatched category ratios are rejected with ValueError.

    The column defines the categories "A", "B" and "" (DEFAULT), but the
    expectation ratios also list "C".  Generating the dummy frame is expected
    to raise ``ValueError`` -- presumably because "C" is not a defined
    category.
    """
    category_rules = {
        "A": "sex = 'F'",
        "B": "sex = 'M'",
        "": "DEFAULT",
    }
    # "C" appears only here, not in category_rules above.
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.6, "C": 0.1}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study_definition = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    with pytest.raises(ValueError):
        study_definition.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_using_dates_as_categories():
    """Check that date-like category labels come back as the same strings.

    ``eligible_date`` uses three ISO-formatted date strings as its category
    labels.  The set of values in the frame generated for 100 patients must
    equal exactly those three strings.
    """
    shared_expectations = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
        "incidence": 0.2,
    }
    # Date-string label -> expression selecting it.
    date_rules = {
        "2020-04-14": "age >= 80",
        "2020-06-16": "age >= 70 AND age < 80",
        "2020-08-18": "DEFAULT",
    }
    column_expectations = {
        "category": {
            "ratios": {
                "2020-04-14": 0.25,
                "2020-06-16": 0.25,
                "2020-08-18": 0.5,
            }
        },
        "incidence": 1,
    }
    study_definition = StudyDefinition(
        default_expectations=shared_expectations,
        population=patients.all(),
        eligible_date=patients.categorised_as(
            date_rules,
            age=patients.age_as_of("2020-01-01"),
            return_expectations=column_expectations,
        ),
    )

    dummy_df = study_definition.make_df_from_expectations(100)

    expected_labels = {"2020-08-18", "2020-06-16", "2020-04-14"}
    assert set(dummy_df.eligible_date) == expected_labels
 smoking_status=patients.categorised_as(
     {
         "S": "most_recent_smoking_code = 'S'",
         "E": """
              most_recent_smoking_code = 'E' OR (
                most_recent_smoking_code = 'N' AND ever_smoked
              )
         """,
         "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
         "M": "DEFAULT",
     },
     return_expectations={
         "category": {
             "ratios": {
                 "S": 0.6,
                 "E": 0.1,
                 "N": 0.2,
                 "M": 0.1
             }
         }
     },
     most_recent_smoking_code=patients.with_these_clinical_events(
         clear_smoking_codes,
         find_last_match_in_period=True,
         on_or_before="2019-02-01",
         returning="category",
     ),
     ever_smoked=patients.with_these_clinical_events(
         filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
         on_or_before="2019-02-01",
     ),
 ),
 age_band=patients.categorised_as(
     {
         "0-4": "age >= 0 AND age < 5",
         "5-9": "age >= 5 AND age < 10",
         "10-14": "age >= 10 AND age < 15",
         "15-19": "age >= 15 AND age < 20",
         "20-24": "age >= 20 AND age < 25",
         "25-29": "age >= 25 AND age < 30",
         "30-34": "age >= 30 AND age < 35",
         "35-39": "age >= 35 AND age < 40",
         "40-44": "age >= 40 AND age < 45",
         "45-49": "age >= 45 AND age < 50",
         "50-54": "age >= 50 AND age < 55",
         "55-59": "age >= 55 AND age < 60",
         "60-64": "age >= 60 AND age < 65",
         "65-69": "age >= 65 AND age < 70",
         "70-74": "age >= 70 AND age < 75",
         "75-79": "age >= 75 AND age < 80",
         "80-84": "age >= 80 AND age < 85",
         "85-89": "age >= 85 AND age < 90",
         "90plus": "age >= 90",
         "missing": "DEFAULT",
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "0-4": 0.05,
                 "5-9": 0.05,
                 "10-14": 0.05,
                 "15-19": 0.05,
                 "20-24": 0.05,
                 "25-29": 0.05,
                 "30-34": 0.05,
                 "35-39": 0.05,
                 "40-44": 0.05,
                 "45-49": 0.1,
                 "50-54": 0.05,
                 "55-59": 0.05,
                 "60-64": 0.05,
                 "65-69": 0.05,
                 "70-74": 0.05,
                 "75-79": 0.05,
                 "80-84": 0.05,
                 "85-89": 0.05,
                 "90plus": 0.05,
                 "missing": 0,
             }
         },
     }),
Exemplo n.º 6
0
 age_group=patients.categorised_as(
     {
         "0": "DEFAULT",
         "0 - under 16": """ age < 16""",
         "16 - under 40": """ age >= 16 AND age < 40""",
         "40 - under 50": """ age >= 40 AND age < 50""",
         "50 - under 55": """ age >= 50 AND age < 55""",
         "55 - under 60": """ age >= 55 AND age < 60""",
         "60 - under 65": """ age >= 60 AND age < 65""",
         "65 - under 70": """ age >= 65 AND age < 70""",
         "70 - under 75": """ age >= 70 AND age < 75""",
         "75 - under 80": """ age >= 75 AND age < 80""",
         "80 - under 85": """ age >= 80 AND age < 85""",
         "85 plus": """ age >=  85""",
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "0 - under 16": 0.05,
                 "16 - under 40": 0.1,
                 "40 - under 50": 0.1,
                 "50 - under 55": 0.1,
                 "55 - under 60": 0.1,
                 "60 - under 65": 0.1,
                 "65 - under 70": 0.1,
                 "70 - under 75": 0.1,
                 "75 - under 80": 0.1,
                 "80 - under 85": 0.1,
                 "85 plus": 0.05,
             }
         },
     },
 ),
Exemplo n.º 7
0
 imd=patients.categorised_as(
     {
         "0": "DEFAULT",
         "1":
         """index_of_multiple_deprivation >=1 AND index_of_multiple_deprivation < 32844*1/5""",
         "2":
         """index_of_multiple_deprivation >= 32844*1/5 AND index_of_multiple_deprivation < 32844*2/5""",
         "3":
         """index_of_multiple_deprivation >= 32844*2/5 AND index_of_multiple_deprivation < 32844*3/5""",
         "4":
         """index_of_multiple_deprivation >= 32844*3/5 AND index_of_multiple_deprivation < 32844*4/5""",
         "5": """index_of_multiple_deprivation >= 32844*4/5 """,
     },
     index_of_multiple_deprivation=patients.address_as_of(
         "index_date",
         returning="index_of_multiple_deprivation",
         round_to_nearest=100,
     ),
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "0": 0.01,
                 "1": 0.20,
                 "2": 0.20,
                 "3": 0.20,
                 "4": 0.20,
                 "5": 0.19,
             }
         },
     }),
Exemplo n.º 8
0
        },
    ),
    death_category=patients.categorised_as(
        {
            "covid-death": "died_covid",
            "non-covid-death": "(NOT died_covid) AND died_any",
            "alive": "DEFAULT"
        },
        died_covid=patients.with_these_codes_on_death_certificate(
            codes_ICD10_covid,
            returning="binary_flag",
            match_only_underlying_cause=False,
            between=[index_date, end_date],
        ),
        died_any=patients.died_from_any_cause(
            between=[index_date, end_date],
            returning="binary_flag",
        ),
        return_expectations={
            "category": {
                "ratios": {
                    "alive": 0.8,
                    "covid-death": 0.1,
                    "non-covid-death": 0.1
                }
            },
            "incidence": 1
        },
    ),
)
Exemplo n.º 9
0
                    "PS": 0.05,
                    "": 0.85,
                },
            },
        },
    ),

    # simple care home flag
    care_home=patients.categorised_as(
        {
            1: """care_home_type""",
            0: "DEFAULT",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    1: 0.15,
                    0: 0.85,
                },
            },
        },
    ),
    age=patients.age_as_of(
        "2021-03-31",  # PHE defined date for calculating eligibility across all vaccination campaigns
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
 covid_hospitalisation=patients.categorised_as(
     {
         "COVID-19 positive": "covid_positive AND NOT covid_hospitalised",
         "COVID-19 hospitalised": "covid_hospitalised",
         "General population": "DEFAULT",
     },
     return_expectations={
         "incidence": 1,
         "category": {
             "ratios": {
                 "COVID-19 positive": 0.1,
                 "COVID-19 hospitalised": 0.1,
                 "General population": 0.8,
             }
         },
     },
     covid_positive=patients.with_test_result_in_sgss(
         pathogen="SARS-CoV-2",
         test_result="positive",
         between=["2020-01-01", "last_day_of_month(index_date)"],
         date_format="YYYY-MM-DD",
         return_expectations={"date": {
             "earliest": "index_date"
         }},
     ),
     covid_hospitalised=patients.admitted_to_hospital(
         with_these_diagnoses=covid_codelist,
         between=["2020-01-01", "last_day_of_month(index_date)"],
         return_expectations={"incidence": 0.20},
     ),
 ),
Exemplo n.º 11
0
covariate_definitions = study.covariate_definitions

fixtures_path = Path(__file__).parent.joinpath("fixtures", "dummy-data")


# study_2 reuses the shared column definitions and adds one extra categorised
# column, so that new columns can be tested without rebuilding all of the
# existing test fixtures.
study_2 = StudyDefinition(
    **column_definitions,
    category_date=patients.categorised_as(
        {"2020-10-15": "age > 50", "2021-11-16": "DEFAULT"},
        return_expectations={
            "category": {"ratios": {"2020-10-15": 0.5, "2021-11-16": 0.5}},
        },
    ),
)
covariate_definitions_2 = study_2.covariate_definitions


@pytest.mark.parametrize("file_format", SUPPORTED_FILE_FORMATS)
def test_validate_dummy_data_valid(file_format, tmpdir):
    rows = zip(
        ["patient_id", "11", "22"],
        ["sex", "F", "M"],
                        return_expectations={
                            "rate": "universal",
                            "int": {
                                "distribution": "population_ages"
                            }
                        }),
 age_group=patients.categorised_as(
     {
         "0": "DEFAULT",
         "16 - under 40": """ age >= 16 AND age < 40""",
         "40 - under 50": """ age >= 40 AND age < 50""",
         "50 - under 65": """ age >= 50 AND age < 65""",
         "65 plus": """ age >=  65""",
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "16 - under 40": 0.25,
                 "40 - under 50": 0.25,
                 "50 - under 65": 0.25,
                 "65 plus": 0.25,
             }
         },
     },
 ),
 stp=patients.registered_practice_as_of(
     "index_date",
     returning="stp_code",
     return_expectations={
         "category": {
             "ratios": {
Exemplo n.º 13
0
 smoking_status=patients.categorised_as(
     {
         "S": "most_recent_smoking_code = 'S'",
         "E": """
              most_recent_smoking_code = 'E' OR (
                most_recent_smoking_code = 'N' AND ever_smoked
              )
         """,
         "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
         "M": "DEFAULT",
     },
     return_expectations={
         "category": {
             "ratios": {
                 "S": 0.6,
                 "E": 0.1,
                 "N": 0.2,
                 "M": 0.1
             }
         }
     },
     most_recent_smoking_code=patients.with_these_clinical_events(
         clear_smoking_codes,
         find_last_match_in_period=True,
         on_or_before="sgss_pos_inrange",
         returning="category",
     ),
     ever_smoked=patients.with_these_clinical_events(
         filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
         on_or_before="sgss_pos_inrange",
     ),
 ),
 smoking_status=patients.categorised_as(
     {
         "S": "most_recent_smoking_code = 'S'",
         "E": """
                  most_recent_smoking_code = 'E' OR (    
                    most_recent_smoking_code = 'N' AND ever_smoked   
                  )  
             """,
         "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
         "M": "DEFAULT",
     },
     return_expectations={
         "category": {
             "ratios": {
                 "S": 0.6,
                 "E": 0.1,
                 "N": 0.2,
                 "M": 0.1
             }
         }
     },
     most_recent_smoking_code=patients.with_these_clinical_events(
         clear_smoking_codes,
         find_last_match_in_period=True,
         on_or_before="2020-02-29",
         returning="category",
     ),
     ever_smoked=patients.with_these_clinical_events(
         filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
         on_or_before="2020-02-29",
     ),
 ),
Exemplo n.º 15
0
    # age 
    age=patients.age_as_of(
        "index_date",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # age band 
    ageband_narrow = patients.categorised_as(
        {   
            "0": "DEFAULT",
            "65-74": """ age >=  65 AND age < 75""",
            "75-79": """ age >=  75 AND age < 80""",
            "80-84": """ age >=  80 AND age < 85""",
            "85-89": """ age >=  85 AND age < 120""",
        },
        return_expectations={
            "rate":"universal",
            "category": {"ratios": {"65-74": 0.4, "75-79": 0.2, "80-84":0.2, "85-89":0.2 }}
        },
    ),

    # SELECTED DEMOGRAPHIC CHARACTERISTICS TO DESCRIBE 
    # sex 
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),
Exemplo n.º 16
0
 age=patients.age_as_of(index_date,
                        return_expectations={
                            "rate": "universal",
                            "int": {
                                "distribution": "population_ages"
                            }
                        }),
 age_group=patients.categorised_as(
     {
         "0": "DEFAULT",
         "16 - under 50": " age >= 16 AND age < 50",
         "50 - under 65": " age >= 50 AND age < 65",
         "65 plus": " age >= 65",
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "16 - under 50": 0.5,
                 "50 - under 65": 0.25,
                 "65 plus": 0.25,
             }
         },
     },
 ),
 stp=patients.registered_practice_as_of(
     "index_date",
     returning="stp_code",
     return_expectations={
         "category": {
             "ratios": {
                 "STP1": 0.5,
Exemplo n.º 17
0
         "earliest": from_date
     }},
 ),
 died_ons_noncovid=patients.satisfying(
     """(NOT died_ons_covid) AND died_ons""",
     return_expectations={"incidence": 0.15},
 ),
 death_category=patients.categorised_as(
     {
         "alive": "NOT died_ons",
         "covid-death": "died_ons_covid",
         "non-covid-death": "died_ons_noncovid",
         "unknown": "DEFAULT",
     },
     return_expectations={
         "category": {
             "ratios": {
                 "alive": 0.8,
                 "covid-death": 0.1,
                 "non-covid-death": 0.1
             }
         }
     },
 ),
 date_died_ons=patients.died_from_any_cause(
     returning="date_of_death",
     on_or_after=from_date,
     date_format="YYYY-MM-DD",
     return_expectations={"date": {
         "earliest": from_date
     }},
Exemplo n.º 18
0
                            "rate": "universal",
                            "int": {
                                "distribution": "population_ages"
                            }
                        }),
 age_group=patients.categorised_as(
     {
         "0": "DEFAULT",
         "16 - under 40": """ age >= 16 AND age < 40""",
         "40 - under 50": """ age >= 40 AND age < 50""",
         "50 - under 65": """ age >= 50 AND age < 65""",
         "65 - under 75": """ age >= 65 AND age < 75""",
         "75 plus": """ age >=  75""",
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 "16 - under 40": 0.25,
                 "40 - under 50": 0.15,
                 "50 - under 65": 0.10,
                 "65 - under 75": 0.25,
                 "75 plus": 0.25,
             }
         },
     },
 ),
 sex=patients.sex(
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                    "4": 0.2,
                    "5": 0.2
                }
            },
            "incidence": 0.4,
        },
    ),
    ethnicity=patients.categorised_as(
        {
            "0": "DEFAULT",
            "1": "eth='1' OR (NOT eth AND ethnicity_sus='1')",
            "2": "eth='2' OR (NOT eth AND ethnicity_sus='2')",
            "3": "eth='3' OR (NOT eth AND ethnicity_sus='3')",
            "4": "eth='4' OR (NOT eth AND ethnicity_sus='4')",
            "5": "eth='5' OR (NOT eth AND ethnicity_sus='5')",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.2,
                    "2": 0.2,
                    "3": 0.2,
                    "4": 0.2,
                    "5": 0.2
                }
            },
            "incidence": 0.4,
        },
    ),
)