示例#1
0
def test_column_name_clashes_produce_errors():
    """A nested column reusing a top-level column name must be rejected.

    ``age`` is defined both as a top-level column and as a helper column
    inside the ``status`` expression; building the study is expected to
    raise ``ValueError``.
    """
    with pytest.raises(ValueError):
        # Everything is constructed inside the ``raises`` block, in the same
        # order as the keyword arguments, so the error is caught whichever
        # call raises it.
        everyone = patients.all()
        top_level_age = patients.age_as_of("2020-01-01")
        clashing_status = patients.satisfying(
            "age > 70 AND sex = 'M'",
            sex=patients.sex(),
            age=patients.age_as_of("2010-01-01"),
        )
        StudyDefinition(
            population=everyone,
            age=top_level_age,
            status=clashing_status,
        )
def test_make_df_from_expectations_with_using_dates_as_categories():
    """Date-like strings can be used as category labels in dummy data.

    The ``eligible_date`` column is categorised into three date-shaped
    labels; the generated frame must contain exactly those three values.
    """
    category_rules = {
        "2020-04-14": "age >= 80",
        "2020-06-16": "age >= 70 AND age < 80",
        "2020-08-18": "DEFAULT",
    }
    category_ratios = {
        "2020-04-14": 0.25,
        "2020-06-16": 0.25,
        "2020-08-18": 0.5,
    }
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "1900-01-01", "latest": "today"},
            "rate": "exponential_increase",
            "incidence": 0.2,
        },
        population=patients.all(),
        eligible_date=patients.categorised_as(
            category_rules,
            age=patients.age_as_of("2020-01-01"),
            return_expectations={
                "category": {"ratios": category_ratios},
                "incidence": 1,
            },
        ),
    )

    dummy_frame = study.make_df_from_expectations(100)

    observed_labels = set(dummy_frame.eligible_date)
    assert observed_labels == {"2020-08-18", "2020-06-16", "2020-04-14"}
示例#3
0
def test_unrecognised_database_url_raises_error(monkeypatch):
    """A ``DATABASE_URL`` with an unknown scheme is expected to raise ``ValueError``."""
    monkeypatch.setenv("DATABASE_URL", "unknown-db://localhost")
    with pytest.raises(ValueError):
        # Columns are created inside the ``raises`` block, in keyword order,
        # so the error is caught whichever call raises it.
        everyone = patients.all()
        sex_column = patients.sex()
        age_column = patients.age_as_of("2020-01-01")
        StudyDefinition(
            population=everyone,
            sex=sex_column,
            age=age_column,
        )
示例#4
0
def test_errors_are_triggered_without_database_url(monkeypatch):
    """Definition errors still surface when ``DATABASE_URL`` is unset.

    The population expression names two columns that are never defined;
    building the study is expected to raise ``KeyError``.
    """
    monkeypatch.delenv("DATABASE_URL", raising=False)
    with pytest.raises(KeyError):
        # Built inside the ``raises`` block, in keyword order, so the error
        # is caught whichever call raises it.
        undefined_population = patients.satisfying(
            "no_such_column AND missing_column"
        )
        sex_column = patients.sex()
        age_column = patients.age_as_of("2020-01-01")
        StudyDefinition(
            population=undefined_population,
            sex=sex_column,
            age=age_column,
        )
示例#5
0
def test_export_data_without_database_url_raises_error(tmp_path, monkeypatch):
    """Exporting with no ``DATABASE_URL`` and no expectations raises ``RuntimeError``.

    The study itself is built outside the ``raises`` block: only the
    ``to_file`` call is expected to fail.
    """
    monkeypatch.delenv("DATABASE_URL", raising=False)
    destination = tmp_path / "dummy_data.csv"
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(),
        age=patients.age_as_of("2020-01-01"),
    )
    with pytest.raises(RuntimeError):
        study.to_file(destination)
示例#6
0
def test_syntax_errors_in_expressions_are_raised():
    """A malformed boolean expression is expected to raise ``ValueError``.

    The ``status`` expression contains a doubled ``AND`` operator.
    """
    with pytest.raises(ValueError):
        # Built inside the ``raises`` block, in keyword order, so the error
        # is caught whichever call raises it.
        everyone = patients.all()
        malformed_status = patients.satisfying(
            "age > 70 AND AND sex = 'M'",
            sex=patients.sex(),
            age=patients.age_as_of("2010-01-01"),
        )
        StudyDefinition(
            population=everyone,
            status=malformed_status,
        )
示例#7
0
def test_age_dtype_generation():
    """An age column yields an ``Int64`` dtype entry in the pandas CSV arguments."""
    expected_csv_args = {
        "dtype": {"age": "Int64"},
        "parse_dates": [],
        "date_col_for": {},
        "converters": {},
    }
    study = StudyDefinition(
        # The study population: every patient
        population=patients.all(),
        age=patients.age_as_of("2020-02-01"),
    )

    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
示例#8
0
def test_create_dummy_data_works_without_database_url(tmp_path, monkeypatch):
    """Dummy data can be written from expectations with ``DATABASE_URL`` unset.

    Builds a study whose ``sex`` and ``age`` columns both carry
    ``return_expectations``, writes it with ``expectations_population=10``,
    and checks the CSV has ten rows containing both columns.
    """
    monkeypatch.delenv("DATABASE_URL", raising=False)
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(
            return_expectations={
                "rate": "universal",
                "date": {"earliest": "1900-01-01", "latest": "today"},
                "category": {"ratios": {"M": 0.49, "F": 0.51}},
            }
        ),
        age=patients.age_as_of(
            "2020-01-01",
            return_expectations={
                "rate": "universal",
                "date": {"earliest": "1900-01-01", "latest": "2020-01-01"},
                "int": {"distribution": "population_ages"},
            },
        ),
    )
    filename = tmp_path / "dummy_data.csv"
    study.to_file(filename, expectations_population=10)

    # The csv module requires files to be opened with newline="" so that it
    # can handle line endings (including ones embedded in quoted fields)
    # itself.
    with open(filename, newline="") as f:
        results = list(csv.DictReader(f))

    assert len(results) == 10
    columns = results[0].keys()
    assert "sex" in columns
    assert "age" in columns
示例#9
0
     (sex = 'F' OR sex = 'M') AND
     (age >= 18 AND age < 120) AND
     (NOT died) AND
     (registered)
     """,
     registered=patients.registered_as_of(index_date),
     died=patients.died_from_any_cause(
         on_or_before=index_date,
         returning="binary_flag",
     ),
 ),
 age=patients.age_as_of(
     index_date,
     return_expectations={
         "int": {
             "distribution": "population_ages"
         },
         "incidence": 1
     },
 ),
 sex=patients.sex(return_expectations={
     "category": {
         "ratios": {
             "M": 0.49,
             "F": 0.51
         }
     },
     "incidence": 1
 }),
 date_death=patients.died_from_any_cause(
     between=[index_date, end_date],
示例#10
0
def test_to_file_with_expectations_population(tmp_path, file_format):
    """Dummy data generated from expectations passes dummy-data validation.

    Builds a study with category, integer, binary-flag and date columns
    (day, month and year formats) plus a category column with incidence 0.5,
    writes it in ``file_format`` with ``expectations_population=100``, and
    validates the file against the study's covariate definitions.
    """

    def uniform_half():
        # Return a fresh dict per column, matching the separate inline
        # literals this replaces.
        return {"rate": "uniform", "incidence": 0.5}

    events = codelist([("12345", "foo"), ("67890", "bar")], system="snomed")
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"},
        },
        population=patients.all(),
        sex=patients.sex(
            return_expectations={
                "category": {"ratios": {"F": 0.5, "M": 0.5}},
                "rate": "universal",
            },
        ),
        age=patients.age_as_of(
            "2020-01-01",
            return_expectations={
                "int": {"distribution": "population_ages"},
                "rate": "universal",
            },
        ),
        has_event=patients.with_these_clinical_events(
            events,
            returning="binary_flag",
            return_expectations=uniform_half(),
        ),
        event_date_day=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=uniform_half(),
        ),
        event_date_month=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=uniform_half(),
        ),
        event_date_year=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY",
            return_expectations=uniform_half(),
        ),
        incomplete_categories=patients.with_these_clinical_events(
            events,
            returning="category",
            return_expectations={
                "category": {"ratios": {"foo": 0.5, "bar": 0.5}},
                # Half the values here should be null
                "incidence": 0.5,
            },
        ),
    )

    generated_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(generated_file, expectations_population=100)
    # validate_dummy_data is reused here to check that the data produced by
    # the expectations framework is itself valid dummy data.
    validate_dummy_data(study.covariate_definitions, generated_file)
示例#11
0
def test_to_file_with_dummy_data_file(tmp_path, file_format):
    """Exporting with ``dummy_data_file`` reproduces that file byte-for-byte.

    Dummy data is first generated from expectations, then passed back in as
    ``dummy_data_file``; the second output must equal the first exactly.
    """

    def uniform_half():
        # Return a fresh dict per column, matching the separate inline
        # literals this replaces.
        return {"rate": "uniform", "incidence": 0.5}

    events = codelist(["12345"], system="snomed")
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"},
        },
        population=patients.all(),
        sex=patients.sex(
            return_expectations={
                "category": {"ratios": {"F": 0.5, "M": 0.5}},
                "rate": "universal",
            },
        ),
        age=patients.age_as_of(
            "2020-01-01",
            return_expectations={
                "int": {"distribution": "population_ages"},
                "rate": "universal",
            },
        ),
        has_event=patients.with_these_clinical_events(
            events,
            returning="binary_flag",
            return_expectations=uniform_half(),
        ),
        event_date_day=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=uniform_half(),
        ),
        event_date_month=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=uniform_half(),
        ),
        event_date_year=patients.with_these_clinical_events(
            events,
            returning="date",
            date_format="YYYY",
            return_expectations=uniform_half(),
        ),
    )

    # Step 1: generate dummy data using the expectations framework
    source_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(source_file, expectations_population=10)

    # Step 2: export again, this time feeding in that dummy data
    copied_file = tmp_path / f"output.{file_format}"
    study.to_file(copied_file, dummy_data_file=source_file)

    # Step 3: the two files must be identical at the byte level
    source_bytes = source_file.read_bytes()
    copied_bytes = copied_file.read_bytes()
    assert source_bytes == copied_bytes
示例#12
0
                    "40-44": 0.05,
                    "45-49": 0.1,
                    "50-54": 0.05,
                    "55-59": 0.05,
                    "60-64": 0.05,
                    "65-69": 0.05,
                    "70-74": 0.05,
                    "75-79": 0.05,
                    "80-84": 0.05,
                    "85-89": 0.05,
                    "90plus": 0.03,
                    "missing": 0.02,
                }
            },
        },
        age=patients.age_as_of("index_date", ),
    ),

    # patients admitted to hospital with primary diagnoses included in cvd codelist
    # filters out maternity-related admissions and transfers from other providers
    cvd_emergency_elective=patients.admitted_to_hospital(
        with_these_primary_diagnoses=cvd_codelist,
        with_admission_method=[
            "11", "12", "13", "21", "22", "23", "24", "25", "2A", "2B", "2C",
            "2D", "28"
        ],
        between=["index_date", "index_date + 6 days"],
        return_expectations={"incidence": 0.1},
    ),
    cvd_admission_method=patients.admitted_to_hospital(
        with_these_primary_diagnoses=cvd_codelist,
示例#13
0
        "rate": "uniform",
        "incidence": 0.5,
    },
    population=patients.registered_with_one_practice_between(
        "2019-02-01", "2020-02-01"),

    # Set index date to start date
    index_date="2020-02-01",

    ## DEMOGRAPHIC INFORMATION
    ### Age
    age=patients.age_as_of(
        "2020-03-31",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
            "incidence": 0.001
        },
    ),

    ### Sex
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
示例#14
0
     },
     return_expectations={
         "rate": "universal",
         "category": {
             "ratios": {
                 1: 0.15,
                 0: 0.85,
             },
         },
     },
 ),
 age=patients.age_as_of(
     "2021-03-31",  # PHE defined date for calulating eligibilty across all vaccination campaigns
     return_expectations={
         "rate": "universal",
         "int": {
             "distribution": "population_ages"
         },
     },
 ),
 ageband=patients.categorised_as(
     {
         "0": "DEFAULT",
         # consider adding an under-16 age band as well, to differentiate between workers and children eligible for another reason
         "0-19": """ age >= 0 AND age < 20""",
         "20-29": """ age >= 20 AND age < 30""",
         "30-39": """ age >= 30 AND age < 40""",
         "40-49": """ age >= 40 AND age < 50""",
         "50-59": """ age >= 50 AND age < 60""",
         "60-69": """ age >= 60 AND age < 70""",
         "70-79": """ age >= 70 AND age < 80""",
示例#15
0
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-11-16"
            },
            "incidence": 0.2
        },
    ),

    ### DEMOGRAPHIC COVARIATES
    # AGE
    age=patients.age_as_of(
        "sgss_pos_inrange",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),

    # SEX
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
示例#16
0
from cohortextractor import StudyDefinition, patients, codelist, codelist_from_csv


# Minimal study definition: a population filter plus a single age column.
study = StudyDefinition(
    # Expectations applied to columns that do not override them.
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "today",
        },
        "rate": "uniform",
        "incidence": 0.5,
    },
    # Population is selected by registration between the two dates given.
    population=patients.registered_with_one_practice_between(
        "2019-02-01",
        "2020-02-01",
    ),
    # Age as of 2019-09-01, with dummy values drawn from "population_ages".
    age=patients.age_as_of(
        "2019-09-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages",
            },
        },
    ),
)
 population=patients.satisfying(
     "(NOT died) AND (registered) AND (pregnant) AND age >= 16",
     died=patients.died_from_any_cause(on_or_before=index_date,
                                       returning="binary_flag"),
     registered=patients.registered_as_of(index_date),
     pregnant=patients.with_these_clinical_events(
         pregnant_code,
         between=["index_date", "index_date + 1 month"],
         returning="binary_flag",
         return_expectations={"incidence": 0.6},
     ),
 ),
 age=patients.age_as_of(index_date,
                        return_expectations={
                            "rate": "universal",
                            "int": {
                                "distribution": "population_ages"
                            }
                        }),
 clinical_riskgroup=patients.with_these_clinical_events(
     clinical_riskgroup_codes,
     between=["index_date", "index_date + 1 month"],
     returning="binary_flag",
     #return_expectations= { "incidence": 0.6 },),
     return_expectations={
         "category": {
             "ratios": {
                 "0": 0.5,
                 "1": 0.5
             }
         },
        "rate": "uniform",
        "incidence": 1
    },
    index_date=index_date,

    # This line defines the study population
    population=patients.satisfying(
        """
        (age >= 18 AND age < 120) AND
        (NOT died) AND
        (registered)
        """,
        died=patients.died_from_any_cause(on_or_before=index_date,
                                          returning="binary_flag"),
        registered=patients.registered_as_of(index_date),
        age=patients.age_as_of(index_date),
    ),

    ### geographic/administrative groups
    practice=patients.registered_practice_as_of(
        index_date,
        returning="pseudo_id",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 100,
                "stddev": 20
            }
        },
    ),
    stp=patients.registered_practice_as_of(