Exemplo n.º 1
0
    def test_feature_conditions(self):
        """Check the condition features produced by ``FeaturesSelection``.

        Only the ``condition`` entry of ``time_bound`` is switched on (all
        four positions set to 1); every other feature is disabled.  The
        expected frame keeps the cohort columns and adds one 0/1 column per
        concept id and window label (``inf``, ``long``, ``med``, ``short``).

        ``omop_tables`` is not built here; it is taken from the enclosing
        scope.
        """
        index_date = pd.to_datetime('2018-01-01')
        window_labels = ('inf', 'long', 'med', 'short')

        cohort = pd.DataFrame({
            'person_id': [1, 2, 3, 4, 5],
            'index_date': [index_date] * 5,
        })

        # Start with every time-bound feature disabled, then enable the one
        # under test.  Re-assigning an existing key keeps its position.
        time_bound = {
            name: [0, 0, 0, 0]
            for name in (
                'comorbid_condition',
                'drug',
                'condition',
                'procedure',
                'measurement',
                'measurement_value',
                'measurement_range_group',
                'visit_count',
            )
        }
        time_bound['condition'] = [1, 1, 1, 1]

        features = {
            'non_time_bound': dict(age_group=0, age_at_index=0, gender=0),
            'time_bound': time_bound,
            'time_windows': dict(
                inf=5000, long=365, med=180, short=30, minimum=0.05),
        }

        # Expected per-person flags for each concept id; the same pattern is
        # repeated for every window label.
        concept_flags = (
            ('44831230', [1, 1, 0, 0, 0]),
            ('2', [1, 1, 0, 0, 0]),
            ('3', [1, 0, 0, 0, 0]),
            ('4', [1, 0, 0, 0, 0]),
        )
        expected_columns = {
            'person_id': [1, 2, 3, 4, 5],
            'index_date': [index_date] * 5,
        }
        for window in window_labels:
            for concept, flags in concept_flags:
                expected_columns[concept + '_' + window] = flags
        expected = pd.DataFrame(expected_columns)

        output = FeaturesSelection(cohort, features, omop_tables)()
        pd.testing.assert_frame_equal(output, expected)
Exemplo n.º 2
0
def test_time_at_risk():
    """Check the labelled frame returned by ``time_at_risk``.

    The combined cohort holds two definitions: id 1 (five persons, used as
    the at-risk cohort) and id 2 (three persons, used as the target cohort).
    Features are computed for the at-risk cohort with only ``age_at_index``
    and ``gender`` enabled, then ``time_at_risk`` is called with a window of
    200.  The expected result has the two feature columns plus a ``target``
    column flagging persons 2 and 3.

    ``omop_tables`` is taken from the enclosing module scope.
    """
    base_date = pd.to_datetime('2018-01-01')
    cohort = pd.DataFrame({
        'cohort_definition_id': [1] * 5 + [2] * 3,
        'person_id': [1, 2, 3, 4, 5, 1, 2, 3],
        'index_date': [base_date] * 5 + [
            pd.to_datetime('2019-01-01'),
            pd.to_datetime('2018-05-01'),
            pd.to_datetime('2018-03-01'),
        ],
    })

    disabled_time_bound = {
        name: [0, 0, 0, 0]
        for name in (
            'comorbid_condition',
            'drug',
            'condition',
            'procedure',
            'measurement',
            'measurement_value',
            'measurement_range_group',
            'visit_count',
        )
    }
    features = {
        'non_time_bound': dict(age_group=0, age_at_index=1, gender=1),
        'time_bound': disabled_time_bound,
        'time_windows': dict(
            inf=5000, long=365, med=180, short=30, minimum=0.05),
    }

    # Split the combined cohort by definition id and drop the id column from
    # each part; row labels of the original frame are kept as they are.
    definition_ids = cohort['cohort_definition_id']
    cohort_at_risk = cohort[definition_ids == 1].drop(
        columns='cohort_definition_id')
    cohort_target = cohort[definition_ids == 2].drop(
        columns='cohort_definition_id')

    X = FeaturesSelection(cohort_at_risk, features, omop_tables)()
    output = time_at_risk(X, cohort_at_risk, cohort_target, time_at_risk=200)

    expected = pd.DataFrame({
        'age_at_index': [28, 18, 8, 48, 58],
        'gender = female': [1, 0, 1, 0, 0],
        'target': [0, 1, 1, 0, 0],
    })
    pd.testing.assert_frame_equal(output, expected)
Exemplo n.º 3
0
    def test_feature_comorbidities(self):
        """Check the comorbidity features produced by ``FeaturesSelection``.

        Only ``comorbid_condition`` is enabled, and only at the third
        position of its list; the expected columns all carry the ``_med``
        suffix.  A local ``condition_occurrence`` table (a single-partition
        dask frame indexed by ``person_id``) is supplied; the remaining OMOP
        tables are taken from the enclosing scope.
        """
        index_date = pd.to_datetime('2018-01-01')
        condition_start = pd.to_datetime('2017-12-10')

        cohort = pd.DataFrame({
            'person_id': [1, 2, 3, 4, 5],
            'index_date': [index_date] * 5,
        })

        # Start with every time-bound feature disabled, then enable the one
        # under test.  Re-assigning an existing key keeps its position.
        time_bound = {
            name: [0, 0, 0, 0]
            for name in (
                'comorbid_condition',
                'drug',
                'condition',
                'procedure',
                'measurement',
                'measurement_value',
                'measurement_range_group',
                'visit_count',
            )
        }
        time_bound['comorbid_condition'] = [0, 0, 1, 0]

        features = {
            'non_time_bound': dict(age_group=0, age_at_index=0, gender=0),
            'time_bound': time_bound,
            'time_windows': dict(
                inf=5000, long=365, med=180, short=30, minimum=0.05),
        }

        # Condition records used for this test: eight rows, all starting on
        # the same date before the index date.
        condition_rows = pd.DataFrame({
            'person_id': [1, 1, 1, 1, 2, 2, 3, 4],
            'condition_concept_id': [1, 2, 3, 4, 1, 2, 44831230, 44836801],
            'condition_start_datetime': [condition_start] * 8,
        })
        condition_occurrence = dd.from_pandas(
            condition_rows, npartitions=1).set_index('person_id')

        omop_tables = dict(
            person=person,
            condition_occurrence=condition_occurrence,
            procedure_occurrence=procedure,
            drug_exposure=drug_exposure,
            visit_occurrence=visit_occurrence,
            observation_period=observation_period,
            measurement=measurement,
        )

        comorbidity_flags = (
            ('congestive_heart_failure', [0, 0, 1, 0, 0]),
            ('valvular_disease', [0, 0, 0, 1, 0]),
            ('hypertension,complicated', [0, 0, 1, 0, 0]),
        )
        expected_columns = {
            'person_id': [1, 2, 3, 4, 5],
            'index_date': [index_date] * 5,
        }
        for label, flags in comorbidity_flags:
            expected_columns[label + '_med'] = flags
        expected = pd.DataFrame(expected_columns)

        output = FeaturesSelection(cohort, features, omop_tables)()
        pd.testing.assert_frame_equal(output, expected)