Exemplo n.º 1
0
class TestLoadData:
    """Sanity checks on the frames returned by the two data loaders.

    ``load_claims_data`` and ``load_data`` are each run once per geography
    ("fips" and "hrr") while the class body executes; every test method
    reads the resulting shared frames.
    """

    fips_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "fips")
    hrr_claims_data = load_claims_data(DATA_FILEPATH, DROP_DATE, "hrr")
    fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips")
    hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr")

    def test_base_unit(self):
        """Both loaders raise ``AssertionError`` for the geography "foo"."""
        for loader in (load_claims_data, load_data):
            with pytest.raises(AssertionError):
                loader(DATA_FILEPATH, DROP_DATE, "foo")

    def test_claims_columns(self):
        """Claims frames are indexed by geo and date and hold exactly two columns."""
        claims_by_geo = (
            ("hrr", self.hrr_claims_data),
            ("fips", self.fips_claims_data),
        )
        for geo, claims in claims_by_geo:
            assert geo in claims.index.names
        for _, claims in claims_by_geo:
            assert "date" in claims.index.names

        wanted = ["Denominator", "Covid_like"]
        for name in wanted:
            assert name in self.fips_claims_data.columns
            assert name in self.hrr_claims_data.columns

        # No column beyond the expected ones may be present.
        for claims in (self.fips_claims_data, self.hrr_claims_data):
            extras = set(claims.columns) - set(wanted)
            assert len(extras) == 0

    def test_data_columns(self):
        """Loaded frames expose the geo, "date", "num" and "den" columns."""
        frames_by_geo = (("hrr", self.hrr_data), ("fips", self.fips_data))
        for geo, frame in frames_by_geo:
            assert geo in frame.columns
        for _, frame in frames_by_geo:
            assert "date" in frame.columns

        for name in ("num", "den"):
            assert name in self.fips_data.columns
            assert name in self.hrr_data.columns

    def test_edge_values(self):
        """Latest date is not before FIRST_DATA_DATE; earliest is before DROP_DATE."""
        # Claims frames carry the date as an index level.
        for claims in (self.hrr_claims_data, self.fips_claims_data):
            claim_dates = claims.index.get_level_values('date')
            assert claim_dates.max() >= Config.FIRST_DATA_DATE
            assert claim_dates.min() < DROP_DATE

        # Loaded frames carry the date as a regular column.
        for frame in (self.hrr_data, self.fips_data):
            frame_dates = frame.date
            assert frame_dates.max() >= Config.FIRST_DATA_DATE
            assert frame_dates.min() < DROP_DATE

    def test_hrrs_values(self):
        """HRR frames stay within NUM_HRRS, have no NaNs and agree on totals."""
        loaded, claims = self.hrr_data, self.hrr_claims_data

        assert len(loaded.hrr.unique()) <= CONSTANTS.NUM_HRRS
        claim_hrrs = claims.index.get_level_values('hrr').unique()
        assert len(claim_hrrs) <= CONSTANTS.NUM_HRRS

        assert loaded.isna().sum().sum() == 0

        # Column totals must match between the loaded and the claims frame.
        for loaded_col, claims_col in (("num", "Covid_like"), ("den", "Denominator")):
            assert loaded[loaded_col].sum() == claims[claims_col].sum()

    def test_fips_values(self):
        """County frames stay within NUM_COUNTIES, have no NaNs and agree on totals."""
        loaded, claims = self.fips_data, self.fips_claims_data

        assert len(loaded.fips.unique()) <= CONSTANTS.NUM_COUNTIES
        claim_fips = claims.index.get_level_values('fips').unique()
        assert len(claim_fips) <= CONSTANTS.NUM_COUNTIES

        assert loaded.isna().sum().sum() == 0

        # Column totals must match between the loaded and the claims frame.
        for loaded_col, claims_col in (("num", "Covid_like"), ("den", "Denominator")):
            assert loaded[loaded_col].sum() == claims[claims_col].sum()
Exemplo n.º 2
0
    def test_base_unit(self):
        """Both loaders raise ``AssertionError`` for the geography "foo"."""
        for loader in (load_claims_data, load_data):
            with pytest.raises(AssertionError):
                loader(DATA_FILEPATH, DROP_DATE, "foo")
Exemplo n.º 3
0
class TestLoadData:
    """Tests for ``ClaimsHospIndicator.backwards_pad`` and ``ClaimsHospIndicator.fit``.

    ``load_data`` is run once per geography ("fips" and "hrr") while the class
    body executes; the resulting frames feed the ``fit`` tests.
    """

    fips_data = load_data(DATA_FILEPATH, DROP_DATE, "fips")
    hrr_data = load_data(DATA_FILEPATH, DROP_DATE, "hrr")

    def test_backwards_pad(self):
        """Pin ``backwards_pad`` outputs for one fixed series under several settings."""
        num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8],
                        dtype=float).reshape(-1, 1)
        den0 = np.array([0, 10, 10, 10, 10, 10, 10, 100, 101], dtype=float)

        # With a fill threshold of 0 the inputs must come back unchanged.
        num1, den1 = ClaimsHospIndicator.backwards_pad(num0,
                                                       den0,
                                                       k=7,
                                                       min_visits_to_fill=0)
        assert np.array_equal(num0, num1)
        assert np.array_equal(den0, den1)

        # Each row: (k, min_visits_to_fill, expected numerators, expected denominators)
        scenarios = [
            (7, 11,
             [0, 1, 3, 5, 7, 9, 11, 7, 8],
             [0, 10, 20, 20, 20, 20, 20, 100, 101]),
            (7, 100,
             [0, 1, 3, 6, 10, 15, 21, 7, 8],
             [0, 10, 20, 30, 40, 50, 60, 100, 101]),
            (3, 100,
             [0, 1, 3, 6, 10, 14, 18, 7, 8],
             [0, 10, 20, 30, 40, 40, 40, 100, 101]),
        ]
        for k, min_visits, exp_num_values, exp_den_values in scenarios:
            num, den = ClaimsHospIndicator.backwards_pad(
                num0, den0, k=k, min_visits_to_fill=min_visits)
            # Numerators are compared as a column vector, like the input.
            exp_num = np.array(exp_num_values, dtype=float).reshape(-1, 1)
            exp_den = np.array(exp_den_values, dtype=float)
            assert np.array_equal(exp_num, num)
            assert np.array_equal(exp_den, den)

    @staticmethod
    def _check_fit_on_sample(data, geo_col):
        """Fit a random sample of locations and check the fitted output ranges.

        Args:
            data: frame with a ``geo_col`` column and a "date" column, as
                produced by ``load_data`` in this class.
            geo_col: name of the location column ("fips" or "hrr").

        Raises:
            AssertionError: if any fitted rate, standard error or inclusion
                count falls outside the asserted bounds.
        """
        date_range = pd.date_range("2020-05-01", "2020-05-20")
        all_locations = data[geo_col].unique()
        indexed = data.set_index([geo_col, "date"])
        # NOTE: numpy.random.choice samples with replacement by default and no
        # seed is set in this block, so the locations checked may repeat and
        # may differ between runs.
        sample = nr.choice(all_locations, 10)

        for location in sample:
            sub_data = indexed.loc[location]
            # Dates missing for this location are filled with zeros.
            sub_data = sub_data.reindex(date_range, fill_value=0)
            res0 = ClaimsHospIndicator.fit(sub_data, date_range[0], location)
            # first value is burn-in
            assert np.min(res0["rate"][1:]) > 0
            assert np.max(res0["rate"][1:]) <= 100

            if np.all(np.isnan(res0["se"])):
                assert res0["incl"].sum() == 0
            else:
                # binomial standard error, hence largest possible value is
                # 100 * (0.5 / sqrt(MIN_DEN))
                assert np.nanmax(
                    res0["se"]) <= 100 * (0.5 / np.sqrt(Config.MIN_DEN))
                assert np.nanmin(res0["se"]) > 0
                assert res0["incl"].sum() > 0

    def test_fit_fips(self):
        """Check ``fit`` output ranges on a random sample of counties."""
        self._check_fit_on_sample(self.fips_data, "fips")

    def test_fit_hrrs(self):
        """Check ``fit`` output ranges on a random sample of HRRs."""
        self._check_fit_on_sample(self.hrr_data, "hrr")