def test_empty_df(self):
        """An empty ``geo_id`` frame checked as "county" raises no errors."""
        report = ValidationReport([])
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(no_rows, "name", "county", report)

        raised = report.raised_errors
        assert len(raised) == 0
    def test_invalid_geo_id_msa(self):
        """Mixed "msa" geo_ids yield one format error that omits "54321".

        The single raised error must be tagged ``check_geo_id_format`` and
        its expression must hold exactly two entries.
        """
        geo_ids = ["0", "54321", "123", ".0000", "abc12"]
        frame = pd.DataFrame(geo_ids, columns=["geo_id"])
        report = ValidationReport([])

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(frame, "name", "msa", report)

        assert len(report.raised_errors) == 1
        error = report.raised_errors[0]
        assert "check_geo_id_format" in error.check_data_id
        assert len(error.expression) == 2
        assert "54321" not in error.expression
    def test_invalid_geo_id_national(self):
        """Mixed "national" geo_ids yield one format error with three entries.

        "us", "US" and "SP" must not appear in the reported expression.
        """
        geo_ids = ["usa", "SP", " us", "us", "usausa", "US"]
        frame = pd.DataFrame(geo_ids, columns=["geo_id"])
        report = ValidationReport([])

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(frame, "name", "national", report)

        assert len(report.raised_errors) == 1
        error = report.raised_errors[0]
        assert "check_geo_id_format" in error.check_data_id
        assert len(error.expression) == 3
        # These ids are expected to be absent from the reported expression.
        for accepted in ("us", "US", "SP"):
            assert accepted not in error.expression
    def test_invalid_geo_id_state(self):
        """Mixed "state" geo_ids yield one format error with four entries.

        "aa", "hi" and "HI" must not appear in the reported expression.
        """
        geo_ids = ["aa", "hi", "HI", "hawaii", "Hawaii", "a", "H.I."]
        frame = pd.DataFrame(geo_ids, columns=["geo_id"])
        report = ValidationReport([])

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(frame, "name", "state", report)

        assert len(report.raised_errors) == 1
        error = report.raised_errors[0]
        assert "check_geo_id_format" in error.check_data_id
        assert len(error.expression) == 4
        # These ids are expected to be absent from the reported expression.
        for accepted in ("aa", "hi", "HI"):
            assert accepted not in error.expression
    def test_invalid_geo_id_hrr(self):
        """Mixed "hrr" geo_ids yield one format error with five entries.

        "1", "12" and "123" must not appear in the reported expression.
        """
        geo_ids = ["1", "12", "123", "1234", "12345", "a", ".", "ab1"]
        frame = pd.DataFrame(geo_ids, columns=["geo_id"])
        report = ValidationReport([])

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(frame, "name", "hrr", report)

        assert len(report.raised_errors) == 1
        error = report.raised_errors[0]
        assert "check_geo_id_format" in error.check_data_id
        assert len(error.expression) == 5
        # These ids are expected to be absent from the reported expression.
        for accepted in ("1", "12", "123"):
            assert accepted not in error.expression
    def test_invalid_geo_type(self):
        """The geo type "hello" raises a single ``check_geo_type`` error.

        The expression of that error must equal the offending geo type.
        """
        report = ValidationReport([])
        no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

        checker = StaticValidator(self.params)
        checker.check_bad_geo_id_format(no_rows, "name", "hello", report)

        assert len(report.raised_errors) == 1

        # Keep only the errors whose first check id is "check_geo_type".
        geo_type_errors = [
            err for err in report.raised_errors
            if err.check_data_id[0] == "check_geo_type"
        ]
        assert len(geo_type_errors) > 0
        assert geo_type_errors[0].expression == "hello"