def test_empty_df(self):
    """Check that an empty ``geo_id`` frame produces no raised errors.

    Runs ``check_bad_geo_id_format`` with geo type ``"county"`` on a
    string-typed DataFrame that has a ``geo_id`` column but no rows.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

    checker.check_bad_geo_id_format(no_rows, "name", "county", report)

    assert len(report.raised_errors) == 0
def test_invalid_geo_id_msa(self):
    """Check the error raised for badly formatted ``"msa"`` geo ids.

    The test expects exactly one raised error, tagged with
    ``"check_geo_id_format"``, whose expression holds two entries and
    does not include ``"54321"``.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    geo_ids = ["0", "54321", "123", ".0000", "abc12"]
    frame = pd.DataFrame(geo_ids, columns=["geo_id"])

    checker.check_bad_geo_id_format(frame, "name", "msa", report)

    assert len(report.raised_errors) == 1
    error = report.raised_errors[0]
    assert "check_geo_id_format" in error.check_data_id
    assert len(error.expression) == 2
    assert "54321" not in error.expression
def test_invalid_geo_id_national(self):
    """Check the error raised for badly formatted ``"national"`` geo ids.

    The test expects exactly one raised error, tagged with
    ``"check_geo_id_format"``, whose expression holds three entries and
    excludes ``"us"``, ``"US"`` and ``"SP"``.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    geo_ids = ["usa", "SP", " us", "us", "usausa", "US"]
    frame = pd.DataFrame(geo_ids, columns=["geo_id"])

    checker.check_bad_geo_id_format(frame, "name", "national", report)

    assert len(report.raised_errors) == 1
    error = report.raised_errors[0]
    assert "check_geo_id_format" in error.check_data_id
    assert len(error.expression) == 3
    # Ids the test expects to be absent from the reported expression.
    for not_flagged in ("us", "US", "SP"):
        assert not_flagged not in error.expression
def test_invalid_geo_id_state(self):
    """Check the error raised for badly formatted ``"state"`` geo ids.

    The test expects exactly one raised error, tagged with
    ``"check_geo_id_format"``, whose expression holds four entries and
    excludes ``"aa"``, ``"hi"`` and ``"HI"``.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    geo_ids = ["aa", "hi", "HI", "hawaii", "Hawaii", "a", "H.I."]
    frame = pd.DataFrame(geo_ids, columns=["geo_id"])

    checker.check_bad_geo_id_format(frame, "name", "state", report)

    assert len(report.raised_errors) == 1
    error = report.raised_errors[0]
    assert "check_geo_id_format" in error.check_data_id
    assert len(error.expression) == 4
    # Ids the test expects to be absent from the reported expression.
    for not_flagged in ("aa", "hi", "HI"):
        assert not_flagged not in error.expression
def test_invalid_geo_id_hrr(self):
    """Check the error raised for badly formatted ``"hrr"`` geo ids.

    The test expects exactly one raised error, tagged with
    ``"check_geo_id_format"``, whose expression holds five entries and
    excludes ``"1"``, ``"12"`` and ``"123"``.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    geo_ids = ["1", "12", "123", "1234", "12345", "a", ".", "ab1"]
    frame = pd.DataFrame(geo_ids, columns=["geo_id"])

    checker.check_bad_geo_id_format(frame, "name", "hrr", report)

    assert len(report.raised_errors) == 1
    error = report.raised_errors[0]
    assert "check_geo_id_format" in error.check_data_id
    assert len(error.expression) == 5
    # Ids the test expects to be absent from the reported expression.
    for not_flagged in ("1", "12", "123"):
        assert not_flagged not in error.expression
def test_invalid_geo_type(self):
    """Check that an unrecognized geo type is reported as an error.

    Runs ``check_bad_geo_id_format`` with geo type ``"hello"`` on an empty
    frame. The test expects exactly one raised error whose first
    ``check_data_id`` element is ``"check_geo_type"`` and whose
    expression is ``"hello"``.
    """
    checker = StaticValidator(self.params)
    report = ValidationReport([])
    no_rows = pd.DataFrame(columns=["geo_id"], dtype=str)

    checker.check_bad_geo_id_format(no_rows, "name", "hello", report)

    assert len(report.raised_errors) == 1

    check_names = [err.check_data_id[0] for err in report.raised_errors]
    assert "check_geo_type" in check_names

    geo_type_expressions = [
        err.expression
        for err in report.raised_errors
        if err.check_data_id[0] == "check_geo_type"
    ]
    assert geo_type_expressions[0] == "hello"