def test_validation_only_considers_columns_in_df(self):
    """Run the numeric-values validation on distribution "125.1".

    There is no explicit assertion: the test passes as long as
    ``validate()`` returns without raising.
    """
    data_json = read_data_json("distribution_missing_column_in_data.json")
    metadata = data_json.get_distribution(identifier="125.1")
    frame = data_json.load_ts_distribution("125.1")
    validator = ValuesAreNumericValidation(frame, metadata, data_json)
    validator.validate()
def test_validate_time_series_catalog(self):
    """The "valid_catalog.json" fixture validates with status "OK"."""
    data_json = read_data_json("valid_catalog.json")
    status = data_json.validate_time_series_catalog()["status"]
    self.assertEqual(status, "OK")
def test_distribution_with_no_field_description_raises_error(self):
    """Validating the first distribution raises NonExistentDescriptionError."""
    data_json = read_data_json("missing_field_description.json")
    first_distribution = data_json.get_distributions()[0]
    with self.assertRaises(NonExistentDescriptionError):
        # This test supplies no dataframe, so None is passed in its place.
        FieldDescriptionsValidation(
            None, first_distribution, data_json
        ).validate()
def test_valid_catalog_has_empty_errors(self):
    """The "valid_catalog.json" fixture reports a falsy "errors" entry."""
    data_json = read_data_json("valid_catalog.json")
    errors = data_json.validate_time_series_catalog()["errors"]
    self.assertFalse(errors)
def test_invalid_catalog_has_errors(self):
    """The "repeated_field_id.json" fixture reports errors under "125.1"."""
    data_json = read_data_json("repeated_field_id.json")
    report = data_json.validate_time_series_catalog()
    distribution_errors = report["errors"]["125.1"]
    self.assertTrue(distribution_errors)
def test_xlsx_read(self):
    """Reading "catalog.xlsx" yields a non-empty distributions result."""
    distributions = read_data_json("catalog.xlsx").get_distributions()
    self.assertTrue(distributions)
def test_invalid_catalog(self):
    """The "repeated_field_id.json" fixture validates with status "ERROR"."""
    data_json = read_data_json("repeated_field_id.json")
    status = data_json.validate_time_series_catalog()["status"]
    self.assertEqual(status, "ERROR")
def test_invalid_format_catalog(self):
    """``validate_catalog()`` on "missing_dataset_title.json" gives "ERROR"."""
    data_json = read_data_json("missing_dataset_title.json")
    status = data_json.validate_catalog()["status"]
    self.assertEqual(status, "ERROR")
def test_missing_field_identifier(self):
    """``validate_catalog()`` on the loaded fixture reports status "ERROR"."""
    # NOTE(review): the fixture is "missing_dataset_title.json", which does
    # not match this test's name; possibly a copy-paste slip -- confirm
    # whether a fixture with a missing field identifier was intended.
    data_json = read_data_json("missing_dataset_title.json")
    report = data_json.validate_catalog()
    self.assertEqual(report["status"], "ERROR")
def test_multiple_errors(self):
    """Both repetition error types are reported for distribution "125.1"."""
    catalog = read_data_json("repeated_field_id_and_description.json")
    found = [
        error.__class__
        for error in get_distribution_errors(catalog, "125.1")
    ]
    self.assertIn(FieldIdRepetitionError, found)
    self.assertIn(FieldDescriptionRepetitionError, found)
def test_get_errors_repeated_field_id_catalog(self):
    """A FieldIdRepetitionError is reported for distribution "125.1"."""
    catalog = read_data_json("repeated_field_id.json")
    distribution_errors = get_distribution_errors(catalog, "125.1")
    found = [error.__class__ for error in distribution_errors]
    self.assertIn(FieldIdRepetitionError, found)
def test_get_errors_of_valid_distribution_empty(self):
    """No errors are reported for distribution "125.1" of "data.json"."""
    catalog = read_data_json("data.json")
    self.assertFalse(get_distribution_errors(catalog, "125.1"))
def _read_csv(self, filename):
    """Read the first distribution of the given catalog with ``CSVReader``.

    Loads ``filename`` through ``read_data_json``, takes the first entry
    of ``get_distributions()`` and returns whatever ``CSVReader.read()``
    produces for it.
    """
    catalog = read_data_json(filename)
    first_distribution = catalog.get_distributions()[0]
    return CSVReader(first_distribution).read()
def test_read_from_file_source(self):
    """Reading with an explicit ``file_source`` yields a "title1" column."""
    catalog = read_data_json("daily_periodicity_latin1.json")
    first_distribution = catalog.get_distributions()[0]
    source = csv_path("sample_data.csv")
    reader = CSVReader(first_distribution, file_source=source)
    column_names = list(reader.read().columns)
    self.assertIn("title1", column_names)