def _format_validation_report(self, details: Dict[str, str]) -> ValidationReport:
    """Build the report for the binary accessibility validation.

    Args:
        details: error reasons to attach to the report. A falsy value
            (e.g. an empty dict) yields a SUCCESS report.

    Returns:
        A FAILED report carrying ``details`` when ``details`` is truthy,
        otherwise a SUCCESS report without details.
    """
    is_local_repo = self._get_repo_path().upper() == "LOCAL"
    # A LOCAL repo is described by its binary folder; any other repo by the
    # software version.
    repo_info_msg = (
        f"Repo path: {self._repo_path}, binary folder: {self._exe_folder}"
        if is_local_repo
        else f"Repo path: {self._repo_path}, software version: {self._binary_version}"
    )

    if not details:
        return ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=self.name,
            message=f"Completed binary accessibility validation successfully ({repo_info_msg}).",
        )

    return ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=self.name,
        message=f"You don't have permission to access some private computation software ({repo_info_msg}). Please contact your representative at Meta",
        details=details,
    )
def _format_validation_report(
    self,
    message: str,
    rows_processed_count: int,
    validation_issues: InputDataValidationIssues,
    had_exception: bool = False,
) -> ValidationReport:
    """Build the report for the input data validation.

    Args:
        message: text used as the report message (as-is when
            ``had_exception`` is set, otherwise as the message prefix).
        rows_processed_count: value stored under ``rows_processed_count``
            in the report details.
        validation_issues: source of the errors and warnings to report.
        had_exception: when True, a FAILED report with only the row count
            in its details is returned.

    Returns:
        FAILED if ``had_exception`` is set or errors were found; SUCCESS
        otherwise (with the warnings in the details when there are any).
    """
    found_errors = validation_issues.get_errors()
    found_warnings = validation_issues.get_warnings()
    # Every report carries the row count; issues are added below as needed.
    report_details = {"rows_processed_count": rows_processed_count}

    if had_exception:
        return ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=message,
            details=report_details,
        )

    if found_errors:
        error_fields = ", ".join(sorted(found_errors.keys()))
        report_details["validation_errors"] = found_errors
        if found_warnings:
            report_details["validation_warnings"] = found_warnings
        outcome = ValidationResult.FAILED
        report_message = f"{message} failed validation, with errors on '{error_fields}'."
    elif found_warnings:
        report_details["validation_warnings"] = found_warnings
        outcome = ValidationResult.SUCCESS
        report_message = f"{message} completed validation successfully, with some warnings."
    else:
        outcome = ValidationResult.SUCCESS
        report_message = f"{message} completed validation successfully"

    return ValidationReport(
        validation_result=outcome,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=report_message,
        details=report_details,
    )
def test_run_s3_validations_binary_access_denied(
    self, storage_service_mock: Mock
) -> None:
    # A 403 raised by the third file_exists call must produce a FAILED report
    # whose details carry the error text for that binary.
    forbidden_error = "An error occurred (403) when calling the HeadObject operation: Forbidden"
    storage_service_mock.__init__(return_value=storage_service_mock)
    storage_service_mock.file_exists.side_effect = [
        True,
        True,
        PcpError(Exception(forbidden_error)),
    ]

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_details = {
        f"{DEFAULT_BINARY_REPOSITORY}package/3/latest/binary": forbidden_error
    }
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"You don't have permission to access some private computation software (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION}). Please contact your representative at Meta",
        details=expected_details,
    )
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(
        storage_service_mock.file_exists.call_count, len(TEST_BINARY_INFOS)
    )
def test_get_str_for_report_with_details(self) -> None:
    # Checks that str() of a SUCCESS report with nested details equals the
    # expected text below.
    # NOTE(review): the comparison is exact, so the whitespace inside this
    # literal is significant. It appears on a single line here - confirm it
    # matches the layout that str(ValidationReport) actually produces.
    expected_report_str = """Validation Report: test_validator_name Result: success Message: test_message Details: { "test_key_1": 5, "test_key_2": { "test_key_3": { "test_key_4": 1 }, "test_key_5": { "test_key_6": 1, "test_key_7": 2 } } }"""
    report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name="test_validator_name",
        message="test_message",
        details={
            "test_key_1": 5,
            "test_key_2": {
                "test_key_3": {
                    "test_key_4": 1,
                },
                "test_key_5": {
                    "test_key_6": 1,
                    "test_key_7": 2,
                },
            },
        },
    )
    self.assertEqual(expected_report_str, str(report))
def test_run_validations_success_for_multikey_pa_fields(
    self, time_mock: Mock
) -> None:
    # Input with several id_* columns and some empty/badly formatted values
    # is expected to pass validation with warnings only.
    time_mock.time.return_value = TEST_TIMESTAMP
    csv_rows = [
        b"id_madid,id_email,id_phone,conversion_value,conversion_timestamp,conversion_metadata\n",
        b"abcd/1234+WXYZ=,abcd/1234+WXYZ=,abcd/1234+WXYZ=,,1645157987,0\n",
        b"abcd/1234+WXYZ=,,,,1645157987,0\n",
        b",abcd/1234+WXYZ=,abcd/1234+WXYZ=,$20,1645157987,0\n",
    ]
    self.write_lines_to_file(csv_rows)

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, CloudProvider.AWS, TEST_REGION
    ).validate()

    expected_warnings = {
        "id_": {
            "empty_count": 3,
        },
        "conversion_value": {
            "empty_count": 2,
            "bad_format_count": 1,
        },
    }
    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} completed validation successfully, with some warnings.",
        details={
            "rows_processed_count": 3,
            "validation_warnings": expected_warnings,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_success_for_pl_fields(self, time_mock: Mock) -> None:
    # Three well-formed rows under an id_/value/event_timestamp header are
    # expected to pass validation without warnings.
    time_mock.time.return_value = TEST_TIMESTAMP
    header_row = b"id_,value,event_timestamp\n"
    data_row = b"abcd/1234+WXYZ=,100,1645157987\n"
    self.write_lines_to_file([header_row, data_row, data_row, data_row])

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} completed validation successfully",
        details={
            "rows_processed_count": 3,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_errors_when_the_line_ending_is_unsupported(
    self, time_mock: Mock
) -> None:
    # Rows terminated with "\r\n" are expected to fail validation before any
    # row is counted as processed.
    time_mock.time.return_value = TEST_TIMESTAMP
    header_row = b"id_,value,event_timestamp\n"
    crlf_row = b"abcd/1234+WXYZ=,100,1645157987\r\n"
    self.write_lines_to_file([header_row, crlf_row, crlf_row])

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    exception_message = "Detected an unexpected line ending. The only supported line ending is '\\n'"
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
        details={
            "rows_processed_count": 0,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_it_skips_input_data_processing_when_the_file_is_too_large(
    self, time_mock: Mock
) -> None:
    # When the storage service reports an oversized file, the validator is
    # expected to return a SUCCESS report with a warning and never copy the
    # file.
    time_mock.time.return_value = TEST_TIMESTAMP
    file_size = 3567123432
    self.storage_service_mock.get_file_size.return_value = file_size

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    message_parts = [
        f"WARNING: File: {TEST_INPUT_FILE_PATH} is too large to download.",
        f"The maximum file size is {int(INPUT_DATA_MAX_FILE_SIZE_IN_BYTES / (1024 * 1024))} MB.",
        "Skipped input_data validation. completed validation successfully",
    ]
    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=" ".join(message_parts),
        details={
            "rows_processed_count": 0,
        },
    )
    self.storage_service_mock.get_file_size.assert_called_with(TEST_INPUT_FILE_PATH)
    self.storage_service_mock.copy.assert_not_called()
    self.assertEqual(actual_report, expected_report)
def test_run_validations_reports_for_pl_when_no_ids(
    self, time_mock: Mock
) -> None:
    # Rows whose id_* columns are all empty are expected to fail validation
    # with the empty ids reported as errors on "id_".
    time_mock.time.return_value = TEST_TIMESTAMP
    header_row = b"id_madid,id_email,value,event_timestamp\n"
    row_without_ids = b",,100,1645157987\n"
    self.write_lines_to_file([header_row, row_without_ids, row_without_ids])

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    error_fields = "id_"
    expected_errors = {
        "id_": {
            "empty_count": 4,
        },
    }
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation, with errors on '{error_fields}'.",
        details={
            "rows_processed_count": 2,
            "validation_errors": expected_errors,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_errors_when_pid_data_fields_not_found(
    self, time_mock: Mock
) -> None:
    # A header without any column starting with ID_FIELD_PREFIX is expected
    # to fail validation with zero rows processed.
    time_mock.time.return_value = TEST_TIMESTAMP
    csv_rows = [
        b"noid_,conversion_value,conversion_timestamp,conversion_metadata\n",
        b"abcd/1234+WXYZ=,,1645157987,0\n",
        b"abcd/1234+WXYZ=,,1645157987,0\n",
        b"abcd/1234+WXYZ=,$20,1645157987,0\n",
    ]
    self.write_lines_to_file(csv_rows)

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    exception_message = f"Failed to parse the header row. The header row fields must have columns with prefix {ID_FIELD_PREFIX}"
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
        details={
            "rows_processed_count": 0,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_get_str_for_report_without_details(self) -> None:
    # Checks that str() of a FAILED report built without details equals the
    # expected text below.
    # NOTE(review): the comparison is exact, so the whitespace inside this
    # literal is significant. It appears on a single line here - confirm it
    # matches the layout that str(ValidationReport) actually produces.
    expected_report_str = """Validation Report: test_validator_name Result: failed Message: test_message"""
    report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name="test_validator_name",
        message="test_message",
    )
    self.assertEqual(expected_report_str, str(report))
def validate(self) -> ValidationReport:
    """Run ``__validate__()`` without letting an exception escape.

    Returns:
        The report produced by ``__validate__()``. If that call raises, a
        SUCCESS report whose message is a WARNING containing the error is
        returned instead, so that a bug in a validator will not block a
        PC run.
    """
    try:
        report = self.__validate__()
    except Exception as e:
        # Deliberately broad: any validator failure is downgraded to a warning.
        warning_message = f"WARNING: {self.name} threw an unexpected error: {e}"
        return ValidationReport(
            ValidationResult.SUCCESS,
            self.name,
            warning_message,
        )
    else:
        return report
def test_run_local_validations_success(
    self, storage_service_mock: Mock, mock_file_exists: Mock
) -> None:
    # Every binary is reported as existing, so a SUCCESS report describing
    # the LOCAL repo and its binary folder is expected.
    mock_file_exists.return_value = True

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"Completed binary accessibility validation successfully (Repo path: LOCAL, binary folder: {DEFAULT_EXE_FOLDER}).",
    )
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(mock_file_exists.call_count, len(TEST_BINARY_INFOS))
def test_run_local_validations_binary_not_exist(
    self, storage_service_mock: Mock, mock_file_exists: Mock
) -> None:
    # The first existence check fails, so a FAILED report naming that binary
    # in its details is expected.
    mock_file_exists.side_effect = [False, True, True]

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_details = {f"{DEFAULT_EXE_FOLDER}1": "binary does not exist"}
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"You don't have permission to access some private computation software (Repo path: LOCAL, binary folder: {DEFAULT_EXE_FOLDER}). Please contact your representative at Meta",
        details=expected_details,
    )
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(mock_file_exists.call_count, len(TEST_BINARY_INFOS))
def test_run_s3_validations_unexpected_error(
    self, storage_service_mock: Mock
) -> None:
    # An error raised by the very first file_exists call is expected to be
    # reported as a SUCCESS report carrying a WARNING message.
    internal_error = PcpError(Exception("An internal error occurred (500)"))
    storage_service_mock.__init__(return_value=storage_service_mock)
    storage_service_mock.file_exists.side_effect = internal_error

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"WARNING: {BINARY_FILE_VALIDATOR_NAME} threw an unexpected error: An internal error occurred (500)",
    )
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(storage_service_mock.file_exists.call_count, 1)
def test_run_validations_reports_for_pa_when_row_values_are_not_valid(
    self, time_mock: Mock
) -> None:
    # Rows with malformed ids and timestamps are expected to fail validation;
    # the remaining issues are expected to be reported as warnings.
    time_mock.time.return_value = TEST_TIMESTAMP
    csv_rows = [
        b"id_,conversion_value,conversion_timestamp,conversion_metadata\n",
        b"abcd/1234+WXYZ=,$100,1645157987,\n",
        b" ! ,100,1645157987,\n",
        b"_,100,...,0\n",
        b",100,...,data\n",
    ]
    self.write_lines_to_file(csv_rows)

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    error_fields = "conversion_timestamp, id_"
    expected_errors = {
        "id_": {
            "bad_format_count": 2,
        },
        "conversion_timestamp": {
            "bad_format_count": 2,
        },
    }
    expected_warnings = {
        "id_": {
            "empty_count": 1,
        },
        "conversion_metadata": {
            "empty_count": 2,
            "bad_format_count": 1
        },
        "conversion_value": {
            "bad_format_count": 1,
        },
    }
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation, with errors on '{error_fields}'.",
        details={
            "rows_processed_count": 4,
            "validation_errors": expected_errors,
            "validation_warnings": expected_warnings,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_errors_when_there_is_no_header_row(
    self, time_mock: Mock
) -> None:
    # No lines are written by this test; the validator is expected to fail
    # with the "header row was empty" error and zero rows processed.
    time_mock.time.return_value = TEST_TIMESTAMP

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: The header row was empty.",
        details={
            "rows_processed_count": 0,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_run_validations_copy_failure(self) -> None:
    # An exception raised by the storage service's copy is expected to
    # produce a FAILED report whose message embeds the exception text.
    exception_message = "failed to copy"
    self.storage_service_mock.copy.side_effect = Exception(exception_message)

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: Failed to download the input file. Please check the file path and its permission.\n\t{exception_message}",
        details={
            "rows_processed_count": 0,
        },
    )
    self.assertEqual(actual_report, expected_report)
def test_a_validator_throws_exception(self) -> None:
    # A validator that raises is expected to contribute a SUCCESS report with
    # a WARNING message, leaving the aggregated result at SUCCESS.
    validators_under_test = [
        TestDummyValidator(TEST_SUCCESSFUL_REPORT_1),
        TestExceptionValidator(),
    ]

    actual_result, actual_report = run_validators(validators_under_test)

    expected_report_thrown_by_validator = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name="TestExceptionValidator",
        message="WARNING: TestExceptionValidator threw an unexpected error: test error message",
    )
    # The aggregated report joins the individual reports with a blank line.
    expected_aggregated_report = f"{TEST_SUCCESSFUL_REPORT_1}\n\n{expected_report_thrown_by_validator}"
    expected_aggregated_result = ValidationResult.SUCCESS
    self.assertEqual(expected_aggregated_result, actual_result)
    self.assertEqual(expected_aggregated_report, actual_report)
def test_run_s3_validations_binary_not_exist(
    self, storage_service_mock: Mock
) -> None:
    # The first file_exists call returns False, so a FAILED report naming
    # that binary in its details is expected.
    storage_service_mock.__init__(return_value=storage_service_mock)
    storage_service_mock.file_exists.side_effect = [False, True, True]

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_details = {
        f"{DEFAULT_BINARY_REPOSITORY}package/1/latest/1": "binary does not exist"
    }
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"You don't have permission to access some private computation software (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION}). Please contact your representative at Meta",
        details=expected_details,
    )
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(
        storage_service_mock.file_exists.call_count, len(TEST_BINARY_INFOS)
    )
def test_run_s3_validations_success(self, storage_service_mock: Mock) -> None:
    # Every binary is reported as existing, so a SUCCESS report is expected
    # and file_exists must have been called with each binary path.
    storage_service_mock.__init__(return_value=storage_service_mock)
    storage_service_mock.file_exists.return_value = True

    actual_report = BinaryFileValidator(TEST_REGION, TEST_BINARY_INFOS).validate()

    expected_report = ValidationReport(
        validation_result=ValidationResult.SUCCESS,
        validator_name=BINARY_FILE_VALIDATOR_NAME,
        message=f"Completed binary accessibility validation successfully (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION}).",
    )
    expected_calls = [
        call(f"{DEFAULT_BINARY_REPOSITORY}package/1/latest/1"),
        call(f"{DEFAULT_BINARY_REPOSITORY}package/2/latest/2"),
        call(f"{DEFAULT_BINARY_REPOSITORY}package/3/latest/binary"),
    ]
    self.assertEqual(actual_report, expected_report)
    self.assertEqual(
        storage_service_mock.file_exists.call_count, len(TEST_BINARY_INFOS)
    )
    storage_service_mock.file_exists.assert_has_calls(expected_calls)
def test_run_validations_errors_when_pa_pl_data_fields_not_found(
    self, time_mock: Mock
) -> None:
    # A header with an id_ column but with columns matching neither PL_FIELDS
    # nor PA_FIELDS is expected to fail validation with zero rows processed.
    time_mock.time.return_value = TEST_TIMESTAMP
    csv_rows = [
        b"id_,header,row\n",
        b"1,2,3\n",
        b"4,5,6\n",
    ]
    self.write_lines_to_file(csv_rows)

    actual_report = InputDataValidator(
        TEST_INPUT_FILE_PATH, TEST_CLOUD_PROVIDER, TEST_REGION
    ).validate()

    exception_message = f"Failed to parse the header row. The header row fields must have either: {PL_FIELDS} or: {PA_FIELDS}"
    expected_report = ValidationReport(
        validation_result=ValidationResult.FAILED,
        validator_name=INPUT_DATA_VALIDATOR_NAME,
        message=f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
        details={
            "rows_processed_count": 0,
        },
    )
    self.assertEqual(actual_report, expected_report)
return self.dummy_report class TestExceptionValidator(Validator): @property def name(self) -> str: return "TestExceptionValidator" def __validate__(self) -> ValidationReport: raise Exception("test error message") TEST_SUCCESSFUL_REPORT_1 = ValidationReport( validation_result=ValidationResult.SUCCESS, validator_name="validator 1", message="message 1", details={ "test_key_1": 5, }, ) TEST_SUCCESSFUL_REPORT_2 = ValidationReport( validation_result=ValidationResult.SUCCESS, validator_name="validator 2", message="message 2", details={ "test_key_2": 5, }, ) TEST_FAILED_REPORT_1 = ValidationReport( validation_result=ValidationResult.FAILED,