Exemplo n.º 1
0
    def _format_validation_report(self,
                                  details: Dict[str, str]) -> ValidationReport:
        """Build the report for the binary accessibility validation.

        Args:
            details: error reasons to attach to the report; a falsy value
                (e.g. an empty dict) means nothing went wrong.

        Returns:
            A FAILED report carrying ``details`` when any were supplied,
            otherwise a SUCCESS report. Both messages embed the repo info.
        """
        # A "LOCAL" repo path (compared case-insensitively) is described by
        # its binary folder; any other repo by its software version.
        repo_info_msg = (
            f"Repo path: {self._repo_path}, binary folder: {self._exe_folder}"
            if self._get_repo_path().upper() == "LOCAL" else
            f"Repo path: {self._repo_path}, software version: {self._binary_version}"
        )

        if not details:
            return ValidationReport(
                validation_result=ValidationResult.SUCCESS,
                validator_name=self.name,
                message=
                f"Completed binary accessibility validation successfully ({repo_info_msg}).",
            )

        return ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=self.name,
            message=
            f"You don't have permission to access some private computation software ({repo_info_msg}). Please contact your representative at Meta",
            details=details,
        )
Exemplo n.º 2
0
    def _format_validation_report(
        self,
        message: str,
        rows_processed_count: int,
        validation_issues: InputDataValidationIssues,
        had_exception: bool = False,
    ) -> ValidationReport:
        """Turn the collected validation issues into a ``ValidationReport``.

        Args:
            message: prefix of the report message (used verbatim when
                ``had_exception`` is set).
            rows_processed_count: always included in the report details.
            validation_issues: source of the errors and warnings to report.
            had_exception: when True the report is FAILED and only carries
                the processed row count.

        Returns:
            FAILED when an exception happened or errors exist, SUCCESS
            otherwise; warnings are attached whenever they exist and no
            exception happened.
        """
        found_errors = validation_issues.get_errors()
        found_warnings = validation_issues.get_warnings()

        # Every report starts with the row count; further keys are appended
        # in the order errors -> warnings.
        details = {"rows_processed_count": rows_processed_count}

        if had_exception:
            outcome = ValidationResult.FAILED
            report_message = message
        elif found_errors:
            error_fields = ", ".join(sorted(found_errors.keys()))
            outcome = ValidationResult.FAILED
            report_message = f"{message} failed validation, with errors on '{error_fields}'."
            details["validation_errors"] = found_errors
            if found_warnings:
                details["validation_warnings"] = found_warnings
        elif found_warnings:
            outcome = ValidationResult.SUCCESS
            report_message = f"{message} completed validation successfully, with some warnings."
            details["validation_warnings"] = found_warnings
        else:
            outcome = ValidationResult.SUCCESS
            report_message = f"{message} completed validation successfully"

        return ValidationReport(
            validation_result=outcome,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=report_message,
            details=details,
        )
    def test_run_s3_validations_binary_access_denied(
            self, storage_service_mock: Mock) -> None:
        """A 403 raised by the third ``file_exists`` call gives a FAILED report."""
        forbidden_reason = "An error occurred (403) when calling the HeadObject operation: Forbidden"
        expected_details = {
            f"{DEFAULT_BINARY_REPOSITORY}package/3/latest/binary":
            forbidden_reason,
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=
            f"You don't have permission to access some private computation software (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION}). Please contact your representative at Meta",
            details=expected_details,
        )
        # NOTE(review): presumably makes the patched storage service return
        # this same mock when instantiated -- confirm against the patch setup.
        storage_service_mock.__init__(return_value=storage_service_mock)
        # The first two lookups succeed; the third one raises.
        access_denied = PcpError(Exception(forbidden_reason))
        storage_service_mock.file_exists.side_effect = [True, True, access_denied]

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        self.assertEqual(storage_service_mock.file_exists.call_count,
                         len(TEST_BINARY_INFOS))
    def test_get_str_for_report_with_details(self) -> None:
        expected_report_str = """Validation Report: test_validator_name
Result: success
Message: test_message
Details:
{
    "test_key_1": 5,
    "test_key_2": {
        "test_key_3": {
            "test_key_4": 1
        },
        "test_key_5": {
            "test_key_6": 1,
            "test_key_7": 2
        }
    }
}"""
        report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name="test_validator_name",
            message="test_message",
            details={
                "test_key_1": 5,
                "test_key_2": {
                    "test_key_3": {
                        "test_key_4": 1,
                    },
                    "test_key_5": {
                        "test_key_6": 1,
                        "test_key_7": 2,
                    },
                },
            },
        )
        self.assertEqual(expected_report_str, str(report))
Exemplo n.º 5
0
    def test_run_validations_success_for_multikey_pa_fields(
            self, time_mock: Mock) -> None:
        """Multi-key PA input with empty ids/values succeeds with warnings."""
        time_mock.time.return_value = TEST_TIMESTAMP
        cloud_provider = CloudProvider.AWS
        input_rows = [
            b"id_madid,id_email,id_phone,conversion_value,conversion_timestamp,conversion_metadata\n",
            b"abcd/1234+WXYZ=,abcd/1234+WXYZ=,abcd/1234+WXYZ=,,1645157987,0\n",
            b"abcd/1234+WXYZ=,,,,1645157987,0\n",
            b",abcd/1234+WXYZ=,abcd/1234+WXYZ=,$20,1645157987,0\n",
        ]
        self.write_lines_to_file(input_rows)
        expected_warnings = {
            "id_": {"empty_count": 3},
            "conversion_value": {"empty_count": 2, "bad_format_count": 1},
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} completed validation successfully, with some warnings.",
            details={
                "rows_processed_count": 3,
                "validation_warnings": expected_warnings,
            },
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           cloud_provider,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 6
0
    def test_run_validations_success_for_pl_fields(self,
                                                   time_mock: Mock) -> None:
        """Three well-formed PL rows produce a plain SUCCESS report."""
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"id_,value,event_timestamp\n",
            b"abcd/1234+WXYZ=,100,1645157987\n",
            b"abcd/1234+WXYZ=,100,1645157987\n",
            b"abcd/1234+WXYZ=,100,1645157987\n",
        ]
        self.write_lines_to_file(input_rows)
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} completed validation successfully",
            details={"rows_processed_count": 3},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 7
0
    def test_run_validations_errors_when_the_line_ending_is_unsupported(
            self, time_mock: Mock) -> None:
        """Rows terminated with ``\\r\\n`` make the validation fail with zero rows processed."""
        exception_message = "Detected an unexpected line ending. The only supported line ending is '\\n'"
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"id_,value,event_timestamp\n",
            b"abcd/1234+WXYZ=,100,1645157987\r\n",
            b"abcd/1234+WXYZ=,100,1645157987\r\n",
        ]
        self.write_lines_to_file(input_rows)
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
            details={"rows_processed_count": 0},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 8
0
    def test_run_validations_it_skips_input_data_processing_when_the_file_is_too_large(
            self, time_mock: Mock) -> None:
        """With a reported size of 3567123432 bytes the file is never copied and a warning report is returned."""
        file_size = 3567123432
        time_mock.time.return_value = TEST_TIMESTAMP
        self.storage_service_mock.get_file_size.return_value = file_size
        message_parts = [
            f"WARNING: File: {TEST_INPUT_FILE_PATH} is too large to download.",
            f"The maximum file size is {int(INPUT_DATA_MAX_FILE_SIZE_IN_BYTES / (1024 * 1024))} MB.",
            "Skipped input_data validation. completed validation successfully",
        ]
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=" ".join(message_parts),
            details={"rows_processed_count": 0},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        # The size must have been looked up, and the download skipped.
        self.storage_service_mock.get_file_size.assert_called_with(
            TEST_INPUT_FILE_PATH)
        self.storage_service_mock.copy.assert_not_called()
        self.assertEqual(actual_report, expected_report)
Exemplo n.º 9
0
    def test_run_validations_reports_for_pl_when_no_ids(
            self, time_mock: Mock) -> None:
        """PL rows whose id columns are all empty fail with an ``id_`` error."""
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"id_madid,id_email,value,event_timestamp\n",
            b",,100,1645157987\n",
            b",,100,1645157987\n",
        ]
        self.write_lines_to_file(input_rows)
        error_fields = "id_"
        expected_details = {
            "rows_processed_count": 2,
            "validation_errors": {
                "id_": {"empty_count": 4},
            },
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} failed validation, with errors on '{error_fields}'.",
            details=expected_details,
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 10
0
    def test_run_validations_errors_when_pid_data_fields_not_found(
            self, time_mock: Mock) -> None:
        """A header whose first column is ``noid_`` fails header parsing with zero rows processed."""
        exception_message = f"Failed to parse the header row. The header row fields must have columns with prefix {ID_FIELD_PREFIX}"
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"noid_,conversion_value,conversion_timestamp,conversion_metadata\n",
            b"abcd/1234+WXYZ=,,1645157987,0\n",
            b"abcd/1234+WXYZ=,,1645157987,0\n",
            b"abcd/1234+WXYZ=,$20,1645157987,0\n",
        ]
        self.write_lines_to_file(input_rows)
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
            details={"rows_processed_count": 0},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
    def test_get_str_for_report_without_details(self) -> None:
        expected_report_str = """Validation Report: test_validator_name
Result: failed
Message: test_message"""
        report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name="test_validator_name",
            message="test_message",
        )
        self.assertEqual(expected_report_str, str(report))
Exemplo n.º 12
0
    def validate(self) -> ValidationReport:
        """Run ``__validate__()`` and shield the caller from unexpected errors.

        If ``__validate__()`` raises any ``Exception``, a SUCCESS report whose
        message starts with "WARNING:" is returned instead, so that a bug in a
        validator will not block a PC run.
        """
        try:
            report = self.__validate__()
        except Exception as e:
            # Deliberately broad: a validator failure is downgraded to a warning.
            report = ValidationReport(
                ValidationResult.SUCCESS,
                self.name,
                f"WARNING: {self.name} threw an unexpected error: {e}",
            )
        return report
    def test_run_local_validations_success(self, storage_service_mock: Mock,
                                           mock_file_exists: Mock) -> None:
        """Every local binary exists, so the report is SUCCESS."""
        success_message = f"Completed binary accessibility validation successfully (Repo path: LOCAL, binary folder: {DEFAULT_EXE_FOLDER})."
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=success_message,
        )
        mock_file_exists.return_value = True

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        # One existence check per configured binary.
        self.assertEqual(mock_file_exists.call_count, len(TEST_BINARY_INFOS))
    def test_run_local_validations_binary_not_exist(
            self, storage_service_mock: Mock, mock_file_exists: Mock) -> None:
        """The first local binary is missing, so the report is FAILED for that path."""
        failure_message = f"You don't have permission to access some private computation software (Repo path: LOCAL, binary folder: {DEFAULT_EXE_FOLDER}). Please contact your representative at Meta"
        missing_binary_details = {
            f"{DEFAULT_EXE_FOLDER}1": "binary does not exist"
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=failure_message,
            details=missing_binary_details,
        )
        # Only the first existence check reports a missing file.
        mock_file_exists.side_effect = [False, True, True]

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        self.assertEqual(mock_file_exists.call_count, len(TEST_BINARY_INFOS))
    def test_run_s3_validations_unexpected_error(
            self, storage_service_mock: Mock) -> None:
        """An error raised by ``file_exists`` yields a SUCCESS report with a warning message."""
        warning_message = f"WARNING: {BINARY_FILE_VALIDATOR_NAME} threw an unexpected error: An internal error occurred (500)"
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=warning_message,
        )
        # NOTE(review): presumably makes the patched storage service return
        # this same mock when instantiated -- confirm against the patch setup.
        storage_service_mock.__init__(return_value=storage_service_mock)
        internal_error = PcpError(Exception("An internal error occurred (500)"))
        storage_service_mock.file_exists.side_effect = internal_error

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        # Only the first lookup is expected to have been attempted.
        self.assertEqual(storage_service_mock.file_exists.call_count, 1)
Exemplo n.º 16
0
    def test_run_validations_reports_for_pa_when_row_values_are_not_valid(
            self, time_mock: Mock) -> None:
        """Malformed PA rows fail on ``id_`` and ``conversion_timestamp`` and also report warnings."""
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"id_,conversion_value,conversion_timestamp,conversion_metadata\n",
            b"abcd/1234+WXYZ=,$100,1645157987,\n",
            b" ! ,100,1645157987,\n",
            b"_,100,...,0\n",
            b",100,...,data\n",
        ]
        self.write_lines_to_file(input_rows)
        error_fields = "conversion_timestamp, id_"
        expected_errors = {
            "id_": {"bad_format_count": 2},
            "conversion_timestamp": {"bad_format_count": 2},
        }
        expected_warnings = {
            "id_": {"empty_count": 1},
            "conversion_metadata": {"empty_count": 2, "bad_format_count": 1},
            "conversion_value": {"bad_format_count": 1},
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} failed validation, with errors on '{error_fields}'.",
            details={
                "rows_processed_count": 4,
                "validation_errors": expected_errors,
                "validation_warnings": expected_warnings,
            },
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 17
0
    def test_run_validations_errors_when_there_is_no_header_row(
            self, time_mock: Mock) -> None:
        """Without any lines written by this test, validation fails on the empty header row."""
        time_mock.time.return_value = TEST_TIMESTAMP
        failure_message = f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: The header row was empty."
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=failure_message,
            details={"rows_processed_count": 0},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 18
0
    def test_run_validations_copy_failure(self) -> None:
        """An exception raised by the storage ``copy`` call yields a FAILED download report."""
        exception_message = "failed to copy"
        failure_message = f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: Failed to download the input file. Please check the file path and its permission.\n\t{exception_message}"
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=failure_message,
            details={"rows_processed_count": 0},
        )
        copy_error = Exception(exception_message)
        self.storage_service_mock.copy.side_effect = copy_error

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 19
0
    def test_a_validator_throws_exception(self) -> None:
        """A raising validator still aggregates to SUCCESS, with its warning report appended."""
        expected_report_thrown_by_validator = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name="TestExceptionValidator",
            message=
            "WARNING: TestExceptionValidator threw an unexpected error: test error message",
        )
        expected_aggregated_result = ValidationResult.SUCCESS
        # Individual reports are joined with a blank line in between.
        expected_aggregated_report = (
            f"{TEST_SUCCESSFUL_REPORT_1}\n\n{expected_report_thrown_by_validator}"
        )
        validators = [
            TestDummyValidator(TEST_SUCCESSFUL_REPORT_1),
            TestExceptionValidator(),
        ]

        actual_result, actual_report = run_validators(validators)

        self.assertEqual(expected_aggregated_result, actual_result)
        self.assertEqual(expected_aggregated_report, actual_report)
    def test_run_s3_validations_binary_not_exist(
            self, storage_service_mock: Mock) -> None:
        """The first remote binary is missing, so the report is FAILED for that path."""
        failure_message = f"You don't have permission to access some private computation software (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION}). Please contact your representative at Meta"
        missing_binary_details = {
            f"{DEFAULT_BINARY_REPOSITORY}package/1/latest/1":
            "binary does not exist"
        }
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=failure_message,
            details=missing_binary_details,
        )
        # NOTE(review): presumably makes the patched storage service return
        # this same mock when instantiated -- confirm against the patch setup.
        storage_service_mock.__init__(return_value=storage_service_mock)
        # Only the first existence check reports a missing file.
        storage_service_mock.file_exists.side_effect = [False, True, True]

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        self.assertEqual(storage_service_mock.file_exists.call_count,
                         len(TEST_BINARY_INFOS))
    def test_run_s3_validations_success(self,
                                        storage_service_mock: Mock) -> None:
        """All remote binaries exist: SUCCESS report and one lookup per binary path."""
        success_message = f"Completed binary accessibility validation successfully (Repo path: {DEFAULT_BINARY_REPOSITORY}, software version: {DEFAULT_BINARY_VERSION})."
        expected_report = ValidationReport(
            validation_result=ValidationResult.SUCCESS,
            validator_name=BINARY_FILE_VALIDATOR_NAME,
            message=success_message,
        )
        # NOTE(review): presumably makes the patched storage service return
        # this same mock when instantiated -- confirm against the patch setup.
        storage_service_mock.__init__(return_value=storage_service_mock)
        storage_service_mock.file_exists.return_value = True

        actual_report = BinaryFileValidator(TEST_REGION,
                                            TEST_BINARY_INFOS).validate()

        self.assertEqual(actual_report, expected_report)
        self.assertEqual(storage_service_mock.file_exists.call_count,
                         len(TEST_BINARY_INFOS))
        expected_lookups = [
            call(f"{DEFAULT_BINARY_REPOSITORY}package/1/latest/1"),
            call(f"{DEFAULT_BINARY_REPOSITORY}package/2/latest/2"),
            call(f"{DEFAULT_BINARY_REPOSITORY}package/3/latest/binary"),
        ]
        storage_service_mock.file_exists.assert_has_calls(expected_lookups)
Exemplo n.º 22
0
    def test_run_validations_errors_when_pa_pl_data_fields_not_found(
            self, time_mock: Mock) -> None:
        """An ``id_,header,row`` header fails header parsing with zero rows processed."""
        exception_message = f"Failed to parse the header row. The header row fields must have either: {PL_FIELDS} or: {PA_FIELDS}"
        time_mock.time.return_value = TEST_TIMESTAMP
        input_rows = [
            b"id_,header,row\n",
            b"1,2,3\n",
            b"4,5,6\n",
        ]
        self.write_lines_to_file(input_rows)
        expected_report = ValidationReport(
            validation_result=ValidationResult.FAILED,
            validator_name=INPUT_DATA_VALIDATOR_NAME,
            message=
            f"File: {TEST_INPUT_FILE_PATH} failed validation. Error: {exception_message}",
            details={"rows_processed_count": 0},
        )

        actual_report = InputDataValidator(TEST_INPUT_FILE_PATH,
                                           TEST_CLOUD_PROVIDER,
                                           TEST_REGION).validate()

        self.assertEqual(actual_report, expected_report)
Exemplo n.º 23
0
        return self.dummy_report


class TestExceptionValidator(Validator):
    """Validator stub whose ``__validate__`` always raises."""

    @property
    def name(self) -> str:
        """Fixed name reported for this validator."""
        validator_name = "TestExceptionValidator"
        return validator_name

    def __validate__(self) -> ValidationReport:
        """Always raise a plain ``Exception`` with a fixed message."""
        error = Exception("test error message")
        raise error


# Canned SUCCESS report attributed to "validator 1".
TEST_SUCCESSFUL_REPORT_1 = ValidationReport(
    validation_result=ValidationResult.SUCCESS,
    validator_name="validator 1",
    message="message 1",
    details={"test_key_1": 5},
)

# Canned SUCCESS report attributed to "validator 2".
TEST_SUCCESSFUL_REPORT_2 = ValidationReport(
    validation_result=ValidationResult.SUCCESS,
    validator_name="validator 2",
    message="message 2",
    details={"test_key_2": 5},
)

TEST_FAILED_REPORT_1 = ValidationReport(
    validation_result=ValidationResult.FAILED,