def test_given_anonymize_called_with_valid_request_then_expected_valid_response_returned():
    """Anonymize a text with overlapping name results and a phone number.

    The request supplies a DEFAULT "replace" anonymizer and a "mask"
    anonymizer for PHONE_NUMBER. The test expects status 200, the name span
    replaced by "ANONYMIZED" and the last four phone digits masked.
    """
    payload = (
        ' { "text": "hello world, my name is Jane Doe. My number is: 034453334",'
        ' "anonymizers": {'
        ' "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },'
        ' "PHONE_NUMBER": { "type": "mask", "masking_char": "*",'
        ' "chars_to_mask": 4, "from_end": true } },'
        ' "analyzer_results": ['
        ' { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" },'
        ' { "start": 24, "end": 28, "score": 0.8, "entity_type": "FIRST_NAME" },'
        ' { "start": 29, "end": 32, "score": 0.6, "entity_type": "LAST_NAME" },'
        ' { "start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER" }'
        ' ] } '
    )

    status, content = anonymize(payload)

    expected = (
        '{"text": "hello world, my name is ANONYMIZED. My number is: 03445****",'
        ' "items": ['
        '{"anonymizer": "mask", "entity_type": "PHONE_NUMBER",'
        ' "start": 50, "end": 59, "anonymized_text": "03445****"},'
        ' {"anonymizer": "replace", "entity_type": "NAME",'
        ' "start": 24, "end": 34, "anonymized_text": "ANONYMIZED"}'
        ']}'
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_decision_process_enabled_for_analyze_input_then_return_response_with_decision_process():
    """Analyze with ``return_decision_process`` enabled.

    The test expects status 200 and that each returned entity carries a
    populated ``analysis_explanation`` object (recognizer, pattern, scores,
    context word) instead of ``null``.
    """
    payload = (
        ' { "text": "John Smith drivers license is AC432223",'
        ' "language": "en", "return_decision_process": true } '
    )

    status, content = analyze(payload)

    # The pattern is the recognizer's regex as it must appear in the JSON
    # response, so it is reproduced verbatim (split only at "|" boundaries).
    expected = (
        ' [ {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85,'
        ' "analysis_explanation": { "recognizer": "SpacyRecognizer",'
        ' "pattern_name": null, "pattern": null, "original_score": 0.85,'
        ' "score": 0.85,'
        ' "textual_explanation": "Identified as PERSON by Spacy\'s Named Entity Recognition",'
        ' "score_context_improvement": 0, "supportive_context_word": "",'
        ' "validation_result": null } },'
        ' {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38,'
        ' "score": 0.6499999999999999,'
        ' "analysis_explanation": { "recognizer": "UsLicenseRecognizer",'
        ' "pattern_name": "Driver License - Alphanumeric (weak)",'
        ' "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|'
        '[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|'
        'V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|'
        '[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|'
        '[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|'
        '[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|'
        '[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b",'
        ' "original_score": 0.3, "score": 0.6499999999999999,'
        ' "textual_explanation": null,'
        ' "score_context_improvement": 0.3499999999999999,'
        ' "supportive_context_word": "driver", "validation_result": null } } ] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_invalid_key_then_invalid_input_response_returned():
    """Deanonymize with a "decrypt" operator whose key has an invalid length.

    The test expects status 422 and an error stating that the key must be
    128, 192 or 256 bits long.
    """
    # Same cipher text as the other decrypt tests in this file. The stray
    # spaces that surrounded "+" were removed so the fixture is identical
    # everywhere and ``len(text)`` matches the real payload length.
    text = "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    request_body = {
        "text": text,
        "deanonymizers": {
            "NUMBER": {
                "type": "decrypt",
                "key": "invalidkey",
            }
        },
        # A single result spanning the whole text.
        "anonymizer_results": [
            {
                "start": 0,
                "end": len(text),
                "entity_type": "NUMBER",
            }
        ],
    }

    response_status, response_content = deanonymize(json.dumps(request_body))

    expected_response = """ { "error": "Invalid input, key must be of length 128, 192 or 256 bits" } """
    assert response_status == 422
    assert equal_json_strings(expected_response, response_content)
def test_given_decrypt_called_with_encrypted_text_then_decrypted_text_returned():
    """Deanonymize an encrypted text with a "decrypt" operator and valid key.

    The test expects status 200, the text "text_for_encryption" and a single
    item describing the decrypted span.
    """
    cipher_text = "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    decrypt_operator = {"type": "decrypt", "key": "1111111111111111"}
    # One result covering the complete cipher text.
    whole_text_span = {"start": 0, "end": len(cipher_text), "entity_type": "NUMBER"}
    payload = {
        "text": cipher_text,
        "deanonymizers": {"NUMBER": decrypt_operator},
        "anonymizer_results": [whole_text_span],
    }

    status, content = deanonymize(json.dumps(payload))

    expected = (
        '{"text": "text_for_encryption", "items": ['
        '{"start": 0, "end": 19, "operator":"decrypt",'
        ' "text": "text_for_encryption","entity_type":"NUMBER"}]}'
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_ad_hoc_deny_list_recognizer_the_right_entities_are_returned():
    """Analyze with two ad-hoc deny-list recognizers (MR_TITLE, MS_TITLE).

    The text only contains "Mr.", so the test expects status 200 and a
    MR_TITLE result next to PERSON and US_DRIVER_LICENSE, with no MS_TITLE.
    """
    payload = (
        ' { "text": "Mr. John Smith\'s drivers license is AC432223",'
        ' "language": "en", "ad_hoc_recognizers":['
        ' { "name": "Mr. Recognizer", "supported_language": "en",'
        ' "deny_list": ["Mr", "Mr.", "Mister"], "supported_entity":"MR_TITLE" },'
        ' { "name": "Ms. Recognizer", "supported_language": "en",'
        ' "deny_list": ["Ms", "Ms.", "Miss", "Mrs", "Mrs."],'
        ' "supported_entity":"MS_TITLE" } ] } '
    )

    status, content = analyze(payload)

    expected = (
        ' [ {"entity_type": "PERSON", "start": 4, "end": 14,'
        ' "score": 0.85, "analysis_explanation":null},'
        ' {"entity_type": "US_DRIVER_LICENSE", "start": 36, "end": 44,'
        ' "score": 0.6499999999999999, "analysis_explanation":null},'
        ' {"entity_type": "MR_TITLE", "start": 0, "end": 3,'
        ' "score": 1.0, "analysis_explanation":null} ] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_ad_hoc_pattern_recognizer_context_raises_confidence():
    """Analyze with an ad-hoc ZIP pattern recognizer that has context words.

    The pattern is declared with score 0.01 and context ["zip", "code"]. The
    test expects status 200 and the ZIP entity reported with score 0.4.
    """
    # Raw segments: the regex backslashes must reach the JSON body unchanged.
    payload = (
        r' { "text": "John Smith drivers license is AC432223. Zip code: 10023",'
        r' "language": "en", "ad_hoc_recognizers":['
        r' { "name": "Zip code Recognizer", "supported_language": "en",'
        r' "patterns": [ { "name": "zip code (weak)",'
        r' "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", "score": 0.01 } ],'
        r' "context": ["zip", "code"], "supported_entity":"ZIP" } ] } '
    )

    status, content = analyze(payload)

    expected = (
        ' [ {"entity_type": "PERSON", "start": 0, "end": 10,'
        ' "score": 0.85, "analysis_explanation":null},'
        ' {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38,'
        ' "score": 0.6499999999999999, "analysis_explanation":null},'
        ' {"entity_type": "ZIP", "start": 50, "end": 55,'
        ' "score": 0.4, "analysis_explanation":null} ] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_wrong_ad_hoc_json_exception_is_given():
    """Analyze with an ad-hoc recognizer whose pattern uses unknown keys.

    The pattern object uses "type", "bebex" and "confidence" where the valid
    request in the sibling test uses "name", "regex" and "score". The test
    expects a parse error that mentions the unexpected keyword 'type', and
    status 400.
    """
    # Raw segments: the regex backslashes must reach the JSON body unchanged.
    bad_payload = (
        r' { "text": "John Smith drivers license is AC432223. Zip code: 10023",'
        r' "language": "en", "ad_hoc_recognizers":['
        r' { "name": "Zip code Recognizer", "supported_language": "en",'
        r' "patterns": [ { "type": "zip code (weak)",'
        r' "bebex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", "confidence": 0.01 } ],'
        r' "supported_entity":"ZIP" } ] } '
    )

    status, content = analyze(bad_payload)

    expected = (
        ' { "error":"Failed to parse /analyze request for AnalyzerEngine.analyze().'
        ' __init__() got an unexpected keyword argument \'type\'" } '
    )
    # The error body is checked before the status code.
    assert equal_json_strings(expected, content)
    assert status == 400
def test_given_no_image_then_we_fail():
    """Call ``redact`` with an empty payload.

    The test expects status code 422 and an error asking for image data.
    """
    # black redact
    expected = """ {"error": "Invalid parameter, please add image data"} """

    result = redact("")

    assert result.status_code == 422
    actual = result.content.decode()
    assert equal_json_strings(actual, expected)
def test_given_anonymizers_called_then_expected_anonymizers_list_returned():
    """Request the anonymizer list and compare it with the expected names.

    The test expects status 200 and the list below.
    """
    expected = """ ["hash", "mask", "redact", "replace", "encrypt", "custom"] """

    status, content = anonymizers()

    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_no_analyze_text_input_then_return_error():
    """Analyze an empty JSON object.

    The test expects status 500 and the error "No text provided".
    """
    expected = """ {"error": "No text provided"} """

    status, content = analyze("{}")

    assert status == 500
    assert equal_json_strings(expected, content)
def test_given_a_unsupported_language_for_supported_entities_then_expect_an_error():
    """Query supported entities with ``language=he``.

    The test expects status 500 and a "no matching recognizers" error.
    """
    query = "language=he"

    status, content = analyzer_supported_entities(query)

    expected = """ {"error": "No matching recognizers were found to serve the request."} """
    assert status == 500
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_missing_key_then_invalid_input_response_returned():
    """Deanonymize a request that carries only ``text``.

    No deanonymizers or results are supplied. Despite the test's name, the
    assertions expect status 200 with the text returned unchanged and an
    empty ``items`` list.
    """
    payload = (
        ' { "text":'
        ' "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy" } '
    )

    status, content = deanonymize(payload)

    expected = (
        '{"text":'
        ' "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy",'
        ' "items": []}'
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_a_incorrect_analyze_language_input_then_return_error():
    """Analyze a text with language code "zz".

    The test expects status 500 and a "no matching recognizers" error.
    """
    payload = (
        ' { "text": "John Smith drivers license is AC432223",'
        ' "language": "zz" } '
    )

    status, content = analyze(payload)

    # The status is checked first, then the error body.
    assert status == 500
    expected = """ {"error": "No matching recognizers were found to serve the request."} """
    assert equal_json_strings(expected, content)
def test_given_analyze_text_no_language_input_then_return_error():
    """Analyze a request that has ``text`` but no ``language``.

    The test expects status 500 and the error "No language provided".
    """
    payload = """ { "text": "John Smith drivers license is AC432223" } """
    expected = """ {"error": "No language provided"} """

    status, content = analyze(payload)

    assert status == 500
    assert equal_json_strings(expected, content)
def test_given_an_illegal_input_for_supported_entities_then_igonre_and_proceed():
    """Query supported entities with an unrecognised query parameter.

    The test expects status 200 and the entity list below, i.e. the unknown
    parameter does not cause an error.
    """
    query = "uknown=input"

    status, content = analyzer_supported_entities(query)

    expected = (
        ' ["PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "SG_NRIC_FIN",'
        ' "LOCATION", "CREDIT_CARD", "CRYPTO", "UK_NHS", "US_SSN",'
        ' "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS",'
        ' "PERSON", "IBAN_CODE", "NRP", "US_ITIN", "DOMAIN_NAME"] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_missing_payload_then_bad_request_response_returned():
    """Deanonymize an empty JSON object.

    The test expects status 400 and the error "Invalid request json".
    """
    empty_payload = """ { } """
    expected = """ { "error": "Invalid request json" } """

    status, content = deanonymize(empty_payload)

    assert status == 400
    assert equal_json_strings(expected, content)
def test_given_anonymize_called_with_custom_then_bad_request_error_returned():
    """Anonymize with a DEFAULT anonymizer of type "custom".

    The test expects status 400 and the error "Custom type anonymizer is not
    supported".
    """
    # The fixture text previously held the placeholder "[email protected]"
    # (17 characters), which the analyzer_results spans did not match. It now
    # uses two 16-character addresses that sit exactly at 39-55 and 60-76.
    request_body = (
        ' { "text": "The user has the following two emails: email1@gmail.com and email2@gmail.com",'
        ' "anonymizers": { "DEFAULT": { "type": "custom", "new_value": "lambda x: x[::-1]" } },'
        ' "analyzer_results": ['
        ' { "start": 39, "end": 55, "score": 1.0, "entity_type": "EMAIL_ADDRESS" },'
        ' { "start": 60, "end": 76, "score": 1.0, "entity_type": "EMAIL_ADDRESS" }'
        ' ] } '
    )

    response_status, response_content = anonymize(request_body)

    expected_response = '{"error": "Custom type anonymizer is not supported"}'
    assert response_status == 400
    assert equal_json_strings(expected_response, response_content)
def test_given_decrypt_called_with_missing_text_then_invalid_input_response_returned():
    """Deanonymize a request that contains only a ``key`` field.

    The test expects status 422 and an error stating that the text can not
    be empty.
    """
    payload = """ { "key": "1111111111111111" } """
    expected = """ { "error": "Invalid input, text can not be empty" } """

    status, content = deanonymize(payload)

    assert status == 422
    assert equal_json_strings(expected, content)
def test_given_a_correct_analyze_input_then_return_full_response():
    """Analyze an English text with a person name and a driver license.

    The test expects status 200 and both PERSON and US_DRIVER_LICENSE
    results, each with a ``null`` analysis explanation.
    """
    payload = (
        ' { "text": "John Smith drivers license is AC432223",'
        ' "language": "en" } '
    )

    status, content = analyze(payload)

    expected = (
        ' [ {"entity_type": "PERSON", "start": 0, "end": 10,'
        ' "score": 0.85, "analysis_explanation":null},'
        ' {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38,'
        ' "score": 0.6499999999999999, "analysis_explanation":null} ] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_anonymize_called_with_deformed_body_then_internal_server_error_returned():
    """Anonymize with a body that is not valid JSON.

    The body deliberately contains trailing commas. The test expects status
    500 and the error "Internal server error".
    """
    # Keep the trailing commas: they are what makes this body malformed.
    malformed_payload = (
        ' { "text": "hello world, my name is Jane Doe. My number is: 034453334",'
        ' "anonymizers": { "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"}, },'
        ' "analyzer_results": ['
        ' {"start": 24, "end": 32, "score": 0.8, "entity_type": "NAME"},'
        ' {"start": 24, "end": 28, "score": 0.8, "entity_type": "FIRST_NAME"},'
        ' ] } '
    )

    status, content = anonymize(malformed_payload)

    expected = '{"error": "Internal server error"}'
    assert status == 500
    assert equal_json_strings(expected, content)
def test_given_anonymize_called_with_empty_analyzer_results_then_unchanged_text_is_returned():
    """Anonymize with anonymizers configured but no analyzer results.

    The test expects status 200, the text returned unchanged and an empty
    ``items`` list.
    """
    payload = (
        ' { "text": "hello world! nice to meet you!",'
        ' "anonymizers": {'
        ' "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },'
        ' "PHONE_NUMBER": { "type": "mask", "masking_char": "*",'
        ' "chars_to_mask": 4, "from_end": true } },'
        ' "analyzer_results": [ ] } '
    )

    status, content = anonymize(payload)

    expected = '{"text": "hello world! nice to meet you!", "items": []}'
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_anonymize_called_with_empty_text_then_invalid_input_message_returned():
    """Anonymize with an empty ``text`` value.

    The test expects status 422 and an error stating that the text can not
    be empty.
    """
    payload = (
        ' { "text": "",'
        ' "anonymizers": { "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" } },'
        ' "analyzer_results": ['
        ' { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" }'
        ' ] } '
    )

    status, content = anonymize(payload)

    expected = '{"error": "Invalid input, text can not be empty"}'
    assert status == 422
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_missing_text_then_invalid_input_response_returned():
    """Call ``decrypt`` with a request that contains only a ``key`` field.

    The test expects status 422 and the error "Expected parameter text".
    """
    payload = """ { "key": "1111111111111111" } """
    expected = """ { "error": "Expected parameter text" } """

    status, content = decrypt(payload)

    assert status == 422
    assert equal_json_strings(expected, content)
def test_given_analyze_threshold_input_then_return_result_above_threshold():
    """Analyze with ``score_threshold`` set to 0.7.

    The test expects status 200 and only the PERSON result (score 0.85) in
    the response.
    """
    payload = (
        ' { "text": "John Smith drivers license is AC432223",'
        ' "language": "en", "score_threshold": 0.7 } '
    )

    status, content = analyze(payload)

    expected = (
        ' [ {"entity_type": "PERSON", "start": 0, "end": 10,'
        ' "score": 0.85, "analysis_explanation": null } ] '
    )
    assert status == 200
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_invalid_key_then_invalid_input_response_returned():
    """Call ``decrypt`` with a key of invalid length.

    The test expects status 422 and an error stating that the key must be
    128, 192 or 256 bits long.
    """
    payload = (
        ' { "key": "invalidkey",'
        ' "text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy" } '
    )

    status, content = decrypt(payload)

    expected = """ { "error": "Invalid input, key must be of length 128, 192 or 256 bits" } """
    assert status == 422
    assert equal_json_strings(expected, content)
def test_given_decrypt_called_with_encrypted_text_then_decrypted_text_returned():
    """Call ``decrypt`` with a valid key and an encrypted text.

    The test expects status 200 and a ``result`` of "text_for_encryption".
    """
    payload = (
        ' { "key": "1111111111111111",'
        ' "text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy" } '
    )

    status, content = decrypt(payload)

    expected = """ { "result": "text_for_encryption" } """
    assert status == 200
    assert equal_json_strings(expected, content)
def anonymize_and_assert(anonymizer_request, expected_response):
    """Serialize a request, anonymize it and check the outcome.

    :param anonymizer_request: JSON-serializable request object; it is passed
        through ``json.dumps`` before being handed to ``anonymize``.
    :param expected_response: JSON string the response content must match
        according to ``equal_json_strings``.
    :raises AssertionError: if the status is not 200 or the content differs.
    """
    serialized_request = json.dumps(anonymizer_request)
    status, content = anonymize(serialized_request)

    assert status == 200
    assert equal_json_strings(expected_response, content)
def analyze_and_assert(analyzer_request, expected_response):
    """Serialize a request, analyze it, check the outcome and return it parsed.

    :param analyzer_request: JSON-serializable request object; it is passed
        through ``json.dumps`` before being handed to ``analyze``.
    :param expected_response: JSON string the response content must match
        according to ``equal_json_strings``.
    :return: the response content decoded with ``json.loads``.
    :raises AssertionError: if the status is not 200 or the content differs.
    """
    serialized_request = json.dumps(analyzer_request)
    status, content = analyze(serialized_request)

    assert status == 200
    assert equal_json_strings(expected_response, content)

    # Hand the decoded results back so callers can reuse them.
    return json.loads(content)