Exemplo n.º 1
0
def test_dictify_invalid_arg_max_multi_line():
    """
    dictify must raise a TypeError when the max_multi_line argument is given a value of the wrong type.
    """
    manager = TextManager()
    # Only the final positional argument (a list) is the one this test expects to be rejected.
    call_args = ('good so far...', {}, 100.0, ['...nevermind'])
    with pytest.raises(TypeError):
        manager.dictify(*call_args)
Exemplo n.º 2
0
def test_dictify_default_id_num_found_same_line():
    """
    Check dictify when the ID number sits on the same line as its field name ('Id no <number>').
    The ID number is expected to be picked up and reported alongside date of birth, status and sex.
    """
    manager = TextManager()
    ocr_text = (
        'Id no 7101135111011\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert\n'
        'Nationality\n'
        'RSA\n'
        'Country of Birth\n'
        'RSA\n'
        'Status\n'
        'Citizen\n'
        'Sex\n'
        'M\n'
        'Date of Birth\n'
        '13 Jan 1971'
    )
    expected = {
        'identity_number': '7101135111011',
        'surname': 'Doe',
        'names': 'John-Michael Robert',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': 'RSA',
        'status': 'Citizen',
        'nationality': 'RSA'
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 3
0
def test_dictify_multi_line_2():
    """
    Check dictify's handling of a field value that may span multiple lines.
    Here the names value is a single line directly followed by the 'Sex' field name, covering the
    case in which a match to multi_line_end is found.
    """
    manager = TextManager()
    ocr_text = (
        'Identity Number\n'
        '7101135111011\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Sex\n'
        'M'
    )
    expected = {
        'identity_number': '7101135111011',
        'surname': 'Doe',
        'names': 'John-Michael',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': None,
        'status': 'Citizen',
        'nationality': None
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 4
0
def test_clean_up_invalid_arg_in_str():
    """
    clean_up must raise a TypeError when in_str is not a string (an int is passed here).
    """
    manager = TextManager()
    not_a_string = 123
    with pytest.raises(TypeError):
        manager.clean_up(not_a_string)
Exemplo n.º 5
0
def test_dictify_invalid_arg_barcode_data():
    """
    dictify must raise a TypeError when the barcode_data argument is of the wrong type
    (a plain string is passed here).
    """
    manager = TextManager()
    bad_barcode_data = 'nope'
    with pytest.raises(TypeError):
        manager.dictify('seems legit', bad_barcode_data)
Exemplo n.º 6
0
def test_dictify_fuzzy_2():
    """
    Check that dictify still retrieves the field values when the field names are misspelled and
    followed by noise, i.e. when reasonable fuzzy matching is required.
    """
    manager = TextManager()
    ocr_text = (
        'Idenmy Number lll\n'
        '7101135111011\n'
        'Suriname 00ee\n'
        'Doe\n'
        'Names iii\n'
        'John-Michael\n'
        'Robert\n'
        'Seeex\n'
        'M\n'
        'Nahonallly\n'
        'RSA\n'
    )
    expected = {
        'identity_number': '7101135111011',
        'surname': 'Doe',
        'names': 'John-Michael Robert',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': None,
        'status': 'Citizen',
        'nationality': 'RSA'
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 7
0
def test_dictify_invalid_arg_in_str():
    """
    dictify must raise a TypeError when in_str is not a string (a list is passed here).
    """
    manager = TextManager()
    not_a_string = ['not legit']
    with pytest.raises(TypeError):
        manager.dictify(not_a_string)
Exemplo n.º 8
0
def test_dictify_fuzzy_min_ratio():
    """
    Check dictify's fuzzy matching when an explicit fuzzy_min_ratio is supplied.
    With the ratio set to 90.00, the values under 'Idenmy Number lll' and 'Nahonallly' are expected
    to be left out of the result.
    """
    manager = TextManager()
    ocr_text = (
        'Idenmy Number lll\n'
        '7101135111011\n'
        'Suriname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert\n'
        'Sex\n'
        'M\n'
        'Nahonallly\n'
        'RSA\n'
    )
    expected = {
        'identity_number': None,
        'surname': 'Doe',
        'names': 'John-Michael Robert',
        'sex': 'M',
        'date_of_birth': None,
        'country_of_birth': None,
        'status': None,
        'nationality': None
    }
    result = manager.dictify(ocr_text, fuzzy_min_ratio=90.00)
    assert result == expected
Exemplo n.º 9
0
def test_dictify_max_multi_line():
    """
    Test the ability of the dictify function to retrieve field values over multiple lines.
    This case checks that only the specified number of lines (max_multi_line=3) is used for the
    names value: the fourth line after 'Names' ('Ignore') must not end up in the result.
    """
    txt_man = TextManager()
    in_str = (
        'Identity Number\n'
        '7101135111011\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert\n'
        'Douglas\n'
        # The newline below is required: without it, implicit string-literal concatenation fuses
        # this line with the next one into 'IgnoreSex', so 'Sex' is never on a line of its own.
        'Ignore\n'
        'Sex\n'
        'M'
    )
    assert txt_man.dictify(in_str, max_multi_line=3) == {
        'identity_number': '7101135111011',
        'surname': 'Doe',
        'names': 'John-Michael Robert Douglas',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': None,
        'status': 'Citizen',
        'nationality': None
    }
Exemplo n.º 10
0
def test_dictify_invalid_arg_min_fuzzy_ratio():
    """
    dictify must raise a TypeError when the fuzzy minimum ratio argument is of the wrong type
    (a string is passed as the third positional argument here).
    """
    manager = TextManager()
    bad_ratio = '...fail'
    with pytest.raises(TypeError):
        manager.dictify('good so far...', {}, bad_ratio)
Exemplo n.º 11
0
def test_dictify_id_in_barcode():
    """
    Check dictify when an ID number taken from a barcode is passed in via barcode_data.
    The barcode ID number is expected in the result instead of the number in the text, together
    with the date of birth, status and sex values shown below.
    """
    manager = TextManager()
    ocr_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'Jane-Michael\n'
        'Robert'
    )
    barcode = {'identity_number': '7101134111111'}
    expected = {
        'identity_number': '7101134111111',
        'surname': 'Doe',
        'names': 'Jane-Michael Robert',
        'sex': 'F',
        'date_of_birth': '1971-01-13',
        'country_of_birth': None,
        'status': 'Non Citizen',
        'nationality': None
    }
    result = manager.dictify(ocr_text, barcode_data=barcode)
    assert result == expected
Exemplo n.º 12
0
def test_clean_up_invalid_arg_deplorables_3():
    """
    clean_up must raise a TypeError for an invalid deplorables argument.
    This case passes a list that is not made up solely of strings (it contains a float).
    """
    manager = TextManager()
    mixed_deplorables = ['almost', 'but not quite', 3.3]
    with pytest.raises(TypeError):
        manager.clean_up('', mixed_deplorables)
Exemplo n.º 13
0
def test_clean_up_invalid_arg_deplorables_1():
    """
    clean_up must raise a TypeError for an invalid deplorables argument.
    This case passes a value that is not a list at all (a plain string).
    """
    manager = TextManager()
    not_a_list = 'not legit'
    with pytest.raises(TypeError):
        manager.clean_up('legit', not_a_list)
Exemplo n.º 14
0
def test_dictify_empty_in_str():
    """
    Passing an empty string to dictify is expected to yield a result in which every field is None.
    """
    manager = TextManager()
    field_names = (
        'identity_number',
        'surname',
        'names',
        'sex',
        'date_of_birth',
        'country_of_birth',
        'status',
        'nationality'
    )
    # Every field maps to None.
    expected = dict.fromkeys(field_names)
    assert manager.dictify('') == expected
Exemplo n.º 15
0
def test_dictify_bare():
    """
    Check dictify when a field name ('Surname') is matched at the very end of the input with no
    field value following it. Every field, including surname, is expected to be None.
    """
    manager = TextManager()
    ocr_text = (
        'Surname\n'
    )
    field_names = (
        'identity_number',
        'surname',
        'names',
        'sex',
        'date_of_birth',
        'country_of_birth',
        'status',
        'nationality'
    )
    # Every field maps to None.
    expected = dict.fromkeys(field_names)
    assert manager.dictify(ocr_text) == expected
Exemplo n.º 16
0
def test_dictify_multi_line_3():
    """
    Check dictify's handling of a field value that may span multiple lines.
    This case covers a multi-line field name ('Names') at the end of the input with no value after
    it. Every field, including names, is expected to be None.
    """
    manager = TextManager()
    ocr_text = (
        'Names'
    )
    field_names = (
        'identity_number',
        'surname',
        'names',
        'sex',
        'date_of_birth',
        'country_of_birth',
        'status',
        'nationality'
    )
    # Every field maps to None.
    expected = dict.fromkeys(field_names)
    assert manager.dictify(ocr_text) == expected
Exemplo n.º 17
0
def test_clean_up_remove_multiple_spaces():
    """
    clean_up is expected to collapse runs of multiple spaces into a single space.
    """
    manager = TextManager()
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John     Michael   Robert'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John Michael Robert'
    )
    cleaned = manager.clean_up(raw_text)
    assert cleaned == expected
Exemplo n.º 18
0
def test_dictify_invalid_date_of_birth():
    """
    Check dictify when the date of birth value cannot be formatted as a date.
    The malformed value ('123 Jin 1971') is expected to be returned unchanged.
    """
    manager = TextManager()
    ocr_text = (
        'date of birth\n'
        '123 Jin 1971\n'
        'country of birth\n'
        'RSA'
    )
    expected = {
        'identity_number': None,
        'surname': None,
        'names': None,
        'sex': None,
        'date_of_birth': '123 Jin 1971',
        'country_of_birth': 'RSA',
        'status': None,
        'nationality': None
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 19
0
def test_dictify_default_id_num_not_found():
    """
    Check dictify when no ID number is found: a 13-digit number is present in the text, but it does
    not follow an ID number field name. identity_number is expected to be None while the remaining
    fields take the values found in the text.
    """
    manager = TextManager()
    ocr_text = (
        'Nothing to find here...\n'
        '7101135111011\n'
        'Not legit\n'
        'Ignore\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert\n'
        'Nationality\n'
        'RSA\n'
        'Country of Birth\n'
        'RSA\n'
        'Skip\n'
        'Skip this too\n'
        'Status\n'
        'Hungry\n'
        'Sex\n'
        'M\n'
        'Date of Birth\n'
        '13 Jan 1971'
    )
    expected = {
        'identity_number': None,
        'surname': 'Doe',
        'names': 'John-Michael Robert',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': 'RSA',
        'status': 'Hungry',
        'nationality': 'RSA'
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 20
0
def test_clean_up_unicode_support():
    """
    clean_up is expected to leave unicode characters (e.g. 'ö' and 'á') untouched, so the cleaned
    text must equal the input text.
    """
    manager = TextManager()
    unicode_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John-Micháel\n'
        'Robert'
    )
    cleaned = manager.clean_up(unicode_text)
    # The expected output is character-for-character the same as the input.
    assert cleaned == unicode_text
Exemplo n.º 21
0
def test_clean_up_remove_specified():
    """
    clean_up is expected to also strip the characters given in an additional removal list
    ('+' and 'ö' here).
    """
    manager = TextManager()
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Döe\n'
        'Names\n'
        'John+Michael\n'
        'Robert'
    )
    extra_removals = ['+', 'ö']
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'De\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, extra_removals)
    assert cleaned == expected
Exemplo n.º 22
0
def test_clean_up_remove_default():
    """
    Check which characters clean_up strips when no additional removal list is given.
    The symbols scattered through the input below are expected to disappear, while the hyphen in
    'John-Michael' is expected to stay.
    """
    manager = TextManager()
    raw_text = (
        'Identity #Number\n'
        '123456789...\n'
        '$Sur_name&\n'
        '\\/Doe.\n'
        'Names*\n'
        'John-Michae|l\n'
        'R%obert+'
    )
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text)
    assert cleaned == expected
Exemplo n.º 23
0
def test_dictify_default_skip_unnecessary():
    """
    Check that dictify only picks up the relevant (pre-specified) fields and skips the unrelated
    lines mixed into the input ('Not legit', 'Ignore', 'Skip', 'Skip this too').
    """
    manager = TextManager()
    ocr_text = (
        'Not legit\n'
        'Ignore\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John-Michael\n'
        'Robert\n'
        'Nationality\n'
        'RSA\n'
        'Country of Birth\n'
        'RSA\n'
        'Skip\n'
        'Skip this too\n'
        'Status\n'
        'Citizen\n'
        'Sex\n'
        'M\n'
        'Date of Birth\n'
        '13 Jan 1971'
    )
    expected = {
        'identity_number': None,
        'surname': 'Doe',
        'names': 'John-Michael Robert',
        'sex': 'M',
        'date_of_birth': '1971-01-13',
        'country_of_birth': 'RSA',
        'status': 'Citizen',
        'nationality': 'RSA'
    }
    result = manager.dictify(ocr_text)
    assert result == expected
Exemplo n.º 24
0
def test_clean_up_remove_specified_sanitise():
    """
    Check clean_up with an additional removal list made up of characters that have a special
    meaning inside a regex character class: ], [, ^ and -. The test expects them to be removed
    like any other character, which requires them to be escaped in the underlying regex.
    """
    manager = TextManager()
    raw_text = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe[^-]\n'
        'Names\n'
        'John-Michael\n'
        'Robert'
    )
    regex_specials = [']', '[', '^', '-']
    expected = (
        'Identity Number\n'
        '123456789\n'
        'Surname\n'
        'Doe\n'
        'Names\n'
        'JohnMichael\n'
        'Robert'
    )
    cleaned = manager.clean_up(raw_text, regex_specials)
    assert cleaned == expected
Exemplo n.º 25
0
def test_dictify_bare_multi_line_4():
    """
    Check dictify's handling of a field value that may span multiple lines.
    This case covers a multi-line value ('John', 'Robert') that exists but runs up to the end of
    the input, with max_multi_line (4) larger than the number of lines available.
    """
    manager = TextManager()
    ocr_text = (
        'Surname\n'
        'Doe\n'
        'Names\n'
        'John\n'
        'Robert'
    )
    expected = {
        'identity_number': None,
        'surname': 'Doe',
        'names': 'John Robert',
        'sex': None,
        'date_of_birth': None,
        'country_of_birth': None,
        'status': None,
        'nationality': None
    }
    result = manager.dictify(ocr_text, max_multi_line=4)
    assert result == expected
Exemplo n.º 26
0
    def extract(self, img):
        """
        Extract the textual details from an image of an ID.

        The image is perspective-transformed and scanned for a barcode, run through the text
        extraction pipeline for its identification type, OCR'd with pytesseract, cleaned up and
        finally placed in a dictionary.

        Author(s):
            Nicolai van Niekerk
        Args:
            img: The image of the ID that contains the text to be extracted
        Returns:
            dict: The extracted ID details as produced by TextManager.dictify. When the
                identification type is 'studentcard', a dict with the keys 'up_card', 'text_dump'
                and 'barcode_dump' is returned instead.
        """
        def log_section(title, lines):
            # Log a titled block of lines at debug level, framed by horizontal rules.
            # This is for demonstration purposes.
            rule = '-' * 50
            logger.debug(rule)
            logger.debug(title)
            logger.debug(rule)
            for line in lines:
                logger.debug(line)
            logger.debug(rule)

        if 'remove_face' in self.preferences:
            self.remove_face = self.preferences['remove_face'] == 'true'
        logger.debug('self.remove_face: %s', self.remove_face)

        simplification_manager = SimplificationManager()
        barcode_manager = BarCodeManager()
        data = {}

        # Perform perspective transformation and read from barcode.
        logger.info('Performing perspective transformation...')
        image = simplification_manager.perspectiveTransformation(img)
        cv2.imwrite(DESKTOP + "/output/3.png", image)
        barcode_data_found, barcode_scan_data, barcoded_image = barcode_manager.get_barcode_info(
            image)
        if barcode_data_found:
            logger.info('Barcode successfully scanned')
            data = {
                'identity_number': barcode_scan_data.decode('utf-8'),
            }

        # Process image
        if 'id_type' in self.preferences:
            identification_type = self.preferences['id_type']
            logger.info("No template matching required")
            logger.info("Identification type: " + identification_type)
        else:
            template_match = TemplateMatching()
            logger.info('Performing template matching...')
            identification_type = template_match.identify(barcoded_image)

        logger.info('Constructing text extraction pipeline')
        pipeline = BuildDirector.construct_text_extract_pipeline(
            self.preferences, identification_type)
        image = pipeline.process_text_extraction(barcoded_image,
                                                 self.remove_face)

        # Extract text: the processed image goes through a temporary file named after the
        # current process id, which is then handed to the OCR engine.
        filename = "{}.png".format(os.getpid())
        cv2.imwrite(filename, image)
        try:
            # Close the image handle before the file is deleted.
            with Image.open(filename) as ocr_image:
                text = pytesseract.image_to_string(ocr_image)
        finally:
            # Always clean up the temporary file, even when OCR fails.
            if os.path.exists(filename):
                os.remove(filename)

        text_manager = TextManager()
        # Log the uncleaned string to terminal.
        log_section('String to clean:', text.split('\n'))
        logger.info('Cleaning up text...')
        # Clean the OCR output text.
        clean_text = text_manager.clean_up(text)
        # Log the cleaned string to terminal.
        log_section('Cleaned text:', clean_text.split('\n'))
        # Cater for UP student/staff cards.
        if identification_type == 'studentcard':
            return {
                # Used to be able to reliably check if a response is a UP card from client-side.
                'up_card': True,
                # Dump extracted and cleaned text.
                'text_dump': clean_text,
                # Dump the barcode data (None when no barcode was scanned).
                'barcode_dump': data.get('identity_number')
            }
        # Dictify cleaned text.
        logger.info('Placing extracted text in a dictionary...')
        id_details = text_manager.dictify(clean_text, data)
        # Log the dictified extracted text to terminal.
        log_section('Extracted ID details:', prettify_json_message(id_details).split('\n'))
        # Return the extracted ID information.
        return id_details
Exemplo n.º 27
0
def test_clean_up_empty_in_str():
    """
    Passing an empty string to clean_up is expected to return an empty string.
    """
    manager = TextManager()
    cleaned = manager.clean_up('')
    assert cleaned == ''