def test_extract():
    """Check that ``rp.extract`` returns a tuple for both kinds of input.

    The two inputs are the module-level ``test_img`` object (presumably an
    already-loaded image -- confirm where it is defined) and a path to the
    receipt image in the test resources.
    """
    sources = (test_img, "tests/test_resources/test_receipt.jpeg")
    for source in sources:
        post = rp.extract(source)
        # Assert per input: previously the first result was overwritten by
        # the second call before anything was checked.
        assert isinstance(post, tuple)
def test_extract_lines():
    """Check the output of ``rp.extract`` for a file path and a cv2 image.

    For both inputs the result must be a tuple whose first element is a
    3-dimensional NumPy array.
    """
    receipt_path = "tests/test_resources/test_receipt.jpeg"

    # extraction directly from the path of a single image file
    from_path = rp.extract(receipt_path)
    assert isinstance(from_path, tuple)
    assert isinstance(from_path[0], np.ndarray)
    assert from_path[0].ndim == 3

    # extraction from the same image after loading it with OpenCV
    img = cv2.imread(receipt_path)
    from_cv2 = rp.extract(img)
    assert isinstance(from_cv2, tuple)
    assert isinstance(from_cv2[0], np.ndarray)
    assert from_cv2[0].ndim == 3
def test_ocr_item_pipe():
    """Check that ``rp.item_pipe`` turns extracted images into a DataFrame.

    The result must be a ``pandas.DataFrame`` with one row per extracted
    image and exactly two columns.
    """
    imgs = rp.extract("tests/test_resources/test_receipt.jpeg")
    items = rp.item_pipe(imgs)

    # check type (name the class directly rather than building a dummy frame)
    assert isinstance(items, pd.DataFrame)
    assert len(imgs) == len(items)
    assert len(items.columns) == 2
def test_ocr_text_extract():
    """Check the structure of the ``rp.ocr.ocr_textM`` output.

    There must be one result per extracted image; the first result must be
    indexable as a (str, numpy array) pair, and every element returned by
    ``rp.text._text`` for the results must be a string.
    """
    imgs = rp.extract("tests/test_resources/test_receipt.jpeg")
    result = rp.ocr.ocr_textM(imgs)

    assert len(imgs) == len(result)
    assert isinstance(result[0][0], str)
    assert isinstance(result[0][1], np.ndarray)
    # a generator is enough here; no intermediate list is needed
    assert all(isinstance(e, str) for e in rp.text._text(result))
# sk_model & model_params can be passed; see documentation ) # -------------------------------------------------------------------------------- # Example of extracting lines from an image # If no classifier path is provided, a default classifier will be used # which is only trained on one store type and has loose ground truth # labels (see docs on how to provide a classifier from previous step) # NOTE: As with the ``make_training_data`` function above, you can and should # use the context dictionary for extraction. It would ideally be the same as the one # used to train the model for a given shop, so that the padding and other parameters # are respected. # From an image path: imgs = rp.extract(input_image_as_str) # From an image: imgs = rp.extract(input_img_loaded_with_io_load_validate) # From a path to a folder: imgs = rp.extract_from_folder(folder_path) # I've added additional functionality! # If you run the following command, you will get a tuple out (items, non-items). imgs, non_items = rp.extract(input_see_above, return_negatives=True) # Afterwards you can proceed to run the rest of the pipeline as normal on the 'imgs'. # However, please see further down how to use the `non_items` to get date and shop name. # Please note that if you specify `override_prediction = True` in the extract # function, the line classification will be skipped and one can see just what the # text line detection is finding in terms of lines.
# third party
import cv2

# project
from readpyne import extract, ocr, io

# Extraction runs once when this module is imported; the result is kept in
# the module-level name ``lines``.
lines = extract("tests/test_resources/test_receipt.jpeg")


def test_ocr():
    """Placeholder test that always passes."""
    assert True