def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
    """Run an ``OCRPredictor`` end to end on the mock PDF and check its output.

    A detection predictor (``db_mobilenet_v3_large``) and a recognition
    predictor (``crnn_vgg16_bn``), both built with ``pretrained=False``, are
    combined into an ``OCRPredictor`` with orientation and language detection
    switched on. The test then checks the crop-orientation predictor wiring,
    the type and page count of the result, that a 4D page wrapped in a list
    raises ``ValueError``, and the orientation value of the first page.
    """
    # Detection stage
    det_bsize = 4
    det_preprocessor = PreProcessor(output_size=(512, 512), batch_size=det_bsize)
    det_model = detection.db_mobilenet_v3_large(
        pretrained=False,
        pretrained_backbone=False,
        assume_straight_pages=assume_straight_pages,
    )
    det_predictor = DetectionPredictor(det_preprocessor, det_model)
    assert not det_predictor.model.training

    # Recognition stage
    reco_bsize = 32
    reco_preprocessor = PreProcessor(
        output_size=(32, 128),
        batch_size=reco_bsize,
        preserve_aspect_ratio=True,
    )
    reco_model = recognition.crnn_vgg16_bn(
        pretrained=False,
        pretrained_backbone=False,
        vocab=mock_vocab,
    )
    reco_predictor = RecognitionPredictor(reco_preprocessor, reco_model)
    assert not reco_predictor.model.training

    pages = DocumentFile.from_pdf(mock_pdf)

    ocr_kwargs = {
        "assume_straight_pages": assume_straight_pages,
        "straighten_pages": straighten_pages,
        "detect_orientation": True,
        "detect_language": True,
    }
    predictor = OCRPredictor(det_predictor, reco_predictor, **ocr_kwargs)

    # A crop orientation predictor is only expected when pages may be rotated
    if not assume_straight_pages:
        assert isinstance(predictor.crop_orientation_predictor, nn.Module)
    else:
        assert predictor.crop_orientation_predictor is None

    result = predictor(pages)
    assert isinstance(result, Document)
    assert len(result.pages) == 2

    # Dimension check
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        predictor([bad_page])

    first_page = result.pages[0]
    assert first_page.orientation["value"] == 0
def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value):
    """Check ``PreProcessor`` input validation and the batches it produces.

    First a series of malformed inputs is fed to the processor, each expected
    to raise a given exception type. Then ``input_tensor`` is processed and
    the result is checked to be a list of ``expected_batches`` float32
    ``tf.Tensor`` objects whose dims 1-2 equal ``output_size`` and whose
    entries all equal ``expected_value``. Finally the ``repr`` of the
    processor is checked to span four lines.
    """
    processor = PreProcessor(output_size, batch_size)

    # (expected exception, offending input)
    invalid_calls = (
        # Invalid input type
        (TypeError, 42),
        # 4D check
        (AssertionError, np.full((256, 128, 3), 255, dtype=np.uint8)),
        (TypeError, np.full((1, 256, 128, 3), 255, dtype=np.int32)),
        # 3D check
        (AssertionError, [np.full((3, 256, 128, 3), 255, dtype=np.uint8)]),
        (TypeError, [np.full((256, 128, 3), 255, dtype=np.int32)]),
    )
    for expected_error, bad_input in invalid_calls:
        with pytest.raises(expected_error):
            processor(bad_input)

    batches = processor(input_tensor)
    assert isinstance(batches, list)
    assert len(batches) == expected_batches

    for batch in batches:
        assert isinstance(batch, tf.Tensor)
        assert batch.dtype == tf.float32
        assert batch.shape[1:3] == output_size
        assert tf.math.reduce_all(batch == expected_value)

    repr_lines = repr(processor).split("\n")
    assert len(repr_lines) == 4
def test_recognitionpredictor(mock_pdf, mock_vocab):  # noqa: F811
    """Build a ``RecognitionPredictor`` and check it on crops of the mock PDF.

    Two crops are extracted from the first page using relative boxes, and the
    predictor must return one ``(str, float)`` pair per crop. A 4D crop
    wrapped in a list must raise ``ValueError``.

    Returns:
        The constructed ``RecognitionPredictor``.
    """
    batch_size = 4
    preprocessor = PreProcessor(
        output_size=(32, 128),
        batch_size=batch_size,
        preserve_aspect_ratio=True,
    )
    model = recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3))
    predictor = RecognitionPredictor(preprocessor, model)

    first_page = DocumentFile.from_pdf(mock_pdf).as_images()[0]

    # Create bounding boxes
    boxes = np.array(
        [
            [0.5, 0.5, 0.75, 0.75],
            [0.5, 0.5, 1.0, 1.0],
        ],
        dtype=np.float32,
    )
    predictions = predictor(extract_crops(first_page, boxes))

    # One prediction per crop
    assert len(predictions) == boxes.shape[0]
    for val, conf in predictions:
        assert isinstance(val, str) and isinstance(conf, float)

    # Dimension check
    with pytest.raises(ValueError):
        bad_crop = (255 * np.random.rand(1, 128, 64, 3)).astype(np.uint8)
        predictor([bad_crop])

    return predictor
def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages):
    """Run an ``OCRPredictor`` on the mock PDF pages and check its output.

    The detection predictor wraps ``db_mobilenet_v3_large`` (called with
    ``pretrained=True``) and the recognition predictor wraps ``crnn_vgg16_bn``
    (called with ``pretrained=False``). The test checks the crop-orientation
    predictor wiring, the type and page count of the result, and that a 4D
    page wrapped in a list raises ``ValueError``.
    """
    # Detection stage
    det_bsize = 4
    det_preprocessor = PreProcessor(output_size=(512, 512), batch_size=det_bsize)
    det_model = detection.db_mobilenet_v3_large(
        pretrained=True,
        pretrained_backbone=False,
        input_shape=(512, 512, 3),
        assume_straight_pages=assume_straight_pages,
    )
    det_predictor = DetectionPredictor(det_preprocessor, det_model)

    # Recognition stage
    reco_bsize = 16
    reco_preprocessor = PreProcessor(
        output_size=(32, 128),
        batch_size=reco_bsize,
        preserve_aspect_ratio=True,
    )
    reco_model = recognition.crnn_vgg16_bn(
        pretrained=False,
        pretrained_backbone=False,
        vocab=mock_vocab,
    )
    reco_predictor = RecognitionPredictor(reco_preprocessor, reco_model)

    pages = DocumentFile.from_pdf(mock_pdf).as_images()

    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
    )

    # A crop orientation predictor is only expected when pages may be rotated
    if not assume_straight_pages:
        assert isinstance(predictor.crop_orientation_predictor, NestedObject)
    else:
        assert predictor.crop_orientation_predictor is None

    result = predictor(pages)
    assert isinstance(result, Document)
    assert len(result.pages) == 2

    # Dimension check
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        predictor([bad_page])
def _predictor(arch: str, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
    """Instantiate a recognition model by name and wrap it in a predictor.

    Args:
        arch: name of the architecture; must be listed in ``ARCHS``.
        pretrained: forwarded to the model constructor as ``pretrained``.
        **kwargs: forwarded to ``PreProcessor``. ``mean`` and ``std`` default
            to the values in the model's ``cfg``; ``batch_size`` defaults
            to 32.

    Returns:
        A ``RecognitionPredictor`` combining the pre-processor and the model.

    Raises:
        ValueError: if ``arch`` is not in ``ARCHS``.
    """
    if arch not in ARCHS:
        raise ValueError(f"unknown architecture '{arch}'")

    build_model = recognition.__dict__[arch]
    _model = build_model(pretrained=pretrained)

    # Caller-supplied values win; otherwise fall back to the model config
    kwargs.setdefault('mean', _model.cfg['mean'])
    kwargs.setdefault('std', _model.cfg['std'])
    kwargs.setdefault('batch_size', 32)

    # The slice of cfg['input_shape'] used as output size depends on the backend
    if is_tf_available():
        input_shape = _model.cfg['input_shape'][:2]
    else:
        input_shape = _model.cfg['input_shape'][-2:]

    pre_processor = PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs)
    return RecognitionPredictor(pre_processor, _model)
def test_detectionpredictor(mock_pdf):  # noqa: F811
    """Build a ``DetectionPredictor`` and check it on the mock PDF pages.

    The predictor must return one result per page (two for the mock PDF), and
    a 4D page wrapped in a list must raise ``ValueError``.

    Returns:
        The constructed ``DetectionPredictor``.
    """
    batch_size = 4
    preprocessor = PreProcessor(output_size=(512, 512), batch_size=batch_size)
    model = detection.db_resnet50(input_shape=(512, 512, 3))
    predictor = DetectionPredictor(preprocessor, model)

    pages = DocumentFile.from_pdf(mock_pdf).as_images()
    results = predictor(pages)

    # The input PDF has 2 pages
    assert len(results) == 2

    # Dimension check
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        predictor([bad_page])

    return predictor
def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
    """Wrap a recognition model, given by name or instance, in a predictor.

    Args:
        arch: either the name of an architecture listed in ``ARCHS``, or an
            instance of ``recognition.CRNN``, ``recognition.SAR`` or
            ``recognition.MASTER``.
        pretrained: forwarded to the model constructor as ``pretrained`` when
            ``arch`` is a name; not used when ``arch`` is an instance.
        **kwargs: forwarded to ``PreProcessor``. ``mean`` and ``std`` default
            to the values in the model's ``cfg``; ``batch_size`` defaults
            to 32.

    Returns:
        A ``RecognitionPredictor`` combining the pre-processor and the model.

    Raises:
        ValueError: if ``arch`` is an unknown name or an unsupported type.
    """
    if not isinstance(arch, str):
        # A model instance was passed in: accept only the known model classes
        supported_types = (recognition.CRNN, recognition.SAR, recognition.MASTER)
        if not isinstance(arch, supported_types):
            raise ValueError(f"unknown architecture: {type(arch)}")
        _model = arch
    elif arch in ARCHS:
        _model = recognition.__dict__[arch](pretrained=pretrained)
    else:
        raise ValueError(f"unknown architecture '{arch}'")

    # Caller-supplied values win; otherwise fall back to the model config
    kwargs.setdefault("mean", _model.cfg["mean"])
    kwargs.setdefault("std", _model.cfg["std"])
    kwargs.setdefault("batch_size", 32)

    # The slice of cfg["input_shape"] used as output size depends on the backend
    if is_tf_available():
        input_shape = _model.cfg["input_shape"][:2]
    else:
        input_shape = _model.cfg["input_shape"][-2:]

    pre_processor = PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs)
    return RecognitionPredictor(pre_processor, _model)