Exemplo n.º 1
0

def document_version_ocr_submit(self):
    task_do_ocr.apply_async(args=[self.pk], queue='ocr')


@receiver(post_version_upload, dispatch_uid='post_version_upload_ocr', sender=DocumentVersion)
def post_version_upload_ocr(sender, instance, **kwargs):
    logger.debug('received post_version_upload')
    logger.debug('instance pk: %s', instance.pk)
    if instance.document.document_type.ocr:
        instance.submit_for_ocr()


Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

register_model_list_columns(DocumentVersionOCRError, [
    {
        'name': _('Document'), 'attribute': encapsulate(lambda entry: document_link(entry.document_version.document))
    },
    {
        'name': _('Added'), 'attribute': 'datetime_submitted'
    },
    {
Exemplo n.º 2
0
    def ready(self):
        super(OCRApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class('submit_for_ocr',
                                     document_version_ocr_submit)

        ModelPermission.register(model=Document,
                                 permissions=(permission_ocr_document,
                                              permission_ocr_content_view))

        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Document'),
                     func=lambda context: document_link(context[
                         'object'].document_version.document))
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Added'),
                     attribute='datetime_submitted')
        SourceColumn(source=DocumentVersionOCRError,
                     label=_('Result'),
                     attribute='result')

        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'), )

        app.conf.CELERY_ROUTES.update({
            'ocr.tasks.task_do_ocr': {
                'queue': 'ocr'
            },
        })

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('Content'))

        menu_facet.bind_links(links=(link_document_content, ),
                              sources=(Document, ))
        menu_multi_item.bind_links(links=(link_document_submit_multiple, ),
                                   sources=(Document, ))
        menu_object.bind_links(links=(link_document_submit, ),
                               sources=(Document, ))
        menu_object.bind_links(links=(link_document_type_ocr_settings, ),
                               sources=(DocumentType, ))
        menu_secondary.bind_links(
            links=(link_entry_list, ),
            sources=('ocr:entry_list', 'ocr:entry_delete_multiple',
                     'ocr:entry_re_queue_multiple', DocumentVersionOCRError))
        menu_tools.bind_links(links=(link_document_submit_all,
                                     link_document_type_submit,
                                     link_entry_list))

        post_save.connect(initialize_new_ocr_settings,
                          dispatch_uid='initialize_new_ocr_settings',
                          sender=DocumentType)
        post_version_upload.connect(post_version_upload_ocr,
                                    dispatch_uid='post_version_upload_ocr',
                                    sender=DocumentVersion)

        namespace = PropertyNamespace('ocr', _('OCR'))

        try:
            pdftotext = sh.Command(setting_pdftotext_path.value)
        except sh.CommandNotFound:
            namespace.add_property('pdftotext',
                                   _('pdftotext version'),
                                   _('not found'),
                                   report=True)
        except Exception:
            namespace.add_property('pdftotext',
                                   _('pdftotext version'),
                                   _('error getting version'),
                                   report=True)
        else:
            namespace.add_property('pdftotext',
                                   _('pdftotext version'),
                                   pdftotext('-v').stderr,
                                   report=True)

        try:
            tesseract = sh.Command(setting_tesseract_path.value)
        except sh.CommandNotFound:
            namespace.add_property('tesseract',
                                   _('tesseract version'),
                                   _('not found'),
                                   report=True)
        except Exception:
            namespace.add_property('tesseract',
                                   _('tesseract version'),
                                   _('error getting version'),
                                   report=True)
        else:
            namespace.add_property('tesseract',
                                   _('tesseract version'),
                                   tesseract('-v').stderr,
                                   report=True)
Exemplo n.º 3
0
    def ready(self):
        super(OCRApp, self).ready()

        APIEndPoint(app=self, version_string='1')

        Document.add_to_class('submit_for_ocr', document_ocr_submit)
        DocumentVersion.add_to_class(
            'submit_for_ocr', document_version_ocr_submit
        )

        ModelPermission.register(
            model=Document, permissions=(
                permission_ocr_document, permission_ocr_content_view
            )
        )

        SourceColumn(
            source=DocumentVersionOCRError, label=_('Document'),
            func=lambda context: document_link(context['object'].document_version.document)
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Added'),
            attribute='datetime_submitted'
        )
        SourceColumn(
            source=DocumentVersionOCRError, label=_('Result'),
            attribute='result'
        )

        app.conf.CELERY_QUEUES.append(
            Queue('ocr', Exchange('ocr'), routing_key='ocr'),
        )

        app.conf.CELERY_ROUTES.update(
            {
                'ocr.tasks.task_do_ocr': {
                    'queue': 'ocr'
                },
            }
        )

        document_search.add_model_field(
            field='versions__pages__ocr_content__content', label=_('Content')
        )

        menu_facet.bind_links(
            links=(link_document_content,), sources=(Document,)
        )
        menu_multi_item.bind_links(
            links=(link_document_submit_multiple,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_submit,), sources=(Document,)
        )
        menu_object.bind_links(
            links=(link_document_type_ocr_settings,), sources=(DocumentType,)
        )
        menu_secondary.bind_links(
            links=(link_entry_list,),
            sources=(
                'ocr:entry_list', 'ocr:entry_delete_multiple',
                'ocr:entry_re_queue_multiple', DocumentVersionOCRError
            )
        )
        menu_tools.bind_links(
            links=(
                link_document_submit_all, link_document_type_submit,
                link_entry_list
            )
        )

        post_save.connect(
            initialize_new_ocr_settings,
            dispatch_uid='initialize_new_ocr_settings', sender=DocumentType
        )
        post_version_upload.connect(
            post_version_upload_ocr, dispatch_uid='post_version_upload_ocr',
            sender=DocumentVersion
        )

        namespace = PropertyNamespace('ocr', _('OCR'))

        try:
            pdftotext = sh.Command(setting_pdftotext_path.value)
        except sh.CommandNotFound:
            namespace.add_property(
                'pdftotext', _('pdftotext version'), _('not found'),
                report=True
            )
        except Exception:
            namespace.add_property(
                'pdftotext', _('pdftotext version'),
                _('error getting version'), report=True
            )
        else:
            namespace.add_property(
                'pdftotext', _('pdftotext version'), pdftotext('-v').stderr,
                report=True
            )

        try:
            tesseract = sh.Command(setting_tesseract_path.value)
        except sh.CommandNotFound:
            namespace.add_property(
                'tesseract', _('tesseract version'), _('not found'),
                report=True
            )
        except Exception:
            namespace.add_property(
                'tesseract', _('tesseract version'),
                _('error getting version'), report=True
            )
        else:
            namespace.add_property(
                'tesseract', _('tesseract version'), tesseract('-v').stderr,
                report=True
            )
Exemplo n.º 4
0
def document_version_ocr_submit(self):
    task_do_ocr.apply_async(args=[self.pk], queue='ocr')


@receiver(post_version_upload,
          dispatch_uid='post_version_upload_ocr',
          sender=DocumentVersion)
def post_version_upload_ocr(sender, instance, **kwargs):
    logger.debug('received post_version_upload')
    logger.debug('instance pk: %s', instance.pk)
    if instance.document.document_type.ocr:
        instance.submit_for_ocr()


Document.add_to_class('submit_for_ocr', document_ocr_submit)
DocumentVersion.add_to_class('submit_for_ocr', document_version_ocr_submit)

class_permissions(Document, [PERMISSION_OCR_DOCUMENT])

register_tool(link_entry_list)

APIEndPoint('ocr')

register_model_list_columns(DocumentVersionOCRError, [
    {
        'name':
        _('Document'),
        'attribute':
        encapsulate(
            lambda entry: document_link(entry.document_version.document))
    },