Python get_cache示例

编程语言: Python

命名空间/包名称: extractors.cache

方法/功能: get_cache

hotexamples.com的示例: 4

Python get_cache - 共找到4个示例。以下是从开源项目中提取的、评价最高的 extractors.cache.get_cache 真实Python用法示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： tesseract.py 项目： pudo/extractors

def extract_image_data(data, languages=None):
    """Run OCR over raw image bytes and return the recognised text.

    The result of ``get_cache(data)`` is consulted first; a non-``None``
    cached text is returned without touching the OCR engine. Text that is
    successfully extracted is stored with ``set_cache`` under the key that
    ``get_cache`` handed back.

    :param data: binary string holding the image.
    :param languages: language selection, passed through
        ``_get_languages`` before being given to Tesseract.
    :returns: the extracted text, or ``""`` when the image cannot be
        opened or any step of the OCR stage raises.
    :raises ValueError: if ``TESSDATA_PREFIX`` is ``None``.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError("Env TESSDATA_PREFIX is not set, OCR will not work.")

    key, cached = get_cache(data)
    if cached is not None:
        return cached

    try:
        image = Image.open(StringIO(data))
    except Exception as exc:
        # Unreadable input is not an error for the caller: report no text.
        log.debug("Failed to parse image internally: %r", exc)
        return ""

    # TODO: play with contrast and sharpening the images.
    try:
        langs = _get_languages(languages)
        ocr = Tesseract(TESSDATA_PREFIX, lang=langs)
        ocr.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
        result = ocr.ocr_image(image)
        log.debug("OCR done: %s, %s characters extracted", langs, len(result))
        # Caching stays inside the guarded region so that a cache failure
        # is logged and reported as "no text", like any other OCR failure.
        set_cache(key, result)
    except Exception as exc:
        log.exception(exc)
        return ""
    return result

示例#2

显示文件

def extract_image_data(data, languages=None):
    """Return the text found by OCR in an image given as a binary string.

    A cached text (any non-``None`` value from ``get_cache``) short-circuits
    the whole function. Otherwise the image is decoded, handed to Tesseract,
    and the extracted text is written back with ``set_cache``.

    :param data: binary string of image data.
    :param languages: language hint, resolved through ``_get_languages``.
    :returns: extracted text; ``''`` if the image cannot be opened or the
        OCR stage raises.
    :raises ValueError: when ``TESSDATA_PREFIX`` is ``None``.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError('Env TESSDATA_PREFIX is not set, OCR will not work.')

    cache_key, cached_text = get_cache(data)
    if cached_text is not None:
        return cached_text

    try:
        picture = Image.open(StringIO(data))
    except Exception as err:
        log.debug('Failed to parse image internally: %r', err)
        return ''

    # TODO: play with contrast and sharpening the images.
    try:
        resolved = _get_languages(languages)
        engine = Tesseract(TESSDATA_PREFIX, lang=resolved)
        engine.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
        ocr_text = engine.ocr_image(picture)
        char_count = len(ocr_text)
        log.debug('OCR done: %s, %s characters extracted', resolved,
                  char_count)
        # Kept inside the try: a failing cache write is handled exactly
        # like a failing OCR run (logged, empty string returned).
        set_cache(cache_key, ocr_text)
        return ocr_text
    except Exception as err:
        log.exception(err)
        return ''

示例#3

显示文件

文件： tesseract.py 项目： 01-/extractors

def extract_image_data(data, languages=None):
    """Extract text from a binary string of data containing an image in
    a commonly-used format.

    A non-``None`` text returned by ``get_cache(data)`` is returned
    directly. Otherwise the image is opened and passed to the extractor
    obtained from ``_get_tesseract()``; the outcome is stored with
    ``set_cache`` -- the extracted text on success, ``''`` on failure.

    :param data: binary string holding the image.
    :param languages: accepted for interface compatibility; not used by
        this implementation.
    :returns: the extracted text, or ``''`` if the OCR stage raises.
    :raises ValueError: if ``TESSDATA_PREFIX`` is ``None``.

    Errors raised by ``Image.open`` are not caught here and propagate to
    the caller.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError('Env TESSDATA_PREFIX is not set, OCR will not work.')
    key, text = get_cache(data)
    if text is not None:
        return text
    img = Image.open(StringIO(data))
    # TODO: play with contrast and sharpening the images.
    try:
        extractor = _get_tesseract()
        try:
            extractor.set_image(img)
            text = extractor.get_utf8_text()
        finally:
            # Always reset the extractor, including when recognition
            # fails; previously clear() was skipped on error.
            # NOTE(review): assumes clear() is safe to call after a
            # failed set_image() -- confirm against the OCR binding.
            extractor.clear()
        set_cache(key, text)
        return text
    except Exception as ex:
        log.exception(ex)
        # Remember the failure so the same data is not retried.
        set_cache(key, '')
        return ''

示例#4

显示文件

文件： crawl.py 项目： e6/ocr-crawl

def crawl_file(file_path):
    """OCR a single file and log throughput statistics.

    Files whose extension (stripped and lower-cased) is not in
    ``FILE_EXTENSIONS`` are ignored, as are files for which ``get_cache``
    already returns a non-``None`` text. For every other file the content
    is passed to ``extract_image_data``, the module-level ``processed``
    counter is incremented under ``counter_lock``, and the average number
    of seconds per processed image since ``START_TIME`` is logged.

    :param file_path: path of the file to process.
    :returns: ``None``.
    """
    global processed
    _, ext = os.path.splitext(file_path)
    if ext.strip().lower() not in FILE_EXTENSIONS:
        return
    with open(file_path, 'rb') as fh:
        data = fh.read()
    _, cached = get_cache(data)
    if cached is not None:
        return
    text = extract_image_data(data)
    # The context manager releases the lock on every exit path.
    with counter_lock:
        processed += 1
        # Elapsed time divided by image count: seconds per image, which
        # matches the "s/img" unit in the log message below.
        sec_per_img = (time.time() - START_TIME) / processed
    log.info('Extracted: %s (%d characters of text), %.3fs/img', file_path,
             len(text), sec_per_img)