Python get_cache примеры использования

Язык программирования: Python

Пространство имен/Пакет: extractors.cache

Метод/Функция: get_cache

Примеров на hotexamples.com: 4

Python get_cache — найдено 4 примера. Это лучшие примеры кода на Python для extractors.cache.get_cache, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: tesseract.py Проект: pudo/extractors

def extract_image_data(data, languages=None):
    """Run OCR over a binary string holding an image and return its text.

    The result of ``get_cache(data)`` is consulted first: when it carries a
    non-None text, that text is returned without running OCR. Otherwise the
    image is opened and handed to Tesseract, and the recognised text is
    stored with ``set_cache`` before being returned.

    :param data: binary string with the image contents.
    :param languages: language selection, passed through ``_get_languages``
        before being given to Tesseract.
    :returns: the cached or freshly recognised text; an empty string when
        the image cannot be opened or the OCR step raises.
    :raises ValueError: if ``TESSDATA_PREFIX`` is None.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError("Env TESSDATA_PREFIX is not set, OCR will not work.")

    key, cached = get_cache(data)
    if cached is not None:
        return cached

    # An unreadable image is only worth a debug line, not an error.
    try:
        image = Image.open(StringIO(data))
    except Exception as err:
        log.debug("Failed to parse image internally: %r", err)
        return ""

    # TODO: play with contrast and sharpening the images.
    try:
        languages = _get_languages(languages)
        ocr = Tesseract(TESSDATA_PREFIX, lang=languages)
        ocr.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
        recognised = ocr.ocr_image(image)
        log.debug("OCR done: %s, %s characters extracted", languages, len(recognised))
        set_cache(key, recognised)
    except Exception as err:
        # Failures here are logged with a traceback; nothing is cached.
        log.exception(err)
        return ""
    return recognised

Пример #2

Показать файл

def extract_image_data(data, languages=None):
    """Return the text recognised in an image given as a binary string.

    A non-None text from ``get_cache(data)`` short-circuits the function.
    On a cache miss the image is opened, OCR'd with Tesseract and the
    resulting text is written back with ``set_cache`` under the key that
    ``get_cache`` returned.

    :param data: binary string containing the image.
    :param languages: language selection; resolved via ``_get_languages``.
    :returns: recognised (or cached) text, or ``''`` if opening the image
        or running OCR raised an exception.
    :raises ValueError: when ``TESSDATA_PREFIX`` is None.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError('Env TESSDATA_PREFIX is not set, OCR will not work.')

    cache_key, cached_text = get_cache(data)
    if cached_text is not None:
        return cached_text

    try:
        stream = StringIO(data)
        picture = Image.open(stream)
    except Exception as exc:
        # Image could not be opened: report quietly and give up.
        log.debug('Failed to parse image internally: %r', exc)
        return ''

    # TODO: play with contrast and sharpening the images.
    try:
        langs = _get_languages(languages)
        engine = Tesseract(TESSDATA_PREFIX, lang=langs)
        engine.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
        ocr_text = engine.ocr_image(picture)
        char_count = len(ocr_text)
        log.debug('OCR done: %s, %s characters extracted', langs, char_count)
        set_cache(cache_key, ocr_text)
        return ocr_text
    except Exception as exc:
        # OCR errors are logged with a traceback; the failure is not cached.
        log.exception(exc)
        return ''

Пример #3

Показать файл

Файл: tesseract.py Проект: 01-/extractors

def extract_image_data(data, languages=None):
    """Extract text from a binary string of data containing an image.

    A non-None text from ``get_cache(data)`` is returned as-is. Otherwise
    the image is opened and passed to the extractor obtained from
    ``_get_tesseract()``; the UTF-8 text it produces is cached and returned.
    When the OCR step raises, the exception is logged and an empty string is
    both cached and returned.

    :param data: binary string containing the image.
    :param languages: accepted for interface compatibility; this body does
        not read it.
    :returns: the cached or recognised text, or ``''`` on OCR failure.
    :raises ValueError: if ``TESSDATA_PREFIX`` is None.

    Opening the image happens outside the ``try`` block, so an exception
    raised by ``Image.open`` propagates to the caller.
    """
    if TESSDATA_PREFIX is None:
        raise ValueError('Env TESSDATA_PREFIX is not set, OCR will not work.')

    cache_key, cached = get_cache(data)
    if cached is not None:
        return cached

    image = Image.open(StringIO(data))
    # TODO: play with contrast and sharpening the images.
    try:
        ocr = _get_tesseract()
        ocr.set_image(image)
        recognised = ocr.get_utf8_text()
        ocr.clear()
        set_cache(cache_key, recognised)
    except Exception as err:
        log.exception(err)
        # Cache the empty result as well, so the failure is remembered.
        set_cache(cache_key, '')
        return ''
    return recognised

Пример #4

Показать файл

Файл: crawl.py Проект: e6/ocr-crawl

def crawl_file(file_path):
    """OCR one file unless its extension is unsupported or it is cached.

    The file's extension (stripped and lowercased) must be a member of
    ``FILE_EXTENSIONS``; otherwise the function returns immediately. The
    file is read in binary mode and looked up with ``get_cache``; a
    non-None cached text means there is nothing left to do. Otherwise
    ``extract_image_data`` is run on the contents, the global ``processed``
    counter is incremented under ``counter_lock``, and a progress line with
    the average seconds per image since ``START_TIME`` is logged.

    :param file_path: path of the file to process.
    :returns: None in every case.
    """
    global processed
    _, ext = os.path.splitext(file_path)
    # Normalised once here; no second lower() is needed for the lookup.
    ext = ext.strip().lower()
    if ext not in FILE_EXTENSIONS:
        return
    with open(file_path, 'rb') as fh:
        data = fh.read()
    # Only the cached text matters here; the cache key is not used.
    _, text = get_cache(data)
    if text is not None:
        return
    text = extract_image_data(data)
    # The lock is released even if the statistics computation raises.
    with counter_lock:
        processed += 1
        elapsed = time.time() - START_TIME
        # Average seconds per image, matching the "s/img" unit in the log.
        secs_per_img = elapsed / processed
    log.info('Extracted: %s (%d characters of text), %.3fs/img', file_path,
             len(text), secs_per_img)