Пример #1
0
def delivery_type(page: pdf.PageObject) -> dict:
    """Classify a PDF page by looking at its extracted text.

    Returns a dict with two keys:

    * ``"type"``   -- ``"Label"``, ``"Empty Page"`` or ``"Shipping List"``
    * ``"amount"`` -- for a label page, how many times the word
      ``Tracking`` occurs in the text; ``0`` for an empty page; ``1``
      for a shipping list.

    The checks run in that order, so a page containing ``Tracking`` is
    always reported as a label and the blank-page test is never reached.
    """
    # One scan gives both the "is it a label?" answer and the count.
    tracking_hits = re.findall("Tracking", page.extractText())
    if tracking_hits:
        return {"type": "Label", "amount": len(tracking_hits)}

    if is_it_blank_page(page):
        return {"type": "Empty Page", "amount": 0}

    # Anything that is neither a label nor blank is a shipping list.
    return {"type": "Shipping List", "amount": 1}
Пример #2
0
def is_it_blank_page(page: pdf.PageObject) -> bool:
    """Return True when *page* has no extractable text and no XObjects.

    A page counts as blank when ``page.extractText()`` yields an empty
    string and its ``/Resources`` dictionary has no ``/XObject`` entry.
    A page that carries no ``/Resources`` entry at all is treated as
    having no XObjects, so it is reported as blank instead of raising
    ``KeyError``.
    """
    # Any text at all means the page is not blank; skip the resource lookup.
    if page.extractText() != '':
        return False

    try:
        # Subscript access (not .get) so the page object's own item
        # lookup is used, exactly as in the original check.
        resources = page['/Resources']
    except KeyError:
        # No resource dictionary -> nothing the page could draw.
        return True

    return '/XObject' not in resources
Пример #3
0
from PyPDF2.pdf import PageObject
from PyPDF2 import PdfFileReader, PdfFileWriter

file_path = "Daycoval.pdf"
pdf = PdfFileReader(file_path)

# Dump the extracted text of every page of the PDF into one text file.
# Each page is written as a "Page N" header (1-based), a 100-dash rule,
# and then the page text.  The encoding is fixed to UTF-8 so non-ASCII
# text from the PDF cannot fail on platforms with a narrower default.
with open('Daycoval.txt', 'w', encoding='utf-8') as f:
    for page_num in range(pdf.numPages):
        print('Page: {0}'.format(page_num))
        # Named `page` so the imported PageObject class is not rebound.
        page = pdf.getPage(page_num)

        try:
            txt = page.extractText()
        except Exception as exc:
            # Best effort: a page that cannot be extracted is skipped,
            # but the failure is reported instead of silently dropped.
            print('Skipping page {0}: {1}'.format(page_num, exc))
            continue

        print(''.center(100, '-'))
        f.write('Page {0}\n'.format(page_num +1))
        f.write(''.center(100, '-'))
        f.write(txt)
    # No explicit f.close(): the with-statement closes the file.