Пример #1
0
 def merge(self, pdf_one, pdf_two, filename='my.pdf', output_dir='D:/pdf/'):
     '''
     Interleave two scanned PDFs back into their original page order.

     ``pdf_one`` holds the scanned front sides and is read first page to
     last; ``pdf_two`` holds the scanned back sides and is read last page
     to first. The output alternates one page from each.

     :param pdf_one: path of the PDF with the front sides
     :param pdf_two: path of the PDF with the back sides
     :param filename: name of the merged file
     :param output_dir: prefix prepended verbatim to ``filename`` (no path
         separator is inserted)
     :return: None
     '''
     # Context managers make sure all three files are closed even when
     # reading or writing raises.
     with open(pdf_one, 'rb') as input_one, open(pdf_two, 'rb') as input_two:
         pdf_input_one = PdfFileReader(input_one)
         pdf_input_two = PdfFileReader(input_two)
         numOne = pdf_input_one.getNumPages()
         numTwo = pdf_input_two.getNumPages()
         print(numOne, numTwo)
         pdf_output = PdfFileWriter()
         for index_one in range(numOne):
             # Walk the back sides from the end towards the start.
             # NOTE(review): if pdf_two has fewer pages than pdf_one this
             # index becomes negative -- confirm how getPage treats that.
             index_two = numTwo - 1 - index_one
             print(index_one, index_two)
             pdf_output.addPage(pdf_input_one.getPage(index_one))
             pdf_output.addPage(pdf_input_two.getPage(index_two))
         pdf_name = output_dir + filename
         # The inputs stay open until the writer has finished writing.
         with open(pdf_name, 'wb') as output_stream:
             pdf_output.write(output_stream)
     print('Done!')
Пример #2
0
def generate_images(path, save_dir_name, is_train):
    """Render every page of a PDF to PNG files under ``png_files/``.

    The images are written to
    ``png_files/<train_images|test_images>/<save_dir_name>_annotated_images``
    and named ``<pdf stem>_<n>.png`` with ``n`` starting at 1.

    :param path: path of the PDF file to convert
    :param save_dir_name: prefix of the output directory name
    :param is_train: truthy selects ``train_images/``, falsy ``test_images/``
    :return: None
    """
    subset_dir = 'train_images/' if is_train else 'test_images/'
    save_directory_path = 'png_files/' + subset_dir + save_dir_name + '_annotated_images'

    # makedirs also creates the intermediate 'png_files/' directory.
    if not os.path.exists(save_directory_path):
        os.makedirs(save_directory_path)

    filename = path
    print("Converting " + filename + " from pdf to PNG...")

    # Close the PDF handle as soon as the page count is known.
    with open(filename, mode="rb") as pdf_file:
        reader = PdfFileReader(pdf_file)
        try:
            page_number = reader.getNumPages()
        except Exception:
            # Retry once; the original code marks a failing first call as a
            # PyPDF2 bug.
            page_number = reader.getNumPages()

    stem = os.path.splitext(os.path.basename(filename))[0]
    with tempfile.TemporaryDirectory() as tmp_dir:
        images_from_path = convert_from_path(filename, dpi=72, output_folder=tmp_dir, last_page=page_number, first_page=0)
        # Save while the temporary directory still exists: with
        # output_folder set, the converted pages live in files inside it.
        for page_index, page in enumerate(images_from_path, start=1):
            base_filename = stem + '_' + str(page_index) + '.png'
            page.save(os.path.join(save_directory_path, base_filename), 'PNG')

    print('PDF file successfully converted.')
Пример #3
0
 def test_cat(self):
     """Check that `cat` creates the output file and that it has six pages."""
     stapler_args = ['cat', ONEPAGE_PDF, FIVEPAGE_PDF, self.outputfile]
     run_stapler(stapler_args)
     output_exists = os.path.isfile(self.outputfile)
     self.assertTrue(output_exists)
     with open(self.outputfile, 'rb') as merged_stream:
         page_count = PdfFileReader(merged_stream).getNumPages()
         self.assertEqual(page_count, 6)
Пример #4
0
def PdfPrettyPrint(inputname, outputname):
    """Reorder the pages of a PDF in groups of four and write the result.

    For each group of four consecutive input pages ``i, i+1, i+2, i+3`` the
    output order is ``i, i+2, i+1, i+3``; pages ``i+1`` and ``i+3`` are
    rotated by 180 degrees. Positions of the last group that have no input
    page are filled with blank pages.

    :param inputname: path of the PDF to read
    :param outputname: path of the PDF to write
    :return: True
    """
    # (offset inside the group, rotate by 180 degrees?) in output order.
    layout = ((0, False), (2, False), (1, True), (3, True))

    # Both files are closed even if reading or writing raises.
    with open(inputname, 'rb') as inputfile:
        wrt = PdfFileWriter()
        ipt = PdfFileReader(inputfile)
        pdfnums = ipt.getNumPages()
        for group_start in range(0, pdfnums, 4):
            for offset, rotate in layout:
                page_index = group_start + offset
                if page_index < pdfnums:
                    page = ipt.getPage(page_index)
                    if rotate:
                        page.rotateClockwise(180)
                    wrt.addPage(page)
                else:
                    wrt.addBlankPage()
        # The input stays open until the writer has finished writing.
        with open(outputname, "wb") as fl:
            wrt.write(fl)
    return True
Пример #5
0
def readPDFfile(infile):
    """Return the extracted text of every page of a PDF.

    :param infile: path of the PDF file to read
    :return: the text of all pages, each page followed by a newline
    """
    # The file is opened here (the original call had unbalanced parentheses
    # and no open()) and is closed once extraction is done.
    with open(infile, "rb") as stream:
        pdf = PdfFileReader(stream)
        num = pdf.getNumPages()
        return "".join(
            pdf.getPage(i).extractText() + "\n" for i in range(num)
        )
def getDataUsingPyPdf2(filename):
    """Return the extracted text of every page of a PDF.

    :param filename: path of the PDF file to read
    :return: the text of all pages, each page followed by a newline
    """
    # Use a context manager so the file handle is not leaked.
    with open(filename, "rb") as stream:
        pdf = PdfFileReader(stream)
        page_texts = [
            pdf.getPage(i).extractText() for i in range(pdf.getNumPages())
        ]
    return "".join(text + "\n" for text in page_texts)
Пример #7
0
def searchPDF(filename, search_term):
    """Find the pages of a PDF whose extracted text contains a term.

    Both the term and the page text are lower-cased before comparing.

    :param filename: path of the PDF file to search
    :param search_term: text to look for
    :return: list of 1-based page numbers that contain the term
    """
    search_term = search_term.lower()
    # Use a context manager so the file handle is not leaked.
    with open(filename, "rb") as stream:
        pdf = PdfFileReader(stream)
        return [
            i + 1
            for i in range(pdf.getNumPages())
            if search_term in pdf.getPage(i).extractText().lower()
        ]
Пример #8
0
 def __init__(self, path):
     """Read the document info and page count of the PDF at ``path``.

     Failures are printed rather than raised; ``pages`` and ``meta_data``
     then keep whatever was set before the failure (``None`` initially).

     :param path: path of the PDF file
     """
     self.path = path
     self.pages = None
     self.meta_data = None
     try:
         with open(self.path, 'rb') as fp:
             pdf = PdfFileReader(fp)
             self.meta_data = pdf.getDocumentInfo()
             self.pages = pdf.getNumPages()
     except (IOError, TypeError) as e:
         print(e)
     except Exception as e:
         # Catch Exception rather than everything, so KeyboardInterrupt
         # and SystemExit are no longer swallowed.
         print("Unexpected error:", type(e))
Пример #9
0
def clickOK():
    """Print the page count of the selected PDF and the text of one page.

    The file name is ``selectPDF.get() + '.pdf'``; the page to show is the
    0-based index read from ``entry1``.
    """
    # Use a context manager so the file handle is not leaked.
    with open(selectPDF.get() + '.pdf', 'rb') as pdf_file:
        reader = PdfFileReader(pdf_file)
        page_count = reader.getNumPages()
        pprint.pprint(page_count)

        # presumably entry1.get() returns text (e.g. a Tk entry) -- the
        # original compared it to an int directly, which never matches a
        # string. Convert once up front; verify against the widget type.
        try:
            wanted_page = int(entry1.get())
        except (TypeError, ValueError):
            print("except")
            return

        ageList = []
        # Only indexes inside the document can match, as in the original
        # loop over range(0, page_count).
        if 0 <= wanted_page < page_count:
            try:
                ageList.append(reader.getPage(wanted_page).extractText())
                pprint.pprint(ageList[0])
            except Exception:
                print("except")
Пример #10
0
 def generate_images_for_lecture(self, lecture_instance, file_pdf):
     """Create one ``Image`` object per page of ``file_pdf`` for a lecture.

     For every page index reported by ``PdfFileReader`` the page is opened
     with PythonMagick through the ``<path>[<index>]`` specifier and saved
     into the object's ``image`` field as ``<basename>-page-<index>.jpeg``.

     :param lecture_instance: value assigned to each object's ``lecture``
     :param file_pdf: object exposing ``path`` and ``name`` for the PDF
     """
     reader = PdfFileReader(file_pdf)
     total_pages = reader.getNumPages()
     for page_index in range(total_pages):
         index_text = str(page_index)
         page_specifier = file_pdf.path + '[' + index_text + ']'
         print(page_specifier)
         rendered = PythonMagick.Image(page_specifier)

         lecture_page = Image()
         lecture_page.lecture = lecture_instance
         target_name = basename(file_pdf.name) + "-page-" + index_text + ".jpeg"
         lecture_page.image.save(target_name, File(rendered))
         print("here")
         print(lecture_page)
Пример #11
0
def crop(pdf_in, pdf_out):
    """
    Crop every page of a PDF to the paper size reported for that page.

    :param pdf_in: absolute path to the source PDF
    :param pdf_out: absolute path for the resulting PDF
    :return: status (always True)
    :raises ValueError: if the press named for a page is not present in
        ``PrintingPress._registry``
    """
    # NOTE: pdf_out is a temporary second parameter. In production the
    # function is meant to save the crop result into the same file.
    status = True

    # Paper size for each page, looked up below by 1-based page number
    papers = analyze_papersize(pdf_in)  # like {1: ('Speedmaster', 900, 640), 2: ('Dominant', 640, 450)}

    # TODO: rework this temporary crop fallback used when there is no
    # paper size information.
    if papers == {}:
        # NOTE(review): the paths are interpolated unquoted into a shell
        # command line; paths with spaces or shell metacharacters will break.
        perl_crop = "perl pdfcrop.pl {} {}".format(pdf_in, pdf_out)
        os.system(perl_crop)
        return status

    # open() instead of the Python-2-only file(); the context managers
    # close both files even when an exception is raised.
    with open(pdf_in, "rb") as source_stream:
        reader = PdfFileReader(source_stream)
        output = PdfFileWriter()

        # Number of pages
        pages_qty = reader.getNumPages()

        for index in range(pages_qty):
            paper_info = papers[index + 1]
            paper_machine = paper_info[0]
            paper_w = paper_info[1]
            paper_h = paper_info[2]

            # Reset per page so a missing press is detected instead of
            # silently reusing the previous page's press (or hitting an
            # unbound name on the first page). The last match wins.
            machine = None
            for m in PrintingPress._registry:
                if paper_machine == m.name:
                    machine = m
            if machine is None:
                raise ValueError(
                    'Unknown printing press {!r} for page {}'.format(
                        paper_machine, index + 1))

            plate_w = machine.plate_w

            page = reader.getPage(index)

            # EXAMPLE
            # The resulting document has a trim box that is 200x200 points
            # and starts at 25,25 points inside the media box.
            # The crop box is 25 points inside the trim box.
            #   print mm(page.mediaBox.getUpperRight_x()), mm(page.mediaBox.getUpperRight_y())
            #   page.trimBox.lowerLeft = (25, 25)
            #   page.trimBox.upperRight = (225, 225)
            #   page.cropBox.lowerLeft = (50, 50)
            #   page.cropBox.upperRight = (200, 200)

            # Single-argument print() call works on both Python 2 and 3.
            print('Crop page {} to paper {}x{}'.format(index+1, paper_w, paper_h))
            page.mediaBox.lowerLeft = ((pt(plate_w - paper_w)/2), pt(machine.klapan))  # left offset, bottom offset
            page.mediaBox.upperRight = (pt(paper_w + (plate_w - paper_w)/2), pt(paper_h + machine.klapan))  # width+offset, height+offset

            output.addPage(page)

        # The source stays open until the writer has finished writing.
        with open(pdf_out, "wb") as outputstream:
            output.write(outputstream)

    return status