def MergePDF(self, params):
    """Merge numbered PDF files into a single output PDF.

    Reads ``<filepath>0.pdf`` .. ``<filepath>{fileCount-1}.pdf`` in order and
    writes all of their pages to ``<filepath><outfile>``.

    Args:
        params: mapping with the keys ``'filepath'`` (path prefix),
            ``'fileCount'`` (number of numbered input files) and
            ``'outfile'`` (name appended to ``filepath`` for the result).

    Returns:
        None. Nothing is written when ``self.getFileName`` reports no files.
    """
    pdf_fileName = self.getFileName(params['filepath'])
    if len(pdf_fileName) < 1:
        print('there is not any files')
        return

    output = PdfFileWriter()
    outputPages = 0
    # The reader pulls page data from its stream while the writer serialises,
    # so every source must stay open until the output has been written.
    sources = []
    try:
        for i in range(0, params['fileCount']):
            filename = params['filepath'] + str(i) + '.pdf'
            print('*********************%s************************' % i)

            # Read the source PDF.
            stream = open(filename, "rb")
            sources.append(stream)
            reader = PdfFileReader(stream)

            # An encrypted PDF must be decrypted before pyPdf can use it.
            if reader.isEncrypted:
                reader.decrypt("map")

            # Number of pages in this source file.
            pageCount = reader.getNumPages()
            outputPages += pageCount
            print(pageCount)

            # Append every page to the output document.
            for iPage in range(0, pageCount):
                output.addPage(reader.getPage(iPage))

        print("All Pages Number:" + str(outputPages))
        # Finally write the merged PDF.
        filePath = params['filepath'] + params['outfile']
        with open(filePath, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in sources:
            stream.close()
    print("finished")
示例#2
0
 def render(self):
     """Build the final PDF and return it as a rewound temporary file.

     The first generated page is merged with page 0 of ``lib/kfza_base.pdf``,
     followed by every page of ``lib/<self.base_pdf>``. When
     ``self.context.course.extra_questions`` is truthy, the second generated
     page (also merged with the base page) is appended.

     Returns:
         A ``TemporaryFile`` positioned at offset 0 containing the document.
     """
     output = PdfFileWriter()
     base1_path = "%s/lib/%s" % (path.dirname(__file__), "kfza_base.pdf")
     # Both source files are closed even if reading or writing fails; the
     # write happens while they are still open.
     with open(base1_path, 'rb') as base1:
         b1_pdf = PdfFileReader(base1)
         wm = b1_pdf.getPage(0)
         p1 = PdfFileReader(self.generate_page_one())
         page1 = p1.getPage(0)
         page1.mergePage(wm)
         output.addPage(page1)
         bpdf = "%s/lib/%s" % (path.dirname(__file__), self.base_pdf)
         with open(bpdf, 'rb') as pdf:
             pf = PdfFileReader(pdf)
             if pf.isEncrypted:
                 pf.decrypt('')
             for page in range(pf.getNumPages()):
                 output.addPage(pf.getPage(page))
             if self.context.course.extra_questions:
                 # A fresh reader gives an unmerged copy of the base page.
                 b1_pdf = PdfFileReader(base1)
                 wm = b1_pdf.getPage(0)
                 p1 = PdfFileReader(self.generate_page_one())
                 page1 = p1.getPage(1)
                 page1.mergePage(wm)
                 output.addPage(page1)
             ntf = TemporaryFile()
             try:
                 output.write(ntf)
             except Exception:
                 # Do not leak the temporary file when serialisation fails.
                 ntf.close()
                 raise
     ntf.seek(0)
     return ntf
示例#3
0
def MergePDF(filepath,outfile):
    """Merge every PDF reported by ``getFileName(filepath)`` into one file.

    Entries whose name does not contain ``".pdf"`` are skipped. The merged
    document is written to ``filepath + outfile``.

    Args:
        filepath: location handed to ``getFileName`` and used as the prefix
            of the output path.
        outfile: name appended to ``filepath`` for the merged document.
    """
    output = PdfFileWriter()
    outputPages = 0
    # Sources stay open until the output is written because the writer pulls
    # page data from the readers' streams; they are all closed afterwards.
    sources = []
    try:
        for each in getFileName(filepath):
            print("file:" + each)
            if each.find(".pdf") < 0:
                continue
            # Read the source PDF.
            stream = open(each, "rb")
            sources.append(stream)
            reader = PdfFileReader(stream)

            # An encrypted PDF must be decrypted before pyPdf can use it.
            if reader.isEncrypted:
                reader.decrypt("map")

            # Number of pages in this source file.
            pageCount = reader.getNumPages()
            outputPages += pageCount
            print(pageCount)

            # Append every page to the output document.
            for iPage in range(0, pageCount):
                output.addPage(reader.getPage(iPage))

        print("All Pages Number:" + str(outputPages))
        # Finally write the merged PDF.
        with open(filepath + outfile, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in sources:
            stream.close()
    print("finished")
示例#4
0
文件: yapot.py 项目: akash0675/yapot
def _get_images_from_pdf(pdf_filename, resolution, verbose, delete_files,
        temp_dir, make_thumbs, thumb_size, thumb_dir, thumb_prefix, pool_count=1):
    """Split a PDF into one-page PDFs and dispatch workers to convert them.

    Each page is written to ``<temp_dir>/document-page-<n>.pdf`` and its
    index is put on the module-level ``__pdf_queue``. ``pool_count`` worker
    tasks running ``_pdf_converter_worker`` are then submitted, and the call
    blocks until ``__pdf_texts`` holds one entry per page.

    NOTE(review): if a worker fails without producing a result, the wait
    loop never terminates. That behaviour is inherited from the original.

    Returns:
        True when every page was processed. False when an exception was
        raised, in which case its text is printed.
    """
    success = False
    try:

        if verbose:
            print("Splitting PDF into multiple pdf's for processing ...")

        # make sure there is a place to put our temporary pdfs
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        # make sure the thumbnail folder exists if thumbs are requested
        if make_thumbs and not os.path.exists(thumb_dir):
            os.makedirs(thumb_dir)

        # read input pdf; the handle is only needed while splitting
        with open(pdf_filename, "rb") as pdf_file:
            inputpdf = PdfFileReader(pdf_file)
            if inputpdf.getIsEncrypted():
                inputpdf.decrypt('')

            # cached so the wait loop below does not touch the closed file
            page_count = inputpdf.numPages

            if verbose:
                print("Writing out %i pages ..." % page_count)

            # create all of the temporary pdfs
            for i in range(page_count):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(i))
                filename = "{0}/document-page-{1}.pdf".format(temp_dir, i)
                with open(filename, "wb") as outputStream:
                    output.write(outputStream)
                __pdf_queue.put(i)

        if verbose:
            print("Dispatching pdf workers ...")

        # spin up our workers to convert the pdfs to images
        pool = Pool()
        worker_args = [
            (x, resolution, verbose, delete_files,
             temp_dir, make_thumbs, thumb_size,
             thumb_dir, thumb_prefix)
            for x in range(pool_count)
        ]
        pool.map_async(_pdf_converter_worker, worker_args)
        # No further tasks are submitted, so let the pool's processes exit
        # once the queued work is done.
        pool.close()

        while __pdf_texts.qsize() != page_count:
            time.sleep(.25)

        if verbose:
            print("Done converting PDF.")

        success = True

    except Exception as e:
        print(str(e))

    return success
示例#5
0
def merge_pdf(new_filename, pdfs, encryp=False, user_pw="", owner_pw=None, lvl=128):
    """Merge several PDFs into one file called ``new_filename``.

    Args:
        new_filename: path of the merged PDF to create.
        pdfs: iterable of ``(path, password)`` tuples; the password is only
            used when the corresponding file is encrypted.
        encryp: when true, the merged document is encrypted.
        user_pw: user password for the merged document.
        owner_pw: owner password for the merged document.
        lvl: ``128`` requests 128-bit encryption; any other value passes
            ``False`` as the third argument of ``PdfFileWriter.encrypt``.

    Raises:
        PasswordError: if an encrypted input cannot be decrypted with the
            password supplied for it.
    """
    output = PdfFileWriter()
    # Inputs stay open until the output is written because the writer pulls
    # page data from the readers' streams; they are all closed afterwards.
    sources = []
    try:
        for path, pw in pdfs:
            stream = open(path, "rb")
            sources.append(stream)
            pdf = PdfFileReader(stream)

            if pdf.isEncrypted and pdf.decrypt(pw) == 0:
                raise PasswordError

            for page_num in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(page_num))

        if encryp:
            output.encrypt(user_pw, owner_pw, lvl == 128)

        # The output is opened exactly once; the original opened it twice
        # and leaked the second handle.
        with open(new_filename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in sources:
            stream.close()
示例#6
0
    def OCR(self, fn, resolution=300, verbose=False, part=''):
        """OCR every page of the PDF ``fn`` and return the combined result.

        Each page is written to a temporary one-page PDF, rendered to a JPEG
        with PythonMagick at ``resolution`` and passed to
        ``image_to_string``. Temporary files are named
        ``tmp-<part>-<page>.pdf`` / ``.jpg`` in the current directory and are
        removed even when processing a page fails.

        Args:
            fn: path of the PDF to process.
            resolution: density handed to PythonMagick for rendering.
            verbose: when true, print the number of each page processed.
            part: tag inserted into the temporary file names.

        Returns:
            An ``OCRResult`` built from the raw text, the cleaned text, the
            page count and the per-page ``OCRPage`` list. ``False`` is
            returned when the PDF is encrypted and cannot be decrypted with
            an empty password.
        """
        part = str(part)
        pagedata = []
        text = ''

        with open(fn, 'rb') as source:
            pdf = PdfFileReader(source)
            if pdf.getIsEncrypted() and not pdf.decrypt(''):
                return False

            for i, p in enumerate(pdf.pages, 1):

                if verbose:
                    print(' --- ' + str(i))

                # Temporary filenames for ImageMagick conversion
                pgfile = 'tmp-' + part + '-' + str(i) + '.pdf'
                pgfilejpg = 'tmp-' + part + '-' + str(i) + '.jpg'

                try:
                    # Write this page out as its own PDF
                    output = PdfFileWriter()
                    output.addPage(p)
                    with open(pgfile, 'wb') as outputStream:
                        output.write(outputStream)

                    # Convert this page to a high-resolution JPEG
                    img = PythonMagick.Image()
                    img.density(str(resolution))
                    img.read(pgfile)
                    img.write(pgfilejpg)

                    # OCR the converted JPG; a fourth band is dropped first
                    im = Image.open(pgfilejpg)
                    if len(im.split()) == 4:
                        r, g, b, a = im.split()
                        im = Image.merge('RGB', (r, g, b))

                    t = image_to_string(im)
                finally:
                    # Cleanup, also when one of the steps above raised
                    for leftover in (pgfile, pgfilejpg):
                        if os.path.exists(leftover):
                            os.remove(leftover)

                # Add to data object
                pagedata.append(OCRPage(i, t, self.OCRCleanup(t)))
                text += t

        # Produce the output data object
        result = OCRResult(text, self.OCRCleanup(text), len(pagedata), pagedata)

        return result
示例#7
0
文件: OCRPDF.py 项目: bdheath/OCRPDF
	def OCR(self, fn, resolution=300, verbose=False, part=''):
		"""OCR every page of the PDF ``fn`` and return the combined result.

		Each page is written to a temporary one-page PDF, rendered to a JPEG
		with PythonMagick at ``resolution`` and passed to
		``image_to_string``. Temporary files are named
		``tmp-<part>-<page>.pdf`` / ``.jpg`` in the current directory and are
		removed even when processing a page fails.

		Args:
			fn: path of the PDF to process.
			resolution: density handed to PythonMagick for rendering.
			verbose: when true, print the number of each page processed.
			part: tag inserted into the temporary file names.

		Returns:
			An ``OCRResult`` built from the raw text, the cleaned text, the
			page count and the per-page ``OCRPage`` list. ``False`` is
			returned when the PDF is encrypted and cannot be decrypted with
			an empty password.
		"""
		part = str(part)
		pagedata = []
		text = ''

		with open(fn, 'rb') as source:
			pdf = PdfFileReader(source)
			if pdf.getIsEncrypted() and not pdf.decrypt(''):
				return False

			for i, p in enumerate(pdf.pages, 1):

				if verbose:
					print(' --- ' + str(i))

				# Temporary filenames for ImageMagick conversion
				pgfile = 'tmp-' + part + '-' + str(i) + '.pdf'
				pgfilejpg = 'tmp-' + part + '-' + str(i) + '.jpg'

				try:
					# Write this page out as its own PDF
					output = PdfFileWriter()
					output.addPage(p)
					with open(pgfile, 'wb') as outputStream:
						output.write(outputStream)

					# Convert this page to a high-resolution JPEG
					img = PythonMagick.Image()
					img.density(str(resolution))
					img.read(pgfile)
					img.write(pgfilejpg)

					# OCR the converted JPG; a fourth band is dropped first
					im = Image.open(pgfilejpg)
					if len(im.split()) == 4:
						r, g, b, a = im.split()
						im = Image.merge('RGB', (r, g, b))

					t = image_to_string(im)
				finally:
					# Cleanup, also when one of the steps above raised
					for leftover in (pgfile, pgfilejpg):
						if os.path.exists(leftover):
							os.remove(leftover)

				# Add to data object
				pagedata.append(OCRPage(i, t, self.OCRCleanup(t)))
				text += t

		# Produce the output data object
		result = OCRResult(text, self.OCRCleanup(text), len(pagedata), pagedata)

		return result
def download_pdf(url, output_dir='/home/hjiang/pmscrapy/pdf_folder'):
    """Download the PDF at ``url`` and save a re-written copy of it.

    The document is fetched into memory, decrypted with an empty password
    when it is flagged as encrypted, and every page is written to
    ``<output_dir>/<basename(url)>``.

    Args:
        url: address of the PDF to fetch.
        output_dir: directory receiving the copy; defaults to the location
            that was previously hard-coded.
    """
    writer = PdfFileWriter()

    response = urlopen(Request(url))
    try:
        remoteFile = response.read()
    finally:
        response.close()
    memoryFile = StringIO(remoteFile)
    pdfFile = PdfFileReader(memoryFile)

    if pdfFile.isEncrypted:
        pdfFile.decrypt('')

    for pageNum in range(pdfFile.getNumPages()):
        writer.addPage(pdfFile.getPage(pageNum))

    # ``with`` closes the output file even if serialisation fails.
    with open('%s/%s' % (output_dir, basename(url)), "wb") as outputStream:
        writer.write(outputStream)
示例#9
0
    def _get_pdf_reader(self, pdf_stream):
        """Wrap ``pdf_stream`` in a ``PdfFileReader``, decrypting if needed.

        An encrypted document is decrypted with the empty password.

        Raises:
            ValueError: when decryption with the empty password fails; the
                failure is also reported through ``self._logger``.
        """
        reader = PdfFileReader(pdf_stream)
        if not reader.isEncrypted:
            return reader

        outcome = reader.decrypt("")
        if outcome == 0:
            self._logger.error("Failed to decrypt PDF file.")
            raise ValueError('Failed to decrypt PDF file.')
        return reader
示例#10
0
def read_pdf(filename):
    """Open ``filename`` with pyPdf, prompting for a password when encrypted.

    The prompt is repeated until ``decrypt`` accepts the password. The
    underlying file is left open because the returned reader uses it.

    Raises:
        CommandError: if ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise CommandError("{} does not exist".format(filename))

    reader = PdfFileReader(open(filename, "rb"))
    if not reader.isEncrypted:
        return reader

    # Keep asking until the supplied password unlocks the document.
    while not reader.decrypt(prompt_for_pw(filename)):
        print("The password did not match.")
    return reader
示例#11
0
def merge_vac(icao_code_list, directory, options):
    """Merge the per-airfield PDFs in ``directory`` into a single PDF file.

    For every code in ``icao_code_list`` the file ``<directory>/<code>.pdf``
    is read and its pages appended to ``<directory>/<VAC_ALL_FILE>.pdf``.
    With ``options.merge_better`` the first page of each input is skipped.
    With ``options.a5`` the merged file is additionally converted with the
    external ``pdfnup`` tool into ``<directory>/<VAC_ALL_FILE>.a5.pdf``.
    """
    vac_all_file = "%s/%s.pdf" % (directory, VAC_ALL_FILE)
    vac_a5_file = "%s/%s.a5.pdf" % (directory, VAC_ALL_FILE)
    start_page = 1 if options.merge_better else 0
    info("Merging all VAC charts into one pdf file (%s)..." % vac_all_file, options)
    from pyPdf import PdfFileWriter, PdfFileReader
    out_pdf = PdfFileWriter()
    # Inputs stay open until the output is written because the writer pulls
    # page data from the readers' streams; they are all closed afterwards.
    in_files = []
    try:
        for icao_code in icao_code_list:
            in_file = open("%s/%s.pdf" % (directory, icao_code), "rb")
            in_files.append(in_file)
            in_pdf = PdfFileReader(in_file)
            # Only decrypt documents that are actually flagged as encrypted.
            if in_pdf.isEncrypted:
                in_pdf.decrypt("")
            for i in range(start_page, in_pdf.numPages):
                out_pdf.addPage(in_pdf.getPage(i))
        with open(vac_all_file, "wb") as out_file:
            out_pdf.write(out_file)
    finally:
        for in_file in in_files:
            in_file.close()
    if options.a5:
        info("Converting merged file to 2x1 A5 in A4 format (%s)..." % vac_a5_file, options)
        # NOTE(review): the command is built by string interpolation, so a
        # directory containing spaces or shell metacharacters will break it.
        (status, output) = commands.getstatusoutput(
            "pdfnup %s --outfile %s" % (vac_all_file, vac_a5_file))
        if status != 0:
            fail("Failed to convert merged file to 2x1 A5 in A4 format.\n%s" % output)
示例#12
0
def decrypt(pdf_path, out_path, password):
    """Write a decrypted copy of ``pdf_path`` to ``out_path``.

    The title and author of the source are carried over to the copy; a
    translated ``'Unknown'`` is used for a value that is missing or empty.

    Args:
        pdf_path: path of the encrypted PDF.
        out_path: path of the decrypted PDF to create.
        password: password for the source; converted with ``str``.

    Raises:
        DecryptionError: if ``password`` does not unlock the document.
    """
    # The source is closed once the copy has been written, even on failure.
    with open(os.path.abspath(pdf_path), 'rb') as source:
        pdf = PdfFileReader(source)

        if pdf.decrypt(str(password)) == 0:
            raise DecryptionError(pdf_path)

        # Guard against a document without an information dictionary.
        doc_info = pdf.documentInfo
        title = (doc_info.title if doc_info else None) or _('Unknown')
        author = (doc_info.author if doc_info else None) or _('Unknown')
        out_pdf = PdfFileWriter(title=title, author=author)

        for page in pdf.pages:
            out_pdf.addPage(page)

        with open(out_path, 'wb') as out_file:
            out_pdf.write(out_file)
示例#13
0
def split_pdf(path_pdf):
    """Split ``path_pdf`` into pages and pick the page dated close to today.

    Every page is written to ``./tmp/`` and renamed to the name returned by
    ``change_name``. The files in ``./tmp/`` are then scanned: each name is
    split on ``-``, reversed, re-joined and parsed as a date with ``arrow``.
    The first file whose date is within five days of today is moved to
    ``./planning.pdf`` and ``remove_all()`` is called.

    Returns:
        ``"planning.pdf"`` when a matching page was found, otherwise False.
    """
    with open(path_pdf, "rb") as source:
        inputpdf = PdfFileReader(source)
        # Only decrypt documents that are actually flagged as encrypted.
        if inputpdf.isEncrypted:
            inputpdf.decrypt('')

        if not path.exists('./tmp'):
            makedirs('./tmp/')

        for i in range(inputpdf.numPages):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            newname = path_pdf[:7] + "-" + str(i) + ".pdf"
            # Binary mode: a PDF written in text mode is corrupted on
            # platforms that translate line endings. "+" keeps the stream
            # readable for change_name().
            outputStream = open("./tmp/" + newname, "w+b")
            try:
                output.write(outputStream)
                DateName = change_name(outputStream)
            finally:
                outputStream.close()
            rename("./tmp/" + newname, "./tmp/" + DateName)

    AllPdf = listdir('./tmp/')
    now = datetime.now().strftime('%Y-%m-%d')
    DateNow = arrow.get(now)

    for pdf in AllPdf:
        # Reverse the dash-separated parts of the file name before parsing.
        PdfDate = '-'.join(pdf.split('-')[::-1])

        DateFile = arrow.get(PdfDate)
        delta = (DateFile - DateNow)
        if -5 <= delta.days <= 5:
            rename("./tmp/" + pdf, "./" + "planning.pdf")
            remove_all()
            return "planning.pdf"
    return False
示例#14
0
def MergePDF(filepath,outfile):
    """Merge every file reported by ``getFileName(filepath)`` into one PDF.

    Entries whose path contains ``.DS_Store`` are skipped. The merged
    document is written to ``filepath + outfile``.

    Args:
        filepath: location handed to ``getFileName`` and used as the prefix
            of the output path.
        outfile: name appended to ``filepath`` for the merged document.
    """
    output = PdfFileWriter()
    outputPages = 0
    pdf_fileName = getFileName(filepath)
    print('%s %s' % ('总的', pdf_fileName))
    # Sources stay open until the output is written because the writer pulls
    # page data from the readers' streams; they are all closed afterwards.
    sources = []
    try:
        for each in pdf_fileName:
            if '.DS_Store' in each:
                continue
            print('%s %s' % ('单的', each))
            # Read the source PDF.
            stream = open(each, "rb")
            sources.append(stream)
            reader = PdfFileReader(stream)

            # An encrypted PDF must be decrypted before pyPdf can use it.
            if reader.isEncrypted:
                print('%s %s' % ('input.isEncrypted', reader.isEncrypted))
                reader.decrypt("map")

            # Number of pages in this source file.
            pageCount = reader.getNumPages()
            outputPages += pageCount
            print(pageCount)

            # Append every page to the output document.
            for iPage in range(0, pageCount):
                output.addPage(reader.getPage(iPage))

        print("All Pages Number:" + str(outputPages))
        # Finally write the merged PDF.
        with open(filepath + outfile, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in sources:
            stream.close()
    print("finished")
示例#15
0
class PdfBox(object):
    ''' Wraps pyPdf utils into a pdf object'''
    # Class-level defaults; instances overwrite them in __init__.
    pdfReader = None
    pdfInfo = None
    currentpage = 0
    extractedPages = {}
    filepath = ""
    isencrypted = False
    password = ""
    author = ""
    title = ""
    subject = ""
    pages = 0
    initialized = False

    def __init__(self, filepath, password = None):
        """Open ``filepath`` and load its metadata.

        The file handle is left open for the lifetime of the reader because
        ``getPage`` reads from it later. Metadata attributes keep their
        class defaults when the document cannot be decrypted or has no
        information dictionary.
        """
        self.filepath = filepath
        # Always give the instance its own cache so that a failed
        # initialisation never falls back to the shared class-level dict.
        self.extractedPages = {}
        self.pdfReader = PdfFileReader(open(filepath, "rb"))
        if password:
            self.password = password
        if self.initializePdf(self.password):
            self.pdfInfo = self.pdfReader.getDocumentInfo()
            if self.pdfInfo is not None:
                self.author = self.pdfInfo.author
                self.title = self.pdfInfo.title
                self.subject = self.pdfInfo.subject
            self.pages = self.pdfReader.getNumPages()

    def initializePdf(self, password = None):
        """Decrypt the document if necessary.

        Args:
            password: password to try; ``self.password`` is used when it is
                omitted.

        Returns:
            True when the document is usable (not encrypted, or decrypted
            successfully), otherwise False.
        """
        if password is None:
            password = self.password
        if self.pdfReader.getIsEncrypted():
            self.isencrypted = True
            if not self.pdfReader.decrypt(password):
                return False
        self.initialized = True
        return True

    def getPage(self, pagenum):
        """Return the extracted text of page ``pagenum``, caching the result."""
        self.currentpage = pagenum
        if pagenum not in self.extractedPages:
            page = self.pdfReader.getPage(pagenum)
            self.extractedPages[pagenum] = page.extractText()
        return self.extractedPages[pagenum]
示例#16
0
import sys

from pyPdf import PdfFileReader, PdfFileWriter

# Usage: python <script> input.pdf output.pdf password
# Copies every page of the input PDF into a new PDF encrypted with the given
# password.

# Compare by value: ``is not`` tests object identity, which only happens to
# work for small integers.
if len(sys.argv) != 4:
    print("Example:")
    print("python %s input.pdf output.pdf password" % __file__)
    sys.exit(0)

input_file = sys.argv[1]
output_file = sys.argv[2]
password = sys.argv[3]

with open(input_file, "rb") as pdf:
    reader = PdfFileReader(pdf)
    # Try the empty password on documents flagged as encrypted.
    if reader.isEncrypted:
        reader.decrypt('')
    writer = PdfFileWriter()
    for i in range(reader.getNumPages()):
        writer.addPage(reader.getPage(i))
    writer.encrypt(password)
    # ``with`` closes the stream; no explicit close() is needed.
    with open(output_file, "wb") as outputStream:
        writer.write(outputStream)
示例#17
0
from pyPdf import PdfFileReader,PdfFileWriter
# Decrypt Walrus.pdf, rotate each page and save the result as new.pdf.
my_path ='C:/Users/Intel i3/Desktop/python/Real_Python/book1-exercises-master/Course materials/Chapter 12/Practice files/Walrus.pdf'
out_path ='C:/Users/Intel i3/Desktop/python/Real_Python/book1-exercises-master/Course materials/Chapter 12/Practice files/new.pdf'
outfile = PdfFileWriter()
# The input is closed only after the output has been written; both files are
# closed even if an error occurs.
with open(my_path, 'rb') as in_file:
    pdf_file = PdfFileReader(in_file)
    yo = pdf_file.decrypt('IamtheWalrus')
    page = pdf_file.getNumPages()
    for page_no in range(0, page):
        text = pdf_file.getPage(page_no)
        text = text.rotateClockwise(270)
        outfile.addPage(text)
    with open(out_path, 'wb') as out_file:
        outfile.write(out_file)
示例#18
0
def appendPDFwithPDF(outFile,toAppend):
    '''
    Combine multiple pdf files into one.

    If outFile already exists, its pages come first and the pages of
    toAppend follow; otherwise outFile is created from toAppend alone.

    @author
      William Panting

    @param outFile
      a string representing the path of the file that is to be created/modified
    @param toAppend
      a string representing the path of the file that is to be appended to
      the original file, or an ordered list of multiple strings representing
      files (the caller's list is not modified)
    @return bool
      True if successful, False if not
    '''
    import io

    #out file must not be a directory
    if os.path.isdir(outFile):
        logging.error('Input error: outFile cannot be a directory.')
        return False
    #if outfile is a file then it needs to be added to the output page by page just like the other pdfs
    elif os.path.isfile(outFile):
        if isinstance(toAppend,str):
            toAppend=[outFile,toAppend]
        elif isinstance(toAppend,list):
            #build a new list so the caller's list is left untouched
            toAppend=[outFile]+toAppend

    #normalise the input to a list of validated paths
    if isinstance(toAppend, list):
        for path in toAppend:
            if not isPDF(path):
                logging.error('Error with input: '+str(path)+' --Each member of the list to append must be a valid pdf.')
                return False
        paths=toAppend
    elif isPDF(toAppend):
        paths=[toAppend]
    else:
        logging.error('Error with input: '+str(toAppend)+' --The input to Append must be a file path or list of file paths.')
        return False

    pdfWriter=PdfFileWriter()
    #outFile may itself be one of the inputs, so the result is assembled in
    #memory first and the file is only replaced once every input is closed
    merged=io.BytesIO()
    handles=[]
    try:
        for path in paths:
            handle=open(path, "rb")
            handles.append(handle)
            toAppendReader=PdfFileReader(handle)
            try:
                numPages=toAppendReader.getNumPages()
            except Exception: #handles where the pyPDF lib mistakenly thinks a pdf is encrypted, will not work with encryption 3,4
                toAppendReader.decrypt('')
                numPages=toAppendReader.getNumPages()
            #add the pages one by one
            for pageCount in range(numPages):
                pdfWriter.addPage(toAppendReader.getPage(pageCount))
        pdfWriter.write(merged)
    finally:
        for handle in handles:
            handle.close()

    #'wb' truncates, so no stale bytes remain from a longer previous file
    with open(outFile,'wb') as pdfStream:
        pdfStream.write(merged.getvalue())

    return True
import os
import copy
from pyPdf import PdfFileReader, PdfFileWriter

# Decrypt Walrus.pdf, rotate every page and split it into a left and a right
# half, writing both halves as separate pages of Output/Walrus.pdf.
path = '/home/alberick/Documents/python/books/realpython-jean/part1/book1-exercises/Course materials/Chapter 12/Practice files'

input_file_name = os.path.join(path, 'Walrus.pdf')
output_file_name = os.path.join(path, 'Output/Walrus.pdf')
output_PDF = PdfFileWriter()

# The input is closed only after the output has been written.
with open(input_file_name, 'rb') as input_stream:
    input_file = PdfFileReader(input_stream)
    input_file.decrypt('IamtheWalrus')

    for page_num in range(input_file.getNumPages()):
        page = input_file.getPage(page_num)
        page.rotateCounterClockwise(90)
        page_left = page
        page_right = copy.copy(page)
        # Capture the original corner before either half is resized.
        upper_right = page_left.mediaBox.upperRight
        page_left.mediaBox.upperRight = (upper_right[0] / 2, upper_right[1])
        output_PDF.addPage(page_left)
        page_right.mediaBox.upperLeft = (upper_right[0] / 2, upper_right[1])
        output_PDF.addPage(page_right)

    # ``with`` closes the stream; no explicit close() is needed.
    with open(output_file_name, 'wb') as output_file:
        output_PDF.write(output_file)
示例#20
0
import sys
from pyPdf import PdfFileReader

helpmsg = "Simple PDF brute force script\n"
helpmsg += "Cracks pwds of the format <first 4 chars of email>0000-9999."
helpmsg += "Example: snow0653\n\n"
helpmsg += "Usage: pdfbrute.py <encrypted_pdf_file> <email_address>"
# Both the PDF path (argv[1]) and the e-mail address (argv[2]) are required.
if len(sys.argv) < 3:
    print(helpmsg)
    sys.exit()

with open(sys.argv[1], "rb") as pdf_stream:
    pdffile = PdfFileReader(pdf_stream)
    if not pdffile.isEncrypted:
        print("[!] The file is not protected with any password. Exiting.")
        # A bare ``exit`` is just a name lookup; actually stop here.
        sys.exit()

    print("[+] Attempting to Brute force. This could take some time...")

    # Candidates are <first 4 chars of email> + 0000..9999 inclusive.
    for i in range(0, 10000):
        z = "%04d" % i
        a = str(sys.argv[2][:4] + z)

        if pdffile.decrypt(a) > 0:
            print("[+] Password is: " + a)
            print("[...] Exiting..")
            sys.exit()

    print("[-] Password not found.")
示例#21
0
def process_file(id_source):
    """Compute page-box coordinates for the public comments of a source PDF.

    Reads ``tmp/<id_source>.pdf`` together with the locations and comments
    returned by ``annotations.getPublicCommentsByFile(id_source)``.

    Returns:
        ``False`` when the PDF is encrypted and cannot be decrypted with an
        empty password. Otherwise a tuple ``(comments_res, page_comment)``:
        ``comments_res`` maps a comment id to a dict with the keys
        ``location_ID``, ``source_ID``, ``ensemble_ID``, ``x``, ``y``, ``w``,
        ``h``, ``page`` and ``parent`` (``-1`` for root comments);
        ``page_comment`` maps ``str(page)`` to the list of comment ids
        recorded for that page, in visiting order. Only comments whose
        location page is not 0 are recorded.
    """
    from pyPdf import PdfFileReader
    from numpy import array
    # S['last_page'] is updated by print_child; it is not read elsewhere in
    # this function.
    S = {}
    locations, comments = annotations.getPublicCommentsByFile(id_source)

    srcfile = "tmp/%s.pdf" % (id_source)

    # NOTE(review): the file handle is never closed explicitly.
    pdf = PdfFileReader(file(srcfile, "rb"))
    if pdf.isEncrypted and pdf.decrypt("") == 0:
        print "PDF file encrypted with non-empty password: %s" % (srcfile, )
        return False
    trim_box = pdf.pages[
        0].trimBox  # Sacha's coordinate system now uses this box
    crop_box = pdf.pages[0].cropBox  # ConTeXt's page inclusion uses this box
    fudge = (int(trim_box[2]) -
             int(trim_box[0])) / 612.0  # for the assumption of 612bp width
    # pixel -> bp conversion factor (72/150, scaled by fudge)
    bp_per_pixel = 72.0 / 150 * fudge

    roots = {}  # root comment id -> its location dict
    children_of = {}  # parent comment id -> list of child comment ids
    comments_res = {}  # result: comment id -> computed box and metadata
    page_comment = {}  # result: str(page) -> comment ids on that page

    # Split the comments into roots (no parent) and children.
    for k in comments:
        node = int(k)
        parent = comments[k]['id_parent']
        if parent:
            if parent not in children_of:
                children_of[parent] = []
            children_of[parent].append(node)
        else:
            # NOTE(review): indexed with the int ``node`` here but with ``k``
            # above; this assumes the keys of ``comments`` are ints.
            loc_id = comments[node]['ID_location']
            loc = locations[loc_id]
            # Centre coordinates are only computed for locations whose page
            # is not 0; they serve as sort keys in cmp() below.
            if loc['page'] != 0:
                loc['center_x'] = loc['left'] + loc['w'] / 2.0
                loc['center_y'] = loc['top'] + loc['h'] / 2.0
            else:
                loc['center_x'] = None
                loc['center_y'] = None
            roots[node] = loc

    # NOTE(review): oneline() and texify() are not called anywhere in this
    # function; texify() also needs a module-level ``re`` import.
    def oneline(s):
        return s.replace('\n', ' ')

    def texify(s):
        s = s.strip()
        patterns = [(r'\\', r'\\\\'), (r'%', r'\%'), (r'\$', r'\$'),
                    ('_', r'\_'), (r'\&', r'\&'), (r'\^', r'\^\\null{}'),
                    (r'#', r'\#'), (r'\|', r'$|$')]
        for p in patterns:
            s = re.sub(p[0], p[1], s)
        return s

    def rect2array(rect):
        # [llx, lly, urx, ury] of the rectangle as a float array
        return array(rect.lowerLeft + rect.upperRight, dtype=float)

    def rectangle_height(rect):
        return rect.upperRight[1] - rect.lowerLeft[1]

    S["last_page"] = -1

    def print_child(n, levels=0):
        # Record comment ``n`` and then recurse into its children;
        # ``levels`` is 0 for a root comment.
        loc_id = comments[n]['ID_location']
        location = locations[loc_id]
        page = int(location['page'])
        if levels == 0 and page > S["last_page"]:
            S["last_page"] = page
        if levels == 0 and page != 0:  # a root comment not on page 0 needs callout
            root = roots[n]
            # Sacha's coords are from top left corner, relative to TrimBox
            # but in pixels (not postscript points).
            # evaluate comment_box_px, with this coord system, as [llx lly wwc h]
            comment_box_px = array(
                [root['left'], root['top'] + root['h'], root['w'], root['h']])
            comment_box_bp = comment_box_px * bp_per_pixel
            # convert y coordinate to use bottom edge of trim_box as y=0
            comment_box_bp[1] = int(rectangle_height(trim_box)) - int(
                comment_box_bp[1])
            # convert to coordinates relative to CropBox
            comment_box_bp[0:2] += (rect2array(trim_box) -
                                    rect2array(crop_box))[0:2]

            comments_res[n] = {}
            comments_res[n]['location_ID'] = loc_id
            comments_res[n]['source_ID'] = id_source
            comments_res[n]['ensemble_ID'] = locations[loc_id]['id_ensemble']
            comments_res[n]['x'] = comment_box_bp[0]
            comments_res[n]['y'] = comment_box_bp[1]
            comments_res[n]['w'] = comment_box_bp[2]
            comments_res[n]['h'] = comment_box_bp[3]
            comments_res[n]['page'] = page
            comments_res[n]['parent'] = -1

            strPage = str(page)
            if strPage in page_comment:
                tmp = page_comment[strPage]
                tmp.append(n)
                page_comment[strPage] = tmp
            else:
                page_comment[strPage] = [n]

        elif levels != 0 and page != 0:
            # A reply copies the box of its parent.
            # NOTE(review): this raises KeyError if the parent was not
            # recorded in comments_res (e.g. a parent on page 0).
            parent = comments[n]['id_parent']
            comments_res[n] = {}
            comments_res[n]['location_ID'] = loc_id
            comments_res[n]['source_ID'] = id_source
            comments_res[n]['ensemble_ID'] = locations[loc_id]['id_ensemble']
            comments_res[n]['x'] = comments_res[parent]['x']
            comments_res[n]['y'] = comments_res[parent]['y']
            comments_res[n]['w'] = comments_res[parent]['w']
            comments_res[n]['h'] = comments_res[parent]['h']
            comments_res[n]['page'] = page
            comments_res[n]['parent'] = parent

            strPage = str(page)
            if strPage in page_comment:
                tmp = page_comment[strPage]
                tmp.append(n)
                page_comment[strPage] = tmp
            else:
                page_comment[strPage] = [n]

        if n in children_of:
            for k in sorted(children_of[n]):
                print_child(k, levels + 1)

    # Ordering for root comments: page, then center_y, then center_x; two
    # roots that are both on page 0 are ordered by comment id. The name
    # shadows the Python 2 builtin ``cmp``.
    def cmp(a, b):
        if roots[a]['page'] == 0 and roots[b]['page'] == 0:
            return a - b  # order by comment id
        for key in ['page', 'center_y', 'center_x']:
            if roots[a][key] != roots[b][key]:
                return int(1e6 * roots[a][key] - 1e6 * roots[b][key])
        return 0

    # Passing the comparison function positionally is Python 2 only.
    for root_id in sorted(roots, cmp):
        print_child(root_id, 0)
    return comments_res, page_comment
import sys
from pyPdf import PdfFileReader
 
helpmsg = "Simple PDF brute force script\n"
helpmsg += "Cracks pwds of the format <first 4 chars of email>0000-9999."
helpmsg += "Example: snow0653\n\n"
helpmsg += "Usage: pdfbrute.py <encrypted_pdf_file> <email_address>"
# Both the PDF path (argv[1]) and the e-mail address (argv[2]) are required.
if len(sys.argv) < 3:
    print(helpmsg)
    sys.exit()

with open(sys.argv[1], "rb") as pdf_stream:
    pdffile = PdfFileReader(pdf_stream)
    if not pdffile.isEncrypted:
        print("[!] The file is not protected with any password. Exiting.")
        # A bare ``exit`` is just a name lookup; actually stop here.
        sys.exit()

    print("[+] Attempting to Brute force. This could take some time...")

    # Candidates are <first 4 chars of email> + 0000..9999 inclusive.
    for i in range(0, 10000):
        z = "%04d" % i
        a = str(sys.argv[2][:4] + z)

        if pdffile.decrypt(a) > 0:
            print("[+] Password is: " + a)
            print("[...] Exiting..")
            sys.exit()

    print("[-] Password not found.")
class GenerateMarkPdf(object):
    """Stamp a text marker onto every page of a PDF and save the result.

    Typical call order: ``setFilename`` -> ``setOrginalPdfFile`` ->
    ``createNewPdf``.
    """

    def __init__(self, args):
        """Store the parameter list.

        Layout of ``args`` according to the original author's notes:
            0 script name
            1 name of the original document
            2 marker position on the X axis
            3 marker position on the Y axis
            4 data to write
            5 second data item to write
            6 output file name (default true)
        """
        self.params = args

    ######################## Output
    def setFilename(self, filename):
        """Create the PDF writer and remember the output file name."""
        self.output = PdfFileWriter()
        #@TODO set the file name
        self.filename = filename

    ######################## Logs
    def logger(self, data):
        """Append a timestamped line to ``/var/log/pdf_error.log``.

        The log file must exist and be writable. Logging is best effort: on
        any failure a message is printed instead of raising.
        """
        try:
            # ``with`` only closes the file if it was opened; the previous
            # ``finally: logfile.close()`` raised UnboundLocalError when
            # open() itself failed.
            with open("/var/log/pdf_error.log", "a") as logfile:
                today = strftime("%Y-%m-%d %H:%M:%S", gmtime())
                logfile.write(str(today) + " " + str(data) + "\r\n")
        except Exception:
            print("Nie mozna utworzyc pliku")

    def setOrginalPdfFile(self):
        """Open the original PDF named in ``self.params[1]``.

        The file handle stays open because ``createNewPdf`` reads pages from
        it later. Failures are logged rather than raised.
        """
        try:
            self.input1 = PdfFileReader(open(self.params[1], "rb"))

            if self.input1.isEncrypted:
                # decrypt() requires a password argument. The empty password
                # is assumed here - TODO confirm this is what was intended.
                self.input1.decrypt('')
        except IOError as ie:
            self.logger("Nie ma takiego pliku " + " " + str(ie))
        except Exception as e:
            self.logger(e)

    ######################### Watermark
    def watermark(self, x, y, z, w):
        """Return a one-page PDF reader with ``self.params[4]`` drawn at (x, y).

        ``z`` and ``w`` are accepted for a second text item but are
        currently unused.
        """
        packet = StringIO.StringIO()
        data = self.params[4]

        can = canvas.Canvas(packet)
        can.drawString(int(x), int(y), str(data))
        can.save()

        # Move the cursor back to the start of the buffer
        packet.seek(0)
        return PdfFileReader(packet)

    def createNewPdf(self):
        """Merge the marker into every page and write ``self.filename``.

        Write errors are logged rather than raised.
        """
        iloscStron = int(self.input1.getNumPages())

        for i in range(iloscStron):
            # Original page; ``page.mediaBox`` holds the page size
            page = self.input1.getPage(i)
            x = 20
            y = page.mediaBox[3] - 10

            z = page.mediaBox[2] - 100
            w = page.mediaBox[3] - 10
            stamp = self.watermark(x, y, z, w).getPage(0)
            page.mergePage(stamp)
            self.output.addPage(page)

        try:
            # ``with`` closes (and flushes) the output file on every path.
            with open(self.filename, "wb") as outputStream:
                self.output.write(outputStream)
        except Exception as e:
            self.logger("Blad " + str(e))
示例#24
0
def process_file(id_source): 
    """Build the annotated version of one repository PDF with ConTeXt.

    The public comments and their locations for ``id_source`` are fetched via
    ``annotations.getPublicCommentsByFile``, arranged into threads (root
    comments plus replies), rendered as TeX into ``/tmp/<id_source>.tex`` and
    compiled with ``texexec``; the resulting PDF is moved into
    ``settings.HTTPD_MEDIA/settings.ANNOTATED_DIR``.

    Returns:
        ``None`` when the repository file is missing and also after a normal
        run; ``False`` when the PDF is encrypted and the empty password does
        not decrypt it.
    """
    from processing.tex_template import tex_header
    from pyPdf import PdfFileReader
    from numpy import array
    logging.info("begin %s" % (id_source, ))
    OUTPUT = []   # TeX fragments, joined with newlines when written out
    S = {}        # mutable state shared with the nested print_child()
    locations, comments  = annotations.getPublicCommentsByFile(id_source)
    repfile   = "%s/%s/%s" % (settings.HTTPD_MEDIA,settings.REPOSITORY_DIR, id_source)
    if not  os.path.exists(repfile):
        logging.warning("%s not found. Skipping..." % (repfile, ))
        return 
    # Work on a symlink in /tmp so the TeX run sees a path ending in .pdf.
    srcfile   = "/tmp/orig_%s.pdf" % (id_source, )
    if not os.path.exists(srcfile):
        os.symlink(repfile, srcfile)
    # NOTE(review): the file object handed to PdfFileReader is never closed
    # explicitly in this function.
    pdf       = PdfFileReader(file(srcfile, "rb"))
    if pdf.isEncrypted and pdf.decrypt("")==0:
        print "PDF file encrypted with non-empty password: %s" % (srcfile,)
        return False
    # Only the boxes of the first page are consulted for the whole document.
    trim_box  = pdf.pages[0].trimBox # Sacha's coordinate system now uses this box
    crop_box  = pdf.pages[0].cropBox  # ConTeXt's page inclusion uses this box
    fudge     = (int(trim_box[2])-int(trim_box[0]))/612.0 # for the assumption of 612bp width
    # NOTE(review): 72.0/150 presumably converts 150-pixels-per-inch
    # coordinates to PostScript points (72 per inch) -- confirm.
    bp_per_pixel = 72.0/150 * fudge

    roots       = {}  # root comment id -> its location dict
    children_of = {}  # parent comment id -> list of reply ids

    for k in comments:
        node = int(k)
        parent = comments[k]['id_parent']
        if parent:
            if parent not in children_of:
                children_of[parent] = []
            children_of[parent].append(node)
        else:
            # NOTE(review): the comment is looked up with the int-converted
            # key here but with `k` above; presumably the keys are already
            # ints -- verify against getPublicCommentsByFile.
            loc_id      = comments[node]['ID_location']
            loc         = locations[loc_id]
            # Page 0 is used for comments without a position on a page
            # (print_child titles that section 'Global comments').
            if loc['page'] != 0:
                loc['center_x'] = loc['left'] + loc['w']/2.0
                loc['center_y'] = loc['top']  + loc['h']/2.0
            else:
                loc['center_x'] = None
                loc['center_y'] = None
            roots[node] = loc

    def oneline(s):
        """Return ``s`` with every newline replaced by a space."""
        return s.replace('\n', ' ')

    def texify(s):
        """Return ``s`` stripped and with TeX special characters escaped."""
        s = s.strip()
        # (regex, replacement) pairs applied in list order; the backslash
        # rule comes first so that backslashes inserted by the later rules
        # are not processed again.
        patterns = [(r'\\', r'\\\\'),
                    (r'%', r'\%'), (r'\$', r'\$'), ('_', r'\_'), (r'\&', r'\&'),
                    (r'\^', r'\^\\null{}'), (r'#', r'\#'), (r'\|', r'$|$')]
        for p in patterns:
            s = re.sub(p[0], p[1], s)
        return s

    def rect2array(rect):
        """Return the rectangle as a float array [llx, lly, urx, ury]."""
        return array(rect.lowerLeft+rect.upperRight, dtype=float)

    def rectangle_height(rect):
        """Return the height of ``rect`` (upper-right y minus lower-left y)."""
        return rect.upperRight[1]-rect.lowerLeft[1]

    S["last_page"] = -1
    def print_child(n, levels=0):
        """Append the TeX for comment ``n`` and, recursively, its replies.

        ``levels`` is the nesting depth; 0 marks a root comment.
        """
        body     = comments[n]['body']
        loc_id   = comments[n]['ID_location']
        location = locations[loc_id]
        page     = int(location['page'])
        # Emit a section header the first time a root comment on a later
        # page than the previous header is reached.
        if levels == 0 and page > S["last_page"]:
            OUTPUT.append('\n%% Comments on page %d of %s [%s]' % (page,
                                                           "myfile",
                                                           os.path.basename(srcfile)))
            if page == 0:
                sectitle = 'Global comments'
            else:
                sectitle = 'Comments on page %d' % page
            OUTPUT.append(r'\title{%s} \def\whatpage{%d}' % (sectitle, page))
            S["last_page"] = page
        if comments[n]['admin'] == 1:
            me = 1
        else:
            me = 0
        msg = '\n'+r'\comment{note-%s}{%d}{%s}{%d}{%d}' % (n, levels, texify(body), me, int(n))
        # Characters that cannot be encoded as ASCII are dropped.
        OUTPUT.append(unicode(msg).encode("ascii", "ignore"))
        if levels == 0 and page != 0:  # a root comment not on page 0 needs callout
            root = roots[n]
            # Sacha's coords are from top left corner, relative to TrimBox
            # but in pixels (not postscript points).
            # evaluate comment_box_px, with this coord system, as [llx lly w h]
            comment_box_px = array([root['left'],
                                    root['top']+root['h'],
                                    root['w'],
                                    root['h']])
            comment_box_bp = comment_box_px * bp_per_pixel
            # convert y coordinate to use bottom edge of trim_box as y=0
            comment_box_bp[1] = int(rectangle_height(trim_box))-int(comment_box_bp[1])
            # convert to coordinates relative to CropBox
            comment_box_bp[0:2] += (rect2array(trim_box)-rect2array(crop_box))[0:2]
            # The TeX templates below are non-raw strings; "\s" is not a
            # recognised escape sequence, so the backslash is kept as written.
            OUTPUT.append('\setpospxywhd{note-%d-dest}{1}' % n)
            OUTPUT.append('{%fbp}{%fbp}{%fbp}{%fbp}{0pt}' % tuple(comment_box_bp))
            OUTPUT.append('''\startpositionoverlay{callouts}
    \setMPpositiongraphic{note-%d}{callout}{to=note-%d-dest}
    \stoppositionoverlay''' % (n, n))
        if n in children_of:
            for k in sorted(children_of[n]):
                print_child(k, levels+1)

    # NOTE(review): this local name shadows the builtin cmp() for the rest of
    # the function.
    def cmp(a,b):
        """Order root comments by page, then center_y, then center_x.

        Two roots that are both on page 0 are ordered by comment id.
        """
        if roots[a]['page'] == 0 and roots[b]['page'] == 0:
            return a-b              # order by comment id
        for key in ['page', 'center_y', 'center_x']:
            if roots[a][key] != roots[b][key]:
                # Scaled by 1e6 before truncating to int so that small
                # fractional differences are not truncated to zero.
                return int(1e6*roots[a][key] - 1e6*roots[b][key])
        return 0

    tex_params = {'crop_wd': crop_box[2]-crop_box[0],
                  'crop_ht': crop_box[3]-crop_box[1],
                  'srcfile': srcfile
                  }

    OUTPUT.append( tex_header % tex_params)
    OUTPUT.append( '\n\\starttext')
    for root_id in sorted(roots, cmp):
        print_child(root_id, 0)
    OUTPUT.append( '\n\\stoptext')    
    texfile = "/tmp/%s.tex" % (id_source, )
    f = open(texfile, "w")
    f.write("\n".join(OUTPUT))
    f.close()
    # NOTE(review): the shell command is assembled by string interpolation of
    # id_source and settings values; confirm id_source can never contain
    # shell metacharacters.  The exit status of os.system is not checked.
    cmd = "(cd /tmp; texexec --timeout=120 %s; mv %s.pdf %s/%s/%s)" % (texfile, id_source, settings.HTTPD_MEDIA, settings.ANNOTATED_DIR, id_source)
    os.system(cmd)
    logging.info("end %s" % (id_source, ))
示例#25
0
    def export_to_file(self, file_out, only_selected=False):
        """Write the pages listed in ``self.model`` to a new PDF file.

        Every document in ``self.pdfqueue`` is opened from its ``copyname``;
        an encrypted document is accepted only if it decrypts with an empty
        password.  Each model row contributes one page: ``row[2]`` and
        ``row[3]`` are the 1-based document and page numbers, ``row[6]`` is
        the clockwise rotation to apply and ``row[7]``..``row[10]`` are crop
        fractions taken off the media box's left, right, top and bottom
        edges (re-mapped to the matching edges when the page is rotated).

        Args:
            file_out: Path of the PDF file to create.
            only_selected: When true, rows whose path is not part of the
                icon view's current selection are skipped.

        Raises:
            Exception: If a queued document is encrypted and cannot be
                decrypted with an empty password.
        """
        selection = self.iconview.get_selected_items()
        pdf_output = PdfFileWriter()
        pdf_input = []
        # Input streams must stay open until the output has been written,
        # because page data is read from them at write time.
        input_streams = []
        try:
            for pdfdoc in self.pdfqueue:
                stream = open(pdfdoc.copyname, 'rb')
                input_streams.append(stream)
                pdfdoc_inp = PdfFileReader(stream)
                if pdfdoc_inp.getIsEncrypted():
                    try:  # Workaround for lp:#355479
                        stat = pdfdoc_inp.decrypt('')
                    except Exception:
                        stat = 0
                    if stat != 1:
                        errmsg = _('File %s is encrypted.\n'
                                   'Support for encrypted files has not been implemented yet.\n'
                                   'File export failed.') % pdfdoc.filename
                        raise Exception(errmsg)
                    #FIXME
                    #else
                    #   ask for password and decrypt file
                pdf_input.append(pdfdoc_inp)

            for row in self.model:

                if only_selected and row.path not in selection:
                    continue

                # add pages from input to output document
                nfile = row[2]
                npage = row[3]
                current_page = copy(pdf_input[nfile - 1].getPage(npage - 1))
                angle = row[6]
                angle0 = current_page.get("/Rotate", 0)
                crop = [row[7], row[8], row[9], row[10]]
                if angle != 0:
                    current_page.rotateClockwise(angle)
                if crop != [0., 0., 0., 0.]:
                    # Number of quarter turns, rounded to the nearest one.
                    # Floor division keeps the result integral regardless
                    # of the division semantics in effect.
                    rotate_times = int(((angle + angle0) % 360 + 45) // 90) % 4
                    crop_init = crop
                    if rotate_times != 0:
                        # Permute the crop sides so they follow the rotation.
                        perm = [0, 2, 1, 3]
                        for it in range(rotate_times):
                            perm.append(perm.pop(0))
                        perm.insert(1, perm.pop(2))
                        crop = [crop_init[perm[side]] for side in range(4)]
                    #(x1, y1) = current_page.cropBox.lowerLeft
                    #(x2, y2) = current_page.cropBox.upperRight
                    (x1, y1) = [float(xy) for xy in current_page.mediaBox.lowerLeft]
                    (x2, y2) = [float(xy) for xy in current_page.mediaBox.upperRight]
                    x1_new = int(x1 + (x2 - x1) * crop[0])
                    x2_new = int(x2 - (x2 - x1) * crop[1])
                    y1_new = int(y1 + (y2 - y1) * crop[3])
                    y2_new = int(y2 - (y2 - y1) * crop[2])
                    #current_page.cropBox.lowerLeft = (x1_new, y1_new)
                    #current_page.cropBox.upperRight = (x2_new, y2_new)
                    current_page.mediaBox.lowerLeft = (x1_new, y1_new)
                    current_page.mediaBox.upperRight = (x2_new, y2_new)

                pdf_output.addPage(current_page)

            # finally, write "output" to document-output.pdf; the context
            # manager guarantees the file is flushed and closed.
            with open(file_out, 'wb') as out_stream:
                pdf_output.write(out_stream)
        finally:
            for stream in input_streams:
                stream.close()
示例#26
0
# 12.2 review exercises

import os
import copy
from pyPdf import PdfFileReader, PdfFileWriter

path = "C:/Real Python/Course materials/Chapter 12/Practice files"
input_file_name = os.path.join(path, "Walrus.pdf")
output_PDF = PdfFileWriter()
input_file = PdfFileReader(open(input_file_name, "rb"))

# The source document is password protected; unlock it before reading pages.
input_file.decrypt("IamtheWalrus")

page_total = input_file.getNumPages()
for page_num in range(page_total):
    # Turn the page first; this object becomes the left half.
    page_left = input_file.getPage(page_num)
    page_left.rotateCounterClockwise(90)

    # A shallow copy of the rotated page becomes the right half.
    page_right = copy.copy(page_left)

    # Remember the original corner before either media box is changed.
    upper_right = page_left.mediaBox.upperRight
    half_width = upper_right[0] / 2
    top_edge = upper_right[1]

    # Left half: pull the upper-right corner in to the middle of the page.
    page_left.mediaBox.upperRight = (half_width, top_edge)
    output_PDF.addPage(page_left)

    # Right half: push the upper-left corner out to the middle of the page.
    page_right.mediaBox.upperLeft = (half_width, top_edge)
    output_PDF.addPage(page_right)

# save new pages to an output file
output_file_name = os.path.join(path, "Output/Updated Walrus.pdf")
with file(output_file_name, "wb") as output_file:
示例#27
0
path = 
for f in full:

    filename = "myfile.txt"
    path_to_file = pjoin("C:", "foo", "bar", "baz", filename)
    FILE = open(path_to_file, "w")


from pyPdf import PdfFileReader

# Scratch statements that inspect the metadata of a single PDF file.
# p = '/home/matt/Desktop/new/The_Warren_Buffett_Way.pdf'
p = '/home/matt/Desktop/new/the_mckinsey_way.pdf'

pdf = PdfFileReader(file(p, 'rb'))
# Try the empty password when the document reports itself as encrypted.
if pdf.isEncrypted:
    pdf.decrypt('')
# The bare expressions below print nothing when this is run as a script.
pdf.documentInfo
pdf.getNumPages()
# NOTE(review): looks like pasted console output of getNumPages() -- confirm.
267
info = pdf.getDocumentInfo()

info.author
info.title


p = '/home/matt/Desktop/new/The_McKinsey_Way.pdf'

pdf = PdfFileReader(file(p, 'rb'))
if pdf.isEncrypted:
	try:
	    pdf.decrypt('')
示例#28
0
文件: views.py 项目: Dpetters/Umeqo
def employer_resume_book_create(request):
    """Create a recruiter's resume book file and attach it to the ResumeBook.

    With a non-empty ``resume_book_id`` in POST, that existing book is
    re-delivered under its stored name.  Otherwise the recruiter's
    undelivered book is fetched or created (if several exist, all but the
    first are deleted) and named ``<user>_<timestamp>``.

    ``delivery_format == "separate"`` produces a zip archive holding one PDF
    per visible student.  Any other value produces a single PDF: contents
    page(s) listing the students, followed by every student's resume.

    Returns:
        An empty ``HttpResponse``.

    Raises:
        Http404: If ``resume_book_id`` does not match any resume book.
    """
    # Contents rows are 0.5 cm apart starting at 25.5 cm, so the 51st row
    # lands at 0.5 cm; one more row would fall off the bottom of the page.
    rows_per_page = 51

    if request.POST.get("resume_book_id"):
        redelivering = True
        try:
            resume_book = ResumeBook.objects.get(id=request.POST["resume_book_id"])
        except ResumeBook.DoesNotExist:
            raise Http404("No resume book exists with id of %s" % request.POST["resume_book_id"])
    else:
        redelivering = False
        try:
            resume_book = ResumeBook.objects.get_or_create(
                recruiter=request.user.recruiter, delivered=False)[0]
        except ResumeBook.MultipleObjectsReturned:
            # Keep the first undelivered book and drop the duplicates.
            resume_books = ResumeBook.objects.filter(recruiter=request.user.recruiter, delivered=False)
            for i, rb in enumerate(resume_books):
                if i != 0:
                    rb.delete()
                else:
                    resume_book = rb

    if redelivering:
        resume_book_name = resume_book.name
    else:
        now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        resume_book_name = "%s_%s" % (str(request.user), now)
        resume_book.name = resume_book_name
        resume_book.save()

    file_path = "%semployer/resumebook/" % (s.MEDIA_ROOT,)
    if not os.path.exists(file_path):
        os.makedirs(file_path)

    if request.POST["delivery_format"] == "separate":
        # Create the zip file
        file_name = "%s%s" % (file_path, resume_book_name)
        output = zipfile.ZipFile(file_name, "w")
        try:
            for student in resume_book.students.visible():
                # ZipFile.write reads the file itself; only the path is needed.
                resume_path = "%s%s" % (s.MEDIA_ROOT, str(student.resume))
                name = "%s %s (%s, %s).pdf" % (
                    student.first_name,
                    student.last_name,
                    student.graduation_year,
                    student.degree_program,
                )
                output.write(resume_path, name, zipfile.ZIP_DEFLATED)
        finally:
            output.close()
    else:
        output = PdfFileWriter()
        file_name = "%s%s.pdf" % (file_path, resume_book_name)

        def flush_contents_page(page_canvas, page_buffer):
            """Finish the canvas and append its single page to ``output``."""
            page_canvas.showPage()
            page_canvas.save()
            output.addPage(PdfFileReader(cStringIO.StringIO(page_buffer.getvalue())).getPage(0))

        students = resume_book.students.visible().order_by("graduation_year", "first_name", "last_name")

        report_buffer = cStringIO.StringIO()
        c = Canvas(report_buffer)
        now = datetime.now()
        first_line = "Created on %s at %s" % (now.strftime("%m/%d/%Y"), now.strftime("%I:%M %p"))
        c.drawString(1 * cm, 28.5 * cm, first_line)
        c.drawString(1 * cm, 28 * cm, str(request.user.recruiter))
        c.drawString(1 * cm, 27.5 * cm, str(request.user.recruiter.employer))
        c.drawString(16 * cm, 28.5 * cm, "Created using Umeqo")
        c.drawString(8.5 * cm, 26.5 * cm, "Resume Book Contents")
        pad_from_top = 0
        for student in students:
            # Start a new contents page only when another row actually needs
            # one: this repeats for every full page and never leaves a blank
            # trailing page.
            if pad_from_top == rows_per_page:
                flush_contents_page(c, report_buffer)
                report_buffer = cStringIO.StringIO()
                c = Canvas(report_buffer)
                pad_from_top = 0
            row_y = (25.5 - pad_from_top * 0.5) * cm
            c.drawString(6.5 * cm, row_y, "%s %s" % (student.first_name, student.last_name))
            c.drawString(12 * cm, row_y, "%s, %s" % (student.graduation_year, student.degree_program))
            pad_from_top += 1
        flush_contents_page(c, report_buffer)

        # Resume files must stay open until the merged PDF has been written,
        # because page data is read from them at write time; all of them are
        # closed afterwards, including when there are no students at all.
        resume_files = []
        try:
            for student in students:
                resume_file = open("%s%s" % (s.MEDIA_ROOT, str(student.resume)), "rb")
                resume_files.append(resume_file)
                resume = PdfFileReader(resume_file)
                if resume.getIsEncrypted():
                    resume.decrypt("")
                for page in range(resume.getNumPages()):
                    output.addPage(resume.getPage(page))
            with open(file_name, "wb") as output_stream:
                output.write(output_stream)
        finally:
            for resume_file in resume_files:
                resume_file.close()

    with open(file_name, "rb") as resume_book_contents:
        resume_book.resume_book.save(file_name, File(resume_book_contents))
    return HttpResponse()
示例#29
0
文件: pyfoca.py 项目: 0x90shell/ipwn
	def processFile(self, curr_file):
		"""Read a PDF's document information and record it in ``self.container``.

		Only names containing ``".pdf"`` are processed.  The creation date,
		author, producer and modification date are taken from the document
		information dictionary (``'-'`` when absent), shortened for display,
		and appended to ``self.container`` as one row:
		``[" | " + name, created, author, producer, modded, last_saved]``.

		Processing is best effort: if the file cannot be read or parsed, or
		has no document information, nothing is appended.
		"""
		global extractedFrom
		author = '-'
		created = '-'
		producer = '-'
		modded = '-'
		last_saved = '-'
		if ".pdf" not in curr_file:
			return
		try:
			# The document information is read while the file is still open;
			# the context manager closes the handle on every exit path.
			with open(curr_file, 'rb') as pdf_stream:
				pdfFile = PdfFileReader(pdf_stream)
				if pdfFile.getIsEncrypted():
					pdfFile.decrypt('')
				docInfo = pdfFile.getDocumentInfo()
				if not docInfo:
					return
				#looks at the entire dictionary to parse for information
				if "/CreationDate" in docInfo:
					data = docInfo["/CreationDate"].strip("D:|'")
					year = data[0:4]
					date = data[4:6] + "/" + data[6:8]
					created_time = data[8:10] + ":" + data[10:12]
					created_time = time.strftime("%I:%M %p", time.strptime(created_time, "%H:%M"))
					created = date + "/" + year + " " + created_time
				if "/Author" in docInfo:
					author = docInfo["/Author"] + " "
					if len(author) <= 1:
						author = "-"
				if "/Producer" in docInfo:
					# Remove the literal "(Windows)" marker.  str.strip() would
					# treat the argument as a set of characters and also eat
					# matching letters at the ends of the name
					# (e.g. "Microsoft Word" -> "Microsoft Wor").
					producer = docInfo["/Producer"].replace("(Windows)", "")
					producer = re.sub(r'[^\w]', ' ', producer)
					if len(producer) == 0:
						producer = "-"
					# Collapse runs of spaces left behind by the substitution.
					while "  " in producer:
						producer = producer.replace("  ", " ")
				if "/ModDate" in docInfo:
					data = docInfo["/ModDate"].strip("D:|'")
					year = data[0:4]
					date = data[4:6] + "/" + data[6:8]
					modded_time = data[8:10] + ":" + data[10:12]
					modded_time = time.strftime("%I:%M %p", time.strptime(modded_time, "%H:%M"))
					modded = date + "/" + year + " " + modded_time

			#strips '/' off file name (if it includes directory name)
			if "/" in curr_file:
				curr_file = curr_file[curr_file.rfind("/") + 1:]
			if "\\" in curr_file:
				curr_file = curr_file.replace("\\", "")

			#trim information if it's too long
			if len(curr_file) > 15:  # trims file name
				curr_file = curr_file[:15] + "..." + curr_file[-13:]
			if len(producer) > 30:
				producer = producer[:20] + " [snipped] "
			if len(author) > 20:
				author = author[:20] + " [snipped] "

			#appends each piece of information. output will show ONLY if at least ONE file has data in a column
			self.container.append([" | " + curr_file, created, author, producer, modded, last_saved])
		except Exception:
			# Deliberate best effort: unreadable or malformed files are skipped.
			return
# 12.2 review exercises

import os
import copy
from pyPdf import PdfFileReader, PdfFileWriter

path = "C:/Real Python/Course materials/Chapter 8/Practice files"
inputFileName = os.path.join(path, "Walrus.pdf")
outputPDF = PdfFileWriter()
inputFile = PdfFileReader(open(inputFileName, "rb"))

# The source document is password protected; unlock it before reading pages.
inputFile.decrypt("IamtheWalrus")

pageTotal = inputFile.getNumPages()
for pageNum in range(pageTotal):
    # Turn the page first; this object becomes the left half.
    pageLeft = inputFile.getPage(pageNum)
    pageLeft.rotateCounterClockwise(90)

    # A shallow copy of the rotated page becomes the right half.
    pageRight = copy.copy(pageLeft)

    # Remember the original corner before either media box is changed.
    upperRight = pageLeft.mediaBox.upperRight
    halfWidth = upperRight[0]/2
    topEdge = upperRight[1]

    # Left half: pull the upper-right corner in to the middle of the page.
    pageLeft.mediaBox.upperRight = (halfWidth, topEdge)
    outputPDF.addPage(pageLeft)

    # Right half: push the upper-left corner out to the middle of the page.
    pageRight.mediaBox.upperLeft = (halfWidth, topEdge)
    outputPDF.addPage(pageRight)
    
# save new pages to an output file    
outputFileName = os.path.join(path, "Output/Updated Walrus.pdf")
with file(outputFileName, "wb") as outputFile:
示例#31
0
文件: views.py 项目: Dpetters/Umeqo
def employer_resume_book_create(request):
    """Create a recruiter's resume book file and attach it to the ResumeBook.

    With a non-empty ``resume_book_id`` in POST, that existing book is
    re-delivered under its stored name.  Otherwise the recruiter's
    undelivered book is fetched or created (if several exist, all but the
    first are deleted) and named ``<user>_<timestamp>``.

    ``delivery_format == 'separate'`` produces a zip archive holding one PDF
    per visible student.  Any other value produces a single PDF: contents
    page(s) listing the students, followed by every student's resume.

    Returns:
        An empty ``HttpResponse``.

    Raises:
        Http404: If ``resume_book_id`` does not match any resume book.
    """
    # Contents rows are 0.5 cm apart starting at 25.5 cm, so the 51st row
    # lands at 0.5 cm; one more row would fall off the bottom of the page.
    rows_per_page = 51

    if request.POST.get("resume_book_id"):
        redelivering = True
        try:
            resume_book = ResumeBook.objects.get(id=request.POST["resume_book_id"])
        except ResumeBook.DoesNotExist:
            raise Http404("No resume book exists with id of %s" % request.POST["resume_book_id"])
    else:
        redelivering = False
        try:
            resume_book = ResumeBook.objects.get_or_create(
                recruiter=request.user.recruiter, delivered=False)[0]
        except ResumeBook.MultipleObjectsReturned:
            # Keep the first undelivered book and drop the duplicates.
            resume_books = ResumeBook.objects.filter(recruiter=request.user.recruiter, delivered=False)
            for i, rb in enumerate(resume_books):
                if i != 0:
                    rb.delete()
                else:
                    resume_book = rb

    if redelivering:
        resume_book_name = resume_book.name
    else:
        now = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        resume_book_name = "%s_%s" % (str(request.user), now,)
        resume_book.name = resume_book_name
        resume_book.save()

    file_path = "%semployer/resumebook/" % (s.MEDIA_ROOT,)
    if not os.path.exists(file_path):
        os.makedirs(file_path)

    if request.POST['delivery_format'] == 'separate':
        # Create the zip file
        file_name = "%s%s" % (file_path, resume_book_name,)
        output = zipfile.ZipFile(file_name, 'w')
        try:
            for student in resume_book.students.visible():
                # ZipFile.write reads the file itself; only the path is needed.
                resume_path = "%s%s" % (s.MEDIA_ROOT, str(student.resume))
                name = "%s %s (%s, %s).pdf" % (student.first_name, student.last_name, student.graduation_year, student.degree_program)
                output.write(resume_path, name, zipfile.ZIP_DEFLATED)
        finally:
            output.close()
    else:
        output = PdfFileWriter()
        file_name = "%s%s.pdf" % (file_path, resume_book_name)

        def flush_contents_page(page_canvas, page_buffer):
            """Finish the canvas and append its single page to ``output``."""
            page_canvas.showPage()
            page_canvas.save()
            output.addPage(PdfFileReader(cStringIO.StringIO(page_buffer.getvalue())).getPage(0))

        students = resume_book.students.visible().order_by("graduation_year", "first_name", "last_name")

        report_buffer = cStringIO.StringIO()
        c = Canvas(report_buffer)
        now = datetime.now()
        first_line = "Created on %s at %s" % (now.strftime('%m/%d/%Y'), now.strftime('%I:%M %p'))
        c.drawString(1*cm, 28.5*cm, first_line)
        c.drawString(1*cm, 28*cm, str(request.user.recruiter))
        c.drawString(1*cm, 27.5*cm, str(request.user.recruiter.employer))
        c.drawString(16*cm, 28.5*cm, "Created using Umeqo")
        c.drawString(8.5*cm, 26.5*cm, "Resume Book Contents")
        pad_from_top = 0
        for student in students:
            # Start a new contents page only when another row actually needs
            # one: this repeats for every full page and never leaves a blank
            # trailing page.
            if pad_from_top == rows_per_page:
                flush_contents_page(c, report_buffer)
                report_buffer = cStringIO.StringIO()
                c = Canvas(report_buffer)
                pad_from_top = 0
            row_y = (25.5-pad_from_top*.5)*cm
            c.drawString(6.5*cm, row_y, "%s %s" % (student.first_name, student.last_name))
            c.drawString(12*cm, row_y, "%s, %s" % (student.graduation_year, student.degree_program))
            pad_from_top += 1
        flush_contents_page(c, report_buffer)

        # Resume files must stay open until the merged PDF has been written,
        # because page data is read from them at write time; all of them are
        # closed afterwards, including when there are no students at all.
        resume_files = []
        try:
            for student in students:
                resume_file = open("%s%s" % (s.MEDIA_ROOT, str(student.resume)), "rb")
                resume_files.append(resume_file)
                resume = PdfFileReader(resume_file)
                if resume.getIsEncrypted():
                    resume.decrypt("")
                for page in range(resume.getNumPages()):
                    output.addPage(resume.getPage(page))
            with open(file_name, "wb") as output_stream:
                output.write(output_stream)
        finally:
            for resume_file in resume_files:
                resume_file.close()

    with open(file_name, "rb") as resume_book_contents:
        resume_book.resume_book.save(file_name, File(resume_book_contents))
    return HttpResponse()
示例#32
0
    def export_to_file(self, file_out, only_selected=False):
        """Write the pages listed in ``self.model`` to a new PDF file.

        Every document in ``self.pdfqueue`` is opened from its ``copyname``;
        an encrypted document is accepted only if it decrypts with an empty
        password.  Each model row contributes one page: ``row[2]`` and
        ``row[3]`` are the 1-based document and page numbers, ``row[6]`` is
        the clockwise rotation to apply and ``row[7]``..``row[10]`` are crop
        fractions taken off the media box's left, right, top and bottom
        edges (re-mapped to the matching edges when the page is rotated).

        Args:
            file_out: Path of the PDF file to create.
            only_selected: When true, rows whose path is not part of the
                icon view's current selection are skipped.

        Raises:
            Exception: If a queued document is encrypted and cannot be
                decrypted with an empty password.
        """
        selection = self.iconview.get_selected_items()
        pdf_output = PdfFileWriter()
        pdf_input = []
        # Input streams must stay open until the output has been written,
        # because page data is read from them at write time.
        input_streams = []
        try:
            for pdfdoc in self.pdfqueue:
                stream = open(pdfdoc.copyname, 'rb')
                input_streams.append(stream)
                pdfdoc_inp = PdfFileReader(stream)
                if pdfdoc_inp.getIsEncrypted():
                    try:  # Workaround for lp:#355479
                        stat = pdfdoc_inp.decrypt('')
                    except Exception:
                        stat = 0
                    if (stat != 1):
                        errmsg = _(
                            'File %s is encrypted.\n'
                            'Support for encrypted files has not been implemented yet.\n'
                            'File export failed.') % pdfdoc.filename
                        raise Exception(errmsg)
                    #FIXME
                    #else
                    #   ask for password and decrypt file
                pdf_input.append(pdfdoc_inp)

            for row in self.model:

                if only_selected and row.path not in selection:
                    continue

                # add pages from input to output document
                nfile = row[2]
                npage = row[3]
                current_page = copy(pdf_input[nfile - 1].getPage(npage - 1))
                angle = row[6]
                angle0 = current_page.get("/Rotate", 0)
                crop = [row[7], row[8], row[9], row[10]]
                if angle != 0:
                    current_page.rotateClockwise(angle)
                if crop != [0., 0., 0., 0.]:
                    # Number of quarter turns the page ends up rotated by.
                    rotate_times = int(round(((angle + angle0) % 360) / 90) % 4)
                    crop_init = crop
                    if rotate_times != 0:
                        # Permute the crop sides so they follow the rotation.
                        perm = [0, 2, 1, 3]
                        for it in range(rotate_times):
                            perm.append(perm.pop(0))
                        perm.insert(1, perm.pop(2))
                        crop = [crop_init[perm[side]] for side in range(4)]
                    #(x1, y1) = current_page.cropBox.lowerLeft
                    #(x2, y2) = current_page.cropBox.upperRight
                    (x1,
                     y1) = [float(xy) for xy in current_page.mediaBox.lowerLeft]
                    (x2,
                     y2) = [float(xy) for xy in current_page.mediaBox.upperRight]
                    x1_new = int(x1 + (x2 - x1) * crop[0])
                    x2_new = int(x2 - (x2 - x1) * crop[1])
                    y1_new = int(y1 + (y2 - y1) * crop[3])
                    y2_new = int(y2 - (y2 - y1) * crop[2])
                    #current_page.cropBox.lowerLeft = (x1_new, y1_new)
                    #current_page.cropBox.upperRight = (x2_new, y2_new)
                    current_page.mediaBox.lowerLeft = (x1_new, y1_new)
                    current_page.mediaBox.upperRight = (x2_new, y2_new)

                pdf_output.addPage(current_page)

            # finally, write "output" to document-output.pdf; the context
            # manager guarantees the file is flushed and closed.
            with open(file_out, 'wb') as out_stream:
                pdf_output.write(out_stream)
        finally:
            for stream in input_streams:
                stream.close()
示例#33
0
文件: pyfoca.py 项目: tazV2/ipwn
    def processFile(self, curr_file):
        """Read a PDF's document information and record it in ``self.container``.

        Only names containing ``".pdf"`` are processed.  The creation date,
        author, producer and modification date are taken from the document
        information dictionary (``'-'`` when absent), shortened for display,
        and appended to ``self.container`` as one row:
        ``[" | " + name, created, author, producer, modded, last_saved]``.

        Processing is best effort: if the file cannot be read or parsed, or
        has no document information, nothing is appended.
        """
        global extractedFrom
        author = '-'
        created = '-'
        producer = '-'
        modded = '-'
        last_saved = '-'
        if ".pdf" not in curr_file:
            return
        try:
            # The document information is read while the file is still open;
            # the context manager closes the handle on every exit path.
            with open(curr_file, 'rb') as pdf_stream:
                pdfFile = PdfFileReader(pdf_stream)
                if pdfFile.getIsEncrypted():
                    pdfFile.decrypt('')
                docInfo = pdfFile.getDocumentInfo()
                if not docInfo:
                    return
                #looks at the entire dictionary to parse for information
                if "/CreationDate" in docInfo:
                    data = docInfo["/CreationDate"].strip("D:|'")
                    year = data[0:4]
                    date = data[4:6] + "/" + data[6:8]
                    created_time = data[8:10] + ":" + data[10:12]
                    created_time = time.strftime(
                        "%I:%M %p", time.strptime(created_time, "%H:%M"))
                    created = date + "/" + year + " " + created_time
                if "/Author" in docInfo:
                    author = docInfo["/Author"] + " "
                    if len(author) <= 1:
                        author = "-"
                if "/Producer" in docInfo:
                    # Remove the literal "(Windows)" marker.  str.strip()
                    # would treat the argument as a set of characters and
                    # also eat matching letters at the ends of the name
                    # (e.g. "Microsoft Word" -> "Microsoft Wor").
                    producer = docInfo["/Producer"].replace("(Windows)", "")
                    producer = re.sub(r'[^\w]', ' ', producer)
                    if len(producer) == 0:
                        producer = "-"
                    # Collapse runs of spaces left by the substitution.
                    while "  " in producer:
                        producer = producer.replace("  ", " ")
                if "/ModDate" in docInfo:
                    data = docInfo["/ModDate"].strip("D:|'")
                    year = data[0:4]
                    date = data[4:6] + "/" + data[6:8]
                    modded_time = data[8:10] + ":" + data[10:12]
                    modded_time = time.strftime(
                        "%I:%M %p", time.strptime(modded_time, "%H:%M"))
                    modded = date + "/" + year + " " + modded_time

            #strips '/' off file name (if it includes directory name)
            if "/" in curr_file:
                curr_file = curr_file[curr_file.rfind("/") + 1:]
            if "\\" in curr_file:
                curr_file = curr_file.replace("\\", "")

            #trim information if it's too long
            if len(curr_file) > 15:  # trims file name
                curr_file = curr_file[:15] + "..." + curr_file[-13:]
            if len(producer) > 30:
                producer = producer[:20] + " [snipped] "
            if len(author) > 20:
                author = author[:20] + " [snipped] "

            #appends each piece of information. output will show ONLY if at least ONE file has data in a column
            self.container.append([
                " | " + curr_file, created, author, producer, modded,
                last_saved
            ])
        except Exception:
            # Deliberate best effort: unreadable or malformed files are skipped.
            return