Python PDF示例，pdf.PDF Python示例

示例#1

0

显示文件

def create_individual_pdfs(revue):
    """Build one merged PDF per actor in ``revue.actors``.

    A front page is typeset for every actor whose ``forside-<name>.pdf`` is
    not already a file in the "pdf cache" directory.  For each actor a merge
    job is then queued whose inputs are, in order: the actor's front page,
    ``aktoversigt.pdf``, ``rolleliste.pdf``, the actor object itself and
    ``rekvisitliste.pdf``.  The output is ``<name>.pdf`` in the
    "individual pdf" directory.

    All directory names are read from ``revue.conf["Paths"]``.
    """
    path = revue.conf["Paths"]

    def frontpage_name(actor):
        return "forside-{}.pdf".format(actor.name)

    # Queue a TeX front page for each actor that has none in the cache.
    missing_frontpages = []
    for actor in revue.actors:
        pdf_name = frontpage_name(actor)
        if os.path.isfile(os.path.join(path["pdf cache"], pdf_name)):
            continue
        frontpage = TeX(revue)
        frontpage.create_frontpage(subtitle=actor.name)
        missing_frontpages.append([frontpage, pdf_name])

    # Typeset everything that was queued into the cache directory.
    cv.Converter().parallel_textopdf(missing_frontpages,
                                     outputdir=path["pdf cache"])

    # One merge job per actor: (inputs, output file).
    merge_jobs = []
    for actor in revue.actors:
        inputs = (
            os.path.join(path["pdf cache"], frontpage_name(actor)),
            os.path.join(path["pdf"], "aktoversigt.pdf"),
            os.path.join(path["pdf"], "rolleliste.pdf"),
            actor,
            os.path.join(path["pdf"], "rekvisitliste.pdf"),
        )
        output_file = os.path.join(path["individual pdf"],
                                   "{}.pdf".format(actor.name))
        merge_jobs.append((inputs, output_file))

    PDF().parallel_pdfmerge(merge_jobs)

示例#2

0

显示文件

文件： create.py 项目： theunbound/FysikRevyTeX

def create_song_manus_pdf(revue):
    """Typeset a front page and merge it with every song into one PDF.

    The front page ``forside-sangmanuskript.pdf`` is always regenerated into
    ``<pdf>/cache``.  Every material whose ``category`` equals the configured
    "songs" value is appended as a ``(pdf path, title)`` pair, and the whole
    list is merged into ``<pdf>/sangmanuskript.pdf``.

    Directory names are read from ``revue.conf["Paths"]``.
    """
    path = revue.conf["Paths"]
    frontpage_file = "forside-sangmanuskript.pdf"

    # No existence check and no cache-directory creation happen here any
    # more; per the original note, that now takes care of itself.
    frontpage = TeX(revue)
    frontpage.create_frontpage(subtitle="sangmanuskript")
    frontpage.topdf(frontpage_file,
                    outputdir=os.path.join(path["pdf"], "cache"))

    # The front page goes in as a bare path; songs go in as (path, title).
    merge_inputs = [os.path.join(path["pdf"], "cache", frontpage_file)]
    for act in revue.acts:
        for material in act.materials:
            if material.category != path["songs"]:
                continue
            # Mirror the material's relative directory below the pdf root and
            # swap the last four characters of the file name for ".pdf".
            song_pdf = os.path.join(
                path["pdf"],
                os.path.dirname(os.path.relpath(material.path)),
                "{}.pdf".format(material.file_name[:-4]))
            merge_inputs.append((song_pdf, material.title))

    PDF().pdfmerge(merge_inputs,
                   os.path.join(path["pdf"], "sangmanuskript.pdf"))

示例#3

0

显示文件

文件： text_to_pdf.py 项目： ArseniyFokin/Text-to-PDF

class TextToPDF:
    """Feed text files into a ``PDF`` object and collect failures.

    Problems are not raised to the caller; a message for each one is
    appended to ``self.errors``.
    """

    def __init__(self, font='JetBrainsMono'):
        self.pdf = PDF(font)
        self.errors = []

    def input_from_file(self, path):
        """Add the file at ``path`` as a chapter of the PDF.

        A missing file or a permission problem is recorded in
        ``self.errors`` (with the same message for both cases).
        """
        try:
            self.pdf.print_chapter(path)
        except (FileNotFoundError, PermissionError):
            self.errors.append("{} file does not exist".format(path))

    def input_from_files(self, paths):
        """Add every file in ``paths``, in order."""
        for single_path in paths:
            self.input_from_file(single_path)

    def input_from_package(self, path, extension=".java"):
        """Add every entry of the directory ``path``.

        ``extension`` is accepted but currently not applied: all directory
        entries are passed on, whatever their suffix.  A missing directory
        is recorded in ``self.errors``.
        """
        try:
            entries = os.listdir(path)
        except FileNotFoundError:
            self.errors.append("{} folder does not exist".format(path))
            return
        self.input_from_files([path + "/" + entry for entry in entries])

    def output(self, path):
        """Write the PDF to the file ``path``.

        A ``FileNotFoundError`` from the write is recorded in
        ``self.errors``.
        """
        try:
            self.pdf.output(path, 'F')
        except FileNotFoundError:
            self.errors.append("{} folder does not exist".format(path))

示例#4

0

显示文件

    def __init__(self, settings):
        """Set up helper objects and empty result lists.

        ``settings`` is accepted but not used in this method.
        """
        self.context = None
        self.totalCount = 0

        # Helper objects, created once per instance.
        self.cfbf = CFBF()
        self.pdf = PDF()
        self.ooxml = OOXML()

        # Result accumulators; every attribute gets its own fresh list.
        self.xls_result, self.ppt_result, self.doc_result = [], [], []
        self.xlsx_result, self.pptx_result, self.docx_result = [], [], []
        self.PyPDF_result, self.PDFNoModule_result = [], []

示例#5

0

显示文件

文件： Printing.py 项目： zbanga/abundata

	def OnPrintPage( self, page ):
		"""Draw page ``page`` into the current PDF and write the file when due.

		A PDF object is created lazily on the first page drawn.  Unless
		``self.allInOne`` is set, the PDF is written to ``self.dir`` once the
		page number equals the page total, and ``self.pdf`` is reset to None.
		Always returns True.
		"""
		def latin1( text ):
			# Metadata is stored as ISO-8859-1; unencodable characters are dropped.
			return unicode(text).encode('iso-8859-1','ignore')

		exportGrid = self.prepareGrid( page )

		info = self.pageInfo[page]
		category, pageNumber, pageTotal = info[0], info[3], info[4]

		if category != 'Primes':
			categoryName = category.fullname
		else:
			categoryName = 'Primes'
		fname = u'{fileBase}-{categoryName}.pdf'.format( fileBase = self.fileBase, categoryName = categoryName )
		fname = Utils.RemoveDisallowedFilenameChars( fname ).replace( ' ', '-' )

		if not self.pdf:
			pdfOrientation = 'L' if self.orientation == wx.LANDSCAPE else 'P'
			self.pdf = PDF( orientation = pdfOrientation )
			self.pdf.set_font( 'Arial', '', 12 )
			self.pdf.set_author( latin1(getpass.getuser()) )
			self.pdf.set_keywords( latin1('CrossMgr Results') )
			self.pdf.set_creator( latin1(Version.AppVerName) )
			# Title is the file name without extension, dashes shown as spaces.
			self.pdf.set_title( latin1(os.path.splitext(fname)[0].replace('-', ' ')) )

		# pageInfo entries between the first and the last are passed as draw arguments.
		drawArgs = [self.pdf, self.orientation] + self.pageInfo[page][1:-1]
		exportGrid.drawToFitPDF( *drawArgs )

		if not self.allInOne and pageNumber == pageTotal:
			if self.dir and not os.path.isdir( self.dir ):
				os.mkdir( self.dir )
			fname = os.path.join( self.dir, fname )
			self.pdf.output( fname, 'F' )
			self.lastFName = fname
			self.pdf = None

		return True

示例#6

0

显示文件

文件： Auditreport.py 项目： fdq09eca/Chong-Shing-HKEX-Project

    def test():
        """Run the audit-fee extraction over the last year of filings.

        For every record in ``HKEX_API(...).data`` the linked PDF is opened,
        the ``summary`` of each audit-fee table is collected, and one row is
        written to ``result_3.csv``.  If any step fails for a record, an error
        row holding the exception is written instead and the loop moves on.
        ``KeyboardInterrupt`` stops the loop without writing a row.
        """
        # logging.basicConfig(level=logging.INFO)
        query = HKEX_API(from_date=n_yearsago(n=1), to_date=today())
        for data in query.data:
            # Reset per record: if reading ``file_link`` itself fails, the
            # error row must not hit an unbound name or report the previous
            # record's URL.
            url = None
            try:
                url = data.file_link
                # url = 'https://www.dropbox.com/'
                print(url)

                pdf = PDF(url)
                f = AuditFee(pdf.pdf_obj)
                tab_sum = [table.summary for table in f.tables]
            except KeyboardInterrupt:
                break
            except Exception as e:
                result = {
                    'table_summary': e,
                    'ERROR': True,
                    'url': url,
                }
            else:
                result = {
                    # Falsy summaries are dropped from the success row.
                    'table_summary': list(filter(None, tab_sum)),
                    'ERROR': None,
                    'url': url,
                }
            write_to_csv(result, 'result_3.csv')

示例#7

0

显示文件

文件： create.py 项目： theunbound/FysikRevyTeX

def create_individual_pdfs(revue):
    """Build one merged PDF per actor in ``revue.actors``.

    A front page is queued for every actor and typeset into the "pdf cache"
    directory.  For each actor a merge job is then queued whose inputs are,
    in order: the front page, ``aktoversigt.pdf``, ``rolleliste.pdf`` (each
    as a ``(path, label)`` pair), the actor object itself, and
    ``kontaktliste.pdf`` (again as a pair).  The output is ``<name>.pdf`` in
    the "individual pdf" directory.

    All directory names are read from ``revue.conf["Paths"]``.
    """
    path = revue.conf["Paths"]

    def frontpage_name(actor):
        return "forside-{}.pdf".format(actor.name)

    # An earlier version only queued front pages missing from the cache.
    # Per the original note, that should now sort itself out, so every
    # actor is queued.
    frontpage_jobs = []
    for actor in revue.actors:
        tex = TeX(revue)
        tex.create_frontpage(subtitle=actor.name)
        frontpage_jobs.append([tex, frontpage_name(actor)])

    # Typeset the front pages.
    cv.Converter().parallel_textopdf(frontpage_jobs,
                                     outputdir=path["pdf cache"])

    def merge_inputs(actor):
        return (
            (os.path.join(path["pdf cache"], frontpage_name(actor)),
             "Forside"),
            (os.path.join(path["pdf"], "aktoversigt.pdf"), "Aktoversigt"),
            (os.path.join(path["pdf"], "rolleliste.pdf"), "Rolleliste"),
            actor,
            (os.path.join(path["pdf"], "kontaktliste.pdf"), "Kontaktliste"),
        )

    merge_jobs = [
        (merge_inputs(actor),
         os.path.join(path["individual pdf"], "{}.pdf".format(actor.name)))
        for actor in revue.actors
    ]

    PDF().parallel_pdfmerge(merge_jobs)

示例#8

0

显示文件

文件： Printing.py 项目： johndowen/CrossMgr

    def OnPrintPage(self, page):
        """Draw page ``page`` into the current PDF and write the file when due.

        Returns True immediately if the page's first ``pageInfo`` entry
        cannot be read (the no-data case).  Otherwise a PDF object is created
        lazily, the page is drawn, and - unless ``self.allInOne`` is set -
        the PDF is written to ``self.dir`` once the page number equals the
        page total.  Always returns True.
        """
        exportGrid = self.prepareGrid(page)

        try:
            category = self.pageInfo[page][0]
        except Exception:
            # Handle case of no data.
            return True

        pageNumber, pageTotal = self.pageInfo[page][3], self.pageInfo[page][4]

        if category != 'Primes':
            categoryName = category.fullname
        else:
            categoryName = 'Primes'
        rawName = u'{fileBase}-{categoryName}.pdf'.format(
            fileBase=self.fileBase, categoryName=categoryName)
        fname = Utils.RemoveDisallowedFilenameChars(rawName).replace(' ', '-')

        if not self.pdf:
            def latin1(text):
                # Metadata is stored as ISO-8859-1; unencodable characters
                # are dropped.
                return unicode(text).encode('iso-8859-1', 'ignore')

            pdfOrientation = 'L' if self.orientation == wx.LANDSCAPE else 'P'
            self.pdf = PDF(orientation=pdfOrientation)
            self.pdf.set_font('Arial', '', 12)
            self.pdf.set_author(latin1(getpass.getuser()))
            self.pdf.set_keywords(latin1('CrossMgr Results'))
            self.pdf.set_creator(latin1(Version.AppVerName))
            # Title is the file name without extension, dashes as spaces.
            title = os.path.splitext(fname)[0].replace('-', ' ')
            self.pdf.set_title(latin1(title))

        # pageInfo entries between the first and the last are passed on as
        # draw arguments.
        drawArgs = [self.pdf, self.orientation] + self.pageInfo[page][1:-1]
        exportGrid.drawToFitPDF(*drawArgs)

        if not self.allInOne and pageNumber == pageTotal:
            if self.dir and not os.path.isdir(self.dir):
                os.mkdir(self.dir)
            fname = os.path.join(self.dir, fname)
            self.pdf.output(fname, 'F')
            self.lastFName = fname
            self.pdf = None

        return True

示例#9

0

显示文件

文件： create.py 项目： digitaldingo/FysikRevyTeX

def create_song_manus_pdf(revue):
    """Merge the song front page and all song PDFs into one manuscript.

    ``<pdf>/cache`` is created if it does not exist, and the front page
    ``forside-sangmanuskript.pdf`` is typeset there only when it is not
    already a file.  Every material whose ``category`` equals the configured
    "songs" value contributes ``<pdf>/<songs>/<name>.pdf``, and the list is
    merged into ``<pdf>/sangmanuskript.pdf``.

    Directory names are read from ``revue.conf["Paths"]``.
    """
    path = revue.conf["Paths"]
    cache_dir = os.path.join(path["pdf"], "cache")
    frontpage_name = "forside-sangmanuskript.pdf"

    # Create the front page, if it doesn't already exist.
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    if not os.path.isfile(os.path.join(cache_dir, frontpage_name)):
        frontpage = TeX(revue)
        frontpage.create_frontpage(subtitle="sangmanuskript")
        frontpage.topdf(frontpage_name, outputdir=cache_dir)

    # Front page first, then the songs in act/material order.
    merge_inputs = [os.path.join(path["pdf"], "cache", frontpage_name)]
    for act in revue.acts:
        for material in act.materials:
            if material.category != path["songs"]:
                continue
            # Swap the last four characters of the file name for ".pdf".
            song_pdf = "{}.pdf".format(material.file_name[:-4])
            merge_inputs.append(
                os.path.join(path["pdf"], path["songs"], song_pdf))

    PDF().pdfmerge(merge_inputs,
                   os.path.join(path["pdf"], "sangmanuskript.pdf"))

示例#10

0

显示文件

文件： clg.py 项目： phelian/clg

def main(config=None, label=None):
    """Generate a single label, or start the server.

    A ``PDF("A4")`` generator with one initial page is always created.  When
    ``label`` is truthy, the fonts are registered, ``pdf.generate(**label)``
    is called and the function returns.  Otherwise, when ``config`` is
    truthy, a ``Server`` is built from ``config`` and the generator, and run.
    """
    # Set up the PDF generator; there is always one initial page.
    generator = PDF("A4")
    generator.add_page()

    if not label:
        if config:
            Server(config, generator).run()
        return

    # Run from the console with label input: generate the label and exit.
    # Add all fonts here.
    for style, font_file in (("", "fonts/cambria.ttf"),
                             ("B", "fonts/cambria_B.ttf")):
        generator.add_font("cambria", style, font_file, True)
    generator.generate(**label)

示例#11

0

显示文件

文件： web.py 项目： feesta/pdf-generator

 def post(self):
     """Build a PDF from the request arguments and respond with its file name.

     Reads ``title`` (default 'This is the title'), ``circles`` and
     ``lines`` (default 10 each, converted with ``int``) from the request,
     and writes whatever ``pdf.save()`` returns to the response.
     """
     read_arg = self.get_argument
     title = read_arg('title', 'This is the title')
     circle_count = int(read_arg('circles', 10))
     line_count = int(read_arg('lines', 10))

     document = PDF()
     document.start(title, circle_count, line_count)
     document.set_title(title)
     self.write(document.save())

示例#12

0

显示文件

def print_aso_bib(participant, copies=2):
    """Return a PDF holding ``copies`` ASO-format bibs for ``participant``.

    ``copies`` is converted with ``int``.  Each copy is drawn by ``aso_bib``
    from the participant's bib, the license holder's first and last name,
    and the competition name.  The return value is ``pdf.output(dest='s')``.
    """
    license_holder = participant.license_holder
    copies = int(copies)

    keywords = 'RaceDB CrossMgr Bicycle Racing Software Database Road Time Trial MTB CycloCross RFID'
    document = PDF(orientation='P')
    document.set_subject('Bib number and rider info in aso format.')
    document.set_keywords(keywords)

    for _ in xrange(copies):
        aso_bib(
            document,
            participant.bib,
            license_holder.first_name,
            license_holder.last_name,
            participant.competition.name,
        )

    return document.output(dest='s')

示例#13

0

显示文件

def print_aso_bib_two_per_page(participant):
    """Return a PDF with the participant's bib in the two-per-page format.

    The page content is drawn by ``aso_bib_two_per_page`` from the
    participant's bib, the license holder's first and last name, and the
    competition name.  The return value is ``pdf.output(dest='s')``.
    """
    keywords = 'RaceDB CrossMgr Bicycle Racing Software Database Road Time Trial MTB CycloCross RFID'
    document = PDF(orientation='P')
    document.set_subject(
        'Bib number and rider info in modified aso format, two per page.')
    document.set_keywords(keywords)

    holder = participant.license_holder
    aso_bib_two_per_page(
        document,
        participant.bib,
        holder.first_name,
        holder.last_name,
        participant.competition.name,
    )

    return document.output(dest='s')

示例#14

0

显示文件

文件： Auditreport.py 项目： fdq09eca/Chong-Shing-HKEX-Project

    def debug(url, p):
        """Print the text, parsed table and summary of one audit-fee section.

        ``url`` is handed to ``PDF``; ``p`` is handed to
        ``AuditFee.target_section`` to select the section.
        """
        fee = AuditFee(PDF(url).pdf_obj)

        section = fee.target_section(p)
        print(section.extract_text())

        fee_table = AuditFeeTable(section)
        # More attributes that can be printed while debugging:
        #   currency_idx, amount_idx, year_idx, co_row_idx, co_col_idx,
        #   raw_table
        print(fee_table.table)
        print(fee_table.summary)

示例#15

0

显示文件

def process_pdf(input_path, db_feed):
    """Parse the PDF at ``input_path`` and hand the result to ``process_df``.

    ``db_feed`` is passed to ``PDF`` to build the settings object whose
    ``page_pattern``, ``table_pattern`` and ``column`` drive the dataframe
    construction.

    Returns whatever ``process_df`` returns.  When no dataframe could be
    built, a message is printed and None is returned.
    """
    # Settings object built from the database feed.
    pdf_object = PDF(db_feed)

    # Parse the file, then build the dataframe with a second parser instance.
    pdf_content = PDFparser().parse(input_path)
    frame_builder = PDFparser()
    df = frame_builder.create_df(
        pdf_content,
        pdf_object.page_pattern,
        pdf_object.table_pattern,
        pdf_object.column,
    )

    # Transformation and filtering only continue when parsing succeeded.
    if df is None:
        print('no dataframe passed')
        return None
    return process_df(pdf_object, df)

示例#16

0

显示文件

文件： main.py 项目： rsd13/data-analysis-pipeline

def main():
    """Run the analysis selected on the command line and write the PDF.

    Exactly one of ``clear``, ``cities``, ``states`` or ``type`` is acted
    on, checked in that order; with none of them an error message is
    printed.  ``analytic.pdf`` is written in every case, including the
    error case.
    """
    args = parse()

    # options
    report = PDF()
    report.input_title()

    if args.clear:
        data_clear(report)
    elif args.cities:
        compare_city(args.cities, report)
    elif args.states:
        compare_state(args.states, report)
    elif args.type:
        compare_restaurant(report, args.type)
    else:
        message = 'Error: se requiere uno o mas argumentos para realizar la accion. Pulsa -h para más información'
        print(message)

    # ``url`` is defined outside this function.
    output_dir = url + "/../pdf/"
    report.output(output_dir + "analytic.pdf", 'F')

示例#17

0

显示文件

def print_bib_tag_label(participant,
                        sponsor_name=None,
                        left_page=True,
                        right_page=True,
                        barcode=True):
    """Return a PDF with the participant's bib label for a chip tag.

    The document has one page per requested side: a "left" page when
    ``left_page`` is true and a "right" page when ``right_page`` is true.
    Each page shows an arrow and the sponsor name in the header, the bib
    number in the middle, and the rider name plus the system name in the
    footer.  When ``barcode`` is true and a license code is available, a
    Code 128 barcode is drawn in the footer as well.

    ``sponsor_name`` defaults to the sponsor of the competition's number
    set, or to the competition name when there is no such sponsor.

    Returns the value of ``pdf.output(dest='s')``.
    """
    competition = participant.competition
    license_holder = participant.license_holder

    bib = participant.bib
    name = license_holder.first_last
    # Fall back to the short form for names longer than 32 characters.
    if len(name) > 32:
        name = license_holder.first_last_short

    if sponsor_name is None:
        if competition.number_set and competition.number_set.sponsor:
            sponsor_name = competition.number_set.sponsor
        else:
            sponsor_name = competition.name
    system_name = 'CrossMgr'

    # Use points at the units.
    # ``inches_to_points`` is defined outside this function.
    page_width = 3.9 * inches_to_points
    page_height = 2.4 * inches_to_points

    pdf = PDF('L', (page_height, page_width))
    pdf.set_author(RaceDBVersion)
    pdf.set_title('Race Bib Number: {}'.format(bib))
    pdf.set_subject(
        'Bib number and rider info to be printed as a label to apply on the chip tag.'
    )
    pdf.set_creator(getpass.getuser())
    pdf.set_keywords(
        'RaceDB CrossMgr Bicycle Racing Software Database Road Time Trial MTB CycloCross RFID'
    )

    pdf.add_font('din1451alt',
                 style='',
                 fname=get_font_file('din1451alt G.ttf'),
                 uni=True)
    pdf.add_font('Arrows',
                 style='',
                 fname=get_font_file('Arrrows-Regular.ttf'),
                 uni=True)

    # Margin and separator sizes are derived from the shorter page side.
    margin = min(page_height, page_width) / 18.0
    sep = margin / 2.5

    height = page_height - margin * 2.0
    width = page_width - margin * 2.0

    # Layout rectangles: header at the top, footer of equal size at the
    # bottom, and the bib field in the space between them.
    header = Rect(margin, margin, width, height / 18.0)
    footer = Rect(margin, page_height - margin - header.height, header.width,
                  header.height)
    field = Rect(header.x, header.bottom + sep, width,
                 footer.top - header.bottom - sep * 2)

    license_code = license_holder.uci_id or license_holder.license_code

    # Characters drawn with the 'Arrows' font.
    # NOTE(review): presumably the left and right arrow glyphs - confirm.
    leftArrow, rightArrow = 'A', 'a'

    font_name = 'Helvetica'
    # ``lp`` is True while drawing the left page and False for the right one.
    for lp in ([True] if left_page else []) + ([False] if right_page else []):
        pdf.add_page()

        # The arrow box is the header doubled in height and shifted up by
        # half the original header height.
        arrow = copy.deepcopy(header)
        arrow.y -= arrow.height * 0.5
        arrow.height *= 2
        pdf.set_font('Arrows')
        arrowWidth = arrow.draw_text_to_fit(
            pdf,
            leftArrow if lp else rightArrow,
            (Rect.AlignLeft if lp else Rect.AlignRight) | Rect.AlignMiddle,
            consider_descenders=True,
            convert_to_text=False,
        )
        # Leave a gap of two spaces (in the current font) next to the arrow.
        arrowWidth += pdf.get_string_width('  ')

        # The sponsor name gets the part of the header not taken by the arrow.
        header_remain = copy.deepcopy(header)
        if lp:
            header_remain.x += arrowWidth
        header_remain.width -= arrowWidth

        pdf.set_font(font_name)
        header_remain.draw_text_to_fit(
            pdf, sponsor_name,
            (Rect.AlignLeft if lp else Rect.AlignRight) | Rect.AlignMiddle,
            True)

        pdf.set_font('din1451alt', '', 16)
        field.draw_text_to_fit(pdf, bib, Rect.AlignCenter | Rect.AlignMiddle)

        pdf.set_font(font_name)
        name_width = footer.draw_text_to_fit(
            pdf, name,
            (Rect.AlignRight if lp else Rect.AlignLeft) | Rect.AlignMiddle)

        # The system name goes on the opposite side of the footer from the
        # rider name, and only if more than 20 points of width remain.
        logo = copy.deepcopy(footer)
        if not lp:
            logo.x += name_width + sep
        logo.width -= name_width + sep
        if logo.width > 20:
            logo_width = logo.draw_text_to_fit(
                pdf, system_name,
                (Rect.AlignLeft if lp else Rect.AlignRight) | Rect.AlignMiddle)
        else:
            logo_width = 0

        if barcode:
            # The barcode fills the footer width left between name and logo.
            remaining_width = header.width - name_width - logo_width
            if lp:
                barcode_rect = Rect(footer.x + logo_width, footer.y,
                                    remaining_width, footer.height)
            else:
                barcode_rect = Rect(
                    footer.right - logo_width - remaining_width, footer.y,
                    remaining_width, footer.height)
            if license_code:
                draw_code128(pdf, license_code, barcode_rect.x, barcode_rect.y,
                             barcode_rect.width, barcode_rect.height)

    pdf_str = pdf.output(dest='s')
    return pdf_str

示例#18

0

显示文件

def print_bib_on_rect(bib,
                      license_code=None,
                      name=None,
                      logo=None,
                      widthInches=5.9,
                      heightInches=3.9,
                      copies=1,
                      onePage=False):
    """Return a PDF with ``copies`` copies of a bib number on a rectangle.

    Each copy shows ``bib`` centered, plus - when given - ``logo`` text at
    the bottom left, a Code 128 barcode of ``license_code`` at the bottom,
    and ``name`` at the bottom right.

    With ``onePage`` false every copy gets its own page.  With ``onePage``
    true the page is made ``copies`` times as tall, all copies are stacked
    on it, and a dashed line is drawn above every copy after the first.

    Returns the value of ``pdf.output(dest='s')``.
    """
    # ``inches_to_points`` is defined outside this function.
    page_width = widthInches * inches_to_points
    page_height = heightInches * inches_to_points

    pdf = PDF('L', (page_height * (copies if onePage else 1), page_width))
    pdf.set_author(RaceDBVersion)
    pdf.set_title('Race Bib Number: {}'.format(bib))
    pdf.set_subject('Bib number.')
    pdf.set_creator(getpass.getuser())
    pdf.set_keywords(
        'RaceDB CrossMgr Bicycle Racing Software Database Road Time Trial MTB CycloCross RFID'
    )
    pdf.add_font('din1451alt',
                 style='',
                 fname=get_font_file('din1451alt G.ttf'),
                 uni=True)

    # Margin is derived from the shorter side of a single copy.
    margin = min(page_height, page_width) / 17.5
    # ``sep`` is not referenced again in this function.
    sep = margin / 2.5

    height = page_height - margin * 2.0
    width = page_width - margin * 2.0

    text_margin = margin
    text_height = margin * 0.4

    for c in xrange(copies):
        if c == 0 or not onePage:
            pdf.add_page()
            page_y = 0
        else:
            # Later copies on a shared page are offset downwards and
            # separated from the previous copy by a dashed line.
            page_y = page_height * c
            pdf.dashed_line(0, page_y, page_width, page_y, space_length=12)

        pdf.set_font('din1451alt', '', 16)
        field = Rect(margin, margin + page_y, width, height)
        field.draw_text_to_fit(pdf, bib, Rect.AlignCenter | Rect.AlignMiddle)

        pdf.set_font('Helvetica')
        # NOTE(review): ``barcode_width_max`` is not defined in this function;
        # presumably a module-level value - confirm it exists.
        if logo:
            x = text_margin
            logo_rect = Rect(x, page_height - margin + page_y,
                             (page_width - barcode_width_max) / 2.0 - x,
                             text_height)
            logo_rect.draw_text_to_fit(pdf, logo,
                                       Rect.AlignLeft | Rect.AlignMiddle)

        if license_code:
            barcode_rect = Rect(margin, page_height - margin * 1.2 + page_y,
                                width, margin * 0.8)
            draw_code128(pdf, license_code, barcode_rect.x, barcode_rect.y,
                         barcode_rect.width, barcode_rect.height)

        if name:
            x = (page_width + barcode_width_max) / 2.0
            name_rect = Rect(x, page_height - margin + page_y,
                             page_width - text_margin - x, text_height)
            name_rect.draw_text_to_fit(pdf, name,
                                       Rect.AlignRight | Rect.AlignMiddle)

    pdf_str = pdf.output(dest='s')
    return pdf_str

示例#19

0

显示文件

文件： test.py 项目： nam-nguyen17/CS111

from pdf import PDF, Word, Document, Html

#function
def show_common(obj):
    """Display ``obj`` according to its exact class.

    ``PDF``, ``Word`` and ``Document`` instances are displayed by printing
    the return value of ``show()``; ``Html`` instances display themselves
    through their own ``print()`` method.  Objects of any other exact type
    are ignored.
    """
    kind = type(obj)
    if kind == Html:
        obj.print()
    elif kind in (PDF, Word, Document):
        print(obj.show())

# Build one instance of each document class and display every one of them.
docs = [PDF('Doc1'), Word('Doc2'), Document('Doc3'), Html('Doc4')]
for d in docs:
    # print(d.show())
    show_common(d)

示例#20

0

显示文件

文件： main.py 项目： oneebkhan/Merger

# finds path of files

# making an output directory for pdf merged
# Directory that receives the merged PDF; created on first use.
output_merge = file_path_variable + '/output_merged'
if not os.path.exists(output_merge):
    os.mkdir(output_merge)

# Walk the whole tree and convert every PowerPoint file that is found.
for subdir, dirs, files in os.walk(file_path_variable):
    for filename in files:
        filepath = subdir + os.sep + filename
        print(filepath)

        ppt = Ex()

        # .ppt, .pptx and .pptm files are all converted the same way; the
        # converter is handed the path with backslash separators.
        if filepath.endswith((".ppt", ".pptx", ".pptm")):
            folder = filepath.replace('/', '\\')
            ppt.ppt_convert(folder)

pdf = PDF()
pdf.p(file_path_variable, output_merge.replace('/', '\\'))

root.mainloop()

示例#21

0

显示文件

文件： quizzes.py 项目： bestdax/quiz_gen_for_kids

def gen_pdf_quiz(cfg):
    """Write one quiz PDF per user in ``cfg``.

    For every user, ``cfg[user]['global']`` supplies the number of pages
    (``pages``), the date setting (``show_date``) and the output directory
    (``quiz_dir``, with ``~`` expanded).  Each page is filled with quizzes
    from ``bulk_quiz_gen(cfg[user])``.  The file is named ``<user>.pdf``,
    or ``quizzes.pdf`` when the user key is falsy.
    """
    for user in cfg:
        user_cfg = cfg[user]
        options = user_cfg['global']

        pdf = PDF()
        for _ in range(options['pages']):
            pdf.add_page()
            pdf.set_title('四则运算练习')
            pdf.set_date(date=options['show_date'])
            pdf.set_quizzes(quizzes=bulk_quiz_gen(user_cfg))

        target_dir = os.path.expanduser(options['quiz_dir'])
        base_name = f'{user}.pdf' if user else 'quizzes.pdf'
        pdf.output(os.path.join(target_dir, base_name), 'F')

示例#22

0

显示文件

文件： create.py 项目： digitaldingo/FysikRevyTeX

    if len(conf.cmd_parts) == 0:
        arglist = ("aktoversigt", "roles", "frontpage", "props",
                   "contacts", "material","individual", "songmanus")
    elif "manus" in sys.argv:
        arglist = ("aktoversigt", "roles", "frontpage", "props",
                   "contacts", "material")
    else:
        arglist = sys.argv[1:]

    for arg in arglist:
        create_parts(revue, arg)


    if len(conf.cmd_parts) == 0 or "manus" in sys.argv:
        pdf = PDF()
        pdf.pdfmerge((os.path.join(path["pdf"],"forside.pdf"),
                      os.path.join(path["pdf"],"aktoversigt.pdf"),
                      os.path.join(path["pdf"],"rolleliste.pdf"),
                      revue,
                      os.path.join(path["pdf"],"rekvisitliste.pdf"),
                      os.path.join(path["pdf"],"kontaktliste.pdf")),
                      os.path.join(path["pdf"],"manuskript.pdf"))

        print("Manuscript successfully created!")


    for act in revue.acts:
        for material in act.materials:
            metadata.update_mod_time(material)
    for f in glob.glob(os.path.join(path["pdf"], "*.pdf")):

示例#23

0

显示文件

class CrossMgrPrintoutPDF(CrossMgrPrintout):
    """Printout that draws its pages into PDF files in ``dir``.

    With ``allInOne`` false, one file ``<fileBase>-<category>.pdf`` is
    written each time a page's number equals its page total.  With
    ``allInOne`` true, all pages go into one document that is written as
    ``<fileBase>.pdf`` when printing ends.  The path of the most recently
    written file is kept in ``lastFName``.
    """

    def __init__(self,
                 dir,
                 fileBase,
                 orientation,
                 categories=None,
                 allInOne=False):
        CrossMgrPrintout.__init__(self, categories)
        self.dir, self.fileBase = dir, fileBase
        self.orientation, self.allInOne = orientation, allInOne
        self.pdf = None
        self.lastFName = None

    @staticmethod
    def _latin1(text):
        """Encode ``text`` as ISO-8859-1, dropping unencodable characters."""
        return unicode(text).encode('iso-8859-1', 'ignore')

    @staticmethod
    def _titleFor(fname):
        """Return the file name without extension, dashes shown as spaces."""
        return os.path.splitext(fname)[0].replace('-', ' ')

    def _writePDF(self, fname):
        """Write the current PDF as ``fname`` in ``self.dir`` and drop it."""
        if self.dir and not os.path.isdir(self.dir):
            os.mkdir(self.dir)
        fname = os.path.join(self.dir, fname)
        self.pdf.output(fname, 'F')
        self.lastFName = fname
        self.pdf = None

    def OnEndPrinting(self):
        """Write the combined PDF, if there is one, then defer to the base."""
        if self.pdf and self.allInOne:
            fname = u'{fileBase}.pdf'.format(fileBase=self.fileBase)
            self.pdf.set_title(self._latin1(self._titleFor(fname)))
            self._writePDF(fname)
        return super(CrossMgrPrintoutPDF, self).OnEndPrinting()

    def OnPrintPage(self, page):
        """Draw page ``page`` into the current PDF; always returns True."""
        exportGrid = self.prepareGrid(page)

        info = self.pageInfo[page]
        category, pageNumber, pageTotal = info[0], info[3], info[4]

        if category != 'Primes':
            categoryName = category.fullname
        else:
            categoryName = 'Primes'
        rawName = u'{fileBase}-{categoryName}.pdf'.format(
            fileBase=self.fileBase, categoryName=categoryName)
        fname = Utils.RemoveDisallowedFilenameChars(rawName).replace(' ', '-')

        # The PDF object is created lazily, on the first page drawn into it.
        if not self.pdf:
            pdfOrientation = 'L' if self.orientation == wx.LANDSCAPE else 'P'
            self.pdf = PDF(orientation=pdfOrientation)
            self.pdf.set_font('Arial', '', 12)
            self.pdf.set_author(self._latin1(getpass.getuser()))
            self.pdf.set_keywords(self._latin1('CrossMgr Results'))
            self.pdf.set_creator(self._latin1(Version.AppVerName))
            self.pdf.set_title(self._latin1(self._titleFor(fname)))

        # pageInfo entries between the first and the last are passed on as
        # draw arguments.
        drawArgs = [self.pdf, self.orientation] + self.pageInfo[page][1:-1]
        exportGrid.drawToFitPDF(*drawArgs)

        if not self.allInOne and pageNumber == pageTotal:
            self._writePDF(fname)

        return True

示例#24

0

显示文件

文件： text_to_pdf.py 项目： ArseniyFokin/Text-to-PDF

 def __init__(self, font='JetBrainsMono'):
     """Create the underlying ``PDF`` for ``font`` and an empty error list."""
     document = PDF(font)
     self.pdf = document
     self.errors = list()

示例#25

0

显示文件

文件： views.py 项目： bovine/flightloggin2

def pdf(request):
    """Return the PDF built for ``request.display_user`` as a response."""
    return PDF(request.display_user).as_response()

示例#26

0

显示文件

def print_id_label(participant):
    """Return a one-page PDF label with rider ID and emergency information.

    The page shows the rider name in bold in the header, a table of details
    in the middle (age, gender, nation, team, bib, category, phone, medical
    alert, emergency contact), and the system name in the footer.  Rows for
    team, phone, medical alert and emergency contact name are only added
    when the corresponding value is truthy.

    Returns the value of ``pdf.output(dest='s')``.
    """
    # ``competition`` is not referenced again in this function.
    competition = participant.competition
    license_holder = participant.license_holder

    bib = participant.bib
    name = license_holder.first_last
    # Fall back to the short form for names longer than 32 characters.
    if len(name) > 32:
        name = license_holder.first_last_short

    system_name = 'CrossMgr'

    inches_to_points = 72.0

    # Use points at the units.
    page_width = 3.9 * inches_to_points
    page_height = 2.4 * inches_to_points

    pdf = PDF('L', (page_height, page_width))
    pdf.set_author(RaceDBVersion)
    pdf.set_title('Bib Number: {}'.format(bib))
    pdf.set_subject('Rider ID and Emergency Information.')
    pdf.set_creator(getpass.getuser())
    pdf.set_keywords(
        'RaceDB CrossMgr Bicycle Racing Software Database Road Time Trial MTB CycloCross'
    )

    # Margin and separator sizes are derived from the shorter page side.
    margin = min(page_height, page_width) / 18.0
    sep = margin / 2.5

    height = page_height - margin * 2.0
    width = page_width - margin * 2.0

    # Layout rectangles: header at the top, a footer half as tall at the
    # bottom, and the table field in the space between them.
    header = Rect(margin, margin, width, height / 10.0)
    footer_height = height / 20
    footer = Rect(margin, page_height - margin - footer_height, header.width,
                  footer_height)
    field = Rect(header.x, header.bottom + sep, width,
                 footer.top - header.bottom - sep * 2)

    # These two names are not referenced again in this function.
    leftArrow, rightArrow = chr(172), chr(174)

    font_name = 'Helvetica'
    pdf.add_page()
    pdf.set_font(font_name, 'b')

    header.draw_text_to_fit(pdf, name, Rect.AlignLeft, True)

    pdf.set_font(font_name)
    # Table rows; each row is [first column, second column] and the first
    # column is always the empty string.
    info = []
    info.append([
        '',
        u',  '.join([
            u'Age: {}'.format(license_holder.get_age()),
            u'Gender: {}'.format(license_holder.get_gender_display()),
            u'Nation: {}'.format(license_holder.nation_code),
        ]),
    ])
    # Blank spacer row.
    info.append(['', ''])
    if participant.team:
        info.append(['', u'{}'.format(participant.team.name)])
    info.append([
        '',
        u',  '.join([
            u'Bib: {}'.format(participant.bib),
            u'Category: {}'.format(participant.category.code_gender
                                   if participant.category else ''),
        ]),
    ])
    if license_holder.phone:
        info.append([
            '',
            u'  '.join([
                u'Phone: {}'.format(format_phone(license_holder.phone)),
            ]),
        ])

    # Blank spacer row.
    info.append(['', ''])
    if license_holder.emergency_medical:
        info.append([
            '', u'Medical Alert: {}'.format(license_holder.emergency_medical)
        ])
    info.append(['', u'Emergency Contact:'])
    # NOTE(review): inside this branch the name is already truthy, so the
    # 'None provided' fallback below is never used - confirm the intent.
    if license_holder.emergency_contact_name:
        info.append([
            '', u'  {}'.format(license_holder.emergency_contact_name
                               or 'None provided')
        ])
    info.append([
        '', u'  {}'.format(
            format_phone(license_holder.emergency_contact_phone)
            or 'No phone number provided')
    ])

    pdf.table_in_rectangle(field.x,
                           field.y,
                           field.width,
                           field.height,
                           info,
                           leftJustifyCols=[0, 1],
                           hasHeader=False,
                           horizontalLines=False)

    footer.draw_text_to_fit(pdf, system_name, Rect.AlignRight, True)

    pdf_str = pdf.output(dest='s')
    return pdf_str

示例#27

0

显示文件

class DocumentMetadataIngestModule(DataSourceIngestModule):
    """Data-source ingest module that extracts metadata from documents.

    For each supported extension, ``process`` looks up the matching files,
    copies each one into the case temp directory, runs the matching parser
    (``PDF``, ``OOXML`` or ``CFBF``) on the copy and publishes every parsed
    record as a custom ``TSK_<TYPE>_DATA`` artifact on the source file.
    """

    _logger = Logger.getLogger(DocumentMetadataIngestModuleFactory.moduleName)

    # Lower-cased extension ->
    #   (parser attribute name,
    #    ((result-list attribute name, artifact label), ...)).
    # The PDF parser yields two records per file; every other parser one.
    _HANDLERS = {
        "pdf": ("pdf", (("PyPDF_result", "PyPDF"),
                        ("PDFNoModule_result", "PDFNoModule"))),
        "xlsx": ("ooxml", (("xlsx_result", "XLSX"),)),
        "pptx": ("ooxml", (("pptx_result", "PPTX"),)),
        "docx": ("ooxml", (("docx_result", "DOCX"),)),
        "xls": ("cfbf", (("xls_result", "XLS"),)),
        "ppt": ("cfbf", (("ppt_result", "PPT"),)),
        "doc": ("cfbf", (("doc_result", "DOC"),)),
    }

    # Extensions in the order process() walks them.
    _EXTENSIONS = ("pdf", "docx", "pptx", "xlsx", "doc", "ppt", "xls")

    def log(self, level, msg):
        """Log *msg* at *level*, tagged with the class and caller name."""
        # inspect.stack()[1][3] is the name of the function that called log().
        self._logger.logp(level, self.__class__.__name__,
                          inspect.stack()[1][3], msg)

    def __init__(self, settings):
        """Create the parsers and the per-type result lists.

        *settings* is accepted but not used by this module.
        """
        self.context = None

        self.totalCount = 0
        self.cfbf = CFBF()
        self.pdf = PDF()
        self.ooxml = OOXML()

        self.xls_result = []
        self.ppt_result = []
        self.doc_result = []
        self.xlsx_result = []
        self.pptx_result = []
        self.docx_result = []
        self.PyPDF_result = []
        self.PDFNoModule_result = []

    def startUp(self, context):
        """Remember the ingest *context*."""
        self.context = context

    def getTitles(self, result):
        """Return the distinct keys used by the records in *result*.

        The bookkeeping key ``"handle"`` is excluded.  The order of the
        returned list is unspecified.
        """
        return list({key
                     for record in result
                     for key in record.keys()
                     if key != "handle"})

    def addData(self, titles, result, filetype, skCase):
        """Register the custom types for *filetype* and publish *result*.

        An attribute type ``TSK_<filetype>_<title>`` is registered for every
        title and an artifact type ``TSK_<filetype>_DATA`` for the file type.
        Each record then becomes one artifact on ``record["handle"]`` holding
        one string attribute per title; a title missing from a record is
        stored as an empty string.
        """
        if not titles and not result:
            # Nothing to register and nothing to publish.
            return

        prefix = "TSK_" + filetype + "_"
        artifact_name = prefix + "DATA"

        # NOTE(review): the bare excepts below are deliberate.  skCase is a
        # Java object, so the failures (typically "type already exists") may
        # be Java exceptions; confirm they are still caught before narrowing.
        #
        # The artifact type is registered on its own so that a failing
        # attribute registration can no longer cause it to be skipped.
        try:
            skCase.addBlackboardArtifactType(artifact_name, filetype)
        except:
            self.log(Level.FINE,
                     "Artifact type not added: " + artifact_name)

        for title in titles:
            try:
                skCase.addArtifactAttributeType(
                    prefix + str(title),
                    BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.
                    STRING, unicode(title))
            except:
                self.log(Level.FINE,
                         "Attribute type not added for " + repr(title))

        artifact_type_id = skCase.getArtifactTypeID(artifact_name)

        # Attribute types are looked up once per title, not once per record.
        attribute_types = {}
        for record in result:
            artifact = record["handle"].newArtifact(artifact_type_id)
            for title in titles:
                if title not in attribute_types:
                    attribute_types[title] = skCase.getAttributeType(
                        prefix + str(title))
                try:
                    value = unicode(record[title])
                except Exception:
                    # Title absent from this record, or value not decodable.
                    value = ""
                artifact.addAttribute(
                    BlackboardAttribute(
                        attribute_types[title],
                        DocumentMetadataIngestModuleFactory.moduleName,
                        value))

    def _parseFile(self, handler, local_path, source_file):
        """Parse *local_path* and store the records tagged with *source_file*.

        Parsing is best effort: on any failure the file is logged and
        skipped, and nothing is stored for it.
        """
        parser_name, targets = handler
        try:
            parsed = getattr(self, parser_name).run(local_path)
            if len(targets) == 1:
                records = [parsed]
            else:
                records = list(parsed)
                if len(records) != len(targets):
                    raise ValueError("unexpected number of parser results")
            for record in records:
                record["handle"] = source_file
        except:
            # NOTE(review): bare except kept from the original best-effort
            # handling; the parsers' exception types are not visible here.
            self.log(Level.WARNING,
                     "Failed to parse file: " + source_file.getName())
            return

        # Only reached when every record was tagged successfully.
        for (store_name, _label), record in zip(targets, records):
            getattr(self, store_name).append(record)

    def startModule(self, extension, skCase, fileManager, dataSource,
                    progressBar):
        """Process every file in *dataSource* whose name ends in *extension*.

        Each file is written to ``<case temp dir>/<extension> files``, parsed
        with the handler registered for the lower-cased extension, and the
        accumulated records for that extension are published via ``addData``.
        Files with an extension that has no handler are still copied and
        counted but not parsed.
        """
        files = fileManager.findFiles(dataSource, "%." + extension)
        progressBar.switchToDeterminate(len(files))

        temp_dir = os.path.join(Case.getCurrentCase().getTempDirectory(),
                                extension + " files")
        if not os.path.isdir(temp_dir):
            try:
                os.mkdir(temp_dir)
            except OSError:
                self.log(Level.WARNING,
                         "Could not create directory: " + temp_dir)

        handler = self._HANDLERS.get(extension.lower())

        for file_count, source_file in enumerate(files, 1):
            self.log(Level.INFO, "Processing file: " + source_file.getName())
            self.totalCount += 1

            local_path = os.path.join(temp_dir,
                                      unicode(source_file.getName()))
            ContentUtils.writeToFile(source_file, File(local_path))

            if handler is not None:
                self._parseFile(handler, local_path, source_file)

            progressBar.progress(file_count)

        if handler is not None:
            for store_name, label in handler[1]:
                records = getattr(self, store_name)
                self.addData(self.getTitles(records), records, label, skCase)

    def process(self, dataSource, progressBar):
        """Run every supported extension over *dataSource*.

        Posts an ingest message with the total number of files handled and
        returns ``IngestModule.ProcessResult.OK``.
        """
        progressBar.switchToIndeterminate()
        skCase = Case.getCurrentCase().getSleuthkitCase()
        fileManager = Case.getCurrentCase().getServices().getFileManager()

        for extension in self._EXTENSIONS:
            self.startModule(extension, skCase, fileManager, dataSource,
                             progressBar)

        message = IngestMessage.createMessage(
            IngestMessage.MessageType.DATA, "DocumentMetadataParser",
            "Found %d files" % self.totalCount)
        IngestServices.getInstance().postMessage(message)

        return IngestModule.ProcessResult.OK

示例#28

0

显示文件

        outlines, next_outlines = itertools.tee(outlines, 2)
        next_outlines = itertools.chain(itertools.islice(next_outlines, 1, None), [None])
        return outlines, next_outlines

if __name__ == "__main__":
    # Manual run: print the outline page range matching the audit-fee regex
    # for one sample HKEX filing.
    import get_pdf
    from test_cases import test_cases
    from get_data import HKEX_API
    from helper import write_to_csv, n_yearsago, today

    # Disabled batch run over one year of filings, kept for reference:
    #
    # logging.basicConfig(level=logging.INFO)
    # query = HKEX_API(from_date=n_yearsago(n=1), to_date=today())
    # for data in query.data:
    #     result = {}
    #     url = data.file_link
    #     pdf = PDF(url)
    #     print(url)
    #     f = TableOfContent(pdf.pdf_obj)
    #     print()
    #     result['result'] = f.search_outline_page_range(TableOfContent.auditor_remunration_regex)
    #     result['url'] = url
    #     write_to_csv(result, 'result_2.csv')

    # Sample filings; only the last entry is actually processed.
    sample_urls = (
        'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0813/2020081300777.pdf',
        'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0813/2020081300670.pdf',
    )
    url = sample_urls[-1]

    pdf = PDF(url)
    print(url)
    toc = TableOfContent(pdf.pdf_obj)
    page_range = toc.search_outline_page_range(TableOfContent.audit_fee_regex)
    print(page_range)

示例#29

0

显示文件

文件： create.py 项目： theunbound/FysikRevyTeX

    if "--tex-all" in sys.argv:
        conf["TeXing"]["force TeXing of all files"] = "yes"

    revue = cr.Revue.fromfile("aktoversigt.plan")
    path = revue.conf["Paths"]
    conv = cv.Converter()

    if len(conf.cmd_parts) == 0:
        arglist = ("aktoversigt", "roles", "frontpage", "props", "contacts",
                   "material", "individual", "songmanus")
    elif "manus" in sys.argv:
        arglist = ("aktoversigt", "roles", "frontpage", "props", "contacts",
                   "material")
    else:
        arglist = sys.argv[1:]

    create_parts(revue, arglist)

    if len(conf.cmd_parts) == 0 or "manus" in sys.argv:
        pdf = PDF()
        pdf.pdfmerge(
            ((os.path.join(path["pdf"], "forside.pdf"), "Forside"),
             (os.path.join(path["pdf"], "aktoversigt.pdf"), "Aktoversigt"),
             (os.path.join(path["pdf"],
                           "rolleliste.pdf"), "Rolleliste"), revue,
             (os.path.join(path["pdf"], "rekvisitliste.pdf"), "Rekvisitliste"),
             (os.path.join(path["pdf"], "kontaktliste.pdf"), "Kontaktliste")),
            os.path.join(path["pdf"], "manuskript.pdf"))

        print("Manuscript successfully created!")

示例#30

0

显示文件

文件： TeamResults.py 项目： scottwedge/CrossMgr

	def doExport( self, event=None ):
		"""Export team results for all published categories to Excel and PDF.

		Writes ``<base>-TeamResults.xlsx`` and ``<base>-TeamResults.pdf``, where
		``<base>`` is the main window's file name without its extension (or
		``Test`` when there is no main window).  Returns immediately when no
		race is loaded.  A failure in either export is passed to
		``logException`` and does not stop the other export.

		event: not used by this method.
		"""
		race = Model.race
		if not race:
			return

		mainWin = Utils.getMainWin()
		fileName = mainWin.fileName if mainWin else 'Test.cmn'
		baseName = os.path.splitext(fileName)[0]

		#---------------------------------------------------------------------------------
		# Create an Excel file.
		#
		xlFileName = baseName + '-TeamResults.xlsx'

		# Bind the name up front: if the Workbook constructor raises, the
		# "del" below would otherwise fail with UnboundLocalError.
		wb = None
		try:
			wb = xlsxwriter.Workbook( xlFileName )
			formats = ExportGrid.ExportGrid.getExcelFormatsXLSX( wb )

			ues = Utils.UniqueExcelSheetName()
			for category in race.getCategories( publishOnly=True ):
				eg = self.toExportGrid( category )
				if eg:
					ws = wb.add_worksheet( ues.getSheetName(category.fullname) )
					eg.toExcelSheetXLSX( formats, ws )
			wb.close()
		except Exception as e:
			logException( e, sys.exc_info() )
		# Drop the workbook reference before building the PDF.
		del wb

		#---------------------------------------------------------------------------------
		# Create a PDF file.
		#
		pdfFileName = baseName + '-TeamResults.pdf'

		# Same reason as for wb above.
		pdf = None
		try:
			pdf = PDF( orientation = 'P' )
			pdf.set_font( 'Arial', '', 12 )
			pdf.set_author( getpass.getuser() )
			pdf.set_keywords( 'CrossMgr Team Results' )
			pdf.set_creator( Version.AppVerName )
			pdf.set_title( os.path.splitext(pdfFileName)[0].replace('-', ' ') )
			for category in race.getCategories( publishOnly=True ):
				eg = self.toExportGrid( category )
				if eg:
					eg.drawToFitPDF( pdf, orientation=wx.PORTRAIT )
			pdf.output( pdfFileName, 'F' )
		except Exception as e:
			logException( e, sys.exc_info() )
		del pdf

示例#31

0

显示文件

文件： views.py 项目： mdek/flightloggin2

def pdf(request):
    """Build a ``PDF`` for ``request.display_user`` and return its response."""
    return PDF(request.display_user).as_response()

示例#32

0

显示文件

    def vectorize(self,
                  path,
                  ignore_if_html_file_exists=True,
                  ignore_errors=False,
                  num_threads=1):
        """
        Vectorize every paragraph of every PDF listed under ``path``.

        Each PDF is first converted to HTML, then split into paragraphs, and
        each paragraph is passed to ``self._bert_vectorizer.vectorize``.  The
        result has the columns ``pdf``, ``paragraph_num``, ``num_paragraphs``
        and ``num_tokens`` first, followed by the ``bert_<i>`` columns.

        With ``ignore_errors=True`` a failing PDF or paragraph is skipped and
        a warning reports how many errors were ignored; otherwise the first
        error is re-raised.  When nothing could be vectorized, an empty frame
        with only the four leading columns is returned.

        :type path: str or Path
        :type ignore_if_html_file_exists: bool
        :type ignore_errors: bool
        :type num_threads: int
        :rtype: DataFrame
        """
        import warnings

        leading_columns = [
            'pdf', 'paragraph_num', 'num_paragraphs', 'num_tokens'
        ]

        path = Path(path)
        pdf_paths = [
            PDF(file_path) for file_path in path.list(show_size=False)
            if file_path.extension.lower() == 'pdf'
        ]

        # Errors skipped because ignore_errors is set.  Appended to from the
        # worker threads as well when num_threads != 1.
        exceptions = []

        def run_stage(func, items, description):
            """Apply ``func`` to every item, serially or on a thread pool."""
            if num_threads == 1:
                return [
                    func(x) for x in iterate(
                        items, text=f'{description} (single-threaded)')
                ]
            processor = Parallel(n_jobs=num_threads,
                                 backend='threading',
                                 require='sharedmem')
            return processor(
                delayed(func)(x) for x in iterate(
                    items, text=f'{description} (multi-threaded)'))

        # create htmls
        for pdf_path in iterate(iterable=pdf_paths,
                                text='converting pdfs to html'):
            try:
                pdf_path.convert_to_html(
                    ignore_if_exists=ignore_if_html_file_exists)
            except Exception as e:
                if not ignore_errors:
                    raise
                exceptions.append(e)

        # get paragraphs
        def extract_paragraphs(pdf_path):
            try:
                pdf_paragraphs = pdf_path.paragraphs
                num_paragraphs = len(pdf_paragraphs)
                return [{
                    'pdf': pdf_path,
                    'paragraph_num': i + 1,
                    'paragraph': paragraph,
                    'num_paragraphs': num_paragraphs
                } for i, paragraph in enumerate(pdf_paragraphs)]
            except Exception as e:
                if not ignore_errors:
                    raise
                # A failed PDF contributes no paragraphs, so no placeholder
                # record reaches the vectorizing stage.
                exceptions.append(e)
                return []

        paragraph_dict_lists = run_stage(extract_paragraphs, pdf_paths,
                                         'extracting paragraphs')
        paragraph_dicts = [
            x for paragraph_dict_list in paragraph_dict_lists
            for x in paragraph_dict_list
        ]

        # create vectors
        def get_vector_and_num_tokens(paragraph_dict):
            try:
                pdf_path = paragraph_dict['pdf']
                paragraph_num = paragraph_dict['paragraph_num']
                paragraph = paragraph_dict['paragraph']
                num_paragraphs = paragraph_dict['num_paragraphs']
                vector, num_tokens = self._bert_vectorizer.vectorize(
                    text=paragraph, get_num_tokens=True)
                vector_df = DataFrame(
                    vector,
                    columns=[f'bert_{i + 1}' for i in range(vector.shape[1])])
                vector_df['pdf'] = pdf_path.name_and_extension
                vector_df['num_paragraphs'] = num_paragraphs
                vector_df['paragraph_num'] = paragraph_num
                vector_df['num_tokens'] = num_tokens
                return vector_df
            except Exception as e:
                if not ignore_errors:
                    raise
                exceptions.append(e)
                return None

        vectors = run_stage(get_vector_and_num_tokens, paragraph_dicts,
                            'converting paragraphs to vectors')

        if exceptions:
            warnings.warn(
                f'vectorize ignored {len(exceptions)} error(s); '
                f'first: {exceptions[0]!r}')

        # Only real frames may be handed to concat.
        frames = [frame for frame in vectors if frame is not None]
        if not frames:
            # concat refuses an empty list.
            return DataFrame(columns=leading_columns)

        return bring_to_front(
            data=concat(frames),
            columns=leading_columns).reset_index(drop=True)

示例#33

0

显示文件

文件： Printing.py 项目： ZigmundRat/CrossMgr

class CrossMgrPrintoutPDF( CrossMgrPrintout ):
	"""Printout that draws its pages into PDF files.

	With ``allInOne`` set, all pages go into a single ``<fileBase>.pdf`` that
	is written when printing ends.  Otherwise one
	``<fileBase>-<categoryName>.pdf`` is written each time the last page of a
	category has been drawn.  ``lastFName`` holds the path of the file written
	most recently.
	"""

	def __init__( self, dir, fileBase, orientation, categories = None, allInOne = False ):
		CrossMgrPrintout.__init__(self, categories)
		self.dir = dir
		self.fileBase = fileBase
		self.orientation = orientation
		self.allInOne = allInOne
		self.pdf = None			# PDF currently being built, if any.
		self.lastFName = None

	@staticmethod
	def _latin1( text ):
		# Metadata strings are passed to the PDF as latin-1 bytes;
		# characters that cannot be encoded are dropped.
		return unicode(text).encode('iso-8859-1','ignore')

	def _ensureDir( self ):
		# Create the output directory when one is configured and missing.
		if self.dir and not os.path.isdir( self.dir ):
			os.mkdir( self.dir )

	def _writePdf( self, fname ):
		# Write the current PDF into the output directory and reset state.
		fullName = os.path.join( self.dir, fname )
		self.pdf.output( fullName, 'F' )
		self.lastFName = fullName
		self.pdf = None

	def OnEndPrinting(self):
		"""Write the combined PDF (allInOne mode), then defer to the base class."""
		if self.pdf and self.allInOne:
			self._ensureDir()
			fname = u'{fileBase}.pdf'.format( fileBase=self.fileBase )
			title = os.path.splitext(fname)[0].replace('-', ' ')
			self.pdf.set_title( self._latin1(title) )
			self._writePdf( fname )
		return super(CrossMgrPrintoutPDF, self).OnEndPrinting()

	def OnPrintPage( self, page ):
		"""Draw *page* into the current PDF, starting a new PDF if needed.

		Always returns True.
		"""
		exportGrid = self.prepareGrid( page )

		try:
			info = self.pageInfo[page]
			category = info[0]
		except Exception:
			# No data for this page: nothing to draw.
			return True

		pageNumber, pageTotal = info[3], info[4]

		categoryName = category.fullname if category != 'Primes' else 'Primes'
		fname = u'{fileBase}-{categoryName}.pdf'.format(
			fileBase = self.fileBase,
			categoryName = categoryName
		)
		fname = Utils.RemoveDisallowedFilenameChars( fname ).replace( ' ', '-' )

		if not self.pdf:
			landscape = (self.orientation == wx.LANDSCAPE)
			self.pdf = PDF( orientation = 'L' if landscape else 'P' )
			self.pdf.set_font( 'Arial', '', 12 )
			self.pdf.set_author( self._latin1(getpass.getuser()) )
			self.pdf.set_keywords( self._latin1('CrossMgr Results') )
			self.pdf.set_creator( self._latin1(Version.AppVerName) )
			self.pdf.set_title( self._latin1(os.path.splitext(fname)[0].replace('-', ' ')) )

		drawArgs = [self.pdf, self.orientation] + info[1:-1]
		exportGrid.drawToFitPDF( *drawArgs )

		# In per-category mode, the last page of a category closes its file.
		if pageNumber == pageTotal and not self.allInOne:
			self._ensureDir()
			self._writePdf( fname )

		return True

示例#34

0

显示文件

        return f'{self.__class__.__name__} - {self.section}'


if __name__ == '__main__':
    # Manual run: for each filing returned by the HKEX query, locate the
    # corporate governance report and touch the first audit-fee page,
    # section and table.
    from hkex_api import HKEX_API

    # Sample filings noted as "concat number" cases:
    #   https://www1.hkexnews.hk/listedco/listconews/gem/2020/0929/2020092901098.pdf
    #   https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0929/2020092900604.pdf
    # Other filings used while debugging (URL, page):
    #   https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0923/2020092300374.pdf
    #   https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100713.pdf, 61
    #   https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100653.pdf, 94
    query = HKEX_API()
    urls = [data.file_link for data in query.get_data()]

    for url in urls:
        print(url)
        pdf = PDF.create(url)

        outlines = pdf.get_outline(CorporateGovReport.title_regex)
        if not outlines:
            continue

        corp_gov_report = CorporateGovReport.create(outlines[0])
        if not corp_gov_report or not corp_gov_report.audit_fee:
            continue

        try:
            page = corp_gov_report.audit_fee.pages[0]
            sec = corp_gov_report.audit_fee.sections[0]
            table = corp_gov_report.audit_fee.tables[0]
        except Exception as e:
            # Report the problem and move on to the next filing.
            print(e)
            continue

示例#35

0

显示文件

文件： miner.py 项目： joeyuan19/PyDF

    text = ''.join(i for i in temp if isinstance(i,basestring))
    return text

password = ''
print sys.argv
fp = open(sys.argv[1], 'rb')
parser = PDFParser(fp)
document = PDFDocument(parser)
document.initialize(password)
rsrcmgr = PDFResourceManager()
laparams = LAParams()
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
interpreter = PDFPageInterpreter(rsrcmgr, device)


pdf = PDF(sys.argv[1],verbose=True)

page_no = 1
for page in PDFPage.create_pages(document):
    interpreter.process_page(page)
    # receive the LTPage object for the page.
    layout = device.get_result()
    for group in layout.groups:
        rect = [group.x0,group.y0,group.x1,group.y1]
        quad = [
                group.x0,group.y0,
                group.x1,group.y0,
                group.x1,group.y1,
                group.x0,group.y1
                ]
        pdf.add_annot_to_page(page_no,quad,rect)