Exemplo n.º 1
0
def docx_to_pdf(infilename, outfilename):
    """Extract the text of a DOCX file and write it to a PDF file.

    Args:
        infilename: Name of the DOCX file to read.
        outfilename: Name of the PDF file to create.

    Each paragraph is wrapped to 70 columns and followed by one empty
    line.  If the DOCX file cannot be opened, an error message is printed
    and the process exits with status 1.
    """
    try:
        infil = opendocx(infilename)
    except Exception as e:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed, and report
        # the actual file name and cause instead of a fixed string.
        print("Error opening {}".format(infilename))
        print("Exception: " + repr(e))
        sys.exit(1)

    paragraphs = getdocumenttext(infil)

    pw = PDFWriter(outfilename)
    pw.setFont("Courier", 12)
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    # For Unicode handling: every paragraph is encoded before any output
    # is written.
    # NOTE(review): on Python 3 encode() yields bytes, which TextWrapper
    # presumably cannot wrap - confirm which interpreter this targets.
    new_paragraphs = [paragraph.encode("utf-8") for paragraph in paragraphs]

    for paragraph in new_paragraphs:
        for line in wrapper.wrap(paragraph):
            pw.writeLine(line)
        # Empty line between paragraphs.
        pw.writeLine("")

    pw.savePage()
    pw.close()
Exemplo n.º 2
0
def text_to_pdf(txt_filename):
    """Convert a text file to a PDF named ``<txt_filename>.pdf``.

    Args:
        txt_filename: Name of the text file to read.  Each of its lines,
            with newline characters stripped, becomes one line of the PDF.
    """
    pw = PDFWriter(txt_filename + '.pdf')
    pw.setFont('Courier', 12)
    pw.setHeader('{} converted to PDF'.format(txt_filename))
    pw.setFooter('PDF conversion by xtopdf: https://google.com/search?q=xtopdf')

    with open(txt_filename) as txt_fil:
        for line in txt_fil:
            pw.writeLine(line.strip('\n'))

    pw.savePage()
    # Close the writer after the last page, as the other PDFWriter users
    # do; presumably close() is what finalizes the output file - confirm
    # against the PDFWriter implementation.
    pw.close()
Exemplo n.º 3
0
def text_to_pdf(txt_filename):
    """Write the lines of a text file to ``<txt_filename>.pdf``.

    Args:
        txt_filename: Name of the input text file.  Newline characters are
            stripped from each line before it is written to the PDF.
    """
    pw = PDFWriter(txt_filename + '.pdf')
    pw.setFont('Courier', 12)
    pw.setHeader('{} converted to PDF'.format(txt_filename))
    pw.setFooter(
        'PDF conversion by xtopdf: https://google.com/search?q=xtopdf')

    with open(txt_filename) as txt_fil:
        for line in txt_fil:
            pw.writeLine(line.strip('\n'))

    pw.savePage()
    # The writer was previously never closed.  Presumably close() is
    # required to finalize the PDF - confirm against PDFWriter.
    pw.close()
Exemplo n.º 4
0
def pdf_book():
    """Serve the PDF book form (GET) or create a PDF from the posted form.

    For a GET request, returns the HTML form.  For any other request
    method, reads the ``pdf_file_name``, ``header``, ``footer`` and
    ``content`` form fields, writes the content to a PDF wrapped at 70
    columns, and returns a status message.  Any exception raised while
    creating the PDF is reported in the returned text instead of being
    propagated.
    """
    if request.method == 'GET':
        # Display the PDF book creation form.
        return '''
            <form action="/pdf_book" method="post">
                PDF file name: <input type="text" name="pdf_file_name" />

                Header: <input type="text" name="header" />
                Footer: <input type="text" name="footer" />

                Content:
                <textarea name="content" rows="15" cols="50"></textarea>

                <input type="submit" value="Submit" />

            </form>
            '''

    # Create the PDF book from the posted form content.
    try:
        # Get the needed fields from the form.
        # NOTE(review): pdf_file_name comes straight from the request and
        # is handed to PDFWriter as the output file name; consider
        # restricting it before using this outside a trusted environment.
        pdf_file_name = request.form['pdf_file_name']
        header = request.form['header']
        footer = request.form['footer']
        content = request.form['content']

        # Create a PDFWriter instance and set some of its fields.
        pw = PDFWriter(pdf_file_name)
        pw.setFont("Courier", 12)
        pw.setHeader(header)
        pw.setFooter(footer)

        # Split the content into paragraphs delimited by newlines, wrap
        # each paragraph to at most 70 characters per line, and write
        # every resulting line to the PDF file.
        wrapper = TextWrapper(width=70, drop_whitespace=False)
        for paragraph in content.split('\n'):
            for line in wrapper.wrap(paragraph):
                pw.writeLine(line)

        pw.savePage()
        pw.close()
        return "OK. PDF book created in file " + pdf_file_name + ".\n"
    except Exception as e:
        # "except X as e" is valid on Python 2.6+ and 3.  print_exc()
        # logs the traceback of the exception being handled, whereas
        # print_stack() would only show how this handler was reached.
        traceback.print_exc()
        return "Error: PDF book not created.\n" + repr(e) + ".\n"
Exemplo n.º 5
0
def pdf_book():
    """Show the PDF book creation form, or build the PDF from a posted form.

    A GET request returns the HTML form.  Any other request method reads
    the ``pdf_file_name``, ``header``, ``footer`` and ``content`` form
    fields and writes the content, wrapped at 70 columns, to a PDF.  The
    return value is a status message; exceptions raised during PDF
    creation are caught and described in that message.
    """
    if request.method == 'GET':
        # Display the PDF book creation form.
        return '''
            <form action="/pdf_book" method="post">
                PDF file name: <input type="text" name="pdf_file_name" />

                Header: <input type="text" name="header" />
                Footer: <input type="text" name="footer" />

                Content:
                <textarea name="content" rows="15" cols="50"></textarea>

                <input type="submit" value="Submit" />

            </form>
            '''

    # Create the PDF book from the posted form content.
    try:
        # Get the needed fields from the form.
        # NOTE(review): pdf_file_name is user-supplied and is passed to
        # PDFWriter unchanged as the output file name; consider
        # validating it before exposing this handler publicly.
        pdf_file_name = request.form['pdf_file_name']
        header = request.form['header']
        footer = request.form['footer']
        content = request.form['content']

        # Create a PDFWriter instance and set some of its fields.
        pw = PDFWriter(pdf_file_name)
        pw.setFont("Courier", 12)
        pw.setHeader(header)
        pw.setFooter(footer)

        # Paragraphs are delimited by newlines; each one is wrapped to a
        # maximum width of 70 characters and written line by line.
        wrapper = TextWrapper(width=70, drop_whitespace=False)
        for paragraph in content.split('\n'):
            for line in wrapper.wrap(paragraph):
                pw.writeLine(line)

        pw.savePage()
        pw.close()
        return "OK. PDF book created in file " + pdf_file_name + ".\n"
    except Exception as e:
        # Use the "as" form (valid on Python 2.6+ and 3) and log the
        # caught exception's traceback; print_stack() would only print
        # the stack leading to this handler.
        traceback.print_exc()
        return "Error: PDF book not created.\n" + repr(e) + ".\n"
Exemplo n.º 6
0
def main():
    """Write one or more text files into a single PDF.

    sys.argv[1] is the output PDF name and sys.argv[2:] are the input
    files.  With fewer than two arguments, usage() is called and the
    process exits with status 0.  Any exception raised while producing
    the PDF is caught and reported with print().
    """
    if len(sys.argv) < 3:
        usage(sys.argv[0])
        sys.exit(0)

    pdf_name = sys.argv[1]
    input_names = sys.argv[2:]

    try:
        writer = PDFWriter(pdf_name)
        writer.setFont('Courier', 12)
        writer.setFooter('xtopdf: https://google.com/search?q=xtopdf')

        for text in fileinput.input(input_names):
            at_file_start = fileinput.filelineno() == 1
            if at_file_start:
                # The page header is the name of the file being read.
                writer.setHeader(fileinput.filename())
                # For every file after the first, save the page first.
                if fileinput.lineno() != 1:
                    writer.savePage()
            writer.writeLine(text.strip('\n'))

        writer.savePage()
        writer.close()
    except Exception as err:
        message = "Caught Exception: type: {}, message: {}".format(
            err.__class__, str(err))
        print(message)
Exemplo n.º 7
0
def main():
    """Write the text of a built-in HTML sample to HTMLTextTo.pdf."""

    # Sample HTML whose text content is converted to PDF.
    sample_html = """
    <html>
        <head>
            <title>
            Test file for HTMLTextToPDF
            </title>
        </head>
        <body>
        This is text within the body element but outside any paragraph.
        <p>
        This is a paragraph of text. Hey there, how do you do?
        The quick red fox jumped over the slow blue cow.
        </p>
        <p>
        This is another paragraph of text.
        Don't mind what it contains.
        What is mind? Not matter.
        What is matter? Never mind.
        </p>
        This is also text within the body element but not within any paragraph.
        </body>
    </html>
    """

    writer = PDFWriter("HTMLTextTo.pdf")
    writer.setFont("Courier", 10)
    writer.setHeader("Conversion of HTML text to PDF")
    writer.setFooter("Generated by xtopdf: http://slid.es/vasudevram/xtopdf")

    # Extract the text from the HTML, then write it line by line.
    soup = BeautifulSoup(sample_html)
    plain_text = soup.get_text()
    for text_line in plain_text.split("\n"):
        writer.writeLine(text_line)

    writer.savePage()
    writer.close()
Exemplo n.º 8
0
def main():
    """Convert the text of an embedded HTML test document to HTMLTextTo.pdf."""

    # HTML used to exercise the text-to-PDF conversion.
    test_html = """
    <html>
        <head>
            <title>
            Test file for HTMLTextToPDF
            </title>
        </head>
        <body>
        This is text within the body element but outside any paragraph.
        <p>
        This is a paragraph of text. Hey there, how do you do?
        The quick red fox jumped over the slow blue cow.
        </p>
        <p>
        This is another paragraph of text.
        Don't mind what it contains.
        What is mind? Not matter.
        What is matter? Never mind.
        </p>
        This is also text within the body element but not within any paragraph.
        </body>
    </html>
    """

    pdf = PDFWriter("HTMLTextTo.pdf")
    pdf.setFont("Courier", 10)
    pdf.setHeader("Conversion of HTML text to PDF")
    pdf.setFooter("Generated by xtopdf: http://slid.es/vasudevram/xtopdf")

    # Get the document's text and write each of its lines to the PDF.
    document_text = BeautifulSoup(test_html).get_text()
    text_lines = document_text.split("\n")
    for text_line in text_lines:
        pdf.writeLine(text_line)

    pdf.savePage()
    pdf.close()
Exemplo n.º 9
0
    pw.setHeader("DOCXtoPDF - convert text in DOCX file to PDF")
    pw.setFooter("Generated by xtopdf and python-docx")
    wrapper = TextWrapper(width=70, drop_whitespace=False)

    # For Unicode handling.
    new_paragraphs = []
    for paragraph in paragraphs:
        new_paragraphs.append(paragraph.encode("utf-8"))

    for paragraph in new_paragraphs:
        lines = wrapper.wrap(paragraph)
        for line in lines:
            pw.writeLine(line)
        pw.writeLine("")

    pw.savePage()
    pw.close()
    
def usage():
    """Return the command-line usage message for DOCXtoPDF.py."""
    # The output of this tool is a PDF file, so the example output name
    # uses a .pdf extension (it previously said outfile.txt).
    return "Usage: python DOCXtoPDF.py infile.docx outfile.pdf\n"

def main():

    try:
        # Check for correct number of command-line arguments.
        if len(sys.argv) != 3:
            print "Wrong number of arguments"
            print usage()
            sys.exit(1)
        infilename = sys.argv[1]
Exemplo n.º 10
0
def main():
    """Dump a DBF file's metadata and data records to a PDF report.

    Expects exactly two command-line arguments: the input DBF file name
    and the output PDF file name; otherwise usage() is called and the
    process exits with status 1.  The report has three sections, each
    followed by savePage(): file header information, field header
    information, and the data records.
    """
    # Check for the right number of arguments.
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    # DBF and PDF file names come from the arguments.
    dbf_fn, pdf_fn = sys.argv[1], sys.argv[2]

    # Open the DBF input.
    dr = DBFReader(dbf_fn)
    dr.open()

    # Set up the PDF output; page header and footer carry the same text.
    pw = PDFWriter(pdf_fn)
    pw.setFont("Courier", 10)
    gen_datetime = time.asctime()
    banner = "Generated by DBFtoPDF: Input: " + dbf_fn + " At: " + gen_datetime
    pw.setHeader(banner)
    pw.setFooter(banner)

    # Separator line used for logical grouping of the output.
    sep = "=" * 60

    # ---- Section 1: file header ----
    file_header = dr.read_dbf_file_header()

    pw.writeLine(sep)
    pw.writeLine("Information for DBF file: %s" % (dbf_fn))
    pw.writeLine(sep)
    pw.writeLine("")
    pw.writeLine("File Header Information:")
    pw.writeLine(sep)

    # (label, file header key, converter) for each metadata line; the
    # last-update value is concatenated as-is, the others go through str().
    header_rows = (
        ("DBF version (signature)       : ", 'ver', str),
        ("Date of last update (YY/MM/DD): ", 'last_update', None),
        ("Number of data records        : ", 'num_recs', str),
        ("DBF header length in bytes    : ", 'hdr_len', str),
        ("DBF record length in bytes    : ", 'rec_len', str),
        ("Number of fields in DBF file  : ", 'num_flds', str),
    )
    for label, key, convert in header_rows:
        value = file_header[key]
        if convert is not None:
            value = convert(value)
        pw.writeLine(label + value)

    pw.writeLine(sep)
    pw.savePage()

    # ---- Section 2: field headers ----
    pw.writeLine("Field Header Information:")
    pw.writeLine(sep)
    column_titles = ("#", "Field name", "Type", "Length", "Decimals")
    pw.writeLine("%3s%13s%7s%8s%10s" % column_titles)
    pw.writeLine(sep)

    num_flds = file_header["num_flds"]
    field_headers = dr.read_dbf_field_headers()

    # The field headers are a list of lists: names, types, lengths and
    # decimal counts, in that order.
    names, types, lengths, decimals = (field_headers[i] for i in range(4))

    for idx in range(num_flds):
        columns = (
            "%3s" % (idx + 1),
            "%13s" % (names[idx]),
            "%4s" % (types[idx]),
            "%5s" % (lengths[idx]),
            "%7s" % (decimals[idx]),
        )
        pw.writeLine(" ".join(columns))

    pw.writeLine(sep)
    pw.savePage()

    # ---- Section 3: data records ----
    # Position the DBFReader instance to start reading data records.
    dr.reset()

    pw.writeLine("DBF Data Records:")
    pw.writeLine(sep)

    rec_num = 0
    while dr.has_next_record():
        rec_num += 1
        # Fetch the next record and convert it to a readable string.
        record_text = dbf_record_to_string(dr.next_record())
        # Serial number, a space, then the record itself.
        serial = "%7d:" % (rec_num)
        pw.writeLine(serial + " " + "%s" % (record_text))

    pw.writeLine(sep)
    pw.savePage()

    # Close the reader, then the writer.
    dr.close()
    pw.close()
Exemplo n.º 11
0
def XMLtoPDFBook():
    """Convert an XML "book" file into a PDF with a table of contents.

    The XML and PDF file names are obtained from the module-level
    ``sysargv`` via get_xml_filename() and get_pdf_filename().  The XML
    root element must be ``book`` and each of its children must be a
    ``chapter``; a chapter's optional ``name`` attribute is its title.
    The PDF starts with a "Table of Contents" listing the chapter names,
    followed by the text of each chapter; savePage() is called after the
    table of contents and after every chapter.

    Exits the process with status 1 if the XML file cannot be parsed, the
    root tag is not ``book``, or a child of the root is not ``chapter``.
    """
    debug("Entered XMLtoPDFBook()")

    global sysargv

    # Get command-line arguments.
    xml_filename = get_xml_filename(sysargv)
    debug("xml_filename: " + xml_filename)
    pdf_filename = get_pdf_filename(sysargv)
    debug("pdf_filename: " + pdf_filename)

    # Parse the XML file.
    try:
        tree = ET.ElementTree(file=xml_filename)
        debug("tree = " + repr(tree))
    except Exception:
        sys.stderr.write("Error: caught exception in ET.ElementTree(file)")
        sys.exit(1)

    # Get the tree root.
    root = tree.getroot()
    debug("root.tag = " + root.tag)
    if root.tag != "book":
        debug("Error: Root tag is not 'book'")
        sys.exit(1)

    # Table of contents (chapter names) and chapter texts, kept in step.
    toc = []
    chapters = []

    # Traverse the tree, extracting needed data into variables.
    debug("-" * 60)
    for chapter in root:
        if chapter.tag != "chapter":
            debug("Error: root_child tag is not 'chapter'")
            sys.exit(1)
        # An element with no text content has text == None; store an
        # empty string so the split("\n") below cannot fail on it.
        chapters.append(chapter.text or "")
        # A chapter without a name attribute gets an empty title.
        toc.append(chapter.attrib.get('name', ""))
        debug("-" * 60)

    # Create and set some fields of a PDFWriter.
    pw = PDFWriter(pdf_filename)
    pw.setFont("Courier", 12)
    pw.setFooter("Generated by XMLtoPDFBook. Copyright 2013 Vasudev Ram")

    # Write the TOC; chapters are numbered from 1.
    pw.setHeader("Table of Contents")
    debug("Chapter names")
    for chapter_num, chapter_name in enumerate(toc, 1):
        debug(chapter_name)
        pw.writeLine(str(chapter_num) + ": " + chapter_name)
    pw.savePage()

    # Write the chapters, each under its own page header.
    for chapter_num, (chapter_name, chapter_text) in enumerate(
            zip(toc, chapters), 1):
        pw.setHeader("Chapter " + str(chapter_num) + ": " + chapter_name)
        for line in chapter_text.split("\n"):
            pw.writeLine(line)
        pw.savePage()

    pw.close()

    debug("Exiting XMLtoPDFBook()")
Exemplo n.º 12
0
def main():
    """Write a report of a DBF file's header, fields and records to a PDF.

    Requires exactly two command-line arguments, the DBF input file name
    and the PDF output file name; with any other count, usage() is called
    and the process exits with status 1.  The file header information,
    the field header information and the data records are written as
    three sections, with savePage() called after each.
    """
    # Check for the right number of arguments.
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    # Extract the DBF and PDF file names from the arguments.
    dbf_fn, pdf_fn = sys.argv[1], sys.argv[2]

    # Create and open the DBFReader instance.
    dr = DBFReader(dbf_fn)
    dr.open()

    # Create the PDFWriter instance; the page header and footer share
    # the same text.
    pw = PDFWriter(pdf_fn)
    pw.setFont("Courier", 10)
    gen_datetime = time.asctime()
    banner = "Generated by DBFtoPDF: Input: " + dbf_fn + " At: " + gen_datetime
    pw.setHeader(banner)
    pw.setFooter(banner)

    # Separator used for logical grouping of the output.
    sep = "=" * 60

    # ---- File header section ----
    file_header = dr.read_dbf_file_header()

    pw.writeLine(sep)
    pw.writeLine("Information for DBF file: %s" % (dbf_fn))
    pw.writeLine(sep)
    pw.writeLine("")
    pw.writeLine("File Header Information:")
    pw.writeLine(sep)

    # Each row: (label, key into the file header, converter).  Only the
    # last-update value is used without str().
    header_rows = (
        ("DBF version (signature)       : ", 'ver', str),
        ("Date of last update (YY/MM/DD): ", 'last_update', None),
        ("Number of data records        : ", 'num_recs', str),
        ("DBF header length in bytes    : ", 'hdr_len', str),
        ("DBF record length in bytes    : ", 'rec_len', str),
        ("Number of fields in DBF file  : ", 'num_flds', str),
    )
    for label, key, convert in header_rows:
        value = file_header[key]
        if convert is not None:
            value = convert(value)
        pw.writeLine(label + value)

    pw.writeLine(sep)
    pw.savePage()

    # ---- Field headers section ----
    pw.writeLine("Field Header Information:")
    pw.writeLine(sep)
    column_titles = ("#", "Field name", "Type", "Length", "Decimals")
    pw.writeLine("%3s%13s%7s%8s%10s" % column_titles)
    pw.writeLine(sep)

    num_flds = file_header["num_flds"]
    field_headers = dr.read_dbf_field_headers()

    # Split the list of lists into names, types, lengths and decimals.
    names, types, lengths, decimals = (field_headers[i] for i in range(4))

    for idx in range(num_flds):
        columns = (
            "%3s" % (idx + 1),
            "%13s" % (names[idx]),
            "%4s" % (types[idx]),
            "%5s" % (lengths[idx]),
            "%7s" % (decimals[idx]),
        )
        pw.writeLine(" ".join(columns))

    pw.writeLine(sep)
    pw.savePage()

    # ---- Data records section ----
    # Position the DBFReader instance to start reading data records.
    dr.reset()

    pw.writeLine("DBF Data Records:")
    pw.writeLine(sep)

    rec_num = 0
    while dr.has_next_record():
        rec_num += 1
        # Get the next record and convert it to a human-friendly string.
        record_text = dbf_record_to_string(dr.next_record())
        # Serial number of the record, a space, then the record.
        serial = "%7d:" % (rec_num)
        pw.writeLine(serial + " " + "%s" % (record_text))

    pw.writeLine(sep)
    pw.savePage()

    # Close the DBFReader, then the PDFWriter.
    dr.close()
    pw.close()