Пример #1
0
            # A page handle of -1 is treated as a failure: report the TET
            # error number, API name and message together with the page
            # number, then skip this page.
            if (page == -1):
                print(("Error %d in %s() on page %d: %s\n" % \
                    (tet.get_errnum(), tet.get_apiname(), pageno, \
                     tet.get_errmsg())))
                continue                        # try next page

            # Retrieve all text fragments for the page; the loop ends as
            # soon as get_text() returns a falsy value.
            text = tet.get_text(page)
            while (text):
                # Fetch character info for the current fragment.
                ci = tet.get_char_info(page)
                while (ci):
                    # Only the font name and size are needed here. The
                    # text position is also available from the character
                    # info (ci->x and ci->y in the C sample; presumably
                    # ci["x"] and ci["y"] in this binding -- TODO confirm).
                    fontname = tet.pcos_get_string(doc, \
                                "fonts[%d]/name" % ci["fontid"])

                    # Check whether we found a match: the font size must
                    # reach fontsizetrigger and the font name must contain
                    # fontnametrigger. (The C sample guards against an
                    # empty search string for strstr(); str.find() needs
                    # no such guard and returns 0 for an empty string, so
                    # an empty trigger matches every font name.)
                    if (ci["fontsize"] >= fontsizetrigger and \
                                fontname.find(fontnametrigger) != -1):
                        # Print the retrieved font name, size, and text.
                        print(("[%s %.2f] %s" % (fontname, ci["fontsize"], text)))
                    ci = tet.get_char_info(page)
                    # In this sample we check only the first character of
                    # each fragment, so the inner loop is left after a
                    # single pass.
                    break
                text = tet.get_text(page)
            if (tet.get_errnum() != 0):
Пример #2
0
        # Disabled output: tagged-PDF flag, page count and size of page 1.
        # print(("  Tagged PDF: %s\n" % \
        #     yesno(tet.pcos_get_number(doc, "tagged"))))

        # print(("No. of pages: %d" % \
        #     int(tet.pcos_get_number(doc, "length:pages"))))

        # print((" Page 1 size: width=%g, height=%g" % \
        #     (tet.pcos_get_number(doc, "pages[%d]/width" % 0),
        #      tet.pcos_get_number(doc, "pages[%d]/height" % 0))))

        # Number of entries in the pCOS "fonts" array.
        count = int(tet.pcos_get_number(doc, "length:fonts"))
        # print(("No. of fonts: %d" % count))

        # Query type and name of every font. The code that printed them is
        # disabled below, so within the visible code both values are unused.
        # NOTE(review): `type` shadows the Python builtin of the same name
        # for the rest of the enclosing scope -- confirm nothing later
        # relies on the builtin.
        for i in range(count):
            type = tet.pcos_get_string(doc, "fonts[%d]/type" % i)
            name = tet.pcos_get_string(doc, "fonts[%d]/name" % i)

            # Disabled output: embedded/unembedded status per font.
            # if (tet.pcos_get_number(doc, "fonts[%d]/embedded" % i)):
            #     print("embedded %s font %s" % (type, name))
            # else:
            #     print("unembedded %s font %s" % (type, name))

        # print()

        # Integer value of the pCOS path "encrypt/plainmetadata".
        plainmetadata = \
                int(tet.pcos_get_number(doc, "encrypt/plainmetadata"))

        # Print the restricted-mode notice when pcosmode is 1, plainmetadata
        # is zero and "encrypt/nocopy" is non-zero.
        if (pcosmode == 1 and not plainmetadata and \
                int(tet.pcos_get_number(doc, "encrypt/nocopy"))):
            print("Restricted mode: no more information available\n")
Пример #3
0
            ti = tet.get_image_info(page)
            while (ti):
		# Report image geometry
		print("page %d: %.2fx%.2fpt, alpha=%.2f, beta=%.2f" %
		(pageno, ti["width"], ti["height"], ti["alpha"], ti["beta"]))

		# Retrieve additional image properties with pCOS
		print("   id=%d, %dx%d pixel" % ( ti["imageid"],
		    tet.pcos_get_number(doc, "images[%d]/Width" % ti["imageid"]),
		    tet.pcos_get_number(doc, "images[%d]/Height" % ti["imageid"])))
		cs =  tet.pcos_get_number(doc, "images[%d]/colorspaceid" % ti["imageid"])

		if (cs != -1):
		    print("   %dx%d bit %s" % (tet.pcos_get_number(doc, "colorspaces[%d]/components" % cs),
			tet.pcos_get_number(doc, "images[%d]/bpc" % ti["imageid"]), 
			tet.pcos_get_string(doc, "colorspaces[%d]/name" % cs )))

		else:
		    # cs==-1 may happen for some JPEG 2000 images. bpc,
		    # colorspace name and number of components are not
		    # available in this case.


		    print("JPEG2000")

		# Fetch image data and write it to a disk file. The
		# output filename is generated from the input filename,
		# page number and image ID.

		imageoptlist = baseimageoptlist + " filename {" + outfilebase + "_p" + repr(pageno) + "_I" + repr(ti["imageid"]) + "}"
		if (tet.write_image_file(doc, ti["imageid"], imageoptlist) == -1):
Пример #4
0
                # Query pixel dimensions, bit depth and color space id of
                # the image through pCOS.
                width = tet.pcos_get_number(doc, "images[%d]/Width" % imageid)
                height = tet.pcos_get_number(doc, "images[%d]/Height" % imageid)
                bpc = tet.pcos_get_number(doc, "images[%d]/bpc" % imageid)
                cs = tet.pcos_get_number(
                    doc, "images[%d]/colorspaceid" % imageid)

                # Build the one-line description that is printed below.
                txt = "image I%d: %dx%d pixel, " % (imageid, width, height)

                if cs == -1:
                    # A color space id of -1 may occur for some JPEG 2000
                    # images; bpc, color space name and number of
                    # components are not available then.
                    txt = txt + "JPEG2000"
                else:
                    # Components, bit depth and color space name, evaluated
                    # in this order.
                    colorinfo = [
                        "%dx" % tet.pcos_get_number(
                            doc, "colorspaces[%d]/components" % cs),
                        "%d bit " % bpc,
                        "%s" % tet.pcos_get_string(
                            doc, "colorspaces[%d]/name" % cs),
                    ]
                    txt = txt + "".join(colorinfo)

                # A non-zero mergetype is appended by name: 1 is reported
                # as "artificial", any other non-zero value as "consumed".
                if mergetype:
                    txt = txt + ", mergetype=" + (
                        "artificial" if mergetype == 1 else "consumed")

                print(txt)