예제 #1
0
    def pdf_run(self, image_file_name, filename, path):
        """Convert every page of a document into a grayscale PNG file.

        The document is read with Wand at 300 DPI and converted to PNG. Each
        page is turned into an 8-bit grayscale image, level-adjusted and
        saved as ``<path>/saram_<filename><page>.png`` with pages numbered
        from 1. Per-page and total processing times are printed.

        Args:
            image_file_name: File name of the document passed to ``Image``.
            filename: Name fragment embedded in every output file name.
            path: Directory that receives the PNG files.
        """
        with Image(filename=image_file_name, resolution=300) as image_pdf, \
                image_pdf.convert("png") as image_page:
            process_start = time.time()

            # Loop-invariant, so it is done once instead of once per page.
            # NOTE(review): 0o777 makes the directory world-writable;
            # confirm that this is really required.
            os.chmod(path, 0o777)

            for page, img in enumerate(image_page.sequence, 1):
                # Start the clock before the work so that the reported
                # per-page time is meaningful (it used to be always ~0).
                page_start = time.time()

                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    # NOTE(review): Wand exposes the DPI as `resolution`;
                    # confirm that setting `density` has any effect.
                    img_per_page.density = 300

                    try:
                        img_per_page.level(black=0.3, white=1.0, gamma=1.5,
                                           channel=None)
                    except AttributeError as e:
                        # Presumably raised by Wand releases lacking level();
                        # the page is then saved without level adjustment.
                        print("Update Wand library: %s" % e)

                    img_buf = (path + '/' + "saram_" + filename + str(page) +
                               ".png")
                    img_per_page.save(filename=img_buf)
                    page_size = img_per_page.size

                page_elaboration = time.time() - page_start
                print("page %s - size %s - process %2d sec." %
                      (page, page_size, page_elaboration))

                img.destroy()

            process_end = time.time() - process_start
            print("Total elaboration time: %s" % process_end)
예제 #2
0
    def process(self, pdf_filename, pdf_resolution, imageformat,
                do_orientation):
        """Run OCR on every page of a PDF and return the collected text.

        Each page is converted to an 8-bit grayscale image, level-adjusted,
        written to ``buffer.png`` (overwritten for every page) and passed as
        a blob to ``self.image2txt_pyocr``. Timing information is printed
        per page and for the whole run.

        Args:
            pdf_filename: File name of the document passed to ``Image``.
            pdf_resolution: Resolution used to read the document.
            imageformat: Format the pages are converted to and encoded in.
            do_orientation: Forwarded unchanged to ``image2txt_pyocr``.

        Returns:
            The text of all pages, each page followed by a newline.
        """
        page_texts = []

        # The context managers release the ImageMagick resources even when a
        # page fails; previously the images were never closed.
        with Image(filename=pdf_filename,
                   resolution=pdf_resolution) as image_pdf, \
                image_pdf.convert(imageformat) as image_page:
            process_start = time.time()

            for page, img in enumerate(image_page.sequence, 1):
                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    # NOTE(review): Wand exposes the DPI as `resolution`;
                    # confirm that setting `density` has any effect.
                    img_per_page.density = pdf_resolution
                    try:
                        img_per_page.level(black=0.3,
                                           white=1.0,
                                           gamma=1.5,
                                           channel=None)
                    except AttributeError as e:
                        # Presumably raised by Wand releases lacking level().
                        print("Update Wand library: %s" % e)
                    img_per_page.save(filename="buffer.png")

                    page_start = time.time()
                    txt = self.image2txt_pyocr(
                        img_per_page.make_blob(imageformat), do_orientation)
                    page_elaboration = time.time() - page_start
                    print("page %s - size %s - process %2d sec. - text %s" %
                          (page, img_per_page.size, page_elaboration,
                           len(txt)))

                page_texts.append("%s\n" % txt)
                img.destroy()

            process_end = time.time() - process_start
            print("Total elaboration time: %s" % process_end)

        return "".join(page_texts)
예제 #3
0
    def rasterizeImage(self):
        """Rasterize this page to an image file unless the file exists.

        The page held in ``self.bytes`` is rendered with Wand at a
        resolution derived from ``IMAGE_WIDTH``, ``DPI_TO_PX_RATIO`` and
        ``self.pdf_page.mediaBox[3]``, converted to a BGR pixel array and
        written with OpenCV to the path given by ``self.getImgFilepath()``.
        """
        img_filepath = self.getImgFilepath()
        if os.path.isfile(img_filepath):
            return

        print('rasterize page: %s' % (self.n,))

        # rasterize
        resolution = int(IMAGE_WIDTH * DPI_TO_PX_RATIO /
                         (self.pdf_page.mediaBox[3]))
        with Image(file=self.bytes, resolution=resolution) as wand_img:
            width, height = wand_img.width, wand_img.height
            wand_img.depth = 8
            blob = wand_img.make_blob(format='RGB')

        # convert wand_image to cv_image: view the blob as interleaved
        # 8-bit RGB triples and reverse the channel axis to get BGR. This
        # replaces the per-pixel struct.unpack loop. The copy makes the
        # array contiguous and writable for OpenCV.
        rgb = np.frombuffer(blob, dtype=np.uint8, count=height * width * 3)
        img = rgb.reshape((height, width, 3))[:, :, ::-1].copy()
        cv2.imwrite(img_filepath, img)
예제 #4
0
def runPage(pdf, n):
    """Rasterize page ``n`` of ``pdf``, undistort it and find its splits.

    The page is written into an in-memory single-page PDF, rendered with
    Wand, converted to a BGR array, passed through ``undistort`` and then
    ``split``. Intermediate images are shown with ``cv2.imshow``.

    Args:
        pdf: PyPDF2 reader providing ``getPage``.
        n: Index of the page to process.

    Returns:
        A tuple ``(cv_img, structure_data)``: the undistorted image and a
        dict with the keys ``'w'``, ``'h'`` and ``'split_pts'`` holding the
        three values returned by ``split``.
    """
    # read the pdf page into an in-memory buffer
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_page = pdf.getPage(n)
    pdf_writer.addPage(pdf_page)
    page_buffer = io.BytesIO()
    pdf_writer.write(page_buffer)
    page_buffer.seek(0)

    # rasterize
    resolution = int(IMAGE_WIDTH * DPI_TO_PX_RATIO / (pdf_page.mediaBox[2]))
    with Image(file=page_buffer, resolution=resolution) as wand_img:
        width, height = wand_img.width, wand_img.height
        wand_img.depth = 8
        blob = wand_img.make_blob(format='RGB')

    # convert wand_image to cv_image: view the blob as interleaved 8-bit RGB
    # triples and reverse the channel axis to get BGR. This replaces the
    # per-pixel struct.unpack loop. The copy makes the array contiguous and
    # writable for OpenCV.
    rgb = np.frombuffer(blob, dtype=np.uint8, count=height * width * 3)
    cv_img = rgb.reshape((height, width, 3))[:, :, ::-1].copy()

    cv2.imshow('img', cv_img)

    # undistort
    cv_img = undistort(cv_img.copy(), verbose=False)

    cv2.imshow('undistorted', cv_img)

    split_w, split_h, split_pts = split(cv_img, verbose=False)
    structure_data = {'w': split_w, 'h': split_h, 'split_pts': split_pts}

    # draw one horizontal red line per split point; this reuses the 'img'
    # window, replacing the raw rasterization shown above
    lined_img = cv_img.copy()
    for split_pt in split_pts:
        line_y = int(split_pt)
        cv2.line(lined_img, (0, line_y), (width, line_y), (0, 0, 255), 0)

    cv2.imshow('img', lined_img)

    return cv_img, structure_data
    def rasterizeImage(self):
        """Write the rasterized page image to disk if it is not there yet.

        Renders the page stored in ``self.bytes`` with Wand, using a
        resolution computed from ``IMAGE_WIDTH``, ``DPI_TO_PX_RATIO`` and
        ``self.pdf_page.mediaBox[3]``, converts the result to a BGR array
        and saves it with OpenCV at ``self.getImgFilepath()``.
        """
        target_path = self.getImgFilepath()
        if os.path.isfile(target_path):
            return

        print('rasterize page: %s' % (self.n,))

        # rasterize; the context manager frees the ImageMagick resources
        dpi = int(IMAGE_WIDTH * DPI_TO_PX_RATIO / (self.pdf_page.mediaBox[3]))
        with Image(file=self.bytes, resolution=dpi) as wand_img:
            width, height = wand_img.width, wand_img.height
            wand_img.depth = 8
            blob = wand_img.make_blob(format='RGB')

        # convert wand_image to cv_image in one vectorized step instead of a
        # per-pixel struct.unpack loop: reinterpret the blob as 8-bit RGB
        # triples, then flip the channel axis to BGR. The copy yields a
        # contiguous, writable array for OpenCV.
        pixels = np.frombuffer(blob, dtype=np.uint8, count=height * width * 3)
        img = pixels.reshape((height, width, 3))[:, :, ::-1].copy()
        cv2.imwrite(target_path, img)
예제 #6
0
def runPage(pdf, n):
    """Render one PDF page, undistort it and compute its split structure.

    Page ``n`` is copied into an in-memory single-page PDF, rasterized with
    Wand, converted to a BGR array, then processed by ``undistort`` and
    ``split``. Intermediate images are displayed with ``cv2.imshow``.

    Args:
        pdf: PyPDF2 reader providing ``getPage``.
        n: Index of the page to process.

    Returns:
        A tuple ``(cv_img, structure_data)``: the undistorted image and a
        dict whose ``'w'``, ``'h'`` and ``'split_pts'`` entries are the
        three values returned by ``split``.
    """
    # read the pdf page into an in-memory buffer
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_page = pdf.getPage(n)
    pdf_writer.addPage(pdf_page)
    pdf_stream = io.BytesIO()
    pdf_writer.write(pdf_stream)
    pdf_stream.seek(0)

    # rasterize; the context manager frees the ImageMagick resources
    dpi = int(IMAGE_WIDTH * DPI_TO_PX_RATIO / (pdf_page.mediaBox[2]))
    with Image(file=pdf_stream, resolution=dpi) as wand_img:
        width, height = wand_img.width, wand_img.height
        wand_img.depth = 8
        blob = wand_img.make_blob(format='RGB')

    # convert wand_image to cv_image in one vectorized step instead of a
    # per-pixel struct.unpack loop: reinterpret the blob as 8-bit RGB
    # triples, then flip the channel axis to BGR. The copy yields a
    # contiguous, writable array for OpenCV.
    pixels = np.frombuffer(blob, dtype=np.uint8, count=height * width * 3)
    cv_img = pixels.reshape((height, width, 3))[:, :, ::-1].copy()

    cv2.imshow('img', cv_img)

    # undistort
    cv_img = undistort(cv_img.copy(), verbose=False)

    cv2.imshow('undistorted', cv_img)

    structure_data = {}
    (structure_data['w'], structure_data['h'],
     structure_data['split_pts']) = split(cv_img, verbose=False)

    # mark every split point with a horizontal red line; the 'img' window
    # is reused, so this replaces the raw rasterization shown earlier
    lined_img = cv_img.copy()
    for split_pt in structure_data['split_pts']:
        row = int(split_pt)
        cv2.line(lined_img, (0, row), (width, row), (0, 0, 255), 0)

    cv2.imshow('img', lined_img)

    return cv_img, structure_data
예제 #7
0
    def ExtractImg(pdf_reader, n, out_path):
        """Rasterize page ``n`` of a PDF and write it to ``out_path``.

        The page is copied into an in-memory single-page PDF, rendered with
        Wand at a resolution computed from ``IMAGE_WIDTH``,
        ``DPI_TO_PX_RATIO`` and ``page.mediaBox[3]``, converted to a BGR
        array and saved with ``cv2.imwrite``.

        Args:
            pdf_reader: PyPDF2 reader providing ``getPage``.
            n: Index of the page to extract.
            out_path: File name passed to ``cv2.imwrite``.
        """
        writer = PyPDF2.PdfFileWriter()
        page = pdf_reader.getPage(n)
        writer.addPage(page)

        page_buffer = io.BytesIO()
        writer.write(page_buffer)
        page_buffer.seek(0)

        resolution = IMAGE_WIDTH * DPI_TO_PX_RATIO / (page.mediaBox[3])
        with Image(file=page_buffer, resolution=resolution) as wand_img:
            width, height = wand_img.width, wand_img.height
            wand_img.depth = 8
            blob = wand_img.make_blob(format='RGB')

        # convert wand_image to cv_image: view the blob as interleaved
        # 8-bit RGB triples and reverse the channel axis to get BGR. This
        # replaces the per-pixel struct.unpack loop. The copy makes the
        # array contiguous and writable for OpenCV.
        rgb = np.frombuffer(blob, dtype=np.uint8, count=height * width * 3)
        img = rgb.reshape((height, width, 3))[:, :, ::-1].copy()
        cv2.imwrite(out_path, img)
예제 #8
0
    def process(self, pdf_filename, pdf_resolution, imageformat,
                do_orientation,
                png_filename):
        """Convert each PDF page to a grayscale image, save it and OCR it.

        Every page is converted to an 8-bit grayscale image, level-adjusted,
        saved to ``png_filename`` (the same name for every page, so later
        pages overwrite earlier ones) and passed as a blob to
        ``self.image2txt_pyocr``. The total run time is printed.

        Args:
            pdf_filename: File name of the document passed to ``Image``.
            pdf_resolution: Resolution used to read the document.
            imageformat: Format the pages are converted to and encoded in.
            do_orientation: Forwarded unchanged to ``image2txt_pyocr``.
            png_filename: File name each page image is saved under.

        Returns:
            Always the empty string.
        """
        # NOTE(review): the OCR result is discarded and nothing is ever
        # appended to final_text, so callers always receive "". This mirrors
        # the existing behaviour; confirm whether the text should be
        # collected.
        final_text = ""

        # The context managers release the ImageMagick resources even when a
        # page fails; previously the images were never closed.
        with Image(filename=pdf_filename,
                   resolution=pdf_resolution) as image_pdf, \
                image_pdf.convert(imageformat) as image_page:
            process_start = time.time()

            for img in image_page.sequence:
                with Image(image=img) as img_per_page:
                    img_per_page.type = 'grayscale'
                    img_per_page.depth = 8
                    # NOTE(review): Wand exposes the DPI as `resolution`;
                    # confirm that setting `density` has any effect.
                    img_per_page.density = pdf_resolution
                    img_per_page.level(black=0.3, white=1.0, gamma=1.5,
                                       channel=None)
                    img_per_page.save(filename=png_filename)
                    self.image2txt_pyocr(img_per_page.make_blob(imageformat),
                                         do_orientation)

            process_end = time.time() - process_start
            print("Total elaboration time: %s" % process_end)

        return final_text
예제 #9
0
    def run(path, pdf_filename):
        sys.stdout.flush()
        # create folder
        foldername = pdf_filename[:-4]
        if not os.path.isdir(path+'/'+foldername):
            print 'PdfFile', pdf_filename,
            os.makedirs(path+'/'+foldername)
        else:
            return # if a folder exists, assume that the images are ready

        # read pdf
        pdf = PyPDF2.PdfFileReader(file(path+'/'+pdf_filename, "rb"))
        print pdf.getNumPages(), 'pages',
        for n in xrange(0, pdf.getNumPages()):
            pdf_writer = PyPDF2.PdfFileWriter()
            pdf_page = pdf.getPage(n)
            pdf_writer.addPage(pdf_page)
            bytes = io.BytesIO()
            pdf_writer.write(bytes)
            bytes.seek(0)

            # rasterize
            wand_img = Image(file = bytes, resolution = int(IMAGE_WIDTH*DPI_TO_PX_RATIO/(pdf_page.mediaBox[3])))
            width, height = wand_img.width, wand_img.height
            wand_img.depth = 8
            blob = wand_img.make_blob(format='RGB')

            # convert wand_image to cv_image
            cv_img = np.zeros((height, width, 3), dtype = np.uint8)
            for y in xrange(height):
                for x in xrange(width):
                    cv_img[y, x, 0] = struct.unpack('B', blob[3*(y*width+x)+2])[0]
                    cv_img[y, x, 1] = struct.unpack('B', blob[3*(y*width+x)+1])[0]
                    cv_img[y, x, 2] = struct.unpack('B', blob[3*(y*width+x)+0])[0]
            cv2.imwrite(path+'/'+foldername+'/'+str(n)+'.jpg', cv_img)
            print '.',
        print ''
예제 #10
0
def main():
    """Render the glyphs 0x20..0x7d of a font and emit them as C source.

    Reads the font file, font size, font name, output directory and
    verbosity from ``get_args()``. Writes ``<out_dir>/<font_name>.c`` with
    one byte array per glyph, a ``letters`` table and a ``font_<font_name>``
    struct, and ``<out_dir>/<font_name>.h`` with the matching declaration.

    Raises:
        AssertionError: If the text height or the maximum horizontal advance
            differs between glyphs.
    """
    args = get_args()

    font_name = args.font_name
    out_dir = args.out_dir

    font_height = 0
    range_first = 0x20
    range_last = 0x7d
    font_width = []
    max_width = 0

    source_path = out_dir + "/" + font_name + ".c"
    header_path = out_dir + "/" + font_name + ".h"

    # Context managers close the Wand resources and the output file even
    # when one of the asserts below fires.
    with Drawing() as draw, Image(width=1000, height=1000) as img_ref:
        draw.font = args.font_file
        draw.font_size = args.font_size

        if args.verbose:
            print("Writing " + source_path)
        with open(source_path, 'wb+') as f:
            write_comment(f)
            f.write("#include \"font.h\"\n\n")

            for x in range(range_first, range_last + 1):
                letter = chr(x)
                metrics = draw.get_font_metrics(img_ref, letter)
                text_height = int(round(metrics.text_height + 2))
                if font_height == 0:
                    font_height = text_height
                assert (font_height == text_height), "font height changed!"
                if max_width == 0:
                    max_width = metrics.maximum_horizontal_advance + 2
                assert (max_width == metrics.maximum_horizontal_advance + 2), \
                    "font advance width changed!"
                text_width = int(round(metrics.text_width + 2))
                font_width.append(text_width)

                with Image(width=text_width, height=text_height) as img:
                    with draw.clone() as d:
                        d.text(0, int(metrics.ascender), letter)
                        d(img)

                    img.depth = 1

                    f.write("static const unsigned char ")
                    f.write("letter_%x[] = {\n" % x)
                    c_hex_print(f, img.make_blob(format='A'))
                    f.write("};\n\n")

            f.write("static const struct font_letter letters[] = {\n")
            for x in range(range_first, range_last + 1):
                letter_var_name = "letter_%x" % x
                f.write("\t{ " + letter_var_name + ", ")
                f.write("sizeof(" + letter_var_name + "), ")
                f.write(str(font_width[x - range_first]) + "},\n")
            f.write("};\n\n")

            f.write("const struct font font_" + font_name + " = {\n")
            f.write("\t.first = " + hex(range_first) + ",\n")
            f.write("\t.last = " + hex(range_last) + ",\n")
            f.write("\t.letters = letters,\n")
            f.write("\t.height = " + str(font_height) + ",\n")
            f.write("\t.max_width = " + str(max_width) + ",\n")
            f.write("};\n")

    if args.verbose:
        print("Writing " + header_path)
    with open(header_path, 'wb+') as f:
        write_comment(f)
        f.write("#ifndef __" + font_name.upper() + "_H\n")
        f.write("#define __" + font_name.upper() + "_H\n")
        f.write("#include \"font.h\"\n")
        f.write("extern const struct font font_" + font_name + ";\n")
        f.write("#endif /*__" + font_name.upper() + "_H*/\n")
예제 #11
0
def main():
    """Generate a C source/header pair describing a bitmap font.

    The font file, font size, font name, output directory and verbosity
    come from ``get_args()``. For every character code from 0x20 to 0x7d the
    glyph is drawn with Wand and dumped as a byte array into
    ``<out_dir>/<font_name>.c``, followed by a ``letters`` table and the
    ``font_<font_name>`` struct. ``<out_dir>/<font_name>.h`` receives the
    matching extern declaration.

    Raises:
        AssertionError: If the text height or the maximum horizontal advance
            is not the same for all glyphs.
    """
    args = get_args()

    font_name = args.font_name
    out_dir = args.out_dir
    c_path = out_dir + "/" + font_name + ".c"
    h_path = out_dir + "/" + font_name + ".h"

    range_first = 0x20
    range_last = 0x7d
    font_height = 0
    max_width = 0
    font_width = []

    # Drawing, reference image and output file are managed by `with` so that
    # they are released even if a consistency assert fails midway.
    with Drawing() as draw, Image(width=1000, height=1000) as img_ref:
        draw.font = args.font_file
        draw.font_size = args.font_size

        if args.verbose:
            print("Writing " + c_path)
        with open(c_path, 'wb+') as f:
            write_comment(f)
            f.write("#include \"font.h\"\n\n")

            for code in range(range_first, range_last + 1):
                letter = chr(code)
                metrics = draw.get_font_metrics(img_ref, letter)

                text_height = int(round(metrics.text_height + 2))
                if font_height == 0:
                    font_height = text_height
                assert (font_height == text_height), "font height changed!"

                advance = metrics.maximum_horizontal_advance + 2
                if max_width == 0:
                    max_width = advance
                assert (max_width == advance), "font advance width changed!"

                text_width = int(round(metrics.text_width + 2))
                font_width.append(text_width)

                with Image(width=text_width, height=text_height) as img:
                    with draw.clone() as glyph_draw:
                        glyph_draw.text(0, int(metrics.ascender), letter)
                        glyph_draw(img)

                    img.depth = 1

                    f.write("static const unsigned char ")
                    f.write("letter_%x[] = {\n" % code)
                    c_hex_print(f, img.make_blob(format='A'))
                    f.write("};\n\n")

            f.write("static const struct font_letter letters[] = {\n")
            for code, glyph_width in enumerate(font_width, range_first):
                letter_var_name = "letter_%x" % code
                f.write("\t{ " + letter_var_name + ", ")
                f.write("sizeof(" + letter_var_name + "), ")
                f.write(str(glyph_width) + "},\n")
            f.write("};\n\n")

            f.write("const struct font font_" + font_name + " = {\n")
            f.write("\t.first = " + hex(range_first) + ",\n")
            f.write("\t.last = " + hex(range_last) + ",\n")
            f.write("\t.letters = letters,\n")
            f.write("\t.height = " + str(font_height) + ",\n")
            f.write("\t.max_width = " + str(max_width) + ",\n")
            f.write("};\n")

    if args.verbose:
        print("Writing " + h_path)
    with open(h_path, 'wb+') as f:
        guard = "__" + font_name.upper() + "_H"
        write_comment(f)
        f.write("#ifndef " + guard + "\n")
        f.write("#define " + guard + "\n")
        f.write("#include \"font.h\"\n")
        f.write("extern const struct font font_" + font_name + ";\n")
        f.write("#endif /*" + guard + "*/\n")
예제 #12
0
def main():
    """Worker loop that turns uploaded images into JPEGs with thumbnails.

    Hashes are popped from the Redis list ``_incoming``. For each hash the
    pickled descriptor stored under that key is loaded, the uploaded file
    ``upload/<hash>.<ext>`` is read with Wand, and its EXIF date is checked
    (it must be present and at most 365 days old). The image is re-encoded
    as ``static/<hash>.jpg`` with a fresh EXIF block, a 32x32 thumbnail is
    written to ``static/<hash>_thumb.jpg``, the upload is deleted, and the
    hash is pushed onto ``_images``. On any error the descriptor's status is
    set to the error text and stored with an expiry. The loop never ends.
    """
    mc = redis.StrictRedis()
    while True:
        hash_value = mc.rpop('_incoming')
        if not hash_value:
            time.sleep(0.25)
            continue
        desc = mc.get(hash_value)
        if not desc:
            continue
        # NOTE(review): unpickling executes arbitrary code if the Redis
        # instance can be written to by an untrusted party.
        desc = cPickle.loads(desc)
        try:
            desc = desc._replace(status='Processing')
            mc.set(hash_value, cPickle.dumps(desc))

            upload_path = os.path.join('upload', hash_value + '.' + desc.ext)

            # The Wand image is closed as soon as the metadata and the raw
            # pixels are extracted; this loop runs forever, so leaving it
            # open leaked one image per processed upload.
            with Image(filename=upload_path) as wand_img:
                dt_string = wand_img.metadata.get('exif:DateTimeOriginal')
                uniq = wand_img.metadata.get('exif:UniqueCameraModel')
                mm = (wand_img.metadata.get('exif:Make'),
                      wand_img.metadata.get('exif:Model'))

                if dt_string is None:
                    raise Exception("Image date undefined")

                dt = datetime.strptime(dt_string, '%Y:%m:%d %H:%M:%S')
                if (datetime.now() - dt).days > 365:
                    raise Exception("Image too old")

                wand_img.depth = 8
                wand_img.format = 'RGB'
                blob = wand_img.make_blob()
                im = PIL.Image.frombytes('RGB', wand_img.size, blob)

            imgif = {}
            model_string = ''
            if uniq:
                model_string = uniq
                imgif[piexif.ImageIFD.UniqueCameraModel] = uniq
            else:
                if mm[0]:
                    imgif[piexif.ImageIFD.Make] = mm[0]
                if mm[1]:
                    imgif[piexif.ImageIFD.Model] = mm[1]
                model_string = ' '.join([v for v in mm if v])

            imgif[piexif.ImageIFD.DateTime] = desc.upload_date
            exif = {piexif.ExifIFD.DateTimeOriginal: dt_string,
                    piexif.ExifIFD.UserComment: desc.name}
            exif_dict = {"0th": imgif, "Exif": exif}
            exif_bytes = piexif.dump(exif_dict)

            # encode in memory first so the byte size is known before the
            # descriptor is built
            fp = io.BytesIO()
            im.save(fp, "JPEG", exif=exif_bytes)
            saved = fp.getvalue()
            size_string = '%i' % len(saved)

            desc = ImageDesc(
                hash_value=hash_value,
                name=desc.name,
                ext=desc.ext,
                upload_date=desc.upload_date,
                creation_date=dt_string,
                camera=model_string,
                size=size_string,
                status='OK')

            os.remove(upload_path)
            with open(os.path.join('static', hash_value + '.jpg'), 'wb') as f:
                f.write(saved)
            im.thumbnail((32, 32), PIL.Image.ANTIALIAS)
            im.save(os.path.join('static', hash_value + '_thumb.jpg'), "JPEG")

            mc.set(hash_value, cPickle.dumps(desc))
            mc.rpush('_images', hash_value)
        except Exception as e:
            # Best effort: record the failure text as the status.
            # NOTE(review): the expiry is 60*24 = 1440, which setex takes as
            # seconds (24 minutes); confirm 24 hours was not intended.
            desc = desc._replace(status=str(e))
            mc.setex(hash_value, 60 * 24, cPickle.dumps(desc))
예제 #13
0
def main():
    """Process uploaded images from a Redis queue, forever.

    Each hash popped from the ``_incoming`` list identifies a pickled
    descriptor stored under the same key and an uploaded file
    ``upload/<hash>.<ext>``. The file is opened with Wand and its EXIF
    original date must exist and be at most 365 days old. It is then
    re-encoded to ``static/<hash>.jpg`` with a newly built EXIF block, a
    32x32 thumbnail is saved as ``static/<hash>_thumb.jpg``, the upload is
    removed, and the hash is appended to ``_images``. Any exception is
    stored as the descriptor's status together with an expiry.
    """
    mc = redis.StrictRedis()
    while True:
        hash_value = mc.rpop('_incoming')
        if not hash_value:
            # queue is empty: back off briefly before polling again
            time.sleep(0.25)
            continue
        pickled_desc = mc.get(hash_value)
        if not pickled_desc:
            continue
        # NOTE(review): unpickling executes arbitrary code if the Redis
        # instance can be written to by an untrusted party.
        desc = cPickle.loads(pickled_desc)
        try:
            desc = desc._replace(status='Processing')
            mc.set(hash_value, cPickle.dumps(desc))

            source_path = os.path.join('upload', hash_value + '.' + desc.ext)

            # Close the Wand image right after the metadata and pixel data
            # are read; in this endless loop an unclosed image per upload
            # is a resource leak.
            with Image(filename=source_path) as source:
                metadata = source.metadata
                dt_string = metadata.get('exif:DateTimeOriginal')
                uniq = metadata.get('exif:UniqueCameraModel')
                make = metadata.get('exif:Make')
                model = metadata.get('exif:Model')

                if dt_string is None:
                    raise Exception("Image date undefined")

                dt = datetime.strptime(dt_string, '%Y:%m:%d %H:%M:%S')
                if (datetime.now() - dt).days > 365:
                    raise Exception("Image too old")

                source.depth = 8
                source.format = 'RGB'
                blob = source.make_blob()
                im = PIL.Image.frombytes('RGB', source.size, blob)

            imgif = {}
            if uniq:
                model_string = uniq
                imgif[piexif.ImageIFD.UniqueCameraModel] = uniq
            else:
                if make:
                    imgif[piexif.ImageIFD.Make] = make
                if model:
                    imgif[piexif.ImageIFD.Model] = model
                model_string = ' '.join(v for v in (make, model) if v)

            imgif[piexif.ImageIFD.DateTime] = desc.upload_date
            exif = {
                piexif.ExifIFD.DateTimeOriginal: dt_string,
                piexif.ExifIFD.UserComment: desc.name
            }
            exif_bytes = piexif.dump({"0th": imgif, "Exif": exif})

            # encode in memory first so the byte size is known before the
            # descriptor is built
            fp = io.BytesIO()
            im.save(fp, "JPEG", exif=exif_bytes)
            saved = fp.getvalue()

            desc = ImageDesc(hash_value=hash_value,
                             name=desc.name,
                             ext=desc.ext,
                             upload_date=desc.upload_date,
                             creation_date=dt_string,
                             camera=model_string,
                             size='%i' % len(saved),
                             status='OK')

            os.remove(source_path)
            with open(os.path.join('static', hash_value + '.jpg'), 'wb') as f:
                f.write(saved)
            im.thumbnail((32, 32), PIL.Image.ANTIALIAS)
            im.save(os.path.join('static', hash_value + '_thumb.jpg'), "JPEG")

            mc.set(hash_value, cPickle.dumps(desc))
            mc.rpush('_images', hash_value)
        except Exception as e:
            # Best effort: record the failure text as the status.
            # NOTE(review): the expiry is 60*24 = 1440, which setex takes as
            # seconds (24 minutes); confirm 24 hours was not intended.
            desc = desc._replace(status=str(e))
            mc.setex(hash_value, 60 * 24, cPickle.dumps(desc))