# Split the FOIA response PDF into single-page PDFs, convert each page to a
# PNG, and OCR it via ocr_utils.
import ocr_utils

foia_response = '1505deposists2.pdf'
ocr_utils.split_pdf(foia_response)

num_page_start = 1
num_page_end = 86

all_text = []
data = []
for page in range(num_page_start, num_page_end + 1):
    indiv_filename = "{}.{}.pdf".format(ocr_utils.get_stem(foia_response), page)
    tmp_filename = indiv_filename
    tmp_image = ocr_utils.convert_to_png(tmp_filename)
    all_text.append(ocr_utils.ocr_image(tmp_image))

# OCR text for the first page, split into lines
data = all_text[0].split('\n')

# for each data set you should read in the amounts
reviewed = []

# inputs moved to checkanalysis.py
#for row in data:
#
#    if row.split() == []:
#        pass
#    else:
#        inputs.append(row.split()[2])
#print inputs
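# Hypothetical sketch of the amount-extraction step referenced above. The real
# logic lives in checkanalysis.py (not shown here); this assumes, as the
# commented-out loop did, that the dollar amount is the third whitespace-
# separated token on each OCR'd line.
def extract_amounts(page_text):
    amounts = []
    for row in page_text.split('\n'):
        tokens = row.split()
        if not tokens:
            continue
        amounts.append(tokens[2] if len(tokens) > 2 else 'null')
    return amounts

# e.g. amounts_per_page = [extract_amounts(text) for text in all_text]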
draw2 = np.copy(im_resized)
boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

img = Image.fromarray(draw2)
draw = ImageDraw.Draw(img)

# if len(boxes) > 10:
#     boxes = boxes[0:10]

out_boxes = []
for box in boxes:
    pts = box[0:8]
    pts = pts.reshape(4, -1)

    det_text, conf, dec_s = ocr_image(net, codec, im_data, box, device)
    if len(det_text) == 0:
        continue

    width, height = draw.textsize(det_text, font=font2)
    center = [box[0], box[1]]
    draw.text((center[0], center[1]), det_text, fill=(0, 255, 0), font=font2)
    out_boxes.append(box)
    print(det_text)

im = np.array(img)

for box in out_boxes:
    pts = box[0:8]
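# The final loop above is cut off in this excerpt. As an illustration only (an
# assumption, not the repository's actual continuation), the detected
# quadrilaterals could be drawn onto the rendered frame with OpenCV:
import cv2

for box in out_boxes:
    quad = box[0:8].reshape(4, 2).astype(np.int32)
    cv2.polylines(im, [quad], isClosed=True, color=(0, 255, 0), thickness=2)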
def run_model_input_image(im, show_boxes=False):
    predictions = {}

    parser = argparse.ArgumentParser()
    parser.add_argument('-cuda', type=int, default=1)
    parser.add_argument('-model', default='e2e-mlt-rctw.h5')
    parser.add_argument('-segm_thresh', default=0.5)

    font2 = ImageFont.truetype("Arial-Unicode-Regular.ttf", 18)

    args = parser.parse_args()

    # load the detection + recognition network
    net = ModelResNetSep2(attention=True)
    net_utils.load_net(args.model, net)
    net = net.eval()

    if args.cuda:
        print('Using cuda ...')
        net = net.cuda()

    with torch.no_grad():
        # im = Image.open(im)
        # im = im.convert('RGB')
        im = np.asarray(im)
        im = im[..., :3]
        im_resized, (ratio_h, ratio_w) = resize_image(im, scale_up=False)

        # normalize to roughly [-1, 1] and move to NCHW layout
        images = np.asarray([im_resized], dtype=np.float)
        images /= 128
        images -= 1
        im_data = net_utils.np_to_variable(images, is_cuda=args.cuda).permute(0, 3, 1, 2)

        seg_pred, rboxs, angle_pred, features = net(im_data)

        rbox = rboxs[0].data.cpu()[0].numpy()
        rbox = rbox.swapaxes(0, 1)
        rbox = rbox.swapaxes(1, 2)

        angle_pred = angle_pred[0].data.cpu()[0].numpy()

        segm = seg_pred[0].data.cpu()[0].numpy()
        segm = segm.squeeze(0)

        draw2 = np.copy(im_resized)
        boxes = get_boxes(segm, rbox, angle_pred, args.segm_thresh)

        img = Image.fromarray(draw2)
        draw = ImageDraw.Draw(img)

        # if len(boxes) > 10:
        #     boxes = boxes[0:10]

        out_boxes = []
        prediction_i = []
        for box in boxes:
            pts = box[0:8]
            pts = pts.reshape(4, -1)

            # run the recognition head on each detected box
            det_text, conf, dec_s = ocr_image(net, codec, im_data, box)
            if len(det_text) == 0:
                continue

            width, height = draw.textsize(det_text, font=font2)
            center = [box[0], box[1]]
            draw.text((center[0], center[1]), det_text, fill=(0, 255, 0), font=font2)
            out_boxes.append(box)
            # det_text is one prediction
            prediction_i.append(det_text.lower())

        predictions["frame"] = prediction_i

        # show each image's boxes and output in a pop-up window
        show_image_with_boxes(img, out_boxes, show=show_boxes)

    print(predictions)
    return predictions
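# Minimal usage sketch (an assumption, not part of the original script): load a
# frame with OpenCV, convert BGR -> RGB, and run detection + recognition on it.
# 'frame_0001.png' is a hypothetical file name.
import cv2

frame = cv2.imread('frame_0001.png')
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
preds = run_model_input_image(frame, show_boxes=True)
print(preds['frame'])  # lower-cased strings recognized in the frame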
# PNGs created by running the image through an image converter, i.e.
#   convert -density 300 -trim <file>.pdf -quality 100 -sharpen 0x1.0 <image>.png
# density > 250 leads to poor image recognition.
# depends on tesseract-ocr, tesseract-devel and leptonica to run properly
import ocr_utils
import numpy

png_response = '1505deposists2.83.png'

all_text = []
all_text.append(ocr_utils.ocr_image(png_response))
data = all_text[0].split('\n')

# for each data set you should read in the amounts
# row.split()[2] should work for *most* cases but there are
# edge cases where [0] is required i.e. 1505deposits2.8.png
inputs = []
for row in data:
    if row.split() == []:
        pass
    try:
        inputs.append(row.split()[2])
    except IndexError:
        inputs.append('null')

# check which checks require review and mark them for review
review = []

# create new array and flag which amounts require review
# afterwards, remove parentheses to get purely dollar amounts
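# Hedged sketch of the review-flagging step described above (assumed logic, not
# the original implementation): flag anything that does not look like a plain
# dollar amount, then strip parentheses so the remaining values are pure amounts.
import re

cleaned = []
for amount in inputs:
    if not re.fullmatch(r'\(?\$?[\d,]+(\.\d{2})?\)?', amount):
        review.append(amount)  # needs manual review
        continue
    cleaned.append(amount.replace('(', '').replace(')', ''))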