def postprocess(isbn):
    """Trim, crop and OCR the page image belonging to one book.

    Steps, all operating on files in the current working directory:

    1. ``convert`` chops a 200x50 region (gravity SouthEast) off
       ``book<isbn>.png`` and writes ``pbook<isbn>.png``.
    2. ``crop.crop`` is called with the base name ``pbook<isbn>``.
    3. ``convert`` turns ``pbook<isbn>_crop.png`` into an 8-bit grayscale
       TIFF without alpha, ``pbook<isbn>.tif``.
    4. ``tesseract`` is run on that TIFF with ``book<isbn>`` as its second
       argument.

    :param isbn: value interpolated into every file name.
    :raises OSError: if ``convert`` or ``tesseract`` cannot be started.
    """
    import subprocess

    import crop

    # Argument lists (no shell) so that no character inside ``isbn`` can be
    # interpreted as shell syntax.  ``subprocess.call`` is used rather than
    # ``check_call`` so a non-zero exit status is ignored, as it was before.
    subprocess.call([
        "convert", "book%s.png" % isbn,
        "-gravity", "SouthEast",
        "-chop", "200x50",
        "pbook%s.png" % isbn,
    ])
    crop.crop("pbook%s" % isbn)
    subprocess.call([
        "convert", "pbook%s_crop.png" % isbn,
        "-depth", "8",
        "-alpha", "off",
        "-colorspace", "gray",
        "pbook%s.tif" % isbn,
    ])
    subprocess.call(["tesseract", "pbook%s.tif" % isbn, "book%s" % isbn])
def main():
    """Parse the command line and run the automatic crop over a folder.

    Builds the ``image_auto_crop`` argument parser, echoes the parsed
    namespace, and forwards every option to ``crop``.
    """
    # The two halves used to be glued together without a separating space
    # ("pixelsand"), which showed up in the --help output.
    description = ('Extract part of image which specified by bounding box of '
                   'non background pixels and save it to same filename in '
                   'chosen folder.')
    parser = argparse.ArgumentParser(prog="image_auto_crop",
                                     description=description)
    parser.add_argument('-p', '--path', metavar='STRING',
                        help='Path to source dir', required=True)
    parser.add_argument('-d', '--dest', metavar='STRING',
                        help='Path to destination dir', required=True)
    parser.add_argument('-c', '--clear', default=False, action='store_true',
                        help='Clear destination directory if destination!=source')
    parser.add_argument('-m', '--margin',
                        metavar=('LEFT', 'TOP', 'RIGHT', 'BOTTOM'),
                        type=int, nargs=4, default=(0, 0, 0, 0),
                        help='Cropped image margin')
    parser.add_argument('-w', '--width', default=-1, metavar='INT', type=int,
                        help='Max width for cropped image.')
    parser.add_argument('-b', '--background', metavar=('R', 'G', 'B'),
                        type=int, nargs=3, default=(255, 255, 255),
                        help='Image background(dominant) color')
    parser.add_argument('-f', '--format', metavar='STRING', default="PNG",
                        help='PIL supported output format')
    parser.add_argument('-q', '--quality', metavar='INT', type=int,
                        default=100, help='Image quality 0-100')
    args = parser.parse_args()

    # Function-call form prints the same text on Python 2 and also parses on
    # Python 3.
    print(args)

    # NOTE(review): ``maxwith`` is the keyword this call has always used;
    # presumably it matches the spelling in crop()'s signature - confirm
    # before renaming.
    crop(args.path, args.dest,
         maxwith=args.width,
         margin=args.margin,
         remove_destination=args.clear,
         background_color=args.background,
         format=args.format,
         quality=args.quality)
def process_tail(tile_dir, selection, remove_trash=False): """ Crops images in tile_dir folder. :param tile_dir: str path to directory :param selection: array contains coordinates for cropping :param remove_trash: Bool """ # selection = transform_coordinates(coordinates_from_geojson(geojson)) does_not_contain = True for image_name in os.listdir(tile_dir): image_path = os.path.join(tile_dir, image_name) if contains(get_corner_coordinates(image_path), selection) is True: does_not_contain = False output_file_name = 'c' + os.path.splitext(image_name)[0] + '.tiff' # print('\t' + output_file_name) crop(selection, image_path, os.path.join(tile_dir, output_file_name)) else: print('\t' + 'does not contain your selection') logging.info(tile_dir + ' does not contain selection') if remove_trash is True: logging.info('removing ' + image_path) os.remove(image_path) if does_not_contain and remove_trash: logging.info('removing ' + tile_dir) shutil.rmtree(tile_dir)
def forward(self, x, l, train, action):
    """Run one glimpse step with multi-scale retina patches.

    :param x: input batch handed to ``crop``.
    :param l: location variable; its ``.data`` is used as the crop centre.
    :param train: when true a location is sampled around the predicted mean
        and its log-policy (and, with ``action``, a baseline) is returned.
    :param action: when true the action head ``fc_ha`` is evaluated.
    :return: ``(location, ln_p, y, b)``; entries that do not apply to the
        chosen ``train`` / ``action`` combination are ``None``.
    """
    # Retina encoding: crop() gets a NumPy centre even when running on xp != np.
    centre = l.data if self.xp == np else self.xp.asnumpy(l.data)
    glimpse = crop(x, center=centre, size=self.g_size)

    # Each further scale doubles the patch size and pools it back down.
    for level in range(1, self.scale):
        factor = np.power(2, level)
        coarse = crop(x, center=centre, size=self.g_size * factor)
        coarse = F.average_pooling_2d(coarse, ksize=factor)
        glimpse = F.concat((glimpse, coarse), axis=1)
    retina_code = F.relu(self.emb_x(glimpse))

    # Location encoding
    location_code = F.relu(self.emb_l(l))

    # Glimpse net
    g = F.relu(self.fc_lg(location_code) + self.fc_xg(retina_code))

    # Core net
    h = self.core_lstm(g)

    # Location net works on a copy of h.data wrapped in a fresh Variable.
    detached_h = chainer.Variable(h.data, volatile=not train)
    m = self.fc_hl(detached_h)

    sampled = None
    ln_p = None
    if train:
        # Draw a sample from N(mean, var).
        noise = self.xp.random.normal(0, 1, size=m.data.shape).astype(np.float32)
        noisy = m + np.sqrt(self.var) * noise

        # ln(location policy)
        n1, n2 = F.split_axis(noisy, indices_or_sections=2, axis=1)
        m1, m2 = F.split_axis(m, indices_or_sections=2, axis=1)
        ln_p = -0.5 * ((n1 - m1) * (n1 - m1) + (n2 - m2) * (n2 - m2)) / self.var
        ln_p = F.reshape(ln_p, (-1, ))

        # Re-wrap the sample's data in a fresh Variable.
        sampled = chainer.Variable(noisy.data, volatile=not train)

    # Action net (optional)
    y = self.fc_ha(h) if action else None

    if not train:
        return m, None, y, None

    b = None
    if action:
        # Baseline
        b = F.reshape(self.fc_hb(h), (-1, ))
    return sampled, ln_p, y, b
def compute_glimpse(self, x, location):
    """Build the multi-scale glimpse around ``location``.

    The first patch has side ``self.g_size``; every further scale ``k``
    crops a patch ``2**k`` times larger, average-pools it by the same
    factor and concatenates it to the result along axis 1.

    :param x: input batch handed to ``crop``.
    :param location: variable whose ``.data`` is the crop centre.
    :return: the concatenated glimpse.
    """
    centre = location.data

    # Retina encoding at the base scale.
    stacked = crop(x, center=centre, size=self.g_size)

    # Coarser scales.
    for level in range(1, self.scale):
        factor = int(self.xp.power(2, level))
        coarse = F.average_pooling_2d(
            crop(x, center=centre, size=self.g_size * factor),
            ksize=factor,
        )
        stacked = F.concat((stacked, coarse), axis=1)
    return stacked
def crop_images(directory, selection):
    """
    Crop, in place, every ``.tiff`` file found under ``directory``.

    The tree is walked recursively; each matching file is passed to
    ``crop`` as both source and destination.

    :param directory: str, directory containing tiff files.
    :param selection: array, selection for cropping
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            _stem, extension = os.path.splitext(name)
            if extension != '.tiff':
                continue
            target = os.path.join(folder, name)
            crop(selection, target, target)
def recognize(imgname, output, desired, show_intermediate_results=False):
    """OCR an image and report accuracy against a reference text.

    ``scan`` is run first; ``deskewed.jpg`` is then read, dilated with a
    2x2 kernel and saved as ``no_noise.jpg``, cropped into ``scan_res.jpg``
    and handed to pytesseract. The recognised text is written to ``output``
    and the result of ``test_accuracy`` is printed.

    :param imgname: path of the image passed to ``scan``.
    :param output: path of the text file to write.
    :param desired: path of the reference text used by ``test_accuracy``.
    :param show_intermediate_results: forwarded to ``scan`` and ``crop``.
    """
    scan(imgname, show_intermediate_results)

    denoised_path = 'no_noise.jpg'
    cropped_path = 'scan_res.jpg'

    kernel = np.ones((2, 2))
    cv2.imwrite(denoised_path, cv2.dilate(cv2.imread('deskewed.jpg'), kernel))
    crop(denoised_path, cropped_path, show_intermediate_results)

    text = pytesseract.image_to_string(Image.open(cropped_path), config="config")
    with open(output, 'w+') as handle:
        print(text, file=handle)

    # The file is closed before test_accuracy is called on it.
    score = test_accuracy(scan_res=output, desired=desired)
    print('Accuracy: ' + str(score))
def anglec(img1):
    """Estimate the skew of a binary image, rotate it upright and crop it.

    Two reference pixels equal to ``1`` are located by scanning rows from
    the top: one within the first 100 columns and one within the last 99
    columns (scanned right to left). Only the first 200 rows are searched.
    The angle of the line through them is passed to ``rotate``; the result
    goes through ``binarization1`` and ``crop``.

    :param img1: 2-D array whose foreground pixels equal ``1``.
    :return: the rotated, re-binarised and cropped image.
    :raises ValueError: if no foreground pixel is found in either search
        window (this used to surface as a NameError).
    """
    rows, cols = img1.shape
    # Clamp the search windows so small images are not indexed out of range
    # (or, for the right window, silently wrapped via negative indices).
    row_limit = min(200, rows)
    left_columns = range(min(100, cols))
    right_columns = range(cols - 1, max(cols - 100, -1), -1)

    def first_foreground(columns):
        # First pixel == 1 in row-major order within the given columns.
        for i in range(row_limit):
            for j in columns:
                if img1[i][j] == 1:
                    return i, j
        return None

    left = first_foreground(left_columns)
    right = first_foreground(right_columns)
    if left is None or right is None:
        raise ValueError("anglec: no foreground pixel found in the search window")

    row_delta = right[0] - left[0]
    col_delta = right[1] - left[1]
    if col_delta == 0:
        # Both reference pixels share a column: the line is vertical (or the
        # two pixels coincide), which previously raised ZeroDivisionError.
        theta = math.copysign(90.0, row_delta) if row_delta else 0.0
    else:
        # float() keeps this a true division on Python 2 as well.
        theta = math.degrees(math.atan(float(row_delta) / col_delta))

    img1 = binarization1(rotate(img1, theta))
    img1 = crop(img1)
    return img1
def screenshot_captcha(self, captcha_element, filename="captcha.png"):
    """Screenshot the page and crop the captcha out of it.

    The whole page is saved to ``filename``; the element's ``location``
    mapping is extended with fixed ``y_off`` / ``x_off`` entries and handed
    to ``crop.crop`` together with ``self.executable``.

    :param captcha_element: element exposing a ``location`` mapping.
    :param filename: path the screenshot is written to.
    :return: whatever ``crop.crop`` returns.
    """
    self.driver.save_screenshot(filename)

    region = captcha_element.location
    for key, offset in (("y_off", 50), ("x_off", 120)):
        region[key] = offset

    return crop.crop(filename, region, self.executable)
def magnify_touch( self, touch ):
    """Draw a magnifier lens above the current touch position.

    The widget is exported to a PNG named after ``touch.time_update``, the
    image is zoomed around the touch point and cropped, saved back to the
    same file, and finally rendered as a textured ellipse with a circular
    outline on ``self.canvas.after``.

    :param touch: touch event providing ``time_update``, ``pos``, ``x``, ``y``.
    """
    # Remove any lens drawn for a previous touch.
    clear_magnifier( self )

    # Snapshot this widget (and its children) to a per-touch file.
    self.lens_image = str( touch.time_update ) + '.png'
    self.export_to_png( self.lens_image )

    # Zoom around the touch point, then crop.
    zoom_scale = 10
    crop_radius = 512
    focus = world_to_photo( self.pos, touch.pos )
    zoomed = zoom( self.lens_image, focus, zoom_scale )
    crop( zoomed, crop_radius ).save( self.lens_image )

    # Lens geometry: drawn `lift` units above the touch point.
    diameter = 128 + 64
    lift = diameter/2 + 10
    tint = Color( 1,1,1,1 )
    disc = Ellipse( texture=load_texture( self.lens_image ),
                    pos=(touch.x-diameter/2, touch.y-diameter/2 + lift),
                    size=(diameter,diameter) )
    rim = Line( circle=(touch.x, touch.y + lift, diameter/2) )
    self.lens = ( tint, disc, rim )

    for instruction in self.lens:
        self.canvas.after.add( instruction )
def on(self):
    """Show the live colour stream and crop a region when a capture is requested.

    Starts ``self.pipeline`` with ``self.config`` and displays every colour
    frame in the ``'liv'`` window. The loop ends when ``s`` is pressed or
    ``self.stream_stop`` is set. While ``self.iscaptured`` is true, one
    capture is processed: the region returned by ``crop.crop`` is cut out of
    the colour image and of ``self.colorized_depth`` and stored on
    ``self.input_image``, ``self.depth_roi`` and ``self.refPt``.
    """
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # confirm that destroyAllWindows() belongs after the loop.
    self.pipeline.start(self.config)
    while (True):
        frames = self.pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        cv.imshow('liv', color_image)
        key = cv.waitKey(1) & 0xFF
        if key == ord('s'):
            # Block until any further key press, then leave the loop.
            cv.waitKey(0)
            break
        if self.stream_stop:
            break
        if self.iscaptured == True:
            # presumably capture()/filtering() populate self.color_frame and
            # self.colorized_depth, which are read below - verify.
            self.capture()
            self.filtering()
            color_image_orign = np.asanyarray(self.color_frame.get_data())
            refPt = crop.crop(color_image_orign)
            # refPt is indexed as [[x0, y0], [x1, y1]]: rows are sliced with
            # the second component, columns with the first.
            self.input_image = color_image_orign[refPt[0][1]: refPt[1][1], refPt[0][0]: refPt[1][0]]
            self.depth_roi = self.colorized_depth[refPt[0][1]: refPt[1][1], refPt[0][0]: refPt[1][0]]
            self.refPt = refPt
            refPt = None
            # Re-arm: handle each capture request once.
            self.iscaptured = False
    cv.destroyAllWindows()
def stream(self):
    """Preview a video stream and crop a region when a capture is requested.

    Frames are read from ``self.url`` and shown scaled by 0.4. The loop
    stops when ``self.stream_stop`` is set. While ``self.iscaptured`` is
    true, the region returned by ``crop.crop`` for the scaled preview is
    mapped back to full-resolution coordinates and the matching slice of
    the original frame is stored on ``self.input_img``.
    """
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # confirm whether the stop/capture checks sit inside the frame guard.
    cap = cv.VideoCapture(self.url)
    scale = 0.4
    while (True):
        _, frame = cap.read()
        if frame is not None:
            # Keep an untouched copy; the preview below is downscaled.
            origin = frame.copy()
            re_frame = cv.resize(frame, dsize=(0, 0), fx=scale, fy=scale, interpolation=cv.INTER_LINEAR)
            cv.imshow('frame', re_frame)
            q = cv.waitKey(1)
            if self.stream_stop:
                break
            if self.iscaptured:
                capture_img = origin
                # The region is expressed in preview (scaled) coordinates.
                refPt = crop.crop(re_frame)
                new_refPt = []
                for i in range(2):
                    ptlist = []
                    for j in range(2):
                        # Undo the preview scaling to get full-size pixels.
                        ptlist.append(int(refPt[i][j] * (1 / scale)))
                    new_refPt.append(ptlist)
                # Points are indexed [x, y]: rows use [1], columns use [0].
                self.input_img = capture_img[new_refPt[0][1]:new_refPt[1][1], new_refPt[0][0]:new_refPt[1][0]]
                refPt = None
                new_refPt = None
                # Re-arm: handle each capture request once.
                self.iscaptured = False
def input_function(image):
    """Crop ``image`` with this project's fixed masking parameters.

    All crop parameters are collected here so callers only supply the image.

    :param image: image forwarded to ``crop``.
    :return: whatever ``crop`` returns.
    """
    mask_options = dict(
        circle_mask=True,
        circle_mask_size=1,
        square_mask=True,
        square_mask_dim=[100, 1000, 100, 1000],
    )
    return crop(image, **mask_options)
def eval(args, subject, engine, dev_df, test_df):
    """Score a completion engine on every row of ``test_df``.

    For each test row a few-shot prompt is built from ``dev_df`` (starting
    with ``args.ntrain`` examples and dropping one at a time until ``crop``
    leaves the prompt unchanged), the engine is queried for one token with
    log-probabilities, and the answer choice with the highest log-probability
    is taken as the prediction.

    :param args: namespace providing ``ntrain``.
    :param subject: subject name used for the prompt and the summary line.
    :param engine: engine identifier passed to ``openai.Completion.create``.
    :param dev_df: frame the few-shot examples are drawn from.
    :param test_df: frame of questions; the last column holds the label.
    :return: ``(cors, acc, all_probs)`` - per-row correctness array, mean
        accuracy, and per-row softmax probabilities over the answers.
    """
    cors = []
    all_probs = []
    answers = choices[:test_df.shape[1] - 2]

    for i in range(test_df.shape[0]):
        # get prompt and make sure it fits
        k = args.ntrain
        prompt_end = format_example(test_df, i, include_answer=False)
        train_prompt = gen_prompt(dev_df, subject, k)
        prompt = train_prompt + prompt_end

        # NOTE(review): nothing stops k from going below zero if even the
        # zero-shot prompt is altered by crop() - confirm this cannot happen.
        while crop(prompt) != prompt:
            k -= 1
            train_prompt = gen_prompt(dev_df, subject, k)
            prompt = train_prompt + prompt_end

        label = test_df.iloc[i, test_df.shape[1] - 1]

        # Retry until the API call succeeds.  ``except Exception`` (rather
        # than a bare except) lets Ctrl-C / SystemExit break out of the loop.
        while True:
            try:
                c = openai.Completion.create(
                    engine=engine,
                    prompt=prompt,
                    max_tokens=1,
                    logprobs=100,
                    temperature=0,
                    echo=True,
                )
                break
            except Exception:
                print("pausing")
                time.sleep(1)
                continue

        lprobs = []
        for ans in answers:
            try:
                lprobs.append(c["choices"][0]["logprobs"]["top_logprobs"][-1][
                    " {}".format(ans)])
            except Exception:
                # Answer token missing from the returned top log-probs.
                print(
                    "Warning: {} not found. Artificially adding log prob of -100."
                    .format(ans))
                lprobs.append(-100)

        pred = {0: "A", 1: "B", 2: "C", 3: "D"}[np.argmax(lprobs)]
        probs = softmax(np.array(lprobs))

        cor = pred == label
        cors.append(cor)
        all_probs.append(probs)

    acc = np.mean(cors)
    cors = np.array(cors)
    all_probs = np.array(all_probs)
    print("Average accuracy {:.3f} - {}".format(acc, subject))

    return cors, acc, all_probs
def recognize(imgname='photos\\tough6.jpg', output='output.txt', desired='texts\\chom_tough.txt', show_intermediate_results=False):
    """OCR an image and report accuracy against a reference text.

    ``scan`` is run first; ``deskewed.jpg`` is then read, dilated with a
    2x2 kernel and saved as ``no_noise.jpg``, cropped into ``scan_res.jpg``
    and handed to pytesseract. The recognised text is written to ``output``
    and the result of ``test_accuracy`` is printed.

    :param imgname: path of the image passed to ``scan``.
    :param output: path of the text file to write.
    :param desired: path of the reference text used by ``test_accuracy``.
    :param show_intermediate_results: forwarded to ``scan`` and ``crop``.
    """
    scan(imgname, show_intermediate_results)
    img = cv2.imread('deskewed.jpg')
    img = cv2.dilate(img, np.ones((2, 2)))
    newimgname = 'no_noise.jpg'
    cv2.imwrite(newimgname, img)
    crop(newimgname, "scan_res.jpg", show_intermediate_results)
    a = pytesseract.image_to_string(Image.open('scan_res.jpg'), config="config")
    # The context manager flushes and closes the file even if the write
    # raises, and guarantees it is closed before test_accuracy reads it.
    with open(output, 'w+') as f:
        print(a, file=f)
    print('Accuracy: ' + str(test_accuracy(scan_res=output, desired=desired)))
def test_show_cropped_image(input):
    """Display the cropped version of every regular file in a directory.

    :param input: path of the directory whose files are cropped and shown.
    """
    for f in listdir(input):
        # Join against the directory that was actually listed; this used to
        # reference ``input_path``, a name that is not defined in this
        # function.
        img_path = join(input, f)
        if isfile(img_path):
            cropped = crop(img_path)
            plt.imshow(cropped)
            plt.show()
def home():
    """Serve the cropped image for the requested ``imo``.

    Reads the ``imo`` and ``thumbnail`` query parameters, asks ``crop.crop``
    for an output file and sends it back as ``image/jpg``.

    :return: the file response, or a 404 response when ``crop.crop``
        produced nothing.
    """
    IMO = request.args.get("imo")
    thumbnail = request.args.get("thumbnail")
    outfile = crop.crop(IMO, thumbnail, cvNet)
    if outfile:
        return send_file(outfile, mimetype="image/jpg")
    # A view must not return None (the framework rejects it as an invalid
    # response); answer with an explicit 404 instead.
    return ("No cropped image available.", 404)
def main(argv):
    """Track the labelled regions of one video with MIL and KCF trackers.

    :param argv: argument list; ``argv[0]`` is the file passed to
        ``parseOneVideo``. With no arguments ``printFiles`` is called and
        nothing else happens.
    """
    if len(argv) < 1:
        printFiles()
        # Without this return, execution fell through to argv[0] and raised
        # IndexError (unless printFiles itself exits).
        return
    filename = argv[0]
    video_filepath, data = parseOneVideo(filename)
    labels = crop(data[0])
    # One tracker of each kind per label, all starting at frame 0.
    MILlist = [MIL(video_filepath, 0, label) for label in labels]
    KCFlist = [kcf.KCF(video_filepath, 0, label) for label in labels]
    # showAll(MILlist)
    kcf.showAll(KCFlist + MILlist)
def edit_image():
    """Render the image-editing page.

    GET: redirects to ``upload_file`` when no PDF was uploaded; otherwise
    converts the first uploaded PDF and writes a cropped visualisation for
    every image that was listed in ``IMAGE_FOLDER`` on entry.

    POST: re-crops the image named in the form with the submitted tuning
    parameters and overwrites its visualisation.

    In every non-redirect case the template is rendered with the paths of
    all files in ``VISUALIZE_FOLDER``.
    """
    uploaded_pdfs = os.listdir(app.config["UPLOAD_FOLDER"])
    # Listed before any conversion happens in this request.
    source_images = os.listdir(app.config["IMAGE_FOLDER"])

    if request.method == "GET":
        if not uploaded_pdfs:
            return redirect(url_for("upload_file"))
        pdf_to_jpg(os.path.join(app.config["UPLOAD_FOLDER"], uploaded_pdfs[0]))
        for name in source_images:
            preview = crop(os.path.join(app.config["IMAGE_FOLDER"], name), False)
            cv2.imwrite(os.path.join(app.config["VISUALIZE_FOLDER"], name), preview)
    elif request.method == "POST":
        # Converted in this order so a missing/invalid field fails exactly
        # where it did before.
        field_order = ("dilation", "erosion", "min_width", "min_height",
                       "max_width", "max_height")
        tuning = {}
        for field in field_order:
            tuning[field] = int(request.form[field])

        # basename() strips any directory part from the submitted path.
        target_name = os.path.basename(request.form["image"])
        print(target_name)
        preview = crop(os.path.join(app.config["IMAGE_FOLDER"], target_name),
                       False, **tuning)
        print(request.form)
        cv2.imwrite(os.path.join(app.config["VISUALIZE_FOLDER"], target_name),
                    preview)

    visualize = []
    for entry in os.listdir(app.config["VISUALIZE_FOLDER"]):
        visualize.append(os.path.join(app.config["VISUALIZE_FOLDER"], entry))
    print(visualize)
    return render_template("edit_image.html", visualize=visualize)
def magic(self):
    """Classify the current drawing and show the prediction in the window.

    ``self.image`` is saved to ``image.jpg``, read back through OpenCV with
    flag ``0``, cropped and passed to ``apply_model``; the result is shown
    in a large label placed on ``root``.
    """
    snapshot_path = 'image.jpg'
    self.image.save(snapshot_path)

    pixels = crop(cv2.imread(snapshot_path, 0))
    prediction = apply_model(pixels)

    result_label = tk.Label(root, text=prediction, font=("Arial Bold", 150))
    result_label.config(bg="black")
    result_label.place(x=50, y=480, anchor='sw')
def drawMap(Map, angle, transformed_img, contours):
    """Mark every contour on the map and return the cropped result.

    The grayscale map is rotated by ``angle`` and matched against
    ``transformed_img`` to obtain an offset. A padded black rectangle is
    drawn around each contour's bounding box (shifted by that offset) on the
    rotated colour map, which is then rotated back and cropped against the
    grayscale map.

    :param Map: colour (BGR) map image.
    :param angle: rotation applied before matching and drawing.
    :param transformed_img: template passed to ``temp_match``.
    :param contours: iterable of contours to outline.
    :return: the annotated map after ``crop``.
    """
    pad = 10
    black = (0, 0, 0)
    thickness = 3

    gray = cv2.cvtColor(Map, cv2.COLOR_BGR2GRAY)
    off_x, off_y = temp_match(rotate(gray, angle), transformed_img)

    canvas = rotate(Map, angle)
    for contour in contours:
        bx, by, bw, bh = cv2.boundingRect(contour)
        top_left = (off_x + bx - pad, off_y + by - pad)
        bottom_right = (off_x + bx + pad + bw, off_y + by + pad + bh)
        canvas = cv2.rectangle(canvas, top_left, bottom_right, black, thickness)

    # Undo the rotation before cropping against the unrotated grayscale map.
    return crop(gray, rotate(canvas, -angle))
def process(img):
    """Crop, classify and extract features from one image.

    :param img: input image.
    :return: ``(type, features, image, masked_image, bbox)``. For
        ``Type.BAND`` no features are computed: ``features`` is ``None``,
        ``masked_image`` is the cropped image itself and ``bbox`` is four
        ``None`` values.
    """
    cropped = crop.crop(img)
    kind = classify.classify(cropped)

    # Bands are returned as-is, without mask or features.
    if kind == Type.BAND:
        return kind, None, cropped, cropped, (None, None, None, None)

    mask, _, _ = extract.extract(cropped)
    masked = cv2.bitwise_and(cropped, cropped, mask=mask)
    left, top, width, height = watch_features.bounding_box(mask)

    # Features are computed on the whole cropped image, not on the bbox slice.
    feats = features.get_features(cropped)
    return kind, feats, cropped, masked, (left, top, width, height)
def main(argv):
    """Track the labelled regions of one video with MIL and KCF trackers.

    :param argv: argument list; ``argv[0]`` is the file passed to
        ``parseOneVideo``. With no arguments ``printFiles`` is called and
        nothing else happens.
    """
    if len(argv) < 1:
        printFiles()
        # Without this return, execution fell through to argv[0] and raised
        # IndexError (unless printFiles itself exits).
        return
    filename = argv[0]
    video_filepath, data = parseOneVideo(filename)
    # NOTE(review): the HOG people detector is configured but never used
    # below - confirm whether it can be dropped.
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
    labels = crop(data[0])
    # One tracker of each kind per label, all starting at frame 0.
    MILlist = [MIL(video_filepath, 0, label) for label in labels]
    KCFlist = [kcf.KCF(video_filepath, 0, label) for label in labels]
    # showAll(MILlist)
    kcf.showAll(KCFlist + MILlist)
def digits(wild_image):
    """Crop an image, chop it into pieces and flatten each piece.

    :param wild_image: image passed to ``crop.crop``.
    :return: 2-D array with one row per chopped image (first axis of the
        array returned by ``chop.chop``) and the remaining axes flattened.
    """
    import crop
    import chop
    import numpy as np

    cropped = crop.crop(wild_image)
    chopped = chop.chop(cropped)
    n_images = chopped.shape[0]
    # The reshape used to be applied to the integer ``n_images`` itself,
    # which returned a one-element array holding the count instead of the
    # pixel data.  An explicit row width also copes with zero images.
    row_width = int(np.prod(chopped.shape[1:]))
    flattened = np.reshape(chopped, (n_images, row_width))
    return flattened
def hashFileWithCrop(videoFile, hashFn='pHash', output=sys.stdout):
    """Print one hash per frame of a video after cropping the frames.

    Every frame is read and converted to grayscale, the whole list is passed
    through ``crop``, and the selected hash of each resulting frame is
    printed to ``output``.

    :param videoFile: path or source accepted by ``cv2.VideoCapture``.
    :param hashFn: key into ``hashNameToFn`` selecting the hash function.
    :param output: file-like object the hashes are printed to.
    :raises KeyError: if ``hashFn`` is not a known hash name.
    """
    # Resolve the hash first so an unknown name fails before the video is
    # opened.
    fn = hashNameToFn[hashFn]

    cap = cv2.VideoCapture(videoFile)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # The capture handle was previously never released.
        cap.release()

    cropped = crop(frames)
    for frame in cropped:
        print(fn(frame), file=output)
def forward(self, x, l, train, action):
    """Run one glimpse step with a single-scale retina patch.

    :param x: input batch handed to ``crop``.
    :param l: location variable with coordinates in ``[-1, 1]``.
    :param train: when true a location is sampled around the predicted mean
        and returned together with its policy term.
    :param action: when true the action head ``fc_ha`` is evaluated.
    :return: ``(location, ln_p, y)``; ``ln_p`` is ``None`` outside training
        and ``y`` is ``None`` when ``action`` is false.
    """
    normalised = l.data if self.xp == np else self.xp.asnumpy(l.data)

    # Map [-1, 1] to pixel coordinates and keep the patch inside the image.
    half = self.g_size/2
    pixel = (normalised+1)*0.5*(self.in_size-self.g_size+1) + half
    pixel = np.clip(pixel, half, self.in_size-half)
    pixel = np.floor(pixel).astype(np.int32)

    # Retina encoding
    retina_code = F.relu(self.emb_x(crop(x, loc=pixel, size=self.g_size)))

    # Location encoding
    location_code = F.relu(self.emb_l(l))

    # Glimpse net
    g = F.relu(self.fc_lg(location_code) + self.fc_xg(retina_code))

    # Core net: LSTM(g + h_t-1)
    h = self.core_lstm(g)

    # Location net
    mean = F.tanh(self.fc_hl(h))

    sample = None
    ln_p = None
    if train:
        # Sample a location around the mean and clip it to the valid range.
        sample = F.gaussian(mean=mean, ln_var=self.ln_var)
        sample = F.clip(sample, -1., 1.)

        # Location policy term.
        m1, m2 = F.split_axis(mean, indices_or_sections=2, axis=1)
        s1, s2 = F.split_axis(sample, indices_or_sections=2, axis=1)
        squared = (s1-m1)*(s1-m1) + (s2-m2)*(s2-m2)
        # NOTE(review): a Gaussian log-density would carry a minus sign here;
        # presumably the caller accounts for it - confirm.
        ln_p = F.reshape(0.5 * squared / self.var, (-1,))

    # Action net (optional)
    y = self.fc_ha(h) if action else None

    if train:
        return sample, ln_p, y
    return mean, None, y
def single(file_path, dst_dir):
    """Crop one image, recognise its name and time, and save it renamed.

    ``crop`` yields the bytes for the name region, the bytes for the time
    region and the cropped image. Both byte blobs go through ``recognize``
    (with a one-second pause before each call); the cropped image is saved
    in ``dst_dir`` as ``<name>.<expiration><original extension>``.

    :param file_path: path of the source image.
    :param dst_dir: directory the renamed crop is written to.
    """
    logging.info('start {}'.format(file_path))

    name_blob, time_blob, cropped_image = crop(file_path)

    # One-second pause before each recognition call.
    time.sleep(1)
    title = recognize(name_blob)
    time.sleep(1)
    expiration = get_expiration(recognize(time_blob))

    _root, extension = os.path.splitext(file_path)
    target_name = "{}.{}{}".format(title, expiration, extension)
    target_path = os.path.join(os.path.abspath(dst_dir), target_name)

    logging.info(target_name)
    cropped_image.save(target_path)
    logging.info('finish {}'.format(file_path))
def li_convert(fname, crop_size, stretch=True):
    """Crop an image to the bounding box found with Li's threshold.

    :param fname: path of the image to open.
    :param crop_size: forwarded to ``crop``.
    :param stretch: forwarded to ``crop``.
    :return: whatever ``crop`` returns. When no tighter bounding box is
        found a message is printed and the full-image box is used.
    """
    img = Image.open(fname)
    # Use the mean over the channel axis as a proxy for brightness.
    rgb = np.array(img).mean(axis=2).astype(np.uint8)

    # Use Li's minimum cross entropy.
    threshold = skimage.filters.threshold_li(rgb)
    _, thresholded = cv2.threshold(rgb, threshold, 255, cv2.THRESH_BINARY)
    bbox = bbox_from_threshold_array(img, thresholded)

    if bbox == (0, 0, img.width, img.height):
        # Function-call form: same output on Python 2, valid on Python 3.
        print("Could not find bbox for {}".format(fname))

    return crop(img, bbox, crop_size, stretch=stretch)
def crop_face(img, bboxes, dst_face_path):
    """Crop each face box out of the raw image.

    The boxes are rounded to integers, squared with ``square_boxes`` and
    widened with ``broad_boxes`` (factor 0.4) before ``crop`` is called once
    per box. The process exits if the image does not have 3 channels or the
    boxes do not have 4 columns.

    Args:
        img: ndarray, shape with [h, w, 3]
        bboxes: ndarray, [n, 4]
        dst_face_path: str, the path to write the face.
    """
    # Unpacking also enforces a 3-D image and a 2-D box array.
    _height, _width, channels = img.shape
    _count, columns = bboxes.shape

    if channels != 3:
        print('ERROR: wrong input shape {}, we need the c to be 3.'.format(channels))
        exit()
    if columns != 4:
        print('ERROR: wrong input shape {}, we need the axis to be 4.'.format(
            columns))
        exit()

    boxes = broad_boxes(square_boxes(np.round(bboxes).astype(np.int32)), 0.4)
    for box in boxes:
        crop(img, box, dst_face_path)
def createTrainingSet(fileList, trainingSetSize):
    """Draw a random training set of spectrogram slices.

    ``fileList`` is shuffled in place and its first ``trainingSetSize``
    entries are removed from it. Each removed file is cropped into slices,
    and every slice is paired with the file's genre.

    :param fileList: list of file names; mutated (shuffled and shortened).
    :param trainingSetSize: number of files to take.
    :return: shuffled list of ``(slice, genre)`` tuples.
    """
    random.shuffle(fileList)
    chosen, remaining = fileList[:trainingSetSize], fileList[trainingSetSize:]
    # Slice assignment so the caller's list object is the one that shrinks.
    fileList[:] = remaining

    samples = []
    for name in chosen:
        genre = musicFileInfo.getGenre(name)
        pieces = crop(config.spectrogramsDir + genre + '\\' + name)
        samples.extend((piece, genre) for piece in pieces)

    random.shuffle(samples)
    return samples
def adaptive_convert(fname, crop_size, stretch=True):
    """Crop an image to the bounding box found with adaptive thresholding.

    :param fname: path of the image to open.
    :param crop_size: forwarded to ``crop``.
    :param stretch: forwarded to ``crop``.
    :return: whatever ``crop`` returns. When no tighter bounding box is
        found a message is printed and the full-image box is used.
    """
    img = Image.open(fname)
    # Use the mean over the channel axis as a proxy for brightness.
    rgb = np.array(img).mean(axis=2).astype(np.uint8)

    # Adaptive threshold to find border.
    thresholded = cv2.adaptiveThreshold(rgb, 1, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        thresholdType=cv2.THRESH_BINARY_INV,
                                        blockSize=11, C=0)
    # Median blur to erode garbage.
    thresholded = cv2.medianBlur(thresholded, 9)
    bbox = bbox_from_threshold_array(img, thresholded)

    if bbox == (0, 0, img.width, img.height):
        # Function-call form: same output on Python 2, valid on Python 3.
        print("Could not find bbox for {}".format(fname))

    return crop(img, bbox, crop_size, stretch=stretch)
def otsu_convert(fname, crop_size, stretch=True):
    """Crop an image to the bounding box found with Otsu thresholding.

    :param fname: path of the image to open.
    :param crop_size: forwarded to ``crop``.
    :param stretch: forwarded to ``crop``.
    :return: whatever ``crop`` returns. When no tighter bounding box is
        found a message is printed and the full-image box is used.
    """
    img = Image.open(fname)
    # Use the mean over the channel axis as a proxy for brightness.
    rgb = np.array(img).mean(axis=2).astype(np.uint8)

    # Use Otsu thresholding on a 5x5 Gaussian-blurred copy.
    blurred = cv2.GaussianBlur(rgb, (5, 5), 0)
    _, thresholded = cv2.threshold(blurred, 0, 255,
                                   cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    bbox = bbox_from_threshold_array(img, thresholded)

    if bbox == (0, 0, img.width, img.height):
        # Function-call form: same output on Python 2, valid on Python 3.
        print("Could not find bbox for {}".format(fname))

    return crop(img, bbox, crop_size, stretch=stretch)
def create_image(image_file,cropped=True,pad_rate=0.25,save_file='',category='',correct_RGBShift=True):
    """Load an image and, for RGBA input, crop it via ``crop.crop``.

    :param image_file: path of the image to load.
    :param cropped: crop RGBA images when true; otherwise just copy.
    :param pad_rate: padding rate forwarded to ``crop.crop``.
    :param save_file: forwarded to ``crop.crop``.
    :param category: forwarded to ``crop.crop``.
    :param correct_RGBShift: forwarded to ``crop.crop``.
    :return: ``(img_square, char_sizes)``. Both are ``None`` when the file
        does not exist or loading failed; ``char_sizes`` is ``None`` when
        the image was copied rather than cropped.
    """
    img_square = None
    char_sizes = None
    if not os.path.exists(image_file):
        return img_square, char_sizes

    img = None
    try:
        img = Image.open(image_file)
        if cropped and img.mode == 'RGBA':
            # Forward the caller's pad_rate; a literal 0.25 used to be
            # passed here, so the parameter had no effect.
            img_square, char_sizes = crop.crop(img, pad_rate=pad_rate,
                                               save_file=save_file,
                                               category=category,
                                               correct_RGBShift=correct_RGBShift)
        else:
            img_square = img.copy()  # 3 channel image
    except Exception:
        # Best effort: report and fall through with whatever was produced.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        print('loading error: ' + image_file)
    finally:
        if img is not None:
            img.close()
    return img_square, char_sizes
def create_image(image_file, cropped=True, pad_rate=0.25, save_file='', category='', correct_RGBShift=True):
    """Load an image and, for RGBA input, crop it via ``crop.crop``.

    :param image_file: path of the image to load.
    :param cropped: crop RGBA images when true; otherwise just copy.
    :param pad_rate: padding rate forwarded to ``crop.crop``.
    :param save_file: forwarded to ``crop.crop``.
    :param category: forwarded to ``crop.crop``.
    :param correct_RGBShift: forwarded to ``crop.crop``.
    :return: ``(img_square, char_sizes)``; ``char_sizes`` is ``None`` when
        the image was copied rather than cropped.
    """
    # Defined up front: the copy branch never assigned it, so the return
    # statement raised UnboundLocalError for non-RGBA or uncropped images.
    char_sizes = None
    img = Image.open(image_file)
    try:
        if cropped and img.mode == 'RGBA':
            # Forward the caller's pad_rate; a literal 0.25 used to be
            # passed here, so the parameter had no effect.
            img_square, char_sizes = crop.crop(img, pad_rate=pad_rate,
                                               save_file=save_file,
                                               category=category,
                                               correct_RGBShift=correct_RGBShift)
        else:
            img_square = img.copy()  # 3 channel image
    finally:
        # Close the source image even if cropping or copying raises.
        img.close()
    return img_square, char_sizes
import crop
import numpy

# Demo: compare crop.crop() with native NumPy slicing on a small matrix.

# 5x10 integer matrix whose row r holds 0..9 scaled by 10**r.
# The builtin ``int`` replaces the ``numpy.int`` alias, which newer NumPy
# releases no longer provide; both name the same dtype.
a = numpy.zeros((5, 10), int)
a[numpy.arange(5), :] = numpy.arange(10)
b = numpy.transpose([(10 ** numpy.arange(5))])
a = (a * b)[:, 1:]  # this array is most likely NOT contiguous

# print() calls with a single argument behave the same on Python 2 and 3.
print(a)
print("dim1=%d dim2=%d" % (a.shape[0], a.shape[1]))

# Half-open index ranges along each dimension.
d1_0 = 2
d1_1 = 4
d2_0 = 1
d2_1 = 5

c = crop.crop(a, d1_0, d1_1, d2_0, d2_1)
d = a[d1_0:d1_1, d2_0:d2_1]

print("returned array:")
print(c)
print("native slicing:")
print(d)
color = (255, 0, 0) canvas = image.copy() draw = ImageDraw.Draw(canvas) xy = np.array([locs[t,1],locs[t,0],locs[t,1],locs[t,0]]) wh = np.array([-g_size//2, -g_size//2, g_size//2, g_size//2]) xys = [xy + np.power(2,s)*wh for s in range(n_scales)] for xy in xys: draw.rectangle(xy=list(xy), outline=color) del draw plt.imshow(canvas) plt.axis('off') # glimpse at each scale gs = crop(x, center=ls[t:t+1], size=g_size) plt.subplot(3+n_scales, n_steps, n_steps + t+1) plt.imshow(gs.data[0,0], cmap='gray') plt.axis('off') for k in range(1, n_scales): s = np.power(2,k) patch = crop(x, center=ls[t:t+1], size=g_size*s) patch = F.average_pooling_2d(patch, ksize=s) gs = F.concat((gs, patch), axis=1) plt.subplot(3+n_scales, n_steps, n_steps*(k+1) + t+1) plt.imshow(gs.data[0,k], cmap='gray') plt.axis('off') # output probability plt.subplot2grid((3+n_scales,n_steps), (1+n_scales,t), rowspan=2)