def find_viola_proposals(self, viola_detections, img_name=None):
    '''Collect the candidate roof patches proposed by viola for one image.

    The image is read from ``self.in_path + img_name``. For each roof type
    ('metal' and 'thatch') the detections are fetched from
    ``viola_detections``, every detection is cut out of the image with
    ``utils.four_point_transform`` and converted with ``utils.cv2_to_neural``
    so it can be fed to the neural network.

    Returns a tuple ``(all_proposal_patches, all_proposal_coords, img_shape)``:
    two dicts keyed by roof type (the patch array of shape
    ``(n, 3, utils.PATCH_W, utils.PATCH_H)`` and the detections it was built
    from) and the shape of the full image.

    If the image cannot be read, a message is printed and the process exits
    with status -1.
    '''
    img_path = self.in_path + img_name
    img_full = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    if img_full is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising, so the failure must be detected explicitly.
        print('Could not read image: {}'.format(img_path))
        sys.exit(-1)
    img_shape = img_full.shape

    all_proposal_patches = dict()
    all_proposal_coords = dict()

    #extract patches for neural network classification
    for roof_type in ['metal', 'thatch']:
        coords = viola_detections.get_detections(img_name=img_name,
                                                 roof_type=roof_type)
        all_proposal_coords[roof_type] = coords

        patches = np.empty((len(coords), 3, utils.PATCH_W, utils.PATCH_H))
        for i, detection in enumerate(coords):
            #extract the patch from the image using utils code
            img = utils.four_point_transform(img_full, detection)
            #transform the patch using utils code
            patches[i, :, :, :] = utils.cv2_to_neural(img)
        all_proposal_patches[roof_type] = patches

    return all_proposal_patches, all_proposal_coords, img_shape
def align(self):
    '''Print the stored point sets, run the transform and save the result.

    ``self.src_`` and ``self.dst_`` are printed, converted to float32 and
    passed with ``self.img_`` to ``four_point_transform``. The returned value
    multiplied by ``self.m0_`` is written to 'm.npy'.
    NOTE(review): the result is presumably a transform matrix (it was named
    M) -- confirm against four_point_transform.
    '''
    print(self.src_)
    print(self.dst_)
    src_pts, dst_pts = np.float32(self.src_), np.float32(self.dst_)
    transform = four_point_transform(self.img_, src_pts, dst_pts)
    scaled = transform * self.m0_
    np.save('m.npy', scaled)
def get_roof_patches_from_rectified_dataset(
        self, coordinates_only=False, xml_path=utils.RECTIFIED_COORDINATES,
        xml_name=None, img_path=None):
    '''Return roof patches from the dataset that has roofs properly bounded.

    The polygons are read from the XML file ``xml_path + xml_name``: every
    'polygon' directly under an 'object' element contributes its 'pt'
    children as (x, y) tuples, with negative coordinates clamped to 0.
    Only polygons with exactly four points are kept.

    If coordinates_only is True, return only the coordinates (as a numpy
    array) instead of the patches. Otherwise ``img_path`` is read, converted
    to grayscale, histogram-equalized, and one patch per polygon is cut out
    with ``utils.four_point_transform`` and returned as a list.

    If the image cannot be read, a message is printed and the process exits
    with status -1.
    '''
    assert xml_name is not None
    xml_path = xml_path + xml_name

    #EXTRACT THE POLYGONS FROM THE XML
    root = ET.parse(xml_path).getroot()
    polygon_list = list()
    for child in root:
        if child.tag != 'object':
            continue
        for grandchild in child:
            #get positions of bounding box
            if grandchild.tag != 'polygon':
                continue
            polygon = list()  #list of four points
            for coordinates in grandchild:
                if coordinates.tag != 'pt':
                    continue
                for point in coordinates:
                    # negative positions are clamped to the image border
                    pos = max(int(float(point.text)), 0)
                    if point.tag == 'x':
                        x = pos
                    elif point.tag == 'y':
                        y = pos
                polygon.append((x, y))
            if len(polygon) == 4:
                polygon_list.append(polygon)

    if coordinates_only:
        return np.array(polygon_list)

    #EXTRACT THE RECTIFIED ROOF PATCH FROM THE IMG, RETURN THE ROOF PATCHES
    assert img_path is not None
    img = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None for an unreadable file instead of raising,
        # so the failure must be detected explicitly.
        print('Could not read image: {}'.format(img_path))
        sys.exit(-1)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_equalized = cv2.equalizeHist(gray)

    return [
        utils.four_point_transform(gray_equalized,
                                   np.array(polygon, dtype="float32"))
        for polygon in polygon_list
    ]
def get_roof_patches_from_rectified_dataset(self, coordinates_only=False, xml_path=utils.RECTIFIED_COORDINATES, xml_name=None, img_path=None):
    '''Return roof patches from the dataset that has roofs properly bounded.

    The polygons are read from the XML file ``xml_path + xml_name``: every
    'polygon' directly under an 'object' element contributes its 'pt'
    children as (x, y) tuples, with negative coordinates clamped to 0.
    Only polygons with exactly four points are kept.

    If coordinates_only is True, return only the coordinates (as a numpy
    array) instead of the patches. Otherwise ``img_path`` is read, converted
    to grayscale, histogram-equalized, and one patch per polygon is cut out
    with ``utils.four_point_transform`` and returned as a list.

    If the image cannot be read, a message is printed and the process exits
    with status -1.
    '''
    assert xml_name is not None
    xml_path = xml_path + xml_name

    #EXTRACT THE POLYGONS FROM THE XML
    tree = ET.parse(xml_path)
    polygon_list = list()
    for child in tree.getroot():
        if child.tag != 'object':
            continue
        for grandchild in child:
            #get positions of bounding box
            if grandchild.tag != 'polygon':
                continue
            polygon = list()  #list of four points
            for coordinates in grandchild:
                if coordinates.tag != 'pt':
                    continue
                for point in coordinates:
                    # negative positions are clamped to the image border
                    pos = max(int(float(point.text)), 0)
                    if point.tag == 'x':
                        x = pos
                    elif point.tag == 'y':
                        y = pos
                polygon.append((x, y))
            if len(polygon) == 4:
                polygon_list.append(polygon)

    if coordinates_only:
        return np.array(polygon_list)

    #EXTRACT THE RECTIFIED ROOF PATCH FROM THE IMG, RETURN THE ROOF PATCHES
    assert img_path is not None
    img = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports an unreadable file by returning None; it does
        # not raise, so the old IOError handler could never run.
        print('Could not read image: {}'.format(img_path))
        sys.exit(-1)
    gray_equalized = cv2.equalizeHist(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))

    roof_patches = list()
    for polygon in polygon_list:
        corners = np.array(polygon, dtype="float32")
        roof_patches.append(utils.four_point_transform(gray_equalized, corners))
    return roof_patches
def extract_patches(polygon_list, img_path=None, grayscale=False):
    '''Cut every polygon of polygon_list out of the image at img_path.

    The image is read in colour; with grayscale=True it is converted to
    grayscale and histogram-equalized first. Each polygon is turned into a
    float32 array and passed to utils.four_point_transform. The resulting
    patches are returned as a list, in the order of polygon_list.

    img_path must not be None and the image must be readable (both are
    asserted). An IOError is printed and ends the process with status -1.
    '''
    assert img_path is not None
    try:
        img = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
        assert img is not None
        if grayscale:
            img = cv2.equalizeHist(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    except IOError as e:
        print(e)
        sys.exit(-1)

    return [
        utils.four_point_transform(img, np.array(corners, dtype="float32"))
        for corners in polygon_list
    ]
def save_training_FP_and_TP_helper(self, num_patches, img_name, detections, patches_path, general_path, img, roof_type, extraction_type, color, rects=False):
    '''Write one patch image per detection and return the updated patch count.

    Patches are grouped into directories of 20000: the patch numbered
    ``num_patches`` goes to ``patches_path + 'batch<k>/'`` with
    ``k = num_patches // 20000``. Whenever a new batch starts, the matching
    'falsepos' and 'truepos' batch directories are created under
    ``general_path``.

    With ``rects`` False every detection is padded with
    ``utils.add_padding_polygon`` and warped with
    ``utils.four_point_transform``; otherwise the detection's
    xmin/ymin/xmax/ymax box is grown by 10 pixels on each side (where that
    stays inside the image) and sliced out of ``img`` directly.

    A copy of ``img`` is annotated with the true roofs and the polygon
    detections for debugging; this block does not write that copy anywhere.
    ``extraction_type`` is not used here.

    ``self.TOTAL`` is incremented per saved patch and a progress line is
    printed every 1000 patches.
    '''
    batch_size = 20000

    #mark the true roofs on a copy of the image (for debugging)
    img_debug = np.copy(img)
    if roof_type == 'background':
        truth_types = ['metal', 'thatch']
    else:
        truth_types = [roof_type]
    for truth_type in truth_types:
        utils.draw_detections(self.correct_roofs[truth_type][img_name], img_debug, color=(0, 0, 0), thickness=2, rects=rects)

    for i, detection in enumerate(detections):
        # One floor-divided index is used for both the directories that are
        # created and the directory that is written to, so they also agree
        # under true division ('/' would yield 'batch0.0' for the mkdirs).
        batch_index = int(num_patches // batch_size)
        if num_patches % batch_size == 0:
            utils.mkdir('{}falsepos/batch{}/'.format(general_path, batch_index))
            utils.mkdir('{}truepos/batch{}/'.format(general_path, batch_index))
        num_patches += 1

        current_patch_path = patches_path + 'batch{}/'.format(batch_index)
        patch_file = '{0}{1}_{2}_roof{3}.jpg'.format(current_patch_path, roof_type, img_name[:-4], i)

        # Explicit comparison kept so non-bool values of rects are routed
        # exactly as before.
        if rects == False:
            #extract the patch, rotate it to a horizontal orientation, save it
            bitmap = np.zeros((img.shape[:2]), dtype=np.uint8)
            padded_detection = utils.add_padding_polygon(detection, bitmap)
            warped_patch = utils.four_point_transform(img, padded_detection)
            cv2.imwrite(patch_file, warped_patch)

            #mark where roofs where taken out from for debugging
            utils.draw_polygon(padded_detection, img_debug, fill=False, color=color, thickness=2, number=i)
        else:
            # grow the box by pad pixels per side, but only on sides where
            # the grown box stays strictly inside the image
            pad = 10
            xmin = (detection.xmin - pad) if (detection.xmin - pad) > 0 else detection.xmin
            ymin = (detection.ymin - pad) if (detection.ymin - pad) > 0 else detection.ymin
            xmax = (detection.xmax + pad) if (detection.xmax + pad) < img.shape[1] else detection.xmax
            ymax = (detection.ymax + pad) if (detection.ymax + pad) < img.shape[0] else detection.ymax
            patch = img[ymin:ymax, xmin:xmax, :]
            cv2.imwrite(patch_file, patch)

        self.TOTAL += 1
        if self.TOTAL % 1000 == 0:
            print('Saved {} patches'.format(self.TOTAL))
    return num_patches
def extract_patches(polygon_list, img_path=None, grayscale=False):
    '''Return the list of image patches described by polygon_list.

    Reads the colour image at img_path (asserted to be given and readable).
    When grayscale is True the image is converted to grayscale and its
    histogram equalized before extraction. Every polygon is converted to a
    float32 array and handed to utils.four_point_transform; the patches come
    back in the same order as the polygons.

    An IOError is printed and terminates the process with status -1.
    '''
    assert img_path is not None
    try:
        source = cv2.imread(img_path, flags=cv2.IMREAD_COLOR)
        assert source is not None
        if grayscale:
            source = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
            source = cv2.equalizeHist(source)
    except IOError as e:
        print(e)
        sys.exit(-1)

    def cut(polygon):
        # one warped patch for one polygon
        return utils.four_point_transform(
            source, np.array(polygon, dtype="float32"))

    return [cut(polygon) for polygon in polygon_list]
def save_training_FP_and_TP_helper(img_name, evaluation, detections, patches_path, general_path, img, roof_type, extraction_type, color):
    '''Save the patch of every detection plus one annotated debug image.

    For each detection the polygon is padded (utils.add_padding_polygon),
    warped to a patch (utils.four_point_transform) and written to
    patches_path as '<roof_type>_<image stem>_roof<i>.jpg'.

    A copy of img is annotated with the true roofs from
    evaluation.correct_roofs (both 'metal' and 'thatch' when roof_type is
    'background') and with every padded detection, then written to
    general_path as '<image stem>_<roof_type>_extract_<extraction_type>.jpg'.
    The image stem is img_name without its last four characters.
    '''
    img_debug = np.copy(img)

    #true roofs first, so the detections are drawn on top of them
    if roof_type == 'background':
        truth_types = ('metal', 'thatch')
    else:
        truth_types = (roof_type,)
    for truth_type in truth_types:
        utils.draw_detections(evaluation.correct_roofs[truth_type][img_name], img_debug, color=(0, 0, 0), thickness=2)

    for index, polygon in enumerate(detections):
        #extract the patch, rotate it to a horizontal orientation, save it
        mask = np.zeros((img.shape[:2]), dtype=np.uint8)
        padded = utils.add_padding_polygon(polygon, mask)
        warped = utils.four_point_transform(img, padded)
        patch_file = '{0}{1}_{2}_roof{3}.jpg'.format(patches_path, roof_type, img_name[:-4], index)
        cv2.imwrite(patch_file, warped)

        #mark where roofs where taken out from for debugging
        utils.draw_polygon(padded, img_debug, fill=False, color=color, thickness=2, number=index)

    #write this type of extraction and the roofs to an image
    debug_file = '{0}{1}_{2}_extract_{3}.jpg'.format(general_path, img_name[:-4], roof_type, extraction_type)
    cv2.imwrite(debug_file, img_debug)
def scan(self, image_path):
    """Rectify the document found in an image and save it as grayscale.

    The image at ``image_path`` is loaded, a copy rescaled to a height of
    500 is passed to ``self.get_contour``, and the returned contour (scaled
    back by the height ratio) is used to warp the original image with
    ``utils.four_point_transform``. The warped image is converted to
    grayscale and written to './imgs/output/<basename of image_path>'.

    The output directory is created when missing. A line is printed saying
    whether the file was written. Returns None.
    """
    RESCALED_HEIGHT = 500.0
    OUTPUT_DIR = './imgs/output'

    # load the image and compute the ratio of the old height
    # to the new height, clone it, and resize it
    image = cv2.imread(image_path)
    assert (image is not None)
    ratio = image.shape[0] / RESCALED_HEIGHT
    orig = image.copy()
    rescaled_image = utils.resize(image, height=int(RESCALED_HEIGHT))

    # get the contour of the document
    screenCnt = self.get_contour(rescaled_image)

    # apply the perspective transformation
    warped = utils.four_point_transform(orig, screenCnt * ratio)

    # convert the warped image to grayscale
    gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

    # NOTE(review): both inputs are the same image and the weights sum to 1,
    # so this blend reproduces `gray` unchanged. An unsharp mask would blend
    # against a blurred copy instead -- confirm whether sharpening was
    # disabled on purpose. The blurred copy that used to be computed here
    # was overwritten before use and has been dropped.
    sharpen = cv2.addWeighted(gray, 1.5, gray, -0.5, 0)

    # save the transformed image
    basename = os.path.basename(image_path)
    if not os.path.isdir(OUTPUT_DIR):
        # cv2.imwrite does not create missing directories
        os.makedirs(OUTPUT_DIR)
    # cv2.imwrite reports failure through its return value, not an exception
    if not cv2.imwrite(OUTPUT_DIR + '/' + basename, sharpen):
        print("Failed to write " + OUTPUT_DIR + '/' + basename)
        return
    print("Proccessed " + basename)
def crop_image(image, list_coordinate):
    """
    Crop character in lp image

    The image is resized to a width of 600 and each bounding box in
    list_coordinate (four (x, y) corners) is warped out of it with
    four_point_transform, fitted to 28x28 with resize_and_pad (padColor=255)
    and divided by 255.0. Processing stops at the first box whose first two
    corners have four equal coordinate values.

    Returns the list of processed crops in input order.
    """
    lp_image = imutils.resize(image, width=600)
    characters = []
    for box in list_coordinate:
        first, second = box[0], box[1]
        if first[0] == first[1] == second[0] == second[1]:
            # such a box ends the list; nothing after it is processed
            break
        corners = np.array(
            [(box[k][0], box[k][1]) for k in range(4)], dtype="float32")
        glyph = four_point_transform(lp_image, corners)
        glyph = resize_and_pad(glyph, (28, 28), padColor=255)
        characters.append(glyph / 255.0)
    return characters
if os.path.isfile(new + image): continue if not os.path.isfile(dir + image + ".csv"): #if no gt, make one create(dir + image) with open(dir + image + ".csv", 'r') as csvfile: for i in range(4): line = csvfile.readline().split(" ") coordinates[i][0] = float(line[0].strip()) coordinates[i][1] = float(line[1].strip()) background = cv2.imread(back_source + back) img = cv2.imread(dir + image, cv2.IMREAD_UNCHANGED) #orientation correction #perspective correction in document warped, M = four_point_transform(img, coordinates) #array to image merged, points = merge(background, warped, M) cv2.imwrite(new + image, merged) #binary im = cv2.cvtColor(background, cv2.COLOR_RGB2GRAY) #(thresh, im) = cv2.threshold(im_gray, 10, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU) #max kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11), (-1, -1)) maxed = cv2.dilate(im, kernel) comp = cv2.compare(im, maxed, cmpop=cv2.CMP_EQ) im = cv2.multiply(im, comp)
cnts = imutils.grab_contours(cnts) cnts = sorted(cnts, key=cv2.contourArea, reverse=True) # loop over the contours max_contour = cnts[0] for c in cnts: peri = cv2.arcLength(max_contour, True) approx = cv2.approxPolyDP(max_contour, 0.02 * peri, True) screenCnt = approx if len(approx) == 4: screenCnt = approx break warped = four_point_transform( orig, screenCnt.reshape(max(list(screenCnt.shape)), 2) * ratio) # convert the warped image to grayscale, then threshold it # to give it that 'black and white' paper effect warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY) ret3, th3 = cv2.threshold(warped, 140, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) cv2.imwrite('for_tess.jpg', th3) image = Image.open('for_tess.jpg') text = pytesseract.image_to_string(th3, lang="rus") with open('text.txt', 'w') as text_file:
#This segment of the code works on the board segment of the frame
contours, hierarchy = cv2.findContours(gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# find the biggest area (the first one wins on ties); an empty contour list
# still raises here
cnt = max(contours, key=cv2.contourArea)
max_area = cv2.contourArea(cnt)

# approximate the biggest contour by a polygon and warp the board out of it
# NOTE(review): nothing here checks that the approximation has exactly four
# vertices -- confirm what four_point_transform expects.
epsilon = 0.01 * cv2.arcLength(cnt, True)
poly_approx = cv2.approxPolyDP(cnt, epsilon, True)
board_segment = four_point_transform(gray, poly_approx)

#Applying Gaussian Blurring to the image
# NOTE(review): the third positional argument of cv2.GaussianBlur is sigmaX,
# so cv2.BORDER_DEFAULT is used as a sigma here, and a (1, 1) kernel leaves
# the image unchanged -- confirm the intended kernel size and border type.
dst = cv2.GaussianBlur(board_segment, (1, 1), cv2.BORDER_DEFAULT)

#Applying Inverse Binary Threshold to the image
ret, thresh_inv = cv2.threshold(dst, 180, 255, cv2.THRESH_BINARY_INV)

#Applying Probabilistic Hough Transform on the Binary Image
# NOTE(review): maxLineGap is set to 60 here but the call below passes 10;
# the effective value (10) is kept -- confirm which one is intended.
minLineLength = 100
maxLineGap = 60
lines = cv2.HoughLinesP(thresh_inv, 1, np.pi / 180, 100, minLineLength=100, maxLineGap=10)

# HoughLinesP returns None (not an empty array) when no line is found
for l in (lines if lines is not None else []):
    x1, y1, x2, y2 = l[0]
    cv2.line(board_segment, (x1, y1), (x2, y2), (0, 255, 0), 2, cv2.LINE_AA)
# Look for the first contour whose polygonal approximation has exactly four
# vertices. The warp below reshapes it to (4, 2), which fails for any other
# vertex count, so contours with more vertices are skipped, not accepted.
for contour in cnts:
    peri = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
    print(approx)
    if len(approx) == 4:
        screenCnt = approx
        break
else:
    # only reached when no contour qualified
    print("cannot scan image reliably")
    exit(-1)

# show the detected outline and wait for a key press
cv2.drawContours(image, [screenCnt], -1, (255, 0, 0), 2)
cv2.imshow("Outline", image)
cv2.waitKey(0)

# warp the original image using the contour scaled by `ratio`
warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)

# convert the warped image to grayscale, then threshold it
# to give it that 'black and white' paper effect
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
T = threshold_local(warped, 11, offset=10, method="gaussian")
warped = (warped > T).astype("uint8") * 255

# show the original and the scanned result until a key is pressed
cv2.imshow("Original", imutils.resize(orig, height=650))
cv2.imshow("Scanned", imutils.resize(warped, height=650))
cv2.waitKey(0)
cv2.destroyAllWindows()
def save_training_FP_and_TP_helper(self, num_patches, img_name, detections,
                                   patches_path, general_path, img, roof_type,
                                   extraction_type, color, rects=False):
    '''Write one patch image per detection and return the updated patch count.

    Patches are grouped into directories of 20000: the patch numbered
    ``num_patches`` goes to ``patches_path + 'batch<k>/'`` with
    ``k = num_patches // 20000``. Whenever a new batch starts, the matching
    'falsepos' and 'truepos' batch directories are created under
    ``general_path``.

    With ``rects`` False every detection is padded with
    ``utils.add_padding_polygon`` and warped with
    ``utils.four_point_transform``; otherwise the detection's
    xmin/ymin/xmax/ymax box is grown by 10 pixels on each side (where that
    stays inside the image) and sliced out of ``img`` directly.

    A copy of ``img`` is annotated with the true roofs and the polygon
    detections for debugging; this block does not write that copy anywhere.
    ``extraction_type`` is not used here.

    ``self.TOTAL`` is incremented per saved patch and a progress line is
    printed every 1000 patches.
    '''
    batch_size = 20000

    #mark the true roofs on a copy of the image (for debugging)
    img_debug = np.copy(img)
    if roof_type == 'background':
        truth_types = ['metal', 'thatch']
    else:
        truth_types = [roof_type]
    for truth_type in truth_types:
        utils.draw_detections(self.correct_roofs[truth_type][img_name],
                              img_debug, color=(0, 0, 0), thickness=2,
                              rects=rects)

    for i, detection in enumerate(detections):
        # One floor-divided index is used for both the directories that are
        # created and the directory that is written to, so they also agree
        # under true division ('/' would yield 'batch0.0' for the mkdirs).
        batch_index = int(num_patches // batch_size)
        if num_patches % batch_size == 0:
            utils.mkdir('{}falsepos/batch{}/'.format(general_path,
                                                     batch_index))
            utils.mkdir('{}truepos/batch{}/'.format(general_path,
                                                    batch_index))
        num_patches += 1

        current_patch_path = patches_path + 'batch{}/'.format(batch_index)
        patch_file = '{0}{1}_{2}_roof{3}.jpg'.format(
            current_patch_path, roof_type, img_name[:-4], i)

        # Explicit comparison kept so non-bool values of rects are routed
        # exactly as before.
        if rects == False:
            #extract the patch, rotate it to a horizontal orientation, save it
            bitmap = np.zeros((img.shape[:2]), dtype=np.uint8)
            padded_detection = utils.add_padding_polygon(detection, bitmap)
            warped_patch = utils.four_point_transform(img, padded_detection)
            cv2.imwrite(patch_file, warped_patch)

            #mark where roofs where taken out from for debugging
            utils.draw_polygon(padded_detection, img_debug, fill=False,
                               color=color, thickness=2, number=i)
        else:
            # grow the box by pad pixels per side, but only on sides where
            # the grown box stays strictly inside the image
            pad = 10
            xmin = detection.xmin - pad
            if not xmin > 0:
                xmin = detection.xmin
            ymin = detection.ymin - pad
            if not ymin > 0:
                ymin = detection.ymin
            xmax = detection.xmax + pad
            if not xmax < img.shape[1]:
                xmax = detection.xmax
            ymax = detection.ymax + pad
            if not ymax < img.shape[0]:
                ymax = detection.ymax
            patch = img[ymin:ymax, xmin:xmax, :]
            cv2.imwrite(patch_file, patch)

        self.TOTAL += 1
        if self.TOTAL % 1000 == 0:
            print('Saved {} patches'.format(self.TOTAL))
    return num_patches