def detect_and_rectify(self, detector, image, angle, dest_img_shape, rgb_rotated=None):
    # do the detection
    detections = detector.detectMultiScale(image, scaleFactor=self.scale,
                                           minNeighbors=self.min_neighbors)
    '''
    print "we're about to save the rotated images; if you don't want this, quit and remove this from line 232 in viola_detector.py"
    pdb.set_trace()
    for d in detections:
        cv2.rectangle(rgb_rotated, (d[0], d[1]), (d[0]+d[2], d[1]+d[3]), (255, 255, 255), 4)
    cv2.imwrite('rotated.jpg', rgb_rotated)
    pdb.set_trace()
    '''
    # convert to the proper coordinate system
    polygons = utils.convert_detections_to_polygons(detections)

    if angle > 0:
        # rotate back to original image coordinates
        print 'rotating...'
        rectified_detections = utils.rotate_detection_polygons(polygons, image, angle,
                                                               dest_img_shape,
                                                               remove_off_img=self.remove_off_img)
    else:
        rectified_detections = polygons
    print 'done rotating'

    if self.group:
        bounding_boxes = utils.get_bounding_boxes(np.array(rectified_detections))
    else:
        bounding_boxes = None
    return rectified_detections, bounding_boxes
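# --- Illustrative sketch (not part of the original file) ---
# What utils.rotate_detection_polygons presumably does: detections found in a
# rotated image are mapped back to the original frame by applying the inverse
# of the rotation that produced that image. This assumes the rotated canvas
# has the same size as the original and was rotated about its centre; the
# function name and signature here are hypothetical.
import cv2
import numpy as np

def rotate_polygons_back(polygons, rotated_shape, angle):
    h, w = rotated_shape[:2]
    # rotation matrix that produced the rotated image, about its centre
    M = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle, 1.0)
    M_inv = cv2.invertAffineTransform(M)
    rectified = []
    for poly in polygons:  # poly: (4, 2) array of corner points
        pts = np.hstack([poly, np.ones((len(poly), 1))])  # homogeneous coords
        rectified.append(pts.dot(M_inv.T))  # apply the inverse affine map
    return rectified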
def non_max_suppression(self, polygons):
    # we start with polygons; get the bounding box of each
    rects = utils.get_bounding_boxes(np.array(polygons))
    # convert the bounding boxes to the format expected by non_max_suppression
    boxes = utils.rects2boxes(rects)
    boxes_suppressed = suppression.non_max_suppression(boxes, overlapThresh=self.overlapThresh)
    polygons_suppressed = utils.boxes2polygons(boxes_suppressed)
    return polygons_suppressed
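# --- Illustrative sketch (not part of the original file) ---
# A minimal version of the greedy overlap-threshold suppression that
# suppression.non_max_suppression is assumed to implement (the
# Felzenszwalb/Malisiewicz variant): repeatedly pick a box and discard every
# remaining box whose overlap with it exceeds overlapThresh. Boxes are
# (x1, y1, x2, y2) rows, matching what rects2boxes is assumed to produce.
import numpy as np

def nms_sketch(boxes, overlapThresh=0.3):
    if len(boxes) == 0:
        return boxes
    boxes = boxes.astype('float')
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # overlap of the picked box with every other remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop the picked box and everything overlapping it too much
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlapThresh)[0])))
    return boxes[pick].astype('int')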
def run(self):
    '''
    1. Find proposals using ViolaJones
    2. Resize the window and classify it
    3. Net returns a list of the roof coordinates of each type - saved in roof_coords
    '''
    neural_time = 0
    for i, img_name in enumerate(self.img_names):
        print '***************** Image {0}: {1}/{2} *****************'.format(img_name, i, len(self.img_names)-1)

        # VIOLA
        if self.pickle_viola is None:
            self.viola.detect_roofs(img_name=img_name)
            current_viola_detections = self.viola.viola_detections
            viola_time = self.viola.evaluation.detections.total_time
        else:
            # use the pickled detections for speed when testing the neural network
            current_viola_detections = self.viola_evaluation.detections
            viola_time = self.viola_evaluation.detections.total_time

        proposal_patches, proposal_coords, img_shape = self.find_viola_proposals(current_viola_detections, img_name=img_name)

        if self.pickle_viola is None:
            # score and save only now, once find_viola_proposals has given us
            # the image shape (scoring earlier failed for lack of it)
            self.viola.evaluation.score_img(img_name, img_shape[:2])
            self.viola.evaluation.save_images(img_name, fname='beforeNeural')

        # NEURALNET
        with Timer() as t:
            classified_detections = self.neural_classification(proposal_patches, proposal_coords)
            # set detections and score
            for roof_type in utils.ROOF_TYPES:
                if self.groupThreshold > 0 and roof_type == 'metal':
                    # need to convert to rectangles
                    boxes = utils.get_bounding_boxes(np.array(classified_detections[roof_type]))
                    grouped_boxes, weights = cv2.groupRectangles(np.array(boxes).tolist(), self.groupThreshold)
                    # convert back to polygons
                    classified_detections[roof_type] = utils.convert_detections_to_polygons(grouped_boxes)
                elif self.groupBounds and roof_type == 'metal':
                    # grouping with the minimal bound of all overlapping rects
                    classified_detections[roof_type] = self.group_min_bound(classified_detections[roof_type],
                                                                            img_shape[:2], erosion=self.erosion)
                elif self.suppress and roof_type == 'metal':
                    # proper non-max suppression from Felzenszwalb et al.
                    classified_detections[roof_type] = self.non_max_suppression(classified_detections[roof_type])
                self.detections_after_neural.set_detections(img_name=img_name, roof_type=roof_type,
                                                            detection_list=classified_detections[roof_type])
        neural_time += t.secs

        self.evaluation_after_neural.score_img(img_name, img_shape[:2], contours=self.groupBounds)
        self.evaluation_after_neural.save_images(img_name, 'posNeural')

    if self.pickle_viola is None:
        self.viola.evaluation.print_report(print_header=True, stage='viola')
    else:
        self.viola_evaluation.print_report(print_header=True, stage='viola')

    self.evaluation_after_neural.detections.total_time = neural_time
    self.evaluation_after_neural.print_report(print_header=False, stage='neural')
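# --- Illustrative sketch (not part of the original file) ---
# How the cv2.groupRectangles call above behaves: rectangles are clustered by
# similarity, clusters with at most groupThreshold members are discarded, and
# each surviving cluster is replaced by its average rectangle. The box values
# here are made up.
import cv2

boxes = [[10, 10, 50, 50], [12, 11, 50, 50], [11, 9, 51, 49],   # one tight cluster
         [200, 200, 40, 40]]                                     # a singleton outlier
# groupThreshold=1 keeps only clusters of 2+ rectangles, so the outlier is dropped
grouped, weights = cv2.groupRectangles(boxes, 1, 0.2)
print(grouped)   # one averaged box near (11, 10, 50, 50)
print(weights)   # how many input rectangles were merged into each group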
def run(self):
    '''
    1. Sliding window proposals
    2. Resize the window and classify it
    3. Net returns a list of the roof coordinates of each type - saved in roof_coords
    '''
    neural_time = 0
    for i, img_name in enumerate(self.img_names):
        print '***************** Image {0}: {1}/{2} *****************'.format(img_name, i, len(self.img_names)-1)

        # SLIDING WINDOW: get the proposal patches, their coordinates and the
        # image shape before classifying. (The original snippet used these
        # variables without defining them; the helper name here is assumed.)
        proposal_patches, proposal_coords, img_shape = self.find_slide_proposals(img_name=img_name)

        # NEURALNET
        with Timer() as t:
            classified_detections = self.neural_classification(proposal_patches, proposal_coords)
            # set detections and score
            for roof_type in utils.ROOF_TYPES:
                if self.groupThreshold > 0 and roof_type == 'metal':
                    # need to convert to rectangles
                    boxes = utils.get_bounding_boxes(np.array(classified_detections[roof_type]))
                    grouped_boxes, weights = cv2.groupRectangles(np.array(boxes).tolist(), self.groupThreshold)
                    # convert back to polygons
                    classified_detections[roof_type] = utils.convert_detections_to_polygons(grouped_boxes)
                elif self.groupBounds and roof_type == 'metal':
                    # grouping with the minimal bound of all overlapping rects
                    classified_detections[roof_type] = self.group_min_bound(classified_detections[roof_type],
                                                                            img_shape[:2], erosion=self.erosion)
                elif self.suppress and roof_type == 'metal':
                    # proper non-max suppression from Felzenszwalb et al.
                    classified_detections[roof_type] = self.non_max_suppression(classified_detections[roof_type])
                self.detections_after_neural.set_detections(img_name=img_name, roof_type=roof_type,
                                                            detection_list=classified_detections[roof_type])
        neural_time += t.secs

        self.evaluation.score_img(img_name, img_shape[:2], contours=self.groupBounds)
        self.evaluation.save_images(img_name, 'posNeural')

    self.evaluation_after_neural.detections.total_time = neural_time
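# --- Illustrative sketch (not part of the original file) ---
# A minimal generator for step 1 of run() above: slide a fixed-size window
# over the image on a regular grid and yield each patch with its (x, y, w, h)
# coordinates. Window size and stride are illustrative, not the project's
# actual settings.
def sliding_window_proposals(img, win_size=40, step=20):
    h, w = img.shape[:2]
    for y in range(0, h - win_size + 1, step):
        for x in range(0, w - win_size + 1, step):
            patch = img[y:y + win_size, x:x + win_size]
            yield patch, (x, y, win_size, win_size)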
def main(argv: list):
    # argument check
    if len(argv) == 1:
        print("NO VIDEO FILE", flush=True)
        exit(0)

    # set video path
    video = argv[1]
    # get the file extension
    extension = utils.get_file_extension(video).lower()
    # extension check
    if extension not in SUPPORTED_EXTENSIONS:
        print("NOT SUPPORTED EXTENSION")
        exit(0)

    rotating_angle = 0
    # set the rotating angle if a second parameter is given
    if len(argv) > 2:
        rotating_angle = int(argv[2])

    # construct the VideoCapture object
    cap = cv2.VideoCapture(video)

    # initialize the base white image
    image_to_save = np.zeros([200, 200, 3], dtype=np.uint8)
    image_to_save.fill(255)

    # 4-character code of the codec for writing video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    datetime_stamp = datetime.datetime.now().strftime("%Y_%m%d_%H%M%S")
    # temporary video file
    temp_video = "temp_video.mp4"
    filename = 'enarvideo' + datetime_stamp + '.mp4'
    fps = cap.get(cv2.CAP_PROP_FPS)
    # construct the VideoWriter object,
    # needed to make a video from the cropped enar numbers
    out = cv2.VideoWriter(temp_video, fourcc, float(fps), (200, 200))

    # initialize the array of enar dicts, one per second of video
    dict_array = []

    # processing the video
    while True:
        # get one frame from the VideoCapture object
        ret, frame = cap.read()
        # if there are no more frames, stop processing
        if not ret:
            break

        # current time in seconds
        current_time = int(cap.get(cv2.CAP_PROP_POS_MSEC) / 1000)
        # this print is needed by the wrapper app (progress as a fraction)
        print(str(round(cap.get(cv2.CAP_PROP_POS_FRAMES) /
                        cap.get(cv2.CAP_PROP_FRAME_COUNT), 3)), flush=True)

        # append a new dict for each new second
        if len(dict_array) == current_time:
            dict_array.append(dict())

        # rotate the frame
        frame = rotate(frame, angle=rotating_angle)
        # convert the image to HSV colorspace
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # create a mask for filtering
        mask = cv2.inRange(hsv, lower, upper)
        # filter the frame with the mask
        res = cv2.bitwise_and(frame, frame, mask=mask)
        # convert to grayscale
        gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
        # search for connected components
        output = cv2.connectedComponentsWithStats(gray, 8)
        # number of labels
        nlabels = output[0]
        # stats of the components (width, height etc.)
        stats = output[2]
        modframe = frame.copy()
        # get the bounding boxes
        bboxes = utils.get_bounding_boxes(nlabels, stats)
        # iterate over the bounding boxes
        for bbox in bboxes:
            # draw a rectangle on the displayed frame
            cv2.rectangle(modframe, (bbox[0][0], bbox[0][1]),
                          (bbox[1][0], bbox[1][1]), (0, 0, 255), 1)
            # crop the image
            crop_img = utils.crop_image(frame, bbox)
            # get a string from the image
            ocr_string = utils.image_to_string(crop_img, "digits", 1, 6)
            if ocr_string:
                # save the cropped image to write out later
                image_to_save = cv2.resize(crop_img, (200, 200))
                # split the string into lines
                splitted_str = ocr_string.split("\n")
                # iterate over the lines
                for _str in splitted_str:
                    # if the line is a single word longer than 3 characters
                    if len(_str.split(' ')) == 1 and len(_str) > 3:
                        # count its 4-character prefix
                        if _str[:4] not in dict_array[current_time]:
                            dict_array[current_time][_str[:4]] = 1
                        else:
                            dict_array[current_time][_str[:4]] += 1
                    # if the line is a single word longer than 4 characters
                    if len(_str.split(' ')) == 1 and len(_str) > 4:
                        # count its 5-character prefix
                        if _str[:5] not in dict_array[current_time]:
                            dict_array[current_time][_str[:5]] = 1
                        else:
                            dict_array[current_time][_str[:5]] += 1

        out.write(image_to_save)

        cv2.imshow('video', resize(modframe, 1200, 1200))
        k = cv2.waitKey(5) & 0xFF
        if k == 27:  # Esc quits
            break

    cv2.destroyAllWindows()
    cap.release()
    out.release()

    # writing the results to file
    if len(dict_array) > 1:
        with open("enar" + datetime_stamp + ".txt", "w") as f:
            # merge neighbouring per-second dicts and extract the enar number
            for i in range(1, len(dict_array) + 1):
                if i == 1:
                    m = utils.merge_dicts(dict_array[i], dict_array[i - 1])
                elif i == len(dict_array):
                    m = utils.merge_dicts(dict_array[i - 1], dict_array[i - 2])
                else:
                    m = utils.merge_dicts(dict_array[i], dict_array[i - 1],
                                          dict_array[i - 2])
                enar5 = utils.get_enar_from_dict(m)
                # write the time and the recognized enar to file
                f.write(str(i) + " " + (enar5 or "") + "\n")

    # converting the video with ffmpeg
    command = "ffmpeg -i {} {}\n".format(temp_video, filename)
    try:
        # running the command
        output = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print('FAIL:\ncmd:{}\noutput:{}'.format(e.cmd, e.output))

    # deleting the temporary video file
    delete_command = "del" if platform.system() == "Windows" else "rm"
    subprocess.call("{} {}".format(delete_command, temp_video), shell=True)
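# --- Illustrative sketch (not part of the original file) ---
# The colour-filter stage of main() in isolation, showing how bounding boxes
# can be read straight off the stats array that
# cv2.connectedComponentsWithStats returns (presumably what
# utils.get_bounding_boxes does). The HSV bounds are placeholders for the
# real `lower`/`upper` values, which depend on the tag colour.
import cv2
import numpy as np

lower = np.array([20, 100, 100])
upper = np.array([35, 255, 255])

frame = cv2.imread('frame.jpg')
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)
gray = cv2.cvtColor(cv2.bitwise_and(frame, frame, mask=mask), cv2.COLOR_BGR2GRAY)

nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(gray, connectivity=8)
# each stats row is (left, top, width, height, area); label 0 is the background
for label in range(1, nlabels):
    x, y, w, h, area = stats[label]
    if area > 100:  # ignore tiny specks
        print((x, y), (x + w, y + h))  # corner pair as drawn by cv2.rectangle above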
det_tic = time.time()
rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
det_toc = time.time()
detect_time = det_toc - det_tic

misc_tic = time.time()
# Saving predicted boxes for evaluation
for i in range(0, batch_size):
    index = step * batch_size + i
    allBoundingBoxes = get_bounding_boxes(
        rois[i].unsqueeze(0), cls_prob[i].unsqueeze(0),
        bbox_pred[i].unsqueeze(0), im_info[i].unsqueeze(0),
        allBoundingBoxes, index)
misc_toc = time.time()
nms_time = misc_toc - misc_tic

sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                 .format(step + 1, num_images, detect_time, nms_time))
sys.stdout.flush()

# Evaluation
print('Evaluating detections')
metricsPerClass = evaluator.GetPascalVOCMetrics(allBoundingBoxes, IOUThreshold=0.5)
for mc in metricsPerClass:
    # report per-class average precision
    # (dict keys assumed from the Object-Detection-Metrics API)
    print('{}: AP = {:.4f}'.format(mc['class'], mc['AP']))
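# --- Illustrative sketch (not part of the original file) ---
# The overlap criterion behind IOUThreshold=0.5: a detection counts as a true
# positive only when its intersection-over-union with a ground-truth box of
# the same class reaches the threshold.
def iou(box_a, box_b):
    # boxes are (x1, y1, x2, y2)
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter) if inter else 0.0

assert iou((0, 0, 10, 10), (0, 0, 10, 10)) == 1.0   # identical boxes
assert iou((0, 0, 10, 10), (20, 20, 30, 30)) == 0.0  # disjoint boxes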