def _extract_roi(paths):
    """Rectifies one image and writes the result to disk.

    The rectification endpoints ((x1, y1), (x2, y2)) are not parameters of
    this function; x1, y1, x2 and y2 are looked up in the surrounding scope.

    # Arguments
        paths: Pair of (input image path, output ROI path).

    # Returns
        None. A message is printed if the input image cannot be read, and
        nothing is written in that case.
    """
    src_path, dst_path = paths

    image = openem.Image()
    read_status = image.FromFile(src_path)

    # Guard clause: report unreadable inputs and leave without writing.
    if read_status != openem.kSuccess:
        print("Failed to read image {}".format(src_path))
        return

    ruler_line = ((x1, y1), (x2, y2))
    rectified = openem.Rectify(image, ruler_line)
    print("Saving ROI to: {}".format(dst_path))
    rectified.ToFile(dst_path)
# Queue every loaded image on the mask finder; stop at the first rejection.
for img in imgs:
    status = mask_finder.AddImage(img)
    if status != openem.kSuccess:
        raise RuntimeError("Failed to add image for processing!")

# Run the mask finder on everything queued above; results land in `masks`.
masks = openem.VectorImage()
status = mask_finder.Process(masks)
if status != openem.kSuccess:
    raise RuntimeError("Failed to process images!")

# Walk the masks and their source images in lockstep.
for mask, img in zip(masks, imgs):
    # Bring the mask to the dimensions of the image it was computed from.
    mask.Resize(img.Width(), img.Height())

    # Only images in which a ruler is detected are rectified and shown.
    present = openem.RulerPresent(mask)
    if present:
        # Orientation and region of interest both come from the mask.
        endpoints = openem.RulerEndpoints(mask)
        r_mask = openem.Rectify(mask, endpoints)
        roi = openem.FindRoi(r_mask)

        # Apply the same rectification to the image, crop it, display it.
        r_img = openem.Rectify(img, endpoints)
        c_img = openem.Crop(r_img, roi)
        c_img.Show()
    else:
        print("Could not find ruler in image! Skipping...")
def _find_roi(mask_finder_path, vid_path):
    """Finds ROI in a video.

    Runs the ruler mask finder over the leading frames of the video in
    batches of `max_img`, sums the resulting masks, rescales the sum so its
    peak is 255, and computes the ruler endpoints and region of interest
    from that combined mask.

    # Arguments
        mask_finder_path: Path to find_ruler model file.
        vid_path: Path to the video.

    # Returns
        Tuple of (region of interest, ruler endpoints).

    # Raises
        IOError: If video or model file cannot be opened.
        RuntimeError: If fails to add images or process model.
    """
    # Determined by experimentation with GPU having 8GB memory.
    max_img = 8
    # About the first 100 frames, rounded up to a whole number of batches.
    num_batches = math.ceil(100 / max_img)

    # Create and initialize the mask finder.
    mask_finder = openem.RulerMaskFinder()
    if mask_finder.Init(mask_finder_path) != openem.kSuccess:
        raise IOError("Failed to initialize mask finder!")

    # Set up video reader.
    reader = openem.VideoReader()
    if reader.Init(vid_path) != openem.kSuccess:
        raise IOError("Failed to open video!")

    # Every mask is resized and reshaped to these dimensions and added into
    # one accumulator of this shape, so query them once.
    width = reader.Width()
    height = reader.Height()

    mask_sum = np.zeros((height, width))
    masks = openem.VectorImage()
    vid_end = False
    for _ in range(num_batches):
        num_queued = 0
        for _ in range(max_img):
            img = openem.Image()
            # Any non-success status from the reader is treated as the end
            # of the video.
            if reader.GetFrame(img) != openem.kSuccess:
                vid_end = True
                break
            if mask_finder.AddImage(img) != openem.kSuccess:
                raise RuntimeError("Failed to add frame to mask finder!")
            num_queued += 1

        # When the video ends exactly on a batch boundary nothing has been
        # queued. What Process does in that case is not visible here, so the
        # call is skipped; this also keeps masks left over from the previous
        # batch from being accumulated a second time.
        if num_queued > 0:
            if mask_finder.Process(masks) != openem.kSuccess:
                raise RuntimeError("Failed to process mask finder!")
            for mask in masks:
                mask.Resize(width, height)
                mask_data = np.array(mask.DataCopy())
                mask_sum += np.reshape(mask_data, (height, width))

        if vid_end:
            break

    # Rescale so the strongest pixel becomes 255. With no decoded frames, or
    # all-zero masks, the peak is 0; dividing would produce NaN and the
    # uint8 cast below would be undefined, so the zeros are kept as they are.
    peak = np.max(mask_sum)
    if peak > 0:
        mask_sum = mask_sum / peak * 255.0

    # Convert the combined mask from numpy to openem format.
    mask_vec = mask_sum.reshape(-1).astype(np.uint8).tolist()
    mask_img = openem.Image()
    mask_img.FromData(mask_vec, width, height, 1)

    # Now that we have the combined mask, use it to compute the ROI.
    endpoints = openem.RulerEndpoints(mask_img)
    r_mask = openem.Rectify(mask_img, endpoints)
    roi = openem.FindRoi(r_mask)
    return (roi, endpoints)
def _detect_and_classify(detect_path, classify_path, vid_path, roi, endpoints):
    """Finds and classifies detections for all frames in a video.

    Frames are read in batches of up to `max_img`. Each frame is rectified
    with `endpoints` and cropped to `roi` before it is handed to the
    detector; every detection is then cut out of its frame and classified.

    # Arguments
        detect_path: Path to detect model file.
        classify_path: Path to classify model file.
        vid_path: Path to the video.
        roi: Region of interest output from find_roi.
        endpoints: Ruler endpoints from find_roi.

    # Returns
        Tuple of (detections, scores). `detections` is extended with the
        detector output of every batch; `scores` receives one
        classification vector for each entry of that output.

    # Raises
        IOError: If video or model files cannot be opened.
        RuntimeError: If unable to add frame or process a model.
    """
    # Determined by experimentation with GPU having 8GB memory.
    max_img = 32

    # Create and initialize the detector.
    # NOTE(review): the meaning of the 0.5 argument is not visible in this
    # file -- confirm against the openem API.
    detector = openem.Detector()
    if detector.Init(detect_path, 0.5) != openem.kSuccess:
        raise IOError("Failed to initialize detector!")

    # Create and initialize the classifier.
    classifier = openem.Classifier()
    if classifier.Init(classify_path, 0.5) != openem.kSuccess:
        raise IOError("Failed to initialize classifier!")

    # Initialize the video reader.
    reader = openem.VideoReader()
    if reader.Init(vid_path) != openem.kSuccess:
        raise IOError("Failed to open video {}!".format(vid_path))

    detections = []
    scores = []
    vid_end = False
    while not vid_end:
        # Read, rectify, crop and queue up to max_img frames. Only frames
        # that were actually queued on the detector are kept in `imgs`.
        imgs = []
        for _ in range(max_img):
            frame = openem.Image()
            # Any non-success status from the reader is treated as the end
            # of the video.
            if reader.GetFrame(frame) != openem.kSuccess:
                vid_end = True
                break
            img = openem.Crop(openem.Rectify(frame, endpoints), roi)
            if detector.AddImage(img) != openem.kSuccess:
                raise RuntimeError("Failed to add frame to detector!")
            imgs.append(img)

        # The video ended exactly on a batch boundary (or had no frames):
        # nothing is queued, so there is nothing to detect or classify.
        if not imgs:
            break

        # Find detections.
        dets = openem.VectorVectorDetection()
        if detector.Process(dets) != openem.kSuccess:
            raise RuntimeError("Failed to process detector!")
        detections += dets

        # Classify detections, pairing each detector output entry with the
        # frame it was computed from.
        for det_frame, img in zip(dets, imgs):
            score_batch = openem.VectorClassification()
            num_queued = 0
            for det in det_frame:
                det_img = openem.GetDetImage(img, det.location)
                if classifier.AddImage(det_img) != openem.kSuccess:
                    raise RuntimeError("Failed to add frame to classifier!")
                num_queued += 1
            # Run the classifier only when this frame produced detections;
            # otherwise the empty score vector is recorded as-is.
            if num_queued > 0:
                if classifier.Process(score_batch) != openem.kSuccess:
                    raise RuntimeError("Failed to process classifier!")
            scores.append(score_batch)

    return (detections, scores)