def run(args):
    # assume the image is 4K; crop if it's larger
    img = ColorImage.imread(args.file_name, resize=(3840, 2160))

    # step 1: find a bounding box region for the document
    document_region = detect_document_region(img)

    # visualize the document region
    region = img.draw_quadrilateral(document_region)
    region.imsave('results/region.png')

    # step 2: find text regions
    # step 2.1: perform a homography to flatten out the document
    # 1700 x 2200 is the size of letter-size paper at 200 DPI
    warped_region = warp_document(img, document_region, 1700, 2200)

    # visualize the flattened document
    warped_region.imsave('results/document.png')

    # extract word regions from the document
    words = extract_word_bounds(warped_region.copy())

    # step 3: CRNN word extraction
    texts = []
    for i, word in enumerate(words):
        # crop each word out of the document
        word_img = word.crop(warped_region.img)

        # visualize each cropped word
        word_img.imsave('words/word{}.png'.format(i))

        # CRNN text extraction from the cropped word
        text = extract_text(word_img.img)
        texts.append(text)

    print(' '.join(texts))
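# A minimal command-line driver sketch for run(). The 'file_name' argument
# matches the attribute read above; creating the results/ and words/ output
# directories is an assumption based on the imsave() paths.
import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Detect a document in an image and extract its text')
    parser.add_argument('file_name', help='path to the input image')
    args = parser.parse_args()

    for d in ('results', 'words'):
        if not os.path.isdir(d):
            os.makedirs(d)

    run(args)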
def frames(self, flush=True):
    """Return the latest color image from the stream.

    Raises:
        Exception if the OpenCV sensor gives a ret_val of 0
    """
    if flush:
        self.flush()
    ret_val, frame = self._sensor.read()
    if not ret_val:
        raise Exception(
            "Unable to retrieve frame from OpenCVCameraSensor for id {0}".format(
                self._device_id))
    return ColorImage(frame)
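# Hedged usage sketch for the sensor method above. The class name
# OpenCVCameraSensor and its start()/stop() methods are assumptions; only
# frames() appears in the source.
sensor = OpenCVCameraSensor(device_id=0)
sensor.start()  # assumed to open the underlying cv2.VideoCapture
try:
    for i in range(10):
        im = sensor.frames()  # flushes stale frames, then grabs the latest
        im.save('frame_%d.png' % i)  # assumes ColorImage provides save()
finally:
    sensor.stop()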
def frames(self):
    """Retrieve the next frame from the image directory and convert it to
    a ColorImage, a DepthImage, and an IrImage.

    Returns
    -------
    :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`IrImage`
        The ColorImage, DepthImage, and IrImage of the current frame.

    Raises
    ------
    RuntimeError
        If the Kinect stream is not running or if all images in the
        directory have been used.
    """
    if not self._running:
        raise RuntimeError(
            'VirtualKinect2 device pointing to %s not running. Cannot read frames'
            % (self._path_to_images))
    if self._im_index >= self._num_images:
        raise RuntimeError('VirtualKinect2 device is out of images')

    # read the color, depth, and IR images for the current index
    color_filename = os.path.join(self._path_to_images,
                                  'color_%d.png' % (self._im_index))
    color_im = ColorImage.open(color_filename, frame=self._frame)
    depth_filename = os.path.join(self._path_to_images,
                                  'depth_%d.npy' % (self._im_index))
    depth_im = DepthImage.open(depth_filename, frame=self._frame)
    ir_filename = os.path.join(self._path_to_images,
                               'ir_%d.npy' % (self._im_index))
    ir_im = IrImage.open(ir_filename, frame=self._frame)
    self._im_index += 1
    return color_im, depth_im, ir_im
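# Hedged replay sketch for the virtual sensor above. The class name
# VirtualKinect2Sensor and its constructor/start()/stop() are assumptions;
# only frames() appears in the source.
sensor = VirtualKinect2Sensor(path_to_images='data/run_0', frame='kinect2')
sensor.start()  # assumed to set _running and reset _im_index
try:
    while True:
        color_im, depth_im, ir_im = sensor.frames()
        # ... process one (color, depth, ir) triple per saved index ...
except RuntimeError:
    pass  # raised once the directory is exhausted
finally:
    sensor.stop()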
def _forward_pass(self, images):
    """Forward pass a list of images through the CNN."""
    # form the image array
    num_images = len(images)
    if num_images == 0:
        return None

    # wrap raw numpy arrays as ColorImage / DepthImage instances
    if any(not isinstance(image, Image) for image in images):
        new_images = []
        for image in images:
            if len(image.shape) > 2:
                new_images.append(ColorImage(image, frame='unspecified'))
            elif image.dtype == np.float32 or image.dtype == np.float64:
                new_images.append(DepthImage(image, frame='unspecified'))
            else:
                raise ValueError('Image type not understood')
        images = new_images

    im_height = images[0].height
    im_width = images[0].width
    channels = images[0].channels

    # the CNN expects three input channels
    tensor_channels = 3
    image_arr = np.zeros(
        [num_images, im_height, im_width, tensor_channels])
    for j, image in enumerate(images):
        if channels == 3:
            image_arr[j, :, :, :] = image.raw_data
        else:
            # tile single-channel (e.g. depth) data across all three channels
            image_arr[j, :, :, :] = np.tile(image.raw_data, [1, 1, 1, 3])

    # featurize the batch with the CNN
    fp_start = time.time()
    final_blobs = self.cnn_.featurize(image_arr)
    fp_stop = time.time()
    logging.debug('Featurization took %f sec per image' %
                  ((fp_stop - fp_start) / num_images))

    # flatten the feature blobs into one vector per image
    return final_blobs.reshape(final_blobs.shape[0], -1)
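# Illustration of the two raw-array input forms _forward_pass() accepts,
# following its wrapping logic above: 3-channel uint8 arrays become
# ColorImages and float arrays become DepthImages. `featurizer` (an instance
# of the class owning this private method) and the 227x227 size are
# assumptions.
import numpy as np

color_arr = np.zeros((227, 227, 3), dtype=np.uint8)  # wrapped as ColorImage
depth_arr = np.zeros((227, 227), dtype=np.float32)   # wrapped as DepthImage

color_feats = featurizer._forward_pass([color_arr, color_arr])
depth_feats = featurizer._forward_pass([depth_arr])  # tiled to 3 channels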
class MPlayer(object):
    """Load a video as YUV4MPEG2 frames using mplayer."""

    def __init__(self, video):
        # `command`, `tail`, and `force` are module-level pieces of the
        # mplayer invocation defined elsewhere in this module
        self.p = Popen(command + tail + video.path + force,
                       shell=True,
                       bufsize=0,
                       stdout=PIPE)
        self.player = self.p.stdout

        # parse the frame dimensions from the YUV4MPEG2 stream header
        hdr = self.player.readline()
        try:
            self.width = int(re.search(r"W(\d+)", hdr).group(1))
            self.height = int(re.search(r"H(\d+)", hdr).group(1))
        except (AttributeError, ValueError):
            # fall back to a default size if the header cannot be parsed
            self.width = 640
            self.height = 480
        self.video = video
        self.me = 0
        self.start_time = self.video.freeze_in_time
        self.end_time = self.video.freeze_out_time
        self.frame = None
        self.fps = 25

    def next_frame(self):
        """Return the next frame from the video as a ColorImage instance,
        or None if the stream is over.
        """
        # before the freeze-in time, keep returning the held frame
        if self.frame is not None and self.me < self.start_time:
            self.me += 1
            return self.frame.clone()
        hdr = self.player.readline()
        if hdr != "FRAME\n":
            # after the freeze-out time, keep returning the held frame
            if self.frame is not None and self.me < self.end_time:
                self.me += 1
                return self.frame.clone()
            self.p.terminate()
            return None
        self.frame = ColorImage(self.width, self.height, fromfile=self.player)
        self.me += 1
        return self.frame.clone()
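# Hedged usage sketch: drain every frame a video yields through MPlayer.
# `video` is assumed to be an object with .path, .freeze_in_time, and
# .freeze_out_time attributes, matching what __init__ above reads from it.
def count_frames(video):
    player = MPlayer(video)
    n = 0
    while True:
        frame = player.next_frame()
        if frame is None:
            # the stream ended and the mplayer process was terminated
            break
        n += 1
    return n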
def _frames_and_index_map(self, skip_registration=False):
    """Retrieve a new frame from the Kinect and return a ColorImage,
    DepthImage, IrImage, and a map from depth pixels to color pixel indices.

    Parameters
    ----------
    skip_registration : bool
        If True, the registration step is skipped.

    Returns
    -------
    :obj:`tuple` of :obj:`ColorImage`, :obj:`DepthImage`, :obj:`IrImage`, :obj:`numpy.ndarray`
        The ColorImage, DepthImage, and IrImage of the current frame, and
        an ndarray that maps pixels of the depth image to the index of the
        corresponding pixel in the color image.

    Raises
    ------
    RuntimeError
        If the Kinect stream is not running.
    """
    if not self._running:
        raise RuntimeError(
            'Kinect2 device %s not running. Cannot read frames'
            % (self._device_num))

    # read frames
    frames = self._listener.waitForNewFrame()
    unregistered_color = frames['color']
    distorted_depth = frames['depth']
    ir = frames['ir']

    # apply color-to-depth registration
    color_frame = self._color_frame
    color = unregistered_color
    depth = distorted_depth
    color_depth_map = np.zeros(
        [depth.height, depth.width]).astype(np.int32).ravel()
    if not skip_registration and \
            self._registration_mode == Kinect2RegistrationMode.COLOR_TO_DEPTH:
        color_frame = self._ir_frame
        depth = lf2.Frame(depth.width, depth.height, 4, lf2.FrameType.Depth)
        color = lf2.Frame(depth.width, depth.height, 4, lf2.FrameType.Color)
        self._registration.apply(unregistered_color,
                                 distorted_depth,
                                 depth,
                                 color,
                                 color_depth_map=color_depth_map)

    # convert to arrays (copies needed to prevent references to deleted data)
    color_arr = copy.copy(color.asarray())
    color_arr[:, :, [0, 2]] = color_arr[:, :, [2, 0]]  # convert BGR to RGB
    color_arr[:, :, 0] = np.fliplr(color_arr[:, :, 0])
    color_arr[:, :, 1] = np.fliplr(color_arr[:, :, 1])
    color_arr[:, :, 2] = np.fliplr(color_arr[:, :, 2])
    color_arr[:, :, 3] = np.fliplr(color_arr[:, :, 3])
    depth_arr = np.fliplr(copy.copy(depth.asarray()))
    ir_arr = np.fliplr(copy.copy(ir.asarray()))

    # convert to meters if requested
    if self._depth_mode == Kinect2DepthMode.METERS:
        depth_arr = depth_arr * MM_TO_METERS

    # release and return
    self._listener.release(frames)
    return (ColorImage(color_arr[:, :, :3], color_frame),
            DepthImage(depth_arr, self._ir_frame),
            IrImage(ir_arr.astype(np.uint16), self._ir_frame),
            color_depth_map)
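# Hedged sketch of consuming the returned index map: for a depth pixel
# (y, x), color_depth_map holds the flat index of the corresponding color
# pixel. The `sensor` instance is an assumption; attribute names follow the
# function above.
color_im, depth_im, ir_im, color_depth_map = sensor._frames_and_index_map()

y, x = 100, 200  # an arbitrary depth pixel
flat_color_idx = color_depth_map[y * depth_im.width + x]
rgb = color_im.raw_data.reshape(-1, 3)[flat_color_idx]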