def FindExtrinsicCameraParams(imagepoints, objectpoints, KK):
    """Use OpenCV to solve for the rigid transformation that maps objectpoints to imagepoints.

    imagepoints  - Nx2 array
    objectpoints - Nx3 array
    KK           - 3x3 intrinsic matrix, or a 4-element array (fx, fy, cx, cy)
    """
    imagepoints = array(imagepoints, float)
    objectpoints = array(objectpoints, float)
    if len(KK.shape) == 1:
        cvKK = cv.CreateMat(3, 3, cv.CV_32FC1)
        cvKK[0, 0] = KK[0]; cvKK[0, 1] = 0;     cvKK[0, 2] = KK[2]
        cvKK[1, 0] = 0;     cvKK[1, 1] = KK[1]; cvKK[1, 2] = KK[3]
        cvKK[2, 0] = 0;     cvKK[2, 1] = 0;     cvKK[2, 2] = 1
    else:
        cvKK = cv.fromarray(KK)
    # zero distortion coefficients
    cvDist = cv.CreateMat(4, 1, cv.CV_32FC1)
    cvDist[0, 0] = 0; cvDist[1, 0] = 0; cvDist[2, 0] = 0; cvDist[3, 0] = 0
    rvec = cv.CreateMat(3, 1, cv.CV_32FC1)
    tvec = cv.CreateMat(3, 1, cv.CV_32FC1)
    object_points = cv.CreateMatHeader(3, objectpoints.shape[0], cv.CV_32FC1)
    cv.SetData(object_points,
               struct.pack('f' * (objectpoints.shape[0] * 3), *transpose(objectpoints).flat),
               4 * objectpoints.shape[0])
    image_points = cv.CreateMatHeader(2, imagepoints.shape[0], cv.CV_32FC1)
    cv.SetData(image_points,
               struct.pack('f' * (imagepoints.shape[0] * 2), *transpose(imagepoints).flat),
               4 * imagepoints.shape[0])
    cv.FindExtrinsicCameraParams2(object_points, image_points, cvKK, cvDist, rvec, tvec)
    T = matrixFromAxisAngle((rvec[0, 0], rvec[1, 0], rvec[2, 0]))
    T[0:3, 3] = [tvec[0, 0], tvec[1, 0], tvec[2, 0]]
    return T
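# Usage sketch (not from the original source): the point data below is made
# up, and KK uses the 4-element (fx, fy, cx, cy) form. Assumes numpy's array
# is imported as in the function above.
points2d = array([[320.0, 240.0], [400.0, 240.0], [400.0, 300.0], [320.0, 300.0]])   # Nx2
points3d = array([[0.0, 0.0, 1.0], [0.1, 0.0, 1.0], [0.1, 0.1, 1.0], [0.0, 0.1, 1.0]])  # Nx3
KK = array([525.0, 525.0, 319.5, 239.5])  # hypothetical intrinsics
T = FindExtrinsicCameraParams(points2d, points3d, KK)  # 4x4 camera pose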
def __init__(self, type, points3d, descriptors):
    if len(descriptors) < 1:
        raise detection_error('no descriptor found')
    self.type = type
    self.points3d = points3d
    self.desckdtree = pyANN.KDTree(descriptors)
    self.besterr_thresh = 0.001
    self.cvKK = cv.fromarray(eye(3))
    self.cvDist = cv.fromarray(zeros((4, 1)))
    self.rvec = cv.CreateMat(3, 1, cv.CV_32FC1)
    self.tvec = cv.CreateMat(3, 1, cv.CV_32FC1)
    self.ninitial = 4
    self.object_points = cv.CreateMatHeader(3, self.ninitial, cv.CV_32FC1)
    self.image_points = cv.CreateMatHeader(2, self.ninitial, cv.CV_32FC1)
def imgmsg_to_cv(self, img_msg, desired_encoding="passthrough"):
    """
    Convert a sensor_msgs::Image message to an OpenCV :ctype:`IplImage`.

    :param img_msg: A sensor_msgs::Image message
    :param desired_encoding: The encoding of the image data, one of the
        following strings:

        * ``"passthrough"``
        * one of the standard strings in sensor_msgs/image_encodings.h

    :rtype: :ctype:`IplImage`
    :raises CvBridgeError: when conversion is not possible.

    If desired_encoding is ``"passthrough"``, then the returned image has
    the same format as img_msg. Otherwise desired_encoding must be one of
    the standard image encodings.

    This function returns an OpenCV :ctype:`IplImage` on success, or raises
    :exc:`cv_bridge.CvBridgeError` on failure.
    """
    source_type = self.encoding_as_cvtype(img_msg.encoding)
    im = cv.CreateMatHeader(img_msg.height, img_msg.width, source_type)
    cv.SetData(im, img_msg.data, img_msg.step)

    if desired_encoding == "passthrough":
        return im

    from cv_bridge.boost.cv_bridge_boost import cvtColor

    try:
        res = cvtColor(im, img_msg.encoding, desired_encoding)
    except RuntimeError as e:
        raise CvBridgeError(e)

    return res
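# A minimal subscriber sketch showing how imgmsg_to_cv is typically called
# from a ROS image callback; the topic name is a placeholder.
import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError

bridge = CvBridge()

def on_image(msg):
    try:
        # "bgr8" forces a 3-channel BGR image regardless of the source encoding
        im = bridge.imgmsg_to_cv(msg, desired_encoding="bgr8")
    except CvBridgeError as e:
        rospy.logwarn("conversion failed: %s" % e)

rospy.Subscriber("/camera/image_raw", Image, on_image)  # placeholder topic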
def create_image(self, buffer, create_alpha=True):
    self.extension = self.extension or '.tif'
    self.no_data_value = None
    # FIXME: opencv doesn't support gifs; even worse, the library
    # segfaults when trying to decode a gif. An exception is a
    # less drastic measure.
    try:
        if FORMATS[self.extension] == 'GIF':
            raise ValueError("opencv doesn't support gifs")
    except KeyError:
        pass

    if FORMATS[self.extension] == 'TIFF':
        self.buffer = buffer
        img0 = self.read_tiff(buffer, create_alpha)
    else:
        imagefiledata = cv.CreateMatHeader(1, len(buffer), cv.CV_8UC1)
        cv.SetData(imagefiledata, buffer, len(buffer))
        img0 = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_UNCHANGED)

    if FORMATS[self.extension] == 'JPEG':
        try:
            info = JpegFile.fromString(buffer).get_exif()
            if info:
                self.exif = info.data
                self.exif_marker = info.marker
        except Exception:
            pass

    return img0
def set_image_buffer(self, image_buffer):
    buffer_len = len(image_buffer)
    imagefiledata = cv.CreateMatHeader(1, buffer_len, cv.CV_8UC1)
    cv.SetData(imagefiledata, image_buffer, buffer_len)
    self.image = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_COLOR)
    self.size = cv.GetSize(self.image)
    self.mode = "BGR"
image_cache = {}

def get_sample(filename, iscolor=cv.CV_LOAD_IMAGE_COLOR):
    # Decode the file once and cache the result for later calls
    if filename not in image_cache:
        filedata = open(filename, 'rb').read()
        imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1)
        cv.SetData(imagefiledata, filedata, len(filedata))
        image_cache[filename] = cv.DecodeImageM(imagefiledata, iscolor)
    return image_cache[filename]
def CVtoPIL_4Channel(CV_img):
    """Converts a CV image to a PIL image."""
    cv_img = cv.CreateMatHeader(cv.GetSize(CV_img)[1], cv.GetSize(CV_img)[0], cv.CV_8UC1)
    #cv.SetData(cv_img, pil_img.tostring())
    pil_img = Image.fromstring("L", cv.GetSize(CV_img), CV_img.tostring())
    return pil_img
def handle_image(self, msg):
    if self.counter < self.bg_num:
        cv_image = self.bridge.imgmsg_to_cv(msg, "bgr8")
        self.numpy_bgs.append(numpy.asarray(cv_image))
        self.counter += 1
        return

    if not self.have_ave_bg:
        self.have_ave_bg = True
        rospy.loginfo('Collected samples for background image averaging')

        height = self.numpy_bgs[0].shape[0]
        width = self.numpy_bgs[0].shape[1]
        n_channels = self.numpy_bgs[0].shape[2]
        depth = cv.IPL_DEPTH_8U
        size = self.numpy_bgs[0].size
        rospy.loginfo(
            'height = %d, width = %d, n_channels = %d, depth = %d, size = %d' %
            (height, width, n_channels, depth, size))

        cov_mat = cv.CreateMat(3, 3, cv.CV_32FC1)
        ave_arr = cv.CreateMat(1, 3, cv.CV_32FC1)

        # for each pixel in the image
        for i in xrange(0, size, 3):
            vects = []
            # for each image that we sampled
            for img in self.numpy_bgs:
                mat = cv.CreateMatHeader(1, 3, cv.CV_8UC1)
                cv.SetData(mat, img.take([i, i + 1, i + 2]), 3)
                vects.append(mat)

            cv.CalcCovarMatrix(vects, cov_mat, ave_arr, cv.CV_COVAR_NORMAL)
            self.determinants.append(cv.Det(cov_mat))
            self.covariances.extend(
                numpy.asarray(cov_mat, dtype=numpy.uint8).ravel())

            ave_np = numpy.asarray(ave_arr, dtype=numpy.uint8).ravel()
            self.averages.extend(ave_np)

            sdb = sdg = sdr = 0.0
            for img in self.numpy_bgs:
                (b, g, r) = img.take([i, i + 1, i + 2])
                sdb += pow(b - ave_np[0], 2.0)
                sdg += pow(g - ave_np[1], 2.0)
                sdr += pow(r - ave_np[2], 2.0)
            self.std_devs.append(math.sqrt(sdb / (self.bg_num - 1.0)))
            self.std_devs.append(math.sqrt(sdg / (self.bg_num - 1.0)))
            self.std_devs.append(math.sqrt(sdr / (self.bg_num - 1.0)))

        ave_numpy = numpy.array(self.averages, dtype=numpy.uint8)
        self.ave_img = cv.CreateImageHeader((width, height), cv.IPL_DEPTH_8U, 3)
        cv.SetData(self.ave_img, ave_numpy, width * 3)
        #cv.ShowImage('win1', self.ave_img)
        #cv.ShowImage('win1', self.numpy_bgs[0])
        #cv.ShowImage('win2', cv.fromarray(self.numpy_bgs[0]))
        #cv.WaitKey(1000)
        rospy.loginfo('Computed average background image from samples')
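# Sketch of how the per-pixel statistics gathered above could be used to
# classify pixels; the helper and the 2.5-sigma threshold are assumptions,
# not part of the original node.
def is_foreground(pixel_bgr, i, averages, std_devs, k=2.5):
    # i indexes the first channel of the pixel in the flattened image;
    # averages and std_devs are the lists filled in by handle_image
    for c in range(3):
        mean = float(averages[i + c])
        std = std_devs[i + c]
        if std > 0 and abs(float(pixel_bgr[c]) - mean) > k * std:
            return True
    return False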
def createImage(self, image_data, width, height, depth, color_code, jpeg=False):
    if jpeg:
        length = len(image_data)
        image = cv.CreateMatHeader(1, length, cv.CV_8UC1)
        cv.SetData(image, image_data, length)
        return cv.DecodeImage(image)
    else:
        image = cv.CreateImageHeader((width, height), depth, 4)
        cv.SetData(image, image_data)
        return image
def imgmsg_to_cv(img_msg, desired_encoding="passthrough"):
    # Use the legacy converter when the installed cv_bridge still provides
    # it; otherwise fall back to converting via cv2 and wrapping the message
    # data in a new CvMat header.
    try:
        return bridge.imgmsg_to_cv(img_msg, desired_encoding)
    except:
        cv2_im = bridge.imgmsg_to_cv2(img_msg, desired_encoding)
        img_msg = bridge.cv2_to_imgmsg(cv2_im)
        source_type = encoding_as_cvtype(img_msg.encoding)
        im = cv.CreateMatHeader(img_msg.height, img_msg.width, source_type)
        cv.SetData(im, img_msg.data, img_msg.step)
        return im
def gaussiannoise(im, mean=0.0, std=15.0):
    """
    Applies Gaussian noise to the image. This models the sensor noise found
    in cheap cameras in low light, etc.

    **Parameters:**
        * im (cvArr) - The source image.
        * mean (float) - The mean value of the Gaussian distribution.
        * std (float) - The standard deviation of the Gaussian distribution.
          A larger standard deviation means more noise.

    **Returns:**
    The noisy image.

    .. note::
        This function takes a while to run on large images.

    .. todo::
        * Argument for blue amplification to model bad sensors?
        * Use numpy to speed things up?

    .. seealso::
        :func:`saltandpepper()`
    """
    # The first version below takes around 0.4s less time to run on my
    # computer than the version beneath it on a colour image that is about
    # 600x800, but I still don't like it...
    # Want to change this to make it quicker still and nicer to read.
    # Numpy would make this really quick, but I don't want it to be a dependency.
    # Also, it's tricky to add the blue amplification using this method.
    dst = create(im)
    if im.channels == 3:
        data = array.array('d', [random.gauss(mean, std)
                                 for i in xrange(im.width * im.height * 3)])
        noise = cv.CreateMatHeader(im.height, im.width, cv.CV_64FC3)
        cv.SetData(noise, data, cv.CV_AUTOSTEP)
    else:
        data = array.array('d', [random.gauss(mean, std)
                                 for i in xrange(im.width * im.height)])
        noise = cv.CreateMatHeader(im.height, im.width, cv.CV_64FC1)
        cv.SetData(noise, data, cv.CV_AUTOSTEP)
    cv.Add(im, noise, dst)
    return dst
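# Usage sketch (file names are placeholders; assumes the legacy cv module
# and the snippet's own create() helper are available):
im = cv.LoadImage('input.png', cv.CV_LOAD_IMAGE_COLOR)  # placeholder path
noisy = gaussiannoise(im, mean=0.0, std=25.0)           # heavier noise than the default
cv.SaveImage('input_noisy.png', noisy)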
def create_image(self, buffer):
    # FIXME: opencv doesn't support gifs; even worse, the library
    # segfaults when trying to decode a gif. An exception is a
    # less drastic measure.
    if FORMATS[self.extension] == 'GIF':
        raise ValueError("opencv doesn't support gifs")

    imagefiledata = cv.CreateMatHeader(1, len(buffer), cv.CV_8UC1)
    cv.SetData(imagefiledata, buffer, len(buffer))
    img0 = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_COLOR)
    return img0
def loadTemplates(self):
    '''Load the template images.'''
    self._templates = []
    for i, cvimageinfo in enumerate(config.template.images):
        cvmat = cv.CreateMatHeader(cvimageinfo.rows, cvimageinfo.cols, cvimageinfo.type)
        cv.SetData(cvmat, cvimageinfo.data)
        self._templates.append(A(
            image=cv.GetImage(cvmat),
            number=i,
            result=None,
        ))
def init(self):
    if not config.normalize.points or len(config.normalize.points) < 4:
        self._label = tk.Label(self, text=u'Normalization has not been done yet.\nPlease run normalization first.')
        self._label.pack()
        return
    if not config.template.images:
        config.template.images = [None for i in xrange(10)]

    # Set up the camera
    self._camera = cv.CaptureFromCAM(config.camera.id)

    # Set up the Canvas (and friends) used to display the camera image
    self._cvmat = None
    self._image = tk.PhotoImage(width=config.canvas.width, height=config.canvas.height)
    self._canvas = tk.Canvas(self, width=config.canvas.width, height=config.canvas.height)
    self._canvas.create_image(config.canvas.width / 2, config.canvas.height / 2,
                              image=self._image, tags='image')
    self._canvas.pack(expand=1, fill=tk.BOTH)
    self._canvas.tag_bind('image', '<ButtonPress-1>', self.mouseDown)
    self._canvas.tag_bind('image', '<B1-Motion>', self.mouseDrag)
    self._canvas.tag_bind('image', '<ButtonRelease-1>', self.mouseUp)

    # Buttons
    self._buttons = []
    for i in xrange(10):
        command = (lambda id: lambda: self.fixation(id))(i)
        button = tk.Button(self, text=u'%d' % i, command=command)
        button.pack(side=tk.LEFT)
        self._buttons.append(button)
        # Restore the button image from the saved data
        cvimageinfo = config.template.images[i]
        if cvimageinfo:
            cvmat = cv.CreateMatHeader(cvimageinfo.rows, cvimageinfo.cols, cvimageinfo.type)
            cv.SetData(cvmat, cvimageinfo.data)
            self.setButtonImage(i, cvmat)
    self.allButtonEnable(False)

    # Mouse coordinate state
    self._mouse_down = None
    self._mouse_up = None

    # Variables used to filter the image
    self._clip_rect, self._perspective_points = Points2Rect(config.normalize.points)

    # Update the camera image once a second
    self.addTiming(self.showImage, 1)
def scanPic(self, uri):
    mr = MediaResource(graph, uri)
    jpg, mtime = mr.getImageAndMtime(1000)
    mat = cv.CreateMatHeader(1, len(jpg), cv.CV_8UC1)
    cv.SetData(mat, jpg, len(jpg))
    img = cv.DecodeImage(mat)

    grayscale = cv.CreateImage((img.width, img.height), 8, 1)
    cv.CvtColor(img, grayscale, cv.CV_RGB2GRAY)
    cv.EqualizeHist(grayscale, grayscale)

    storage = cv.CreateMemStorage(0)
    faces = cv.HaarDetectObjects(
        grayscale,
        self.cascade,
        storage,
        1.2,       # scaleFactor between scans
        3,         # minNeighbors
        cv.CV_HAAR_DO_CANNY_PRUNING,
        (20, 20),  # min window size
    )

    size = cv.GetSize(grayscale)
    for f, neighbors in faces:
        desc = {
            'source': str(uri),
            'types': [PHO.Crop],
            'tag': 'face',
            # float division so the crop coordinates are normalized fractions
            'x1': f[0] / float(size[0]),
            'y1': f[1] / float(size[1]),
            'x2': (f[0] + f[2]) / float(size[0]),
            'y2': (f[1] + f[3]) / float(size[1]),
            # this ought to have a padded version for showing, and
            # also the face coords inside that padded version, for
            # recognition. Note that the padded one may run into
            # the margins
            'neighbors': neighbors,
        }
        alt = restkit.Resource(
            uri.replace('http://photo.bigasterisk.com/',
                        'http://bang:8031/') + "/alt")
        resp = alt.post(payload=json.dumps(desc),
                        headers={
                            'content-type': 'application/json',
                            'x-foaf-agent': 'http://bigasterisk.com/tool/scanFace'
                        })
        print resp.status, resp.body_string()
def load_sample(name=None):
    if len(argv) > 1:
        img0 = cv.LoadImage(argv[1], cv.CV_LOAD_IMAGE_COLOR)
    elif name is not None:
        try:
            img0 = cv.LoadImage(name, cv.CV_LOAD_IMAGE_COLOR)
        except IOError:
            # fall back to fetching the sample image from the OpenCV repository
            urlbase = 'https://code.ros.org/svn/opencv/trunk/opencv/samples/c/'
            file = name.split('/')[-1]
            filedata = urllib2.urlopen(urlbase + file).read()
            imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1)
            cv.SetData(imagefiledata, filedata, len(filedata))
            img0 = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_COLOR)
    return img0
def array2cv(a):
    # Map numpy dtypes to single-channel CvMat types
    dtype2type = {
        'uint8': cv.CV_8UC1,
        'int8': cv.CV_8SC1,
        'uint16': cv.CV_16UC1,
        'int16': cv.CV_16SC1,
        'int32': cv.CV_32SC1,
        'float32': cv.CV_32FC1,
        'float64': cv.CV_64FC1,
    }
    cv_im = cv.CreateMatHeader(a.shape[0], a.shape[1], dtype2type[str(a.dtype)])
    cv.SetData(cv_im, a.tostring())
    return cv_im
def fit(self, data):
    m = mean(data, 0)
    diff = data - tile(m, (data.shape[0], 1))
    area0 = abs(linalg.det(dot(transpose(diff[:, 0:2]), diff[:, 0:2])))
    if area0 < 0.00001:
        # the 2D point area is too small (degenerate data)
        #print 'degenerate 2d data %f'%area0
        return None
    # have to compute if the 3d points are collinear or not
    eigvalues = linalg.eigvalsh(dot(transpose(diff[:, 2:5]), diff[:, 2:5]))
    if sum(abs(eigvalues) <= 1e-9) >= 2:
        # the 3D points are (nearly) collinear (degenerate data)
        #print 'degenerate 3d points',eigvalues
        return None

    if data.shape[0] == self.ninitial:
        object_points = self.object_points
        image_points = self.image_points
    else:
        object_points = cv.CreateMatHeader(3, data.shape[0], cv.CV_32FC1)
        image_points = cv.CreateMatHeader(2, data.shape[0], cv.CV_32FC1)

    cv.SetData(object_points,
               struct.pack('f' * (data.shape[0] * 3), *transpose(data[:, 2:5]).flat),
               4 * data.shape[0])
    cv.SetData(image_points,
               struct.pack('f' * (data.shape[0] * 2), *transpose(data[:, 0:2]).flat),
               4 * data.shape[0])
    cv.FindExtrinsicCameraParams2(object_points, image_points,
                                  self.cvKK, self.cvDist, self.rvec, self.tvec)
    #cv.FindExtrinsicCameraParams2(cv.fromarray(data[:,2:5]),cv.fromarray(data[:,0:2]),self.cvKK,self.cvDist,self.rvec,self.tvec)
    T = matrixFromAxisAngle((self.rvec[0, 0], self.rvec[1, 0], self.rvec[2, 0]))
    T[0:3, 3] = [self.tvec[0, 0], self.tvec[1, 0], self.tvec[2, 0]]
    # make sure that the texture faces towards the image (ie, the z axis has a negative z component)
    if T[2, 2] < 0:
        return None
    return T
def array2cvmat(a):
    dtype2type = {
        'uint8': cv.CV_8UC1,
        'int8': cv.CV_8SC1,
        'uint16': cv.CV_16UC1,
        'int16': cv.CV_16SC1,
        'int32': cv.CV_32SC1,
        'float32': cv.CV_32FC1,
        'float64': cv.CV_64FC1,
    }
    # create the matrix header
    rows = a.shape[0]
    cols = a.shape[1]
    type = dtype2type[str(a.dtype)]
    cvmat = cv.CreateMatHeader(rows, cols, type)
    # set the data
    cv.SetData(cvmat, a.tostring(), a.dtype.itemsize * a.shape[1])
    return cvmat
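# Round-trip sketch (not from the original source); the old cv mats support
# the numpy array interface, so the result can be inspected with asarray:
import numpy as np
a = np.arange(12, dtype=np.float32).reshape(3, 4)
m = array2cvmat(a)
print cv.GetSize(m)        # (4, 3) - GetSize reports (width, height)
print np.asarray(m)[0, 1]  # 1.0, matches a[0, 1]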
def onBtnSaveImageClicked(self, widget):
    if self.lastImage is not None:
        # Convert to an OpenCV image
        cvImage = cv.CreateMatHeader(self.lastImage.height, self.lastImage.width, cv.CV_8UC3)
        cv.SetData(cvImage, self.lastImage.data, self.lastImage.step)

        # Convert to BGR as OpenCV likes it
        cv.CvtColor(cvImage, cvImage, cv.CV_RGB2BGR)

        # Find a name for the image
        nameFormatString = "/home/abroun/abroun-ros-pkg/gaffa_apps/object_detector/test_data/saliency/maplin_{0}.png"
        imageIdx = 0
        nameFound = False
        while not nameFound:
            imageName = nameFormatString.format(imageIdx)
            if not os.path.exists(imageName):
                nameFound = True
            else:
                imageIdx += 1

        # Save the image
        cv.SaveImage(imageName, cvImage)
def imgmsg_to_cv(self, img_msg, desired_encoding="passthrough"):
    source_type = self.encoding_as_cvtype(img_msg.encoding)
    im = cv.CreateMatHeader(img_msg.height, img_msg.width, source_type)
    cv.SetData(im, img_msg.data, img_msg.step)
    if desired_encoding == "passthrough":
        return im

    # Might need to do a conversion. sourcefmt and destfmt can be
    # one of GRAY, RGB, BGR, RGBA, BGRA.
    sourcefmt = self.encoding_as_fmt(img_msg.encoding)
    destfmt = self.encoding_as_fmt(desired_encoding)
    destination_type = self.encoding_as_cvtype(desired_encoding)
    if sourcefmt == destfmt and source_type == destination_type:
        return im

    cvtim = cv.CreateMat(img_msg.height, img_msg.width,
                         self.encoding_as_cvtype(desired_encoding))
    if sourcefmt == destfmt:
        cv.ConvertScale(im, cvtim)
    else:
        cv.CvtColor(im, cvtim, eval("cv.CV_%s2%s" % (sourcefmt, destfmt)))
    return cvtim
try:
    # the image generators
    image_generator = onipy.OpenNIImageGenerator()
    g_context.FindExistingNode(onipy.XN_NODE_TYPE_IMAGE, image_generator)
    depth_generator = onipy.OpenNIDepthGenerator()
    g_context.FindExistingNode(onipy.XN_NODE_TYPE_DEPTH, depth_generator)

    width = depth_generator.XRes()
    height = depth_generator.YRes()

    # align the images
    depth_generator.set_viewpoint(image_generator)

    # matrix headers and matrices for computation buffers
    current_image_frame = cv.CreateImageHeader(image_generator.Res(), cv.IPL_DEPTH_8U, 3)
    current_depth_frame = cv.CreateMatHeader(height, width, cv.CV_16UC1)
    for_thresh = cv.CreateMat(height, width, cv.CV_32FC1)
    min_thresh = cv.CreateMat(height, width, cv.CV_8UC1)
    max_thresh = cv.CreateMat(height, width, cv.CV_8UC1)
    and_thresh = cv.CreateMat(height, width, cv.CV_8UC1)
    gray = cv.CreateMat(height, width, cv.CV_8UC1)
    obj_draw = np.zeros((height, width))
    cont_draw = np.zeros((height, width))

    # create some matrices for drawing
    hist_img = cv.CreateMat(hist_height, width, cv.CV_8UC3)
    out = cv.CreateMat(height + hist_height, width, cv.CV_8UC3)
    contours = cv.CreateMat(height, width, cv.CV_8UC3)

    print 'load object db and create flann index ..'
    db = pickle.load(open('../data/pickled.db'))
def create_image(self, buffer):
    imagefiledata = cv.CreateMatHeader(1, len(buffer), cv.CV_8UC1)
    cv.SetData(imagefiledata, buffer, len(buffer))
    img0 = cv.DecodeImage(imagefiledata, cv.CV_LOAD_IMAGE_COLOR)
    return img0
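# Usage sketch: create_image expects the raw encoded bytes of an image file.
# The path and the engine instance here are hypothetical.
with open('photo.jpg', 'rb') as f:
    data = f.read()
img = engine.create_image(data)  # decoded as a 3-channel BGR image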
    cv.Copy(q3, tmp)
    cv.Copy(q1, q3)
    cv.Copy(tmp, q1)
    cv.Copy(q4, tmp)
    cv.Copy(q2, q4)
    cv.Copy(tmp, q2)

if __name__ == "__main__":
    if len(sys.argv) > 1:
        im = cv.LoadImage(sys.argv[1], cv.CV_LOAD_IMAGE_GRAYSCALE)
    else:
        url = 'https://code.ros.org/svn/opencv/trunk/opencv/samples/c/baboon.jpg'
        filedata = urllib2.urlopen(url).read()
        imagefiledata = cv.CreateMatHeader(1, len(filedata), cv.CV_8UC1)
        cv.SetData(imagefiledata, filedata, len(filedata))
        im = cv.DecodeImageM(imagefiledata, cv.CV_LOAD_IMAGE_GRAYSCALE)

    realInput = cv.CreateImage(cv.GetSize(im), cv.IPL_DEPTH_64F, 1)
    imaginaryInput = cv.CreateImage(cv.GetSize(im), cv.IPL_DEPTH_64F, 1)
    complexInput = cv.CreateImage(cv.GetSize(im), cv.IPL_DEPTH_64F, 2)

    cv.Scale(im, realInput, 1.0, 0.0)
    cv.Zero(imaginaryInput)
    cv.Merge(realInput, imaginaryInput, None, None, complexInput)

    dft_M = cv.GetOptimalDFTSize(im.height - 1)
    dft_N = cv.GetOptimalDFTSize(im.width - 1)

    dft_A = cv.CreateMat(dft_M, dft_N, cv.CV_64FC2)
def processBag( self, bag ):
    FLIP_IMAGE = bool( self.options.frameFlip == "True" )
    USING_OPTICAL_FLOW_FOR_MOTION = False
    print "frameFlip = ", FLIP_IMAGE

    bagFrameIdx = 0
    frameIdx = 0
    impactFrameIdx = None

    # Setup filters
    opticalFlowFilter = OpticalFlowFilter(
        self.OPTICAL_FLOW_BLOCK_WIDTH, self.OPTICAL_FLOW_BLOCK_HEIGHT,
        self.OPTICAL_FLOW_RANGE_WIDTH, self.OPTICAL_FLOW_RANGE_HEIGHT )
    motionDetectionFilter = MotionDetectionFilter()
    imageFlowFilter = ImageFlowFilter()
    residualSaliencyFilter = ResidualSaliencyFilter()

    # Process bag file
    for topic, msg, t in bag.read_messages():
        if self.workCancelled:
            # We've been given the signal to quit
            break

        if msg._type == "sensor_msgs/Image":
            bagFrameIdx += 1
            if (bagFrameIdx - 1) % self.PROCESSED_FRAME_DIFF != 0:
                continue

            print "Processing image", frameIdx

            # Get input image
            image = cv.CreateMatHeader( msg.height, msg.width, cv.CV_8UC3 )
            cv.SetData( image, msg.data, msg.step )

            if FLIP_IMAGE:
                cv.Flip( image, None, 1 )

            # Convert to grayscale
            grayImage = cv.CreateMat( msg.height, msg.width, cv.CV_8UC1 )
            cv.CvtColor( image, grayImage, cv.CV_BGR2GRAY )
            grayImageNumpPy = np.array( grayImage )

            # Calculate optical flow
            opticalFlowArrayX, opticalFlowArrayY = \
                opticalFlowFilter.calcOpticalFlow( grayImage )

            # Detect motion
            if USING_OPTICAL_FLOW_FOR_MOTION:
                if frameIdx == 0:
                    motionImage = PyVarFlowLib.createMotionMask(
                        grayImageNumpPy, grayImageNumpPy )
                else:
                    motionImage = PyVarFlowLib.createMotionMask(
                        np.array( self.grayScaleImageList[ frameIdx - 1 ] ),
                        grayImageNumpPy )
            else:
                motionImage = motionDetectionFilter.calcMotion( grayImage )

            # Work out the left most point in the image where motion appears
            motionTest = np.copy( motionImage )
            cv.Erode( motionTest, motionTest )
            if frameIdx == 0:
                leftMostMotion = motionImage.shape[ 1 ]
            else:
                leftMostMotion = self.leftMostMotionList[ frameIdx - 1 ]

            leftMostMotionDiff = 0
            for i in range( leftMostMotion ):
                if motionTest[ :, i ].max() > 0:
                    leftMostMotionDiff = abs( leftMostMotion - i )
                    leftMostMotion = i
                    break

            segmentationMask = np.zeros( ( msg.height, msg.width ), dtype=np.uint8 )

            FRAMES_BACK = 3

            if impactFrameIdx == None:
                if leftMostMotionDiff > 18 and leftMostMotion < 0.75*msg.width:
                    # Found impact frame
                    impactFrameIdx = frameIdx
            else:
                PROCESS_IMPACT = False
                if PROCESS_IMPACT and frameIdx - impactFrameIdx == FRAMES_BACK:
                    # Should now have enough info to segment object
                    impactMotionImage = self.motionImageList[ impactFrameIdx ]

                    print "Aligning"
                    postImpactRealFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, motionImage )
                    print "Aligning"
                    postImpactFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx + 2 ] )
                    print "Aligning"
                    postImpactNearFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx + 1 ] )

                    segmentationMask = np.maximum( np.maximum( np.maximum(
                        impactMotionImage, postImpactNearFlow[ 3 ] ),
                        postImpactFarFlow[ 3 ] ), postImpactRealFarFlow[ 3 ] )
                    cv.Dilate( segmentationMask, segmentationMask )

                    print "Aligning"
                    preImpactRealFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 8 ] )
                    print "Aligning"
                    preImpactFarFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 6 ] )
                    print "Aligning"
                    preImpactNearFlow = imageFlowFilter.calcImageFlow( impactMotionImage, self.motionImageList[ impactFrameIdx - 4 ] )

                    subMask = np.maximum( np.maximum(
                        preImpactRealFarFlow[ 3 ], preImpactFarFlow[ 3 ] ),
                        preImpactNearFlow[ 3 ] )
                    cv.Erode( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    cv.Dilate( subMask, subMask )
                    subMask[ subMask > 0 ] = 255

                    diffImage = segmentationMask.astype( np.int32 ) - subMask.astype( np.int32 )
                    diffImage[ diffImage < 0 ] = 0
                    diffImage = diffImage.astype( np.uint8 )
                    cv.Erode( diffImage, diffImage )
                    #diffImage[ diffImage > 0 ] = 255
                    #segmentationMask = subMask
                    segmentationMask = diffImage
                    #segmentationMask = np.where( diffImage > 128, 255, 0 ).astype( np.uint8 )

            # Calculate image flow
            #imageFlow = imageFlowFilter.calcImageFlow( motionImage )

            ## Calculate saliency map
            #saliencyMap, largeSaliencyMap = residualSaliencyFilter.calcSaliencyMap( grayImageNumpPy )
            #blobMap = np.where( largeSaliencyMap > 128, 255, 0 ).astype( np.uint8 )
            #blobMap, numBlobs = PyBlobLib.labelBlobs( blobMap )
            #print "found", numBlobs, "blobs"
            #largeSaliencyMap = np.where( largeSaliencyMap > 128, 255, 0 ).astype( np.uint8 )

            # Threshold the saliency map
            #largeSaliencyMap = (largeSaliencyMap > 128).astype(np.uint8) * 255
            #cv.AdaptiveThreshold( largeSaliencyMap, largeSaliencyMap, 255 )

            # Detect clusters within the saliency map
            #NUM_CLUSTERS = 5
            #numSamples = np.sum( saliencyMap )
            #sampleList = np.ndarray( ( numSamples, 2 ), dtype=np.float32 )
            #sampleListIdx = 0
            #for y in range( saliencyMap.shape[ 0 ] ):
                #for x in range( saliencyMap.shape[ 1 ] ):
                    #numNewSamples = saliencyMap[ y, x ]
                    #if numNewSamples > 0:
                        #sampleList[ sampleListIdx:sampleListIdx+numNewSamples, 0 ] = x
                        #sampleList[ sampleListIdx:sampleListIdx+numNewSamples, 1 ] = y
                        #sampleListIdx += numNewSamples
            #sampleList[ 0:numSamples/2 ] = ( 20, 20 )
            #sampleList[ numSamples/2: ] = ( 200, 200 )
            #labelList = np.ndarray( ( numSamples, 1 ), dtype=np.int32 )
            #cv.KMeans2( sampleList, NUM_CLUSTERS, labelList,
                #(cv.CV_TERMCRIT_ITER | cv.CV_TERMCRIT_EPS, 10, 0.01) )
            #saliencyScaleX = float( largeSaliencyMap.shape[ 1 ] ) / saliencyMap.shape[ 1 ]
            #saliencyScaleY = float( largeSaliencyMap.shape[ 0 ] ) / saliencyMap.shape[ 0 ]
            clusterList = []
            #for clusterIdx in range( NUM_CLUSTERS ):
                #clusterSamples = sampleList[
                    #np.where( labelList == clusterIdx )[ 0 ], : ]
                #if clusterSamples.size <= 0:
                    #mean = ( 0.0, 0.0 )
                    #stdDev = 0.0
                #else:
                    #mean = clusterSamples.mean( axis=0 )
                    #mean = ( mean[ 0 ]*saliencyScaleX, mean[ 1 ]*saliencyScaleY )
                    #stdDev = clusterSamples.std()*saliencyScaleX
                #clusterList.append( ( mean, stdDev ) )

            # Work out the maximum amount of motion we've seen in a single frame so far
            #motionCount = motionImage[ motionImage > 0 ].size
            #if frameIdx == 0:
                #lastMotionCount = 0
            #else:
                #lastMotionCount = self.maxMotionCounts[ frameIdx - 1 ]
            #if motionCount < lastMotionCount:
                #motionCount = lastMotionCount

            ## Work out diffImage
            #diffImage = np.array( motionImage, dtype=np.int32 ) \
                #- np.array( imageFlow[ 3 ], dtype=np.int32 )
            #diffImage = np.array( np.maximum( diffImage, 0 ), dtype=np.uint8 )

            # Segment the image
            #workingMask = np.copy( motionImage )
            #workingMask = np.copy( diffImage )
            workingMask = np.copy( segmentationMask )
            kernel = cv.CreateStructuringElementEx(
                cols=3, rows=3, anchorX=1, anchorY=1, shape=cv.CV_SHAPE_CROSS )
            cv.Erode( workingMask, workingMask, kernel )
            cv.Dilate( workingMask, workingMask )

            extraExtraMask = np.copy( workingMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )
            cv.Dilate( extraExtraMask, extraExtraMask )

            allMask = np.copy( extraExtraMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )
            cv.Dilate( allMask, allMask )

            possibleForeground = workingMask > 0

            if workingMask[ possibleForeground ].size >= 100 \
                    and frameIdx >= 16:
                print "Msk size", workingMask[ possibleForeground ].size
                print workingMask[ 0, 0:10 ]

                fgModel = cv.CreateMat( 1, 5*13, cv.CV_64FC1 )
                bgModel = cv.CreateMat( 1, 5*13, cv.CV_64FC1 )

                #workingMask[ possibleForeground ] = self.GC_FGD
                #workingMask[ possibleForeground == False ] = self.GC_PR_BGD
                #workingMask[ : ] = self.GC_PR_BGD
                #workingMask[ possibleForeground ] = self.GC_FGD
                workingMask[ : ] = self.GC_BGD
                workingMask[ allMask > 0 ] = self.GC_PR_BGD
                workingMask[ extraExtraMask > 0 ] = self.GC_PR_FGD
                workingMask[ possibleForeground ] = self.GC_FGD

                if frameIdx == 16:
                    # Save mask
                    maskCopy = np.copy( workingMask )
                    maskCopy[ maskCopy == self.GC_BGD ] = 0
                    maskCopy[ maskCopy == self.GC_PR_BGD ] = 64
                    maskCopy[ maskCopy == self.GC_PR_FGD ] = 128
                    maskCopy[ maskCopy == self.GC_FGD ] = 255
                    print "Unused pixels", \
                        maskCopy[ (maskCopy != 255) & (maskCopy != 0) ].size

                    outputImage = cv.CreateMat( msg.height, msg.width, cv.CV_8UC3 )
                    cv.CvtColor( maskCopy, outputImage, cv.CV_GRAY2BGR )
                    cv.SaveImage( "output.png", image )
                    cv.SaveImage( "outputMask.png", outputImage )
                    print "Saved images"
                    #return

                #print "Set Msk size", workingMask[ workingMask == self.GC_PR_FGD ].size
                imageToSegment = image  #self.inputImageList[ frameIdx - FRAMES_BACK ]
                imageCopy = np.copy( imageToSegment )
                cv.CvtColor( imageCopy, imageCopy, cv.CV_BGR2RGB )

                print "Start seg"
                cv.GrabCut( imageCopy, workingMask, (0,0,0,0),
                    fgModel, bgModel, 12, self.GC_INIT_WITH_MASK )
                print "Finish seg"

                segmentation = np.copy( imageToSegment )
                segmentation[ (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD) ] = 0

                black = (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)
                #motionImage = np.where( black, 0, 255 ).astype( np.uint8 )

                # Refine the segmentation
                REFINE_SEG = False
                if REFINE_SEG:
                    motionImageCopy = np.copy( motionImage )
                    cv.Erode( motionImageCopy, motionImageCopy )
                    #cv.Erode( motionImageCopy, motionImageCopy )
                    #cv.Erode( motionImageCopy, motionImageCopy )

                    workingMask[ motionImageCopy > 0 ] = self.GC_PR_FGD
                    workingMask[ motionImageCopy == 0 ] = self.GC_PR_BGD
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    cv.Dilate( motionImageCopy, motionImageCopy )
                    workingMask[ motionImageCopy == 0 ] = self.GC_BGD

                    print "Other seg"
                    cv.GrabCut( imageCopy, workingMask, (0,0,0,0),
                        fgModel, bgModel, 12, self.GC_INIT_WITH_MASK )
                    print "Other seg done"

                    segmentation = np.copy( imageToSegment )
                    segmentation[ (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD) ] = 0
                    black = (workingMask != self.GC_PR_FGD) & (workingMask != self.GC_FGD)
                    motionImage = np.where( black, 0, 255 ).astype( np.uint8 )
            else:
                segmentation = np.zeros( ( image.height, image.width ), dtype=np.uint8 )

            # Save output data
            self.inputImageList[ frameIdx ] = image
            self.grayScaleImageList[ frameIdx ] = grayImage
            self.opticalFlowListX[ frameIdx ] = opticalFlowArrayX
            self.opticalFlowListY[ frameIdx ] = opticalFlowArrayY
            self.motionImageList[ frameIdx ] = motionImage
            self.segmentationList[ frameIdx ] = segmentation
            self.segmentationMaskList[ frameIdx ] = segmentationMask
            #self.maxMotionCounts[ frameIdx ] = motionCount
            #self.imageFlowList[ frameIdx ] = imageFlow
            #self.saliencyMapList[ frameIdx ] = largeSaliencyMap
            #self.saliencyClusterList[ frameIdx ] = clusterList
            self.leftMostMotionList[ frameIdx ] = leftMostMotion

            frameIdx += 1
            self.numFramesProcessed += 1

    if not self.workCancelled:
        SAVE_MOTION_IMAGES = True
        BASE_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/motion_images/motion_{0:03}.png"

        if SAVE_MOTION_IMAGES and len( self.motionImageList ) > 0:
            width = self.motionImageList[ 0 ].shape[ 1 ]
            height = self.motionImageList[ 0 ].shape[ 0 ]
            colourImage = np.zeros( ( height, width, 3 ), dtype=np.uint8 )

            for frameIdx, motionImage in enumerate( self.motionImageList ):
                colourImage[ :, :, 0 ] = motionImage
                colourImage[ :, :, 1 ] = motionImage
                colourImage[ :, :, 2 ] = motionImage

                outputName = BASE_MOTION_IMAGE_NAME.format( frameIdx + 1 )
                cv.SaveImage( outputName, colourImage )

        # Recalculate impactFrameIdx
        width = self.motionImageList[ 0 ].shape[ 1 ]
        totalMotionDiff = 0
        maxMotionDiff = 0
        impactFrameIdx = None
        for motionIdx in range( 1, len( self.leftMostMotionList ) ):
            motionDiff = abs( self.leftMostMotionList[ motionIdx ] \
                - self.leftMostMotionList[ motionIdx - 1 ] )
            totalMotionDiff += motionDiff
            if motionDiff > maxMotionDiff and totalMotionDiff > 0.5*width:
                maxMotionDiff = motionDiff
                impactFrameIdx = motionIdx

        if maxMotionDiff <= 18:
            impactFrameIdx = None

        if impactFrameIdx != None:
            preMotionImages = []
            postMotionImages = []
            impactMotionImage = None

            NUM_FRAMES_BEFORE = 3

            prefix = self.options.outputPrefix
            if prefix != "":
                prefix += "_"

            BASE_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "motion_{0:03}.png"
            START_MOTION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "start_motion.png"
            START_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "start.png"
            IMPACT_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "impact.png"
            SEGMENTATION_IMAGE_NAME = self.scriptPath + "/../../test_data/impact_images/" + prefix + "segmentation.png"
            NUM_FRAMES_AFTER = 3

            width = self.motionImageList[ 0 ].shape[ 1 ]
            height = self.motionImageList[ 0 ].shape[ 0 ]
            colourImage = np.zeros( ( height, width, 3 ), dtype=np.uint8 )

            for frameIdx in range( impactFrameIdx - NUM_FRAMES_BEFORE,
                    impactFrameIdx + NUM_FRAMES_AFTER + 1 ):
                motionImage = self.motionImageList[ frameIdx ]

                if frameIdx < impactFrameIdx:
                    preMotionImages.append( motionImage )
                elif frameIdx == impactFrameIdx:
                    impactMotionImage = motionImage
                else:  # frameIdx > impactFrameIdx
                    postMotionImages.append( motionImage )

                colourImage[ :, :, 0 ] = motionImage
                colourImage[ :, :, 1 ] = motionImage
                colourImage[ :, :, 2 ] = motionImage

                outputName = BASE_MOTION_IMAGE_NAME.format( frameIdx - impactFrameIdx )
                cv.SaveImage( outputName, colourImage )

            motionDetectionFilter.calcMotion( self.grayScaleImageList[ 0 ] )
            startMotionImage = motionDetectionFilter.calcMotion(
                self.grayScaleImageList[ impactFrameIdx ] )
            colourImage[ :, :, 0 ] = startMotionImage
            colourImage[ :, :, 1 ] = startMotionImage
            colourImage[ :, :, 2 ] = startMotionImage
            cv.SaveImage( START_MOTION_IMAGE_NAME, colourImage )

            cv.CvtColor( self.inputImageList[ 0 ], colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( START_IMAGE_NAME, colourImage )
            cv.CvtColor( self.inputImageList[ impactFrameIdx ], colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( IMPACT_IMAGE_NAME, colourImage )

            print "Segmenting..."
            segmentation = self.produceSegmentation( self.inputImageList[ 0 ],
                impactMotionImage, preMotionImages, postMotionImages )
            cv.CvtColor( segmentation, colourImage, cv.CV_RGB2BGR )
            cv.SaveImage( SEGMENTATION_IMAGE_NAME, colourImage )

    self.refreshGraphDisplay()

    print "Finished processing bag file"

    if bool( self.options.quitAfterFirstSegmentation == "True" ):
        print "Trying to quit"
        self.onWinMainDestroy( None )
    else:
        print "Not trying to quit so neeah"
def imgmsg_to_cv(self, img_msg, desired_encoding="passthrough"):
    """
    Convert a sensor_msgs::Image message to an OpenCV :ctype:`IplImage`.

    :param img_msg: A sensor_msgs::Image message
    :param desired_encoding: The encoding of the image data, one of the
        following strings:

        * ``"passthrough"``
        * ``"rgb8"``
        * ``"rgba8"``
        * ``"bgr8"``
        * ``"bgra8"``
        * ``"mono8"``
        * ``"mono16"``

    :rtype: :ctype:`IplImage`
    :raises CvBridgeError: when conversion is not possible.

    If desired_encoding is ``"passthrough"``, then the returned image has
    the same format as img_msg. Otherwise desired_encoding must be one of
    the strings "rgb8", "bgr8", "rgba8", "bgra8", "mono8" or "mono16", in
    which case this method converts the image using :func:`CvtColor` (if
    necessary) and the returned image has a type as follows:

        ``CV_8UC3``  for "rgb8", "bgr8"
        ``CV_8UC4``  for "rgba8", "bgra8"
        ``CV_8UC1``  for "mono8"
        ``CV_16UC1`` for "mono16"

    This function returns an OpenCV :ctype:`IplImage` on success, or raises
    :exc:`opencv_latest.cv_bridge.CvBridgeError` on failure.
    """
    source_type = self.encoding_as_cvtype(img_msg.encoding)
    im = cv.CreateMatHeader(img_msg.height, img_msg.width, source_type)
    cv.SetData(im, img_msg.data, img_msg.step)
    if desired_encoding == "passthrough":
        return im

    # Might need to do a conversion. sourcefmt and destfmt can be
    # one of GRAY, RGB, BGR, RGBA, BGRA.
    sourcefmt = self.encoding_as_fmt(img_msg.encoding)
    destfmt = self.encoding_as_fmt(desired_encoding)
    destination_type = self.encoding_as_cvtype(desired_encoding)
    if sourcefmt == destfmt and source_type == destination_type:
        return im

    # First make sure that the source depth matches the destination depth
    if source_type != destination_type:
        # im2 is the intermediate image. It has the same number of channels
        # as source_type, but the depth of destination_type.
        # XXX - these macros were missing from OpenCV Python, so roll our own here:
        CV_CN_SHIFT = 3
        def CV_MAKETYPE(depth, cn):
            return cv.CV_MAT_DEPTH(depth) + ((cn - 1) << CV_CN_SHIFT)
        im2_type = CV_MAKETYPE(destination_type, cv.CV_MAT_CN(source_type))
        im2 = cv.CreateMat(img_msg.height, img_msg.width, im2_type)
        cv.ConvertScale(im, im2)
    else:
        im2 = im

    if sourcefmt != destfmt:
        im3 = cv.CreateMat(img_msg.height, img_msg.width, destination_type)
        cv.CvtColor(im2, im3, eval("cv.CV_%s2%s" % (sourcefmt, destfmt)))
    else:
        im3 = im2
    return im3
def old_GeneratePerceptualHash(path):
    # I think what I should be doing here is going
    # cv2.imread( path, flags = cv2.CV_LOAD_IMAGE_GRAYSCALE )
    # and then resizing efficiently
    thumbnail = GeneratePILImage(path)

    # convert to 32 x 32 greyscale
    if thumbnail.mode == 'P':
        # some P images have problems converting to L without an RGBA step in between
        thumbnail = thumbnail.convert('RGBA')
    if thumbnail.mode == 'RGBA':
        # this is some code I picked up somewhere;
        # another great example of PIL failing - it turns all alpha to pure
        # black on an RGBA->RGB conversion
        thumbnail.load()
        canvas = PILImage.new('RGB', thumbnail.size, (255, 255, 255))
        canvas.paste(thumbnail, mask=thumbnail.split()[3])
        thumbnail = canvas
    thumbnail = thumbnail.convert('L')
    thumbnail = thumbnail.resize((32, 32), PILImage.ANTIALIAS)

    # convert to mat
    numpy_thumbnail_8 = cv.CreateMatHeader(32, 32, cv.CV_8UC1)
    cv.SetData(numpy_thumbnail_8, thumbnail.tostring())
    numpy_thumbnail_32 = cv.CreateMat(32, 32, cv.CV_32FC1)
    cv.Convert(numpy_thumbnail_8, numpy_thumbnail_32)

    # compute dct
    dct = cv.CreateMat(32, 32, cv.CV_32FC1)
    cv.DCT(numpy_thumbnail_32, dct, cv.CV_DXT_FORWARD)

    # take the top left 8x8 of the dct
    dct = cv.GetSubRect(dct, (0, 0, 8, 8))

    # get the mean of the dct, excluding [0, 0]
    mask = cv.CreateMat(8, 8, cv.CV_8U)
    cv.Set(mask, 1)
    mask[0, 0] = 0
    channel_averages = cv.Avg(dct, mask)
    average = channel_averages[0]

    # make a monochromatic, 64-bit hash of whether each entry is above or
    # below the mean
    bytes = []
    for i in range(8):
        byte = 0
        for j in range(8):
            byte <<= 1  # shift the byte one bit left
            value = dct[i, j]
            if value > average:
                byte |= 1
        bytes.append(byte)
    answer = str(bytearray(bytes))

    # we good
    return answer
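# Perceptual hashes like the one above are compared by Hamming distance;
# this helper is an illustration, not part of the original module.
def hamming_distance(hash_a, hash_b):
    # count the differing bits between two 8-byte hashes
    distance = 0
    for byte_a, byte_b in zip(bytearray(hash_a), bytearray(hash_b)):
        distance += bin(byte_a ^ byte_b).count('1')
    return distance

# a small distance (e.g. <= 8 of 64 bits) suggests perceptually similar images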
imageIdx = 0
for topic, msg, t in bag.read_messages():
    if msg._type == "sensor_msgs/Image":
        if msg.encoding == "rgb8" or msg.encoding == "bgr8":
            if imageIdx > 0:
                sys.stdout.write("\r")
            sys.stdout.write("Processing image " + str(imageIdx))
            sys.stdout.flush()

            # Extract the image using OpenCV
            curImage = cv.CreateMatHeader(msg.height, msg.width, cv.CV_8UC3)
            cv.SetData(curImage, msg.data, msg.step)
            npImage = np.array(curImage, dtype=np.uint8)

            if frameAcc is None:
                frameAcc = np.zeros((msg.height, msg.width), dtype=np.float32)

            # Convert the image to grayscale and add it to the accumulator
            frameAcc = frameAcc \
                + 0.299 * npImage[:, :, 0].astype(np.float32) \
                + 0.587 * npImage[:, :, 1].astype(np.float32) \
                + 0.114 * npImage[:, :, 2].astype(np.float32)

            imageIdx += 1
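# One more step is needed after the loop to turn the accumulator into an
# average grayscale frame (a sketch; the output filename is made up):
if imageIdx > 0:
    meanFrame = (frameAcc / float(imageIdx)).astype(np.uint8)
    cv.SaveImage("mean_frame.png", cv.fromarray(meanFrame))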