'''
Created on Oct 7, 2011

@author: bolme
'''
import pyvision as pv
import scipy as sp
import scipy.linalg  # explicit subpackage import so that sp.linalg is available

if __name__ == '__main__':
    im = pv.Image("baboon.jpg")
    mat = im.asMatrix2D()

    # Compute the full singular value decomposition of the image.
    U, D, Vt = sp.linalg.svd(mat)
    D = sp.diag(D)

    # Reconstruct the image from progressively fewer singular values.
    # Keeping only the first 'dim' components gives a rank-'dim'
    # approximation of the original image.
    for dim in [256, 128, 64, 32, 16, 8, 4]:
        U = U[:, :dim]
        D = D[:dim, :dim]
        Vt = Vt[:dim, :]
        mat = sp.dot(sp.dot(U, D), Vt)
        pv.Image(mat).show(delay=0)
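# ---------------------------------------------------------------------------
# Supplementary sketch (not part of the original demo): measure how good each
# rank-k approximation is. This assumes numpy is available and that
# "baboon.jpg" resolves to the same image used above. The relative
# Frobenius-norm error should shrink as more singular values are kept.
import numpy as np

full = pv.Image("baboon.jpg").asMatrix2D()
U, s, Vt = sp.linalg.svd(full)
for dim in [256, 128, 64, 32, 16, 8, 4]:
    # U[:, :dim] * s[:dim] multiplies each column of U by its singular value,
    # which is equivalent to U[:, :dim].dot(diag(s[:dim])).
    approx = np.dot(U[:, :dim] * s[:dim], Vt[:dim, :])
    err = np.linalg.norm(full - approx) / np.linalg.norm(full)
    print "rank %3d: relative error %0.4f" % (dim, err)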
# the imagery and debugging algorithms. Unless otherwise specified,
# ImageLogs are usually created in the directory "/tmp".
ilog = pv.ImageLog()

# Timers keep a record of the time required for algorithms to execute
# and help determine which parts of an algorithm are too slow and need
# optimization.
timer = pv.Timer()

# The filename for the baboon image
filename = os.path.join(pv.__path__[0], 'data', 'misc', 'baboon.jpg')

# If a string is passed to the initializer it is assumed to be a path,
# and the image will be read from that file. The image is usually read
# from disk using PIL and then stored as a PIL image.
im = pv.Image(filename)

# This command saves the image to an image log, which provides useful
# information for debugging. It is often helpful to save many images
# during processing to make sure that each step is producing the
# intended result.
ilog(im, "OriginalImage")

# The PIL toolbox supports many image processing and graphics functions.
# Typically PIL is used for things like reading in image files and
# rendering graphics on top of images for annotations. It tends to
# be slower than OpenCV and also lacks many of the more specialized
# computer vision algorithms.

# pv.Image objects are responsible for converting between image types.
# This next call returns an image in PIL format that can be used with
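# Supplementary sketch of the Timer mentioned above (illustrative; this
# assumes the pv.Timer.mark() method, which records a named checkpoint, and
# that an ImageLog can log a Timer as a table -- check the PyVision docs for
# the exact API):
#
#     timer.mark("BeforeAlgorithm")
#     # ... run the algorithm being timed ...
#     timer.mark("AfterAlgorithm")
#     ilog(timer, "Timing")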
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os.path
from Image import composite, LINEAR
import pyvision as pv
from pyvision.edge.sobel import sobel
#from pyvision.edge.canny import canny
from pyvision.point.DetectorSURF import DetectorSURF
import cv

if __name__ == '__main__':
    ilog = pv.ImageLog()
    source_name = os.path.join(pv.__path__[0], 'data', 'misc', 'p5240019.jpg')

    # Load the source image and resize it to a smaller scale
    im = pv.Image(source_name)
    print "Size before affine scale: %s" % str(im.size)
    im = pv.AffineScale(0.25, (320, 240)).transformImage(im)
    print "Size after scaling: %s" % str(im.size)
    ilog.log(im, 'Input')
    #im.show(window='Input', pos=(0,0))

    # Generate an edge image using the Sobel edge detector
    edges = sobel(im, 1, 0, 3, 0)
    ilog.log(edges, 'Edges')
    #edges.show(window='Edges', pos=(360,0))

    # Generate threshold masks; this demonstrates numpy integration
    mat = im.asMatrix2D()
    high = mat > 180
    low = mat < 50
def _composite(self, img, pos, imgNum):
    """
    Internal method to composite the thumbnail of a given image into the
    correct position, given by (row,col).

    @param img: The image from which a thumbnail will be composited onto
    the montage.
    @param pos: A tuple (row,col) for the position in the montage layout.
    @param imgNum: The image index of the tile being drawn. This helps us
    display the appropriate label in the lower left corner if self._labels
    is not None.
    """
    (row, col) = pos

    if self._keep_aspect:
        # Get the current size
        w, h = img.size

        # Find the scale
        scale = min(1.0 * self._tileSize[0] / w, 1.0 * self._tileSize[1] / h)
        w = int(scale * w)
        h = int(scale * h)

        # Resize preserving the aspect ratio
        img2 = img.resize((w, h)).asPIL()

        # Create a new image with the old image centered
        x = (self._tileSize[0] - w) / 2
        y = (self._tileSize[1] - h) / 2
        pil = PIL.Image.new('RGB', self._tileSize, "#000000")
        pil.paste(img2, (x, y, x + w, y + h))

        # Generate the tile
        tile = pv.Image(pil)
    else:
        tile = img.resize(self._tileSize)

    pos_x = col * (self._tileSize[0] + self._gutter) + self._gutter + self._xpad
    pos_y = row * (self._tileSize[1] + self._gutter) + self._gutter + self._ypad

    cvImg = self._cvMontageImage
    cvTile = tile.asOpenCV()
    cv.SetImageROI(cvImg, (pos_x, pos_y, self._tileSize[0], self._tileSize[1]))

    # Save the position of this image
    self._image_positions.append(
        [self._images[imgNum], imgNum,
         pv.Rect(pos_x, pos_y, self._tileSize[0], self._tileSize[1])])

    depth = cvTile.nChannels
    if depth == 1:
        cvTileBGR = cv.CreateImage(self._tileSize, cv.IPL_DEPTH_8U, 3)
        cv.CvtColor(cvTile, cvTileBGR, cv.CV_GRAY2BGR)
        cv.Copy(cvTileBGR, cvImg)  # should respect the ROI
    else:
        cv.Copy(cvTile, cvImg)  # should respect the ROI

    if self._labels == 'index':
        # Draw the image number in the lower left corner, relative to the ROI
        lbltext = "%d" % imgNum
    elif type(self._labels) == list:
        lbltext = str(self._labels[imgNum])
    else:
        lbltext = None

    if lbltext is not None:
        ((tw, th), _) = cv.GetTextSize(lbltext, self._txtfont)
        #print "DEBUG: tw, th = %d,%d"%(tw,th)
        if tw > 0 and th > 0:
            cv.Rectangle(cvImg, (0, self._tileSize[1] - 1),
                         (tw + 1, self._tileSize[1] - (th + 1) - self._gutter),
                         (0, 0, 0), thickness=cv.CV_FILLED)
            font = self._txtfont
            color = self._txtcolor
            cv.PutText(cvImg, lbltext,
                       (1, self._tileSize[1] - self._gutter - 2), font, color)

    if self._highlighted and (imgNum in self._selected_tiles):
        # Draw a highlight around this image
        cv.Rectangle(cvImg, (0, 0), (self._tileSize[0], self._tileSize[1]),
                     (0, 255, 255), thickness=4)

    # Reset the ROI
    cv.SetImageROI(cvImg, (0, 0, self._size[0], self._size[1]))
def PhaseCorrelation(tile1, tile2, phase_only=True, ilog=None):
    '''
    Uses phase correlation to estimate the best integer displacement to
    align the images. Also fits a quadratic to the correlation surface to
    determine a subpixel estimate of displacement.

    Returns four values as a tuple: max_corr, max_displacement, est_corr,
    est_displacement

    max_corr         - maximum correlation value.
    max_displacement - displacement needed to obtain the maximum correlation. (full pixel)
    est_corr         - estimated correlation value if subpixel displacement is used.
    est_displacement - estimated displacement (subpixel)

    see http://en.wikipedia.org/wiki/Phase_correlation
    '''
    # Accept either pv.Image objects or numpy arrays.
    if isinstance(tile1, pv.Image):
        tile1 = tile1.asMatrix2D()
    else:
        tile1 = pv.Image(tile1).asMatrix2D()

    if isinstance(tile2, pv.Image):
        tile2 = tile2.asMatrix2D()
    else:
        tile2 = pv.Image(tile2).asMatrix2D()

    if tile1.shape != tile2.shape:
        raise ValueError("Image tiles must have the same shape. [tile1 %s] != [tile2 %s]"
                         % (tile1.shape, tile2.shape))

    # copy the data
    tile1 = tile1.copy()
    tile2 = tile2.copy()

    # normalize the image tiles
    tile1 = pv.meanUnit(tile1)
    tile2 = pv.meanUnit(tile2)

    # compute the fft
    Ia = np.fft.fft2(tile1)
    Ib = np.conjugate(np.fft.fft2(tile2))

    # build the normalized cross-power spectrum
    ncs = Ia * Ib
    if phase_only:
        ncs = ncs / np.abs(ncs)

    # build the power spectrum
    pc = np.real(np.fft.ifft2(ncs))

    if ilog != None:
        ilog.log(pv.Image(tile1), label="Tile1")
        ilog.log(pv.Image(tile2), label="Tile2")
        ilog.log(pv.Image(np.fft.fftshift(pc)), label="Correlation")

    max_corr = pc.max()
    max_elem = (pc == max_corr).nonzero()
    max_elem = max_elem[0][0], max_elem[1][0]

    # Wrap displacements larger than half the tile size around to negative
    # offsets.
    max_point = list(max_elem)
    if max_elem[0] * 2 > tile1.shape[0]:
        max_point[0] = -tile1.shape[0] + max_elem[0]
    if max_elem[1] * 2 > tile1.shape[1]:
        max_point[1] = -tile1.shape[1] + max_elem[1]

    est_corr, est_point = QuadradicEstimation(pc, max_point)

    return max_corr, max_point, est_corr, est_point
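# ---------------------------------------------------------------------------
# Supplementary usage sketch (not part of the module; assumes np and pv are
# imported at the top of this file as the function above requires): correlate
# a tile against a circularly shifted copy of itself. The recovered integer
# displacement should match the applied shift of (5, -3), up to the sign
# convention of the correlation peak.
if __name__ == '__main__':
    tile1 = np.random.random((128, 128))
    tile2 = np.roll(np.roll(tile1, 5, axis=0), -3, axis=1)

    max_corr, max_point, est_corr, est_point = PhaseCorrelation(tile1, tile2)
    print "Integer displacement: ", max_point
    print "Subpixel displacement:", est_point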
def setUp(self):
    fname = os.path.join(pv.__path__[0], 'data', 'nonface', 'NONFACE_13.jpg')
    self.test_image = pv.Image(fname)
def asImage(self):
    w, h = self.size
    fw = self.left + w + self.right
    fh = self.top + h + self.bottom
    pil = PIL.Image.new("RGB", (fw, fh), 'white')

    # Draw the title and axis labels
    drawLabel(pil, (0.5 * fw, 0.5 * self.top), self.title,
              align='center', size='huge')
    drawLabel(pil, (5, self.top + 0.5 * h), self.ylabel,
              align='right', size='large', rotate=True)
    drawLabel(pil, (self.left + 0.5 * w, fh - 5), self.xlabel,
              align='above', size='large', rotate=False)

    # Render the plot graphics into the plot area
    tmp_image = PIL.Image.new("RGB", (w, h), 'white')
    bounds = self.range()
    for each in self.graphics:
        each.draw(self, tmp_image, bounds)
    pil.paste(tmp_image, (self.left, self.top))

    # Draw the plot border
    draw = PIL.ImageDraw.Draw(pil)
    draw.rectangle((self.left, self.top, self.left + w, self.top + h),
                   outline='black', fill=None)
    del draw

    # Draw the X axis labels
    at, labels = self.xAxis()

    # Tickmarks
    draw = PIL.ImageDraw.Draw(pil)
    for i in range(len(at)):
        x = self.left + self.x(at[i], bounds)
        y = self.top + h
        draw.line((x, y, x, y + 5), fill='black')
    del draw

    # Labels
    for i in range(len(at)):
        x = self.left + self.x(at[i], bounds)
        y = self.top + h
        drawLabel(pil, (x, y + 5), labels[i], size='large',
                  align='bellow', rotate=False, color='black')

    # Draw the Y axis labels
    at, labels = self.yAxis()

    # Tickmarks
    draw = PIL.ImageDraw.Draw(pil)
    for i in range(len(at)):
        x = self.left
        y = self.top + self.y(at[i], bounds)
        draw.line((x, y, x - 5, y), fill='black')
    del draw

    # Labels
    for i in range(len(at)):
        x = self.left
        y = self.top + self.y(at[i], bounds)
        drawLabel(pil, (x - 5, y), labels[i], size='large',
                  align='left', rotate=True, color='black')

    return pv.Image(pil)
def crop(self, rect, size=None, interpolation=None, return_affine=False):
    '''
    Crops an image to the given rectangle. Rectangle parameters are rounded
    to nearest integer values. High quality resampling.

    The default behavior is to use cv.GetSubRect to crop the image. This
    returns a slice of the OpenCV image, so modifying the resulting image
    data will also modify the data in this image. If a size is provided, a
    new OpenCV image is created for that size and cv.Resize is used to copy
    the image data.

    If the bounds of the rectangle are outside the image, an affine
    transform (pv.AffineFromRect) is used to produce the cropped image, to
    properly handle regions outside the image. In this case the downsampling
    quality may not be as good.

    @param rect: a Rectangle defining the region to be cropped.
    @param size: a new size for the returned image. If None the result is
    not resized.
    @param interpolation: None = Autoselect or one of CV_INTER_AREA,
    CV_INTER_NN, CV_INTER_LINEAR, CV_INTER_BICUBIC
    @param return_affine: If True, also return an affine transform that can
    be used to transform points.
    @returns: a cropped version of the image or, if return_affine is True,
    a tuple of (image, affine)
    @rtype: pv.Image
    '''
    # Notes: pv.Rect(0,0,w,h) should return the entire image. Since pixel values
    # are indexed by zero this means that upper limits are not inclusive: x from [0,w)
    # and y from [0,h)
    x, y, w, h = rect.asTuple()
    x = int(np.round(x))
    y = int(np.round(y))
    w = int(np.round(w))
    h = int(np.round(h))

    # If the rectangle extends outside the image, fall back to an affine
    # transform, which can properly handle out-of-bounds regions.
    if x < 0 or y < 0 or x + w > self.size[0] or y + h > self.size[1]:
        if size == None:
            size = (w, h)
        affine = pv.AffineFromRect(pv.Rect(x, y, w, h), size)
        im = affine(self)
        if return_affine:
            return im, affine
        else:
            return im

    cvim = self.asOpenCV()
    subim = cv.GetSubRect(cvim, (x, y, w, h))
    affine = pv.AffineTranslate(-x, -y, (w, h))

    if size == None:
        size = (w, h)

    new_image = cv.CreateImage(size, cvim.depth, cvim.nChannels)

    if interpolation == None:
        if size[0] < w or size[1] < h:
            # Downsampling so use area interpolation
            interpolation = cv.CV_INTER_AREA
        else:
            # Upsampling so use cubic interpolation
            interpolation = cv.CV_INTER_CUBIC

    cv.Resize(subim, new_image, interpolation)
    affine = pv.AffineNonUniformScale(float(size[0]) / w,
                                      float(size[1]) / h, size) * affine

    if return_affine:
        return pv.Image(new_image), affine
    else:
        return pv.Image(new_image)
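# ---------------------------------------------------------------------------
# Supplementary usage sketch (illustrative; "face.jpg" is a hypothetical
# path): crop a region, resample it to 128x128, and use the returned affine
# to map a point from the original image into the crop's coordinates.
#
#     im = pv.Image("face.jpg")
#     rect = pv.Rect(100, 80, 64, 64)
#     tile, affine = im.crop(rect, size=(128, 128), return_affine=True)
#     pt = affine.transformPoint(pv.Point(110, 90))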
def show(self, window=None, pos=None, delay=0, size=None):
    '''
    Displays the annotated version of the image using OpenCV highgui.

    @param window: the name of the highgui window to use. If a window
    already exists by this name it is reused; otherwise a new highgui
    window is created with this name.
    @param pos: if a new window is being created, the (x,y) coordinate for
    the new window.
    @param delay: a delay in milliseconds to wait for keyboard input
    (passed to cv.WaitKey). 0 delays indefinitely; 30 is good for
    presenting a series of images like a video.

    For performance reasons, namely when using the same window to display
    successive frames of video, we don't want to tear down and re-create
    the window each time. Thus the window frame will persist beyond the
    scope of the call to img.show(). The window will disappear after the
    program exits, or it can be destroyed with a call to cv.DestroyWindow.

    @param size: optional output size for the image, None = native size.
    @returns: the return value of the cv.WaitKey call.
    '''
    if window == None and pv.runningInNotebook() and 'pylab' in globals().keys():
        # If running in a notebook, try to display the image inline.
        if size is None:
            size = self.size

        # Constrain the size of the output
        max_dim = max(size[0], size[1])
        if max_dim > 800:
            scale = 800.0 / max_dim
            size = (int(scale * size[0]), int(scale * size[1]))
        w, h = size

        # TODO: Can't quite figure out how figsize works and how to set it to native pixels
        IPython.core.pylabtools.figsize(1.25 * w / 72.0, 1.25 * h / 72.0)  #@UndefinedVariable
        pylab.figure()
        pylab.imshow(self.asAnnotated(), origin='upper', aspect='auto')
    else:
        # Otherwise, use an OpenCV window
        if window is None:
            window = "PyVisionImage"

        # Create the window
        cv.NamedWindow(window)

        # Set the location
        if pos is not None:
            cv.MoveWindow(window, pos[0], pos[1])

        # Resize the image
        if size is not None:
            x = pyvision.Image(self.asAnnotated().resize(size))
        else:
            x = pyvision.Image(self.asAnnotated())

        # Display the result
        cv.ShowImage(window, x.asOpenCV())
        key = cv.WaitKey(delay=delay)
        del x
        return key
def test_OpenCVToPILGray(self):
    pil = self.im.asPIL().resize((180, 120)).convert('L')
    im = pv.Image(pil)
    # Use a distinct name so the local does not shadow the cv module
    cvim = im.asOpenCV()
    im = pv.Image(cvim)
    pil = im.asPIL()
def asImage(self, ilog=None):
    '''
    Returns the spatial-domain representation of the (frequency domain)
    filter, shifted so the filter is centered in the image.
    '''
    mat = np.fft.ifft2(self.filter.conj())
    mat = np.fft.fftshift(mat)
    return pv.Image(mat.real)
def extract(self, img, face_records):
    '''Extract a template that allows the face to be matched.'''
    # Compute the vector that describes the face in img identified by shape.
    # In general, if two face descriptor vectors have a Euclidean distance
    # between them less than 0.6 then they are from the same person,
    # otherwise they are from different people.
    for face_record in face_records.face_records:
        rect = pt.rect_proto2pv(face_record.detection.location)
        x, y, w, h = rect.asTuple()

        # Extract a centered square view of the face
        cx, cy = x + 0.5 * w, y + 0.5 * h
        tmp = 1.5 * max(w, h)
        cw, ch = tmp, tmp
        crop = pv.AffineFromRect(pv.CenteredRect(cx, cy, cw, ch), (256, 256))

        pvim = pv.Image(img[:, :, ::-1])  # convert rgb to bgr
        pvim = crop(pvim)
        view = pt.image_pv2proto(pvim)
        face_record.view.CopyFrom(view)

        # Extract landmarks
        l, t, r, b = [int(tmp) for tmp in [x, y, x + w, y + h]]
        d = dlib.rectangle(l, t, r, b)
        shape = self.shape_pred(img, d)
        for i in range(len(shape.parts())):
            loc = shape.parts()[i]
            landmark = face_record.landmarks.add()
            landmark.landmark_id = "point_%02d" % i
            landmark.location.x = loc.x
            landmark.location.y = loc.y

        # Resize the view to the input size expected by the network
        tile = pvim.resize((224, 224))

        face_im = tile.asOpenCV2()
        face_im = face_im[:, :, ::-1]  # Convert BGR to RGB

        from keras_vggface import utils
        from keras.preprocessing import image

        face_im = image.img_to_array(face_im)
        face_im = np.expand_dims(face_im, axis=0)
        face_im = utils.preprocess_input(face_im, version=2)

        # Needed in multithreaded applications
        with self.graph.as_default():
            tmp = self.recognizer.predict(face_im)

        face_descriptor = pv.meanUnit(tmp.flatten())

        face_record.template.data.CopyFrom(pt.vector_np2proto(face_descriptor))
        print len(y[mask])
        print len(z[mask])

        # this produces an error, probably because there is too much data
        it.Rbf(x[mask], y[mask], z[mask])


if __name__ == "__main__":
    ilog = pv.ImageLog()
    filename = "02463d562.abs.gz"
    im = pv.Image("02463d563.ppm")

    t = time.time()
    ri = RangeImage(filename)
    t = time.time() - t
    print t

    print ri.getRange()

    ilog.log(ri.getXImage(), "X_Image")
    ilog.log(ri.getYImage(), "Y_Image")
    ilog.log(ri.getZImage(), "Z_Image")
    ilog.log(im, "Color")

    ri.populateMissingData(ilog=ilog)

    ilog.show()
from knn import *

if __name__ == "__main__":
    import pyvision as pv
    import numpy as np

    plot = pv.Image(np.zeros((500, 500)))

    # A fixed sample of 2D points (immediately replaced by random data below)
    data = np.array([[0.89761049, 0.31978809],
                     [0.08168021, 0.75605386],
                     [0.67596172, 0.94886192],
                     [0.8283411, 0.53639021],
                     [0.50589098, 0.64003199],
                     [0.66290861, 0.45572],
                     [0.34614808, 0.16191715],
                     [0.49566747, 0.83423913],
                     [0.32471352, 0.20317006],
                     [0.42948424, 0.78900121],
                     [0.017235, 0.99522359],
                     [0.21276987, 0.15219815],
                     [0.84833654, 0.87647],
                     [0.99716754, 0.47017644],
                     [0.51667204, 0.63936825],
                     [0.370152, 0.06977327],
                     [0.16250232, 0.42129633],
                     [0.59071007, 0.48371244],
                     [0.70240547, 0.72759716],
                     [0.21276305, 0.76596722]])
    data = np.random.random((100, 2))

    for x, y in data[:, :2]:
        plot.annotatePoint(500.0 * pv.Point(x, y), color='gray')

    # Query points (also immediately replaced by random data)
    x = np.array([[0.57097488, 0.33239627],
                  [0.65494268, 0.31132802],
                  [0.58122984, 0.69620259]])
    x = np.random.random((7, 2))

    knn = KNearestNeighbors(data)
    dist, dist_sort = knn.query(x, k=5, p=np.inf)
def gaussianFilter(im, sigma):
    '''
    Smooths a 3-channel color image with a Gaussian kernel. The kernel
    size parameters are passed as 0 so that OpenCV chooses an appropriate
    aperture from sigma.
    '''
    cvim = cv.CreateImage(im.size, cv.IPL_DEPTH_8U, 3)
    cv.Smooth(im.asOpenCV(), cvim, cv.CV_GAUSSIAN, 0, 0, sigma)
    return pv.Image(cvim)
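# ---------------------------------------------------------------------------
# Supplementary usage sketch (illustrative; "lena.jpg" is a hypothetical
# path). Note that gaussianFilter assumes a 3-channel color input, since the
# destination image is created with 3 channels. Larger sigma values produce
# stronger blur.
if __name__ == '__main__':
    im = pv.Image("lena.jpg")
    blurred = gaussianFilter(im, 3.0)
    blurred.show(delay=0)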
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import pyvision as pv
from pyvision.edge.canny import canny  # An interface to the OpenCV Canny.

'''
This code is from part 1 of the PyVision Quick Start Guide.
'''

if __name__ == '__main__':
    # (1) Load an image from a file.
    im = pv.Image(pv.__path__[0] + "/data/nonface/NONFACE_16.jpg")

    # (2) Rescale the image
    im = pv.AffineScale(0.5, (320, 240)).transformImage(im)

    # (3) Run the canny function to locate the edges.
    edge_im1 = canny(im)

    # (4) Run the canny function with non-default thresholds.
    edge_im2 = canny(im, threshold1=100, threshold2=250)

    # (5) Save the results to a log.
    ilog = pv.ImageLog("../..")
    ilog.log(im, label="Source")
    ilog.log(edge_im1, label="Canny1")
    ilog.log(edge_im2, label="Canny2")
def genderClassifier(clsfy, ilog=None):
    '''
    genderClassifier takes a classifier as an argument and will use the
    csuScrapShot data to perform a gender classification test on that
    classifier.

    These three functions will be called::

        for im in training_images:
            clsfy.addTraining(label, im, ilog=ilog)

        clsfy.train(ilog=ilog)

        for im in testing_images:
            clsfy.predict(im, ilog=ilog)

    label = 0 or 1 (0=Female, 1=Male)

    im is a 64x64 pyvision image that is normalized to crop the face

    The output of predict should be a class label (0 or 1).

    @returns: the success rate for the testing set.
    '''
    filename = os.path.join(pv.__path__[0], 'data', 'csuScrapShots', 'gender.txt')
    f = open(filename, 'r')

    image_cache = []
    examples = []
    for line in f:
        im_name, class_name = line.split()
        if class_name == 'F':
            class_name = 0
        else:
            class_name = 1
        long_name = os.path.join(pv.__path__[0], 'data', 'csuScrapShots', im_name)
        leye, reye = SCRAPS_EYES.getEyes(im_name)[0]
        im = pv.Image(long_name)
        image_cache.append(im)

        # Geometrically normalize the face so the eyes land at fixed points
        # in a 64x64 tile.
        im = pv.AffineFromPoints(leye, reye, pv.Point(22, 27), pv.Point(42, 27),
                                 (64, 64)).transformImage(im)
        examples.append([class_name, im, im_name])

    training = examples[:103]
    testing = examples[103:]

    for each in training:
        clsfy.addTraining(each[0], each[1], ilog=ilog)

    clsfy.train(ilog=ilog)

    table = pv.Table()
    correct = 0
    total = 0
    for each in testing:
        label = clsfy.predict(each[1], ilog=ilog)
        total += 1
        if label == each[0]:
            correct += 1
    rate = float(correct) / total

    if ilog:
        ilog.table(table)

    return rate
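# ---------------------------------------------------------------------------
# A minimal classifier skeleton satisfying the interface described in the
# docstring above (illustrative only; the nearest-class-mean rule is an
# arbitrary stand-in, not a PyVision class, and numpy is assumed to be
# imported as np as elsewhere in this module).
class NearestMeanGenderClassifier:

    def __init__(self):
        self.data = {0: [], 1: []}
        self.means = {}

    def addTraining(self, label, im, ilog=None):
        # Flatten the normalized 64x64 tile into a feature vector
        self.data[label].append(im.asMatrix2D().flatten())

    def train(self, ilog=None):
        # Compute the mean feature vector for each class
        for label, vecs in self.data.items():
            self.means[label] = np.array(vecs).mean(axis=0)

    def predict(self, im, ilog=None):
        # Return the label of the nearest class mean
        vec = im.asMatrix2D().flatten()
        dists = [(np.sum((vec - mean) ** 2), label)
                 for label, mean in self.means.items()]
        return min(dists)[1]

#rate = genderClassifier(NearestMeanGenderClassifier())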
def loadFilterEyeLocator(filename, ilog=None):
    '''
    Loads the eye locator from a file.
    '''
    # open the file
    f = open(filename, 'rb')

    # Check the first line
    line = f.readline().strip()
    assert line == "CFEL"

    # read past the comment and copyright.
    f.readline()
    f.readline()

    # get the width and the height
    r, c = f.readline().split()
    r, c = int(r), int(c)

    # read in the left bounding rectangle
    x, y, w, h = f.readline().split()
    left_rect = (int(x), int(y), int(w), int(h))

    # read in the right bounding rectangle
    x, y, w, h = f.readline().split()
    right_rect = (int(x), int(y), int(w), int(h))

    # read the magic number
    magic_number = f.readline().strip()
    assert len(magic_number) == 4
    magic_number = struct.unpack('i', magic_number)[0]

    # Read in the filter data
    lf = array.array('f')
    rf = array.array('f')
    lf.fromfile(f, r * c)
    rf.fromfile(f, r * c)

    # Test the magic number and byteswap if necessary.
    if magic_number == 0x41424344:
        pass
    elif magic_number == 0x44434241:
        lf.byteswap()
        rf.byteswap()
    else:
        raise ValueError("Bad Magic Number: Unknown byte ordering in file")

    # Create the left and right filters
    left_filter = cv.CreateMat(r, c, cv.CV_32F)
    right_filter = cv.CreateMat(r, c, cv.CV_32F)

    # Copy data into the left and right filters
    cv.SetData(left_filter, lf.tostring())
    cv.SetData(right_filter, rf.tostring())

    # Normalize the filters to zero mean and unit standard deviation
    tmp = pv.OpenCVToNumpy(left_filter)
    t1 = tmp.mean()
    t2 = tmp.std()
    cv.Scale(left_filter, left_filter, 1.0 / t2, -t1 * 1.0 / t2)

    tmp = pv.OpenCVToNumpy(right_filter)
    t1 = tmp.mean()
    t2 = tmp.std()
    cv.Scale(right_filter, right_filter, 1.0 / t2, -t1 * 1.0 / t2)

    if ilog != None:
        lf = pv.OpenCVToNumpy(left_filter)
        rf = pv.OpenCVToNumpy(right_filter)

        lf = np.fft.fftshift(lf).transpose()
        rf = np.fft.fftshift(rf).transpose()

        ilog.log(pv.Image(lf), label="LeftEyeFilter")
        ilog.log(pv.Image(rf), label="RightEyeFilter")

    # Return the eye locator
    return OpenCVFilterEyeLocator(left_filter, right_filter, left_rect, right_rect)
def train(self, image_dir, eye_data):
    '''
    This function trains the logistic regression model to score the
    meta-detections. Images must be oriented so that the face is upright.

    @param image_dir: A pathname containing images.
    @param eye_data: a list of tuples (from csv): filename, eye1x, eye1y,
    eye2x, eye2y
    '''
    print("Training")
    data_set = []

    progress = pv.ProgressBar(maxValue=len(eye_data))
    for row in eye_data:
        filename = row[0]
        print("Processing", row)
        points = [float(val) for val in row[1:]]
        eye1 = pv.Point(points[0], points[1])
        eye2 = pv.Point(points[2], points[3])

        # Compute the truth rectangle from the eye coordinates
        ave_dist = np.abs(cd.AVE_LEFT_EYE.X() - cd.AVE_RIGHT_EYE.X())
        y_height = 0.5 * (cd.AVE_LEFT_EYE.Y() + cd.AVE_RIGHT_EYE.Y())
        x_center = 0.5 * (eye1.X() + eye2.X())
        x_dist = np.abs(eye1.X() - eye2.X())
        width = x_dist / ave_dist
        y_center = 0.5 * (eye1.Y() + eye2.Y()) + (0.5 - y_height) * width
        truth = pv.CenteredRect(x_center, y_center, width, width)

        # Read the image
        im = pv.Image(os.path.join(image_dir, filename))

        # Compute the detections
        detections = self.raw_detections(im)

        # Score the detections. Detections with a similarity above 0.7
        # count as correct and get a value of 1.0 in the logistic
        # regression. Incorrect detections get a value of 0.0.
        scores = [truth.similarity(each[0]) for each in detections]
        for i in range(len(scores)):
            score = scores[i]
            detection = detections[i]
            success = 0.0
            if score > 0.7:
                success = 1.0
            entry = detection[1], success, detection[2:]
            print(entry)
            data_set.append(entry)

        # Display the results
        im = im.scale(self.prescale)
        colors = {'FACE': 'yellow', 'HEAD': 'blue'}
        for detection in detections:
            rect = self.prescale * detection[0]
            im.annotateRect(rect, color=colors[detection[1]])
        im.annotateRect(self.prescale * truth, color='red')

        progress.updateAmount()
        progress.show()
        print()
        #im.show(delay=1)
    progress.finish()

    obs = [each[1] for each in data_set]
    data = [each[2] for each in data_set]
    print(obs)
    print(data)
    self.quality.train(obs, data)
    return

    # NOTE: The code below is unreachable due to the return above. It is
    # kept as a record of an older per-category training path.
    for each in data_set:
        self.quality[each[0]][1].append(each[1])
        self.quality[each[0]][2].append(each[2])

    for key, value in self.quality.items():
        print("Training:", key)
        obs = value[1]
        data = value[2]
        assert len(obs) == len(data)
        value[0].train(obs, data)
        print(value[0].params)

    print("Done Training")
def test_prev_ref3(self):
    fname = os.path.join(pv.__path__[0], 'data', 'nonface', 'NONFACE_13.jpg')
    torig = tprev = im_a = pv.Image(fname)
    #im_a.show()
    w, h = im_a.size

    # Scale
    aff = pv.AffineScale(0.5, (w / 2, h / 2))
    accu = aff
    torig = aff.transformImage(torig)
    tprev = aff.transformImage(tprev, use_orig=False)
    taccu = accu.transformImage(im_a)

    torig.annotateLabel(pv.Point(10, 10), "use_orig = True")
    tprev.annotateLabel(pv.Point(10, 10), "use_orig = False")
    taccu.annotateLabel(pv.Point(10, 10), "accumulated")

    #torig.show()
    #tprev.show()
    #taccu.show()

    # Translate
    aff = pv.AffineTranslate(20, 20, (w / 2, h / 2))
    accu = aff * accu
    torig = aff.transformImage(torig)
    tprev = aff.transformImage(tprev, use_orig=False)
    taccu = accu.transformImage(im_a)

    torig.annotateLabel(pv.Point(10, 10), "use_orig = True")
    tprev.annotateLabel(pv.Point(10, 10), "use_orig = False")
    taccu.annotateLabel(pv.Point(10, 10), "accumulated")

    #torig.show()
    #tprev.show()
    #taccu.show()

    # Rotate
    aff = pv.AffineRotate(np.pi / 4, (w / 2, h / 2))
    accu = aff * accu
    torig = aff.transformImage(torig)
    tprev = aff.transformImage(tprev, use_orig=False)
    taccu = accu.transformImage(im_a)

    torig.annotateLabel(pv.Point(10, 10), "use_orig = True")
    tprev.annotateLabel(pv.Point(10, 10), "use_orig = False")
    taccu.annotateLabel(pv.Point(10, 10), "accumulated")

    #torig.show()
    #tprev.show()
    #taccu.show()

    # Translate
    aff = pv.AffineTranslate(100, -10, (w / 2, h / 2))
    accu = aff * accu
    torig = aff.transformImage(torig)
    tprev = aff.transformImage(tprev, use_orig=False)
    taccu = accu.transformImage(im_a)

    torig.annotateLabel(pv.Point(10, 10), "use_orig = True")
    tprev.annotateLabel(pv.Point(10, 10), "use_orig = False")
    taccu.annotateLabel(pv.Point(10, 10), "accumulated")

    #torig.show()
    #tprev.show()
    #taccu.show()

    # Scale
    aff = pv.AffineScale(2.0, (w, h))
    accu = aff * accu
    torig = aff.transformImage(torig)
    tprev = aff.transformImage(tprev, use_orig=False)
    taccu = accu.transformImage(im_a)

    torig.annotateLabel(pv.Point(10, 10), "use_orig = True")
    tprev.annotateLabel(pv.Point(10, 10), "use_orig = False")
    taccu.annotateLabel(pv.Point(10, 10), "accumulated")
# The ASEF eye locator has patent complications. This next line
# disables those warnings.
pv.disableCommercialUseWarnings()

from pyvision.face.CascadeDetector import CascadeDetector
from pyvision.face.FilterEyeLocator import FilterEyeLocator

if __name__ == "__main__":
    ilog = pv.ImageLog()

    # Load the face image file
    fname = os.path.join(pv.__path__[0], 'data', 'misc', 'FaceSample.jpg')

    # Create the annotation image in black and white so that color
    # annotations show up better.
    im = pv.Image(fname, bw_annotate=True)
    ilog(pv.Image(fname), "Original")

    # Create an OpenCV cascade face detector object
    cd = CascadeDetector()

    # Create an eye detector object
    el = FilterEyeLocator()

    # Call the face detector like a function to get a list of face rectangles
    rects = cd(im)

    # Print the list of rectangles
    print "Face Detection Output:", rects
def transformImage(self, im_a, use_orig=True, inverse=False):
    '''
    Transforms an image into the new coordinate system.

    If this image was produced via an affine transform of another image,
    this method will attempt to trace weak references to the original image
    and directly compute the new image from that image to improve accuracy.
    To accomplish this, a weak reference to the original source image and
    the affine matrix used for the transform are added to any image produced
    by this method. This can be disabled using the use_orig parameter.

    @param im_a: an Image object
    @param use_orig: (True or False) attempts to find and use the original
    image as the source to avoid an accumulation of errors.
    @returns: the transformed image
    '''
    #TODO: does not support opencv images. see Perspective.py
    prev_im = im_a

    if inverse:
        inverse = self.matrix
    else:
        inverse = self.inverse

    if use_orig:
        # Find the oldest image used to produce this one by following weak
        # references.

        # Check to see if there is an aff_prev list
        if hasattr(prev_im, 'aff_prev'):
            # If there is... search that list for the oldest image
            found_prev = False
            for i in range(len(prev_im.aff_prev)):
                ref, cmat = prev_im.aff_prev[i]
                if not found_prev and ref():
                    im_a = ref()
                    mat = np.eye(3)
                    found_prev = True
                if found_prev:
                    mat = np.dot(mat, cmat)

            if found_prev:
                inverse = np.dot(mat, inverse)

    if im_a.getType() == TYPE_PIL:
        data = inverse[:2, :].flatten()
        pil = im_a.asPIL().transform(self.size, AFFINE, data, self.interpolate)
        result = Image(pil)
    elif im_a.getType() == TYPE_MATRIX_2D:
        # Transform a 2D matrix
        mat = im_a.asMatrix2D()
        mat = affine_transform(mat, self.inverse[:2, :2],
                               offset=self.inverse[:2, 2])
        result = Image(mat[:self.size[0], :self.size[1]])
    elif im_a.getType() == TYPE_MATRIX_RGB:
        # Transform an RGB matrix one channel at a time
        mat = im_a.asMatrix3D()
        c0 = mat[0, :, :]
        c1 = mat[1, :, :]
        c2 = mat[2, :, :]
        c0 = affine_transform(c0, self.inverse[:2, :2],
                              offset=self.inverse[:2, 2])
        c1 = affine_transform(c1, self.inverse[:2, :2],
                              offset=self.inverse[:2, 2])
        c2 = affine_transform(c2, self.inverse[:2, :2],
                              offset=self.inverse[:2, 2])
        mat = np.array([c0, c1, c2], dtype=np.float32)
        result = Image(mat[:, :self.size[0], :self.size[1]])
    elif im_a.getType() == TYPE_OPENCV2:
        # Transform an OpenCV 2 image
        src = im_a.asOpenCV2()
        dst = cv2.warpPerspective(src, self.matrix, self.size)
        result = pv.Image(dst)
    elif im_a.getType() == TYPE_OPENCV2BW:
        # Transform a grayscale OpenCV 2 image
        src = im_a.asOpenCV2BW()
        dst = cv2.warpPerspective(src, self.matrix, self.size)
        result = pv.Image(dst)
    else:
        raise NotImplementedError("Unhandled image type for affine transform.")

    # Check to see if there is an aff_prev list for this object
    if use_orig and hasattr(prev_im, 'aff_prev'):
        # Create one if not
        result.aff_prev = copy.copy(prev_im.aff_prev)
    else:
        result.aff_prev = []

    # Append the prev image and new transform
    result.aff_prev.append((weakref.ref(prev_im), self.inverse))

    return result
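# ---------------------------------------------------------------------------
# Supplementary usage sketch (illustrative; "face.jpg" is a hypothetical
# path). With use_orig=True (the default), chained transforms trace weak
# references back to the original image, so repeated warps do not accumulate
# interpolation error:
#
#     im = pv.Image("face.jpg")
#     rot = pv.AffineRotate(np.pi / 8, im.size)
#     out1 = rot.transformImage(im)                    # computed from im
#     out2 = rot.transformImage(out1)                  # re-traced back to im
#     out3 = rot.transformImage(out1, use_orig=False)  # computed from out1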
def query(self):
    if self.idx >= len(self.filelist):
        return None
    f = self.filelist[self.idx]
    frame = pv.Image(f).asOpenCV()
    self.idx += 1
    return pv.Image(self.resize(frame))
def transformImage(self, im, use_orig=True, inverse=False):
    '''
    Transforms an image into the new coordinate system.

    If this image was produced via an affine transform of another image,
    this method will attempt to trace weak references to the original image
    and directly compute the new image from that image to improve accuracy.
    To accomplish this, a weak reference to the original source image and
    the affine matrix used for the transform are added to any image produced
    by this method. This can be disabled using the use_orig parameter.

    @param im: an Image object
    @param use_orig: (True or False) attempts to find and use the original
    image as the source to avoid an accumulation of errors.
    @returns: the transformed image
    '''
    #TODO: does not support opencv images. see Perspective.py
    prev_im = im

    if inverse:
        inverse = self.matrix
    else:
        inverse = self.inverse

    if use_orig:
        # Find the oldest image used to produce this one by following weak
        # references.

        # Check to see if there is an aff_prev list
        if hasattr(prev_im, 'aff_prev'):
            # If there is... search that list for the oldest image
            found_prev = False
            for i in range(len(prev_im.aff_prev)):
                ref, cmat = prev_im.aff_prev[i]
                if not found_prev and ref():
                    im = ref()
                    mat = np.eye(3)
                    found_prev = True
                if found_prev:
                    mat = np.dot(mat, cmat)

            if found_prev:
                inverse = np.dot(mat, inverse)

    if im.getType() == TYPE_PIL:
        data = inverse[:2, :].flatten()
        pil = im.asPIL().transform(self.size, AFFINE, data, self.filter)
        result = Image(pil)
    elif im.getType() == TYPE_MATRIX_2D:
        mat = im.asMatrix2D()
        mat = affine_transform(mat, self.inverse[:2, :2],
                               offset=self.inverse[:2, 2])
        result = Image(mat)
    elif im.getType() == TYPE_OPENCV:
        matrix = pv.NumpyToOpenCV(self.matrix)
        src = im.asOpenCV()
        dst = cv.CreateImage((self.size[0], self.size[1]),
                             cv.IPL_DEPTH_8U, src.nChannels)
        cv.WarpPerspective(src, dst, matrix,
                           cv.CV_INTER_LINEAR + cv.CV_WARP_FILL_OUTLIERS,
                           cv.ScalarAll(128))
        result = pv.Image(dst)
    else:
        raise NotImplementedError("Unhandled image type for affine transform.")

    # Check to see if there is an aff_prev list for this object
    if use_orig and hasattr(prev_im, 'aff_prev'):
        # Create one if not
        result.aff_prev = copy.copy(prev_im.aff_prev)
    else:
        result.aff_prev = []

    # Append the prev image and new transform
    result.aff_prev.append((weakref.ref(prev_im), self.inverse))

    return result
def query(self):
    if self.current_frame < self.numFrames:
        frame = pv.Image(self.imageStack[self.current_frame, :, :])
        self.current_frame += 1
        return pv.Image(self.resize(frame.asOpenCV()))
    return None
def asGraph(self, as_image=False):
    '''
    This uses runtime analysis to create a dataflow graph for this VTM.
    '''
    import pydot
    import pyvision as pv
    import PIL.Image
    from io import BytesIO  # graph.create_png() returns bytes, so use BytesIO

    def formatNum(n):
        '''
        This formats frame offsets correctly: -1,0,+1
        '''
        if n == 0:
            return '0'
        else:
            return "%+d" % n

    # Create the graph.
    graph = pydot.Dot(graph_type='digraph', nodesep=.3, ranksep=.5)
    graph.add_node(pydot.Node("Data Input", shape='invhouse',
                              style='filled', fillcolor='#ffCC99'))
    graph.add_node(pydot.Node("Video Input", shape='invhouse',
                              style='filled', fillcolor='#ffCC99'))
    graph.add_edge(pydot.Edge("Video Input", "FRAME"))
    graph.add_edge(pydot.Edge("Video Input", "LAST_FRAME"))

    if self.playback_shelf != None:
        graph.add_node(pydot.Node("Playback", shape='invhouse',
                                  style='filled', fillcolor='#ffCC99'))

    subgraphs = {None: graph}

    # Add task nodes
    for each in self.task_set:
        if 'call_count' in self.task_data[each]:
            class_name = self.task_data[each]['class_name']
            call_count = self.task_data[each]['call_count']
            mean_time = self.task_data[each]['time_sum'] / call_count
            node_label = "{" + " | ".join([
                class_name,
                "Time=%0.2fms" % (mean_time * 1000.0, ),
                "Calls=%d" % (call_count, ),
            ]) + "}"

            color = '#99CC99'
            print(each, self.task_data[each])
            if self.task_data[each]['color'] is not None:
                color = self.task_data[each]['color']

            subgraph = self.task_data[each]['subgraph']
            subgraph_name = subgraph
            if subgraph_name != None:
                subgraph_name = "_".join(subgraph.split())

            if subgraph not in subgraphs:
                print("adding subgraph", subgraph)
                subgraphs[subgraph_name] = pydot.Cluster(
                    subgraph_name, label=subgraph, shape='box',
                    style='filled', fillcolor='#DDDDDD', nodesep=1.0)
                subgraphs[None].add_subgraph(subgraphs[subgraph_name])

            print("adding node", each, subgraph)
            subgraphs[subgraph_name].add_node(
                pydot.Node(each, label=node_label, shape='record',
                           style='filled', fillcolor=color))
        else:
            # The task node was never executed
            call_count = 0
            mean_time = -1
            class_name = self.task_data[each]['class_name']
            node_label = "{" + " | ".join([
                class_name,
                "Time=%0.2fms" % (mean_time * 1000.0, ),
                "Calls=%d" % (call_count, ),
            ]) + "}"
            graph.add_node(
                pydot.Node(each, label=node_label, shape='record',
                           style='filled', fillcolor='#CC3333'))

    # Add data nodes
    for each, subgraph in self.data_set:
        subgraph_name = subgraph
        if subgraph_name != None:
            subgraph_name = "_".join(subgraph.split())
        subgraphs[subgraph_name].add_node(
            pydot.Node(each, shape='box', style='rounded, filled',
                       fillcolor='#9999ff'))

    # Add edges.
    for each, offsets in self.flow.items():
        offsets = list(offsets)
        if len(offsets) == 1 and list(offsets)[0] == 0:
            graph.add_edge(pydot.Edge(each[0], each[1]))
        else:
            offsets = formatOffsets(offsets)
            graph.add_edge(pydot.Edge(each[0], each[1], label=offsets,
                                      label_scheme=2, labeldistance=2,
                                      labelfloat=False))

    # Render the graph to a pv.Image if requested.
    if as_image:
        data = graph.create_png()
        f = BytesIO(data)
        im = pv.Image(PIL.Image.open(f))
        return im
    return graph
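# ---------------------------------------------------------------------------
# Supplementary usage sketch (illustrative; "vtm" is assumed to be a video
# task manager instance that has already processed some frames):
#
#     im = vtm.asGraph(as_image=True)   # render the dataflow graph as a pv.Image
#     im.show(delay=0)
#     vtm.asGraph().write_png("dataflow.png")  # or write the PNG via pydot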
    # Create an image log if this is being saved to a file.
    ilog = None
    if options.log_dir != None:
        print("Creating Image Log...")
        ilog = pv.ImageLog(options.log_dir)

    # For each image run face and eye detection
    face_detect = CascadeDetector(image_scale=1.3 * options.scale)
    locate_eyes = FilterEyeLocator()  #locator_filename)

    c = 0
    for pathname in image_names:
        c += 1

        im = pv.Image(pathname)

        scale = options.log_scale
        log_im = pv.AffineScale(scale, (int(scale * im.width),
                                        int(scale * im.height))).transformImage(im)

        results = processFaces(im, face_detect, locate_eyes)

        if options.rotate:
            # Also detect faces in a 90-degree rotated copy of the image and
            # map the detections back into the original image's coordinates.
            rot_image = pv.Image(im.asPIL().transpose(PIL.Image.ROTATE_90))
            more_results = processFaces(rot_image, face_detect, locate_eyes)
            for face, eye1, eye2 in more_results:
                results.append([pv.Rect(im.width - face.y - face.h, face.x,
                                        face.h, face.w),
                                pv.Point(im.width - eye1.Y(), eye1.X()),