def update(self, data):
    blue = pl.less(data, 0.)  # Fill in True where less than 0.0
    red = ~blue               # Reverse of the above

    # Blue
    self.image[..., 2][blue] = pl.minimum(pl.absolute(pl.divide(data[blue], 255.)), 1.)
    # Red -- Max 40C, so we increase the intensity of the red color 6 times
    self.image[..., 0][red] = pl.minimum(1., pl.divide(pl.multiply(data[red], 6.), 255.))

    pl.imshow(self.image)
    pl.draw()
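# Minimal usage sketch for the colour-mapping update() above. It assumes `pl` is
# the pylab module and that the object's `image` attribute is an HxWx3 float RGB
# buffer; the container class and the sample data below are hypothetical.
import numpy as np
import pylab as pl

class ThermalView(object):
    def __init__(self, shape):
        self.image = pl.zeros(shape + (3,))

view = ThermalView((4, 4))
data = np.random.uniform(-255.0, 255.0, size=(4, 4))  # fake sensor readings
update(view, data)   # call the function above with an explicit self
pl.show()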
def update(self, img):
    img_now = ops.read_image(img)
    if img_now.ndim == 3:
        img_now = ops.rgb2gray(img_now)

    # extract and pre-process subwindow at the previous position
    x = ops.get_subwindow(img_now, self.pos, self.sz, self.cos_window)

    # calculate response of the classifier at all locations
    k = ops.dense_gauss_kernel(self.sigma, x, self.z)
    kf = pylab.fft2(k)
    alphaf_kf = pylab.multiply(self.alphaf, kf)
    response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

    # target location is at the maximum response
    row, col = pylab.unravel_index(response.argmax(), response.shape)
    self.pos = self.pos - pylab.floor(self.sz / 2) + [row, col]

    # retrain the classifier at the new position
    x = ops.get_subwindow(img_now, self.pos, self.sz, self.cos_window)
    k = ops.dense_gauss_kernel(self.sigma, x)
    new_alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    new_z = x

    # interpolate the model with the previous frame
    f = self.interpolation_factor
    self.alphaf = (1 - f) * self.alphaf + f * new_alphaf
    self.z = (1 - f) * self.z + f * new_z

    # return the new bounding box as (x, y, w, h)
    box_new = np.array([self.pos[1] - self.sz[1] / 2 + 1,
                        self.pos[0] - self.sz[0] / 2 + 1,
                        self.sz[1],
                        self.sz[0]], dtype=np.float32)
    return box_new
def init(self, img, box):
    img_now = ops.read_image(img)
    self.target_sz = np.array([box[3], box[2]])
    self.pos = np.array([box[1], box[0]]) + self.target_sz / 2

    # window size, taking padding into account
    self.sz = pylab.floor(self.target_sz * (1 + self.padding))

    # desired output (gaussian shaped), bandwidth proportional to target size
    self.output_sigma = pylab.sqrt(pylab.prod(self.target_sz)) * self.output_sigma_factor

    grid_y = pylab.arange(self.sz[0]) - pylab.floor(self.sz[0] / 2)
    grid_x = pylab.arange(self.sz[1]) - pylab.floor(self.sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / self.output_sigma**2 * (rs**2 + cs**2))
    self.yf = pylab.fft2(y)

    # store pre-computed cosine window
    self.cos_window = pylab.outer(pylab.hanning(self.sz[0]),
                                  pylab.hanning(self.sz[1]))

    if img_now.ndim == 3:
        img_now = ops.rgb2gray(img_now)

    # train on the first frame
    x = ops.get_subwindow(img_now, self.pos, self.sz, self.cos_window)
    k = ops.dense_gauss_kernel(self.sigma, x)
    self.alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    self.z = x
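# Hypothetical usage sketch for the init()/update() pair above. It assumes a
# tracker class exposing these two methods plus the hyper-parameters they read
# (padding, output_sigma_factor, sigma, lambda_value, interpolation_factor) and
# an `ops` module providing read_image/rgb2gray/get_subwindow/dense_gauss_kernel.
# The class name, file names and initial box are illustrative only.
tracker = KCFTracker()                      # hypothetical wrapper class
tracker.padding = 1.0
tracker.output_sigma_factor = 1 / 16.0
tracker.sigma = 0.2
tracker.lambda_value = 1e-2
tracker.interpolation_factor = 0.075

tracker.init("frames/0001.jpg", [120, 80, 64, 48])      # box is (x, y, w, h)
for name in ["frames/0002.jpg", "frames/0003.jpg"]:
    box = tracker.update(name)                           # (x, y, w, h) float32 array
    print(name, box)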
def convolve(f, g):
    ftilda = numpy.fft.fft(f)  # FT of f
    gtilda = numpy.fft.fft(g)  # FT of g
    # convolution using properties of Fourier transforms and convolution
    convolution = numpy.fft.ifft(pl.multiply(ftilda, gtilda))
    return pl.divide(convolution, len(ftilda)) * T
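# Small sanity-check sketch for convolve(), assuming `numpy` and `pl` (pylab) are
# imported as in the function and that the global scale factor T is defined
# elsewhere; T = 1.0 below is a stand-in value.
import numpy
import pylab as pl

T = 1.0
f = pl.zeros(64); f[10:20] = 1.0          # two simple box signals
g = pl.zeros(64); g[5:15] = 1.0

c = convolve(f, g)                        # circular convolution via the FFT
N = len(f)
direct = pl.array([sum(f[m] * g[(n - m) % N] for m in range(N))
                   for n in range(N)]) / N * T
print(numpy.allclose(c, direct))          # expect True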
def displayData(X):
    print("Visualizing")
    m, n = X.shape
    width = int(round(sqrt(n)))
    height = width
    display_rows = int(floor(sqrt(m)))
    display_cols = int(ceil(m / display_rows))
    print("Cell width:", width)
    print("Cell height:", height)
    print("Display rows:", display_rows)
    print("Display columns:", display_cols)

    display = zeros((display_rows * height, display_cols * width))

    # Iterate through the training examples, reshape each one and populate
    # the display matrix with the letter matrices.
    for xrow in range(0, m):
        rowindex = xrow // display_cols      # integer division to get the grid cell
        columnindex = xrow % display_cols
        rowstart = int(rowindex * height)
        rowend = int((rowindex + 1) * height)
        colstart = int(columnindex * width)
        colend = int((columnindex + 1) * width)
        display[rowstart:rowend, colstart:colend] = \
            X[xrow, :].reshape(height, width).transpose()

    imshow(display, cmap=get_cmap('binary'), interpolation='none')

    # Show plot without blocking
    draw()
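# Minimal usage sketch for displayData, assuming the surrounding module does
# `from pylab import *` (the function relies on names such as zeros, imshow and
# get_cmap). The data below is random and purely illustrative.
from pylab import *

X = rand(100, 400)     # 100 samples, each a flattened 20x20 patch
displayData(X)
show()                 # block here, since displayData itself only calls draw()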
def update_ret_response(self, new_img):
    '''
    :param new_img: new frame, should be normalized, for tracker_status
                    estimating the rect_snr
    :return: ok, rect, psr, response
    '''
    self.canvas = new_img.copy()
    self.trackNo += 1

    # get subwindow at current estimated target position, to train classifier
    x = self.get_subwindow(new_img, self.pos, self.window_sz, self.cos_window)

    # calculate response of the classifier at all locations
    k = self.dense_gauss_kernel(self.sigma, x, self.z)
    kf = pylab.fft2(k)
    alphaf_kf = pylab.multiply(self.alphaf, kf)
    response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

    # target location is at the maximum response
    row, col = pylab.unravel_index(response.argmax(), response.shape)

    # roi rect's top-left point plus [row, col]
    self.tly, self.tlx = self.pos - pylab.floor(self.window_sz / 2)

    # the pos is not assigned to self.pos at once; we need to check the psr first.
    # If it is above the threshold (default is 5), self.pos = pos.
    pos = np.array([self.tly, self.tlx]) + np.array([row, col])

    # Note: pos is (cy, cx), while cv2 rects are (x, y, w, h)!
    rect = pylab.array([pos[1] - self.target_sz[1] / 2,
                        pos[0] - self.target_sz[0] / 2,
                        self.target_sz[1], self.target_sz[0]])
    rect = rect.astype(int)

    self.psr, self.trkStatus = self.tracker_status(col, row, response, rect, new_img)
    self.pos = pos

    # only update the model when tracker_status's psr is high
    if self.psr > 10:
        # compute new_alphaf and the observed x as z
        x = self.get_subwindow(new_img, self.pos, self.window_sz, self.cos_window)

        # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        k = self.dense_gauss_kernel(self.sigma, x)
        new_alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        new_z = x

        # subsequent frames, interpolate model
        f = self.interpolation_factor
        self.alphaf = (1 - f) * self.alphaf + f * new_alphaf
        self.z = (1 - f) * self.z + f * new_z

    ok = 1
    return ok, rect, self.psr, response
def fcurves():
    from pylab import ogrid, divide, clabel, contour, plot

    # range of R and P values, respectively.
    # X is a column vector, Y is a row vector.
    X, Y = ogrid[0:1:.001, 0:1:.001]

    # matrix s.t. F[P, R] = 2PR / (P + R)
    F = divide(2 * X * Y, X + Y)

    # P = R reference line
    plot(X[..., 0], X[..., 0], color='#cccccc')

    # show F score curves at values .5, .7, and .9
    clabel(contour(X[..., 0], Y[0, ...], F,
                   levels=[.5, .7, .9],
                   colors='#aaaaaa', linewidths=2),
           fmt='%.1f', inline_spacing=1)
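# Usage sketch: render the iso-F curves over the precision/recall unit square
# and keep the window open (fcurves() itself does not call show()).
import pylab
fcurves()
pylab.show()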
def initialize(self, image, pos, target_sz):
    if len(image.shape) == 3 and image.shape[2] > 1:
        image = rgb2gray(image)
    self.image = image
    if self.should_resize_image:
        self.image = scipy.misc.imresize(self.image, 0.5)
        self.image = self.image / 255.0

    # window size, taking padding into account
    self.sz = pylab.floor(target_sz * (1 + self.padding))
    self.pos = pos

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(self.sz)) * self.output_sigma_factor

    grid_y = pylab.arange(self.sz[0]) - pylab.floor(self.sz[0] / 2)
    grid_x = pylab.arange(self.sz[1]) - pylab.floor(self.sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    self.y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    self.yf = pylab.fft2(self.y)

    # store pre-computed cosine window
    self.cos_window = pylab.outer(pylab.hanning(self.sz[0]),
                                  pylab.hanning(self.sz[1]))

    # get subwindow at current estimated target position,
    # to train classifier
    x = get_subwindow(self.image, self.pos, self.sz, self.cos_window)

    # Kernel Regularized Least-Squares,
    # calculate alphas (in Fourier domain)
    k = dense_gauss_kernel(self.sigma, x)
    self.alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    self.z = x
    return
def update_template(self):
    """
    Update the tracking template from the current estimated target position
    (self.image and self.pos are expected to have been set by the caller).
    """
    # get subwindow at current estimated target position,
    # to train classifier
    x = get_subwindow(self.image, self.pos, self.sz, self.cos_window)

    # Kernel Regularized Least-Squares,
    # calculate alphas (in Fourier domain)
    k = dense_gauss_kernel(self.sigma, x)
    new_alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    new_z = x

    # subsequent frames, interpolate model
    f = self.interpolation_factor
    self.alphaf = (1 - f) * self.alphaf + f * new_alphaf
    self.z = (1 - f) * self.z + f * new_z
    return
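# Standalone sketch of the two precomputed ingredients used by initialize():
# the gaussian-shaped regression target y (and its FFT yf) and the Hanning
# (cosine) window, for an illustrative 64x48 search window.
import pylab

sz = pylab.array([64, 48])                     # (height, width), illustrative
output_sigma = pylab.sqrt(pylab.prod(sz)) * (1 / 16.0)

grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0] / 2)
grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1] / 2)
rs, cs = pylab.meshgrid(grid_x, grid_y)
y = pylab.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2))
yf = pylab.fft2(y)

cos_window = pylab.outer(pylab.hanning(sz[0]), pylab.hanning(sz[1]))
print(y.shape, yf.shape, cos_window.shape)     # all (64, 48)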
def init(self, img, rect):
    im_width = img.shape[1]
    im_height = img.shape[0]

    ys = pylab.floor(rect[1]) + pylab.arange(rect[3], dtype=int)
    xs = pylab.floor(rect[0]) + pylab.arange(rect[2], dtype=int)
    ys = ys.astype(int)
    xs = xs.astype(int)

    # check for out-of-bounds coordinates,
    # and set them to the values at the borders
    ys[ys < 0] = 0
    ys[ys >= img.shape[0]] = img.shape[0] - 1
    xs[xs < 0] = 0
    xs[xs >= img.shape[1]] = img.shape[1] - 1

    roi = self.get_imageROI(img, rect)

    self.init_frame = img.copy()
    self.canvas = img.copy()

    # pos is the center position of the tracked object (cy, cx)
    pos = pylab.array([rect[1] + rect[3] / 2, rect[0] + rect[2] / 2])

    self.pos_list = [pos]
    self.roi_list = [roi]
    self.rect_list = [rect]
    self.trackNo = 0

    # parameters according to the paper --
    padding = 1.0  # extra area surrounding the target (the window is enlarged by this factor, doubled by default)
    # spatial bandwidth (proportional to target)
    output_sigma_factor = 1 / float(16)
    self.sigma = 0.2          # gaussian kernel bandwidth
    self.lambda_value = 1e-2  # regularization
    # linear interpolation factor for adaptation
    # self.interpolation_factor = 0.075
    self.interpolation_factor = 0.01
    self.scale_ratios = [0.985, 0.99, 0.995, 1.0, 1.005, 1.01, 1.015]

    # target_sz equals [rect[3], rect[2]]
    target_sz = pylab.array([int(rect[3]), int(rect[2])])

    # window size (extended window size), taking padding into account
    window_sz = pylab.floor(target_sz * (1 + padding))
    self.window_sz = window_sz
    self.window_sz_new = window_sz
    self.target_sz = target_sz

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    grid_y = pylab.arange(window_sz[0]) - pylab.floor(window_sz[0] / 2)
    grid_x = pylab.arange(window_sz[1]) - pylab.floor(window_sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2))
    self.yf = pylab.fft2(y)

    # store pre-computed cosine window
    self.cos_window = pylab.outer(pylab.hanning(window_sz[0]),
                                  pylab.hanning(window_sz[1]))

    # get subwindow at current estimated target position, to train classifier
    x = self.get_subwindow(img, pos, window_sz)

    # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
    k = self.dense_gauss_kernel(self.sigma, x)

    # store computed alphaf and z for the next frame iteration
    self.alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    self.z = x

    # return initialization status
    return True
def track(input_video_path):
    """
    notation: variables ending with f are in the frequency domain.
    """

    # parameters according to the paper --
    padding = 1.0  # extra area surrounding the target
    # spatial bandwidth (proportional to target)
    output_sigma_factor = 1 / float(16)
    sigma = 0.2  # gaussian kernel bandwidth
    lambda_value = 1e-2  # regularization
    # linear interpolation factor for adaptation
    interpolation_factor = 0.075

    info = load_video_info(input_video_path)
    img_files, pos, target_sz, \
        should_resize_image, ground_truth, video_path = info

    # window size, taking padding into account
    sz = pylab.floor(target_sz * (1 + padding))

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0] / 2)
    grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    yf = pylab.fft2(y)
    # print("yf.shape ==", yf.shape)
    # print("y.shape ==", y.shape)

    # store pre-computed cosine window
    cos_window = pylab.outer(pylab.hanning(sz[0]),
                             pylab.hanning(sz[1]))

    total_time = 0  # to calculate FPS
    positions = pylab.zeros((len(img_files), 2))  # to calculate precision

    global z, response
    z = None
    alphaf = None
    response = None

    for frame, image_filename in enumerate(img_files):

        if (frame % 10) == 0:
            print("Processing frame", frame)

        # load image
        image_path = os.path.join(video_path, image_filename)
        im = pylab.imread(image_path)
        if len(im.shape) == 3 and im.shape[2] > 1:
            im = rgb2gray(im)
        # print("Image max/min value==", im.max(), "/", im.min())

        if should_resize_image:
            im = scipy.misc.imresize(im, 0.5)

        start_time = time.time()

        # extract and pre-process subwindow
        x = get_subwindow(im, pos, sz, cos_window)

        is_first_frame = (frame == 0)

        if not is_first_frame:
            # calculate response of the classifier at all locations
            k = dense_gauss_kernel(sigma, x, z)
            kf = pylab.fft2(k)
            alphaf_kf = pylab.multiply(alphaf, kf)
            response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

            # target location is at the maximum response
            r = response
            row, col = pylab.unravel_index(r.argmax(), r.shape)
            pos = pos - pylab.floor(sz / 2) + [row, col]

            if debug:
                print("Frame ==", frame)
                print("Max response", r.max(), "at", [row, col])
                pylab.figure()
                pylab.imshow(cos_window)
                pylab.title("cos_window")
                pylab.figure()
                pylab.imshow(x)
                pylab.title("x")
                pylab.figure()
                pylab.imshow(response)
                pylab.title("response")
                pylab.show(block=True)
        # end "if not first frame"

        # get subwindow at current estimated target position,
        # to train classifier
        x = get_subwindow(im, pos, sz, cos_window)

        # Kernel Regularized Least-Squares,
        # calculate alphas (in Fourier domain)
        k = dense_gauss_kernel(sigma, x)
        new_alphaf = pylab.divide(yf, (pylab.fft2(k) + lambda_value))  # Eq. 7
        new_z = x

        if is_first_frame:
            # first frame, train with a single image
            alphaf = new_alphaf
            z = x
        else:
            # subsequent frames, interpolate model
            f = interpolation_factor
            alphaf = (1 - f) * alphaf + f * new_alphaf
            z = (1 - f) * z + f * new_z
        # end "first frame or not"

        # save position and calculate FPS
        positions[frame, :] = pos
        total_time += time.time() - start_time

        # visualization
        plot_tracking(frame, pos, target_sz, im, ground_truth)
    # end of "for each image in video"

    if should_resize_image:
        positions = positions * 2

    print("Frames-per-second:", len(img_files) / total_time)

    title = os.path.basename(os.path.normpath(input_video_path))

    if len(ground_truth) > 0:
        # show the precisions plot
        show_precision(positions, ground_truth, video_path, title)

    return
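# Usage sketch for track(), assuming the surrounding script provides
# load_video_info, get_subwindow, dense_gauss_kernel, rgb2gray, plot_tracking,
# show_precision and a module-level `debug` flag, and that the video directory
# follows the layout expected by load_video_info. The path is illustrative.
debug = False
track("data/surfer")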
def init(self, img, rect):
    im_width = img.shape[1]
    im_height = img.shape[0]

    ys = pylab.floor(rect[1]) + pylab.arange(rect[3], dtype=int)
    xs = pylab.floor(rect[0]) + pylab.arange(rect[2], dtype=int)
    ys = ys.astype(int)
    xs = xs.astype(int)

    # check for out-of-bounds coordinates,
    # and set them to the values at the borders
    ys[ys < 0] = 0
    ys[ys >= img.shape[0]] = img.shape[0] - 1
    xs[xs < 0] = 0
    xs[xs >= img.shape[1]] = img.shape[1] - 1

    self.rect = rect  # rectangle containing the bounding box of the target
    # pos is the center position of the tracked object (cy, cx)
    self.pos = pylab.array([rect[1] + rect[3] / 2, rect[0] + rect[2] / 2])
    self.posOffset = np.array([0, 0], int)
    self.tlx = rect[0]
    self.tly = rect[1]
    self.trackNo = 0

    # parameters according to the paper --
    padding = 1.0  # extra area surrounding the target (the window is enlarged by this factor, doubled by default)
    # spatial bandwidth (proportional to target)
    output_sigma_factor = 1 / float(16)
    self.sigma = 0.2          # gaussian kernel bandwidth
    self.lambda_value = 1e-2  # regularization
    # linear interpolation factor for adaptation
    self.interpolation_factor = 0.075

    # target_sz equals [rect[3], rect[2]]
    target_sz = pylab.array([int(rect[3]), int(rect[2])])

    # window size (extended window size), taking padding into account
    window_sz = pylab.floor(target_sz * (1 + padding))
    self.window_sz = window_sz
    self.target_sz = target_sz

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    grid_y = pylab.arange(window_sz[0]) - pylab.floor(window_sz[0] / 2)
    grid_x = pylab.arange(window_sz[1]) - pylab.floor(window_sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    self.yf = pylab.fft2(y)

    # store pre-computed cosine window
    self.cos_window = pylab.outer(pylab.hanning(window_sz[0]),
                                  pylab.hanning(window_sz[1]))

    # get subwindow at current estimated target position, to train classifier
    x = self.get_subwindow(img, self.pos, window_sz, self.cos_window)

    # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
    k = self.dense_gauss_kernel(self.sigma, x)

    # store computed alphaf and z for the next frame iteration
    self.alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
    self.z = x

    # monitor the tracker's own status, based on the continuity of the psr
    self.self_status = 0
    # monitor the collaborative status, based on the distance to the voted
    # object bounding box center, and on the psr as well
    self.collaborate_status = 5

    self.collabor_container = np.ones((10, 1), int)
    self.highpsr_container = np.ones((10, 1), int)
    self.FourRecentRects = np.zeros((4, 4), float)

    # return initialization status
    return True
def track(input_video_path):
    """
    notation: variables ending with f are in the frequency domain.
    """

    # parameters according to the paper --
    padding = 1.0  # extra area surrounding the target
    # spatial bandwidth (proportional to target)
    output_sigma_factor = 1 / float(16)
    sigma = 0.2  # gaussian kernel bandwidth
    lambda_value = 1e-2  # regularization
    # linear interpolation factor for adaptation
    interpolation_factor = 0.075

    info = load_video_info(input_video_path)
    img_files, pos, target_sz, \
        should_resize_image, ground_truth, video_path = info

    # window size, taking padding into account
    sz = pylab.floor(target_sz * (1 + padding))

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0] / 2)
    grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1] / 2)
    # [rs, cs] = ndgrid(grid_x, grid_y)
    rs, cs = pylab.meshgrid(grid_x, grid_y)
    y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    yf = pylab.fft2(y)
    # print("yf.shape ==", yf.shape)
    # print("y.shape ==", y.shape)

    # store pre-computed cosine window
    cos_window = pylab.outer(pylab.hanning(sz[0]),
                             pylab.hanning(sz[1]))

    total_time = 0  # to calculate FPS
    positions = pylab.zeros((len(img_files), 2))  # to calculate precision

    global z, response
    z = None
    alphaf = None
    response = None

    for frame, image_filename in enumerate(img_files):

        if (frame % 10) == 0:
            print("Processing frame", frame)

        # load image
        image_path = os.path.join(video_path, image_filename)
        im = pylab.imread(image_path)
        if len(im.shape) == 3 and im.shape[2] > 1:
            im = rgb2gray(im)
        # print("Image max/min value==", im.max(), "/", im.min())

        if should_resize_image:
            im = scipy.misc.imresize(im, 0.5)

        start_time = time.time()

        # extract and pre-process subwindow
        x = get_subwindow(im, pos, sz, cos_window)

        if debug:
            pylab.figure()
            pylab.imshow(x)
            pylab.title("sub window")

        is_first_frame = (frame == 0)

        if not is_first_frame:
            # calculate response of the classifier at all locations
            k = dense_gauss_kernel(sigma, x, z)
            kf = pylab.fft2(k)
            alphaf_kf = pylab.multiply(alphaf, kf)
            response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

            # target location is at the maximum response
            r = response
            row, col = pylab.unravel_index(r.argmax(), r.shape)
            pos = pos - pylab.floor(sz / 2) + [row, col]

            if debug:
                print("Frame ==", frame)
                print("Max response", r.max(), "at", [row, col])
                pylab.figure()
                pylab.imshow(cos_window)
                pylab.title("cos_window")
                pylab.figure()
                pylab.imshow(x)
                pylab.title("x")
                pylab.figure()
                pylab.imshow(response)
                pylab.title("response")
                pylab.show(block=True)
        # end "if not first frame"

        # get subwindow at current estimated target position,
        # to train classifier
        x = get_subwindow(im, pos, sz, cos_window)

        # Kernel Regularized Least-Squares,
        # calculate alphas (in Fourier domain)
        k = dense_gauss_kernel(sigma, x)
        new_alphaf = pylab.divide(yf, (pylab.fft2(k) + lambda_value))  # Eq. 7
        new_z = x

        if is_first_frame:
            # first frame, train with a single image
            alphaf = new_alphaf
            z = x
        else:
            # subsequent frames, interpolate model
            f = interpolation_factor
            alphaf = (1 - f) * alphaf + f * new_alphaf
            z = (1 - f) * z + f * new_z
        # end "first frame or not"

        # save position and calculate FPS
        positions[frame, :] = pos
        total_time += time.time() - start_time

        # visualization
        plot_tracking(frame, pos, target_sz, im, ground_truth)
    # end of "for each image in video"

    if should_resize_image:
        positions = positions * 2

    print("Frames-per-second:", len(img_files) / total_time)

    title = os.path.basename(os.path.normpath(input_video_path))

    if len(ground_truth) > 0:
        # show the precisions plot
        show_precision(positions, ground_truth, video_path, title)

    return
def update(self, new_img):
    '''
    :param new_img: new frame, should be normalized, for tracker_status
                    estimating the rect_snr
    :return: ok, rect, psr, response
    '''
    self.canvas = new_img.copy()
    self.trackNo += 1

    # get subwindow at current estimated target position, to train classifier
    x = self.get_subwindow(new_img, self.pos, self.window_sz, self.cos_window)

    # calculate response of the classifier at all locations
    k = self.dense_gauss_kernel(self.sigma, x, self.z)
    kf = pylab.fft2(k)
    alphaf_kf = pylab.multiply(self.alphaf, kf)
    response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9
    self.response = response
    self.responsePeak = np.max(response)

    # target location is at the maximum response
    row, col = pylab.unravel_index(response.argmax(), response.shape)

    # roi rect's top-left point plus [row, col]
    self.tly, self.tlx = self.pos - pylab.floor(self.window_sz / 2)

    # the pos is not assigned to self.pos at once; we need to check the psr first.
    # If it is above the threshold (default is 5), self.pos = pos.
    pos = np.array([self.tly, self.tlx]) + np.array([row, col])

    # Note: pos is (cy, cx), while cv2 rects are (x, y, w, h)!
    rect = pylab.array([pos[1] - self.target_sz[1] / 2,
                        pos[0] - self.target_sz[0] / 2,
                        self.target_sz[1], self.target_sz[0]])
    rect = rect.astype(int)
    self.rect = rect

    self.psr, self.trkStatus = self.tracker_status(col, row, response, rect, new_img)
    self.pos = pos

    # # bad quality tracking results
    # if self.psr <= 5 and self.trackNo >= 5:
    #     # compute an offset based on the last 4 frames' obj_bbox centers,
    #     # using the average center shift as (offset_x, offset_y)
    #     dif_rect = []
    #     for iter in [-1, -2, -3]:
    #         dif_rect.append(np.array(self.FourRecentRects[iter]) -
    #                         np.array(self.FourRecentRects[iter - 1]))
    #     offset_rect = np.mean(dif_rect, 0)
    #     offset = (offset_rect[0] + offset_rect[2] / 2,
    #               offset_rect[1] + offset_rect[3] / 2)
    #     print('Tracker offset is activated (%d, %d)' % (offset[0], offset[1]))
    #     self.pos = self.pos + np.array([offset[1], offset[0]])
    # else:
    #     self.pos = pos
    #     self.FourRecentRects[self.trackNo % 4] = rect

    # if self.psr <= 5:
    #     # apply the default offset computed from the last 4 frames' centers
    #     self.pos = self.pos + self.posOffset
    #     print(self)
    #     print('Tracker default offset is activated (%d, %d)'
    #           % (self.posOffset[1], self.posOffset[0]))
    # else:
    #     self.pos = pos
    #     self.FourRecentRects[self.trackNo % 4] = rect
    #     if self.trackNo >= 5:
    #         dif_rect = []
    #         for iter in [-1, -2, -3]:
    #             dif_rect.append(np.array(self.FourRecentRects[iter]) -
    #                             np.array(self.FourRecentRects[iter - 1]))
    #         offset_rect = np.mean(dif_rect, 0)
    #         offset = (offset_rect[0] + offset_rect[2] / 2,
    #                   offset_rect[1] + offset_rect[3] / 2)
    #         self.posOffset = np.array([offset[1], offset[0]])

    # print('tracker status: res_win_ave, max, psr, rect_snr', self.trkStatus)

    # if debug:
    #     if self.trackNo == 1:
    #         # pylab.ion()  # interactive mode on
    #         self.fig, self.axes = pylab.subplots(ncols=3)
    #         self.fig.show()
    #         # We need to draw the canvas before we start animating...
    #         self.fig.canvas.draw()
    #
    #         k_img = self.axes[0].imshow(k, animated=True)
    #         x_img = self.axes[1].imshow(x, animated=True)
    #         r_img = self.axes[2].imshow(response, animated=True)
    #
    #         self.subimgs = [k_img, x_img, r_img]
    #         # Let's capture the background of the figure
    #         self.backgrounds = [self.fig.canvas.copy_from_bbox(ax.bbox)
    #                             for ax in self.axes]
    #         pylab.show(block=False)
    #     else:
    #         self.subimgs[0].set_data(k)
    #         self.subimgs[1].set_data(x)
    #         self.subimgs[2].set_data(response)
    #         items = enumerate(zip(self.subimgs, self.axes, self.backgrounds), start=1)
    #         for j, (subimg, ax, background) in items:
    #             self.fig.canvas.restore_region(background)
    #             ax.draw_artist(subimg)
    #             self.fig.canvas.blit(ax.bbox)
    #         pylab.show(block=False)

    # only update the model when tracker_status's psr is high
    if self.psr > 10:
        # compute new_alphaf and the observed x as z
        x = self.get_subwindow(new_img, self.pos, self.window_sz, self.cos_window)

        # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        k = self.dense_gauss_kernel(self.sigma, x)
        new_alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        new_z = x

        # subsequent frames, interpolate model
        f = self.interpolation_factor
        self.alphaf = (1 - f) * self.alphaf + f * new_alphaf
        self.z = (1 - f) * self.z + f * new_z

    ok = 1
    return ok, rect, self.psr, response
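# Hypothetical usage sketch for this tracker's init()/update() pair inside an
# OpenCV capture loop. The class name KCFTrackerPSR, the video path and the
# initial rectangle are stand-ins; frames are converted to normalized grayscale
# as the docstring above requests.
import cv2

cap = cv2.VideoCapture("video.avi")
ok, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) / 255.0

tracker = KCFTrackerPSR()                       # hypothetical wrapper class
tracker.init(gray, [100, 80, 50, 60])           # rect is (x, y, w, h)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) / 255.0
    ok, rect, psr, response = tracker.update(gray)
    x, y, w, h = [int(v) for v in rect]
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow("tracking", frame)
    if cv2.waitKey(1) == 27:                    # Esc to quit
        break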
def track(input_video_path, show_tracking):
    """
    Note: variables ending with f are in the frequency domain.
    """
    # extra area surrounding the target
    padding = 1.0
    # spatial bandwidth, proportional to the target size
    output_sigma_factor = 1 / float(16)
    # gaussian kernel bandwidth
    sigma = 0.2
    # regularization coefficient
    lambda_value = 1e-2
    # linear interpolation factor
    interpolation_factor = 0.075

    # Load the video information: the list of frames to test, the [y, x] center of
    # the target rectangle in the first frame, half the rectangle's height/width,
    # whether the images should be halved in size, the per-frame ground truth,
    # and the video path.
    info = load_video_info.load_video_info(input_video_path)
    img_files, pos, target_sz, should_resize_image, ground_truth, video_path = info

    # window size, taking padding into account
    sz = pylab.floor(target_sz * (1 + padding))

    # desired output (gaussian shaped), bandwidth proportional to target size
    output_sigma = pylab.sqrt(pylab.prod(target_sz)) * output_sigma_factor

    # shift the rectangle's height and width so the coordinates are centered
    # on the target, giving the row and column coordinate vectors
    grid_y = pylab.arange(sz[0]) - pylab.floor(sz[0] / 2)
    grid_x = pylab.arange(sz[1]) - pylab.floor(sz[1] / 2)

    # turn the coordinate vectors into coordinate matrices, i.e. a grid over the 2-D window
    rs, cs = pylab.meshgrid(grid_x, grid_y)

    # Eq. (19) in the paper: values in [0, 1], largest at the center and
    # decaying away from it
    y = pylab.exp((-0.5 / output_sigma ** 2) * (rs ** 2 + cs ** 2))

    # 2-D discrete Fourier transform
    yf = pylab.fft2(y)

    # Hanning (weighted cosine) windows for the rectangle's height and width;
    # their outer product gives the cosine window for the whole rectangle
    cos_window = pylab.outer(pylab.hanning(sz[0]), pylab.hanning(sz[1]))

    total_time = 0  # to calculate FPS
    positions = pylab.zeros((len(img_files), 2))  # to calculate precision

    # global z, response
    plot_tracking.z = None
    alphaf = None
    plot_tracking.response = None

    # iterate over the images in the list of image names
    for frame, image_filename in enumerate(img_files):

        if (frame % 10) == 0:
            print("Processing frame", frame)

        # load image
        image_path = os.path.join(video_path, image_filename)
        im = pylab.imread(image_path)

        # convert color images to grayscale
        if len(im.shape) == 3 and im.shape[2] > 1:
            im = rgb2gray.rgb2gray(im)

        # if requested, shrink the image to half its size
        if should_resize_image:
            im = np.array(Image.fromarray(im).resize(
                (int(im.shape[0] / 2), int(im.shape[1] / 2))))

        # start timing
        start_time = time.time()

        # extract and pre-process the subwindow, using the cosine window
        x = get_subwindow.get_subwindow(im, pos, sz, cos_window)

        is_first_frame = (frame == 0)

        # for every frame but the first, compute the classifier response
        if not is_first_frame:
            # calculate response of the classifier at all locations
            k = dense_gauss_kernel.dense_gauss_kernel(sigma, x, plot_tracking.z)
            kf = pylab.fft2(k)
            alphaf_kf = pylab.multiply(alphaf, kf)
            plot_tracking.response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

            # the maximum response gives the target position
            r = plot_tracking.response
            row, col = pylab.unravel_index(r.argmax(), r.shape)
            pos = pos - pylab.floor(sz / 2) + [row, col]

            if debug:
                print("Frame ==", frame)
                print("Max response", r.max(), "at", [row, col])
                pylab.figure()
                pylab.imshow(cos_window)
                pylab.title("cos_window")
                pylab.figure()
                pylab.imshow(x)
                pylab.title("x")
                pylab.figure()
                pylab.imshow(plot_tracking.response)
                pylab.title("response")
                pylab.show(block=True)
        # end "if not first frame"

        # get the cosine-windowed patch at the target position, to train the classifier
        x = get_subwindow.get_subwindow(im, pos, sz, cos_window)

        # Kernel Regularized Least-Squares: compute the alphas in the Fourier domain
        k = dense_gauss_kernel.dense_gauss_kernel(sigma, x)
        new_alphaf = pylab.divide(yf, (pylab.fft2(k) + lambda_value))  # Eq. 7
        new_z = x

        if is_first_frame:
            # first frame: train on a single image
            alphaf = new_alphaf
            plot_tracking.z = x
        else:
            # subsequent frames: interpolate the model parameters
            f = interpolation_factor
            alphaf = (1 - f) * alphaf + f * new_alphaf
            plot_tracking.z = (1 - f) * plot_tracking.z + f * new_z

        # save the current position and accumulate time for the FPS estimate
        positions[frame, :] = pos
        total_time += time.time() - start_time

        # visualize the tracking result
        if show_tracking == "yes":
            plot_tracking.plot_tracking(frame, pos, target_sz, im, ground_truth)

    if should_resize_image:
        positions = positions * 2

    print("Frames-per-second:", len(img_files) / total_time)

    title = os.path.basename(os.path.normpath(input_video_path))

    if len(ground_truth) > 0:
        # plot the precision curve
        show_precision.show_precision(positions, ground_truth, title)
def da(self, z):
    d = self.rtc(z)
    M.divide(d, 1 + z, d)  # in-place divide: d /= (1 + z), assuming M is a numpy-like module
    return d
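# The in-place divide used above, shown standalone: numpy.divide with an explicit
# output array writes the quotient back into `d`, so no copy is made. `M` in the
# method is assumed to be a numpy-like module alias.
import numpy as np

d = np.array([100.0, 200.0, 300.0])
z = np.array([0.0, 1.0, 2.0])
np.divide(d, 1 + z, d)      # equivalent to d /= (1 + z)
print(d)                    # [100. 100. 100.]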
def update(self, new_img):
    self.canvas = new_img.copy()
    self.trackNo += 1

    res_max = 0.
    for scale_rate in self.scale_ratios:
        template_size = scale_rate * self.window_sz_new

        # get subwindow at current estimated target position, to train classifier
        x = self.get_subwindow(new_img, self.pos_list[-1], template_size)

        # calculate response of the classifier at all locations
        k = self.dense_gauss_kernel(self.sigma, x, self.z)
        kf = pylab.fft2(k)
        alphaf_kf = pylab.multiply(self.alphaf, kf)
        response = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

        # target location is at the maximum response
        r = response
        row, col = pylab.unravel_index(r.argmax(), r.shape)

        # keep the scale with the strongest response
        if res_max < np.max(r):
            res_row = int(row * scale_rate)
            res_col = int(col * scale_rate)
            self.window_sz_new = template_size
            self.target_sz = self.target_sz * scale_rate
            res_ave, res_max, self.psr = self.response_win_ave_max(
                response, col, row, winsize=12)
            self.scale_rate = scale_rate

    # roi rect's top-left point plus [row, col]
    pos = self.pos_list[-1] - pylab.floor(self.window_sz_new / 2) + [res_row, res_col]
    rect = pylab.array([pos[1] - self.target_sz[1] / 2,
                        pos[0] - self.target_sz[0] / 2,
                        self.target_sz[1], self.target_sz[0]])
    rect = rect.astype(int)
    # print(self.target_sz, self.psr, self.scale_rate)

    if debug:
        if self.trackNo == 1:
            # pylab.ion()  # interactive mode on
            self.fig, self.axes = pylab.subplots(ncols=3)
            self.fig.show()
            # We need to draw the canvas before we start animating...
            self.fig.canvas.draw()

            k_img = self.axes[0].imshow(k, animated=True)
            x_img = self.axes[1].imshow(x, animated=True)
            r_img = self.axes[2].imshow(response, animated=True)

            self.subimgs = [k_img, x_img, r_img]
            # Let's capture the background of the figure
            self.backgrounds = [self.fig.canvas.copy_from_bbox(ax.bbox)
                                for ax in self.axes]

            # tracking_rectangle = pylab.Rectangle((0, 0), 0, 0)
            # tracking_rectangle.set_color((1, 0, 0, 0.5))
            # tracking_figure_axes.add_patch(tracking_rectangle)
            #
            # gt_point = pylab.Circle((0, 0), radius=5)
            # gt_point.set_color((0, 0, 1, 0.5))
            # tracking_figure_axes.add_patch(gt_point)
            # tracking_figure_title = tracking_figure.suptitle("")

            pylab.show(block=False)
            # self.fig.show()
        else:
            self.subimgs[0].set_data(k)
            self.subimgs[1].set_data(x)
            self.subimgs[2].set_data(response)
            items = enumerate(zip(self.subimgs, self.axes, self.backgrounds), start=1)
            for j, (subimg, ax, background) in items:
                self.fig.canvas.restore_region(background)
                ax.draw_artist(subimg)
                self.fig.canvas.blit(ax.bbox)
            pylab.show(block=False)

    if self.psr > 10:
        # compute new_alphaf and the observed x as z
        x = self.get_subwindow(new_img, pos, self.window_sz_new)

        # Kernel Regularized Least-Squares, calculate alphas (in Fourier domain)
        k = self.dense_gauss_kernel(self.sigma, x)
        new_alphaf = pylab.divide(self.yf, (pylab.fft2(k) + self.lambda_value))  # Eq. 7
        new_z = x

        # subsequent frames, interpolate model
        f = self.interpolation_factor
        self.alphaf = (1 - f) * self.alphaf + f * new_alphaf
        self.z = (1 - f) * self.z + f * new_z

    self.roi_list.append(self.get_imageROI(new_img, rect))
    self.pos_list.append(pos)
    self.rect_list.append(rect)
    ok = 1
    return ok, rect, self.psr
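# Hypothetical two-frame usage sketch for the scale-adaptive tracker formed by
# the init() above that builds self.scale_ratios and this update(); the class
# name ScaleKCFTracker and the image files are stand-ins.
import cv2

frame0 = cv2.imread("frames/0001.jpg", cv2.IMREAD_GRAYSCALE)
frame1 = cv2.imread("frames/0002.jpg", cv2.IMREAD_GRAYSCALE)

tracker = ScaleKCFTracker()                  # hypothetical wrapper class
tracker.init(frame0, [100, 80, 50, 60])      # rect is (x, y, w, h)

ok, rect, psr = tracker.update(frame1)
print(rect, psr, tracker.window_sz_new)      # window_sz_new reflects the chosen scale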
def track(descriptor):
    global options
    desc_channel_count = descriptor.initialize(options.use_gpu)

    roi = loader.track_bounding_box_from_first_frame()
    roi = [roi[0] + roi[2] / 2, roi[1] + roi[3] / 2, roi[2], roi[3],
           roi[2] * (1 + kcf_params.padding), roi[3] * (1 + kcf_params.padding)]

    output_sigma = pylab.sqrt(pylab.prod([roi[3], roi[2]])) * kcf_params.output_sigma_factor

    avg_count = 0

    global cos_window
    cos_window = None
    template = [None for i in range(desc_channel_count)]
    alpha_f = [None for i in range(desc_channel_count)]
    response = [None for i in range(desc_channel_count)]
    yf = None

    track_time = 0
    full_track_time = time.time()
    while loader.has_next_frame():
        im = loader.next_frame()

        if (loader.frame_number() % 10) == 0:
            print("Processing frame {}".format(loader.frame_number()))

        start_time = time.time()

        is_first_frame = loader.frame_number() == 0

        cropped = get_subwindow(im, roi)
        channels = descriptor.describe(cropped)
        subwindow = apply_cos_window(channels)
        subwindow = crop(subwindow)
        dmv = None

        if is_first_frame:
            grid_y = pylab.arange(subwindow.shape[1]) - pylab.floor(subwindow.shape[1] / 2)
            grid_x = pylab.arange(subwindow.shape[2]) - pylab.floor(subwindow.shape[2] / 2)

            rs, cs = pylab.meshgrid(grid_x, grid_y)
            y = pylab.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
            yf = pylab.fft2(y)
        else:
            for i in range(0, subwindow.shape[0]):
                channel = subwindow[i, :, :]

                # calculate response of the classifier at all locations
                k = dense_gauss_kernel(kcf_params.sigma, channel, template[i])
                kf = pylab.fft2(k)
                alphaf_kf = pylab.multiply(alpha_f[i], kf)
                response[i] = pylab.real(pylab.ifft2(alphaf_kf))  # Eq. 9

                # argmax = response[i].argmax()
                #
                # if response[i].item(argmax) != 0:
                #     tmp = pylab.unravel_index(argmax, response[i].shape)
                #     if value < response[i][tmp[0], tmp[1]]:
                #         avg_x = tmp[1]
                #         avg_y = tmp[0]
                #         avg_count = 1
                #         value = response[i][tmp[0], tmp[1]]
                #         chosen_i = i

            # use integer division when indexing the central descriptor
            anchor = torch.tensor(channels[:,
                                           channels.shape[1] // 2,
                                           channels.shape[2] // 2]).unsqueeze(0)
            points = torch.tensor(response).view(channels.shape[0], -1).t()

            dmv = distance_matrix_vector(anchor, points).view(
                channels.shape[1], channels.shape[2])

            argmax = np.array(dmv).argmax()
            tmp = pylab.unravel_index(argmax, subwindow.shape[1:])
            moved_by = [float(tmp[0]) - float(subwindow.shape[1]) / 2,
                        float(tmp[1]) - float(subwindow.shape[2]) / 2]
            roi = descriptor.update_roi(roi, moved_by)

        cropped = get_subwindow(im, roi)
        channels = descriptor.describe(cropped)
        subwindow = apply_cos_window(channels)
        subwindow = crop(subwindow)

        for i in range(0, subwindow.shape[0]):
            channel = subwindow[i, :, :]

            k = dense_gauss_kernel(kcf_params.sigma, channel)
            new_alpha_f = pylab.divide(yf, (pylab.fft2(k) + kcf_params.lambda_value))  # Eq. 7
            new_template = channel

            if is_first_frame:
                alpha_f[i] = new_alpha_f
                template[i] = new_template
            else:
                f = kcf_params.interpolation_factor
                alpha_f[i] = (1 - f) * alpha_f[i] + f * new_alpha_f
                template[i] = (1 - f) * template[i] + f * new_template

        track_time += time.time() - start_time

        results.log_tracked(im, roi, False, template[0], dmv)
    # end of "for each image in video"

    results.log_meta("speed.frames_tracked", loader.frame_number())
    results.log_meta("speed.track_no_io_time", str(track_time) + "s")
    results.log_meta("speed.track_no_io_fps", loader.frame_number() / track_time)
    results.log_meta("speed.track_no_init_time", str(time.time() - full_track_time) + "s")

    results.show_precision()

    return
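# Hypothetical entry point for the descriptor-based track() above; it assumes
# the surrounding project defines `options`, `loader`, `results`, `kcf_params`,
# get_subwindow, apply_cos_window, crop, dense_gauss_kernel and
# distance_matrix_vector as used in the function. The descriptor class name is
# a stand-in for whatever descriptor the project ships; it must expose
# initialize(), describe() and update_roi().
descriptor = SomeDeepDescriptor()   # stand-in descriptor object
track(descriptor)                   # runs the whole sequence provided by `loader`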