def track(self, image):
    # Translation estimation: extract a HOG test patch at the current scale.
    test_patch = utils.get_subwindow(image, self.pos, self.sz,
                                     scale_factor=self.currentScaleFactor)
    hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
    hog_feature_t = np.lib.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)), 'edge')
    xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
    xtf = np.fft.fft2(xt, axes=(0, 1))
    # Correlation response: per-channel products summed in the Fourier domain.
    response = np.real(np.fft.ifft2(
        np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                  (self.x_den + self.lamda))))
    v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
    vert_delta, horiz_delta = \
        [(v_centre - response.shape[0] / 2) * self.currentScaleFactor,
         (h_centre - response.shape[1] / 2) * self.currentScaleFactor]
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

    # Scale estimation: evaluate the 1-D scale filter over the scale pyramid.
    st = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                   self.currentScaleFactor * self.scaleSizeFactors,
                                   self.scale_window, self.scale_model_sz)
    stf = np.fft.fftn(st, axes=[0])
    scale_response = np.real(np.fft.ifftn(
        np.sum(np.divide(np.multiply(self.s_num, stf),
                         (self.s_den[:, None] + self.lamda_scale)), axis=1)))
    recovered_scale = np.argmax(scale_response)
    self.currentScaleFactor = self.currentScaleFactor * self.scaleFactors[recovered_scale]
    if self.currentScaleFactor < self.min_scale_factor:
        self.currentScaleFactor = self.min_scale_factor
    elif self.currentScaleFactor > self.max_scale_factor:
        self.currentScaleFactor = self.max_scale_factor

    # Update: re-extract features at the new position/scale and update both filters.
    update_patch = utils.get_subwindow(image, self.pos, self.sz,
                                       scale_factor=self.currentScaleFactor)
    hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
    hog_feature_l = np.lib.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)), 'edge')
    xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
    xlf = np.fft.fft2(xl, axes=(0, 1))
    new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
    new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))
    sl = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                   self.currentScaleFactor * self.scaleSizeFactors,
                                   self.scale_window, self.scale_model_sz)
    slf = np.fft.fftn(sl, axes=[0])
    new_s_num = np.multiply(self.ysf[:, None], np.conj(slf))
    new_s_den = np.real(np.sum(np.multiply(slf, np.conj(slf)), axis=1))
    self.x_num = (1 - self.interp_factor) * self.x_num + self.interp_factor * new_x_num
    self.x_den = (1 - self.interp_factor) * self.x_den + self.interp_factor * new_x_den
    self.s_num = (1 - self.interp_factor) * self.s_num + self.interp_factor * new_s_num
    self.s_den = (1 - self.interp_factor) * self.s_den + self.interp_factor * new_s_den

    self.target_size = self.base_target_size * self.currentScaleFactor
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1], self.target_size[0])
def __init__(self, image, region):
    self.target_size = np.array([region.height, region.width])
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    padding = 2.5  # extra area surrounding the target
    self.patch_size = np.floor(self.target_size * (1 + padding))
    img_crop = utils.get_subwindow(image, self.pos, self.patch_size)

    # Gaussian-shaped regression target centred on the patch.
    spatial_bandwidth_sigma_factor = 1 / float(16)
    output_sigma = np.sqrt(np.prod(self.target_size)) * spatial_bandwidth_sigma_factor
    grid_y = np.arange(np.floor(self.patch_size[0])) - np.floor(self.patch_size[0] / 2)
    grid_x = np.arange(np.floor(self.patch_size[1])) - np.floor(self.patch_size[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2))
    self.cos_window = np.outer(np.hanning(y.shape[0]), np.hanning(y.shape[1]))

    # Get the training image patch x (zero-mean, cosine-windowed).
    img_colour = img_crop - img_crop.mean()
    self.x = np.multiply(img_colour, self.cos_window[:, :, None])

    # FFT transformation: first transform y, then transform x.
    yf = np.fft.fft2(y, axes=(0, 1))
    self.xf = np.fft.fft2(self.x, axes=(0, 1))

    # Kernel ridge regression in the Fourier domain.
    self.feature_bandwidth_sigma = 0.2
    k = utils.dense_gauss_kernel(self.feature_bandwidth_sigma, self.xf, self.x)
    lambda_value = 1e-4
    self.alphaf = np.divide(yf, np.fft.fft2(k, axes=(0, 1)) + lambda_value)
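# `utils.dense_gauss_kernel` is called throughout these snippets but its body is
# not included. A minimal sketch of the standard KCF-style dense Gaussian kernel,
# matching the (sigma, xf, x[, zf, z]) call signature used above; this is an
# illustrative assumption, not the project's actual implementation.
import numpy as np

def dense_gauss_kernel(sigma, xf, x, zf=None, z=None):
    """Gaussian kernel over all cyclic shifts of x and z, computed via the FFT."""
    xx = np.dot(x.flatten(), x.flatten())  # squared norm of x
    if zf is None:
        # Auto-correlation case: training uses k(x, x).
        zf, z, zz = xf, x, xx
    else:
        zz = np.dot(z.flatten(), z.flatten())
    # Per-channel cross-correlation, summed over channels, back in the spatial domain.
    xyf = np.multiply(zf, np.conj(xf))
    xy = np.real(np.fft.ifft2(np.sum(xyf, axis=2)))
    # Gaussian kernel on the clamped squared distances.
    return np.exp(-1. / sigma ** 2 * np.maximum(0., (xx + zz - 2. * xy) / x.size))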
def track(self, image):
    test_crop = utils.get_subwindow(image, self.pos, self.patch_size)
    z = np.multiply(test_crop - test_crop.mean(), self.cos_window[:, :, None])
    zf = np.fft.fft2(z, axes=(0, 1))
    k_test = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                      self.xf, self.x, zf, z)
    kf_test = np.fft.fft2(k_test, axes=(0, 1))
    response = np.real(np.fft.ifft2(np.multiply(self.alphaf, kf_test)))

    # Max position in the response map.
    v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
    vert_delta, horiz_delta = [v_centre - response.shape[0] / 2,
                               h_centre - response.shape[1] / 2]

    # Predicted position.
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]
    self.bbox = [self.pos[1] - self.target_size[1] / 2,
                 self.pos[0] - self.target_size[0] / 2,
                 self.target_size[1], self.target_size[0]]
    self._results.append(self.bbox)
    return Rectangle(self.pos[1] - self.target_size[1] / 2,
                     self.pos[0] - self.target_size[0] / 2,
                     self.target_size[1], self.target_size[0])
def track(self, image):
    # ------------------------------- track ------------------------------- #
    test_patch = utils.get_subwindow(image, self.pos, self.sz)
    hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
    hog_feature_t = np.lib.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)), 'edge')
    xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
    xtf = np.fft.fft2(xt, axes=(0, 1))
    # Compute the response by summing directly over the channels.
    response = np.real(np.fft.ifft2(
        np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                  (self.x_den + self.lamda))))
    # Locate the response maximum.
    v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
    vert_delta, horiz_delta = [(v_centre - response.shape[0] / 2),
                               (h_centre - response.shape[1] / 2)]
    # New position.
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

    # ------------------------------- update ------------------------------ #
    update_patch = utils.get_subwindow(image, self.pos, self.sz)
    hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
    hog_feature_l = np.lib.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)), 'edge')
    xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
    xlf = np.fft.fft2(xl, axes=(0, 1))
    # New numerator/denominator of the position filter.
    new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
    new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))
    # Filter learning with a running average.
    self.x_num = (1 - self.interp_factor) * self.x_num + self.interp_factor * new_x_num
    self.x_den = (1 - self.interp_factor) * self.x_den + self.interp_factor * new_x_den

    self.target_size = self.base_target_size
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1], self.target_size[0])
def __update(self):
    patch = get_subwindow(self.img, self.pos, self.window_size)
    xf = fft2(get_feature(patch, self.feature, self.cell_size, self.cos_window))
    alphaf = self.__train(xf)
    # Linear interpolation of the model with the new frame.
    self.model_xf = (1 - self.interp_factor) * self.model_xf \
        + self.interp_factor * xf
    self.model_alphaf = (1 - self.interp_factor) * self.model_alphaf \
        + self.interp_factor * alphaf
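# `__train` is invoked above but not listed in these snippets. A plausible sketch
# of the KCF training step, assuming the same helpers (gaussian_correlation,
# polynomial_correlation) and attributes (self.yf, self.lamb, kernel params) that
# appear elsewhere in this class; treat the body as an assumption.
def __train(self, xf):
    # Kernel auto-correlation of the training sample, already in the Fourier domain.
    if self.kernel_type == "gaussian":
        kf = gaussian_correlation(xf, xf, self.kernel_sigma)
    elif self.kernel_type == "polynomial":
        kf = polynomial_correlation(xf, xf, self.kernel_poly_a, self.kernel_poly_b)
    # Kernel ridge regression solution: alphaf = yf / (kf + lambda).
    return self.yf / (kf + self.lamb)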
def get_scale_subwindow(im, pos, base_target_size, scaleFactors,
                        scale_window, scale_model_sz):
    """Build the multi-scale HOG sample used by the 1-D scale filter."""
    nScales = len(scaleFactors)
    out = []
    for i in range(nScales):
        patch_sz = np.floor(base_target_size * scaleFactors[i])
        scale_patch = get_subwindow(im, pos, patch_sz)
        # Resize every scale level to a common model size before HOG extraction.
        im_patch_resized = transform.resize(scale_patch, scale_model_sz,
                                            mode='reflect')
        temp_hog = pyhog.features_pedro(im_patch_resized / 255., 4)
        out.append(np.multiply(temp_hog.flatten(), scale_window[i]))
    return np.asarray(out)
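# `get_subwindow` itself is not included in this section (and the simulation code
# below uses a different variant taking `deltax`/`deltay`). A minimal sketch of
# the tracker-side helper, assuming edge replication for parts of the window that
# fall outside the image; the scale_factor keyword mirrors the calls above, but
# the body is an illustrative assumption.
import numpy as np

def get_subwindow(im, pos, sz, scale_factor=1.0):
    """Crop a sz[0] x sz[1] window centred at pos, replicating border pixels."""
    sz = np.floor(np.asarray(sz, dtype=float) * scale_factor).astype(int)
    ys = np.floor(pos[0]) + np.arange(sz[0]) - np.floor(sz[0] / 2)
    xs = np.floor(pos[1]) + np.arange(sz[1]) - np.floor(sz[1] / 2)
    # Clamp out-of-bounds coordinates so the crop replicates the image edge.
    ys = np.clip(ys, 0, im.shape[0] - 1).astype(int)
    xs = np.clip(xs, 0, im.shape[1] - 1).astype(int)
    return im[np.ix_(ys, xs)]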
def track(self, image):
    # Multi-scale search: evaluate the filter at each candidate scale and keep
    # the scale whose response peak is highest.
    best_peak = -np.inf
    for scale_factor in self.scale_factors:
        test = utils.get_subwindow(image, self.pos, self.sz,
                                   self.scaling * scale_factor)
        test = transform.resize(test, (224, 224))
        test = (test - imgMean) / imgStd
        test = np.transpose(test, (2, 0, 1))
        feature = model(Variable(torch.from_numpy(test[None, :, :, :]).float()))
        feature = feature.data[0].numpy().transpose((1, 2, 0))
        xt = ndimage.zoom(feature,
                          (float(self.cos_window.shape[0]) / feature.shape[0],
                           float(self.cos_window.shape[1]) / feature.shape[1], 1),
                          order=1)
        xt = np.multiply(xt, self.cos_window[:, :, None])
        xtf = np.fft.fft2(xt, axes=(0, 1))
        response = np.real(np.fft.ifft2(
            np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                      (self.x_den + self.lamda))))
        # Compare peak values (response.max()), not argmax indices.
        if response.max() > best_peak:
            best_peak = response.max()
            response_final = response
            scale_factor_final = scale_factor

    self.scaling *= scale_factor_final
    v_centre, h_centre = np.unravel_index(response_final.argmax(),
                                          response_final.shape)
    vert_delta, horiz_delta = \
        [(v_centre - response_final.shape[0] / 2) * self.scaling * self.cell_size,
         (h_centre - response_final.shape[1] / 2) * self.scaling * self.cell_size]
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta] - \
        self.target_size * self.scaling / 2.
    return vot.Rectangle(self.pos[1], self.pos[0],
                         self.target_size[1] * self.scaling,
                         self.target_size[0] * self.scaling)
def __init__(self, image, region):
    # Parameters shaping the position filter's desired output.
    output_sigma_factor = 1 / float(16)
    self.lamda = 1e-2
    self.interp_factor = 0.025
    # Target size.
    self.target_size = np.array([region.height, region.width])
    # Target centre position.
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    # Initial target size.
    init_target_size = self.target_size
    # Base target size: the target size at the current scale factor.
    self.base_target_size = self.target_size
    # image.shape[:2] gives the (height, width) of the image; get_window_size
    # returns the padded search-window size around the target.
    self.sz = utils.get_window_size(self.target_size, image.shape[:2], padding())
    # Sigma of the ideal Gaussian response for the position filter.
    output_sigma = np.sqrt(np.prod(self.target_size)) * output_sigma_factor
    # scale_sigma = np.sqrt(nScales) * scale_sigma_factor
    # Build the ideal Gaussian response on an (x, y) grid centred on the window.
    grid_y = np.arange(np.floor(self.sz[0])) - np.floor(self.sz[0] / 2)
    grid_x = np.arange(np.floor(self.sz[1])) - np.floor(self.sz[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    # Fourier transform of the Gaussian response.
    self.yf = np.fft.fft2(y, axes=(0, 1))
    # Extract the feature map (HOG) at the target centre.
    feature_map = utils.get_subwindow(image, self.pos, self.sz, feature='hog')
    # Apply a cosine window to the features, then transform to the Fourier domain.
    self.cos_window = np.outer(np.hanning(y.shape[0]), np.hanning(y.shape[1]))
    x_hog = np.multiply(feature_map, self.cos_window[:, :, None])
    xf = np.fft.fft2(x_hog, axes=(0, 1))
    # Numerator and denominator of the MOSSE-style position filter.
    self.x_num = np.multiply(self.yf[:, :, None], np.conj(xf))
    self.x_den = np.sum(np.multiply(xf, np.conj(xf)), axis=2)
def dectect(self, img):
    # This is essentially the same as __update below; it is unclear why the
    # original code splits it into two parts.
    self.original_img = img
    self.img = img
    if self.resize:
        self.img = cv2.resize(self.img, self.img_size[::-1])
    if self.feature == 'gray':
        # Convert the image from BGR to grayscale.
        self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
    patch = get_subwindow(self.img, self.pos, self.window_size)
    zf = fft2(get_feature(patch, self.feature, self.cell_size, self.cos_window))
    if self.kernel_type == "gaussian":
        kzf = gaussian_correlation(zf, self.model_xf, self.kernel_sigma)
    elif self.kernel_type == "polynomial":
        kzf = polynomial_correlation(zf, self.model_xf,
                                     self.kernel_poly_a, self.kernel_poly_b)
    response = np.real(ifft2(self.model_alphaf * kzf))
    cv2.imshow("response", response)
    cv2.waitKey(10)
    [vert_delta, horiz_delta] = np.unravel_index(response.argmax(),
                                                 response.shape)
    # Peaks past the window midpoint wrap around to negative displacements.
    if vert_delta > zf.shape[0] / 2:
        vert_delta = vert_delta - zf.shape[0]
    if horiz_delta > zf.shape[1] / 2:
        horiz_delta = horiz_delta - zf.shape[1]
    self.pos = (int(self.pos[0] + self.cell_size * vert_delta),
                int(self.pos[1] + self.cell_size * horiz_delta))
    self.__show_image()
    self.__update()
    return self.original_pos, self.original_target_size
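# `gaussian_correlation` is used above but not included. A sketch of the standard
# KCF multi-channel Gaussian correlation, returning the kernel already in the
# Fourier domain (as the ifft2(self.model_alphaf * kzf) usage implies); the body
# is an assumption based on the published KCF formulation.
import numpy as np

def gaussian_correlation(xf, yf, sigma):
    N = xf.shape[0] * xf.shape[1]
    # Squared norms recovered via Parseval's theorem.
    xx = np.real(np.sum(xf * np.conj(xf))) / N
    yy = np.real(np.sum(yf * np.conj(yf))) / N
    # Cross-correlation summed over channels, back in the spatial domain.
    xyf = np.sum(xf * np.conj(yf), axis=2) if xf.ndim == 3 else xf * np.conj(yf)
    xy = np.real(np.fft.ifft2(xyf))
    k = np.exp(-1. / sigma ** 2 * np.maximum(0., (xx + yy - 2. * xy) / xf.size))
    return np.fft.fft2(k)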
def __init__(self, image, region):
    padding = 1.5
    self.lamda = 1e-4
    output_sigma_factor = 0.1
    self.cell_size = 4
    self.scaling = 1
    self.scale_factors = [1.0, 1.02, 0.98]
    self.target_size = np.array([region.height, region.width])
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    self.sz = np.floor(self.target_size * (1 + padding))
    l1_patch_num = np.floor(self.sz / self.cell_size)

    # Gaussian-shaped regression target on the feature grid.
    output_sigma = np.sqrt(np.prod(self.target_size)) \
        * output_sigma_factor / self.cell_size
    grid_y = np.arange(np.floor(l1_patch_num[0])) - np.floor(l1_patch_num[0] / 2)
    grid_x = np.arange(np.floor(l1_patch_num[1])) - np.floor(l1_patch_num[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    yf = np.fft.fft2(y, axes=(0, 1))
    self.cos_window = np.outer(np.hanning(yf.shape[0]), np.hanning(yf.shape[1]))

    # Extract CNN features from the initial patch and resize them to the
    # cosine-window grid with bilinear interpolation.
    img = utils.get_subwindow(image, self.pos, self.sz)
    img = transform.resize(img, (224, 224))
    img = (img - imgMean) / imgStd
    img = np.transpose(img, (2, 0, 1))
    feature = model(Variable(torch.from_numpy(img[None, :, :, :]).float()))
    feature = feature.data[0].numpy().transpose((1, 2, 0))
    x = ndimage.zoom(feature,
                     (float(self.cos_window.shape[0]) / feature.shape[0],
                      float(self.cos_window.shape[1]) / feature.shape[1], 1),
                     order=1)
    x = np.multiply(x, self.cos_window[:, :, None])
    xf = np.fft.fft2(x, axes=(0, 1))
    self.x_num = np.multiply(yf[:, :, None], np.conj(xf))
    self.x_den = np.real(np.sum(np.multiply(xf, np.conj(xf)), axis=2))
def get_perception(self, world):
    s1 = minsight + int(self.s1 * (self.speed / max_speed))  # sight distance
    s2 = minsight + int(self.s2 * (self.speed / max_speed))  # sight distance
    off1, off2 = self.pos[::-1]
    inputs = []
    real_world = np.array(world)
    # Sample 7 perception squares on a half-circle in front of the fish.
    for i in range(0, 7):
        angle = (i / 6) * pi - self.orient
        x1, x2 = (int(s1 * cos(angle)), int(s2 * sin(angle)))
        perc = get_subwindow(real_world, (off1 + x1, off2 + x2),
                             deltax=perc_square_edge, deltay=perc_square_edge)
        height, width = perc.shape[:2]
        res = cv2.resize(perc, (3 * width, 3 * height),
                         interpolation=cv2.INTER_CUBIC)
        inputs.append(res)
    self.percs = inputs
    # Network input: mean brightness of each of the 7 squares (channels summed,
    # then normalised), plus speed, orientation, and a bias term.
    self.to_feed = np.hstack([
        (1 / (255 * 3)) * np.mean(np.sum(np.array(inputs), axis=3),
                                  axis=(1, 2)),
        self.speed,
        self.orient,
        1,  # bias
    ])
    return inputs
def get_perception(self, world):
    s1 = minsight
    s2 = minsight
    off1, off2 = map(int, self.pos[::-1])
    inputs = []
    real_world = np.array(world)
    # Sample 8 perception squares on a full circle around the food.
    for i in range(0, 8):
        angle = (i / 4) * pi - self.orient
        x1, x2 = (int(s1 * cos(angle)), int(s2 * sin(angle)))
        perc = get_subwindow(real_world, (off1 + x2, off2 + x1),
                             deltax=perc_square_edge, deltay=perc_square_edge)
        height, width = perc.shape[:2]
        res = cv2.resize(perc, (3 * width, 3 * height),
                         interpolation=cv2.INTER_CUBIC)
        inputs.append(res)
    self.percs = inputs
    # Network input: for each of the 8 squares, the fraction of pixels that are
    # not pure white (i.e. occupied), plus a bias term.
    self.to_feed = np.hstack([
        np.mean(np.max(np.array(inputs) != (255, 255, 255), axis=3),
                axis=(1, 2)),
        1,  # bias
    ])
    return inputs
def getTemplateFeature(frame, bbox):
    # Crop an exemplar image patch centred on the bounding box.
    center_pos = np.array([bbox[0] + (bbox[2] - 1) / 2,
                           bbox[1] + (bbox[3] - 1) / 2])
    size = np.array([bbox[2], bbox[3]])
    # Calculate the z crop size (target plus context).
    w_z = size[0] + CONTEXT_AMOUNT * np.sum(size)
    h_z = size[1] + CONTEXT_AMOUNT * np.sum(size)
    s_z = round(np.sqrt(w_z * h_z))
    # Calculate the per-channel average (used to pad out-of-bounds regions).
    channel_average = np.mean(frame, axis=(0, 1))
    # Get the crop and run it through the template branch.
    z_crop = get_subwindow(frame, center_pos, EXAMPLAR_SIZE, s_z, channel_average)
    z_crop = torch.from_numpy(z_crop)
    z_crop = z_crop.cuda()
    return tracker.templateFeature(z_crop)
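# A quick worked example of the context-crop arithmetic above, assuming the
# common SiamRPN-style setting CONTEXT_AMOUNT = 0.5: for a 100 x 80 target,
#   w_z = 100 + 0.5 * (100 + 80) = 190
#   h_z =  80 + 0.5 * (100 + 80) = 170
#   s_z = round(sqrt(190 * 170)) = round(179.7) = 180
# so a square 180 x 180 region around the centre is cropped and resized to
# EXAMPLAR_SIZE before being fed to the template branch.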
def run_simulation(fishes=None, foods=None, n_pop=10, max_time=3000,
                   verbose=True, size_w=(600, 900, 3), initial_simulation=False,
                   epoch=None, record=False, ALIVE=False, folder='frames',
                   num_food=None):
    global refPt
    # WORLD definition
    size_w1, size_w2 = size_w[:2][::-1]
    # The world that the food perceives: it contains only the fishes.
    food_world = (np.zeros(size_w) + (255, 255, 255)).astype(np.uint8)
    # The world that the fishes perceive: it contains only the food.
    fish_world = 255 * np.ones(size_w, dtype=np.uint8)

    if initial_simulation and verbose:
        # Tutorial overlay.
        cm_br = command_bridge(f_size=0.7, commands=[
            'd debug', 'e energy', 'click to focus on one',
            'b to focus the best', 'esc to remove focus',
            'h increase speed', 'k decrease speed', 'esc again to exit'
        ])
        cv2.imshow('beginning', merge_images_h([cm_br]))
        res = cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Mouse callback.
    if verbose and not record:
        cv2.namedWindow("Simulation")
        cv2.setMouseCallback("Simulation", dclick)

    time = 0
    max_radius = 40
    s1 = 100
    s2 = 100
    fishes_pos_orient_color = [
        (rand.randint(s2, size_w2 - s2),                       # x
         rand.randint(s1, size_w1 - s1),                       # y
         2 * pi * rand.random(),                               # orientation
         np.random.choice(range(256), size=3, replace=False))  # colour
        for i in range(n_pop)
    ]
    if fishes is None:
        fishes = [Fish(orient=o, pos=(p1, p2), color=c)
                  for p1, p2, o, c in fishes_pos_orient_color]
    if foods is None:
        if num_food is None:
            num_food = int(n_pop * 3 / 2) + 1
        foods_pos_orient = np.array([
            (rand.randint(s2 + 1 + max_radius, size_w2 - s2 - 1 - max_radius),
             rand.randint(s1 + 1 + max_radius, size_w1 - s1 - 1 - max_radius),
             2 * pi * rand.random())
            for i in range(num_food)
        ])
        foods = [AliveFood(pos=(p1, p2), orient=o)
                 for p1, p2, o in foods_pos_orient]

    dead_foods = []
    food_positions = np.array([f.pos for f in foods])
    num_foods = []
    alive = n_pop
    debug = False
    energy = False
    selected = None
    focus = False
    focus_best = focus
    food_focus = True
    food_selected = None

    while time < max_time:
        # Focus on the individual closest to a mouse click (verbose mode only).
        if verbose:
            if refPt is not None:
                focus_best = False
                focus = True
                print('clicked on ', refPt[::-1])
                all_dist = np.sqrt(np.sum(
                    (np.array([f.pos + 1000 * (f.energy <= 0) for f in fishes])
                     - (refPt[::-1] + np.array([s1, s2]))) ** 2, axis=1))
                selected = fishes[np.argmin(all_dist)]
                refPt = None

        # Food steps: when the food is not alive, spawn new food every 20 steps,
        # cloning the mind of a random living or dead food.
        if not ALIVE and time % 20 == 1:
            pos1, pos2, o = (
                rand.randint(s2 + 1 + max_radius, size_w2 - s2 - 1 - max_radius),
                rand.randint(s1 + 1 + max_radius, size_w1 - s1 - 1 - max_radius),
                2 * pi * rand.random())
            mind = int(rand.random())
            if mind == 0 and len(dead_foods) > 0:
                pool = dead_foods
            else:
                pool = foods
            new_food = AliveFood(pos=(pos1, pos2), orient=o,
                                 mind=pool[rand.randint(0, len(pool) - 1)].mind)
            foods.append(new_food)
            # Insert into the array used for distance computations.
            food_positions = np.vstack([food_positions, [pos1, pos2]])
        num_foods.append(len(food_positions))

        # Draw the food.
        fish_world = (np.zeros(size_w) + (255, 255, 255)).astype(np.uint8)
        for i, food in enumerate(foods):
            if ALIVE:
                food.get_perception(food_world)
                react = food.predict(food.to_feed)
                food.turn(direction=react[0])
                food.inc_speed(delta=react[1])
                food.move(fish_world, dx=s1 - food.s1, dy=s2 - food.s2)
                food_positions[i] = food.pos
                food.outout = react
            food.draw(fish_world)
        # Reset food_world after the fishes have been drawn, so that the food
        # can tell where they are.
        food_world = (np.zeros(size_w) + (255, 255, 255)).astype(np.uint8)

        # Get perceptions, compute each fish's reaction, and increase energy
        # when something is eaten.
        for f in fishes:
            if f.energy > 0:
                f.get_perception(fish_world)
                react = f.predict(f.to_feed)
                f.outout = react
                f.turn(direction=react[0])
                f.inc_speed(delta=react[1])
                f.diversity_speed[int(np.sign(np.round(react[1], 1))) + 1] += 1
                f.diversity_turn[int(np.sign(np.round(react[0], 1))) + 1] += 1
                if food_positions.shape[0] > 0:
                    all_dist = np.sqrt(np.sum((food_positions - f.pos) ** 2,
                                              axis=1))
                    closest_idx = np.argmin(all_dist)
                    min_dist = all_dist[closest_idx]
                    if min_dist < 6:
                        f.energy += 50
                        f.eaten += 1
                        dead_foods.append(foods.pop(closest_idx))
                        if len(foods) == 0:
                            break
                        food_positions = np.delete(food_positions, closest_idx,
                                                   axis=0)

        # Early stopping for bad individuals.
        alive = len([f for f in fishes if f.energy > 0])
        if alive <= 0 or len(foods) == 0:
            break

        # Draw the individuals.
        for f in fishes:
            if f.energy > 0:
                f.draw(food_world)
                # Highlight the closest food if debug is activated.
                if verbose:
                    if debug and food_positions.shape[0] > 0:
                        all_dist = np.sqrt(np.sum((food_positions - f.pos) ** 2,
                                                  axis=1))
                        closest_idx = np.argmin(all_dist)
                        min_dist = all_dist[closest_idx]
                        fp = (int(food_positions[closest_idx][0]),
                              int(food_positions[closest_idx][1]))
                        cv2.circle(food_world, fp[::-1], 1, (0, 0, 255), 5)
                        cv2.line(food_world, tuple(f.pos[::-1]), fp[::-1],
                                 (100, 30, 100), 1)

        res = np.minimum(fish_world, food_world)

        # Energy panel.
        if energy:
            f_sorted = sorted(fishes, key=lambda f: f.energy, reverse=True)
            energies, colors = zip(*[('energy ' + str(round(f.energy, 2)),
                                      f.color) for f in f_sorted])
            cb = command_bridge(f_size=0.7, commands=energies, colors=colors)
            res = merge_images_h([res, cb])

        # Panel for the focused food.
        if verbose and food_focus:
            # Find and select the longest-lived food.
            idx_best = np.argmax([f.lifetime for f in foods])
            food_selected = foods[idx_best]
            # Draw the perception squares on the combined world.
            food_selected.print_perception(res)
            # Get the focused area to show it.
            focus_area = get_subwindow(res, pt=food_selected.pos[::-1],
                                       deltax=75, deltay=75)
            # Dashboard for the food.
            cm_br = command_bridge(f_size=0.4, commands=[
                'Life Time ' + str(food_selected.lifetime),
                'Pos ' + str(food_selected.pos),
                'in' + str([round(e, 1) for e in food_selected.to_feed]),
                'Out: turn ' + str(np.round(food_selected.outout[0], 2)) +
                'Out: speed ' + str(np.round(food_selected.outout[1], 2))
            ])
            # Merge the images to show.
            detail = merge_images_v(food_selected.percs + [focus_area, cm_br])
            res = merge_images_h([res, detail])

        # Panel for the focused fish.
        if focus:
            if focus_best:
                idx_best = np.argmax([f.energy for f in fishes])
                selected = fishes[idx_best]
            if selected.energy > 0:
                focus_area = get_subwindow(res, selected.pos[::-1],
                                           deltax=75, deltay=75)
                cm_br = command_bridge(f_size=0.4, commands=[
                    'S ' + str(selected.speed),
                    'E ' + str(round(selected.energy, 1)),
                    # 'O ' + str(round(selected.orient * 180 / pi % 360, 0))
                ] +
                    ['in' + str([round(e, 1) for e in selected.to_feed])] + [
                        'Out: turn ' + str(np.round(selected.outout[0], 2)) +
                        ' Diversity ' + str(selected.diversity_turn),
                        'Out: speed ' + str(np.round(selected.outout[1], 2)) +
                        ' Diversity ' + str(selected.diversity_speed),
                    ])
                selected.print_perception(res)
                detail = merge_images_v(selected.percs + [focus_area, cm_br])
                res = merge_images_h([res, detail])
            else:
                selected = None
                focus = False
                focus_best = focus

        for f in fishes:
            if f.energy > 0:
                f.move(food_world, dx=s1 - f.s1, dy=s2 - f.s2)

        # Keyboard handling (verbose mode only).
        # Key codes: 81 left, 82 up, 84 down, 83 right, 32 space,
        # 100 d, 101 e, 27 esc.
        if verbose and not record:
            k = cv2.waitKey(33)  # & 0xff
            if focus and selected is not None:
                if k in [104, 107]:  # h / k: steer the selected fish
                    direct = -1 if k == 104 else 1
                else:
                    direct = 0
                selected.turn(direction=direct)
            if k == 98:  # b: toggle focus on the best fish
                if not focus_best:
                    focus_best = True
                    if not focus:
                        focus = True
                else:
                    focus = False
                    focus_best = False
            if k == 100:  # d: toggle debug drawing
                debug = not debug
            if k == 101:  # e: toggle the energy panel
                energy = not energy
            if k == 32:   # space: pause
                cv2.waitKey(0)
            if k in [106, 108]:  # j / l: change speed
                delta = -1 * (k - 107)
                if focus:
                    selected.inc_speed(delta)
                else:
                    for f in fishes:
                        f.inc_speed(delta)
            if k == 27:   # esc: drop focus, or quit
                if focus:
                    focus = not focus
                    focus_best = False
                else:
                    break

        if verbose == 3:
            res = merge_images_v([
                merge_images_h([
                    command_bridge(commands=['Generation ' + str(epoch)]),
                    command_bridge(commands=['Time left ' + str(max_time - time)]),
                    command_bridge(commands=[
                        'Avg speed ' + str(np.mean(
                            [f.speed for f in fishes if f.energy > 0]))
                    ]),
                    command_bridge(commands=['Alive ' + str(alive)])
                ]),
                res
            ])
        if record:
            name = 'Generation_%03d' % epoch + '_time_%03d' % time + '.jpg'
            cv2.imwrite(folder + '/' + name, res)
        elif verbose:
            cv2.imshow('Simulation', res)

        time += 1

    cv2.destroyAllWindows()
    if ALIVE:
        return num_foods, fishes, dead_foods + foods
    else:
        return num_foods, fishes
def __init__(self, image, region):
    output_sigma_factor = 1 / float(16)
    scale_sigma_factor = 1 / float(4)
    self.lamda = 1e-2
    self.lamda_scale = 1e-2
    self.interp_factor = 0.025
    nScales = 33                   # number of scale levels
    scale_model_factor = 1.0
    scale_step = 1.02              # step of one scale level
    scale_model_max_area = 32 * 16
    self.currentScaleFactor = 1.0
    self._results = []

    self.target_size = np.array([region.height, region.width])
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    init_target_size = self.target_size
    self.base_target_size = self.target_size / self.currentScaleFactor
    self.sz = utils.get_window_size(self.target_size, image.shape[:2], padding())

    # Gaussian-shaped label for translation estimation.
    output_sigma = np.sqrt(np.prod(self.target_size)) * output_sigma_factor
    scale_sigma = np.sqrt(nScales) * scale_sigma_factor
    grid_y = np.arange(np.floor(self.sz[0])) - np.floor(self.sz[0] / 2)
    grid_x = np.arange(np.floor(self.sz[1])) - np.floor(self.sz[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))

    # Gaussian-shaped label for scale estimation.
    ss = np.arange(nScales) - np.ceil(nScales / 2)
    ys = np.exp(-0.5 * (ss**2) / scale_sigma**2)
    self.scaleFactors = np.power(scale_step, -ss)
    self.yf = np.fft.fft2(y, axes=(0, 1))
    self.ysf = np.fft.fft(ys)

    feature_map = utils.get_subwindow(image, self.pos, self.sz, feature='hog')
    self.cos_window = np.outer(np.hanning(y.shape[0]), np.hanning(y.shape[1]))
    x_hog = np.multiply(feature_map, self.cos_window[:, :, None])
    xf = np.fft.fft2(x_hog, axes=(0, 1))

    # Scale search preprocessing.
    if nScales % 2 == 0:
        self.scale_window = np.hanning(nScales + 1)
        self.scale_window = self.scale_window[1:]
    else:
        self.scale_window = np.hanning(nScales)
    self.scaleSizeFactors = self.scaleFactors
    self.min_scale_factor = np.power(
        scale_step, np.ceil(np.log(5. / np.min(self.sz)) / np.log(scale_step)))
    self.max_scale_factor = np.power(
        scale_step,
        np.floor(np.log(np.min(np.divide(image.shape[:2],
                                         self.base_target_size)))
                 / np.log(scale_step)))
    if scale_model_factor * scale_model_factor * np.prod(init_target_size) \
            > scale_model_max_area:
        scale_model_factor = np.sqrt(scale_model_max_area /
                                     np.prod(init_target_size))
    self.scale_model_sz = np.floor(init_target_size * scale_model_factor)

    s = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                  self.currentScaleFactor * self.scaleSizeFactors,
                                  self.scale_window, self.scale_model_sz)
    sf = np.fft.fftn(s, axes=[0])

    # Numerators and denominators of the translation and scale filters.
    self.x_num = np.multiply(self.yf[:, :, None], np.conj(xf))
    self.x_den = np.real(np.sum(np.multiply(xf, np.conj(xf)), axis=2))
    self.s_num = np.multiply(self.ysf[:, None], np.conj(sf))
    self.s_den = np.real(np.sum(np.multiply(sf, np.conj(sf)), axis=1))
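# A minimal usage sketch for the DSST-style tracker defined by this __init__ and
# the first track() in this section. The class name DSSTTracker and the frame
# source are hypothetical; only __init__ and track appear in the snippets.
import vot

def run_tracker(frames):
    frames = iter(frames)
    first = next(frames)
    region = vot.Rectangle(100, 80, 40, 60)  # hypothetical initial box (x, y, w, h)
    tracker = DSSTTracker(first, region)     # assumed class name
    # One vot.Rectangle per subsequent frame.
    return [tracker.track(frame) for frame in frames]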
def __init__(self, img, start_pos, target_size, padding=2.5, lamb=0.0001,
             output_sigma_factor=0.1, interp_factor=0.075, cell_size=1,
             feature='gray', resize=False,
             kernel={'kernel_type': 'gaussian', 'sigma': 0.2},
             showvideo=True):
    self.original_img = img
    self.img = img
    self.padding = padding
    self.lamb = lamb
    self.output_sigma_factor = output_sigma_factor
    self.interp_factor = interp_factor
    self.cell_size = cell_size
    self.feature = feature
    self.showvideo = showvideo
    self.original_pos = start_pos
    self.original_target_size = target_size
    self.pos = start_pos            # the box's CENTER point, formatted as [y, x]
    self.target_size = target_size  # the box's size, formatted as [h, w]
    self.base_size = target_size
    self.kernel_type = kernel['kernel_type']
    if self.kernel_type == 'gaussian':
        self.kernel_sigma = kernel['sigma']
    elif self.kernel_type == 'polynomial':
        self.kernel_poly_a = kernel['poly_a']
        self.kernel_poly_b = kernel['poly_b']
    self.resize = resize
    if np.sqrt(np.prod(self.target_size)) >= 150:
        print("resize image")
        self.resize = True
    if self.resize:
        print("image is resized")
        self.pos = tuple([int(ele / 2) for ele in self.pos])
        self.target_size = tuple([int(ele / 2) for ele in self.target_size])
        self.img_size = (int(img.shape[0] / 2), int(img.shape[1] / 2))
        img = cv2.resize(img, self.img_size[::-1])
    # In OpenCV the image size comes from shape, formatted as (h, w, c),
    # where c is the number of channels.
    self.img_size = (img.shape[0], img.shape[1])
    self.window_size = (int(self.target_size[0] * self.padding),
                        int(self.target_size[1] * self.padding))
    if self.feature == 'gray':
        # Convert the image from BGR to grayscale.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Fixed quantities: the desired Gaussian response and the cosine window.
    output_sigma = np.sqrt(np.prod(self.target_size)) \
        * self.output_sigma_factor / self.cell_size
    self.y = gaussian_shaped_labels(
        output_sigma, (int(self.window_size[0] / self.cell_size),
                       int(self.window_size[1] / self.cell_size)))
    self.yf = fft2(self.y)
    self.cos_window = np.outer(np.hanning(self.yf.shape[0]),
                               np.hanning(self.yf.shape[1]))
    # Initialise the model.
    patch = get_subwindow(img, self.pos, self.window_size)
    xf = fft2(get_feature(patch, self.feature, self.cell_size, self.cos_window))
    self.model_xf = xf
    self.model_alphaf = self.__train(xf)
    self.__show_image()
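# `gaussian_shaped_labels` is called above but not listed. A sketch of the usual
# KCF helper: a Gaussian centred on the grid, circularly shifted so the peak sits
# at element [0, 0]; this follows the reference KCF code and is an assumption
# about this project's exact version.
import numpy as np

def gaussian_shaped_labels(sigma, sz):
    h, w = sz
    rs, cs = np.meshgrid(np.arange(w) - np.floor(w / 2),
                         np.arange(h) - np.floor(h / 2))
    labels = np.exp(-0.5 / sigma ** 2 * (rs ** 2 + cs ** 2))
    # Shift the peak to (0, 0) so that zero displacement produces the maximum
    # response at the origin of the correlation output.
    return np.roll(labels, (-int(np.floor(h / 2)), -int(np.floor(w / 2))),
                   axis=(0, 1))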
def track(self, image):
    # Translation estimation with hierarchical CNN features.
    test = utils.get_subwindow(image, self.pos, self.sz,
                               self.current_scale_factor)
    test = transform.resize(test, (224, 224))
    test = (test - imgMean) / imgStd
    test = np.transpose(test, (2, 0, 1))
    feature_ensemble = model(
        Variable(torch.from_numpy(test[None, :, :, :]).float()).cuda())
    for i in range(numlayers):
        feature = feature_ensemble[i].data[0].cpu().numpy().transpose((1, 2, 0))
        xt = ndimage.zoom(feature,
                          (float(self.cos_window.shape[0]) / feature.shape[0],
                           float(self.cos_window.shape[1]) / feature.shape[1], 1),
                          order=1)
        xt = np.multiply(xt, self.cos_window[:, :, None])
        xtf = np.fft.fft2(xt, axes=(0, 1))
        # Weighted sum of the per-layer responses.
        response = np.real(np.fft.ifft2(
            np.divide(np.sum(np.multiply(self.x_num[i], xtf), axis=2),
                      (self.x_den[i] + self.lamda)))) * layerweights[i]
        if i == 0:
            response_final = response
        else:
            response_final = np.add(response_final, response)
    v_centre, h_centre = np.unravel_index(response_final.argmax(),
                                          response_final.shape)
    vert_delta, horiz_delta = \
        [(v_centre - response_final.shape[0] / 2)
         * self.current_scale_factor * self.cell_size,
         (h_centre - response_final.shape[1] / 2)
         * self.current_scale_factor * self.cell_size]
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

    # Scale estimation with the DSST-style 1-D scale filter.
    st = utils.get_scale_subwindow(image, self.pos, self.target_size,
                                   self.current_scale_factor * self.scaleFactors,
                                   self.scale_window, self.scale_model_sz)
    stf = np.fft.fftn(st, axes=[0])
    scale_response = np.real(np.fft.ifftn(
        np.sum(np.divide(np.multiply(self.s_num, stf),
                         (self.s_den[:, None] + self.lamda)), axis=1)))
    recovered_scale = np.argmax(scale_response)
    self.current_scale_factor = \
        self.current_scale_factor * self.scaleFactors[recovered_scale]
    if self.current_scale_factor < self.min_scale_factor:
        self.current_scale_factor = self.min_scale_factor
    elif self.current_scale_factor > self.max_scale_factor:
        self.current_scale_factor = self.max_scale_factor

    # Update both models at the new position/scale.
    update_patch = utils.get_subwindow(image, self.pos, self.sz,
                                       scale_factor=self.current_scale_factor)
    update_patch = transform.resize(update_patch, (224, 224))
    update_patch = (update_patch - imgMean) / imgStd
    update_patch = np.transpose(update_patch, (2, 0, 1))
    feature_ensemble = model(
        Variable(torch.from_numpy(update_patch[None, :, :, :]).float()).cuda())
    for i in range(numlayers):
        feature = feature_ensemble[i].data[0].cpu().numpy().transpose((1, 2, 0))
        xl = ndimage.zoom(feature,
                          (float(self.cos_window.shape[0]) / feature.shape[0],
                           float(self.cos_window.shape[1]) / feature.shape[1], 1),
                          order=1)
        xl = np.multiply(xl, self.cos_window[:, :, None])
        xlf = np.fft.fft2(xl, axes=(0, 1))
        self.x_num[i] = (1 - self.interp_factor) * self.x_num[i] \
            + self.interp_factor * np.multiply(self.yf[:, :, None],
                                               np.conj(xlf))
        self.x_den[i] = (1 - self.interp_factor) * self.x_den[i] \
            + self.interp_factor * np.real(
                np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))
    sl = utils.get_scale_subwindow(image, self.pos, self.target_size,
                                   self.current_scale_factor * self.scaleFactors,
                                   self.scale_window, self.scale_model_sz)
    slf = np.fft.fftn(sl, axes=[0])
    new_s_num = np.multiply(self.ysf[:, None], np.conj(slf))
    new_s_den = np.real(np.sum(np.multiply(slf, np.conj(slf)), axis=1))
    self.s_num = (1 - self.interp_factor) * self.s_num \
        + self.interp_factor * new_s_num
    self.s_den = (1 - self.interp_factor) * self.s_den \
        + self.interp_factor * new_s_den

    self.final_size = self.target_size * self.current_scale_factor
    return vot.Rectangle(self.pos[1] - self.final_size[1] / 2,
                         self.pos[0] - self.final_size[0] / 2,
                         self.final_size[1], self.final_size[0])
def __init__(self, image, region):
    self.frame_num = 1
    # Target size.
    self.target_size = np.array([region.height, region.width])
    s = max(region.height, region.width)
    # Target centre position.
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    # Optionally shrink the image if it is large, since the computation would
    # otherwise be too expensive:
    # self.resize_image = (np.sqrt(np.prod(self.target_size)) >= 100)
    # if self.resize_image:
    #     self.pos = np.floor(self.pos / 2)
    #     self.target_size = np.floor(self.target_size / 2)

    # Feature list selecting which features to use: gray, fhog, cn, raw.
    self.feature_list = np.array(['', 'fhog', ''])
    # Colour lookup matrix used for CN (colour names) feature extraction.
    self.w2c = np.load('w2crs.npy')
    # Number of feature channels, used to initialise the feature matrices.
    self.num_feature_ch = 0
    if 'cn' in self.feature_list:
        self.num_feature_ch = self.num_feature_ch + 2
    if 'fhog' in self.feature_list:
        self.num_feature_ch = self.num_feature_ch + 9
    if 'gray' in self.feature_list:
        self.num_feature_ch = self.num_feature_ch + 1
    if 'raw' in self.feature_list:
        self.num_feature_ch = self.num_feature_ch + 3

    # Padding value defining the search region.
    padding = 5
    select_padding = np.int(np.ceil(padding * np.sqrt(2)))
    self.select_patch_size = np.floor(np.array([s, s]) * (1 + select_padding))
    # Extract a larger region of the image, used to generate rotated samples.
    img4sample = utils.get_subwindow(image, self.pos, self.select_patch_size)

    if 'fhog' in self.feature_list:
        self.cell_size = np.int(np.round(s / 15))
    else:
        self.cell_size = 4
    self.patch_size = np.floor(np.array((s * (1 + padding), s * (1 + padding))))
    self.patch_size_cell = np.array([round(self.patch_size[0] / self.cell_size),
                                     round(self.patch_size[1] / self.cell_size)])

    # Sequences storing the filters, the samples, and their Fourier transforms,
    # used later as a lookup table over rotation angles.
    self.filter_sequence1 = np.zeros((24, np.int(self.patch_size_cell[0]),
                                      np.int(self.patch_size_cell[1])))
    self.x_sequence = np.zeros((24, np.int(self.patch_size_cell[0]),
                                np.int(self.patch_size_cell[1]),
                                self.num_feature_ch))
    self.xf_sequence1 = np.zeros((24, np.int(self.patch_size_cell[0]),
                                  np.int(self.patch_size_cell[1]),
                                  self.num_feature_ch))
    self.filter_sequence = self.filter_sequence1.astype(np.complex128)
    self.xf_sequence = self.xf_sequence1.astype(np.complex128)
    self.angle_index = 0

    # Gaussian-shaped regression target on the cell grid.
    spatial_bandwidth_sigma_factor = 1 / float(16)
    output_sigma = np.sqrt(np.prod(self.target_size)) \
        * spatial_bandwidth_sigma_factor / self.cell_size
    grid_y = np.arange(np.floor(self.patch_size_cell[0])) \
        - np.floor(self.patch_size_cell[0] / 2)
    grid_x = np.arange(np.floor(self.patch_size_cell[1])) \
        - np.floor(self.patch_size_cell[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma ** 2 * (rs ** 2 + cs ** 2))
    self.cos_window = np.outer(np.hanning(y.shape[0]), np.hanning(y.shape[1]))
    yf = np.fft.fft2(y, axes=(0, 1))

    # Train one filter per 15-degree rotation of the initial sample.
    for i in np.arange(24):
        angle = i * 15
        sample_image = transform.rotate(img4sample, angle, resize=True)
        sample_center = np.floor(np.array((sample_image.shape[0],
                                           sample_image.shape[1])) / 2)
        img_crop = utils.get_subwindow(sample_image, sample_center,
                                       self.patch_size)
        if i == 0:
            # On the first sample, fit a PCA basis for the CN features.
            im_patch_255 = np.floor(img_crop * 255)
            cn_out = utils.im2c(im_patch_255, self.w2c, self.patch_size)
            cn_pca = np.reshape(cn_out, [cn_out.shape[0] * cn_out.shape[1],
                                         cn_out.shape[2]])
            data_mean = np.mean(cn_pca, axis=0)
            data_matrix = cn_pca - data_mean
            cov_matrix = 1 / (cn_out.shape[0] * cn_out.shape[1] - 1) \
                * np.dot(np.transpose(data_matrix), data_matrix)
            pca_basis = np.linalg.svd(cov_matrix)
            self.projection_matrix = pca_basis[0][:, 0:2]
        img_cro = utils.get_feature_map(img_crop, self.feature_list,
                                        self.num_feature_ch,
                                        self.patch_size_cell, self.w2c,
                                        self.cell_size, self.projection_matrix)
        # Apply the cosine window and transform to the Fourier domain.
        self.x = np.multiply(img_cro, self.cos_window[:, :, None])
        self.xf = np.fft.fft2(self.x, axes=(0, 1))
        self.feature_bandwidth_sigma = 0.2
        k = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                     self.xf, self.x)
        lambda_value = 1e-4
        self.alphaf = np.divide(yf, np.fft.fft2(k, axes=(0, 1)) + lambda_value)
        self.filter_sequence[i, :, :] = self.alphaf
        self.x_sequence[i, :, :, :] = self.x
        self.xf_sequence[i, :, :, :] = self.xf

    self.response_series = np.array([0., 0., 0.])
    self.v_centre = np.array([0., 0., 0.])
    self.h_centre = np.array([0., 0., 0.])
def track(self, image):
    test_crop = utils.get_subwindow(image, self.pos, self.patch_size)
    feature_t = utils.get_feature_map(test_crop, self.feature_list,
                                      self.num_feature_ch,
                                      self.patch_size_cell, self.w2c,
                                      self.cell_size, self.projection_matrix)
    z = np.multiply(feature_t, self.cos_window[:, :, None])
    zf = np.fft.fft2(z, axes=(0, 1))

    # Evaluate the filters for the previous angle and its two neighbours.
    angle_index_series = (np.array((self.angle_index - 1, self.angle_index,
                                    self.angle_index + 1)) + 24) % 24
    response_map_series = np.zeros((24, np.int(self.patch_size_cell[0]),
                                    np.int(self.patch_size_cell[1])))
    j = 0
    for i in angle_index_series:
        k_test = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                          self.xf_sequence[i, :, :, :],
                                          self.x_sequence[i, :, :, :], zf, z)
        kf_test = np.fft.fft2(k_test, axes=(0, 1))
        alphaf_test = self.filter_sequence[i, :, :]
        response = np.real(np.fft.ifft2(np.multiply(alphaf_test, kf_test)))
        response_map_series[i, :, :] = response
        self.response_series[j] = np.max(response)
        self.v_centre[j], self.h_centre[j] = np.unravel_index(
            response.argmax(), response.shape)
        j = j + 1

    # Keep the angle with the strongest response.
    f2.write(str(np.max(self.response_series)) + '\n')
    max_response_index = np.where(
        self.response_series == np.max(self.response_series))[0][0]
    v = self.v_centre[max_response_index]
    h = self.h_centre[max_response_index]
    self.angle_index = angle_index_series[max_response_index]
    response4show = np.reshape(
        response_map_series[self.angle_index, :, :],
        (np.int(self.patch_size_cell[0]), np.int(self.patch_size_cell[1])))
    cv2.imshow('response', response4show)
    f1.write(str(self.angle_index) + '\n')

    vert_delta, horiz_delta = [v - response.shape[0] / 2,
                               h - response.shape[1] / 2]
    self.pos = [self.pos[0] + vert_delta * self.cell_size,
                self.pos[1] + horiz_delta * self.cell_size]
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1], self.target_size[0])
def __init__(self, image, region):
    self.target_size = np.array([region.height, region.width])
    self.pos = [region.y + region.height / 2, region.x + region.width / 2]
    self.sz = utils.get_window_size(self.target_size, image.shape[:2], padding())

    # Position prediction params.
    self.lamda = 1e-4
    output_sigma_factor = 0.1
    self.cell_size = 4
    self.interp_factor = 0.01
    self.x_num = []
    self.x_den = []

    # Scale estimation params.
    self.current_scale_factor = 1.0
    nScales = 33
    scale_step = 1.02  # step of one scale level
    scale_sigma_factor = 1 / float(4)
    self.interp_factor_scale = 0.01
    scale_model_max_area = 32 * 16
    scale_model_factor = 1.0
    self.min_scale_factor = np.power(
        scale_step, np.ceil(np.log(5. / np.min(self.sz)) / np.log(scale_step)))
    self.max_scale_factor = np.power(
        scale_step,
        np.floor(np.log(np.min(np.divide(image.shape[:2], self.target_size)))
                 / np.log(scale_step)))
    if scale_model_factor * scale_model_factor * np.prod(self.target_size) \
            > scale_model_max_area:
        scale_model_factor = np.sqrt(scale_model_max_area /
                                     np.prod(self.target_size))
    self.scale_model_sz = np.floor(self.target_size * scale_model_factor)

    # Gaussian-shaped label for position prediction.
    l1_patch_num = np.floor(self.sz / self.cell_size)
    output_sigma = np.sqrt(np.prod(self.target_size)) \
        * output_sigma_factor / self.cell_size
    grid_y = np.arange(np.floor(l1_patch_num[0])) - np.floor(l1_patch_num[0] / 2)
    grid_x = np.arange(np.floor(l1_patch_num[1])) - np.floor(l1_patch_num[1] / 2)
    rs, cs = np.meshgrid(grid_x, grid_y)
    y = np.exp(-0.5 / output_sigma**2 * (rs**2 + cs**2))
    self.yf = np.fft.fft2(y, axes=(0, 1))
    self.cos_window = np.outer(np.hanning(self.yf.shape[0]),
                               np.hanning(self.yf.shape[1]))

    # Gaussian-shaped label for scale estimation.
    ss = np.arange(nScales) - np.ceil(nScales / 2)
    scale_sigma = np.sqrt(nScales) * scale_sigma_factor
    ys = np.exp(-0.5 * (ss**2) / scale_sigma**2)
    self.scaleFactors = np.power(scale_step, -ss)
    self.ysf = np.fft.fft(ys)
    if nScales % 2 == 0:
        self.scale_window = np.hanning(nScales + 1)
        self.scale_window = self.scale_window[1:]
    else:
        self.scale_window = np.hanning(nScales)

    # Extract hierarchical convolutional features and train the position filters.
    img = utils.get_subwindow(image, self.pos, self.sz)
    img = transform.resize(img, (224, 224))
    img = (img - imgMean) / imgStd
    img = np.transpose(img, (2, 0, 1))
    feature_ensemble = model(
        Variable(torch.from_numpy(img[None, :, :, :]).float()).cuda())
    for i in range(numlayers):
        feature = feature_ensemble[i].data[0].cpu().numpy().transpose((1, 2, 0))
        x = ndimage.zoom(feature,
                         (float(self.cos_window.shape[0]) / feature.shape[0],
                          float(self.cos_window.shape[1]) / feature.shape[1], 1),
                         order=1)
        x = np.multiply(x, self.cos_window[:, :, None])
        xf = np.fft.fft2(x, axes=(0, 1))
        self.x_num.append(np.multiply(self.yf[:, :, None], np.conj(xf)))
        self.x_den.append(np.real(np.sum(np.multiply(xf, np.conj(xf)), axis=2)))

    # Extract the sample feature map for the scale filter and train it.
    s = utils.get_scale_subwindow(image, self.pos, self.target_size,
                                  self.current_scale_factor * self.scaleFactors,
                                  self.scale_window, self.scale_model_sz)
    sf = np.fft.fftn(s, axes=[0])
    self.s_num = np.multiply(self.ysf[:, None], np.conj(sf))
    self.s_den = np.real(np.sum(np.multiply(sf, np.conj(sf)), axis=1))