def trainD(self, kf_cn, kf_hog, alphaf, alpha, lambda1, dim):
    """Re-estimate the two kernel weights and refresh their running sums.

    A scalar weight is produced for each kernel spectrum as the real part
    of ``numerator / denominator``. When ``self.frame_index == 1`` the
    freshly computed terms are used directly; otherwise they are blended
    with the stored ``self.d_num1/2`` and ``self.d_den1/2`` using
    ``self.lr_cn`` (first kernel) and ``self.lr_hog`` (second kernel).
    The blended terms are written back to those four attributes.

    :param kf_cn: first kernel in the Fourier domain
    :param kf_hog: second kernel in the Fourier domain
    :param alphaf: coefficients in the Fourier domain
    :param alpha: coefficients in the spatial domain
    :param lambda1: factor applied to ``alpha`` in the target term
    :param dim: not used by this method; kept for existing callers
    :return: list ``[d_first, d_second]`` with the two real-valued weights
    """

    def blend(rate, stored, fresh):
        # exponential moving average of a running term
        return (1 - rate) * stored + rate * fresh

    target = 2 * ifft2(self.yf) - lambda1 * alpha
    target_row = target.flatten().conj().T
    resp_cn = ifft2(np.conj(kf_cn) * alphaf).flatten()
    resp_hog = ifft2(np.conj(kf_hog) * alphaf).flatten()

    num_cn = target_row.dot(resp_cn)
    num_hog = target_row.dot(resp_hog)
    den_cn = 2 * (resp_cn.conj().T.dot(resp_cn))
    den_hog = 2 * (resp_hog.conj().T.dot(resp_hog))

    if self.frame_index != 1:
        num_cn = blend(self.lr_cn, self.d_num1, num_cn)
        num_hog = blend(self.lr_hog, self.d_num2, num_hog)
        den_cn = blend(self.lr_cn, self.d_den1, den_cn)
        den_hog = blend(self.lr_hog, self.d_den2, den_hog)

    weights = [np.real(num_cn / den_cn), np.real(num_hog / den_hog)]

    self.d_num1, self.d_num2 = num_cn, num_hog
    self.d_den1, self.d_den2 = den_cn, den_hog
    return weights
def create_csr_filter(self, img, Y, P):
    """
    Create a masked correlation filter.

    The filter is obtained with an Augmented Lagrangian iterative
    optimization: a fixed number of alternating updates of an
    unconstrained filter ``G`` and a masked filter ``H``, with the
    penalty growing geometrically up to a cap between updates.

    :param img: image patch (already normalized)
    :param Y: gaussian shaped labels (note that the peak must be at the top-left corner)
    :param P: padding mask
    :return: the masked filter ``H`` in the Fourier domain
    """
    penalty = 5
    penalty_growth = 3
    penalty_cap = 20
    n_steps = 4
    reg = penalty / 100

    mask = P[:, :, None]
    F = fft2(img)
    cross = F * np.conj(Y)[:, :, None]
    auto = F * np.conj(F)

    # mask filter
    H = fft2(ifft2(cross / (auto + reg)) * mask)
    # initialize lagrangian multiplier
    L = np.zeros_like(H)

    for step in range(1, n_steps + 1):
        G = (cross + penalty * H - L) / (auto + penalty)
        H = fft2(np.real(mask * ifft2(penalty * G + L) / (penalty + reg)))
        # stop optimization after fixed number of steps
        if step == n_steps:
            break
        L += penalty * (G - H)
        penalty = min(penalty_cap, penalty_growth * penalty)
    return H
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` and refresh the kernel model.

    Detection: a windowed patch around ``self._center`` is correlated (via
    ``self._dgk``) with the stored template ``self.x``; the peak of the
    response map, relative to ``self._init_response_center``, moves the
    centre. Training: a new patch at the updated centre yields fresh
    numerator/denominator terms, which are blended into
    ``self.alphaf_num``, ``self.alphaf_den`` and ``self.x`` with
    ``self.interp_factor``.

    :param current_frame: frame to search
    :param vis: when exactly ``True``, the response map is stored in ``self.score``
    :return: ``[x, y, w, h]`` with ``x, y`` being the centre minus half of ``self.w`` / ``self.h``
    """
    window = self._window[:, :, None]
    rate = self.interp_factor

    # --- detection ---
    search = self.get_sub_window(current_frame, self._center, self.crop_size)
    search = window * search
    kf = fft2(self._dgk(self.x, search))
    responses = np.real(ifft2(self.alphaf_num * kf.conj() / (self.alphaf_den)))
    if vis is True:
        self.score = responses

    peak = np.unravel_index(np.argmax(responses, axis=None), responses.shape)
    shift_y = self._init_response_center[0] - peak[0]
    shift_x = self._init_response_center[1] - peak[1]
    centre_x, centre_y = self._center
    centre_x -= shift_x
    centre_y -= shift_y
    self._center = (centre_x, centre_y)

    # --- model update ---
    template = self.get_sub_window(current_frame, self._center, self.crop_size)
    template = template * window
    kf = fft2(self._dgk(template, template))
    fresh_num = self.yf * kf
    fresh_den = kf * (kf + self.lambda_)
    self.alphaf_num = (1 - rate) * self.alphaf_num + rate * fresh_num
    self.alphaf_den = (1 - rate) * self.alphaf_den + rate * fresh_den
    self.x = (1 - rate) * self.x + rate * template

    left = self._center[0] - self.w / 2
    top = self._center[1] - self.h / 2
    return [left, top, self.w, self.h]
def _dgk(self, x1, x2): c = np.fft.fftshift(ifft2(fft2(x1) * np.conj(fft2(x2)))) d = np.dot(x1.flatten().conj(), x1.flatten()) + np.dot( x2.flatten().conj(), x2.flatten()) - 2 * c k = np.exp(-1 / self.sigma**2 * np.clip(d, a_min=0, a_max=None) / np.size(x1)) return k
def ADMM(self, xlf, f_pre_f, mu):
    """Solve for the filter with an alternating (ADMM-style) iteration.

    Runs ``self.admm_max_iterations`` rounds. Each round updates the
    filter ``f`` in closed form, updates the auxiliary variable ``g``
    through ``self.argmin_g`` in the spatial domain, updates the
    multiplier ``h`` and grows the penalty ``gamma`` by
    ``self.penalty_scale_step`` up to ``self.max_penalty_factor``.

    :param xlf: training sample in the Fourier domain (summed over axis 2 here)
    :param f_pre_f: previous filter in the Fourier domain
    :param mu: weight of the ``f_pre_f`` term
    :return: the filter ``f`` in the Fourier domain (all zeros if no iteration runs)
    """
    sample_f = xlf
    filt_f = np.zeros_like(sample_f)
    aux_f = np.zeros_like(filt_f)
    mult_f = np.zeros_like(filt_f)

    gamma = self.init_penalty_factor
    gamma_cap = self.max_penalty_factor
    gamma_growth = self.penalty_scale_step
    T = self.feature_map_sz[0] * self.feature_map_sz[1]

    # quantities that do not change across iterations
    sample_conj = np.conj(sample_f)
    S_xx = np.sum(sample_conj * sample_f, axis=2)
    Sf_pre_f = np.sum(sample_conj * f_pre_f, axis=2)
    Sfx_pre_f = sample_f * Sf_pre_f[:, :, None]

    step = 1
    while step <= self.admm_max_iterations:
        rho = gamma + mu
        B = S_xx + T * rho
        Sgx_f = np.sum(sample_conj * aux_f, axis=2)
        Shx_f = np.sum(sample_conj * mult_f, axis=2)

        tmp0 = (1 / (T * rho) * (self.yf[:, :, None] * sample_f)) \
            - ((1 / rho) * mult_f) \
            + (gamma / rho) * aux_f \
            + (mu / rho) * f_pre_f
        tmp1 = 1 / (T * rho) * (sample_f * ((S_xx * self.yf)[:, :, None]))
        tmp2 = mu / rho * Sfx_pre_f
        tmp3 = 1 / rho * (sample_f * (Shx_f[:, :, None]))
        tmp4 = gamma / rho * (sample_f * Sgx_f[:, :, None])

        # filter update
        filt_f = tmp0 - (tmp1 + tmp2 - tmp3 + tmp4) / B[:, :, None]
        # auxiliary variable update, done in the spatial domain
        aux_f = fft2(
            self.argmin_g(self.reg_window, gamma,
                          (ifft2(gamma * (filt_f + mult_f)))))
        # multiplier update
        mult_f = mult_f + (gamma * (filt_f - aux_f))
        gamma = min(gamma_growth * gamma, gamma_cap)
        step += 1
    return filt_f
def ADMM(self, xf):
    """Solve for the filter ``g`` with an alternating (ADMM-style) iteration.

    Runs ``self.admm_iterations`` rounds. Each round updates ``g`` in
    closed form, then builds ``h`` by cropping the spatial-domain filter
    with ``self.get_subwindow_no_window`` (size ``self.small_filter_sz``),
    writing the crop into a zero array and transforming it back, then
    updates the multiplier and multiplies the penalty by 10 (capped at
    10000).

    :param xf: training sample in the Fourier domain (summed over axis 2 here)
    :return: the filter ``g`` in the Fourier domain (all zeros if no iteration runs)
    """
    filt_f = np.zeros_like(xf)
    crop_f = np.zeros_like(filt_f)
    mult_f = np.zeros_like(filt_f)

    penalty = 1
    growth = 10
    penalty_cap = 10000
    T = self.feature_map_sz[0] * self.feature_map_sz[1]
    S_xx = np.sum(np.conj(xf) * xf, 2)

    step = 1
    while step <= self.admm_iterations:
        B = S_xx + (T * penalty)
        S_lx = np.sum(np.conj(xf) * mult_f, axis=2)
        S_hx = np.sum(np.conj(xf) * crop_f, axis=2)

        tmp0 = (1 / (T * penalty) * (self.yf[:, :, None] * xf)) \
            - ((1 / penalty) * mult_f) + crop_f
        tmp1 = 1 / (T * penalty) * (xf * ((S_xx * self.yf)[:, :, None]))
        tmp2 = 1 / penalty * (xf * (S_lx[:, :, None]))
        tmp3 = xf * S_hx[:, :, None]

        # solve for g
        filt_f = tmp0 - (tmp1 - tmp2 + tmp3) / B[:, :, None]

        # solve for h
        spatial = (T / ((penalty * T) + self.admm_lambda)) * ifft2(
            penalty * filt_f + mult_f)
        centre = (int(self.feature_map_sz[0] / 2),
                  int(self.feature_map_sz[1] / 2))
        xs, ys, spatial = self.get_subwindow_no_window(
            spatial, centre, self.small_filter_sz)
        canvas = np.zeros(
            (self.feature_map_sz[1], self.feature_map_sz[0],
             spatial.shape[2]),
            dtype=np.complex64)
        canvas[ys, xs, :] = spatial
        crop_f = fft2(canvas)

        # multiplier and penalty update
        mult_f = mult_f + (penalty * (filt_f - crop_f))
        penalty = min(growth * penalty, penalty_cap)
        step += 1
    return filt_f
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame``, then update translation filter and scale.

    The translation response is ``ifft2(sum(hf_num * xtf) / (hf_den + lambda_))``;
    its peak offset from ``self._init_response_center``, multiplied by the
    current scale factor, moves ``self._center``. The scale factor is then
    refreshed by ``self.scale_estimator.update`` (and clipped when
    ``self.scale_type == 'normal'``), and the filter numerator/denominator
    are blended with ``self.interp_factor``.

    :param current_frame: frame to search
    :param vis: when exactly ``True``, stores the response in ``self.score``
        and copies ``self.crop_size`` to ``self.win_sz``
    :return: ``[x, y, w, h]`` built from the centre and ``self.target_sz``
    """

    def sample_spectrum():
        # windowed translation sample at the current centre/scale, in the Fourier domain
        sample = self.get_translation_sample(current_frame, self._center,
                                             self.crop_size,
                                             self.current_scale_factor,
                                             self._window)
        return fft2(sample)

    # --- detection ---
    xtf = sample_spectrum()
    response = np.real(
        ifft2(np.sum(self.hf_num * xtf, axis=2) / (self.hf_den + self.lambda_)))
    if vis is True:
        self.score = response
        self.win_sz = self.crop_size

    peak = np.unravel_index(np.argmax(response, axis=None), response.shape)
    shift_y = (peak[0] - self._init_response_center[0]) * self.current_scale_factor
    shift_x = (peak[1] - self._init_response_center[1]) * self.current_scale_factor
    centre_x, centre_y = self._center
    centre_x += shift_x
    centre_y += shift_y
    self._center = (centre_x, centre_y)

    # --- scale ---
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_size,
        self.current_scale_factor)
    if self.scale_type == 'normal':
        self.current_scale_factor = np.clip(self.current_scale_factor,
                                            a_min=self._min_scale_factor,
                                            a_max=self._max_scale_factor)

    # --- filter update ---
    xlf = sample_spectrum()
    fresh_num = self.yf[:, :, None] * np.conj(xlf)
    fresh_den = np.sum(xlf * np.conj(xlf), axis=2)
    rate = self.interp_factor
    self.hf_den = (1 - rate) * self.hf_den + rate * fresh_den
    self.hf_num = (1 - rate) * self.hf_num + rate * fresh_num

    self.target_sz = (self.base_target_size[0] * self.current_scale_factor,
                      self.base_target_size[1] * self.current_scale_factor)
    left = self._center[0] - self.target_sz[0] / 2
    top = self._center[1] - self.target_sz[1] / 2
    return [left, top, self.target_sz[0], self.target_sz[1]]
def _kernel_correlation(self, xf, yf, kernel='gaussian'): if kernel== 'gaussian': N=xf.shape[0]*xf.shape[1] xx=(np.dot(xf.flatten().conj().T,xf.flatten())/N) yy=(np.dot(yf.flatten().conj().T,yf.flatten())/N) xyf=xf*np.conj(yf) xy=np.sum(np.real(ifft2(xyf)),axis=2) kf = fft2(np.exp(-1 / self.sigma ** 2 * np.clip(xx+yy-2*xy,a_min=0,a_max=None) / np.size(xf))) elif kernel== 'linear': kf= np.sum(xf*np.conj(yf),axis=2)/np.size(xf) else: raise NotImplementedError return kf
def _dgk(self, x1, x2): xf = fft2(x1) yf = fft2(x2) xx = (x1.flatten().T).dot(x1.flatten()) yy = (x2.flatten().T).dot(x2.flatten()) xyf = xf * np.conj(yf) if len(xyf.shape) == 2: xyf = xyf[:, :, np.newaxis] xy = np.real(ifft2(np.sum(xyf, axis=2))) d = xx + yy - 2 * xy k = np.exp(-1 / self.sigma**2 * np.clip(d, a_min=0, a_max=None) / np.size(x1)) return k
def train_model(self):
    """Alternately fit the coefficients and the two kernel weights.

    Starting from weights ``[0.5, 0.5]``, each round builds the
    per-kernel numerator/denominator terms (blended with the stored
    ``self.alphaf_num1/2`` and ``self.alphaf_den1/2`` unless
    ``self.frame_index == 1``), stores ``self.alphaf_num``,
    ``self.alphaf_den`` and ``self.alphaf``, and re-estimates the weights
    with ``self.trainD``. From the second round on, the loop stops once
    both the summed change of ``alpha`` and of the weights are within 3%
    of the summed magnitude of their previous values. After 100 rounds
    the weights are reset to ``[0.5, 0.5]`` and the loop is abandoned.
    The per-kernel terms of the last round are stored on ``self``.

    :return: list with the two kernel weights
    """

    def blend(rate, stored, fresh):
        # exponential moving average of a running term
        return (1 - rate) * stored + rate * fresh

    d = [0.5, 0.5]
    dim = self.z_cn2.shape[2]
    kf_cn = fft2(self.dense_gauss_kernel(self.z_cn2, self.z_cn2, self.cn_sigma))
    kf_hog = fft2(self.dense_gauss_kernel(self.z_hog2, self.z_hog2, self.hog_sigma))
    lambda1 = 0.01
    threshold = 0.03
    max_rounds = 100

    rounds = 0
    converged = False
    predD = d
    while not converged:
        num_cn = self.yf * d[0] * kf_cn
        num_hog = self.yf * d[1] * kf_hog
        den_cn = d[0] * kf_cn * (d[0] * np.conj(kf_cn) + lambda1)
        den_hog = d[1] * kf_hog * (d[1] * np.conj(kf_hog) + lambda1)
        if self.frame_index != 1:
            num_cn = blend(self.lr_cn, self.alphaf_num1, num_cn)
            num_hog = blend(self.lr_hog, self.alphaf_num2, num_hog)
            den_cn = blend(self.lr_cn, self.alphaf_den1, den_cn)
            den_hog = blend(self.lr_hog, self.alphaf_den2, den_hog)

        self.alphaf_num = num_cn + num_hog
        self.alphaf_den = den_cn + den_hog
        self.alphaf = self.alphaf_num / self.alphaf_den
        alpha = ifft2(self.alphaf)
        d = self.trainD(kf_cn, kf_hog, self.alphaf, alpha, lambda1, dim)

        rounds += 1
        if rounds > 1:
            delta_alpha = np.abs(alpha - prev_alpha)
            deltaD = np.abs(np.array(d) - np.array(predD))
            alpha_settled = np.sum(delta_alpha) <= threshold * np.sum(
                np.abs(prev_alpha))
            if alpha_settled and np.sum(np.array(deltaD)) <= threshold * np.sum(
                    np.abs(np.array(predD))):
                converged = True
        prev_alpha = alpha
        predD = d
        if rounds >= max_rounds:
            # give up and fall back to equal weights
            d = [0.5, 0.5]
            break

    self.alphaf_num1 = num_cn
    self.alphaf_num2 = num_hog
    self.alphaf_den1 = den_cn
    self.alphaf_den2 = den_hog
    return d
def phase_correlation(src1, src2):
    """Estimate the shift between two multi-channel arrays by phase correlation.

    The normalised cross-power spectrum is summed over axis 2, transformed
    back and centred with ``fftshift``. The peak is refined to a weighted
    centroid over its 3x3 neighbourhood (indices clipped to the array),
    and the result is reported relative to the array centre
    (``shape // 2``).

    :param src1: reference array, 3-D
    :param src2: shifted array, same shape as ``src1``
    :return: ``(ptx, pty, mscore)`` — offset along axis 1, offset along
        axis 0, and the maximum of the correlation surface
    """
    eps = 2e-16
    cross = fft2(src2) * np.conj(fft2(src1))
    magnitude = np.sqrt(cross * np.conj(cross)) + eps
    surface = np.real(ifft2(np.sum(cross / magnitude, axis=2)))
    surface = np.fft.fftshift(surface, axes=(0, 1))

    mscore = np.max(surface)
    peak_y, peak_x = np.unravel_index(np.argmax(surface, axis=None),
                                      surface.shape)

    half_y = half_x = 1
    rows = np.arange(peak_y - half_y, peak_y + half_y + 1).astype(np.int64)
    cols = np.arange(peak_x - half_x, peak_x + half_x + 1).astype(np.int64)
    rows = np.clip(rows, a_min=0, a_max=surface.shape[0] - 1)
    cols = np.clip(cols, a_min=0, a_max=surface.shape[1] - 1)

    # weighted centroid of the neighbourhood around the peak
    patch = surface[rows, :][:, cols]
    total = np.sum(patch) + eps
    centroid_y = np.sum(np.sum(patch, axis=1) * rows) / total
    centroid_x = np.sum(np.sum(patch, axis=0) * cols) / total

    pty = centroid_y - (src1.shape[0]) // 2
    ptx = centroid_x - (src1.shape[1]) // 2
    return ptx, pty, mscore
def update(self, current_frame, vis=False):
    """Track the target with an ensemble of experts and refresh all models.

    Steps, in order:

    1. Build a colour-likelihood response and a windowed feature map from
       patches around ``self._center``; hand each of the seven experts its
       own feature subset.
    2. For every expert, merge its correlation response with the colour
       response (``self.merge_factor``), take the peak as its position and
       record rectangle, centre and smoothness bookkeeping.
    3. Once ``self.frame_idx >= self.period - 1``, score experts with
       ``self.robustness_eva`` and adopt the best one; before that, adopt
       expert 6.
    4. Choose the learning rates from the combination of mean robustness
       score and PSR score, optionally update the scale, then update every
       expert's filter and (if its rate is non-zero) the colour histograms.

    :param current_frame: frame to search; ``shape[0]``/``shape[1]`` are used
        to bound the background area
    :param vis: when exactly ``True``, the chosen response is stored in ``self.score``
    :return: tuple ``([x, y, w, h], final_score)`` where the box is built from
        ``self._center`` and ``self.target_sz``
    """
    self.frame_idx += 1
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (round(self.norm_pwp_search_area[0] /
                             self.area_resize_factor),
                       round(self.norm_pwp_search_area[1] /
                             self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)
    # colour-histogram response; NaNs in the likelihood map are zeroed
    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    likelihood_map[np.isnan(likelihood_map)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map, self.norm_target_sz)

    # windowed features, split into three groups and recombined per expert
    xt = self.get_feature_map(im_patch_cf, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt
    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)

    # per-expert detection
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        hf = self.experts[i].hf_num / (np.sum(
            self.experts[i].hf_den, axis=2) + self.lambda_)[:, :, None]
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
        response_sz = (self.floor_odd(self.norm_delta_area[0] /
                                      self.cell_size),
                       self.floor_odd(self.norm_delta_area[1] /
                                      self.cell_size))
        # NOTE(review): the third positional argument of cv2.resize is
        # `dst`, not `interpolation`, so INTER_NEAREST is presumably not
        # applied here — confirm the intended interpolation.
        response_cf = cv2.resize(
            crop_filter_response(response_cf, response_sz),
            self.norm_delta_area, cv2.INTER_NEAREST)
        response_cf[np.isnan(response_cf)] = 0.
        self.experts[i].response = (
            1 - self.merge_factor
        ) * response_cf + self.merge_factor * response_pwp
        row, col = np.unravel_index(
            np.argmax(self.experts[i].response, axis=None),
            self.experts[i].response.shape)
        # peak offset from the response centre, mapped back through the resize factor
        dy = (row - center[1]) / self.area_resize_factor
        dx = (col - center[0]) / self.area_resize_factor
        self.experts[i].pos = (self._center[0] + dx, self._center[1] + dy)
        cx, cy, w, h = self.experts[i].pos[0], self.experts[i].pos[
            1], self.target_sz[0], self.target_sz[1]
        self.experts[i].rect_positions.append(
            [cx - w / 2, cy - h / 2, w, h])
        self.experts[i].centers.append([cx, cy])
        # distance moved since the entry stored at index frame_idx - 1
        pre_center = self.experts[i].centers[self.frame_idx - 1]
        smooth = np.sqrt((cx - pre_center[0])**2 +
                         (cy - pre_center[1])**2)
        self.experts[i].smoothes.append(smooth)
        self.experts[i].smooth_scores.append(
            np.exp(-smooth**2 / (2 * self.avg_dim**2)))

    # expert selection
    if self.frame_idx >= self.period - 1:
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(
                self.robustness_eva(self.experts, i, self.frame_idx,
                                    self.period, self.weight,
                                    self.expert_num))
            self.id_ensemble[i] = self.experts[i].rob_scores[
                self.frame_idx]
        self.mean_score.append(
            np.sum(np.array(self.id_ensemble)) / self.expert_num)
        idx = np.argmax(np.array(self.id_ensemble))
        self._center = self.experts[idx].pos
        self.response = self.experts[idx].response
    else:
        # not enough history yet: placeholder scores, follow expert 6
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(1)
        self._center = self.experts[6].pos
        self.response = self.experts[6].response
        self.mean_score.append(0)
    if vis is True:
        self.score = self.response

    # adaptive update
    score1 = self.cal_psr(self.experts[0].response)
    score2 = self.cal_psr(self.experts[1].response)
    score3 = self.cal_psr(self.experts[2].response)
    self.psr_score.append((score1 + score2 + score3) / 3)
    # NOTE(review): this self-assignment has no effect
    if self.frame_idx == len(self.psr_score):
        self.frame_idx = self.frame_idx
    if self.frame_idx >= self.period - 1:
        final_score = self.mean_score[self.frame_idx] * self.psr_score[
            self.frame_idx]
        # average of the combined score from index period-1 up to this frame
        ave_score = np.sum(
            np.array(self.mean_score)[self.period - 1:self.frame_idx + 1] *
            np.array(self.psr_score[self.period - 1:self.frame_idx + 1])
        ) / (self.frame_idx + 1 - self.period + 1)
        threshold = self.update_thresh * ave_score
        if final_score > threshold:
            self.learning_rate_pwp = self.config.interp_factor_pwp
            self.learning_rate_cf = self.config.interp_factor_cf
        else:
            # low confidence: freeze the colour model, damp the filter update
            self.learning_rate_pwp = 0
            self.learning_rate_cf = (
                final_score / threshold)**3 * self.config.interp_factor_cf
    else:
        # learning rates are left at whatever they were before this call
        final_score = self.mean_score[self.frame_idx] * self.psr_score[
            self.frame_idx]

    if self.scale_adaptation:
        self.scale_factor = self.scale_estimator.update(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor)
        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        # keep the background area inside the frame
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        # make (bg_area - target_sz) and (bg_area - fg_area) even
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # training sample at the new position
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt
    # blend each expert's filter terms with the current learning rate
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        hf_den = np.conj(xtf) * xtf / (self.cf_response_size[0] *
                                       self.cf_response_size[1])
        hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        self.experts[i].hf_den = (
            1 - self.learning_rate_cf
        ) * self.experts[i].hf_den + self.learning_rate_cf * hf_den
        self.experts[i].hf_num = (
            1 - self.learning_rate_cf
        ) * self.experts[i].hf_num + self.learning_rate_cf * hf_num
    if self.learning_rate_pwp != 0:
        im_patch_bg = self.get_sub_window(current_frame, self._center,
                                          self.norm_bg_area, self.bg_area)
        self.bg_hist, self.fg_hist = self.update_hist_model(
            self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
            self.target_sz, self.norm_bg_area, self.n_bins)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
        self.target_sz[1]
    ], final_score
def update(self, current_frame, vis=False):
    """Track the target with iterative position/scale refinement, then retrain the filter.

    The refinement loop runs at most ``self.refinement_iterations`` times
    and stops early once the centre moves by no more than 1e-2 in both
    coordinates. Each pass samples features at every scale in
    ``self.sc * self.scale_factors``, correlates them with the stored
    filter, refines the peak with ``resp_newton`` and updates centre and
    scale. Afterwards a sample at the final position is used to update the
    filter through ``self.ADMM``.

    :param current_frame: frame to search; must be 3-D with 3 channels
    :param vis: when exactly ``True``, the shifted response of the selected
        scale is stored in ``self.score``
    :return: tuple ``([x, y, w, h], -1.0)``; the second element is a constant
    """
    assert len(current_frame.shape) == 3 and current_frame.shape[2] == 3
    # infinite start value guarantees the first loop pass
    old_pos = (np.inf, np.inf)
    iter = 1
    while iter <= self.refinement_iterations and (
            np.abs(old_pos[0] - self._center[0]) > 1e-2
            or np.abs(old_pos[1] - self._center[1]) > 1e-2):
        sample_scales = self.sc * self.scale_factors
        xt_hc = None
        sample_pos = (int(np.round(self._center[0])),
                      int(np.round(self._center[1])))
        # stack the features of every scale along a new fourth axis
        for scale in sample_scales:
            sub_window = self.get_sub_window(
                current_frame,
                sample_pos,
                model_sz=self.crop_size,
                scaled_sz=(int(round(self.crop_size[0] * scale)),
                           int(round(self.crop_size[1] * scale))))
            hc_features = self.extrac_hc_feature(
                sub_window, self.cell_size)[:, :, :, np.newaxis]
            if xt_hc is None:
                xt_hc = hc_features
            else:
                xt_hc = np.concatenate((xt_hc, hc_features), axis=3)
        xtw_hc = xt_hc * self.cosine_window[:, :, None, None]
        xtf_hc = fft2(xtw_hc)
        # correlate with the stored filter, summing over the channel axis
        responsef_hc = np.sum(np.conj(self.f_pre_f_hc)[:, :, :, None] *
                              xtf_hc,
                              axis=2)
        responsef = responsef_hc
        response = np.real(ifft2(responsef))
        disp_row, disp_col, sind = resp_newton(response, responsef,
                                               self.newton_iterations,
                                               self.ky, self.kx,
                                               self.feature_map_sz)
        #row, col, sind = np.unravel_index(np.argmax(response, axis=None), response.shape)
        #disp_row = (row+ int(np.floor(self.feature_map_sz[1] - 1) / 2)) % self.feature_map_sz[1] - int(
        #    np.floor((self.feature_map_sz[1] - 1) / 2))
        #disp_col = (col + int(np.floor(self.feature_map_sz[0] - 1) / 2)) % self.feature_map_sz[0] - int(
        #    np.floor((self.feature_map_sz[0] - 1) / 2))
        if vis is True:
            # roll by half the size along both axes before storing
            self.score = response[:, :, sind].astype(np.float32)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)
        # displacement in cells -> displacement scaled by cell size and sample scale
        dx, dy = (disp_col * self.cell_size * self.sc *
                  self.scale_factors[sind]), (disp_row * self.cell_size *
                                              self.sc *
                                              self.scale_factors[sind])
        scale_change_factor = self.scale_factors[sind]
        old_pos = self._center
        self._center = (sample_pos[0] + dx, sample_pos[1] + dy)
        self.sc = self.sc * scale_change_factor
        self.sc = np.clip(self.sc, self._min_scale_factor,
                          self._max_scale_factor)
        # the scale estimator's result replaces the value just computed
        self.sc = self.scale_estimator.update(current_frame, self._center,
                                              self.base_target_sz, self.sc)
        if self.scale_type == 'normal':
            self.sc = np.clip(self.sc,
                              a_min=self._min_scale_factor,
                              a_max=self._max_scale_factor)
        iter += 1

    # training sample at the refined position and scale
    sample_pos = (int(np.round(self._center[0])),
                  int(np.round(self._center[1])))
    patch = self.get_sub_window(
        current_frame,
        sample_pos,
        model_sz=self.crop_size,
        scaled_sz=(int(np.round(self.crop_size[0] * self.sc)),
                   int(np.round(self.crop_size[1] * self.sc))))
    xl_hc = self.extrac_hc_feature(patch, self.cell_size)
    xlw_hc = xl_hc * self.cosine_window[:, :, None]
    xlf_hc = fft2(xlw_hc)
    mu = self.temporal_regularization_factor
    self.f_pre_f_hc = self.ADMM(xlf_hc, self.f_pre_f_hc, mu)
    target_sz = (self.base_target_sz[0] * self.sc,
                 self.base_target_sz[1] * self.sc)
    return [(self._center[0] - (target_sz[0]) / 2),
            (self._center[1] - (target_sz[1]) / 2), target_sz[0],
            target_sz[1]], -1.0
def tracking(self, img, pos, polish):
    """
    Estimate the new position plus the scale/rotation change at ``pos``.

    A sub-window is taken at the position from the last frame, converted
    to the Fourier domain and correlated with the stored model; the
    response is optionally merged with a colour-histogram response. The
    peak (optionally refined to a weighted centroid) moves the position.
    A log-polar patch at the new position is then compared with
    ``self.model_patchLp`` through ``self.estimate_scale``.

    :param img: current frame
    :param pos: ``(x, y)`` position from the previous step
    :param polish: when ``> 0`` the regular window (``self.window_sz0``,
        ``self.cos_window``) is used; otherwise the search window
        (``self.window_sz_search0``, ``self.cos_window_search``) is used and
        the filter is padded to the sample size
    :return: ``(pos, tmp_sc, tmp_rot, cscore, sscore)`` — new position, scale
        change clipped to [0.6, 1.4], rotation change (reset to 0 when its
        magnitude exceeds 1), ``PSR`` score of the response, and the score
        returned by ``self.estimate_scale``
    """
    large_num = 0
    if polish > large_num:
        w_sz0 = self.window_sz0
        c_w = self.cos_window
    else:
        w_sz0 = self.window_sz_search0
        c_w = self.cos_window_search
    if self.is_rotation:
        patch = self.get_affine_subwindow(img, pos, self.sc, self.rot,
                                          w_sz0)
    else:
        sz_s = (int(np.floor(self.sc[0] * w_sz0[0])),
                int(np.floor(self.sc[1] * w_sz0[1])))
        patchO = cv2.getRectSubPix(img, sz_s, pos)
        # NOTE(review): the third positional argument of cv2.resize is
        # `dst`, not `interpolation`; the interpolation flags passed this
        # way in this method are presumably not applied — confirm.
        patch = cv2.resize(patchO, w_sz0, cv2.INTER_CUBIC)
    z = self.get_features(patch, self.cell_size)
    z = z * c_w[:, :, None]
    zf = fft2(z)
    # (axis-1 size, axis-0 size, channels) of the sample spectrum
    ssz = (zf.shape[1], zf.shape[0], zf.shape[2])
    # calculate response of the classifier at all shifts
    wf = np.conj(self.model_xf) * self.model_alphaf[:, :, None] / np.size(
        self.model_xf)
    if polish <= large_num:
        # search mode: pad the spatial filter up to the sample size
        w = pad(np.real(ifft2(wf)), (ssz[1], ssz[0]))
        wf = fft2(w)
    tmp_sz = ssz
    # compute convolution for each feature block in the Fourier domain
    # use general compute here for easy extension in future
    rff = np.sum(wf * zf, axis=2)
    # real and imaginary parts are resized separately and recombined
    rff_real = cv2.resize(rff.real, (tmp_sz[0], tmp_sz[1]),
                          cv2.INTER_NEAREST)
    rff_imag = cv2.resize(rff.imag, (tmp_sz[0], tmp_sz[1]),
                          cv2.INTER_NEAREST)
    rff = rff_real + 1.j * rff_imag
    response_cf = np.real(ifft2(rff))
    #response_cf=np.fft.fftshift(response_cf,axes=(0,1))
    response_cf = crop_filter_response(
        response_cf, (response_cf.shape[1], response_cf.shape[0]))
    # colour response stays all-zero unless the histogram model is enabled
    response_color = np.zeros_like(response_cf)
    if self.use_color_hist:
        object_likelihood = self.get_colour_map(patch, self.pl, self.pi,
                                                self.bin_mapping)
        response_color = get_center_likelihood(object_likelihood,
                                               self.target_sz0)
        response_color = cv2.resize(
            response_color, (response_cf.shape[1], response_cf.shape[0]),
            cv2.INTER_CUBIC)
    # adaptive merge factor
    if self.adaptive_merge_factor is True:
        cf_conf = confidence_cf_apce(response_cf)
        adaptive_merge_factor = self.merge_factor * self.theta + (
            1 - self.theta) * (1 - cf_conf)
        response = (
            1 - adaptive_merge_factor
        ) * response_cf + adaptive_merge_factor * response_color
    else:
        response = (1 - self.merge_factor
                    ) * response_cf + self.merge_factor * response_color
    if self.vis is True:
        self.score = response
        self.crop_size = self.window_sz
    # sub-pixel search
    pty, ptx = np.unravel_index(np.argmax(response, axis=None),
                                response.shape)
    if self.is_subpixel:
        # weighted centroid over a 5x5 neighbourhood, indices clipped to the map
        slobe = 2
        idy = np.arange(pty - slobe, pty + slobe + 1)
        idx = np.arange(ptx - slobe, ptx + slobe + 1)
        idy = np.clip(idy, a_min=0, a_max=response.shape[0] - 1)
        idx = np.clip(idx, a_min=0, a_max=response.shape[1] - 1)
        weight_patch = response[idy, :][:, idx]
        s = np.sum(weight_patch) + 2e-16
        pty = np.sum(np.sum(weight_patch, axis=1) * idy) / s
        ptx = np.sum(np.sum(weight_patch, axis=0) * idx) / s
    cscore = PSR(response, 0.1)
    # update the translation status
    dy = pty - (response.shape[0]) // 2
    dx = ptx - (response.shape[1]) // 2
    if self.is_rotation:
        # map the displacement through the current rotation and scale
        sn, cs = np.sin(self.rot), np.cos(self.rot)
        pp = np.array([[self.sc[1] * cs, -self.sc[0] * sn],
                       [self.sc[1] * sn, self.sc[0] * cs]])
        x, y = pos
        delta = self.cell_size * np.array([[dy, dx]]).dot(pp)
        x += delta[0, 1]
        y += delta[0, 0]
        pos = (x, y)
        patchL = self.get_affine_subwindow(
            img, pos, [1., 1.], self.rot,
            (int(np.floor(self.sc[0] * self.scale_sz[0])),
             int(np.floor(self.sc[1] * self.scale_sz[1]))))
    else:
        x, y = pos
        pos = (x + self.sc[0] * self.cell_size * dx,
               y + self.sc[1] * self.cell_size * dy)
        patchL = cv2.getRectSubPix(
            img, (int(np.floor(self.sc[0] * self.scale_sz[0])),
                  int(np.floor(self.sc[1] * self.scale_sz[1]))), pos)
    patchL = cv2.resize(patchL, self.scale_sz_window, cv2.INTER_CUBIC)
    # log-polar transform about the patch centre, then HOG features
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           (patchL.shape[1] // 2, patchL.shape[0] // 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, self.cell_size)
    #patchLp = patchLp * self.cos_window_scale[:, :, None]
    tmp_sc, tmp_rot, sscore = self.estimate_scale(self.model_patchLp,
                                                  patchLp, self.mag)
    tmp_sc = np.clip(tmp_sc, a_min=0.6, a_max=1.4)
    # discard rotation estimates with magnitude above 1
    if tmp_rot > 1 or tmp_rot < -1:
        tmp_rot = 0
    return pos, tmp_sc, tmp_rot, cscore, sscore
def update(self, current_frame, vis=False):
    """Track the target by merging a filter response with a colour response, then update the models.

    Detection merges the correlation-filter response with the
    colour-likelihood response using ``self.merge_factor`` and moves
    ``self._center`` to the peak. With ``self.scale_adaptation`` a 1-D
    scale filter picks the new scale and the target/background/foreground
    areas are recomputed. Training then updates the filter terms (with
    extra context patches at ``self.offset`` when ``self.use_ca``), the
    colour histograms and, if enabled, the scale filter.

    :param current_frame: frame to search; ``shape[0]``/``shape[1]`` are used
        to bound the background area
    :param vis: when exactly ``True``, the merged response is stored in ``self.score``
    :return: ``[x, y, w, h]`` built from ``self._center`` and ``self.target_sz``
    """
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (round(self.norm_pwp_search_area[0] /
                             self.area_resize_factor),
                       round(self.norm_pwp_search_area[1] /
                             self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)
    xt = self.get_feature_map(im_patch_cf, self.hog_cell_size)
    xt_windowed = self._window[:, :, None] * xt
    xtf = fft2(xt_windowed)
    # lambda_ is added here only in the non-context-aware case; in the
    # context-aware case it is already part of the denominator built below
    if self.use_ca is False:
        if self.den_per_channel:
            hf = self.hf_num / (self.hf_den + self.lambda_)
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2) +
                                self.lambda_)[:, :, None]
    else:
        if self.den_per_channel:
            hf = self.hf_num / self.hf_den
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2)[:, :, None])
    # the two modes store the numerator with opposite conjugation
    if self.use_ca is False:
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
    else:
        response_cf = np.real(ifft2(np.sum(hf * xtf, axis=2)))
    response_sz = (self.floor_odd(self.norm_delta_area[0] /
                                  self.hog_cell_size),
                   self.floor_odd(self.norm_delta_area[1] /
                                  self.hog_cell_size))
    response_cf = crop_filter_response(response_cf, response_sz)
    if self.hog_cell_size > 1:
        # both branches currently do the same resize
        # NOTE(review): the third positional argument of cv2.resize is
        # `dst`, not `interpolation`, so INTER_NEAREST is presumably not
        # applied — confirm the intended interpolation.
        if self.use_ca is True:
            #response_cf = self.mex_resize(response_cf, self.norm_delta_area)
            response_cf = cv2.resize(response_cf, self.norm_delta_area,
                                     cv2.INTER_NEAREST)
        else:
            response_cf = cv2.resize(response_cf, self.norm_delta_area,
                                     cv2.INTER_NEAREST)
    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    # NaNs in either response are zeroed before merging
    likelihood_map[np.isnan(likelihood_map)] = 0.
    response_cf[np.isnan(response_cf)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map, self.norm_target_sz)
    response = (1 - self.merge_factor
                ) * response_cf + self.merge_factor * response_pwp
    if vis is True:
        self.score = response
    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)
    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)
    # peak offset from the response centre, mapped back through the resize factor
    dy = (curr[0] - center[1]) / self.area_resize_factor
    dx = (curr[1] - center[0]) / self.area_resize_factor
    x_c, y_c = self._center
    x_c += dx
    y_c += dy
    self._center = (x_c, y_c)

    if self.scale_adaptation:
        # 1-D scale filter: pick the scale with the highest response
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        scale_response = np.real(
            np.fft.ifft(
                np.sum(self.sf_num * xsf, axis=0) /
                (self.sf_den + self.lambda_)))
        recovered_scale = np.argmax(scale_response)
        self.scale_factor = self.scale_factor * self.scale_factors[
            recovered_scale]
        self.scale_factor = np.clip(self.scale_factor,
                                    a_min=self.min_scale_factor,
                                    a_max=self.max_scale_factor)
        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        # keep the background area inside the frame
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        # make (bg_area - target_sz) and (bg_area - fg_area) even
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # training sample at the new position
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.hog_cell_size)
    xt = self._window[:, :, None] * xt
    xtf = fft2(xt)
    if self.use_ca:
        # accumulate the spectral energy of the context patches at each offset
        sum_kfn = np.zeros_like(xtf)
        for j in range(len(self.offset)):
            im_patch_bgn = self.get_sub_window(
                current_frame, (self._center[0] + self.offset[j][0],
                                self._center[1] + self.offset[j][1]),
                self.norm_bg_area, self.bg_area)
            xtn = self.get_feature_map(im_patch_bgn, self.hog_cell_size)
            xtn = self._window[:, :, None] * xtn
            xtfn = fft2(xtn)
            sum_kfn += np.conj(xtfn) * xtfn
        new_hf_num = self.yf[:, :, None] * np.conj(xtf)
        new_hf_den = np.conj(
            xtf) * xtf + self.lambda_ + self.lambda_2 * sum_kfn
    else:
        new_hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        new_hf_den = (np.conj(xtf) * xtf) / (self.cf_response_size[0] *
                                             self.cf_response_size[1])
    self.hf_den = (1 - self.interp_factor_cf
                   ) * self.hf_den + self.interp_factor_cf * new_hf_den
    self.hf_num = (1 - self.interp_factor_cf
                   ) * self.hf_num + self.interp_factor_cf * new_hf_num
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
        self.target_sz, self.norm_bg_area, self.n_bins)

    if self.scale_adaptation:
        # update the scale filter with a sample at the new scale
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        new_sf_num = self.ysf * np.conj(xsf)
        new_sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_den = (
            1 - self.interp_factor_scale
        ) * self.sf_den + self.interp_factor_scale * new_sf_den
        self.sf_num = (
            1 - self.interp_factor_scale
        ) * self.sf_num + self.interp_factor_scale * new_sf_num
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
        self.target_sz[1]
    ]
def _detection(self, alphaf, x, z):
    """Return the real-valued response of the model on sample ``z``.

    The kernel between ``x`` and ``z`` is computed with ``self._dgk``,
    moved to the Fourier domain, multiplied by ``alphaf`` and transformed
    back.

    :param alphaf: coefficients in the Fourier domain
    :param x: template sample
    :param z: test sample
    :return: real part of the response map
    """
    kernel_f = fft2(self._dgk(x, z))
    return np.real(ifft2(alphaf * kernel_f))
def _detection(self, alphaf, xf, zf, kernel='gaussian'):
    """Return the real-valued response of the model on sample ``zf``.

    The kernel correlation of ``zf`` against ``xf`` is obtained from
    ``self._kernel_correlation``, multiplied by ``alphaf`` and transformed
    back to the spatial domain.

    :param alphaf: coefficients in the Fourier domain
    :param xf: template sample in the Fourier domain
    :param zf: test sample in the Fourier domain
    :param kernel: kernel name forwarded to ``self._kernel_correlation``
    :return: real part of the response map
    """
    spectrum = alphaf * self._kernel_correlation(zf, xf, kernel)
    return np.real(ifft2(spectrum))
def update(self, current_frame):
    """Track the target over several candidate sizes and refresh the model.

    For every factor in ``self.search_size`` a patch of the padded target
    size times that factor is cropped at ``self._center``, resized to
    ``self.crop_size`` and correlated with the model. The global maximum
    over all response maps selects both the displacement (wrapped to a
    signed offset using ``self.window_size``) and the size factor. The
    model (``self.model_alphaf``, ``self.model_xf``) is then blended with
    a fresh sample using ``self.interp_factor``.

    :param current_frame: frame to search; halved in size first when ``self.resize`` is set
    :return: tuple ``(bbox, max_score)``; ``bbox`` is ``[x, y, w, h]``
        (doubled when ``self.resize`` is exactly ``True``) and ``max_score``
        is the maximum over all response maps
    """
    if self.resize:
        current_frame = cv2.resize(current_frame, dsize=None, fx=0.5,
                                   fy=0.5).astype(np.uint8)

    def crop_features(size):
        # crop `size` at the current centre, resize to the model size, extract features
        patch = cv2.getRectSubPix(
            current_frame,
            (int(np.round(size[0])), int(np.round(size[1]))), self._center)
        patch = cv2.resize(patch, self.crop_size)
        return self.get_features(patch, self.cell_size)

    # --- detection over all candidate sizes ---
    maps = []
    for factor in self.search_size:
        tmp_sz = (self.target_sz[0] * (1 + self.padding) * factor,
                  self.target_sz[1] * (1 + self.padding) * factor)
        hc_features = crop_features(tmp_sz)
        hc_features = hc_features * self._window[:, :, None]
        zf = fft2(hc_features)
        kzf = self._kernel_correlation(zf, self.model_xf, kernel=self.kernel)
        maps.append(np.real(ifft2(self.model_alphaf * kzf))[:, :, np.newaxis])
    response = np.concatenate(maps, axis=2) if maps else None

    delta_y, delta_x, sz_id = np.unravel_index(
        np.argmax(response, axis=None), response.shape)
    self.sz_id = sz_id
    # wrap peak indices in the second half of the map to negative offsets
    if delta_y + 1 > self.window_size[1] / 2:
        delta_y = delta_y - self.window_size[1]
    if delta_x + 1 > self.window_size[0] / 2:
        delta_x = delta_x - self.window_size[0]

    # --- apply the selected size and displacement ---
    self.target_sz = (self.target_sz[0] * self.search_size[self.sz_id],
                      self.target_sz[1] * self.search_size[self.sz_id])
    tmp_sz = (self.target_sz[0] * (1 + self.padding),
              self.target_sz[1] * (1 + self.padding))
    current_size_factor = tmp_sz[0] / self.crop_size[0]
    x, y = self._center
    x += current_size_factor * self.cell_size * delta_x
    y += current_size_factor * self.cell_size * delta_y
    self._center = (x, y)

    # --- model update ---
    hc_features = crop_features(tmp_sz)
    hc_features = self._window[:, :, None] * hc_features
    xf = fft2(hc_features)
    kf = self._kernel_correlation(xf, xf, kernel=self.kernel)
    alphaf = self.yf / (kf + self.lambda_)
    rate = self.interp_factor
    self.model_alphaf = (1 - rate) * self.model_alphaf + rate * alphaf
    self.model_xf = (1 - rate) * self.model_xf + rate * xf

    bbox = [(self._center[0] - self.target_sz[0] / 2),
            (self._center[1] - self.target_sz[1] / 2), self.target_sz[0],
            self.target_sz[1]]
    if self.resize is True:
        bbox = [ele * 2 for ele in bbox]
    max_score = response.max()
    return bbox, max_score
def dense_gauss_kernel(self, x1, x2, sigma):
    """
    gaussian kernel between x1 and x2, evaluated through the Fourier domain

    The per-channel cross spectrum ``fft2(x1) * conj(fft2(x2))`` is summed
    over axis 2 and transformed back; together with the two signal energies
    this forms the distance term that goes into the exponential.

    :param x1: first feature map (indexed with a channel axis at position 2)
    :param x2: second feature map, same layout as ``x1``
    :param sigma: gaussian kernel bandwidth
    :return: kernel map; the result of ``ifft2`` is used as-is, so the
        output keeps whatever dtype that transform produces
    """
    flat1 = x1.flatten()
    flat2 = x2.flatten()
    # Cross term, accumulated over the channel axis in the Fourier domain.
    cross_spectrum = np.sum(fft2(x1) * np.conj(fft2(x2)), axis=2)
    cross_corr = ifft2(cross_spectrum)
    # Energy of x1 plus energy of x2.
    energy = flat1.conj().T.dot(flat1) + flat2.conj().T.dot(flat2)
    dist = energy - 2 * cross_corr
    return np.exp(-1 / sigma ** 2 * dist / np.size(dist))
def update(self, current_frame, vis=False):
    """
    locate the target in a new frame, then update the filter

    Features are extracted at ``self.number_of_scales`` candidate scales,
    correlated with ``self.g_f`` in the Fourier domain, and the response is
    resized to ``self.interp_sz`` before the peak is searched.
    ``self.interpolate_response`` selects how the peak is located and how
    the displacement is converted to frame pixels:

    * ``0`` / ``4``: scaled by ``feature_ratio * current_scale_factor *
      scale_factors[sind]`` (``4`` refines the peak with ``resp_newton``)
    * ``1``: scaled by ``current_scale_factor * scale_factors[sind]``
    * ``2``: ``interp_sz`` is recomputed from the current scale and the
      displacement is scaled by ``scale_factors[sind]``
    * ``3``: rejected

    :param current_frame: frame to track in
    :param vis: when ``True`` the centred response of the winning scale is
        stored in ``self.score``
    :return: ``([x, y, w, h], -1.0)`` with ``(x, y)`` the top-left corner
    :raises ValueError: if ``self.interpolate_response`` is not one of
        0, 1, 2, 4
    """
    # --- detection -------------------------------------------------------
    scale_features = []
    for scale_ind in range(self.number_of_scales):
        current_scale = (self.current_scale_factor
                         * self.scale_factors[scale_ind])
        sub_window = self.get_sub_window(
            current_frame, self._center, model_sz=self.crop_size,
            scaled_sz=(int(round(self.crop_size[0] * current_scale)),
                       int(round(self.crop_size[1] * current_scale))))
        scale_features.append(
            self.extract_hc_feture(sub_window, self.cell_size))
    # Stack along a new trailing axis that indexes the candidate scale.
    x = np.stack(scale_features, axis=3)

    xtf = fft2(x * self._window[:, :, None, None])
    responsef = np.sum(np.conj(self.g_f)[:, :, :, None] * xtf, axis=2)

    if self.interpolate_response == 2:
        # Dynamic interpolation size tied to the current scale.
        self.interp_sz = (
            int(self.yf.shape[1] * self.feature_ratio
                * self.current_scale_factor),
            int(self.yf.shape[0] * self.feature_ratio
                * self.current_scale_factor))
    responsef_padded = resize_dft2(responsef, self.interp_sz)
    response = np.real(ifft2(responsef_padded))

    if self.interpolate_response == 3:
        raise ValueError('interpolate_response == 3 is not supported')
    if self.interpolate_response == 4:
        disp_row, disp_col, sind = resp_newton(
            response, responsef_padded, self.newton_iterations,
            self.ky, self.kx, self.feature_map_sz)
    else:
        row, col, sind = np.unravel_index(
            np.argmax(response, axis=None), response.shape)
        # Map the peak index to a signed shift around zero. The floor is
        # taken of the halved size (the original had the parenthesis
        # misplaced in the first term).
        half_rows = int(np.floor((self.interp_sz[1] - 1) / 2))
        half_cols = int(np.floor((self.interp_sz[0] - 1) / 2))
        disp_row = (row + half_rows) % self.interp_sz[1] - half_rows
        disp_col = (col + half_cols) % self.interp_sz[0] - half_cols

    if vis is True:
        # Roll the response so the zero-shift position sits in the centre.
        score = response[:, :, sind]
        score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
        score = np.roll(score, int(np.floor(score.shape[1] / 2)), axis=1)
        self.score = score

    if self.interpolate_response in (0, 4):
        factor = (self.feature_ratio * self.current_scale_factor
                  * self.scale_factors[sind])
    elif self.interpolate_response == 1:
        factor = self.current_scale_factor * self.scale_factors[sind]
    elif self.interpolate_response == 2:
        factor = self.scale_factors[sind]
    else:
        raise ValueError('unsupported interpolate_response: {}'.format(
            self.interpolate_response))
    dx = int(np.round(disp_col * factor))
    dy = int(np.round(disp_row * factor))

    # --- scale update ----------------------------------------------------
    self.current_scale_factor = (self.current_scale_factor
                                 * self.scale_factors[sind])
    self.current_scale_factor = max(self.current_scale_factor,
                                    self.min_scale_factor)
    self.current_scale_factor = min(self.current_scale_factor,
                                    self.max_scale_factor)
    # The scale estimator is queried at the previous centre, i.e. before
    # the translation found above is applied.
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_sz,
        self.current_scale_factor)
    self._center = (self._center[0] + dx, self._center[1] + dy)

    # --- training --------------------------------------------------------
    pixels = self.get_sub_window(
        current_frame, self._center, model_sz=self.crop_size,
        scaled_sz=(
            int(round(self.crop_size[0] * self.current_scale_factor)),
            int(round(self.crop_size[1] * self.current_scale_factor))))
    feature = self.extract_hc_feture(pixels, cell_size=self.cell_size)
    xf = fft2(feature * self._window[:, :, None])
    self.model_xf = ((1 - self.interp_factor) * self.model_xf
                     + self.interp_factor * xf)
    self.g_f = self.ADMM(self.model_xf)

    target_sz = (self.target_sz[0] * self.current_scale_factor,
                 self.target_sz[1] * self.current_scale_factor)
    return [
        self._center[0] - target_sz[0] / 2,
        self._center[1] - target_sz[1] / 2,
        target_sz[0], target_sz[1]
    ], -1.0
def update(self, current_frame, vis=False):
    """
    locate the target in a new frame, then update filter and channel weights

    Steps: (1) correlate the features at the previous state with ``self.H``
    and find the response peak with sub-pixel refinement, (2) update the
    scale through ``self.scale_estimator``, (3) build the spatial mask
    (segmentation based, or the dummy mask as fallback), (4) learn a new
    filter with ``create_csr_filter`` and blend it into ``self.H``; when
    channel weights are enabled ``self.chann_w`` is updated as well.

    :param current_frame: frame to track in
    :param vis: when ``True`` the centred response map is stored in
        ``self.score``
    :return: ``[x, y, w, h]``; ``x`` and ``y`` (top-left corner) are rounded
    :raises ValueError: if segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``
    """
    # Resolve the flag once. The detection part used to test ``is True``
    # while the training part tested truthiness, so a truthy value that is
    # not the ``True`` singleton reached the training branch with
    # ``channel_discr`` undefined (NameError).
    use_channel_weights = bool(self.use_channel_weights)

    # --- detection -------------------------------------------------------
    f = self.get_csr_features(current_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    if use_channel_weights:
        response_chann = np.real(ifft2(fft2(f) * np.conj(self.H)))
        response = np.sum(response_chann * self.chann_w[None, None, :],
                          axis=2)
    else:
        response = np.real(ifft2(np.sum(fft2(f) * np.conj(self.H), axis=2)))
    if vis is True:
        # Roll the response so the zero-shift position sits in the centre.
        score = response
        score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
        score = np.roll(score, int(np.floor(score.shape[1] / 2)), axis=1)
        self.score = score
    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)

    if use_channel_weights:
        # Imported lazily (only needed with channel weights), but once per
        # call instead of once per channel.
        from skimage.feature.peak import peak_local_max
        num_channels = response_chann.shape[2]
        channel_discr = np.ones((num_channels))
        for i in range(num_channels):
            norm_response = self.normalize_img(response_chann[:, :, i])
            peak_locs = peak_local_max(norm_response, min_distance=5)
            if len(peak_locs) < 2:
                # Fewer than two peaks: keep the default weight of 1.
                continue
            peak_vals = sorted(
                norm_response[peak_locs[:, 0], peak_locs[:, 1]])
            max_val, second_max_val = peak_vals[-1], peak_vals[-2]
            # Ratio of the two strongest peaks, floored at 0.5.
            channel_discr[i] = max(
                0.5, 1 - (second_max_val / (max_val + 1e-10)))

    # Sub-pixel peak from the 3-neighbourhood (with wrap-around) on each axis.
    n_rows, n_cols = response.shape[0], response.shape[1]
    v_neighbors = response[[(curr[0] - 1) % n_rows,
                            (curr[0]) % n_rows,
                            (curr[0] + 1) % n_rows], curr[1]]
    h_neighbors = response[curr[0], [(curr[1] - 1) % n_cols,
                                     (curr[1]) % n_cols,
                                     (curr[1] + 1) % n_cols]]
    row = curr[0] + self.subpixel_peak(v_neighbors)
    col = curr[1] + self.subpixel_peak(h_neighbors)
    # Peaks in the second half of the map are wrapped-around negative shifts.
    if row + 1 > response.shape[0] / 2:
        row = row - response.shape[0]
    if col + 1 > response.shape[1] / 2:
        col = col - response.shape[1]

    # displacement
    dx = (self.current_scale_factor * self.cell_size
          * (1 / self.rescale_ratio) * col)
    dy = (self.current_scale_factor * self.cell_size
          * (1 / self.rescale_ratio) * row)
    self._center = (self._center[0] + dx, self._center[1] + dy)

    # --- scale -----------------------------------------------------------
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_sz,
        self.current_scale_factor)
    if self.scale_type == 'normal':
        self.current_scale_factor = np.clip(self.current_scale_factor,
                                            a_min=self._min_scale_factor,
                                            a_max=self._max_scale_factor)
    self.target_sz = (self.current_scale_factor * self.base_target_sz[0],
                      self.current_scale_factor * self.base_target_sz[1])
    region = [np.round(self._center[0] - self.target_sz[0] / 2),
              np.round(self._center[1] - self.target_sz[1] / 2),
              self.target_sz[0], self.target_sz[1]]

    # --- spatial mask ----------------------------------------------------
    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = current_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(current_frame, cv2.COLOR_BGR2HSV)
            # Rescale the first channel by 255/180.
            seg_img[:, :, 0] = (
                seg_img[:, :, 0].astype(np.float32) / 180 * 255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError('unsupported segcolor_space: {}'.format(
                self.segcolor_space))
        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, region, hist_fg, hist_bg)
        # Running average of the foreground/background histograms.
        self.hist_fg_p_bins = ((1 - self.hist_lr) * self.hist_fg_p_bins
                               + self.hist_lr * hist_fg.p_bins)
        self.hist_bg_p_bins = ((1 - self.hist_lr) * self.hist_bg_p_bins
                               + self.hist_lr * hist_bg.p_bins)
        hist_fg.p_bins = self.hist_fg_p_bins
        hist_bg.p_bins = self.hist_bg_p_bins
        mask = self.segment_region(seg_img, self._center,
                                   self.template_size, self.base_target_sz,
                                   self.current_scale_factor,
                                   hist_fg, hist_bg)
        # Keep the segmentation only inside the centred target rectangle.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - region[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - region[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + int(np.round(region[3])),
                         pm_x0:pm_x0 + int(np.round(region[2]))] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Segmentation rejected by mask_normal: use the dummy mask.
            mask = self.target_dummy_mask
    else:
        mask = self.target_dummy_mask

    # --- training --------------------------------------------------------
    f = self.get_csr_features(current_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    H_new = self.create_csr_filter(f, self.yf, mask)
    if use_channel_weights:
        response = np.real(ifft2(fft2(f) * np.conj(H_new)))
        # Per-channel peak response, damped by the detection-time ratio.
        chann_w = np.max(
            response.reshape(response.shape[0] * response.shape[1], -1),
            axis=0) * channel_discr
        chann_w = chann_w / np.sum(chann_w)
        self.chann_w = ((1 - self.channels_weight_lr) * self.chann_w
                        + self.channels_weight_lr * chann_w)
        self.chann_w = self.chann_w / np.sum(self.chann_w)
    self.H = (1 - self.interp_factor) * self.H + self.interp_factor * H_new
    return region
def init(self, first_frame, bbox):
    """
    initialise the tracker on the first frame

    Derives the template geometry from the box, builds the label spectrum
    and cosine window, sets up the scale estimator selected by
    ``self.scale_type``, constructs the spatial mask (segmentation based or
    a plain rectangle) and learns the initial filter ``self.H`` and channel
    weights ``self.chann_w``.

    :param first_frame: first frame of the sequence; channels 0 and 1 are
        compared to decide whether segmentation is kept enabled
    :param bbox: initial box ``(x, y, w, h)``; cast to ``int64``
    :raises ValueError: if segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self.init_mask = np.ones((h, w), dtype=np.uint8)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h

    # Identical first two channels: switch segmentation off.
    if np.all(first_frame[:, :, 0] == first_frame[:, :, 1]):
        self.use_segmentation = False

    # change 400 to 300
    # for larger cell_size
    self.cell_size = int(min(4, max(1, w * h / 300)))
    self.base_target_sz = (w, h)
    self.target_sz = self.base_target_sz

    # Square template whose side is the mean of the padded width/height.
    padded_w = int(w + self.padding * np.sqrt(w * h))
    padded_h = int(h + self.padding * np.sqrt(w * h))
    side = (padded_w + padded_h) // 2
    self.template_size = (side, side)

    # Shrink factor, capped at 1, relative to a 200x200 template area.
    ratio = np.sqrt(
        (200 ** 2) / (self.template_size[0] * self.template_size[1]))
    self.rescale_ratio = np.clip(ratio, a_min=None, a_max=1)
    self.rescale_template_size = (
        int(self.rescale_ratio * self.template_size[0]),
        int(self.rescale_ratio * self.template_size[1]))

    label_sz = (int(self.rescale_template_size[0] / self.cell_size),
                int(self.rescale_template_size[1] / self.cell_size))
    self.yf = fft2(gaussian2d_rolled_labels(label_sz, self.y_sigma))
    self._window = cos_window((self.yf.shape[1], self.yf.shape[0]))
    self.crop_size = self.rescale_template_size

    # --- scale estimator -------------------------------------------------
    self.current_scale_factor = 1.
    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step
        log_step_min = np.log(self._scale_step)
        min_ratio = np.max(
            5 / np.array(([self.crop_size[0], self.crop_size[1]])))
        self._min_scale_factor = self._scale_step ** np.ceil(
            np.log(min_ratio) / log_step_min)
        max_ratio = np.min(
            first_frame.shape[:2]
            / np.array([self.base_target_sz[1], self.base_target_sz[0]]))
        self._max_scale_factor = self._scale_step ** np.floor(
            np.log(max_ratio) / np.log(self._scale_step))
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)

    # --- dummy mask (approximation for segmentation) ---------------------
    # size of the object in feature space
    obj_w = int(self.rescale_ratio * (self.base_target_sz[0] / self.cell_size))
    obj_h = int(self.rescale_ratio * (self.base_target_sz[1] / self.cell_size))
    left = int((self.yf.shape[1] - obj_w) / 2)
    top = int((self.yf.shape[0] - obj_h) / 2)
    dummy = np.zeros_like(self.yf, dtype=np.uint8)
    dummy[top:top + obj_h, left:left + obj_w] = 1
    self.target_dummy_mask = dummy
    self.target_dummy_area = np.sum(self.target_dummy_mask)

    # --- spatial mask ----------------------------------------------------
    # The dummy mask is the fallback for every path that does not yield an
    # accepted segmentation.
    mask = self.target_dummy_mask
    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = first_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(first_frame, cv2.COLOR_BGR2HSV)
            # Rescale the first channel by 255/180.
            seg_img[:, :, 0] = (
                seg_img[:, :, 0].astype(np.float32) / 180 * 255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError
        fg_hist = Histogram(3, self.nbins)
        bg_hist = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, bbox, fg_hist, bg_hist)
        seg_mask = self.segment_region(seg_img, self._center,
                                       self.template_size,
                                       self.base_target_sz,
                                       self.current_scale_factor,
                                       fg_hist, bg_hist)
        self.hist_bg_p_bins = bg_hist.p_bins
        self.hist_fg_p_bins = fg_hist.p_bins

        # Keep the segmentation only inside the centred target rectangle.
        box_mask = np.zeros_like(seg_mask)
        box_x0 = int(np.floor(seg_mask.shape[1] / 2 - bbox[2] / 2))
        box_y0 = int(np.floor(seg_mask.shape[0] / 2 - bbox[3] / 2))
        box_mask[box_y0:box_y0 + bbox[3], box_x0:box_x0 + bbox[2]] = 1
        seg_mask = seg_mask * box_mask
        seg_mask = cv2.resize(seg_mask,
                              (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(seg_mask, self.target_dummy_area) is True:
            ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                                anchor=(1, 1))
            mask = cv2.dilate(seg_mask, ellipse)

    # --- initial filter and channel weights ------------------------------
    # extract features
    f = self.get_csr_features(first_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    # create filters using segmentation mask
    self.H = self.create_csr_filter(f, self.yf, mask)
    response = np.real(ifft2(fft2(f) * np.conj(self.H)))
    peak_per_channel = np.max(
        response.reshape(response.shape[0] * response.shape[1], -1), axis=0)
    self.chann_w = peak_per_channel / np.sum(peak_per_channel)
def update(self, current_frame, vis=False):
    """
    locate the target in a new frame, then update the appearance model

    The position is refined for at most ``self.refinement_iterations``
    passes (stopping early once the centre no longer moves). Each pass
    combines the CN and HOG gaussian kernels with the weights ``self.d``,
    evaluates the response and converts the peak into a translation
    according to ``self.interpolate_response``:

    * ``0``: response at feature resolution, shift scaled by
      ``cell_size * sc``
    * ``1``: response resized to ``self.interp_sz``, shift scaled by ``sc``
    * ``2``: ``self.interp_sz`` recomputed from the current scale, response
      resized to it, shift used directly

    Afterwards the scale is updated, the CN/HOG templates and their PCA
    projection matrices are refreshed, and ``train_model`` is called every
    ``self.modnum`` frames.

    :param current_frame: frame to track in
    :param vis: when ``True`` the centred response map is stored in
        ``self.score`` (and ``self.crop_size`` is set to ``self.win_sz``)
    :return: ``[x, y, w, h]`` with ``(x, y)`` the top-left corner
    :raises ValueError: if ``self.interpolate_response`` is not 0, 1 or 2
    """
    self.frame_index += 1
    old_pos = (np.inf, np.inf)
    iteration = 1
    while (iteration <= self.refinement_iterations
           and np.any(np.array(old_pos) != np.array(self._center))):
        patch = cv2.getRectSubPix(
            current_frame,
            (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
             int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
            self._center)
        patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
        xo_hog, xo_cn = self.get_features(patch, self.cell_size)
        xo_cn2, xo_hog2 = self.feature_projection(
            xo_cn, xo_hog, self.projection_matrix_cn,
            self.projection_matrix_hog, self._window)
        detect_k_cn = self.dense_gauss_kernel(self.z_cn2, xo_cn2,
                                              self.cn_sigma)
        detect_k_hog = self.dense_gauss_kernel(self.z_hog2, xo_hog2,
                                               self.hog_sigma)
        kf = fft2(self.d[0] * detect_k_cn + self.d[1] * detect_k_hog)
        responsef = self.alphaf * np.conj(kf)

        if self.interpolate_response > 0:
            if self.interpolate_response == 2:
                # Dynamic interpolation size tied to the current scale.
                self.interp_sz = (
                    int(self.yf.shape[1] * self.cell_size * self.sc),
                    int(self.yf.shape[0] * self.cell_size * self.sc))
            # Resize for every interpolating mode. Mode 2 previously only
            # recomputed interp_sz and skipped the resize, so the peak of
            # the un-resized response was wrapped with interp_sz and used
            # as a pixel displacement.
            responsef = self.resize_dft2(responsef, self.interp_sz)
        response = np.real(ifft2(responsef))

        if vis is True:
            # Roll the response so the zero-shift position is centred.
            score = response
            score = np.roll(score, int(np.floor(score.shape[0] / 2)),
                            axis=0)
            score = np.roll(score, int(np.floor(score.shape[1] / 2)),
                            axis=1)
            self.score = score
            self.crop_size = self.win_sz

        row, col = np.unravel_index(np.argmax(response, axis=None),
                                    response.shape)
        # Map the peak index to a signed shift around zero.
        half_rows = np.floor((self.interp_sz[1] - 1) / 2)
        half_cols = np.floor((self.interp_sz[0] - 1) / 2)
        disp_row = np.mod(row + half_rows, self.interp_sz[1]) - half_rows
        disp_col = np.mod(col + half_cols, self.interp_sz[0]) - half_cols

        if self.interpolate_response == 0:
            translation_vec = list(
                np.array([disp_row, disp_col]) * self.cell_size * self.sc)
        elif self.interpolate_response == 1:
            translation_vec = list(
                np.array([disp_row, disp_col]) * self.sc)
        elif self.interpolate_response == 2:
            translation_vec = [disp_row, disp_col]
        else:
            raise ValueError('unsupported interpolate_response: {}'.format(
                self.interpolate_response))

        old_pos = self._center
        # translation_vec is (row, col); the centre is stored as (x, y).
        self._center = (old_pos[0] + translation_vec[1],
                        old_pos[1] + translation_vec[0])
        iteration += 1

    # --- scale -----------------------------------------------------------
    self.sc = self.scale_estimator.update(current_frame, self._center,
                                          self.base_target_sz, self.sc)
    if self.scale_type == 'normal':
        self.sc = np.clip(self.sc, a_min=self._min_scale_factor,
                          a_max=self._max_scale_factor)

    # --- template update -------------------------------------------------
    patch = cv2.getRectSubPix(
        current_frame,
        (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
         int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
        self._center)
    patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
    xo_hog, xo_cn = self.get_features(patch, self.cell_size)
    self.z_hog = (1 - self.lr_hog) * self.z_hog + self.lr_hog * xo_hog
    self.z_cn = (1 - self.lr_cn) * self.z_cn + self.lr_cn * xo_cn

    # Recompute the projection bases from the updated templates: leading
    # left singular vectors of the channel-by-channel Gram matrix.
    data_matrix_cn = self.z_cn.reshape((-1, self.z_cn.shape[2]))
    pca_basis_cn, _, _ = np.linalg.svd(
        data_matrix_cn.T.dot(data_matrix_cn))
    self.projection_matrix_cn = pca_basis_cn[:, :self.num_compressed_dim_cn]

    data_matrix_hog = self.z_hog.reshape((-1, self.z_hog.shape[2]))
    pca_basis_hog, _, _ = np.linalg.svd(
        data_matrix_hog.T.dot(data_matrix_hog))
    self.projection_matrix_hog = (
        pca_basis_hog[:, :self.num_compressed_dim_hog])

    self.z_cn2, self.z_hog2 = self.feature_projection(
        self.z_cn, self.z_hog, self.projection_matrix_cn,
        self.projection_matrix_hog, self._window)

    if self.frame_index % self.modnum == 0:
        self.train_model()

    target_sz = ((self.base_target_sz[0] * self.sc),
                 (self.base_target_sz[1] * self.sc))
    return [(self._center[0] - target_sz[0] / 2),
            (self._center[1] - target_sz[1] / 2),
            target_sz[0], target_sz[1]]