def generate_samples(self, curr_bbox, positive, init=False):
    """Draw training boxes around ``curr_bbox`` together with their labels.

    The sample count and the overlap threshold are read from ``self.opts``.
    Which pair of keys is used depends on ``init`` (``*_init`` versus
    ``*_online``) and on ``positive`` (``nPos``/``posThre`` versus
    ``nNeg``/``negThre``).

    Args:
        curr_bbox: Reference box the candidates are generated around.
            NOTE(review): presumably ``[x, y, w, h]`` -- confirm against
            ``gen_samples`` / ``overlap_ratio``.
        positive: Truthy to draw positives ('gaussian' generator, overlap
            strictly above the threshold); falsy to draw negatives
            ('uniform' generator, overlap strictly below the threshold).
        init: Truthy to use the ``*_init`` options, otherwise ``*_online``.

    Returns:
        Tuple ``(bboxes, labels, score_labels)``: the selected boxes, one
        action-label row per box as nested lists (all -1 for negatives), and
        a list holding one score label per box (1 positive, 0 negative).

    Raises:
        AssertionError: If the configured sample count is not positive.
    """
    option_keys = {
        (True, True): ('nPos_init', 'posThre_init'),
        (True, False): ('nNeg_init', 'negThre_init'),
        (False, True): ('nPos_online', 'posThre_online'),
        (False, False): ('nNeg_online', 'negThre_online'),
    }
    count_key, thre_key = option_keys[bool(init), bool(positive)]
    n = self.opts[count_key]
    Thre = self.opts[thre_key]

    assert n > 0, "if n = 0, don't initialize this class"

    # Twice as many candidates as needed are requested before filtering.
    if positive:
        candidates = gen_samples('gaussian', curr_bbox, n * 2, self.opts,
                                 self.opts['finetune_trans'],
                                 self.opts['finetune_scale_factor'])
    else:
        candidates = gen_samples('uniform', curr_bbox, n * 2, self.opts, 2, 5)

    reference = np.matlib.repmat(curr_bbox, len(candidates), 1)
    overlaps = np.array(overlap_ratio(candidates, reference))
    mask = overlaps > Thre if positive else overlaps < Thre
    candidates = candidates[mask]

    # Indices are drawn independently, so a candidate can be picked twice.
    picks = np.random.randint(low=0,
                              high=len(candidates),
                              size=min(len(candidates), n))
    examples = candidates[picks, :]

    # score labels: 1 is positive. 0 is negative
    if positive:
        action_labels = gen_action_labels(self.opts['num_actions'], self.opts,
                                          np.array(examples), curr_bbox)
        score_labels = list(np.ones(len(examples), dtype=int))
    else:
        action_labels = np.full((self.opts['num_actions'], len(examples)),
                                fill_value=-1)
        score_labels = list(np.zeros(len(examples), dtype=int))

    # One row per sample after the transpose.
    labels = np.transpose(action_labels).tolist()
    return examples, labels, score_labels
def gen_samples(generator, bbox, n, overlap_range=None, scale_range=None):
    """Collect up to ``n`` generated boxes that satisfy the given filters.

    Without any filter the call is forwarded straight to
    ``generator(bbox, n)``. Otherwise boxes are requested in rounds, asking
    for 2x, 4x and finally 8x the number still missing, and only those that
    pass every active filter are kept.

    Args:
        generator: Callable ``generator(bbox, k)`` returning a 2-D array of
            boxes whose columns from index 2 onward hold the box size.
        bbox: Reference box; ``bbox[2:]`` is used as its size.
        n: Number of boxes wanted.
        overlap_range: Optional ``(low, high)`` inclusive bounds on
            ``overlap_ratio(samples, bbox)``.
        scale_range: Optional ``(low, high)`` inclusive bounds on the ratio
            of sample area to reference area.

    Returns:
        The generator output when no filter is given. Otherwise an array with
        at most ``n`` accepted boxes (possibly fewer, possibly empty), or
        ``None`` when ``n <= 0`` because no round is ever run.
    """
    if overlap_range is None and scale_range is None:
        return generator(bbox, n)

    collected = None
    missing = n
    oversample = 2
    while missing > 0 and oversample < 16:
        batch = generator(bbox, missing * oversample)
        keep = np.ones(len(batch), dtype=bool)

        if overlap_range is not None:
            ratios = overlap_ratio(batch, bbox)
            keep &= (ratios >= overlap_range[0]) & (ratios <= overlap_range[1])

        if scale_range is not None:
            rel_area = np.prod(batch[:, 2:], axis=1) / np.prod(bbox[2:])
            keep &= (rel_area >= scale_range[0]) & (rel_area <= scale_range[1])

        # Keep the survivors, but never more than are still missing.
        batch = batch[keep, :][:missing]

        if collected is None:
            collected = batch
        else:
            collected = np.concatenate([collected, batch])

        missing = n - len(collected)
        oversample *= 2

    return collected
def iou_precision_plot(bboxes, ground_truth, title, show=True, save_plot=None):
    """Compute, and optionally plot, precision over IoU thresholds 0.00-0.99.

    Args:
        bboxes: Predicted boxes, one per frame.
        ground_truth: Ground-truth boxes, one per frame.
        title: Plot title forwarded to ``plot_result``.
        show: Forwarded to ``plot_result``; also enables plotting.
        save_plot: Optional output name; ``'-iou'`` is appended before it is
            forwarded to ``plot_result``.

    Returns:
        A list of 100 floats; entry ``p`` is the fraction of frames whose
        overlap with the ground truth is at least ``p / 100``.

    Raises:
        ZeroDivisionError: If there are no frames to evaluate.
    """
    max_threshold = 100  # used for graphs in the paper

    if len(bboxes) != len(ground_truth):
        print("WARNING: the size of iou and ground_truth are not same")
        # just ignore any extra frames, in either results or ground truth
        n = min(len(bboxes), len(ground_truth))
        # Both sequences must be trimmed; the truncated boxes used to be
        # stored in an unused variable, leaving the inputs mismatched.
        bboxes = bboxes[:n]
        ground_truth = ground_truth[:n]

    iou = np.array(overlap_ratio(bboxes, ground_truth))

    # compute precision
    precisions = [
        len(iou[iou >= p / 100.0]) / len(iou) for p in range(max_threshold)
    ]

    # plot
    if show or save_plot:
        if save_plot is not None:
            save_plot += '-iou'
        plot_result(precisions,
                    title,
                    show=show,
                    save_plot=save_plot,
                    xlabel='iou threshold (x0.01)',
                    ylabel='precision')

    return precisions
def reward_original(gt, box):
    """Return 1 when ``overlap_ratio(gt, box)`` exceeds 0.7, otherwise -1."""
    return 1 if overlap_ratio(gt, box) > 0.7 else -1
def gen_action_labels(num_actions, opts, bb_samples, gt_bbox):
    """Pick, for every sample box, the action that best moves it onto the target.

    Each sample is converted to centre form, every action in
    ``opts['action_move']['deltas']`` is applied to it, and the resulting
    boxes are scored against ``gt_bbox`` with ``overlap_ratio``.

    Args:
        num_actions: Number of available actions (rows of the delta table).
        opts: Options; reads ``'action_move'`` (keys ``'x'``, ``'y'``,
            ``'w'``, ``'h'``, ``'deltas'``), ``'stop_action'`` and
            ``'stopIou'``.
        bb_samples: 2-D array of sample boxes, one ``[x, y, w, h]`` row per
            sample with ``x, y`` the top-left corner. The array is not
            modified.
        gt_bbox: Target box the actions are scored against.

    Returns:
        Float array of shape ``(num_actions, len(bb_samples))`` whose column
        ``j`` is the one-hot encoding of the action chosen for sample ``j``.
    """
    num_samples = len(bb_samples)
    action_labels = np.zeros([num_actions, num_samples])
    m = opts['action_move']

    for j in range(num_samples):
        # Work on a float copy. The caller's row used to be shifted to centre
        # form in place and shifted back afterwards, which truncates (and so
        # corrupts) integer-typed sample arrays.
        bbox = np.array(bb_samples[j, :], dtype=float)
        bbox[0] = bbox[0] + 0.5 * bbox[2]
        bbox[1] = bbox[1] + 0.5 * bbox[3]

        deltas = [
            m['x'] * bbox[2], m['y'] * bbox[3], m['w'] * bbox[2],
            m['h'] * bbox[3]
        ]
        # Derive the shorter side's scale step from the longer side's step
        # and the aspect ratio.
        ar = bbox[2] / bbox[3]
        if bbox[2] > bbox[3]:
            deltas[3] = deltas[2] / ar
        else:
            deltas[2] = deltas[3] * ar

        deltas = np.matlib.repmat(deltas, num_actions, 1)
        action_deltas = np.multiply(m['deltas'], deltas)
        action_boxes = np.matlib.repmat(bbox, num_actions, 1) + action_deltas
        # Back from centre form to top-left form before measuring overlap.
        action_boxes[:, 0] = action_boxes[:, 0] - 0.5 * action_boxes[:, 2]
        action_boxes[:, 1] = action_boxes[:, 1] - 0.5 * action_boxes[:, 3]

        overs = overlap_ratio(action_boxes,
                              np.matlib.repmat(gt_bbox, num_actions, 1))

        max_action = np.argmax(overs[:-2])  # translation overlap
        max_value = overs[max_action]

        if overs[opts['stop_action']] > opts['stopIou']:
            max_action = opts['stop_action']

        if max_value == overs[opts['stop_action']]:
            max_action = np.argmax(overs[:])  # (trans + scale) action

        action = np.zeros(num_actions)
        action[max_action] = 1
        action_labels[:, j] = action

    return action_labels  # in real matlab code, they also return overs


# test the module
# from utils.gen_samples import gen_samples
# gt_bbox = [50,50,20,20]
# pos_examples = gen_samples('gaussian', gt_bbox, opts['nPos_train']*5, opts, 0.1, 5)
# gen_action_labels(opts['num_actions'], opts, pos_examples, gt_bbox)
def train(self, X, bbox, gt):
    """Fit ``self.model`` on the samples that pass the overlap/scale filter.

    Args:
        X: Feature batch exposing ``.cpu().numpy()``.
            NOTE(review): presumably a torch tensor with one row per box.
        bbox: 2-D array of sample boxes; columns from index 2 onward hold
            the box size. Copied, never modified.
        gt: Ground-truth box, either 1-D or with a leading axis of length 1.
            Copied, never modified.
    """
    features = X.cpu().numpy()
    boxes = np.copy(bbox)
    target = np.copy(gt)
    if target.ndim == 1:
        target = target[None, :]

    overlap = overlap_ratio(boxes, target)
    rel_area = np.prod(boxes[:, 2:], axis=1) / np.prod(target[0, 2:])

    # Keep only samples whose overlap and relative area are both in range.
    in_overlap = (overlap >= self.overlap_range[0]) & \
        (overlap <= self.overlap_range[1])
    in_scale = (rel_area >= self.scale_range[0]) & \
        (rel_area <= self.scale_range[1])
    selected = in_overlap & in_scale

    targets = self.get_examples(boxes[selected], target)
    self.model.fit(features[selected], targets)
def predict(self, X, bbox):
    """Refine ``bbox`` with the offsets predicted by ``self.model``.

    The predicted offsets shift the box centre (scaled by the box size) and
    rescale the size through ``exp``. A refinement whose overlap with, or
    area ratio to, the input box leaves the configured ranges is discarded
    and the input box is kept instead. Results are clipped to non-negative
    corners and to ``self.img_size - bbox[:, :2]`` for the size.

    Args:
        X: Feature batch exposing ``.cpu().numpy()``.
            NOTE(review): presumably a torch tensor with one row per box.
        bbox: 2-D array of ``[x, y, w, h]`` boxes; not modified.

    Returns:
        A new array with the same shape and dtype as ``bbox``.
    """
    features = X.cpu().numpy()
    refined = np.copy(bbox)
    offsets = self.model.predict(features)

    # Views into ``refined``: writing through them updates it in place,
    # step by step, exactly like the equivalent slice assignments.
    xy = refined[:, :2]
    wh = refined[:, 2:]
    xy[...] = xy + wh / 2                    # corner -> centre
    xy[...] = offsets[:, :2] * wh + xy       # shift the centre
    wh[...] = np.exp(offsets[:, 2:]) * wh    # rescale the size
    xy[...] = xy - wh / 2                    # centre -> corner

    overlap = overlap_ratio(bbox, refined)
    rel_area = np.prod(bbox[:, 2:], axis=1) / np.prod(wh, axis=1)
    accepted = (overlap >= self.overlap_range[0]) & \
        (overlap <= self.overlap_range[1]) & \
        (rel_area >= self.scale_range[0]) & \
        (rel_area <= self.scale_range[1])

    # Fall back to the input box wherever the refinement is rejected.
    rejected = np.logical_not(accepted)
    refined[rejected] = bbox[rejected]

    xy[...] = np.maximum(xy, 0)
    wh[...] = np.minimum(wh, self.img_size - bbox[:, :2])
    return refined
def do_iou_precise(path_exam, path_gt, thre=0.7):
    """Compute per-box IoU between two saved arrays and report summary stats.

    The overlaps are written to ``output/iou.txt`` with six decimals, then
    the mean overlap and the fraction of overlaps strictly above ``thre``
    are printed.

    :param path_exam: path of the ``.npy`` file holding the boxes to score
    :param path_gt: path of the ``.npy`` file holding the reference boxes
    :param thre: overlap threshold used for the printed precision
    :return: None
    """
    from utils.overlap_ratio import overlap_ratio

    out_dir = "output/"
    examined = np.load(path_exam)
    reference = np.load(path_gt)

    overlaps = overlap_ratio(examined, reference)
    # fmt: keep 6 numbers after dot
    np.savetxt(out_dir + 'iou.txt', overlaps, fmt='%.06f')

    overlaps = np.array(overlaps)
    average_iou = overlaps.mean()
    hits = overlaps[overlaps > thre]
    precise = hits.size / overlaps.size
    print("average_iou: " + str(average_iou) + " ;\t precise: " + str(precise))
def process_data_mul_step_2(img_paths, opt, train_db_pos_neg_all, lock):
    """Build multi-step action records from pairs of consecutive frames.

    For every fifth frame ``i`` of each video, and every track id that also
    occurs in frame ``i + 1``, a sequence of 14 actions is searched that
    moves the frame-``i`` box towards the frame-``i + 1`` box: each action is
    the first move of the best of 50 random 5-action rollouts, scored with
    ``cal_iou`` against the target. A stop action is appended. When the final
    box overlaps the target by more than ``opts['stopIou']`` the 15
    (box, one-hot action) pairs are stored as positives, and after every
    third positive one negative box sampled around the target is stored.
    Only records that end up holding exactly 20 boxes are kept.

    Args:
        img_paths: Iterable of per-video dicts with the keys ``'gt'``,
            ``'img_files'`` and ``'trackid'``, each indexed by frame.
        opt: Options dict; a shallow copy is made and its ``'imgSize'``
            entry is overwritten per video.
        train_db_pos_neg_all: Shared list the finished records are appended
            to.
        lock: Object with ``acquire()``/``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    records = []

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # directory prefix and the image extension -- confirm against the
        # layout of 'img_files'.
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - 2, 5):
            for l in range(len(train_i['trackid'][i])):
                record = {
                    'img_path': train_i['img_files'][i + 1],
                    'bboxes': [],
                    'labels': [],
                    'score_labels': []
                }
                for k in range(len(train_i['trackid'][i + 1])):
                    if train_i['trackid'][i][l] != train_i['trackid'][i + 1][k]:
                        continue

                    gt_end = train_i['gt'][i + 1][k]
                    step_list = []
                    box_list = [train_i['gt'][i][l]]

                    for _ in range(14):
                        iou_max = -1
                        step_max = []
                        box_max = []
                        for _ in range(50):
                            curr_bbox = box_list[-1]
                            step = []
                            box = []
                            for _ in range(5):  # step numbers
                                action = random.randint(0, 10)
                                step.append(action)
                                box.append(curr_bbox)
                                curr_bbox = do_action(curr_bbox, opts, action,
                                                      opts['imgSize'])
                            box.append(curr_bbox)
                            step.append(opts['stop_action'])  # stop action
                            c_iou = cal_iou(curr_bbox, gt_end)
                            if c_iou > iou_max:
                                iou_max = c_iou
                                step_max = step
                                box_max = box
                        # Commit only the first move of the best rollout.
                        step_list.append(step_max[0])
                        box_list.append(box_max[1])

                    step_list.append(opts['stop_action'])
                    iou_max = cal_iou(box_list[-1], gt_end)
                    if iou_max <= opts['stopIou']:
                        continue

                    # save data to train_db
                    for datai in range(len(step_list)):
                        record['bboxes'].append(box_list[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_list[datai]] = 1
                        record['labels'].append(action_t.tolist())
                        record['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 != 0:
                            continue

                        # Up to 20 attempts to draw a box whose overlap with
                        # the target is below the negative threshold.
                        neg_box = None
                        for _ in range(20):
                            neg = gen_samples('gaussian', gt_end, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_end, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break
                        if neg_box is None:
                            # No valid negative was found. Skip it instead of
                            # re-using a stale (or undefined) box; the record
                            # then misses the 20-box check and is dropped.
                            continue

                        record['bboxes'].append(neg_box)
                        action_label_neg = np.full((opts['num_actions'], 1),
                                                   fill_value=-1)
                        record['labels'].extend(
                            np.transpose(action_label_neg).tolist())
                        record['score_labels'].extend(
                            list(np.zeros(1, dtype=int)))

                # 15 positives + 5 negatives make a complete record.
                if len(record['bboxes']) == 20:
                    records.append(record)

    # Acquire outside the try block so release() only runs on a held lock.
    lock.acquire()
    try:
        train_db_pos_neg_all.extend(records)
    finally:
        lock.release()
def process_data_mul_step_3(img_paths, opt, train_db_pos_neg_all, lock):
    """Build greedy multi-step action samples from frames ``i`` and ``i + 1``.

    For every fifth frame ``i`` of each video, and every track id that also
    occurs in frame ``i + 1``, the frame-``i`` box is moved for at most 15
    steps. Each step tries actions 0..10 and takes the one with the highest
    ``cal_iou`` against the frame-``i + 1`` box; the stop action is preferred
    when its overlap is within 0.005 of the best. The walk ends when the stop
    action is chosen. If the last overlap exceeds ``opts['stopIou']`` every
    (box, one-hot action) pair is stored as a positive, and after every third
    positive one negative box sampled around the target is stored.

    All samples of this call go into a single dict of parallel lists
    (``'img_path'``, ``'bboxes'``, ``'labels'``, ``'score_labels'``).

    Args:
        img_paths: Iterable of per-video dicts with the keys ``'gt'``,
            ``'img_files'`` and ``'trackid'``, each indexed by frame.
        opt: Options dict; a shallow copy is made and its ``'imgSize'``
            entry is overwritten per video.
        train_db_pos_neg_all: Shared list the result dict is appended to.
        lock: Object with ``acquire()``/``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    distan = 1  # frame distance between the start box and the target box

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # directory prefix and the image extension -- confirm against the
        # layout of 'img_files'.
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - distan - 1, 5):
            target_img = train_i['img_files'][i + distan]
            for l in range(len(train_i['trackid'][i])):
                for k in range(len(train_i['trackid'][i + distan])):
                    if (train_i['trackid'][i][l] !=
                            train_i['trackid'][i + distan][k]):
                        continue

                    gt_end = train_i['gt'][i + distan][k]
                    iou_max = 0
                    step_max = []
                    box_max = []
                    curr_bbox = train_i['gt'][i][l]

                    for _ in range(15):
                        box_max.append(curr_bbox)
                        t_iou_max = 0
                        t_box_max = []
                        t_act_max = -1
                        # NOTE(review): t_iou_act_stop / t_box_act_stop are
                        # only bound if opts['stop_action'] lies in 0..10.
                        for action in range(11):
                            curr_bbox_t = do_action(curr_bbox, opts, action,
                                                    opts['imgSize'])
                            t_iou = cal_iou(curr_bbox_t, gt_end)
                            if action == opts['stop_action']:
                                t_iou_act_stop = t_iou
                                t_box_act_stop = curr_bbox_t
                            if t_iou > t_iou_max:
                                t_iou_max = t_iou
                                t_act_max = action
                                t_box_max = curr_bbox_t

                        # Prefer stopping when it is (almost) as good as the
                        # best move.
                        if (abs(t_iou_act_stop - t_iou_max) < 0.005
                                and t_act_max != opts['stop_action']):
                            t_iou_max = t_iou_act_stop
                            t_act_max = opts['stop_action']
                            t_box_max = t_box_act_stop

                        if t_act_max == -1:
                            break
                        iou_max = t_iou_max

                        if t_act_max == opts['stop_action']:
                            step_max.append(opts['stop_action'])
                            break
                        step_max.append(t_act_max)
                        curr_bbox = t_box_max

                    if iou_max <= opts['stopIou']:
                        continue

                    # save data to train_db
                    for datai in range(len(step_max)):
                        train_db_pos_neg['img_path'].append(target_img)
                        train_db_pos_neg['bboxes'].append(box_max[datai])
                        action_t = np.zeros(opts['num_actions'])
                        action_t[step_max[datai]] = 1
                        train_db_pos_neg['labels'].append(action_t.tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if datai % 3 != 0:
                            continue

                        # Up to 20 attempts to draw a box whose overlap with
                        # the target is below the negative threshold.
                        neg_box = None
                        for _ in range(20):
                            neg = gen_samples('gaussian', gt_end, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_end, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break
                        if neg_box is None:
                            # No valid negative was found. Skip it instead of
                            # re-using a stale (or undefined) box.
                            continue

                        train_db_pos_neg['img_path'].append(target_img)
                        train_db_pos_neg['bboxes'].append(neg_box)
                        action_label_neg = np.full((opts['num_actions'], 1),
                                                   fill_value=-1)
                        train_db_pos_neg['labels'].extend(
                            np.transpose(action_label_neg).tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.zeros(1, dtype=int)))

    # Acquire outside the try block so release() only runs on a held lock.
    lock.acquire()
    try:
        train_db_pos_neg_all.append(train_db_pos_neg)
    finally:
        lock.release()
def process_data_ILSVR_consecutive_frame(img_paths, opt, train_db_pos_neg_all,
                                         lock):
    """Build training samples from earlier frames of the same track.

    Frames are visited from last to first. For every box in frame ``i`` the
    up to 15 preceding frames are scanned backwards for a box with the same
    track id. While that earlier box overlaps the frame-``i`` box by more
    than 0.7 (``cal_iou``) it is stored as a positive for frame ``i``; the
    first earlier box that does not ends the scan for this track. Whenever
    the frame distance is a multiple of three, one negative box sampled
    around the frame-``i`` box is stored as well.

    All samples of this call go into a single dict of parallel lists
    (``'img_path'``, ``'bboxes'``, ``'labels'``, ``'score_labels'``).

    Args:
        img_paths: Iterable of per-video dicts with the keys ``'gt'``,
            ``'img_files'`` and ``'trackid'``, each indexed by frame.
        opt: Options dict; a shallow copy is made and its ``'imgSize'``
            entry is overwritten per video.
        train_db_pos_neg_all: Shared list the result dict is appended to.
        lock: Object with ``acquire()``/``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    max_dis = 15  # how many frames to look back

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        # NOTE(review): the [39:-5] slice presumably strips a fixed-length
        # directory prefix and the image extension -- confirm against the
        # layout of 'img_files'.
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(n_frames - 1, 0, -1):
            frame_img = train_i['img_files'][i]
            for l in range(len(train_i['trackid'][i])):
                gt_bbox = train_i['gt'][i][l]
                bk_sign = False  # set once the track has drifted too far

                for j in range(i - 1, i - max_dis - 1, -1):
                    if j < 0:
                        break
                    for k in range(len(train_i['trackid'][j])):
                        if (train_i['trackid'][j][k] !=
                                train_i['trackid'][i][l]):
                            continue

                        pos_box = train_i['gt'][j][k]
                        c_iou = cal_iou(pos_box, gt_bbox)
                        if c_iou <= 0.7:
                            bk_sign = True
                            break

                        action_label_pos, _ = gen_action_pos_neg_labels(
                            opts['num_actions'], opts, np.array(pos_box),
                            gt_bbox)
                        train_db_pos_neg['img_path'].append(frame_img)
                        train_db_pos_neg['bboxes'].append(pos_box)
                        train_db_pos_neg['labels'].extend(
                            np.transpose(action_label_pos).tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.ones(1, dtype=int)))

                        if (i - j) % 3 != 0:
                            continue

                        # Up to 20 attempts to draw a box whose overlap with
                        # the target is below the negative threshold.
                        neg_box = None
                        for _ in range(20):
                            neg = gen_samples('gaussian', gt_bbox, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break
                        if neg_box is None:
                            # No valid negative was found. The positive box
                            # used to be re-added here with score label 0;
                            # skip the negative instead.
                            continue

                        train_db_pos_neg['img_path'].append(frame_img)
                        train_db_pos_neg['bboxes'].append(neg_box)
                        action_label_neg = np.full((opts['num_actions'], 1),
                                                   fill_value=-1)
                        train_db_pos_neg['labels'].extend(
                            np.transpose(action_label_neg).tolist())
                        train_db_pos_neg['score_labels'].extend(
                            list(np.zeros(1, dtype=int)))

                    if bk_sign:
                        break

    # Acquire outside the try block so release() only runs on a held lock.
    lock.acquire()
    try:
        train_db_pos_neg_all.append(train_db_pos_neg)
    finally:
        lock.release()
def process_data_vot(train_sequences, vid_info, opt, train_db_pos,
                     train_db_neg, lock):
    """Sample positive and negative training boxes for the given frames.

    For every frame index in ``train_sequences`` that has a non-empty ground
    truth box, ``opts['nPos_train']`` positives (overlap above
    ``opts['posThre_train']``) and ``opts['nNeg_train']`` negatives (overlap
    below ``opts['negThre_train']``) are drawn around it. One dict per frame
    is appended to each of the two shared lists.

    Args:
        train_sequences: Sequence of frame indices into ``vid_info``.
        vid_info: Dict with per-frame ``'gt'`` boxes and ``'img_files'``.
        opt: Options dict; a shallow copy is used.
        train_db_pos: Shared list receiving the positive records.
        train_db_neg: Shared list receiving the negative records.
        lock: Object with ``acquire()``/``release()`` guarding both lists.
    """
    opts = opt.copy()
    pos_records = []
    neg_records = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]
        if len(gt_bbox) == 0:
            continue

        # NOTE(review): both sampling loops retry without bound; they never
        # terminate if no candidate can satisfy the threshold.
        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            picks = np.random.randint(
                low=0,
                high=len(pos),
                size=min(len(pos), opts['nPos_train'] - len(pos_examples)))
            pos_examples.extend(pos[picks, :])

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # in original code, this 1 line below use opts['nPos_train']
            # instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            picks = np.random.randint(
                low=0,
                high=len(neg),
                size=min(len(neg), opts['nNeg_train'] - len(neg_examples)))
            neg_examples.extend(neg[picks, :])

        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)

        img_file = vid_info['img_files'][img_idx]
        # score labels: 1 is positive. 0 is negative
        pos_records.append({
            'img_path': np.full(len(pos_examples), img_file),
            'bboxes': pos_examples,
            'labels': np.transpose(action_labels_pos).tolist(),
            'score_labels': list(np.ones(len(pos_examples), dtype=int))
        })
        neg_records.append({
            'img_path': np.full(len(neg_examples), img_file),
            'bboxes': neg_examples,
            'labels': np.transpose(action_labels_neg).tolist(),
            'score_labels': list(np.zeros(len(neg_examples), dtype=int))
        })

    # Acquire outside the try block so release() only runs on a held lock.
    lock.acquire()
    try:
        train_db_pos.extend(pos_records)
        train_db_neg.extend(neg_records)
    finally:
        lock.release()
def process_data_ILSVR(img_paths, opt, train_db_pos_neg_all, lock):
    """Sample positive and negative training boxes for ILSVRC VID frames.

    For every ground-truth box of every listed frame,
    ``opts['nPos_train']`` positives (overlap above
    ``opts['posThre_train']``) and ``opts['nNeg_train']`` negatives (overlap
    below ``opts['negThre_train']``) are drawn around it. Sampling for a box
    gives up as soon as one round yields no valid candidate, and only boxes
    for which the full number of samples was collected produce a record.

    Args:
        img_paths: Iterable of frame identifiers relative to the VID train
            directories, without file extension.
        opt: Options dict; a shallow copy is made and its ``'imgSize'``
            entry is overwritten per frame.
        train_db_pos_neg_all: Shared list the finished records are appended
            to.
        lock: Object with ``acquire()``/``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    records = []

    for train_i in img_paths:
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        gt_bboxs = imginfo['gts']
        opts['imgSize'] = imginfo['imgsize']
        img_path = '../datasets/data/ILSVRC/Data/VID/train/' + train_i + '.JPEG'

        for gt_bbox in gt_bboxs:
            pos_examples = []
            while len(pos_examples) < opts['nPos_train']:
                pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                                  opts, 0.1, 5)
                r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
                pos = pos[np.array(r) > opts['posThre_train']]
                if len(pos) == 0:
                    # Give up on this box rather than retrying forever.
                    break
                picks = np.random.randint(
                    low=0,
                    high=len(pos),
                    size=min(len(pos),
                             opts['nPos_train'] - len(pos_examples)))
                pos_examples.extend(pos[picks, :])

            neg_examples = []
            while len(neg_examples) < opts['nNeg_train']:
                # in original code, this 1 line below use opts['nPos_train']
                # instead of opts['nNeg_train']
                neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                                  opts, 2, 10)
                r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                neg = neg[np.array(r) < opts['negThre_train']]
                if len(neg) == 0:
                    # Give up on this box rather than retrying forever.
                    break
                picks = np.random.randint(
                    low=0,
                    high=len(neg),
                    size=min(len(neg),
                             opts['nNeg_train'] - len(neg_examples)))
                neg_examples.extend(neg[picks, :])

            action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                                  np.array(pos_examples),
                                                  gt_bbox)
            action_labels_neg = np.full(
                (opts['num_actions'], len(neg_examples)), fill_value=-1)
            action_labels_pos = np.transpose(action_labels_pos).tolist()
            action_labels_neg = np.transpose(action_labels_neg).tolist()

            # Positives first, then negatives, in parallel lists.
            # score labels: 1 is positive. 0 is negative
            record = {
                'img_path': img_path,
                'bboxes': [],
                'labels': [],
                'score_labels': []
            }
            record['bboxes'].extend(pos_examples)
            record['labels'].extend(action_labels_pos)
            record['score_labels'].extend(
                list(np.ones(len(pos_examples), dtype=int)))
            record['bboxes'].extend(neg_examples)
            record['labels'].extend(action_labels_neg)
            record['score_labels'].extend(
                list(np.zeros(len(neg_examples), dtype=int)))

            # Keep the record only if sampling produced the full set. It can
            # fall short, e.g. when no candidate passes the overlap threshold
            # for a very small box.
            if len(record['bboxes']) == (opts['nPos_train'] +
                                         opts['nNeg_train']):
                records.append(record)

    # Acquire outside the try block so release() only runs on a held lock.
    lock.acquire()
    try:
        train_db_pos_neg_all.extend(records)
    finally:
        lock.release()
def run_tracking(
        img_list,
        init_bbox,
        gt=None,
        savefig_dir='',
        display=False,
        siamfc_path="../models/siamfc_pretrained.pth",
        policy_path="../models/template_policy/11200_template_policy.pth",
        gpu_id=0):
    """Track a target through ``img_list`` starting from ``init_bbox``.

    Per frame, a SiamFC tracker proposes a box, an actor network predicts a
    correction for it, and the corrected box becomes the new target whenever
    the sharpness measure of the current crop (variance of its Laplacian) is
    above 100. Every tenth frame a new template is created from the corrected
    box and replaces the oldest non-initial template.

    Args:
        img_list: Sequence of image file paths, one per frame.
        init_bbox: Initial target box ``[x, y, w, h]`` for the first frame.
        gt: Optional array of ground-truth boxes, one row per frame; only
            used for display and logging.
        savefig_dir: Directory for saved frames. Saving is currently
            disabled inside the function (``savefig = 0``).
        display: Show the tracking result and print per-frame statistics.
        siamfc_path: Path of the SiamFC weights.
        policy_path: NOTE(review): not used; the template-policy weights are
            loaded from a hard-coded path.
        gpu_id: GPU index forwarded to ``SiamFCTracker``.

    Returns:
        Tuple ``(result, fps)``: an array with one box per frame, and the
        number of frames divided by the accumulated processing time.

    Raises:
        ZeroDivisionError: If no frame after the first was processed.
    """
    rate = init_bbox[2] / init_bbox[3]
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list), 4))
    result[0] = target_bbox

    # Load only the pretrained weights whose names exist in the model.
    actor = Actor()
    pretrained_act_dict = torch.load(
        "../models/Double_agent/95600_DA_actor.pth")
    actor_dict = actor.state_dict()
    pretrained_act_dict = {
        k: v
        for k, v in pretrained_act_dict.items() if k in actor_dict
    }
    actor_dict.update(pretrained_act_dict)
    actor.load_state_dict(actor_dict)

    siamfc = SiamFCTracker(model_path=siamfc_path, gpu_id=gpu_id)
    siamEmbed = SiameseNet(BaselineEmbeddingNet())

    T_N = opts['T_N']
    pi = T_Policy(T_N)
    weights_init(pi)
    pretrained_pi_dict = torch.load(
        '../models/template_policy/95600_template_policy.pth')
    pi_dict = pi.state_dict()
    pretrained_pi_dict = {
        k: v
        for k, v in pretrained_pi_dict.items() if k in pi_dict
    }
    pi_dict.update(pretrained_pi_dict)
    pi.load_state_dict(pi_dict)

    if opts['use_gpu']:
        actor = actor.cuda()
        siamEmbed = siamEmbed.cuda()
        pi = pi.cuda()

    image = cv2.cvtColor(cv2.imread(img_list[0]), cv2.COLOR_BGR2RGB)

    # init
    deta_flag, out_flag_first = init_actor(actor, image, target_bbox)
    template = siamfc.init(image, target_bbox)
    # The template pool starts out as T_N references to the first template.
    templates = [template] * T_N

    spf_total = 0

    # Display
    savefig = 0
    if display or savefig:
        dpi = 80.0
        figsize = (image.shape[1] / dpi, image.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(image)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result[0, :2]),
                             result[0, 2],
                             result[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    imageVar_first = cv2.Laplacian(
        crop_image_blur(np.array(image), target_bbox), cv2.CV_64F).var()

    for i in range(1, len(img_list)):
        tic = time.time()

        # Load image
        image = cv2.cvtColor(cv2.imread(img_list[i]), cv2.COLOR_BGR2RGB)
        np_img = np.array(
            cv2.resize(image, (255, 255),
                       interpolation=cv2.INTER_AREA)).transpose(2, 0, 1)
        # One copy of the search image per template. Built without a loop
        # variable: the previous inner ``for i in ...`` overwrote the frame
        # index used below for result[i], gt[i] and the update schedule.
        np_imgs = [np_img] * T_N

        # Sharpness is only re-measured when the first frame was sharp.
        if imageVar_first > 200:
            imageVar = cv2.Laplacian(
                crop_image_blur(np.array(image), target_bbox),
                cv2.CV_64F).var()
        else:
            imageVar = 200

        if opts['use_gpu']:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float().cuda(),
                torch.Tensor(np_imgs).float().cuda())
        else:
            responses = siamEmbed(
                torch.Tensor(templates).permute(0, 3, 1, 2).float(),
                torch.Tensor(np_imgs).float())

        if opts['use_gpu']:
            pi_input = torch.Tensor(responses.cpu()).permute(1, 0, 2,
                                                             3).cuda()
            action = pi(pi_input).cpu().detach().numpy()
        else:
            pi_input = torch.Tensor(responses).permute(1, 0, 2, 3)
            action = pi(pi_input).numpy()
        action_id = np.argmax(action)
        # NOTE(review): the template chosen by the policy is not passed to
        # the tracker; templates[0] is always used -- confirm this is
        # intended.
        template = templates[action_id]
        siam_box = siamfc.update(image, templates[0])
        # Corner form (x1, y1, x2, y2) -> (x, y, w, h).
        siam_box = np.round([
            siam_box[0], siam_box[1], siam_box[2] - siam_box[0],
            siam_box[3] - siam_box[1]
        ])
        print(siam_box)

        # Estimate target bbox
        img_g, img_l, out_flag = getbatch_actor(
            np.array(image),
            np.array(siam_box).reshape([1, 4]))
        deta_pos = actor(img_l, img_g)
        deta_pos = deta_pos.data.clone().cpu().numpy()
        # Ignore the third offset component when it is large, or when the
        # initial / current crop was flagged.
        if deta_pos[:, 2] > 0.05 or deta_pos[:, 2] < -0.05:
            deta_pos[:, 2] = 0
        if deta_flag or (out_flag and not out_flag_first):
            deta_pos[:, 2] = 0

        pos_ = np.round(
            move_crop_tracking(np.array(siam_box), deta_pos,
                               (image.shape[1], image.shape[0]), rate))

        # Blurry frames keep the previous target box.
        if imageVar > 100:
            target_bbox = pos_
        result[i] = target_bbox

        # Every tenth frame: add a fresh template and drop the oldest one
        # after the initial template at index 0.
        if i % 10 == 0:
            template = siamfc.init(image, pos_)
            templates.append(template)
            templates.pop(1)

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(image)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result[i, :2])
            rect.set_width(result[i, 2])
            rect.set_height(result[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)

        if display:
            if gt is None:
                print ("Frame %d/%d, Time %.3f" % \
                    (i, len(img_list), spf))
            else:
                if opts['show_train']:
                    print ("Frame %d/%d, Overlap %.3f, Time %.3f, box (%d,%d,%d,%d), var %d" % \
                        (i, len(img_list), overlap_ratio(gt[i], result[i])[0], spf, target_bbox[0],
                         target_bbox[1], target_bbox[2], target_bbox[3], imageVar))

    fps = len(img_list) / spf_total
    return result, fps
def get_train_dbs(vid_info, opts):
    """Sample positive and negative training boxes for one video.

    Frames are taken every ``opts['train']['gt_skip']`` frames, or, for the
    ``'alov300'`` database, wherever ``vid_info['gt_use']`` equals 1. For
    each selected frame with a non-empty ground-truth box,
    ``opts['nPos_train']`` positives (overlap above
    ``opts['posThre_train']``) and ``opts['nNeg_train']`` negatives (overlap
    below ``opts['negThre_train']``) are drawn around it.

    Args:
        vid_info: Dict with ``'img_files'``, ``'gt'``, ``'db_name'``,
            ``'nframes'`` and, for ``'alov300'``, ``'gt_use'``.
        opts: Options dict. Modified in place: ``'scale_factor'`` is set to
            1.05 and ``'imgSize'`` to the shape of the first image.

    Returns:
        Tuple ``(train_db_pos, train_db_neg)``: two lists holding one dict
        per processed frame with the keys ``'img_path'``, ``'bboxes'``,
        ``'labels'`` and ``'score_labels'``.
    """
    img = cv2.imread(vid_info['img_files'][0])

    opts['scale_factor'] = 1.05
    opts['imgSize'] = list(img.shape)
    gt_skip = opts['train']['gt_skip']

    if vid_info['db_name'] == 'alov300':
        # Frame indices, not a boolean mask: the loop below uses every entry
        # as an index into vid_info['gt'] and vid_info['img_files'].
        train_sequences = np.flatnonzero(np.asarray(vid_info['gt_use']) == 1)
    else:
        train_sequences = list(range(0, vid_info['nframes'], gt_skip))

    train_db_pos = []
    train_db_neg = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]
        if len(gt_bbox) == 0:
            continue

        # NOTE(review): both sampling loops retry without bound; they never
        # terminate if no candidate can satisfy the threshold.
        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            picks = np.random.randint(
                low=0,
                high=len(pos),
                size=min(len(pos), opts['nPos_train'] - len(pos_examples)))
            pos_examples.extend(pos[picks, :])

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # in original code, this 1 line below use opts['nPos_train']
            # instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            picks = np.random.randint(
                low=0,
                high=len(neg),
                size=min(len(neg), opts['nNeg_train'] - len(neg_examples)))
            neg_examples.extend(neg[picks, :])

        img_file = vid_info['img_files'][img_idx]
        show_examples_test(pos_examples, neg_examples, img_file)

        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)

        # score labels: 1 is positive. 0 is negative
        train_db_pos.append({
            'img_path': np.full(len(pos_examples), img_file),
            'bboxes': pos_examples,
            'labels': np.transpose(action_labels_pos).tolist(),
            'score_labels': list(np.ones(len(pos_examples), dtype=int))
        })
        train_db_neg.append({
            'img_path': np.full(len(neg_examples), img_file),
            'bboxes': neg_examples,
            'labels': np.transpose(action_labels_neg).tolist(),
            'score_labels': list(np.zeros(len(neg_examples), dtype=int))
        })

    return train_db_pos, train_db_neg