def generate_samples(self, curr_bbox, positive, init=False):
    """Draw positive or negative sample boxes around ``curr_bbox``.

    The sample count and the overlap threshold are read from ``self.opts``:
    the ``*_init`` keys when ``init`` is truthy, the ``*_online`` keys
    otherwise. ``2 * n`` candidates are generated, filtered by their overlap
    with ``curr_bbox`` (above the threshold for positives, below it for
    negatives) and then at most ``n`` of the survivors are picked with
    ``np.random.randint`` (i.e. with replacement).

    Args:
        curr_bbox: reference box the samples are generated around.
        positive: truthy for positive samples, falsy for negative ones.
        init: truthy to use the initial-frame options.

    Returns:
        ``(bboxes, labels, score_labels)``. ``labels`` is the transposed
        action-label matrix as nested lists (filled with ``-1`` for
        negatives); ``score_labels`` is a list of ones for positives and
        zeros for negatives.
    """
    option_keys = {
        (True, True): ('nPos_init', 'posThre_init'),
        (True, False): ('nNeg_init', 'negThre_init'),
        (False, True): ('nPos_online', 'posThre_online'),
        (False, False): ('nNeg_online', 'negThre_online'),
    }
    count_key, thre_key = option_keys[(bool(init), bool(positive))]
    n = self.opts[count_key]
    overlap_thre = self.opts[thre_key]

    assert n > 0, "if n = 0, don't initialize this class"

    # Candidate generation differs only in the sampler arguments.
    if positive:
        candidates = gen_samples('gaussian', curr_bbox, n * 2, self.opts,
                                 self.opts['finetune_trans'],
                                 self.opts['finetune_scale_factor'])
    else:
        candidates = gen_samples('uniform', curr_bbox, n * 2, self.opts, 2, 5)

    overlaps = np.array(
        overlap_ratio(candidates,
                      np.matlib.repmat(curr_bbox, len(candidates), 1)))
    if positive:
        keep_mask = overlaps > overlap_thre
    else:
        keep_mask = overlaps < overlap_thre
    candidates = candidates[keep_mask]

    picked_rows = np.random.randint(low=0,
                                    high=len(candidates),
                                    size=min(len(candidates), n))
    examples = candidates[picked_rows, :]

    # score labels: 1 is positive. 0 is negative
    if positive:
        action_labels = gen_action_labels(self.opts['num_actions'], self.opts,
                                          np.array(examples), curr_bbox)
        score_labels = list(np.ones(len(examples), dtype=int))
    else:
        action_labels = np.full((self.opts['num_actions'], len(examples)),
                                fill_value=-1)
        score_labels = list(np.zeros(len(examples), dtype=int))

    return examples, np.transpose(action_labels).tolist(), score_labels
def process_data_mul_step_2(img_paths, opt, train_db_pos_neg_all, lock):
    """Build multi-step action records from pairs of consecutive frames.

    For every 5th frame ``i`` and every track present in both frame ``i`` and
    frame ``i + 1``, a 14-step action sequence is searched greedily: at each
    step 50 random 5-action roll-outs are tried and the first action of the
    roll-out whose end box overlaps the next-frame box best is kept. A stop
    action is appended. If the final box overlaps the target by more than
    ``opts['stopIou']`` the (box, one-hot action) pairs are stored as
    positives, and after every third one a low-overlap box sampled around the
    target is stored as a negative (action labels ``-1``, score label ``0``).
    Only records holding exactly 20 boxes are kept.

    Args:
        img_paths: iterable of per-video dicts with ``'gt'``, ``'img_files'``
            and ``'trackid'`` entries indexed by frame.
        opt: options dict; a shallow copy is made and ``'imgSize'`` is set on
            the copy per video.
        train_db_pos_neg_all: shared list that receives the finished records.
        lock: object with ``acquire()`` / ``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg_gpu = []

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - 2, 5):
            for l in range(len(train_i['trackid'][i])):
                train_db_pos_neg = {
                    'img_path': train_i['img_files'][i + 1],
                    'bboxes': [],
                    'labels': [],
                    'score_labels': []
                }
                for k in range(len(train_i['trackid'][i + 1])):
                    if train_i['trackid'][i][l] != train_i['trackid'][i + 1][k]:
                        continue
                    gt_end = train_i['gt'][i + 1][k]

                    # Greedy search: keep the first action of the best of 50
                    # random 5-action roll-outs, 14 times in a row.
                    step_list = []
                    box_list = [train_i['gt'][i][l]]
                    for _ in range(14):
                        iou_max = -1
                        step_max = []
                        box_max = []
                        for _rollout in range(50):
                            curr_bbox = box_list[-1]
                            step = []
                            box = []
                            for _st in range(5):  # step numbers
                                action = random.randint(0, 10)
                                step.append(action)
                                box.append(curr_bbox)
                                curr_bbox = do_action(curr_bbox, opts, action,
                                                      opts['imgSize'])
                            box.append(curr_bbox)
                            step.append(opts['stop_action'])  # stop action
                            c_iou = cal_iou(curr_bbox, gt_end)
                            if c_iou > iou_max:
                                iou_max = c_iou
                                step_max = step
                                box_max = box
                        step_list.append(step_max[0])
                        box_list.append(box_max[1])
                    step_list.append(opts['stop_action'])

                    iou_max = cal_iou(box_list[-1], gt_end)
                    if iou_max > opts['stopIou']:
                        # save data to train_db
                        for datai in range(len(step_list)):
                            train_db_pos_neg['bboxes'].append(box_list[datai])
                            action_t = np.zeros(opts['num_actions'])
                            action_t[step_list[datai]] = 1
                            train_db_pos_neg['labels'].append(action_t.tolist())
                            train_db_pos_neg['score_labels'].extend(
                                list(np.ones(1, dtype=int)))

                            if datai % 3 != 0:
                                continue

                            # Up to 20 attempts to draw a low-overlap box
                            # around the target. When all attempts fail no
                            # negative is stored (previously a stale or
                            # undefined box was appended).
                            neg_box = None
                            for _attempt in range(20):
                                neg = gen_samples('gaussian', gt_end, 5, opts,
                                                  2, 10)
                                r = overlap_ratio(
                                    neg,
                                    np.matlib.repmat(gt_end, len(neg), 1))
                                neg = neg[np.array(r) <
                                          opts['consecutive_negThre_train']]
                                if len(neg) > 0:
                                    neg_box = neg[0]
                                    break
                            if neg_box is None:
                                continue

                            train_db_pos_neg['bboxes'].append(neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))

                # 15 positives + 5 negatives make a complete record.
                if len(train_db_pos_neg['bboxes']) == 20:
                    train_db_pos_neg_gpu.append(train_db_pos_neg)

    # Acquire outside the try block so that release() only runs when the lock
    # is actually held.
    lock.acquire()
    try:
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
    finally:
        lock.release()
def process_data_mul_step_3(img_paths, opt, train_db_pos_neg_all, lock):
    """Build greedy multi-step action samples into one flat record.

    For every 5th frame ``i`` and every track also present in frame
    ``i + distan`` (``distan`` is 1), the box is moved for up to 15 steps. At
    each step all 11 actions are tried and the one giving the highest IoU
    with the target box is chosen; the stop action is preferred when its IoU
    is within 0.005 of the best. The walk ends when the stop action is
    chosen. If the last IoU exceeds ``opts['stopIou']`` every visited
    (box, one-hot action) pair is stored as a positive, and after every third
    one a low-overlap box sampled around the target is stored as a negative.

    All samples of all videos go into a single dict of parallel lists
    (``'img_path'``, ``'bboxes'``, ``'labels'``, ``'score_labels'``) which is
    appended to ``train_db_pos_neg_all`` while holding ``lock``.

    Args:
        img_paths: iterable of per-video dicts with ``'gt'``, ``'img_files'``
            and ``'trackid'`` entries indexed by frame.
        opt: options dict; a shallow copy is made and ``'imgSize'`` is set on
            the copy per video.
        train_db_pos_neg_all: shared list receiving the record.
        lock: object with ``acquire()`` / ``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    distan = 1

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(0, n_frames - distan - 1, 5):
            for l in range(len(train_i['trackid'][i])):
                for k in range(len(train_i['trackid'][i + distan])):
                    if (train_i['trackid'][i][l] !=
                            train_i['trackid'][i + distan][k]):
                        continue
                    gt_end = train_i['gt'][i + distan][k]
                    target_img = train_i['img_files'][i + distan]

                    iou_max = 0
                    step_max = []
                    box_max = []
                    curr_bbox = train_i['gt'][i][l]
                    for _st in range(15):
                        box_max.append(curr_bbox)
                        t_iou_max = 0
                        t_box_max = []
                        t_act_max = -1
                        for action in range(11):
                            curr_bbox_t = do_action(curr_bbox, opts, action,
                                                    opts['imgSize'])
                            t_iou = cal_iou(curr_bbox_t, gt_end)
                            if action == opts['stop_action']:
                                t_iou_act_stop = t_iou
                                t_box_act_stop = curr_bbox_t
                            if t_iou > t_iou_max:
                                t_iou_max = t_iou
                                t_act_max = action
                                t_box_max = curr_bbox_t
                        # Prefer stopping when moving gains less than 0.005.
                        if (abs(t_iou_act_stop - t_iou_max) < 0.005
                                and t_act_max != opts['stop_action']):
                            t_iou_max = t_iou_act_stop
                            t_act_max = opts['stop_action']
                            t_box_max = t_box_act_stop
                        if t_act_max == -1:
                            break
                        iou_max = t_iou_max
                        if t_act_max == opts['stop_action']:
                            step_max.append(opts['stop_action'])
                            break
                        step_max.append(t_act_max)
                        curr_bbox = t_box_max

                    if iou_max > opts['stopIou']:
                        # save data to train_db
                        for datai in range(len(step_max)):
                            train_db_pos_neg['img_path'].append(target_img)
                            train_db_pos_neg['bboxes'].append(box_max[datai])
                            action_t = np.zeros(opts['num_actions'])
                            action_t[step_max[datai]] = 1
                            train_db_pos_neg['labels'].append(action_t.tolist())
                            train_db_pos_neg['score_labels'].extend(
                                list(np.ones(1, dtype=int)))

                            if datai % 3 != 0:
                                continue

                            # Up to 20 attempts to draw a low-overlap box.
                            # When all fail, nothing is stored so the
                            # parallel lists stay aligned (previously a stale
                            # or undefined box was appended).
                            neg_box = None
                            for _attempt in range(20):
                                neg = gen_samples('gaussian', gt_end, 5, opts,
                                                  2, 10)
                                r = overlap_ratio(
                                    neg,
                                    np.matlib.repmat(gt_end, len(neg), 1))
                                neg = neg[np.array(r) <
                                          opts['consecutive_negThre_train']]
                                if len(neg) > 0:
                                    neg_box = neg[0]
                                    break
                            if neg_box is None:
                                continue

                            train_db_pos_neg['img_path'].append(target_img)
                            train_db_pos_neg['bboxes'].append(neg_box)
                            action_label_neg = np.full(
                                (opts['num_actions'], 1), fill_value=-1)
                            action_label_neg = np.transpose(
                                action_label_neg).tolist()
                            train_db_pos_neg['labels'].extend(action_label_neg)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.zeros(1, dtype=int)))

    # Acquire outside the try block so that release() only runs when the lock
    # is actually held.
    lock.acquire()
    try:
        train_db_pos_neg_all.append(train_db_pos_neg)
    finally:
        lock.release()
def process_data_ILSVR_consecutive_frame(img_paths, opt, train_db_pos_neg_all,
                                         lock):
    """Build training samples from earlier boxes of the same track.

    Frames are visited from last to first. For each box in frame ``i`` the
    same track is looked up in up to ``max_dis`` (15) preceding frames. While
    the earlier box overlaps the current one with IoU above 0.7 it is stored
    as a positive sample for frame ``i`` with labels from
    ``gen_action_pos_neg_labels``; the first earlier box that does not ends
    the look-back for this track. Every third look-back distance additionally
    stores a low-overlap box sampled around the current box as a negative
    (action labels ``-1``, score label ``0``).

    All samples go into one dict of parallel lists which is appended to
    ``train_db_pos_neg_all`` while holding ``lock``.

    Args:
        img_paths: iterable of per-video dicts with ``'gt'``, ``'img_files'``
            and ``'trackid'`` entries indexed by frame.
        opt: options dict; a shallow copy is made and ``'imgSize'`` is set on
            the copy per video.
        train_db_pos_neg_all: shared list receiving the record.
        lock: object with ``acquire()`` / ``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg = {
        'img_path': [],
        'bboxes': [],
        'labels': [],
        'score_labels': []
    }
    max_dis = 15

    for train_i in img_paths:
        n_frames = len(train_i['gt'])
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i['img_files'][0][39:-5] + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        opts['imgSize'] = imginfo['imgsize']

        for i in range(n_frames - 1, 0, -1):
            for l in range(len(train_i['trackid'][i])):
                gt_bbox = train_i['gt'][i][l]
                bk_sign = False
                for j in range(i - 1, i - max_dis - 1, -1):
                    if j < 0:
                        break
                    for k in range(len(train_i['trackid'][j])):
                        if train_i['trackid'][j][k] != train_i['trackid'][i][l]:
                            continue
                        prev_box = train_i['gt'][j][k]
                        c_iou = cal_iou(prev_box, gt_bbox)
                        if c_iou > 0.7:
                            action_label_pos, _ = gen_action_pos_neg_labels(
                                opts['num_actions'], opts, np.array(prev_box),
                                gt_bbox)
                            train_db_pos_neg['img_path'].append(
                                train_i['img_files'][i])
                            train_db_pos_neg['bboxes'].append(prev_box)
                            action_label_pos = np.transpose(
                                action_label_pos).tolist()
                            train_db_pos_neg['labels'].extend(action_label_pos)
                            train_db_pos_neg['score_labels'].extend(
                                list(np.ones(1, dtype=int)))
                        else:
                            # Track drifted too far: stop looking back.
                            bk_sign = True
                            break

                        if (i - j) % 3 != 0:
                            continue

                        # Up to 20 attempts to draw a low-overlap box. The
                        # result is kept in its own variable: when every
                        # attempt fails nothing is stored, whereas the old
                        # code then stored the positive box with score 0.
                        neg_box = None
                        for _attempt in range(20):
                            neg = gen_samples('gaussian', gt_bbox, 5, opts, 2,
                                              10)
                            r = overlap_ratio(
                                neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                            neg = neg[np.array(r) <
                                      opts['consecutive_negThre_train']]
                            if len(neg) > 0:
                                neg_box = neg[0]
                                break
                        if neg_box is None:
                            continue

                        train_db_pos_neg['img_path'].append(
                            train_i['img_files'][i])
                        train_db_pos_neg['bboxes'].append(neg_box)
                        action_label_neg = np.full((opts['num_actions'], 1),
                                                   fill_value=-1)
                        action_label_neg = np.transpose(
                            action_label_neg).tolist()
                        train_db_pos_neg['labels'].extend(action_label_neg)
                        train_db_pos_neg['score_labels'].extend(
                            list(np.zeros(1, dtype=int)))
                    if bk_sign:
                        break

    # Acquire outside the try block so that release() only runs when the lock
    # is actually held.
    lock.acquire()
    try:
        train_db_pos_neg_all.append(train_db_pos_neg)
    finally:
        lock.release()
def process_data_vot(train_sequences, vid_info, opt, train_db_pos,
                     train_db_neg, lock):
    """Generate positive and negative training samples for selected frames.

    For every frame index in ``train_sequences`` with a non-empty ground-truth
    box, ``opts['nPos_train']`` boxes overlapping the ground truth by more
    than ``opts['posThre_train']`` and ``opts['nNeg_train']`` boxes
    overlapping it by less than ``opts['negThre_train']`` are sampled. One
    positive and one negative record per frame are collected and then added
    to the shared lists while holding ``lock``.

    NOTE: both sampling loops retry until enough boxes are found, so they do
    not terminate if the sampler never produces a qualifying box.

    Args:
        train_sequences: frame indices to process.
        vid_info: dict with ``'gt'`` and ``'img_files'`` indexed by frame.
        opt: options dict; a shallow copy is used.
        train_db_pos: shared list extended with the positive records.
        train_db_neg: shared list extended with the negative records.
        lock: object with ``acquire()`` / ``release()`` guarding both lists.
    """
    opts = opt.copy()
    train_db_pos_gpu = []
    train_db_neg_gpu = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]
        if len(gt_bbox) == 0:
            continue

        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            still_needed = opts['nPos_train'] - len(pos_examples)
            pos = pos[np.random.randint(low=0,
                                        high=len(pos),
                                        size=min(len(pos), still_needed)), :]
            pos_examples.extend(pos)

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # in original code, this 1 line below use opts['nPos_train'] instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            still_needed = opts['nNeg_train'] - len(neg_examples)
            neg = neg[np.random.randint(low=0,
                                        high=len(neg),
                                        size=min(len(neg), still_needed)), :]
            neg_examples.extend(neg)

        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)
        action_labels_pos = np.transpose(action_labels_pos).tolist()
        action_labels_neg = np.transpose(action_labels_neg).tolist()

        img_file = vid_info['img_files'][img_idx]
        # score labels: 1 is positive. 0 is negative
        train_db_pos_gpu.append({
            'img_path': np.full(len(pos_examples), img_file),
            'bboxes': pos_examples,
            'labels': action_labels_pos,
            'score_labels': list(np.ones(len(pos_examples), dtype=int))
        })
        train_db_neg_gpu.append({
            'img_path': np.full(len(neg_examples), img_file),
            'bboxes': neg_examples,
            'labels': action_labels_neg,
            'score_labels': list(np.zeros(len(neg_examples), dtype=int))
        })

    # Acquire outside the try block so that release() only runs when the lock
    # is actually held.
    lock.acquire()
    try:
        train_db_pos.extend(train_db_pos_gpu)
        train_db_neg.extend(train_db_neg_gpu)
    finally:
        lock.release()
def process_data_ILSVR(img_paths, opt, train_db_pos_neg_all, lock):
    """Generate one combined positive/negative record per annotated box.

    Each entry of ``img_paths`` is a relative frame name; its annotation is
    read from ``../datasets/data/ILSVRC/Annotations/VID/train/<name>.xml`` and
    the image path is ``../datasets/data/ILSVRC/Data/VID/train/<name>.JPEG``.
    For every ground-truth box, up to ``opts['nPos_train']`` high-overlap and
    ``opts['nNeg_train']`` low-overlap boxes are sampled. A record is kept
    only when it holds exactly ``nPos_train + nNeg_train`` boxes.

    Args:
        img_paths: iterable of relative frame names (strings).
        opt: options dict; a shallow copy is made and ``'imgSize'`` is set on
            the copy per image.
        train_db_pos_neg_all: shared list extended with the kept records.
        lock: object with ``acquire()`` / ``release()`` guarding the shared
            list.
    """
    opts = opt.copy()
    train_db_pos_neg_gpu = []

    for train_i in img_paths:
        gt_file_path = ('../datasets/data/ILSVRC/Annotations/VID/train/' +
                        train_i + '.xml')
        imginfo = get_xml_img_info(gt_file_path)
        gt_bboxs = imginfo['gts']
        opts['imgSize'] = imginfo['imgsize']
        img_path = '../datasets/data/ILSVRC/Data/VID/train/' + train_i + '.JPEG'

        for gt_bbox in gt_bboxs:
            pos_examples = []
            while len(pos_examples) < opts['nPos_train']:
                pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                                  opts, 0.1, 5)
                r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
                pos = pos[np.array(r) > opts['posThre_train']]
                if len(pos) == 0:
                    # Give up instead of retrying: for some small, unclear
                    # images the sampler cannot produce a box above the
                    # threshold.
                    break
                still_needed = opts['nPos_train'] - len(pos_examples)
                pos = pos[np.random.randint(low=0,
                                            high=len(pos),
                                            size=min(len(pos),
                                                     still_needed)), :]
                pos_examples.extend(pos)

            neg_examples = []
            while len(neg_examples) < opts['nNeg_train']:
                # in original code, this 1 line below use opts['nPos_train'] instead of opts['nNeg_train']
                neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                                  opts, 2, 10)
                r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
                neg = neg[np.array(r) < opts['negThre_train']]
                if len(neg) == 0:
                    break
                still_needed = opts['nNeg_train'] - len(neg_examples)
                neg = neg[np.random.randint(low=0,
                                            high=len(neg),
                                            size=min(len(neg),
                                                     still_needed)), :]
                neg_examples.extend(neg)

            action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                                  np.array(pos_examples),
                                                  gt_bbox)
            action_labels_neg = np.full(
                (opts['num_actions'], len(neg_examples)), fill_value=-1)
            action_labels_pos = np.transpose(action_labels_pos).tolist()
            action_labels_neg = np.transpose(action_labels_neg).tolist()

            # Positives first, then negatives.
            # score labels: 1 is positive. 0 is negative
            train_db_pos_neg = {
                'img_path': img_path,
                'bboxes': [],
                'labels': [],
                'score_labels': []
            }
            train_db_pos_neg['bboxes'].extend(pos_examples)
            train_db_pos_neg['labels'].extend(action_labels_pos)
            train_db_pos_neg['score_labels'].extend(
                list(np.ones(len(pos_examples), dtype=int)))
            train_db_pos_neg['bboxes'].extend(neg_examples)
            train_db_pos_neg['labels'].extend(action_labels_neg)
            train_db_pos_neg['score_labels'].extend(
                list(np.zeros(len(neg_examples), dtype=int)))

            # Drop records for which sampling gave up early.
            if len(train_db_pos_neg['bboxes']) == (opts['nPos_train'] +
                                                   opts['nNeg_train']):
                train_db_pos_neg_gpu.append(train_db_pos_neg)

    # Acquire outside the try block so that release() only runs when the lock
    # is actually held.
    lock.acquire()
    try:
        train_db_pos_neg_all.extend(train_db_pos_neg_gpu)
    finally:
        lock.release()
def adnet_test(net, vid_path, opts, args):
    """Track the target through one video sequence and save the result.

    Frames are read from ``<vid_path>/color/*.jpg`` and the ground truth from
    ``<vid_path>/groundtruth.txt``. The first ground-truth box initialises the
    tracker; on every later frame the network's action output moves the box
    until the stop action is chosen or ``opts['num_action_step_max']`` steps
    were taken. The tracked boxes and the ground truth are written to
    ``<args.save_result_npy>-bboxes.npy`` and
    ``<args.save_result_npy>-ground_truth.npy``.

    NOTE: the online fine-tuning section is switched off by a literal
    ``False`` in its guard; it is kept unchanged so it can be re-enabled.

    Args:
        net: the network; accessed through ``net.module`` when ``args.cuda``
            is set.
        vid_path: directory of the sequence.
        opts: options dict.
        args: command-line namespace (``cuda``, ``visualize``,
            ``display_images``, ``save_result_images``, ``save_result_npy``,
            ``believe_score_result``, ...).

    Returns:
        ``(bboxes, t_sum)``: one tracked box per ground-truth row and the
        summed per-frame processing time in seconds. ``([], 0)`` when the
        ground-truth file does not exist.
    """
    if torch.cuda.is_available():
        if args.cuda:
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        if not args.cuda:
            print(
                "WARNING: It looks like you have a CUDA device, but aren't " +
                "using CUDA.\nRun with --cuda for optimal training speed.")
            torch.set_default_tensor_type('torch.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    transform = ADNet_Augmentation(opts)

    print('Testing sequences in ' + str(vid_path) + '...')
    t_sum = 0

    if args.visualize:
        writer = SummaryWriter(
            log_dir=os.path.join('tensorboardx_log', 'online_adapatation_' +
                                 args.save_result_npy))

    ################################
    # Load video sequences
    ################################

    vid_info = {'gt': [], 'img_files': [], 'nframes': 0}

    vid_info['img_files'] = glob.glob(os.path.join(vid_path, 'color', '*.jpg'))
    vid_info['img_files'].sort(key=str.lower)

    gt_path = os.path.join(vid_path, 'groundtruth.txt')

    if not os.path.exists(gt_path):
        return [], t_sum

    # parse gt: one box per line, separated by comma, tab or space
    with open(gt_path, 'r') as gt_file:
        gt = gt_file.read().split('\n')
    for i, line in enumerate(gt):
        if line == '' or line is None:
            continue
        if ',' in line:
            separator = ','
        elif '\t' in line:
            separator = '\t'
        elif ' ' in line:
            separator = ' '
        else:
            separator = ','
        gt[i] = list(map(float, line.split(separator)))

    # Rows with 6 or more values are polygons (x0, y0, x1, y1, ...): convert
    # them to the axis-aligned [x, y, w, h] box that encloses them.
    if len(gt[0]) >= 6:
        for gtidx in range(len(gt)):
            if gt[gtidx] == "":
                continue
            x = gt[gtidx][0:len(gt[gtidx]):2]
            y = gt[gtidx][1:len(gt[gtidx]):2]
            gt[gtidx] = [min(x), min(y), max(x) - min(x), max(y) - min(y)]

    vid_info['gt'] = gt
    if vid_info['gt'][-1] == '':  # small hack
        vid_info['gt'] = vid_info['gt'][:-1]
    vid_info['nframes'] = min(len(vid_info['img_files']), len(vid_info['gt']))

    # catch the first box
    curr_bbox = vid_info['gt'][0]

    # init containers
    bboxes = np.zeros(np.array(
        vid_info['gt']).shape)  # tracking result containers

    ntraining = 0

    # The layers are reached through net.module when running with --cuda.
    model = net.module if args.cuda else net

    # setup training
    optimizer = optim.SGD([{
        'params': model.base_network.parameters(),
        'lr': 0
    }, {
        'params': model.fc4_5.parameters()
    }, {
        'params': model.fc6.parameters()
    }, {
        'params': model.fc7.parameters(),
        'lr': 1e-3
    }],
                          lr=1e-3,
                          momentum=opts['train']['momentum'],
                          weight_decay=opts['train']['weightDecay'])

    action_criterion = nn.CrossEntropyLoss()
    score_criterion = nn.CrossEntropyLoss()

    dataset_storage_pos = None
    dataset_storage_neg = None
    is_negative = False  # is_negative = True if the tracking failed
    target_score = 0
    all_iteration = 0
    t = 0

    for frame_idx in range(vid_info['nframes']):
        frame_path = vid_info['img_files'][frame_idx]
        t0_wholetracking = time.time()
        frame = cv2.imread(frame_path)

        # draw box or with display, then save
        if args.display_images:
            im_with_bb = display_result(frame,
                                        curr_bbox)  # draw box and display
        else:
            im_with_bb = draw_box(frame, curr_bbox)

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    str(frame_idx) + '-' + str(t) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        curr_bbox_old = curr_bbox
        cont_negatives = 0

        if frame_idx > 0:
            # tracking
            model.set_phase('test')
            t = 0
            while True:
                curr_patch, curr_bbox, _, _ = transform(
                    frame, curr_bbox, None, None)
                if args.cuda:
                    curr_patch = curr_patch.cuda()

                curr_patch = curr_patch.unsqueeze(
                    0)  # 1 batch input [1, curr_patch.shape]

                fc6_out, fc7_out = net.forward(curr_patch)

                curr_score = fc7_out.detach().cpu().numpy()[0][1]

                if ntraining > args.believe_score_result:
                    if curr_score < opts['failedThre']:
                        cont_negatives += 1

                # .cpu() returns the tensor itself when it is already on the
                # CPU, so one path serves both devices.
                action = np.argmax(fc6_out.detach().cpu().numpy())

                # do action
                curr_bbox = do_action(curr_bbox, opts, action, frame.shape)

                # bound the curr_bbox size: grow boxes narrower/shorter than
                # 10 around their centre and keep the origin inside the image
                # (max, not min: min(0, ...) pushed the origin to <= 0).
                if curr_bbox[2] < 10:
                    curr_bbox[0] = max(
                        0, curr_bbox[0] + curr_bbox[2] / 2 - 10 / 2)
                    curr_bbox[2] = 10
                if curr_bbox[3] < 10:
                    curr_bbox[1] = max(
                        0, curr_bbox[1] + curr_bbox[3] / 2 - 10 / 2)
                    curr_bbox[3] = 10

                t += 1

                # draw box or with display, then save
                if args.display_images:
                    im_with_bb = display_result(
                        frame, curr_bbox)  # draw box and display
                else:
                    im_with_bb = draw_box(frame, curr_bbox)

                if args.save_result_images:
                    filename = os.path.join(
                        args.save_result_images,
                        str(frame_idx) + '-' + str(t) + '.jpg')
                    cv2.imwrite(filename, im_with_bb)

                if action == opts[
                        'stop_action'] or t >= opts['num_action_step_max']:
                    break

            print('final curr_score: %.4f' % curr_score)

            # redetection when confidence < threshold 0.5. But when fc7 is
            # already reliable. Else, just trust the ADNet
            if ntraining > args.believe_score_result:
                if curr_score < 0.5:
                    print('redetection')
                    is_negative = True

                    # redetection process
                    redet_samples = gen_samples(
                        'gaussian', curr_bbox_old, opts['redet_samples'], opts,
                        min(1.5, 0.6 * 1.15**cont_negatives),
                        opts['redet_scale_factor'])
                    score_samples = []

                    for redet_sample in redet_samples:
                        temp_patch, temp_bbox, _, _ = transform(
                            frame, redet_sample, None, None)
                        if args.cuda:
                            temp_patch = temp_patch.cuda()

                        temp_patch = temp_patch.unsqueeze(
                            0)  # 1 batch input [1, curr_patch.shape]

                        fc6_out_temp, fc7_out_temp = net.forward(temp_patch)

                        score_samples.append(
                            fc7_out_temp.detach().cpu().numpy()[0][1])

                    score_samples = np.array(score_samples)
                    max_score_samples_idx = np.argmax(score_samples)

                    # replace the curr_box with the samples with maximum score
                    curr_bbox = redet_samples[max_score_samples_idx]

                    # update the final result image
                    if args.display_images:
                        im_with_bb = display_result(
                            frame, curr_bbox)  # draw box and display
                    else:
                        im_with_bb = draw_box(frame, curr_bbox)

                    if args.save_result_images:
                        filename = os.path.join(args.save_result_images,
                                                str(frame_idx) + '-redet.jpg')
                        cv2.imwrite(filename, im_with_bb)
                else:
                    is_negative = False
            else:
                is_negative = False

        if args.save_result_images:
            filename = os.path.join(args.save_result_images,
                                    'final-' + str(frame_idx) + '.jpg')
            cv2.imwrite(filename, im_with_bb)

        # record the curr_bbox result
        bboxes[frame_idx] = curr_bbox

        # create or update storage + set iteration_range for training
        if frame_idx == 0:
            dataset_storage_pos = OnlineAdaptationDatasetStorage(
                initial_frame=frame,
                first_box=curr_bbox,
                opts=opts,
                args=args,
                positive=True)
            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                dataset_storage_neg = OnlineAdaptationDatasetStorage(
                    initial_frame=frame,
                    first_box=curr_bbox,
                    opts=opts,
                    args=args,
                    positive=False)

            iteration_range = range(opts['finetune_iters'])
        else:
            assert dataset_storage_pos is not None
            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                assert dataset_storage_neg is not None

            # if confident or when always generate samples, generate new
            # samples. As FC7 wasn't trained, it is better to wait for some
            # time to believe its confidence result to decide whether to
            # generate samples or not.. Before believe it, better to just
            # generate sample always
            always_generate_samples = ntraining < args.believe_score_result

            if always_generate_samples or (
                    not is_negative or target_score > opts['successThre']):
                dataset_storage_pos.add_frame_then_generate_samples(
                    frame, curr_bbox)

            iteration_range = range(opts['finetune_iters_online'])

        # training when depend on the frequency.. else, don't run the training code...
        # NOTE: disabled by the literal False below.
        if False and frame_idx % args.online_adaptation_every_I_frames == 0:
            ntraining += 1
            # generate dataset just before training
            dataset_pos = OnlineAdaptationDataset(dataset_storage_pos)
            data_loader_pos = data.DataLoader(dataset_pos,
                                              opts['minibatch_size'],
                                              num_workers=args.num_workers,
                                              shuffle=True,
                                              pin_memory=False)
            batch_iterator_pos = None

            if opts['nNeg_init'] != 0:  # (thanks to small hack in adnet_test) the nNeg_online is also 0
                dataset_neg = OnlineAdaptationDataset(dataset_storage_neg)
                data_loader_neg = data.DataLoader(dataset_neg,
                                                  opts['minibatch_size'],
                                                  num_workers=args.num_workers,
                                                  shuffle=True,
                                                  pin_memory=False)
                batch_iterator_neg = None
            else:
                dataset_neg = []

            epoch_size_pos = len(dataset_pos) // opts['minibatch_size']
            epoch_size_neg = len(dataset_neg) // opts['minibatch_size']
            epoch_size = epoch_size_pos + epoch_size_neg  # 1 epoch, how many iterations

            which_dataset = list(np.full(epoch_size_pos, fill_value=1))
            which_dataset.extend(np.zeros(epoch_size_neg, dtype=int))
            shuffle(which_dataset)

            print("1 epoch = " + str(epoch_size) + " iterations")

            model.set_phase('train')

            # training loop
            for iteration in iteration_range:
                all_iteration += 1  # use this for update the visualization
                # create batch iterator
                if (not batch_iterator_pos) or (iteration % epoch_size == 0):
                    batch_iterator_pos = iter(data_loader_pos)

                if opts['nNeg_init'] != 0:
                    if (not batch_iterator_neg) or (iteration % epoch_size
                                                    == 0):
                        batch_iterator_neg = iter(data_loader_neg)

                # load train data
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    images, bbox, action_label, score_label = next(
                        batch_iterator_pos)
                else:
                    images, bbox, action_label, score_label = next(
                        batch_iterator_neg)

                if args.cuda:
                    images = torch.Tensor(images.cuda())
                    bbox = torch.Tensor(bbox.cuda())
                    action_label = torch.Tensor(action_label.cuda())
                    score_label = torch.Tensor(score_label.float().cuda())
                else:
                    images = torch.Tensor(images)
                    bbox = torch.Tensor(bbox)
                    action_label = torch.Tensor(action_label)
                    score_label = torch.Tensor(score_label)

                # forward
                t0 = time.time()
                action_out, score_out = net(images)

                # backprop
                optimizer.zero_grad()
                if which_dataset[iteration %
                                 len(which_dataset)]:  # if positive
                    action_l = action_criterion(action_out,
                                                torch.max(action_label, 1)[1])
                else:
                    action_l = torch.Tensor([0])
                score_l = score_criterion(score_out, score_label.long())
                loss = action_l + score_l
                loss.backward()
                optimizer.step()
                t1 = time.time()

                if all_iteration % 10 == 0:
                    print('Timer: %.4f sec.' % (t1 - t0))
                    print('iter ' + repr(all_iteration) +
                          ' || Loss: %.4f ||' % (loss.data.item()),
                          end=' ')
                    if args.visualize and args.send_images_to_visualization:
                        random_batch_index = np.random.randint(images.size(0))
                        writer.add_image(
                            'image',
                            images.data[random_batch_index].cpu().numpy(),
                            random_batch_index)

                if args.visualize:
                    writer.add_scalars(
                        'data/iter_loss', {
                            'action_loss': action_l.item(),
                            'score_loss': score_l.item(),
                            'total': (action_l.item() + score_l.item())
                        },
                        global_step=all_iteration)

        t1_wholetracking = time.time()
        t_sum += t1_wholetracking - t0_wholetracking
        print('whole tracking time = %.4f sec.' %
              (t1_wholetracking - t0_wholetracking))

    # evaluate the precision
    bboxes = np.array(bboxes)
    vid_info['gt'] = np.array(vid_info['gt'])

    np.save(args.save_result_npy + '-bboxes.npy', bboxes)
    np.save(args.save_result_npy + '-ground_truth.npy', vid_info['gt'])

    return bboxes, t_sum
def get_train_dbs(vid_info, opts):
    """Generate positive and negative training records for one video.

    Frames are chosen every ``opts['train']['gt_skip']`` frames, or, for the
    ``'alov300'`` database, wherever ``vid_info['gt_use']`` equals 1. For each
    chosen frame with a non-empty ground-truth box, ``opts['nPos_train']``
    high-overlap and ``opts['nNeg_train']`` low-overlap boxes are sampled and
    passed to ``show_examples_test`` before being stored.

    Side effects: ``opts['scale_factor']`` and ``opts['imgSize']`` are set on
    the caller's dict.

    NOTE: both sampling loops retry until enough boxes are found, so they do
    not terminate if the sampler never produces a qualifying box.

    Args:
        vid_info: dict with ``'img_files'``, ``'gt'``, ``'db_name'``,
            ``'nframes'`` and, for alov300, ``'gt_use'``.
        opts: options dict (modified in place, see above).

    Returns:
        ``(train_db_pos, train_db_neg)``: two lists with one record per
        processed frame; each record has ``'img_path'``, ``'bboxes'``,
        ``'labels'`` and ``'score_labels'``.
    """
    img = cv2.imread(vid_info['img_files'][0])

    opts['scale_factor'] = 1.05
    opts['imgSize'] = list(img.shape)
    gt_skip = opts['train']['gt_skip']

    if vid_info['db_name'] == 'alov300':
        # Turn the "use this frame" mask into frame indices. Iterating the
        # boolean mask itself would index the ground truth with True/False
        # instead of a frame number.
        # NOTE(review): assumes gt_use holds one flag per frame - confirm.
        train_sequences = np.flatnonzero(
            np.asarray(vid_info['gt_use']) == 1).tolist()
    else:
        train_sequences = list(range(0, vid_info['nframes'], gt_skip))

    train_db_pos = []
    train_db_neg = []

    for img_idx in train_sequences:
        gt_bbox = vid_info['gt'][img_idx]
        if len(gt_bbox) == 0:
            continue

        pos_examples = []
        while len(pos_examples) < opts['nPos_train']:
            pos = gen_samples('gaussian', gt_bbox, opts['nPos_train'] * 5,
                              opts, 0.1, 5)
            r = overlap_ratio(pos, np.matlib.repmat(gt_bbox, len(pos), 1))
            pos = pos[np.array(r) > opts['posThre_train']]
            if len(pos) == 0:
                continue
            still_needed = opts['nPos_train'] - len(pos_examples)
            pos = pos[np.random.randint(low=0,
                                        high=len(pos),
                                        size=min(len(pos), still_needed)), :]
            pos_examples.extend(pos)

        neg_examples = []
        while len(neg_examples) < opts['nNeg_train']:
            # in original code, this 1 line below use opts['nPos_train'] instead of opts['nNeg_train']
            neg = gen_samples('gaussian', gt_bbox, opts['nNeg_train'] * 5,
                              opts, 2, 10)
            r = overlap_ratio(neg, np.matlib.repmat(gt_bbox, len(neg), 1))
            neg = neg[np.array(r) < opts['negThre_train']]
            if len(neg) == 0:
                continue
            still_needed = opts['nNeg_train'] - len(neg_examples)
            neg = neg[np.random.randint(low=0,
                                        high=len(neg),
                                        size=min(len(neg), still_needed)), :]
            neg_examples.extend(neg)

        img_file = vid_info['img_files'][img_idx]
        show_examples_test(pos_examples, neg_examples, img_file)

        action_labels_pos = gen_action_labels(opts['num_actions'], opts,
                                              np.array(pos_examples), gt_bbox)
        action_labels_neg = np.full((opts['num_actions'], len(neg_examples)),
                                    fill_value=-1)
        action_labels_pos = np.transpose(action_labels_pos).tolist()
        action_labels_neg = np.transpose(action_labels_neg).tolist()

        # score labels: 1 is positive. 0 is negative
        train_db_pos.append({
            'img_path': np.full(len(pos_examples), img_file),
            'bboxes': pos_examples,
            'labels': action_labels_pos,
            'score_labels': list(np.ones(len(pos_examples), dtype=int))
        })
        train_db_neg.append({
            'img_path': np.full(len(neg_examples), img_file),
            'bboxes': neg_examples,
            'labels': action_labels_neg,
            'score_labels': list(np.zeros(len(neg_examples), dtype=int))
        })

    return train_db_pos, train_db_neg