def parser_txt_anno(video_dir, video_id, txt_anno, track_save_dir):
    """Crop the annotated target out of every frame of one video.

    Line ``i`` of ``txt_anno`` is parsed as comma-separated floats and handed
    to ``Rectangle`` as the box of frame ``<video_dir>/<i>.jpg``.  Frames whose
    image file does not exist are skipped.  Each crop is stored in
    ``track_save_dir`` as ``<i>.w.<W>.h.<H>.jpg``, where ``W`` and ``H`` are
    the first two (rounded) entries of the sizes returned by ``get_crops``;
    an already existing file is never overwritten.

    The whole video is skipped when ``track_save_dir`` already contains as
    many directory entries as ``video_dir``.

    Args:
        video_dir: directory holding the frames ``0.jpg``, ``1.jpg``, ...
        video_id: accepted for interface compatibility; not used here.
        txt_anno: path of the text annotation, one box per line.
        track_save_dir: existing directory that receives the crops.
    """
    # Cheap "already done" test: same number of entries in both directories.
    if len(os.listdir(track_save_dir)) == len(os.listdir(video_dir)):
        return

    with open(txt_anno, 'r') as anno_file:
        for frame_no, anno_line in enumerate(anno_file):
            frame_name = str(frame_no)
            frame_path = os.path.join(video_dir, frame_name + ".jpg")
            if not os.path.exists(frame_path):
                continue

            frame = imread(frame_path)
            coords = [float(field) for field in anno_line.split(",")]
            target_box = convert_bbox_format(Rectangle(*coords),
                                             'center-based')
            crop, _scale, crop_sizes = get_crops(frame,
                                                 target_box,
                                                 size_z=127,
                                                 size_x=255,
                                                 context_amount=0.5)

            # The crop size is encoded in the file name.
            crop_w = int(np.rint(crop_sizes[0]))
            crop_h = int(np.rint(crop_sizes[1]))
            savename = osp.join(
                track_save_dir,
                '{}.w.{}.h.{}.jpg'.format(frame_name, crop_w, crop_h))
            if not osp.exists(savename):
                imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def parser_txt_anno(video_dir, video_id, txt_anno, track_save_dir):
    """Crop the annotated target out of every frame of one video.

    The ``*.jpg`` files of ``video_dir`` are sorted by name and paired, in
    order, with the lines of ``txt_anno`` (comma-separated box per line, the
    first two values are converted from 1-based to 0-based).  Each crop is
    written to ``track_save_dir`` as ``<frame id>.w.<W>.h.<H>.jpg``.

    Frames are skipped when they were cropped before, cannot be read, have a
    zero third or fourth annotation value (treated as out-of-view) or yield a
    non-positive box after conversion.

    If ``track_save_dir`` already exists, the video is skipped entirely when
    the number of saved crops plus ``count_out_of_view_frame(txt_anno)``
    equals the number of source images.

    Args:
        video_dir: directory holding the source ``*.jpg`` frames.
        video_id: accepted for interface compatibility; not used here.
        txt_anno: path of the text annotation, one box per line.
        track_save_dir: directory receiving the crops (created if missing).
    """
    img_files = sorted(glob(video_dir + "/*.jpg"))

    if not osp.exists(track_save_dir):
        os.makedirs(track_save_dir)
        cropped_ids = set()
    else:
        count = count_out_of_view_frame(txt_anno)
        saved_list = glob(track_save_dir + "/*.jpg")
        crop_imgs_size = len(saved_list)
        origin_img_size = len(img_files)
        if crop_imgs_size + count == origin_img_size:
            print("video already croped, skip this video")
            return
        print("crop_imgs_size: %d, origin_img_size: %d, out-of-view: %d" %
              (crop_imgs_size, origin_img_size, count))
        # Crops are named "<frame id>.w.<W>.h.<H>.jpg".  Key on the frame id
        # taken from the base name: comparing full paths can never match
        # because the crops live in a different directory than the frames.
        cropped_ids = {osp.basename(path).split('.')[0] for path in saved_list}

    with open(txt_anno, 'r') as f:
        for index, line in enumerate(tqdm(f)):
            if index >= len(img_files):
                # More annotation lines than images: nothing left to crop.
                print("annotation %s has more lines than images, stop" %
                      (txt_anno))
                break

            img_file = img_files[index]
            img_id = osp.basename(img_file).split('.')[0]
            if img_id in cropped_ids:
                print("img %s has been croped, skip" % (img_file))
                continue

            img = imread(img_file)
            if img is None:
                continue

            bbox = [int(float(x)) for x in line.split(",")]
            # skip out-of-view frames
            if bbox[2] == 0 or bbox[3] == 0:
                print("found out-of-view frame, skip this frame")
                continue

            # convert from 1-based to 0-based
            bbox[0] -= 1
            bbox[1] -= 1
            target_box = convert_bbox_format(Rectangle(*bbox), 'center-based')
            if target_box.width <= 0 or target_box.height <= 0:
                print("target_box error in", txt_anno, index)
                continue

            crop, _scale, new_sizes = get_crops(img,
                                                target_box,
                                                size_z=127,
                                                size_x=255,
                                                context_amount=0.5)
            savename = osp.join(
                track_save_dir,
                '{}.w.{}.h.{}.jpg'.format(img_id, int(np.rint(new_sizes[0])),
                                          int(np.rint(new_sizes[1]))))
            if osp.exists(savename):
                continue
            imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def process_split(root_dir, save_dir, split, subdir=''):
    """Crop every annotated object found under one annotation split.

    Annotations are read from ``<root_dir>/Annotations/VID/<split>/<subdir>``
    (one sub-directory per video, one ``*.xml`` file per frame).  The image of
    a frame is ``<root_dir>/Data/VID/<split>/<folder>/<filename>.JPEG`` where
    ``folder`` and ``filename`` are taken from the XML file itself.

    For each ``object`` element a crop is produced with ``get_crops`` and
    saved as ``<filename>.<trackid, two digits>.crop.x.jpg`` in the directory
    returned by ``get_track_save_directory(save_dir, 'train', subdir, video)``.
    Crops that already exist are left untouched, and the image is only read
    once a crop actually has to be produced.

    Args:
        root_dir: dataset root containing ``Data`` and ``Annotations``.
        save_dir: root directory for the generated crops.
        split: split name used below ``VID``.
        subdir: optional extra directory level below the split.
    """
    data_dir = osp.join(root_dir, 'Data', 'VID', split)
    anno_dir = osp.join(root_dir, 'Annotations', 'VID', split, subdir)
    video_names = os.listdir(anno_dir)
    total = len(video_names)

    for video_no, video in enumerate(video_names):
        print('{split}-{subdir} ({idx}/{total}): Processing {video}...'.format(
            split=split, subdir=subdir, idx=video_no, total=total,
            video=video))

        for xml_path in glob(osp.join(anno_dir, video, '*.xml')):
            root = ET.parse(xml_path).getroot()
            folder = root.find('folder').text
            filename = root.find('filename').text
            img_file = osp.join(data_dir, folder, filename + '.JPEG')

            # All boxes are parsed before any crop is written.
            nodes = list(root.iter('object'))
            boxes = []
            for node in nodes:
                bnd = node.find('bndbox')
                xmax = float(bnd.find('xmax').text)
                xmin = float(bnd.find('xmin').text)
                ymax = float(bnd.find('ymax').text)
                ymin = float(bnd.find('ymin').text)
                # Corner coordinates are inclusive, hence the "+ 1".
                boxes.append([xmin, ymin, xmax - xmin + 1, ymax - ymin + 1])

            img = None  # read lazily, only when a crop is missing
            for node, box in zip(nodes, boxes):
                track_id = node.find('trackid').text
                class_name = node.find('name').text  # read but not used

                track_save_dir = get_track_save_directory(
                    save_dir, 'train', subdir, video)
                mkdir_p(track_save_dir)
                savename = osp.join(
                    track_save_dir,
                    '{}.{:02d}.crop.x.jpg'.format(filename, int(track_id)))
                if osp.isfile(savename):
                    continue  # skip existing images

                if img is None:
                    img = imread(img_file)

                target_box = convert_bbox_format(Rectangle(*box),
                                                 'center-based')
                crop, _ = get_crops(img,
                                    target_box,
                                    size_z=127,
                                    size_x=255,
                                    context_amount=0.5)
                imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def process_split(root_dir, save_dir, split, subdir=''):
    """Generate tight object crops for one annotation split.

    Walks ``<root_dir>/Annotations/VID/<split>/<subdir>``: every entry is
    treated as a video directory and every ``*.xml`` file in it as one frame.
    The frame image is ``<root_dir>/Data/VID/<split>/<folder>/<filename>.JPEG``
    with ``folder`` and ``filename`` read from the XML.

    Each ``object`` element is cropped with ``get_crops`` using
    ``context_amount=0.01`` and written as
    ``<filename>.<trackid, two digits>.crop.x.jpg`` into the directory given
    by ``get_track_save_directory(save_dir, 'train', subdir, video)``.
    Existing crop files are skipped; the image is loaded only when at least
    one crop of the frame is missing.

    Args:
        root_dir: dataset root containing ``Data`` and ``Annotations``.
        save_dir: root directory for the generated crops.
        split: split name used below ``VID``.
        subdir: optional extra directory level below the split.
    """
    data_dir = osp.join(root_dir, 'Data', 'VID', split)
    anno_dir = osp.join(root_dir, 'Annotations', 'VID', split, subdir)
    video_names = os.listdir(anno_dir)
    n_videos = len(video_names)
    progress = '{split}-{subdir} ({idx}/{total}): Processing {video}...'

    for position, video in enumerate(video_names):
        print(progress.format(split=split, subdir=subdir, idx=position,
                              total=n_videos, video=video))

        frame_xmls = glob(osp.join(osp.join(anno_dir, video), '*.xml'))
        for frame_xml in frame_xmls:
            root = ET.parse(frame_xml).getroot()
            folder = root.find('folder').text
            filename = root.find('filename').text
            img_file = osp.join(data_dir, folder, filename + '.JPEG')

            # First pass: collect [xmin, ymin, width, height] for each object.
            elements = list(root.iter('object'))
            bboxs = []
            for element in elements:
                bndbox = element.find('bndbox')
                xmax = float(bndbox.find('xmax').text)
                xmin = float(bndbox.find('xmin').text)
                ymax = float(bndbox.find('ymax').text)
                ymin = float(bndbox.find('ymin').text)
                width = xmax - xmin + 1
                height = ymax - ymin + 1
                bboxs.append([xmin, ymin, width, height])

            # Second pass: write the crops that are not on disk yet.
            img = None
            for obj_no, element in enumerate(elements):
                trackid = element.find('trackid').text
                class_name = element.find('name').text  # read but not used

                track_save_dir = get_track_save_directory(
                    save_dir, 'train', subdir, video)
                mkdir_p(track_save_dir)
                crop_name = '{}.{:02d}.crop.x.jpg'.format(filename,
                                                          int(trackid))
                savename = osp.join(track_save_dir, crop_name)
                if osp.isfile(savename):
                    continue  # skip existing images

                if img is None:
                    img = imread(img_file)

                target_box = convert_bbox_format(Rectangle(*bboxs[obj_no]),
                                                 'center-based')
                crop, _ = get_crops(img,
                                    target_box,
                                    size_z=127,
                                    size_x=255,
                                    context_amount=0.01)
                imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def process_split(root_dir, save_dir, split):
    """Crop the annotated target out of every frame of every video of a split.

    Every entry of ``<root_dir>/<split>`` whose name does not contain ``.txt``
    is treated as a video directory holding ``*.jpg`` frames and a
    ``groundtruth.txt`` file.  The frame named ``<k>.jpg`` uses annotation
    line ``k - 1`` (``xmin,ymin,w,h``, truncated to integers).  Crops are
    written to ``<save_dir>/<split>/<video>/<frame file name>.crop.x.jpg``.

    An existing crop is kept when it can be opened with ``Image.open``;
    if opening raises ``IOError`` the file is removed and regenerated.

    Args:
        root_dir: dataset root containing one directory per split.
        save_dir: root directory for the generated crops.
        split: name of the split directory to process.
    """
    data_dir = osp.join(root_dir, split)
    video_names = [vn for vn in os.listdir(data_dir) if '.txt' not in vn]

    for idx, video in enumerate(video_names):
        print('{split} ({idx}/{total}): Processing {video}...'.format(
            split=split, idx=idx, total=len(video_names), video=video))
        video_path = osp.join(data_dir, video)
        jpg_files = glob(osp.join(video_path, '*.jpg'))
        with open(osp.join(video_path, 'groundtruth.txt')) as f:
            ann_content = f.readlines()

        track_save_dir = osp.join(save_dir, split, video)
        if jpg_files:
            # The output directory is only needed when there is a frame.
            mkdir_p(track_save_dir)

        for jpg in jpg_files:
            img_file = osp.basename(jpg)

            # File names are 1-based frame numbers; annotation lines 0-based.
            jpgidx = int(img_file.split('.')[0]) - 1
            ann = ann_content[jpgidx].strip()
            bbox = [int(float(bb)) for bb in ann.split(',')]
            # [xmin, ymin, w, h]

            savename = osp.join(track_save_dir,
                                '{}.crop.x.jpg'.format(img_file))
            if osp.isfile(savename):
                try:
                    # Open and immediately close the file: this only checks
                    # that it is readable as an image and must not keep the
                    # file handle alive.
                    with Image.open(savename):
                        pass
                except IOError:
                    os.remove(savename)
                else:
                    continue  # skip existing images

            img = imread(jpg)
            target_box = convert_bbox_format(Rectangle(*bbox), 'center-based')
            crop, _ = get_crops(img,
                                target_box,
                                size_z=127,
                                size_x=255,
                                context_amount=0.5)
            imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def parser_xml_anno(img_file, xml_anno, track_save_dir):
    """Crop every object annotated in one XML file out of one image.

    Each ``object`` element of ``xml_anno`` supplies a ``bndbox`` with
    ``xmin``/``ymin``/``xmax``/``ymax``.  The crop of the first object is
    stored in ``track_save_dir``; the crop of object ``k > 0`` goes to
    ``track_save_dir + "_<k>"``.  Directories are created on demand and the
    file is always named ``0.w.<W>.h.<H>.jpg`` with ``W``/``H`` being the
    first two rounded sizes returned by ``get_crops``.  Existing files are
    not overwritten.

    Args:
        img_file: path of the image, read with ``cv2.imread`` only if the
            annotation contains at least one object.
        xml_anno: path of the XML annotation file.
        track_save_dir: output directory of the first object (and name
            prefix of the directories of the remaining ones).
    """
    root = ET.parse(xml_anno).getroot()

    # Collect [xmin, ymin, width, height] for every object first.
    boxes = []
    for node in root.iter('object'):
        bnd = node.find('bndbox')
        xmax = float(bnd.find('xmax').text)
        xmin = float(bnd.find('xmin').text)
        ymax = float(bnd.find('ymax').text)
        ymin = float(bnd.find('ymin').text)
        # Corner coordinates are inclusive, hence the "+ 1".
        boxes.append([xmin, ymin, xmax - xmin + 1, ymax - ymin + 1])

    img = None
    for obj_no, box in enumerate(boxes):
        if img is None:
            img = cv2.imread(img_file)

        target_box = convert_bbox_format(Rectangle(*box), 'center-based')
        crop, _scale, crop_sizes = get_crops(img,
                                             target_box,
                                             size_z=127,
                                             size_x=255,
                                             context_amount=0.5)

        # Object 0 keeps the plain directory name, later ones get "_<n>".
        if obj_no > 0:
            save_dir = track_save_dir + "_" + str(obj_no)
        else:
            save_dir = track_save_dir + ""
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        crop_w = int(np.rint(crop_sizes[0]))
        crop_h = int(np.rint(crop_sizes[1]))
        savename = os.path.join(save_dir,
                                '0.w.{}.h.{}.jpg'.format(crop_w, crop_h))
        if not osp.exists(savename):
            imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])
def track_init(self, first_bbox, first_frame_image_path):
    """Initialise the tracker state from the first frame and its box.

    Reads the first frame, converts ``first_bbox`` to a centre-based
    ``Rectangle``, crops the frame with ``get_crops`` and stores:

    * ``self.first_frame_image`` - the frame (converted BGR->RGB when
      ``self.image_use_rgb`` is set),
    * ``self.first_bbox`` - the converted box,
    * ``self.img_height`` / ``self.img_width`` - taken from the frame shape,
    * ``self.first_image_examplar`` - the central
      ``z_image_size`` patch of the crop,
    * ``self.gt_examplar_boxes`` - the target box inside that patch as a
      ``[1, 4]`` array ``[x1, y1, x2, y2]``,
    * ``self.current_target_state`` - ``TargetState(bbox=self.first_bbox)``,
    * ``self.window`` - flattened 2-D Hanning window tiled five times,
    * ``self.video`` - a ``cv2.VideoWriter`` (only when ``self.save_video``).

    Args:
        first_bbox: indexable with at least four entries, passed in order
            to ``Rectangle``.
        first_frame_image_path: path of the first frame; its third-to-last
            ``/``-separated component names the output video.
    """
    print(first_frame_image_path)

    raw_frame = safe_imread(first_frame_image_path)
    if self.image_use_rgb:
        self.first_frame_image = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB)
    else:
        self.first_frame_image = raw_frame

    self.first_bbox = convert_bbox_format(
        Rectangle(first_bbox[0], first_bbox[1], first_bbox[2], first_bbox[3]),
        'center-based')
    search_crop, _, target_size = get_crops(self.first_frame_image,
                                            self.first_bbox,
                                            self.z_image_size,
                                            self.x_image_size, 0.5)

    # Target box centred in the search-sized crop, as [x1, y1, x2, y2].
    crop_cx = (self.x_image_size - 1) / 2.0
    crop_cy = (self.x_image_size - 1) / 2.0
    half_w = target_size[0] / 2.0
    half_h = target_size[1] / 2.0
    box_in_crop = np.array(
        [crop_cx - half_w, crop_cy - half_h, crop_cx + half_w,
         crop_cy + half_h], np.float32)

    self.img_height, self.img_width, _ = self.first_frame_image.shape

    if self.save_video:
        video_name = first_frame_image_path.split('/')[-3] + '.mp4'
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        result_dir = os.path.join(Project_root, self.track_config['log_dir'])
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)
        video_path = os.path.join(result_dir, video_name)
        print("save video into %s" % (video_path))
        self.video = cv2.VideoWriter(video_path, fourcc, 30,
                                     (self.img_width, self.img_height))

    def center_crop(img, crop_size=127):
        """Return the central ``crop_size`` square of ``img``."""
        rows, cols = np.shape(img)[0], np.shape(img)[1]
        mid_row = (rows - 1) // 2
        mid_col = (cols - 1) // 2
        half = crop_size // 2
        patch = img[mid_row - half:mid_row + half + 1,
                    mid_col - half:mid_col + half + 1]
        assert (patch.shape[0] == crop_size)
        return patch

    self.first_image_examplar = center_crop(search_crop, self.z_image_size)

    # Move the box from search-crop coordinates to exemplar coordinates.
    shift = (self.x_image_size - self.z_image_size) // 2
    left, top, right, bottom = (box_in_crop[0], box_in_crop[1],
                                box_in_crop[2], box_in_crop[3])
    shifted = np.array([left - shift, top - shift, right - shift,
                        bottom - shift])
    self.gt_examplar_boxes = np.reshape(shifted, [1, 4])

    self.current_target_state = TargetState(bbox=self.first_bbox)

    hann = np.hanning(self.score_size)
    # 5 is the number of aspect ratio anchors
    self.window = np.tile(np.outer(hann, hann).flatten(), 5)
def track(self, first_bbox, frames, bSaveImage=False, SavePath='/tmp'):
    """Track the target given by ``first_bbox`` through ``frames``.

    The tracker is initialised on ``frames[0]`` with ``track_init``.  Then,
    for every frame (the first one only when
    ``self.track_config['include_first']`` is true), a search region around
    ``self.current_target_state.search_box`` is cropped, the model is run
    through ``self.sess`` and ``self.current_target_state`` is updated from
    the candidate box with the highest windowed, penalised score.

    Args:
        first_bbox: initial box, passed unchanged to ``track_init``.
        frames: sequence of image paths, each read with ``safe_imread``.
        bSaveImage: when true, every processed frame is written to
            ``SavePath`` under its base name; this takes precedence over
            ``self.save_video`` and ``self.show_video``.
        SavePath: output directory used when ``bSaveImage`` is true.

    Returns:
        A list with one entry per frame: the current target box converted
        with ``convert_bbox_format(..., 'top-left-based')``.
    """
    #1. init the tracker
    self.track_init(first_bbox, frames[0])
    include_first = self.track_config['include_first']
    # Run tracking loop
    reported_bboxs = []
    examplar = np.reshape(self.first_image_examplar,
                          [1, self.z_image_size, self.z_image_size, 3])
    # Accumulated wall-clock time per stage; averaged over the frames at the
    # end but only consumed by a commented-out print.
    cost_time_dict = {
        'load_img': 0.0,
        'crop_img': 0.0,
        'sess_run': 0.0,
        'post_process': 0.0
    }
    for i, filename in tqdm(enumerate(frames)):
        if i > 0 or include_first:  # We don't really want to process the first image unless intended to do so.
            load_img_start = time.time()
            bgr_img = safe_imread(filename)
            load_img_end = time.time()
            cost_time_dict['load_img'] += load_img_end - load_img_start
            crop_img_start = time.time()
            current_img = cv2.cvtColor(
                bgr_img, cv2.COLOR_BGR2RGB) if self.image_use_rgb else bgr_img
            # scale_x is used below to map crop coordinates back to frame
            # coordinates (crop quantities are divided by it).
            instance_img, scale_x, _ = get_crops(
                current_img, self.current_target_state.search_box,
                self.z_image_size, self.x_image_size, 0.5)
            instance = np.reshape(
                instance_img, [1, self.x_image_size, self.x_image_size, 3])
            crop_img_end = time.time()
            cost_time_dict['crop_img'] += crop_img_end - crop_img_start
            sess_run_start = time.time()
            # With 'BinWindow' enabled the model is additionally fed the
            # exemplar ground-truth box computed in track_init.
            if self.model.model_config.get('BinWindow', False):
                boxes, scores = self.sess.run(
                    [self.model.topk_bboxes, self.model.topk_scores],
                    feed_dict={
                        self.model.examplar_feed: examplar,
                        self.model.instance_feed: instance,
                        self.model.gt_examplar_boxes: self.gt_examplar_boxes
                    })
            else:
                boxes, scores = self.sess.run(
                    [self.model.topk_bboxes, self.model.topk_scores],
                    feed_dict={
                        self.model.examplar_feed: examplar,
                        self.model.instance_feed: instance
                    })
            sess_run_end = time.time()
            cost_time_dict['sess_run'] += sess_run_end - sess_run_start
            post_process_start = time.time()

            # Size of a (w, h) box after adding context of 0.5 * (w + h) to
            # both sides; redefined on every iteration.
            def padded_size(w, h):
                context = 0.5 * (w + h)
                return np.sqrt((w + context) * (h + context))

            # Original shape notes (not verifiable from this method - TODO
            # confirm): boxes is 1*NA*4, scores is 1*NA.
            boxes = boxes[0]  #NA*4
            scores = scores[0]  # original note said "NA*2" - TODO confirm
            scales = padded_size(
                (boxes[:, 2] - boxes[:, 0]) / scale_x,
                (boxes[:, 3] - boxes[:, 1]) / scale_x)  #Na
            ratios = (boxes[:, 3] - boxes[:, 1]) / (boxes[:, 2] - boxes[:, 0])
            # Penalise candidates whose scale or aspect ratio differs from
            # the current target state; a change and its inverse are
            # treated alike.
            scale_change = scales / self.current_target_state.scale
            scale_change = np.maximum(scale_change, 1.0 / scale_change)
            ratio_change = ratios / (self.current_target_state.ratio)
            ratio_change = np.maximum(ratio_change, 1.0 / ratio_change)
            scale_penalty = np.exp(-(scale_change * ratio_change - 1) *
                                   self.track_config['penalty_k'])
            pscores = scores * scale_penalty
            # Blend the penalised scores with the window built in track_init.
            window_influence = self.track_config['window_influence']
            wpscores = pscores * (
                1 - window_influence) + self.window * window_influence
            max_index = np.argmax(wpscores)
            corrdinates = boxes[max_index]  #Top1
            #print("Tracking %d/%d with tracking score:%.2f, wpscore: %.2f"%(i+1, len(frames), scores[max_index],wpscores[max_index]))
            # Position within frame in frame coordinates
            res_box = Rectangle(*corrdinate_to_bbox(corrdinates))
            center_x = (self.x_image_size - 1.0) / 2
            center_y = center_x
            # Offset of the best box from the crop centre, in frame units.
            delta_x = (res_box.x - center_x) / scale_x
            delta_y = (res_box.y - center_y) / scale_x
            w = res_box.width / scale_x
            h = res_box.height / scale_x
            y = self.current_target_state.target_box.y + delta_y
            x = self.current_target_state.target_box.x + delta_x
            #update seach bbox
            # alpha smooths the size update and grows with the penalised
            # score of the chosen box.
            alpha = self.track_config[
                'search_scale_smooth_factor'] * pscores[max_index]
            # belta is fixed to 0.0, so the new centre is simply (x, y)
            # clamped to the image.
            belta = 0.0
            new_search_cx = max(
                min(
                    self.img_width,
                    self.current_target_state.target_box.x * belta +
                    (1.0 - belta) * x), 0.0)
            new_search_cy = max(
                min(
                    self.img_height,
                    self.current_target_state.target_box.y * belta +
                    (1.0 - belta) * y), 0.0)
            # Width/height are kept within [10, image size].
            new_search_w = max(
                10.0,
                min(
                    self.current_target_state.target_box.width *
                    (1.0 - alpha) + alpha * w, self.img_width))
            new_search_h = max(
                10.0,
                min(
                    self.current_target_state.target_box.height *
                    (1.0 - alpha) + alpha * h, self.img_height))
            self.current_target_state.target_box = Rectangle(
                new_search_cx, new_search_cy, new_search_w, new_search_h)
            self.current_target_state.scale = padded_size(
                new_search_w, new_search_h)
            self.current_target_state.ratio = new_search_h * 1.0 / new_search_w
            #auto increase the search region if max score is lower than the conf_threshold
            if (scores[max_index] < self.conf_threshold
                    and self.auto_increase):
                increase_w = min(new_search_w * 1.5, self.img_width)
                increase_h = min(new_search_h * 1.5, self.img_height)
                self.current_target_state.search_box = Rectangle(
                    new_search_cx, new_search_cy, increase_w, increase_h)
            else:
                self.current_target_state.search_box = self.current_target_state.target_box
            #save and show tracking process
            # Only one output mode is used per frame, in this priority order.
            # NOTE(review): the bSaveImage branch writes the frame without
            # drawing the box on it - confirm this is intended.
            if bSaveImage:
                cv2.imwrite(SavePath + "/" + os.path.basename(frames[i]),
                            bgr_img)
            elif self.save_video:
                x1, y1, x2, y2 = bbox_to_corrdinate(
                    self.current_target_state.search_box)
                cv2.rectangle(bgr_img, (int(x1), int(y1)),
                              (int(x2), int(y2)), (0, 255, 0), 2)
                cv2.putText(bgr_img, "%.2f" % (scores[max_index]),
                            (int(x1), int(y1)), 0, 1, (0, 255, 0), 2)
                self.video.write(bgr_img)
            elif self.show_video:
                x1, y1, x2, y2 = bbox_to_corrdinate(
                    self.current_target_state.search_box)
                cv2.rectangle(bgr_img, (int(x1), int(y1)),
                              (int(x2), int(y2)), (0, 255, 0), 2)
                cv2.putText(bgr_img, "%.2f" % (scores[max_index]),
                            (int(x1), int(y1)), 0, 1, (0, 255, 0), 2)
                cv2.imshow("Tracker", bgr_img)
                cv2.waitKey(10)
            else:
                pass
            post_process_end = time.time()
            cost_time_dict[
                'post_process'] += post_process_end - post_process_start
        else:
            # First frame, not tracked: only draw the initial search box on
            # the stored first-frame image.
            x1, y1, x2, y2 = bbox_to_corrdinate(
                self.current_target_state.search_box)
            cv2.rectangle(self.first_frame_image, (int(x1), int(y1)),
                          (int(x2), int(y2)), (255, 255, 255), 2)
            #cv2.imshow("Tracker",cv2.cvtColor(self.first_frame_image, cv2.COLOR_RGB2BGR))
            #cv2.imshow("Target",self.first_frame_image)
            #cv2.waitKey(100)
        # One reported box per frame, including a skipped first frame.
        reported_bbox = convert_bbox_format(
            self.current_target_state.target_box, 'top-left-based')
        reported_bboxs.append(reported_bbox)
    # Turn the accumulated stage times into per-frame averages.
    for key in cost_time_dict:
        cost_time_dict[key] /= len(frames)
    #print(cost_time_dict)
    return reported_bboxs
def process_split(root_dir, save_dir, split):
    """Crop every frame of a split and write matching crop-space annotations.

    Every entry of ``<root_dir>/<split>`` whose name does not contain ``.txt``
    is treated as a video directory holding ``*.jpg`` frames,
    ``groundtruth.txt`` and four metadata files.  For each video this

    * copies ``absence.label``, ``cover.label``, ``cut_by_image.label`` and
      ``meta_info.ini`` to ``<save_dir>/<split>/<video>``,
    * rewrites ``groundtruth.txt`` there with one line per frame file: the
      box (``x,y,w,h``, each suffixed with ``.0000``) that the target of
      annotation line ``i`` occupies in a 271-pixel crop, centred on
      pixel 135 and at least 1x1,
    * writes the 271-pixel crop of every frame as ``<frame file name>.jpg``.
      The frame named ``<k>.jpg`` is cropped with annotation line ``k - 1``.

    An existing crop is kept when it can be opened with ``Image.open``;
    if opening raises ``IOError`` the file is removed and regenerated.

    Args:
        root_dir: dataset root containing one directory per split.
        save_dir: root directory for the generated crops and annotations.
        split: name of the split directory to process.
    """
    context_amount = 0.5
    size_z = 127
    size_x = 271
    crop_center = (size_x - 1) // 2  # 135 for a 271-pixel crop
    meta_files = ('absence.label', 'cover.label', 'cut_by_image.label',
                  'meta_info.ini')

    data_dir = osp.join(root_dir, split)
    video_names = [vn for vn in os.listdir(data_dir) if '.txt' not in vn]

    for idx, video in enumerate(video_names):
        print('{split} ({idx}/{total}): Processing {video}...'.format(
            split=split, idx=idx, total=len(video_names), video=video))
        video_path = osp.join(data_dir, video)
        jpg_files = glob(osp.join(video_path, '*.jpg'))
        with open(osp.join(video_path, 'groundtruth.txt')) as f:
            ann_content = f.readlines()

        track_save_dir = osp.join(save_dir, split, video)
        mkdir_p(track_save_dir)
        for meta_name in meta_files:
            copyfile(osp.join(video_path, meta_name),
                     osp.join(track_save_dir, meta_name))

        # The context manager closes the rewritten annotation file even when
        # a frame fails half-way through the video.
        with open(osp.join(track_save_dir, 'groundtruth.txt'), 'w') as fw:
            for i, jpg in enumerate(jpg_files):
                img_file = osp.basename(jpg)

                # Box used for cropping: looked up by the 1-based frame
                # number encoded in the file name.
                jpgidx = int(img_file.split('.')[0]) - 1
                ann = ann_content[jpgidx].strip()
                bbox = [int(float(bb)) for bb in ann.split(',')]
                # [xmin, ymin, w, h]

                # Box used for the rewritten annotation: looked up by loop
                # position, so line i of the output describes line i of the
                # input regardless of the order glob returned the frames in.
                annk = ann_content[i].strip()
                bboxk = [int(float(bb)) for bb in annk.split(',')]
                w = bboxk[2]
                h = bboxk[3]

                # Size of the target once the search region s_x is resized
                # to size_x pixels.
                wc_z = w + context_amount * (w + h)
                hc_z = h + context_amount * (w + h)
                s_z = np.sqrt(wc_z * hc_z)
                if s_z > 0:
                    scale_z = size_z / s_z
                    pad = ((size_x - size_z) / 2) / scale_z
                    s_x = s_z + 2 * pad
                    wn = int(w * size_x / s_x)
                    hn = int(h * size_x / s_x)
                else:
                    # Degenerate (zero-size) box: the division above would
                    # produce NaN and int() would raise.
                    wn = 0
                    hn = 0
                wn = max(wn, 1)
                hn = max(hn, 1)
                newbb = [
                    int(crop_center - wn / 2),
                    int(crop_center - hn / 2), wn, hn
                ]
                fw.write(','.join(str(e) + '.0000' for e in newbb) + '\n')

                savename = osp.join(track_save_dir, '{}.jpg'.format(img_file))
                if osp.isfile(savename):
                    try:
                        # Open and immediately close the file: this only
                        # checks that it is readable as an image and must
                        # not keep the file handle alive.
                        with Image.open(savename):
                            pass
                    except IOError:
                        os.remove(savename)
                    else:
                        continue  # skip existing images

                img = imread(jpg)
                target_box = convert_bbox_format(Rectangle(*bbox),
                                                 'center-based')
                crop, _ = get_crops(img,
                                    target_box,
                                    size_z=size_z,
                                    size_x=size_x,
                                    context_amount=context_amount)
                imwrite(savename, crop, [int(cv2.IMWRITE_JPEG_QUALITY), 90])