def initialize_pos_neg_dataset(train_videos, opts, transform=None, multidomain=True):
    """Return list of pos and list of neg datasets, one per domain.

    For every training video this builds positive/negative sample databases
    via ``get_train_dbs``, preloads (and mean-subtracts) every referenced
    image into an in-memory cache shared by both datasets, and wraps each
    database in an ``SLDataset``.

    Args:
        train_videos: dict with parallel lists 'video_names', 'video_paths'
            and 'bench_names'.
        opts: options dict forwarded to get_train_dbs.
        transform: optional transform passed to SLDataset.
        multidomain: if True, one (pos, neg) dataset pair per video (domain);
            otherwise all videos are merged into a single pair.

    Returns:
        datasets_pos: (list of SLDataset) length #videos if multidomain, else 1.
        datasets_neg: (list of SLDataset) length #videos if multidomain, else 1.
    """
    num_videos = len(train_videos['video_names'])
    datasets_pos = []
    datasets_neg = []
    pos_count = 0
    neg_count = 0
    mean_subtractor = SubtractMeans()

    for vid_idx in tqdm(range(num_videos)):
        file_name_set = set()
        # Per-video accumulators; keys mirror what SLDataset consumes.
        train_db_pos = {
            'img_path': [],      # list of string
            'bboxes': [],        # list of ndarray, [left top width height]
            'labels': [],        # list of ndarray; one-hot action vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []        # list of int; video (domain) index
        }
        train_db_neg = {
            'img_path': [],      # list of string
            'bboxes': [],        # list of ndarray, [left top width height]
            'labels': [],        # list of ndarray; one-hot action vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []        # list of int; video (domain) index
        }

        print("Generating dataset from video {}/{} from bench {} (current total (pos-neg): {}-{})...".format(
            vid_idx + 1, num_videos, train_videos['bench_names'][vid_idx],
            len(train_db_pos['labels']), len(train_db_neg['labels'])))

        bench_name = train_videos['bench_names'][vid_idx]
        video_name = train_videos['video_names'][vid_idx]
        video_path = train_videos['video_paths'][vid_idx]
        vid_info = get_video_infos(bench_name, video_path, video_name)
        train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)

        # Flatten the per-bbox-sample databases into one database per video.
        for sample in train_db_pos_:
            train_db_pos['img_path'].extend(sample['img_path'])
            train_db_pos['bboxes'].extend(sample['bboxes'])
            train_db_pos['labels'].extend(sample['labels'])
            train_db_pos['score_labels'].extend(sample['score_labels'])
            train_db_pos['vid_idx'].extend(np.repeat(vid_idx, len(sample['img_path'])))
        pos_count += len(train_db_pos['labels'])
        print("Finished generating positive dataset (current total data: {})".format(pos_count))

        for sample in train_db_neg_:
            train_db_neg['img_path'].extend(sample['img_path'])
            train_db_neg['bboxes'].extend(sample['bboxes'])
            train_db_neg['labels'].extend(sample['labels'])
            train_db_neg['score_labels'].extend(sample['score_labels'])
            train_db_neg['vid_idx'].extend(np.repeat(vid_idx, len(sample['img_path'])))
        neg_count += len(train_db_neg['labels'])

        file_name_set.update(train_db_neg['img_path'])
        file_name_set.update(train_db_pos['img_path'])

        # Preload every referenced frame once; both datasets share the cache.
        img_path_np_dict = {}
        print('Loading images into memory...')
        for image_name in tqdm(file_name_set):
            im = cv2.imread(image_name)
            # FIX: invoke the transform directly instead of mean_subtractor.__call__(im).
            im, _, _, _ = mean_subtractor(im)
            img_path_np_dict[image_name] = im
        print("Finished generating negative dataset (current total data: {})".format(neg_count))

        dataset_pos = SLDataset(train_db_pos, transform=transform)
        dataset_pos.img_path_np_dict = img_path_np_dict
        dataset_neg = SLDataset(train_db_neg, transform=transform)
        dataset_neg.img_path_np_dict = img_path_np_dict

        if multidomain or not datasets_pos:
            datasets_pos.append(dataset_pos)
            datasets_neg.append(dataset_neg)
        else:
            # BUG FIX: the original did datasets_pos.extend(dataset_pos),
            # which iterates the SLDataset and stores its samples, while the
            # docstring promises a one-element list of SLDataset. Merge this
            # video's samples into the single existing dataset instead.
            for key in train_db_pos:
                datasets_pos[0].train_db[key].extend(dataset_pos.train_db[key])
                datasets_neg[0].train_db[key].extend(dataset_neg.train_db[key])
            datasets_pos[0].img_path_np_dict.update(img_path_np_dict)
            datasets_neg[0].img_path_np_dict.update(img_path_np_dict)

    return datasets_pos, datasets_neg
for c in range(0, 3): im[y1:y2, x1:x2, c] = (alpha_s * obj_img[:, :, c] + alpha_l * im[y1:y2, x1:x2, c]) new_im_arr.append(im) cv2.imwrite( os.path.join(new_img_save_path, 'color', file[file.rfind('/') + 1:]), im) # cv2.imshow('hoy', im) # key = cv2.waitKey(0) & 0xFF # if key == ord("q"): # cv2.destroyAllWindows() # return current_x += random.randint(-x_var, x_var) current_y += random.randint(-y_var, y_var) current_x = min(current_x, max_x) current_y = min(current_y, max_y) current_x = max(current_x, 0) current_y = max(current_y, 0) gt_file_path = os.path.join(new_img_save_path, 'groundtruth1.txt') with open(gt_file_path, "w") as text_file: for (x, y, w, h) in new_gt_arr: text_file.write("{},{},{},{}\n".format(x, y, w, h)) return new_im_arr, new_gt_arr if __name__ == '__main__': vid_info = get_video_infos('vot15', 'datasets/data/vot15', 'bag') generate_mot_dataset(vid_info, '../datasets/data/vot15/bagMOT')
def __init__(self, train_videos, opts, transform, args):
    """Build the pool of training clips sampled from the given videos.

    Iterates the videos in random order, cuts each into fixed-length clips
    of ``RL_steps`` frames (or one full-length clip when RL_steps is None),
    and stores up to ``opts['train']['rl_num_batches']`` clips per video in
    ``self.videos``. Finally initializes per-episode tracking state and
    calls ``self.reset()``.

    Args:
        train_videos: dict with parallel lists 'video_names', 'video_paths'
            and 'bench_names'.
        opts: options dict; reads opts['train']['RL_steps'] and
            opts['train']['rl_num_batches'].
        transform: transform object stored for later use.
        args: extra arguments namespace stored for later use.
    """
    self.videos = []  # list of clips dict (one dict per video)
    self.opts = opts
    self.transform = transform
    self.args = args
    self.RL_steps = self.opts['train']['RL_steps']  # clip length

    video_names = train_videos['video_names']
    video_paths = train_videos['video_paths']
    bench_names = train_videos['bench_names']

    # Visit videos in random order.
    vid_idxs = np.random.permutation(len(video_names))

    for vid_idx in vid_idxs:
        # dict consisting of the set of clips in ONE video
        clips = {
            'img_path': [],
            'frame_start': [],
            'frame_end': [],
            'init_bbox': [],
            'end_bbox': [],
            'vid_idx': [],
        }

        # Load current training video info
        video_name = video_names[vid_idx]
        video_path = video_paths[vid_idx]
        bench_name = bench_names[vid_idx]

        # TODO MAYBE ADD MOT
        vid_info = get_video_infos(bench_name, video_path, video_name)

        if self.RL_steps is None:
            # NOTE(review): this mutates self.RL_steps inside the per-video
            # loop, so only the FIRST video takes the full-clip branch; all
            # later videos use the first video's length as a fixed step
            # count. Verify this is intended and not a latent bug.
            self.RL_steps = len(vid_info['gt']) - 1
            vid_clip_starts = [0]
            vid_clip_ends = [len(vid_info['gt']) - 1]
        else:
            # All possible clip start frames, shuffled; each clip spans
            # exactly RL_steps frames.
            vid_clip_starts = np.array(
                range(len(vid_info['gt']) - self.RL_steps))
            vid_clip_starts = np.random.permutation(vid_clip_starts)
            vid_clip_ends = vid_clip_starts + self.RL_steps

        # number of clips kept from this one video
        num_train_clips = min(opts['train']['rl_num_batches'],
                              len(vid_clip_starts))

        for clipIdx in range(num_train_clips):
            frameStart = vid_clip_starts[clipIdx]
            frameEnd = vid_clip_ends[clipIdx]
            clips['img_path'].append(
                vid_info['img_files'][frameStart:frameEnd])
            clips['frame_start'].append(frameStart)
            clips['frame_end'].append(frameEnd)
            clips['init_bbox'].append(vid_info['gt'][frameStart])
            clips['end_bbox'].append(vid_info['gt'][frameEnd])
            clips['vid_idx'].append(vid_idx)

        if num_train_clips > 0:  # small hack: skip videos that yielded no clips
            self.videos.append(clips)

    self.clip_idx = -1  # hack for reset function
    self.vid_idx = 0

    self.state = None  # current bbox
    self.gt = None  # end bbox
    self.current_img = None  # current image frame
    self.current_patch = None  # current patch (transformed)
    self.current_img_idx = 0

    self.reset()
def initialize_pos_neg_dataset(train_videos, opts, args, transform=None, multidomain=True):
    """Return a list of combined pos+neg SLDatasets.

    When ``train_videos`` is None the samples come from the ILSVRC
    generators (selected by ``args.train_consecutive`` /
    ``args.train_mul_step``); otherwise they are generated per training
    video with ``get_train_dbs``.

    Args:
        train_videos: dict with parallel lists 'video_names', 'video_paths'
            and 'bench_names', or None for the ILSVRC path.
        opts: options dict forwarded to the db generators.
        args: namespace with boolean flags train_consecutive and
            train_mul_step.
        transform: optional transform passed to SLDataset.
        multidomain: if True, one dataset per video (domain); otherwise all
            samples are merged into a single dataset.

    Returns:
        datasets_pos_neg: (list of SLDataset) length #videos if multidomain,
            else 1.
    """
    datasets_pos_neg = []

    # Identity comparison with None is the idiomatic form (was `== None`).
    if train_videos is None:
        num_videos = 1
    else:
        num_videos = len(train_videos['video_names'])

    t0 = time.time()
    for vid_idx in range(num_videos):
        train_db = {
            'img_path': [],      # list of string
            'bboxes': [],        # list of ndarray, [left top width height]
            'labels': [],        # list of ndarray; one-hot action vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
        }

        if train_videos is None:
            print("generating dataset from ILSVR dataset...")
            if args.train_consecutive:
                train_db_pos_neg_ = get_train_dbs_ILSVR_consecutive_frame(opts)
            elif args.train_mul_step:
                train_db_pos_neg_ = get_train_dbs_mul_step(opts)
            else:
                train_db_pos_neg_ = get_train_dbs_ILSVR(opts)
        else:
            print("generating dataset from video " + str(vid_idx + 1) + "/" + str(num_videos) +
                  "(current total data (pos+neg): " + str(len(train_db['labels'])) + ")")
            bench_name = train_videos['bench_names'][vid_idx]
            video_name = train_videos['video_names'][vid_idx]
            video_path = train_videos['video_paths'][vid_idx]
            vid_info = get_video_infos(bench_name, video_path, video_name)
            train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)
            # BUG FIX: the original never assigned train_db_pos_neg_ on this
            # branch, so the flattening loop below raised NameError (or
            # silently reused a stale value). Combine the positive and
            # negative per-sample databases into one list.
            train_db_pos_neg_ = train_db_pos_ + train_db_neg_

        print("before train_db_pos['img_path'].extend", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        # Flatten the per-bbox-sample databases into one database.
        for sample in train_db_pos_neg_:
            train_db['img_path'].extend(sample['img_path'])
            train_db['bboxes'].extend(sample['bboxes'])
            train_db['labels'].extend(sample['labels'])
            train_db['score_labels'].extend(sample['score_labels'])

        print("after train_db_neg['img_path'].extend", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        dataset_pos_neg = SLDataset(train_db, transform=transform)
        print("after dataset_pos_neg = SLDataset(train_db", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        if multidomain:
            datasets_pos_neg.append(dataset_pos_neg)
        elif len(datasets_pos_neg) == 0:
            datasets_pos_neg.append(dataset_pos_neg)
            print("after datasets_pos_neg.append(dataset_pos_neg)", end=' : ')
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        else:
            # Single-domain mode: fold this video's samples into the one
            # existing dataset.
            datasets_pos_neg[0].train_db['img_path'].extend(dataset_pos_neg.train_db['img_path'])
            datasets_pos_neg[0].train_db['bboxes'].extend(dataset_pos_neg.train_db['bboxes'])
            datasets_pos_neg[0].train_db['labels'].extend(dataset_pos_neg.train_db['labels'])
            datasets_pos_neg[0].train_db['score_labels'].extend(dataset_pos_neg.train_db['score_labels'])

    t1 = time.time()
    all_time = t1 - t0
    all_m = all_time // 60
    all_s = all_time % 60
    print('time of generating dataset: %d m %d s (%d s)' % (all_m, all_s, all_time))

    return datasets_pos_neg
def initialize_pos_neg_dataset(train_videos, opts, transform=None, multidomain=True):
    """Return list of pos and list of neg datasets, one per domain.

    For every training video this builds positive/negative sample databases
    via ``get_train_dbs`` and wraps each in an ``SLDataset``.

    Args:
        train_videos: dict with parallel lists 'video_names', 'video_paths'
            and 'bench_names'.
        opts: options dict forwarded to get_train_dbs.
        transform: optional transform passed to SLDataset.
        multidomain: if True, one (pos, neg) dataset pair per video (domain);
            otherwise all videos are merged into a single pair.

    Returns:
        datasets_pos: (list of SLDataset) length #videos if multidomain, else 1.
        datasets_neg: (list of SLDataset) length #videos if multidomain, else 1.
    """
    num_videos = len(train_videos['video_names'])
    datasets_pos = []
    datasets_neg = []

    for vid_idx in range(num_videos):
        # Per-video accumulators; keys mirror what SLDataset consumes.
        train_db_pos = {
            'img_path': [],      # list of string
            'bboxes': [],        # list of ndarray, [left top width height]
            'labels': [],        # list of ndarray; one-hot action vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []        # list of int; video (domain) index
        }
        train_db_neg = {
            'img_path': [],      # list of string
            'bboxes': [],        # list of ndarray, [left top width height]
            'labels': [],        # list of ndarray; one-hot action vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []        # list of int; video (domain) index
        }

        print("generating dataset from video " + str(vid_idx + 1) + "/" + str(num_videos) +
              "(current total data (pos-neg): " + str(len(train_db_pos['labels'])) +
              "-" + str(len(train_db_neg['labels'])) + ")")

        bench_name = train_videos['bench_names'][vid_idx]
        video_name = train_videos['video_names'][vid_idx]
        video_path = train_videos['video_paths'][vid_idx]
        vid_info = get_video_infos(bench_name, video_path, video_name)
        train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)

        # Flatten the per-bbox-sample databases into one database per video.
        for sample in train_db_pos_:
            train_db_pos['img_path'].extend(sample['img_path'])
            train_db_pos['bboxes'].extend(sample['bboxes'])
            train_db_pos['labels'].extend(sample['labels'])
            train_db_pos['score_labels'].extend(sample['score_labels'])
            train_db_pos['vid_idx'].extend(np.repeat(vid_idx, len(sample['img_path'])))
        print("Finish generating positive dataset... (current total data: " +
              str(len(train_db_pos['labels'])) + ")")

        for sample in train_db_neg_:
            train_db_neg['img_path'].extend(sample['img_path'])
            train_db_neg['bboxes'].extend(sample['bboxes'])
            train_db_neg['labels'].extend(sample['labels'])
            train_db_neg['score_labels'].extend(sample['score_labels'])
            train_db_neg['vid_idx'].extend(np.repeat(vid_idx, len(sample['img_path'])))
        print("Finish generating negative dataset... (current total data: " +
              str(len(train_db_neg['labels'])) + ")")

        dataset_pos = SLDataset(train_db_pos, transform=transform)
        dataset_neg = SLDataset(train_db_neg, transform=transform)

        if multidomain or not datasets_pos:
            datasets_pos.append(dataset_pos)
            datasets_neg.append(dataset_neg)
        else:
            # BUG FIX: the original did datasets_pos.extend(dataset_pos),
            # which iterates the SLDataset and stores its samples, while the
            # docstring promises a one-element list of SLDataset. Merge this
            # video's samples into the single existing dataset instead.
            for key in train_db_pos:
                datasets_pos[0].train_db[key].extend(dataset_pos.train_db[key])
                datasets_neg[0].train_db[key].extend(dataset_neg.train_db[key])

    return datasets_pos, datasets_neg