Example #1
def initialize_pos_neg_dataset(train_videos, opts, transform=None, multidomain=True):
    """
    Return list of pos and list of neg dataset for each domain.
    Args:
        train_videos: dict with 'video_names', 'video_paths' and 'bench_names' lists describing the training videos
        opts: training options dict, forwarded to get_train_dbs
        transform: optional transform applied to each sample by SLDataset
        multidomain: if True, build one dataset per video (domain); otherwise merge everything into one dataset
    Returns:
        datasets_pos: (list of SLDataset) length #videos (one per domain) if multidomain, else 1
        datasets_neg: (list of SLDataset) length #videos (one per domain) if multidomain, else 1
    """
    num_videos = len(train_videos['video_names'])

    datasets_pos = []
    datasets_neg = []
    pos_count = 0
    neg_count = 0
    mean_subtractor = SubtractMeans()

    for vid_idx in tqdm(range(num_videos)):
        file_name_set = set()
        train_db_pos = {
            'img_path': [],  # list of string
            'bboxes': [],  # list of ndarray left top coordinate [left top width height]
            'labels': [],  # list of ndarray #action elements. One hot vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []  # list of int. Each video (or domain) index
        }
        train_db_neg = {
            'img_path': [],  # list of string
            'bboxes': [],  # list of ndarray left top coordinate [left top width height]
            'labels': [],  # list of ndarray #action elements. One hot vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []  # list of int. Each video (or domain) index
        }
        print("Generating dataset from video {}/{} from bench {} (current total (pos-neg): {}-{})...".format(
            vid_idx + 1, num_videos, train_videos['bench_names'][vid_idx],
            len(train_db_pos['labels']), len(train_db_neg['labels'])))

        # print("generating dataset from video " + str(vid_idx + 1) + "/" + str(num_videos) +
        #       "(current total data (pos-neg): " + str(len(train_db_pos['labels'])) +
        #       "-" + str(len(train_db_neg['labels'])) + ")")

        bench_name = train_videos['bench_names'][vid_idx]
        video_name = train_videos['video_names'][vid_idx]
        video_path = train_videos['video_paths'][vid_idx]

        vid_info = get_video_infos(bench_name, video_path, video_name)

        train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)
        # separate for each bboxes sample
        for sample_idx in range(len(train_db_pos_)):
            # for img_path_idx in range(len(train_db_pos_[sample_idx]['score_labels'])):
            train_db_pos['img_path'].extend(train_db_pos_[sample_idx]['img_path'])
            train_db_pos['bboxes'].extend(train_db_pos_[sample_idx]['bboxes'])
            train_db_pos['labels'].extend(train_db_pos_[sample_idx]['labels'])
            train_db_pos['score_labels'].extend(train_db_pos_[sample_idx]['score_labels'])
            train_db_pos['vid_idx'].extend(np.repeat(vid_idx, len(train_db_pos_[sample_idx]['img_path'])))

        pos_count += len(train_db_pos['labels'])
        print("Finished generating positive dataset (current total data: {})".format(pos_count))

        for sample_idx in range(len(train_db_neg_)):
            # for img_path_idx in range(len(train_db_neg_[sample_idx]['score_labels'])):
            train_db_neg['img_path'].extend(train_db_neg_[sample_idx]['img_path'])
            train_db_neg['bboxes'].extend(train_db_neg_[sample_idx]['bboxes'])
            train_db_neg['labels'].extend(train_db_neg_[sample_idx]['labels'])
            train_db_neg['score_labels'].extend(train_db_neg_[sample_idx]['score_labels'])
            train_db_neg['vid_idx'].extend(np.repeat(vid_idx, len(train_db_neg_[sample_idx]['img_path'])))

        neg_count += len(train_db_neg['labels'])
        file_name_set.update(train_db_neg['img_path'])
        file_name_set.update(train_db_pos['img_path'])

        img_path_np_dict = {}
        print('Loading images into memory...')
        for image_name in tqdm(file_name_set):
            im = cv2.imread(image_name)
            im, _, _, _ = mean_subtractor(im)
            img_path_np_dict[image_name] = im
        print("Finished generating negative dataset (current total data: {})".format(neg_count))

        dataset_pos = SLDataset(train_db_pos, transform=transform)
        dataset_pos.img_path_np_dict = img_path_np_dict
        dataset_neg = SLDataset(train_db_neg, transform=transform)
        dataset_neg.img_path_np_dict = img_path_np_dict

        if multidomain:
            datasets_pos.append(dataset_pos)
            datasets_neg.append(dataset_neg)
        else:
            datasets_pos.extend(dataset_pos)
            datasets_neg.extend(dataset_neg)

    return datasets_pos, datasets_neg
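
A minimal usage sketch for the function above. The train_videos layout is inferred from the keys the function indexes; opts, load_opts and the video paths below stand in for the surrounding project's own objects and are assumptions, not the project's actual configuration.

from torch.utils.data import DataLoader

# hypothetical inputs for illustration only
train_videos = {
    'video_names': ['bag'],
    'video_paths': ['datasets/data/vot15'],
    'bench_names': ['vot15'],
}
opts = load_opts()  # placeholder: the real options dict comes from the project config

datasets_pos, datasets_neg = initialize_pos_neg_dataset(
    train_videos, opts, transform=None, multidomain=True)

# with multidomain=True there is one pos/neg dataset pair per video (domain)
loader_pos = DataLoader(datasets_pos[0], batch_size=32, shuffle=True)
loader_neg = DataLoader(datasets_neg[0], batch_size=32, shuffle=True)
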
Example #2
        for c in range(0, 3):
            im[y1:y2, x1:x2, c] = (alpha_s * obj_img[:, :, c] +
                                   alpha_l * im[y1:y2, x1:x2, c])
        new_im_arr.append(im)
        cv2.imwrite(
            os.path.join(new_img_save_path, 'color',
                         file[file.rfind('/') + 1:]), im)
        # cv2.imshow('hoy', im)
        # key = cv2.waitKey(0) & 0xFF
        # if key == ord("q"):
        #     cv2.destroyAllWindows()
        #     return
        current_x += random.randint(-x_var, x_var)
        current_y += random.randint(-y_var, y_var)

        current_x = min(current_x, max_x)
        current_y = min(current_y, max_y)

        current_x = max(current_x, 0)
        current_y = max(current_y, 0)
    gt_file_path = os.path.join(new_img_save_path, 'groundtruth1.txt')
    with open(gt_file_path, "w") as text_file:
        for (x, y, w, h) in new_gt_arr:
            text_file.write("{},{},{},{}\n".format(x, y, w, h))
    return new_im_arr, new_gt_arr


if __name__ == '__main__':
    vid_info = get_video_infos('vot15', 'datasets/data/vot15', 'bag')
    generate_mot_dataset(vid_info, '../datasets/data/vot15/bagMOT')
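
Example #2 pastes an object crop into each frame with per-pixel alpha blending (the alpha_s / alpha_l weights in the loop above). Below is a self-contained sketch of that blending step under the assumption that the object image carries a BGRA alpha channel; the sizes, colors and output path are made up for illustration.

import cv2
import numpy as np

def paste_with_alpha(frame, obj_bgra, x1, y1):
    """Blend a BGRA object crop onto frame at (x1, y1) using its alpha channel."""
    h, w = obj_bgra.shape[:2]
    y2, x2 = y1 + h, x1 + w
    alpha_s = obj_bgra[:, :, 3] / 255.0   # object opacity in [0, 1]
    alpha_l = 1.0 - alpha_s               # remaining weight for the background
    for c in range(3):
        frame[y1:y2, x1:x2, c] = (alpha_s * obj_bgra[:, :, c] +
                                  alpha_l * frame[y1:y2, x1:x2, c])
    return frame

# toy example: blend a semi-transparent red square onto a gray frame
frame = np.full((240, 320, 3), 128, dtype=np.uint8)
obj = np.zeros((50, 50, 4), dtype=np.uint8)
obj[:, :, 2] = 255   # red in BGR order
obj[:, :, 3] = 180   # roughly 70% opaque
cv2.imwrite('blended_example.jpg', paste_with_alpha(frame, obj, x1=100, y1=80))
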
Example #3
    def __init__(self, train_videos, opts, transform, args):
        self.videos = []  # list of clips dict

        self.opts = opts
        self.transform = transform
        self.args = args

        self.RL_steps = self.opts['train']['RL_steps']  # clip length

        video_names = train_videos['video_names']
        video_paths = train_videos['video_paths']
        bench_names = train_videos['bench_names']

        vid_idxs = np.random.permutation(len(video_names))

        for vid_idx in vid_idxs:
            # dict holding the set of clips for ONE video
            clips = {
                'img_path': [],
                'frame_start': [],
                'frame_end': [],
                'init_bbox': [],
                'end_bbox': [],
                'vid_idx': [],
            }
            # Load current training video info
            video_name = video_names[vid_idx]
            video_path = video_paths[vid_idx]
            bench_name = bench_names[vid_idx]

            # TODO MAYBE ADD MOT
            vid_info = get_video_infos(bench_name, video_path, video_name)

            if self.RL_steps is None:
                self.RL_steps = len(vid_info['gt']) - 1
                vid_clip_starts = [0]
                vid_clip_ends = [len(vid_info['gt']) - 1]
            else:
                vid_clip_starts = np.array(
                    range(len(vid_info['gt']) - self.RL_steps))
                vid_clip_starts = np.random.permutation(vid_clip_starts)
                vid_clip_ends = vid_clip_starts + self.RL_steps

            # number of clips in one video
            num_train_clips = min(opts['train']['rl_num_batches'],
                                  len(vid_clip_starts))

            # print("num_train_clips of vid " + str(vid_idx) + ": ", str(num_train_clips))

            for clipIdx in range(num_train_clips):
                frameStart = vid_clip_starts[clipIdx]
                frameEnd = vid_clip_ends[clipIdx]

                clips['img_path'].append(
                    vid_info['img_files'][frameStart:frameEnd])
                clips['frame_start'].append(frameStart)
                clips['frame_end'].append(frameEnd)
                clips['init_bbox'].append(vid_info['gt'][frameStart])
                clips['end_bbox'].append(vid_info['gt'][frameEnd])
                clips['vid_idx'].append(vid_idx)

            if num_train_clips > 0:  # small hack
                self.videos.append(clips)

        self.clip_idx = -1  # hack for reset function
        self.vid_idx = 0

        self.state = None  # current bbox
        self.gt = None  # end bbox
        self.current_img = None  # current image frame
        self.current_patch = None  # current patch (transformed)
        self.current_img_idx = 0

        self.reset()


def initialize_pos_neg_dataset(train_videos, opts, args, transform=None, multidomain=True):
    """
    Return list of pos and list of neg dataset for each domain.
    Args:
        train_videos:
        opts:
        transform:
        multidomain:
    Returns:
        datasets_pos: (list of SLDataset) List length: if multidomain, #videos (or domain). Else: 1
        datasets_neg: (list of SLDataset) List length: if multidomain, #videos (or domain). Else: 1
    """

    datasets_pos_neg = []

    if train_videos is None:
        num_videos = 1
    else:
        num_videos = len(train_videos['video_names'])
    t0 = time.time()
    for vid_idx in range(num_videos):
        train_db = {
            'img_path': [],  # list of string
            'bboxes': [],  # list of ndarray left top coordinate [left top width height]
            'labels': [],  # list of ndarray #action elements. One hot vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
        }

        if train_videos is None:
            print("generating dataset from ILSVR dataset...")
            # train_db_pos_, train_db_neg_ = get_train_dbs_ILSVR(opts)
            if args.train_consecutive:
                train_db_pos_neg_ = get_train_dbs_ILSVR_consecutive_frame(opts)
            elif args.train_mul_step:
                train_db_pos_neg_ = get_train_dbs_mul_step(opts)
            else:
                train_db_pos_neg_ = get_train_dbs_ILSVR(opts)
        else:
            # print("generating dataset from video " + str(vid_idx + 1) + "/" + str(num_videos) +
            #   "(current total data (pos-neg): " + str(len(train_db_pos['labels'])) +
            #   "-" + str(len(train_db_neg['labels'])) + ")")
            print("generating dataset from video " + str(vid_idx + 1) + "/" + str(num_videos) +
                  "(current total data (pos+neg): " + str(len(train_db['labels']))  + ")")

            bench_name = train_videos['bench_names'][vid_idx]
            video_name = train_videos['video_names'][vid_idx]
            video_path = train_videos['video_paths'][vid_idx]
            vid_info = get_video_infos(bench_name, video_path, video_name)
            train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)
            # the loop below expects train_db_pos_neg_; merging the pos and neg
            # sample lists here is an assumed fix so the video branch also works
            train_db_pos_neg_ = train_db_pos_ + train_db_neg_
        # separate for each bboxes sample
        print("before train_db_pos['img_path'].extend", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        for sample_idx in range(len(train_db_pos_neg_)):
            train_db['img_path'].extend(train_db_pos_neg_[sample_idx]['img_path'])
            train_db['bboxes'].extend(train_db_pos_neg_[sample_idx]['bboxes'])
            train_db['labels'].extend(train_db_pos_neg_[sample_idx]['labels'])
            train_db['score_labels'].extend(train_db_pos_neg_[sample_idx]['score_labels'])

        print("after train_db_neg['img_path'].extend", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        dataset_pos_neg = SLDataset(train_db, transform=transform)
        print("after dataset_pos_neg = SLDataset(train_db)", end=' : ')
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        if multidomain:
            datasets_pos_neg.append(dataset_pos_neg)
        else:
            if len(datasets_pos_neg) == 0:
                datasets_pos_neg.append(dataset_pos_neg)
                print("after datasets_pos_neg.append(dataset_pos_neg)", end=' : ')
                print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            else:
                # merge this domain's samples into the single accumulated dataset
                datasets_pos_neg[0].train_db['img_path'].extend(dataset_pos_neg.train_db['img_path'])
                datasets_pos_neg[0].train_db['bboxes'].extend(dataset_pos_neg.train_db['bboxes'])
                datasets_pos_neg[0].train_db['labels'].extend(dataset_pos_neg.train_db['labels'])
                datasets_pos_neg[0].train_db['score_labels'].extend(dataset_pos_neg.train_db['score_labels'])

    t1 = time.time()
    all_time = t1 - t0
    all_m = all_time // 60
    all_s = all_time % 60
    print('time of generating dataset: %d m  %d s (%d s)' % (all_m, all_s, all_time))
    return datasets_pos_neg
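
A hedged usage sketch for the combined pos+neg variant above. The args object only needs the two flags the function checks (train_consecutive, train_mul_step); opts and the location of the ILSVRC frames are provided by the surrounding project and are assumed here (load_opts is a placeholder).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--train_consecutive', action='store_true')
parser.add_argument('--train_mul_step', action='store_true')
args = parser.parse_args([])  # both False: falls through to get_train_dbs_ILSVR

opts = load_opts()  # placeholder for the project's options dict

# train_videos=None selects the ILSVRC branch; multidomain=False keeps a single dataset
datasets_pos_neg = initialize_pos_neg_dataset(None, opts, args,
                                              transform=None, multidomain=False)
print('number of datasets:', len(datasets_pos_neg))
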
Example #5
def initialize_pos_neg_dataset(train_videos,
                               opts,
                               transform=None,
                               multidomain=True):
    """
    Return list of pos and list of neg dataset for each domain.
    Args:
        train_videos: dict with 'video_names', 'video_paths' and 'bench_names' lists describing the training videos
        opts: training options dict, forwarded to get_train_dbs
        transform: optional transform applied to each sample by SLDataset
        multidomain: if True, build one dataset per video (domain); otherwise merge everything into one dataset
    Returns:
        datasets_pos: (list of SLDataset) length #videos (one per domain) if multidomain, else 1
        datasets_neg: (list of SLDataset) length #videos (one per domain) if multidomain, else 1
    """
    num_videos = len(train_videos['video_names'])

    datasets_pos = []
    datasets_neg = []

    for vid_idx in range(num_videos):
        train_db_pos = {
            'img_path': [],  # list of string
            'bboxes': [],  # list of ndarray left top coordinate [left top width height]
            'labels': [],  # list of ndarray #action elements. One hot vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []  # list of int. Each video (or domain) index
        }
        train_db_neg = {
            'img_path': [],  # list of string
            'bboxes': [],  # list of ndarray left top coordinate [left top width height]
            'labels': [],  # list of ndarray #action elements. One hot vector
            'score_labels': [],  # list of scalar 0 (negative) or 1 (positive)
            'vid_idx': []  # list of int. Each video (or domain) index
        }

        print("generating dataset from video " + str(vid_idx + 1) + "/" +
              str(num_videos) + "(current total data (pos-neg): " +
              str(len(train_db_pos['labels'])) + "-" +
              str(len(train_db_neg['labels'])) + ")")

        bench_name = train_videos['bench_names'][vid_idx]
        video_name = train_videos['video_names'][vid_idx]
        video_path = train_videos['video_paths'][vid_idx]
        vid_info = get_video_infos(bench_name, video_path, video_name)
        train_db_pos_, train_db_neg_ = get_train_dbs(vid_info, opts)
        # separate for each bboxes sample
        for sample_idx in range(len(train_db_pos_)):
            # for img_path_idx in range(len(train_db_pos_[sample_idx]['score_labels'])):
            train_db_pos['img_path'].extend(
                train_db_pos_[sample_idx]['img_path'])
            train_db_pos['bboxes'].extend(train_db_pos_[sample_idx]['bboxes'])
            train_db_pos['labels'].extend(train_db_pos_[sample_idx]['labels'])
            train_db_pos['score_labels'].extend(
                train_db_pos_[sample_idx]['score_labels'])
            train_db_pos['vid_idx'].extend(
                np.repeat(vid_idx, len(train_db_pos_[sample_idx]['img_path'])))

        print("Finish generating positive dataset... (current total data: " +
              str(len(train_db_pos['labels'])) + ")")

        for sample_idx in range(len(train_db_neg_)):
            # for img_path_idx in range(len(train_db_neg_[sample_idx]['score_labels'])):
            train_db_neg['img_path'].extend(
                train_db_neg_[sample_idx]['img_path'])
            train_db_neg['bboxes'].extend(train_db_neg_[sample_idx]['bboxes'])
            train_db_neg['labels'].extend(train_db_neg_[sample_idx]['labels'])
            train_db_neg['score_labels'].extend(
                train_db_neg_[sample_idx]['score_labels'])
            train_db_neg['vid_idx'].extend(
                np.repeat(vid_idx, len(train_db_neg_[sample_idx]['img_path'])))

        print("Finish generating negative dataset... (current total data: " +
              str(len(train_db_neg['labels'])) + ")")

        dataset_pos = SLDataset(train_db_pos, transform=transform)
        dataset_neg = SLDataset(train_db_neg, transform=transform)

        if multidomain:
            datasets_pos.append(dataset_pos)
            datasets_neg.append(dataset_neg)
        else:
            datasets_pos.extend(dataset_pos)
            datasets_neg.extend(dataset_neg)

    return datasets_pos, datasets_neg
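
One caveat on the multidomain=False branch in Examples #1 and #5: list.extend() iterates its argument, so extending with an SLDataset appends individual samples rather than the dataset object itself (assuming SLDataset is a standard map-style torch Dataset). If the intent is a single merged dataset, wrapping the per-domain datasets in torch.utils.data.ConcatDataset is a more conventional alternative; the sketch below shows that alternative and is not the repository's own code.

from torch.utils.data import ConcatDataset, DataLoader

# build per-domain datasets first, then merge them without flattening to samples
datasets_pos, datasets_neg = initialize_pos_neg_dataset(
    train_videos, opts, transform=None, multidomain=True)

merged_pos = ConcatDataset(datasets_pos)
merged_neg = ConcatDataset(datasets_neg)
loader_pos = DataLoader(merged_pos, batch_size=32, shuffle=True)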