Пример #1
0
    def __getitem__(self, index):
        """Assemble one clip from a single GOP of a compressed video.

        The clip holds the GOP's I-frame followed by the residuals at in-GOP
        positions 1..11. When ``opt.residual_only`` is set the I-frame is
        skipped and the first residual is appended twice, so the clip length
        stays the same.

        Args:
            index (int): Index into ``self.data``.
        Returns:
            tuple: ``(clip, target)``, or ``(clip, target, video_file_name)``
            when ``opt.video_level_accuracy`` is set. ``target`` is
            ``self.data[index]`` passed through ``target_transform`` when one
            is configured.
        """
        gop_size = 12  # GOP length assumed by the index arithmetic below

        sample = self.data[index]
        path = sample['video']
        frame_indices = sample['frame_indices']
        gop_index = int(frame_indices[0] // gop_size)

        clip = []
        if not self.opt.residual_only:
            iframe = load(path, gop_index, 0, 0, True)
            clip.append(self.tensor_2_image(iframe))

        for frame_index in range(1, gop_size):
            residual = load(path, gop_index, frame_index, 2, True)
            # Shift by 128 and clamp to 0..255 so the residual fits in uint8.
            residual += 128
            residual = (np.minimum(np.maximum(residual, 0),
                                   255)).astype(np.uint8)
            residual = self.tensor_2_image(residual)
            clip.append(residual)

            # Duplicate the first residual when the I-frame was skipped.
            if self.opt.residual_only and frame_index == 1:
                clip.append(residual)

        if self.spatial_transform is not None:
            self.spatial_transform.randomize_parameters()
            clip = [self.spatial_transform(img) for img in clip]
        # Stack the frames on a new leading axis, then swap axes 0 and 1 so
        # that the frame axis becomes dim 1.
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

        if self.opt.add and not self.opt.mv:
            # Average every later frame with frame 0. The previous loop
            # iterated over dim 0 (not the frame axis), so it only blended
            # the first few frames instead of all of them.
            clip[:, 1:] = (clip[:, 1:] + clip[:, :1]) / 2

        target = sample
        if self.target_transform is not None:
            target = self.target_transform(target)

        if self.opt.video_level_accuracy:
            return clip, target, path.split('/')[-1]
        return clip, target
Пример #2
0
    def __getitem__(self, index):
        """Load one GOP sample for the configured representation.

        ``self._representation`` selects what is read from the video:
        ``'iframe'`` (in-GOP position 0, type 0), ``'mv'`` (position 6,
        type 1) or ``'r'`` (position 6, type 2). The single loaded frame is
        passed through ``self._transform``, converted to a float tensor
        scaled by 1/255 and normalised per representation.

        Args:
            index: Position in the ``video_path`` / ``gop_index`` /
                ``targets`` columns of ``self.data``.
        Returns:
            tuple: ``(tensor, target)``.
        Raises:
            ValueError: If ``self._representation`` is not one of the three
                supported values (previously this surfaced as an
                ``UnboundLocalError`` at the return statement).
        """
        video_path = self.data['video_path'][index]
        gop_index = self.data['gop_index'][index]
        target = self.data['targets'][index]

        representation = self._representation
        if representation == 'iframe':
            img = load(video_path, gop_index, 0, 0, self._accumulate)
            img = color_aug(img)
            # Reverse the channel axis (presumably BGR -> RGB; confirm).
            img = img[..., ::-1]
        elif representation == 'mv':
            img = load(video_path, gop_index, 6, 1, self._accumulate)
            img = clip_and_scale(img, 20)
            img += 128
            img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
        elif representation == 'r':
            img = load(video_path, gop_index, 6, 2, self._accumulate)
            img += 128
            img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)
        else:
            raise ValueError('Unknown representation: %r' % (representation,))

        frames = self._transform([img])
        # Move the last (channel) axis in front of the two spatial axes.
        frames = np.transpose(np.array(frames), (0, 3, 1, 2))
        tensor = torch.from_numpy(frames).float() / 255.0

        if representation == 'iframe':
            tensor = (tensor - self._input_mean) / self._input_std
        elif representation == 'mv':
            tensor = tensor - 0.5
        else:
            tensor = (tensor - 0.5) / self._input_std

        return tensor, target
Пример #3
0
def main():
    """Dump per-frame motion vectors and residuals of every video as PNGs.

    For each entry of ``video_names`` the non-accumulated motion vectors
    (type 1) and residuals (type 2) of every frame are loaded and written to
    ``<video base name>/<PATH_MV_CONT>`` and ``<video base name>/<PATH_RES_CONT>``.

    Encoding:
      * motion vectors are offset by 2048 and each of the two channels is
        split into a high and a low byte, giving a 4-channel PNG laid out as
        (high_0, low_0, high_1, low_1); decode with ``(high << 8) + low - 2048``.
      * residuals are stored as ``round((residual + 256) / 2)`` in uint8.
    """
    # Frame size used when ``load`` returns None.
    fallback_height, fallback_width = 720, 960

    for video_name in video_names:
        fold_path = video_name.split('.avi')[0].split('/')[-1]
        path_mv = os.path.join(fold_path, PATH_MV_CONT)
        path_res = os.path.join(fold_path, PATH_RES_CONT)
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(path_mv, exist_ok=True)
        os.makedirs(path_res, exist_ok=True)

        num_frames = get_num_frames(video_name)
        print(num_frames)

        # Always process at least one GOP, even for very short videos.
        num_gops = max(num_frames // GOP_FRAMES_NUM, 1)
        for curGopIdx in range(num_gops):
            for innerGopIdx in range(GOP_FRAMES_NUM):
                curFrameIdx = curGopIdx * GOP_FRAMES_NUM + innerGopIdx

                print(video_name, curGopIdx, innerGopIdx)
                mvCont_origin = load(video_name, curGopIdx, innerGopIdx, 1, False)
                resCont = load(video_name, curGopIdx, innerGopIdx, 2, False)

                if mvCont_origin is None:
                    # A signed 32-bit fallback: a uint8 array cannot hold the
                    # +2048 offset applied below.
                    mvCont_origin = np.zeros(
                        [fallback_height, fallback_width, 2], dtype=np.int32)

                mvCont = mvCont_origin + 2048
                # Channel order: (high_h, low_h, high_w, low_w)
                mvPng = np.array([(mvCont[:, :, 0] >> 8) & 0xff,
                                  mvCont[:, :, 0] & 0xff,
                                  (mvCont[:, :, 1] >> 8) & 0xff,
                                  mvCont[:, :, 1] & 0xff], dtype=np.uint8)
                mvPng = np.transpose(mvPng, [1, 2, 0])
                imsave('%s/frame%s.png' % (path_mv, curFrameIdx), mvPng)

                if resCont is None:
                    # Same reasoning as above: the +256 offset needs headroom.
                    resCont = np.zeros(
                        [fallback_height, fallback_width, 3], dtype=np.int32)

                resCont = np.round((resCont + 256) / 2).astype(np.uint8)
                imsave('%s/frame%s.png' % (path_res, curFrameIdx), resCont)
                # NOTE(review): this path does not depend on the frame index,
                # so the file is overwritten on every iteration; it looks like
                # a debugging leftover -- confirm before removing.
                cv2.imwrite(PATH_RES_CONT+fold_path+'.png', resCont)
Пример #4
0
    def _load_video(self, video_name):
        """Load ROI crops for every segment and queue them in windows of 3.

        One frame per ``SEG_SIZE``-frame segment is loaded at the position
        given by ``self._get_frame_index`` and cropped to the region described
        by ``ROI_X`` / ``ROI_Y`` / ``ROI_WIDTH`` / ``ROI_HEIGHT``. Every run of
        three consecutive crops is then appended to ``self._frames`` as a
        3-element list.
        """
        # Choose which representation to extract (0 unless 'mv' or 'residual').
        representation_idx = {'mv': 1, 'residual': 2}.get(
            self._representation, 0)

        # Number of whole segments in the video.
        total_frames = get_num_frames(video_name)
        total_segments = total_frames // SEG_SIZE

        top, bottom = int(ROI_Y), int(ROI_Y+ROI_HEIGHT)
        left, right = int(ROI_X), int(ROI_X+ROI_WIDTH)

        # Record one frame per segment (the original comment says the middle
        # frame; that depends on _get_frame_index -- confirm).
        crops = []
        for segment in range(total_segments):
            gop_idx, gop_pos = self._get_frame_index(total_frames, segment)
            img = load(video_name, gop_idx, gop_pos, representation_idx,
                       self._accumulate)
            crops.append(img[top:bottom, left:right])

        # By default one action is recognised from every 3 segments.
        for window in zip(crops, crops[1:], crops[2:]):
            self._frames.append(list(window))
Пример #5
0
    def frame_callback(vis, frame_idx):
        """Show one compressed-domain frame and overlay its boxes.

        Args:
            vis: Visualiser providing ``set_image`` and ``draw_box``.
            frame_idx: Frame number, starting from 1.
        """
        print('Processing frame: ', frame_idx)

        # Number of frames in one group; 12 for the raw mpeg4 video.
        GROUP_SIZE = 12
        # GOP indices start from 0, while frame_idx here starts from 1.
        gop_idx = int((frame_idx - 1) / GROUP_SIZE)
        # Position of the frame inside its group.
        in_group_idx = int((frame_idx - 1) % GROUP_SIZE)

        image = coviar.load(video_path, gop_idx, in_group_idx, frame_type,
                            accumulate)
        image = compressed_frame_to_show(image, frame_type, tool_type='cv2')
        vis.set_image(image.copy(), frame_idx)

        raw_box = seq_info['boxes']
        # Guard on the value just read; the previous code tested a different
        # name (``raw_boxes``) that this function never assigns.
        if raw_box is not None:
            # Column 0 holds the frame index, column 6 the confidence.
            box = raw_box[raw_box[:, 0] == frame_idx]
            box = box[box[:, 6] >= min_confidence]
            # Keep columns 1..6 (presumably target_id, x, y, w, h and the
            # confidence -- confirm against the producer of seq_info).
            box = box[:, 1:7]
            vis.draw_box(list(box))
Пример #6
0
    def __getitem__(self, index):
        """Return ``(input, label)`` for one video.

        In training mode a random entry of ``self._video_list`` is used and
        ``index`` is ignored; otherwise entry ``index`` is used. One frame per
        segment is loaded in the configured representation, preprocessed,
        passed through ``self._transform`` and normalised.
        """
        representation = self._representation
        representation_idx = {'mv': 1, 'residual': 2}.get(representation, 0)

        entry = (random.choice(self._video_list) if self._is_train
                 else self._video_list[index])
        video_path, label, num_frames = entry

        frames = []
        for seg in range(self._num_segments):
            pick_position = (self._get_train_frame_index if self._is_train
                             else self._get_test_frame_index)
            gop_index, gop_pos = pick_position(num_frames, seg)

            img = load(video_path, gop_index, gop_pos,
                       representation_idx, self._accumulate)

            if img is None:
                # Fall back to an all-zero frame so the batch can still form.
                print('Error: loading video %s failed.' % video_path)
                channels = 2 if representation == 'mv' else 3
                img = np.zeros((256, 256, channels))
            elif representation in ('mv', 'residual'):
                if representation == 'mv':
                    img = clip_and_scale(img, 20)
                # Shift by 128 and clamp to 0..255 before casting to uint8.
                img += 128
                img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

            if representation == 'iframe':
                img = color_aug(img)
                # BGR to RGB. (PyTorch uses RGB according to doc.)
                img = img[..., ::-1]

            frames.append(img)

        frames = np.array(self._transform(frames))
        # Move the channel axis in front of the two spatial axes.
        frames = np.transpose(frames, (0, 3, 1, 2))
        tensor = torch.from_numpy(frames).float() / 255.0

        if representation == 'iframe':
            tensor = (tensor - self._input_mean) / self._input_std
        elif representation == 'residual':
            tensor = (tensor - 0.5) / self._input_std
        elif representation == 'mv':
            tensor = tensor - 0.5

        return tensor, label
Пример #7
0
 def process_segment_consecutive(self, frames, gop_index, gop_pos, video_path,representation_idx):
     """Load one frame, preprocess it and append it to ``frames``.

     The frame at ``(gop_index, gop_pos)`` of ``video_path`` is loaded in the
     requested representation. Motion vectors and residuals are shifted by
     128 and clamped to uint8; I-frames are colour-augmented and have their
     channel order reversed. ``frames`` is modified in place and returned.
     """
     representation = self._representation
     img = load(video_path, gop_index, gop_pos,
                representation_idx, self._accumulate)

     if img is None:
         # Substitute an all-zero frame when loading failed.
         print('Error: loading video %s failed.' % video_path)
         channels = 2 if representation == 'mv' else 3
         img = np.zeros((256, 256, channels))
     elif representation in ('mv', 'residual'):
         if representation == 'mv':
             img = clip_and_scale(img, 20)
         img += 128
         img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

     if representation == 'iframe':
         img = color_aug(img)
         # BGR to RGB. (PyTorch uses RGB according to doc.)
         img = img[..., ::-1]

     frames.append(img)
     return frames
Пример #8
0
def load_segment(is_train, num_frames, seg, representation, num_segments,
                 video_path, representation_idx, accumulate):
    """Load and preprocess the frame that represents segment ``seg``.

    The frame position comes from ``get_train_frame_index`` when ``is_train``
    is truthy and from ``get_test_frame_index`` otherwise. Motion vectors and
    residuals are shifted by 128 and clamped to uint8. I-frames are
    colour-augmented in training mode only and always have their channel
    order reversed. An all-zero frame is returned when loading fails.
    """
    locate = get_train_frame_index if is_train else get_test_frame_index
    gop_index, gop_pos = locate(num_frames, seg, representation, num_segments)

    img = load(video_path, gop_index, gop_pos, representation_idx, accumulate)

    if img is None:
        print('Error: loading video %s failed.' % video_path)
        channels = 2 if representation == 'mv' else 3
        img = np.zeros((256, 256, channels))
    elif representation in ('mv', 'residual'):
        if representation == 'mv':
            img = clip_and_scale(img, 20)
        img += 128
        img = (np.minimum(np.maximum(img, 0), 255)).astype(np.uint8)

    if representation != 'iframe':
        return img

    if is_train:
        img = color_aug(img)
    # BGR to RGB. (PyTorch uses RGB according to doc.)
    return img[..., ::-1]
Пример #9
0
        def load_frame_from_compressed_video(video_path,
                                             frame_id,
                                             frame_type,
                                             accumulated,
                                             group_size=12):
            """
            Load a single frame from a compressed raw video.

            :param video_path: the path to the mp4 raw video
            :param frame_id: int, starts from 1
            :param frame_type: int, 0 for I-frame (also the image),
                        1 for motion vector, 2 for residual
            :param accumulated: bool, determines whether the accumulated
                        motion vector or residual is loaded
            :param group_size: number of frames per GOP, default is 12

            :return: ndarray, the loaded frame. For I-frame and residual it
                        has BGR format; for motion vector, channels 0 and 1
                        are the x and y offsets respectively.
            """
            # frame_id starts from 1, whereas GOP indices start from 0.
            zero_based = frame_id - 1
            gop_idx = int(zero_based / group_size)
            # Position of the frame inside its group.
            in_group_idx = int(zero_based % group_size)
            return coviar.load(video_path, gop_idx, in_group_idx,
                               frame_type, accumulated)
def _parse_function_v2(filename, label, nSegments):
    """Load all three representations of a video as normalised float arrays.

    For representation indices 0, 1 and 2, ``nSegments`` frames are loaded
    (one per segment, at the position chosen by ``getTrainFrameIndex``),
    scaled by 1/255, normalised and cropped to rows 16:240, columns 52:276.

    Args:
        filename (bytes): Encoded path of the video; decoded once up front.
        label: Passed through unchanged.
        nSegments (int): Number of frames sampled per representation.
    Returns:
        tuple: ``(rep_0, rep_1, rep_2, label)`` where each ``rep_k`` is a
        float32 array holding the frames of representation index ``k``.
    """
    # Both values are loop invariant, so compute them once rather than for
    # every segment of every representation.
    video_path = filename.decode()
    nFrames = get_num_frames(video_path)

    reps_np = []
    for representation_idx in range(0, 3):
        frames = []
        for seg_idx in range(0, nSegments):
            gop_index, gop_pos = getTrainFrameIndex(nFrames, seg_idx,
                                                    nSegments,
                                                    representation_idx)
            img = load(video_path, gop_index, gop_pos,
                       representation_idx, True)
            if img is None:
                # Loading failed: substitute an all-zero frame.
                img = np.zeros((256, 256, 3))
            elif representation_idx == 1:
                # Scale by 127.5/20, shift by 128, clamp to uint8.
                img = (img * (127.5 / 20)).astype(np.int32)
                img += 128
                img = (np.minimum(np.maximum(img, 0),
                                  255)).astype(np.uint8)
                # Append an all-zero channel so the frame has one more
                # channel, matching the 3-channel fallback above.
                img = np.append(img,
                                np.zeros_like(img[..., 0, None]),
                                axis=-1)
            elif representation_idx == 2:
                img += 128
                img = (np.minimum(np.maximum(img, 0),
                                  255)).astype(np.uint8)
            else:
                img = img[..., ::-1]  # flipping to RGB
            frames.append(img)

        np_frames = np.array(frames).astype(np.float32) / 255.0

        if representation_idx == 0:
            np_frames = (np_frames - DATA_MEAN) / DATA_STD
        elif representation_idx == 2:
            np_frames = (np_frames - 0.5) / DATA_STD
        elif representation_idx == 1:
            np_frames = (np_frames - 0.5)
        # Fixed 224x224 crop window.
        np_frames = np_frames[:, 16:240, 52:276, :].astype(np.float32)
        reps_np.append(np_frames)

    return reps_np[0], reps_np[1], reps_np[2], label
def video2mv(path_to_video, target_directory):
    """Pickle the motion vectors of every frame of a video.

    Frame ``j`` of GOP ``i`` is loaded as type 1 with accumulation enabled and
    written to ``<target_directory>/<i * 12 + j, zero-padded to 6 digits>.pkl``.
    The time spent on each frame is printed.

    Args:
        path_to_video: Path of the video handed to ``coviar``.
        target_directory (str): Existing directory receiving the pickles.
    """
    gop_size = 12  # frames per GOP assumed by the file numbering
    num_group = coviar.get_num_gops(path_to_video)
    for i in range(num_group):
        for j in range(gop_size):
            idx = i * gop_size + j

            start = timer()

            mv = coviar.load(path_to_video, i, j, 1, True)
            mv_path = '%s/%06d.pkl' % (target_directory, idx)
            # Close the file deterministically instead of leaking the handle.
            with open(mv_path, 'wb') as mv_file:
                pickle.dump(mv, mv_file)

            end = timer()
            print(timedelta(seconds=end - start))
def video2mv_collection(path_to_video, target_directory, collection):
    """Pickle int8 motion vectors for the frame indices in ``collection``.

    Each index is converted with ``int`` and mapped to GOP ``idx // 12`` and
    in-GOP position ``idx % 12 + 5``. The motion vectors are cast to int8 and
    written to ``<target_directory>/<idx, zero-padded to 6 digits>.pkl``, then
    read back with ``load_mv`` to check the round trip.

    On the first failure the target path is appended to ``error_recorder``
    and the function returns without processing the remaining indices.

    Args:
        path_to_video: Path of the video handed to ``coviar``.
        target_directory (str): Existing directory receiving the pickles.
        collection: Iterable of frame indices (anything ``int`` accepts).
    """
    for idx in collection:
        idx_int = int(idx)
        group_idx = idx_int // 12
        # NOTE(review): the +5 offset can yield positions beyond 11 --
        # confirm this matches the GOP layout of the input videos.
        frame_idx = idx_int % 12 + 5
        # Built before the try block so the error handler can always name it.
        mv_path = '%s/%06d.pkl' % (target_directory, idx_int)
        try:
            mv = coviar.load(path_to_video, group_idx, frame_idx, 1, True)
            mv = mv.astype('int8')
            with open(mv_path, 'wb') as mv_file:
                pickle.dump(mv, mv_file, protocol=2)
        except Exception:
            print("Error\n\n\n\n\n")
            error_recorder.append(mv_path)
            return

        loaded_mv = load_mv(mv_path)
        assert (loaded_mv == mv).all()
def video2mv_collection(path_to_video, target_directory, startIdx, seg_len):
    """Pickle int8 motion vectors for the first ``seg_len`` frames of a video.

    Frame ``idx`` maps to GOP ``idx // 12`` and in-GOP position ``idx % 12``.
    The motion vectors are cast to int8 and written to
    ``<target_directory>/<idx, zero-padded to 6 digits>.pkl``, then read back
    with ``load_mv`` to check the round trip.

    On the first failure the target path is appended to ``error_recorder``
    and the function returns without processing the remaining frames.

    Args:
        path_to_video: Path of the video handed to ``coviar``.
        target_directory (str): Existing directory receiving the pickles.
        startIdx: Currently unused; file names are numbered from 0.
        seg_len (int): Number of frames to export.
    """
    for idx in range(seg_len):
        group_idx = idx // 12
        frame_idx = idx % 12
        # Built before the try block so the error handler can always name it.
        mv_path = '%s/%06d.pkl' % (target_directory, idx)
        try:
            mv = coviar.load(path_to_video, group_idx, frame_idx, 1, True)
            mv = mv.astype('int8')
            with open(mv_path, 'wb') as mv_file:
                pickle.dump(mv, mv_file, protocol=2)
        except Exception:
            print("Error\n\n\n\n\n")
            error_recorder.append(mv_path)
            return

        loaded_mv = load_mv(mv_path)
        assert (loaded_mv == mv).all()
Пример #14
0
    def __getitem__(self, index):
        """Return ``(input, label)`` for one video.

        In training mode a random entry of ``self._video_list`` is used and
        ``index`` is ignored; otherwise entry ``index`` is used. One frame per
        segment is loaded, preprocessed, transformed and normalised, and the
        first two axes of the resulting tensor are swapped before returning.
        """
        representation = self._representation
        # NOTE(review): the fallback index here is 3 -- confirm that `load`
        # treats it as the I-frame representation.
        representation_idx = {'mv': 1, 'residual': 2}.get(representation, 3)

        # Training picks a random video; evaluation uses the given index.
        entry = (random.choice(self._video_list) if self._is_train
                 else self._video_list[index])
        video_path, label, num_frames = entry

        frames = []
        for seg in range(self._num_segments):
            pick_position = (self._get_train_frame_index if self._is_train
                             else self._get_test_frame_index)
            gop_index, gop_pose = pick_position(num_frames, seg)

            img = load(video_path, gop_index, gop_pose, representation_idx,
                       self._accumulate)

            if img is None:
                # Substitute an all-zero frame when loading failed.
                print('Error: loading compressed video {} failed.'.format(
                    video_path))
                channels = 2 if representation == 'mv' else 3
                img = np.zeros((256, 256, channels))
            elif representation in ('mv', 'residual'):
                if representation == 'mv':
                    img = clip_and_scale(img, 20)
                # Shift by 128 and clamp to 0..255 before casting to uint8.
                img += 128
                img = (np.minimum(np.maximum(img, 0),
                                  255)).astype(np.uint8)

            if representation == 'iframe':
                img = color_aug(img)
                # BGR to RGB
                img = img[..., ::-1]

            frames.append(img)

        frames = np.array(self._transform(frames))
        frames = np.transpose(frames, (0, 3, 1, 2))
        tensor = torch.from_numpy(frames).float() / 255.0

        if representation == 'iframe':
            tensor = (tensor - self._input_mean) / self._input_std
        elif representation == 'residual':
            tensor = (tensor - 0.5) / self._input_std
        elif representation == 'mv':
            tensor = tensor - 0.5

        # Swap the first two axes to stay consistent with the raw-data
        # layout (np.transpose reorders ndarray axes, permute tensor axes).
        tensor = tensor.permute(1, 0, 2, 3)
        # NOTE(review): debug output emitted for every sample.
        print(tensor.shape)

        return tensor, label
Пример #15
0
def _perturbation_image(model, original_image, ori_label, video_path,
                        save_path, transform_post, args, config, device):
    """Query-based perturbation loop driven by motion-vector-warped noise.

    Each iteration draws Gaussian noise, warps it with ``F.grid_sample`` using
    a grid derived from the video's motion vectors, queries ``model`` on two
    perturbed copies and on the current image, forms a finite-difference
    estimate from the two perturbed losses, updates ``prior`` and steps the
    image against ``prior.sign()``. The image is kept within +/-0.03 of the
    starting image and inside [0, 1].

    The loop stops as soon as the prediction for the current image differs
    from ``ori_label`` or the query budget (60000) is reached.

    ``save_path`` and ``config`` are not used by the active code
    (``save_path`` only appears in commented-out calls).

    Returns:
        tuple: ``(pred_adv_label, num_query, success)``.
    """

    original_image = original_image.to(device)

    total_frames = get_num_frames(video_path)
    # NOTE(review): the shape quoted below has five dimensions, but the next
    # line unpacks exactly four -- confirm which one is current.
    original_image_ = original_image.clone()  # torch.Size([1, 3, 72, 84, 84])
    num_frame, channel, height, width = original_image.shape
    dim = height * width * channel  # not read again in this function
    loop = 0
    inner_loop = 0
    success = False
    num_query = 0
    num_pframe = 0  # not read again in this function

    # Hyper-parameters of the search.
    max_query = 60000
    exploration = 0.1
    fd_eta = 0.1
    online_lr = 0.1
    flow_lr = 0.025
    # The label following the original one, wrapping around at num_classes.
    target_label = (ori_label + 1) % args.num_classes
    '''
    while target_label == ori_label:
        target_label = torch.tensor([random.sample(range(174), 1)[0]]).cuda()
    '''
    motion_vector = list()

    prior = torch.zeros(num_frame, channel, height, width).to(device)
    delta = torch.zeros(num_frame, channel, height, width).to(device)
    est_grad = torch.zeros(num_frame, channel, height, width).to(device)
    # Slots 0/1 hold the two probes, slot 2 the current image.
    adv_img = torch.zeros(3, num_frame, channel, height, width).to(device)
    # iframe, index_visual and index_motion are allocated but not read again
    # in this function.
    iframe = torch.zeros(num_frame, height, width, channel).to(device)
    noise_frames = torch.zeros(num_frame, channel, height, width).to(device)

    index_visual = torch.zeros(num_frame, 2, height, width).to(device)
    index_motion = torch.zeros(num_frame, height, width, 2).to(device)

    while not (num_query > max_query):
        pred_adv_logit = list()
        start1 = time.time()

        # NOTE(review): end_index is 0 when total_frames < GOP_SIZE, which
        # makes the modulo below divide by zero.
        end_index = total_frames // GOP_SIZE
        # Refresh the motion-vector grid every args.interval iterations; in
        # between, the grid from the last refresh is reused.
        if loop % args.interval == 0:  # can also try 8 for tsn2d
            #mv_index = int(torch.rand(1)*end_index)
            # Cycle through the GOPs in order.
            mv_index = inner_loop % end_index
            mv = load(video_path, mv_index, 11, 1, True)

            # Shift so the minimum is 0, add a zero third channel and repeat
            # the same array for every frame.
            mv = mv - mv.min()
            mv = np.dstack((mv, np.zeros((mv.shape[:2] + (1, )))))
            mv = [mv.astype(np.uint8)] * num_frame
            inner_loop += 1
            motion_vector = transform_post(mv)
            motion_vector = np.stack(motion_vector, axis=0) * 255
            motion_vector = torch.from_numpy(motion_vector).permute(
                0, 2, 3, 1).float().to(device)

            # Rescale both grid channels linearly using height / width.
            motion_vector[:, :, :, 0] = (2 * motion_vector[:, :, :, 0] -
                                         height + 1.) / (height - 1.)
            motion_vector[:, :, :, 1] = (2 * motion_vector[:, :, :, 1] -
                                         width + 1.) / (width - 1.)

        # One noise image shared by all frames, warped by the first two grid
        # channels.
        noise_frames = torch.randn(1, 3, height,
                                   width).repeat(num_frame, 1, 1, 1).to(device)
        noise_frames = F.grid_sample(noise_frames, motion_vector[:, :, :, :2])

        # Two antithetic probes around the current prior.
        exp_noise = exploration * noise_frames
        q1 = prior + exp_noise
        q2 = prior - exp_noise
        adv_img[0] = original_image + fd_eta * q1 / norm2(q1)
        adv_img[1] = original_image + fd_eta * q2 / norm2(q2)
        adv_img[2] = original_image
        # Query the model on both probes and on the current image; the model
        # consumes NumPy input and its output is converted via .asnumpy().
        for i in range(3):
            img_group = normalization(adv_img[i].clone().cpu().numpy(), args)
            tmp_result = model(img_group.astype('float32', copy=False))
            tmp_result = FF.mean(tmp_result, axis=0, keepdims=True)
            tmp_result = torch.from_numpy(tmp_result.asnumpy()).to(device)
            pred_adv_logit.append(tmp_result)

        # Only the first return value of _pert_loss is needed for the probes.
        l1, _, _, _, _, _, _ = _pert_loss(pred_adv_logit[0], ori_label,
                                          target_label, delta)
        l2, _, _, _, _, _, _ = _pert_loss(pred_adv_logit[1], ori_label,
                                          target_label, delta)
        loss, target, real, other, other_class, second_logit, second_class = _pert_loss(
            pred_adv_logit[2], ori_label, target_label, delta)

        num_query += 3
        # Finite-difference estimate from the two probe losses.
        est_deriv = (l1 - l2) / (fd_eta * exploration * exploration)
        est_grad = est_deriv.item() * exp_noise
        prior += online_lr * est_grad

        # Signed step, then project back to within 0.03 of the starting
        # image and into [0, 1].
        original_image = original_image - flow_lr * prior.sign()
        delta = original_image_ - original_image
        tmp_norm = norm2(delta)  # not read again in this function
        original_image = torch.max(
            torch.min(original_image, original_image_ + 0.03),
            original_image_ - 0.03)
        original_image = torch.clamp(original_image, 0, 1)

        pred_adv_label = pred_adv_logit[2].argmax()
        # NOTE(review): `loop == max_query` compares the iteration counter
        # with the query budget -- confirm this is intended.
        if (loop % 1000 == 0) or (loop
                                  == max_query) or pred_adv_label != ori_label:
            #if (loop % 1000 ==0) or (loop == max_query) or pred_adv_label == target_label:
            print('[T2]{:.3f}s for [{}]-th loop\t'
                  'Queries {:03d}\t'
                  'Overall loss {:.3f}\t'
                  'est_deriv {:.3f}\t'
                  'Target {}\t'
                  'Target logit {:.3f}\t'
                  'ori logit {:.3f}\t'
                  'ori class {}\t'
                  'second logit {:.3f}\t'
                  'second class {}\t'.format(time.time() - start1,
                                             loop, num_query, loss,
                                             est_deriv.item(), target, real,
                                             other, other_class, second_logit,
                                             second_class))

        loop += 1
        # Success: the prediction for the current image changed.
        if pred_adv_label != ori_label:
            #if pred_adv_label == target_label:
            #print('Predicted label is {}\t'.format(pred_adv_label))
            diff = adv_img[2] - original_image_
            print('diff max {:.3f}, diff min {:.3f}'.format(
                diff.max(), diff.min()))
            success = True
            #save_images(num_frame, original_image_.cpu().permute(0,2,3,1).numpy(), adv_img[2].cpu().permute(0,2,3,1).numpy(), save_path)
            break

        # Query budget exhausted without a label change.
        if num_query >= max_query:
            #save_images(num_frame, original_image_.cpu().permute(0,2,3,1).numpy(), adv_img[2].cpu().permute(0,2,3,1).numpy(), save_path)
            break
    return pred_adv_label, num_query, success
Пример #16
0
    def __getitem__(self, index):
        """Return every GOP of one video as a stacked multi-modal tensor.

        In training mode a random entry of ``self._video_list`` is used and
        ``index`` is ignored; otherwise entry ``index`` is used. For each GOP
        the I-frame (position 0, type 0), the motion vectors (position 6,
        type 1) and the residual (position 6, type 2) are loaded, transformed
        and normalised, then concatenated along dim 1. The per-GOP tensors
        are concatenated along dim 0.

        Returns:
            tuple: ``(tensor, label)``.
        Raises:
            ValueError: If the video holds fewer than ``GOP_SIZE`` frames, so
                there is no GOP to load (previously this surfaced as an
                ``UnboundLocalError`` at the return statement).
        """
        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
        else:
            video_path, label, num_frames = self._video_list[index]

        num_gop = num_frames // GOP_SIZE
        if num_gop < 1:
            raise ValueError(
                'Video %s has %s frames, fewer than one GOP.'
                % (video_path, num_frames))

        def to_tensor(frames):
            # Move the channel axis in front of the spatial axes and scale
            # the values by 1/255.
            frames = np.transpose(np.array(frames), (0, 3, 1, 2))
            return torch.from_numpy(frames).float() / 255.0

        per_gop = []
        for gop in range(num_gop):
            img_i = load(video_path, gop, 0, 0, self._accumulate)
            img_i = color_aug(img_i)
            # Reverse the channel axis (presumably BGR -> RGB; confirm).
            img_i = img_i[..., ::-1]

            img_m = load(video_path, gop, 6, 1, self._accumulate)
            img_m = clip_and_scale(img_m, 20)
            img_m += 128
            img_m = (np.minimum(np.maximum(img_m, 0), 255)).astype(np.uint8)

            img_r = load(video_path, gop, 6, 2, self._accumulate)
            img_r += 128
            img_r = (np.minimum(np.maximum(img_r, 0), 255)).astype(np.uint8)

            # Apply the three transforms in the original order, in case they
            # share random state.
            frames_i = self._transform_i([img_i])
            frames_m = self._transform_m([img_m])
            frames_r = self._transform_r([img_r])

            input_i = (to_tensor(frames_i) - self._input_mean) / self._input_std
            input_m = to_tensor(frames_m) - 0.5
            input_r = (to_tensor(frames_r) - 0.5) / self._input_std

            per_gop.append(torch.cat((input_i, input_m, input_r), 1))

        # Concatenate once at the end instead of re-copying the growing
        # tensor on every iteration.
        stacked = torch.cat(per_gop, 0)
        return stacked, label
Пример #17
0
def mploader(video_prefix, vid_subpath, sampled_idxs, modality, accumulate,
             ds_factor, _mv_minmaxnorm, mv_loadimg):
    """Load the sampled frames of one video for the requested modality.

    Args:
        video_prefix: Root of the extracted frame images. Only
            '/HMDB51/fb/TSN_input/' and '/UCF101/TSN_input/' are mapped to a
            video file and to a motion-vector/residual image root.
        vid_subpath: Video path relative to the dataset root, without the
            '.mp4' extension.
        sampled_idxs: Frame indices to load; image file names use index + 1.
        modality: One of 'mv', 'res', 'I' or 'flow+mp4'.
        accumulate: Forwarded unchanged to ``coviar.load``.
        ds_factor: Unused; kept so existing callers keep working.
        _mv_minmaxnorm: When equal to 1, motion vectors are passed through
            ``clip_and_scale(img, 20)`` before the +128 offset.
        mv_loadimg: When true, pre-extracted JPEGs are preferred for motion
            vectors and residuals, and the video is decoded only when the
            JPEG is missing. When false the video is always decoded.

    Returns:
        A list with one ``uint8`` array per sampled index. For 'flow+mp4'
        each array is flow, motion vector and residual concatenated along
        axis 2. ``None`` for an unrecognised modality.

    Raises:
        FileNotFoundError: In 'flow+mp4' mode when the x-flow image of a
            sampled frame does not exist.
    """
    image_loader = get_default_image_loader()

    # change this part according your relative path between frame image and videos and stored MV and R
    if video_prefix == '/HMDB51/fb/TSN_input/':
        video_path = os.path.join('/HMDB51/fb/videos_mpeg4/',
                                  vid_subpath + '.mp4')
        video_prefix2 = '/HMDB51/fb/MC_input/' if mv_loadimg else '/'
    elif video_prefix == '/UCF101/TSN_input/':
        video_path = os.path.join('/UCF101/fb/mpeg4_videos/',
                                  vid_subpath + '.mp4')
        video_prefix2 = '/UCF101_MC_input/' if mv_loadimg else '/'
    # NOTE(review): any other video_prefix leaves video_path / video_prefix2
    # undefined and the first frame access fails with a NameError.

    index_pos = [get_gop_pos(frame_idx, 'mv') for frame_idx in sampled_idxs]

    def frame_file(prefix, template, frame_idx):
        # File names are numbered from 1, sampled indices are used as idx + 1.
        return os.path.join(prefix, vid_subpath,
                            template.format(frame_idx + 1))

    def decode(gop_index, gop_pos, representation):
        # Imported on first use so that the fallback also works when
        # mv_loadimg is true and coviar was not imported up front.
        from coviar import load
        return load(video_path, gop_index, gop_pos, representation,
                    accumulate)

    def load_mv(j):
        """Motion vector of sample j, offset by +128 and clamped to 0..255."""
        x_path = frame_file(video_prefix2, 'mv_x_{:05d}.jpg', sampled_idxs[j])
        if mv_loadimg and os.path.exists(x_path):
            y_path = frame_file(video_prefix2, 'mv_y_{:05d}.jpg',
                                sampled_idxs[j])
            # np.float was removed from NumPy; float64 is what it aliased.
            img = np.asarray(image_loader(x_path, 'flow',
                                          y_path)).astype(np.float64)
            img += -128
        else:
            gop_index, gop_pos = index_pos[j]
            img = decode(gop_index, gop_pos, 1)
        if _mv_minmaxnorm == 1:
            img = clip_and_scale(img, 20)  # scale values from +-20 to +-127.5
        img += 128
        return np.minimum(np.maximum(img, 0), 255)

    def load_res(j):
        """Residual of sample j; decoded residuals are offset and clamped."""
        res_path = frame_file(video_prefix2, 'res_{:05d}.jpg',
                              sampled_idxs[j])
        if mv_loadimg and os.path.exists(res_path):
            return np.asarray(image_loader(res_path, 'rgb'))
        gop_index, gop_pos = index_pos[j]
        img = decode(gop_index, gop_pos, 2)
        img += 128
        return np.minimum(np.maximum(img, 0), 255)

    if modality == 'mv':
        return [load_mv(j).astype(np.uint8) for j in range(len(index_pos))]

    if modality == 'res':
        return [load_res(j).astype(np.uint8) for j in range(len(index_pos))]

    if modality == 'I':
        iframes = []
        for gop_index, _gop_pos in index_pos:
            # Position 0 of the GOP is always requested, whatever gop_pos is.
            img = decode(gop_index, 0, 0)
            img = img[..., ::-1]
            iframes.append(img.astype(np.uint8))
        return iframes

    if modality == 'flow+mp4':
        video = []
        for i, frame_idx in enumerate(sampled_idxs):
            flow_x = frame_file(video_prefix, 'flow_x_{:05d}.jpg', frame_idx)
            if not os.path.exists(flow_x):
                # Previously a missing file silently reused the stacked
                # array of the preceding frame (or hit an unbound name).
                raise FileNotFoundError(
                    'optical flow image not found: ' + flow_x)
            flow = image_loader(
                flow_x, 'flow',
                frame_file(video_prefix, 'flow_y_{:05d}.jpg', frame_idx))
            stacked = np.concatenate((flow, load_mv(i), load_res(i)), axis=2)
            video.append(stacked.astype(np.uint8))
        return video
Пример #18
0
    return img


# Timing script: decodes every frame position of the first 10 GOPs
# (12 positions each) of `file_path` and measures how long the
# representation-0 load takes.
file_path = "output.mp4"

# Accumulated wall-clock time of the timed loads and the number of them.
totalTime = timedelta(seconds=0)
count = 0

for i in range(0, 10):  # i is passed to coviar.load as the GOP index
    for j in range(0, 12):  # j is passed as the position inside the GOP

        # Added
        count += 1
        start = timer()

        # Representation 0; only this call is inside the timed region.
        image = coviar.load(file_path, i, j, 0, True)
        #plt.imshow(image)
        #plt.savefig(str(i)+'_' +str(j) +'_base.jpg')

        # Added
        end = timer()
        totalTime += timedelta(seconds=end - start)
        print(timedelta(seconds=end - start))

        # Representation 1 (presumably motion vectors, matching the other
        # loaders in this file); loaded but not timed, result is overwritten.
        image = coviar.load(file_path, i, j, 1, True)
        #image = flow_to_img(image)
        #plt.imshow(image)
        #plt.savefig(str(i)+'_' +str(j) +'_mv.jpg')

        # Representation 2 (presumably the residual); not timed either.
        image = coviar.load(file_path, i, j, 2, True)
        image = np.asarray(image)
Пример #19
0
import argparse

import numpy as np
import skimage.io as io

from coviar import load

def str2bool(value):
    """Convert a command-line string into a bool.

    ``type=bool`` treats every non-empty string (including "False") as True.
    Here the usual negative spellings are False and everything else stays
    True, so invocations that worked before keep their meaning.
    """
    return value.lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


if __name__ == '__main__':
    # Decode one frame representation from a video and print basic stats.
    parser = argparse.ArgumentParser()
    parser.add_argument("--load_dir", type=str)
    parser.add_argument("--gop_idx", type=int, default=0)
    parser.add_argument("--frame_idx", type=int, default=0)
    parser.add_argument("--repr", type=int, default=0)
    parser.add_argument("--accumulate", type=str2bool, default=True)
    parser.add_argument("--save_dir", type=str, default="./frame.png")

    args = parser.parse_args()

    frame = load(args.load_dir, args.gop_idx, args.frame_idx, args.repr,
                 args.accumulate)
    if frame is None:
        # Other callers in this file treat None as a failed decode; report it
        # instead of failing on the comparison below.
        raise SystemExit('Error: loading video %s failed.' % args.load_dir)
    print(np.max(frame), frame[frame < 0], frame.dtype, frame.shape)
    # io.imsave(args.save_dir, frame)
Пример #20
0
def _get_image_blob(roidb, target_size, im_type=('im', 'residual', 'mv')):
    """Build an input blob from the compressed-video frames in the roidb.

    For every roidb entry the matching '.mp4' is located from the frame
    path, the requested representations are decoded with ``coviar.load``,
    preprocessed, optionally flipped, and written into one 8-channel array:
    channels 0:3 image, 3:5 motion vector, 5:8 residual. Channels of a
    representation that was not requested stay zero.

    Args:
        roidb: Sequence of dicts read with the keys 'image', 'dataset_name'
            and 'flipped'.
        target_size: Per-image target size, indexed like ``roidb``.
        im_type: Collection of representations to load; any of 'im',
            'residual', 'mv'.

    Returns:
        (blob, im_scales): the result of ``im_list_to_blob`` and the list of
        scales returned by the preprocessing helpers.

    Raises:
        ValueError: If an entry's dataset name is neither a 'motchallenge'
            nor a 'citypersons' one.
        RuntimeError: If the derived video file does not exist.
    """
    import os
    import coviar

    num_images = len(roidb)

    processed_ims = []
    im_scales = []
    for i in range(num_images):
        frame_path = roidb[i]['image']  # '/data0/liuqk/MOTChallenge/2DMOT2015/train/ETH-Bahnhof/img1/000229.jpg'

        # This one frame is redirected to its predecessor.
        # NOTE(review): the reason is not visible here -- confirm.
        if frame_path == '/data0/liuqk/MOTChallenge/2DMOT2015/train/KITTI-17/img1/000145.jpg':
            frame_path = '/data0/liuqk/MOTChallenge/2DMOT2015/train/KITTI-17/img1/000144.jpg'
        frame_path_info = frame_path.split('/')

        dataset_name = roidb[i]['dataset_name']
        if 'motchallenge' in dataset_name:
            # ['', 'data0', 'liuqk', 'MOTChallenge', '2DMOT2015', 'train', 'ETH-Bahnhof', 'img1', '000229.jpg']
            # The video sits in the sequence directory and is named after it.
            seq_path = os.path.join('/', *frame_path_info[:-2])
            video_path = os.path.join(seq_path, frame_path_info[-3] + '.mp4')
        elif 'citypersons' in dataset_name:
            # ['', 'data0', 'liuqk', 'Cityscapes', 'citysacpesdataset', 'leftImg8bit',
            #  'train', 'tubingen', 'tubingen_000082_000019_leftImg8bit', '000001.png']
            # The video sits next to the frame and is named after its directory.
            seq_path = os.path.join('/', *frame_path_info[:-1])
            video_path = os.path.join(seq_path, frame_path_info[-2] + '.mp4')
        else:
            # Without this the previous image's video would be reused
            # silently (or the name would be unbound for the first image).
            raise ValueError('unsupported dataset: %r' % (dataset_name,))

        if not os.path.exists(video_path):
            raise RuntimeError(video_path + ' does not exists')
        frame_id = int(frame_path_info[-1][0:6])

        gop_idx = int((frame_id - 1) / 12)  # GOP starts from 0, while frame_id  here starts from 1.
        in_group_idx = int((frame_id - 1) % 12)  # the index in the group

        flipped = roidb[i]['flipped']

        if 'im' in im_type:
            im = coviar.load(video_path, gop_idx, in_group_idx, 0, True)
            # Replicate a single-channel image into three channels.
            if len(im.shape) == 2:
                im = im[:, :, np.newaxis]
                im = np.concatenate((im, im, im), axis=2)
            im, im_scale = prep_im_for_blob(im=im,
                                            pixel_normal_scale=cfg.PIXEL_NORMAL_SCALE,
                                            pixel_means=cfg.PIXEL_MEANS,
                                            pixel_stds=cfg.PIXEL_STDS,
                                            target_size=target_size[i],
                                            channel=cfg.PIXEL_CHANNEL)
            if flipped:
                im = im[:, ::-1, :]

            im_shape = im.shape
        else:
            im = None

        if 'mv' in im_type:
            mv = coviar.load(video_path, gop_idx, in_group_idx, 1, True)
            mv, im_scale = prep_mv_for_blob(im=mv,
                                            mv_normal_scale=cfg.MV_NORMAL_SCALE,
                                            mv_means=cfg.MV_MEANS,
                                            mv_stds=cfg.MV_STDS,
                                            target_size=target_size[i],
                                            channel=cfg.MV_CHANNEL)
            if flipped:
                # Mirror along axis 1 and negate channel 0 to match.
                mv = mv[:, ::-1, :]
                mv[:, :, 0] = - mv[:, :, 0]

            im_shape = mv.shape
        else:
            mv = None

        if 'residual' in im_type:
            residual = coviar.load(video_path, gop_idx, in_group_idx, 2, True)
            # check whether it is a gray image
            if len(residual.shape) == 2:
                residual = residual[:, :, np.newaxis]
                residual = np.concatenate((residual, residual, residual), axis=2)

            residual, im_scale = prep_residual_for_blob(im=residual,
                                                        pixel_normal_scale=cfg.RESIDUAL_NORMAL_SCALE,
                                                        pixel_means=cfg.RESIDUAL_MEANS,
                                                        pixel_stds=cfg.RESIDUAL_STDS,
                                                        target_size=target_size[i],
                                                        channel=cfg.RESIDUAL_CHANNEL)

            if flipped:
                residual = residual[:, ::-1, :]

            im_shape = residual.shape
        else:
            residual = None

        # Fixed channel layout: 3 image + 2 motion vector + 3 residual.
        im_data = np.zeros((im_shape[0], im_shape[1], 3+2+3))
        if im is not None:
            im_data[:, :, 0:3] = im
        if mv is not None:
            im_data[:, :, 3:5] = mv
        if residual is not None:
            im_data[:, :, 5:8] = residual

        # # ------------ show some results ------------------------
        # from lib.model.utils.misc import show_compressed_frame
        # show_compressed_frame(np.array(im_data[:,:,0:3] + cfg.PIXEL_MEANS, dtype=np.uint8), 0)
        # show_compressed_frame(im_data[:, :, 3:5], 1)
        # show_compressed_frame(np.array(im_data[:, :, 5:8], dtype=np.uint8), 2)

        im_scales.append(im_scale)
        processed_ims.append(im_data)

    # Create a blob to hold the input images
    blob = im_list_to_blob(processed_ims)

    return blob, im_scales
Пример #21
0
            mv = mv.astype('int8')
            mv_path = '%06d' % idx_int
            mv_path = target_directory + '/' + mv_path + '.pkl'
            pickle.dump(mv, open(mv_path, 'wb'), protocol=2)
        except:
            print("Error\n\n\n\n\n")
            error_recorder.append(mv_path)
            return

        loaded_mv = load_mv(mv_path)
        assert (loaded_mv == mv).all()


for idx in range(num_group):
    for j in range(12):
        mv = coviar.load(path_to_video, idx, j, 1, True)
        mv = mv.astype('int8')
        mv_path = '%06d' % (idx * 12 + j)
        mv_path = mv_path + '.pkl'
        pickle.dump(mv, open(mv_path, 'wb'), protocol=2)

    for idx in collection:
        idx_int = int(idx)
        group_idx = idx_int // 12
        frame_idx = idx_int % 12 + 5
        try:
            mv = coviar.load(path_to_video, group_idx, frame_idx, 1, True)
            mv = mv.astype('int8')
            mv_path = '%06d' % idx_int
            mv_path = target_directory + '/' + mv_path + '.pkl'
            pickle.dump(mv, open(mv_path, 'wb'), protocol=2)
Пример #22
0
    def __getitem__(self, index):
        """Return ``(input, label)`` for one video in the configured representation.

        In training mode a random entry of the video list is used and
        ``index`` is ignored; otherwise entry ``index`` is used. One frame
        per segment is decoded, preprocessed, passed through
        ``self._transform`` and normalised into a float tensor.
        """
        rep = self._representation
        # coviar representation code: 1 for 'mv', 2 for 'residual', else 0.
        representation_idx = {'mv': 1, 'residual': 2}.get(rep, 0)

        if self._is_train:
            sample = random.choice(self._video_list)
            pick_frame = self._get_train_frame_index
        else:
            sample = self._video_list[index]
            pick_frame = self._get_test_frame_index
        video_path, label, num_frames = sample

        frames = []
        for seg in range(self._num_segments):
            gop_index, gop_pos = pick_frame(num_frames, seg)
            img = load(video_path, gop_index, gop_pos, representation_idx,
                       self._accumulate)

            if img is None:
                # Decoding failed: substitute a blank frame.
                print('Error: loading video %s failed.' % video_path)
                img = np.zeros((256, 256, 2 if rep == 'mv' else 3))
            elif rep == 'mv':
                img = clip_and_scale(img, 20)
                img += 128
                img = np.clip(img, 0, 255).astype(np.uint8)
            elif rep == 'residual':
                img += 128
                img = np.clip(img, 0, 255).astype(np.uint8)

            if rep == 'iframe':
                # BGR to RGB. (PyTorch uses RGB according to doc.)
                img = color_aug(img)[..., ::-1]

            frames.append(img)

        # Move the last axis to position 1 (presumably channels-last to
        # channels-first) before handing the data to torch.
        stacked = np.transpose(np.array(self._transform(frames)),
                               (0, 3, 1, 2))
        tensor = torch.from_numpy(stacked).float() / 255.0

        if rep == 'iframe':
            tensor = (tensor - self._input_mean) / self._input_std
        elif rep == 'residual':
            tensor = (tensor - 0.5) / self._input_std
        elif rep == 'mv':
            tensor = tensor - 0.5

        return tensor, label
Пример #23
0
    def __getitem__(self, index):
        """Return optical flow, motion-vector and residual inputs for one video.

        In training mode a random entry of the video list is used and
        ``index`` is ignored; otherwise entry ``index`` is used. For each
        segment the pre-computed x/y flow images, the configured
        representation and the residual are loaded and concatenated along
        the last axis (2 flow channels first, then the representation, then
        the residual), run through ``self._transform`` and split again.

        Returns:
            ``(input_flow, input_mv, input_residual, label)``. When
            ``self._viz`` is true outside training, the tuple is extended by
            the x/y flow image paths of the last segment, the 'img_' path of
            the first segment and the class name taken from ``flow_path``.

        Raises:
            ValueError: If ``self._flow_folder`` is not 'tvl1', the only
                folder with a known file-name template.
        """
        representation_idx = {'mv': 1, 'residual': 2}.get(
            self._representation, 0)

        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
            pick_frame = self._get_train_frame_index
        else:
            video_path, label, num_frames = self._video_list[index]
            pick_frame = self._get_test_frame_index

        # Both values are the same for every segment, so compute them once.
        flow_path = video_path_to_flow_path(self._flow_root, video_path)
        if self._flow_folder != 'tvl1':
            # Previously this surfaced as an unbound 'flow_tmpl' name.
            raise ValueError('unsupported flow folder %r: only "tvl1" has a '
                             'file name template' % (self._flow_folder,))
        flow_tmpl = 'flow_{0}_{1:05d}.jpg'

        def read_flow_plane(axis, frame_no):
            # Close the image file as soon as the pixels are copied out.
            plane_path = os.path.join(flow_path,
                                      flow_tmpl.format(axis, frame_no))
            with Image.open(plane_path) as plane:
                return np.array(plane.convert('L'))

        frames = []
        idx_first = None
        for seg in range(self._num_segments):
            gop_index, gop_pos = pick_frame(num_frames, seg)

            # Flow files are numbered from 1.
            idx = gop_index * GOP_SIZE + gop_pos + 1
            if idx_first is None:
                idx_first = idx
            # read the corresponding pre-computed optical flow along x and y dimension
            flow = np.stack(
                [read_flow_plane('x', idx),
                 read_flow_plane('y', idx)], axis=-1)

            # load MV and data pre-processing
            mv = load(video_path, gop_index, gop_pos, representation_idx,
                      self._accumulate)

            if mv is None:
                print('Error: loading video %s failed.' % video_path)
                mv = np.zeros(
                    (256, 256, 2 if self._representation == 'mv' else 3))
            elif self._representation == 'mv':
                if self._mv_minmaxnorm == 1:
                    # scale values from +-20 to +-127.5
                    mv = clip_and_scale(mv, 20)
                mv += 128
                mv = (np.minimum(np.maximum(mv, 0), 255)).astype(np.uint8)
            elif self._representation == 'residual':
                mv += 128
                mv = (np.minimum(np.maximum(mv, 0), 255)).astype(np.uint8)

            if self._representation == 'iframe':
                mv = color_aug(mv)

                # BGR to RGB. (PyTorch uses RGB according to doc.)
                mv = mv[..., ::-1]

            # load residual and data pre-processing
            residual = load(video_path, gop_index, gop_pos, 2,
                            self._accumulate)
            residual += 128
            residual = (np.minimum(np.maximum(residual, 0),
                                   255)).astype(np.uint8)

            frames.append(np.concatenate((flow, mv, residual), axis=2))

        frames = self._transform(frames)

        frames = np.array(frames)
        frames = np.transpose(frames, (0, 3, 1, 2))

        # split input into input_mv and input_flow
        input_flow = frames[:, 0:2, :, :]
        input_mv = frames[:, 2:4, :, :]
        input_residual = frames[:, 4:, :, :]

        # Value comparison: 'is not 0' tested object identity.
        if self._flow_ds_factor != 0:
            # downsample to make OF blocky
            factor = self._flow_ds_factor
            w_max = input_flow.shape[2]
            h_max = input_flow.shape[3]
            input_flow = block_reduce(input_flow,
                                      block_size=(1, 1, factor, factor),
                                      func=np.mean)
            # resize to original size by repeating or interpolation
            if self._upsample_interp is False:
                input_flow = input_flow.repeat(factor, axis=2).repeat(factor,
                                                                      axis=3)
            else:
                # Linear interpolation, one spatial axis at a time.
                w_max_ds = input_flow.shape[2]
                h_max_ds = input_flow.shape[3]
                f_out = interpolate.interp1d(np.linspace(0, 1, w_max_ds),
                                             input_flow,
                                             kind='linear',
                                             axis=2)
                input_flow = f_out(np.linspace(0, 1, w_max_ds * factor))
                f_out = interpolate.interp1d(np.linspace(0, 1, h_max_ds),
                                             input_flow,
                                             kind='linear',
                                             axis=3)
                input_flow = f_out(np.linspace(0, 1, h_max_ds * factor))
            # Crop back to the size before downsampling.
            input_flow = input_flow[:, :, :w_max, :h_max]

        # load data from numpy to torch and pre-processing
        input_flow = torch.from_numpy(input_flow).float() / 255.0
        input_mv = torch.from_numpy(input_mv).float() / 255.0
        input_residual = torch.from_numpy(input_residual).float() / 255.0

        if self._representation == 'iframe':
            input_mv = (input_mv - self._input_mean) / self._input_std
        elif self._representation == 'mv':
            input_mv = (input_mv - 0.5) / torch.mean(self._input_std)

        input_flow = (input_flow - 0.5) / torch.mean(self._input_std)
        input_residual = (input_residual - 0.5) / self._input_std

        # Explicit comparisons kept so non-bool flag values behave as before.
        if (self._viz == True) and (self._is_train == False):
            classname = flow_path.split('/')[-2]
            img_tmpl = 'img_{:05d}.jpg'
            # idx is the frame number of the LAST segment; idx_first is the
            # frame number of the first segment.
            return input_flow, input_mv, input_residual, label, os.path.join(
                flow_path, flow_tmpl.format('x', idx)), os.path.join(
                    flow_path, flow_tmpl.format('y', idx)), os.path.join(
                        flow_path, img_tmpl.format(idx_first)), classname
        else:
            return input_flow, input_mv, input_residual, label
    def __getitem__(self, index):
        """
        Build one clip made of an I-frame followed by the residuals of a GOP.

        Frames come from pre-extracted PNG files when the clip's first frame
        index is a multiple of 12, and are decoded from the video with
        ``load`` otherwise.

        Args:
            index (int): Index into ``self.data``; replaced by
                ``self.opt.video_index`` when that option is not -1.
        Returns:
            tuple: (clip, target), plus the video file name as a third item
            when ``self.opt.video_level_accuracy`` is set. ``target`` is the
            data entry, passed through ``self.target_transform`` if given.
        """
        # A fixed video can be forced through the options.
        if self.opt.video_index != -1:
            index = self.opt.video_index
            #print(self.data[index]['frame_indices'])
        # NOTE(review): jpg_dir_path, num_frames, gop_size and num_of_gops
        # are not used by anything below.
        jpg_dir_path = self.data[index]['jpeg_path']
        num_frames = self.data[index]['n_frames']
        gop_size = 12
        num_of_gops = int(num_frames/gop_size) -1 #+ 1



        # GOP that contains the first frame of the clip (12 frames per GOP).
        gop_index = int(self.data[index]['frame_indices'][0] / 12)
        # NOTE(review): derived from the GOP index, not from the frame index;
        # possibly meant to be frame_indices[0] % 12 -- confirm.
        frame_start = gop_index % 12
        iframe_path_img   = os.path.join(self.data[index]['iframe_path'],'iframe_') + str(gop_index) + '_' + str(0) + '.png'
        # Fall back to the previous GOP when this GOP has no I-frame PNG.
        if not os.path.exists(iframe_path_img):
            gop_index -= 1
            iframe_path_img   = os.path.join(self.data[index]['iframe_path'],'iframe_') + str(gop_index) + '_' + str(0) + '.png'
        clip = []
        path = jpg_dir_path = self.data[index]['video']
       # print("self.data[index]['frame_indices'][0]",self.data[index]['frame_indices'][0])
        if not self.opt.residual_only:
            if self.data[index]['frame_indices'][0] % 12 == 0:
                # Clip starts on a GOP boundary: use the stored PNG.
                iframe_path_img   = os.path.join(self.data[index]['iframe_path'],'iframe_') + str(gop_index) + '_' + str(0) + '.png'
                iframe            = pil_loader(iframe_path_img)
            else:
                # Otherwise decode representation 0 at position frame_start.
                iframe            = load(path,gop_index,frame_start, 0, True)
                iframe =self.tensor_2_image(iframe)
            clip.append(iframe)

        # Positions 1..11 of the GOP supply the residual frames.
        for frame_index in range(1,12):
            residual_path_img = os.path.join(self.data[index]['residual_path'],'residuals_') + str(gop_index) + '_' + str(frame_index) + '.png'
            if os.path.exists(residual_path_img):
                if self.data[index]['frame_indices'][0] % 12 == 0:
                    residual          = pil_loader(residual_path_img)
                else:
                    # NOTE(review): frame_start is incremented but not read
                    # again after this point.
                    frame_start += 1
                    residual          = load(path,gop_index,frame_index, 2, True)
                    # Shift by 128 and clamp to 0..255 before the uint8 cast.
                    residual += 128
                    residual = (np.minimum(np.maximum(residual, 0), 255)).astype(np.uint8)
                    residual = self.tensor_2_image(residual)
                # Without an I-frame the first residual is appended twice.
                if self.opt.residual_only and frame_index == 1:
                    clip.append(residual)
                clip.append(residual)
            else:
                # Residual PNG missing: step back one GOP and load from there.
                # NOTE(review): gop_index stays decremented for the remaining
                # iterations, and the residual_only duplication is skipped on
                # this path -- confirm both are intended.
                gop_index -= 1
                residual_path_img = os.path.join(self.data[index]['residual_path'],'residuals_') + str(gop_index) + '_' + str(frame_index) + '.png'
                residual          = pil_loader(residual_path_img)
                clip.append(residual)
                continue





        if self.spatial_transform is not None:
            # Draw the random parameters once, then apply to every frame.
            self.spatial_transform.randomize_parameters()
            clip = [self.spatial_transform(img) for img in clip]
            # Pad short clips to 12 frames by repeating the last frame.
            # NOTE(review): padding only happens when a spatial transform is set.
            if len(clip) < 12:
                delta = 12 - len(clip)
                dup = clip[-1]
                for k in range(delta):
                    clip.append(dup)
        # Stack the frames on a new axis 0, then swap axes 0 and 1.
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

        if self.opt.add:
            # Average frames with frame 0 (index 0 along axis 1).
            # NOTE(review): the loop runs once per element of axis 0, while
            # l indexes axis 1, so only indices 1..size(axis 0)-1 of axis 1
            # are averaged -- confirm this is intended.
            l = 0
            iframe = clip[:,0,:,:]
            for mat in clip:
                if l != 0:
                    residual_frame = clip[:,l,:,:]
                    clip [:,l,:,:] = (residual_frame + iframe)/2
                l += 1


        target = self.data[index]

        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.opt.video_level_accuracy:
            # Also return the last path component of the video path.
            return clip, target, self.data[index]['video'].split('/')[-1]

        else:
            return clip, target
    def __getitem__(self, index):
        """Return ``((iframes, mvs), label)`` for one video.

        In training mode a random entry of the video list is used and
        ``index`` is ignored; otherwise entry ``index`` is used.
        ``self._num_segments`` I-frames and ``self._num_segments *
        self.alpha`` motion vectors are loaded, transformed and normalised.

        Raises:
            AssertionError: If axis 1 of either output does not have the
                expected number of frames.
        """
        if self._is_train:
            video_path, label, num_frames = random.choice(self._video_list)
            pick_frame = self._get_train_frame_index
        else:
            video_path, label, num_frames = self._video_list[index]
            pick_frame = self._get_test_frame_index

        num_iframes = self._num_segments
        num_mvs = self._num_segments * self.alpha

        # for iframe
        iframes = []
        for seg in range(num_iframes):
            gop_index, gop_pos = pick_frame(num_frames, seg, num_iframes,
                                            'iframe')
            img = load(video_path, gop_index, gop_pos, IFRAME,
                       self._accumulate)
            if img is None:
                print('Error: loading video %s failed.' % video_path)
                img = np.zeros((224, 224, 3))
            # img = color_aug(img)
            # BGR to RGB. (PyTorch uses RGB according to doc.)
            iframes.append(img[..., ::-1])

        # for mv .notice here we should use the same gop_index with iframe
        mvs = []
        for seg in range(num_mvs):
            gop_index, gop_pos = pick_frame(num_frames, seg, num_mvs, 'mv')
            mv = load(video_path, gop_index, gop_pos, MV, self._accumulate)
            if mv is None:
                print('Error: loading video %s failed.' % video_path)
                mv = np.zeros((224, 224, 2))

            mv = clip_and_scale(mv, 20)  # scale up the value
            mv += 128
            mvs.append(np.clip(mv, 0, 255).astype(np.uint8))

        # preprocess iframe
        iframes = self._iframe_transform(
            iframes) if self._is_train else self._infer_transform(iframes)
        iframes = np.asarray(iframes, dtype=np.float32) / 255.0
        # Move the last axis to the front: frames end up on axis 1.
        iframes = np.transpose(iframes, (3, 0, 1, 2))
        iframes = (iframes - self._input_mean) / self._input_std

        # preprocess mv
        mvs = self._mv_transform(
            mvs) if self._is_train else self._infer_transform(mvs)
        mvs = np.asarray(mvs, dtype=np.float32) / 255.0
        mvs = np.transpose(mvs, (3, 0, 1, 2))
        mvs = (mvs - 0.5)

        # check the shape
        # The message is passed as a string: 'assert cond, print(...)' made
        # the AssertionError carry None and wrote the text to stdout instead.
        assert iframes.shape[1] == num_iframes, "iframe shape wrong"
        assert mvs.shape[1] == num_mvs, "timesacle shape wrong"
        return (iframes, mvs), label
Пример #26
0
    num_frames = video[4]  # 195

    print(video_name)

    pickle_folder = os.path.join(pickle_root, folder, video_name)
    iframe_folder = os.path.join(pickle_folder, "iframe")
    mv_folder = os.path.join(pickle_folder, "mv")

    if os.path.exists(iframe_folder) is False:
        os.makedirs(iframe_folder)
    if os.path.exists(mv_folder) is False:
        os.makedirs(mv_folder)

    for i in range(num_frames):
        gop_index, gop_pos = get_gop_pos(i, 'iframe')
        iframe_img = load(video_path, gop_index, gop_pos, 0, True)  # I-frame
        iframe_path = os.path.join(iframe_folder,
                                   str(gop_index) + "_" + str(gop_pos) + ".p")
        with open(iframe_path, 'wb') as iframe_pickle:
            pickle.dump(iframe_img, iframe_pickle)

        if i > 0:
            gop_index, gop_pos = get_gop_pos(i, 'mv')
            mv_img = load(video_path, gop_index, gop_pos, 1, True)  # mv
            mv_path = os.path.join(mv_folder,
                                   str(gop_index) + "_" + str(gop_pos) + ".p")
            with open(mv_path, 'wb') as mv_pickle:
                pickle.dump(mv_img, mv_pickle)

print("Done!")
Пример #27
0
def show_boxes_in_compressed_video(video_path,
                                   update_ms=10,
                                   min_confidence=0.0,
                                   box_file_path=None,
                                   min_frame_idx=None,
                                   max_frame_idx=None,
                                   frame_interval=1,
                                   frame_type=0,
                                   accumulate=False):
    """
    Play back a compressed video and optionally draw boxes on each frame.

    :param video_path: string, the path of the compressed video.
    :param update_ms: scalar, 1000 / update_ms is the fps, default 10
    :param min_confidence: float, the confidence threshold of detection, the boxes with smaller confidence will not be
                displayed. Default 0.0
    :param box_file_path: string, the path of the boxes. The format of this file should be the same with MOTChallenge
                det.txt or gt.txt (comma separated). If None, no boxes are drawn.
    :param min_frame_idx: integer (1-based), the first frame to display. None or an out-of-range value falls back to
                the first frame.
    :param max_frame_idx: integer (1-based), the last frame to display. None or an out-of-range value falls back to
                the last frame.
    :param frame_interval: the interval to show frames
    :param frame_type: int, can be 0, 1 or 2 (denotes I frame, motion vector, residual, respectively)
    :param accumulate: used for the motion vector and residual. If it is true, the motion vector and residual are
                accumulated.
    :return: None
    :raises RuntimeError: if the resolved first frame index is larger than the resolved last frame index.
    """
    # The number of frames in one group. We set to 12 for the raw mpeg4 video.
    GROUP_SIZE = 12

    def frame_callback(vis, frame_idx):
        """Load frame ``frame_idx`` (1-based), show it and draw its boxes."""
        print('Processing frame: ', frame_idx)

        # GOP indices start from 0 while frame_idx starts from 1, hence the
        # shift by one before splitting into (group, position in group).
        gop_idx, in_group_idx = divmod(int(frame_idx) - 1, GROUP_SIZE)
        image = coviar.load(video_path, gop_idx, in_group_idx, frame_type,
                            accumulate)
        image = compressed_frame_to_show(image, frame_type, tool_type='cv2')

        vis.set_image(image.copy(), frame_idx)

        boxes = seq_info['boxes']
        if boxes is None:
            return

        # Column 0 holds the frame index, column 6 the confidence.
        boxes = boxes[boxes[:, 0] == frame_idx]
        boxes = boxes[boxes[:, 6] >= min_confidence]
        # Columns 1..6 are handed to the visualizer; presumably
        # [target_id, x, y, w, h, confidence] per the MOTChallenge layout.
        vis.draw_box(list(boxes[:, 1:7]))

    # NOTE(review): the "+ 1" makes the largest accepted 1-based index one
    # more than get_num_frames() reports -- confirm this is intended.
    total_frames = coviar.get_num_frames(video_path) + 1

    # Resolve the first and last frame index. Frames are 1-based, so 0 is
    # rejected as well (it would wrap to the last position of the first GOP).
    if min_frame_idx is None or not 1 <= min_frame_idx <= total_frames:
        min_frame_idx = 1
    if max_frame_idx is None or not 1 <= max_frame_idx <= total_frames:
        max_frame_idx = total_frames

    if min_frame_idx > max_frame_idx:
        # Build one message string; passing several positional arguments to
        # the exception would render the message as a tuple.
        raise RuntimeError(
            'The first frame index {} is larger than the last frame index {}'
            .format(min_frame_idx, max_frame_idx))

    # Get the sequence information from the very first frame.
    im = coviar.load(video_path, 0, 0, 0, False)
    im_size = im.shape

    # ndmin=2 keeps a single-row box file two-dimensional so the column
    # indexing in frame_callback keeps working.
    raw_boxes = None if box_file_path is None else np.loadtxt(
        box_file_path, dtype=float, delimiter=',', ndmin=2)

    seq_info = {
        'image_size': [im_size[0], im_size[1]],
        'min_frame_idx': min_frame_idx,
        'max_frame_idx': max_frame_idx,
        'frame_interval': frame_interval,
        'boxes': raw_boxes,
        'sequence_name': ''
    }

    visualizer = Visualization(seq_info, update_ms)
    visualizer.run(frame_callback)