def temp_encoded_video(num_frames: int, fps: int, height=10, width=10, prefix=None, directory=None):
    """Create a temporary lossless mp4 video with synthetic content.

    This is a single-yield generator meant to be driven as a context manager;
    the video file is deleted once the generator is resumed, closed or thrown
    into.
    NOTE(review): the ``contextlib.contextmanager`` decorator is presumably
    applied outside this view - confirm.

    Args:
        num_frames: number of synthetic frames to generate.
        fps: frame rate handed to ``io.write_video``.
        height: frame height passed to ``create_dummy_video_frames``.
        width: frame width passed to ``create_dummy_video_frames``.
        prefix: optional file-name prefix for the temporary file.
        directory: optional directory in which the temporary file is created.

    Yields:
        A ``(path, frames)`` tuple where ``frames`` is the generated data run
        through ``thwc_to_cthw`` and cast to ``torch.float32``.
    """
    # Lossless options.
    video_codec = "libx264rgb"
    options = {"crf": "0"}
    data = create_dummy_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(prefix=prefix, suffix=".mp4", dir=directory) as f:
        # Release our own handle first; the encoder writes the file by name.
        f.close()
        try:
            io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
            yield f.name, thwc_to_cthw(data).to(torch.float32)
        finally:
            # Clean up even if encoding fails or the consumer raises inside
            # the ``with`` body, so no temporary video is left behind.
            try:
                os.unlink(f.name)
            except FileNotFoundError:
                pass
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    """Write synthetic frames to a temporary ``.mp4`` and yield its path and data.

    Single-yield generator intended to be driven as a context manager; the
    file is removed when the generator is resumed, closed or thrown into.
    NOTE(review): the ``contextlib.contextmanager`` decorator is presumably
    applied outside this view - confirm.

    Args:
        num_frames, height, width: forwarded to ``_create_video_frames``.
        fps: frame rate handed to ``io.write_video``.
        lossless: if true, encode with ``libx264rgb`` and ``crf=0``.
        video_codec: codec name; defaults to ``"libx264"``.
        options: encoder options dict; defaults to ``{}``.

    Yields:
        ``(path, data)`` - the file name and the frames that were written.

    Raises:
        ValueError: if ``lossless`` is combined with ``video_codec`` or
            ``options``.
    """
    if lossless:
        if video_codec is not None:
            raise ValueError(
                "video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError(
                "options can't be specified together with lossless")
        video_codec = "libx264rgb"
        options = {"crf": "0"}

    if video_codec is None:
        video_codec = "libx264"
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
        # Release our own handle first; the encoder writes the file by name.
        f.close()
        try:
            io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
            yield f.name, data
        finally:
            # Runs even when the consumer raises, so the video never leaks.
            try:
                os.unlink(f.name)
            except FileNotFoundError:
                pass
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    """Write synthetic frames to a temporary ``.mp4`` and yield its path and data.

    Single-yield generator; the temporary file lives for as long as the
    enclosing ``NamedTemporaryFile`` context stays open.
    NOTE(review): the ``contextlib.contextmanager`` decorator is presumably
    applied outside this view - confirm.

    Args:
        num_frames, height, width: forwarded to ``_create_video_frames``.
        fps: frame rate handed to ``io.write_video``.
        lossless: if true, encode with ``libx264rgb`` and ``crf=0``.
        video_codec: codec name; defaults to ``'libx264'``.
        options: encoder options dict; defaults to ``{}``.

    Yields:
        ``(path, data)`` - the file name and the frames that were written.

    Raises:
        ValueError: if ``lossless`` is combined with ``video_codec`` or
            ``options``.
    """
    if lossless:
        # Explicit raises rather than ``assert``: assertions are stripped
        # under ``python -O`` and the conflict would then go unnoticed.
        if video_codec is not None:
            raise ValueError("video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError("options can't be specified together with lossless")
        video_codec = 'libx264rgb'
        options = {'crf': '0'}

    if video_codec is None:
        video_codec = 'libx264'
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
        yield f.name, data
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    """Write synthetic frames to a temporary ``.mp4`` and yield its path and data.

    Single-yield generator intended to be driven as a context manager; the
    file is removed when the generator is resumed, closed or thrown into.
    NOTE(review): the ``contextlib.contextmanager`` decorator is presumably
    applied outside this view - confirm.

    Args:
        num_frames, height, width: forwarded to ``_create_video_frames``.
        fps: frame rate handed to ``io.write_video``.
        lossless: if true, encode with ``libx264rgb`` and ``crf=0``.
        video_codec: codec name; when omitted it depends on the active video
            backend (see below).
        options: encoder options dict; defaults to ``{}``.

    Yields:
        ``(path, data)`` - the file name and the frames that were written.

    Raises:
        ValueError: if ``lossless`` is combined with ``video_codec`` or
            ``options``.
    """
    if lossless:
        if video_codec is not None:
            raise ValueError("video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError("options can't be specified together with lossless")
        video_codec = 'libx264rgb'
        options = {'crf': '0'}

    if video_codec is None:
        if get_video_backend() == "pyav":
            video_codec = 'libx264'
        else:
            # when video_codec is not set, we assume it is libx264rgb which accepts
            # RGB pixel formats as input instead of YUV
            video_codec = 'libx264rgb'
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        # Release our own handle first; the encoder writes the file by name.
        f.close()
        try:
            io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
            yield f.name, data
        finally:
            # Runs even when the consumer raises, so the video never leaks.
            try:
                os.unlink(f.name)
            except FileNotFoundError:
                pass
def blur_background(video_path, respth='./res/test_res', cp='model_final_diss.pth',
                    start_pts=61, end_pts=65):
    """Blur every pixel that is not labelled as foreground and save the clip.

    Each frame is segmented with ``label_images``; pixels whose (rescaled)
    label is greater than zero keep their original value, all others are
    replaced by a 15x15 box-blurred version of the frame.

    Args:
        video_path: path of the input video.
        respth: directory the result is written to (must already exist).
        cp: checkpoint identifier forwarded to ``label_images``.
        start_pts: start of the window to read, in seconds (default 61).
        end_pts: end of the window to read, in seconds (default 65).

    The output file is ``<respth>/blurred_background<basename>.mp4``, written
    at the source frame rate rounded to an integer.
    """
    frames, _, info = read_video(video_path, start_pts, end_pts, pts_unit="sec")

    # Label maps are resized to dims 1 and 2 of ``frames``
    # (presumably height and width - TODO confirm against read_video).
    scale_labels = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((frames.shape[1], frames.shape[2]),
                          interpolation=Image.NEAREST),
        transforms.ToTensor()
    ])
    labels = label_images(frames, cp)

    new_frames = []
    for frame_inx in tqdm.tqdm(
            range(frames.shape[0]),
            desc="generating segmented frames with background"):
        scaled_label = scale_labels(labels[frame_inx].type(torch.uint8)).squeeze(0)
        # Repeat the mask along a trailing axis so it lines up with the
        # three colour channels of the frame.
        scaled_label = torch.stack([scaled_label, scaled_label, scaled_label], dim=2)
        blurred_img = torch.from_numpy(cv2.blur(frames[frame_inx].numpy(), (15, 15)))
        new_frames.append(
            torch.where(scaled_label > 0, frames[frame_inx], blurred_img))
    new_frames = torch.stack(new_frames)

    write_video(
        os.path.join(respth, "blurred_background" + os.path.basename(video_path)) + ".mp4",
        new_frames,
        round(info["video_fps"]))
def test_reading_from_directory_structure(self, decoder):
    """Check that a class-per-folder tree is read back with the right labels.

    Two class folders are created, each holding one lossless video; the
    dataset must return the clips in alphabetical folder order with labels
    0 and 1 and pixel-exact frames.
    """
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_path = pathlib.Path(root_dir)

        running_dir = root_path / "running"
        running_dir.mkdir()
        running_frames = create_dummy_video_frames(15, 10, 10)

        cleaning_dir = root_path / "cleaning windows"
        cleaning_dir.mkdir()
        cleaning_frames = create_dummy_video_frames(20, 15, 15)

        with tempfile.NamedTemporaryFile(
                suffix=".mp4", dir=running_dir) as f_1, tempfile.NamedTemporaryFile(
                suffix=".mp4", dir=cleaning_dir) as f_2:
            f_1.close()
            f_2.close()

            # Write lossless video for each class.
            for handle, frames in ((f_1, running_frames), (f_2, cleaning_frames)):
                io.write_video(
                    handle.name,
                    frames,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

            dataset = LabeledVideoDataset(
                LabeledVideoPaths.from_path(root_dir),
                clip_sampler=make_clip_sampler("uniform", 3),
                video_sampler=SequentialSampler,
                decode_audio=False,
                decoder=decoder,
            )

            # Videos are sorted alphabetically so "cleaning windows" comes
            # first (label 0), followed by "running" (label 1).
            for expected_label, expected_frames in ((0, cleaning_frames),
                                                    (1, running_frames)):
                sample = next(dataset)
                self.assertEqual(sample["label"], expected_label)
                self.assertTrue(sample["video"].equal(
                    thwc_to_cthw(expected_frames).to(torch.float32)))
def test_write_read_video(self):
    """Round-trip synthetic frames through write_video / read_video."""
    with tempfile.NamedTemporaryFile(suffix='.mp4') as tmp:
        path = tmp.name
        original = self._create_video_frames(10, 300, 300)
        io.write_video(path, original, fps=5)

        decoded, _, info = io.read_video(path)

        # Encoding is lossy here, so compare against the class tolerance.
        max_abs_err = (original.float() - decoded.float()).abs().max()
        self.assertTrue(max_abs_err < self.TOLERANCE)
        self.assertEqual(info["video_fps"], 5)
def label_and_render_video(video_path, respth='./res/test_res', cp='model_final_diss.pth'):
    """Segment every frame of a video and save the rendered segmentation.

    Args:
        video_path: path of the input video.
        respth: directory the result is written to.
        cp: checkpoint identifier forwarded to ``label_images``.

    The result is written to ``<respth>/segment<basename>.mp4`` at the frame
    rate reported for the source video.
    """
    frames, _audio, info = read_video(video_path, pts_unit="sec")
    labels = label_images(frames, cp)

    progress = tqdm.tqdm(list(range(frames.shape[0])),
                         desc="generating segmented frames")
    rendered = [render_segmented_image(frames[idx], labels[idx]) for idx in progress]
    segmented_frames = torch.stack(rendered)

    out_name = "segment" + os.path.basename(video_path)
    write_video(os.path.join(respth, out_name) + ".mp4",
                segmented_frames,
                info["video_fps"])
def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
    """Write ``num_videos`` random 300x400 videos into ``tmpdir``.

    Args:
        tmpdir: directory in which ``0.mp4``, ``1.mp4``, ... are created.
        num_videos: how many videos to write.
        sizes: optional per-video frame counts; video ``i`` gets
            ``5 * (i + 1)`` frames when omitted.
        fps: optional per-video frame rates; 5 when omitted.

    Returns:
        List of the written file paths, in creation order.
    """
    paths = []
    for idx in range(num_videos):
        num_frames = 5 * (idx + 1) if sizes is None else sizes[idx]
        rate = 5 if fps is None else fps[idx]

        frames = torch.randint(0, 256, (num_frames, 300, 400, 3), dtype=torch.uint8)
        path = os.path.join(tmpdir, "{}.mp4".format(idx))
        paths.append(path)
        io.write_video(path, frames, fps=rate)
    return paths
def test_read_partial_video(self):
    """Read sub-ranges of a video by timestamp and compare with the source."""
    with tempfile.NamedTemporaryFile(suffix='.mp4') as tmp:
        path = tmp.name
        frames = self._create_video_frames(10, 300, 300)
        io.write_video(path, frames, fps=5)
        pts = io.read_video_timestamps(path)

        # Every window of 1-3 frames starting at each of the first 5 frames.
        for first in range(5):
            for count in range(1, 4):
                last = first + count - 1
                clip, _, _ = io.read_video(path, pts[first], pts[last])
                expected = frames[first:first + count]
                self.assertEqual(len(clip), count)
                self.assertTrue(
                    (expected.float() - clip.float()).abs().max() < self.TOLERANCE)

        # A start timestamp one tick after frame 4 is still expected to
        # return frames 4 through 7.
        clip, _, _ = io.read_video(path, pts[4] + 1, pts[7])
        self.assertEqual(len(clip), 4)
        self.assertTrue(
            (frames[4:8].float() - clip.float()).abs().max() < self.TOLERANCE)
def test_read_timestamps(self):
    """Compare read_video_timestamps against timestamps derived via PyAV."""
    with tempfile.NamedTemporaryFile(suffix='.mp4') as f:
        data = self._create_video_frames(10, 300, 300)
        io.write_video(f.name, data, fps=5)
        pts = io.read_video_timestamps(f.name)

        # note: not all formats/codecs provide accurate information for computing the
        # timestamps. For the format that we use here, this information is available,
        # so we use it as a baseline
        container = av.open(f.name)
        try:
            stream = container.streams[0]
            pts_step = int(
                round(float(1 / (stream.average_rate * stream.time_base))))
            num_frames = int(
                round(
                    float(stream.average_rate * stream.time_base * stream.duration)))
        finally:
            # Release the file handle before the temporary file is removed;
            # the original left the container open.
            container.close()

        expected_pts = [i * pts_step for i in range(num_frames)]
        self.assertEqual(pts, expected_pts)
def read_and_reshape(inpt):
    """Centre-crop a video, keep every 10th frame and write the result.

    Args:
        inpt: ``(video_dir, new_name)`` pair - the input video path and the
            output path.

    A window of up to 640x640 around the centre of dims 1 and 2 of the frame
    tensor is kept (presumably height and width - TODO confirm against
    ``read_video``). The output frame rate is set to the number of kept
    frames, as in the original code.
    """
    video_dir, new_name = inpt
    width = height = 320  # half-extent of the crop along each axis
    sample = 10           # keep one frame out of this many

    vframes, _, _ = read_video(video_dir)
    _, dim1, dim2, _ = vframes.shape

    # Clamp the start at 0: for inputs smaller than the crop window the
    # start would be negative, and a negative slice start wraps around to
    # the far edge instead of keeping the whole axis.
    dim1_start = max(int(dim1 / 2 - width), 0)
    dim1_end = int(dim1 / 2 + width)
    dim2_start = max(int(dim2 / 2) - height, 0)
    dim2_end = int(dim2 / 2) + height
    vframes = vframes[:, dim1_start:dim1_end, dim2_start:dim2_end, :]

    sub_sampled = torch.stack(
        [frame for i, frame in enumerate(vframes) if i % sample == 0])
    write_video(new_name, sub_sampled, fps=sub_sampled.shape[0])
def replace_background(video_path, bk_img_path, respth='./res/test_res', cp='model_final_diss.pth'):
    """Replace every non-foreground pixel of a video with a background image.

    Each frame is segmented with ``label_images``; pixels whose (rescaled)
    label is greater than zero keep their original value, all others are
    taken from the background image.

    Args:
        video_path: path of the input video.
        bk_img_path: path of the background image; it is converted to RGB
            and resized to the frame size.
        respth: directory the result is written to.
        cp: checkpoint identifier forwarded to ``label_images``.

    The result is written to ``<respth>/background<basename>.mp4`` at the
    frame rate reported for the source video.
    """
    frames, _, info = read_video(video_path, pts_unit="sec")
    # Dims 1 and 2 of ``frames`` are used as the resize target
    # (presumably height and width - TODO confirm against read_video).
    frame_size = (frames.shape[1], frames.shape[2])

    to_background = transforms.Compose([
        transforms.Resize(frame_size),
        transforms.ToTensor()
    ])
    # Open inside ``with`` so the image file handle is released, and force
    # three channels so grayscale/RGBA/palette images match the frames.
    with Image.open(bk_img_path) as bk_img:
        bkg = to_background(bk_img.convert("RGB"))
    # Move the channel axis last (same as transpose(0, 2).transpose(0, 1)).
    bkg = bkg.permute(1, 2, 0)
    # scaling to [0,255] and casting to uint8
    bkg = (bkg * 255).type(torch.uint8)

    scale_labels = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(frame_size, interpolation=Image.NEAREST),
        transforms.ToTensor()
    ])
    labels = label_images(frames, cp)

    new_frames = []
    for frame_inx in tqdm.tqdm(
            list(range(frames.shape[0])),
            desc="generating segmented frames with background"):
        scaled_label = scale_labels(labels[frame_inx].type(torch.uint8)).squeeze(0)
        # Repeat the mask along a trailing axis so it lines up with the
        # three colour channels of the frame.
        scaled_label = torch.stack([scaled_label, scaled_label, scaled_label], dim=2)
        new_frames.append(torch.where(scaled_label > 0, frames[frame_inx], bkg))
    new_frames = torch.stack(new_frames)

    write_video(
        os.path.join(respth, "background" + os.path.basename(video_path)) + ".mp4",
        new_frames,
        info["video_fps"])
def render_video(self, src: str, dst: str, batch_size: int = 1, num_workers: int = 1) -> None:
    """Run ``self.render`` over every frame of a video and save the result.

    Args:
        src: path handed to ``self.fetch_video``.
        dst: output directory; the result keeps the base name of ``src``.
        batch_size: number of frames rendered per step.
        num_workers: worker count for the ``DataLoader``.

    The rendered frames are mapped with ``x * 127.5 + 127.5`` (i.e. the
    renderer output is treated as lying in [-1, 1] - presumably; confirm),
    clamped to [0, 255] and written as uint8.
    """
    video, _, info = self.fetch_video(src)
    dataset = TensorDataset(video)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    styled_video = torch.zeros_like(video)
    styled_name = os.path.join(dst, os.path.basename(src))

    with torch.no_grad():
        for i, batch in enumerate(loader):
            styled_video[i * batch_size:(i + 1) * batch_size] = self.render(batch[0]).cpu()
            print('%d/%d' % (i, len(loader)))

    # Channels-last layout for the writer, then rescale to pixel values.
    styled_video = styled_video.permute((0, 2, 3, 1)) * 127.5 + 127.5
    # Clamp before the integer cast: values slightly outside the expected
    # range would otherwise wrap around when converted to uint8.
    styled_video = styled_video.clamp(0, 255).to(torch.uint8)
    write_video(styled_name, styled_video, float(info['video_fps']))
# NOTE(review): the next two statements are the tail of a function whose header lies outside this view.
    SPL_onehot = SPL_onehot.permute(2, 0, 1)
    return SPL_onehot


# Script entry point: renders the driving sequence for a few source persons
# and writes a single comparison video to test_data/out.mp4.
if __name__ == '__main__':
    # Only the first four entries of the list file are used.
    with open('test_data/test.lst') as f:
        persons = [line.strip() for line in f][:4]
    opt = InferOptions().parse()
    pipeline = InferencePipeline.from_opts(opt)
    # First entry of each list: the driving video itself, paired with
    # all-zero placeholders of the same shape for the other two lists.
    videos = [io.read_video('test_data/seq.mp4', pts_unit='sec')[0]]
    segs = [torch.zeros_like(videos[0], dtype=torch.uint8)]
    images = [torch.zeros_like(videos[0], dtype=torch.uint8)]
    for person in persons:
        source_image = Image.open(f'test_data/test/{person}.jpg')
        pipeline.segmentator.path = f'test_data/testSPL2/{person}.png'
        # render_video yields (frame, segmentation) pairs; split them into two tuples.
        frames, segmentations = zip(*pipeline.render_video(source_image, 'test_data/seq/'))
        frames = torch.cat(frames)
        frames = frames.float()
        # Move dim 1 to the last position (presumably channels-first -> channels-last; confirm).
        frames = torch.movedim(frames, 1, 3)
        # Rescale from [-1, 1] to [0, 255] (assumes the renderer outputs [-1, 1] - TODO confirm).
        frames = (frames + 1) / 2.0 * 255.0
        videos.append(frames.byte())
        segmentations = torch.cat(segmentations)
        # Per frame: argmax over dim 0, then converted to an image array by util.tensor2im.
        segmentations = torch.stack([torch.from_numpy(util.tensor2im(torch.argmax(sf, axis=0, keepdim=True).data, True)) for sf in segmentations])
        segs.append(segmentations.byte())
        # Repeat the still source image so it has the same size as the rendered frames.
        source_image_tensor = torch.from_numpy(np.array(source_image)).unsqueeze(0).expand(frames.size())
        images.append(source_image_tensor)
    # Each list is joined along dim 2, then the three strips are joined along dim 1.
    comp_video = torch.cat([torch.cat(part, dim=2) for part in (images, segs, videos)], dim=1)
    io.write_video('test_data/out.mp4', comp_video, fps=30)
    # output_image.save(OUPUT_PATH)
def test_reading_from_directory_structure_hmdb51(self, decoder):
    """Build a tiny HMDB51-style tree and check both clips are read back.

    Two action folders with one lossless video each are created under
    ``videos/``, plus one split file per action under ``folds/``. The two
    samples may come back in either order; each must carry its action label,
    five meta tags and pixel-exact frames.
    """
    # For an unknown reason this import has to be here for `buck test` to work.
    import torchvision.io as io

    with tempfile.TemporaryDirectory() as root_dir:
        # Create test directory structure with two classes and a video in each.
        root_dir_name = pathlib.Path(root_dir)
        action_1 = "running"
        action_2 = "cleaning_windows"

        videos_root_dir = root_dir_name / "videos"
        videos_root_dir.mkdir()

        test_class_1 = videos_root_dir / action_1
        test_class_1.mkdir()
        data_1 = create_dummy_video_frames(15, 10, 10)

        test_class_2 = videos_root_dir / action_2
        test_class_2.mkdir()
        data_2 = create_dummy_video_frames(20, 15, 15)

        test_splits = root_dir_name / "folds"
        test_splits.mkdir()

        with tempfile.NamedTemporaryFile(
                suffix="_u_nm_np1_ba_goo_19.avi",
                dir=test_class_1) as f_1, tempfile.NamedTemporaryFile(
                suffix="_u_nm_np1_fr_med_1.avi", dir=test_class_2) as f_2:
            f_1.close()
            f_2.close()

            # Write lossless video for each class.
            for clip_file, clip_frames in ((f_1, data_1), (f_2, data_2)):
                io.write_video(
                    clip_file.name,
                    clip_frames,
                    fps=30,
                    video_codec="libx264rgb",
                    options={"crf": "0"},
                )

            # One split file per action, listing that action's single video.
            for action, clip_file in ((action_1, f_1), (action_2, f_2)):
                video_name = os.path.basename(clip_file.name)
                split_path = os.path.join(test_splits, action + "_test_split1.txt")
                with open(split_path, "w") as f:
                    f.write(f"{video_name} 1\n")

            dataset = Hmdb51(
                data_path=test_splits,
                video_path_prefix=root_dir_name / "videos",
                clip_sampler=make_clip_sampler("uniform", 3),
                video_sampler=SequentialSampler,
                split_id=1,
                split_type="train",
                decode_audio=False,
                decoder=decoder,
            )

            # The order of the two samples is not relied upon: normalise so
            # that ``first`` is the action_1 clip before checking contents.
            first = next(dataset)
            second = next(dataset)
            self.assertTrue(first["label"] in [action_1, action_2])
            if first["label"] == action_2:
                first, second = second, first

            for sample, action, frames in ((first, action_1, data_1),
                                           (second, action_2, data_2)):
                self.assertEqual(sample["label"], action)
                self.assertEqual(5, len(sample["meta_tags"]))
                self.assertTrue(sample["video"].equal(
                    thwc_to_cthw(frames).to(torch.float32)))
def plot_video(orig: Tensor, recons: Tensor, model_name: str, epoch: int, out_path: str,
               rows: int, cols: int, fps: int,
               thumbnail_width: int = None, thumbnail_height: int = None):
    """Tile original/reconstructed video pairs into a grid and save it as mp4.

    Args:
        orig: batch of original videos, indexed as [B, T, C, H, W] per the
            comments below (monochrome, values presumably in [0, 1] since
            they are scaled by 255 - confirm).
        recons: reconstructions, same layout as ``orig``.
        model_name: unused in this function.
        epoch: unused in this function.
        out_path: output path without extension; ``.mp4`` is appended.
        rows: maximum number of grid rows.
        cols: number of grid columns.
        fps: frame rate of the written video.
        thumbnail_width: optional per-video width to resize to.
        thumbnail_height: optional per-video height to resize to.

    Frames are resized only when both thumbnail dimensions are given and
    differ from the input size; with the ``None`` defaults the inputs are
    used as they are.

    Raises:
        ValueError: if there is nothing to plot (empty batch or empty grid).
    """
    resize_requested = thumbnail_width is not None and thumbnail_height is not None
    if resize_requested and orig.shape[-2:] != (thumbnail_height, thumbnail_width):
        # Resize each frame
        to_pil = ToPILImage()
        to_tensor = ToTensor()
        resize = Resize((thumbnail_height, thumbnail_width))

        def transform(x):
            return to_tensor(resize(to_pil(x)))

        def resize_batch(batch):
            return torch.cat([
                torch.cat(
                    [transform(frame).unsqueeze(dim=0) for frame in video]).unsqueeze(dim=0)
                for video in batch
            ])

        recons = resize_batch(recons)
        orig = resize_batch(orig)

    # Convert [B, T, C, H, W] to [T, C, H, W]
    # Distributing the batch dimension in a grid
    n = min(rows * cols, orig.shape[0])
    if n <= 0:
        raise ValueError("plot_video needs at least one video and a non-empty grid")

    i = 0
    video_rows = []
    for _ in range(rows):
        done = False
        # Build each row, one column at a time
        video_cols = []
        for _ in range(cols):
            if i >= n:
                done = True
                break
            # Original on left, recons on right
            video = (torch.cat([orig[i], recons[i]], dim=-1) * 255.0).byte()
            video_cols.append(video)
            i += 1
        while len(video_cols) < cols:
            # Append black videos to the empty spaces. zeros_like keeps the
            # uint8 dtype so the grid is not promoted to float.
            video_cols.append(torch.zeros_like(video))
        # Concatenate all columns into a row
        video_rows.append(torch.cat(video_cols, dim=-1))
        if done:
            break

    # Concatenate all rows into a single video
    video_array = torch.cat(video_rows, dim=-2)
    # [T, C, H, W] -> [T, H, W, C]
    video_array = video_array.permute(0, 2, 3, 1)
    # Monochrome to RGB
    video_array = video_array.repeat(1, 1, 1, 3)
    # Export the tensor as a video
    # TODO: improve video quality
    write_video(out_path + '.mp4', video_array, fps)
def save_video(xseq, path):
    """Save a frame sequence with values in [-1, 1] as a 15 fps video.

    Values are clamped to [-1, 1], mapped to [0, 255], cast to uint8 and
    permuted with ``(0, 2, 3, 1)`` (presumably channels-first to
    channels-last - confirm) before being written to ``path``.
    """
    clipped = xseq.data.cpu().clamp(-1, 1)
    scaled = (clipped + 1.) / 2. * 255
    frames = scaled.type(torch.uint8).permute(0, 2, 3, 1)
    write_video(path, frames, fps=15)
def done(self):
    """Write the frames collected in ``self.t`` to a numbered capture file.

    The output directory is created if needed, the frames are stacked and
    written to ``<directory>/capture_<cap_id>.mp4`` at 24 fps, and
    ``self.cap_id`` is incremented for the next capture.
    """
    Path(self.directory).mkdir(parents=True, exist_ok=True)
    frames = torch.from_numpy(np.stack(self.t))
    target = f'{self.directory}/capture_{self.cap_id}.mp4'
    write_video(target, frames, 24.0)
    self.cap_id += 1
source_dir, root_dir='%s/' % source_dir) one = torch.tensor([source_dataset[0]['image']]) images = torch.tensor([source_dataset[0]['image']]) print(images.shape) last_choice = 0 last_diff = 0 total_diff = 0 hash_hit_count = 0 count = 0 start = time.time() for i in range(1, len(source_dataset)): print('Source example count = %d' % count) count += 1 t = torch.tensor(source_dataset[i]['image']) print(t.shape) images = torch.cat((images, one), 0) images[i] = t end = time.time() print( 'DONE checking hashes. Processed count = %d in %d seconds, at %d seconds per example' % (count, (end - start), (end - start) / len(source_dataset))) print(images.shape) filename = '%s/%s.mp4' % (output_dir, output_filename) write_video(filename, images, np.int(30), video_codec='libx264', options=None)