Example #1
def _load_video(full_path, size=None, resize_mode='resize_and_crop', cut_edges=False, cut_edges_thresh=0):
    """
    Load a video into a numpy array

    :param full_path: Full path to the video
    :param size: A 2-tuple of width-height, indicating the desired size of the output
    :param resize_mode: The mode with which to get the video to the desired size.  Can be:
        'squeeze', 'preserve_aspect', 'crop', 'scale_crop'.  See resize_image in image_ops.py for more info.
    :param cut_edges: True if you want to cut the dark edges from the video
    :param cut_edges_thresh: If cut_edges, this is the threshold at which you'd like to cut them.
    :return: A (n_frames, height, width, 3) numpy array
    """
    try:
        from moviepy.video.io.VideoFileClip import VideoFileClip
    except ImportError:
        raise ImportError("You need to install moviepy to read videos.  In the virtualenv, go `pip install moviepy`")
    assert os.path.exists(full_path)
    video = VideoFileClip(full_path)
    images = []
    edge_crops = None
    for frame in video.iter_frames():
        if cut_edges:
            if edge_crops is None:
                edge_crops = get_dark_edge_slice(frame, cut_edges_thresh=cut_edges_thresh)
            frame = frame[edge_crops[0], edge_crops[1]]  # crop every frame, including the first
        if size is not None:
            width, height = size
            frame = resize_image(frame, width=width, height=height, mode=resize_mode)
        images.append(frame)
    return images
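A minimal usage sketch of the same pattern, independent of the project helpers above (assumes moviepy and NumPy are installed and that clip.mp4 exists); it stacks the frames yielded by iter_frames() into the (n_frames, height, width, 3) array the docstring describes.

import numpy as np
from moviepy.video.io.VideoFileClip import VideoFileClip

clip = VideoFileClip("clip.mp4")
frames = np.stack([frame for frame in clip.iter_frames()])  # (n_frames, height, width, 3), uint8
clip.close()
print(frames.shape, frames.dtype)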
Example #2
class VideoStim(Stim, CollectionStimMixin):

    ''' A video. '''

    def __init__(self, filename, onset=None):

        self.clip = VideoFileClip(filename)
        self.fps = self.clip.fps
        self.width = self.clip.w
        self.height = self.clip.h

        self.n_frames = int(self.fps * self.clip.duration)
        duration = self.clip.duration

        super(VideoStim, self).__init__(filename, onset, duration)

    def __iter__(self):
        """ Frame iteration. """
        for i, f in enumerate(self.clip.iter_frames()):
            yield VideoFrameStim(self, i, data=f)

    @property
    def frames(self):
        return [f for f in self.clip.iter_frames()]

    def get_frame(self, index=None, onset=None):
        if index is not None:
            onset = float(index) / self.fps
        else:
            index = int(onset * self.fps)
        return VideoFrameStim(self, index, data=self.clip.get_frame(onset))
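A quick sketch of the index/onset conversion that get_frame() relies on, assuming a 30 fps clip (the numbers are only illustrative):

fps = 30.0
index = 45
onset = index / fps                 # frame 45 starts 1.5 s into the clip
assert int(onset * fps) == index    # converting back recovers the frame index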
Example #3
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4):
    video_clip = VideoFileClip(path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size, video_clip.fps, codec="libx264",
                                                    preset="medium", bitrate="2000k",
                                                    audiofile=path_in, threads=None,
                                                    ffmpeg_params=None)

    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), g.device(device_t), \
            tf.Session(config=soft_config) as sess:
        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
        img_placeholder = tf.placeholder(tf.float32, shape=batch_shape,
                                         name='img_placeholder')

        preds = transform.net(img_placeholder)
        saver = tf.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        X = np.zeros(batch_shape, dtype=np.float32)

        def style_and_write(count):
            for i in range(count, batch_size):
                X[i] = X[count - 1]  # Use last frame to fill X
            _preds = sess.run(preds, feed_dict={img_placeholder: X})
            for i in range(0, count):
                video_writer.write_frame(np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # The number of frames written to X
        for frame in video_clip.iter_frames():
            X[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                style_and_write(frame_count)
                frame_count = 0

        if frame_count != 0:
            style_and_write(frame_count)

        video_writer.close()
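A toy illustration of the batch-padding trick inside style_and_write() above (the shapes are made up): when the last batch holds fewer than batch_size frames, the final real frame is repeated so the placeholder keeps a fixed shape.

import numpy as np

batch_size = 4
X = np.zeros((batch_size, 2, 2, 3), dtype=np.float32)
count = 3                        # only three real frames were written to X
for i in range(count, batch_size):
    X[i] = X[count - 1]          # duplicate the last written frame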
Example #5
def video_style_transfer_gatys(video_path, style_path, output_path, batch_s=4):

    video = VideoFileClip(video_path, audio=False)
    video_w = ffmpeg_writer.FFMPEG_VideoWriter(output_path, video.size, video.fps, codec="libx264",
                                               preset="medium", bitrate="2000k",
                                               audiofile=video_path, threads=None,
                                               ffmpeg_params=None)

    style = Image.load_image(style_path)
    content = [c for c in video.iter_frames()]
    batch_l = [content[i:i + batch_s] for i in range(0, len(content), batch_s)]
    for b in batch_l:
        frames = run_style_transfer(b, style)
        for f in frames:
            video_w.write_frame(f)
    video_w.close()
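The batching above is plain list slicing; a toy version with hypothetical values behaves like this:

content = list(range(10))
batch_s = 4
batch_l = [content[i:i + batch_s] for i in range(0, len(content), batch_s)]
# batch_l == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]] -- the last batch may be short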
Example #6
def load_video(filepath, sample=6):
    clip = VideoFileClip(filepath)
    video = []

    skip = 0
    for frame in clip.iter_frames():
        skip += 1
        if skip % sample != 0:
            continue

        img = Image.fromarray(frame)
        img = img.resize((224, 224))
        norm = np.divide(np.array(img), 255)
        norm = np.reshape(norm, [1, 224, 224, 3])
        video.append(norm)

    return np.array(video)
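The per-frame preprocessing above, sketched on a single dummy frame (requires Pillow and NumPy; the 480x640 input is an assumption, not a real video frame):

import numpy as np
from PIL import Image

frame = np.zeros((480, 640, 3), dtype=np.uint8)   # stand-in for one RGB frame
img = Image.fromarray(frame).resize((224, 224))
norm = np.divide(np.array(img), 255)              # scale to [0, 1]
norm = np.reshape(norm, [1, 224, 224, 3])         # add a batch dimension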
Example #7
def get_video_feat(path):
    feat = []
    if use_VGG:
        frames = load_video(path)
        for f in frames:
            f5_3 = sess.run([vgg.fc6], feed_dict={image_holder: f})
            feat.append(f5_3)
    else:
        clip = VideoFileClip(path)
        skip = 6
        count = 0
        for f in clip.iter_frames():
            count += 1
            if count % skip != 0:
                continue
            feat.append(model.extract_PIL(Image.fromarray(f)))
    return feat
Example #8
def video_style_transfer(input_path, model_path, output_path, batch_s=4):

    video = VideoFileClip(input_path, audio=False)
    video_w = ffmpeg_writer.FFMPEG_VideoWriter(output_path, video.size, video.fps, codec="libx264",
                                               preset="medium", bitrate="2000k",
                                               audiofile=input_path, threads=None,
                                               ffmpeg_params=None)

    with tf.Graph().as_default(), tf.Session() as session:

        video_iter = list(video.iter_frames())
        batch_l = [video_iter[i:i + batch_s] for i in range(0, len(video_iter), batch_s)]
        while len(batch_l[-1]) < batch_s:
            batch_l[-1].append(batch_l[-1][-1])

        print("Loading model, it may take some time")
        video_wip = np.array(batch_l, dtype=np.float32)
        place_holder = tf.placeholder(tf.float32, shape=video_wip.shape[1:], name='place_holder')
        wip = Transform.net(place_holder)

        p_loader = tf.train.Saver()

        if os.path.isdir(model_path):

            model = tf.train.get_checkpoint_state(model_path)

            if model is not None and model.model_checkpoint_path:
                p_loader.restore(session, model.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found")
        else:
            p_loader.restore(session, model_path)

        # The information about size in the video files are: 'width, height'
        # In *** the dimensions are 'height, width'
        #shape = (batch_s, video.size[1], video.size[0], 3)
        # TODO check if it's ok without shape
        for i in range(len(video_wip)):
            r_res = session.run(wip, feed_dict={place_holder: video_wip[i]})
            for r in r_res:
                video_w.write_frame(np.clip(r, 0, 255).astype(np.uint8))
            print("processed " + str(i+1) + " out of " + str(len(video_wip)) + " batches", end = '\r')

        video_w.close()
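A toy version of the while-loop padding used above, which repeats the last element of the final batch until it reaches batch_s (values are made up):

batch_l = [[1, 2, 3, 4], [5, 6]]
batch_s = 4
while len(batch_l[-1]) < batch_s:
    batch_l[-1].append(batch_l[-1][-1])
# batch_l == [[1, 2, 3, 4], [5, 6, 6, 6]]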
Example #9
def get_frames_from_video(path_to_video,
                          from_sec=0,
                          to_sec=None,
                          undistort=False):
    """
    Generator that reads a video file from disk and yields a color correct frame at a time
    """
    camera_calibration = CameraCalibration(CAMERA_TYPE)

    fullpath = os.path.abspath(path_to_video)
    video = VideoFileClip(fullpath, audio=False).subclip(from_sec, to_sec)
    for frame in video.iter_frames():
        # Switch the channel order, since OpenCV expects a different order than the frames coming from the camera
        color_corrected_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if undistort:
            color_corrected_frame = camera_calibration.undistort(
                color_corrected_frame)
        yield color_corrected_frame
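A hedged usage sketch for the generator above; the file name is a placeholder, and CameraCalibration/CAMERA_TYPE are assumed to come from the surrounding project.

for frame in get_frames_from_video("dashcam.mp4", from_sec=2, to_sec=5):
    pass  # each yielded frame is a numpy array with its channel order swapped for OpenCV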
Example #10
class VideoStim(DynamicStim):
    ''' A video. '''
    def __init__(self, filename):

        self.clip = VideoFileClip(filename)
        self.fps = self.clip.fps
        self.width = self.clip.w
        self.height = self.clip.h

        self.frames = []
        self.frames = [f for f in self.clip.iter_frames()]
        self.n_frames = len(self.frames)

        super(VideoStim, self).__init__(filename)

    def _extract_duration(self):
        self.duration = self.n_frames * 1. / self.fps

    def __iter__(self):
        """ Frame iteration. """
        for i, f in enumerate(self.frames):
            yield VideoFrameStim(self, i, data=f)

    def extract(self, extractors, merge_events=True, **kwargs):
        period = 1. / self.fps
        timeline = Timeline(period=period)
        for ext in extractors:
            # For VideoExtractors, pass the entire stim
            if ext.target.__name__ == self.__class__.__name__:
                events = ext.apply(self, **kwargs)
                for ev in events:
                    timeline.add_event(ev, merge=merge_events)
            # Otherwise, for images, loop over frames
            else:
                c = 0
                for frame in self:
                    if frame.data is not None:
                        event = Event(onset=c * period)
                        event.add_value(ext.apply(frame))
                        timeline.add_event(event, merge=merge_events)
                        c += 1
        return timeline
Example #11
def _load_video(full_path,
                size=None,
                resize_mode='resize_and_crop',
                cut_edges=False,
                cut_edges_thresh=0):
    """
    Load a video into a numpy array

    :param full_path: Full path to the video
    :param size: A 2-tuple of width-height, indicating the desired size of the output
    :param resize_mode: The mode with which to get the video to the desired size.  Can be:
        'squeeze', 'preserve_aspect', 'crop', 'scale_crop'.  See resize_image in image_ops.py for more info.
    :param cut_edges: True if you want to cut the dark edges from the video
    :param cut_edges_thresh: If cut_edges, this is the threshold at which you'd like to cut them.
    :return: A (n_frames, height, width, 3) numpy array
    """
    try:
        from moviepy.video.io.VideoFileClip import VideoFileClip
    except ImportError:
        raise ImportError(
            "You need to install moviepy to read videos.  In the virtualenv, go `pip install moviepy`"
        )
    assert os.path.exists(full_path)
    video = VideoFileClip(full_path)
    images = []
    edge_crops = None
    for frame in video.iter_frames():
        if cut_edges:
            if edge_crops is None:
                edge_crops = get_dark_edge_slice(
                    frame, cut_edges_thresh=cut_edges_thresh)
            frame = frame[edge_crops[0], edge_crops[1]]  # crop every frame, including the first
        if size is not None:
            width, height = size
            frame = resize_image(frame,
                                 width=width,
                                 height=height,
                                 mode=resize_mode)
        images.append(frame)
    return images
Example #12
def load_video(filepath, sample=6, use_VGG=True):
    clip = VideoFileClip(filepath)
    video = []

    skip = 0
    for frame in clip.iter_frames():
        skip += 1
        if skip % sample != 0:
            continue

        img = Image.fromarray(frame)
        img = img.resize((224, 224)) if use_VGG else img.resize((299, 299))
        if use_VGG:
            norm = np.divide(np.array(img), 255)
            norm = np.reshape(norm, [1, 224, 224, 3])
            video.append(norm)
        else:
            # keras will handle input normalization for InceptionV3
            video.append(np.array(img))

    return np.array(video)
Example #13
    def open(cls, path: Union[str, Path], method: str = "pillow") -> 'GifSequence':
        """
        Create a GifSequence from a GIF file using Pillow or MoviePy
        :param path: path to GIF file
        :param method: method to load GIF frames (pillow or mpy)
        :return: the opened GifSequence
        """
        image_file = Image.open(path)
        assert type(image_file) is GifImagePlugin.GifImageFile

        if method == "pillow":
            return cls(ImageSequence.Iterator(image_file))
        elif method == "mpy":
            durations = [image.info['duration'] for image in ImageSequence.Iterator(image_file)]
            clip = VideoFileClip(path)
            frames = []
            for frame, duration in zip(clip.iter_frames(), durations):
                frames.append(GifFrame.from_array(array=frame, duration=duration))
            return GifSequence.from_frames(frames)
        else:
            raise ValueError("Method must be either pillow or mpy (moviepy)")
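A hypothetical call to the classmethod above (GifSequence and the GIF path are assumptions from the surrounding project, not part of moviepy or Pillow):

seq = GifSequence.open("animation.gif", method="mpy")  # decode frames and durations via moviepy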
Example #14
    def _extract_frames(self, person, video):
        video_frames_dir = self._video_frames_path(video)
        video_clip = VideoFileClip(self._video_path(video))
        
        start_time = time.time()
        print('[extract-frames] about to extract_frames for {}, fps {}, length {}s'.format(video_frames_dir, video_clip.fps, video_clip.duration))
        
        if os.path.exists(video_frames_dir):
            print('[extract-frames] frames already exist, skipping extraction: {}'.format(video_frames_dir))
            return
        
        os.makedirs(video_frames_dir)
        frame_num = 0
        for frame in tqdm.tqdm(video_clip.iter_frames(fps=video['fps']), total = video_clip.fps * video_clip.duration):
            video_frame_file = os.path.join(video_frames_dir, 'frame_{:03d}.jpg'.format(frame_num))
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # MoviePy frames are RGB; convert to BGR for OpenCV
            cv2.imwrite(video_frame_file, frame)
            frame_num += 1

        print('[extract] finished extract_frames for {}, total frames {}, time taken {:.0f}s'.format(
            video_frames_dir, frame_num, time.time() - start_time))
Example #15
    def _get_frames(self):
        """
        Extract frames from the video
        """
        path_to_vid = os.path.join(self.vid_path, self.vid_id)
        assert os.path.exists(path_to_vid), "{} file not found".format(
            path_to_vid)

        try:
            # Load video
            video_clip = VideoFileClip(path_to_vid,
                                       audio=False,
                                       fps_source="fps")

        except Exception as e:
            logger.info("Failed to load video from {} with error {}".format(
                path_to_vid, e))

        self.orig_width, self.orig_height = video_clip.size
        self.frames = None

        for in_frame in video_clip.iter_frames(fps=self.in_fps):
            if self.frames is None:
                self.frames = in_frame[None, ...]
            else:
                self.frames = np.concatenate(
                    (self.frames, in_frame[None, ...]), axis=0)

        # convert to tensor
        self.frames = torch.from_numpy(self.frames).float()

        # Normalize the values
        self.frames = self.frames / 255.0
        self.frames = self.frames - torch.tensor(self.cfg.DATA.MEAN)
        self.frames = self.frames / torch.tensor(self.cfg.DATA.STD)

        # T H W C -> C T H W.
        self.frames = self.frames.permute(3, 0, 1, 2)
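The normalization and T H W C -> C T H W permute above, sketched on a dummy two-frame clip (requires PyTorch; the MEAN/STD values here are placeholders, not the ones in self.cfg):

import numpy as np
import torch

frames = torch.from_numpy(np.zeros((2, 8, 8, 3), dtype=np.uint8)).float()
frames = frames / 255.0
frames = frames - torch.tensor([0.45, 0.45, 0.45])
frames = frames / torch.tensor([0.225, 0.225, 0.225])
frames = frames.permute(3, 0, 1, 2)
print(frames.shape)  # torch.Size([3, 2, 8, 8])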
Example #16
def cartoonize(file_in, smoothing=3):
    print(file_in)
    path_in=os.path.join(dir_in, file_in)
    path_out=os.path.join(dir_out, file_in)
    if os.path.exists(path_out):
        os.remove(path_out)

    video_clip = VideoFileClip(path_in, audio=False)

    
    i = 0
    for img in video_clip.iter_frames():
        print(i)
        i += 1
#        if i == 100:
#            break
    
        # 1) Edges
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray = cv2.medianBlur(gray, 5)
        edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
        
        # 2) Color
        color = cv2.bilateralFilter(img, 9, 200, 200)
        
        # 3) Cartoon
        cartoon = cv2.bitwise_and(color, color, mask=edges)
        output = cv2.GaussianBlur(cartoon, (smoothing, smoothing), 0)
 #       output = cartoon
        cv2.imwrite("images/im%04d.png"%(i), output)

    fps = int(video_clip.fps)    
    cv2.destroyAllWindows()
    os.system("ffmpeg -r %d -i images/im%%04d.png -vb 40M -vcodec mpeg4 -r %d %s"%(fps, fps, path_out))
    files = glob.glob(os.path.join(dir_out, "*.png"))
    for f in files:
        os.remove(f)
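The same cartoon pipeline applied to a single synthetic image (requires OpenCV and NumPy; the random input is only a stand-in for a real video frame):

import cv2
import numpy as np

img = np.random.randint(0, 255, (120, 160, 3), dtype=np.uint8)
gray = cv2.medianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5)
edges = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                              cv2.THRESH_BINARY, 9, 9)
color = cv2.bilateralFilter(img, 9, 200, 200)
cartoon = cv2.bitwise_and(color, color, mask=edges)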
Example #17
def feed_forward_video(path_in, path_out, checkpoint_dir):
    # initialize video cap
    video_cap = VideoFileClip(path_in, audio=False)
    # initialize writer
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_cap.size, video_cap.fps, codec='libx264',
                                                    preset='medium', bitrate='2000k', audiofile=path_in,
                                                    threads=None, ffmpeg_params=None)

    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True

    with g.as_default(), tf.Session(config=soft_config) as sess:
        batch_shape = (None, video_cap.size[1], video_cap.size[0], 3)
        img_placeholder = tf.placeholder(tf.float32, shape=batch_shape, name='img_placeholder')

        model = Transfer()
        pred = model(img_placeholder)
        saver = tf.train.Saver()

        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception('No checkpoint found...')
        else:
            saver.restore(sess, checkpoint_dir)

        frame_id = 0
        for frame in video_cap.iter_frames():
            print('frame id: {}'.format(frame_id))
            _pred = sess.run(pred, feed_dict={img_placeholder: np.asarray([frame]).astype(np.float32)})
            video_writer.write_frame(np.clip(_pred, 0, 255).astype(np.uint8))
            frame_id += 1

        video_writer.close()
Example #18
    def transform_video(self,
                        input_path,
                        output_path,
                        batch_size=4,
                        start=0,
                        end=0):
        '''
        Transform a video to animation version
        https://github.com/lengstrom/fast-style-transfer/blob/master/evaluate.py#L21
        '''
        # Force to None
        end = end or None

        if not os.path.isfile(input_path):
            raise FileNotFoundError(f'{input_path} does not exist')

        output_dir = "/".join(output_path.split("/")[:-1])
        os.makedirs(output_dir, exist_ok=True)
        is_gg_drive = '/drive/' in output_path
        temp_file = ''

        if is_gg_drive:
            # Writing directly into google drive can be inefficient
            temp_file = f'tmp_anime.{output_path.split(".")[-1]}'

        def transform_and_write(frames, count, writer):
            anime_images = denormalize_input(self.transform(frames),
                                             dtype=np.uint8)
            for i in range(0, count):
                img = np.clip(anime_images[i], 0, 255)
                writer.write_frame(img)

        video_clip = VideoFileClip(input_path, audio=False)
        if start or end:
            video_clip = video_clip.subclip(start, end)

        video_writer = ffmpeg_writer.FFMPEG_VideoWriter(temp_file
                                                        or output_path,
                                                        video_clip.size,
                                                        video_clip.fps,
                                                        codec="libx264",
                                                        preset="medium",
                                                        bitrate="2000k",
                                                        audiofile=input_path,
                                                        threads=None,
                                                        ffmpeg_params=None)

        total_frames = round(video_clip.fps * video_clip.duration)
        print(
            f'Transforming video {input_path}, {total_frames} frames, size: {video_clip.size}'
        )

        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
        frame_count = 0
        frames = np.zeros(batch_shape, dtype=np.float32)
        for frame in tqdm(video_clip.iter_frames()):
            try:
                frames[frame_count] = frame
                frame_count += 1
                if frame_count == batch_size:
                    transform_and_write(frames, frame_count, video_writer)
                    frame_count = 0
            except Exception as e:
                print(e)
                break

        # The last frames
        if frame_count != 0:
            transform_and_write(frames, frame_count, video_writer)

        if temp_file:
            # move to output path
            shutil.move(temp_file, output_path)

        print(f'Animation video saved to {output_path}')
        video_writer.close()
Example #19
                source_video = movie_resize(source_video, (width, height))
                x_center, y_center = 128, height // 2
            else:
                width = int(256 * width / height)
                height = 256
                source_video = movie_resize(source_video, (width, height))
                x_center, y_center = width // 2, 128
            source_video = movie_crop(source_video,
                                      x_center=x_center,
                                      y_center=y_center,
                                      width=256,
                                      height=256)
        else:
            raise NotImplementedError("Invalid Video Resize Mode")

    driving_video = [(frame / 255) for frame in source_video.iter_frames()]
    print()

    print("Generating Video")
    predictions = make_animation(source_image, driving_video, generator,
                                 kp_detector, **options)
    print()

    output_clip = VideoClip(make_frame, duration=source_duration)
    output_clip = output_clip.set_fps(source_fps)
    output_clip = output_clip.set_audio(source_audio)

    if args.image_resize == 'fill' and args.crop_output:
        print(f"Cropping output video to {unfill_width}x{unfill_height}")
        output_clip = movie_crop(output_clip,
                                 x_center=256 // 2,
Example #20
def ffwd_video(path_in, path_out, checkpoint_dir, device_t='/gpu:0', batch_size=4):
    """Creates a stylized video. Code from lengstrom's repo found here:
    https://github.com/lengstrom/fast-style-transfer
    and the specific file is found here:
    https://github.com/lengstrom/fast-style-transfer/blob/master/evaluate.py

    Parameters
    ----------
    path_in : str
        The path to the video to read in to stylize.
    path_out : str
        The path to save the stylized video.
    checkpoint_dir : str
        The checkpoint dir holding the neural style transfer model. This should
        be a .ckpt file.
    device_t : str, optional
        The device you want to run the model on.
    batch_size : int, optional
        The batch size you want to use for the model.
    """

    video_clip = VideoFileClip(path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out,
                                                    video_clip.size,
                                                    video_clip.fps,
                                                    codec="libx264",
                                                    preset="medium",
                                                    bitrate="2000k",
                                                    audiofile=path_in,
                                                    threads=None,
                                                    ffmpeg_params=None)

    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), g.device(device_t), \
            tf.Session(config=soft_config) as sess:
        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
        img_placeholder = tf.placeholder(tf.float32, shape=batch_shape,
                                         name='img_placeholder')

        preds = transform.net(img_placeholder)
        saver = tf.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        X = np.zeros(batch_shape, dtype=np.float32)

        def style_and_write(count):
            for i in range(count, batch_size):
                X[i] = X[count - 1]  # Use last frame to fill X
            _preds = sess.run(preds, feed_dict={img_placeholder: X})
            for i in range(0, count):
                video_writer.write_frame(np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # The number of frames written to X
        pbar = tqdm(total=int(video_clip.fps * video_clip.duration))
        for frame in video_clip.iter_frames():
            X[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                style_and_write(frame_count)
                pbar.update(frame_count)
                frame_count = 0


        if frame_count != 0:
            style_and_write(frame_count)
            pbar.update(frame_count)

        pbar.close()
        video_writer.close()
Example #21
if __name__ == '__main__':
    options, _ = getopt.getopt(sys.argv[1:], '', ['file='])

    for opt in options:
        if opt[0] == '--file':
            video_path = opt[1]

    clip = VideoFileClip(video_path, audio=False)

    coun = 0
    max_frame_cout = 2000
    start_count = 60 * 20  # 60 fps * 20 sec
    imgs_path = []

    for clip in clip.iter_frames():
        coun += 1

        if coun % 60 != 0 or coun < start_count:
            continue
        elif len(imgs_path) >= max_frame_cout:
            break

        img = Image.fromarray(clip)
        step = 30
        sample_size = (150, 200)
        margin = 80

        for x in range(0 + margin, img.size[0] - sample_size[0] - margin,
                       step):
            for y in range(0 + margin, img.size[1] - sample_size[1] - margin,
Example #22
from CarDetection import detect_vehicle
from LicensePlateDetection import LicensePlateDetection
from Utils import get_image_patch, save_debug_image

if __name__ == "__main__":
    start = time.time()
    fullpath = os.path.abspath("testFiles/IMG_2993.m4v")
    clip = VideoFileClip(fullpath, audio=False).subclip(0, 3)
    frame_counter = 0
    car_counter = 0

    car_detection_total_duration = 0
    lp_extraction_total_duration = 0

    for frame in clip.iter_frames():
        frame_counter += 1
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_copy = np.copy(frame)

        car_detection_start = time.time()
        car_boxes = detect_vehicle(frame)
        car_detection_total_duration += time.time() - car_detection_start

        print("Found " + str(len(car_boxes)) + " cars in frame " +
              str(frame_counter))
        car_counter_per_frame = 0
        for car_box in car_boxes:
            car_counter_per_frame += 1
            car_counter += 1
            frame_copy = cv2.rectangle(frame_copy, car_box[0], car_box[3],
Example #23
def load_movie(filename):
    clip = VideoFileClip(filename)
    frameList = [_make_grayscale(a) for a in clip.iter_frames()]
    mat = np.asarray(frameList, dtype=np.uint8)
    return mat
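_make_grayscale is a project helper that is not shown here; a plausible stand-in (an assumption, the real helper may differ) is a standard luma conversion:

import numpy as np

def _make_grayscale(frame):
    # ITU-R BT.601 luma weights applied to an RGB frame
    return np.dot(frame[..., :3], [0.299, 0.587, 0.114]).astype(np.uint8)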
Example #24
                          y2=P1_HEALTH_BAR_TL[1] + HEALTH_BAR_DIM[1])
    P2_health_clip = crop(high_res_clip,
                          x1=P2_HEALTH_BAR_TL[0],
                          y1=P2_HEALTH_BAR_TL[1],
                          x2=P2_HEALTH_BAR_TL[0] + HEALTH_BAR_DIM[0],
                          y2=P2_HEALTH_BAR_TL[1] + HEALTH_BAR_DIM[1])

    # Find match start timestamps
    ###########################################################################
    sec_clip_frames_buffer = collections.deque()
    vs_sec_clip_frames = []
    sec_matches = []
    match_titles = []
    next_sec = 0

    for sec, clip_frame in clip.iter_frames(with_times=True, logger='bar'):
        '''
        if detect_health(high_res_clip, sec):
            print("hit")
        '''
        while sec_clip_frames_buffer:
            (buffer_sec, _) = sec_clip_frames_buffer[0]
            if sec - buffer_sec > CLIP_FRAME_BUFFER_MAX_SECS:
                sec_clip_frames_buffer.popleft()
            else:
                break
        sec_clip_frames_buffer.append((sec, clip_frame))

        if sec < next_sec:
            continue
Example #25
def save_results_video(loader, model, args):
    from moviepy.video.io.VideoFileClip import VideoFileClip
    model.eval()
    with torch.no_grad():
        for i, video_paths in enumerate(
                loader):  # Must be a batch size of 1 (video)
            print(f"Processing {i} of {len(loader)}: {video_paths[0]}")
            clip = VideoFileClip(video_paths[0])
            for frame_id, frame in enumerate(
                    clip.iter_frames()):  # HxWx3 numpy array
                frame = transforms.ToTensor()(frame)
                if args.gpu is not None:
                    images = [frame.cuda(args.gpu, non_blocking=True)]

                outputs = model(images)

                # Plots:
                # do_plot = True
                do_plot = False
                if do_plot:
                    import visualize_maskrcnn_predictions as vis_preds
                    top_predictions = vis_preds.select_top_predictions(
                        outputs[0], 0.7)
                    top_predictions = {
                        k: v.cpu()
                        for k, v in top_predictions.items()
                    }
                    cv_img = (images[0].cpu().numpy().transpose(
                        (1, 2, 0)) * 255).astype("uint8")
                    result = cv_img.copy()
                    result = vis_preds.overlay_boxes(result, top_predictions)
                    result = vis_preds.overlay_keypoints(
                        result, top_predictions)
                    result = vis_preds.overlay_class_names(
                        result, top_predictions)
                    plt.imshow(result)
                    plt.show()

                for j, output in enumerate(outputs):
                    keypoints_all = output["keypoints"].cpu()  # Nx17x3 (most confident detection first)
                    keypoints_scores_all = output["keypoints_scores"].cpu()  # Nx17 (most confident detection first)
                    boxes = output["boxes"].cpu()  # Nx4
                    labels = output["labels"].cpu()  # N
                    scores = output["scores"].cpu()  # N
                    video_path = video_paths[j]

                    # If using small dataset, scale keypoints by 3 to get original coordinates
                    if 'small' in args.data:
                        keypoints_all[:, :, :2] *= args.image_scale_factor

                    # Rearrange into dictionary for writing:
                    # OPENPOSE ANNOTATION STYLE
                    people = []
                    for k, keypoints in enumerate(keypoints_all):
                        if labels[k] == 1:
                            keypoint_visibility = keypoints[:, 2].tolist()
                            keypoints_scores = keypoints_scores_all[k, :]
                            keypoints[:, 2] = keypoints_scores  # Swap visibility and keypoint scores
                            keypoints = keypoints.reshape(-1).tolist()
                            box = boxes[k].tolist()
                            score = scores[k].item()
                            person = {
                                "person_id": [-1],
                                "pose_keypoints_2d": keypoints,
                                "keypoint_visibility": keypoint_visibility,
                                "boxes": box,
                                "score": score
                            }
                            people.append(person)
                    if not people:  # Output all zero if no detection
                        people = [{
                            "person_id": [-1],
                            "pose_keypoints_2d": [0.] * 17 * 3,
                            "keypoint_visibility": [0.] * 17,
                            "boxes": [0.] * 4,
                            "score": 0.
                        }]
                        print(
                            f"No people detected in frame {frame_id} of {video_path}"
                        )
                    output_dict = {}
                    output_dict["format"] = "ikea"
                    output_dict["people"] = people

                    video_path_split = video_path.split(
                        '/'
                    )  # eg <root>/Lack_TV_Bench/0007_white_floor_08_04_2019_08_28_10_47/dev3/images/scan_video.avi
                    furniture_type = video_path_split[-5]
                    experiment_id = video_path_split[-4]
                    cam_id = video_path_split[-3]
                    json_name = f"scan_video_000000{frame_id:06d}_keypoints.json"

                    output_path = os.path.join(args.out_data_dir,
                                               furniture_type, experiment_id,
                                               cam_id, 'predictions', 'pose2d',
                                               'keypoint_rcnn_ft_all')
                    os.makedirs(output_path, exist_ok=True)
                    json_file = os.path.join(output_path, json_name)
                    # print(f"Writing: {json_file}")
                    with open(json_file, 'w') as f:
                        json.dump(output_dict, f)

            clip.close()
Example #26
    if args.export_type != 0:
        video_writer = ffmpeg_writer.FFMPEG_VideoWriter(
            f'images/outputs/{out_dir}/{video_name}.mp4',
            video_clip.size,
            video_clip.fps,
            codec="libx264",
            preset="medium",
            bitrate="2000k",
            audiofile=None,
            threads=None,
            ffmpeg_params=None)

    try:
        fnum = 0
        stylized_frames = []
        for frame in tqdm.tqdm(video_clip.iter_frames(),
                               desc="Processing frames"):
            outframe = (stylizer.stylize_with_octaves(frame, args.max_size, args.overlap,
                                                      args.octave_num, args.octave_scale)
                        if args.octave_num
                        else stylizer.stylize_image(frame, args.max_size, args.overlap))
            if args.export_type != 0:
                video_writer.write_frame(outframe)
            if args.export_type != 1:
                save(f"{out_dir}/frame_{fnum}.jpg",
                     np.asarray(outframe, dtype='float32'))
                save(f"{out_dir}/latest.jpg",
                     np.asarray(outframe, dtype='float32')
                     )  #open image viewer on this to see video progress along
            fnum += 1
Example #27
    high_res_clip = VideoFileClip(
        args.tmp_filepath,
        audio=False,
        resize_algorithm='fast_bilinear',
    )


    # Find match start timestamps
    ###########################################################################
    sec_clip_frames_buffer = collections.deque()
    vs_sec_clip_frames = []
    sec_matches = []
    match_titles = []
    next_sec = 0

    for sec, clip_frame in clip.iter_frames(with_times=True):
        while sec_clip_frames_buffer:
            (buffer_sec, _) = sec_clip_frames_buffer[0]
            if sec - buffer_sec > CLIP_FRAME_BUFFER_MAX_SECS:
                sec_clip_frames_buffer.popleft()
            else:
                break
        sec_clip_frames_buffer.append((sec, clip_frame))

        if sec < next_sec:
            continue

        clip_frame_img = clip_frame_to_image(clip_frame)

        # Detect VS splash screen start/continuing
        vs_img_hash_diff = imagehash.average_hash(clip_frame_img.crop(box=VS_IMAGE_BOX)) - VS_IMAGE_HASH
Example #28
File: core.py  Project: Bomme/openl3
def process_video_file(filepath,
                       output_dir=None,
                       suffix=None,
                       audio_model=None,
                       image_model=None,
                       input_repr="mel256",
                       content_type="music",
                       audio_embedding_size=6144,
                       audio_center=True,
                       audio_hop_size=0.1,
                       image_embedding_size=8192,
                       audio_batch_size=32,
                       image_batch_size=32,
                       overwrite=False,
                       verbose=True):
    """
    Computes and saves L3 audio and video frame embeddings for a given video file

    Note that image embeddings are computed for every frame of the video. Also
    note that embeddings for the audio and images are not temporally aligned.
    Please refer to the timestamps in the output files for the corresponding
    timestamps for each set of embeddings.

    Parameters
    ----------
    filepath : str or list[str]
        Path or list of paths to video file(s) to be processed.
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename,
        i.e. <base filename>_<modality>_<suffix>.npz.
        If None, then no suffix will be added,
        i.e. <base filename>_<modality>.npz.
    audio_model : keras.models.Model or None
        Loaded audio model object. If a model is provided, then `input_repr`,
        `content_type`, and `embedding_size` will be ignored.
        If None is provided, the model will be loaded using
        the provided values of `input_repr`, `content_type` and
        `embedding_size`.
    image_model : keras.models.Model or None
        Loaded image model object. If a model is provided, then `input_repr`,
        `content_type`, and `embedding_size` will be ignored.
        If None is provided, the model will be loaded using
        the provided values of `input_repr`, `content_type` and
        `embedding_size`.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for audio model. Ignored if `model` is
        a valid Keras model.
    content_type : "music" or "env"
        Type of content used to train the embedding model. Ignored if `model` is
        a valid Keras model.
    audio_embedding_size : 6144 or 512
        Audio embedding dimensionality. Ignored if `model` is a valid Keras model.
    audio_center : boolean
        If True, pads beginning of audio signal so timestamps correspond
        to center of window.
    audio_hop_size : float
        Hop size in seconds.
    image_embedding_size : 8192 or 512
        Video frame embedding dimensionality. Ignored if `model` is a valid Keras model.
    audio_batch_size : int
        Batch size used for input to audio embedding model
    image_batch_size : int
        Batch size used for input to image embedding model
    overwrite : bool
        If True, overwrites existing output files
    verbose : bool
        If True, prints verbose messages.

    Returns
    -------

    """
    if isinstance(filepath, str):
        filepath_list = [filepath]
    elif isinstance(filepath, list):
        filepath_list = filepath
    else:
        err_msg = 'filepath should be type str or list[str], but got {}.'
        raise OpenL3Error(err_msg.format(filepath))

    # Load models
    if not audio_model:
        audio_model = load_audio_embedding_model(input_repr, content_type,
                                                 audio_embedding_size)
    if not image_model:
        image_model = load_image_embedding_model(input_repr, content_type,
                                                 image_embedding_size)

    audio_suffix, image_suffix = "audio", "image"
    if suffix:
        audio_suffix += "_" + suffix
        image_suffix += "_" + suffix

    audio_list = []
    sr_list = []
    audio_batch_filepath_list = []
    total_audio_batch_size = 0

    image_list = []
    frame_rate_list = []
    image_batch_filepath_list = []

    num_files = len(filepath_list)
    for file_idx, filepath in enumerate(filepath_list):

        if not os.path.exists(filepath):
            raise OpenL3Error('File "{}" could not be found.'.format(filepath))

        if verbose:
            print("openl3: Processing {} ({}/{})".format(
                filepath, file_idx + 1, num_files))

        # Skip if overwriting isn't enabled and output file exists
        audio_output_path = get_output_path(filepath,
                                            audio_suffix + ".npz",
                                            output_dir=output_dir)
        image_output_path = get_output_path(filepath,
                                            image_suffix + ".npz",
                                            output_dir=output_dir)
        skip_audio = os.path.exists(audio_output_path) and not overwrite
        skip_image = os.path.exists(image_output_path) and not overwrite

        if skip_audio and skip_image:
            err_msg = "openl3: {} and {} exist, skipping."
            print(err_msg.format(audio_output_path, image_output_path))
            continue

        try:
            clip = VideoFileClip(filepath,
                                 target_resolution=(256, 256),
                                 audio_fps=TARGET_SR)
            audio = clip.audio.to_soundarray(fps=TARGET_SR)
            images = np.array([frame for frame in clip.iter_frames()])
        except Exception:
            err_msg = 'Could not open file "{}":\n{}'
            raise OpenL3Error(err_msg.format(filepath, traceback.format_exc()))

        if not skip_audio:
            audio_list.append(audio)
            sr_list.append(TARGET_SR)
            audio_batch_filepath_list.append(filepath)
            audio_len = audio.shape[0]
            audio_hop_length = int(audio_hop_size * TARGET_SR)
            num_windows = 1 + max(
                ceil((audio_len - TARGET_SR) / float(audio_hop_length)), 0)
            total_audio_batch_size += num_windows
        else:
            err_msg = "openl3: {} exists, skipping audio embedding extraction."
            print(err_msg.format(audio_output_path))

        if not skip_image:
            image_list.append(images)
            frame_rate_list.append(int(clip.fps))
            image_batch_filepath_list.append(filepath)
        else:
            err_msg = "openl3: {} exists, skipping image embedding extraction."
            print(err_msg.format(image_output_path))

        if (total_audio_batch_size >= audio_batch_size
                or file_idx == (num_files - 1)) and len(audio_list) > 0:
            embedding_list, ts_list \
                = get_audio_embedding(audio_list, sr_list, model=audio_model,
                                      input_repr=input_repr,
                                      content_type=content_type,
                                      embedding_size=audio_embedding_size,
                                      center=audio_center,
                                      hop_size=audio_hop_size,
                                      batch_size=audio_batch_size,
                                      verbose=verbose)
            for fpath, embedding, ts in zip(audio_batch_filepath_list,
                                            embedding_list, ts_list):
                output_path = get_output_path(fpath,
                                              audio_suffix + ".npz",
                                              output_dir=output_dir)

                np.savez(output_path, embedding=embedding, timestamps=ts)
                assert os.path.exists(output_path)

                if verbose:
                    print("openl3: Saved {}".format(output_path))

            audio_list = []
            sr_list = []
            audio_batch_filepath_list = []
            total_audio_batch_size = 0

        if (len(image_list) >= image_batch_size
                or file_idx == (num_files - 1)) and len(image_list) > 0:
            embedding_list, ts_list \
                = get_image_embedding(image_list, frame_rate_list,
                                      model=image_model, input_repr=input_repr,
                                      content_type=content_type,
                                      embedding_size=image_embedding_size,
                                      batch_size=image_batch_size,
                                      verbose=verbose)
            for fpath, embedding, ts in zip(image_batch_filepath_list,
                                            embedding_list, ts_list):
                output_path = get_output_path(fpath,
                                              image_suffix + ".npz",
                                              output_dir=output_dir)

                np.savez(output_path, embedding=embedding, timestamps=ts)
                assert os.path.exists(output_path)

                if verbose:
                    print("openl3: Saved {}".format(output_path))

            image_list = []
            frame_rate_list = []
            image_batch_filepath_list = []
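A hedged usage sketch for the function above, assuming openl3 is installed and exposes process_video_file at the package level; the path and output directory are placeholders.

import openl3

openl3.process_video_file("video.mp4", output_dir="embeddings", verbose=True)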
Example #29
def ffwd_video(path_in,
               path_out,
               checkpoint_dir,
               device_t='/gpu:0',
               batch_size=4):
    video_clip = VideoFileClip(path_in, audio=False)

    # Create a temporary file to store the audio.
    fp = tempfile.NamedTemporaryFile(suffix='.aac')
    temp_audio_file_name = fp.name
    fp.close()

    # Create a temporary file to store the video.
    fp = tempfile.NamedTemporaryFile(suffix='.mp4')
    temp_video_file_name = fp.name
    fp.close()

    # Extract the audio.
    ffmpeg_tools.ffmpeg_extract_audio(path_in, temp_audio_file_name)

    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(
        temp_video_file_name,
        video_clip.size,
        video_clip.fps,
        codec="libx264",
        preset="medium",
        audiofile=None,
        threads=None,
        ffmpeg_params=["-b:v", "2000k"])

    g = tf.Graph()
    soft_config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), g.device(device_t), \
         tf.compat.v1.Session(config=soft_config) as sess:
        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
        img_placeholder = tf.compat.v1.placeholder(tf.float32,
                                                   shape=batch_shape,
                                                   name='img_placeholder')

        preds = src.transform.net(img_placeholder)
        saver = tf.compat.v1.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        X = np.zeros(batch_shape, dtype=np.float32)

        def style_and_write(count):
            for i in range(count, batch_size):
                X[i] = X[count - 1]  # Use last frame to fill X
            _preds = sess.run(preds, feed_dict={img_placeholder: X})
            for i in range(0, count):
                video_writer.write_frame(
                    np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # The number of frames written to X
        for frame in video_clip.iter_frames():
            X[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                style_and_write(frame_count)
                frame_count = 0

        if frame_count != 0:
            style_and_write(frame_count)

        video_writer.close()

        # Merge audio and video
        ffmpeg_tools.ffmpeg_merge_video_audio(temp_video_file_name,
                                              temp_audio_file_name, path_out)

        # Delete temporary files
        os.remove(temp_video_file_name)
        os.remove(temp_audio_file_name)
Example #30
def ffwd_video(path_in,
               path_out,
               checkpoint_dir,
               device_t='/gpu:0',
               batch_size=4):
    ''' feed forward video '''

    # defining video rendering variables
    video_clip = VideoFileClip(path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out,
                                                    video_clip.size,
                                                    video_clip.fps,
                                                    codec='libx264',
                                                    preset='medium',
                                                    bitrate='2000k',
                                                    audiofile=path_in,
                                                    threads=None,
                                                    ffmpeg_params=None)

    # defining tensorflow variables
    g = tf.Graph()
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True

    # starting the tensorflow session
    with g.as_default(), g.device(device_t), tf.Session(
            config=soft_config) as sess:

        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)

        # defining placeholder
        vid_ph = tf.placeholder(tf.float32, shape=batch_shape, name='vid_ph')

        # forward propogation (building the graph)
        preds = transform_net.net(vid_ph)

        # defining saver
        saver = tf.train.Saver()

        # restoring the saved model

        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        x = np.zeros(batch_shape, dtype=np.float32)

        # function to generate styled video (batch images) and writing
        def style_and_write(count):

            # when the final batch is not full, use the last frame
            # (received from .iter_frames) to fill the remaining slots of x
            for i in range(count, batch_size):
                x[i] = x[count - 1]

            # running the graph to style video
            _preds = sess.run(preds, feed_dict={vid_ph: x})

            for i in range(0, count):
                video_writer.write_frame(
                    np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # the frame count written to x

        for frame in video_clip.iter_frames():

            x[frame_count] = frame
            frame_count += 1

            if frame_count == batch_size:

                style_and_write(frame_count)
                frame_count = 0

        # for last batch where no of images is less than the batch_size
        if frame_count != 0:
            style_and_write(frame_count)

        video_writer.close()
Example #31
def stylize_objects(seg_model_path, orig_path_in, style_path_in, path_out,
                    device_t="/gpu:0", target_class=1):
    """Generates a video where objects are segmented out and stylized. An
    outline is also drawn around the person and noise is added in proportion to
    the amount of base.

    Parameters
    ----------
    seg_model_path : str
        The path to the segmentation model. Should be a .pb file.
    orig_path_in : str
        The path to the original un-stylized video file.
    style_path_in : str
        The path to the stylized video file.
    path_out : str
        The path to save the new video with only the objects stylized.
    device_t : str, optional
        The device to run the network on.
    target_class : int, optional
        The target class you want to generate masks for and stylize.

    Example
    -------
    stylize_objects("models/model.pb", "video.mp4", "inter_styled_video.mp4",
                    "styled_video.mp4")
    """
    video_clip = VideoFileClip(orig_path_in, audio=True)
    style_video_clip = VideoFileClip(style_path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out, video_clip.size,
                                                    video_clip.fps,
                                                    codec="libx264",
                                                    preset="medium",
                                                    bitrate="2000k",
                                                    audiofile=orig_path_in,
                                                    threads=None,
                                                    ffmpeg_params=None)
    ch1, ch2 = get_base_bumps(video_clip)

    # load model
    g = tf.Graph()
    with g.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(seg_model_path, "rb") as f:
            serialized_graph = f.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name="")

    # code adapted from https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
    with g.as_default(), g.device(device_t), tf.Session() as sess:
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in ["num_detections", "detection_boxes", "detection_scores",
                    "detection_classes", "detection_masks"]:
            tensor_name = key + ":0"
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)

        # The following processing is only for a single image
        detection_boxes = tf.squeeze(tensor_dict["detection_boxes"], [0])
        detection_masks = tf.squeeze(tensor_dict["detection_masks"], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict["num_detections"][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, video_clip.size[1],
            video_clip.size[0])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict["detection_masks"] = tf.expand_dims(
            detection_masks_reframed, 0)

        image_tensor = tf.get_default_graph().get_tensor_by_name("image_tensor:0")

        pbar = tqdm(total=int(video_clip.fps * video_clip.duration))
        for i, (frame, style_frame) in enumerate(zip(video_clip.iter_frames(),
                                                     style_video_clip.iter_frames())):
            output_dict = sess.run(
                tensor_dict, feed_dict={image_tensor: np.expand_dims(frame, 0)})
            # assume batch size = 1
            classes = output_dict["detection_classes"][0][:int(output_dict["num_detections"][0])]
            # if the target class was not detected, fall back to an all-zero mask
            if target_class not in classes:
                mask = np.zeros((video_clip.size[1], video_clip.size[0]))
                to_style_frame = False
            else:
                mask = merge_classes(output_dict["detection_masks"][0, :, :, :], 1,
                                     classes)
                to_style_frame = True
            mask = draw_random_triangles(mask, size=(ch1[i]*30 + 1e-8))

            outline = Image.fromarray(get_outline(mask))
            mask = Image.fromarray(255*mask)
            nframe = Image.fromarray(frame)
            # can't paste with 0 mask
            if to_style_frame:
                nframe.paste(Image.fromarray(style_frame), mask=mask)
                nframe.paste(outline, mask=outline)

            video_writer.write_frame(nframe)
            pbar.update(1)

        pbar.close()
        video_writer.close()
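
The compositing step in stylize_objects relies on PIL's Image.paste with a mask image: pixels of the stylized frame are copied only where the mask is non-zero. A minimal sketch with synthetic arrays (the 64x64 shapes and the quadrant mask are illustrative assumptions, not taken from the source):

import numpy as np
from PIL import Image

# Synthetic "original" and "stylized" frames.
frame = np.zeros((64, 64, 3), dtype=np.uint8)
style_frame = np.full((64, 64, 3), 255, dtype=np.uint8)

# Binary mask selecting the region to replace (top-left quadrant here).
mask = np.zeros((64, 64), dtype=np.uint8)
mask[:32, :32] = 1

composite = Image.fromarray(frame)
# paste() copies style pixels only where the mode-"L" mask is 255.
composite.paste(Image.fromarray(style_frame), mask=Image.fromarray(255 * mask))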
Exemplo n.º 32
0
        shape = (BATCH_SIZE, video.size[1], video.size[0], 3)
        image = tf.compat.v1.placeholder(tf.float32, shape=shape, name="image")
        pred = transformer.net(image)
        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, STYLE_MODEL)
        images = np.zeros(shape, dtype=np.float32)

        def write(tot):
            # pad an incomplete batch by repeating the previous frame
            for i in range(tot, BATCH_SIZE):
                images[i] = images[i - 1]
            pred_n = sess.run(pred, feed_dict={image: images})
            # keep only the `tot` real frames
            for i in range(tot):
                styled.append(np.clip(pred_n[i], 0, 255).astype(np.uint8))

        tot = 0
        for frame in video.iter_frames():
            images[tot] = frame
            tot += 1
            if tot == BATCH_SIZE:
                write(tot)
                tot = 0

        if tot != 0:
            write(tot)

    end_time = time.time()

    convert_to_video(styled)

    print(f"Execution time {end_time - start_time}")
    print("The video has been styled!")
def ffwd_video(
        path_in,
        path_out,
        checkpoint_dir,
        device_t='/gpu:0',
        batch_size=4,
        data_format='NHWC',
        num_base_channels=32,  # additional CLI parameters
        evaluate=False):
    video_clip = VideoFileClip(path_in, audio=False)
    video_writer = ffmpeg_writer.FFMPEG_VideoWriter(path_out,
                                                    video_clip.size,
                                                    video_clip.fps,
                                                    codec="libx264",
                                                    preset="medium",
                                                    bitrate="2000k",
                                                    audiofile=path_in,
                                                    threads=None,
                                                    ffmpeg_params=None)

    g = tf.Graph()
    soft_config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True
    with g.as_default(), g.device(device_t), \
            tf.compat.v1.Session(config=soft_config) as sess:
        batch_shape = (batch_size, video_clip.size[1], video_clip.size[0], 3)
        img_placeholder = tf.compat.v1.placeholder(tf.float32,
                                                   shape=batch_shape,
                                                   name='img_placeholder')

        if data_format == 'NHWC':
            # NHWC path: feed the placeholder directly
            preds = transform.net(img_placeholder,
                                  data_format=data_format,
                                  num_base_channels=num_base_channels,
                                  evaluate=evaluate)
        else:
            # NCHW path: transpose to channel-first, run the net, transpose back
            img_placeholder_nchw = tf.transpose(a=img_placeholder,
                                                perm=[0, 3, 1, 2])
            preds_nchw = transform.net(img_placeholder_nchw,
                                       data_format=data_format,
                                       num_base_channels=num_base_channels)
            preds = tf.transpose(a=preds_nchw, perm=[0, 2, 3, 1])

        # name the output node so it can be referenced when exporting the graph
        preds = tf.identity(preds, "output")

        saver = tf.compat.v1.train.Saver()
        if os.path.isdir(checkpoint_dir):
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                raise Exception("No checkpoint found...")
        else:
            saver.restore(sess, checkpoint_dir)

        X = np.zeros(batch_shape, dtype=np.float32)

        def style_and_write(count):
            for i in range(count, batch_size):
                X[i] = X[count - 1]  # Use last frame to fill X
            _preds = sess.run(preds, feed_dict={img_placeholder: X})
            for i in range(0, count):
                video_writer.write_frame(
                    np.clip(_preds[i], 0, 255).astype(np.uint8))

        frame_count = 0  # number of frames currently buffered in X
        for frame in video_clip.iter_frames():
            X[frame_count] = frame
            frame_count += 1
            if frame_count == batch_size:
                style_and_write(frame_count)
                frame_count = 0

        if frame_count != 0:
            style_and_write(frame_count)

        video_writer.close()
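
A minimal call sketch for this variant. The keyword names are taken directly from the def above; the file paths and checkpoint directory are placeholders, and it is an assumption here that the surrounding transform.net supports both layouts:

# NHWC is the default; NCHW transposes to channel-first inside the function,
# which some GPU backends prefer.
ffwd_video(path_in="input.mp4",
           path_out="styled.mp4",
           checkpoint_dir="checkpoints/style_model",
           device_t="/gpu:0",
           batch_size=4,
           data_format="NCHW",
           num_base_channels=32,
           evaluate=False)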