Example #1
 def save_content(self, video_output, audio_input, audio_output):
     """Concatnate the content shots with ads removed and save to a video file
     
     Arguments:
         video_output {str} -- path of the output video file
         audio_input {str} -- path of the corresponding audio file
         audio_output {str} -- path of the output audio file
     """
     logger.i('Saving content shots to %s...' % video_output)
     frame_width = self.video_reader.width
     frame_height = self.video_reader.height
     self.video_writer = VideoIO(video_output, frame_width, frame_height,
                                 'w')
     self.audio_writer = AudioIO(audio_input, audio_output, 30)
     content, _ = self.get_content_ads_shots()
     for content_shot in content:
         start, end = content_shot
         size = end - start + 1
         logger.i('Writing frames [%d:%d]...' % (start, end))
         self.video_reader.seek(start)
         for i in range(size):
             self.video_writer.write_frame(self.video_reader.read_frame())
         self.audio_writer.copy_frames(start, size)
     self.video_writer.close()
     self.audio_writer.close()
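A minimal call sketch (this method belongs to the VideoSegment class shown in full in Example #8; the paths and frame size here are hypothetical):

segment = VideoSegment('dataset/video.rgb', 480, 270)
segment.save_content('output/content.rgb', 'dataset/audio.wav', 'output/content.wav')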
Example #2
 def __init__(self,
              path_video_file,
              frame_width,
              frame_height,
              use_saved=True):
     """Initialize the video segmentation module with a video
     
     Arguments:
         path_video_file {str} -- path of video to segment
         frame_width {int} -- frame width of the video
         frame_height {int} -- frame height of the video
     
     Keyword Arguments:
         use_saved {bool} -- whether to use saved feature matrix file or not (default: {True})
     """
     self.video_reader = VideoIO(path_video_file, frame_width, frame_height)
     feature_matrix_path = VideoSegment.get_feature_matrix_path(
         path_video_file)
     if use_saved and os.path.exists(feature_matrix_path):
         self.feature_matrix_A = np.load(feature_matrix_path)
     else:
         self.feature_matrix_A = VideoSegment.get_feature_matrix(
             self.video_reader)
         np.save(feature_matrix_path, self.feature_matrix_A)
     self.u, self.s, self.vh = np.linalg.svd(self.feature_matrix_A,
                                             full_matrices=False)
     # print('shape(U) = %s, shape(s) = %s, shape(V.T) = %s' % (
     #     self.u.shape, self.s.shape, self.vh.shape))
     self.shot_boundaries = self._segment()
     logger.d('shot_boundaries', self.shot_boundaries)
     self.content_shots, self.ads_shots = self._tag_content_ads()
Example #3
 def __init__(self, path_video_file, frame_width, frame_height, brands_to_detect):
     self.video_path = path_video_file
     self.video_reader = VideoIO(
         path_video_file, frame_width, frame_height)
     self.brands_to_detect = brands_to_detect
     brands_path = {k: brands_to_detect[k]['logo'] for k in brands_to_detect}
     self.logo_detector = LogoDetector(brands_path)
     self.logo_data_in_video = []
     self.logo_first_occurences = dict()
     self._detect()
Example #4
 def __init__(self, audio_path, video_path, width, height, fps):
     self.logo_index = 0
     point_path = str.split(sys.argv[1], '.rgb')[0] + '.pkl'
     if os.path.exists(point_path):
         self.logo_array = pickle.load(open(point_path, "rb"))
         logger.i('Loaded logo positions from %s' % point_path)
     else:
         self.logo_array = []
     self.f = wave.open(audio_path, "rb")
     f = self.f
     self.chunk = int(f.getframerate() / fps)
     self.p = pyaudio.PyAudio()
     p = self.p
     self.stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                          channels=f.getnchannels(),
                          rate=f.getframerate(),
                          output=True)
     self.data = f.readframes(self.chunk)
     self.audioPath = audio_path
     self.videoPath = video_path
     self.width = width
     self.height = height
     self.fps = fps
     self.ImgLength = width * height * 3
     self.video_io = VideoIO(video_path, width, height)
     self.frameNum = self.video_io.get_num_frames()
     self.index = 0
     self.root = Tk()
     self.state = 0
     self.old = -1
     self.pilImage = self.video_io.read_frame(self.index)
     self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
     self.label2 = Label(self.root, image=self.tkImage)
     self.label2.pack()
     self.button_fast_backword = Button(self.root,
                                        text='<<',
                                        command=self.fast_backword)
     self.button_play = Button(self.root, text='PLAY', command=self.start)
     self.button_pause = Button(self.root, text='PAUSE', command=self.pause)
     self.button_fast_forword = Button(self.root,
                                       text='>>',
                                       command=self.fast_forword)
     self.button_stop = Button(self.root, text='STOP', command=self.stop)
     self.button_fast_backword.pack(side=LEFT)
     self.button_play.pack(side=LEFT)
     self.button_pause.pack(side=LEFT)
     self.button_fast_forword.pack(side=LEFT)
     self.button_stop.pack(side=LEFT)
     self.label1 = Label(self.root, text="")
     self.label1.pack(side=RIGHT)
     self.root.title('Video Player')
     self.root.mainloop()
Example #5
 def _logo_data_with_ads(self):
     """Generate new logo data (frame indices and polygon areas of logos) in 
     in video with ads inserted, using the old logo data in the no-ads video
     
     Returns:
         list(tuple) -- new logo data
     """
     ads_to_insert = self.logo_first_occurences
     pos_ads_length = []
     for ad_name in ads_to_insert:
         ad = self.brands_to_detect[ad_name]['ad']
         ad_n_frames = VideoIO(
             ad['video'], 
             self.video_reader.width, 
             self.video_reader.height).get_num_frames()
         pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
     pos_ads_length = np.array(sorted(pos_ads_length, key=lambda t: t[0]))
     logger.d('pos_ads_length', pos_ads_length)
     logo_data_with_ads = self.logo_data_in_video.copy()
     logger.d('logo_data_in_video', np.array(self.logo_data_in_video, dtype='object'))
     for t in logo_data_with_ads:
         prev_positions = np.where(pos_ads_length[:, 0] < t[0])[0]
         t[0] += sum(pos_ads_length[prev_positions][:, 1])
     logger.d('logo_data_with_ads', np.array(logo_data_with_ads, dtype='object'))
     return logo_data_with_ads
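For intuition (hypothetical numbers): with ads of 450 and 300 frames inserted at frames 1000 and 3000, a logo originally seen at frame 3500 shifts to 3500 + 450 + 300 = 4250, while a logo at frame 500 keeps its index. The same shift in isolation:

import numpy as np

# (insert position, ad length) pairs, sorted by position -- hypothetical values
pos_ads_length = np.array([(1000, 450), (3000, 300)])
logo_frame = 3500
prev = np.where(pos_ads_length[:, 0] < logo_frame)[0]
logo_frame += pos_ads_length[prev][:, 1].sum()  # -> 4250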
Example #6
 def generate_video_with_ads(self, video_output, audio_input, audio_output):
     """Generate video with ads inserted at the first occurrences of detected logos
     
     Arguments:
         video_output {str} -- the path of desired video output
         audio_input {str} -- the path of the wav input corresponding to self.video_path
         audio_output {str} -- the path of desired wav output along with the video
     """
     video_input = self.video_path
     width = self.video_reader.width
     height = self.video_reader.height
     ads_to_insert = self.logo_first_occurences
     pos_ads = sorted([(ads_to_insert[k], k) for k in ads_to_insert],
                      key=lambda t: t[0])
     output_video_writer = VideoIO(video_output, width, height, 'w')
     output_audio_writer = AudioIO(audio_input, audio_output, 30)
     start = 0
     for pos, ad_name in pos_ads:
         ad = self.brands_to_detect[ad_name]['ad']
         output_video_writer.copy_frames_from(
             video_input, start, pos - start + 1)
         logger.i('Writing frames [%d:%d]...' % (start, pos))
         output_audio_writer.copy_frames(start, pos - start + 1)
         logger.i('Writing ads [%s]...' % ad_name)
         output_video_writer.copy_frames_from(ad['video'])
         output_audio_writer.copy_frames_from(ad['audio'])
         start = pos + 1
     output_video_writer.copy_frames_from(video_input, start)
     output_audio_writer.copy_frames(start)
     output_video_writer.close()
     output_audio_writer.close()
     logger.i('Video with new ads saved to (%s, %s)' % (video_output, audio_output))
     logo_data_with_ads = self._logo_data_with_ads()
     logo_data_path = path_util.get_video_logo_data_path(video_output)
     pickle.dump(logo_data_with_ads, open(logo_data_path, 'wb'))
     logger.i('Logo outlines data saved to %s' % logo_data_path)
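The brands_to_detect mapping this method consumes is not shown in the example; a plausible shape, inferred from the lookups above (names and paths are hypothetical):

brands_to_detect = {
    'starbucks': {
        'logo': 'logos/starbucks.png',  # matched by LogoDetector
        'ad': {
            'video': 'ads/starbucks.rgb',  # spliced in at the logo's first occurrence
            'audio': 'ads/starbucks.wav',
        },
    },
}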
Example #7
import os
from video_io import VideoIO
from data import DATASETS
from data import OUPUTS
from PIL import Image

dataset_idx = 0
dataset = DATASETS[dataset_idx]
video_io = VideoIO(OUPUTS[dataset_idx]['new_ads']['video'], dataset['width'],
                   dataset['height'])


def get_temp_dir():
    return 'temp/'


def get_frame_image_file_path(path_video_file, frame_idx):
    dirname = get_temp_dir()
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = os.path.basename(path_video_file)[:-4]
    filename = '%s_%d.jpg' % (filename, frame_idx)
    return os.path.join(dirname, filename)


def get_output_video_path(filename):
    return os.path.join(get_temp_dir(), '%s.rgb' % filename)


def show(f, save=False):
    img = video_io.read_frame(f)
Example #8
class VideoSegment:
    """Video segmentation module helping to segment and classify content/ad in a video
    """

    FRAME_SIM_MAX = 40
    FRAME_SIM_MIN = 30
    SHOT_SIM_MIN = 0.07

    def __init__(self,
                 path_video_file,
                 frame_width,
                 frame_height,
                 use_saved=True):
        """Initialize the video segmentation module with a video
        
        Arguments:
            path_video_file {str} -- path of video to segment
            frame_width {int} -- frame width of the video
            frame_height {int} -- frame height of the video
        
        Keyword Arguments:
            use_saved {bool} -- whether to use saved feature matrix file or not (default: {True})
        """
        self.video_reader = VideoIO(path_video_file, frame_width, frame_height)
        feature_matrix_path = VideoSegment.get_feature_matrix_path(
            path_video_file)
        if use_saved and os.path.exists(feature_matrix_path):
            self.feature_matrix_A = np.load(feature_matrix_path)
        else:
            self.feature_matrix_A = VideoSegment.get_feature_matrix(
                self.video_reader)
            np.save(feature_matrix_path, self.feature_matrix_A)
        self.u, self.s, self.vh = np.linalg.svd(self.feature_matrix_A,
                                                full_matrices=False)
        # print('shape(U) = %s, shape(s) = %s, shape(V.T) = %s' % (
        #     self.u.shape, self.s.shape, self.vh.shape))
        self.shot_boundaries = self._segment()
        logger.d('shot_boundaries', self.shot_boundaries)
        self.content_shots, self.ads_shots = self._tag_content_ads()

    def _difference_between_frames(self, frame_i, frame_j, kappa=150):
        """Calculate the difference between two frames. The higher difference 
        indicates the more visual differences between the two frames.
        
        Arguments:
            frame_i {int} -- index of first frame
            frame_j {int} -- index of second frame
        
        Keyword Arguments:
            kappa {int} -- a parameter controlling the number of principal
            components used from the feature vectors (default: {150})
        
        Returns:
            float -- difference between two frames
        """
        s, v = self.s, self.vh.T
        diff_square = 0
        for idx in range(kappa):
            diff_square += s[idx] * (v[frame_i, idx] - v[frame_j, idx])**2
        return math.sqrt(diff_square)
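    # The loop above computes a singular-value-weighted distance
    #     diff(i, j) = sqrt(sum_{k < kappa} s[k] * (v[i, k] - v[j, k]) ** 2)
    # between the two frames' rows of V; a vectorized equivalent (sketch) is
    #     math.sqrt(np.sum(s[:kappa] * (v[frame_i, :kappa] - v[frame_j, :kappa]) ** 2))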

    def _is_shot_boundary(self, diff, frame_i):
        """Detect if a given frame is the last frame of a shot given all neighbor 
        differences in the video frames
        
        Arguments:
            diff {array} -- all the neighbor differences
            frame_i {int} -- index of the frame to detect
        
        Returns:
            tuple(bool, int) -- whether the frame is the last frame of a shot,
            and the next frame index to examine
        """
        assert frame_i < len(diff)
        if frame_i == len(diff) - 1 or diff[frame_i + 1] >= self.FRAME_SIM_MAX:
            return True, frame_i + 1
        elif diff[frame_i + 1] < self.FRAME_SIM_MIN:
            return False, frame_i + 1
        else:
            x = frame_i + 2
            while x + 1 < len(diff) and self._difference_between_frames(
                    frame_i + 1, x) >= self.FRAME_SIM_MIN:
                x += 1
            if self._difference_between_frames(frame_i,
                                               x) < self.FRAME_SIM_MAX:
                return True, x + 1
            else:
                return False, frame_i + 1

    def _segment(self):
        """Segment the video file into multiple shots
        
        Returns:
            array -- list of shot boundary frame indices
        """
        shot_boundaries = []
        diff = [0 for _ in range(self.video_reader.get_num_frames())]
        for frame in range(1, len(diff)):
            diff[frame] = self._difference_between_frames(frame,
                                                          frame - 1,
                                                          kappa=60)
        frame = 0
        while frame < len(diff):
            is_shot_boundary, next_frame = self._is_shot_boundary(diff, frame)
            if is_shot_boundary:
                shot_boundaries.append(frame)
            frame = next_frame
        import matplotlib.pyplot as plt
        plt.scatter(np.arange(len(diff)), diff, s=3)
        plt.show()
        return np.array(shot_boundaries, dtype=int)

    def _length_of_psi(self, frame_i):
        v = self.vh.T
        rank_A = np.linalg.matrix_rank(self.feature_matrix_A)
        # norm of the frame's representation over the first rank(A) components
        return np.linalg.norm(v[frame_i, :rank_A])

    def _length_of_singular_weighted_psi(self, frame_i):
        s, v = self.s, self.vh.T
        rank_A = np.linalg.matrix_rank(self.feature_matrix_A)
        # same norm, with each component weighted by its singular value
        return np.linalg.norm(np.multiply(s[:rank_A], v[frame_i, :rank_A]))

    # def _avg_length_of_psi(self, start_frame, end_frame):
    #     psi_array = [self._length_of_psi(f)
    #         for f in range(start_frame, end_frame + 1, 2)]
    #     return np.average(psi_array)

    # def _avg_length_of_singular_weighted_psi(self, start_frame, end_frame):
    #     sw_psi_array = [self._length_of_singular_weighted_psi(f)
    #         for f in range(start_frame, end_frame + 1, 2)]
    #     return np.average(sw_psi_array)

    def _avg_feature_vector(self, start_frame, end_frame):
        """Get an average feature vector for a series of consecutive frames
        
        Arguments:
            start_frame {int} -- index of the first frame in the frames
            end_frame {int} -- index of the last frame in the frames
        
        Returns:
            array -- average feature vector
        """
        v = self.vh.T
        vectors = v[start_frame:end_frame + 1]
        return np.average(vectors, axis=0)

    def _calc_shots_differences(self):
        """Calculate the differences between every each two shots
        
        Returns:
            array -- the differences between every each two shots
        """
        assert self.shot_boundaries is not None
        shot_first_frame = 0
        shot_vectors = []
        for shot_last_frame in self.shot_boundaries:
            avg_feature_vector = self._avg_feature_vector(
                shot_first_frame, shot_last_frame)
            shot_vectors.append(avg_feature_vector)
            shot_first_frame = shot_last_frame + 1
        len_shots = len(self.shot_boundaries)
        differences = [[
            np.linalg.norm(shot_vectors[i] - shot_vectors[j])
            for j in range(len_shots)
        ] for i in range(len_shots)]
        import matplotlib.pyplot as plt
        plt.imshow(differences)
        plt.show()
        return differences

    def _get_shot(self, shot_idx):
        """Get the first and last frame indices in a shot 
        
        Arguments:
            shot_idx {int} -- index of the shot
        
        Returns:
            tuple(int, int) -- the first and last frame indices in the shot
        """
        assert self.shot_boundaries is not None
        if shot_idx == 0:
            return 0, self.shot_boundaries[shot_idx]
        else:
            return (self.shot_boundaries[shot_idx - 1] + 1,
                    self.shot_boundaries[shot_idx])

    def _get_shot_duration(self, shot_idx):
        """Get the number of frames in a shot
        
        Arguments:
            shot_idx {int} -- index of the shot
        
        Returns:
            int -- number of frames in the shot
        """
        start, end = self._get_shot(shot_idx)
        return end - start + 1

    def _get_shot_set_duration(self, shot_indices):
        """Get the number of frames in a shot
        
        Arguments:
            shot_indices {array} -- indices of the shots
        
        Returns:
            int -- number of frames in the shots
        """
        shots_duration = [self._get_shot_duration(i) for i in shot_indices]
        return np.sum(shots_duration)

    def _get_longest_shot_idx(self):
        """Get the longest shot in the shots
        
        Returns:
            int -- index of the longest shot
        """
        shot_lengths = [
            self._get_shot_duration(i)
            for i in range(len(self.shot_boundaries))
        ]
        return np.argmax(shot_lengths)

    def _tag_content_ads(self, threshold=SHOT_SIM_MIN):
        """Tag content/ad to the shots
        
        Keyword Arguments:
            threshold {float} -- similarity threshold used to split shots into two classes (default: {SHOT_SIM_MIN})
        
        Returns:
            tuple(list, list) -- indices of content shots and ad shots
        """
        # we assume the longest shot in a video is not an ad
        _longest_shot_idx = self._get_longest_shot_idx()
        logger.d('_longest_shot_idx', _longest_shot_idx)
        _similarity = np.array(
            self._calc_shots_differences()[_longest_shot_idx])
        logger.d('_similarity', _similarity)
        _one_class = np.where(_similarity < threshold)[0]
        _other_class = np.where(_similarity >= threshold)[0]
        if self._get_shot_set_duration(
                _one_class) > self._get_shot_set_duration(_other_class):
            return _one_class, _other_class
        else:
            return _other_class, _one_class

    def get_all_shots(self):
        """Get the first and last frames of all shots
        
        Returns:
            list(tuple(int, int)) -- first and last frames of all shots
        """
        return [self._get_shot(i) for i in range(len(self.shot_boundaries))]

    def get_content_ads_shots(self):
        """Get the first and last frames of content and ad shots
        
        Returns:
            tuple(array, array) -- (first, last) frame pairs for content shots and ad shots
        """
        content_shots = [self._get_shot(i) for i in self.content_shots]
        ads_shots = [self._get_shot(i) for i in self.ads_shots]
        return np.array(content_shots,
                        dtype='int,int'), np.array(ads_shots, dtype='int,int')

    def save_content(self, video_output, audio_input, audio_output):
        """Concatnate the content shots with ads removed and save to a video file
        
        Arguments:
            video_output {str} -- path of the output video file
            audio_input {str} -- path of the corresponding audio file
            audio_output {str} -- path of the output audio file
        """
        logger.i('Saving content shots to %s...' % video_output)
        frame_width = self.video_reader.width
        frame_height = self.video_reader.height
        self.video_writer = VideoIO(video_output, frame_width, frame_height,
                                    'w')
        self.audio_writer = AudioIO(audio_input, audio_output, 30)
        content, _ = self.get_content_ads_shots()
        for content_shot in content:
            start, end = content_shot
            size = end - start + 1
            logger.i('Writing frames [%d:%d]...' % (start, end))
            self.video_reader.seek(start)
            for i in range(size):
                self.video_writer.write_frame(self.video_reader.read_frame())
            self.audio_writer.copy_frames(start, size)
        self.video_writer.close()
        self.audio_writer.close()

    @staticmethod
    def get_feature_matrix_path(path_video_file):
        dirname = os.path.dirname(path_video_file)
        filename = os.path.basename(path_video_file)
        feature_matrix_filename = '%s_feature.npy' % filename[:-4]
        return os.path.join(dirname, feature_matrix_filename)

    @staticmethod
    def create_binned_histograms(frame, n_bins=5):
        NUM_CHANNELS = 3
        result = []
        bins = [int(256 / n_bins * i) for i in range(n_bins + 1)]
        bins_3d = [bins, bins, bins]
        block_height = int(frame.height / 3)
        block_width = int(frame.width / 3)
        image_arr = np.array(frame)
        for i in range(3):
            for j in range(3):
                block_starting_h = i * block_height
                block_starting_w = j * block_width
                # print("block_starting_h =", block_starting_h)
                # print("block_starting_w =", block_starting_w)
                block = image_arr[block_starting_h:block_starting_h +
                                  block_height,
                                  block_starting_w:block_starting_w +
                                  block_width]
                # print('block =', block)
                hist, _ = np.histogramdd(block.reshape(
                    block_height * block_width, NUM_CHANNELS),
                                         bins=bins_3d)
                # print('hist =', hist.astype(int))
                result.extend(hist.astype(int).flatten())
        return result
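    # With n_bins=5 each block's 3-D color histogram has 5**3 = 125 cells, so the
    # 3x3 grid of blocks yields a 9 * 125 = 1125-dimensional feature vector per
    # frame; get_feature_matrix below stacks these as the columns of A.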

    @staticmethod
    def get_feature_matrix(video_io):
        feature_matrix_A = []
        num_frames = video_io.get_num_frames()
        logger.d('Total # of frames', num_frames)
        for i in range(num_frames):
            logger.i('Getting feature matrix for frame #%d...' % i)
            frame = video_io.read_frame()
            if frame is None:
                break
            else:
                feature = VideoSegment.create_binned_histograms(frame)
                feature_matrix_A.append(feature)
        feature_matrix_A = np.transpose(feature_matrix_A)
        logger.d('shape(A)', feature_matrix_A.shape)
        return feature_matrix_A
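A short end-to-end sketch for the class (paths and frame size are hypothetical; note that _segment() and _calc_shots_differences() also open matplotlib windows as written):

segment = VideoSegment('dataset/video.rgb', 480, 270, use_saved=True)
print(segment.shot_boundaries)   # last-frame index of each shot
print(segment.get_all_shots())   # (first, last) frame pairs for every shot
content, ads = segment.get_content_ads_shots()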
Example #9
class VideoPlayer:
    def __init__(self, audio_path, video_path, width, height, fps):
        self.logo_index = 0
        point_path = str.split(sys.argv[1], '.rgb')[0] + '.pkl'
        if os.path.exists(point_path):
            self.logo_array = pickle.load(open(point_path, "rb"))
            logger.i('Loaded logo positions from %s' % point_path)
        else:
            self.logo_array = []
        self.f = wave.open(audio_path, "rb")
        f = self.f
        self.chunk = int(f.getframerate() / fps)
        self.p = pyaudio.PyAudio()
        p = self.p
        self.stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                             channels=f.getnchannels(),
                             rate=f.getframerate(),
                             output=True)
        self.data = f.readframes(self.chunk)
        self.audioPath = audio_path
        self.videoPath = video_path
        self.width = width
        self.height = height
        self.fps = fps
        self.ImgLength = width * height * 3
        self.video_io = VideoIO(video_path, width, height)
        self.frameNum = self.video_io.get_num_frames()
        self.index = 0
        self.root = Tk()
        self.state = 0
        self.old = -1
        self.pilImage = self.video_io.read_frame(self.index)
        self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
        self.label2 = Label(self.root, image=self.tkImage)
        self.label2.pack()
        self.button_fast_backword = Button(self.root,
                                           text='<<',
                                           command=self.fast_backword)
        self.button_play = Button(self.root, text='PLAY', command=self.start)
        self.button_pause = Button(self.root, text='PAUSE', command=self.pause)
        self.button_fast_forword = Button(self.root,
                                          text='>>',
                                          command=self.fast_forword)
        self.button_stop = Button(self.root, text='STOP', command=self.stop)
        self.button_fast_backword.pack(side=LEFT)
        self.button_play.pack(side=LEFT)
        self.button_pause.pack(side=LEFT)
        self.button_fast_forword.pack(side=LEFT)
        self.button_stop.pack(side=LEFT)
        self.label1 = Label(self.root, text="")
        self.label1.pack(side=RIGHT)
        self.root.title('Video Player')
        self.root.mainloop()

    def fast_forword(self):
        self.index = self.index + 150  # 5s
        self.old = self.index - 1
        if self.index >= self.frameNum:
            self.index = self.frameNum - 1
        while len(self.logo_array
                  ) > self.logo_index and self.index > self.logo_array[
                      self.logo_index][0]:
            self.logo_index = self.logo_index + 1
        self.start()

    def fast_backword(self):
        self.index = self.index - 150  # 5s
        self.old = self.index - 1
        if self.index < 0:
            self.index = 0
        while self.logo_index > 0 and self.index <= self.logo_array[
                self.logo_index - 1][0]:
            self.logo_index = self.logo_index - 1

    def read_wav(self):
        self.f = wave.open(self.audioPath, "rb")
        f = self.f
        self.p = pyaudio.PyAudio()
        p = self.p
        self.stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                             channels=f.getnchannels(),
                             rate=f.getframerate(),
                             output=True)

    def videoplayer(self):
        if (self.index >= self.frameNum):
            self.stop()
        if self.state:
            self.root.after(1, self.videoplayer)
        if self.state and self.index > self.old:
            self.old = self.index
            self.pilImage = self.video_io.read_frame(self.index)
            self.label1.configure(text="")
            # (index,logo, points)
            if (len(self.logo_array) > self.logo_index
                    and (self.index - self.logo_array[self.logo_index][0]) < 2
                    and
                (self.index - self.logo_array[self.logo_index][0]) > -1):
                points = np.array(self.logo_array[self.logo_index][2])
                logo_name = self.logo_array[self.logo_index][1]
                self.label1.configure(text=logo_name)
                self.pilImage = cv2.cvtColor(np.asarray(self.pilImage),
                                             cv2.COLOR_RGB2BGR)
                cv2.polylines(self.pilImage, [points], 1, (0, 0, 255), 5)
                self.pilImage = Image.fromarray(
                    cv2.cvtColor(self.pilImage, cv2.COLOR_BGR2RGB))
                logger.i('Showing logo box [%s] at %s' %
                         (logo_name, points.tolist()))
                self.logo_index = self.logo_index + 1
            self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
            self.label2.configure(image=self.tkImage)

    def audioplayer(self):
        while self.state:
            if int(self.index * self.f.getframerate() /
                   self.fps) > self.f.getnframes():
                self.stop()
            else:
                self.f.setpos(
                    int(self.index * self.f.getframerate() / self.fps))
                self.data = self.f.readframes(self.chunk)
                self.index = self.index + 1
                self.stream.write(self.data)

    def start(self):
        logger.d("start")
        if self.state == 0:
            self.state = 1
            try:
                _thread.start_new_thread(self.videoplayer, ())
                _thread.start_new_thread(self.audioplayer, ())
            except Exception:
                logger.e("Error: unable to start thread")

    def pause(self):
        logger.d("pause")
        self.state = 0

    def stop(self):
        logger.d("stop")
        self.state = 0
        self.old = -1
        self.index = 0
        self.logo_index = 0
        self.pilImage = self.video_io.read_frame(self.index)
        self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
        self.label2.configure(image=self.tkImage)
        self.root.update_idletasks()
        self.read_wav()
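A hedged launch sketch: the constructor blocks inside Tk's mainloop, and it also derives a logo-positions .pkl path from sys.argv[1], so the .rgb video path is expected as the first CLI argument (all values here are hypothetical):

# python player.py dataset/video.rgb
VideoPlayer('dataset/audio.wav', 'dataset/video.rgb', 480, 270, 30)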
Example #10
class VideoLogoDetect:

    SKIP_FRAME = 2

    def __init__(self, path_video_file, frame_width, frame_height, brands_to_detect):
        self.video_path = path_video_file
        self.video_reader = VideoIO(
            path_video_file, frame_width, frame_height)
        self.brands_to_detect = brands_to_detect
        brands_path = {k: brands_to_detect[k]['logo'] for k in brands_to_detect}
        self.logo_detector = LogoDetector(brands_path)
        self.logo_data_in_video = []
        self.logo_first_occurences = dict()
        self._detect()

    def _detect(self):
        """Detect logos in all frames 
        """
        while self.video_reader.get_next_frame_idx() < self.video_reader.get_num_frames():
            frame_idx = self.video_reader.get_next_frame_idx()
            logger.d('Detecting logo in %d' % frame_idx)
            pil_image = self.video_reader.read_frame()
            brand_areas = self.logo_detector.detect(pil_image)
            if len(brand_areas) == 0:
                logger.i('Frame[%d]: no logo detected' % frame_idx)
            else:
                for logo_name, logo_poly in brand_areas:
                    logger.i('Frame[%d]: logo [%s] at area %s' % (
                        frame_idx, logo_name, logo_poly.tolist()))
                    self.logo_data_in_video.append(
                        [frame_idx, logo_name, logo_poly.tolist()])
                    if logo_name not in self.logo_first_occurences:
                        self.logo_first_occurences[logo_name] = frame_idx
            self.video_reader.skip_frame(VideoLogoDetect.SKIP_FRAME)
        logo_data_path = path_util.get_video_logo_data_path(self.video_path)
        pickle.dump(self.logo_data_in_video, open(logo_data_path, 'wb'))
        logger.i('Logo detection data saved to %s' % logo_data_path)
    
    def _logo_data_with_ads(self):
        """Generate new logo data (frame indices and polygon areas of logos) in 
        in video with ads inserted, using the old logo data in the no-ads video
        
        Returns:
            list(tuple) -- new logo data
        """
        ads_to_insert = self.logo_first_occurences
        pos_ads_length = []
        for ad_name in ads_to_insert:
            ad = self.brands_to_detect[ad_name]['ad']
            ad_n_frames = VideoIO(
                ad['video'], 
                self.video_reader.width, 
                self.video_reader.height).get_num_frames()
            pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
        pos_ads_length = np.array(sorted(pos_ads_length, key=lambda t: t[0]))
        logger.d('pos_ads_length', pos_ads_length)
        logo_data_with_ads = self.logo_data_in_video.copy()
        logger.d('logo_data_in_video', np.array(self.logo_data_in_video, dtype='object'))
        for t in logo_data_with_ads:
            prev_positions = np.where(pos_ads_length[:, 0] < t[0])[0]
            t[0] += sum(pos_ads_length[prev_positions][:, 1])
        logger.d('logo_data_with_ads', np.array(logo_data_with_ads, dtype='object'))
        return logo_data_with_ads
    
    def generate_video_with_ads(self, video_output, audio_input, audio_output):
        """Generate video with ads inserted at the first occurrences of detected logos
        
        Arguments:
            video_output {str} -- the path of desired video output
            audio_input {str} -- the path of the wav input corresponding to self.video_path
            audio_output {str} -- the path of desired wav output along with the video
        """
        video_input = self.video_path
        width = self.video_reader.width
        height = self.video_reader.height
        ads_to_insert = self.logo_first_occurences
        pos_ads = sorted([(ads_to_insert[k], k) for k in ads_to_insert],
                         key=lambda t: t[0])
        output_video_writer = VideoIO(video_output, width, height, 'w')
        output_audio_writer = AudioIO(audio_input, audio_output, 30)
        start = 0
        for pos, ad_name in pos_ads:
            ad = self.brands_to_detect[ad_name]['ad']
            output_video_writer.copy_frames_from(
                video_input, start, pos - start + 1)
            logger.i('Writing frames [%d:%d]...' % (start, pos))
            output_audio_writer.copy_frames(start, pos - start + 1)
            logger.i('Writing ads [%s]...' % ad_name)
            output_video_writer.copy_frames_from(ad['video'])
            output_audio_writer.copy_frames_from(ad['audio'])
            start = pos + 1
        output_video_writer.copy_frames_from(video_input, start)
        output_audio_writer.copy_frames(start)
        output_video_writer.close()
        output_audio_writer.close()
        logger.i('Video with new ads saved to (%s, %s)' % (video_output, audio_output))
        logo_data_with_ads = self._logo_data_with_ads()
        logo_data_path = path_util.get_video_logo_data_path(video_output)
        pickle.dump(logo_data_with_ads, open(logo_data_path, 'wb'))
        logger.i('Logo outlines data saved to %s' % logo_data_path)
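The pickle written here (and by _detect above) is a plain list of [frame_idx, logo_name, polygon_points] entries; the VideoPlayer in Example #9 loads the same kind of .pkl to draw logo outlines. A round-trip sketch (path hypothetical):

import pickle

with open('output/with_ads.pkl', 'rb') as fin:
    logo_data = pickle.load(fin)  # e.g. [[4250, 'starbucks', [[x0, y0], ...]], ...]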
Example #11
import path_util

from video_io import VideoIO
from logo_detector import LogoDetector
from PIL import Image
from data import DATASETS
from data import BRANDS
from logger import logger

logger.set_level('i')

# initializing
dataset_idx = 0
dataset = DATASETS[dataset_idx]
brands_to_detect = {k: BRANDS[k]['logo'] for k in dataset['brands_to_detect']}
video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])

# detect
logo_detector = LogoDetector(brands_to_detect)
logo_data_in_video = []
while video_io.get_next_frame_idx() < video_io.get_num_frames():
    frame_idx = video_io.get_next_frame_idx()
    logger.d('Detecting logo in %d' % frame_idx)
    pil_image = video_io.read_frame()
    brand_areas = logo_detector.detect(pil_image)
    if len(brand_areas) == 0:
        logger.i('Frame[%d]: no logo detected' % frame_idx)
    else:
        for logo_name, logo_poly in brand_areas:
            logger.i('Frame[%d]: logo [%s] at area %s' % (
                frame_idx, logo_name, logo_poly.tolist()))
Example #12
import cv2 as cv
import numpy as np

from PIL import Image
from test_data import DATASETS
from test_data import BRANDS
from video_io import VideoIO
import util

MIN_MATCH_COUNT = 10
MIN_RANSAC_MATCH_COUNT = 5

dataset_idx = 2
logo_name = 'ae'
dataset = DATASETS[dataset_idx]
logo_frame = dataset['brand_frames'][logo_name]
logo_frame = 2501
logo_path = BRANDS[logo_name]['logo']

video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])
pil_image = video_io.read_frame(logo_frame).convert('RGB')
_frame = np.array(pil_image)
frame_img = cv.cvtColor(_frame, cv.COLOR_RGB2GRAY)
_logo_img = cv.imread(logo_path)  # cv.imread returns BGR
logo_img = cv.cvtColor(_logo_img, cv.COLOR_BGR2GRAY)

img1 = logo_img
img2 = frame_img

# do sift
sift = cv.xfeatures2d.SIFT_create(edgeThreshold=10)
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
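The snippet is cut off during FLANN setup; the usual continuation of this OpenCV matching pattern (a sketch, not part of the original) builds the matcher and applies Lowe's ratio test:

search_params = dict(checks=50)
flann = cv.FlannBasedMatcher(index_params, search_params)
matches = flann.knnMatch(des1, des2, k=2)
# keep matches whose best distance clearly beats the second best
good = [m for m, n in matches if m.distance < 0.7 * n.distance]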