def save_content(self, video_output, audio_input, audio_output):
    """Concatenate the content shots with ads removed and save to a video file

    The writers are kept on `self.video_writer` / `self.audio_writer` and are
    always closed, even when copying a shot fails part-way through.

    Arguments:
        video_output {str} -- path of the output video file
        audio_input {str} -- path of the corresponding audio file
        audio_output {str} -- path of the output audio file
    """
    logger.i('Saving content shots to %s...' % video_output)
    frame_width = self.video_reader.width
    frame_height = self.video_reader.height

    self.video_writer = VideoIO(video_output, frame_width, frame_height, 'w')
    try:
        self.audio_writer = AudioIO(audio_input, audio_output, 30)
        try:
            content, _ = self.get_content_ads_shots()
            for start, end in content:
                # shots are stored as inclusive (first, last) frame indices
                size = end - start + 1
                logger.i('Writing frames [%d:%d]...' % (start, end))
                self.video_reader.seek(start)
                for _ in range(size):
                    self.video_writer.write_frame(
                        self.video_reader.read_frame())
                self.audio_writer.copy_frames(start, size)
        finally:
            self.audio_writer.close()
    finally:
        self.video_writer.close()
def __init__(self, path_video_file, frame_width, frame_height,
             use_saved=True):
    """Set up the segmentation module for one video

    Loads (or computes and saves) the feature matrix, takes its reduced SVD,
    then segments the video into shots and tags them as content or ads.

    Arguments:
        path_video_file {str} -- path of video to segment
        frame_width {int} -- frame width of the video
        frame_height {int} -- frame height of the video

    Keyword Arguments:
        use_saved {bool} -- whether to use saved feature matrix file or not
            (default: {True})
    """
    self.video_reader = VideoIO(path_video_file, frame_width, frame_height)

    cache_path = VideoSegment.get_feature_matrix_path(path_video_file)
    cache_usable = use_saved and os.path.exists(cache_path)
    if not cache_usable:
        # no usable cache: compute the matrix and store it for the next run
        self.feature_matrix_A = VideoSegment.get_feature_matrix(
            self.video_reader)
        np.save(cache_path, self.feature_matrix_A)
    else:
        self.feature_matrix_A = np.load(cache_path)

    decomposition = np.linalg.svd(self.feature_matrix_A,
                                  full_matrices=False)
    self.u, self.s, self.vh = decomposition

    self.shot_boundaries = self._segment()
    logger.d('shot_boundaries', self.shot_boundaries)

    tagged = self._tag_content_ads()
    self.content_shots, self.ads_shots = tagged
def __init__(self, path_video_file, frame_width, frame_height,
             brands_to_detect):
    """Open the video, build the logo detector and run detection right away

    Arguments:
        path_video_file {str} -- path of the video to scan
        frame_width {int} -- frame width of the video
        frame_height {int} -- frame height of the video
        brands_to_detect {dict} -- per-brand settings; each value is indexed
            with the 'logo' key here
    """
    self.video_path = path_video_file
    self.video_reader = VideoIO(path_video_file, frame_width, frame_height)
    self.brands_to_detect = brands_to_detect

    # brand name -> value stored under that brand's 'logo' key
    logo_by_brand = {}
    for brand in brands_to_detect:
        logo_by_brand[brand] = brands_to_detect[brand]['logo']
    self.logo_detector = LogoDetector(logo_by_brand)

    self.logo_data_in_video = []
    self.logo_first_occurences = {}
    self._detect()
def __init__(self, audio_path, video_path, width, height, fps):
    """Build the player window for a video and its wav track, then run it

    The constructor blocks in the Tk main loop until the window is closed.

    Arguments:
        audio_path {str} -- path of the wav file to play
        video_path {str} -- path of the video file to play
        width {int} -- frame width of the video
        height {int} -- frame height of the video
        fps {number} -- frames per second; one audio chunk is
            framerate / fps samples
    """
    self.logo_index = 0
    # The logo file is looked up from the video path handed to the
    # constructor (text before '.rgb' plus '.pkl'), not from sys.argv, so the
    # player also works when it is not started from the command line.
    point_path = video_path.split('.rgb')[0] + '.pkl'
    if os.path.exists(point_path):
        # NOTE: pickle executes code on load; only open trusted files.
        with open(point_path, "rb") as logo_file:
            self.logo_array = pickle.load(logo_file)
        logger.i('Loaded logo positions from % s' % point_path)
    else:
        self.logo_array = []

    self.f = wave.open(audio_path, "rb")
    f = self.f
    self.chunk = int(f.getframerate() / fps)
    self.p = pyaudio.PyAudio()
    p = self.p
    self.stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                         channels=f.getnchannels(),
                         rate=f.getframerate(),
                         output=True)
    self.data = f.readframes(self.chunk)

    self.audioPath = audio_path
    self.videoPath = video_path
    self.width = width
    self.height = height
    self.fps = fps
    self.ImgLength = width * height * 3
    self.video_io = VideoIO(video_path, width, height)
    self.frameNum = self.video_io.get_num_frames()
    self.index = 0

    self.root = Tk()
    self.state = 0   # 1 while playing, 0 otherwise
    self.old = -1    # index of the last frame that was drawn
    self.pilImage = self.video_io.read_frame(self.index)
    self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
    self.label2 = Label(self.root, image=self.tkImage)
    self.label2.pack()

    self.button_fast_backword = Button(self.root, text='<<',
                                       command=self.fast_backword)
    self.button_play = Button(self.root, text='PLAY', command=self.start)
    self.button_pause = Button(self.root, text='PAUSE', command=self.pause)
    self.button_fast_forword = Button(self.root, text='>>',
                                      command=self.fast_forword)
    self.button_stop = Button(self.root, text='STOP', command=self.stop)
    self.button_fast_backword.pack(side=LEFT)
    self.button_play.pack(side=LEFT)
    self.button_pause.pack(side=LEFT)
    self.button_fast_forword.pack(side=LEFT)
    self.button_stop.pack(side=LEFT)

    self.label1 = Label(self.root, text="")
    self.label1.pack(side=RIGHT)
    self.root.title('Video Player')
    self.root.mainloop()
def _logo_data_with_ads(self):
    """Generate new logo data (frame indices and polygon areas of logos) for
    the video with ads inserted, using the logo data of the no-ads video

    Every entry gets its frame index shifted by the total length of the ads
    inserted at a position strictly before that frame. New entries are built
    for the result, so `self.logo_data_in_video` is left untouched and the
    method can be called repeatedly.

    Returns:
        list(list) -- new logo data, one [frame_idx, ...] entry per detection
    """
    ads_to_insert = self.logo_first_occurences
    pos_ads_length = []
    for ad_name in ads_to_insert:
        ad = self.brands_to_detect[ad_name]['ad']
        ad_n_frames = VideoIO(
            ad['video'], self.video_reader.width,
            self.video_reader.height).get_num_frames()
        pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
    # reshape keeps the array two-dimensional even when there are no ads
    pos_ads_length = np.array(
        sorted(pos_ads_length, key=lambda t: t[0])).reshape(-1, 2)
    logger.d('pos_ads_length', pos_ads_length)
    logger.d('logo_data_in_video',
             np.array(self.logo_data_in_video, dtype='object'))

    logo_data_with_ads = []
    for entry in self.logo_data_in_video:
        frame_idx = entry[0]
        inserted_before = pos_ads_length[:, 0] < frame_idx
        # int() keeps plain Python ints in the data that gets pickled
        offset = int(pos_ads_length[inserted_before, 1].sum())
        logo_data_with_ads.append([frame_idx + offset] + list(entry[1:]))
    logger.d('logo_data_with_ads',
             np.array(logo_data_with_ads, dtype='object'))
    return logo_data_with_ads
def generate_video_with_ads(self, video_output, audio_input, audio_output):
    """Generate video with ads inserted at the first occurrences of detected
    logos

    Besides the video and audio files, the shifted logo data is pickled to
    the path returned by path_util.get_video_logo_data_path(video_output).

    Arguments:
        video_output {str} -- the path of desired video output
        audio_input {str} -- the path of the wav input corresponding to
            self.video_path
        audio_output {str} -- the path of desired wav output along with the
            video
    """
    video_input = self.video_path
    width = self.video_reader.width
    height = self.video_reader.height
    ads_to_insert = self.logo_first_occurences
    pos_ads = sorted([(ads_to_insert[k], k) for k in ads_to_insert],
                     key=lambda t: t[0])

    output_video_writer = VideoIO(video_output, width, height, 'w')
    try:
        output_audio_writer = AudioIO(audio_input, audio_output, 30)
        try:
            start = 0
            for pos, ad_name in pos_ads:
                ad = self.brands_to_detect[ad_name]['ad']
                # Two logos can first appear in the same frame; the segment
                # before the second ad is then empty, so no copy with a zero
                # frame count is requested.
                if pos >= start:
                    logger.i('Writing frames [%d:%d]...' % (start, pos))
                    output_video_writer.copy_frames_from(
                        video_input, start, pos - start + 1)
                    output_audio_writer.copy_frames(start, pos - start + 1)
                logger.i('Writing ads [%s]...' % ad_name)
                output_video_writer.copy_frames_from(ad['video'])
                output_audio_writer.copy_frames_from(ad['audio'])
                start = pos + 1
            # remainder of the source after the last inserted ad
            output_video_writer.copy_frames_from(video_input, start)
            output_audio_writer.copy_frames(start)
        finally:
            output_audio_writer.close()
    finally:
        output_video_writer.close()
    logger.i('Video with new ads saved to (%s, %s)' %
             (video_output, audio_output))

    logo_data_with_ads = self._logo_data_with_ads()
    logo_data_path = path_util.get_video_logo_data_path(video_output)
    # the context manager guarantees the pickle is flushed and closed
    with open(logo_data_path, 'wb') as logo_data_file:
        pickle.dump(logo_data_with_ads, logo_data_file)
    logger.i('Logo outlines data saved to %s' % logo_data_path)
import os

from video_io import VideoIO
from data import DATASETS
from data import OUPUTS
from PIL import Image

# Dataset whose 'new_ads' output video is opened for inspection.
dataset_idx = 0
dataset = DATASETS[dataset_idx]
video_io = VideoIO(OUPUTS[dataset_idx]['new_ads']['video'],
                   dataset['width'], dataset['height'])


def get_temp_dir():
    """Return the directory used for temporary files."""
    return 'temp/'


def get_frame_image_file_path(path_video_file, frame_idx):
    """Return the jpg path for one frame, creating the temp directory.

    The file is named '<video name without extension>_<frame_idx>.jpg'.
    """
    dirname = get_temp_dir()
    # exist_ok avoids the race between an existence check and the creation
    os.makedirs(dirname, exist_ok=True)
    # splitext drops the extension whatever its length
    filename = os.path.splitext(os.path.basename(path_video_file))[0]
    filename = '%s_%d.jpg' % (filename, frame_idx)
    return os.path.join(dirname, filename)


def get_output_video_path(filename):
    """Return the (directory, '<filename>.rgb') pair for an output video.

    NOTE(review): this returns a tuple rather than a joined path; confirm
    that callers expect that.
    """
    return (get_temp_dir(), '%s.rgb' % filename)


def show(f, save=False):
    # NOTE(review): only this first statement is visible in this view;
    # `save` is not used by it.
    img = video_io.read_frame(f)
class VideoSegment:
    """Video segmentation module helping to segment and classify
    content/ad in a video
    """

    # Thresholds on the frame difference (see _is_shot_boundary) and on the
    # shot difference (see _tag_content_ads).
    FRAME_SIM_MAX = 40
    FRAME_SIM_MIN = 30
    SHOT_SIM_MIN = 0.07

    def __init__(self, path_video_file, frame_width, frame_height,
                 use_saved=True):
        """Initialize the video segmentation module with a video

        Arguments:
            path_video_file {str} -- path of video to segment
            frame_width {int} -- frame width of the video
            frame_height {int} -- frame height of the video

        Keyword Arguments:
            use_saved {bool} -- whether to use saved feature matrix file or
                not (default: {True})
        """
        self.video_reader = VideoIO(path_video_file, frame_width,
                                    frame_height)
        feature_matrix_path = VideoSegment.get_feature_matrix_path(
            path_video_file)
        if use_saved and os.path.exists(feature_matrix_path):
            self.feature_matrix_A = np.load(feature_matrix_path)
        else:
            self.feature_matrix_A = VideoSegment.get_feature_matrix(
                self.video_reader)
            np.save(feature_matrix_path, self.feature_matrix_A)
        self.u, self.s, self.vh = np.linalg.svd(self.feature_matrix_A,
                                                full_matrices=False)
        self.shot_boundaries = self._segment()
        logger.d('shot_boundaries', self.shot_boundaries)
        self.content_shots, self.ads_shots = self._tag_content_ads()

    def _difference_between_frames(self, frame_i, frame_j, kappa=150):
        """Calculate the difference between two frames. The higher difference
        indicates the more visual differences between the two frames.

        Arguments:
            frame_i {int} -- index of first frame
            frame_j {int} -- index of second frame

        Keyword Arguments:
            kappa {int} -- number of leading singular components used; it is
                capped at the number of available singular values
                (default: {150})

        Returns:
            float -- difference between two frames
        """
        s, v = self.s, self.vh.T
        # short videos yield fewer than kappa singular values
        k = min(kappa, len(s))
        delta = v[frame_i, :k] - v[frame_j, :k]
        return math.sqrt(float(np.dot(s[:k], delta ** 2)))

    def _is_shot_boundary(self, diff, frame_i):
        """Detect if a given frame is the last frame of a shot given all
        neighbor differences in the video frames

        Arguments:
            diff {array} -- all the neighbor differences
            frame_i {int} -- index of the frame to detect

        Returns:
            tuple(bool, int) -- whether the frame is the last frame of a
                shot, and the index of the next frame to examine
        """
        assert frame_i < len(diff)
        last_frame = len(diff) - 1
        if frame_i == last_frame or diff[frame_i + 1] >= self.FRAME_SIM_MAX:
            return True, frame_i + 1
        if diff[frame_i + 1] < self.FRAME_SIM_MIN:
            return False, frame_i + 1
        # Difference lies between the two thresholds: walk forward while the
        # frames keep differing from frame_i + 1. The start is clamped so
        # that the second-to-last frame never indexes past the video.
        x = min(frame_i + 2, last_frame)
        while x + 1 < len(diff) and self._difference_between_frames(
                frame_i + 1, x) >= self.FRAME_SIM_MIN:
            x += 1
        if self._difference_between_frames(frame_i, x) < self.FRAME_SIM_MAX:
            return True, x + 1
        return False, frame_i + 1

    def _segment(self):
        """Segment the video file into multiple shots

        Also shows a scatter plot of the neighbor differences.

        Returns:
            array -- list of shot boundary frame indices
        """
        shot_boundaries = []
        diff = [0] * self.video_reader.get_num_frames()
        for frame in range(1, len(diff)):
            diff[frame] = self._difference_between_frames(frame, frame - 1,
                                                          kappa=60)
        frame = 0
        while frame < len(diff):
            is_shot_boundary, next_frame = self._is_shot_boundary(diff, frame)
            if is_shot_boundary:
                shot_boundaries.append(frame)
            frame = next_frame
        # NOTE(review): when _is_shot_boundary skips to len(diff), the last
        # frame is not recorded as a boundary -- confirm this is intended.
        import matplotlib.pyplot as plt
        plt.scatter(np.arange(len(diff)), diff, s=3)
        plt.show()
        return np.array(shot_boundaries, dtype=int)

    def _length_of_psi(self, frame_i):
        """Norm of the first rank(A) components of a frame's row of V

        Arguments:
            frame_i {int} -- index of the frame

        Returns:
            float -- length of the frame's vector
        """
        v = self.vh.T
        rank_A = np.linalg.matrix_rank(self.feature_matrix_A)
        return np.linalg.norm(v[frame_i, :rank_A])

    def _length_of_singular_weighted_psi(self, frame_i):
        """Norm of a frame's row of V with each of the first rank(A)
        components scaled by its singular value

        Arguments:
            frame_i {int} -- index of the frame

        Returns:
            float -- singular-value weighted length of the frame's vector
        """
        s, v = self.s, self.vh.T
        rank_A = np.linalg.matrix_rank(self.feature_matrix_A)
        return np.linalg.norm(np.multiply(s[:rank_A], v[frame_i, :rank_A]))

    def _avg_feature_vector(self, start_frame, end_frame):
        """Get an average feature vector for a series of consecutive frames

        Arguments:
            start_frame {int} -- index of the first frame in the frames
            end_frame {int} -- index of the last frame in the frames

        Returns:
            array -- average feature vector
        """
        v = self.vh.T
        vectors = v[start_frame:end_frame + 1]
        return np.average(vectors, axis=0)

    def _calc_shots_differences(self):
        """Calculate the differences between every each two shots

        Also shows the difference matrix as an image.

        Returns:
            list(list(float)) -- the differences between every each two shots
        """
        assert self.shot_boundaries is not None
        shot_first_frame = 0
        shot_vectors = []
        for shot_last_frame in self.shot_boundaries:
            avg_feature_vector = self._avg_feature_vector(
                shot_first_frame, shot_last_frame)
            shot_vectors.append(avg_feature_vector)
            shot_first_frame = shot_last_frame + 1
        len_shots = len(self.shot_boundaries)
        differences = [[
            np.linalg.norm(shot_vectors[i] - shot_vectors[j])
            for j in range(len_shots)
        ] for i in range(len_shots)]
        import matplotlib.pyplot as plt
        plt.imshow(differences)
        plt.show()
        return differences

    def _get_shot(self, shot_idx):
        """Get the first and last frame indices in a shot

        Arguments:
            shot_idx {int} -- index of the shot

        Returns:
            tuple(int, int) -- the first and last indices in the shot
        """
        assert self.shot_boundaries is not None
        if shot_idx == 0:
            return 0, self.shot_boundaries[shot_idx]
        # a shot starts right after the previous shot's boundary frame
        return (self.shot_boundaries[shot_idx - 1] + 1,
                self.shot_boundaries[shot_idx])

    def _get_shot_duration(self, shot_idx):
        """Get the number of frames in a shot

        Arguments:
            shot_idx {int} -- index of the shot

        Returns:
            int -- number of frames in the shot
        """
        start, end = self._get_shot(shot_idx)
        return end - start + 1

    def _get_shot_set_duration(self, shot_indices):
        """Get the total number of frames in a set of shots

        Arguments:
            shot_indices {array} -- indices of the shots

        Returns:
            int -- number of frames in the shots
        """
        shots_duration = [self._get_shot_duration(i) for i in shot_indices]
        return np.sum(shots_duration)

    def _get_longest_shot_idx(self):
        """Get the longest shot in the shots

        Returns:
            int -- index of the longest shot
        """
        shot_lengths = [
            self._get_shot_duration(i)
            for i in range(len(self.shot_boundaries))
        ]
        return np.argmax(shot_lengths)

    def _tag_content_ads(self, threshold=SHOT_SIM_MIN):
        """Tag content/ad to the shots

        Shots are split by their difference to the longest shot; the group
        with the larger total duration is returned as content.

        Keyword Arguments:
            threshold {float} -- a parameter for tagging
                (default: {SHOT_SIM_MIN})

        Returns:
            tuple(array, array) -- indices of content shots and ad shots
        """
        # we assume the longest shot in a video is not ad
        _longest_shot_idx = self._get_longest_shot_idx()
        logger.d('_longest_shot_idx', _longest_shot_idx)
        _similarity = np.array(
            self._calc_shots_differences()[_longest_shot_idx])
        logger.d('_similarity', _similarity)
        _one_class = np.where(_similarity < threshold)[0]
        _other_class = np.where(_similarity >= threshold)[0]
        if self._get_shot_set_duration(
                _one_class) > self._get_shot_set_duration(_other_class):
            return _one_class, _other_class
        return _other_class, _one_class

    def get_all_shots(self):
        """Get the first and last frames of all shots

        Returns:
            list(tuple(int,int)) -- first and last frames of all shots
        """
        return [self._get_shot(i) for i in range(len(self.shot_boundaries))]

    def get_content_ads_shots(self):
        """Get the first and last frames of content and ad shots

        Returns:
            array, array -- structured 'int,int' arrays holding the first and
                last frames of content and ad shots
        """
        content_shots = [self._get_shot(i) for i in self.content_shots]
        ads_shots = [self._get_shot(i) for i in self.ads_shots]
        return (np.array(content_shots, dtype='int,int'),
                np.array(ads_shots, dtype='int,int'))

    def save_content(self, video_output, audio_input, audio_output):
        """Concatenate the content shots with ads removed and save to a video
        file

        The writers are always closed, even when copying a shot fails.

        Arguments:
            video_output {str} -- path of the output video file
            audio_input {str} -- path of the corresponding audio file
            audio_output {str} -- path of the output audio file
        """
        logger.i('Saving content shots to %s...' % video_output)
        frame_width = self.video_reader.width
        frame_height = self.video_reader.height
        self.video_writer = VideoIO(video_output, frame_width, frame_height,
                                    'w')
        try:
            self.audio_writer = AudioIO(audio_input, audio_output, 30)
            try:
                content, _ = self.get_content_ads_shots()
                for start, end in content:
                    size = end - start + 1
                    logger.i('Writing frames [%d:%d]...' % (start, end))
                    self.video_reader.seek(start)
                    for _ in range(size):
                        self.video_writer.write_frame(
                            self.video_reader.read_frame())
                    self.audio_writer.copy_frames(start, size)
            finally:
                self.audio_writer.close()
        finally:
            self.video_writer.close()

    @staticmethod
    def get_feature_matrix_path(path_video_file):
        """Get the path of the saved feature matrix for a video

        Arguments:
            path_video_file {str} -- path of the video

        Returns:
            str -- '<video name without extension>_feature.npy' in the
                directory of the video
        """
        import os
        dirname = os.path.dirname(path_video_file)
        # splitext drops the extension whatever its length
        stem = os.path.splitext(os.path.basename(path_video_file))[0]
        feature_matrix_filename = '%s_feature.npy' % stem
        return os.path.join(dirname, feature_matrix_filename)

    @staticmethod
    def create_binned_histograms(frame, bin=5):
        """Build a frame's feature vector from 3x3 block color histograms

        Arguments:
            frame {image} -- frame exposing `height` and `width` and
                convertible with np.array (presumably a PIL image with 3
                channels -- confirm against VideoIO)

        Keyword Arguments:
            bin {int} -- number of histogram bins per channel (default: {5})

        Returns:
            list(int) -- flattened histograms of the 9 blocks, concatenated
        """
        NUM_CHANNELS = 3
        result = []
        bins = [int(256 / bin * i) for i in range(bin + 1)]
        bins_3d = [bins, bins, bins]
        # rows/columns left over when the size is not a multiple of 3 are
        # not covered by any block
        block_height = int(frame.height / 3)
        block_width = int(frame.width / 3)
        image_arr = np.array(frame)
        for i in range(3):
            for j in range(3):
                block_starting_h = i * block_height
                block_starting_w = j * block_width
                block = image_arr[
                    block_starting_h:block_starting_h + block_height,
                    block_starting_w:block_starting_w + block_width]
                hist, _ = np.histogramdd(
                    block.reshape(block_height * block_width, NUM_CHANNELS),
                    bins=bins_3d)
                result.extend(hist.astype(int).flatten())
        return result

    @staticmethod
    def get_feature_matrix(video_io):
        """Compute the feature matrix of a video, one column per frame

        Arguments:
            video_io {VideoIO} -- reader of the video; frames are read
                sequentially until the frame count is reached or a read
                returns None

        Returns:
            array -- feature matrix with the frame features as columns
        """
        feature_matrix_A = []
        num_frames = video_io.get_num_frames()
        logger.d('Total # of frames', num_frames)
        for i in range(num_frames):
            logger.i('Getting feature matrix for frame #%d...' % i)
            frame = video_io.read_frame()
            if frame is None:
                break
            feature = VideoSegment.create_binned_histograms(frame)
            feature_matrix_A.append(feature)
        feature_matrix_A = np.transpose(feature_matrix_A)
        logger.d('shape(A)', feature_matrix_A.shape)
        return feature_matrix_A
class VideoPlayer:
    """Tk based player showing a video with its wav track and, when a logo
    data file is found, the outlines of detected logos
    """

    # Length of a fast forward / backward jump in seconds.
    SEEK_SECONDS = 5

    def __init__(self, audio_path, video_path, width, height, fps):
        """Build the player window, then run the Tk main loop

        The constructor blocks until the window is closed.

        Arguments:
            audio_path {str} -- path of the wav file to play
            video_path {str} -- path of the video file to play
            width {int} -- frame width of the video
            height {int} -- frame height of the video
            fps {number} -- frames per second; one audio chunk is
                framerate / fps samples
        """
        self.logo_index = 0
        # The logo file is looked up from the video path handed to the
        # constructor (text before '.rgb' plus '.pkl'), not from sys.argv.
        point_path = video_path.split('.rgb')[0] + '.pkl'
        if os.path.exists(point_path):
            # NOTE: pickle executes code on load; only open trusted files.
            with open(point_path, "rb") as logo_file:
                self.logo_array = pickle.load(logo_file)
            logger.i('Loaded logo positions from % s' % point_path)
        else:
            self.logo_array = []

        self.f = wave.open(audio_path, "rb")
        f = self.f
        self.chunk = int(f.getframerate() / fps)
        self.p = pyaudio.PyAudio()
        p = self.p
        self.stream = p.open(
            format=p.get_format_from_width(f.getsampwidth()),
            channels=f.getnchannels(),
            rate=f.getframerate(),
            output=True)
        self.data = f.readframes(self.chunk)

        self.audioPath = audio_path
        self.videoPath = video_path
        self.width = width
        self.height = height
        self.fps = fps
        self.ImgLength = width * height * 3
        self.video_io = VideoIO(video_path, width, height)
        self.frameNum = self.video_io.get_num_frames()
        self.index = 0

        self.root = Tk()
        self.state = 0   # 1 while playing, 0 otherwise
        self.old = -1    # index of the last frame that was drawn
        self.pilImage = self.video_io.read_frame(self.index)
        self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
        self.label2 = Label(self.root, image=self.tkImage)
        self.label2.pack()

        self.button_fast_backword = Button(self.root, text='<<',
                                           command=self.fast_backword)
        self.button_play = Button(self.root, text='PLAY', command=self.start)
        self.button_pause = Button(self.root, text='PAUSE',
                                   command=self.pause)
        self.button_fast_forword = Button(self.root, text='>>',
                                          command=self.fast_forword)
        self.button_stop = Button(self.root, text='STOP', command=self.stop)
        self.button_fast_backword.pack(side=LEFT)
        self.button_play.pack(side=LEFT)
        self.button_pause.pack(side=LEFT)
        self.button_fast_forword.pack(side=LEFT)
        self.button_stop.pack(side=LEFT)

        self.label1 = Label(self.root, text="")
        self.label1.pack(side=RIGHT)
        self.root.title('Video Player')
        self.root.mainloop()

    def fast_forword(self):
        """Jump SEEK_SECONDS forward (clamped to the last frame) and play"""
        step = int(self.fps * self.SEEK_SECONDS)
        self.index = min(self.index + step, self.frameNum - 1)
        # derive `old` from the clamped index so the target frame is drawn
        self.old = self.index - 1
        # skip logo entries that lie before the new position
        while (len(self.logo_array) > self.logo_index
               and self.index > self.logo_array[self.logo_index][0]):
            self.logo_index = self.logo_index + 1
        self.start()

    def fast_backword(self):
        """Jump SEEK_SECONDS backward (clamped to the first frame)"""
        step = int(self.fps * self.SEEK_SECONDS)
        self.index = max(self.index - step, 0)
        self.old = self.index - 1
        # rewind to the first logo entry at or after the new position
        while (self.logo_index > 0
               and self.index <= self.logo_array[self.logo_index - 1][0]):
            self.logo_index = self.logo_index - 1

    def read_wav(self):
        """Re-open the wav file and a fresh audio output stream

        NOTE(review): the previously opened wave file, PyAudio instance and
        stream are replaced without being closed here; they may still be in
        use by the audio thread, so they are left alone -- confirm.
        """
        self.f = wave.open(self.audioPath, "rb")
        f = self.f
        self.p = pyaudio.PyAudio()
        p = self.p
        self.stream = p.open(
            format=p.get_format_from_width(f.getsampwidth()),
            channels=f.getnchannels(),
            rate=f.getframerate(),
            output=True)

    def videoplayer(self):
        """Draw the current frame and reschedule itself while playing"""
        if self.index >= self.frameNum:
            self.stop()
        if self.state:
            self.root.after(1, self.videoplayer)
        if self.state and self.index > self.old:
            self.old = self.index
            self.pilImage = self.video_io.read_frame(self.index)
            self.label1.configure(text="")
            # logo entries are (index, logo, points); an entry is drawn when
            # the current frame is its frame or the one right after it
            if len(self.logo_array) > self.logo_index:
                delta = self.index - self.logo_array[self.logo_index][0]
            else:
                delta = None
            if delta is not None and -1 < delta < 2:
                points = np.array(self.logo_array[self.logo_index][2])
                logo_name = self.logo_array[self.logo_index][1]
                self.label1.configure(text=logo_name)
                self.pilImage = cv2.cvtColor(np.asarray(self.pilImage),
                                             cv2.COLOR_RGB2BGR)
                cv2.polylines(self.pilImage, [points], 1, (0, 0, 255), 5)
                self.pilImage = Image.fromarray(
                    cv2.cvtColor(self.pilImage, cv2.COLOR_BGR2RGB))
                logger.i('Showing logo box [%s] at %s' %
                         (logo_name, points.tolist()))
                self.logo_index = self.logo_index + 1
            self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
            self.label2.configure(image=self.tkImage)

    def audioplayer(self):
        """Play one audio chunk per frame and advance the frame index

        Runs until the state is reset; stops playback once the position
        computed from the frame index passes the end of the wav file.
        """
        while self.state:
            position = int(self.index * self.f.getframerate() / self.fps)
            if position > self.f.getnframes():
                self.stop()
            else:
                self.f.setpos(position)
                self.data = self.f.readframes(self.chunk)
                self.index = self.index + 1
                self.stream.write(self.data)

    def start(self):
        """Start the video and audio threads unless already playing"""
        logger.d("start")
        if self.state == 0:
            self.state = 1
            try:
                _thread.start_new_thread(self.videoplayer, ())
                _thread.start_new_thread(self.audioplayer, ())
            except RuntimeError:
                # raised by _thread when a new thread cannot be started
                logger.e("Error: unable to start thread")

    def pause(self):
        """Pause playback, keeping the current position"""
        logger.d("pause")
        self.state = 0

    def stop(self):
        """Stop playback, rewind to the first frame and re-open the audio"""
        logger.d("stop")
        self.state = 0
        self.old = -1
        self.index = 0
        self.logo_index = 0
        self.pilImage = self.video_io.read_frame(self.index)
        self.tkImage = ImageTk.PhotoImage(image=self.pilImage)
        self.label2.configure(image=self.tkImage)
        self.root.update_idletasks()
        self.read_wav()
class VideoLogoDetect:
    """Detect brand logos in a video and generate a copy of the video with
    the matching ads inserted at each logo's first occurrence
    """

    SKIP_FRAME = 2

    def __init__(self, path_video_file, frame_width, frame_height,
                 brands_to_detect):
        """Open the video, build the logo detector and run detection

        Arguments:
            path_video_file {str} -- path of the video to scan
            frame_width {int} -- frame width of the video
            frame_height {int} -- frame height of the video
            brands_to_detect {dict} -- per-brand settings; the 'logo' and
                'ad' keys of each value are used by this class
        """
        self.video_path = path_video_file
        self.video_reader = VideoIO(
            path_video_file, frame_width, frame_height)
        self.brands_to_detect = brands_to_detect
        brands_path = {k: brands_to_detect[k]['logo']
                       for k in brands_to_detect}
        self.logo_detector = LogoDetector(brands_path)
        self.logo_data_in_video = []
        self.logo_first_occurences = {}
        self._detect()

    def _detect(self):
        """Detect logos in the video frames

        Every detection is appended to `self.logo_data_in_video` as
        [frame_idx, logo_name, polygon], the first frame of each logo is
        recorded, and the collected data is pickled to the path returned by
        path_util.get_video_logo_data_path(self.video_path).
        """
        reader = self.video_reader
        while reader.get_next_frame_idx() < reader.get_num_frames():
            frame_idx = reader.get_next_frame_idx()
            logger.d('Detecting logo in %d' % frame_idx)
            pil_image = reader.read_frame()
            brand_areas = self.logo_detector.detect(pil_image)
            if len(brand_areas) == 0:
                logger.i('Frame[%d]: no logo detected' % frame_idx)
            else:
                for logo_name, logo_poly in brand_areas:
                    logger.i('Frame[%d]: logo [%s] at area %s' % (
                        frame_idx, logo_name, logo_poly.tolist()))
                    self.logo_data_in_video.append(
                        [frame_idx, logo_name, logo_poly.tolist()])
                    if logo_name not in self.logo_first_occurences:
                        self.logo_first_occurences[logo_name] = frame_idx
            reader.skip_frame(VideoLogoDetect.SKIP_FRAME)
        logo_data_path = path_util.get_video_logo_data_path(self.video_path)
        # the context manager guarantees the pickle is flushed and closed
        with open(logo_data_path, 'wb') as logo_data_file:
            pickle.dump(self.logo_data_in_video, logo_data_file)
        logger.i('Logo detection data saved to %s' % logo_data_path)

    @staticmethod
    def _shift_frame_indices(logo_data, pos_ads_length):
        """Shift logo frame indices by the ads inserted before them

        Arguments:
            logo_data {list} -- entries whose first item is a frame index
            pos_ads_length {list(tuple)} -- (insert position, ad length in
                frames) pairs

        Returns:
            list(list) -- new entries; an entry's frame index grows by the
                length of every ad whose position is strictly before it.
                The input entries are not modified.
        """
        # reshape keeps the array two-dimensional even when there are no ads
        ads = np.array(
            sorted(pos_ads_length, key=lambda t: t[0])).reshape(-1, 2)
        shifted = []
        for entry in logo_data:
            frame_idx = entry[0]
            # int() keeps plain Python ints in the data that gets pickled
            offset = int(ads[ads[:, 0] < frame_idx, 1].sum())
            shifted.append([frame_idx + offset] + list(entry[1:]))
        return shifted

    def _logo_data_with_ads(self):
        """Generate new logo data (frame indices and polygon areas of logos)
        for the video with ads inserted, using the logo data of the no-ads
        video

        `self.logo_data_in_video` is left untouched, so the method can be
        called repeatedly.

        Returns:
            list(list) -- new logo data
        """
        ads_to_insert = self.logo_first_occurences
        pos_ads_length = []
        for ad_name in ads_to_insert:
            ad = self.brands_to_detect[ad_name]['ad']
            ad_n_frames = VideoIO(
                ad['video'], self.video_reader.width,
                self.video_reader.height).get_num_frames()
            pos_ads_length.append((ads_to_insert[ad_name], ad_n_frames))
        pos_ads_length.sort(key=lambda t: t[0])
        logger.d('pos_ads_length', np.array(pos_ads_length))
        logger.d('logo_data_in_video',
                 np.array(self.logo_data_in_video, dtype='object'))
        logo_data_with_ads = VideoLogoDetect._shift_frame_indices(
            self.logo_data_in_video, pos_ads_length)
        logger.d('logo_data_with_ads',
                 np.array(logo_data_with_ads, dtype='object'))
        return logo_data_with_ads

    def generate_video_with_ads(self, video_output, audio_input,
                                audio_output):
        """Generate video with ads inserted at the first occurrences of
        detected logos

        Besides the video and audio files, the shifted logo data is pickled
        to the path returned by
        path_util.get_video_logo_data_path(video_output).

        Arguments:
            video_output {str} -- the path of desired video output
            audio_input {str} -- the path of the wav input corresponding to
                self.video_path
            audio_output {str} -- the path of desired wav output along with
                the video
        """
        video_input = self.video_path
        width = self.video_reader.width
        height = self.video_reader.height
        ads_to_insert = self.logo_first_occurences
        pos_ads = sorted([(ads_to_insert[k], k) for k in ads_to_insert],
                         key=lambda t: t[0])

        output_video_writer = VideoIO(video_output, width, height, 'w')
        try:
            output_audio_writer = AudioIO(audio_input, audio_output, 30)
            try:
                start = 0
                for pos, ad_name in pos_ads:
                    ad = self.brands_to_detect[ad_name]['ad']
                    # Two logos can first appear in the same frame; the
                    # segment before the second ad is then empty, so no copy
                    # with a zero frame count is requested.
                    if pos >= start:
                        logger.i('Writing frames [%d:%d]...' % (start, pos))
                        output_video_writer.copy_frames_from(
                            video_input, start, pos - start + 1)
                        output_audio_writer.copy_frames(
                            start, pos - start + 1)
                    logger.i('Writing ads [%s]...' % ad_name)
                    output_video_writer.copy_frames_from(ad['video'])
                    output_audio_writer.copy_frames_from(ad['audio'])
                    start = pos + 1
                # remainder of the source after the last inserted ad
                output_video_writer.copy_frames_from(video_input, start)
                output_audio_writer.copy_frames(start)
            finally:
                output_audio_writer.close()
        finally:
            output_video_writer.close()
        logger.i('Video with new ads saved to (%s, %s)' %
                 (video_output, audio_output))

        logo_data_with_ads = self._logo_data_with_ads()
        logo_data_path = path_util.get_video_logo_data_path(video_output)
        with open(logo_data_path, 'wb') as logo_data_file:
            pickle.dump(logo_data_with_ads, logo_data_file)
        logger.i('Logo outlines data saved to %s' % logo_data_path)
# Script: run the logo detector over every frame of one dataset video and
# log, per frame, which logos were found and where.
import path_util
from video_io import VideoIO
from logo_detector import LogoDetector
from PIL import Image
from data import DATASETS as DATASETS
from data import BRANDS
from logger import logger

logger.set_level('i')

# initializing
dataset_idx = 0
dataset = DATASETS[dataset_idx]
# brand name -> value stored under that brand's 'logo' key
brands_to_detect = {k: BRANDS[k]['logo'] for k in dataset['brands_to_detect']}
video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])

# detect
logo_detector = LogoDetector(brands_to_detect)
# NOTE(review): nothing is appended to this list in the part of the script
# visible here.
logo_data_in_video = []
# read_frame() is called without an index each iteration; the loop ends once
# the reader's next frame index reaches the frame count
while video_io.get_next_frame_idx() < video_io.get_num_frames():
    frame_idx = video_io.get_next_frame_idx()
    logger.d('Detecting logo in %d' % frame_idx)
    pil_image = video_io.read_frame()
    brand_areas = logo_detector.detect(pil_image)
    if len(brand_areas) == 0:
        logger.i('Frame[%d]: no logo detected' % frame_idx)
    else:
        # each detection is a (logo name, polygon) pair
        for logo_name, logo_poly in brand_areas:
            logger.i('Frame[%d]: logo [%s] at area %s' % (
                frame_idx, logo_name, logo_poly.tolist()))
# Script: compute SIFT keypoints for one brand logo and for one video frame.
# NOTE(review): `np`, `cv` and `VideoIO` are used below but their imports are
# not part of the visible fragment; they are presumably imported above.
from PIL import Image
from test_data import DATASETS as DATASETS
from test_data import BRANDS
import util

MIN_MATCH_COUNT = 10
MIN_RANSAC_MATCH_COUNT = 5

dataset_idx = 2
logo_name = 'ae'
dataset = DATASETS[dataset_idx]
logo_frame = dataset['brand_frames'][logo_name]
# hard-coded override of the frame looked up from the dataset
logo_frame = 2501
logo_path = BRANDS[logo_name]['logo']

video_io = VideoIO(dataset['video'], dataset['width'], dataset['height'])
pil_image = video_io.read_frame(logo_frame).convert('RGB')
_frame = np.array(pil_image)
frame_img = cv.cvtColor(_frame, cv.COLOR_RGB2GRAY)

_logo_img = cv.imread(logo_path)
# cv.imread returns the channels in BGR order, so the grayscale conversion
# has to use the BGR code (the frame above comes from PIL and is RGB)
logo_img = cv.cvtColor(_logo_img, cv.COLOR_BGR2GRAY)

img1 = logo_img
img2 = frame_img

# do sift
sift = cv.xfeatures2d.SIFT_create(edgeThreshold=10)
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)

FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)