def check_and_extract_videos(extract, video_clips, video_data, key): """ check the availability of video clips and save frames to directory. :param extract: boolean, extract or not. :param video_clips: dictionary with key: "imdb_key", value: list of all video paths of "imdb_key" :param video_data: empty video meta data. :param key: imdb_key e.g. ttxxxxxxx :return: None """ # Warning: Can't not get the last frame of the file temp_video_data, delta, img_list = {}, 5, [] nil_img = np.zeros((299, 299, 3), dtype=np.uint8) for video in video_clips[key]: del img_list[:] # video name without mp4 base_name = fu.basename_wo_ext(video) img_dir = join(_mp.image_dir, base_name) extracted = glob(join(img_dir, '*.jpg')) try: # open the video file with imageio reader = imageio.get_reader( video, ffmpeg_params=['-analyzeduration', '10M']) except OSError: # Almost all errors will be here. # We try our best to make sure the completeness of data. start, end = duration(base_name) num_frame = end - start meta_data = {'nframes': num_frame} if meta_data['nframes'] > len(extracted) + delta: img_list = [nil_img] * num_frame else: # If imageio succeed to open the imageio, we start to extract frames. meta_data = reader.get_meta_data() if meta_data['nframes'] > len(extracted) + delta: try: for img in reader: img_list.append(img) except RuntimeError: # There is no error here anymore. This exception scope is used, just in case. pass meta_data['real_frames'] = len(extracted) # Check if already extracted or not if img_list: if len(extracted) != len(img_list) and extract: fu.make_dirs(img_dir) for i, img in enumerate(img_list): imageio.imwrite( join(img_dir, '%s_%05d.jpg' % (base_name, i + 1)), img) meta_data['real_frames'] = len(img_list) # save metadata for videos temp_video_data[base_name] = meta_data # save all metadata in a movie video_data[key] = temp_video_data
def sample_frame_v2(video_data, frame_time, subtitle, sample, index, key):
    """
    Sample one representative frame per subtitle line for every clip of a
    movie, and save the matching subtitle-line encodings.

    :param video_data: per-movie dict of per-clip metadata ('real_frames').
    :param frame_time: per-movie array of timestamps, one per movie frame.
    :param subtitle: per-movie dict with 'start', 'end' and 'lines' arrays.
    :param sample: output dict, filled in-place with
        {clip: [sampled frame offsets]}.
    :param index: output dict, filled in-place with the flat list of
        subtitle-line indices used for this movie.
    :param key: imdb_key e.g. ttxxxxxxx
    :return: None
    """
    subt = subtitle[key]
    ft = frame_time[key]
    vd = video_data[key]
    temp_sample = {}
    temp_index = []
    # a: total sampled frames, b: total subtitle lines — must stay aligned.
    a = 0
    b = 0
    for video in sorted(list(vd.keys())):
        # Skip clips with no usable frames.
        if vd[video]['real_frames'] > 0:
            start_frame, end_frame = duration(video)
            # Clamp the clip's frame span to the valid range of ft.
            start_frame = max(start_frame, 0)
            end_frame = min(start_frame + vd[video]['real_frames'] - 1,
                            len(ft) - 1)
            start_time, end_time = ft[start_frame], ft[end_frame]
            # Subtitle lines whose interval overlaps the clip's time span.
            # NOTE(review): flbs/lsbs look like first/last binary searches
            # over the sorted subtitle times — confirm against their defs.
            start_index, end_index = flbs(subt['end'], start_time), lsbs(
                subt['start'], end_time)
            # assert start_index <= end_index, '%s index reversed. %d %d\n%f %f %f %f\n%f %f' % \
            #     (video, start_index, end_index, subt['start'][start_index],
            #      subt['end'][start_index],
            #      subt['start'][end_index], subt['end'][end_index],
            #      start_time, end_time)
            # Degenerate overlap: force at least one subtitle line per clip.
            if start_index > end_index:
                end_index = start_index
            temp_sample[video] = []
            index_sample = list(range(start_index, end_index + 1))
            for i in index_sample:
                i_start, i_end = subt['start'][i], subt['end'][i]
                # Map the line's time interval back to clip-local frame
                # offsets, clamped to [0, real_frames - 1].
                i_start_frame = min(max(flbs(ft, i_start) - start_frame, 0),
                                    vd[video]['real_frames'] - 1)
                i_end_frame = max(
                    min(lsbs(ft, i_end) - start_frame,
                        vd[video]['real_frames'] - 1), 0)
                # Representative frame: midpoint of the line's frame span.
                temp_sample[video].append((i_start_frame + i_end_frame) // 2)
                a += 1
            b += len(subt['lines'][index_sample])
            # Sanity check: one sampled frame per subtitle line.
            assert a == b, '%s not aligned. %d %d %d %d %d %d' % \
                (key, a, b, end_index + 1 - start_index,
                 len(subt['lines'][index_sample]), start_index, end_index)
            temp_index += index_sample
    sample[key] = temp_sample
    index[key] = temp_index
    # Persist the encodings of the selected subtitle lines for this movie.
    np.save(os.path.join(_mp.encode_dir, key + '.npy'),
            subt['lines'][temp_index])
def align_subtitle(video_data, frame_time, subtitle, key):
    """
    Align subtitle embeddings to subsampled frames of every clip of a movie.

    For each clip, every 15th frame is sampled; the embedding of the subtitle
    line on screen at that frame's timestamp is stored (a zero vector when no
    line is active).  The stacked (num_sampled_frames, 300) matrix is saved
    to ``<encode_dir>/<key>.npy``.

    :param video_data: per-movie dict of per-clip metadata ('real_frames').
    :param frame_time: per-movie array of timestamps, one per movie frame.
    :param subtitle: per-movie dict with 'start', 'end' and 'lines'
        (300-d embeddings) arrays.
    :param key: imdb_key e.g. ttxxxxxxx
    :return: None
    """
    subt = subtitle[key]
    ft = frame_time[key]
    video_clips = sorted(list(video_data[key].keys()))
    # Collect one matrix per clip and concatenate ONCE at the end.
    # Concatenating inside the loop copies the accumulated array on every
    # iteration, which is quadratic in the total number of sampled frames.
    embeddings = []
    for video in video_clips:
        start_frame, end_frame = duration(video)
        # Subsample one frame out of every 15.
        subsample_list = list(
            range(0, video_data[key][video]['real_frames'], 15))
        temp_embedding = np.zeros((len(subsample_list), 300),
                                  dtype=np.float32)
        for idx, i in enumerate(subsample_list):
            # Clamp to the last known timestamp if the clip overruns ft.
            time = ft[min(start_frame + i, len(ft) - 1)]
            index = binary_search(subt['start'], subt['end'], time)
            # Keep the zero vector unless the sampled time really falls
            # inside the found subtitle interval.
            if subt['start'][index] <= time <= subt['end'][index]:
                temp_embedding[idx] = subt['lines'][index]
        embeddings.append(temp_embedding)
    # Empty movie -> empty (0, 300) matrix, matching the original behavior.
    subt_embedding = (np.concatenate(embeddings, axis=0) if embeddings
                      else np.zeros((0, 300), dtype=np.float32))
    np.save(join(_mp.encode_dir, key + '.npy'), subt_embedding)
def align_subtitle(video_data, frame_time, subtitle, tokenize_subt, key):
    """
    Tokenize the subtitle line active at every 15th frame of each clip.

    NOTE(review): this redefines ``align_subtitle`` with a different arity
    than the embedding-based variant — if both live in one module, the later
    definition shadows the earlier; confirm only one is imported per script.

    :param video_data: per-movie dict of per-clip metadata ('real_frames').
    :param frame_time: per-movie array of timestamps, one per movie frame.
    :param subtitle: per-movie dict with 'start', 'end' and 'lines' arrays.
    :param tokenize_subt: output dict, filled in-place with
        {clip: [token lists, one per sampled frame]}.
    :param key: imdb_key e.g. ttxxxxxxx
    :return: None
    """
    subt = subtitle[key]
    times = frame_time[key]
    clips = video_data[key]
    per_clip_tokens = {}
    for clip in clips:
        first_frame, last_frame = duration(clip)
        frame_tokens = []
        # One sample every 15 frames of the clip.
        for offset in range(0, clips[clip]['real_frames'], 15):
            # Clamp the lookup to the last known timestamp.
            t = times[min(first_frame + offset, len(times) - 1)]
            hit = binary_search(subt['start'], subt['end'], t)
            text = subt['lines'][hit].strip()
            if subt['start'][hit] <= t <= subt['end'][hit] and text:
                words = word_tokenize(text.lower())
            else:
                # No subtitle on screen: placeholder token.
                words = ['.']
            # Guard against the tokenizer returning an empty list.
            frame_tokens.append(words if words else ['.'])
        per_clip_tokens[clip] = frame_tokens
    tokenize_subt[key] = per_clip_tokens