def get_split(qa, video_data):
    """Bucket QA instances into their train/tests/val splits.

    :param qa: iterable of QA dicts with keys 'qid', 'question', 'answers',
        'imdb_key', 'correct_index', 'video_clips'.
    :param video_data: dict mapping a clip base name to its metadata; only
        the boolean entry 'avail' is read here.
    :return: dict with keys 'train', 'tests', 'val', each a list of trimmed
        QA dicts.  'video_clips' keeps only the clips whose frames are
        available; 'avail' is True iff that filtered list is non-empty;
        'mv+sub' is True iff the instance referenced any clip at all.
    """
    total_qa = {
        'train': [],
        'tests': [],
        'val': [],
    }
    for qa_ in tqdm(qa, desc='Get available split'):
        # Keep only clips whose extracted frames are marked available.
        avail_clips = [
            fu.basename_wo_ext(vid)
            for vid in qa_['video_clips']
            if video_data[fu.basename_wo_ext(vid)]['avail']
        ]
        # Resolve the split once instead of re-deriving it per field access,
        # and set 'avail' directly instead of patching the appended dict.
        total_qa[qid_split(qa_)].append({
            "qid": qa_['qid'],
            "question": qa_['question'],
            "answers": qa_['answers'],
            "imdb_key": qa_['imdb_key'],
            "correct_index": qa_['correct_index'],
            "mv+sub": qa_['video_clips'] != [],
            "video_clips": avail_clips,
            "avail": avail_clips != [],
        })
    return total_qa
def main():
    """Pick one QA instance of movie 'tt0086190', gather the sampled frames of
    its video clips, and copy them into the benchmark 'pickup' directory with
    the matched subtitle sentence embedded in the file name; finally dump the
    instance as pickup.json.

    Side effects only: copies jpg files and writes a json file.
    """
    index = du.json_load(_mp.sample_index_file)
    subtitle = Subtitle().include(imdb_key=['tt0086190']).get()
    sample = du.json_load(_mp.sample_frame_file)
    qa = QA().include(imdb_key=['tt0086190']).get()
    # for ins in qa:
    #     if ins['video_clips']:
    #         print(ins['qid'])
    #         print(ins['question'])
    #         print(ins['answers'])
    #         print(ins['answers'][ins['correct_index']])
    ins = qa[0]
    # spec: saved 0/1 indicator vector over this instance's subtitle lines.
    spec = np.load(os.path.join(_mp.encode_dir, ins['qid'] + '_spec' + '.npy'))
    # Subtitle line indices selected by the spec mask.
    iid = [idx for i, idx in enumerate(index[ins['imdb_key']]) if spec[i] == 1]
    sentences = [subtitle[ins['imdb_key']]['lines'][idx] for idx in iid]
    imgs = []
    # Collect the sampled frame paths of every clip, in clip-name order.
    for v in sorted([fu.basename_wo_ext(n) for n in ins['video_clips']]):
        imgs.extend([
            os.path.join(_mp.image_dir, v, '%s_%05d.jpg' % (v, i + 1))
            for i in sample[ins['imdb_key']][v]
        ])
    print(len(imgs))
    # NOTE(review): pairs frame idx with sentences[idx] — assumes the number of
    # sampled frames does not exceed the number of selected sentences; an
    # IndexError is raised otherwise.  TODO confirm against the sampling code.
    for idx, img in enumerate(imgs):
        copy(
            img,
            os.path.join(_mp.benchmark_dir, 'pickup',
                         '%d_%s.jpg' % (idx, sentences[idx])))
    # ins['lines'] = sentences
    du.json_dump(ins, os.path.join(_mp.benchmark_dir, 'pickup.json'))
def writer_worker(queue, capacity, npy_names):
    """Consumer loop: drain feature batches from `queue` and write one .npy
    file per video, each holding exactly capacity[i] frames.

    :param queue: queue yielding feature arrays (frames, ...); a None item is
        the producer's end-of-stream sentinel.
    :param capacity: list of frame counts, one per output file.
    :param npy_names: list of output .npy paths, aligned with `capacity`.
    """
    video_idx = 0
    # Batches received but not yet flushed; frames may span video boundaries.
    local_feature = []
    with tqdm(total=len(npy_names)) as pbar:
        while len(capacity) > video_idx:
            item = queue.get()
            if item is not None:
                local_feature.append(item)
                local_size = sum([len(f) for f in local_feature])
                # Flush as many complete videos as the buffered frames cover.
                while len(capacity) > video_idx and local_size >= capacity[video_idx]:
                    concat_feature = np.concatenate(local_feature, axis=0)
                    # Exactly the first capacity[video_idx] frames belong to
                    # the current video.
                    final_features = concat_feature[:capacity[video_idx]]
                    assert final_features.shape[0] == capacity[video_idx], \
                        "%s Both frames are not same!" % npy_names[video_idx]
                    try:
                        np.save(npy_names[video_idx], final_features)
                    except Exception as e:
                        # NOTE(review): retries the save once but then
                        # re-raises even when the retry succeeded — confirm
                        # whether the intent was to raise only on a failed
                        # retry.
                        np.save(npy_names[video_idx], final_features)
                        raise e
                    # Presumably throttles disk writes; verify it is needed.
                    time.sleep(3)
                    pbar.set_description(' '.join([fu.basename_wo_ext(npy_names[video_idx]), str(len(final_features))]))
                    # Carry the leftover frames over to the next video.
                    del local_feature[:]
                    local_feature.append(concat_feature[capacity[video_idx]:])
                    local_size = sum([len(f) for f in local_feature])
                    video_idx += 1
                    pbar.update()
            else:
                # Sentinel received: producer is done.
                break
def check_and_extract_videos(extract, video_clips, video_data, key): """ check the availability of video clips and save frames to directory. :param extract: boolean, extract or not. :param video_clips: dictionary with key: "imdb_key", value: list of all video paths of "imdb_key" :param video_data: empty video meta data. :param key: imdb_key e.g. ttxxxxxxx :return: None """ # Warning: Can't not get the last frame of the file temp_video_data, delta, img_list = {}, 5, [] nil_img = np.zeros((299, 299, 3), dtype=np.uint8) for video in video_clips[key]: del img_list[:] # video name without mp4 base_name = fu.basename_wo_ext(video) img_dir = join(_mp.image_dir, base_name) extracted = glob(join(img_dir, '*.jpg')) try: # open the video file with imageio reader = imageio.get_reader( video, ffmpeg_params=['-analyzeduration', '10M']) except OSError: # Almost all errors will be here. # We try our best to make sure the completeness of data. start, end = duration(base_name) num_frame = end - start meta_data = {'nframes': num_frame} if meta_data['nframes'] > len(extracted) + delta: img_list = [nil_img] * num_frame else: # If imageio succeed to open the imageio, we start to extract frames. meta_data = reader.get_meta_data() if meta_data['nframes'] > len(extracted) + delta: try: for img in reader: img_list.append(img) except RuntimeError: # There is no error here anymore. This exception scope is used, just in case. pass meta_data['real_frames'] = len(extracted) # Check if already extracted or not if img_list: if len(extracted) != len(img_list) and extract: fu.make_dirs(img_dir) for i, img in enumerate(img_list): imageio.imwrite( join(img_dir, '%s_%05d.jpg' % (base_name, i + 1)), img) meta_data['real_frames'] = len(img_list) # save metadata for videos temp_video_data[base_name] = meta_data # save all metadata in a movie video_data[key] = temp_video_data
def process():
    """
    Process frame time of each movie, and return a dictionary
    {imdb_key: a list of timestamp}
    :return frame_time: dictionary mapping imdb key to a list of timestamp
    """
    matidx_paths = glob(join(_mp.frame_time_dir, '*.matidx'))
    # basename of each *.matidx file is the imdb_key
    frame_time = {
        fu.basename_wo_ext(path): FrameTime.get_frame_time(path)
        for path in tqdm(matidx_paths, desc='Process frame time')
    }
    du.json_dump(frame_time, _mp.frame_time_file, indent=0)
    return frame_time
def process():
    """Parse *.sbd files into {video: {'start': [...], 'end': [...]}},
    dump the result to the shot boundary file, and return it."""
    shot_boundary = {}
    for path in tqdm(glob(join(_mp.shot_boundary_dir, '*.sbd')),
                     desc='Process shot boundary'):
        starts, ends = [], []
        with open(path, 'r') as f:
            # Each regex match yields one (start, end) frame-index pair.
            for m in SHOT_BOUNDARY_REGEX.finditer(f.read()):
                starts.append(int(m.group(1)))
                ends.append(int(m.group(2)))
        shot_boundary[fu.basename_wo_ext(path)] = {'start': starts, 'end': ends}
    du.json_dump(shot_boundary, _mp.shot_boundary_file)
    return shot_boundary
def writer_worker(queue, capacity, npy_names):
    """Consumer loop: drain (features, filenames) pairs from `queue` and write
    one .npy file per video containing exactly capacity[i] frames, verifying
    that every consumed image filename belongs to the target video.

    :param queue: queue yielding (feature array, list of image filenames);
        a falsy item is the producer's end-of-stream sentinel.
    :param capacity: list of frame counts, one per output file.
    :param npy_names: list of output .npy paths, aligned with `capacity`.
    """
    video_idx = 0
    # Buffered batches and their source image filenames, kept in lockstep.
    local_feature = []
    local_filename = []
    with tqdm(total=len(npy_names)) as pbar:
        while len(capacity) > video_idx:
            item = queue.get()
            if item:
                f, n = item
                local_feature.append(f)
                local_filename.extend(n)
                local_size = len(local_filename)
                # Flush as many complete videos as the buffer can fill.
                while len(capacity) > video_idx and local_size >= capacity[video_idx]:
                    concat_feature = np.concatenate(local_feature, axis=0)
                    final_features = concat_feature[:capacity[video_idx]]
                    final_filename = local_filename[:capacity[video_idx]]
                    assert final_features.shape[0] == capacity[video_idx], \
                        "%s Both frames are not same!" % npy_names[video_idx]
                    # Sanity check: every image file must come from the video
                    # whose features we are about to save.
                    for i in range(len(final_features)):
                        assert fu.basename_wo_ext(npy_names[video_idx]) == \
                               fu.basename_wo_ext(final_filename[i]).split('.')[0], \
                            "Wrong images! %s\n%s" % (npy_names[video_idx], final_filename[i])
                    try:
                        np.save(npy_names[video_idx], final_features)
                    except Exception as e:
                        # NOTE(review): retries the save once but then
                        # re-raises even when the retry succeeded — confirm
                        # whether the intent was to raise only on a failed
                        # retry.
                        np.save(npy_names[video_idx], final_features)
                        raise e
                    pbar.set_description(' '.join([fu.basename_wo_ext(npy_names[video_idx]), str(len(final_features))]))
                    # Carry leftover frames/filenames over to the next video.
                    del local_feature[:]
                    local_feature.append(concat_feature[capacity[video_idx]:])
                    local_filename = local_filename[capacity[video_idx]:]
                    local_size = len(local_filename)
                    video_idx += 1
                    pbar.update()
            else:
                # Sentinel received: producer is done.
                break
def create_vocab(tokenize_subt, tokenize_qa):
    """Build a word -> id mapping (ids start at 1) from every QA question,
    every answer sentence, and the subtitle sentences of each instance's
    video clips."""
    vocab = Counter()
    for ins in tqdm(tokenize_qa, desc='Create vocabulary'):
        vocab.update(ins['question'])
        for answer in ins['answers']:
            vocab.update(answer)
        # Fold in the subtitle sentences of every clip of this instance.
        movie_subt = tokenize_subt[ins['imdb_key']]
        for clip in ins['video_clips']:
            for sentence in movie_subt[fu.basename_wo_ext(clip)]:
                vocab.update(sentence)
    # Enumerate in insertion order; id 0 is implicitly reserved.
    return {word: i + 1 for i, word in enumerate(vocab)}
def process():
    """
    Process subtitle files of movies.  Each *.srt file is read with the
    ISO-8859-1 encoding; newlines inside a cue are collapsed to spaces,
    markup tags and stray angle brackets are removed, text is NFKD-normalized
    and stripped of non-ascii characters, cues are split into sentences, and
    each sentence gets an evenly interpolated share of the cue's time span.
    All lines of a movie are finally sorted by start time.
    :return subtitle: dictionary mapping imdb key to
        {'lines': [...], 'start': [...], 'end': [...]}
    """
    subtitle = {}
    # print(_mp.subtitle_dir)
    subtitle_paths = glob(join(_mp.subtitle_dir, '*.srt'))
    # print(subtitle_paths)
    for p in tqdm(subtitle_paths, desc='Process subtitle'):
        # NOTE(review): iid counts processed cues but is never read —
        # presumably leftover debugging state.
        iid = 0
        # basename imdb_key
        basename = fu.basename_wo_ext(p)
        subtitle[basename] = {'lines': [], 'start': [], 'end': []}
        with open(p, 'r', encoding='iso-8859-1') as f:
            for match in SRT_REGEX.finditer(f.read()):
                raw_index, raw_start, raw_end, proprietary, content = match.groups()
                # Collapse in-cue newlines, then strip markup tags and any
                # remaining angle brackets.
                content = re.sub(r'\r\n|\n', ' ', content)
                content = re.sub(r'<.+?>', '', content, flags=re.DOTALL)
                content = re.sub(r'[<>]', '', content)
                # Decompose accented characters, then drop every non-ascii byte.
                content = normalize("NFKD", content)
                content = content.encode('utf-8').decode('ascii', 'ignore').strip()
                if content:
                    content = sent_tokenize(content)
                    content = [sent.strip() for sent in content if sent.strip()]
                    s = Subtitle.timestamp_to_secs(raw_start)
                    e = Subtitle.timestamp_to_secs(raw_end)
                    # Guard against cues with swapped start/end timestamps.
                    if s > e:
                        s, e = e, s
                    # Spread the cue's duration evenly over its sentences.
                    time_span = (e - s) / len(content)
                    for idx, sent in enumerate(content):
                        subtitle[basename]['start'].append(s + time_span * idx)
                        subtitle[basename]['end'].append(s + time_span * (idx + 1))
                        subtitle[basename]['lines'].append(sent)
                    iid += 1
        # Re-order all three parallel lists by start time.
        index = np.argsort(np.array(subtitle[basename]['start']))
        subtitle[basename]['start'] = [subtitle[basename]['start'][idx] for idx in index]
        subtitle[basename]['end'] = [subtitle[basename]['end'][idx] for idx in index]
        subtitle[basename]['lines'] = [subtitle[basename]['lines'][idx] for idx in index]
    du.json_dump(subtitle, _mp.subtitle_file, indent=0)
    return subtitle
def __init__(self, srt_file):
    """Parse one .srt subtitle file (ISO-8859-1) into parallel lists:
    `lines` (cleaned cue text), `start` / `end` (timedeltas).
    `key` is the file's base name without extension."""
    self.lines = []
    self.start = []
    self.end = []
    self.key = fu.basename_wo_ext(srt_file)
    with open(srt_file, 'r', encoding='iso-8859-1') as f:
        raw_srt = f.read()
    for match in SRT_REGEX.finditer(raw_srt):
        raw_index, raw_start, raw_end, proprietary, content = match.groups()
        # Clean the cue text: trim, collapse newlines, drop markup tags,
        # then NFKD-normalize.
        text = content.strip()
        text = re.sub(r'\r\n|\n', ' ', text)
        text = re.sub(r'<.+?>', '', text, flags=re.DOTALL)
        text = normalize("NFKD", text)
        self.start.append(srt_timestamp_to_timedelta(raw_start))
        self.end.append(srt_timestamp_to_timedelta(raw_end))
        self.lines.append(text)