def process_single(black_frames_path, captions_path, metadata_path,
                   commercials_outpath):
    # Load original data
    black_frames = load_json(black_frames_path)
    captions = load_captions(captions_path)
    metadata = load_json(metadata_path)

    # Create IntervalSet objects
    black_frames_set = IntervalSet([
        Interval(Bounds3D(frame_num / metadata['fps'],
                          (frame_num + 1) / metadata['fps']))
        for frame_num in black_frames
    ])
    captions_set = IntervalSet([
        Interval(Bounds3D(start, end), payload=text)
        for text, start, end in captions
    ])
    whole_video = IntervalSet(
        [Interval(Bounds3D(0, metadata['frames'] / metadata['fps']))])

    # Detect commercials
    results = detect_commercials(black_frames_set, captions_set, whole_video)

    # Convert commercial intervals back to frames
    results = convert_set_from_seconds_to_frames(results, metadata['fps'])

    # Save results in JSON format
    results = [(r['t1'], r['t2']) for r in results.get_intervals()]
    save_json(results, commercials_outpath)
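# convert_set_from_seconds_to_frames is defined elsewhere; below is a
# minimal sketch of what it could look like, assuming rekall's
# IntervalSet.map and the Interval/Bounds3D types used above.
def convert_set_from_seconds_to_frames(interval_set, fps):
    # Rescale each interval's time bounds from seconds to frame numbers,
    # carrying the payload through unchanged.
    return interval_set.map(lambda i: Interval(
        Bounds3D(int(i['t1'] * fps), int(i['t2'] * fps)), i.payload))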
def align_video(num_threads, video_name, video_path, transcript_path,
                out_path):
    aligner = TranscriptAligner(
        win_size=300, seg_length=60, max_misalign=10,
        num_thread=num_threads, estimate=True, missing_thresh=0.2,
        media_path=video_path, transcript_path=transcript_path,
        align_dir=out_path)
    try:
        stats = aligner.run()
        save_json(stats, os.path.join(out_path, FILE_ALIGNMENT_STATS))
    except RuntimeError as e:
        print('Error in processing {}: {}'.format(video_name, e))
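# Example invocation (hypothetical paths; FILE_ALIGNMENT_STATS is assumed
# to be a constant defined elsewhere in this module):
#
#   align_video(num_threads=8,
#               video_name='example_show',
#               video_path='/data/videos/example_show.mp4',
#               transcript_path='/data/transcripts/example_show.srt',
#               out_path='/data/aligned/example_show')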
def process_single(identities_path, embeds_path, propagated_ids_outpath):
    identities = load_json(identities_path)
    embeddings = load_json(embeds_path)

    # Only propagate names that have enough manual labels.
    labeled_face_ids = set(x[0] for x in identities)
    counts = Counter(x[1] for x in identities)
    names_to_propagate = set(x for x in counts if counts[x] > MIN_LABEL_THRESH)

    # Map labeled face ids to names, and group face ids by name.
    face_id_to_identity = {
        x[0]: x[1] for x in identities if x[1] in names_to_propagate
    }
    face_ids_to_propagate = set(face_id_to_identity.keys())
    name_to_face_ids = {name: [] for name in names_to_propagate}
    for face_id, name in face_id_to_identity.items():
        name_to_face_ids[name].append(face_id)

    # Split the embeddings into unlabeled faces and labeled exemplars.
    face_id_to_embed = {
        x[0]: x[1] for x in embeddings if x[0] not in labeled_face_ids
    }
    face_id_to_embed_prop = {
        x[0]: x[1] for x in embeddings if x[0] in face_ids_to_propagate
    }

    # For each name, count the labeled exemplars within L2_THRESH of each
    # unlabeled face; keep the name with the most votes per face.
    unlabeled_array = np.array([x for x in face_id_to_embed.values()])
    best_so_far = [(None, 0)] * unlabeled_array.shape[0]
    for name, ids in name_to_face_ids.items():
        labeled_array = np.array([face_id_to_embed_prop[i] for i in ids])
        dists = euclidean_distances(unlabeled_array, labeled_array)
        dists = (dists < L2_THRESH).astype(int)
        votes = dists.sum(axis=1)
        for i in range(votes.shape[0]):
            if votes[i] > best_so_far[i][1]:
                best_so_far[i] = (name, votes[i])

    # Append the propagated labels with a fixed confidence of 50.0.
    for i, face_id in enumerate(face_id_to_embed.keys()):
        if best_so_far[i][0] is not None:
            identities.append((face_id, best_so_far[i][0], 50.0))
    save_json(identities, propagated_ids_outpath)
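# Toy illustration of the voting step above: every labeled embedding
# within L2_THRESH of an unlabeled one contributes a vote, and the name
# with the most votes wins. Values below are made up.
#
#   >>> unlabeled = np.array([[0.0, 0.0], [5.0, 5.0]])
#   >>> labeled = np.array([[0.1, 0.0], [0.0, 0.2]])  # one name, two faces
#   >>> (euclidean_distances(unlabeled, labeled) < 1.0).astype(int).sum(axis=1)
#   array([2, 0])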
def process_video(crops_path, identities_outpath, max_threads=60):
    assert os.path.isdir(crops_path)
    # Sort the face crops numerically by their filename stems.
    img_files = sorted(os.listdir(crops_path),
                       key=lambda x: int(get_base_name(x)))

    video_labels = []
    n_rows = config.MONTAGE_HEIGHT
    n_cols = config.MONTAGE_WIDTH
    with ThreadPoolExecutor(max_threads) as executor:
        # Submit one labeling task per montage-sized chunk of crops.
        futures = []
        for i in range(0, len(img_files), n_cols * n_rows):
            img_span = img_files[i:i + n_cols * n_rows]
            futures.append(executor.submit(
                submit_images_for_labeling, crops_path, img_span))
        # Block until each chunk is labeled; result() also re-raises any
        # exception from the worker instead of silently dropping its output.
        for future in futures:
            video_labels.extend(future.result())
    save_json(video_labels, identities_outpath)
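# get_base_name is defined elsewhere; a minimal sketch consistent with its
# use above (crop filenames with numeric stems, e.g. '001234.png'):
def get_base_name(path):
    # Strip the directory and extension, leaving the numeric stem.
    return os.path.splitext(os.path.basename(path))[0]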
def process_single(in_file, out_file):
    # Load the detected faces and embeddings and run the classifier
    result = [(face_id, predict_gender(embed), predict_gender_score(embed))
              for face_id, embed in load_json(in_file)]
    save_json(result, out_file)
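# predict_gender and predict_gender_score are defined elsewhere; the
# sketch below assumes a k-NN classifier over face embeddings, with toy
# training data standing in for the real labeled set.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

_toy_embeds = np.random.rand(100, 128)            # hypothetical 128-d embeddings
_toy_labels = np.random.choice(['M', 'F'], 100)   # hypothetical labels
_gender_knn = KNeighborsClassifier(n_neighbors=7).fit(_toy_embeds, _toy_labels)

def predict_gender(embed):
    # Majority label among the 7 nearest labeled embeddings.
    return _gender_knn.predict([embed])[0]

def predict_gender_score(embed):
    # Fraction of neighbors that agree with the predicted label.
    return _gender_knn.predict_proba([embed]).max()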
def save_data(data_path, filename, data):
    save_json(data, os.path.join(data_path, filename))
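# Hypothetical usage: save_data('/data/out', 'stats.json', stats) writes
# stats as JSON to /data/out/stats.json.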