def prepare_process(iPart):
    # Todo prep all (original, fake) for all parts. Issue tasks for all pairs and mp on those, not the iPart.
    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()

    num_originals = len(l_d)

    l_part_task = []

    for idx_key in range(num_originals):
        current = l_d[idx_key]
        original = dir / current[0]

        # Pick first fake. Todo: Can pick other fakes for more data. (one set per epoch)
        num_fakes = len(current[1])
        if num_fakes == 0:
            print(f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping.")
            continue

        fake = dir / current[1][0]

        isPairFound = original.is_file() and fake.is_file()
        if not isPairFound:
            print(f"p_{iPart}: Original and/or fake not found. Skipping.")
            continue

        file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{str(original.stem)}_{str(fake.stem)}.npy"
        file_real_out = output_dir / f"Line_Test_p_{iPart}_{str(original.stem)}_real.npy"
        file_fake_out = output_dir / f"Line_Test_p_{iPart}_{str(fake.stem)}_fake.npy"

        isExisting = file_pair_out.is_file() and file_real_out.is_file() and file_fake_out.is_file()
        if isExisting:
            continue

        l_part_task.append((iPart, original, fake))

    return l_part_task
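# Minimal dispatch sketch for the Todo above: collect the (iPart, original, fake)
# tuples from every part, then fan the pairs out over a process pool.
# `process_pair` is a hypothetical per-pair worker (assumed to sample one
# original/fake pair and write the Line_* output files); the pool size is an
# assumption as well.

import multiprocessing


def run_prepare_and_dispatch(l_parts, num_workers=4):
    l_tasks = []
    for iPart in l_parts:
        l_tasks.extend(prepare_process(iPart))

    with multiprocessing.Pool(processes=num_workers) as pool:
        # process_pair: hypothetical worker taking an (iPart, original, fake) tuple.
        pool.map(process_pair, l_tasks)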
def __init__(self):
    df_c = pd.read_feather(get_meta_dir() / "face_clusters.feather")

    l_parts = []
    for x in range(50):
        path = get_part_dir(x, False)
        isDir = path.is_dir()
        if isDir:
            l_parts.append(x)

    l_orig = []
    l_file = []
    l_part = []

    for iPart in l_parts:
        df_meta = read_metadata(iPart)
        for x in df_meta:
            num_fakes = len(x[1])
            l_orig.extend([x[0]] * (num_fakes + 1))
            l_file.append(x[0])
            l_file.extend(x[1])
            l_part.extend([iPart] * (num_fakes + 1))

    df = pd.DataFrame({'orig': l_orig, 'file': l_file, 'part': l_part})

    df = df.merge(df_c, left_on='orig', right_on='video')
    df = df.drop(['video', 'chunk'], axis=1)

    l_file_tuple = list(zip(df.file, df.part))

    l_exists = []
    for x in l_file_tuple:
        filepath = get_part_dir(x[1]) / x[0]
        l_exists.append(filepath.is_file())

    df = df.assign(exists=l_exists)

    num_files = df.shape[0]
    num_originals = np.unique(df.orig).shape[0]
    num_clusters = np.unique(df.cluster).shape[0]

    # print(f"num_files = {num_files}, num_originals = {num_originals}, num_clusters = {num_clusters}")

    self._df = df
def process_part(iPart):
    l_d = read_metadata(iPart)
    input_dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    mtcnn_detector = MTCNNDetector()

    for o_set in l_d:
        l_samples = []

        original_path = input_dir / o_set[0]
        # print(f"{iPart}: {original_path.stem}...")

        r_data = sample_video_safe(mtcnn_detector, original_path, False)
        if r_data is None:
            print(f"{original_path.stem}: Bad original. Skipping set.")
            continue

        l_samples.append(r_data)

        for fake_path in o_set[1]:
            f_data = sample_video_safe(mtcnn_detector, input_dir / fake_path, False)
            if f_data is None:
                continue
            l_samples.append(f_data)

        if len(l_samples) >= 2:
            data = np.concatenate(l_samples)
            filename = f"p_{iPart}_{original_path.stem}.npy"
            output_path = output_dir / filename
            np.save(output_path, data)
        else:
            print(f"{original_path.stem}: No good fakes. Skipping set.")
def process(iPart):
    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):
        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]
        original = dir / current[0]

        # Guard against originals with no associated fakes (random.choice would raise on an empty list).
        if len(current[1]) == 0:
            print(f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping.")
            continue

        fake = dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            data_train = sample_pair(original, fake)

            if data_train is None:
                print(f"p_{iPart}_{str(original.stem)}_{str(fake.stem)}: No data.")
            else:
                file_out = output_dir / f"p_{iPart}_Train_{str(original.stem)}_{str(fake.stem)}.npy"
                np.save(file_out, data_train)

            data_test_real = sample_single(original)
            data_test_fake = sample_single(fake)

            isValid = (data_test_real is not None) and (data_test_fake is not None)

            if isValid:
                file_real_out = output_dir / f"p_{iPart}_Test_{str(original.stem)}_real.npy"
                np.save(file_real_out, data_test_real)

                file_fake_out = output_dir / f"p_{iPart}_Test_{str(fake.stem)}_fake.npy"
                np.save(file_fake_out, data_test_fake)
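# A minimal driver sketch for running the process() above over every part in
# parallel. Part discovery mirrors the get_part_dir(x, False).is_dir() check
# used elsewhere in this code; the part range and pool size are assumptions.

import multiprocessing


def run_process_all_parts(num_workers=4):
    l_parts = [x for x in range(50) if get_part_dir(x, False).is_dir()]
    with multiprocessing.Pool(processes=num_workers) as pool:
        pool.map(process, l_parts)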
def process(iPart):
    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)
    output_dir = get_output_dir()

    num_originals = len(l_d)

    for idx_key in range(num_originals):
        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")

        current = l_d[idx_key]
        original = dir / current[0]

        # Guard against originals with no associated fakes (random.choice would raise on an empty list).
        if len(current[1]) == 0:
            print(f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping.")
            continue

        fake = dir / random.choice(current[1])

        if original.is_file() and fake.is_file():
            sample_image_pair(iPart, original, fake)
            sample_image_single(iPart, original, False)
            sample_image_single(iPart, fake, True)
def list_all_files():
    parts = list(range(50))

    l_part = []
    l_original = []
    l_file = []

    for iPart in parts:
        l_d = read_metadata(iPart)
        for x in l_d:
            original = x[0][:-4]  # strip the '.mp4' extension

            l_part.append(iPart)
            l_original.append(original)
            l_file.append(original)

            for fake in x[1]:
                l_part.append(iPart)
                l_original.append(original)
                l_file.append(fake[:-4])

    df = pd.DataFrame({'p': l_part, 'original': l_original, 'file': l_file})
    df.to_pickle(get_output_dir() / "all_files.pkl")
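# Short usage sketch: reload the pickled index written by list_all_files() and
# count how many files (originals plus fakes) each part contributes. The column
# names ('p', 'original', 'file') are the ones defined above.

def load_all_files():
    df = pd.read_pickle(get_output_dir() / "all_files.pkl")
    # One row per video, keyed back to its originating original and part.
    print(df.groupby('p')['file'].count())
    return df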
from mp4_frames import read_video
from mp4_frames import read_metadata
from mp4_frames import get_part_dir  # assumed: get_part_dir lives alongside read_metadata
from featureline import find_middle_face_box
from face_detector import MTCNNDetector
from line_sampler import get_line

from sklearn.metrics import mean_squared_error

a = [7, 3, 4, 11, 24, 123, 3, 7, 3, 4, 10, 21, 123, 3]
b = [7, 3, 4, 11, 24, 129, 6, 11, 4, 10, 21, 123, 3, 3]

# eye - eye line.
# Start .5 outside, continue to l_eye location.

l_d = read_metadata(7)
entry = l_d[3]

input_dir = get_part_dir(7)

mtcnn_detector = MTCNNDetector()

real_path = input_dir / entry[0]
fake_path = input_dir / entry[1][1]

assert real_path.is_file()
assert fake_path.is_file()

video_size = 32
W = 256
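# The integer lists a and b above look like pixel samples taken along the same
# line in the real and fake video; a quick per-line error, using the
# mean_squared_error import, could be computed like this (a sanity check only,
# not part of the sampler itself):

mse_ab = mean_squared_error(a, b)
print(f"MSE between sample lines a and b: {mse_ab:.2f}")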
def get_sampling_cubes_for_part(iPart, output_dir):
    l_d = read_metadata(iPart)
    dir = get_part_dir(iPart)

    num_videos = len(l_d)

    print(f"p_{iPart}: Fake detection on part {iPart}. {len(l_d)} original video(s).")

    for idx_key in range(num_videos):
        current = l_d[idx_key]
        x_real = current[0]

        isCompleted = dataframe_exists(iPart, x_real)
        if isCompleted:
            # print(f"p_{iPart}_{x_real}: Already done.")
            continue

        print(f"p_{iPart}_{x_real}: Starting. {idx_key + 1} of {len(l_d)}")

        x_real = dir / x_real
        assert x_real.is_file(), "Error: Original not found"

        vidcap = cv2.VideoCapture(str(x_real))
        video_real = read_video(vidcap)
        vidcap.release()

        num_frames = video_real.shape[0]

        l_fakes = current[1]
        l_df_video = []

        for x_fake in l_fakes:
            x_fake = dir / x_fake
            if not x_fake.is_file():
                print(f" WARNING: p_{iPart}_{x_real.stem}: Not a file: {x_fake}. Situation handled.")
                continue

            print(f" p_{iPart}_{x_real.stem}: Processing {str(x_fake.stem)}")

            vidcap = cv2.VideoCapture(str(x_fake))
            video_fake = read_video(vidcap)
            vidcap.release()

            df_video = get_sampling_cubes(video_real, video_fake)
            df_video = df_video.assign(fake=str(x_fake.stem))
            l_df_video.append(df_video)

        if len(l_df_video) > 0:
            df_video = pd.concat(l_df_video, axis=0)
            df_video = df_video.assign(original=str(x_real.stem))
            df_video = df_video.assign(part=iPart)

            df_video.to_pickle(output_dir / f"p_{iPart}_{x_real.stem}_.pkl")
            print(f"p_{iPart}_{x_real.stem}: Complete.")
        else:
            print(f"p_{iPart}_{x_real.stem}: WARNING: No fakes found. No sampling cubes produced for video.")

    return []