def sample_image_pair(iPart, video_real_path, video_fake_path):
    """Sample feature-line image stacks from a real/fake video pair and save both.

    Reads 32 frames from each video, locates two consistent faces in the real
    one, then for every feature set draws up to 100 random feature-line images
    from identical line coordinates in both videos so real/fake stay aligned.
    Returns early (saving nothing) when no consistent face pair is found.
    """
    W = 128
    video_real = read_video(video_real_path, 32)
    faces = find_two_consistent_faces(video_real)
    if (faces[0] is None) or (faces[1] is None):
        # No usable face pair — skip this video pair entirely.
        return
    video_fake = read_video(video_fake_path, 32)
    # Frame dimensions: axis 1 is height, axis 2 is width.
    x_max = video_real.shape[2]
    y_max = video_real.shape[1]
    for l_feature_set in get_feature_sets():
        real_images = []
        fake_images = []
        for _ in range(100):
            anLines = get_feature_lines(x_max, y_max, faces, l_feature_set, W, 5)
            if anLines is None:
                continue
            # Same line coordinates sampled from both videos keeps pairs aligned.
            real_images.append(sample_feature_image(anLines, video_real))
            fake_images.append(sample_feature_image(anLines, video_fake))
        if real_images and fake_images:
            zFilenameReal = f"IMG_p_{iPart}_{video_real_path.name}_{video_fake_path.name}_{l_feature_set[0]}_{l_feature_set[1]}_real"
            zFilenameFake = f"IMG_p_{iPart}_{video_real_path.name}_{video_fake_path.name}_{l_feature_set[0]}_{l_feature_set[1]}_fake"
            np.save(get_output_dir() / zFilenameReal, np.stack(real_images))
            np.save(get_output_dir() / zFilenameFake, np.stack(fake_images))
def process(t):
    """Worker: sample pair/real/fake line data for one (iPart, original, fake) task.

    t is a task tuple as produced by prepare_process(): (part index, original
    video Path, fake video Path). Saves three .npy files into the output dir.
    """
    iPart, original, fake = t
    print(f"Processing p_{iPart}_{str(original.stem)}_{str(fake.stem)}")
    output_dir = get_output_dir()
    file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{str(original.stem)}_{str(fake.stem)}.npy"
    file_real_out = output_dir / f"Line_Test_p_{iPart}_{str(original.stem)}_real.npy"
    file_fake_out = output_dir / f"Line_Test_p_{iPart}_{str(fake.stem)}_fake.npy"
    isExisting = file_pair_out.is_file() and file_real_out.is_file() and file_fake_out.is_file()
    # prepare_process() already filters out finished tasks; hitting an existing
    # triple here means the task list is stale.
    assert not isExisting
    # BUG FIX: the original body referenced `mtcnn_detector` without defining it
    # (NameError unless a module global exists elsewhere — the other entry points
    # in this file construct the detector locally, so do the same here).
    mtcnn_detector = MTCNNDetector()
    data_pair = sample_pair(original, fake)
    data_test_real = sample_single(mtcnn_detector, original, 0.4)
    data_test_fake = sample_single(mtcnn_detector, fake, 0.4)
    # The sampling helpers return one zeroed-out line on error, so an empty
    # first axis indicates something went badly wrong.
    assert data_pair.shape[0] > 0 and data_test_real.shape[0] > 0 and data_test_fake.shape[0] > 0
    np.save(file_pair_out, data_pair)
    np.save(file_real_out, data_test_real)
    np.save(file_fake_out, data_test_fake)
def dataframe_exists(iPart, x_real):
    """Return True if the pickled dataframe for (iPart, x_real) already exists.

    x_real is a video filename; its last four characters (presumably an
    extension like ".mp4" — confirm against callers) are stripped to build
    the pickle name.
    """
    output_dir = get_output_dir()
    # BUG FIX: removed a stray no-op expression statement `x_real[:-4]` that
    # computed a slice and discarded it.
    zFilename = output_dir / f"p_{iPart}_{x_real[:-4]}_.pkl"
    return zFilename.is_file()
def create_chunks():
    """Repack per-original sample files into photo/label chunk files.

    Each input .npy is reshaped to (n, 32, 256, 3); the first 7 rows are taken
    as real samples and rows 7..13 as the first fake. Photos are normalized to
    [0, 1] and flushed to numbered chunk files once more than 5000 accumulate
    (or when the last input file is reached).
    """
    output_dir = get_output_dir()
    ready_dir = get_ready_data_dir()
    npy_files = [f for f in sorted(output_dir.iterdir()) if "npy" in f.suffix]
    chunk_idx = 0
    photos = []
    labels = []
    for f in npy_files:
        anData = np.load(f)
        video_size = 32
        W = 256
        anData = anData.reshape(-1, video_size, W, 3)
        real_rows = anData[:7]
        fake_rows = anData[7:14]  # first fake only
        for i in range(7):
            photos.append(real_rows[i])
            labels.append(0.0)  # 0.0 = real
            photos.append(fake_rows[i])
            labels.append(1.0)  # 1.0 = fake
        is_last = (f == npy_files[-1])
        if is_last or len(photos) > 5000:
            an_photos = np.asarray(photos) / 255.0
            an_labels = np.asarray(labels)
            np.save(ready_dir / f"photos_{chunk_idx:04}.npy", an_photos)
            np.save(ready_dir / f"labels_{chunk_idx:04}.npy", an_labels)
            chunk_idx = chunk_idx + 1
            photos = []
            labels = []
def prepare_process(iPart):
    """Build the worker task list [(iPart, original, fake), ...] for one part.

    Skips originals with no associated fakes, pairs whose files are missing,
    and pairs whose three output files already exist.
    TODO: prep all (original, fake) pairs for all parts and multiprocess over
    the pairs rather than over iPart.
    """
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)  # renamed from `dir` (shadowed the builtin)
    output_dir = get_output_dir()
    # BUG FIX: removed an unused `mtcnn_detector = MTCNNDetector()` — the
    # detector is expensive to construct and was never used in this function.
    l_part_task = []
    for current in l_d:
        original = part_dir / current[0]
        # Pick first fake. Todo: Can pick other fakes for more data. (one set per epoch)
        if len(current[1]) == 0:
            print(f"p_{iPart}_{str(original.stem)}: No associated fakes. Skipping.")
            continue
        fake = part_dir / current[1][0]
        if not (original.is_file() and fake.is_file()):
            print(f"p_{iPart}: Original and/or fake not found. Skipping.")
            continue
        file_pair_out = output_dir / f"Line_Pair_p_{iPart}_{str(original.stem)}_{str(fake.stem)}.npy"
        file_real_out = output_dir / f"Line_Test_p_{iPart}_{str(original.stem)}_real.npy"
        file_fake_out = output_dir / f"Line_Test_p_{iPart}_{str(fake.stem)}_fake.npy"
        if file_pair_out.is_file() and file_real_out.is_file() and file_fake_out.is_file():
            continue  # already produced
        l_part_task.append((iPart, original, fake))
    return l_part_task
def sample_image_single(iPart, video_path, isFake):
    """Sample feature-line image stacks from a single video and save one per feature set.

    Returns early (saving nothing) when no consistent face pair is found.
    isFake only selects the 'real'/'fake' suffix of the saved filename.
    """
    W = 128
    video = read_video(video_path, 32)
    faces = find_two_consistent_faces(video)
    if faces[0] is None or faces[1] is None:
        return
    # Frame dimensions: axis 1 is height, axis 2 is width.
    x_max = video.shape[2]
    y_max = video.shape[1]
    zClass = 'fake' if isFake else 'real'
    for l_feature_set in get_feature_sets():
        images = []
        for _ in range(100):
            anLines = get_feature_lines(x_max, y_max, faces, l_feature_set, W, 5)
            if anLines is not None:
                images.append(sample_feature_image(anLines, video))
        if images:
            zFilename = f"IMG_p_{iPart}_{video_path.name}_{l_feature_set[0]}_{l_feature_set[1]}_{zClass}"
            np.save(get_output_dir() / zFilename, np.stack(images))
def process_part(iPart):
    """Sample line data for every (original, fakes) set in a part.

    Saves one concatenated array per original, but only when the original and
    at least one of its fakes sampled successfully.
    """
    l_d = read_metadata(iPart)
    input_dir = get_part_dir(iPart)
    output_dir = get_output_dir()
    detector = MTCNNDetector()
    for o_set in l_d:
        original_path = input_dir / o_set[0]
        samples = []
        r_data = sample_video_safe(detector, original_path, False)
        if r_data is None:
            print(f"{original_path.stem}: Bad original. Skipping set.")
            continue
        samples.append(r_data)
        for fake_name in o_set[1]:
            f_data = sample_video_safe(detector, input_dir / fake_name, False)
            if f_data is not None:
                samples.append(f_data)
        # Need the original plus at least one good fake to be worth saving.
        if len(samples) >= 2:
            np.save(output_dir / f"p_{iPart}_{original_path.stem}.npy",
                    np.concatenate(samples))
        else:
            print(f"{original_path.stem}: No good fakes. Skipping set.")
def process(iPart):
    """Sample paired and single feature images for every original in a part.

    For each original a random fake is chosen; pair images and per-video
    single images are produced for both.
    NOTE(review): this file defines `process` twice with different signatures;
    the later definition is the one visible at import time.
    """
    l_d = read_metadata(iPart)
    part_dir = get_part_dir(iPart)  # renamed from `dir` (shadowed the builtin)
    num_originals = len(l_d)
    for idx_key in range(num_originals):
        print(f"p_{iPart}: Processing original {idx_key + 1} / {num_originals}")
        current = l_d[idx_key]
        # BUG FIX: random.choice raises IndexError on an empty sequence, and
        # metadata entries without fakes do occur (prepare_process guards the
        # same case) — skip them instead of crashing.
        if len(current[1]) == 0:
            continue
        original = part_dir / current[0]
        fake = part_dir / random.choice(current[1])
        if original.is_file() and fake.is_file():
            sample_image_pair(iPart, original, fake)
            sample_image_single(iPart, original, False)
            sample_image_single(iPart, fake, True)
def list_all_files(num_parts=50):
    """Collect a (part, original, file) row per video across all parts and pickle it.

    Each original contributes one row for itself plus one row per fake.
    num_parts: number of dataset parts to scan (default 50, the previous
    hard-coded value, so existing callers are unchanged).
    """
    l_part = []
    l_original = []
    l_file = []
    for iPart in range(num_parts):
        l_d = read_metadata(iPart)
        for x in l_d:
            # Strip the 4-char extension (presumably ".mp4" — confirm).
            original = x[0][:-4]
            l_part.append(iPart)
            l_original.append(original)
            l_file.append(original)
            for fake in x[1]:
                l_part.append(iPart)
                l_original.append(original)
                l_file.append(fake[:-4])
    # BUG FIX: removed a stray no-op string literal ("""c""") from the body.
    df = pd.DataFrame({'p': l_part, 'original': l_original, 'file': l_file})
    df.to_pickle(get_output_dir() / "all_files.pkl")
def create_test_merge(iPartMin, iPartMax):
    """Merge per-video 'Line_Test' files for parts in [iPartMin, iPartMax).

    Produces one test array per feature plus a metadata dataframe whose rows
    align one-to-one with the rows of each per-feature array.
    """
    assert iPartMax > iPartMin
    test_parts = set(range(iPartMin, iPartMax))
    num_length = 32
    input_dir = get_output_dir()
    assert input_dir.is_dir()
    output_dir = get_ready_data_dir()
    assert output_dir.is_dir()
    d_f = get_feature_converter()
    num_features = len(d_f.keys())
    candidates = [x for x in input_dir.iterdir() if x.suffix == '.npy']
    collected = {zFeature: [] for zFeature in d_f}
    l_iPart = []
    l_zVideo = []
    l_y = []
    for x in candidates:
        tokens = str(x.stem).split("_")
        # Test files tokenize as: Line_Test_p_<part>_<video>_<real|fake>
        if len(tokens) != 6 or tokens[1] != 'Test':
            continue
        iPart = int(tokens[3])
        if iPart not in test_parts:
            continue
        video = tokens[4]
        y = tokens[5]
        data = np.load(x)
        if is_error_line(data):
            continue
        # Column 0 carries the feature id; the rest is the line payload.
        anFeature = data[:, 0]
        data = data[:, 1:].reshape(-1, num_length, 3)
        num_rows = data.shape[0]
        assert num_rows % num_features == 0
        rows_per_feature = num_rows // num_features
        # Metadata rows are emitted once per feature-row so they stay aligned
        # with the concatenated per-feature arrays below.
        l_iPart.extend([iPart] * rows_per_feature)
        l_zVideo.extend([video] * rows_per_feature)
        l_y.extend([y] * rows_per_feature)
        for zFeature, iF in d_f.items():
            m_correct_feature = (anFeature == iF)
            collected[zFeature].append(data[m_correct_feature])
            assert data[m_correct_feature].shape[0] == rows_per_feature
    num_meta = len(l_iPart)
    for zFeature in d_f:
        if collected[zFeature]:
            anDataTest = np.concatenate(collected[zFeature])
            assert anDataTest.shape[0] == num_meta
            np.save(output_dir / f"test_{zFeature}_p_{iPartMin}_p_{iPartMax}.npy",
                    anDataTest)
        else:
            print(f"No data: test_{zFeature}_p_{iPartMin}_p_{iPartMax}")
    df_meta = pd.DataFrame({'iPart': l_iPart, 'video': l_zVideo, 'y': l_y})
    df_meta.to_pickle(output_dir / f"test_meta_p_{iPartMin}_p_{iPartMax}.pkl")
def create_train_merge(iPartMin, iPartMax):
    """Merge per-pair 'Line_Pair' files for parts in [iPartMin, iPartMax).

    Produces one training array per feature; rows are length 2*num_length
    (pair data) with 3 channels.
    """
    assert iPartMax > iPartMin
    train_parts = set(range(iPartMin, iPartMax))
    num_length = 32
    input_dir = get_output_dir()
    assert input_dir.is_dir()
    output_dir = get_ready_data_dir()
    assert output_dir.is_dir()
    d_f = get_feature_converter()
    collected = {zFeature: [] for zFeature in d_f}
    for x in input_dir.iterdir():
        if x.suffix != '.npy':
            continue
        tokens = str(x.stem).split("_")
        # Train files tokenize as: Line_Pair_p_<part>_<original>_<fake>
        if len(tokens) != 6 or tokens[1] != 'Pair':
            continue
        if int(tokens[3]) not in train_parts:
            continue
        data = np.load(x)
        if is_error_line(data):
            continue
        # Column 0 carries the feature id; the rest is the line payload.
        anFeature = data[:, 0]
        data = data[:, 1:].reshape(-1, num_length * 2, 3)
        for zFeature, iF in d_f.items():
            collected[zFeature].append(data[anFeature == iF])
    for zFeature in d_f:
        if collected[zFeature]:
            np.save(output_dir / f"train_{zFeature}_p_{iPartMin}_p_{iPartMax}.npy",
                    np.concatenate(collected[zFeature]))
####################################################################################
#
# __main__
#

if __name__ == '__main__':
    # BUG FIX: removed a stray duplicated fragment of process()'s body that sat
    # at module level and referenced undefined names (`dir`, `current`, ...),
    # which would raise NameError on import.

    outdir_test = get_output_dir()
    assert outdir_test.is_dir()

    # Smoke-test that the output directory is writable before spawning workers.
    file_test = outdir_test / "test_out_cubes.txt"
    nPing = file_test.write_text("ping")
    assert nPing == 4

    # One task per part; one worker per task.
    l_tasks = list(range(50))
    num_threads = 50
    print(f"Launching on {num_threads} thread(s)")
    with Pool(num_threads) as p:
        l = p.map(process, l_tasks)
def process_chunk(iPart):
    """Delegate sampling-cube extraction for one part to the shared helper."""
    out_dir = get_output_dir()
    assert out_dir.is_dir()
    return get_sampling_cubes_for_part(iPart, out_dir)