def ttswcsv(data_path, metadata, output_dir, test_split=0.2, val_split=0.2, verbose=True):
    """
    Randomly split the usable rows of a metadata CSV into train / test /
    validation data frames, write each one to disk, and return them.

    Only rows whose GOOD column equals 1 are kept. The test and validation
    sets are carved out of those rows, one after the other, with
    `util.get_random_test_set`; whatever is left over is the training set.
    Both set sizes are computed from the number of GOOD rows *before* any
    rows are removed.

    args:
        data_path : not used by this function; kept so existing callers do
                    not break
        metadata : path to the metadata CSV (must contain a GOOD column, and
                   a HEART_RATE_BPM column when verbose is set)
        output_dir : directory that receives test.csv, val.csv and train.csv
                     (created through base.check_exists_create_if_not)
        (optional) test_split : fraction of the GOOD rows put in the test set
        (optional) val_split : fraction of the GOOD rows put in the validation set
        (optional) verbose : when True, print the heart-rate labels of each set

    returns:
        -> (train_df, test_df, val_df)
    """
    metadf = pd.read_csv(metadata)
    metadf = metadf[metadf.GOOD == 1]

    base.check_exists_create_if_not(output_dir)

    # Sizes are fixed up front so the validation share is not shrunk by the
    # rows already handed to the test set.
    num_rows = len(metadf)
    test_len = int(test_split * num_rows)
    val_len = int(val_split * num_rows)

    metadf, test_df = util.get_random_test_set(metadf, test_len)
    metadf, val_df = util.get_random_test_set(metadf, val_len)

    test_df.to_csv(os.path.join(output_dir, 'test.csv'), index=False)
    val_df.to_csv(os.path.join(output_dir, 'val.csv'), index=False)
    metadf.to_csv(os.path.join(output_dir, 'train.csv'), index=False)

    # The label dumps can be very long; only emit them when asked to.
    if verbose:
        print('Test DF\n', list(test_df['HEART_RATE_BPM']))
        print('Val DF\n', list(val_df['HEART_RATE_BPM']))
        print('Train DF\n', list(metadf['HEART_RATE_BPM']))

    return metadf, test_df, val_df
def resize_frame_dir(frame_dir, output_dir, width=224, height=224):
    """
    Copy and resize frames in given directory.

    Every entry of `frame_dir` is opened as an image, resized to
    (width, height) and saved under the same file name in `output_dir`.

    args:
        frame_dir : directory holding the source frames
        output_dir : directory that receives the resized frames (created
                     through check_exists_create_if_not)
        (optional) width : target width in pixels
        (optional) height : target height in pixels

    raises:
        FileNotFoundError : frame_dir does not exist
        IOError : frame_dir exists but is not a directory
    """
    if not os.path.exists(frame_dir):
        raise FileNotFoundError(
            "Error: path {} does not exists".format(frame_dir))
    if not os.path.isdir(frame_dir):
        raise IOError("Error: path {} is not a directory".format(frame_dir))

    check_exists_create_if_not(output_dir, suppress=True)
    print("[resize_frame_dir]: RESIZING {} -> {}".format(
        frame_dir, output_dir))

    # List the directory once so the progress total always matches the
    # entries that are actually processed.
    frames = os.listdir(frame_dir)
    num_frames = len(frames)

    for completed, frame in enumerate(frames, start=1):
        # The context manager closes the source file handle even when
        # resizing or saving fails.
        with Image.open(os.path.join(frame_dir, frame)) as img:
            # Image.ANTIALIAS was an alias of LANCZOS and was removed in
            # Pillow 10; LANCZOS is the same resampling filter.
            resized = img.resize((width, height), Image.LANCZOS)
            resized.save(os.path.join(output_dir, frame))
        progressBar(completed, num_frames)
    print()
def create_train_test_split_dataframes(data_path, metadata, output_dir, test_split=0.2, val_split=0.15, verbose=True):
    """
    Build train / test / validation data frames from a metadata CSV and
    write them to `output_dir` as train.csv, test.csv and val.csv.

    A 'Path' column is added to the metadata, pointing at
    <data_path>/S<Subject zero-padded to 4>/Trial<Trial>_frames. Rows whose
    Subject string consists only of digits are treated as "real" subjects;
    the test and validation picks are drawn from their (Subject, Trial)
    pairs, and the set sizes are fractions of the number of such rows.

    NOTE(review): this presumably relies on __choose_rand_test_set removing
    the pairs it picks from the list it is given, so that what is left can
    serve as the training pairs -- confirm against that helper.

    args:
        data_path : root directory used to build the 'Path' column
        metadata : path to the metadata CSV (needs Subject and Trial columns;
                   Subject values must be strings)
        output_dir : directory that receives the three CSV files
        (optional) test_split : fraction of real-subject rows for the test set
        (optional) val_split : fraction of real-subject rows for the validation set
        (optional) verbose : currently unused

    returns:
        -> (train_df, test_df, val_df)
    """
    metadf = pd.read_csv(metadata)

    def frames_path(row):
        subject_dir = "S" + str(row["Subject"]).zfill(4)
        trial_dir = "Trial%d_frames" % row["Trial"]
        return os.path.join(data_path, subject_dir, trial_dir)

    metadf['Path'] = metadf.apply(frames_path, axis=1)

    digit_only = metadf['Subject'].apply(lambda x: x.isdigit())
    real_subjects_df = metadf[digit_only]
    real_subs = list(
        zip(real_subjects_df['Subject'], real_subjects_df['Trial']))

    num_real = len(real_subjects_df)
    num_test = int(num_real * test_split)
    num_val = int(num_real * val_split)

    # Order matters: test is drawn first, validation second, and training
    # takes whatever the pair list holds afterwards.
    test_subs = __choose_rand_test_set(real_subs, num_test)
    val_subs = __choose_rand_test_set(real_subs, num_val)
    train_subs = real_subs

    test_df = __dataframe_from_subject_info(metadf, test_subs)
    val_df = __dataframe_from_subject_info(metadf, val_subs)
    train_df = __dataframe_from_subject_info(metadf, train_subs)

    # Sanity check: every metadata row must land in exactly one set.
    assert len(test_df) + len(val_df) + len(train_df) == len(metadf)

    base.check_exists_create_if_not(output_dir)
    for frame, csv_name in ((test_df, 'test.csv'),
                            (val_df, 'val.csv'),
                            (train_df, 'train.csv')):
        frame.to_csv(os.path.join(output_dir, csv_name), index=False)

    return train_df, test_df, val_df
def ttswcvs3(data_path, metadata, output_dir, test_split=0.2, val_split=0.2, verbose=True):
    """
    DEPRECATED (use create_train_test_split_dataframes)

    Train test split with CSV support, version 3. Currently no support for
    ignoring augmented data!

    Unlike the earlier variants, this one builds and returns data frames
    rather than working on CSV files directly. The original notes say the
    split is "bucket" based so that all data has roughly the same chance of
    being seen; that logic lives in util.get_testing_set.

    args:
        data_path : root directory used to build the 'Path' column
        metadata : path to the metadata CSV (needs integer Subject and Trial columns)
        output_dir : directory that receives test.csv, val.csv and train.csv
        (optional) test_split : fraction of all rows for the test set
        (optional) val_split : fraction of all rows for the validation set
        (optional) verbose : currently unused

    returns:
        -> (train_df, test_df, val_df)
    """
    remaining = pd.read_csv(metadata)

    def frames_path(row):
        return os.path.join(data_path,
                            "S%04d" % row["Subject"],
                            "Trial%d_frames" % row["Trial"])

    remaining['Path'] = remaining.apply(frames_path, axis=1)

    base.check_exists_create_if_not(output_dir)

    # Both sizes are fractions of the full row count, taken before any
    # rows are split off.
    total_rows = len(remaining)
    test_len = int(test_split * total_rows)
    val_len = int(val_split * total_rows)

    remaining, test_df = util.get_testing_set(remaining, test_len)
    remaining, val_df = util.get_testing_set(remaining, val_len)

    outputs = (
        (test_df, 'test.csv'),
        (val_df, 'val.csv'),
        (remaining, 'train.csv'),
    )
    for frame, csv_name in outputs:
        frame.to_csv(os.path.join(output_dir, csv_name), index=False)

    return remaining, test_df, val_df
def video_dir_to_frame_dir(video_dir, output_dir, suppress=False):
    """
    Extract the frames of every .mov video found directly inside
    `video_dir` (extension matched case-insensitively), handing each video
    to video_file_to_frames with `output_dir` as the destination.

    args:
        video_dir : directory with videos
        output_dir : location to place the output frames
        suppress (optional) : forwarded to the helpers to silence their output

    returns:
        imgs_captured : a list of image framenames from all videos in video dir

    raises:
        FileNotFoundError : video_dir does not exist, or holds no .mov files
        ValueError : video_dir exists but is not a directory
    """
    # Validate up front and bail out early; the happy path stays unindented.
    if not os.path.exists(video_dir):
        raise FileNotFoundError("%s not found" % video_dir)
    if not os.path.isdir(video_dir):
        raise ValueError("%s isn't a directory" % video_dir)

    movies = []
    for entry in os.listdir(video_dir):
        if entry.lower().endswith(".mov"):
            movies.append(os.path.join(video_dir, entry))

    if len(movies) == 0:
        raise FileNotFoundError("%s contains no video files" % video_dir)

    check_exists_create_if_not(output_dir, suppress=suppress)

    imgs_captured = []
    for movie_path in movies:
        imgs_captured.extend(
            video_file_to_frames(movie_path,
                                 output_dir=output_dir,
                                 suppress=suppress))
    return imgs_captured
def move_frames(source_dir, partitioned_frames, output_dir):
    """
    Helper that moves a selection of frames out of the source directory and
    into a new output partition directory, renumbering them on the way:
    the i-th frame of the selection becomes <output_dir>/frame<i>.png
    (counting from 0).

    args:
        source_dir : the origin directory
        partitioned_frames : file names (relative to source_dir) of the
                             frames to move, in the order they should be
                             numbered
        output_dir : the directory where the selected frames are placed
    """
    check_exists_create_if_not(output_dir, suppress=True)

    for index, frame in enumerate(partitioned_frames):
        origin = os.path.join(source_dir, frame)
        destination = os.path.join(output_dir, "frame" + str(index) + ".png")
        os.rename(origin, destination)
def generate_output_directory(output_dirname):
    """
    Create an output directory laid out like this:

        output_dirname/
            models/

    args:
        output_dirname : string - the output directory name

    returns:
        output_dir : string - name of output_directory (the argument, unchanged)
    """
    # Parent first, then the nested models directory.
    wanted_dirs = (output_dirname, os.path.join(output_dirname, "models"))
    for directory in wanted_dirs:
        B.check_exists_create_if_not(directory)
    return output_dirname
if __name__ == "__main__":
    # Script entry: validate the CLI arguments, then mirror the
    # <frame_dir>/<subject>/<trial>/ directory layout under output_dir.
    args = parse_input().parse_args()

    if not os.path.isdir(args.frame_dir):
        raise IOError("Error: path {} is not a directory".format(
            args.frame_dir))
    if args.xdim <= 0:
        raise ValueError("Error: xdim should be > 0, got {}".format(args.xdim))
    if args.ydim <= 0:
        raise ValueError("Error: ydim should be > 0, got {}".format(args.ydim))

    base.check_exists_create_if_not(args.output_dir)

    for subject in os.listdir(args.frame_dir):
        subject_entries = os.listdir(os.path.join(args.frame_dir, subject))
        for trial in subject_entries:
            trial_path = os.path.join(args.frame_dir, subject, trial)
            # Only directories count as trials; stray files are ignored.
            if os.path.isdir(trial_path):
                os.makedirs(os.path.join(args.output_dir, subject, trial),
                            exist_ok=True)
type=str) return parser if __name__ == "__main__": args = parse_input().parse_args() selects = args.selects metadata = args.data_csv movie_dir = args.movie_directory output_directory = args.output_directory # validate input base.check_exists_create_if_not(output_directory) if not os.path.exists(selects): raise FileNotFoundError("[selects] -- %s not found" % selects) if not os.path.exists(metadata): raise FileNotFoundError("[data_csv] -- %s not found" % metadata) if not os.path.isdir(movie_dir): raise FileNotFoundError("[movie_dir] -- %s not found" % movie_dir) # we're going to assume the csvs specified are properly formatted and the columns are correct selects_df = pd.read_csv(selects) metadf = pd.read_csv(metadata)
def video_file_to_frames(filename, output_dir=None, suppress=False, clip=2):
    """
    Convert a video file to individual frames

    args:
        --> filename : video file to convert
        --> output_dir (optional): the desired output directory for the frames
        --> suppress : boolean to suppress messages or not
        --> clip : seconds of video dropped from the start and from the end

    returns:
        --> list of image filenames

    raises:
        --> ValueError : video_file_exists rejected `filename`

    some facts:
    ----------
    1) If no output directory is specified the png images are saved in a
       directory named '<no_ext>_frames', where <no_ext> is the name that
       video_file_exists returns for the video (presumably the file name
       without its extension -- confirm against that helper).

    2) If an output directory IS specified, the frames go into a
       '<no_ext>_frames' sub-directory of it; the directory is created if
       it is not found.

    3) 60 fps videos are down-sampled by keeping every second frame.

    4) Frames are numbered from 0 starting at the first frame kept after
       the leading clip.

    5) Stack overflow source:
       [https://stackoverflow.com/questions/33311153/python-extracting-and-saving-video-frames]
    """
    # check the video's existence
    vid_valid, err, no_ext = video_file_exists(filename)
    if not vid_valid:
        # a problem occurred
        raise ValueError(err)

    frames_dirname = "%s_frames" % no_ext
    if output_dir:
        output_dir = os.path.join(output_dir, frames_dirname)
    else:
        output_dir = frames_dirname
    check_exists_create_if_not(output_dir, suppress=suppress)

    image_names = []
    vidcap = cv2.VideoCapture(filename)
    try:
        FPS = int(round(vidcap.get(cv2.CAP_PROP_FPS)))
        total = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # number of frames dropped from each end of the video
        clipped_frames = FPS * clip

        if not suppress:
            print(
                "[video_file_to_frames]-- extracting frames from %d fps video with %d frames"
                % (FPS, total))

        # Trust the result of the first read: if the video cannot be
        # decoded there is no frame to write and the loop is skipped.
        success, image = vidcap.read()
        count = 0

        # while there is a next image
        while success:
            pth = ""
            if clipped_frames <= count < total - clipped_frames:
                if FPS != 60:
                    frame_index = count - clipped_frames
                elif count % 2 == 0:
                    # 60 fps: keep every second frame, numbered densely
                    frame_index = (count - clipped_frames) // 2
                else:
                    frame_index = None

                if frame_index is not None:
                    pth = os.path.join(output_dir,
                                       "frame-%05d.png" % frame_index)
                    image_names.append(pth)
                    cv2.imwrite(pth, image)

            success, image = vidcap.read()

            # Progress is only reported for iterations that wrote a frame;
            # a newline closes the \r-updated line once the video ends.
            if not suppress and pth != "":
                if success:
                    sys.stdout.write(
                        "\r[video_file_to_frames]-- writing [%s]" % pth)
                else:
                    sys.stdout.write("\n")
                sys.stdout.flush()

            count += 1
    finally:
        # Always give the decoder / file handle back, even if a write fails.
        vidcap.release()

    if not suppress:
        print(
            "\n[video_file_to_frames]-- clipped [%d] seconds off of each end of video"
            % clip)

    return image_names
heart_rate, resp_rate = 0, 0 if trial == 1: heart_rate, resp_rate = float(subj_row[1]), float( subj_row[2]) elif trial == 2: heart_rate, resp_rate = float(subj_row[3]), float( subj_row[4]) subj_origin = os.path.join(data_dir, "S%04d" % subj, "Trial%d.MOV" % trial) assert os.path.exists(subj_origin) new_subj = "%d%02d" % (trial, subj) new_subj_path = "S0%s" % new_subj base.check_exists_create_if_not( os.path.join(data_dir, new_subj_path)) subj_target_fast = os.path.join(data_dir, new_subj_path, "Trial1.MOV") subj_target_slow = os.path.join(data_dir, new_subj_path, "Trial2.MOV") """ s hr1 rr1 hr2 rr2 """ """ | | | | | """ row = ["", "", "", "", ""] row[0] = new_subj if heart_rate * 2 <= UPPER_THRESHOLD: row[1] = heart_rate * 2 row[2] = (heart_rate * 2) / 4 selected_df.loc[len(selected_df)] = [new_subj, 1]
# Random flips: each enabled flip is applied with probability 0.5. The
# coin is tossed independently per axis, so enabling only one of the two
# flags works on its own.
if args.vertical_flip and np.random.random_sample() > 0.5:
    sequence = fp.sequence_flip_axis(sequence, 0)  # flip on the row axis

if args.horizontal_flip and np.random.random_sample() > 0.5:
    sequence = fp.sequence_flip_axis(sequence, 1)  # flip on the column axis

# Dump the warped frames as numbered PNGs.
final = "_warped/"
base.check_exists_create_if_not(final)

# scale the frames by 255 before writing them out
sequence = [img * 255. for img in sequence]
for i, img in enumerate(sequence):
    pth = os.path.join(final, "frame%04d.png" % i)
    cv2.imwrite(pth, img)

# Name the gif after the movie file; splitext copes with extensions of any
# length (and with none at all), unlike slicing off the last 4 characters.
gif_name = os.path.splitext(args.movie_file)[0] + "_warped.gif"
imageio.mimsave(gif_name, sequence)

# remove the intermediate frame directories
print("cleaning ...")
os.system("rm -r _frames/")
os.system("rm -r _rsz/")
print("wrote a .gif : %s" % gif_name)
def train_test_split_with_csv_support(regular_data_path, filtered_csv, consolidated_csv, dir_out, augmented_data_path=None, ignore_augmented=[], test_split=0.2, val_split=0.1, train_csv_out="train.csv", test_csv_out="test.csv", val_csv_out="val.csv", verbose=True): """ EXTREMELY DEPRECATED (do not use this, use create_train_test_split_dataframes) Split all available data into train, test, and validation sets. This splitting method does not allow the same trial of the same subject to appear in more than one set, e.g., if s1 t1 appears in the training set, then any partition of s1 t1 cannot appear in testing or validation. If the path for speed augmented data is provided, then whenever any subject and trial is added to a set, its associated augmented partition will also be added. Three csv files will be created upon calling this method. These csv files record the data delegated to each of the training, testing, and validation sets, so that they may be loaded in the future. Example usage: train, test, split = train_test_split_with_csv_support('reg_consolidated', 'NextStartingPoint.csv', 'reg_part_out.csv', 'test3', augmented_data_path='aug_consolidated') args: regular_data_path : path of the directory containing all of the data filtered_csv : path to the csv containing the subjects and trials that will be used in the data split consolidated_csv : path to the csv containing the path, heart rate, and respiratory rate of each data point dir_out : directory that the csv files will be written to (optional) augmented_data_path : path to the directory containin the augmented data (optional) test_split : percentage of data to be assigned to the test set (optional) val_split : percentage of data to be assigned to the validation set (optional) verbose : boolean which controls whether or not the program will log each action taken returns: -> a map containing full path keys of data to heart rate, respiratory rate pairs """ if not os.path.exists(regular_data_path): raise 
FileNotFoundError("Data path {} does not exist".format(regular_data_path)) if not os.path.isdir(regular_data_path): raise ValueError("Data path {} is not a directory".format(regular_data_path)) if augmented_data_path != None and not os.path.exists(augmented_data_path): raise FileNotFoundError("Augmented data path {} does not exist".format(augmented_data_path)) if augmented_data_path != None and not os.path.isdir(augmented_data_path): raise ValueError("Augmented data path {} is not a directory".format(augmented_data_path)) if not os.path.exists(filtered_csv): raise FileNotFoundError("Chosen subjects csv path {} does not exist".format(filtered_csv)) if not os.path.exists(consolidated_csv): raise FileNotFoundError("Consolidated csv path {} does not exist".format(consolidated_csv)) if verbose: if augmented_data_path == None: print('[train_test_split_with_csv_support]: splitting data without augmented data') else: print('[train_test_split_with_csv_support]: splitting data with augmented data directory -> {}'.format(augmented_data_path)) trial1, trial2 = util.all_subjects(filtered_csv) testing_set = util.split_subjects(trial1, trial2, test_split) validation_set = util.split_subjects(trial1, trial2, val_split) trial1.extend(trial2) training_set = trial1 if verbose: print(util.set_to_str("----Training Set----", training_set)) print(util.set_to_str("----Validation Set----", validation_set)) print(util.set_to_str("----Testing Set----", testing_set)) all_paths = util.fetch_paths_with_labels(consolidated_csv, regular_data_path) filtered_training_paths = util.filter_path_with_set(training_set, all_paths, augmented_data_path, verbose) filtered_testing_paths = util.filter_path_with_set(testing_set, all_paths, augmented_data_path, verbose) filtered_validation_paths = util.filter_path_with_set(validation_set, all_paths, augmented_data_path, verbose) base.check_exists_create_if_not(dir_out, not verbose) data_set_to_csv(filtered_training_paths, os.path.join(dir_out, train_csv_out), 
verbose=verbose) data_set_to_csv(filtered_testing_paths, os.path.join(dir_out, test_csv_out), verbose=verbose) data_set_to_csv(filtered_validation_paths, os.path.join(dir_out, val_csv_out), verbose=verbose) if "train" in ignore_augmented: filtered_training_paths = {path : filtered_training_paths[path] for path in filtered_training_paths if augmented_data_path not in path} if "test" in ignore_augmented: filtered_testing_paths = {path : filtered_testing_paths[path] for path in filtered_testing_paths if augmented_data_path not in path} if "validation" in ignore_augmented: filtered_validation_paths = {path : filtered_validation_paths[path] for path in filtered_validation_paths if augmented_data_path not in path} return filtered_training_paths, filtered_testing_paths, filtered_validation_paths
# Run the experiment `e` and compare its predictions with the labels of
# its test set.
pred, loss = e.run()
observed = e.test_set[['HEART_RATE_BPM', 'RESP_RATE_BR_PM']].values.tolist()
print('obs: ', observed)
print('pred: ', pred)

# Regroup the predictions into rows of four values and average them
# pairwise: columns 0 and 2 become HEART_RATE, columns 1 and 3 become
# RESP_RATE.
# NOTE(review): assumes pred is an array of (heart rate, resp rate) pairs
# with two consecutive predictions per test sample -- confirm against
# e.run().
pred = pred.reshape((int(pred.shape[0] / 2), 4))
pred_avg = list(
    map(lambda row: [np.mean([row[0], row[2]]),
                     np.mean([row[1], row[3]])], pred))
#print(pred_avg)
pred_df = pd.DataFrame(pred_avg, columns=['HEART_RATE', 'RESP_RATE'])
print(pred_df)

# NOTE(review): relies on check_exists_create_if_not returning the path it
# was given -- confirm; figsdir is passed to os.path.join below.
figsdir = check_exists_create_if_not(
    os.path.join('run_history', relpath, 'figs'))

# heart rate x, y
hrate_actual = e.test_set['HEART_RATE_BPM'].values.tolist()
hrate_pred = pred_df['HEART_RATE'].values.tolist()
# Pearson correlation between actual and predicted heart rate; r is put in
# the scatter label.
r, p = pearsonr(hrate_actual, hrate_pred)
# NOTE(review): a label is set but no plt.legend() call is visible before
# savefig, so the R value may not show up in the saved figure.
plt.scatter(hrate_actual, hrate_pred, label='$R=%0.4f$' % r)
plt.xlabel('actual heart rate')
plt.ylabel('predicted heart rate')
plt.savefig(os.path.join(figsdir, 'hrate_corr.png'))
# clear the figure so the next plot starts from a blank canvas
plt.clf()

# resp rate x, y
resprate_actual = e.test_set['RESP_RATE_BR_PM'].values.tolist()
resprate_pred = pred_df['RESP_RATE'].values.tolist()
r, p = pearsonr(resprate_actual, resprate_pred)