Пример #1
0
def ttswcsv(data_path, metadata, output_dir,
             test_split=0.2, val_split=0.2, verbose=True):
    """
    Split the usable rows of a metadata CSV into train / test / validation
    data frames and write each of them to ``output_dir`` as a CSV file.

    Only rows whose ``GOOD`` column equals 1 take part in the split. The test
    and validation sizes are both computed from the number of rows that
    survive that filter (not from what is left after the test rows have been
    taken out).

    args:
        data_path : currently unused by this function (kept for interface
                    compatibility with the other split variants)
        metadata : path of the CSV handed to ``pd.read_csv``; must contain a
                   ``GOOD`` column, and a ``HEART_RATE_BPM`` column when
                   ``verbose`` is set
        output_dir : directory that receives ``test.csv``, ``val.csv`` and
                     ``train.csv``
        test_split (optional) : fraction of the usable rows put in the test set
        val_split (optional) : fraction of the usable rows put in the
                               validation set
        verbose (optional) : when True, print the heart rates that ended up in
                             each of the three sets
    returns:
        (train_df, test_df, val_df)
    """
    metadf = pd.read_csv(metadata)

    # keep only the rows flagged as usable
    metadf = metadf[metadf.GOOD == 1]

    base.check_exists_create_if_not(output_dir)

    usable_rows = len(metadf)
    test_len = int(test_split * usable_rows)
    val_len = int(val_split * usable_rows)

    # NOTE(review): util.get_random_test_set presumably returns
    # (remaining rows, sampled rows) -- confirm against the helper.
    metadf, test_df = util.get_random_test_set(metadf, test_len)
    metadf, val_df = util.get_random_test_set(metadf, val_len)

    test_df.to_csv(os.path.join(output_dir, 'test.csv'), index=False)
    val_df.to_csv(os.path.join(output_dir, 'val.csv'), index=False)
    metadf.to_csv(os.path.join(output_dir, 'train.csv'), index=False)

    # the summary used to be printed even when the caller asked for silence
    if verbose:
        print('Test DF\n', list(test_df['HEART_RATE_BPM']))
        print('Val DF\n', list(val_df['HEART_RATE_BPM']))
        print('Train DF\n', list(metadf['HEART_RATE_BPM']))

    return metadf, test_df, val_df
Пример #2
0
def resize_frame_dir(frame_dir, output_dir, width=224, height=224):
    """
    Copy every frame found in ``frame_dir`` into ``output_dir``, resized to
    ``width`` x ``height``. Each output file keeps the name of its source.

    args:
        frame_dir : directory containing the image frames to resize
        output_dir : directory that receives the resized copies (created if
                     it does not exist)
        width (optional) : target width in pixels
        height (optional) : target height in pixels
    raises:
        FileNotFoundError : frame_dir does not exist
        IOError : frame_dir exists but is not a directory
    """
    if not os.path.exists(frame_dir):
        raise FileNotFoundError(
            "Error: path {} does not exists".format(frame_dir))
    if not os.path.isdir(frame_dir):
        raise IOError("Error: path {} is not a directory".format(frame_dir))

    check_exists_create_if_not(output_dir, suppress=True)

    print("[resize_frame_dir]: RESIZING {} -> {}".format(
        frame_dir, output_dir))

    # List the directory once so the progress total always matches the
    # entries that are actually processed.
    frames = os.listdir(frame_dir)
    num_frames = len(frames)

    # Image.ANTIALIAS is an alias of the Lanczos filter that newer Pillow
    # releases no longer provide; prefer LANCZOS and fall back for old ones.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    for completed, frame in enumerate(frames, start=1):
        # the context manager closes the source file handle after each frame
        with Image.open(os.path.join(frame_dir, frame)) as img:
            resized = img.resize((width, height), resample)
            resized.save(os.path.join(output_dir, frame))
        progressBar(completed, num_frames)
    print()
Пример #3
0
def create_train_test_split_dataframes(data_path, metadata, output_dir,
                                       test_split=0.2, val_split=0.15, verbose=True):
    """
    Build train / test / validation data frames from a metadata CSV and write
    them to ``output_dir`` as ``train.csv``, ``test.csv`` and ``val.csv``.

    A ``Path`` column is added to the metadata, pointing at
    ``<data_path>/S<subject zero-filled to 4>/Trial<trial>_frames``. The split
    itself is drawn over the (Subject, Trial) pairs whose subject id consists
    of digits only.

    args:
        data_path : root directory used to build the ``Path`` column
        metadata : path of the CSV handed to ``pd.read_csv``; needs ``Subject``
                   (string valued) and ``Trial`` columns
        output_dir : directory that receives the three CSV files
        test_split (optional) : fraction of the digit-only subject rows used
                                for testing
        val_split (optional) : fraction of the digit-only subject rows used
                               for validation
        verbose (optional) : not used by this function
    returns:
        (train_df, test_df, val_df)
    raises:
        AssertionError : the three frames together do not account for every
                         row of the metadata
    """
    metadf = pd.read_csv(metadata)

    def _frames_dir(row):
        subject_dir = "S" + str(row["Subject"]).zfill(4)
        return os.path.join(data_path, subject_dir,
                            "Trial%d_frames" % row["Trial"])

    metadf['Path'] = metadf.apply(_frames_dir, axis=1)

    # subjects whose id is purely numeric
    digits_only = metadf['Subject'].apply(lambda subject: subject.isdigit())
    real_subjects_df = metadf[digits_only]
    real_subs = list(zip(real_subjects_df['Subject'],
                         real_subjects_df['Trial']))

    real_count = len(real_subjects_df)
    num_test = int(real_count * test_split)
    num_val = int(real_count * val_split)

    # NOTE(review): the helper presumably removes the pairs it picks from
    # real_subs in place, which is why the leftover list serves as the
    # training set -- confirm against __choose_rand_test_set.
    test_subs = __choose_rand_test_set(real_subs, num_test)
    val_subs = __choose_rand_test_set(real_subs, num_val)
    train_subs = real_subs

    test_df = __dataframe_from_subject_info(metadf, test_subs)
    val_df = __dataframe_from_subject_info(metadf, val_subs)
    train_df = __dataframe_from_subject_info(metadf, train_subs)
    assert len(test_df) + len(val_df) + len(train_df) == len(metadf)

    base.check_exists_create_if_not(output_dir)
    for csv_name, frame in (('test.csv', test_df),
                            ('val.csv', val_df),
                            ('train.csv', train_df)):
        frame.to_csv(os.path.join(output_dir, csv_name), index=False)

    return train_df, test_df, val_df
def ttswcvs3(data_path, metadata, output_dir,
             test_split=0.2, val_split=0.2, verbose=True):
    """
    DEPRECATED (use create_train_test_split_dataframes)

    Train / test / validation split that returns data frames and also writes
    them to ``output_dir`` as ``train.csv``, ``test.csv`` and ``val.csv``.

    A ``Path`` column is added to the metadata, pointing at
    ``<data_path>/S<subject, 4 digits>/Trial<trial>_frames``. Both split sizes
    are computed from the full number of metadata rows.

    args:
        data_path : root directory used to build the ``Path`` column
        metadata : path of the CSV handed to ``pd.read_csv``; needs numeric
                   ``Subject`` and ``Trial`` columns
        output_dir : directory that receives the three CSV files
        test_split (optional) : fraction of the rows used for testing
        val_split (optional) : fraction of the rows used for validation
        verbose (optional) : not used by this function
    returns:
        (train_df, test_df, val_df)
    """
    frame = pd.read_csv(metadata)

    def _frames_dir(row):
        return os.path.join(data_path,
                            "S%04d" % row["Subject"],
                            "Trial%d_frames" % row["Trial"])

    frame['Path'] = frame.apply(_frames_dir, axis=1)

    base.check_exists_create_if_not(output_dir)

    total_rows = len(frame)
    test_len = int(test_split * total_rows)
    val_len = int(val_split * total_rows)

    # NOTE(review): util.get_testing_set presumably returns
    # (remaining rows, selected rows) -- confirm against the helper.
    remaining, test_df = util.get_testing_set(frame, test_len)
    train_df, val_df = util.get_testing_set(remaining, val_len)

    for csv_name, part in (('test.csv', test_df),
                           ('val.csv', val_df),
                           ('train.csv', train_df)):
        part.to_csv(os.path.join(output_dir, csv_name), index=False)

    return train_df, test_df, val_df
Пример #5
0
def video_dir_to_frame_dir(video_dir, output_dir, suppress=False):
    """
    Extract the frames of every .mov / .MOV file found directly inside
    video_dir, placing them under output_dir.

    args:
        video_dir : directory with videos
        output_dir : location to place the output frames (created if needed)
        suppress (optional) : forwarded to the helpers to silence their output
    returns:
        imgs_captured : a list of image framenames from all
                        videos in video dir
    raises:
        FileNotFoundError : video_dir does not exist, or holds no .mov files
        ValueError : video_dir exists but is not a directory
    """
    # validate the input first so the happy path reads straight through
    if not os.path.exists(video_dir):
        raise FileNotFoundError("%s not found" % video_dir)

    if not os.path.isdir(video_dir):
        raise ValueError("%s isn't a directory" % video_dir)

    # the extension match is case-insensitive (.MOV as well as .mov)
    movies = [
        os.path.join(video_dir, entry) for entry in os.listdir(video_dir)
        if entry.lower().endswith(".mov")
    ]
    if not movies:
        raise FileNotFoundError("%s contains no video files" % video_dir)

    check_exists_create_if_not(output_dir, suppress=suppress)

    imgs_captured = []
    for movie in movies:
        imgs_captured.extend(
            video_file_to_frames(movie,
                                 output_dir=output_dir,
                                 suppress=suppress))
    return imgs_captured
Пример #6
0
def move_frames(source_dir, partitioned_frames, output_dir):
    """
    Move the selected frames out of source_dir into output_dir, renaming
    them frame0.png, frame1.png, ... in the order they are given.

    args:
        source_dir : the origin directory
        partitioned_frames : file names (relative to source_dir) of the frames
                             to move
        output_dir : the directory receiving the frames (created if needed)
    """
    check_exists_create_if_not(output_dir, suppress=True)
    for position, frame in enumerate(partitioned_frames):
        origin = os.path.join(source_dir, frame)
        # the frame's original file name is not kept; it is renumbered
        destination = os.path.join(output_dir,
                                   "frame" + str(position) + ".png")
        os.rename(origin, destination)
Пример #7
0
def generate_output_directory(output_dirname):
    """
    Make sure the following output layout exists:

    output_dirname/
        models/

    args:
        output_dirname : string - the output directory name

    returns:
        output_dirname : string - the same name that was passed in
    """
    # parent first, then the nested models directory
    layout = (output_dirname, os.path.join(output_dirname, "models"))
    for directory in layout:
        B.check_exists_create_if_not(directory)

    return output_dirname
Пример #8
0

# Script entry point: validate the command-line arguments, then mirror the
# <frame_dir>/<subject>/<trial> directory layout underneath output_dir.
if __name__ == "__main__":
    args = parse_input().parse_args()

    # the input frame directory must already exist
    if not os.path.isdir(args.frame_dir):
        raise IOError("Error: path {} is not a directory".format(
            args.frame_dir))

    # both target dimensions must be strictly positive
    if args.xdim <= 0:
        raise ValueError("Error: xdim should be > 0, got {}".format(args.xdim))

    if args.ydim <= 0:
        raise ValueError("Error: ydim should be > 0, got {}".format(args.ydim))

    base.check_exists_create_if_not(args.output_dir)

    # every entry of frame_dir is treated as a subject directory
    # NOTE(review): a regular file directly inside frame_dir would make the
    # os.listdir call below raise -- only trial entries are checked with isdir.
    for subject in os.listdir(args.frame_dir):

        trials = [t for t in os.listdir(os.path.join(args.frame_dir, subject))]

        trial_paths = [
            os.path.join(args.frame_dir, subject, t) for t in trials
        ]

        for trial, trial_path in zip(trials, trial_paths):
            # skip anything inside a subject directory that is not a directory
            if not os.path.isdir(trial_path):
                continue

            # create the matching <output_dir>/<subject>/<trial> directory
            os.makedirs(os.path.join(args.output_dir, subject, trial),
                        exist_ok=True)
Пример #9
0
                        type=str)

    return parser


# Script entry point: read the command-line arguments, validate the input
# paths and load the two CSV files into data frames.
if __name__ == "__main__":
    
    args = parse_input().parse_args()
    
    selects = args.selects
    metadata = args.data_csv
    movie_dir = args.movie_directory
    output_directory = args.output_directory
    
    # validate input
    # NOTE(review): the output directory is created before the input paths
    # are checked, so a failed validation below still leaves it behind.
    base.check_exists_create_if_not(output_directory)
    
    if not os.path.exists(selects):
        raise FileNotFoundError("[selects] -- %s not found" % selects)
    
    if not os.path.exists(metadata):
        raise FileNotFoundError("[data_csv] -- %s not found" % metadata)

    # the movie directory must be an existing directory, not just any path
    if not os.path.isdir(movie_dir):
        raise FileNotFoundError("[movie_dir] -- %s not found" % movie_dir)
    
    #  we're going to assume the csvs specified are properly formatted and the columns are correct
    
    selects_df = pd.read_csv(selects)
    metadf = pd.read_csv(metadata)
Пример #10
0
def video_file_to_frames(filename, output_dir=None, suppress=False, clip=2):
    """
    Convert a video file to individual frames

    args:
        --> filename : video file to convert
        --> output_dir (optional): the desired output directory
                               for the frames

        --> suppress : boolean to suppress messages or not
        --> clip : number of seconds dropped from each end of the video
    returns:
        --> list of image filenames
    raises:
        --> ValueError : the video file failed the existence check
    some facts:
    ----------
        1) This procedure will save the png images in an output directory
           called '$(filename_dir)/$(filename_frames)' if no directory is specified.

        2) if an output directory IS specified but not found, this procedure
           will create the output directory, with a subdir called %filename_frames

        3) When the video reports 60 fps, only every second frame is written
           and the frame numbers in the file names are halved accordingly.

        4) Stack overflow source:
        [https://stackoverflow.com/questions/33311153/python-extracting-and-saving-video-frames]
    """
    # check the video's existence
    vid_valid, err, no_ext = video_file_exists(filename)
    if not vid_valid:
        # a problem occurred
        raise ValueError(err)

    frames_dirname = "%s_frames" % no_ext
    if output_dir:
        output_dir = os.path.join(output_dir, frames_dirname)
    else:
        output_dir = frames_dirname
    check_exists_create_if_not(output_dir, suppress=suppress)

    # have output directory, now need to create the frames
    vidcap = cv2.VideoCapture(filename)
    image_names = []
    try:
        fps = int(round(vidcap.get(cv2.CAP_PROP_FPS)))
        total = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        if not suppress:
            print(
                "[video_file_to_frames]-- extracting frames from %d fps video with %d frames"
                % (fps, total))

        clipped_frames = fps * clip  # frames dropped at each end
        halve = fps == 60  # 60 fps footage is written at half rate

        # Trust the result of the first read: forcing the loop to start
        # regardless would hand an empty frame to cv2.imwrite whenever the
        # video cannot be decoded.
        success, image = vidcap.read()
        count = 0

        # while there is a next image
        while success:
            pth = ""
            in_window = clipped_frames <= count < total - clipped_frames
            if in_window and not (halve and count % 2):
                frame_number = count - clipped_frames
                if halve:
                    frame_number //= 2
                pth = os.path.join(output_dir,
                                   "frame-%05d.png" % frame_number)
                image_names.append(pth)
                cv2.imwrite(pth, image)

            success, image = vidcap.read()

            if not suppress and pth != "":
                if success:
                    sys.stdout.write(
                        "\r[video_file_to_frames]-- writing [%s]" % pth)
                else:
                    sys.stdout.write("\n")
                sys.stdout.flush()

            count += 1
    finally:
        # always hand the capture back, even if a write fails midway
        vidcap.release()

    if not suppress:
        print(
            "\n[video_file_to_frames]-- clipped [%d] seconds off of each end of video"
            % clip)

    return image_names
Пример #11
0
                heart_rate, resp_rate = 0, 0
                if trial == 1:
                    heart_rate, resp_rate = float(subj_row[1]), float(
                        subj_row[2])
                elif trial == 2:
                    heart_rate, resp_rate = float(subj_row[3]), float(
                        subj_row[4])

                subj_origin = os.path.join(data_dir, "S%04d" % subj,
                                           "Trial%d.MOV" % trial)
                assert os.path.exists(subj_origin)

                new_subj = "%d%02d" % (trial, subj)
                new_subj_path = "S0%s" % new_subj

                base.check_exists_create_if_not(
                    os.path.join(data_dir, new_subj_path))

                subj_target_fast = os.path.join(data_dir, new_subj_path,
                                                "Trial1.MOV")
                subj_target_slow = os.path.join(data_dir, new_subj_path,
                                                "Trial2.MOV")
                """     s   hr1  rr1  hr2  rr2   """
                """     |    |    |    |    |    """
                row = ["", "", "", "", ""]

                row[0] = new_subj

                if heart_rate * 2 <= UPPER_THRESHOLD:
                    row[1] = heart_rate * 2
                    row[2] = (heart_rate * 2) / 4
                    selected_df.loc[len(selected_df)] = [new_subj, 1]
Пример #12
0
    if args.vertical_flip:
        # with probability 0.5, flip vertical axis
        coin_flip = np.random.random_sample() > 0.5

        # only flip when the coin says so (the result used to be ignored)
        if coin_flip:
            sequence = fp.sequence_flip_axis(sequence,
                                             0)  # flip on the row axis

    if args.horizontal_flip:
        # with probability 0.5, flip horizontal axis (cols)
        # (this was written as a subtraction, which never drew a new coin and
        # raised NameError when vertical_flip was not requested)
        coin_flip = np.random.random_sample() > 0.5

        if coin_flip:
            sequence = fp.sequence_flip_axis(sequence,
                                             1)  # flip on the column axis

    final = "_warped/"
    base.check_exists_create_if_not(final)
    sequence = [img * 255. for img in sequence]

    for i, img in enumerate(sequence):
        pth = os.path.join(final, "frame%04d.png" % i)
        cv2.imwrite(pth, img)

    gif_name = args.movie_file[:-4] + "_warped.gif"
    imageio.mimsave(gif_name, sequence)

    print("cleaning ...")
    os.system("rm -r _frames/")
    os.system("rm -r _rsz/")

    print("wrote a .gif : %s" % gif_name)
Пример #13
0
def train_test_split_with_csv_support(regular_data_path, filtered_csv, consolidated_csv, dir_out,
        augmented_data_path=None, ignore_augmented=None, test_split=0.2, val_split=0.1, train_csv_out="train.csv",
        test_csv_out="test.csv", val_csv_out="val.csv", verbose=True):
    """
    EXTREMELY DEPRECATED (do not use this, use create_train_test_split_dataframes)

    Split all available data into train, test, and validation sets.
    This splitting method does not allow the same trial of the same subject to appear in more than one set,
    e.g., if s1 t1 appears in the training set, then any partition of s1 t1 cannot appear in
    testing or validation. If the path for speed augmented data is provided, then whenever any subject
    and trial is added to a set, its associated augmented partition will also be added.

    Three csv files will be created upon calling this method. These csv files record the data
    delegated to each of the training, testing, and validation sets, so that they may be loaded in the future.
    The csv files are written before ``ignore_augmented`` is applied, so they always list the augmented
    entries as well.

    Example usage:
    train, test, val = train_test_split_with_csv_support('reg_consolidated', 'NextStartingPoint.csv',
    'reg_part_out.csv', 'test3', augmented_data_path='aug_consolidated')

    args:
        regular_data_path : path of the directory containing all of the data
        filtered_csv : path to the csv containing the subjects and trials that will be used in the data split
        consolidated_csv : path to the csv containing the path, heart rate, and respiratory rate of each data point
        dir_out : directory that the csv files will be written to
        (optional) augmented_data_path : path to the directory containing the augmented data
        (optional) ignore_augmented : collection naming the returned sets ("train", "test", "validation")
                                      from which augmented entries are dropped; has no effect when
                                      augmented_data_path is not given
        (optional) test_split : percentage of data to be assigned to the test set
        (optional) val_split : percentage of data to be assigned to the validation set
        (optional) train_csv_out, test_csv_out, val_csv_out : file names of the csv files written to dir_out
        (optional) verbose : boolean which controls whether or not the program will log each action taken
    returns:
        -> (training, testing, validation) maps, each keyed by data path
    raises:
        FileNotFoundError : a data directory or csv file does not exist
        ValueError : a data path exists but is not a directory
    """

    if not os.path.exists(regular_data_path):
        raise FileNotFoundError("Data path {} does not exist".format(regular_data_path))
    if not os.path.isdir(regular_data_path):
        raise ValueError("Data path {} is not a directory".format(regular_data_path))
    if augmented_data_path is not None and not os.path.exists(augmented_data_path):
        raise FileNotFoundError("Augmented data path {} does not exist".format(augmented_data_path))
    if augmented_data_path is not None and not os.path.isdir(augmented_data_path):
        raise ValueError("Augmented data path {} is not a directory".format(augmented_data_path))
    if not os.path.exists(filtered_csv):
        raise FileNotFoundError("Chosen subjects csv path {} does not exist".format(filtered_csv))
    if not os.path.exists(consolidated_csv):
        raise FileNotFoundError("Consolidated csv path {} does not exist".format(consolidated_csv))

    if verbose:
        if augmented_data_path is None:
            print('[train_test_split_with_csv_support]: splitting data without augmented data')
        else:
            print('[train_test_split_with_csv_support]: splitting data with augmented data directory -> {}'.format(augmented_data_path))

    # NOTE(review): util.split_subjects presumably removes the subjects it
    # picks from trial1 / trial2 in place, so what is left over afterwards is
    # the training set -- confirm against the helper.
    trial1, trial2 = util.all_subjects(filtered_csv)
    testing_set = util.split_subjects(trial1, trial2, test_split)
    validation_set = util.split_subjects(trial1, trial2, val_split)

    trial1.extend(trial2)
    training_set = trial1

    if verbose:
        print(util.set_to_str("----Training Set----", training_set))
        print(util.set_to_str("----Validation Set----", validation_set))
        print(util.set_to_str("----Testing Set----", testing_set))

    all_paths = util.fetch_paths_with_labels(consolidated_csv, regular_data_path)

    filtered_training_paths = util.filter_path_with_set(training_set, all_paths, augmented_data_path, verbose)
    filtered_testing_paths = util.filter_path_with_set(testing_set, all_paths, augmented_data_path, verbose)
    filtered_validation_paths = util.filter_path_with_set(validation_set, all_paths, augmented_data_path, verbose)

    base.check_exists_create_if_not(dir_out, not verbose)

    data_set_to_csv(filtered_training_paths, os.path.join(dir_out, train_csv_out), verbose=verbose)
    data_set_to_csv(filtered_testing_paths, os.path.join(dir_out, test_csv_out), verbose=verbose)
    data_set_to_csv(filtered_validation_paths, os.path.join(dir_out, val_csv_out), verbose=verbose)

    # Without an augmented data directory there is nothing to drop; testing
    # `None not in path` would raise a TypeError instead.
    if augmented_data_path is not None and ignore_augmented:

        def _without_augmented(paths):
            return {path: paths[path] for path in paths if augmented_data_path not in path}

        if "train" in ignore_augmented:
            filtered_training_paths = _without_augmented(filtered_training_paths)
        if "test" in ignore_augmented:
            filtered_testing_paths = _without_augmented(filtered_testing_paths)
        if "validation" in ignore_augmented:
            filtered_validation_paths = _without_augmented(filtered_validation_paths)

    return filtered_training_paths, filtered_testing_paths, filtered_validation_paths
Пример #14
0
    pred, loss = e.run()
    observed = e.test_set[['HEART_RATE_BPM',
                           'RESP_RATE_BR_PM']].values.tolist()
    print('obs: ', observed)
    print('pred: ', pred)

    pred = pred.reshape((int(pred.shape[0] / 2), 4))
    pred_avg = list(
        map(lambda row: [np.mean([row[0], row[2]]),
                         np.mean([row[1], row[3]])], pred))
    #print(pred_avg)
    pred_df = pd.DataFrame(pred_avg, columns=['HEART_RATE', 'RESP_RATE'])
    print(pred_df)

    figsdir = check_exists_create_if_not(
        os.path.join('run_history', relpath, 'figs'))

    # heart rate x, y
    hrate_actual = e.test_set['HEART_RATE_BPM'].values.tolist()
    hrate_pred = pred_df['HEART_RATE'].values.tolist()
    r, p = pearsonr(hrate_actual, hrate_pred)
    plt.scatter(hrate_actual, hrate_pred, label='$R=%0.4f$' % r)
    plt.xlabel('actual heart rate')
    plt.ylabel('predicted heart rate')
    plt.savefig(os.path.join(figsdir, 'hrate_corr.png'))
    plt.clf()

    # resp rate x, y
    resprate_actual = e.test_set['RESP_RATE_BR_PM'].values.tolist()
    resprate_pred = pred_df['RESP_RATE'].values.tolist()
    r, p = pearsonr(resprate_actual, resprate_pred)