def segment_inflections():
    """
    Split every 'All' inflection table into one table per named segment.

    For each study, sensor, record and search range, the inflection csv
    stored in the 'All' segment folder is read, stripped of 'Unnamed'
    columns, passed through segment_df() for every entry of segment_list
    other than 'All', and saved in that segment's own folder.
    Combinations whose 'All' file does not exist are skipped.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    searchRange = retrieve_ref('searchRange')

    segmentRef = 'All'
    # 'All' is the input of the segmentation, never one of its outputs
    targets = [segment for segment in segment_list if segment != segmentRef]

    for study in study_list:

        clean_path = os.path.join(study, 'formatted', 'clean')
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for record in recordNames:
                # named search_range so the builtin range() is not shadowed
                for search_range in searchRange:

                    base = [
                        study, 'analyzed', 'inflections', 'all_times',
                        str(search_range), record
                    ]
                    source_dir = build_path(base + [segmentRef])
                    source = os.path.join(source_dir, sensor + ".csv")

                    if not os.path.isfile(source):
                        continue

                    df_all = pd.read_csv(source)
                    unnamed = [c for c in df_all.columns if 'Unnamed' in str(c)]
                    df_all = df_all.drop(columns=unnamed)

                    for segment in targets:
                        # work on a copy so every segment is cut from the
                        # complete table
                        df_segment = segment_df(segment, df_all.copy())

                        # save next to the 'All' table, not on top of it
                        target_dir = build_path(base + [segment])
                        target = os.path.join(target_dir, sensor + ".csv")
                        df_segment.to_csv(target)
def save_meta(study, df):
    """
    Save the study metadata and keep a numbered copy in the archive.

    Writes df to metadata.csv in the folder build_path() returns for
    ['studies', study, 'meta'], then to metadata_<n>.csv in the matching
    'archive' folder, where n is one more than the number of files already
    archived.  When df is left with a single column the archive is emptied
    first.

    Args:
        study: name of the study folder.
        df: metadata dataframe; its 'Unnamed' columns are removed in place.
    """

    print("begin saving metadata")

    # remove unnamed columns created from reading in the csv
    # (str() keeps the check working for non-string column labels)
    for name in list(df.columns):
        if 'Unnamed' in str(name):
            del df[name]

    metadata_path = build_path(['studies', study, 'meta'])
    metadata_file = os.path.join(metadata_path, 'metadata.csv')
    df.to_csv(metadata_file)

    archive_path = build_path(['studies', study, 'meta', 'archive'])
    # the archive must exist before it is listed below
    os.makedirs(archive_path, exist_ok=True)

    if len(df.columns) == 1:
        print('metadata archive deleted. ')

        for archived in os.listdir(archive_path):
            os.remove(os.path.join(archive_path, archived))

    iteration = len(os.listdir(archive_path)) + 1
    archive_file = os.path.join(archive_path, 'metadata' + '_' + str(iteration) + '.csv')
    df.to_csv(archive_file)

    print("completed saving metadata")
示例#3
0
def segment_inflections(study, record, sensor, segment, range):
    """
    Cut one segment out of the 'All' inflection table and save it.

    Reads <sensor>.csv from
    <study>/analyzed/inflections/all_times/<range>/<record>/All, removes
    the 'Unnamed' columns, passes the table through segment_df() once and
    saves the result in the folder build_path() returns for the same path
    ending in <segment>.

    Args:
        study: study folder the 'analyzed' tree lives in.
        record: record name.
        sensor: sensor name, also the csv file stem.
        segment: segment handed to segment_df() and used as output folder.
        range: search range; only its str() is used, as a folder name.

    Returns:
        None.  Returns early when the 'All' file does not exist.
    """

    segmentRef = 'All'
    base = [study, 'analyzed', 'inflections', 'all_times', str(range), record]
    source = os.path.join(*base, segmentRef, sensor + ".csv")

    if not os.path.isfile(source):
        return

    df = pd.read_csv(source)

    unnamed = [colName for colName in df.columns if 'Unnamed' in str(colName)]
    df = df.drop(columns=unnamed)

    # segment and save once, after the column clean-up has finished
    df = segment_df(segment, df)

    # write to the segment's own folder so the 'All' table stays intact
    path = build_path(base + [segment])
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('segmented inflection file saved - ' + file)
def timestamp_source(study, format_type, segment, record, sensor):
    """
    Timestamp one source csv and store it in the formatted tree.

    Reads studies/<study>/source/<record>/<sensor>.csv, passes it through
    build_timestamps() together with the sensor name, and writes the result
    as <sensor>.csv in the folder build_path() returns for
    ['studies', study, 'formatted', format_type, record, segment].

    Returns the timestamped dataframe.
    """

    raw_csv = os.path.join('studies', study, 'source', record, sensor + '.csv')
    stamped = build_timestamps(pd.read_csv(raw_csv), sensor)

    parts = ['studies', study, 'formatted']
    parts.extend(str(part) for part in (format_type, record, segment))
    out_dir = build_path(parts)

    stamped.to_csv(os.path.join(out_dir, sensor + ".csv"))
    return stamped
示例#5
0
def plot_regression():
    """
    Save one regression figure per study, sensor, degree and record.

    Each figure is a single column of panels: one panel per segment drawn
    by plot_regression_segment(), one panel overlaying every segment except
    the last entry of segment_list, and one panel drawn by
    plot_coefficient_bar().  The figure is written as <sensor>.png in the
    folder build_path() returns for
    ['studies', study, 'plotted', 'regression', str(degree), record] and
    is closed afterwards so figures do not pile up across records.
    """

    print('plotting regression')

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    degree_list = [int(x) for x in retrieve_ref('degree_list')]

    # the layout is the same for every figure
    row_num, col_num = len(segment_list) + 2, 1
    row_width_mulp, col_width_mulp = 14, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp

    for study in study_list:

        format_type = 'clean'
        clean_path = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for degree in degree_list:
                for record in recordNames:

                    fig = plt.figure(figsize=(plot_width, plot_height))
                    try:
                        plot_num = 0

                        # one panel per segment
                        for segment in segment_list:
                            plot_num += 1
                            plt.subplot(row_num, col_num, plot_num)
                            plot_regression_segment(
                                study, record, segment, sensor, degree)

                        # overlay panel: every segment but the last
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        for segment in segment_list[:-1]:
                            plot_regression_segment(
                                study, record, segment, sensor, degree)
                            plt.title(' ')

                        # coefficient bar panel
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        plot_coefficient_bar(study, record, sensor, degree)
                        plt.title(' ')

                        path = [
                            'studies', study, 'plotted', 'regression',
                            str(degree), record
                        ]
                        path = build_path(path)
                        file = os.path.join(path, sensor + ".png")
                        plt.savefig(file, bbox_inches='tight')
                    finally:
                        # release the figure even when plotting fails
                        plt.close(fig)

                    print('plotted regression for ' + file)
示例#6
0
def format_source():
    """
    Timestamp every source recording listed in the study metadata.

    For each study, record and sensor the raw csv found under the record's
    'source_path' is read, passed through build_timestamps() and saved,
    without truncation, as <sensor>.csv in the folder build_path() returns
    for ['studies', study, 'formatted', 'source', record, 'All'].
    """

    print("begin format_source")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    kind, whole = 'source', 'All'

    for study in studies:

        meta = retrieve_meta(study)
        print(meta)

        for record in list(meta['recordName']):

            row = meta[meta['recordName'] == record].index.values[0]
            source_folder = meta.loc[row, 'source_path']
            # read but not used further; no truncation happens in this step
            _record_begin = meta.loc[row, 'recordBegin']
            _record_end = meta.loc[row, 'recordEnd']

            print('i = ' + str(row))
            print('record = ' + str(record))
            print('recordSource = ' + str(source_folder))

            for sensor in sensors:

                raw_csv = os.path.join('studies', study, kind,
                                       source_folder, sensor + '.csv')
                stamped = build_timestamps(pd.read_csv(raw_csv), sensor)

                out_dir = build_path(
                    ['studies', study, 'formatted', kind, record, whole])
                out_csv = os.path.join(out_dir, sensor + ".csv")
                stamped.to_csv(out_csv)
                print('formatted source file = ' + str(out_csv))
def segment_formatted(format_type):
    """
    Save every clean record once per segment.

    For each study, record, sensor and segment the clean 'All' csv is read,
    passed through segment_df() and written as <sensor>.csv in the folder
    build_path() returns for
    ['studies', study, 'formatted', 'clean', record, segment].

    The format_type argument is not used: the 'clean' format is always
    processed.
    """

    print("begin segment_formatted")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    retrieve_ref('format_types')    # looked up, result not used
    segments = retrieve_ref('segment_list')
    retrieve_ref('timePreStudy')    # looked up, result not used
    retrieve_ref('timePostStudy')   # looked up, result not used

    clean, whole = 'clean', 'All'

    for study in studies:

        meta = retrieve_meta(study)

        for record in list(meta['recordName']):

            row = meta[meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:

                csv_name = sensor + '.csv'

                for segment in segments:

                    whole_csv = os.path.join('studies', study, 'formatted',
                                             clean, record, whole, csv_name)
                    piece = segment_df(segment, pd.read_csv(whole_csv))

                    out_dir = build_path(
                        ['studies', study, 'formatted', clean, record, segment])
                    out_csv = os.path.join(out_dir, csv_name)
                    piece.to_csv(out_csv)
                    print('segmented clean file = ' + str(out_csv))
示例#8
0
def format_truncate():
    """
    Trim every timestamped source recording to its record window.

    For each study, record and sensor the formatted 'source' csv (segment
    'All') is read, only the rows whose 'timeUnix' lies strictly between
    the record's 'recordBegin' and 'recordEnd' metadata values are kept,
    and the result is saved as <sensor>.csv in the folder build_path()
    returns for ['studies', study, 'formatted', 'truncate', record, 'All'].

    Raises AssertionError when no rows remain after trimming.
    """

    print("begin format_truncate")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    whole = 'All'

    for study in studies:

        meta = retrieve_meta(study)

        for record in list(meta['recordName']):

            row = meta[meta['recordName'] == record].index.values[0]
            window_start = meta.loc[row, 'recordBegin']
            window_end = meta.loc[row, 'recordEnd']
            print('i = ' + str(row))

            for sensor in sensors:

                csv_name = sensor + '.csv'
                source = os.path.join('studies', study, 'formatted',
                                      'source', record, whole, csv_name)
                table = pd.read_csv(source)

                # keep the rows strictly inside the record window
                table = table.loc[table['timeUnix'] > window_start]
                table = table.loc[table['timeUnix'] < window_end]

                assert len(table['timeUnix']) > 0, 'during format truncate, dataframe empty'

                out_dir = build_path(
                    ['studies', study, 'formatted', 'truncate', record, whole])
                out_csv = os.path.join(out_dir, csv_name)
                table.to_csv(out_csv)
                print('formatted truncated file = ' + str(out_csv))
示例#9
0
def retrieve_regression(study, segment, sensor, degree):
    """
    Load a saved regression table and tidy its columns.

    Reads <sensor>.csv from the folder build_path() returns for
    ['studies', study, 'analyzed', 'regression', str(degree), segment] and
    removes the 'Unnamed' columns.  When row 1 holds a null in either of
    the last two remaining columns, a 'coefficients' column is added that
    receives, per record, the last non-null value found among the columns
    whose name does not contain "record"; the last two original columns
    are then removed.

    The resulting dataframe is printed and returned.
    """

    folder = build_path(
        ['studies', study, 'analyzed', 'regression', str(degree), segment])
    df = pd.read_csv(os.path.join(folder, sensor + '.csv'))

    for header in list(df.head()):
        if 'Unnamed' in header:
            del df[header]

    # column names as they are before 'coefficients' is added
    headers = list(df.head())

    split_over_columns = (
        pd.isnull(df.loc[1, headers[-1]]) is True
        or pd.isnull(df.loc[1, headers[-2]])
    )

    if split_over_columns:

        df['coefficients'] = [None] * len(df['recordName'])

        for record in list(df['recordName']):

            row = df[df['recordName'] == record].index.values[0]

            for header in headers:
                if "record" in header:
                    continue
                if pd.isnull(df.loc[row, header]) is False:
                    df.loc[row, 'coefficients'] = df.loc[row, header]

        del df[headers[-1]]
        del df[headers[-2]]

    print('retrieve_regression df = ')
    print(df)

    return df
def compile_formatted(format_type):
    """
    Write each clean record out once per segment.

    Despite its name, this function does not merge csv files: for every
    study, record, sensor and segment it reads the clean 'All' csv, passes
    it through segment_df() and saves the result as <sensor>.csv in the
    folder build_path() returns for
    ['studies', study, 'formatted', 'clean', record, segment].

    The format_type argument is not used: 'clean' is always processed.
    """

    print("begin compile_formatted")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    _format_types = retrieve_ref('format_types')      # not used below
    segment_list = retrieve_ref('segment_list')
    _time_pre_study = retrieve_ref('timePreStudy')    # not used below
    _time_post_study = retrieve_ref('timePostStudy')  # not used below

    for study in study_list:

        df_meta = retrieve_meta(study)
        formatted_root = ['studies', study, 'formatted', 'clean']

        for record in list(df_meta['recordName']):

            matches = df_meta[df_meta['recordName'] == record]
            print('i = ' + str(matches.index.values[0]))

            for sensor in sensor_list:
                for segment in segment_list:

                    source = os.path.join(*formatted_root, record, 'All',
                                          sensor + '.csv')
                    df_segmented = segment_df(segment, pd.read_csv(source))

                    target_dir = build_path(formatted_root + [record, segment])
                    target = os.path.join(target_dir, sensor + ".csv")
                    df_segmented.to_csv(target)
                    print('segmented clean file = ' + str(target))
示例#11
0
def clean_save():
    """
    Turn every coregistered record into a clean record.

    For each study, record and sensor the formatted 'coregister' csv
    (segment 'All') is read, passed through reset_minutes(), stripped of
    its 'Unnamed' columns and saved as <sensor>.csv in the folder
    build_path() returns for
    ['studies', study, 'formatted', 'clean', record, 'All'].
    """

    print("begin clean_save")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    retrieve_ref('segment_list')    # looked up, result not used

    whole = 'All'

    for study in studies:

        meta = retrieve_meta(study)

        for record in list(meta['recordName']):

            row = meta[meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:

                coregistered = os.path.join('studies', study, 'formatted',
                                            'coregister', record, whole,
                                            sensor + '.csv')
                table = reset_minutes(whole, pd.read_csv(coregistered))

                # drop the index columns left over from earlier csv round trips
                leftovers = [name for name in list(table.head())
                             if 'Unnamed' in name]
                for name in leftovers:
                    del table[name]

                out_dir = build_path(
                    ['studies', study, 'formatted', 'clean', record, whole])
                out_csv = os.path.join(out_dir, sensor + ".csv")
                table.to_csv(out_csv)
                print('formatted clean file = ' + str(out_csv))
def segment_inflections(study, record, sensor, segment, range):
    """
    Cut one segment out of the 'All' inflection table and save it.

    Reads <sensor>.csv from
    <study>/analyzed/inflections/all_times/<range>/<record>/All, passes it
    through segment_df() and writes the result as <sensor>.csv in the
    folder build_path() returns for the same path ending in <segment>.
    Does nothing when the 'All' file is missing.
    """
    csv_name = sensor + ".csv"
    base = [study, 'analyzed', 'inflections', 'all_times', str(range), record]

    source = os.path.join(*base, 'All', csv_name)

    if os.path.isfile(source):
        segmented = segment_df(segment, pd.read_csv(source))
        target_dir = build_path(base + [segment])
        segmented.to_csv(os.path.join(target_dir, csv_name))
示例#13
0
def plot_source(study):
    """
    Plot the source and truncated measurements of every record of a study.

    One figure is drawn per record folder found under
    studies/<study>/formatted/source, with one panel per sensor.  Every
    column whose name contains 'measurement' is scattered against
    'timeUnix' for the 'source' and 'truncate' formats of each segment,
    coloured by retrieve_ref_color().  Each figure is saved as a png and
    then closed so figures do not accumulate across records.

    Args:
        study: name of the study folder under 'studies'.
    """

    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    format_type = 'source'
    source_path = os.path.join('studies', study, 'formatted', format_type)
    format_folders = os.listdir(source_path)

    format_types = ['source', 'truncate']

    # the layout is the same for every record
    row_num, col_num = len(sensor_list), 1
    row_width_mulp, col_width_mulp = 14, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp

    for record in format_folders:

        fig = plt.figure(figsize=(plot_width, plot_height))
        try:
            plot_num = 0

            for sensor in sensor_list:

                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)

                for segment in segment_list:
                    for format_type in format_types:

                        source = os.path.join('studies', study, 'formatted',
                                              format_type, record, segment,
                                              sensor + '.csv')

                        if not os.path.isfile(source):
                            continue

                        print('source = ' + source)

                        df = pd.read_csv(source)

                        colNames = list(df.head())
                        print('colNames = ')
                        print(colNames)

                        for colName in colNames:

                            if 'measurement' not in str(colName):
                                continue

                            labelName = format_type
                            print('labelName = ' + labelName)
                            valueColor = retrieve_ref_color(labelName)

                            plt.scatter(df['timeUnix'],
                                        df[colName],
                                        color=valueColor,
                                        label=labelName)
                            plt.xlabel('time Unix')

                            sensor_unit = retrieve_sensor_unit(sensor)
                            plt.ylabel(sensor + ' ' + sensor_unit)
                            plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                                       loc='upper left')

            # NOTE(review): format_type and sensor are whatever the loops
            # above left behind (the last format and the last sensor), so the
            # figure lands under 'plotted/truncate' and is named after the
            # last sensor although it shows all of them - confirm intended.
            path = ['studies', study, 'plotted', format_type, record]
            path = build_path(path)
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')
        finally:
            # release the figure even when plotting fails
            plt.close(fig)
示例#14
0
def plot_coregister(study):
    """
    Plot the clean (coregistered) measurements of every record of a study.

    One figure is drawn per record folder found under
    studies/<study>/formatted/clean, with one panel per sensor.  Segments
    are drawn in the reverse order of segment_list; every column whose name
    contains 'measurement' is scattered against 'timeMinutes' in the colour
    retrieve_ref_color_wearable_segment() returns for its wearable number
    and segment.  Each figure is saved as a png and then closed.

    Args:
        study: name of the study folder under 'studies'.
    """

    sensor_list = retrieve_ref('sensor_list')
    # reversed copy: the list handed out by retrieve_ref() is left untouched
    segment_list = list(reversed(retrieve_ref('segment_list')))

    format_type = 'clean'
    source_path = os.path.join('studies', study, 'formatted', format_type)
    format_folders = os.listdir(source_path)

    format_types = ['clean']

    # the layout is the same for every record
    row_num, col_num = len(sensor_list), 1
    row_width_mulp, col_width_mulp = 14, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp

    for record in format_folders:

        fig = plt.figure(figsize=(plot_width, plot_height))
        try:
            plot_num = 0

            for sensor in sensor_list:

                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)

                for format_type in format_types:
                    for segment in segment_list:

                        source = os.path.join('studies', study, 'formatted',
                                              format_type, record, segment,
                                              sensor + '.csv')

                        if not os.path.isfile(source):
                            continue

                        print('source = ' + source)

                        df = pd.read_csv(source)

                        colNames = list(df.head())
                        print('colNames = ')
                        print(colNames)

                        for colName in colNames:

                            if 'measurement' not in str(colName):
                                continue

                            colNameSplit = colName.split('_')
                            labelName = str(format_type + ' ' +
                                            colNameSplit[0])
                            print('labelName = ' + labelName)

                            # wearables are numbered from the last column
                            # backwards
                            index_col = df.columns.get_loc(colName)
                            wearable_num = len(colNames) - index_col
                            print('wearable_num = ' + str(wearable_num))
                            colorWearableSegment = retrieve_ref_color_wearable_segment(
                                wearable_num, segment)

                            plt.scatter(df['timeMinutes'],
                                        df[colName],
                                        color=colorWearableSegment,
                                        label=labelName)

                plt.xlabel('Record Time (minutes)')
                sensor_unit = retrieve_sensor_unit(sensor)
                plt.ylabel(sensor + ' ' + sensor_unit)
                plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')

            # NOTE(review): sensor is the last sensor of the loop above, so
            # the file is named after it although the figure shows all
            # sensors - confirm intended.
            path = ['studies', study, 'plotted', 'clean', record]
            path = build_path(path)
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')
        finally:
            # release the figure even when plotting fails
            plt.close(fig)
示例#15
0
def find_inflections(study, record, sensor, segment, range):
    """
    Search one sensor recording for inflections with a sliding quadratic fit.

    Break each set of measurements into a subset - a range of ~30-120 seconds
    Use polyfit to find the best fit second order polynomial
    Find the inflection point of the best fit polynomial
    (computed below as the root of the polynomial's derivative)
    If the polyfit inflection point is very close to the median time point in the record
    An inflection is found

    Args:
        study: study folder; used as the first element of both the input
            and the output path.
        record: record name (folder).
        sensor: sensor name, also the csv file stem.
        segment: segment folder read from and written to.
        range: window length; it is divided by 60 before being added to
            'timeMinutes' values (presumably seconds - confirm with caller).

    Returns:
        The path of the saved inflection csv, or None when that file
        already exists and nothing is recalculated.
    """

    # check if the inflections have already been found
    path = [study, 'analyzed', 'inflections', 'all_times', str(range), record, segment]
    pathJoined = os.path.join(*path)
    file = os.path.join(pathJoined, sensor + ".csv")

    if os.path.isfile(file):
        print('file found, not recalculated.')
        return

    print('finding inflections to build : ' + file)

    # retrieve the timestamped measurements for the study - record - sensor - segment
    format_type = 'truncate'
    source = os.path.join(study, 'formatted', format_type, record, segment, sensor + '.csv')
    print('source = ' + source)
    df = pd.read_csv(source)

    # NOTE(review): timeMinutes is only assigned in the 'Minutes' branch, so a
    # measurement column that comes before the time column would hit an
    # undefined name - presumably the time column always comes first; confirm.
    for colName in df.columns:

        # remove extra columns because the dataframe will be saved
        if 'Unnamed' in str(colName):
            del df[colName]

        # save the timestamps as a list
        elif 'Minutes' in str(colName):
            timeMinutes = list(df[colName])

        # find the measurement
        elif 'meas' in colName:

            # add new columns to the dataframe to save the new variables
            # (one set per measurement column, prefixed with the text before
            # the first '_' of the column name)
            newColNames = ['inflectionDecision', 'inflectionLocation', 'polyfitCoefficients', 'polyfitEquation', 'polyfitSolution', 'derivativeEquation', 'derivativeSolution']
            colNameSplit = colName.split('_')
            print('colNameSplit[0] = ' + colNameSplit[0])

            for suffix in newColNames:
                label = str(colNameSplit[0] + '_' + suffix)
                print('label = ' + label)
                if label not in df.columns:
                    df[label] = [None]*len((list(df['timeMinutes'])))

            # window bounds; shared by all measurement columns and reset here
            df['timeBegin'] = [None]*len((list(df['timeMinutes'])))
            df['timeEnd'] = [None]*len((list(df['timeMinutes'])))

            # slide a window that starts at every timestamp in turn
            for timeMinute in timeMinutes:

                # index label of the first row carrying this timestamp
                i = df[ df['timeMinutes']== timeMinute].index.values[0]

                # spacing between the rows labelled 1 and 2, taken as the
                # sampling interval; the tolerance is half of it and iRange is
                # the number of samples that fit in one window
                timeDif = (float(df.loc[2,'timeMinutes']) - float(df.loc[1,'timeMinutes']))
                timeTolerance = timeDif/2
                iRange = int(range/60*1/(timeDif))
                # print('iRange = ' + str(iRange))

                # skip windows that would run past the end of the recording
                if len(list(df['timeMinutes'])) - i <= iRange+2:
                    continue

                # label lookups below assume the index labels are consecutive
                # integers - TODO confirm
                timeMedian = df.loc[int(i+iRange/2), 'timeMinutes']
                timeBegin = df.loc[int(i), 'timeMinutes']
                timeEnd = df.loc[int(i+iRange), 'timeMinutes']

                # print('timeMedian = ' + str(timeMedian) + ' timeBegin = ' + str(timeBegin) + ' timeEnd = ' + str(timeEnd))
                # print('range = ' + str(range/60) +  ' timeEnd-timeBegin = ' + str(timeEnd-timeBegin) + ' % = ' + str(range/60/(timeEnd-timeBegin)))

                # rows whose time falls inside [timeMinute, timeMinute + range/60]
                df_truncate = df[df['timeMinutes'] >= timeMinute]
                df_truncate = df_truncate[df_truncate['timeMinutes'] <= timeMinute + range/60]
                # df_truncate = df[df['timeMinutes'] >= timeMinute & df_truncate['timeMinutes'] <= timeMinute + range/60]

                # every result of this window is stored on the row in the
                # middle of the window
                timeTruncate = list(df_truncate['timeMinutes'])
                df.loc[int(i+iRange/2), 'timeBegin'] = min(timeTruncate)
                df.loc[int(i+iRange/2), 'timeEnd'] = max(timeTruncate)

                measTruncate = list(df_truncate[colName])

                # second order least-squares fit of measurement against time
                coef = np.polyfit(timeTruncate, measTruncate, 2)
                # coef = [float(x) for x in coef]

                x = sym.Symbol('x')

                # fitted polynomial as a symbolic expression
                f = coef[0]*x*x+coef[1]*x+coef[2]
                # print('f = ')
                # print(f)

                # first derivative of the fitted polynomial
                dff = sym.diff(f,x)
                # print('dff = ')
                # print(dff)

                # roots of the polynomial and first root of its derivative
                # (soldf[0] needs the derivative to have at least one root)
                solf = sym.solve(f)
                soldf = sym.solve(dff)
                soldf = soldf[0]


                # default decision; switched to 'Yes' at the end of the
                # iteration when the test passes
                label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                df.loc[int(i+iRange/2), label] = 'No'

                # the stored location is the start time of the window
                label = str(colNameSplit[0] + '_' + 'inflectionLocation')
                df.loc[int(i+iRange/2), label] = timeMinute

                # NOTE(review): the coefficients are joined with an empty
                # separator, so they run together in the saved text
                label = str(colNameSplit[0] + '_' + 'polyfitCoefficients')
                df.loc[int(i+iRange/2), label] = str(''.join([str(x) for x in coef]))

                label = str(colNameSplit[0] + '_' + 'polyfitEquation')
                df.loc[int(i+iRange/2), label] = str(f)

                # the roots are joined with an empty separator as well
                label = str(colNameSplit[0] + '_' + 'polyfitSolution')
                df.loc[int(i+iRange/2), label] = str(''.join([str(x) for x in solf]))

                label = str(colNameSplit[0] + '_' + 'derivativeEquation')
                df.loc[int(i+iRange/2), label] = str(dff)

                label = str(colNameSplit[0] + '_' + 'derivativeSolution')
                df.loc[int(i+iRange/2), label] = str(soldf)

                # an inflection is declared when the derivative root lies
                # within timeTolerance of the time at the middle of the window
                if soldf < timeMedian + timeTolerance:

                    if soldf > timeMedian - timeTolerance:

                        print('inflection found at time = ' + str(soldf))
                        label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                        df.loc[int(i+iRange/2), label] = 'Yes'

    # save the annotated dataframe under the path checked at the top
    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('inflection list saved : ' + file)
    return(file)
def _plot_inflection_panel(study, record, sensor, search_range):
    """Draw one search range's measurements and inflection markers on the current axes."""

    format_type = 'clean'
    segment = 'All'

    path = [
        study, 'analyzed', 'inflections', 'all_times',
        str(search_range), record, segment
    ]
    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")

    # nothing to draw when no inflection table was saved for this range
    if not os.path.isfile(file):
        return

    source = os.path.join(study, 'formatted', format_type, record, segment,
                          sensor + '.csv')
    print('source = ' + source)
    df = pd.read_csv(source)

    # reset per panel so values from another record are never reused
    timeMinutes = None
    measMin, measMax = None, None

    for colName in df.columns:
        name = str(colName)

        if 'timeMinutes' in name:
            timeMinutes = list(df[colName])

        if 'meas' in name:
            if timeMinutes is None:
                raise ValueError(
                    'no timeMinutes column before ' + name + ' in ' + source)

            measList = list(df[colName])
            # the markers span the extent of all measurement columns
            measMin = min(measList) if measMin is None else min(measMin, min(measList))
            measMax = max(measList) if measMax is None else max(measMax, max(measList))
            plt.scatter(timeMinutes, measList, label=name)

    df_inflect = pd.read_csv(file)
    for colName in df_inflect.columns:
        # only the decision columns hold 'Yes'/'No'; filtering on the other
        # 'inflection...' columns would discard every row
        if 'inflectionDecision' in str(colName):
            df_inflect = df_inflect[df_inflect[colName] == 'Yes']

    timeInflections = list(df_inflect['timeMinutes'])

    if measMin is not None:
        for time in timeInflections:
            # vertical marker at the inflection time
            plt.plot([time, time], [measMin, measMax], color=[0, .9, .6])

    # NOTE(review): the x values are 'timeMinutes', the label says Unix time
    plt.xlabel('time Unix')
    sensor_unit = retrieve_sensor_unit(sensor)
    plt.ylabel(sensor + ' ' + sensor_unit)
    plt.title('Record = ' + str(record) + ' Range = ' +
              str(search_range) + ' seconds')


def plot_inflections():
    """
    Save one inflection figure per study, sensor and record.

    Each figure holds one panel per entry of searchRange.  A panel shows
    the clean 'All' measurements of the record scattered against
    'timeMinutes' and a vertical line at the 'timeMinutes' of every row of
    the saved inflection table whose inflectionDecision columns all read
    'Yes'.  Panels whose inflection table does not exist stay empty.  The
    figure is written as <sensor>.png in the folder build_path() returns
    for [study, 'plotted', 'inflection', 'each_record', record] and is
    closed afterwards.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    searchRange = retrieve_ref('searchRange')

    # the layout is the same for every figure
    row_num, col_num = len(searchRange), 2
    row_width_mulp, col_width_mulp = 7, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp

    for study in study_list:

        clean_path = os.path.join(study, 'formatted', 'clean')
        recordNames = os.listdir(clean_path)

        # a single pass over the sensors: each figure is built once
        for sensor in sensor_list:
            for record in recordNames:

                fig = plt.figure(figsize=(plot_width, plot_height))
                try:
                    plot_num = 0

                    for search_range in searchRange:
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        _plot_inflection_panel(study, record, sensor,
                                               search_range)

                    path = [
                        study, 'plotted', 'inflection', 'each_record', record
                    ]
                    path = build_path(path)
                    file = os.path.join(path, sensor + ".png")
                    plt.savefig(file, bbox_inches='tight')
                finally:
                    # release the figure even when plotting fails
                    plt.close(fig)

                print('inflection plot saved ' + file)
def analyze_mean():
    """
    Compute the mean of every measurement column of every clean record.

    For each study, sensor and segment the function reads
    studies/<study>/formatted/clean/<record>/<segment>/<sensor>.csv,
    averages each column whose name contains 'measurement' and writes one
    table (one row per record) to
    studies/<study>/analyzed/statistics/mean/<segment>/<sensor>.csv.

    A record is treated as coregistered when its 'coregisterRecords' metadata
    entry is more than twice as long as the record name.  For those records
    the mean is stored in '<prefix>_mean', where <prefix> is the text before
    the first underscore of the measurement column name; otherwise it is
    stored in a single 'mean' column.

    Returns nothing; results are written to disk.
    """

    print("begin statistical analysis of records")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    format_type = 'clean'

    for study in study_list:

        df_meta = retrieve_meta(study)

        clean_path = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:

            for segment in segment_list:

                df_mean = pd.DataFrame()
                df_mean['recordName'] = recordNames

                # the first record decides which result columns are created
                i = df_meta[df_meta['recordName'] == recordNames[0]].index.values[0]
                coregisterRecords = df_meta.loc[i, 'coregisterRecords']

                if len(coregisterRecords) > 2 * len(recordNames[0]):

                    # Column names come from the header of the first record.
                    # (Earlier code split `colName` here before any loop had
                    # assigned it, which raised UnboundLocalError on the
                    # first pass and created a junk column afterwards.)
                    recordRef = recordNames[0]
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, recordRef, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)

                    for colName in list(df.head()):
                        if 'meas' in str(colName):
                            wearableName = colName.split('_')[0]
                            newColName = str(wearableName + '_mean')
                            df_mean[newColName] = [None] * len(recordNames)

                else:
                    df_mean['mean'] = [None] * len(recordNames)

                for record in recordNames:

                    i = df_meta[df_meta['recordName'] == record].index.values[0]
                    coregisterRecords = df_meta.loc[i, 'coregisterRecords']

                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segment,
                                          sensor + '.csv')
                    print('source = ' + str(source))
                    df = pd.read_csv(source)

                    for colName in list(df.head()):

                        if 'measurement' not in str(colName):
                            continue

                        measurement = list(df[colName])
                        avg = np.mean(measurement)

                        if len(coregisterRecords) > 2 * len(record):
                            wearableName = colName.split('_')[0]
                            newColName = str(wearableName + '_mean')
                        else:
                            newColName = 'mean'

                        # row of the result table that belongs to this record
                        j = df_mean[df_mean['recordName'] == record].index.values[0]
                        df_mean.loc[j, newColName] = round(avg, 4)
                        print('j = ' + str(j) + ' mean = ' + str(avg))

                path = ['studies', study, 'analyzed', 'statistics', 'mean', segment]
                path = build_path(path)
                file = os.path.join(path, sensor + ".csv")
                df_mean.to_csv(file)
                print('mean file saved: ' + file)

        print("completed statistical analysis of records")
def find_inflections(path, file, study, format_type, record, sensor, segment,
                     range):
    """
    Flag the samples of one record where a sliding quadratic fit turns around.

    The source table is read from
    <study>/formatted/<format_type>/<record>/<segment>/<sensor>.csv.  For
    every column whose name contains 'meas', a window starting at each sample
    and spanning `range / 60` units of 'timeMinutes' is fitted with a degree-2
    polynomial.  The fit, its derivative and the root of the derivative are
    stored on the row in the middle of the window, in columns named
    '<prefix>_<suffix>' where <prefix> is the text before the first
    underscore of the measurement column name.  '<prefix>_inflectionDecision'
    is 'Yes' when the root of the derivative lies within half a sample
    spacing of the middle sample's time, otherwise 'No'.

    Args:
        path: list of path components passed to build_path(); the result is
            saved in that folder as '<sensor>.csv'.
        file: ignored; the output path is rebuilt from `path` and `sensor`.
            Kept so existing callers keep working.
        study, format_type, record, segment, sensor: locate the source csv.
        range: width of the fit window; divided by 60 before it is compared
            with 'timeMinutes' values (plot titles elsewhere in this file
            label it as seconds).  The name shadows the builtin but is part
            of the signature.

    Returns:
        The path of the saved csv file.
    """

    source = os.path.join(study, 'formatted', format_type, record, segment,
                          sensor + '.csv')
    print('source = ' + source)
    df = pd.read_csv(source)
    print('df = ')
    print(df)

    # suffixes of the result columns added for every measurement column
    newColNames = [
        'inflectionDecision', 'inflectionLocation',
        'polyfitCoefficients', 'polyfitEquation', 'polyfitSolution',
        'derivativeEquation', 'derivativeSolution'
    ]

    for colName in df.columns:

        # remove extra columns because the dataframe will be saved
        if 'Unnamed' in str(colName):
            del df[colName]

        # save the timestamps as a list
        # (relies on the time column appearing before the measurement columns)
        elif 'Minutes' in str(colName):
            timeMinutes = list(df[colName])

        elif 'meas' in colName:

            colNameSplit = colName.split('_')
            prefix = colNameSplit[0]
            print('colNameSplit[0] = ' + colNameSplit[0])

            rowCount = len(df['timeMinutes'])

            for suffix in newColNames:
                label = str(prefix + '_' + suffix)
                print('label = ' + label)
                if label not in df.columns:
                    df[label] = [None] * rowCount

            df['timeBegin'] = [None] * rowCount
            df['timeEnd'] = [None] * rowCount

            # the sample spacing is read from rows 1 and 2, so shorter
            # records cannot be analyzed
            if rowCount < 3:
                continue

            # these depend only on the record, not on the current sample
            timeDif = (float(df.loc[2, 'timeMinutes']) -
                       float(df.loc[1, 'timeMinutes']))
            timeTolerance = timeDif / 2
            iRange = int(range / 60 * 1 / (timeDif))

            for timeMinute in timeMinutes:

                i = df[df['timeMinutes'] == timeMinute].index.values[0]

                # not enough samples left for a full window
                if rowCount - i <= iRange + 2:
                    continue

                # results are stored on the middle row of the window
                mid = int(i + iRange / 2)
                timeMedian = df.loc[mid, 'timeMinutes']

                df_truncate = df[df['timeMinutes'] >= timeMinute]
                df_truncate = df_truncate[
                    df_truncate['timeMinutes'] <= timeMinute + range / 60]

                timeTruncate = list(df_truncate['timeMinutes'])
                df.loc[mid, 'timeBegin'] = min(timeTruncate)
                df.loc[mid, 'timeEnd'] = max(timeTruncate)

                measTruncate = list(df_truncate[colName])

                coef = np.polyfit(timeTruncate, measTruncate, 2)
                x = sym.Symbol('x')
                f = coef[0] * x * x + coef[1] * x + coef[2]
                dff = sym.diff(f, x)
                solf = sym.solve(f)
                soldf = sym.solve(dff)

                df.loc[mid, prefix + '_inflectionDecision'] = 'No'
                df.loc[mid, prefix + '_inflectionLocation'] = timeMinute
                df.loc[mid, prefix + '_polyfitCoefficients'] = str(''.join(
                    [str(c) for c in coef]))
                df.loc[mid, prefix + '_polyfitEquation'] = str(f)
                df.loc[mid, prefix + '_polyfitSolution'] = str(''.join(
                    [str(s) for s in solf]))
                df.loc[mid, prefix + '_derivativeEquation'] = str(dff)

                # A window whose fit has a constant derivative (for example
                # an all-zero signal) yields no root; indexing the empty
                # result used to raise IndexError.  The decision stays 'No'.
                if not soldf:
                    continue

                soldf = soldf[0]
                df.loc[mid, prefix + '_derivativeSolution'] = str(soldf)

                if (soldf < timeMedian + timeTolerance
                        and soldf > timeMedian - timeTolerance):

                    print('inflection found at time = ' + str(soldf))
                    df.loc[mid, prefix + '_inflectionDecision'] = 'Yes'

    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('inflection list saved : ' + file)
    return (file)
def unique_inflections(study, format_type, record, sensor, segment):
    """
    Merge the inflections found with every search range into one list.

    For each value of the 'searchRange' reference the function reads
    <study>/analyzed/inflections/all_times/<range>/<record>/<segment>/<sensor>.csv
    (missing files are skipped), keeps the rows whose 'Decision' column is
    'Yes' and collects their 'timeMinutes' values.  Times closer than 10/60
    to the previously kept time are dropped.  The result is saved to
    <study>/analyzed/inflections/all_times/unique/<record>/<segment>/<sensor>.csv
    with the columns 'uniqueList' and 'inflectionRate' (number of kept times
    divided by the largest 'timeMinutes' value of the last file read).

    Args:
        study, record, sensor, segment: locate the input and output files.
        format_type: not used by this function.

    Returns:
        None.  Nothing is written when no inflection was found.
    """

    uniqueList = []

    searchRange = retrieve_ref('searchRange')
    searchRange = [int(x) for x in searchRange]

    for searchWindow in searchRange:

        path = [
            study, 'analyzed', 'inflections', 'all_times',
            str(searchWindow), record, segment
        ]
        file = os.path.join(*path, sensor + ".csv")

        if not os.path.isfile(file):
            continue

        df = pd.read_csv(file)

        recordLength = max(list(df['timeMinutes']))

        # Reset per file: without this a file lacking a 'Decision' column
        # either raised NameError or silently reused the previous file's rows.
        dfInflections = None
        for colName in df.columns:
            if 'Decision' in colName:
                # NOTE(review): with several 'Decision' columns only the last
                # one is kept -- confirm whether they should be combined.
                dfInflections = df.drop(df[(df[colName] != 'Yes')].index)

        if dfInflections is None:
            continue

        for inflectionTime in list(dfInflections['timeMinutes']):
            if inflectionTime not in uniqueList:
                uniqueList.append(float(inflectionTime))

    if len(uniqueList) == 0:
        return

    # keep a time only when it is more than 10/60 after the last kept time;
    # the buffer is ascending, so its last element is its maximum
    uniqueList.sort()
    uniqueListBuffer = []
    for inflectionTime in uniqueList:
        if (not uniqueListBuffer
                or inflectionTime > uniqueListBuffer[-1] + 10 / 60):
            uniqueListBuffer.append(inflectionTime)

    uniqueList = uniqueListBuffer
    print('uniqueList = ')
    print(uniqueList)
    print('length of uniqueList = ' + str(len(uniqueList)))

    inflectionRate = len(uniqueList) / recordLength

    df = pd.DataFrame()
    df['uniqueList'] = uniqueList
    df['inflectionRate'] = [inflectionRate] * len(uniqueList)

    path = [
        study, 'analyzed', 'inflections', 'all_times', 'unique', record,
        segment
    ]
    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
def analyze_records():
    """
    Compute the 0.1 ... 0.9 quantiles of every measurement column.

    For each study the clean records under studies/<study>/formatted/clean
    are read per sensor and segment.  Every column whose name contains
    'measurement' contributes nine values, stored in columns named
    'quan<q>_<name>' where <name> is the second underscore-separated part of
    the measurement column name.  The table (one row per record) is saved to
    studies/<study>/analyzed/statistics/quantiles/<sensor>.csv.

    The result columns are created up front from the 'All' segment of the
    'TEMP' sensor of the first record.

    Returns nothing; results are written to disk.
    """

    print("begin statistical analysis of records")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    quanList = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    format_type = 'clean'

    for study in study_list:

        source_path = os.path.join('studies', study, 'formatted', format_type)
        format_folders = os.listdir(source_path)

        # the first record's TEMP file supplies the column layout
        record, segment, sensor = str(format_folders[0]), 'All', 'TEMP'
        source = os.path.join('studies', study, 'formatted', format_type,
                              record, segment, sensor + '.csv')
        df = pd.read_csv(source)

        df_quans = pd.DataFrame()
        df_quans['recordName'] = format_folders

        for colName in list(df.head()):
            if 'measurement' in str(colName):
                colNameSplit = colName.split('_')
                print('colNameSplit = ')
                print(colNameSplit)
                wearableName = colNameSplit[1]

                for quan in quanList:
                    dfColName = str('quan' + str(quan) + '_' + wearableName)
                    df_quans[dfColName] = [None] * len(format_folders)

        print('df_quans = ')
        print(df_quans)

        for sensor in sensor_list:

            for segment in segment_list:

                for record in format_folders:

                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)
                    print('clean file retrieved: ' + source)

                    i = df_quans[df_quans['recordName'] ==
                                 record].index.values[0]
                    print('i = ' + str(i))

                    for colName in list(df.head()):

                        if 'measurement' not in str(colName):
                            continue

                        # Derive the target column from the column being
                        # processed.  Previously the name left over from the
                        # layout loop above was reused, so every measurement
                        # column overwrote the same quantile columns.
                        wearableName = colName.split('_')[1]

                        measurement = list(df[colName])
                        print('measurement = ')
                        print(measurement[0:100])

                        for quan in quanList:

                            dfColName = str('quan' + str(quan) + '_' +
                                            wearableName)
                            df_quans.loc[i, dfColName] = np.quantile(
                                measurement, quan)

                # NOTE(review): the path does not contain the segment, so
                # each segment overwrites the file written for the previous
                # one -- confirm whether that is intended.
                path = [
                    'studies', study, 'analyzed', 'statistics', 'quantiles'
                ]
                path = build_path(path)
                file = os.path.join(path, sensor + ".csv")
                df_quans.to_csv(file)
                print('quantile file saved: ' + file)

        print("completed statistical analysis of records")
def calculate_regression():
    """
    Fit polynomials to every measurement column of every clean record.

    For each study, sensor, segment and each degree of the 'degree_list'
    reference, the columns whose name contains 'meas' are fitted against
    'timeMinutes' with np.polyfit.  The coefficients are stored as one
    space-separated string per record and column, and the table is saved to
    studies/<study>/analyzed/regression/<degree>/<segment>/<sensor>.csv.
    Records whose 'timeUnix' column is empty are skipped.
    """

    print('analyzing regression. ')

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    degree_list = list(map(int, retrieve_ref('degree_list')))

    for study in study_list:

        format_type = 'clean'
        recordNames = os.listdir(
            os.path.join('studies', study, 'formatted', format_type))
        recordCount = len(recordNames)

        for sensor in sensor_list:

            csvName = sensor + '.csv'

            for segment in segment_list:

                # one table per sensor and segment, reused for every degree
                df_coef = pd.DataFrame()
                df_coef['recordName'] = recordNames

                for degree in degree_list:

                    for record in recordNames:

                        source = os.path.join('studies', study, 'formatted',
                                              format_type, record, segment,
                                              csvName)
                        print('source = ' + source)
                        df = pd.read_csv(source)

                        # nothing to fit in an empty record
                        if len(df['timeUnix']) == 0:
                            continue

                        for colName in df.columns:

                            if 'meas' not in colName:
                                continue

                            # add the result column the first time it is seen
                            if colName not in df_coef.columns:
                                df_coef[colName] = [None] * recordCount

                            rowMask = df_coef['recordName'] == record
                            row = df_coef[rowMask].index.values[0]

                            times = list(df['timeMinutes'])
                            values = list(df[colName])

                            coef = np.polyfit(times, values, degree)
                            print('coef = ')
                            print(coef)

                            coefText = ' '.join([str(c) for c in coef])
                            print(coefText)
                            df_coef.loc[row, colName] = coefText

                    outDir = build_path([
                        'studies', study, 'analyzed', 'regression',
                        str(degree), segment
                    ])
                    file = os.path.join(outDir, sensor + ".csv")
                    df_coef.to_csv(file)
                    print('regression file saved: ' + file)
# Example #22
# 0
def format_coregister():
    """
    combine paired record in a single csv
    save in the coregister folder of formatted data

    For every record listed in the study metadata, the records named in its
    'coregisterRecords' entry are read from
    studies/<study>/formatted/truncate/<name>/All/<sensor>.csv, cut to the
    open interval (coregisterBegin, coregisterEnd) of 'timeUnix', and their
    'measurement' column is copied into '<wearable>_measurement', where
    <wearable> is the second underscore-separated part of the paired record
    name.  The combined table is saved to
    studies/<study>/formatted/coregister/<record>/All/<sensor>.csv.

    Raises:
        AssertionError: when the coregister interval is shorter than 100,
            lies outside the recorded time span, or leaves no samples.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')

    # Every paired record is read from the truncated, whole-record data.
    # Bound here so the output path below never depends on the inner loop
    # having run at least once.
    format_type, segment = 'truncate', 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)
        recordNames = list(df_meta['recordName'])

        # name the wearable used for each record
        for record in recordNames:

            i = df_meta[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(i))
            coregisterBegin = df_meta.loc[i, 'coregisterBegin']
            coregisterEnd = df_meta.loc[i, 'coregisterEnd']
            coregisterRecords = df_meta.loc[i, 'coregisterRecords']

            # Parse the metadata entry once per record.  It used to be
            # re-parsed for every sensor, which compared the length of the
            # already-built list against the length of the record name.
            if len(coregisterRecords) == len(record):
                # a single record name
                coregisterRecords = list([coregisterRecords])

            elif len(coregisterRecords) > len(record):
                # several record names separated by spaces
                coregisterRecords = coregisterRecords.split(' ')

            for sensor in sensor_list:

                df_coregister = pd.DataFrame()

                print('coregisterRecords = ')
                print(coregisterRecords)

                for item in coregisterRecords:

                    recordRef = item
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, recordRef, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)

                    assert coregisterEnd > coregisterBegin + 100, 'during coregister format, coregisterBegin >= coregisterEnd'

                    # extremes of the recorded time span, computed once
                    timeUnixEnd = max(list(df['timeUnix']))
                    timeUnixBegin = min(list(df['timeUnix']))

                    assert coregisterEnd < timeUnixEnd, 'possible error with time'

                    print('coregisterEnd = ' + str(coregisterEnd) +
                          ' timeUnixEnd = ' + str(timeUnixEnd))
                    print('timeUnixEnd - coregisterEnd = ' +
                          str((timeUnixEnd - coregisterEnd) / 60))
                    print('coregisterEnd - timeUnixBegin = ' +
                          str((coregisterEnd - timeUnixBegin) / 60))

                    assert coregisterEnd > timeUnixBegin

                    df = df[df['timeUnix'] > coregisterBegin]
                    df = df[df['timeUnix'] < coregisterEnd]

                    assert len(list(
                        df['timeUnix'])) > 0, 'coregistered df removed'

                    recordSplit = item.split('_')
                    wearableName = recordSplit[1]

                    # the time columns of the last paired record win
                    df_coregister['timeUnix'] = list(df['timeUnix'])
                    df_coregister['timeMinutes'] = list(df['timeMinutes'])

                    colName = str(wearableName + '_' + 'measurement')
                    print('colName = ' + colName)
                    df_coregister[colName] = list(df['measurement'])

                path = [
                    'studies', study, 'formatted', 'coregister', record,
                    segment
                ]
                path = build_path(path)
                file = os.path.join(path, sensor + ".csv")
                df_coregister.to_csv(file)
                print('formatted coregister file = ' + str(file))
def find_inflections(range, buffer):
    """
    Flag, for every clean record, the samples where a sliding quadratic fit
    turns around.

    For each study, sensor and record the 'All' segment is read from
    <study>/formatted/clean/<record>/All/<sensor>.csv.  For every column
    whose name contains 'meas', a window starting at each sample and spanning
    `range / 60` units of 'timeMinutes' is fitted with a degree-2 polynomial;
    the fit, its derivative and the root of the derivative are stored on the
    middle row of the window in '<prefix>_<suffix>' columns, where <prefix>
    is the text before the first underscore of the measurement column name.
    '<prefix>_inflection' is 'Yes' when that root lies inside the window and
    within half a sample spacing of the middle sample's time.

    Two files are written per record and sensor:
    <study>/analyzed/inflections/all_times/<range>/<record>/All/<sensor>.csv
    with every row, and the same path under 'inflection_only' with only the
    rows flagged 'Yes'.  A record is skipped when its 'all_times' file
    already exists.

    Args:
        range: width of the fit window; divided by 60 before it is compared
            with 'timeMinutes' values.  The name shadows the builtin but is
            part of the signature.
        buffer: not used by this function.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')

    format_type = 'clean'
    segment = "All"

    # suffixes of the result columns added for every measurement column
    suffixes = [
        'inflection', 'coefficients', 'derivativeZero', 'equation',
        'derivative'
    ]

    for study in study_list:

        clean_path = os.path.join(study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:

            for record in recordNames:

                path = [
                    study, 'analyzed', 'inflections', 'all_times',
                    str(range), record, segment
                ]
                path = build_path(path)
                file = os.path.join(path, sensor + ".csv")

                # results already on disk are not recomputed
                if os.path.isfile(file):
                    continue

                source = os.path.join(study, 'formatted', format_type, record,
                                      segment, sensor + '.csv')
                print('source = ' + source)
                df = pd.read_csv(source)

                for colName in df.columns:

                    if 'Unnamed' in str(colName):
                        del df[colName]
                        print('deleted ' + colName)
                        continue

                    if 'meas' not in colName:
                        continue

                    colNameSplit = colName.split('_')
                    prefix = colNameSplit[0]
                    print('colNameSplit = ')
                    print(colNameSplit)
                    print('colNameSplit[0] = ')
                    print(colNameSplit[0])

                    rowCount = len(df['timeMinutes'])

                    for suffix in suffixes:
                        label = str(prefix + '_' + suffix)
                        print('label = ' + label)
                        if label not in df.columns:
                            df[label] = [None] * rowCount

                    df['timeBegin'] = [None] * rowCount
                    df['timeEnd'] = [None] * rowCount

                    # the sample spacing is read from rows 1 and 2, so
                    # shorter records cannot be analyzed
                    if rowCount < 3:
                        continue

                    # these depend only on the record, not on the sample
                    timeTolerance = (float(df.loc[2, 'timeMinutes']) -
                                     float(df.loc[1, 'timeMinutes'])) / 2
                    iRange = int(range / 60 * 1 / (timeTolerance * 2))

                    for timeMinute in list(df['timeMinutes']):

                        i = df[df['timeMinutes'] == timeMinute].index.values[0]

                        # not enough samples left for a full window
                        if rowCount - i <= iRange + 2:
                            continue

                        # results are stored on the middle row of the window
                        mid = int(i + iRange / 2)
                        timeMedian = df.loc[mid, 'timeMinutes']

                        df_truncate = df[df['timeMinutes'] >= timeMinute]
                        df_truncate = df_truncate[
                            df_truncate['timeMinutes'] <= timeMinute +
                            range / 60]

                        timeTruncate = list(df_truncate['timeMinutes'])
                        windowBegin = min(timeTruncate)
                        windowEnd = max(timeTruncate)
                        df.loc[mid, 'timeBegin'] = windowBegin
                        df.loc[mid, 'timeEnd'] = windowEnd

                        measTruncate = list(df_truncate[colName])

                        coef = np.polyfit(timeTruncate, measTruncate, 2)

                        x = sym.Symbol('x')
                        f = coef[0] * x * x + coef[1] * x + coef[2]
                        dff = sym.diff(f, x)
                        soldf = sym.solve(dff)

                        df.loc[mid, prefix + '_inflection'] = 'No'
                        df.loc[mid, prefix + '_coefficients'] = str(''.join(
                            [str(c) for c in coef]))
                        df.loc[mid, prefix + '_equation'] = str(f)
                        df.loc[mid, prefix + '_derivative'] = str(dff)

                        # A window whose fit has a constant derivative (for
                        # example an all-zero signal) yields no root;
                        # indexing the empty result used to raise
                        # IndexError.  The flag stays 'No'.
                        if not soldf:
                            continue

                        soldf = soldf[0]
                        df.loc[mid, prefix + '_derivativeZero'] = soldf

                        if (soldf > windowBegin and soldf < windowEnd
                                and soldf < timeMedian + timeTolerance
                                and soldf > timeMedian - timeTolerance):

                            print('inflection found at time = ' + str(soldf))
                            df.loc[mid, prefix + '_inflection'] = 'Yes'

                df.to_csv(file)
                print('inflection list saved : ' + file)

                # keep only the rows flagged 'Yes' for the second file
                for colName in df.columns:

                    if 'inflection' in colName:

                        colNameSplit = colName.split('_')
                        label = str(colNameSplit[0] + '_' + 'inflection')
                        df = df.drop(df[(df[label] != 'Yes')].index)

                path = [
                    study, 'analyzed', 'inflections', 'inflection_only',
                    str(range), record, segment
                ]
                path = build_path(path)
                file = os.path.join(path, sensor + ".csv")
                df.to_csv(file)
def plot_inflections(study, record, sensor, segment):
    """
    Plot the inflections found for one record, one subplot per search range.

    For every value of the 'searchRange' reference, followed by 'unique', the
    file <study>/analyzed/inflections/all_times/<range>/<record>/<segment>/<sensor>.csv
    is read.  The measurement columns are drawn as scatter plots, each
    inflection as a dashed vertical line, and for every 'polyfitEquation'
    column the leading coefficient of the flagged rows is plotted against the
    inflection times in an extra subplot.  The figure is saved to
    <study>/plotted/inflection/each_record/<record>/<sensor>.png.

    If any of the input files is missing the function returns without saving
    anything.  The figure is closed in every case so repeated calls do not
    accumulate open pyplot figures.
    """

    searchRange = retrieve_ref('searchRange')
    searchRange = [int(x) for x in searchRange]
    searchRange.append('unique')

    row_num, col_num, plot_num = len(searchRange) + 2, 2, 0
    row_width_mulp, col_width_mulp = 12, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp
    fig = plt.figure(figsize=(plot_width, plot_height))

    try:

        for searchWindow in searchRange:

            plot_num += 1
            plt.subplot(row_num, col_num, plot_num)

            path = [
                study, 'analyzed', 'inflections', 'all_times',
                str(searchWindow), record, segment
            ]
            file = os.path.join(*path, sensor + ".csv")

            if not os.path.isfile(file):
                return

            df = pd.read_csv(file)

            # timeMinutes, measList, measMin and measMax deliberately keep
            # their value from the previous file: the 'unique' file has no
            # time or measurement column and is drawn over the last record
            # data that was read.
            for colName in df.columns:

                if 'Minutes' in colName:
                    timeMinutes = list(df[colName])

                if 'measurement' in colName:

                    measList = list(df[colName])
                    measMin = min(measList)
                    measMax = max(measList)
                    plt.scatter(timeMinutes, measList, label=str(colName))

                if 'inflectionDecision' in colName or 'unique' in colName:

                    if 'inflectionDecision' in colName:
                        dfInflections = df.drop(
                            df[(df[colName] != 'Yes')].index)
                        timeInflections = list(dfInflections['timeMinutes'])

                    if 'unique' in colName:
                        plt.scatter(timeMinutes, measList)
                        timeInflections = list(df[colName])

                    colorScatter = [0, .9, .6]

                    # one dashed vertical line per inflection
                    for inflectionTime in timeInflections:
                        xx = np.linspace(inflectionTime, inflectionTime, 100)
                        yy = np.linspace(measMin, measMax, 100)
                        plt.plot(xx, yy, color=colorScatter, linestyle='--')

                    # Labels are only set when at least one line was drawn;
                    # they do not depend on the individual inflection, so
                    # they are set once instead of once per line.
                    if timeInflections:
                        plt.title('Record = ' + str(record) + ' Range = ' +
                                  str(searchWindow) + ' seconds ' +
                                  ' Inflections Found = ' +
                                  str(len(timeInflections)))
                        plt.xlabel('Time (Minutes)')
                        sensor_unit = retrieve_sensor_unit(sensor)
                        plt.ylabel(sensor + ' ' + sensor_unit)

                if 'polyfitEquation' in colName:

                    polyfitCoeff = list(dfInflections[colName])

                    coeffPolyList = []
                    for coeff in polyfitCoeff:

                        # Turn the stored equation text into a comma
                        # separated list and keep the text before the first
                        # 'x', i.e. the leading coefficient.
                        coeff = str(coeff)
                        coeff = coeff.replace("x", ",")
                        coeff = coeff.replace("*", "")
                        coeff = coeff.replace(" ", "")
                        coeff = coeff.replace("+", "")
                        coeffList = coeff.split(',')
                        coeffPolyList.append(float(coeffList[0]))

                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)

                    plt.scatter(timeInflections, coeffPolyList)

                    plt.title('Time Infletions vs Coefficients')
                    plt.xlabel('Time (Minutes)')
                    sensor_unit = retrieve_sensor_unit(sensor)
                    plt.ylabel(sensor + ' ' + sensor_unit)

        path = [study, 'plotted', 'inflection', 'each_record', record]
        path = build_path(path)
        file = os.path.join(path, sensor + ".png")
        plt.savefig(file, bbox_inches='tight')
        print('inflection plot saved ' + file)

    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(fig)
# Example #25
# 0
def plot_acc(study):
    """
    Plot the ACC sensor columns of every truncated record of a study.

    For each record folder under studies/<study>/formatted/truncate the file
    <record>/All/ACC.csv is read when it exists.  Every column whose name
    contains 'eas' is drawn against 'timeUnix' in its own subplot, followed
    by one subplot that overlays all of them.  The figure is saved to
    studies/<study>/plotted/truncate/<record>/ACC.png (also when the source
    file is missing, in which case the figure is empty) and then closed so
    that looping over many records does not accumulate open pyplot figures.
    """

    print('compare ACC sensors')

    format_type, sensor, segment = 'truncate', 'ACC', 'All'
    source_path = os.path.join('studies', study, 'formatted', format_type)
    format_folders = os.listdir(source_path)

    for record in format_folders:

        row_num, col_num, plot_num = 5, 1, 0
        row_width_mulp, col_width_mulp = 14, 5
        plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp
        fig = plt.figure(figsize=(plot_width, plot_height))

        try:

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')

            if os.path.isfile(source):

                print('source = ' + source)

                df = pd.read_csv(source)

                colNames = list(df.head())
                print('colNames = ')
                print(colNames)

                # columns to draw, in file order
                measColumns = [c for c in colNames if 'eas' in str(c)]

                # one subplot per column
                for colName in measColumns:

                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)

                    labelName = colName
                    print('labelName = ' + labelName)

                    valueColor = retrieve_ref_color(
                        str('color_' + str(sensor) + '_' + str(colName)))
                    plt.scatter(df['timeUnix'],
                                df[colName],
                                color=valueColor,
                                label=labelName)

                    plt.xlabel('time Unix')
                    sensor_unit = retrieve_sensor_unit(sensor)
                    plt.ylabel(sensor + ' ' + sensor_unit)
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

                # final subplot overlays every column
                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)

                for colName in measColumns:

                    labelName = colName
                    print('labelName = ' + labelName)

                    valueColor = retrieve_ref_color(
                        str('color_' + str(sensor) + '_' + str(colName)))
                    plt.scatter(df['timeUnix'],
                                df[colName],
                                color=valueColor,
                                label=labelName)

                plt.xlabel('time (Unix)')
                sensor_unit = retrieve_sensor_unit(sensor)
                plt.ylabel(sensor + ' ' + sensor_unit)
                plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                           loc='upper left')

            path = ['studies', study, 'plotted', format_type, record]
            path = build_path(path)
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')
            print('acc saved: ' + file)

        finally:
            # pyplot keeps every figure alive until it is closed explicitly
            plt.close(fig)
def add_embedded_to_meta():
    """
    Add a metadata row for every record that holds an embedded record.

    For each study in the ``study_list`` reference, every metadata row whose
    ``embeddedRecord`` value is greater than zero is copied.  The copy gets:

    * ``recordName``   - embedded start time + '_' + the second
      underscore-separated field of ``source_path``
    * ``recordBegin``  - the embedded start time (as an int)
    * ``recordEnd``    - the end time returned by
      ``find_record_end_using_temp`` for the formatted TEMP data that lies
      after the embedded start
    * ``recordLength`` - (end - begin) / 60, rounded to 4 decimal places

    The TEMP source file of the parent record is then passed through
    ``build_timestamps`` and written under the new record name.  Finally the
    extended metadata is sorted by ``recordName`` and handed to ``save_meta``.
    """

    study_list = retrieve_ref('study_list')

    # the embedded record is always located with the temperature sensor
    format_type, segment, sensor = 'source', 'All', 'TEMP'

    # check each study
    for study in study_list:

        # retrieve the list of records from the metadata.csv file
        df_meta = retrieve_meta(study)

        # snapshot the records first: rows added below keep the parent's
        # source_path and must not be visited by this loop
        source_path = list(df_meta['source_path'])

        for record in source_path:

            print('record = ' + str(record))
            print('df_meta = ')
            print(df_meta)

            # work on an independent single-row copy so the edits below never
            # write into a view of df_meta, and so the label lookup stays a
            # scalar even after rows with the same index label were added
            df_row = df_meta[df_meta['source_path'] == record].iloc[[0]].copy()
            i = df_row.index[0]
            embeddedRecord = float(df_row.loc[i, 'embeddedRecord'])

            # NaN and non-positive values mean "no embedded record"
            if not embeddedRecord > 0:
                continue

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)

            # only the data after the embedded start belongs to the new record
            recordBegin = int(embeddedRecord)
            df = df[df['timeUnix'] > recordBegin]
            timeEndUnix = find_record_end_using_temp(df)
            recordLength = (timeEndUnix - recordBegin) / 60

            # NOTE(review): assumes source_path contains at least one '_'
            record_split = record.split('_')
            recordName = str(recordBegin) + '_' + str(record_split[1])
            print('embedded recordName = ' + recordName)

            df_row.loc[i, 'recordName'] = recordName
            df_row.loc[i, 'recordBegin'] = recordBegin
            df_row.loc[i, 'recordEnd'] = int(timeEndUnix)
            df_row.loc[i, 'recordLength'] = round(recordLength, 4)

            print('df_row = ')
            print(df_row)

            # DataFrame.append was removed in pandas 2.0; concat is the
            # supported equivalent and keeps the original index labels
            df_meta = pd.concat([df_meta, df_row])

            # write the timestamped TEMP data of the parent record under the
            # name of the embedded record
            source = os.path.join('studies', study, format_type, record,
                                  sensor + '.csv')
            df_source = pd.read_csv(source)
            df_timestamped = build_timestamps(df_source, sensor)

            path = build_path([
                'studies', study, 'formatted', format_type, recordName,
                segment
            ])
            file = os.path.join(path, sensor + ".csv")
            df_timestamped.to_csv(file)
            print('formatted source file = ' + str(file))

        df_meta = df_meta.sort_values(by='recordName')
        save_meta(study, df_meta)
# Example #27
# 0
def plot_inflections(study, record, sensor, segment):
    """
    Plot the measurement and the inflections found for one record and sensor.

    One subplot is drawn for every value of the ``searchRange`` reference.
    Each subplot scatters the measurement column(s) (name contains
    'measurement' and no underscore) against the column whose name contains
    'Minutes', and draws a vertical line at the ``timeMinutes`` value of every
    row whose 'inflectionDecision' column equals 'Yes'.

    The figure is written once, after all ranges were processed, as
    ``<sensor>.png`` inside the directory returned by ``build_path`` for
    ``[study, 'plotted', 'inflection', 'each_record', record]`` and is then
    closed.

    Nothing is plotted when ``segment`` is not 'All', when ``searchRange`` is
    empty, or when the inflection file of the first search range is missing.
    """

    if segment != 'All':
        return

    searchRange = [int(x) for x in retrieve_ref('searchRange')]
    if not searchRange:
        return

    def inflection_file(search_range):
        # location of the analyzed inflections for one search range
        return os.path.join(study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record, segment,
                            sensor + ".csv")

    # the first range acts as a sentinel: without it the record was not analyzed
    if not os.path.isfile(inflection_file(searchRange[0])):
        return

    row_num, col_num = len(searchRange) + 2, 1
    row_width_mulp, col_width_mulp = 12, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp
    plt.figure(figsize=(plot_width, plot_height))

    sensor_unit = retrieve_sensor_unit(sensor)

    for plot_num, search_range in enumerate(searchRange, start=1):

        plt.subplot(row_num, col_num, plot_num)

        file = inflection_file(search_range)
        if not os.path.isfile(file):
            continue

        df = pd.read_csv(file)

        # collect everything first; drawing happens afterwards so it can
        # neither depend on the column order nor reuse values that were left
        # over from a previous search range
        timeMinutes = None
        measurements = []
        timeInflections = []

        for colName in df.columns:

            if 'Unnamed' in colName:
                continue

            if 'Minutes' in colName:
                timeMinutes = list(df[colName])

            elif 'measurement' in colName and '_' not in colName:
                measurements.append((colName, list(df[colName])))

            elif 'inflectionDecision' in colName:
                dfInflections = df[df[colName] == 'Yes']
                timeInflections = list(dfInflections['timeMinutes'])

        print('timeInflections = ')
        print(timeInflections)

        if timeMinutes is not None:
            for colName, measList in measurements:
                plt.scatter(timeMinutes, measList, label=str(colName))

        # the vertical markers span the full measured range
        measValues = [value for _, measList in measurements for value in measList]
        if measValues:
            measMin, measMax = min(measValues), max(measValues)
            for time in timeInflections:
                plt.plot([time, time], [measMin, measMax], color=[0, .9, .6])

        # the x data comes from the minutes column, not from timeUnix
        plt.xlabel('time (minutes)')
        plt.ylabel(sensor + ' ' + sensor_unit)
        plt.title('Record = ' + str(record) + ' Range = ' + str(search_range) +
                  ' seconds ' + ' Inflections Found = ' +
                  str(len(timeInflections)))

    path = build_path([study, 'plotted', 'inflection', 'each_record', record])
    file = os.path.join(path, sensor + ".png")
    plt.savefig(file, bbox_inches='tight')
    # release the figure; this function is called once per record and sensor
    plt.close()
    print('inflection plot saved ' + file)
def modeling_test():
    """
    Write OpenSCAD code that models the record lengths of each study.

    For every study in the ``study_list`` reference the file
    ``studies/<study>/meta/metadata.csv`` is read and sorted by
    ``recordLength``.  A file ``code/openSCAD/<study>/<study>_cleaning_data.scad``
    is then written that contains, per record, a translated union of a
    sphere and a cylinder whose dimensions are derived from the record's
    ``recordLength``.  The shapes are laid out on a grid whose pitch is twice
    the largest record length.
    """

    print("openSCAD modeling begin")

    study_list = retrieve_ref('study_list')

    for study in study_list:

        metadata_file = os.path.join('studies', study, 'meta', 'metadata.csv')
        df_meta = pd.read_csv(metadata_file)
        df_meta = df_meta.sort_values(by=['recordLength'])

        records_found = list(df_meta['source_path'])
        recordLength = list(df_meta['recordLength'])

        openscad_path = os.path.join('code', 'openSCAD', study)
        if not os.path.isdir(openscad_path):
            build_path(['code', 'openSCAD', study])
        openscad_file = os.path.join(openscad_path,
                                     str(study) + '_' + 'cleaning_data.scad')

        # grid layout: roughly sqrt(n) shapes per row
        count_xaxis = math.sqrt(len(records_found))

        # default=0 keeps a study without records from raising in max()
        spacing = round(max(recordLength, default=0) * 2, 3)

        now = datetime.now()
        current_time = now.strftime("%H:%M:%S")

        # the context manager closes the file even when a write fails
        with open(openscad_file, "w") as file:

            file.write('// File made on ' + str(date.today()) + ' ' +
                       str(current_time))
            file.write('\n' + '// records found = ' + str(len(records_found)))
            file.write('\n' + '// spacing = ' + str(spacing))

            for i, raw_length in enumerate(recordLength):

                x_num = int((i + 1) / count_xaxis)
                y_num = int((i + 1) % count_xaxis)
                z_num = 0
                length = round(raw_length, 3)

                file.write('\n')
                file.write('\n' + 'translate([ ' + str(spacing * x_num) +
                           ' , ' + str(spacing * y_num) +
                           ' , ' + str(spacing * z_num) + '])')
                file.write('\n' + 'union() {')
                file.write('  ' + 'color([ ' + str(1) + ' , ' + str(0) +
                           ' , ' + str(1) + ' ])')
                file.write('  ' + 'sphere(' + str(length) + ' , $fn=60);')
                file.write('  ' + 'color([ ' + str(0.5) + ' , ' + str(0.5) +
                           ' , ' + str(1) + ' ])')
                file.write('  ' + 'cylinder( r= ' + str(length / 2) +
                           ', h= ' + str(2 * length) + ' , $fn=60);')
                file.write(' } ')
                file.write('\n')
                file.write('\n')

    print("openSCAD modeling complete")
def plot_mean():
    """
    Plot the mean statistics of every sensor, one figure per study.

    For each study in the ``study_list`` reference a figure with one subplot
    per sensor is created.  Within a subplot every segment of
    ``segment_list`` is read from
    ``studies/<study>/analyzed/statistics/mean/<segment>/<sensor>.csv`` and
    scattered in the colour returned by
    ``retrieve_ref_color_wearable_segment``.

    After rows with missing values and 'Unnamed' columns are dropped, the
    second column is used as y.  When a third column exists it is used as x,
    otherwise x is the running record number starting at 1.  For the 'All'
    segment a grey diagonal from (min x, min y) to (max x, max y) is drawn
    as a reference.

    The figure is saved in the directory returned by ``build_path`` for
    ``['studies', study, 'plotted', 'analysis', 'mean']`` and then closed.
    """

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    for study in study_list:

        row_num, col_num, plot_num = len(sensor_list), 3, 0
        row_width_mulp, col_width_mulp = 6, 6
        plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp
        plt.figure(figsize=(plot_width, plot_height))

        for sensor in sensor_list:

            plot_num += 1
            plt.subplot(row_num, col_num, plot_num)

            for segment in segment_list:

                file = os.path.join('studies', study, 'analyzed', 'statistics',
                                    'mean', segment, sensor + '.csv')
                print('file = ' + file)
                df_mean = pd.read_csv(file)

                df_mean = df_mean.dropna()

                # drop the index columns that to_csv / read_csv round trips add
                unnamed = [c for c in df_mean.columns if 'Unnamed' in c]
                df_mean = df_mean.drop(columns=unnamed)

                print('df_mean = ')
                print(df_mean)

                # label each axis with the name of the column actually plotted
                yy = df_mean.iloc[:, 1]
                ylabel = str(df_mean.columns[1])
                print('yy = ')
                print(yy)

                if df_mean.shape[1] > 2:
                    xx = df_mean.iloc[:, 2]
                    xlabel = str(df_mean.columns[2])
                else:
                    xx = list(range(1, len(yy) + 1))
                    xlabel = 'Records Num'

                print('xx = ')
                print(xx)

                # sanity checks: equal lengths, and the sums compare false
                # when they are NaN
                assert len(xx) == len(yy)
                assert sum(xx) > -1000000000
                assert sum(yy) > -1000000000

                # grey reference diagonal; min()/max() need at least one row
                if segment == 'All' and len(yy) > 0:
                    xxsym = np.linspace(min(xx), max(xx), 200)
                    yysym = np.linspace(min(yy), max(yy), 200)
                    plt.scatter(xxsym, yysym, color=[.8, .8, .8])

                wearable_num = 1
                colorWearableSegment = retrieve_ref_color_wearable_segment(
                    wearable_num, segment)

                plt.scatter(xx, yy, color=colorWearableSegment,
                            label=str(segment))

                plt.xlabel(xlabel + ' ' + sensor)
                plt.ylabel(ylabel + ' ' + sensor)

                print('xlabel / ylabel = ' + xlabel + ' ' + ylabel)

                if sensor == sensor_list[-1]:
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

        path = ['studies', study, 'plotted', 'analysis', 'mean']
        path = build_path(path)
        # NOTE: the figure holds every sensor but is named after the last one,
        # which keeps the file name the original code produced
        file = os.path.join(path, sensor_list[-1] + ".png")
        plt.savefig(file, bbox_inches='tight')
        # release the figure before the next study allocates a new one
        plt.close()
        print('plotted mean = ' + file)