def find_paired_duration():
    """
    Compute the duration of every record and store it in the meta file.

    For each study, the difference between 'recordEnd' and 'recordBegin'
    is divided by 60, rounded to four decimals and written to a new
    'recordDuration' column.  The updated meta table is then saved and
    printed.
    """

    print("begin find_paired_duration")

    for study in retrieve_ref('study_list'):

        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        # start with an empty column; it is filled row by row below
        df_meta['recordDuration'] = [None for _ in records]

        for record in records:

            # index label of the first meta row that holds this record
            is_match = df_meta['source_path'] == record
            row = df_meta[is_match].index.values[0]
            print('i = ' + str(row))

            begin = int(df_meta.loc[row, 'recordBegin'])
            print('recordBegin = ' + str(begin))

            end = int(df_meta.loc[row, 'recordEnd'])
            print('recordEnd = ' + str(end))

            duration = round((end - begin) / 60, 4)
            df_meta.loc[row, 'recordDuration'] = duration
            print('recordDuration = ' + str(duration))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
def trim_record_to_max():
    """
    Store the (capped) length of every record in each study's meta file.

    For each record listed under 'source_path', the TEMP csv found at
    <study>/formatted/source/<record>/All/TEMP.csv is read and the largest
    value in its 'timeMinutes' column is taken as the record length.
    Lengths above the 'max_record_time' reference value are capped at that
    value.  The result, rounded to four decimals, is written to the
    'recordLength' column and the meta file is saved once per study.

    Takes no arguments and returns nothing; results are persisted through
    save_meta.
    """

    print("finding the end of the record")

    study_list = retrieve_ref('study_list')
    max_record_time = retrieve_ref('max_record_time')

    sensor = 'TEMP'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        # empty column, filled row by row below
        df_meta['recordLength'] = [None] * len(source_path)

        for record in source_path:

            timestamped_file = os.path.join(study, 'formatted', 'source',
                                            record, 'All', sensor + ".csv")
            df_timestamped = pd.read_csv(timestamped_file)

            # longest timestamp in the file, capped at the configured maximum
            record_length = min(max(df_timestamped['timeMinutes']),
                                max_record_time)
            record_length = round(record_length, 4)

            # index label of the first meta row that holds this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordLength'] = record_length

        # save the record length to meta file
        save_meta(study, df_meta)
def decide_inclusion():
    """
    Decide which records to keep based on their length.

    For every study the meta table is sorted by 'recordLength' and an
    'included' column is added: 'excluded' when the length is below the
    'min_record_time' reference value, 'included' otherwise.  The flagged
    table is saved, then the excluded rows are dropped, the remainder is
    sorted by 'source_path' and saved again.

    Takes no arguments and returns nothing; results are persisted through
    save_meta.
    """

    print("begin decide inclusion")

    study_list = retrieve_ref('study_list')
    min_record_time = retrieve_ref('min_record_time')

    for study in study_list:

        df_meta = retrieve_meta(study)
        df_meta = df_meta.sort_values(by=['recordLength'])

        # one flag per row, in the same (sorted) order as the table
        df_meta['included'] = [
            'excluded' if length < min_record_time else 'included'
            for length in df_meta['recordLength']
        ]

        # NOTE(review): this save is overwritten by the one below unless
        # save_meta keeps history -- kept to preserve existing side effects.
        save_meta(study, df_meta)

        # keep only the included records in the final meta file
        excluded_rows = df_meta[df_meta['included'] == 'excluded'].index
        df_meta = df_meta.drop(excluded_rows)
        df_meta = df_meta.sort_values(by=['source_path'])
        save_meta(study, df_meta)

    print("completed decide inclusion")
def multiple_record_check():
    """
    Check each record for an additional recording after the truncated one.

    For every study, each record whose 'fullLength' exceeds its
    'truncatedLength' by more than 30 is scanned: the TEMP data after
    truncatedLength + 5 is searched for a point where the measurement
    28 samples later is more than 3 higher than the current one.  On the
    first such hit a new entry (source path, begin time, end time) is
    appended to the meta lists and the scan of that record stops.

    The meta file is then overwritten with a table that contains only the
    'source_path', 'recordBegin' and 'recordEnd' columns.

    Takes no arguments and returns nothing.
    """

    print("begin multiple record check")

    study_list = retrieve_ref('study_list')
    # NOTE(review): the next five reference values are fetched but never
    # used in this function.
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')

    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        # working copies that grow when an additional record is detected
        source_path_new = list(df_meta['source_path'])
        timeBegin_list = list(df_meta['recordBegin'])
        timeEnd_list = list(df_meta['recordEnd'])

        for record in source_path:

            # index label of the first meta row that holds this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            fullLength = float(df_meta.loc[i, 'fullLength'])
            truncatedLength = float(df_meta.loc[i, 'truncatedLength'])

            format_type = 'source'
            segment = 'All'
            sensor = 'TEMP'
            df = retrieve_analyzed(study, format_type, record, segment, sensor)

            # NOTE(review): filled below but never read afterwards
            new_record_list = []

            # only look further when data continues well past the truncation
            if fullLength > truncatedLength + 30:

                # discard everything up to shortly after the truncated record
                df = df.drop(df[df['timeMinutes'] < truncatedLength + 5].index)

                # print('df = ')
                # print(df)

                # snapshots of the remaining data; they are NOT updated by
                # the drops performed on df further down
                timeUnix = list(df['timeUnix'])
                timeMinutes = list(df['timeMinutes'])
                measurements = list(df['measurement'])

                # NOTE(review): this loop variable reuses the name `i` of the
                # meta row label above; the row label is not used again
                # afterwards, so the shadowing is currently harmless.
                for i in range(len(measurements)):

                    # leave room for the look-ahead of 28 samples
                    if i < len(measurements) - 30:

                        # rise of more than 3 over 28 samples is treated as
                        # the start of a new record
                        if measurements[i] + 3 < measurements[i + 28]:

                            print('new record found')

                            # trim df to start at the detected rise
                            df = df.drop(
                                df[df['timeMinutes'] < timeMinutes[i +
                                                                   28]].index)

                            time_end = find_record_end_from_temp(df)
                            print('time_end = ' + str(time_end))

                            # NOTE(review): the trimmed df is not used after
                            # this point, so time_end only affects the print.
                            df = df.drop(
                                df[df['timeMinutes'] > time_end].index)

                            # print('df = ')
                            # print(df)

                            # second underscore-separated field of the record
                            wearable_name = record.split('_')
                            wearable_name = wearable_name[1]

                            # NOTE(review): timeUnix[0] is the first sample of
                            # the snapshot (after truncatedLength + 5), not the
                            # sample at i + 28 -- confirm this is intended.
                            recordName = str(
                                str(int(timeUnix[0])) + '_' +
                                str(wearable_name))
                            print('recordName = ' + str(recordName))

                            new_record_list.append(recordName)

                            # NOTE(review): the original record name is
                            # appended here, not recordName -- confirm.
                            source_path_new.append(record)
                            timeBegin_list.append(int(timeUnix[0]))
                            print('timeUnix[0:20] = ')
                            print(timeUnix[0:20])
                            timeEnd = min(timeUnix)
                            print('timeEnd = ' + str(timeEnd))
                            # NOTE(review): the stored end is the smallest
                            # timeUnix value plus 60; time_end computed above
                            # is not used for it -- confirm.
                            timeEnd = min(timeUnix) + 60
                            print('timeEnd = ' + str(timeEnd))
                            timeEnd_list.append(int(timeEnd))

                            # stop at the first detected record
                            break

        # NOTE(review): the saved table holds only these three columns; any
        # other columns of df_meta (e.g. fullLength) are not carried over.
        df_meta_new = pd.DataFrame()
        df_meta_new['source_path'] = source_path_new
        df_meta_new['recordBegin'] = timeBegin_list
        df_meta_new['recordEnd'] = timeEnd_list

        save_meta(study, df_meta_new)
def find_paired_end():
    """
    Find the end time of every record and store it in the meta file.

    For each record the largest 'timeUnix' value of its truncated TEMP
    data is written to the 'recordEnd' column.  When 'recordCoregistered'
    lists more than one record (space separated), the end is instead the
    smallest of the end times of all listed records, i.e. the moment the
    first of the paired wearables stopped.

    Takes no arguments and returns nothing; the meta table is saved and
    printed once per study.
    """

    print("begin find_paired_end")

    study_list = retrieve_ref('study_list')

    format_type = 'truncate'
    sensor = 'TEMP'
    segment = 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        # empty column, filled row by row below
        df_meta['recordEnd'] = [None] * len(source_path)

        # there could be two wearables - or one
        # one wearable may have been turned off before the other;
        # if there are two, the earlier stop time is saved to the meta file
        for record in source_path:

            # end of this record: largest timestamp in its analyzed data
            df = retrieve_analyzed(study, format_type, record, segment, sensor)
            timeEndRecord = max(list(df['timeUnix']))

            # index label of the first meta row that holds this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordEnd'] = int(timeEndRecord)

            coregistered = df_meta.loc[i, 'recordCoregistered']

            if pd.isnull(coregistered):
                print('no pair found')
                continue

            # a value barely longer than the record name holds no second record
            if len(coregistered) <= 3 + len(record):
                continue

            # Look up every paired record with the same format, segment and
            # sensor as the lookup above so the end times are comparable.
            paired_ends = []
            for paired_record in str(coregistered).split(' '):
                df_paired = retrieve_analyzed(study, format_type,
                                              paired_record, segment, sensor)
                paired_ends.append(max(list(df_paired['timeUnix'])))

            # the pair ends when the first wearable stops
            df_meta.loc[i, 'recordEnd'] = int(min(paired_ends))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
# Example #6
# 0
def find_pairs():
    """
    Pair up records and note the pairs in the meta file.

    Record names are split on '_': the first field is read as the integer
    begin value and the second as the wearable name.  Two records form a
    pair when their begin values differ by less than 300 and their
    wearable names differ.  For a paired record the meta row receives the
    partner in 'pairedRecord', both names (space separated) in
    'recordCoregistered' and the later of the two begin values in
    'recordBegin'.  Rows sharing a 'recordBegin' are then reduced to the
    last one and the table is saved sorted by 'recordBegin'.
    """

    def split_name(name):
        # '<begin>_<wearable>...' -> (begin as int, wearable as str)
        fields = name.split('_')
        return int(fields[0]), str(fields[1])

    print("begin find_pairs")

    for study in retrieve_ref('study_list'):

        df_meta = retrieve_meta(study)
        print(df_meta)
        records = list(df_meta['source_path'])
        count = len(records)

        # by default every record is only coregistered with itself
        df_meta['pairedRecord'] = [None] * count
        df_meta['recordCoregistered'] = records
        df_meta['recordBegin'] = [None] * count
        df_meta['wearableName'] = [None] * count

        # tag each row with its wearable, then order the table by wearable
        for name in records:
            wearable = str(name.split('_')[1])
            row = df_meta[df_meta['source_path'] == name].index.values[0]
            df_meta.loc[row, 'wearableName'] = wearable
        df_meta = df_meta.sort_values(by='wearableName')

        for first in records:

            first_begin, first_wearable = split_name(first)

            # an unpaired record simply begins at its own timestamp
            row = df_meta[df_meta['source_path'] == first].index.values[0]
            df_meta.loc[row, 'recordBegin'] = first_begin

            for second in records:

                second_begin, second_wearable = split_name(second)

                # not a pair: same wearable, or begin values too far apart
                if (first_wearable == second_wearable
                        or abs(first_begin - second_begin) >= 300):
                    continue

                # the pair begins when the later of the two records starts
                df_meta.loc[row, 'pairedRecord'] = str(second)
                df_meta.loc[row, 'recordCoregistered'] = (
                    str(first) + ' ' + str(second))
                df_meta.loc[row, 'recordBegin'] = max(first_begin,
                                                      second_begin)

        save_meta(study, df_meta)

        # drop duplicated entries, keeping the last row for each begin value
        df_meta = df_meta.drop_duplicates('recordBegin', keep='last')
        df_meta = df_meta.sort_values(by='recordBegin')
        del df_meta['wearableName']
        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)