def find_records():
    """
    list the record folders found under each study's source directory

    for every study named in the 'study_list' reference:
    the entries of studies/<study>/source are stored in a new metadata
    table (column 'source_path') and handed to save_meta
    the number of entries is logged to the summary as 'Records found'
    """

    print("begin find records")

    studies = retrieve_ref('study_list')
    # looked up in the same order as before; the value is not used here
    sensor_list = retrieve_ref('sensor_list')

    for study_name in studies:
        folders = os.listdir(os.path.join('studies', study_name, 'source'))

        # start the metadata table with one row per folder
        meta = pd.DataFrame()
        meta['source_path'] = folders
        save_meta(study_name, meta)

        folder_count = len(folders)
        record_to_summary(study_name, 'Records found', str(folder_count))

    print("completed find records")
def find_record_end():
    """
    record the begin, end, and full length of every source record

    for each study in the 'study_list' reference, the TEMP sensor of each
    record is timestamped with timestamp_source and three metadata columns
    are filled in:
    recordBegin = integer parsed from the record name before the first '_'
    recordEnd = recordBegin + 60 * (largest 'timeMinutes' value)
    fullLength = largest 'timeMinutes' value, rounded to 4 decimals

    the metadata is saved once per study
    find_temp_end() is called after all studies have been processed
    """

    print("begin timestamp source")

    study_list = retrieve_ref('study_list')

    # only the unsegmented temperature source is timestamped here
    format_type = 'source'
    segment = 'All'
    sensor = 'TEMP'

    for study in study_list:

        print('study = ' + str(study))

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['recordBegin'] = [None] * len(source_path)
        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['fullLength'] = [None] * len(source_path)

        # summarize what has been found so far
        record_to_summary(study, 'Records found', len(source_path))

        for record in source_path:
            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)

            # metadata row that describes this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]

            # the record name is expected to look like '<begin>_<suffix>'
            record_begin = int(record.split('_')[0])
            # scan the column once and reuse the result for both fields
            full_length = max(df_timestamped['timeMinutes'])

            # save the full length of the unedited record
            df_meta.loc[i, 'recordBegin'] = record_begin
            # minutes are converted with * 60 before being added to the begin
            df_meta.loc[i, 'recordEnd'] = record_begin + 60 * full_length
            df_meta.loc[i, 'fullLength'] = round(full_length, 4)

        save_meta(study, df_meta)

    find_temp_end()
def define_record():
    """
    define the begin time, end time, and length of each record
    record to the metadata

    for every study in the 'study_list' reference, the formatted TEMP file
    of each record is read and find_record_end_using_temp supplies the end
    recordBegin = the record's 'originalBegin' value
    recordEnd = value returned by find_record_end_using_temp
    recordLength = (recordEnd - recordBegin) / 60, rounded to 4 decimals

    no records are removed by this function
    """

    study_list = retrieve_ref('study_list')

    # every record is read from the same formatted source location
    format_type, segment, sensor = 'source', 'All', 'TEMP'

    # check each study
    for study in study_list:

        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['recordBegin'] = [None] * len(source_path)
        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['recordLength'] = [None] * len(source_path)

        for record in source_path:

            # metadata row that describes this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            recordBegin = df_meta.loc[i, 'originalBegin']

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)

            recordEnd = find_record_end_using_temp(df)
            # the difference is divided by 60 before it is stored
            recordLength = (recordEnd - recordBegin) / 60

            df_meta.loc[i, 'recordBegin'] = recordBegin
            df_meta.loc[i, 'recordEnd'] = recordEnd
            df_meta.loc[i, 'recordLength'] = round(recordLength, 4)

        # save the metadata file
        save_meta(study, df_meta)
def define_original():
    """
    define the original start time, end time, and length of each record
    record to the metadata
    remove any records shorter than the minimum length requirement

    for every study in the 'study_list' reference:
    recordName = copy of 'source_path'
    originalBegin = smallest 'timeUnix' of the timestamped TEMP source
    originalEnd = largest 'timeUnix' of the timestamped TEMP source
    originalLength = (originalEnd - originalBegin) / 60, rounded to 4 decimals
    rows with originalLength below 'min_record_time' are dropped before the
    metadata is saved
    """

    study_list = retrieve_ref('study_list')
    # converted to float, as the other functions do, so the comparison
    # below is numeric whatever form the reference value is stored in
    min_record_time = float(retrieve_ref('min_record_time'))

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    # check each study
    for study in study_list:

        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        # add the columns to define the original record
        df_meta['recordName'] = source_path
        df_meta['originalBegin'] = [None] * len(source_path)
        df_meta['originalEnd'] = [None] * len(source_path)
        df_meta['originalLength'] = [None] * len(source_path)

        # define the original length of each record
        for record in source_path:

            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)

            originalBegin = int(df_timestamped['timeUnix'].min())
            originalEnd = int(df_timestamped['timeUnix'].max())
            originalLength = (originalEnd - originalBegin) / 60

            # metadata row that describes this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]

            df_meta.loc[i, 'originalBegin'] = originalBegin
            df_meta.loc[i, 'originalEnd'] = originalEnd
            df_meta.loc[i, 'originalLength'] = round(originalLength, 4)

        # remove records that are too short, then save the metadata file once
        too_short = df_meta[df_meta['originalLength'] < min_record_time].index
        df_meta = df_meta.drop(too_short)
        save_meta(study, df_meta)
# Example #5
# 0
def pair_records():
    """
    flag records that have a partner record on a different wearable

    within each study two different records count as a pair when their
    'recordBegin' values differ by less than 300 and their 'wearableName'
    values are not the same
    'coregisterRecords' holds "<record> <partner>" for a paired record and
    the record's own name otherwise
    when several partners qualify, the last one checked is the one kept
    """

    studies = retrieve_ref('study_list')
    # looked up in the same order as before; the value is not used here
    sensor_list = retrieve_ref('sensor_list')

    for study_name in studies:

        meta = retrieve_meta(study_name)
        names = list(meta['recordName'])

        # add the wearable name and default every record to "unpaired"
        meta = add_wearableName(meta)
        meta['coregisterRecords'] = names

        # compare every record against every record of the same study
        for name_a in names:

            row_a = meta[meta['recordName'] == name_a].index.values[0]
            begin_a = meta.loc[row_a, 'recordBegin']
            wearable_a = meta.loc[row_a, 'wearableName']

            for name_b in names:

                row_b = meta[meta['recordName'] == name_b].index.values[0]
                begin_b = meta.loc[row_b, 'recordBegin']
                wearable_b = meta.loc[row_b, 'wearableName']

                # begin values must be closer than 300 to be considered
                if not abs(begin_a - begin_b) < 300:
                    continue

                # a record never pairs with itself
                if name_a == name_b:
                    continue

                # both halves of a pair come from different wearables
                if wearable_a == wearable_b:
                    continue

                print('coregister record found for ' + name_a + ' + ' +
                      name_b)
                pair_text = str(name_a + ' ' + name_b)
                meta.loc[row_a, 'coregisterRecords'] = pair_text

        save_meta(study_name, meta)
def add_embedded_to_meta():
    """
    add a metadata row and a formatted file for every embedded record

    for each study in the 'study_list' reference, every record whose
    'embeddedRecord' value is greater than 0 gets:
    a copy of its metadata row, appended to the table, with
        recordName = '<embeddedRecord>_<second part of the source name>'
        recordBegin = embeddedRecord as an integer
        recordEnd = find_record_end_using_temp applied to the formatted
                    TEMP rows whose 'timeUnix' is after recordBegin
        recordLength = (recordEnd - recordBegin) / 60, rounded to 4 decimals
    a timestamped copy of the raw TEMP source, written under the new
    recordName in the formatted folder

    the metadata is sorted by 'recordName' and saved once per study
    """

    study_list = retrieve_ref('study_list')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    # check each study
    for study in study_list:

        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:

            print('record = ' + str(record))
            print('df_meta = ')
            print(df_meta)

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            embeddedRecord = float(df_meta.loc[i, 'embeddedRecord'])

            # only values above 0 mark an embedded record
            if not embeddedRecord > 0:
                continue

            recordBegin = int(embeddedRecord)

            # find where the embedded record ends, looking only at the data
            # after its begin time
            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)
            df = df[df['timeUnix'] > recordBegin]
            timeEndUnix = find_record_end_using_temp(df)
            recordLength = (timeEndUnix - recordBegin) / 60

            # the source name is expected to look like '<begin>_<suffix>'
            record_split = record.split('_')
            recordName = str(recordBegin) + '_' + str(record_split[1])
            print('embedded recordName = ' + recordName)

            # work on a copy so the edits neither alias nor warn about the
            # df_meta selection they were taken from
            df_row = df_meta[df_meta['source_path'] == record].copy()
            df_row.loc[i, 'recordName'] = recordName
            df_row.loc[i, 'recordBegin'] = recordBegin
            df_row.loc[i, 'recordEnd'] = int(timeEndUnix)
            df_row.loc[i, 'recordLength'] = round(recordLength, 4)

            print('df_row = ')
            print(df_row)

            # DataFrame.append was removed in pandas 2.0; concat keeps the
            # same row order and index labels
            df_meta = pd.concat([df_meta, df_row])

            # timestamp the raw source again and store it under the new name
            source = os.path.join('studies', study, format_type, record,
                                  sensor + '.csv')
            df_source = pd.read_csv(source)
            df_timestamped = build_timestamps(df_source, sensor)

            # NOTE(review): build_path presumably creates and returns the
            # folder path - confirm against its definition
            path = build_path([
                'studies', study, 'formatted', format_type, recordName,
                segment
            ])
            file_path = os.path.join(path, sensor + ".csv")
            df_timestamped.to_csv(file_path)
            print('formatted source file = ' + str(file_path))

        df_meta = df_meta.sort_values(by='recordName')
        save_meta(study, df_meta)
def find_embedded_records():
    """
    check for long records
    look for a second sudden increase in temperature
    log where it begins in the metadata file

    for each study in the 'study_list' reference a column 'embeddedRecord'
    is added with 0 for every record
    a record is searched only when
    recordLength + min_record_time < originalLength
    a sample j is a candidate when
        timeMinutes[j] > recordLength + 1
        timeMinutes[j] + min_record_time < originalLength
        the measurement is more than 2 higher 12 samples later
        the measurement is more than 2 higher 100 samples later
        the measurement is more than 3 higher 200 samples later
    'embeddedRecord' is set to the 'timeUnix' value 12 samples after the
    candidate; when several samples qualify, the last one is kept
    """

    # look-ahead distances (in samples) and the rise required at each one
    near_offset, near_rise = 12, 2
    mid_offset, mid_rise = 100, 2
    far_offset, far_rise = 200, 3

    study_list = retrieve_ref('study_list')
    min_record_time = float(retrieve_ref('min_record_time'))

    # check each study
    for study in study_list:

        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['embeddedRecord'] = [0] * len(source_path)

        print('df_meta = ')
        print(df_meta)

        for record in source_path:

            print('record = ' + str(record))

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            originalLength = float(df_meta.loc[i, 'originalLength'])
            recordLength = float(df_meta.loc[i, 'recordLength'])

            print('originalLength = ' + str(originalLength))
            print('recordLength = ' + str(recordLength))

            # skip records with no room left for a second record
            if not recordLength + min_record_time < originalLength:
                continue

            format_type, segment, sensor = 'source', 'All', 'TEMP'
            source = os.path.join('studies', study, 'formatted',
                                  format_type, record, segment,
                                  sensor + '.csv')
            df = pd.read_csv(source)

            print('df = ')
            print(df)

            timeUnix = list(df['timeUnix'])
            timeMinutes = list(df['timeMinutes'])
            measurements = list(df['measurement'])

            # stop far_offset samples before the end so that every
            # look-ahead index stays inside the lists
            for j in range(len(measurements) - far_offset):

                if (timeMinutes[j] > recordLength + 1
                        and timeMinutes[j] + min_record_time < originalLength
                        and measurements[j] + near_rise <
                        measurements[j + near_offset]
                        and measurements[j] + mid_rise <
                        measurements[j + mid_offset]
                        and measurements[j] + far_rise <
                        measurements[j + far_offset]):

                    secondRecordBegin = int(timeUnix[j + near_offset])
                    print('secondRecordBegin = ' + str(secondRecordBegin))
                    # no break: a later qualifying sample overwrites this one
                    df_meta.loc[i, 'embeddedRecord'] = secondRecordBegin

        save_meta(study, df_meta)
# Example #8
# 0
def define_pairedRecords():
    """
    define the shared time window of each record and its coregister partner

    for every study in the 'study_list' reference the columns
    'coregisterBegin' and 'coregisterEnd' are added, starting at 0
    the 'coregisterRecords' entry is split on spaces when it is longer than
    the record name, otherwise it is used as a single name
    for each listed name the truncated TEMP file is read and
        coregisterBegin is raised to (smallest timeUnix + 12)
        coregisterEnd is lowered to (largest timeUnix - 12)
    with a stored 0 always being replaced
    the table is then sorted by 'wearableName', de-duplicated on
    'coregisterRecords' and on 'coregisterBegin' (first row kept), sorted by
    'recordBegin', and saved
    """
    studies = retrieve_ref('study_list')
    # looked up in the same order as before; the value is not used here
    sensor_list = retrieve_ref('sensor_list')

    # location of the truncated temperature files
    format_type, segment, sensor = 'truncate', 'All', 'TEMP'

    for study_name in studies:

        meta = retrieve_meta(study_name)
        names = list(meta['recordName'])
        meta['coregisterBegin'] = [0] * len(names)
        meta['coregisterEnd'] = [0] * len(names)

        for name in names:

            row = meta[meta['recordName'] == name].index.values[0]
            entry = meta.loc[row, 'coregisterRecords']

            if len(entry) <= len(name):
                # nothing beyond the record's own name is stored
                members = [entry]
            else:
                members = entry.split(' ')
                print('coregisterRecords = ')
                print(members)

                print('coregisterRecords[0] = ')
                print(members[0])

            for member in members:

                print('coregisterRecords = ')
                print(members)
                print('item = ' + member)

                csv_path = os.path.join('studies', study_name, 'formatted',
                                        format_type, member, segment,
                                        sensor + '.csv')
                unix_times = list(pd.read_csv(csv_path)['timeUnix'])

                # 12 is trimmed from both ends of the member's time range
                unixMin = int(min(unix_times) + 12)
                unixMax = int(max(unix_times) - 12)

                # keep the latest begin seen so far (0 means not set yet)
                stored_begin = meta.loc[row, 'coregisterBegin']
                if stored_begin == 0 or stored_begin < unixMin:
                    meta.loc[row, 'coregisterBegin'] = unixMin

                # keep the earliest end seen so far (0 means not set yet)
                stored_end = meta.loc[row, 'coregisterEnd']
                if stored_end == 0 or stored_end > unixMax:
                    meta.loc[row, 'coregisterEnd'] = unixMax

        # keep one row per pair, then put the table in begin order
        meta = meta.sort_values(by='wearableName')
        meta = meta.drop_duplicates('coregisterRecords', keep='first')
        meta = meta.drop_duplicates('coregisterBegin', keep='first')
        meta = meta.sort_values(by='recordBegin')
        save_meta(study_name, meta)
# Example #9
# 0
def find_temp_end():
    """
    truncate each temperature record at its detected end
    save the truncated data and log the result in the metadata

    for each record of each study in the 'study_list' reference:
    the TEMP data from retrieve_analyzed is cut at 'max_record_time'
    minutes, then at the end time returned by find_record_end_from_temp
    the remaining rows are written to
    <study>/formatted/truncate/<record>/All/TEMP.csv
    truncatedLength = largest remaining 'timeMinutes', rounded to 4 decimals
    recordEnd = largest remaining 'timeUnix'
    """

    print("begin find temp end")

    study_list = retrieve_ref('study_list')
    max_record_time = float(retrieve_ref('max_record_time'))

    sensor = 'TEMP'
    segment = 'All'
    format_type = 'source'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['truncatedLength'] = [None] * len(source_path)

        for record in source_path:

            df_timestamped = retrieve_analyzed(study, format_type, record,
                                               segment, sensor)

            # discard everything past the maximum allowed record time
            df_timestamped = df_timestamped.drop(df_timestamped[
                df_timestamped['timeMinutes'] > max_record_time].index)

            # discard everything past the detected end of the record
            time_end = find_record_end_from_temp(df_timestamped)
            df_timestamped = df_timestamped.drop(
                df_timestamped[df_timestamped['timeMinutes'] > time_end].index)

            # NOTE(review): this path has no 'studies' root, unlike the
            # reader in define_pairedRecords - confirm which is intended
            folder = os.path.join(study, 'formatted', 'truncate', record,
                                  segment)
            # creates any missing parent folders and tolerates existing ones
            os.makedirs(folder, exist_ok=True)
            # the file goes inside the segment folder that was just created
            df_timestamped.to_csv(os.path.join(folder, sensor + ".csv"))

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            truncatedLength = max(df_timestamped['timeMinutes'])
            df_meta.loc[i, 'truncatedLength'] = round(truncatedLength, 4)
            # the end is the last remaining timestamp, not the first one
            df_meta.loc[i, 'recordEnd'] = max(df_timestamped['timeUnix'])

        save_meta(study, df_meta)