def find_records():
    """
    Create the initial metadata table for every study.

    For each study named in the 'study_list' reference, the entries of
    studies/<study>/source are listed and stored in a one-column
    ('source_path') DataFrame. That table is saved with save_meta and the
    number of entries is logged with record_to_summary under
    'Records found'.
    """
    print("begin find records")

    studies = retrieve_ref('study_list')
    # Looked up but not used below; kept so the same references are read.
    sensor_list = retrieve_ref('sensor_list')

    for study in studies:
        folders = os.listdir(os.path.join('studies', study, 'source'))

        df_meta = pd.DataFrame()
        df_meta['source_path'] = folders
        save_meta(study, df_meta)

        # The count is passed to the summary as a string.
        record_to_summary(study, 'Records found', str(len(folders)))

    print("completed find records")
def find_record_end():
    """
    Record the begin, end, and full length of every source record.

    For each study, the TEMP source of every record listed in the metadata
    is passed through timestamp_source. The record begin is the leading
    '_'-separated field of the record name parsed as an integer, the end is
    that begin plus 60 times the largest 'timeMinutes' value, and the
    largest 'timeMinutes' value (rounded to 4 places) is stored as
    'fullLength'. The metadata is saved once per study, and find_temp_end
    is called after all studies have been processed.
    """
    print("begin timestamp source")

    study_list = retrieve_ref('study_list')

    # Only the unsegmented TEMP source is timestamped here.
    format_type = 'source'
    segment = 'All'
    sensor = 'TEMP'

    for study in study_list:
        print('study = ' + str(study))

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        for column in ('recordBegin', 'recordEnd', 'fullLength'):
            df_meta[column] = [None] * len(source_path)

        # summarize what has been found so far
        record_to_summary(study, 'Records found', len(source_path))

        for record in source_path:
            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)

            # Largest elapsed time in the unedited record; computed once
            # and reused for both the end time and the full length.
            full_minutes = max(df_timestamped['timeMinutes'])

            # The record name starts with its begin time.
            record_begin = int(record.split('_')[0])

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordBegin'] = record_begin
            df_meta.loc[i, 'recordEnd'] = record_begin + 60 * full_minutes
            df_meta.loc[i, 'fullLength'] = round(full_minutes, 4)

        save_meta(study, df_meta)

    find_temp_end()
    # NOTE(review): the original text had a dangling triple-quote delimiter
    # after this call with no visible partner; it was removed. Confirm that
    # nothing outside this section relied on it.
def define_record():
    """
    Add the record begin, end, and length to each study's metadata.

    For every record, the formatted TEMP source file is read and passed to
    find_record_end_using_temp; the value it returns is stored as
    'recordEnd'. 'recordBegin' is copied from 'originalBegin', and
    'recordLength' is (recordEnd - recordBegin) / 60 rounded to 4 places.
    No rows are removed by this function.
    """
    studies = retrieve_ref('study_list')
    # Looked up but not used below; kept so the same references are read.
    min_record_time = retrieve_ref('min_record_time')
    max_record_time = retrieve_ref('max_record_time')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    for study in studies:
        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        for column in ('recordBegin', 'recordEnd', 'recordLength'):
            df_meta[column] = [None] * len(records)

        for record in records:
            row = df_meta[df_meta['source_path'] == record].index.values[0]

            begin = df_meta.loc[row, 'originalBegin']
            # Read but not used in the calculation below.
            original_end = df_meta.loc[row, 'originalEnd']
            original_length = df_meta.loc[row, 'originalLength']

            csv_path = os.path.join('studies', study, 'formatted',
                                    format_type, record, segment,
                                    sensor + '.csv')
            end = find_record_end_using_temp(pd.read_csv(csv_path))
            length = (end - begin) / 60

            df_meta.loc[row, 'recordBegin'] = begin
            df_meta.loc[row, 'recordEnd'] = end
            df_meta.loc[row, 'recordLength'] = round(length, 4)

        # save the metadata file
        save_meta(study, df_meta)
def define_original():
    """
    Record the original begin, end, and length of every record.

    For each study, 'recordName' is initialised from 'source_path'. Each
    record's TEMP source is timestamped with timestamp_source; the smallest
    and largest 'timeUnix' values (as integers) become 'originalBegin' and
    'originalEnd', and (end - begin) / 60 rounded to 4 places becomes
    'originalLength'. The metadata is saved, then rows whose
    'originalLength' is below the 'min_record_time' reference are dropped
    and the metadata is saved again.
    """
    studies = retrieve_ref('study_list')
    min_record_time = retrieve_ref('min_record_time')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    for study in studies:
        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        # add the columns that describe the original record
        df_meta['recordName'] = records
        for column in ('originalBegin', 'originalEnd', 'originalLength'):
            df_meta[column] = [None] * len(records)

        for record in records:
            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)
            unix_times = list(df_timestamped['timeUnix'])
            begin = int(min(unix_times))
            end = int(max(unix_times))
            length = (end - begin) / 60

            row = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[row, 'originalBegin'] = begin
            df_meta.loc[row, 'originalEnd'] = end
            df_meta.loc[row, 'originalLength'] = round(length, 4)

        # save the complete table first
        save_meta(study, df_meta)

        # then remove the records that are too short and save again
        too_short = df_meta[df_meta['originalLength'] < min_record_time]
        df_meta = df_meta.drop(too_short.index)
        save_meta(study, df_meta)
def pair_records():
    """
    Mark records in each study that appear to have been recorded together.

    Every record is compared with every record of the same study. Two
    records are treated as a pair when their 'recordBegin' values differ by
    less than 300, their names differ, and their 'wearableName' values
    differ. For a pair, the first record's 'coregisterRecords' entry is set
    to "<recordA> <recordB>"; otherwise it keeps the record's own name.
    The 'wearableName' column is supplied by add_wearableName.
    """
    studies = retrieve_ref('study_list')
    # Looked up but not used below; kept so the same references are read.
    sensor_list = retrieve_ref('sensor_list')

    for study in studies:
        df_meta = retrieve_meta(study)
        names = list(df_meta['recordName'])

        # wearable name per record, and a default of "paired with itself"
        df_meta = add_wearableName(df_meta)
        df_meta['coregisterRecords'] = names

        for name_a in names:
            row_a = df_meta[df_meta['recordName'] == name_a].index.values[0]
            begin_a = df_meta.loc[row_a, 'recordBegin']
            wearable_a = df_meta.loc[row_a, 'wearableName']

            for name_b in names:
                row_b = df_meta[df_meta['recordName'] == name_b].index.values[0]
                begin_b = df_meta.loc[row_b, 'recordBegin']
                wearable_b = df_meta.loc[row_b, 'wearableName']

                # Same test order as before: begin times first, then names,
                # then wearables.
                is_pair = (abs(begin_a - begin_b) < 300
                           and name_a != name_b
                           and wearable_a != wearable_b)
                if is_pair:
                    print('coregister record found for ' + name_a + ' + ' + name_b)
                    # A later match for the same record replaces an earlier one.
                    df_meta.loc[row_a, 'coregisterRecords'] = str(name_a + ' ' + name_b)

        save_meta(study, df_meta)
def add_embedded_to_meta():
    """
    Add a metadata row and a formatted file for each embedded record.

    For every metadata row whose 'embeddedRecord' value is greater than 0:

    * the formatted TEMP source of the record is read, limited to rows with
      'timeUnix' greater than the embedded begin, and passed to
      find_record_end_using_temp to obtain the end time;
    * a copy of the metadata row is appended with 'recordName' set to
      "<embedded begin>_<second '_' field of the record name>" and
      'recordBegin', 'recordEnd', 'recordLength' describing the embedded
      record;
    * the raw TEMP source of the record is timestamped with
      build_timestamps and written as TEMP.csv in the directory that
      build_path returns for the new record name.

    The metadata is then sorted by 'recordName' and saved.
    """
    study_list = retrieve_ref('study_list')

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    for study in study_list:
        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:
            print('record = ' + str(record))
            print('df_meta = ')
            print(df_meta)

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            embeddedRecord = float(df_meta.loc[i, 'embeddedRecord'])

            # 0 is the "no embedded record" value written by the detection step.
            if not embeddedRecord > 0:
                continue

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)

            recordBegin = int(embeddedRecord)
            df = df[df['timeUnix'] > recordBegin]
            timeEndUnix = find_record_end_using_temp(df)
            recordLength = (timeEndUnix - recordBegin) / 60

            record_split = record.split('_')
            recordName = str(recordBegin) + '_' + str(record_split[1])
            print('embedded recordName = ' + recordName)

            # Work on an explicit copy so the edits below do not write
            # through a slice of df_meta.
            df_row = df_meta[df_meta['source_path'] == record].copy()
            df_row.loc[i, 'recordName'] = recordName
            df_row.loc[i, 'recordBegin'] = recordBegin
            df_row.loc[i, 'recordEnd'] = int(timeEndUnix)
            df_row.loc[i, 'recordLength'] = round(recordLength, 4)
            print('df_row = ')
            print(df_row)

            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent call and keeps the row's index label as before.
            df_meta = pd.concat([df_meta, df_row])

            # Timestamp the raw source again and store it under the new name.
            raw_source = os.path.join('studies', study, format_type, record,
                                      sensor + '.csv')
            df_source = pd.read_csv(raw_source)
            df_timestamped = build_timestamps(df_source, sensor)

            path = build_path([
                'studies', study, 'formatted', format_type, recordName, segment
            ])
            csv_file = os.path.join(path, sensor + ".csv")
            df_timestamped.to_csv(csv_file)
            print('formatted source file = ' + str(csv_file))

        df_meta = df_meta.sort_values(by='recordName')
        save_meta(study, df_meta)
def find_embedded_records():
    """
    Look for a second record embedded inside a long recording.

    A record is examined only when its 'recordLength' plus the
    'min_record_time' reference is less than its 'originalLength'. Its
    formatted TEMP source is then scanned for a sample index j that
    satisfies all of:

    * timeMinutes[j] is more than 1 past the record length;
    * timeMinutes[j] + min_record_time is still below the original length;
    * the measurement is more than 2 higher at j + 12 and at j + 100, and
      more than 3 higher at j + 200.

    For a matching j, timeUnix[j + 12] (as an integer) is written to
    'embeddedRecord'. Records without a match keep the value 0. The
    metadata is saved once per study.
    """
    study_list = retrieve_ref('study_list')
    min_record_time = float(retrieve_ref('min_record_time'))

    format_type, segment, sensor = 'source', 'All', 'TEMP'

    # Furthest forward offset compared in the scan. Limiting the scan to
    # indices that have this many samples after them avoids the IndexError
    # the comparisons at j + 100 and j + 200 could otherwise raise near
    # the end of the data. No index past this limit could ever match,
    # because the j + 200 comparison is required.
    lookahead = 200

    for study in study_list:
        # retrieve the list of records from the metadata file
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        df_meta['embeddedRecord'] = [0] * len(source_path)
        print('df_meta = ')
        print(df_meta)

        for record in source_path:
            print('record = ' + str(record))

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            originalLength = float(df_meta.loc[i, 'originalLength'])
            recordLength = float(df_meta.loc[i, 'recordLength'])
            print('originalLength = ' + str(originalLength))
            print('recordLength = ' + str(recordLength))

            # Not enough data after the first record to hold a second one.
            if not recordLength + min_record_time < originalLength:
                continue

            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)
            print('df = ')
            print(df)

            timeUnix = list(df['timeUnix'])
            timeMinutes = list(df['timeMinutes'])
            measurements = list(df['measurement'])

            for j in range(len(measurements) - lookahead):
                found = (timeMinutes[j] > recordLength + 1
                         and timeMinutes[j] + min_record_time < originalLength
                         and measurements[j] + 2 < measurements[j + 12]
                         and measurements[j] + 2 < measurements[j + 100]
                         and measurements[j] + 3 < measurements[j + 200])
                if found:
                    # The scan does not stop here, so the last matching
                    # index is the one that ends up in the metadata.
                    secondRecordBegin = int(timeUnix[j + 12])
                    print('secondRecordBegin = ' + str(secondRecordBegin))
                    df_meta.loc[i, 'embeddedRecord'] = secondRecordBegin

        save_meta(study, df_meta)
def define_pairedRecords():
    """
    Compute the time window shared by each record and its coregistered set.

    For every record, 'coregisterRecords' is split on spaces when it is
    longer than the record name, otherwise it is used as a single name.
    The truncated TEMP file of each listed record is read; 12 is added to
    its smallest 'timeUnix' and subtracted from its largest. The record's
    'coregisterBegin' ends up as the largest of those adjusted minima and
    'coregisterEnd' as the smallest of the adjusted maxima. Finally the
    table is sorted by 'wearableName', de-duplicated on 'coregisterRecords'
    and then on 'coregisterBegin' (keeping the first row each time), sorted
    by 'recordBegin', and saved.
    """
    studies = retrieve_ref('study_list')
    # Looked up but not used below; kept so the same references are read.
    sensor_list = retrieve_ref('sensor_list')

    for study in studies:
        df_meta = retrieve_meta(study)
        names = list(df_meta['recordName'])

        # 0 means "not set yet" for both window columns.
        df_meta['coregisterBegin'] = [0] * len(names)
        df_meta['coregisterEnd'] = [0] * len(names)

        for name in names:
            row = df_meta[df_meta['recordName'] == name].index.values[0]
            paired = df_meta.loc[row, 'coregisterRecords']

            if len(paired) > len(name):
                # more than one record name is stored, separated by spaces
                members = paired.split(' ')
                print('coregisterRecords = ')
                print(members)
                print('coregisterRecords[0] = ')
                print(members[0])
            else:
                members = [paired]

            for member in members:
                print('coregisterRecords = ')
                print(members)
                print('item = ' + member)

                source = os.path.join('studies', study, 'formatted',
                                      'truncate', member, 'All',
                                      'TEMP' + '.csv')
                unix_times = list(pd.read_csv(source)['timeUnix'])
                window_start = int(min(unix_times) + 12)
                window_stop = int(max(unix_times) - 12)

                # keep the latest start seen so far
                current_start = df_meta.loc[row, 'coregisterBegin']
                if current_start < window_start or current_start == 0:
                    df_meta.loc[row, 'coregisterBegin'] = window_start

                # keep the earliest stop seen so far
                current_stop = df_meta.loc[row, 'coregisterEnd']
                if current_stop > window_stop or current_stop == 0:
                    df_meta.loc[row, 'coregisterEnd'] = window_stop

        # order by wearable, drop duplicates, then order by record begin
        df_meta = (
            df_meta.sort_values(by='wearableName')
            .drop_duplicates('coregisterRecords', keep='first')
            .drop_duplicates('coregisterBegin', keep='first')
            .sort_values(by='recordBegin')
        )
        save_meta(study, df_meta)
def find_temp_end():
    """
    Truncate each record's TEMP data at the detected end and log the result.

    For every record of every study, the TEMP data returned by
    retrieve_analyzed is first limited to rows whose 'timeMinutes' does not
    exceed the 'max_record_time' reference, then to rows whose
    'timeMinutes' does not exceed the value returned by
    find_record_end_from_temp. The truncated data is written to
    <study>/formatted/truncate/<record>/All/TEMP.csv. In the metadata,
    'truncatedLength' is set to the largest remaining 'timeMinutes'
    (rounded to 4 places) and 'recordEnd' to the largest remaining
    'timeUnix'. The metadata is saved once per study.
    """
    print("begin find temp end")

    study_list = retrieve_ref('study_list')
    max_record_time = float(retrieve_ref('max_record_time'))

    sensor = 'TEMP'
    segment = 'All'
    format_type = 'source'

    for study in study_list:
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['truncatedLength'] = [None] * len(source_path)

        for record in source_path:
            df_timestamped = retrieve_analyzed(study, format_type, record,
                                               segment, sensor)

            # Cap the record at the maximum allowed length before looking
            # for its end.
            df_timestamped = df_timestamped.drop(df_timestamped[
                df_timestamped['timeMinutes'] > max_record_time].index)

            time_end = find_record_end_from_temp(df_timestamped)
            df_timestamped = df_timestamped.drop(
                df_timestamped[df_timestamped['timeMinutes'] > time_end].index)

            # NOTE(review): unlike the other paths in this file, this one is
            # not rooted at 'studies' -- confirm the intended output location.
            folder = os.path.join(study, 'formatted', 'truncate', record,
                                  segment)
            # Creates any missing parent folders and tolerates existing ones.
            os.makedirs(folder, exist_ok=True)

            # The file goes inside the segment folder, which is where the
            # truncated TEMP file is read from elsewhere in this file.
            df_timestamped.to_csv(os.path.join(folder, sensor + ".csv"))

            truncatedLength = max(df_timestamped['timeMinutes'])

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'truncatedLength'] = round(truncatedLength, 4)
            # The end of the record is the last timestamp that survived the
            # truncation (previously the first timestamp was stored here).
            df_meta.loc[i, 'recordEnd'] = max(df_timestamped['timeUnix'])

        save_meta(study, df_meta)