def main():
    """Import distance datapoints from the QS export CSV into the database.

    Each non-empty row yields one document with the raw start/end
    timestamps, the midnight-normalized date and the distance value.
    """
    path_to_export = os.path.join(PATH_TO_QS_EXPORT_FILE)
    with open(path_to_export) as fp:
        reader = csv.reader(fp, delimiter=';')
        next(reader, None)  # skip the header row
        for row in tqdm(reader):
            if not row:
                continue
            # the export nests comma-separated fields inside a single
            # semicolon-delimited column
            fields = row[0].split(',')
            start = dt.strptime(fields[0], '%d-%b-%Y %H:%M')
            end = dt.strptime(fields[1], '%d-%b-%Y %H:%M')
            # normalize to midnight of the start day (previously done via a
            # wasteful strftime/strptime round-trip; an unused `index`
            # variable was also removed)
            date = dt(start.year, start.month, start.day)
            distance = float(fields[2])
            # save to database
            document = {
                'date': date,
                'start': start,
                'end': end,
                'distance': distance,
            }
            path = ['raw_data', 'qs_export', 'distance']
            save_to_database(document, path=path)
def main():
    """Collapse high-resolution QS export datapoints into daily series.

    Datapoints are summed per day; for heart-rate-like categories the
    daily sum is divided by the number of datapoints to yield an average.
    """
    categories = [
        'active calories',
        'distance',
        'cycling distance',
        'steps',
        'flights climbed',
        'heart rate',
        'heart rate at rest',
        # 'heart rate variability',
    ]
    # one empty (all-None) daily series per category
    daily_series = {
        category: prepare_dict(config.START_DATE, config.END_DATE)
        for category in categories
    }
    averaged_categories = [
        'heart rate', 'heart rate at rest', 'heart rate variability'
    ]

    # transform high-precision dataset into daily (sum or avg)
    for category in categories:
        collection = config.MDB['raw_data']['qs_export'][category]
        samples_per_day = {}
        for entry in collection.find():
            day = dt.strptime(entry['date'].strftime('%Y-%m-%d'), '%Y-%m-%d')
            value = entry[category]
            if daily_series[category][day] is None:
                daily_series[category][day] = value
                samples_per_day[day] = 1.
            else:
                daily_series[category][day] += value
                samples_per_day[day] += 1.
        # heart-rate-like quantities: daily average instead of daily sum
        if category in averaged_categories:
            for day in daily_series[category]:
                if day in samples_per_day:
                    daily_series[category][day] /= samples_per_day[day]

    # save one document per category to the database
    for category, series in daily_series.items():
        dates = sorted(series.keys())
        document = {'date': dates, category: [series[d] for d in dates]}
        path = [
            'stats', 'time series', 'daily', 'health', 'activity', category
        ]
        save_to_database(document, path=path)
def main():
    """Import plain-text daily journal files into the database.

    Every file named ``YYYY-MM-DD.*`` in PATH_TO_DAILY_LOGS becomes one
    document carrying its content plus character and word counts.
    """
    log_files = sorted(os.listdir(PATH_TO_DAILY_LOGS))
    for file_name in tqdm(log_files):
        file_path = os.path.join(PATH_TO_DAILY_LOGS, file_name)
        # skip directories
        if os.path.isdir(file_path):
            continue
        with open(file_path) as fp:
            content = fp.readlines()
        # the date is encoded in the file name (YYYY-MM-DD)
        date = dt.strptime(file_name.split('.')[0], '%Y-%m-%d')
        # save to database
        document = {
            'date': date,
            'content': content,
            'nr of characters': len(''.join(content)),
            # NOTE: crude word count via single-space split
            'nr of words': len(' '.join(content).split(' ')),
        }
        save_to_database(document, path=['raw_data', 'daily_log'])
def main():
    """Compute the daily correlation matrix, store it and plot it."""
    df = prepare_datasets(dict(resolution='daily'))
    corr = get_correlation_matrix(df)
    # persist the matrix as plain nested lists, stamped with today's date
    document = {
        'correlation matrix': [list(row) for row in corr],
        'date': dt.now().strftime('%Y-%m-%d'),
    }
    save_to_database(document, path=['stats', 'correlations'])
    plot_correlation_matrix(df, corr)
def main(cat, subcat):
    """Build daily 0/1 time series for the sleep-note tags of one
    (category, subcategory) pair and save them to the database.

    Looks up the note translations under ``translate_notes[cat][subcat]``
    and, for every matching sleep-cycle entry, marks the entry's date
    with 1 (tag present) or 0 (tag absent after it first appeared).
    Silently returns when cat/subcat are unknown.
    """
    if cat not in translate_notes.keys():
        return
    if subcat not in translate_notes[cat].keys():
        return
    db_keys = translate_notes[cat][subcat].keys()

    # prepare empty time series (containing only None)
    foo = {}
    for db_key in db_keys:
        foo[db_key] = prepare_dict(START_DATE, END_DATE)

    # fill time series object
    for db_key in db_keys:
        collection = MDB['stats']['time series']['daily'][cat]
        # BUGFIX: clear the collection belonging to this db_key — the
        # original used the literal string 'db_key', which wiped an
        # unrelated collection and left the real one with stale data.
        collection[subcat][db_key].delete_many({})
        found_entry = False
        entries = config.MDB['raw_data']['sleep cycle'].find({})
        for entry in entries:
            found_translation = False
            for translation in translate_notes[cat][subcat][db_key]:
                if translation in entry['sleep notes']:
                    foo[db_key][entry['date']] = 1
                    found_entry = True
                    found_translation = True
                    break
            # once the tag has appeared at least once, days without it
            # count as explicit 0 (before that they stay None)
            if found_entry and not found_translation:
                foo[db_key][entry['date']] = 0

        # create document & save to database
        dates = sorted(foo[db_key].keys())
        timestamps = [d.timestamp() for d in dates]
        values = [foo[db_key][d] for d in dates]
        document = {
            'dates': dates,
            'timestamps': timestamps,
            'values': values,
            'category': cat,
            'subcategory': subcat,
            'title': db_key,
            'resolution': 'daily',
        }
        path = ['stats', 'time series', 'daily']
        save_to_database(document, path=path)
def main():
    """Fill daily 0/1 indicator series (weekend, seasons) and store them."""
    # strftime format + matching names backing each known indicator;
    # unknown db_keys keep their all-None series
    criteria = {
        'whether it was weekend': ('%a', ['Sat', 'Sun']),
        'whether it was spring': ('%b', ['Mar', 'Apr', 'May']),
        'whether it was summer': ('%b', ['Jun', 'Jul', 'Aug']),
        'whether it was fall': ('%b', ['Sep', 'Oct', 'Nov']),
        'whether it was winter': ('%b', ['Dec', 'Jan', 'Feb']),
    }
    foo = {db_key: prepare_dict(START_DATE, END_DATE) for db_key in db_keys}
    for db_key in db_keys:
        if db_key in criteria:
            fmt, names = criteria[db_key]
            for date in foo[db_key].keys():
                foo[db_key][date] = 1 if date.strftime(fmt) in names else 0

        # create document & save to database
        dates = sorted(foo[db_key].keys())
        timestamps = [d.timestamp() for d in dates]
        values = [foo[db_key][d] for d in dates]
        document = {
            'dates': dates,
            'timestamps': timestamps,
            'values': values,
            'category': 'various',
            'subcategory': 'seasons etc.',
            'title': db_key,
            'resolution': 'daily',
        }
        path = ['stats', 'time series', 'daily']
        save_to_database(document, path=path)
def main():
    """Import WhatsApp chat export files into the database.

    Joins multi-line messages into one logical message each, then stores
    one document per message.

    Returns:
        The (currently always empty) chat history list, kept for
        interface compatibility with callers.
    """
    chat_history = []
    for chat_file in os.listdir(PATH_TO_WHATSAPP_DATA):
        if chat_file == '.DS_Store':
            continue
        chat_name = chat_file.split('.')[0]
        # strip the " 2".." 9" suffix appended to duplicate exports
        if chat_name[-1] in [str(i) for i in range(1, 10)]:
            chat_name = chat_name[:-2]
        path_to_chat_file = os.path.join(PATH_TO_WHATSAPP_DATA, chat_file)
        with open(path_to_chat_file) as fp:
            content = fp.readlines()
        # make sure one line corresponds to exactly one message:
        # continuation lines (not starting with '[') belong to the
        # previous message
        tmp = []
        for line in content:
            if line.startswith('['):
                tmp.append(line)
            else:
                tmp[-1] += '\n' + line
        content = tmp
        # go through messages
        for line in content:
            # BUGFIX: split only at the FIRST ']' / ':' so messages that
            # themselves contain ']' or ':' are no longer truncated
            head, _, rest = line.partition(']')
            date_str = head[1:]  # drop the leading '['
            timestamp = dt.strptime(date_str, '%d.%m.%y, %H:%M:%S').timestamp()
            sender_part, _, message_part = rest[1:].partition(':')
            message_content = message_part[1:]
            sender = sender_part if sender_part != 'vinc' else 'Vincent Mader'
            receiver = chat_name if sender == 'Vincent Mader' else 'Vincent Mader'
            document = {
                'message_type': 'whatsapp',
                'chat_name': chat_name,
                'timestamp': timestamp,
                'sender': sender,
                'receiver': receiver,
                'message_content': message_content,
            }
            path = ['raw_data', 'whatsapp']
            save_to_database(document, path=path)
    return chat_history
def main():
    """Convert raw sleep-cycle entries into daily time-series documents."""
    # drop previously stored series from the database collection
    coll = MDB['stats']['time series']['daily']['health']['sleep analysis']
    for db_key in db_keys:
        coll[db_key].delete_many({})

    # empty (all-None) daily series, one per key
    series = {key: prepare_dict(START_DATE, END_DATE) for key in db_keys}

    # copy each raw datapoint into its series
    for entry in MDB['raw_data']['sleep cycle'].find({}):
        for key in db_keys:
            series[key][entry['date']] = entry[key]

    # create one document per key & save it to the database
    for key in db_keys:
        dates = sorted(series[key].keys())
        document = {
            'dates': dates,
            'timestamps': [d.timestamp() for d in dates],
            'values': [series[key][d] for d in dates],
            'category': 'health',
            'subcategory': 'sleep analysis',
            'title': key,
            'resolution': 'daily',
        }
        save_to_database(document, path=['stats', 'time series', 'daily'])
def main():
    """Import Spotify streaming-history exports into the database.

    Walks every export directory, reads the StreamingHistory*.json files
    and stores one document per playback, de-duplicating datapoints that
    occur in several overlapping exports by their end timestamp.
    """
    PATH_TO_SPOTIFY_DATA = os.path.join(PRD, 'spotify')
    # BUGFIX: this container was never filled in the original, so the
    # de-duplication check was dead code and overlapping exports were
    # imported multiple times; a set also makes membership tests O(1)
    seen_timestamps = set()
    for export_directory in sorted(os.listdir(PATH_TO_SPOTIFY_DATA)):
        if export_directory == '.DS_Store':
            continue
        path_to_export_directory = os.path.join(
            PATH_TO_SPOTIFY_DATA, export_directory)
        for export_file in os.listdir(path_to_export_directory):
            if not export_file.startswith('StreamingHistory'):
                continue
            path_to_export_file = os.path.join(
                path_to_export_directory, export_file)
            with open(path_to_export_file) as fp:
                content = json.load(fp)
            for datapoint in content:
                play_end_dt = dt.strptime(
                    datapoint['endTime'], '%Y-%m-%d %H:%M')
                if play_end_dt in seen_timestamps:
                    continue
                seen_timestamps.add(play_end_dt)
                document = {
                    'date': play_end_dt,
                    'artist name': datapoint['artistName'],
                    'track name': datapoint['trackName'],
                    'time played [s]': datapoint['msPlayed'] / 1000.
                }
                path = ['raw_data', 'spotify', 'artist name']
                save_to_database(document, path=path)
def main():
    """Import Facebook Messenger one-on-one chats into the database.

    Group chats (more than two participants) and messages without text
    content (photos, stickers, ...) are skipped.
    """
    path_to_inbox = os.path.join(PRD, 'facebook/json/messages/inbox')
    for dir_name in os.listdir(path_to_inbox):
        path_to_chat = os.path.join(path_to_inbox, dir_name, 'message.json')
        with open(path_to_chat) as fp:
            content = json.load(fp)
        participants = content['participants']
        if len(participants) > 2:  # TODO: handle group chats
            continue
        # TODO: make dict to translate chat ids into real names
        chat_id = dir_name.split('_')[0]
        # determine the chat partner's name; Facebook exports list
        # participants as {"name": ...} objects — TODO confirm this holds
        # for older export formats (string entries handled defensively)
        names = [p['name'] if isinstance(p, dict) else p
                 for p in participants]
        partners = [n for n in names if n != ME]
        partner = partners[0] if partners else chat_id
        for msg in content['messages']:
            sender = msg['sender_name']
            timestamp = int(msg['timestamp_ms']) / 1e3
            # BUGFIX: when I am the sender the receiver is the chat
            # partner; the original set `receiver = sender` in that case,
            # so the receiver field was always ME
            receiver = ME if sender != ME else partner
            try:
                message_content = msg['content']
            except KeyError:
                # message has no text payload (photo, sticker, ...)
                continue
            document = {
                'message type': 'facebook',
                'chat name': chat_id,
                'timestamp': timestamp,
                'sender': sender,
                'receiver': receiver,
                'message content': message_content,
                'message length': len(message_content),
            }
            path = ['raw_data', 'facebook', 'chat_history']
            save_to_database(document, path=path)
def main():
    """Import sleep datapoints (time to fall asleep, number of sleep
    cycles) from the QS export CSV into the database.

    Each non-empty row produces two documents, one per quantity.
    """
    path_to_export = os.path.join(PATH_TO_QS_EXPORT_FILE)
    with open(path_to_export) as fp:
        reader = csv.reader(fp, delimiter=';')
        next(reader, None)  # skip the header row
        for row in tqdm(reader):
            if not row:
                continue
            # the export nests comma-separated fields inside a single
            # semicolon-delimited column
            fields = row[0].split(',')
            start = dt.strptime(fields[0], '%d-%b-%Y %H:%M')
            end = dt.strptime(fields[1], '%d-%b-%Y %H:%M')
            # normalize to midnight of the start day (an unused `index`
            # variable and a strftime/strptime round-trip were removed)
            date = dt(start.year, start.month, start.day)
            time_to_fall_asleep = float(fields[4])
            nr_of_sleep_cycles = float(fields[5])
            # save both quantities as separate documents
            document = {
                'date': date,
                'time to fall asleep': time_to_fall_asleep,
            }
            path = ['raw_data', 'qs_export', 'time to fall asleep']
            save_to_database(document, path=path)
            document = {
                'date': date,
                'start': start,
                'end': end,
                'nr of sleep cycles': nr_of_sleep_cycles,
            }
            path = ['raw_data', 'qs_export', 'nr of sleep cycles']
            save_to_database(document, path=path)
def main():
    """Import Sleep Cycle CSV exports into the raw_data database.

    Handles two known export layouts: the old format ('sleepdata5.csv',
    smiley wake-up moods, fewer columns) and the new format
    ('sleepdata.csv', Good/OK/Bad moods, extra columns).

    NOTE(review): any other file starting with 'sleepdata' passes the
    name filter but matches neither branch, leaving the parsed variables
    undefined and raising NameError at document creation — confirm only
    the two known file names can occur.
    """
    PATH_TO_SLEEP_CYCLE_EXPORTS = os.path.join(
        config.PATH_TO_RAW_DATA, 'sleep_cycle'
    )
    for export in os.listdir(PATH_TO_SLEEP_CYCLE_EXPORTS):
        if not export.startswith('sleepdata'):
            continue
        path_to_export = os.path.join(PATH_TO_SLEEP_CYCLE_EXPORTS, export)
        with open(path_to_export) as fp:
            reader = csv.reader(fp, delimiter=';')
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:
                    continue
                # pull information from old export file (other format)
                if export == 'sleepdata5.csv':
                    start = dt.strptime(row[0], '%Y-%m-%d %H:%M:%S')
                    end = dt.strptime(row[1], '%Y-%m-%d %H:%M:%S')
                    # midnight of the night's start day
                    date = dt(start.year, start.month, start.day)
                    # NOTE(review): `index` is computed but never used
                    index = (date - config.START_DATE).days
                    # measurement_dates[index] = date
                    # regularity does not exist in the old format
                    regularity = None
                    # sleep quality comes as a percentage string, e.g. '87%'
                    sleep_quality = float(row[2][:-1]) / 100
                    wake_up_mood = row[4]
                    if wake_up_mood:
                        # old format encodes mood as smileys
                        wake_up_mood = {
                            ':)': 1,
                            ':|': 0,
                            ':(': -1
                        }[wake_up_mood]
                    else:
                        wake_up_mood = None
                    # fields not present in the old format
                    # heart_rate = None
                    # steps = None
                    # alarm_mode = None
                    air_pressure = None
                    city = None
                    movements_per_hour = None
                    hours_in_bed = (end - start).seconds / 3600.
                    time_asleep = None
                    time_before_sleep = None
                    window_start = None
                    window_stop = None
                    did_snore = None
                    snore_time = None
                    weather_temperature = None
                    weather_type = None
                    # sleep notes are a ':'-separated list; '0' means none
                    sleep_notes = row[5].split(
                        ':') if row[5] and row[5] != '0' else []
                # pull information from new export file
                elif export == 'sleepdata.csv':
                    start = dt.strptime(row[0], '%Y-%m-%d %H:%M:%S')
                    end = dt.strptime(row[1], '%Y-%m-%d %H:%M:%S')
                    # midnight of the night's start day
                    date = dt(start.year, start.month, start.day)
                    # NOTE(review): `index` is computed but never used
                    index = (date - config.START_DATE).days
                    # measurement_dates[index] = date
                    # percentage strings, e.g. '87%'
                    sleep_quality = float(row[2][:-1]) / 100
                    regularity = float(row[3][:-1]) / 100
                    wake_up_mood = row[4]
                    if wake_up_mood:
                        # new format encodes mood as words
                        wake_up_mood = {
                            'Good': 1,
                            'OK': 0,
                            'Bad': -1
                        }[wake_up_mood]
                    else:
                        wake_up_mood = None
                    # columns present in the export but not (yet) imported:
                    # heart_rate = None
                    # steps
                    # /alarm mode
                    air_pressure = row[8]
                    city = row[9]
                    movements_per_hour = row[10]
                    hours_in_bed = (end - start).seconds / 3600.
                    # time asleep
                    # time before sleep
                    # window start
                    # window Stop
                    # did snore
                    # snore time
                    # weather_temperature =
                    # weather type
                    # sleep notes are a ':'-separated list; '0' means none
                    sleep_notes = row[20].split(
                        ':') if row[20] and row[20] != '0' else []
                # create document & save to database
                document = {
                    'date': date,
                    'start': start,
                    'end': end,
                    'sleep quality': sleep_quality,
                    'sleep regularity': regularity,
                    'wake-up mood': wake_up_mood,
                    # /heart rate
                    # /steps
                    # /alarm mode
                    'air pressure': air_pressure,
                    'city': city,
                    'movements per hour': movements_per_hour,
                    'hours in bed': hours_in_bed,
                    # time asleep
                    # time before sleep
                    # window start
                    # window Stop
                    # did snore
                    # snore time
                    # weather temperature
                    # weather type
                    'sleep notes': sleep_notes,
                }
                path = ['raw_data', 'sleep cycle']
                save_to_database(document, path=path)