def _format_date(day):
    """Format *day* as un-padded ``M/D/YYYY``.

    Built by hand because the ``%-m`` / ``%-d`` strftime flags are a
    platform extension and are not available everywhere.
    """
    return '{}/{}/{}'.format(day.month, day.day, day.year)


def _split_count_by_day(start_date, end_date, count):
    """Split *count* over the calendar days covered by a record.

    The first and last day each receive a share proportional to the time
    the record spent in that day; whatever is left is spread evenly over
    the full days in between.  Wall-clock times are used (the UTC offset
    is dropped), so the days are those written in the record itself.

    Returns a list of ``(date, share)`` pairs.
    """
    start = start_date.replace(tzinfo=None)
    end = end_date.replace(tzinfo=None)
    first_day = start.date()
    last_day = end.date()
    if first_day == last_day:
        return [(first_day, count)]

    seconds_per_day = 24 * 60 * 60
    start_midnight = start.replace(hour=0, minute=0, second=0, microsecond=0)
    end_midnight = end.replace(hour=0, minute=0, second=0, microsecond=0)
    # seconds left in the first day; a record starting exactly at midnight
    # owns the whole day, which also keeps the total below from being zero
    till_midnight = seconds_per_day - int(
        (start - start_midnight).total_seconds())
    # seconds already elapsed in the last day
    from_midnight = int((end - end_midnight).total_seconds())
    # full calendar days strictly between the first and the last day; this
    # must be counted on dates, not on the elapsed 24-hour periods
    mid_day_count = max((last_day - first_day).days - 1, 0)

    total_seconds = (till_midnight + seconds_per_day * mid_day_count +
                     from_midnight)
    count_before_midnight = round(till_midnight / total_seconds * count)
    count_after_midnight = round(from_midnight / total_seconds * count)

    shares = [(first_day, count_before_midnight),
              (last_day, count_after_midnight)]
    if mid_day_count:
        count_in_a_mid_day = round(
            (count - count_before_midnight - count_after_midnight) /
            mid_day_count)
        for offset in range(1, mid_day_count + 1):
            shares.append(
                (first_day + timedelta(days=offset), count_in_a_mid_day))
    return shares


def generate_data_by_date(apple_data_type, dataset_name, data_type):
    """Sum the values of matching records per day and save them as a dataset.

    Iterates the children of the module-level ``root``, keeps the
    ``Record`` elements whose ``type`` attribute equals *apple_data_type*
    and whose start date falls in ``YEAR``, and adds each record's integer
    ``value`` to the day(s) it covers.  Records crossing midnight are split
    between the days involved; only days inside ``YEAR`` are kept.

    The result, a list of ``{'date': 'M/D/YYYY', 'value': total}`` dicts
    sorted by date, is handed to ``_savedatasets.save_dataset`` together
    with ``_setup.NAME``, *dataset_name* and *data_type*.
    """
    totals = {}
    for child in root:
        attr = child.attrib
        # find the matching data type
        if child.tag != 'Record' or attr['type'] != apple_data_type:
            continue
        start_date = datetime.strptime(attr['startDate'],
                                       '%Y-%m-%d %H:%M:%S %z')
        end_date = datetime.strptime(attr['endDate'], '%Y-%m-%d %H:%M:%S %z')
        # only records that start in the requested year are considered
        if start_date.year != YEAR:
            continue
        count = int(attr['value'])
        for day, share in _split_count_by_day(start_date, end_date, count):
            if day.year == YEAR:
                totals[day] = totals.get(day, 0) + share

    # records are often out of order in the source data, so sort by day
    data_of_year = [
        dict(date=_format_date(day), value=totals[day])
        for day in sorted(totals)
    ]

    # save data as json
    _savedatasets.save_dataset(data_of_year, _setup.NAME, dataset_name,
                               data_type)
import csv
from datetime import datetime

import _setup
import _savedatasets

YEAR = _setup.YEAR

# usage per day, keyed by the raw DATE column and stored in hundredths so
# the running sum stays an exact integer
day_dict = dict()

# NOTE: the CSV must begin with the column-heading row; remove any lines
# that precede it before running this script
with open('data/pge_electric_interval_data.csv', newline='') as csvfile:
    for row in csv.DictReader(csvfile):
        date = row['DATE']
        # round, do not truncate: float('0.29') * 100 is 28.999999999999996,
        # which int() would turn into 28
        usage = round(float(row['USAGE']) * 100)
        day_dict[date] = day_dict.get(date, 0) + usage

data_of_year = []
for day in day_dict:
    day_p = datetime.strptime(day, '%m/%d/%y')
    # un-padded M/D/YYYY, built without the platform-specific %-m / %-d flags
    day_formatted = '{}/{}/{}'.format(day_p.month, day_p.day, day_p.year)
    # divide by 100 to get the daily sum back as a float
    data_of_year.append(dict(date=day_formatted, value=day_dict[day] / 100))

# save datasets
_savedatasets.save_dataset(data_of_year, _setup.PLACE, 'electricity-usage',
                           'electricity')
def addDate(date):
    """Insert a new entry for *date*, with a count of one, at the front of
    ``data_of_year``."""
    data_of_year.insert(0, dict(date=date, value=1))


def getScrobble(d):
    """Count one scrobble on the calendar day of the datetime *d*.

    Only the first entry of ``data_of_year`` is checked, so consecutive
    calls must come grouped by day for the per-day counts to be merged.
    """
    # un-padded M/D/YYYY, built without the platform-specific %-m / %-d flags
    date = '{}/{}/{}'.format(d.month, d.day, d.year)
    if data_of_year and data_of_year[0]['date'] == date:
        data_of_year[0]['value'] += 1
    else:
        addDate(date)


with open('data/' + _setup.LASTFM + '.csv', newline='') as csvfile:
    sheet = csv.DictReader(csvfile)
    for row in sheet:
        # The column carries no offset; it is treated as UTC, as the variable
        # name states.  Parsing it with an explicit +0000 makes the datetime
        # timezone-aware -- astimezone() on a naive value would silently
        # assume the system's local timezone instead.
        utc_date = datetime.strptime(row['date'] + ' +0000',
                                     '%d %b %Y %H:%M %z')
        date = utc_date.astimezone(DEFAULT_TIMEZONE)
        year = date.year
        if year == YEAR:
            getScrobble(date)
        elif year < YEAR:
            # stops at the first row older than YEAR; this presumes the rows
            # are ordered newest first -- verify against the export
            break

# save datasets
_savedatasets.save_dataset(data_of_year, _setup.LASTFM, 'lastfm', 'lastfm')
def get_tweet_info(d):
    """Count one tweet on the calendar day of the datetime *d*.

    Only the first entry of ``data_of_year`` is checked, so consecutive
    calls must come grouped by day for the per-day counts to be merged.
    """
    # un-padded M/D/YYYY, built without the platform-specific %-m / %-d flags
    date = '{}/{}/{}'.format(d.month, d.day, d.year)
    if data_of_year and data_of_year[0]['date'] == date:
        data_of_year[0]['value'] += 1
    else:
        add_date(date)


# collect data first
tweets = []
# the inputs are tweet.js and tweet-part1.js with the leading variable
# assignment removed, saved as tweet.json and tweet-part1.json
for file_name in ['tweet', 'tweet-part1']:
    with open('data/' + file_name + '.json') as file:
        data = json.load(file)
    for d in data:
        utc_date = datetime.strptime(d['created_at'],
                                     '%a %b %d %H:%M:%S %z %Y')
        date = utc_date.astimezone(DEFAULT_TIMEZONE)
        if date.year == YEAR:
            tweets.append(dict(date=date, id=d['id_str']))

# sort by date desc; the id is a string, so ordering on it would compare
# character by character and misplace ids with a different number of digits
for tweet in sorted(tweets, key=lambda x: x['date'], reverse=True):
    get_tweet_info(tweet['date'])

# save dataset
_savedatasets.save_dataset(data_of_year, _setup.TWITTER, 'twitter', 'twitter')
timestamp = dt.replace(tzinfo=pytz.utc).timestamp() dt_tz = datetime.fromtimestamp(timestamp).astimezone(pytz.utc).astimezone(DEFAULT_TIMEZONE) date = dt_tz.strftime('%-m/%-d/%Y') if current_date == '7/29/2018': print(date, current_date, row['totalOdometerMeters']) if current_date == '7/30/2018': print(date, current_date, row['totalOdometerMeters']) # get the very first odometer if first_odometer_of_date == 0 and dt_tz.year == YEAR: first_odometer_of_date = int(row['totalOdometerMeters']) # date change if date != current_date and first_odometer_of_date > 0: driving_distance = int(row['totalOdometerMeters']) - first_odometer_of_date if date == '7/30/2018' or date == '7/31/2018': print ('---', date, current_date, driving_distance) if driving_distance > 0 and dt_tz.year == YEAR: # meter to mile data_of_year.append(dict(date=current_date, value= math.ceil(driving_distance * 0.000621371 * 100) / 100 )) # reset for next day current_date = date first_odometer_of_date = int(row['totalOdometerMeters']) # save datasets _savedatasets.save_dataset(data_of_year, _setup.CAR, 'driving', 'driving')
# accumulate time spent at home location by day by_date[today] = by_date[ today] + from_midnight if today in by_date.keys( ) else from_midnight by_date[yesterday] = by_date[ yesterday] + till_midnight if yesterday in by_date.keys( ) else till_midnight # same date else: duration = (current_time - prev_time).seconds by_date[today] = by_date[ today] + duration if today in by_date.keys( ) else duration elif year < YEAR: break # convert dict to array data_of_year = [] for d in by_date: # save it as hour, record only valid points hour = round(by_date[d] / 60 / 60 * 10) / 10 if hour > 0.0: data_of_year.insert(0, dict(date=d, value=hour)) print(data_of_year) # save datasets _savedatasets.save_dataset(data_of_year, _setup.GOOGLE, 'time-at-' + _setup.PLACE, 'location')
data_of_year[len(data_of_year) - 1]['value'] += duration else: add_date(date, duration) with open('data/piano.json') as file: data = json.load(file) for i in range(len(data) - 1): d = data[i] # if the first start time isn't recorded # in case there's no start or stop time recorded, set it to 20 minutes diff_in_min = 20 if i == 0 and d[1] == 'stop': t = datetime.strptime(d[0], '%B %d, %Y at %I:%M%p') add_duration(datetime.strftime(t, '%-m/%-d/%Y'), diff_in_min) # find start time first elif d[1] == 'start': start_time = datetime.strptime(d[0], '%B %d, %Y at %I:%M%p') # end time should come in the next element end = data[i + 1] if end[1] == 'stop': end_time = datetime.strptime(end[0], '%B %d, %Y at %I:%M%p') # duration in minute diff_in_min = int((end_time - start_time).seconds / 60) date = datetime.strftime(start_time, '%-m/%-d/%Y') add_duration(date, diff_in_min) # save datasets _savedatasets.save_dataset(data_of_year, 'tanyoung', 'piano_practice', 'piano')