def listing_to_time(row):
    """Convert a listing row's ``day`` and ``time`` fields to a datetime.

    The two fields are normalised by ``pad_day`` and ``pad_hour``, joined
    behind a hard-coded ``2016`` year, parsed with ``datetime.strptime``
    and handed to ``util.set_eastern``, whose result is returned.
    """
    clock_text = pad_hour(row['time'])
    day_text = pad_day(row['day'])

    # The pattern expects text shaped like "2016 Monday, February 01 07:30 PM".
    stamp = ' '.join(('2016', day_text, clock_text))
    parsed = datetime.strptime(stamp, '%Y %A, %B %d %I:%M %p')
    return util.set_eastern(parsed)
from datetime import datetime

import agate
from pytz import timezone

import util

# Print summary counts of ad airings since December 2015 and since
# February 2016, then a per-candidate table of airings since December.

# Relative path handed to util.load_boston_data.
io = '../../io/'
bos_custom = util.load_boston_data(io)


def _aired_on_or_after(table, cutoff):
    """Return the rows of *table* whose ``start_time_est`` is >= *cutoff*."""
    return table.where(lambda row: row['start_time_est'] >= cutoff)


# Airings from 1 December 2015 onward, and how many distinct archive_id
# values they cover.
dec = util.set_eastern(datetime(2015, 12, 1, 0))
since_dec = _aired_on_or_after(bos_custom, dec)
distinct_ads = since_dec.distinct('archive_id')
unique = len(distinct_ads.rows)
print('%d unique ads since dec' % unique)

# Airings from 1 February 2016 onward.
feb = util.set_eastern(datetime(2016, 2, 1, 0))
since_feb = _aired_on_or_after(bos_custom, feb)
print('%d ads aired since dec' % len(since_dec.rows))
print('%d ads aired since feb' % len(since_feb.rows))

# Airings per for_candidate value since December, largest count first.
counts_candidate = since_dec.counts('for_candidate')
print('Ads aired per candidate since dec:')
ranked = counts_candidate.order_by('count', reverse=True)
ranked.print_table()
from datetime import datetime

import agate
from pytz import timezone

import util

# Export the number of ad airings per hour of day to graphic_by_hour.csv.

# Relative path used both for loading the data and for writing the CSV.
io = '../../io/'
bos_custom = util.load_boston_data(io)

date = util.set_eastern(datetime(2015, 12, 1, 0))
# NOTE(review): since_date is computed but never read below -- the hourly
# counts are built from the unfiltered bos_custom table. Confirm whether the
# December cutoff was meant to apply to this export.
since_date = bos_custom.where(lambda row: row['start_time_est'] >= date)


def _start_hour(row):
    """Return the ``hour`` attribute of the row's ``start_time_est``."""
    return row['start_time_est'].hour


# Add an 'hour' column, then count rows per distinct hour value.
hour_column = ('hour', agate.Formula(agate.Number(), _start_hour))
with_hour = bos_custom.compute([hour_column])
counts_hour = with_hour.counts('hour')

# Keep only the two columns the graphic needs, ordered by hour.
hourly = counts_hour.order_by('hour').select(['hour', 'count'])
hourly.to_csv(io + 'graphic_by_hour.csv')
# NOTE(review): the line breaks of this section were lost, so several
# statements share one physical line; read it as the following sequence.
#
# 1. The opening statements (through `return date`) are the tail of a
#    function body whose `def` line is not part of this section. They join
#    '2016 ', pad_day(row['day']) and pad_hour(row['time']) into one string,
#    parse it with datetime.strptime using `listing_pattern`, and return the
#    result of util.set_eastern.
# 2. `listings_with_time` adds a `start_time_est` DateTime column to
#    `listings`, computed per row by listing_to_time.
# 3. `ads_recent` keeps bos_custom rows with
#    start_date <= start_time_est < end_date (2016-02-01 00:00 to
#    2016-02-09 00:00, each passed through util.set_eastern).
#    `listings_recent` uses the same upper bound but starts at
#    2016-01-31 19:00, five hours earlier -- presumably to match the
#    5-hour look-back in is_in_range; confirm.
# 4. is_in_range(spot_time, row) is true when row['start_time_est'] falls
#    in the closed interval [spot_time - 5 hours, spot_time].
#
# NOTE(review): `timedelta` is used by is_in_range but no import for it is
# visible in this section -- confirm it is imported elsewhere in the file.
# NOTE(review): as a single physical line, everything after the embedded
# `# add datetime for listings` marker is treated as a comment by Python;
# the original line breaks must be restored for that code to run.
time = pad_hour(row['time']) date = '2016 ' + pad_day(row['day']) time_str = date + ' ' + time time_str_dt = datetime.strptime(time_str, listing_pattern) date = util.set_eastern(time_str_dt) return date # add datetime for listings listings_with_time = listings.compute([ ('start_time_est', agate.Formula(agate.DateTime(), listing_to_time)) ]) start_date = util.set_eastern(datetime(2016, 2, 1, 0)) end_date = util.set_eastern(datetime(2016, 2, 9, 0)) start_date_listings = util.set_eastern(datetime(2016, 1, 31, 19)) ads_recent = bos_custom.where(lambda row: row['start_time_est'] >= start_date and row['start_time_est'] < end_date) listings_recent = listings_with_time.where(lambda row: row['start_time_est'] >= start_date_listings and row['start_time_est'] < end_date) def is_in_range(spot_time, row): begin_search_time = spot_time - timedelta(hours=5) end_search_time = spot_time begin = begin_search_time <= row['start_time_est'] end = end_search_time >= row['start_time_est'] return end and begin
from datetime import datetime

import agate
from pytz import timezone

import util

# Export daily ad-airing counts per candidate since 1 January 2016 to
# graphic_daily_by_candidate.csv.

# Relative path used both for loading the data and for writing the CSV.
io = '../../io/'
bos_custom = util.load_boston_data(io)

date = util.set_eastern(datetime(2016, 1, 1, 0))
since_date = bos_custom.where(lambda row: row['start_time_est'] >= date)


def _air_date(row):
    """Return the calendar date of the row's ``start_time_est``."""
    return row['start_time_est'].date()


# get total ads aired per day since january, by candidate
date_column = ('date', agate.Formula(agate.Date(), _air_date))
with_date = since_date.compute([date_column])
by_day_and_candidate = with_date.group_by('date').group_by('for_candidate')
with_count = by_day_and_candidate.aggregate([
    ('count', agate.Length()),
])

# NOTE(review): the second order_by re-sorts the whole table by
# for_candidate; whether the date ordering survives within each candidate
# depends on agate's sort being stable -- confirm.
ordered = with_count.order_by('date').order_by('for_candidate')
ordered.to_csv(io + 'graphic_daily_by_candidate.csv')