def process_async_responses(results):
    """Flatten the docs parsed from every successful (HTTP 200) response.

    A single epoch timestamp is taken once up front and applied to every
    response in this batch, so all docs from one call share a timestamp.
    """
    # TODO: find a more accurate way to timestamp each response —
    # better to pull it out of the response object itself.
    batch_ts = create_epoch_timestamp()
    return [
        doc
        for response in results
        if response.status_code == 200
        for doc in process_async_response(response, batch_ts)
    ]
def process_domains(domains):
    """Compute and publish trending "accelerands" for each domain.

    For every domain, pulls page data covering the most recent
    ACCELERATION_INTERVAL seconds and publishes either the calculated
    accelerands or a single "not enough data" placeholder entry.

    :param domains: iterable of domain identifiers accepted by
        ``get_page_data`` / ``publish_accelerands``.
    """
    now = int(create_epoch_timestamp())
    start_time = now - ACCELERATION_INTERVAL

    # BUG FIX: ACCELERATION_INTERVAL / 60 is true division in Python 3,
    # which made the message read "15.0-minute window" and compared the
    # moment count against a float. Integer division keeps the minute
    # count a clean int for both the message and the threshold.
    acceleration_minutes = ACCELERATION_INTERVAL // 60

    for domain in domains:
        data = get_page_data(domain, start_time)
        # TODO: check sqlite3's output vs. what Python dict.items()
        # delivers; this must be normalized so that changing the db
        # server does not lead to errors.
        df = pd.DataFrame(data)

        # Default payload when there are too few distinct timestamps
        # in the window to calculate a trend.
        trending = [{domain: 'Not enough data for %s-minute window'
                     % acceleration_minutes}]
        if not df.empty:
            # NOTE(review): `pageS_FIELDS` looks like a casing typo of
            # a module constant (PAGES_FIELDS?) — confirm and fix the
            # constant's name file-wide, not just here. TODO: revisit.
            df.columns = pageS_FIELDS
            # Require at least one distinct timestamp per minute of the
            # window before trusting the acceleration calculation.
            moments = pd.unique(df.timestamp.ravel())
            if len(moments) >= acceleration_minutes:
                trending = calculate_accelerands(df, now)
        publish_accelerands(domain, trending)
def calculcate_half_lookback():
    """Return the epoch timestamp from half an acceleration interval ago.

    NOTE(review): the name has a typo ("calculcate") but is kept
    unchanged for backward compatibility with existing callers.
    """
    half_interval = ACCELERATION_INTERVAL / 2
    return create_epoch_timestamp() - half_interval