def calculate_moving_average(term, day, window_size):
    """Average a term's Google scores over the window that ends on ``day``.

    The window covers scores whose day is strictly after
    ``day - window_size`` days and no later than ``day``.

    Args:
        term: Object whose ``scores`` attribute supports
            ``.filter(...).all()``.
        day: Last day of the window (inclusive).
        window_size: Window length in days; also the number of scores
            that must be present for an average to be produced.

    Returns:
        The sum of the score values divided by ``window_size``. A falsy
        sum (e.g. zero) is returned unchanged without dividing. ``None``
        is returned, after logging a warning, when the number of scores
        found differs from ``window_size``.
    """
    window_start = day - timedelta(days=window_size)
    in_window = term.scores.filter(
        GoogleScore.day > window_start,
        GoogleScore.day <= day,
    ).all()

    if len(in_window) == window_size:
        total = sum(score.value for score in in_window)
        # A falsy total is handed back as-is rather than divided.
        return total / window_size if total else total

    message = 'Not enough data to average %s on %s by %d days' % (term, day, window_size)
    logger.warn(message)
    return None
def calculate_twitter_scores(model, start, end):
    """Run tweet collection and ``shrew`` for each day in the range.

    The range starts seven days before ``start`` and stops before ``end``
    (``end`` itself is not processed).

    Args:
        model: Not used by this function.
        start: First requested day; processing begins a week earlier.
        end: Exclusive upper bound of the range.

    Returns:
        Always an empty list.
    """
    # Go back an extra week so we can average over 7 days
    first_day = start - timedelta(days=7)
    total_days = (end - first_day).days
    days = [first_day + timedelta(days=offset) for offset in xrange(total_days)]

    for current in days:
        try:
            collect_tweets(current)
            shrew(current)
        except FluDetectorError as err:
            # A failure for one day is logged and the loop moves on.
            logger.warn(err.message)
        except sh.ErrorReturnCode_1 as err:
            logger.error(err.stdout)

    return []
def run_batch(batch, start, end):
    """Fetch Google scores for a batch and add them to the DB session.

    Each score is added to ``db.session`` together with a ``GoogleLog``
    row recording the score's day and the current UTC time. The fetch is
    retried on ``HttpError`` with a delay of ``3 ** attempt`` seconds
    between attempts.

    Args:
        batch: Passed through to ``collect_google_scores``.
        start: Passed through to ``collect_google_scores``.
        end: Passed through to ``collect_google_scores``.

    Raises:
        HttpError: If the fetch still fails on the final attempt.
    """
    max_attempts = 5
    for attempt in xrange(1, max_attempts + 1):
        try:
            # Materialise the result before touching the session. If
            # collect_google_scores yields lazily, a failure part-way
            # through would otherwise leave already-added rows in the
            # session and the retry would add them a second time.
            scores = list(collect_google_scores(batch, start, end))
        except HttpError:
            # The original compared against 6, a value the loop never
            # reaches, so the final failure was swallowed after a long
            # sleep. Re-raise on the last attempt instead.
            if attempt == max_attempts:
                raise
            delay = 3 ** attempt
            logger.warn('HTTP error on attempt %d, sleeping and trying again in %d seconds' % (attempt, delay))
            time.sleep(delay)
        else:
            for gs in scores:
                db.session.add(gs)
                gl = GoogleLog()
                gl.score_date = gs.day
                gl.score_timestamp = datetime.utcnow()
                db.session.add(gl)
            return