def update_scores(self, c):
    """Refresh entity scores, throttled by ``self.score_interval``.

    Returns immediately when fewer than ``self.score_interval`` seconds have
    elapsed since ``self.last_scored``.  Otherwise the ``decay`` SQL function
    is (re)registered on ``self.conn``, each eligible entity's
    ``current_score`` is set to the sum of ``decay(now - created_at)`` over
    its linked tweets, ``max_score`` is raised to ``current_score`` wherever
    it was exceeded, and ``self.last_scored`` is set to the time of this run.

    This method does not commit; that is left to the caller.

    Args:
        c: Cursor on which both UPDATE statements are executed.  Presumably
            opened on ``self.conn`` so that ``decay`` is visible to it --
            confirm against callers.
    """
    timestamp = time.time()
    elapsed = timestamp - self.last_scored
    if elapsed < self.score_interval:
        # Scored too recently; skip this round.
        return

    def _decay(age):
        # Argument is ``now - created_at``; presumably seconds, giving a
        # five-minute half-life -- TODO confirm the unit of created_at.
        return exp_decay(age, halflife=300)

    self.conn.create_function("decay", 1, _decay)

    # The correlated subquery yields NULL for an entity with no linked
    # tweets, so such an entity's current_score becomes NULL.
    refresh_current_sql = """
        UPDATE entities SET current_score = (
            SELECT sum(decay(? - created_at))
            FROM tweets t, tweet_entities x
            WHERE x.entity_id = entities.id AND x.tweet_id = t.id
            GROUP BY entity_id
        )
        WHERE current_score > 1 OR ?-last_occurrence < 60 -- ignore items we haven't seen in the last minute.
    """
    raise_max_sql = """
        UPDATE entities SET max_score = current_score
        WHERE current_score > max_score
    """

    # The same timestamp feeds both placeholders so the decay age and the
    # recency filter agree on "now".
    c.execute(refresh_current_sql, (timestamp, timestamp))
    c.execute(raise_max_sql)
    self.last_scored = timestamp
def __init__(self, db_name=DB_NAME, conn=None):
    """Open (or adopt) the SQLite connection and prepare it for scoring.

    A probe query is run against ``ENTITIES``; if it fails with
    ``sqlite3.OperationalError`` (which SQLite raises for a missing table),
    the schema is created by executing ``schema.sql`` from the ``here``
    directory.  The ``decay`` SQL function is then registered on the
    connection and the flush/score bookkeeping is initialised.

    Args:
        db_name: Database path handed to ``sqlite3.Connection`` when no
            ``conn`` is supplied.
        conn: Existing connection to use instead of opening ``db_name``.

    Raises:
        sqlite3.Error: If the probe fails for a reason other than
            ``OperationalError``, or if running the schema script fails.
        OSError: If ``schema.sql`` cannot be read.
    """
    if conn is None:
        conn = sqlite3.Connection(db_name)
    self.conn = conn

    c = conn.cursor()
    try:
        try:
            c.execute('SELECT 1 FROM ENTITIES')
        except sqlite3.OperationalError:
            # Only a failed probe means "schema missing"; anything else
            # (including KeyboardInterrupt) must propagate untouched.
            with open(os.path.join(here, 'schema.sql'), 'r') as f:
                c.executescript(f.read())
    finally:
        # Release the cursor even when schema creation fails.
        c.close()

    # SQL-callable decay; halflife=300 is presumably seconds (five
    # minutes) -- TODO confirm against exp_decay.
    self.conn.create_function(
        "decay", 1, lambda x: exp_decay(x, halflife=300))

    self.last_flushed = 0
    self.flush_interval = 300  # clear out old data every five minutes
    self.last_scored = 0
    self.score_interval = 1  # minimum seconds between score updates