def _load_feed_statistics(self):
    '''
    Loads the average_peak_score and std_deviation of every feed from the
    database and stores them in self._feed_statistics as a tuple
    (average_peak_score, std_deviation, feed_n) keyed by feed_id.

    Also computes self._feed_statistics[0] as the pseudo-global
    (average_peak_score, std_deviation, no_of_feeds), where the global
    std_deviation is approximated as
    (std_dev_sum / no_of_feeds) * global average_peak_score.
    The true global values are computable, but feeds operate at very
    different scales, so a properly pooled standard deviation would be
    dominated by the largest feeds and be useless as a fallback; this
    heuristic keeps the global entry usable.

    Reminder: Std_dev(X) = sqrt( E[X^2] - E[X]^2 )
    '''
    self._feed_statistics = dict()
    dbi = DatabaseInterface.get_shared_instance()
    dbcursor = dbi.execute(
        "SELECT feed_id, sum_x, sum_x2, feed_n FROM news_social_score_feed_statistics",
        None)
    avg_sum = float(0)
    std_dev_sum = float(0)
    i = 0
    for row in dbcursor.fetchall():
        feed_id = int(row['feed_id'])
        feed_n = row['feed_n']
        E_x = float(row['sum_x']) / feed_n
        E_x2 = float(row['sum_x2']) / feed_n
        # max(0, ...) guards against float rounding pushing the variance
        # slightly below zero, which would crash math.sqrt.
        std_dev = math.sqrt(max(0, E_x2 - (E_x * E_x)))
        self._feed_statistics[feed_id] = (E_x, std_dev, feed_n)
        avg_sum += E_x
        std_dev_sum += (float(std_dev) / max(1, E_x))  # coefficient of variation, floored
        i += 1
    if i == 0:
        i = 1
    avg_avg = avg_sum / i
    avg_std_dev = (std_dev_sum / i) * avg_avg
    if avg_std_dev == 0:
        avg_std_dev = 1  # avoid division by zero downstream
    # Heuristic global entry; see the docstring for why this is not a true
    # pooled standard deviation.
    self._feed_statistics[0] = (avg_avg, avg_std_dev, i)
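# The running-sums trick above is worth spelling out: keeping sum_x, sum_x2
# and feed_n per feed lets the mean and standard deviation be recovered in
# O(1) without rescanning every story. A minimal standalone sketch of the
# same math (hypothetical helper, not part of the original class):
def mean_and_std_from_sums(sum_x, sum_x2, n):
    '''Recover (mean, std) from running sums, mirroring the identity used
    in _load_feed_statistics: Std_dev(X) = sqrt(E[X^2] - E[X]^2).'''
    if n <= 0:
        return (0.0, 0.0)
    mean = float(sum_x) / n
    variance = float(sum_x2) / n - mean * mean
    return (mean, math.sqrt(max(0.0, variance)))  # clamp float rounding

# e.g. mean_and_std_from_sums(10.0, 30.0, 5) -> (2.0, 1.4142...)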
def prepare_update(self):
    dbi = DatabaseInterface.get_shared_instance()
    dbi.execute('TRUNCATE news_social_score_update', None)
    dbi.autocommit(False)
    if self.update_all:
        # Tested; seems to work.
        # Copy the whole of news_social_score_all in chunks, using the
        # MAX(story_id) already copied as a keyset cursor so each pass
        # resumes where the previous chunk ended.
        rowcount = 1
        while rowcount > 0:
            cursor = dbi.execute(
                "INSERT INTO news_social_score_update "
                "(story_id,last_update,old_raw_score,total_shares,created,state) "
                "( SELECT story_id,last_update,raw_score,total_shares,created,'READY' "
                "FROM news_social_score_all "
                "WHERE story_id > ( SELECT IFNULL(MAX(story_id), 0) FROM news_social_score_update ) "
                "LIMIT %s)",
                (SocialScoreUpdater.story_chunk_size,))
            rowcount = cursor.rowcount
            dbi.commit()
    else:
        # Tested; seems to work.
        # The INSERT...SELECT below takes no write lock on
        # news_social_score_active, so concurrent SELECTs on the active set
        # are unaffected. The active set is assumed to be small enough for
        # the MySQL server to handle in a single statement.
        dbi.execute(
            "INSERT INTO news_social_score_update (story_id,old_raw_score,state) "
            "( SELECT story_id,raw_score,'READY' FROM news_social_score_active )",
            None)
        dbi.execute(
            "UPDATE news_social_score_update JOIN news_social_score_all USING(story_id) SET "
            "news_social_score_update.total_shares = news_social_score_all.total_shares, "
            "news_social_score_update.last_update = news_social_score_all.last_update, "
            "news_social_score_update.created = news_social_score_all.created",
            None)
        dbi.commit()
    dbi.autocommit(True)
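# The "re-run a LIMITed DML statement until it affects no rows" pattern above
# also appears in update_permanent_scores and build_new_active_set below. A
# hypothetical helper (not in the original source) that captures it:
def run_in_chunks(dbi, sql, params, commit_each=True):
    '''Re-execute a LIMITed INSERT/UPDATE until it affects zero rows.
    Assumes the statement makes forward progress on every pass (e.g. via a
    keyset WHERE clause or a state-column filter); otherwise this loops
    forever.'''
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(sql, params)
        rowcount = cursor.rowcount
        if commit_each:
            dbi.commit()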
def rotate_active_set(self):
    '''
    Swaps in the newly built active set table by renaming the old one to
    news_social_score_active_old and the new one to news_social_score_active.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    dbi.execute("DROP TABLE IF EXISTS news_social_score_active_old", None)
    # Keep the outgoing table around as *_old, just in case.
    # RENAME TABLE performs both renames in one atomic statement, so readers
    # never observe a missing news_social_score_active.
    dbi.execute(
        "RENAME TABLE news_social_score_active TO news_social_score_active_old, "
        "news_social_score_active_new TO news_social_score_active",
        None)
    dbi.commit()
    dbi.autocommit(True)
def load_feed_ids(self):
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT feed_id,facebook_page_id FROM news_feeds WHERE facebook_page_id IS NOT NULL",
        None)
    rows = cursor.fetchall()
    self.feed_ids = dict()
    for row in rows:
        if row['facebook_page_id'] is None:
            continue
        self.feed_ids[row['facebook_page_id']] = row['feed_id']
def add_new_story(story_id):
    '''
    Adds a story to both the active and the global set. Responsible for the
    part that gives newer statistics higher weightages.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    dbi.execute(
        "INSERT INTO news_social_score_all ( story_id, created ) "
        "(SELECT story_id,created FROM news_stories WHERE story_id=%s)",
        (story_id,))
    dbi.execute(
        "INSERT INTO news_social_score_active ( normalized_score, story_id, raw_score ) "
        "(SELECT 0,story_id,0 FROM news_stories WHERE story_id=%s)",
        (story_id,))
    # The feed statistics are fetched here but never consumed; the cursor is
    # currently unused.
    cursor = dbi.execute(
        "SELECT feed_id,sum_x,sum_x2,feed_n FROM news_social_score_feed_statistics "
        "JOIN news_stories USING(feed_id) WHERE story_id=%s",
        (story_id,))
    dbi.commit()
    dbi.autocommit(True)
def load_category_interests(self):
    '''
    Loads the specified user's interest in every category.
    '''
    self.categories = dict()
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT category_id,read_count FROM news_user_interests_read_count WHERE user_id=%s",
        (self.user_id,))
    rows = cursor.fetchall()
    self._process_db_rows(rows)
    self._compute_interests()
    return self.categories
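# _process_db_rows and _compute_interests are not shown in this excerpt. A
# minimal sketch of what they plausibly do, assuming interests are read
# counts normalized by the user's total reads (method names come from the
# source, but these bodies are guesses, not the original implementation):
def _process_db_rows(self, rows):
    # Stash the raw read counts keyed by category_id.
    self._read_counts = dict()
    for row in rows:
        self._read_counts[row['category_id']] = row['read_count']

def _compute_interests(self):
    # Normalize so interests sum to 1; an unread category gets 0.
    total = sum(self._read_counts.values())
    for category_id, count in self._read_counts.items():
        self.categories[category_id] = float(count) / total if total else 0.0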
def update_scores(self):
    '''
    Computes and updates the fields new_raw_score, new_normalized_score,
    last_update, total_shares and state in news_social_score_update,
    <story_chunk_size> stories at a time.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    self._load_feed_statistics()
    while True:
        cursor = dbi.execute(
            "SELECT story_id, feed_id, news_social_score_update.created, url, "
            "old_raw_score, total_shares, last_update FROM "
            "news_social_score_update JOIN news_stories USING(story_id) "
            "WHERE news_social_score_update.state='READY' LIMIT %s",
            (SocialScoreUpdater.story_chunk_size,))
        if cursor.rowcount == 0:
            break
        stories = list()
        urls = list()
        for row in cursor.fetchall():
            stories.append(SocialScoreUpdater.SocialScoreStory(
                row['story_id'], row['url'], row['feed_id'],
                row['old_raw_score'], row['total_shares'], row['last_update']))
            urls.append(row['url'])

        # Get the share counts from Facebook's Open Graph.
        ogscraper = OpenGraphShareScraper()
        ogscraper.set_urls(urls)
        ogscraper.update_og_shares()
        og_shares = ogscraper.get_result()

        # Compute the new score for each story and update it in the DB, but
        # commit the whole chunk at once.
        time_now = int(time.time())
        for story in stories:
            if story.url not in og_shares:
                new_total_shares = 0  # Nothing we can do; Facebook has no record of it.
            else:
                new_total_shares = og_shares[story.url]
            shares_since = new_total_shares - story.total_shares
            story.total_shares = new_total_shares
            story.updated_raw_score = self._compute_updated_raw_score(
                story, shares_since, time_now)
            story.normalized_score = self._normalize_across_feeds(
                story.updated_raw_score, story.feed_id)
            query_params = (story.updated_raw_score, story.total_shares,
                            time_now, story.normalized_score, story.story_id)
            dbi.execute(
                "UPDATE news_social_score_update SET new_raw_score=%s, total_shares=%s, "
                "last_update=%s, new_normalized_score=%s, state='SCORE_COMPUTED' "
                "WHERE story_id=%s",
                query_params)
        dbi.commit()
    dbi.autocommit(True)
    # Works correctly in testing; efficiency is untested.
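# Neither _compute_updated_raw_score (presumably some time-decayed function
# of shares_since) nor _normalize_across_feeds appears in this excerpt. A
# minimal sketch of _normalize_across_feeds, assuming it is a z-score
# against the per-feed statistics loaded by _load_feed_statistics, falling
# back to the heuristic global entry at key 0 (the z-score and the fallback
# rule are assumptions, not the source's confirmed method):
def _normalize_across_feeds(self, raw_score, feed_id):
    # _feed_statistics maps feed_id -> (mean, std_dev, feed_n); key 0 is
    # the pseudo-global fallback and always exists after loading.
    mean, std_dev, feed_n = self._feed_statistics.get(
        feed_id, self._feed_statistics[0])
    if std_dev == 0:
        std_dev = 1  # same floor _load_feed_statistics applies globally
    return (raw_score - mean) / std_dev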
def add_read_count(self, read_count, category_ids):
    '''
    Increments the read count of each of the specified category_ids by
    read_count. Tested; seems to work.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    dbi.autocommit(False)
    for category_id in category_ids:
        dbi.execute(
            "INSERT INTO news_user_interests_read_count (user_id,category_id,read_count) "
            "VALUES( %s,%s,%s ) "
            "ON DUPLICATE KEY UPDATE read_count = read_count + %s",
            (self.user_id, category_id, read_count, read_count))
    dbi.commit()
    dbi.autocommit(True)
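# Usage sketch (illustrative ids only, not from the source): record that a
# user read one story tagged with categories 3 and 7, then reload interests.
#
#   interests = UserCategoryInterests(42)   # hypothetical user_id
#   interests.add_read_count(1, [3, 7])     # one read, two categories
#   interests.load_category_interests()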
def update_permanent_scores(self):
    '''
    Updates news_social_score_all, which is the main table;
    news_social_score_active is just a cache of the active set.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE ( SELECT story_id FROM news_social_score_update "
            "WHERE state='CONSIDERED_IN_SET' LIMIT %s) t_dummy "
            "JOIN news_social_score_update t_ud JOIN news_social_score_all t_all "
            "ON t_dummy.story_id = t_ud.story_id AND t_dummy.story_id = t_all.story_id SET "
            "t_all.reflected_in_stats = IF( (t_ud.new_raw_score > t_all.peak_score "
            "AND t_all.reflected_in_stats='UPTODATE'), 'OUTDATED', t_all.reflected_in_stats), "
            "t_ud.state='UPDATED_STORIES', t_all.total_shares = t_ud.total_shares, "
            "t_all.raw_score = t_ud.new_raw_score, "
            "t_all.prev_peak_score = t_all.peak_score, "
            "t_all.peak_score = GREATEST(t_all.peak_score, t_ud.new_raw_score), "
            "t_all.last_update = t_ud.last_update",
            (SocialScoreUpdater.story_chunk_size,))
        rowcount = cursor.rowcount
        dbi.commit()
def build_new_active_set(self):
    '''
    Creates a new active-set table (news_social_score_active_new) and
    populates it with the updated data. rotate_active_set can then be
    called to bring this new active set into use.
    '''
    dbi = DatabaseInterface.get_shared_instance()
    # Create an empty clone of the current active-set table.
    dbi.execute("DROP TABLE IF EXISTS news_social_score_active_new", None)
    dbi.execute(
        "CREATE TABLE news_social_score_active_new SELECT * FROM news_social_score_active LIMIT 0",
        None)
    dbi.autocommit(False)

    # Add the highest-scored stories to the new active set.
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' "
            "WHERE new_normalized_score > %s AND state='SCORE_COMPUTED' "
            "ORDER BY new_normalized_score DESC LIMIT %s",
            (SocialScoreUpdater.treshold_normalized_score,
             SocialScoreUpdater.story_chunk_size))
        rowcount = cursor.rowcount
        if rowcount == 0:
            break
        dbi.execute(
            "INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score) "
            "( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update "
            "WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)",
            None)
        dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' "
            "WHERE state='ABOVE_TRESHOLD'",
            None)
        dbi.commit()

    # Also keep stories that fall below the threshold but are young enough
    # to stay in the active set.
    treshold_created_time = int(time.time()) - SocialScoreUpdater.treshold_age
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='ABOVE_TRESHOLD' "
            "WHERE created > %s AND state='SCORE_COMPUTED' LIMIT %s",
            (treshold_created_time, SocialScoreUpdater.story_chunk_size))
        rowcount = cursor.rowcount
        if rowcount == 0:
            break
        dbi.execute(
            "INSERT INTO news_social_score_active_new (normalized_score, story_id, raw_score) "
            "( SELECT new_normalized_score,story_id,new_raw_score FROM news_social_score_update "
            "WHERE state='ABOVE_TRESHOLD' ORDER BY new_normalized_score DESC)",
            None)
        dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' "
            "WHERE state='ABOVE_TRESHOLD'",
            None)
        dbi.commit()

    # Mark the rest as considered without adding them to the set.
    rowcount = 1
    while rowcount > 0:
        cursor = dbi.execute(
            "UPDATE news_social_score_update SET state='CONSIDERED_IN_SET' "
            "WHERE state='SCORE_COMPUTED' LIMIT %s",
            (SocialScoreUpdater.story_chunk_size,))
        rowcount = cursor.rowcount
        dbi.commit()
    dbi.autocommit(True)
def get_personal_recos(user_id):
    ui = UserCategoryInterests(user_id)
    ui.load_category_interests()

    # Load all active stories along with their normalized scores.
    dbi = DatabaseInterface.get_shared_instance()
    cursor = dbi.execute(
        "SELECT * FROM news_stories JOIN news_social_score_active USING(story_id)",
        None)
    rows = cursor.fetchall()
    content = []
    seen_categories = dict()
    for r in rows:
        # N+1 query pattern: one category lookup per story. Acceptable only
        # because the active set is assumed to be small.
        cursor = dbi.execute(
            "SELECT category_id FROM news_story_categories WHERE story_id=%s",
            (r['story_id'],))
        story_categories = list()
        for c in cursor.fetchall():
            story_categories.append(c['category_id'])
            seen_categories[c['category_id']] = True
        content.append(NewsStory(
            r['story_id'], r['url'], r['title'], r['subtitle'], r['feed_id'],
            r['created'], r['image'], story_categories, r['normalized_score']))

    # Only look up interests for categories that actually occur.
    interests = dict()
    for category_id in seen_categories:
        interests[category_id] = ui.get_interest(category_id)

    pscore = PersonalizedScore()
    pscore.set_data(content, interests)
    pscore.personalize_scores()
    return sorted(content, key=lambda story: story.score, reverse=True)
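# PersonalizedScore is not shown in this excerpt. A minimal sketch, assuming
# personalize_scores() sets story.score by weighting the social
# normalized_score with the user's interest in the story's categories (the
# attribute names `categories`, `normalized_score` and `score`, and the
# weighting rule, are assumptions, not the source's formula):
class PersonalizedScore(object):
    def set_data(self, stories, interests):
        self._stories = stories
        self._interests = interests

    def personalize_scores(self):
        for story in self._stories:
            cats = story.categories or []
            # Average interest across the story's categories; 0 if untagged.
            interest = (sum(self._interests.get(c, 0.0) for c in cats) /
                        len(cats)) if cats else 0.0
            story.score = story.normalized_score * (1.0 + interest)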
def update_statistics(self):
    '''
    PRECONDITION: All stories in news_social_score_all must be accounted
    for in feed_n, i.e. even while they were at a 0-0 score. Yes, this
    makes a small difference to the std_deviation, but the simplification
    in code is worth the negligible loss of precision. If you find a
    better way, please implement it.

    NOT AN INVARIANT: feed_n is less than stat_update_upper.

    Reminder: Std_dev(X) = sqrt( E[X^2] - E[X]^2 )
    '''
    dbi = DatabaseInterface.get_shared_instance()
    # Pull every story whose peak score changed since the stats were last
    # updated, and aggregate the deltas per feed in Python.
    cursor = dbi.execute(
        "SELECT feed_id,peak_score,prev_peak_score,reflected_in_stats "
        "FROM news_social_score_all JOIN news_stories USING(story_id) "
        "WHERE reflected_in_stats!='UPTODATE'",
        None)
    rows = cursor.fetchall()
    added_x = dict()
    added_x2 = dict()
    new_insertions = dict()
    for row in rows:
        feed_id = row['feed_id']
        peak_score = row['peak_score']
        prev_peak_score = row['prev_peak_score']
        if feed_id not in added_x:
            added_x[feed_id] = 0
            added_x2[feed_id] = 0
        # Replace the story's old contribution to the running sums with its
        # new peak score.
        added_x[feed_id] += peak_score - prev_peak_score
        added_x2[feed_id] += (peak_score * peak_score) - (prev_peak_score * prev_peak_score)
        if row['reflected_in_stats'] == 'NEVER':
            if feed_id not in new_insertions:
                new_insertions[feed_id] = 0
            new_insertions[feed_id] += 1

    # Write the per-feed deltas back.
    dbi.autocommit(False)
    for feed_id in new_insertions:
        dbi.execute(
            "INSERT INTO news_social_score_feed_statistics (feed_id,feed_n) VALUES(%s,%s) "
            "ON DUPLICATE KEY UPDATE feed_n = feed_n + %s",
            (feed_id, new_insertions[feed_id], new_insertions[feed_id]))
    for feed_id in added_x:
        dbi.execute(
            "UPDATE news_social_score_feed_statistics SET sum_x=sum_x+%s, sum_x2=sum_x2+%s "
            "WHERE feed_id=%s",
            (added_x[feed_id], added_x2[feed_id], feed_id))
    dbi.execute(
        "UPDATE news_social_score_all SET reflected_in_stats='UPTODATE' "
        "WHERE reflected_in_stats!='UPTODATE'",
        None)

    # Finally, rescale the running sums so that newer articles carry more
    # weight. This isn't critical, so it doesn't need autocommit off.
    stat_update_lower = SocialScoreUpdater.stat_update_lower
    stat_update_upper = SocialScoreUpdater.stat_update_upper
    cursor = dbi.execute(
        "SELECT feed_id,sum_x,sum_x2,feed_n FROM news_social_score_feed_statistics "
        "WHERE feed_n > %s",
        (stat_update_upper,))
    for row in cursor.fetchall():
        feed_id = row['feed_id']
        scale = float(stat_update_lower) / row['feed_n']
        new_x = scale * row['sum_x']
        new_x2 = scale * row['sum_x2']
        new_n = stat_update_lower  # = scale * row['feed_n']
        dbi.execute(
            "UPDATE news_social_score_feed_statistics SET sum_x=%s, sum_x2=%s, feed_n=%s "
            "WHERE feed_id=%s",
            (new_x, new_x2, new_n, feed_id))
    dbi.commit()
    dbi.autocommit(True)
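# The rescaling step above is what gives newer articles more weight: once
# feed_n exceeds stat_update_upper, sum_x, sum_x2 and feed_n are all scaled
# by stat_update_lower / feed_n. The mean and variance are unchanged (both
# are ratios of these sums), but later stories are added against a smaller
# feed_n, so each one moves the statistics more.
#
# A hedged sketch of how the updater's phases appear intended to fit
# together, based on the docstrings and state transitions above (the exact
# ordering is an assumption; the excerpt does not include a driver):
def run_update(updater):
    updater.prepare_update()           # snapshot stories into _update, state READY
    updater.update_scores()            # fetch shares, score -> SCORE_COMPUTED
    updater.build_new_active_set()     # select stories -> CONSIDERED_IN_SET
    updater.rotate_active_set()        # atomic RENAME swaps in the new table
    updater.update_permanent_scores()  # fold back into _all -> UPDATED_STORIES
    updater.update_statistics()        # refresh the per-feed running sums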
from news.contentranking.classes.usercategoryinterests import UserCategoryInterests
from news.contentranking.classes.scrapers.facebookpagelikescraper import FacebookPageLikeScraper as FPLS
from databaseinterface.databaseinterface import DatabaseInterface
from news.contentranking.classes.localconfig import LocalConfig as ContentRankingConfig
import sys

dbi = DatabaseInterface.get_shared_instance()

# With no argument, process every test user; otherwise just the given one.
if len(sys.argv) < 2:
    cursor = dbi.execute(
        "SELECT user_id,email,access_token FROM first_test_users", None)
else:
    user_id = sys.argv[1]
    cursor = dbi.execute(
        "SELECT user_id,email,access_token FROM first_test_users WHERE user_id=%s",
        (user_id,))
rows = cursor.fetchall()

for row in rows:
    user_id = row['user_id']
    user_access_token = row['access_token']
    fpls = FPLS(user_access_token)
    fpls.load_feed_ids()
    liked_feed_ids = fpls.get_liked_feed_ids()
    dbi = DatabaseInterface.get_shared_instance()
    categories_liked = dict()
    total_likes = 0