def update_user_histories(active_users):
    """Refresh the stored Reddit comment history for each active user.

    For each user, look up the most recent QUERY_END_DATE already stored in
    the HISTORY table of the user-history SQLite database. Users never seen
    before are queried from GLOBAL_START_DATE; users whose last query already
    ended today are skipped; everyone else is queried from their last end
    date through today. Per-user, per-subreddit comment counts for the new
    window are appended to the HISTORY table.

    Parameters
    ----------
    active_users : iterable of str
        Usernames whose comment histories should be refreshed.
    """
    ## Get Today's Date
    today = datetime.now().date()
    ## Initialize Connection
    user_history_con = sql.connect(USER_HISTORY_DB_PATH)
    try:
        cursor = user_history_con.cursor()
        ## Get User Query Dates. The query is parameterized (no string
        ## formatting of usernames into SQL) to avoid injection. ORDER BY
        ## ... DESC is required so LIMIT 1 returns the *latest* stored end
        ## date; ascending order would return the earliest and re-query
        ## spans that are already stored.
        max_date_command = """
            SELECT USER, QUERY_END_DATE
            FROM HISTORY
            WHERE USER=?
            ORDER BY QUERY_END_DATE DESC
            LIMIT 1;"""
        query_dates = {}
        for user in tqdm(active_users,
                         total=len(active_users),
                         desc="Query Periods",
                         file=sys.stdout):
            res = cursor.execute(max_date_command, (user,))
            result = res.fetchall()
            if not result:
                ## New user: query the full global window
                query_dates[user] = (GLOBAL_START_DATE, today.isoformat())
            elif result[0][1] != today.isoformat():
                ## Known user: resume from the last stored end date
                ## (skip users already updated today)
                query_dates[user] = (result[0][1], today.isoformat())
        ## Initialize Reddit Wrapper
        reddit = RedditData()
        ## Query Comment History
        user_comment_histories = []
        for user, (start, stop) in tqdm(query_dates.items(),
                                        total=len(query_dates),
                                        file=sys.stdout,
                                        desc="User Histories"):
            df = reddit.retrieve_author_comments(user,
                                                 start_date=start,
                                                 end_date=stop)
            try:
                subreddit_counts = df.groupby(
                        ["author"])["subreddit"].value_counts().rename(
                        "COMMENT_COUNT").reset_index()
            except (KeyError, AttributeError) as e:
                ## Best effort: skip users whose retrieval returned nothing
                ## usable (e.g. no comments in the window, or a malformed
                ## frame) instead of dropping into a debugger.
                print(f"failed user: {user} ({e})")
                continue
            subreddit_counts["QUERY_START_DATE"] = start
            subreddit_counts["QUERY_END_DATE"] = stop
            subreddit_counts.rename(columns={
                                        "author": "USER",
                                        "subreddit": "SUBREDDIT"
                                    },
                                    inplace=True)
            user_comment_histories.append(subreddit_counts)
        ## Update Database
        if user_comment_histories:
            user_comment_histories = pd.concat(
                user_comment_histories).reset_index(drop=True)
            user_comment_histories.to_sql(
                name="HISTORY",
                con=user_history_con,
                if_exists="append",
                index=False,
            )
        user_history_con.commit()
    finally:
        ## Close Connection even if querying fails partway through
        user_history_con.close()
## Filter out rows of the interaction matrix with insufficient support:
## keep only rows having at least MIN_SUPPORT nonzero entries.
## NOTE(review): the name suggests rows index subreddits (and columns users),
## but the orientation of X_masked is defined upstream — confirm.
subreddit_mask = np.nonzero((X_masked > 0).sum(axis=1) >= MIN_SUPPORT)[0]
X_masked = X_masked[subreddit_mask]
## Keep the row labels aligned with the filtered matrix
rows_masked = [rows[i] for i in subreddit_mask]

## Weight Using BM25 (optional re-weighting of raw counts; result is
## converted back to CSR for the fit below)
if BM25_WEIGHTING:
    X_masked = bm25_weight(X_masked).tocsr()

## Fit Model using the module-level hyperparameter constants
cf = CollaborativeFiltering(factors=N_FACTORS,
                            regularization=REGULARIZATION,
                            iterations=ITERATIONS,
                            num_threads=NUM_THREADS,
                            random_state=RANDOM_STATE)
cf = cf.fit(X_masked, rows=rows_masked, columns=columns_masked)

#####################
### Testing
#####################

## Test Recommendations: pull one known author's comments and ask the
## fitted model for 20 recommendations based on their subreddit list.
reddit = RedditData()
keith = reddit.retrieve_author_comments("HuskyKeith")
## NOTE(review): despite the name, keith_counts is the raw list of
## subreddit values (with repeats), not aggregated counts — confirm that
## cf.recommend expects this form.
keith_counts = keith["subreddit"].tolist()
keith_recs = cf.recommend(keith_counts, 20)

## Test Similarity (smoke test; return value is discarded)
cf.get_similar_item("movies")

## Dump Model
## NOTE(review): plain string concatenation — assumes MODEL_DIR ends with
## a path separator; verify.
cf.dump(f"{MODEL_DIR}{MODEL_NAME}")