def get_reddit_posts():
    """Fetch replies for every configured subreddit and cache their raw data.

    ``get_message_replies`` is called once per entry of
    ``COMMENT_SUBREDDIT_LIST`` with fixed keyword arguments; the results are
    gathered into a single list. Each gathered item's ``get_raw_data()``
    result is then handed to ``write_data_to_cache`` under the name
    ``"raw_data_cache.p"``.

    Any ``Exception`` raised while fetching, converting or writing is logged
    through ``log.exception`` and is not re-raised. The function has no
    return statement, so it always yields ``None``.
    """
    try:
        collected = []
        for name in COMMENT_SUBREDDIT_LIST:
            collected.extend(
                get_message_replies(
                    subreddit=name,
                    max_replies=500,
                    submission_count=300,
                    min_reply_score=20
                )
            )

        # Conversion only starts once every subreddit has been fetched.
        payload = [item.get_raw_data() for item in collected]
        write_data_to_cache(payload, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
def get_reddit_posts():
    """Collect replies from all configured subreddits and write them to cache.

    Iterates over ``COMMENT_SUBREDDIT_LIST``, calling ``get_message_replies``
    for each entry with the same keyword limits, and accumulates everything
    returned. After the loop, ``get_raw_data()`` is called on every
    accumulated item and the resulting list is passed to
    ``write_data_to_cache`` together with ``"raw_data_cache.p"``.

    Returns ``None``. An ``Exception`` raised at any step is reported with
    ``log.exception`` and swallowed, so in that case the cache write does
    not complete.
    """
    # Keyword arguments shared by every per-subreddit fetch.
    fetch_options = {
        "max_replies": 500,
        "submission_count": 300,
        "min_reply_score": 20,
    }

    try:
        gathered = []
        for subreddit in COMMENT_SUBREDDIT_LIST:
            batch = get_message_replies(subreddit=subreddit, **fetch_options)
            gathered.extend(batch)

        raw_records = [reply.get_raw_data() for reply in gathered]
        write_data_to_cache(raw_records, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
return max_ind def find_input_match(input_text, reply_matrix, match_mat, vectorizer, reply_text): match_mat_rows = find_input_match_vec(input_text, match_mat, vectorizer) max_cos = find_cosine_match(match_mat, match_mat_rows, reply_matrix) return max_cos def create_reply_matrix(reply_text, vectorizer): return vectorizer.transform(reply_text) if __name__ == '__main__': message_replies = get_message_replies() table_data = list( chain.from_iterable([mr.get_table_data() for mr in message_replies])) pd_frame = pd.DataFrame( np.array(table_data), columns=["message", "message_score", "reply", "reply_score"]) pd_frame['reply_score'] = pd_frame['reply_score'].map(lambda x: int(x)) pd_frame['message_score'] = pd_frame['message_score'].map(lambda x: int(x)) match_mat, vectorizer = create_match_matrix(table_data, pd_frame) reply_matrix = create_reply_matrix(pd_frame['reply'], vectorizer) input_set = list(set(pd_frame['message'])) replies = []
match_sum += np.sum(small_mat[z,match_mat_cols]) if match_sum>= max_val: max_val = match_sum max_ind = i return max_ind def find_input_match(input_text, reply_matrix, match_mat, vectorizer, reply_text): match_mat_rows = find_input_match_vec(input_text, match_mat, vectorizer) max_cos = find_cosine_match(match_mat, match_mat_rows, reply_matrix) return max_cos def create_reply_matrix(reply_text, vectorizer): return vectorizer.transform(reply_text) if __name__ == '__main__': message_replies = get_message_replies() table_data = list(chain.from_iterable([mr.get_table_data() for mr in message_replies])) pd_frame = pd.DataFrame(np.array(table_data),columns=["message", "message_score", "reply", "reply_score"]) pd_frame['reply_score'] = pd_frame['reply_score'].map(lambda x : int(x)) pd_frame['message_score'] = pd_frame['message_score'].map(lambda x : int(x)) match_mat, vectorizer = create_match_matrix(table_data, pd_frame) reply_matrix = create_reply_matrix(pd_frame['reply'], vectorizer) input_set = list(set(pd_frame['message'])) replies = [] reply_accurate = [] up_to = len(input_set) for z in xrange(0,up_to):