def pull_down_comments():
    """Fetch one comment per reply subreddit and record a matched reply.

    After a random delay of 0-300 seconds, this loads the cached raw data
    ("raw_data_cache.p") and the already-processed items ("items_done.p"),
    trains a KNN matcher on the raw data and then, for every subreddit in
    REPLY_SUBREDDIT_LIST, fetches a single comment and asks the matcher for
    a reply. A comment is skipped when its text was already processed or
    when its (non-None) reply was already used. The resulting item list is
    written back to "items_done.p".

    Takes no arguments and returns None. Errors are logged instead of being
    propagated: a failure for one subreddit is logged and the loop moves on
    to the next one; a failure anywhere else is logged and the function
    returns.
    """
    # NOTE(review): a second definition of pull_down_comments appears later
    # in this file; if both stay, the later one replaces this one.
    try:
        # Random start delay -- presumably to stagger runs; confirm intent.
        sleep_time = random.randint(0, 300)
        time.sleep(sleep_time)

        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")

        # Texts and replies handled so far, used for duplicate detection.
        comments = [c['comment'] for c in items_done]
        replies = [c['reply'] for c in items_done]

        knn_matcher = train_knn_matcher(raw_data)

        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Debug output; the parenthesised form prints the same thing
                # under the Python 2 statement and the Python 3 function.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue

                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)

                # Skip repeated comments and repeated replies; a None reply
                # never counts as a repeat.
                if text in comments or (reply is not None and reply in replies):
                    continue

                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)

                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit and kept looping.
                log.exception("Cannot get reply for {0}".format(subreddit))

        # NOTE(review): the third argument is presumably the key used to
        # de-duplicate cached items -- confirm against write_data_to_cache.
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
def get_reddit_posts():
    """Collect message replies from the comment subreddits and cache them.

    For every subreddit in COMMENT_SUBREDDIT_LIST, message replies are
    fetched with get_message_replies (max_replies=500,
    submission_count=300, min_reply_score=20). The raw data of all fetched
    replies is then written to "raw_data_cache.p".

    Takes no arguments and returns None. Any exception is logged and not
    re-raised.
    """
    # NOTE(review): a second definition of get_reddit_posts appears later in
    # this file; if both stay, the later one replaces this one.
    try:
        collected = []
        for subreddit in COMMENT_SUBREDDIT_LIST:
            fetched = get_message_replies(
                subreddit=subreddit,
                max_replies=500,
                submission_count=300,
                min_reply_score=20,
            )
            collected.extend(fetched)

        # Raw data is extracted only after every subreddit has been fetched.
        raw_data = [reply.get_raw_data() for reply in collected]
        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
def get_reddit_posts():
    """Refresh the raw-data cache from the comment subreddits.

    Calls get_message_replies once per entry of COMMENT_SUBREDDIT_LIST,
    gathers everything it returns into one list, converts each gathered
    object with its get_raw_data() method and writes the resulting list to
    "raw_data_cache.p".

    Takes no arguments and returns None. Any exception raised along the way
    is logged and not re-raised.
    """
    # NOTE(review): an earlier definition of get_reddit_posts exists in this
    # file; this later one is the one that takes effect.
    # Fixed fetch limits that are passed for every subreddit.
    fetch_options = {
        'max_replies': 500,
        'submission_count': 300,
        'min_reply_score': 20,
    }
    try:
        gathered = []
        for name in COMMENT_SUBREDDIT_LIST:
            gathered += get_message_replies(subreddit=name, **fetch_options)

        raw_data = []
        for message_reply in gathered:
            raw_data.append(message_reply.get_raw_data())

        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
def pull_down_comments():
    """Match a reply to one fresh comment from each reply subreddit.

    Steps, in order:

    1. Sleep for a random 0-300 seconds.
    2. Load the raw data from "raw_data_cache.p" and the list of processed
       items from "items_done.p".
    3. Train a KNN matcher on the raw data.
    4. For each subreddit in REPLY_SUBREDDIT_LIST, fetch a single comment,
       compute a reply for its body, and record the pair unless the comment
       text was seen before or the (non-None) reply was already used.
    5. Write the item list back to "items_done.p".

    Takes no arguments and returns None. A failure while handling one
    subreddit is logged and the remaining subreddits are still processed;
    any other failure is logged and ends the run without raising.
    """
    # NOTE(review): an earlier definition of pull_down_comments exists in
    # this file; this later one is the one that takes effect.
    try:
        # Random start delay -- presumably to spread out runs; confirm intent.
        time.sleep(random.randint(0, 300))

        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")

        # Previously handled comment texts and replies, for duplicate checks.
        comments = [item['comment'] for item in items_done]
        replies = [item['reply'] for item in items_done]

        knn_matcher = train_knn_matcher(raw_data)

        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Debug output; written so that it prints identically under
                # both the Python 2 statement and the Python 3 function.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue

                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)

                # A None reply is never treated as a duplicate reply.
                seen_comment = text in comments
                seen_reply = reply is not None and reply in replies
                if seen_comment or seen_reply:
                    continue

                items_done.append(
                    {'comment': text, 'reply': reply, 'comment_id': cid}
                )
                replies.append(reply)
                comments.append(text)

                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Narrowed from a bare ``except:`` so that KeyboardInterrupt
                # and SystemExit are no longer swallowed by the loop.
                log.exception("Cannot get reply for {0}".format(subreddit))

        # NOTE(review): "comment_id" is presumably the field used to
        # de-duplicate cached items -- confirm against write_data_to_cache.
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")