def pull_down_comments():
    """Fetch one comment per reply subreddit, match a reply, and cache it.

    Steps, in order:

    1. Sleep for a random whole number of seconds between 0 and 300.
    2. Load ``raw_data_cache.p`` and ``items_done.p`` through
       ``read_raw_data_from_cache`` and train the matcher on the raw data.
    3. For every subreddit in ``REPLY_SUBREDDIT_LIST`` fetch a single
       comment, compute a reply for its body, and record the pair unless
       the comment text was already recorded or the (non-``None``) reply
       was already produced.
    4. Write the accumulated items back to ``items_done.p`` once, after the
       loop.

    A failure while handling one subreddit is logged and the loop moves on
    to the next one.  Any other ``Exception`` is logged and swallowed, so
    callers never see an ``Exception`` from this function.  Returns
    ``None``.
    """
    try:
        # Random start delay (bounds are inclusive).
        time.sleep(random.randint(0, 300))

        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        comments = [c['comment'] for c in items_done]
        replies = [c['reply'] for c in items_done]
        knn_matcher = train_knn_matcher(raw_data)

        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Single-argument form prints the same under the Python 2
                # print statement and the Python 3 print function.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue

                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)

                # Skip comments already handled and replies already used.
                # A None reply is never treated as a duplicate.
                if text in comments or (reply is not None and reply in replies):
                    continue

                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)

                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Was a bare ``except:``, which also trapped
                # KeyboardInterrupt/SystemExit and made the loop impossible
                # to interrupt cleanly.
                log.exception("Cannot get reply for {0}".format(subreddit))

        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
# NOTE(review): a functionally identical pull_down_comments is defined
# immediately before this one; this later definition is the one that is bound
# after import. Consider removing one of the two.
def pull_down_comments():
    """Collect a new comment/reply pair for each reply subreddit.

    After sleeping a random 0-300 seconds, the cached raw data
    (``raw_data_cache.p``) and the list of processed items
    (``items_done.p``) are loaded and the matcher is trained on the raw
    data.  Each subreddit in ``REPLY_SUBREDDIT_LIST`` is then asked for a
    single comment; the matcher's reply for the comment body is appended to
    the processed items unless the same comment text, or the same
    non-``None`` reply, is already present.  The processed items are written
    back to ``items_done.p`` once the loop has finished.

    Per-subreddit failures are logged and skipped.  Any other ``Exception``
    is logged and suppressed.  Nothing is returned.
    """
    try:
        # Random start delay; randint includes both end points.
        sleep_time = random.randint(0, 300)
        time.sleep(sleep_time)

        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        comments = [item['comment'] for item in items_done]
        replies = [item['reply'] for item in items_done]
        knn_matcher = train_knn_matcher(raw_data)

        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Parenthesised single argument: same output with the
                # Python 2 print statement and the Python 3 function.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue

                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)

                already_seen = text in comments
                # None replies are never counted as duplicates.
                reply_reused = reply is not None and reply in replies
                if already_seen or reply_reused:
                    continue

                items_done.append(
                    {'comment': text, 'reply': reply, 'comment_id': cid}
                )
                replies.append(reply)
                comments.append(text)

                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed inside the loop.
                log.exception("Cannot get reply for {0}".format(subreddit))

        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
return sorted_test_results def test_accuracy(test_results, raw_data): correct = [] for i in xrange(0,len(raw_data)): if test_results[i] in raw_data[i]['replies']: correct.append(True) else: correct.append(False) return correct if __name__ == '__main__': #message_replies = get_message_replies(subreddit = "funny", max_replies= 500, submission_count = 300, min_reply_score = 20) #raw_data = write_data_to_cache(raw_data, "raw_data_cache.p") raw_data = read_raw_data_from_cache("raw_data_cache.p") knn_matcher = train_knn_matcher(raw_data) message = test_knn_matcher(knn_matcher, raw_data[1]['message']) test_results = cross_validate(raw_data,train_knn_matcher, test_knn_matcher) for i in xrange(0,len(test_results)): print i print raw_data[i]['message'] print test_results[i] print "------------------"