示例#1
0
def pull_down_comments():
    """Fetch one comment per configured subreddit and record a matched reply.

    Steps, in order:

    1. Sleep for a random whole number of seconds between 0 and 300
       (presumably to stagger runs -- confirm against the scheduler).
    2. Load ``raw_data_cache.p`` and ``items_done.p`` through
       ``read_raw_data_from_cache`` and build a matcher with
       ``train_knn_matcher``.
    3. For every entry of ``REPLY_SUBREDDIT_LIST`` ask ``get_single_comment``
       for a comment and ``test_knn_matcher`` for a reply to its body.
       The pair is skipped when the comment text was already recorded, or
       when the reply is not None and was already recorded.
    4. Write the (possibly extended) list of processed items back to
       ``items_done.p``.

    Nothing is raised to the caller for ordinary errors: a failure while
    handling one subreddit is logged and the loop moves on to the next one;
    a failure anywhere else is logged by the outer handler.
    """
    try:
        time.sleep(random.randint(0, 300))
        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        # Parallel lookup lists used for the duplicate checks below; they are
        # kept in sync with items_done as new entries are added.
        comments = [item['comment'] for item in items_done]
        replies = [item['reply'] for item in items_done]
        knn_matcher = train_knn_matcher(raw_data)
        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Parenthesised form prints the same value under the
                # Python 2 print statement used by this file.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue
                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)
                # A None reply never counts as a duplicate reply.
                if text in comments or (reply is not None
                                        and reply in replies):
                    continue
                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)
                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Deliberately best-effort per subreddit, but do not trap
                # KeyboardInterrupt/SystemExit the way a bare except would.
                log.exception("Cannot get reply for {0}".format(subreddit))
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
示例#2
0
def pull_down_comments():
    """Fetch one comment per configured subreddit and record a matched reply.

    Steps, in order:

    1. Sleep for a random whole number of seconds between 0 and 300
       (presumably to stagger runs -- confirm against the scheduler).
    2. Load ``raw_data_cache.p`` and ``items_done.p`` through
       ``read_raw_data_from_cache`` and build a matcher with
       ``train_knn_matcher``.
    3. For every entry of ``REPLY_SUBREDDIT_LIST`` ask ``get_single_comment``
       for a comment and ``test_knn_matcher`` for a reply to its body.
       The pair is skipped when the comment text was already recorded, or
       when the reply is not None and was already recorded.
    4. Write the (possibly extended) list of processed items back to
       ``items_done.p``.

    Ordinary errors are logged instead of propagating: a failure while
    handling one subreddit is logged and the loop moves on to the next one;
    a failure anywhere else inside the outer ``try`` is logged by the outer
    handler.
    """
    try:
        time.sleep(random.randint(0, 300))
        raw_data = read_raw_data_from_cache("raw_data_cache.p")
        items_done = read_raw_data_from_cache("items_done.p")
        # Parallel lookup lists used for the duplicate checks below; they are
        # kept in sync with items_done as new entries are added.
        comments = [item['comment'] for item in items_done]
        replies = [item['reply'] for item in items_done]
        knn_matcher = train_knn_matcher(raw_data)
        for subreddit in REPLY_SUBREDDIT_LIST:
            try:
                comment = get_single_comment(subreddit)
                # Parenthesised form prints the same value under the
                # Python 2 print statement used by this file.
                print(comment)
                if comment is None:
                    log.info("Could not get a comment")
                    continue
                text = comment.body
                cid = comment.id
                reply = test_knn_matcher(knn_matcher, text)
                # A None reply never counts as a duplicate reply.
                if text in comments or (reply is not None
                                        and reply in replies):
                    continue
                data = {'comment': text, 'reply': reply, 'comment_id': cid}
                items_done.append(data)
                replies.append(reply)
                comments.append(text)
                log.info("Subreddit: {0}".format(subreddit))
                log.info("Comment: {0} {1}".format(cid, text))
                log.info("Reply: {0}".format(reply))
                log.info("-------------------")
            except Exception:
                # Deliberately best-effort per subreddit, but do not trap
                # KeyboardInterrupt/SystemExit the way a bare except would.
                log.exception("Cannot get reply for {0}".format(subreddit))
        write_data_to_cache(items_done, "items_done.p", "comment_id")
    except Exception:
        log.exception("Could not pull down comment.")
    return sorted_test_results

def test_accuracy(test_results, raw_data):
    correct = []
    for i in xrange(0,len(raw_data)):
        if test_results[i] in raw_data[i]['replies']:
            correct.append(True)
        else:
            correct.append(False)
    return correct

if __name__ == '__main__':
    # Manual smoke run: load the cached data, train a matcher, query it once,
    # then cross-validate and dump every prediction next to its message.
    #
    # The cache read below is not rebuilt here.  The original commented-out
    # lines suggest it was produced with
    #   get_message_replies(subreddit = "funny", max_replies= 500,
    #                       submission_count = 300, min_reply_score = 20)
    # followed by write_data_to_cache(..., "raw_data_cache.p") -- confirm
    # before relying on this.
    raw_data = read_raw_data_from_cache("raw_data_cache.p")

    knn_matcher = train_knn_matcher(raw_data)
    # Single sample query against the second cached item; the result is not
    # used further in this script.
    message = test_knn_matcher(knn_matcher, raw_data[1]['message'])

    test_results = cross_validate(raw_data, train_knn_matcher,
                                  test_knn_matcher)

    for i, result in enumerate(test_results):
        print(i)
        print(raw_data[i]['message'])
        print(result)
        print("------------------")