Exemplo n.º 1
0
def get_reddit_posts():
    """Fetch replies for every configured subreddit and cache their raw data.

    For each entry of ``COMMENT_SUBREDDIT_LIST`` this calls
    ``get_message_replies`` (max_replies=500, submission_count=300,
    min_reply_score=20), gathers everything returned into one list, calls
    ``get_raw_data()`` on each gathered item and hands the resulting list to
    ``write_data_to_cache`` under the name "raw_data_cache.p".

    Any ``Exception`` raised along the way is logged through
    ``log.exception`` and not re-raised, so a failure never reaches the
    caller and nothing is written to the cache in that case.
    """
    try:
        collected = []
        for name in COMMENT_SUBREDDIT_LIST:
            collected.extend(
                get_message_replies(
                    subreddit=name,
                    max_replies=500,
                    submission_count=300,
                    min_reply_score=20))
        # Raw data is only extracted once every subreddit has been fetched.
        raw_data = [reply.get_raw_data() for reply in collected]
        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
Exemplo n.º 2
0
def get_reddit_posts():
    """Gather replies from all configured subreddits and cache the raw data.

    Iterates over ``COMMENT_SUBREDDIT_LIST``, calling ``get_message_replies``
    for each subreddit with a fixed set of limits, then stores the
    ``get_raw_data()`` output of every collected item via
    ``write_data_to_cache`` under "raw_data_cache.p".

    Any ``Exception`` raised inside the ``try`` is logged with
    ``log.exception`` instead of being propagated.
    """
    try:
        # The same limits are applied to every subreddit.
        fetch_options = dict(max_replies=500,
                             submission_count=300,
                             min_reply_score=20)
        gathered = []
        for subreddit_name in COMMENT_SUBREDDIT_LIST:
            gathered.extend(
                get_message_replies(subreddit=subreddit_name, **fetch_options))
        raw_data = [item.get_raw_data() for item in gathered]
        write_data_to_cache(raw_data, "raw_data_cache.p")
    except Exception:
        log.exception("Could not save posts.")
    return max_ind


def find_input_match(input_text, reply_matrix, match_mat, vectorizer,
                     reply_text):
    """Return the result of matching ``input_text`` against the replies.

    The work is delegated in two steps: ``find_input_match_vec`` is called
    with the input text, ``match_mat`` and ``vectorizer``; its result is then
    passed, together with ``match_mat`` and ``reply_matrix``, to
    ``find_cosine_match``, whose return value is returned unchanged.

    ``reply_text`` is accepted for interface compatibility but is not used
    by this function.
    """
    return find_cosine_match(
        match_mat,
        find_input_match_vec(input_text, match_mat, vectorizer),
        reply_matrix)


def create_reply_matrix(reply_text, vectorizer):
    """Return ``vectorizer.transform(reply_text)``.

    ``vectorizer`` may be any object exposing a ``transform`` method; the
    value that method returns is passed back to the caller unchanged.
    """
    reply_matrix = vectorizer.transform(reply_text)
    return reply_matrix


# Script entry point: builds a DataFrame of message/reply data and the
# matrices derived from it.
if __name__ == '__main__':
    # NOTE(review): called with no arguments here, unlike the keyword-argument
    # call in get_reddit_posts() - confirm the defaults are what is wanted.
    message_replies = get_message_replies()

    # Flatten the per-item get_table_data() results into a single list.
    # Presumably each entry is a (message, message_score, reply, reply_score)
    # record, matching the column names below - TODO confirm.
    table_data = list(
        chain.from_iterable([mr.get_table_data() for mr in message_replies]))
    pd_frame = pd.DataFrame(
        np.array(table_data),
        columns=["message", "message_score", "reply", "reply_score"])
    # Convert both score columns to int. Presumably needed because np.array()
    # over mixed text/number records stores the scores as strings - confirm.
    pd_frame['reply_score'] = pd_frame['reply_score'].map(lambda x: int(x))
    pd_frame['message_score'] = pd_frame['message_score'].map(lambda x: int(x))

    match_mat, vectorizer = create_match_matrix(table_data, pd_frame)

    # Transform the reply column with the vectorizer returned above.
    reply_matrix = create_reply_matrix(pd_frame['reply'], vectorizer)

    # Distinct messages only; going through set() leaves their order
    # unspecified.
    input_set = list(set(pd_frame['message']))
    replies = []
            match_sum += np.sum(small_mat[z,match_mat_cols])
        if match_sum>= max_val:
            max_val = match_sum
            max_ind = i
    return max_ind

def find_input_match(input_text, reply_matrix, match_mat, vectorizer, reply_text):
    """Match ``input_text`` against the replies and return the match result.

    ``find_input_match_vec`` is called with the input text, ``match_mat`` and
    ``vectorizer``. Its result is forwarded with ``match_mat`` and
    ``reply_matrix`` to ``find_cosine_match``, and that function's return
    value is returned as-is.

    ``reply_text`` is part of the signature but is not read here.
    """
    matched_rows = find_input_match_vec(input_text, match_mat, vectorizer)
    return find_cosine_match(match_mat, matched_rows, reply_matrix)

def create_reply_matrix(reply_text, vectorizer):
    """Apply ``vectorizer.transform`` to ``reply_text`` and return the result.

    No other processing is performed; whatever ``transform`` returns is what
    the caller receives.
    """
    transformed = vectorizer.transform(reply_text)
    return transformed

if __name__ == '__main__':
    message_replies = get_message_replies()

    table_data = list(chain.from_iterable([mr.get_table_data() for mr in message_replies]))
    pd_frame = pd.DataFrame(np.array(table_data),columns=["message", "message_score", "reply", "reply_score"])
    pd_frame['reply_score'] = pd_frame['reply_score'].map(lambda x : int(x))
    pd_frame['message_score'] = pd_frame['message_score'].map(lambda x : int(x))

    match_mat, vectorizer = create_match_matrix(table_data, pd_frame)

    reply_matrix = create_reply_matrix(pd_frame['reply'], vectorizer)

    input_set = list(set(pd_frame['message']))
    replies = []
    reply_accurate = []
    up_to = len(input_set)
    for z in xrange(0,up_to):