Exemplo n.º 1
0
def commentData(comments_list, now_time, subreddit_name):
    """Flatten comment objects into rows, hand them to csvSave, return them.

    Args:
        comments_list: iterable of per-thread iterables of comment objects.
            Each comment is read for ``_submission.id``, ``body_html``,
            ``id``, ``author``, ``score``, ``gilded``, ``created``,
            ``edited`` and ``replies``.
        now_time: value stored unchanged as the last column of every row.
        subreddit_name: used to build the output file name; the value
            "fp" marks every row as front page (first column "1").

    Returns:
        The list of rows. Column order: front_page, thread id, comment id,
        author name, author id, score, cleaned body, position in thread,
        cleaned body length, reply count, edited flag, gilded count,
        creation time (local), now_time.

    Comments that cannot be read (for example when ``author`` is None) are
    skipped on a best-effort basis; their position number is still consumed
    so the positions of the remaining comments are unaffected.
    """
    import sys  # local import: only needed for the skipped-comment report

    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_comment_" + str_time + ".csv"
    front_page = 1 if subreddit_name == "fp" else 0
    comment_data = []
    skipped = 0

    for counter, thread_comments in enumerate(comments_list, 1):
        # Single-argument print() emits the same text as the old
        # comma-separated print statement and parses on Python 2 and 3.
        print("subreddit:  %s ; counter:  %s" % (subreddit_name, counter))
        for position, comment_object in enumerate(thread_comments, 1):
            try:
                thread_id = comment_object._submission.id
                cleaned_comment = clean_html(comment_object.body_html)
                comment_id = comment_object.id
                author = comment_object.author
                comment_usr = author._case_name
                usr_id = author.id
                comment_upvotes = comment_object.score
                comment_gilds = comment_object.gilded
                comment_created = time.strftime(
                    '%Y-%m-%d %H:%M:%S %Z',
                    time.localtime(comment_object.created))
                # `edited` is collapsed to a flag: anything whose string
                # form is not "False" counts as edited.
                if str(comment_object.edited) == "False":
                    edited = "False"
                else:
                    edited = "True"
                num_replies = len(comment_object.replies)
                row = [unicode(front_page),
                       thread_id,
                       comment_id,
                       comment_usr,
                       usr_id,
                       unicode(comment_upvotes),
                       cleaned_comment,
                       unicode(position),
                       unicode(len(cleaned_comment)),
                       unicode(num_replies),
                       unicode(edited),
                       unicode(comment_gilds),
                       comment_created,
                       now_time]
            except Exception:
                # Best effort: drop comments that cannot be read, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except did.
                skipped += 1
                continue
            comment_data.append(row)

    if skipped:
        sys.stderr.write("commentData: skipped %d unreadable comment(s)\n"
                         % skipped)
    csvSave(comment_data, output_name)
    return comment_data
Exemplo n.º 2
0
def threadData(list_items, now_time, subreddit_name):
    """Flatten submission objects into rows, hand them to csvSave, return them.

    Args:
        list_items: iterable of submission objects. Each is read for
            ``subreddit``, ``author``, ``title``, ``selftext``,
            ``num_comments``, ``created``, ``domain``, ``gilded``,
            ``score``, ``id`` and ``url``.
        now_time: value stored unchanged in every row.
        subreddit_name: used to build the output file name; the value
            "fp" marks every row as front page (first column "1").

    Returns:
        The list of rows. Column order: front_page, subreddit name, author
        name, title, self text, thread id, comment count, cleaned domain,
        gilded count, score, ranking (1-based position in list_items),
        creation time (local), now_time, url.
    """
    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_thread_" + str_time + ".csv"
    front_page = 1 if subreddit_name == "fp" else 0
    thread_data = []

    for ranking, post in enumerate(list_items, 1):
        # Kept in its own local so the function parameter is not rebound.
        post_subreddit = post.subreddit._case_name
        author = post.author
        # A deleted account shows up as author None; use a placeholder
        # instead of letting one such post abort the whole export.
        if author is None:
            author_name = u"[deleted]"
        else:
            author_name = author._case_name
        post_title = post.title
        post_text = post.selftext
        total_num_comments = post.num_comments
        post_timestamp = time.strftime('%Y-%m-%d %H:%M:%S %Z',
                                       time.localtime(post.created))
        domain = cleanDomain(post.domain)
        gilded_score = post.gilded
        post_score = post.score
        thread_id = post.id  # same value as a comment's _submission.id
        post_url = post.url
        thread_data.append([unicode(front_page),
                            post_subreddit,
                            author_name,
                            post_title,
                            post_text,
                            thread_id,
                            unicode(total_num_comments),
                            domain,
                            unicode(gilded_score),
                            unicode(post_score),
                            unicode(ranking),
                            post_timestamp,
                            now_time,
                            post_url])
    csvSave(thread_data, output_name)
    return thread_data
Exemplo n.º 3
0
def userData(list_items, subreddit_name):
    """Collect account details for each post's author, save and return them.

    Args:
        list_items: iterable of submission objects; only ``author`` is read.
        subreddit_name: used to build the output file name.

    Returns:
        The list of rows, one per post with a resolvable author (the same
        user appears once per post). Column order: user name, account
        creation time (local), comment karma, link karma, verified-email
        flag, gold flag, mod flag.
    """
    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_users_" + str_time + ".csv"
    r = praw.Reddit(user_agent='blah')
    user_data = []
    for post in list_items:
        author = post.author
        if author is None:
            # Deleted account: there is no redditor to look up, so skip the
            # post instead of raising AttributeError on None.
            continue
        reddit_username = author._case_name
        user = r.get_redditor(reddit_username)
        # user.created is a unix timestamp; format it in local time.
        date_created = time.strftime('%Y-%m-%d %H:%M:%S %Z',
                                     time.localtime(user.created))
        user_data.append([reddit_username,
                          date_created,
                          unicode(user.comment_karma),
                          unicode(user.link_karma),
                          user.has_verified_email,
                          user.is_gold,
                          user.is_mod])
    csvSave(user_data, output_name)
    return user_data