def commentData(comments_list, now_time, subreddit_name):
    """Flatten per-thread comment lists into rows and save them as CSV.

    Args:
        comments_list: iterable of iterables; each inner iterable holds the
            comment objects of one thread.
        now_time: value stored unchanged as the last column of every row.
        subreddit_name: used to build the output file name; the value "fp"
            marks every row with a front-page flag of 1 (otherwise 0).

    Returns:
        The list of rows that was handed to csvSave. Each row is
        [front_page, thread_id, comment_id, author_name, author_id, score,
        cleaned_body, position, cleaned_body_length, reply_count, edited,
        gilded, created_local_time, now_time].
    """
    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_comment_" + str_time + ".csv"
    front_page = 1 if subreddit_name == "fp" else 0

    comment_data = []
    for counter, thread_comments in enumerate(comments_list, 1):
        # One pre-formatted argument: emits the same text as the former
        # multi-item print statement (items were joined by single spaces).
        print("subreddit:  %s ; counter:  %s" % (subreddit_name, counter))

        # position counts every comment in the thread, including skipped ones.
        for position, comment_object in enumerate(thread_comments, 1):
            try:
                thread_id = comment_object._submission.id
                cleaned_comment = clean_html(comment_object.body_html)
                comment_id = comment_object.id
                comment_usr = comment_object.author._case_name
                usr_id = comment_object.author.id
                comment_upvotes = comment_object.score
                comment_gilds = comment_object.gilded
                comment_created = time.strftime(
                    '%Y-%m-%d %H:%M:%S %Z',
                    time.localtime(comment_object.created))
                # Anything other than a literal False counts as edited.
                if str(comment_object.edited) == "False":
                    edited = "False"
                else:
                    edited = "True"
                num_replies = len(comment_object.replies)

                comment_data.append([
                    unicode(front_page),
                    thread_id,
                    comment_id,
                    comment_usr,
                    usr_id,
                    unicode(comment_upvotes),
                    cleaned_comment,
                    unicode(position),
                    unicode(len(cleaned_comment)),
                    unicode(num_replies),
                    unicode(edited),
                    unicode(comment_gilds),
                    comment_created,
                    now_time,
                ])
            except Exception:
                # Best effort: a comment that cannot be read completely
                # (presumably e.g. one whose author is None) is left out.
                # Exception, not a bare except, so that KeyboardInterrupt
                # and SystemExit still stop the run.
                continue

    csvSave(comment_data, output_name)
    return comment_data
def threadData(list_items, now_time, subreddit_name):
    """Turn a ranked sequence of posts into rows and save them as CSV.

    Args:
        list_items: iterable of post objects, in ranking order.
        now_time: value stored unchanged in every row.
        subreddit_name: used to build the output file name; the value "fp"
            marks every row with a front-page flag of 1 (otherwise 0).

    Returns:
        The list of rows that was handed to csvSave. Each row is
        [front_page, post_subreddit, author_name, title, selftext, post_id,
        num_comments, cleaned_domain, gilded, score, ranking,
        created_local_time, now_time, url]. Ranking starts at 1.
    """
    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_thread_" + str_time + ".csv"
    front_page = 1 if subreddit_name == "fp" else 0

    thread_data = []
    for ranking, post in enumerate(list_items, 1):
        # A post without an author object used to raise AttributeError and
        # abort the export before anything was saved; record a placeholder.
        author = post.author
        if author is not None:
            author_name = author._case_name
        else:
            author_name = u"[deleted]"

        created_local = time.strftime('%Y-%m-%d %H:%M:%S %Z',
                                      time.localtime(post.created))

        thread_data.append([
            unicode(front_page),
            # The post's own subreddit, kept separate from the
            # subreddit_name argument so the parameter is not overwritten.
            post.subreddit._case_name,
            author_name,
            post.title,
            post.selftext,
            post.id,  # matches the thread_id column of the comment export
            unicode(post.num_comments),
            cleanDomain(post.domain),
            unicode(post.gilded),
            unicode(post.score),
            unicode(ranking),
            created_local,
            now_time,
            post.url,
        ])

    csvSave(thread_data, output_name)
    return thread_data
def userData(list_items, subreddit_name):
    """Look up the author of each post and save the account data as CSV.

    Args:
        list_items: iterable of post objects whose authors are looked up.
        subreddit_name: used only to build the output file name.

    Returns:
        The list of rows that was handed to csvSave, one per post that has
        an author. Each row is [username, created_local_time,
        comment_karma, link_karma, has_verified_email, is_gold, is_mod].
    """
    str_time = time.strftime('%m%d%y_%I%p', time.localtime())
    output_name = subreddit_name + "_users_" + str_time + ".csv"
    r = praw.Reddit(user_agent='blah')

    user_data = []
    for post in list_items:
        author = post.author
        if author is None:
            # No author object means there is no account to look up; this
            # used to raise AttributeError and abort the whole export.
            continue

        reddit_username = author._case_name
        user = r.get_redditor(reddit_username)
        # user.created is a unix timestamp; store it as local time text.
        date_created = time.strftime('%Y-%m-%d %H:%M:%S %Z',
                                     time.localtime(user.created))

        user_data.append([
            reddit_username,
            date_created,
            unicode(user.comment_karma),
            unicode(user.link_karma),
            user.has_verified_email,
            user.is_gold,
            user.is_mod,
        ])

    csvSave(user_data, output_name)
    return user_data