def crawlusermeta():
    """Refresh stored metadata for every user returned by the database.

    Prints the collection returned by ``DbInteract.get_unique_users``, then
    wraps each row in a ``User`` and calls its ``pull_metadata`` method.
    A confirmation line is printed once all rows have been processed.
    """
    users = DbInteract().get_unique_users()
    print(users)
    for user_row in users:
        User(user_row).pull_metadata()
    print("User metadata refreshed")
def crawl(self):
    """Collect this user's comments and submissions as activity records.

    Builds one ``Activity`` per comment (type ``'comment'``) and one per
    submission (type ``'submission'``) from the redditor looked up via
    ``self.id``, then hands the whole batch to
    ``DbInteract.add_activity_records`` in a single call.
    """
    r = Reddit()
    db = DbInteract()
    user = r.conn.redditor(self.id)

    def to_iso(created_utc):
        # NOTE(review): fromtimestamp() without a tz yields *local* naive
        # time even though the source field is named created_utc -- confirm
        # that the stored value is meant to be local time.
        return datetime.datetime.fromtimestamp(created_utc).isoformat()

    # Iterate the listings directly; wrapping them in list() only built a
    # throwaway copy before the loop started.
    activity = [
        Activity(
            comment.subreddit_id,
            comment.author.id,
            'comment',
            to_iso(comment.created_utc))
        for comment in user.comments.new(limit=None)
    ]
    # NOTE(review): comments use `subreddit_id` while submissions use
    # `subreddit.id`; verify both produce the same identifier format.
    activity.extend(
        Activity(
            submission.subreddit.id,
            submission.author.id,
            'submission',
            to_iso(submission.created_utc))
        for submission in user.submissions.new(limit=None)
    )
    db.add_activity_records(activity)
def read_lines(self, activity_file, act_type):
    """Parse a comma-separated activity dump and store its rows.

    The first line of ``activity_file`` is treated as the header. Every
    following line is split on commas; lines whose field count differs from
    the header are skipped, as are rows whose ``author`` is ``"[deleted]"``.

    Each kept row becomes a tuple
    ``(page_id, subreddit, author, record_type, created_utc)``:

    * ``act_type == 'comment'``: ``page_id`` is the last six characters of
      the ``link_id`` column and ``record_type`` is ``'comment'``.
    * any other ``act_type``: ``page_id`` is the ``id`` column and
      ``record_type`` is ``'link'``.

    All tuples are passed to ``DbInteract.add_activity_records`` in one call.

    Args:
        activity_file: Iterable of lines, each either ``str`` or UTF-8
            encoded ``bytes``.
        act_type: ``'comment'`` selects comment parsing; anything else is
            parsed as a link/submission file.
    """
    db = DbInteract()

    def split_fields(raw):
        # Accept both text and binary file objects.
        text = raw if isinstance(raw, str) else raw.decode('utf-8')
        # Drop the line terminator before splitting. Previously data rows
        # kept their trailing "\n", which corrupted the last column (e.g.
        # the `[-6:]` slice of link_id, or the "[deleted]" comparison), and
        # "\r\n" files left a stray "\r" on the last header name.
        return text.rstrip("\r\n").split(",")

    is_comment = act_type == 'comment'
    page_field = "link_id" if is_comment else "id"
    record_type = 'comment' if is_comment else 'link'

    activity = []
    field_list = None
    columns = None
    for line_number, line in enumerate(activity_file):
        if line_number == 0:
            field_list = split_fields(line)
            continue

        row = split_fields(line)
        if len(row) != len(field_list):
            continue

        if columns is None:
            # The header never changes, so resolve column positions once
            # (on the first usable row) instead of once per row.
            subreddit_index = self.index_by_field_name("subreddit", field_list)
            user_id_index = self.index_by_field_name("author", field_list)
            created_utc_index = self.index_by_field_name("created_utc", field_list)
            page_id_index = self.index_by_field_name(page_field, field_list)
            columns = (page_id_index, subreddit_index, user_id_index,
                       created_utc_index)
        page_id_index, subreddit_index, user_id_index, created_utc_index = columns

        author = row[user_id_index]
        if author == "[deleted]":
            continue

        page_id = row[page_id_index]
        if is_comment:
            # Keep only the trailing six characters of link_id.
            page_id = page_id[-6:]
        activity.append((page_id, row[subreddit_index], author, record_type,
                         row[created_utc_index]))

    db.add_activity_records(activity)
def pull_metadata(self):
    """Fetch and store summary metadata for this user, if not already stored.

    Prints ``self.id`` and returns early when
    ``DbInteract.user_metadata_exists`` reports an existing record.
    Otherwise it reads the redditor's ``created_utc``, walks the newest
    comments and submissions to find the oldest ``created_utc`` among them,
    and saves the tuple
    ``(id, max_activity, registration_time, oldest, oldest - registration_time)``.

    ``max_activity`` is true when exactly 1000 comments or exactly 1000
    submissions were returned.

    Any exception raised while doing this is printed rather than propagated,
    so a failure for one user does not interrupt the caller.
    """
    db = DbInteract()
    r = Reddit()
    try:
        print(self.id)
        if db.user_metadata_exists((self.id, )):
            return

        redditor = r.conn.redditor(self.id)
        registration_time = redditor.created_utc

        oldest = None
        # Only the count and the minimum timestamp are needed, so the items
        # themselves are not accumulated.
        comment_count = 0
        for comment in redditor.comments.new(limit=None):
            comment_count += 1
            if oldest is None or comment.created_utc < oldest:
                oldest = comment.created_utc

        submission_count = 0
        for submission in redditor.submissions.new(limit=None):
            submission_count += 1
            if oldest is None or submission.created_utc < oldest:
                oldest = submission.created_utc

        if oldest is None:
            # Without any activity there is no delay to compute. Previously
            # this case surfaced as a swallowed TypeError from
            # `None - registration_time`; nothing was saved then either.
            print("No activity found for user; metadata not saved")
            return

        # NOTE(review): 1000 is presumably the maximum number of items a
        # listing returns, i.e. the history may be truncated -- confirm.
        max_activity = comment_count == 1000 or submission_count == 1000

        db.save_user_metadata((self.id, max_activity, registration_time,
                               oldest, oldest - registration_time))
        print("Saved")
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # carry on with the next user.
        print(e)
def delaypercentile(user_id):
    """Print the delay percentile the database layer computes for a user.

    Args:
        user_id: Passed unchanged to
            ``DbInteract.calculate_delay_percentile``.
    """
    percentile = DbInteract().calculate_delay_percentile(user_id)
    print("User is in the", percentile, "percentile of user delays.")
def activitypercentile(user_id):
    """Print the activity percentile the database layer computes for a user.

    Args:
        user_id: Passed unchanged to
            ``DbInteract.calculate_activity_percentile``.
    """
    percentile = DbInteract().calculate_activity_percentile(user_id)
    print("User is in the", percentile, "percentile of user activity.")
def refresh():
    """Ask the database layer to refresh the relationship graph."""
    # NOTE(review): a second function named `refresh` is visible next to
    # this one; if both share a module scope the later definition replaces
    # this one -- confirm they live in separate scopes.
    DbInteract().refresh_relationship_graph()
def refresh():
    """Ask the database layer to refresh the action graph."""
    # NOTE(review): another function named `refresh` is visible next to
    # this one; if both share a module scope this definition replaces the
    # earlier one -- confirm they live in separate scopes.
    DbInteract().refresh_action_graph()