import configparser
import traceback
from datetime import timedelta

import praw
import prawcore
import requests

# Project-local imports, assumed from the rest of the bot (module paths are not
# shown in this file): log is the configured logger, static and config hold
# constants such as USER_AGENT, utils provides datetime helpers, Queue is a
# bounded de-duplication queue, and ReturnType is the bot's result-status enum.


class Reddit:
    def __init__(self, user, no_post):
        log.info(f"Initializing reddit class: user={user} no_post={no_post}")
        self.no_post = no_post
        try:
            self.reddit = praw.Reddit(user, user_agent=static.USER_AGENT)
        except configparser.NoSectionError:
            log.error(f"User {user} not in praw.ini, aborting")
            raise ValueError(f"User {user} not in praw.ini")
        static.ACCOUNT_NAME = self.reddit.user.me().name
        log.info(f"Logged into reddit as /u/{static.ACCOUNT_NAME}")
        self.processed_comments = Queue(100)
        self.consecutive_timeouts = 0
        self.timeout_warn_threshold = 1

    def run_function(self, function, arguments):
        # Call a praw function and translate known failure modes into a
        # ReturnType instead of letting the exception propagate.
        output = None
        result = None
        try:
            output = function(*arguments)
        except praw.exceptions.APIException as err:
            for return_type in ReturnType:
                if err.error_type == return_type.name:
                    result = return_type
                    break
            if result is None:
                raise
        except prawcore.exceptions.Forbidden:
            result = ReturnType.FORBIDDEN
        except IndexError:
            result = ReturnType.QUARANTINED
        if result is None:
            result = ReturnType.SUCCESS
        return output, result

    def is_message(self, item):
        return isinstance(item, praw.models.Message)

    def get_messages(self, count=500):
        log.debug("Fetching unread messages")
        return list(self.reddit.inbox.unread(limit=count))

    def reply_message(self, message, body):
        log.debug(f"Replying to message: {message.id}")
        if self.no_post:
            log.info(body)
            return ReturnType.SUCCESS
        else:
            output, result = self.run_function(message.reply, [body])
            return result

    def mark_read(self, message):
        log.debug(f"Marking message as read: {message.id}")
        if not self.no_post:
            message.mark_read()

    def reply_comment(self, comment, body):
        log.debug(f"Replying to comment: {comment.id}")
        if self.no_post:
            log.info(body)
            # Placeholder id so callers can store something in no_post mode
            return "xxxxxx", ReturnType.SUCCESS
        else:
            output, result = self.run_function(comment.reply, [body])
            if output is not None:
                return output.id, result
            else:
                return None, result

    def send_message(self, username, subject, body):
        log.debug(f"Sending message to u/{username}")
        if self.no_post:
            log.info(body)
            return ReturnType.SUCCESS
        else:
            redditor = self.reddit.redditor(username)
            output, result = self.run_function(redditor.message, [subject, body])
            return result

    def get_comment(self, comment_id):
        log.debug(f"Fetching comment by id: {comment_id}")
        if comment_id == "xxxxxx":  # placeholder id from no_post mode
            return None
        else:
            return self.reddit.comment(comment_id)

    def edit_comment(self, body, comment=None, comment_id=None):
        if comment is None:
            comment = self.get_comment(comment_id)
        log.debug(f"Editing comment: {comment.id}")
        if self.no_post:
            log.info(body)
            result = ReturnType.SUCCESS  # was previously unbound in this branch
        else:
            output, result = self.run_function(comment.edit, [body])
        return result

    def delete_comment(self, comment):
        log.debug(f"Deleting comment: {comment.id}")
        if not self.no_post:
            try:
                comment.delete()
            except Exception:
                log.warning(f"Error deleting comment: {comment.id}")
                log.warning(traceback.format_exc())
                return False
        return True

    def quarantine_opt_in(self, subreddit_name):
        log.debug(f"Opting in to subreddit: {subreddit_name}")
        if not self.no_post:
            try:
                self.reddit.subreddit(subreddit_name).quaran.opt_in()
            except Exception:
                log.warning(f"Error opting in to subreddit: {subreddit_name}")
                log.warning(traceback.format_exc())
                return False
        return True

    def get_keyword_comments(self, keyword, last_seen):
        # If nothing has been processed yet, skip comments at exactly
        # last_seen so the same batch isn't returned twice.
        if not len(self.processed_comments.list):
            last_seen = last_seen + timedelta(seconds=1)

        log.debug(f"Fetching comments for keyword: {keyword} : {utils.get_datetime_string(last_seen)}")
        url = f"https://api.pushshift.io/reddit/comment/search?q={keyword}&limit=100&sort=desc"
        try:
            response = requests.get(url, headers={'User-Agent': static.USER_AGENT}, timeout=10)
            if response.status_code != 200:
                log.warning(f"Could not parse data for search term: {keyword} status: {response.status_code}")
                return []
            comments = response.json()['data']
            if self.timeout_warn_threshold > 1:
                log.warning(f"Recovered from timeouts after {self.consecutive_timeouts} attempts")
            self.consecutive_timeouts = 0
            self.timeout_warn_threshold = 1
        except requests.exceptions.ReadTimeout:
            # Warn with a widening threshold: after 5 timeouts, then 20, then 45, ...
            self.consecutive_timeouts += 1
            if self.consecutive_timeouts >= pow(self.timeout_warn_threshold, 2) * 5:
                log.warning(f"{self.consecutive_timeouts} consecutive timeouts for search term: {keyword}")
                self.timeout_warn_threshold += 1
            return []
        except Exception:
            log.warning(f"Could not parse data for search term: {keyword}")
            log.warning(traceback.format_exc())
            return []

        if not len(comments):
            log.warning(f"No comments found for search term: {keyword}")
            return []

        # Results are newest first; stop at the first comment older than last_seen.
        result_comments = []
        for comment in comments:
            date_time = utils.datetime_from_timestamp(comment['created_utc'])
            if last_seen > date_time:
                break
            if not self.processed_comments.contains(comment['id']):
                result_comments.append(comment)

        log.debug(f"Found comments: {len(result_comments)}")
        return result_comments

    def mark_keyword_comment_processed(self, comment_id):
        self.processed_comments.put(comment_id)
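
# A minimal usage sketch for the Reddit wrapper above, not part of the class
# itself: the praw.ini section name "example_bot" and the reply text are
# hypothetical placeholders, and no_post=True logs replies instead of posting
# them, so the pass makes no changes on reddit.
def _demo_inbox_pass():
    reddit = Reddit("example_bot", no_post=True)
    for message in reddit.get_messages(count=100):
        # Comment replies also land in the inbox; only handle direct messages
        if not reddit.is_message(message):
            continue
        result = reddit.reply_message(message, "Thanks, your message was received.")
        if result == ReturnType.SUCCESS:
            reddit.mark_read(message)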
class RedditKeywordWatcher:
    def __init__(self, keyword):
        self.keyword = keyword
        self.processed_comments = Queue(100)
        self.consecutive_timeouts = 0
        self.timeout_warn_threshold = 1
        self.pushshift_lag = 0  # estimated pushshift ingest delay, in minutes
        self.pushshift_lag_checked = None

    def get(self):
        last_seen = utils.get_last_seen(self.keyword)
        log.debug(f"Fetching comments for keyword: {self.keyword} : {last_seen}")
        url = f"https://api.pushshift.io/reddit/comment/search?q={self.keyword}&limit=100&sort=desc&fields=created_utc,id"
        lag_url = "https://api.pushshift.io/reddit/comment/search?limit=1&sort=desc"
        try:
            response = requests.get(url, headers={'User-Agent': config.USER_AGENT}, timeout=10)
            if response.status_code != 200:
                log.warning(f"Could not parse data for search term: {self.keyword} status: {response.status_code}")
                return []
            comments = response.json()['data']

            # Refresh the ingest-lag estimate at most once every ten minutes,
            # using the age of the newest comment pushshift has indexed.
            if self.pushshift_lag_checked is None or \
                    utils.datetime_now() - timedelta(minutes=10) > self.pushshift_lag_checked:
                log.debug("Updating pushshift comment lag")
                lag_response = requests.get(lag_url, headers={'User-Agent': config.USER_AGENT}, timeout=10)
                if lag_response.status_code == 200:
                    comment_created = utils.datetime_from_timestamp(lag_response.json()['data'][0]['created_utc'])
                    # total_seconds() rather than .seconds, so lags over a day aren't truncated
                    self.pushshift_lag = round((utils.datetime_now() - comment_created).total_seconds() / 60, 0)
                    self.pushshift_lag_checked = utils.datetime_now()

            if self.timeout_warn_threshold > 1:
                log.warning(f"Recovered from timeouts after {self.consecutive_timeouts} attempts")
            self.consecutive_timeouts = 0
            self.timeout_warn_threshold = 1
        except requests.exceptions.ReadTimeout:
            # Warn with a widening threshold: after 5 timeouts, then 20, then 45, ...
            self.consecutive_timeouts += 1
            if self.consecutive_timeouts >= pow(self.timeout_warn_threshold, 2) * 5:
                log.warning(f"{self.consecutive_timeouts} consecutive timeouts for search term: {self.keyword}")
                self.timeout_warn_threshold += 1
            return []
        except Exception:
            log.warning(f"Could not parse data for search term: {self.keyword}")
            log.warning(traceback.format_exc())
            return []

        if not len(comments):
            log.warning(f"No comments found for search term: {self.keyword}")
            return []

        # Results are newest first; stop at the first comment older than last_seen.
        result_comments = []
        for comment in comments:
            date_time = utils.datetime_from_timestamp(comment['created_utc'])
            if last_seen > date_time:
                break
            if not self.processed_comments.contains(comment['id']):
                result_comments.append(comment)

        return result_comments

    def set_processed(self, comment_id):
        self.processed_comments.put(comment_id)
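
# A sketch of a polling loop built on RedditKeywordWatcher. It assumes the bot
# persists each keyword's high-water mark elsewhere via utils.get_last_seen;
# the handle_match callback and the 60-second interval are illustrative
# assumptions, not part of the class above.
import time

def _demo_watch(keyword, handle_match):
    watcher = RedditKeywordWatcher(keyword)
    while True:
        for comment in watcher.get():
            log.info(f"Matched comment {comment['id']}, pushshift lag ~{watcher.pushshift_lag} minutes")
            handle_match(comment)
            watcher.set_processed(comment['id'])
        time.sleep(60)  # pushshift indexing lags by minutes, so faster polling gains little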