def _reported_item_target(report_stamp, sr_dict):
    """Resolve a reported-stamp element to ``(permalink, subreddit)``.

    The permalink is the ``href`` of the link inside the first
    ``<li class="first">`` found under the stamp's parent element; the
    subreddit name is parsed out of that URL and lowercased before the
    lookup.  ``subreddit`` is ``None`` when the name is not a key of
    ``sr_dict``.
    """
    permalink = (report_stamp.parent
                 .findAll('li', attrs={'class': 'first'})[0].a['href'])
    sub_name = re.search(r'^http://www.reddit.com/r/([^/]+)',
                         permalink).group(1).lower()
    try:
        return permalink, sr_dict[sub_name]
    except KeyError:
        return permalink, None


def _stamp_report_count(report_stamp):
    """Return the integer formed by the trailing digits of the stamp text."""
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return int(re.search(r'(\d+)$', report_stamp.text).group(1))


def check_reports_html(sr_dict):
    """Does report alerts/reapprovals, requires loading HTML page.

    Fetches the ``/r/mod/about/reports`` page through the module-level
    ``r`` object, parses it with BeautifulSoup and makes two passes:

    1. For every reported-stamp element whose subreddit is in ``sr_dict``
       and has a truthy ``report_threshold``, performs an ``'alert'``
       action once the report count reaches the threshold, unless the
       ``ActionLog`` already holds an alert for that subreddit/permalink.
    2. For every approval-checkmark element whose subreddit is in
       ``sr_dict`` and has ``auto_reapprove`` set, re-approves the
       submission and records it in ``AutoReapproval``.

    Args:
        sr_dict: mapping of lowercased subreddit name to the subreddit
            record; items from subreddits missing from it are ignored.
    """
    logging.info('Checking reports html page')
    reports_page = r._request('http://www.reddit.com/r/mod/about/reports')
    soup = BeautifulSoup(reports_page)

    # check for report alerts
    for reported_item in soup.findAll(
            attrs={'class': 'rounded reported-stamp stamp'}):
        permalink, subreddit = _reported_item_target(reported_item, sr_dict)
        if subreddit is None or not subreddit.report_threshold:
            continue
        if _stamp_report_count(reported_item) < subreddit.report_threshold:
            continue

        try:
            # check log to see if this item has already had an alert
            ActionLog.query.filter(
                and_(ActionLog.subreddit_id == subreddit.id,
                     ActionLog.permalink == permalink,
                     ActionLog.action == 'alert')).one()
        except NoResultFound:
            c = Condition()
            c.action = 'alert'
            perform_action(subreddit, permalink, c)

    # do auto-reapprovals
    for approved_item in soup.findAll(
            attrs={'class': 'approval-checkmark'}):
        # the stamp is looked up from the checkmark's grandparent element
        report_stamp = approved_item.parent.parent.findAll(
            attrs={'class': 'rounded reported-stamp stamp'})[0]

        permalink, subreddit = _reported_item_target(report_stamp, sr_dict)
        if subreddit is None or not subreddit.auto_reapprove:
            continue

        num_reports = _stamp_report_count(report_stamp)

        try:
            # see if this item has already been auto-reapproved
            entry = (AutoReapproval.query.filter(
                and_(AutoReapproval.subreddit_id == subreddit.id,
                     AutoReapproval.permalink == permalink))
                .one())
            in_db = True
        except NoResultFound:
            entry = AutoReapproval()
            entry.subreddit_id = subreddit.id
            entry.permalink = permalink
            entry.original_approver = (re.search(r'approved by (.+)$',
                                                 approved_item['title'])
                                       .group(1))
            entry.total_reports = 0
            entry.first_approval_time = datetime.utcnow()
            in_db = False

        # An item with no AutoReapproval row is only re-approved when the
        # checkmark title does not name the configured account; an item
        # that already has a row is always re-approved.
        own_title = ('approved by ' +
                     cfg_file.get('reddit', 'username').lower())
        if in_db or approved_item['title'].lower() != own_title:
            sub = r.get_submission(permalink)
            sub.approve()
            entry.total_reports += num_reports
            entry.last_approval_time = datetime.utcnow()

            db.session.add(entry)
            db.session.commit()
            logging.info(' Re-approved %s', entry.permalink)
def check_items(name, items, sr_dict, cond_dict, stop_time):
    """Checks the items generator for any matching conditions.

    Args:
        name: listing type.  ``'submission'``, ``'spam'``, ``'comment'``
            and ``'report'`` are special-cased below; the value is also
            used as the key into each subreddit's entry of ``cond_dict``
            and to build the ``last_<name>`` attribute set on the
            subreddit record.
        items: iterable of items to check.
        sr_dict: mapping of lowercased subreddit display name to the
            subreddit record.
        cond_dict: mapping of lowercased subreddit display name to a
            mapping of listing type to that listing's conditions.
        stop_time: datetime; iteration stops at the first item created
            at or before it.

    Any exception raised while iterating is logged and the session is
    rolled back; the comment averages are still updated afterwards.
    """
    item_count = 0
    comment_counts = Counter()
    start_time = time()
    seen_subs = set()

    logging.info('Checking new %ss', name)

    try:
        for item in items:
            # skip any items in /new that have been approved
            if name == 'submission' and item.approved_by:
                continue

            item_time = datetime.utcfromtimestamp(item.created_utc)
            if item_time <= stop_time:
                break

            sub_key = item.subreddit.display_name.lower()
            subreddit = sr_dict[sub_key]
            conditions = cond_dict[sub_key][name]

            # don't need to check for shadowbanned unless we're in spam
            check_shadowbanned = (name == 'spam')
            for condition in conditions:
                condition.check_shadowbanned = check_shadowbanned

            item_count += 1
            if name == 'comment':
                comment_counts[sub_key] += 1

            # only the first item seen per subreddit sets last_<name>
            if subreddit.name not in seen_subs:
                setattr(subreddit, 'last_'+name, item_time)
                seen_subs.add(subreddit.name)

            logging.debug(' Checking item %s', get_permalink(item))

            # check removal conditions, stop checking if any matched
            if check_conditions(subreddit, item,
                                [c for c in conditions
                                 if c.action == 'remove']):
                continue

            # remaining condition types, checked in this fixed order
            for action in ('set_flair', 'approve', 'alert', 'report'):
                check_conditions(subreddit, item,
                                 [c for c in conditions
                                  if c.action == action])

            # if doing reports, check auto-reapproval if enabled
            if (name == 'report' and subreddit.auto_reapprove and
                    item.approved_by is not None):
                try:
                    # see if this item has already been auto-reapproved
                    entry = (session.query(AutoReapproval).filter(
                        AutoReapproval.permalink == get_permalink(item))
                        .one())
                    in_db = True
                except NoResultFound:
                    entry = AutoReapproval()
                    entry.subreddit_id = subreddit.id
                    entry.permalink = get_permalink(item)
                    entry.original_approver = item.approved_by.name
                    entry.total_reports = 0
                    entry.first_approval_time = datetime.utcnow()
                    in_db = False

                # an item with no existing row is only re-approved when
                # the approver is not the configured account
                if (in_db or item.approved_by.name !=
                        cfg_file.get('reddit', 'username')):
                    item.approve()
                    entry.total_reports += item.num_reports
                    entry.last_approval_time = datetime.utcnow()

                    session.add(entry)
                    session.commit()
                    logging.info(' Re-approved %s', entry.permalink)
                    log_request('reapprove')

        session.commit()
    except Exception as e:
        logging.error(' ERROR: %s', e)
        session.rollback()

    # This isn't really correct, since we don't collect any 0 samples
    # but the difference won't matter much in practice
    for sub_key in comment_counts:
        record = sr_dict[sub_key]
        prev_total = record.avg_comments * record.avg_comments_samples
        new_avg = ((prev_total + comment_counts[sub_key]) /
                   (record.avg_comments_samples + 1))
        record.avg_comments = new_avg
        record.avg_comments_samples += 1
    session.commit()

    logging.info(' Checked %s items in %s',
                 item_count, elapsed_since(start_time))
    # Floor division keeps the logged count a whole number on Python 3,
    # where '/' on two ints yields a float.
    log_request('listing', item_count // 100 + 1)
def check_items(name, items, sr_dict, stop_time):
    """Run the matching conditions against each item of a listing.

    Args:
        name: listing type.  ``'submission'`` and ``'report'`` are
            special-cased below; the value is also passed to
            ``filter_conditions`` and used to build the ``last_<name>``
            attribute set on the subreddit record.
        items: iterable of items to check.
        sr_dict: mapping of lowercased subreddit display name to the
            subreddit record; items whose subreddit is missing are
            counted as skipped.
        stop_time: datetime; iteration stops at the first item created
            at or before it.

    Any exception raised while iterating is logged and the database
    session is rolled back.
    """
    began = time()
    checked = 0
    skipped = 0
    skipped_names = set()
    stamped = set()

    logging.info('Checking new %ss', name)

    try:
        for item in items:
            # items in /new that were already approved are ignored
            if name == 'submission' and item.approved_by:
                continue

            created = datetime.utcfromtimestamp(item.created_utc)
            if created <= stop_time:
                break

            try:
                subreddit = sr_dict[item.subreddit.display_name.lower()]
            except KeyError:
                skipped += 1
                skipped_names.add(item.subreddit.display_name.lower())
                continue

            # '== None' is deliberate: it is a column comparison, not an
            # identity test
            top_level = (subreddit.conditions
                         .filter(Condition.parent_id == None)
                         .all())
            conditions = filter_conditions(name, top_level)

            checked += 1
            # only the first item seen per subreddit sets last_<name>
            if subreddit.name not in stamped:
                setattr(subreddit, 'last_'+name, created)
                stamped.add(subreddit.name)

            # a matched removal condition ends processing of this item
            removed = check_conditions(
                subreddit, item,
                [c for c in conditions if c.action == 'remove'])
            if removed:
                continue

            # remaining condition types, checked in this fixed order
            for action in ('set_flair', 'approve', 'alert'):
                check_conditions(
                    subreddit, item,
                    [c for c in conditions if c.action == action])

            # auto-reapproval only applies to approved items in reports
            if not (name == 'report' and subreddit.auto_reapprove and
                    item.approved_by is not None):
                continue

            try:
                # look for an existing auto-reapproval row for this item
                entry = (AutoReapproval.query.filter(
                    AutoReapproval.permalink == get_permalink(item))
                    .one())
            except NoResultFound:
                entry = AutoReapproval()
                entry.subreddit_id = subreddit.id
                entry.permalink = get_permalink(item)
                entry.original_approver = item.approved_by.name
                entry.total_reports = 0
                entry.first_approval_time = datetime.utcnow()
                in_db = False
            else:
                in_db = True

            # an item with no existing row is only re-approved when the
            # approver is not the configured account
            if (in_db or item.approved_by.name !=
                    cfg_file.get('reddit', 'username')):
                item.approve()
                entry.total_reports += item.num_reports
                entry.last_approval_time = datetime.utcnow()

                db.session.add(entry)
                db.session.commit()
                logging.info(' Re-approved %s', entry.permalink)

        db.session.commit()
    except Exception as e:
        logging.error(' ERROR: %s', e)
        db.session.rollback()

    logging.info(' Checked %s items, skipped %s items in %s (skips: %s)',
                 checked, skipped, elapsed_since(began),
                 ', '.join(skipped_names))