def check_exception(self, post):
    """
    Decides whether the given post qualifies as an exception.

    The checks run in this order and the first hit wins:
      1. post.link_flair_css_class is None
      2. post.removal_reason is not None
      3. a comment that is not deleted, is distinguished as 'moderator', and
         whose body matches any regex in self.policy.exception_list

    Checks 1 and 2 are best-effort: an ordinary error while reading the
    attribute is ignored and the next check runs. SystemExit and
    KeyboardInterrupt are no longer swallowed.

    :param post: the post to inspect; must expose link_flair_css_class,
                 removal_reason and comments
    :return: True if any check hits, False otherwise (also False when the
             comment scan itself fails)
    """
    try:
        #check for link flair
        if post.link_flair_css_class is None:
            return True
    except Exception:
        #best effort: fall through to the next check
        pass
    try:
        #check for removal reason
        if post.removal_reason is not None:
            return True
    except Exception:
        #best effort: fall through to the comment scan
        pass
    #check top level comments for specific keyword matches
    try:
        for comment in post.comments:
            #only live moderator comments are considered
            if not Actions.is_deleted(comment) and comment.distinguished == 'moderator':
                #test keyword
                for exception in self.policy.exception_list:
                    if re.search(exception, comment.body):
                        return True
        return False
    except Exception:
        #NOTE(review): the one second pause is kept from the original code,
        #presumably as a back-off after a failed request - confirm it is still wanted
        time.sleep(1)
        return False
Пример #2
0
# Moderator comment templates that name the submitter of a removed post, tried
# in order. Each entry is (guard, extractor): the extractor is only attempted
# when the guard matches, and a comment whose guard matched is never tested
# against the later entries. Patterns are kept as strings so nothing is
# evaluated at import time.
_SUBMITTER_PATTERNS = (
    (r'^(?:\*\*)?/u/', r'^(?:\*\*)?/u/([\w\d_\-\*]+)[,\s]'),
    (r'^All apologies /u/([\w\d_\-\*]+)[,\s]', r'^All apologies /u/([\w\d_\-\*]+)[,\s]'),
    # The class used to be [\w\d\*-_]; the unescaped '-' made '*' to '_' a range,
    # which let ',', '.', '/' and ':' leak into the captured user name.
    (r'/u/([\w\d_\-\*]+), your submission', r'/u/([\w\d_\-\*]+), your submission'),
    (r'^Hey /u/([\w\d_\-\*]+)[,\s]', r'^Hey /u/([\w\d_\-\*]+)[,\s]'),
)


def _find_submitter(comments):
    """
    Returns the user name mentioned by the first moderator comment that fits
    one of _SUBMITTER_PATTERNS, or None when no comment does.

    Moderator comments that fit no template but still mention a user (other
    than one containing 'evilnight') are printed so they can be inspected.
    """
    for comment in comments:
        if comment.distinguished != 'moderator':
            continue
        body = comment.body
        for guard, extractor in _SUBMITTER_PATTERNS:
            if re.search(guard, body):
                match = re.search(extractor, body)
                if match:
                    return match.group(1)
                #template recognised but no name extracted: skip this comment
                break
        else:
            #no template applied: report unrecognised mentions
            match = re.search(r'/u/([\w\d_\-\*]+)[,\s]', body)
            if match and 'evilnight' not in match.group(1):
                print(body)
                print(match.group(1))
    return None


def main():
    """
    Fills in the submitter column of reddit_record for every row where it is
    null.

    Posts are fetched in batches of 100. For a post that is not deleted the
    author name is stored; for a deleted post the moderator comments are
    searched for a user mention, and None is stored when nothing is found.
    Each batch is written and committed before the next one is loaded.
    A post whose processing raises is retried after a one second pause until
    it succeeds.
    """
    cred = CRImport("credentials.cred")
    db = sqlite3.connect('database.db', detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
    try:
        cursor = db.cursor()
        post_list = [post[0] for post in cursor.execute('select short_url from reddit_record where submitter is null').fetchall()]
        praw = utilitymethods.create_multiprocess_praw(cred)
        reddit = utilitymethods.get_subreddit(cred, praw, 'listentothis')
        #NOTE(review): mods is not read below; the call is kept in case it is relied on for a side effect
        mods = [mod.name for mod in Actions.get_mods(praw, reddit)]
        stride = 100
        total_len = len(post_list)
        count = 0
        while post_list:
            num_loaded = min(stride, len(post_list))
            reddit_posts = Actions.get_by_ids(praw, post_list[:num_loaded])
            update_list = []
            print("{} / {}".format(count, total_len))
            count += num_loaded
            for i, post in enumerate(reddit_posts):
                #skip rows that have been filled in since the list was read
                submitter = cursor.execute('select submitter from reddit_record where short_url = ?', (post_list[i],)).fetchone()[0]
                if submitter is not None:
                    continue
                #posts are expected back in the order they were requested
                assert(post_list[i] == post.name)
                success = False
                while not success:
                    try:
                        if Actions.is_deleted(post):
                            #author is gone, look for a moderator comment naming the user
                            update_list.append((_find_submitter(post.comments), post_list[i]))
                        else:
                            update_list.append((post.author.name, post_list[i]))
                        if update_list[-1][0] is not None and update_list[-1][0].endswith(','):
                            print(update_list[-1])
                        success = True
                    except Exception:
                        #retry the same post after a short pause
                        success = False
                        time.sleep(1)
            #sanity check: a captured name must never carry a trailing comma
            assert (not any(val[0].endswith(',') for val in update_list if val[0] is not None))
            post_list = post_list[num_loaded:]

            cursor.executemany('update reddit_record set submitter = ? where short_url = ?', update_list)
            db.commit()
    finally:
        #release the connection even when a batch fails
        db.close()
    def scan(self):
        """
        Scans the previously collected reddit posts for deleted posts

        Unprocessed entries returned by db.get_reddit for the scan-history
        window are loaded in batches of 100. For each batch:
          - channels missing from the database are added; if that fails the
            batch is skipped and left unprocessed
          - for deleted posts without a stored submitter, check_for_submitter
            is consulted and any result is saved
          - deleted posts that pass check_exception are recorded as exceptions,
            the others add a strike to their (channel, domain) pair when they
            are newer than the global strike window or a recount is requested
        Afterwards channels and users over their strike limits are blacklisted,
        strikes older than the global strike window are subtracted, and
        entries older than the scan history are removed.

        Returns early (None) when no entries exist or a batch of posts cannot
        be loaded.
        """
        #scan old messages, see if deleted
        with DataBase.DataBaseWrapper(self.database_file, False) as db:
            now = datetime.datetime.now()
            global_strike_date = now - self.policy.Strike_Counter_Global_Strike_History
            history_date = now - self.policy.Strike_Counter_Scan_History
            entries = db.get_reddit(date_added=history_date, processed=0, return_dateadded=True)
            if entries is None:
                logging.warning(u"No reddit entries found in database...")
                return

            #NOTE(review): collected below but not read anywhere in this method
            new_strike_channels = []
            #loop over entries
            stride = 100
            while len(entries):
                num_loaded = min(stride, len(entries))
                batch = entries[:num_loaded]
                #forget the batch right away so that every way out of this
                #iteration, including the continue below, makes progress
                #(previously a failing add_channels retried the same batch forever)
                entries = entries[num_loaded:]

                (ids, channels, domains, add_dates) = zip(*batch)
                ids = list(ids)
                #see if we have submitters
                have_submitters = db.have_submitter(ids)
                #any new detected usernames go here
                new_submitters_list = []
                channels = list(channels)
                domains = list(domains)
                loaded = Actions.get_by_ids(self.praw, ids)
                if not loaded:
                    logging.info(u"Historical posts not loaded...")
                    return

                #make sure posts retrieved
                posts = list(loaded)
                if not posts:
                    logging.info(u"Bad post retrieve")
                    return

                #make sure channels exist
                channel_keys = list(zip(channels, domains))
                exists = db.channel_exists(channel_keys)
                add_channels = [key for key, present in zip(channel_keys, exists) if not present]

                #resolve all the added ids
                if add_channels:
                    if not db.add_channels(add_channels):
                        logging.info(u"Error adding channels to channel_record, skipping processing of posts")
                        continue #if there was an error adding the channels, don't mark as processed

                #check for deleted / exceptions
                increment_posts = {}
                processed_posts = []
                excepted_posts = []
                for i, post in enumerate(posts):
                    if not Actions.is_deleted(post):
                        continue
                    key = (channels[i], domains[i])
                    if not have_submitters[i]:
                        val = self.check_for_submitter(post)
                        if val is not None:
                            new_submitters_list.append((val, ids[i]))
                    if not self.check_exception(post):
                        #only posts inside the global strike window count, unless recounting
                        if add_dates[i] > global_strike_date or self.recount_strikes:
                            increment_posts[key] = increment_posts.get(key, 0) + 1
                        if key not in new_strike_channels:
                            new_strike_channels.append(key)
                    else:
                        excepted_posts.append(post.name)
                    processed_posts.append(post.name)

                if len(increment_posts):
                    #add strikes
                    db.add_strike([(increment_posts[key],) + key for key in increment_posts])
                    if __debug__:
                        logging.info(u"Strike Counter found {} new deleted posts...".format(len(increment_posts)))

                if len(increment_posts) or len(excepted_posts):
                    #remove from consideration (so we don't count them over and over)
                    db.set_processed(processed_posts)
                    db.set_exception(excepted_posts)

                #update submitters
                if len(new_submitters_list):
                    db.update_submitter(new_submitters_list)

            #check for rule breaking channels
            channels = db.get_channels(strike_count=self.policy.Strike_Count_Max, blacklist=Blacklist.BlacklistEnums.NotFound)

            if channels and len(channels):
                if __debug__:
                    logging.info(u"{} new channels added to the blacklist".format(len(channels)))
                db.set_blacklist(channels, Blacklist.BlacklistEnums.Blacklisted, self.owner.credentials['USERNAME'],
                                 u"Global strike count exceeded")

            #check for user strike counts
            user_strikes = db.max_processed_from_user(not_found_value=Blacklist.BlacklistEnums.NotFound,
                                                      strike_limit=self.policy.User_Strike_Count_Max)
            if len(user_strikes):
                #one reason per blacklisted (channel, domain) pair, in the same order
                reason_list = [u"User strike count exceeded by {} ({} strikes counted) for channel {} on domain {}"
                               .format(user[1], user[0], user[2], user[3]) for user in user_strikes]
                new_blacklist = [(user[2], user[3]) for user in user_strikes]
                db.set_blacklist(new_blacklist, Blacklist.BlacklistEnums.Blacklisted, self.owner.credentials['USERNAME'],
                                 reason_list)

            #update global strike counts

            #find posts older than scan period marked as processed
            old_strikes = db.processed_older_than(global_strike_date, old_flag=0)
            if old_strikes is not None and len(old_strikes):
                decrement_count = {}
                for pair in old_strikes:
                    decrement_count[pair] = decrement_count.get(pair, 0) + 1

                #and remove them from the count
                db.subtract_strikes_and_mark([(decrement_count[pair],) + pair for pair in decrement_count], global_strike_date)

            #remove older than scan period
            db.remove_reddit_older_than(history_date)

            #turn off recount if true
            if self.recount_strikes:
                self.recount_strikes = False
                logging.info(u'Strike recount completed successfully')

            if __debug__:
                logging.info(u"Strike count completed successfully at {}".format(datetime.datetime.now()))