Example #1
def test_get_by_id(praw, ids):
    results = a.get_by_ids(praw, ids)  # None signals failure
    if results:
        results = list(results)  # materialize the generator
        if results:
            print("Passed")
            return True
    print("Failed")
    return False
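None of the examples on this page include Actions.get_by_ids itself. A minimal sketch of what such a helper could look like, assuming a current PRAW instance where Reddit.info(fullnames=...) resolves fullnames in batches; the body below is an assumption for illustration, not the project's actual implementation:

    def get_by_ids(praw, ids):
        # hypothetical sketch: resolve fullnames ('t3_...') to submission
        # objects; return None on bad input, otherwise a generator, which
        # matches how the examples on this page consume the result
        if not ids:
            return None
        return praw.info(fullnames=list(ids))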
Example #2
def get_posts(self, lim):
    # ask the mod log for the last lim link removals; limit=lim already
    # caps the listing, so iterating it directly is enough
    try:
        log = self.sub.mod.log(action="removelink", limit=lim)
        posts = [entry.target_fullname for entry in log]
        return Actions.get_by_ids(self.praw, posts)
    except Exception as e:
        logging.error(str(e))
        if __debug__:
            logging.exception(e)
Example #3
def get_historial_posts(self, goto):
    """Scans the sub with more intensive detection of previously found reddit posts.

    Allows for mass processing of past posts.
    """
    last_id = None
    last_seen = datetime.datetime.now()
    posts = []
    if self.policy.Use_Reddit_Analytics_For_Historical_Scan:
        while last_seen > goto:
            # NOTE: this raise makes the rest of the loop unreachable, so the
            # Reddit-Analytics scan is effectively disabled
            raise NotImplementedError
            if last_id:
                self.RA_params["after"] = last_id
            try:
                data = requests.get("http://api.redditanalytics.com/getPosts",
                                    params=self.RA_params, headers=self.RA_headers)
                json_data = json.loads(data.content)
                ids = [post["name"] for post in json_data["data"]]
                with DataBase.DataBaseWrapper(self.file) as db:
                    exists = db.reddit_exists(ids)
                if exists is None:
                    return None
                # keep only the ids we have not seen before
                ids = [ids[i] for i in range(len(ids)) if not exists[i]]
                if not ids:
                    continue
                temp_posts = Actions.get_by_ids(self.praw, ids)
                if temp_posts is None:
                    return None
                posts.extend(temp_posts)
                # page backwards in time: the oldest entry becomes the next cursor
                last_id = json_data["metadata"]["oldest_id"]
                last_seen = datetime.datetime.fromtimestamp(json_data["metadata"]["oldest_date"])
            except ValueError as e:
                if str(e).startswith("No JSON object"):
                    logging.error("Reddit-Analytics is down, retrying historical scan after pause...")
                else:
                    logging.error(str(e))
                    if __debug__:
                        logging.exception(e)
                # temporary fix to avoid endless waiting while RA is down
                return []
            except Exception as e:
                logging.error(str(e))
                if __debug__:
                    logging.exception(e)
                # temporary fix to avoid endless waiting while RA is down
                return []
    return posts
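The loop above is cursor pagination: each response's metadata.oldest_id is fed back as the "after" parameter of the next request, walking backwards in time until the goto date is reached. A stripped-down sketch of just that pattern, reusing the field names from the example (paginate_backwards is a hypothetical name):

    import datetime
    import requests

    def paginate_backwards(url, params, headers, stop_at):
        # walk a timestamped listing backwards until stop_at is reached
        oldest = datetime.datetime.now()
        while oldest > stop_at:
            payload = requests.get(url, params=params, headers=headers).json()
            for post in payload["data"]:
                yield post
            # the oldest item on this page becomes the cursor for the next one
            params["after"] = payload["metadata"]["oldest_id"]
            oldest = datetime.datetime.fromtimestamp(payload["metadata"]["oldest_date"])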
Example #4
def main():
    cred = CRImport("credentials.cred")
    db = sqlite3.connect('database.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    cursor = db.cursor()
    post_list = [post[0] for post in cursor.execute(
        'select short_url from reddit_record where submitter is null').fetchall()]
    praw = utilitymethods.create_multiprocess_praw(cred)
    reddit = utilitymethods.get_subreddit(cred, praw, 'listentothis')
    mods = [mod.name for mod in Actions.get_mods(praw, reddit)]
    stride = 100
    total_len = len(post_list)
    count = 0
    while post_list:
        num_loaded = min(stride, len(post_list))
        reddit_posts = Actions.get_by_ids(praw, post_list[:num_loaded])
        update_list = []
        print("{} / {}".format(count, total_len))
        count += stride
        for i, post in enumerate(reddit_posts):
            # skip records that already have a submitter
            submitter = cursor.execute('select submitter from reddit_record where short_url = ?',
                                       (post_list[i],)).fetchone()[0]
            if submitter is not None:
                continue
            assert post_list[i] == post.name
            success = False
            while not success:
                try:
                    success = True
                    if Actions.is_deleted(post):
                        # the author is hidden, so recover the username from the
                        # distinguished removal comment left by a moderator
                        found = False
                        for comment in post.comments:
                            if comment.distinguished != 'moderator':
                                continue
                            # the guard pattern here is looser than the capture
                            # pattern, so match in two steps
                            if re.search(r'^(?:\*\*)?/u/', comment.body):
                                search = re.search(r'^(?:\*\*)?/u/([\w\d_\-\*]+)[,\s]', comment.body)
                                if search:
                                    found = True
                                    update_list.append((search.group(1), post_list[i]))
                                    break
                            elif (search := re.search(r'^All apologies /u/([\w\d_\-\*]+)[,\s]', comment.body)):
                                found = True
                                update_list.append((search.group(1), post_list[i]))
                                break
                            elif (search := re.search(r'/u/([\w\d_\-\*]+), your submission', comment.body)):
                                found = True
                                update_list.append((search.group(1), post_list[i]))
                                break
                            elif (search := re.search(r'^Hey /u/([\w\d_\-\*]+)[,\s]', comment.body)):
                                found = True
                                update_list.append((search.group(1), post_list[i]))
                                break
                            elif (search := re.search(r'/u/([\w\d_\-\*]+)[,\s]', comment.body)):
                                # unrecognized removal message: print it so the
                                # pattern list can be extended
                                if 'evilnight' not in search.group(1):
                                    print(comment.body)
                                    print(search.group(1))
                        if not found:
                            update_list.append((None, post_list[i]))
                    else:
                        update_list.append((post.author.name, post_list[i]))
                    if update_list[-1][0] is not None and update_list[-1][0].endswith(','):
                        print(update_list[-1])
                except Exception:
                    # transient praw/network error: pause and retry this post
                    success = False
                    time.sleep(1)
        assert not any(val[0].endswith(',') for val in update_list if val[0] is not None)
        post_list = post_list[num_loaded:]

        cursor.executemany('update reddit_record set submitter = ? where short_url = ?', update_list)
        db.commit()
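The chain of near-identical regex branches above could also be table-driven, which turns support for a new removal-message format into a one-line change. A sketch covering only the appending branches (the looser first guard and the catch-all debug branch would stay separate); extract_submitter is a hypothetical helper:

    import re

    REMOVAL_PATTERNS = [
        r'^(?:\*\*)?/u/([\w\d_\-\*]+)[,\s]',
        r'^All apologies /u/([\w\d_\-\*]+)[,\s]',
        r'/u/([\w\d_\-\*]+), your submission',
        r'^Hey /u/([\w\d_\-\*]+)[,\s]',
    ]

    def extract_submitter(body):
        # return the first username captured by a known removal-message
        # format, or None if the comment matches none of them
        for pattern in REMOVAL_PATTERNS:
            match = re.search(pattern, body)
            if match:
                return match.group(1)
        return None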
Example #5
    def scan(self):
        """
        Scans the previously collected reddit posts for deleted posts
        """
        #scan old messages, see if deleted
        with DataBase.DataBaseWrapper(self.database_file, False) as db:
            now = datetime.datetime.now()
            global_strike_date = now - self.policy.Strike_Counter_Global_Strike_History
            history_date = now - self.policy.Strike_Counter_Scan_History
            entries = db.get_reddit(date_added=history_date, processed=0, return_dateadded=True)
            if entries is None:
                logging.warning(u"No reddit entries found in database...")
                return

            new_strike_channels = []
            #loop over entries
            stride = 100
            while entries:
                num_loaded = min(stride, len(entries))
                (ids, channels, domains, add_dates) = zip(*entries[:num_loaded])
                ids = list(ids)
                #see if we have submitters
                have_submitters = db.have_submitter(ids)
                #any new detected usernames go here
                new_submitters_list = []
                channels = list(channels)
                domains = list(domains)
                loaded = Actions.get_by_ids(self.praw, ids)
                if not loaded:
                    logging.info(u"Historical posts not loaded...")
                    return

                #make sure posts retrieved
                posts = list(loaded)
                if not posts:
                    logging.info(u"Bad post retrieve")
                    return

                #make sure channels exist
                add_channels = []
                exists = db.channel_exists(list(zip(channels, domains)))
                for i, e in enumerate(exists):
                    if not e:
                        #pull up the url
                        add_channels.append((channels[i], domains[i]))

                #resolve all the added ids
                if add_channels:
                    if not db.add_channels(add_channels):
                        logging.info(u"Error adding channels to channel_record, skipping processing of posts")
                        continue  # if there was an error adding the channels, don't mark as processed

                #check for deleted / exceptions
                increment_posts = {}
                processed_posts = []
                excepted_posts = []
                for i, post in enumerate(posts):
                    if Actions.is_deleted(post):
                        if not have_submitters[i]:
                            val = self.check_for_submitter(post)
                            if val is not None:
                                new_submitters_list.append((val, ids[i]))
                        if not self.check_exception(post):
                            #self.policy.info(u"Deleted post found {}".format(post.name), u"channel = {}, domain = {}".format(channels[i], domains[i]))
                            if add_dates[i] > global_strike_date or self.recount_strikes:
                                key = (channels[i], domains[i])
                                # one new strike per deleted post for this channel
                                increment_posts[key] = increment_posts.get(key, 0) + 1
                            if (channels[i], domains[i]) not in new_strike_channels:
                                new_strike_channels.append((channels[i], domains[i]))
                        else:
                            excepted_posts.append(post.name)
                        processed_posts.append(post.name)

                if increment_posts:
                    #add strikes
                    db.add_strike([(increment_posts[key],) + key for key in increment_posts])
                    if __debug__:
                        logging.info(u"Strike Counter found {} new deleted posts...".format(len(increment_posts)))

                if increment_posts or excepted_posts:
                    #remove from consideration (so we don't count them over and over)
                    db.set_processed(processed_posts)
                    db.set_exception(excepted_posts)

                #update submitters
                if new_submitters_list:
                    db.update_submitter(new_submitters_list)

                #forget old entries
                entries = entries[num_loaded:]

            #check for rule breaking channels
            channels = db.get_channels(strike_count=self.policy.Strike_Count_Max, blacklist=Blacklist.BlacklistEnums.NotFound)

            if channels:
                if __debug__:
                    logging.info(u"{} new channels added to the blacklist".format(len(channels)))
                db.set_blacklist(channels, Blacklist.BlacklistEnums.Blacklisted, self.owner.credentials['USERNAME'],
                                 u"Global strike count exceeded")

            #check for user strike counts
            user_strikes = db.max_processed_from_user(not_found_value=Blacklist.BlacklistEnums.NotFound,
                                                      strike_limit=self.policy.User_Strike_Count_Max)
            if user_strikes:
                reason_list = [u"User strike count exceeded by {} ({} strikes counted) "
                               u"for channel {} on domain {}".format(user[1], user[0], user[2], user[3])
                               for user in user_strikes]
                new_blacklist = [(user[2], user[3]) for user in user_strikes]
                db.set_blacklist(new_blacklist, Blacklist.BlacklistEnums.Blacklisted, self.owner.credentials['USERNAME'],
                                 reason_list)

            #update global strike counts

            #find posts older than scan period marked as processed
            old_strikes = db.processed_older_than(global_strike_date, old_flag=0)
            if old_strikes:
                decrement_count = {}
                for pair in old_strikes:
                    # tally expired strikes per (channel, domain)
                    decrement_count[pair] = decrement_count.get(pair, 0) + 1

                #and remove them from the count
                db.subtract_strikes_and_mark([(decrement_count[pair],) + pair for pair in decrement_count], global_strike_date)

            #remove older than scan period
            db.remove_reddit_older_than(history_date)

            #turn off recount if true
            if self.recount_strikes:
                self.recount_strikes = False
                logging.info(u'Strike recount completed successfully')

            if __debug__:
                logging.info(u"Strike count completed successfully at {}".format(datetime.datetime.now()))