def sub_monitor_check_post(self, submission: Dict,
                           monitored_sub: MonitoredSub):
    """
    Check a single submission from a monitored subreddit for reposts.

    Skips posts already checked or filtered out by the monitored sub's
    settings.  If the post hasn't been ingested yet it is sent to the
    ingest queue instead of being checked now.
    :param submission: Raw submission data as a Reddit JSON dict
    :param monitored_sub: Monitored subreddit configuration
    """
    if self.sub_monitor.has_post_been_checked(submission['id']):
        log.debug('Post %s has already been checked', submission['id'])
        return
    start = time.perf_counter()
    with self.uowm.start() as uow:
        post = uow.posts.get_by_post_id(submission['id'])
        if not post:
            # Bug fix: message previously read 'does exist'
            log.info('Post %s does not exist, sending to ingest queue',
                     submission['id'])
            post = pushshift_to_post(submission, source='reddit_json')
            celery.send_task(
                'redditrepostsleuth.core.celery.ingesttasks.save_new_post',
                args=[post],
                queue='postingest')
            return

    title_keywords = []
    if monitored_sub.title_ignore_keywords:
        title_keywords = monitored_sub.title_ignore_keywords.split(',')

    if not self.sub_monitor.should_check_post(
            post,
            monitored_sub.check_image_posts,
            monitored_sub.check_link_posts,
            title_keyword_filter=title_keywords):
        return

    self.sub_monitor.check_submission(monitored_sub, post)
    # Use the logger instead of a stray debug print to stdout
    log.debug('Total time: %s', round(time.perf_counter() - start, 5))
Exemplo n.º 2
0
    def _process_user_report(self, msg: Message):
        """
        Persist a user report received via private message and acknowledge it.

        Already-saved reports are skipped.  Messages whose body can't be
        parsed are remembered in a bounded failed-check cache so they aren't
        reprocessed.
        :param msg: The private message containing the report
        """
        with self.uowm.start() as uow:
            if uow.user_report.get_first_by_message_id(msg.id):
                log.debug('Report %s has already been saved', msg.id)
                return

        report_data = self._load_msg_body_data(msg.body)
        if not report_data:
            log.info('Failed to get report data from message %s.  Not saving',
                     msg.id)
            # Reset the cache once it gets large so it can't grow unbounded
            if len(self.failed_checks) > 10000:
                self.failed_checks = []
            if msg.id not in self.failed_checks:
                self.failed_checks.append(msg.id)
            return

        new_report = UserReport(post_id=report_data['post_id'],
                                reported_by=msg.author.name,
                                report_type=msg.subject,
                                meme_template=report_data['meme_template'],
                                msg_body=msg.body,
                                message_id=msg.id,
                                sent_for_voting=False)

        with self.uowm.start() as uow:
            uow.user_report.add(new_report)
            uow.commit()

        self.response_handler.reply_to_private_message(msg, REPORT_RESPONSE)
def sub_monitor_check_post_old(self, submission, monitored_sub):
    """
    Legacy check of a single monitored-subreddit submission for reposts.

    Skips already-checked posts; posts not yet ingested are sent to the
    ingest queue instead of being checked now.
    :param submission: PRAW submission object
    :param monitored_sub: Monitored subreddit configuration
    """
    if self.sub_monitor.has_post_been_checked(submission.id):
        log.debug('Post %s has already been checked', submission.id)
        return

    with self.uowm.start() as uow:
        post = uow.posts.get_by_post_id(submission.id)
        if not post:
            # Bug fix: message previously read 'does exist'
            log.info('Post %s does not exist, sending to ingest queue',
                     submission.id)
            post = submission_to_post(submission)
            celery.send_task(
                'redditrepostsleuth.core.celery.ingesttasks.save_new_post',
                args=[post],
                queue='postingest')
            return

    title_keywords = []
    if monitored_sub.title_ignore_keywords:
        title_keywords = monitored_sub.title_ignore_keywords.split(',')

    if not self.sub_monitor.should_check_post(
            post, title_keyword_filter=title_keywords):
        return
    self.sub_monitor.check_submission(submission, monitored_sub, post)
    def _final_meme_filter(self, searched_hash: Text,
                           matches: List[ImageSearchMatch],
                           target_hamming) -> List[ImageSearchMatch]:
        """
        Re-hash each candidate with the meme hash and keep close matches.

        Candidates whose meme hash can't be computed, or whose hamming
        distance from the searched hash exceeds the target, are dropped.
        :param searched_hash: Hash of the searched image
        :param matches: Candidate matches to filter
        :param target_hamming: Maximum allowed hamming distance
        :return: Matches within the target hamming distance
        """
        log.debug('MEME FILTER - Filtering %s matches', len(matches))
        if not matches:
            return matches

        kept = []
        for candidate in matches:
            try:
                candidate_hash = self._get_meme_hash(candidate.post.url)
            except Exception:
                log.error('Failed to get meme hash for %s', candidate.post.id)
                continue

            distance = hamming(searched_hash, candidate_hash)
            if distance > target_hamming:
                log.info(
                    'Meme Hamming Filter Reject - Target: %s Actual: %s - %s',
                    target_hamming, distance,
                    f'https://redd.it/{candidate.post.post_id}')
                continue

            log.debug('Match found: %s - H:%s',
                      f'https://redd.it/{candidate.post.post_id}', distance)
            candidate.hamming_distance = distance
            candidate.hash_size = len(searched_hash)
            kept.append(candidate)

        return kept
Exemplo n.º 5
0
    def _reply_to_comment(self, response: SummonsResponse) -> SummonsResponse:
        """
        Reply to the summons comment referenced by the given response.

        On success the new reply's comment ID is stored on the response.
        Known PRAW APIException error types (deleted comment, locked thread,
        too old) are recorded on the response message instead of raising;
        rate limits and unrecognized errors are re-raised.
        :param response: SummonsResponse holding the summons and message text
        :return: The same response, updated with the outcome
        """
        log.debug('Sending response to summons comment %s. MESSAGE: %s',
                  response.summons.comment_id, response.message)
        try:
            reply_comment = self.response_handler.reply_to_comment(
                response.summons.comment_id, response.message)
            response.comment_reply_id = reply_comment.id
        except APIException as e:
            if e.error_type == 'DELETED_COMMENT':
                log.debug('Comment %s has been deleted',
                          response.summons.comment_id)
                response.message = 'DELETED COMMENT'
            elif e.error_type == 'THREAD_LOCKED':
                log.info('Comment %s is in a locked thread',
                         response.summons.comment_id)
                response.message = 'THREAD LOCKED'
            elif e.error_type == 'TOO_OLD':
                log.info('Comment %s is too old to reply to',
                         response.summons.comment_id)
                response.message = 'TOO OLD'
            elif e.error_type == 'RATELIMIT':
                # Rate limits must bubble up so the caller can retry later
                log.exception('PRAW Ratelimit exception', exc_info=False)
                raise
            else:
                log.exception('APIException without error_type', exc_info=True)
                raise
        except Exception:
            log.exception('Problem leaving response', exc_info=True)
            raise

        return response
Exemplo n.º 6
0
def update_ban_list(uowm: UnitOfWorkManager,
                    reddit: Reddit,
                    notification_svc: NotificationService = None) -> None:
    """
    Go through banned subs and see if we're still banned.

    Subs checked less than a day ago are skipped.  Subs the bot is no
    longer banned on are removed from the ban list (with an optional
    notification); the rest get their last-checked timestamp refreshed.
    :param uowm: UnitOfWorkManager
    :param reddit: Reddit client
    :param notification_svc: Optional service used to announce removals
    """
    log.info('Starting Job: Update Subreddit Bans')
    with uowm.start() as uow:
        bans = uow.banned_subreddit.get_all()
        for ban in bans:
            last_checked_delta = (datetime.utcnow() - ban.last_checked).days
            if last_checked_delta < 1:
                log.debug('Banned sub %s last checked %s days ago.  Skipping',
                          ban.subreddit, last_checked_delta)
                continue
            if is_bot_banned(ban.subreddit, reddit):
                log.info('[Subreddit Ban Check] Still banned on %s',
                         ban.subreddit)
                ban.last_checked = func.utc_timestamp()
            else:
                log.info('[Subreddit Ban Check] No longer banned on %s',
                         ban.subreddit)
                uow.banned_subreddit.remove(ban)
                if notification_svc:
                    notification_svc.send_notification(
                        f'Removed {ban.subreddit} from ban list',
                        subject='Subreddit Removed From Ban List!')
            # Commit per sub so a later failure doesn't lose earlier updates
            uow.commit()
 def filter_title(match: SearchMatch):
     """Reject a match whose lowercased title contains any ignore keyword."""
     title_lower = match.post.title.lower()
     for keyword in keywords:
         log.info('Title: %s - KW: %s', match.post.title, keyword)
         if keyword not in title_lower:
             continue
         log.debug('Title Filter Reject. Title contains %s', keyword)
         return False
     return True
 def sub_filter(match: SearchMatch):
     """Keep only matches from the same subreddit as the searched post."""
     if match.post.subreddit == subreddit:
         return True
     log.debug('Same Sub Reject: Orig sub: %s - Match Sub: %s - %s',
               subreddit, match.post.subreddit,
               f'https://redd.it/{match.post.post_id}')
     return False
Exemplo n.º 9
0
 def _update_wiki_page(self, wiki_page: WikiPage,
                       new_config: Dict) -> NoReturn:
     """Serialize new_config to JSON and write it to the sub's wiki page."""
     sub_name = wiki_page.subreddit.display_name
     log.info('Writing new config to %s', sub_name)
     log.debug('New Config For %s: %s', sub_name, new_config)
     # TODO - Check what exceptions can be thrown here
     wiki_page.edit(json.dumps(new_config))
    def _offer_watch(self, submission: Submission) -> NoReturn:
        """
        Offer to add a repost watch to an OC post via private message.

        Does nothing when the feature is disabled or this author has already
        been messaged about this post.
        :param submission: The top post to offer a watch on
        """
        if not self.config.top_post_offer_watch:
            log.debug('Top Post Offer Watch Disabled')
            return

        log.info('Offer watch to %s on post %s', submission.author.name,
                 submission.id)

        with self.uowm.start() as uow:
            # Have we already PM'd this user about this post?
            existing_response = uow.bot_private_message.get_by_user_source_and_post(
                submission.author.name, 'toppost', submission.id)

        if existing_response:
            log.info('Already sent a message to %s', submission.author.name)
            return

        try:
            self.response_handler.send_private_message(
                submission.author,
                TOP_POST_WATCH_BODY.format(
                    shortlink=f'https://redd.it/{submission.id}'),
                subject=TOP_POST_WATCH_SUBJECT,
                source='toppost',
                post_id=submission.id)
        except APIException as e:
            if e.error_type == 'NOT_WHITELISTED_BY_USER_MESSAGE':
                # User doesn't accept PMs from the bot; nothing more to do
                log.error('Not whitelisted API error')
            else:
                log.exception('Unknown error sending PM to %s',
                              submission.author.name,
                              exc_info=True)
 def hamming_filter(match: ImageSearchMatch):
     """Keep matches whose hamming distance is within the target threshold."""
     if match.hamming_distance > target_hamming_distance:
         log.debug('Hamming Filter Reject - Target: %s Actual: %s - %s',
                   target_hamming_distance, match.hamming_distance,
                   f'https://redd.it/{match.post.post_id}')
         return False
     return True
Exemplo n.º 12
0
def get_link_reposts(
    url: Text,
    uowm: UnitOfWorkManager,
    search_settings: SearchSettings,
    post: Post = None,
    get_total: bool = False,
) -> LinkSearchResults:
    """
    Search the database for other posts sharing the same URL.

    The URL is matched by MD5 hash; query and total search times are
    recorded on the returned result object.
    :param url: URL to search for
    :param uowm: UnitOfWorkManager for database access
    :param search_settings: Settings to attach to the search results
    :param post: Optional post being checked
    :param get_total: Also count the total number of link posts searched
    :return: LinkSearchResults with matches and timing data
    """
    digest = md5(url.encode('utf-8')).hexdigest()
    with uowm.start() as uow:
        results = LinkSearchResults(url,
                                    search_settings,
                                    checked_post=post,
                                    search_times=LinkSearchTimes())
        results.search_times.start_timer('query_time')
        results.search_times.start_timer('total_search_time')
        matching_posts = uow.posts.find_all_by_url_hash(digest)
        results.search_times.stop_timer('query_time')
        log.debug('Query time: %s', results.search_times.query_time)
        results.matches = [SearchMatch(url, m) for m in matching_posts]

        if get_total:
            results.total_searched = uow.posts.count_by_type('link')

    return results
def cross_post_filter(match: SearchMatch) -> bool:
    """Reject matches that are crossposts of another submission."""
    if not match.post.crosspost_parent:
        return True
    log.debug('Crosspost Filter Reject - %s',
              f'https://redd.it/{match.post.post_id}')
    return False
Exemplo n.º 14
0
    def _reply_to_submission(self, submission_id: str, comment_body) -> Optional[Comment]:
        """
        Leave a comment on the given submission and record the API call.

        :param submission_id: ID of the submission to reply to
        :param comment_body: Markdown body of the comment
        :return: The created Comment, or None when the submission can't be
                 fetched or the bot is forbidden from the subreddit
        :raises RateLimitException: When Reddit's rate limit is hit
        """
        submission = self.reddit.submission(submission_id)
        if not submission:
            log.error('Failed to get submission %s', submission_id)
            return

        try:
            start_time = perf_counter()
            comment = submission.reply(comment_body)
            # Record call duration plus the remaining rate-limit budget
            self._record_api_event(
                float(round(perf_counter() - start_time, 2)),
                'reply_to_submission',
                self.reddit.reddit.auth.limits['remaining']
            )
            log.info('Left comment at: https://reddit.com%s', comment.permalink)
            log.debug(comment_body)
            self._log_response(comment)
            return comment
        except APIException as e:
            if e.error_type == 'RATELIMIT':
                log.exception('Reddit rate limit')
                raise RateLimitException('Hit rate limit')
            else:
                log.exception('Unknown error type of APIException', exc_info=True)
                raise
        except Forbidden:
            # Banned from this sub; remember it so we stop trying
            self._save_banned_sub(submission.subreddit.display_name)
        except Exception:
            log.exception('Unknown exception leaving comment on post https://redd.it/%s', submission_id, exc_info=True)
            raise
 def days_filter(match: SearchMatch):
     """Reject matches older than the configured cutoff in days."""
     # Compute the age once so the comparison and the log line agree
     # (previously utcnow() was called twice, giving two snapshots)
     age_in_days = (datetime.utcnow() - match.post.created_at).days
     if age_in_days > cutoff_days:
         log.debug('Date Cutoff Reject: Target: %s Actual: %s - %s',
                   cutoff_days, age_in_days,
                   f'https://redd.it/{match.post.post_id}')
         return False
     return True
 def date_filter(match: SearchMatch):
     """Reject matches created on or after the cutoff date."""
     if match.post.created_at >= cutoff_date:
         # Bug fix: format was '%Y-%d-%m', which swapped day and month
         # in the logged dates
         log.debug('Date Filter Reject: Target: %s Actual: %s - %s',
                   cutoff_date.strftime('%Y-%m-%d %H:%M:%S'),
                   match.post.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                   f'https://redd.it/{match.post.post_id}')
         return False
     return True
Exemplo n.º 17
0
 def save_event(self, event: InfluxEvent):
     """Write an event to Influx, queueing it while saving is disabled."""
     log.debug('Unsaved events %s', len(self._unsaved_events))
     if self.can_save():
         self._write_to_influx(event)
         self._flush_unsaved()
         return
     log.info('Event logging disabled until %s', self._retry_time)
     self._unsaved_events.append(event)
Exemplo n.º 18
0
 def send_notification(self, msg: Text, **kwargs) -> None:
     """
     Fan a message out to every configured notification agent.

     A failure in one agent is logged and does not stop delivery to the
     remaining agents.
     :param msg: Message body to send
     :param kwargs: Extra options passed through to each agent's send()
     """
     for agent in self.notification_agents:
         log.info('Sending notification to %s', agent.name)
         log.debug(msg)
         try:
             agent.send(msg, **kwargs)
         except Exception:
             log.exception('Failed to send notification', exc_info=True)
Exemplo n.º 19
0
def save_new_post(self, post):
    """Ingest a post if it isn't already stored, then queue a repost check."""
    with self.uowm.start() as uow:
        if uow.posts.get_by_post_id(post.post_id):
            return
        log.debug('Post %s: Ingesting', post.post_id)
        processed = pre_process_post(post, self.uowm, self.config.image_hash_api)
        if not processed:
            return
        ingest_repost_check.apply_async((processed, self.config), queue='repost')
        log.debug('Post %s: Sent post to repost queue', processed.post_id)
 def _return_redditor(self, username: Text) -> Redditor:
     """Return a cached Redditor for username, fetching and caching on a miss."""
     cached = next((r for r in self._redditors if r.name == username), None)
     if cached is not None:
         log.debug('Returning cached redditor %s', cached.name)
         return cached
     fetched = self.reddit.redditor(username)
     if fetched:
         self._redditors.append(fetched)
         log.debug('Returning new redditor %s', username)
         return fetched
 def _return_submission(self, submission_id: Text) -> Submission:
     """Return a cached Submission for the ID, fetching and caching on a miss."""
     cached = next(
         (s for s in self._submissions if s.id == submission_id), None)
     if cached is not None:
         log.debug('Returning cached submission %s', submission_id)
         return cached
     fetched = self.reddit.submission(submission_id)
     if fetched:
         self._submissions.append(fetched)
         log.debug('Returning new submission %s', submission_id)
         return fetched
 def _return_comment(self, comment_id: Text) -> Comment:
     """
     Return a cached Comment for the ID, fetching and caching on a miss.
     :param comment_id: Reddit comment ID
     """
     for comment in self._comments:
         if comment.id == comment_id:
             log.debug('Returning cached comment %s', comment_id)
             return comment
     new_comment = self.reddit.comment(comment_id)
     if new_comment:
         # Log only when we actually have a comment to return, matching
         # the sibling _return_* helpers (previously logged unconditionally)
         log.debug('Returning new comment %s', comment_id)
         self._comments.append(new_comment)
         return new_comment
 def _return_subreddit(self, sub_name: Text) -> Subreddit:
     """Return a cached Subreddit for sub_name, fetching and caching on a miss."""
     cached = next(
         (s for s in self._subreddits if s.display_name == sub_name), None)
     if cached is not None:
         log.debug('Returning cached sub %s', sub_name)
         return cached
     fetched = self.reddit.subreddit(sub_name)
     if fetched:
         log.debug('Returning new subreddit %s', sub_name)
         self._subreddits.append(fetched)
         return fetched
Exemplo n.º 24
0
def save_pushshift_results_archive(self, data):
    """Convert archived pushshift submissions to posts and queue them for ingest."""
    with self.uowm.start() as uow:
        for submission in data:
            post_id = submission['id']
            if uow.posts.get_by_post_id(post_id):
                log.debug('Skipping pushshift post: %s', post_id)
                continue
            new_post = pushshift_to_post(submission)
            log.debug('Saving pushshift post: %s', post_id)
            save_new_post.apply_async((new_post,), queue='pushshift_ingest')
    def monitor_for_mentions(self):
        """
        Poll the inbox for username mentions and save each new one as a summons.

        Runs forever: mentions older than 24 hours, known bots, previously
        failed comments, and already-saved summonses are skipped.  Backs off
        60s on rate limits and sleeps 20s between polls.
        """
        bad_mentions = []
        while True:
            try:
                for comment in self.reddit.inbox.mentions():
                    # Ignore mentions older than 24 hours
                    if comment.created_utc < datetime.utcnow().timestamp(
                    ) - 86400:
                        log.debug('Skipping old mention. Created at %s',
                                  datetime.fromtimestamp(comment.created_utc))
                        continue

                    if comment.author.name.lower() in [
                            'sneakpeekbot', 'automoderator'
                    ]:
                        continue

                    if comment.id in bad_mentions:
                        continue

                    with self.uowm.start() as uow:
                        existing_summons = uow.summons.get_by_comment_id(
                            comment.id)
                        if existing_summons:
                            log.debug('Skipping existing mention %s',
                                      comment.id)
                            continue
                        summons = Summons(
                            post_id=comment.submission.id,
                            comment_id=comment.id,
                            comment_body=comment.body.replace('\\', ''),
                            summons_received_at=datetime.fromtimestamp(
                                comment.created_utc),
                            requestor=comment.author.name,
                            subreddit=comment.subreddit.display_name)
                        uow.summons.add(summons)
                        try:
                            uow.commit()
                        except DataError as e:
                            log.error('SQLAlchemy Data error saving comment')
                            # Remember bad comments so we never retry them
                            bad_mentions.append(comment.id)
                            continue
            except ResponseException as e:
                if e.response.status_code == 429:
                    log.error('IP Rate limit hit.  Waiting')
                    time.sleep(60)
                    continue
            except AssertionError as e:
                if 'code: 429' in str(e):
                    log.error('Too many requests from IP.  Waiting')
                    time.sleep(60)
                    # Bug fix: was 'return', which silently killed the
                    # monitor loop instead of resuming after the wait
                    continue
            except Exception as e:
                log.exception('Mention monitor failed', exc_info=True)

            time.sleep(20)
Exemplo n.º 26
0
def link_repost_check(self, posts, ):
    """
    Check a batch of link posts for reposts by URL hash.

    Posts with blacklisted URL hashes are skipped, and URLs shared more
    than 10k times are added to the blacklist.  The first remaining match
    is recorded as the original when a repost is found.
    :param posts: Posts to check
    """
    with self.uowm.start() as uow:
        for post in posts:
            """
            if post.url_hash == '540f1167d27dcca2ea2772443beb5c79':
                continue
            """
            if post.url_hash in self.link_blacklist:
                log.info('Skipping blacklisted URL hash %s', post.url_hash)
                continue

            log.debug('Checking URL for repost: %s', post.url_hash)
            search_results = get_link_reposts(post.url, self.uowm, get_default_link_search_settings(self.config),
                                              post=post)

            # URLs shared this often are junk; stop checking them entirely
            if len(search_results.matches) > 10000:
                log.info('Link hash %s shared %s times. Adding to blacklist', post.url_hash, len(search_results.matches))
                self.link_blacklist.append(post.url_hash)
                self.notification_svc.send_notification(f'URL has been shared {len(search_results.matches)} times. Adding to blacklist. \n\n {post.url}')

            search_results = filter_search_results(
                search_results,
                uitl_api=f'{self.config.util_api}/maintenance/removed'
            )
            search_results.search_times.stop_timer('total_search_time')
            log.info('Link Query Time: %s', search_results.search_times.query_time)
            if not search_results.matches:
                # Bug fix: message previously read 'Not matching linkes'
                log.debug('No matching links for post %s', post.post_id)
                post.checked_repost = True
                uow.posts.update(post)
                uow.commit()
                continue

            log.info('Found %s matching links', len(search_results.matches))
            log.info('Creating Link Repost. Post %s is a repost of %s', post.post_id, search_results.matches[0].post.post_id)
            repost_of = search_results.matches[0].post
            new_repost = LinkRepost(post_id=post.post_id, repost_of=repost_of.post_id, author=post.author, source='ingest', subreddit=post.subreddit)
            repost_of.repost_count += 1
            post.checked_repost = True
            uow.posts.update(post)
            uow.link_repost.add(new_repost)

            try:
                uow.commit()
                self.event_logger.save_event(RepostEvent(event_type='repost_found', status='success',
                                                         repost_of=search_results.matches[0].post.post_id,
                                                         post_type=post.post_type))
            except IntegrityError as e:
                # Duplicate repost record; roll back and keep going
                uow.rollback()
                log.exception('Error saving link repost', exc_info=True)
                self.event_logger.save_event(RepostEvent(event_type='repost_found', status='error',
                                                         repost_of=search_results.matches[0].post.post_id,
                                                         post_type=post.post_type))
        self.event_logger.save_event(
            BatchedEvent(event_type='repost_check', status='success', count=len(posts), post_type='link'))
Exemplo n.º 27
0
 def reply_to_comment(self,
                      comment_id: Text,
                      comment_body: Text,
                      subreddit: Text = None) -> Optional[Comment]:
     """Reply to a comment, or return a dummy Comment when live responses are off."""
     if not self.live_response:
         log.debug('Live response disabled')
         # TODO - 1/12/2021 - Sketchy at best
         return Comment(self.reddit.reddit, id='1111')
     return self._reply_to_comment(comment_id,
                                   comment_body,
                                   subreddit=subreddit)
 def _remove_duplicates(
         self, matches: List[ImageSearchMatch]) -> List[ImageSearchMatch]:
     """
     Drop matches pointing at an already-seen post, keeping first occurrence.
     :param matches: Candidate image search matches
     :return: Matches with duplicate posts removed, original order preserved
     """
     log.debug('Remove duplicates from %s matches', len(matches))
     results = []
     # Track seen post IDs in a set for O(1) membership instead of
     # re-scanning the results list for every match (was O(n^2))
     seen_post_ids = set()
     for match in matches:
         if match.post.id in seen_post_ids:
             continue
         seen_post_ids.add(match.post.id)
         results.append(match)
     log.debug('%s matches after duplicate removal', len(results))
     return results
Exemplo n.º 29
0
def create_image_posts(
        post: Post) -> Tuple[Post, RedditImagePost, RedditImagePostCurrent]:
    """
    Build every image-post record needed for the multiple image indexes.

    Since we need to store multiple copies of an image post for the
    multiple indexes, this creates all of them in one shot.
    :param post: Post obj
    :return: The original post plus both index copies
    """
    index_copy = post_to_image_post(post)
    current_copy = post_to_image_post_current(post)
    log.debug('Post %s: Created image_post and image_post_current',
              post.post_id)
    return post, index_copy, current_copy
Exemplo n.º 30
0
def filter_dead_urls(match: SearchMatch) -> bool:
    """HEAD-check the matched post's URL; keep only links that return 200."""
    request_headers = {'User-Agent': random.choice(USER_AGENTS)}
    try:
        resp = requests.head(match.post.url, timeout=3, headers=request_headers)
    except (ConnectionError, SSLError, ReadTimeout):
        return False
    if resp.status_code != 200:
        log.debug('Active URL Reject:  https://redd.it/%s', match.post.post_id)
        return False
    return True