def sub_monitor_check_post_old(self, submission, monitored_sub):
    """Run the subreddit monitor against a single submission.

    Nothing happens when the monitor reports the submission as already
    checked.  When the post cannot be found through the unit of work it is
    converted and sent to the ``postingest`` queue, and this call ends
    without checking it.  Otherwise the monitor's ``should_check_post``
    filter is applied and, if it passes, ``check_submission`` is called.

    :param submission: Reddit submission; its ``id`` is used for the lookups
    :param monitored_sub: Monitored subreddit settings; its
        ``title_ignore_keywords`` is split on commas when set
    :return: None
    """
    if self.sub_monitor.has_post_been_checked(submission.id):
        log.debug('Post %s has already been checked', submission.id)
        return

    with self.uowm.start() as uow:
        post = uow.posts.get_by_post_id(submission.id)
        if not post:
            # The message used to read "does exist", the opposite of what
            # this branch handles.
            log.info('Post %s does not exist, sending to ingest queue',
                     submission.id)
            post = submission_to_post(submission)
            celery.send_task(
                'redditrepostsleuth.core.celery.ingesttasks.save_new_post',
                args=[post],
                queue='postingest')
            return

    title_keywords = []
    if monitored_sub.title_ignore_keywords:
        title_keywords = monitored_sub.title_ignore_keywords.split(',')

    if not self.sub_monitor.should_check_post(
            post, title_keyword_filter=title_keywords):
        return
    self.sub_monitor.check_submission(submission, monitored_sub, post)
Пример #2
0
    def save_unknown_post(self, post_id: Text) -> Optional[Post]:
        """
        Fetch and pre-process a post that was requested but not yet ingested.

        :param post_id: Submission ID
        :return: The processed Post, or None when pre-processing raised
            InvalidImageUrlException or Forbidden, returned nothing, or the
            post type is not 'image'
        """
        submission = self.reddit.submission(post_id)
        try:
            converted = submission_to_post(submission)
            post = pre_process_post(converted, self.uowm, None)
        except InvalidImageUrlException:
            return None
        except Forbidden:
            log.error('Failed to download post %s, appears we are banned',
                      post_id)
            return None

        # Only a non-empty image post counts as a successful ingest.
        if post and post.post_type == 'image':
            return post

        log.error(
            'Problem ingesting post.  Either failed to save or it is not an image'
        )
        return None
Пример #3
0
def save_unknown_post(post_id: str, uowm: UnitOfWorkManager,
                      reddit: RedditManager) -> Post:
    """
    Fetch and pre-process a post that was requested but not yet ingested.
    :param post_id: Submission ID
    :param uowm: Unit of work manager passed on to pre_process_post
    :param reddit: Reddit manager used to look up the submission
    :return: The processed post, or None when nothing came back or the
        post type is not 'image'
    """
    converted = submission_to_post(reddit.submission(post_id))
    post = pre_process_post(converted, uowm, None)

    # Only a non-empty image post counts as a successful ingest.
    if post and post.post_type == 'image':
        return post

    log.error(
        'Problem ingesting post.  Either failed to save or it is not an image'
    )
    return None
Пример #4
0
 def ingest_new_posts(self):
     """Stream submissions from the 'all' subreddit and queue each for ingest.

     Runs forever.  A Forbidden error raised while streaming is ignored and
     the stream is reopened on the same subreddit handle; any other
     Exception is logged and the subreddit handle is fetched again.
     """

     def enqueue(submission):
         # Convert one submission and hand it to the postingest queue.
         log.debug('Saving post %s', submission.id)
         post = submission_to_post(submission)
         if not post.post_type:
             # No type came out of the conversion; derive one from the URL.
             post.post_type = post_type_from_url(post.url)
             log.error('Last resort post type %s', post.post_type)
             log.error(post.url)
         save_new_post.apply_async((post, ), queue='postingest')

     while True:
         subreddit = self.reddit.subreddit('all')
         try:
             while True:
                 try:
                     for submission in subreddit.stream.submissions():
                         enqueue(submission)
                 except Forbidden:
                     # Deliberately ignored: just reopen the stream.
                     continue
         except Exception:
             log.exception('INGEST THREAD DIED', exc_info=True)
Пример #5
0
    def save_unknown_post(self, post_id: str) -> Post:
        """
        Fetch and pre-process a post that was requested but not yet ingested.

        :param post_id: Submission ID
        :return: The processed post, or None when pre-processing raised
            InvalidImageUrlException or returned nothing
        """
        log.info('Post %s does not exist, attempting to ingest', post_id)
        submission = self.reddit.submission(post_id)
        try:
            converted = submission_to_post(submission)
            post = pre_process_post(converted, self.uowm, None)
        except InvalidImageUrlException:
            log.error('Failed to ingest post %s.  URL appears to be bad',
                      post_id)
            # Fall through so the generic failure below is logged as well.
            post = None

        if post:
            return post

        log.error(
            'Problem ingesting post.  Either failed to save or it is not an image'
        )
        return None
Пример #6
0
    def ingest_without_stream(self):
        """
        Poll the newest submissions of the 'all' subreddit and queue unseen ones.

        Runs forever.  Each pass requests up to 500 new submissions, skips
        the IDs already handled, and sends the rest to the ``postingest``
        queue.  A 429 response makes the loop wait 60 seconds before the
        next request.  Any other error is logged by the outer handler and
        polling resumes.
        """
        # A set keeps the per-submission membership test O(1).
        seen_posts = set()
        while True:
            try:
                # Forget old IDs once the cache grows past 10000 entries.
                if len(seen_posts) > 10000:
                    seen_posts.clear()
                try:
                    submissions = list(
                        self.reddit.subreddit('all').new(limit=500))
                except ResponseException as e:
                    if e.response.status_code != 429:
                        # Re-raise so the outer handler logs the real error
                        # instead of continuing with `submissions` undefined
                        # or left over from the previous pass.
                        raise
                    log.error('Too many requests from IP.  Waiting')
                    time.sleep(60)
                    continue
                except Exception as e:
                    if 'code: 429' not in str(e):
                        raise
                    log.error('Too many requests from IP.  Waiting')
                    time.sleep(60)
                    continue

                log.debug('%s posts from API', len(submissions))
                for submission in submissions:
                    if submission.id in seen_posts:
                        continue
                    post = submission_to_post(submission)
                    if not post.post_type:
                        # No type came out of the conversion; derive one
                        # from the URL.
                        post.post_type = post_type_from_url(post.url)
                    save_new_post.apply_async((post, ), queue='postingest')
                    seen_posts.add(post.post_id)
            except Exception:
                log.exception('INGEST THREAD DIED', exc_info=True)