Exemplo n.º 1
0
    def callback(self, response):
        """will fetch the feed and interprete results (304, etag) or will
        challenge jarr to compare gotten entries with existing ones"""
        try:
            response = response.result()
            response.raise_for_status()
        except Exception as error:
            error_count = self.feed['error_count'] + 1
            logger.exception('%r %r - an error occured while fetching '
                    'feed; bumping  error count to %r',
                    self.feed['id'], self.feed['title'], error_count)
            future = self.query_jarr('put', 'feed/%d' % self.feed['id'],
                                      {'error_count': error_count,
                                       'last_error': str(error),
                                       'user_id': self.feed['user_id']})
            return

        if response.status_code == 304:
            logger.info("%r %r - feed responded with 304",
                        self.feed['id'], self.feed['title'])
            self.clean_feed()
            return
        if 'etag' not in response.headers:
            logger.debug('%r %r - manually generating etag',
                         self.feed['id'], self.feed['title'])
            response.headers['etag'] = 'jarr/"%s"' % to_hash(response.text)
        if response.headers['etag'] and self.feed['etag'] \
                and response.headers['etag'] == self.feed['etag']:
            if 'jarr' in self.feed['etag']:
                logger.info("%r %r - calculated hash matches (%d)",
                            self.feed['id'], self.feed['title'],
                            response.status_code)
            else:
                logger.info("%r %r - feed responded with same etag (%d)",
                            self.feed['id'], self.feed['title'],
                            response.status_code)
            self.clean_feed()
            return
        else:
            logger.debug('%r %r - etag mismatch %r != %r',
                         self.feed['id'], self.feed['title'],
                         response.headers['etag'], self.feed['etag'])
        logger.info('%r %r - cache validation failed, challenging entries',
                    self.feed['id'], self.feed['title'])

        ids, entries = [], {}
        parsed_response = feedparser.parse(response.content)
        for entry in parsed_response['entries']:
            entry_ids = extract_id(entry)
            entry_ids['feed_id'] = self.feed['id']
            entry_ids['user_id'] = self.feed['user_id']
            entries[tuple(sorted(entry_ids.items()))] = entry
            ids.append(entry_ids)
        logger.debug('%r %r - found %d entries %r',
                     self.feed['id'], self.feed['title'], len(ids), ids)
        future = self.query_jarr('get', 'articles/challenge', {'ids': ids})
        updater = JarrUpdater(self.feed, entries, response.headers,
                               parsed_response,
                               self.auth, self.pool, self.session)
        future.add_done_callback(updater.callback)
Exemplo n.º 2
0
async def insert_database(user, feed):

    articles = await parse_feed(user, feed)
    if None is articles:
        return []

    logger.debug('inserting articles for {}'.format(feed.title))

    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                        **extract_id(article))
        exist = existing_article_req.count() != 0
        if exist:
            existing_article = existing_article_req.first()
            is_updated = False
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            content = get_article_content(article)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
                is_updated = True
            if existing_article.content != content:
                existing_article.content = content
                existing_article.readed = False
                is_updated = True
            if is_updated:
                art_contr.update({'entry_id': existing_article.entry_id},
                                 existing_article.dump())
            continue
        article = construct_article(article, feed.dump())
        try:
            new_articles.append(art_contr.create(**article))
            logger.info("New article % (%r) added.",
                        article['title'], article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles
Exemplo n.º 3
0
async def insert_database(user, feed):

    articles = await parse_feed(user, feed)
    if None is articles:
        return []

    logger.debug('inserting articles for {}'.format(feed.title))

    logger.info("Database insertion...")
    new_articles = []
    art_contr = ArticleController(user.id)
    for article in articles:
        existing_article_req = art_contr.read(feed_id=feed.id,
                                              **extract_id(article))
        exist = existing_article_req.count() != 0
        if exist:
            existing_article = existing_article_req.first()
            is_updated = False
            logger.debug("Article %r (%r) already in the database.",
                         article['title'], article['link'])
            content = get_article_content(article)
            if existing_article.title != article['title']:
                existing_article.title = article['title']
                is_updated = True
            if existing_article.content != content:
                existing_article.content = content
                existing_article.readed = False
                is_updated = True
            if is_updated:
                art_contr.update({'entry_id': existing_article.entry_id},
                                 existing_article.dump())
            continue
        article = construct_article(article, feed.dump())
        try:
            new_articles.append(art_contr.create(**article))
            logger.info("New article % (%r) added.", article['title'],
                        article['link'])
        except Exception:
            logger.exception("Error when inserting article in database:")
            continue
    return new_articles
Exemplo n.º 4
0
    def callback(self, response):
        """will fetch the feed and interprete results (304, etag) or will
        challenge jarr to compare gotten entries with existing ones"""
        try:
            response = response.result()
            response.raise_for_status()
        except Exception as error:
            error_count = self.feed['error_count'] + 1
            logger.warn(
                '%r %r - an error occured while fetching '
                'feed; bumping  error count to %r', self.feed['id'],
                self.feed['title'], error_count)
            future = self.query_jarr(
                'put', 'feed/%d' % self.feed['id'], {
                    'error_count': error_count,
                    'last_error': str(error),
                    'user_id': self.feed['user_id']
                })
            return

        if response.status_code == 304:
            logger.info("%r %r - feed responded with 304", self.feed['id'],
                        self.feed['title'])
            self.clean_feed()
            return
        if 'etag' not in response.headers:
            logger.debug('%r %r - manually generating etag', self.feed['id'],
                         self.feed['title'])
            response.headers['etag'] = 'jarr/"%s"' % to_hash(response.text)
        if response.headers['etag'] and self.feed['etag'] \
                and response.headers['etag'] == self.feed['etag']:
            if 'jarr' in self.feed['etag']:
                logger.info("%r %r - calculated hash matches (%d)",
                            self.feed['id'], self.feed['title'],
                            response.status_code)
            else:
                logger.info("%r %r - feed responded with same etag (%d)",
                            self.feed['id'], self.feed['title'],
                            response.status_code)
            self.clean_feed()
            return
        else:
            logger.debug('%r %r - etag mismatch %r != %r', self.feed['id'],
                         self.feed['title'], response.headers['etag'],
                         self.feed['etag'])
        logger.info('%r %r - cache validation failed, challenging entries',
                    self.feed['id'], self.feed['title'])

        ids, entries = [], {}
        parsed_response = feedparser.parse(response.content)
        for entry in parsed_response['entries']:
            entry_ids = extract_id(entry)
            entry_ids['feed_id'] = self.feed['id']
            entry_ids['user_id'] = self.feed['user_id']
            entries[tuple(sorted(entry_ids.items()))] = entry
            ids.append(entry_ids)
        logger.debug('%r %r - found %d entries %r', self.feed['id'],
                     self.feed['title'], len(ids), ids)
        future = self.query_jarr('get', 'articles/challenge', {'ids': ids})
        updater = JarrUpdater(self.feed, entries, response.headers,
                              parsed_response, self.auth)
        future.add_done_callback(updater.callback)