def _process_article(self, source, rsp):
    """Parse a downloaded RSS response into an Article and persist it.

    :param source: feed source identifier; also reused as the article's
        category (see the ``Article(...)`` construction below).
    :param rsp: HTTP response object exposing ``.request.url``, ``.url``
        and ``.content``, or ``None`` when there is nothing to process.

    Never raises: every failure is captured and reported to Sentry via
    ``raven_client`` by the outer handler. Returns ``None`` always.
    """
    try:
        if rsp is None:
            return
        # A falsy response object signals a failed download (presumably an
        # HTTP error status — confirm against the HTTP client in use).
        # Raising here deliberately routes the error into the outer
        # ``except`` so it is reported to Sentry as well as logged.
        if not rsp:
            info = 'download error: %s' % rsp.request.url
            logging.error(info)
            raise Exception(info)
        if not rsp.content:
            info = 'rsp no content: %s' % rsp.request.url
            logging.error(info)
            raise Exception(info)

        data = parse_data(rsp.content)
        if not data.get('content'):
            # Nothing extractable; silently skip this entry.
            return
        data['origin_url'] = rsp.url

        article = Article(source=source, category=source, rss_url=rsp.request.url)
        gen_article_data(source, article, data)

        # First save: detect duplicates early, before the expensive image
        # downloads below.  ``ArticleExsit`` is the project's (misspelled)
        # duplicate-article exception — name defined elsewhere, kept as-is.
        try:
            article.save()
        except (ArticleExsit, NotUniqueError):
            logging.info('duplicated: %s', article.source_url)
            return

        # Mirror remote images locally before the article goes live.
        if article.top_images:
            article.top_images = download_images(
                [image['url'] for image in article.top_images], article.seq_id)
        if article.related_images:
            article.related_images = download_images(
                [image['url'] for image in article.related_images], article.seq_id)

        article.attach_feature(ArticleParser(article.text).gen_content_feature())
        article.set_usable()
        article.save()
        article.warm(warm_conn)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed; real errors are still
        # reported to Sentry and not re-raised (best-effort processing).
        raven_client.captureException()
def save_to_db(self, item):
    """Persist a crawled pipeline ``item`` as an Article, if it qualifies.

    :param item: mapping with ``'url'`` and ``'data'`` keys; ``data`` must
        carry ``'type'`` and may carry ``'text'``.
    :returns: the ``item`` unchanged when it is filtered out (not an
        article, or text shorter than 140 characters) so downstream
        pipeline stages can continue with it; ``None`` otherwise.
        NOTE(review): the duplicate and success paths both return ``None``
        while filter paths return ``item`` — confirm downstream consumers
        expect this asymmetry.
    """
    url = item['url']
    data = item['data']
    # Idiom fix: ``not x == y`` rewritten as ``x != y`` (same behavior).
    if data['type'] != 'article':
        return item
    text = data.get('text')
    if not text or len(text) < 140:
        # Too short to be a real article body; pass the item through.
        return item
    try:
        # NOTE(review): ``.objects.get`` raises DoesNotExist when no row
        # matches ``source_url``; that lands in the outer handler below and
        # is reported to Sentry — confirm the article is expected to exist.
        article = Article.objects.get(source_url=url)
        # NOTE(review): called with 2 args here but 3 in _process_article
        # (``gen_article_data(source, article, data)``) — verify signature.
        gen_article_data(article, data)

        try:
            article.save()
        except (ArticleExsit, NotUniqueError):
            logging.info('duplicated: %s', article.source_url)
            return

        # Mirror remote images locally before the article goes live.
        if article.top_images:
            article.top_images = download_images(
                [image['url'] for image in article.top_images], article.seq_id)
        if article.related_images:
            article.related_images = download_images(
                [image['url'] for image in article.related_images], article.seq_id)

        article.attach_feature(ArticleParser(article.text).gen_content_feature())
        article.set_usable()
        article.save()
        article.warm(warm_conn)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate; other failures are reported to Sentry and swallowed
        # (best-effort persistence).
        raven_client.captureException()