def test_ListFeedResource_get(self):
    """Feed listing requires auth, is scoped per user, and serializes
    ``last_retrieved`` as a UTC-aware isoformat string."""
    # unauthenticated access is rejected
    resp = self.jarr_client('get', 'feeds')
    self.assertStatusCode(401, resp)
    # each user only sees his own feeds: no id overlap between the two
    feeds_u1 = self.jarr_client('get', 'feeds', user='******').json
    feeds_u2 = self.jarr_client('get', 'feeds', user='******').json
    feeds_u1 = [f['id'] for f in feeds_u1]
    feeds_u2 = [f['id'] for f in feeds_u2]
    self.assertFalse(set(feeds_u1).intersection(feeds_u2))
    # testing time formatting: whatever timezone the value is written
    # with, the API must render the same instant, same string
    feed = self.jarr_client('get', 'feeds', user='******').json[0]
    now = utc_now()
    # 1) aware UTC datetime
    FeedController().update({'id': feed['id']}, {'last_retrieved': now})
    json = self._get(feed['id'], 'user1')
    self.assertEqual(json['last_retrieved'], now.isoformat())
    # 2) naive datetime: treated as UTC by the model layer -- TODO confirm
    FeedController().update({'id': feed['id']},
                            {'last_retrieved': now.replace(tzinfo=None)})
    json = self._get(feed['id'], 'user1')
    self.assertEqual(json['last_retrieved'], now.isoformat())
    # 3) aware datetime in UTC+12: same instant, same serialization
    FeedController().update(
        {'id': feed['id']},
        {'last_retrieved': now.astimezone(timezone(timedelta(hours=12)))})
    json = self._get(feed['id'], 'user1')
    self.assertEqual(json['last_retrieved'], now.isoformat())
def test_cluster_same_feed(self):
    """cluster_same_feed gates clustering of two articles from the same
    feed, resolved in order: feed, then category, then user setting."""
    article = ArticleController().read(category_id__ne=None).first()
    cluster = article.cluster
    # all is enabled, article in cluster
    update_on_all_objs(articles=cluster.articles, cluster_enabled=True,
                       cluster_same_feed=True)
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertInCluster(article, cluster)
    # feed's disabled, won't cluster
    FeedController().update(
        {'id__in': [a.feed_id for a in cluster.articles]},
        {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # category's disabled, won't cluster (feed level reset to None so the
    # category setting is the one that decides)
    FeedController().update(
        {'id__in': [a.feed_id for a in cluster.articles]},
        {'cluster_same_feed': None})
    CategoryController().update({'id': cluster.main_article.category.id},
                                {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # user's disabled, won't cluster (category level reset to None)
    CategoryController().update({'id': cluster.main_article.category.id},
                                {'cluster_same_feed': None})
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': False})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertNotInCluster(article, cluster)
    # reenabling user, will cluster
    UserController().update({'id': cluster.user_id},
                            {'cluster_same_feed': True})
    article = self.create_article_from(cluster, cluster.main_article.feed)
    self.assertInCluster(article, cluster)
def test_articles_with_enclosure_and_fetched_content(self, truncated_cnt,
                                                     get_vector):
    """An entry with an enclosure yields two articles (main + audio) that
    cluster together, the cluster exposing the single fetched content."""
    self._clean_objs()
    # mocks: no tf-idf vector, and content fetching for truncated feeds
    # returns a fixed payload
    get_vector.return_value = None
    truncated_cnt.return_value = {'type': 'fetched',
                                  'title': 'holy grail',
                                  'content': 'blue, no read, aaah',
                                  'link': 'https://monthy.python/brian'}
    feed = FeedController().read().first()
    FeedController().update({'id': feed.id},
                            {'truncated_content': True,
                             'cluster_enabled': True})
    UserController().update({'id': feed.user_id}, {'cluster_enabled': True})
    builder = ClassicArticleBuilder(feed, self.entry_w_enclosure, {})
    self.assertIsNone(builder.article.get('article_type'))
    raw_articles = list(builder.enhance())
    # the enclosure produces a second, audio-typed article
    self.assertEqual(2, len(raw_articles))
    self.assertEqual('audio', raw_articles[1]['article_type'].value)
    articles = []
    for raw_article in raw_articles:
        articles.append(
            ArticleController(feed.user_id).create(**raw_article))
    ClusterController(feed.user_id).clusterize_pending_articles()
    # both articles must end up in the same cluster
    a1 = ArticleController().get(id=articles[0].id)
    a2 = ArticleController().get(id=articles[1].id)
    self.assertEqual(a1.cluster_id, a2.cluster_id)
    cluster = ClusterController().get(id=a1.cluster_id)
    # 'v' is presumably the content payload schema version -- TODO confirm
    self.assertEqual(2, cluster.content['v'])
    # only the fetched content is kept for display
    self.assertEqual(1, len(cluster.content['contents']))
    self.assertEqual('fetched', cluster.content['contents'][0]['type'])
def test_admin_update_cluster_on_change_title(self):
    """Renaming a feed as admin propagates to the denormalized
    ``main_feed_title`` of every cluster attached to it."""
    target_feed = ClusterController(2).read()[0].main_article.feed
    # every cluster starts out carrying the current feed title
    for clu in target_feed.clusters:
        self.assertEqual(target_feed.title, clu.main_feed_title)
    FeedController().update({'id': target_feed.id},
                            {'title': 'updated title'})
    target_feed = FeedController().get(id=target_feed.id)
    self.assertEqual('updated title', target_feed.title)
    # after the rename, each cluster's denormalized title followed
    for clu in target_feed.clusters:
        self.assertEqual(target_feed.title, clu.main_feed_title)
def test_cluster_enabled(self):
    """cluster_enabled gates clustering at every level: user, article's
    feed, the cluster's own feeds, and the article's category."""
    ccontr = ClusterController()
    cluster = ccontr.read().first()
    # pick a categorized feed not already involved in the cluster
    feed = FeedController(cluster.user_id).read(
        category_id__ne=None,
        id__nin=[art.feed_id for art in cluster.articles]).first()
    category = feed.category
    # clustering works when all is true
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_enabled=True)
    article = self.create_article_from(cluster, feed)
    self.assertInCluster(article, cluster)
    # disabling on user deactivates all clustering by default
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_enabled=None)
    UserController().update({'id': cluster.user_id},
                            {'cluster_enabled': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    # disabling on article's feed prevents from clustering
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_enabled=True)
    FeedController().update({'id': feed.id}, {'cluster_enabled': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    # disabling on feed from cluster's articles prevents from clustering
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_enabled=True)
    FeedController().update(
        {'id__in': [a.feed_id for a in cluster.articles]},
        {'cluster_enabled': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    # disabling on article's category prevents from clustering
    CategoryController(cluster.user_id).update({'id': category.id},
                                               {'cluster_enabled': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    # reenabling everything, clustering works again
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_enabled=True)
    article = self.create_article_from(cluster, feed)
    self.assertInCluster(article, cluster)
def test_adding_to_cluster_by_link(self):
    """A read cluster that gains a link-matched article must wake up
    (become unread) when the original feed has cluster_wake_up set."""
    ccontr = ClusterController()
    cluster = ccontr.read().first()
    # mark the cluster as read
    ccontr.update({'id': cluster.id}, {
        'read': True,
        'read_reason': 'marked'
    })
    cluster = ccontr.get(id=cluster.id)
    self.assertTrue(cluster.read)
    article = cluster.articles[0]
    articles_count = len(cluster.articles)
    fcontr = FeedController(cluster.user_id)
    acontr = ArticleController(cluster.user_id)
    fcontr.update({'id': article.feed_id}, {'cluster_wake_up': True})
    # clone the article into another feed so it clusters by link
    feed = fcontr.read(id__ne=article.feed_id).first()
    update_on_all_objs(articles=[article], feeds=[feed],
                       cluster_enabled=True)
    self._clone_article(acontr, article, feed)
    ccontr.clusterize_pending_articles()
    cluster = ccontr.get(id=cluster.id)
    # the clone joined the cluster and woke it up
    self.assertEqual(articles_count + 1, len(cluster.articles))
    self.assertFalse(cluster.read)
def _test_unread_on_cluster(self, read_reason):
    """Shared scenario: a cluster read with the given ``read_reason``
    gains a clone of its main article while cluster_wake_up is enabled.

    Returns the refreshed cluster so callers can assert the resulting
    read state for their specific read_reason.
    """
    ccontr = ClusterController()
    fcontr = FeedController()
    cluster = ccontr.read().first()
    # default config: clustering disabled, wake-up enabled
    clusterizer = Clusterizer()
    self.assertFalse(clusterizer.get_config(cluster, 'cluster_enabled'))
    self.assertTrue(clusterizer.get_config(cluster, 'cluster_wake_up'))
    ccontr.update({'id': cluster.id}, {
        'read': True,
        'read_reason': read_reason
    })
    target_feed = fcontr.read(id__ne=cluster.main_article.feed_id,
                              user_id=cluster.user_id).first()
    # a fresh Clusterizer is built after each config change -- presumably
    # because it caches config per instance, TODO confirm
    clusterizer = Clusterizer()
    self.assertFalse(clusterizer.get_config(target_feed,
                                            'cluster_enabled'))
    # enable clustering and wake-up on all involved feeds
    fcontr.update(
        {'id__in': [f.id for f in cluster.feeds] + [target_feed.id]}, {
            'cluster_wake_up': True,
            'cluster_enabled': True
        })
    clusterizer = Clusterizer()
    self.assertTrue(clusterizer.get_config(cluster, 'cluster_enabled'))
    target_feed = fcontr.read(id__ne=cluster.main_article.feed_id,
                              user_id=cluster.user_id).first()
    article = self._clone_article(ArticleController(),
                                  cluster.main_article, target_feed)
    clusterizer = Clusterizer()
    self.assertTrue(clusterizer.get_config(article, 'cluster_wake_up'))
    ClusterController(cluster.user_id).clusterize_pending_articles()
    self.assertEqual(2, len(article.cluster.articles))
    self.assertInCluster(article, cluster)
    return ccontr.get(id=cluster.id)
def scheduler():
    """Periodic task orchestrating a full crawling cycle.

    Enqueues fetchable feeds, schedules at most one feed deletion (redis
    lock), triggers the clusterizer for users with pending articles
    (rate-limited per user), then re-schedules itself.
    """
    logger.warning("Running scheduler")
    start = datetime.now()
    fctrl = FeedController()
    # browsing feeds to fetch
    feeds = list(fctrl.list_fetchable(conf.crawler.batch_size))
    WORKER_BATCH.labels(worker_type='fetch-feed').observe(len(feeds))
    logger.info('%d to enqueue', len(feeds))
    for feed in feeds:
        logger.debug("%r: scheduling to be fetched", feed)
        process_feed.apply_async(args=[feed.id])
    # browsing feeds to delete; setnx + expire acts as a best-effort lock
    # so only one deletion runs at a time across workers
    feeds_to_delete = list(fctrl.read(status=FeedStatus.to_delete))
    if feeds_to_delete and REDIS_CONN.setnx(JARR_FEED_DEL_KEY, 'true'):
        REDIS_CONN.expire(JARR_FEED_DEL_KEY, LOCK_EXPIRE)
        logger.info('%d to delete, deleting one', len(feeds_to_delete))
        for feed in feeds_to_delete:
            logger.debug("%r: scheduling to be delete", feed)
            feed_cleaner.apply_async(args=[feed.id])
            break  # only one at a time
    # applying clusterizer, at most once per clusterizer_delay per user
    for user_id in ArticleController.get_user_id_with_pending_articles():
        if not UserController().get(id=user_id).effectivly_active:
            continue
        if REDIS_CONN.setnx(JARR_CLUSTERIZER_KEY % user_id, 'true'):
            REDIS_CONN.expire(JARR_CLUSTERIZER_KEY % user_id,
                              conf.crawler.clusterizer_delay)
            clusterizer.apply_async(args=[user_id])
    # re-enqueue itself for the next cycle
    scheduler.apply_async(countdown=conf.crawler.idle_delay)
    WORKER.labels(method='scheduler').observe(
        (datetime.now() - start).total_seconds())
    update_slow_metrics.apply_async()
def crawler():
    """Synchronously fetch one batch of fetchable feeds, then run the
    clusterizer once for each user whose feeds were touched."""
    touched_users = set()
    for feed in FeedController().list_fetchable(limit=1):
        process_feed.apply(args=[feed.id])
        touched_users.add(feed.user_id)
    for owner_id in touched_users:
        clusterizer.apply(args=[owner_id])
def test_feed_and_article_deletion(self):
    """Deleting a category must also remove its feeds and articles."""
    cat_ctrl = CategoryController(2)
    category = cat_ctrl.read().first()
    cat_ctrl.delete(category.id)
    # nothing referencing the category survives the deletion
    remaining_articles = ArticleController().read(
        category_id=category.id).count()
    remaining_feeds = FeedController().read(category_id=category.id).count()
    self.assertEqual(0, remaining_articles)
    self.assertEqual(0, remaining_feeds)
def test_delete_main_cluster_handling(self):
    """Deleting the feed carrying a cluster's main article must elect a
    new main article (and refresh the denormalized titles)."""
    suffix = 'suffix'
    clu = ClusterController().get(id=10)
    acontr = ArticleController(clu.user_id)
    fcontr = FeedController(clu.user_id)
    old_title = clu.main_title
    old_feed_title, old_art_id = clu.main_feed_title, clu.main_article_id
    # remove other articles sharing the main article's link so the clone
    # below is the only additional clustering candidate
    for art_to_del in acontr.read(link=clu.main_article.link,
                                  id__ne=clu.main_article.id):
        acontr.delete(art_to_del.id)
    other_feed = fcontr.read(id__ne=clu.main_article.feed_id).first()
    update_on_all_objs(articles=[clu.main_article], feeds=[other_feed],
                       cluster_enabled=True)
    # clone the main article into another feed, dated one day later --
    # presumably so it does not become the main article itself
    acontr.create(
        feed_id=other_feed.id,
        entry_id=clu.main_article.entry_id + suffix,
        link=clu.main_article.link,
        title=clu.main_article.title + suffix,
        content=clu.main_article.content + suffix,
        date=clu.main_article.date + timedelta(1),
        retrieved_date=clu.main_article.retrieved_date + timedelta(1),
    )
    ClusterController(clu.user_id).clusterize_pending_articles()
    clu = ClusterController().get(id=10)
    self.assertEqual(2, len(clu.articles))
    # deleting the main article's feed forces re-election of the main
    fcontr.delete(clu.main_article.feed_id)
    new_cluster = ClusterController(clu.user_id).get(id=clu.id)
    self.assertEqual(1, len(new_cluster.articles))
    self.assertNotEqual(old_title, new_cluster.main_title)
    self.assertNotEqual(old_feed_title, new_cluster.main_feed_title)
    self.assertNotEqual(old_art_id, new_cluster.main_article_id)
def set_feed_error(self, error=None, parsed_feed=None):
    """Record a failed fetch on the feed.

    Increments the error counter, stores a printable description of the
    failure and resets the freshness fields through the controller so
    ``expires`` gets recomputed.

    :param error: optional exception (or message) describing the failure
    :param parsed_feed: optional feedparser result; its
        ``bozo_exception`` is used as the error description
    :return: the result of ``FeedController.update``
    """
    error_count = self.feed.error_count + 1
    # build a printable message; fall back to a generic one so last_error
    # is always bound (the original raised UnboundLocalError when called
    # with neither argument)
    if error:
        last_error = str(error)
    elif parsed_feed:
        last_error = str(parsed_feed.get('bozo_exception', ''))
    else:
        last_error = 'unknown error'
    # only get noisy once the feed has failed repeatedly
    # NOTE(review): compares the pre-increment count -- presumably
    # intended, confirm against the alerting expectations
    if self.feed.error_count > conf.feed.error_threshold:
        level = logging.WARNING
    else:
        level = logging.DEBUG
    logger.log(level, "%r: fetching feed error'd; error count -> %r",
               self.feed, error_count)
    logger.debug("%r: last error details %r", self.feed, last_error)
    now = utc_now()
    info = {
        'error_count': error_count,
        'last_error': last_error,
        'user_id': self.feed.user_id,
        'last_retrieved': now,
        'expires': None
    }  # forcing compute by controller
    FEED_FETCH.labels(feed_type=self.feed.feed_type.value,
                      result='error').inc()
    return FeedController().update({'id': self.feed.id}, info)
def test_no_add_feed_skip(self):
    """A 304 (not modified) response must leave the article count
    untouched, with or without feed filters installed."""
    self.resp_status_code = 304
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
    # nothing fetched, nothing created
    crawler()
    # install one filter of each type; with a 304 they still must not
    # create nor alter anything
    FeedController().update({}, {
        'filters': [{
            "type": "tag contains",
            "action on": "match",
            "pattern": "pattern5",
            "action": "skipped"
        }, {
            "type": "simple match",
            "action on": "match",
            "pattern": "pattern5",
            "action": "mark as read"
        }, {
            "type": "regex",
            "action on": "match",
            "pattern": "pattern5",
            "action": "skipped"
        }]
    })
    crawler()
    self.assertEqual(BASE_COUNT, ArticleController().read().count())
def test_fetchable(self):
    """list_fetchable returns late feeds and stamps them as retrieved;
    the controller clamps unrealistic ``expires`` values."""
    fctrl = FeedController()
    total = fctrl.read().count()
    unix = datetime(1970, 1, 1).replace(tzinfo=timezone.utc)
    # initially every feed is late, with epoch timestamps
    count = 0
    for fd in fctrl.list_late():
        count += 1
        self.assertEqual(unix, fd.last_retrieved)
        self.assertEqual(unix, fd.expires)
    self.assertEqual(total, count)
    # listing fetchable feeds also marks them as just retrieved
    fetchables = fctrl.list_fetchable()
    now = utc_now()
    for fd in fetchables:
        self.assert_in_range(now - timedelta(seconds=1), fd.last_retrieved,
                             now)
        self.assertEqual(unix, fd.expires)
    self.assert_late_count(
        0, "no late feed to report because all just fetched")
    # writing an absurd expiration through the controller gets corrected
    # to roughly now + min_expires (1s tolerance either way)
    fctrl.update({}, {'expires': unix})
    now = utc_now()
    for fd in fctrl.read():  # expires should be corrected
        self.assert_in_range(
            now + timedelta(seconds=conf.feed.min_expires - 1),
            fd.expires,
            now + timedelta(seconds=conf.feed.min_expires + 1))
    # once expired, feeds are late again regardless of last_retrieved
    lr_not_matter = timedelta(seconds=conf.feed.min_expires + 10)
    self.update_all_no_ctrl(expires=utc_now() - timedelta(seconds=1),
                            last_retrieved=utc_now() - lr_not_matter)
    self.assert_late_count(total, "all feed just expired")
    self.update_all_no_ctrl(expires=utc_now() + timedelta(seconds=1))
    self.assert_late_count(
        0, "all feed will expire in a second, none are expired")
def test_cluster_tfidf_control(self):
    """TF-IDF clustering only happens when cluster_tfidf_enabled is set
    on the article's feed."""
    article = ArticleController().read(category_id__ne=None).first()
    cluster = article.cluster
    # leaving one cluster with one article
    clu_ids = [c.id for c in ClusterController().read(id__ne=cluster.id)]
    art_ids = [
        a.id
        for a in ArticleController().read(id__ne=cluster.main_article_id)
    ]
    # detach articles from their clusters before deleting everything --
    # presumably needed so deletions don't cascade, TODO confirm
    ArticleController().update({'id__in': art_ids}, {'cluster_id': None})
    for clu_id in clu_ids:
        ClusterController().delete(clu_id)
    for art_id in art_ids:
        ArticleController().delete(art_id)
    self.assertEqual(1, ClusterController().read().count())
    self.assertEqual(1, ArticleController().read().count())
    # two feeds with thresholds making any article tf-idf eligible:
    # feed1 with tf-idf enabled, feed2 with it disabled
    feed1 = FeedController(cluster.user_id).create(
        title='new feed',
        cluster_conf={
            'tfidf_min_score': -1,
            'tfidf_min_sample_size': 1
        })
    update_on_all_objs(articles=cluster.articles, feeds=[feed1],
                       cluster_tfidf_enabled=True, cluster_enabled=True)
    feed2 = FeedController(cluster.user_id).create(
        cluster_enabled=True,
        cluster_tfidf_enabled=False,
        title='new feed',
        cluster_conf={
            'tfidf_min_score': -1,
            'tfidf_min_sample_size': 1
        })
    # links are different so only tf-idf (not link matching) can cluster
    article = self.create_article_from(cluster, feed1,
                                       link=cluster.main_article.link +
                                       'do not match link')
    self.assertInCluster(article, cluster, ClusterReason.tf_idf)
    article = self.create_article_from(cluster, feed2,
                                       link=cluster.main_article.link +
                                       'do not match link either')
    self.assertNotInCluster(article, cluster)
def put(feed_id):
    """Update an existing feed."""
    feed_ctrl = FeedController(current_identity.id)
    payload = parse_meaningful_params(parser_edit)
    if not feed_ctrl.update({'id': feed_id}, payload):
        # nothing was updated: either the feed doesn't exist or it isn't
        # ours -- this call raises the appropriate error
        feed_ctrl.assert_right_ok(feed_id)
    return None, 204
def setUp(self):
    """Load the two fixture users and build per-user controllers."""
    super().setUp()
    self.user = UserController().get(login='******')
    self.user2 = UserController().get(login='******')
    self.fctrl = FeedController(self.user.id)
    self.cctrl = CategoryController(self.user.id)
    self.uctrl = UserController()
def test_list_feeds(self):
    """list-feeds returns one entry per feed and per category, plus one
    extra pseudo-entry."""
    resp = self.jarr_client('get', 'list-feeds', user=self.user.login)
    feed_count = FeedController(self.user.id).read().count()
    categ_count = CategoryController(self.user.id).read().count()
    self.assertEqual(feed_count + categ_count + 1, len(resp.json))
    self.assertEqual(feed_count,
                     sum(1 for r in resp.json if r['type'] == 'feed'))
    self.assertEqual(categ_count,
                     sum(1 for r in resp.json if r['type'] == 'categ'))
def populate_db():
    """Seed the test database.

    Creates an admin, two regular users, then for each user and each of
    two iterations: three feeds (the first without category) holding
    three tagged articles each; finally clusterizes everything.
    """
    fcontr = FeedController()
    ccontr = CategoryController()
    UserController().create(
        **{
            'is_admin': True,
            'is_api': True,
            'cluster_enabled': False,
            'login': '******',
            'password': '******'
        })
    user1, user2 = [
        UserController().create(login=name,
                                cluster_enabled=False,
                                email="*****@*****.**" % name,
                                password=name)
        for name in ["user1", "user2"]
    ]
    for iteration in range(2):
        article_total = 0
        for user in (user1, user2):
            for iter_cat in range(3):
                # first feed of each batch gets no category
                cat_id = None
                if iter_cat:
                    cat_id = ccontr.create(
                        user_id=user.id,
                        name=to_name(user, iteration, iter_cat)).id
                feed_id = fcontr.create(
                    link="feed%d%d" % (iteration, iter_cat),
                    user_id=user.id,
                    category_id=cat_id,
                    title=to_name(user, iteration, iter_cat, iter_cat)).id
                for iter_art in range(3):
                    entry = to_name(user, iteration, iter_cat, iter_cat,
                                    iter_art)
                    tags = [
                        to_name(user, iteration, iter_cat, iter_cat,
                                iter_art, str(i)) for i in range(2)
                    ]
                    article_total += 1
                    # dates are offset per iteration so ordering by date
                    # is deterministic
                    ArticleController().create(
                        entry_id=entry,
                        link='http://test.te/%d' % article_total,
                        feed_id=feed_id,
                        user_id=user.id,
                        tags=tags,
                        category_id=cat_id,
                        title=entry,
                        date=utc_now() + timedelta(seconds=iteration),
                        content="content %d" % article_total)
    session.commit()
    session.flush()
    ClusterController().clusterize_pending_articles()
def _reset_feeds_freshness(**kwargs):
    """Reset the freshness-tracking fields on ALL feeds.

    Any field the caller does not override is reset to its "never
    fetched" default: epoch timestamps and empty HTTP caching headers.
    """
    # setdefault replaces the repetitive `if key not in kwargs` chains
    kwargs.setdefault('expires', UNIX_START)
    kwargs.setdefault('last_retrieved', UNIX_START)
    kwargs.setdefault('etag', '')
    kwargs.setdefault('last_modified', '')
    FeedController().update({}, kwargs)
def update(self, filters, attrs, return_objs=False, commit=True):
    """Update articles, access-checking and denormalizing feed/category.

    When ``attrs`` carries a feed_id or category_id, verify the target
    belongs to the user (an admin controller, ``self.user_id is None``,
    bypasses the check) and keep the article's category in sync with the
    feed's.

    :raises Forbidden: when the feed or category is owned by someone else
    """
    user_id = attrs.get('user_id', self.user_id)
    if 'feed_id' in attrs:
        feed = FeedController().get(id=attrs['feed_id'])
        if not (self.user_id is None or feed.user_id == user_id):
            raise Forbidden("no right on feed %r" % feed.id)
        # moving to a feed also moves the article to that feed's category
        attrs['category_id'] = feed.category_id
    if attrs.get('category_id'):
        cat = CategoryController().get(id=attrs['category_id'])
        if not (self.user_id is None or cat.user_id == user_id):
            raise Forbidden("no right on cat %r" % cat.id)
    return super().update(filters, attrs, return_objs, commit)
def test_scheduler(self):
    """The scheduler enqueues every feed on its first run, then only the
    fetchable ones, and schedules exactly one feed deletion at a time."""
    scheduler()
    # keep users "active" so the clusterizer gating does not interfere
    UserController().update({}, {'last_connection': utc_now()})
    fctrl = FeedController()
    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
    # first run: every feed got scheduled for fetching, nothing else
    self.assertEqual(fctrl.read().count(),
                     self.process_feed_patch.apply_async.call_count)
    self.assertEqual(0, self.clusteriser_patch.apply_async.call_count)
    self.assertEqual(0, self.feed_cleaner_patch.apply_async.call_count)
    # mark two feeds for deletion and make one fetchable again
    feed1, feed2, feed3 = list(FeedController().read().limit(3))
    FeedController().update({'id__in': [feed1.id, feed3.id]},
                            {'status': 'to_delete'})
    FeedController().update({'id': feed2.id}, {
        'last_retrieved': epoch,
        'expires': epoch
    })
    self.assertEqual(1, len(list(fctrl.list_fetchable())))
    scheduler()
    # total process_feed calls again match the feed count -- TODO confirm
    # how to_delete feeds are accounted for in read()
    self.assertEqual(fctrl.read().count(),
                     self.process_feed_patch.apply_async.call_count)
    self.assertEqual(0, self.clusteriser_patch.apply_async.call_count)
    # two feeds were to_delete but only one cleaning is scheduled
    self.assertEqual(1, self.feed_cleaner_patch.apply_async.call_count)
def create(self, **attrs):
    # handling special denorm for article rights
    """Create an article, denormalizing ownership from its feed.

    Requires ``feed_id``; ``user_id`` and ``category_id`` are copied from
    the feed after an ownership check (an admin controller,
    ``self.user_id is None``, bypasses it).

    :raises Unauthorized: when feed_id is missing
    :raises Forbidden: when the feed belongs to another user
    """
    if 'feed_id' not in attrs:
        raise Unauthorized("must provide feed_id when creating article")
    feed = FeedController(
        attrs.get('user_id', self.user_id)).get(id=attrs['feed_id'])
    if 'user_id' in attrs and not (feed.user_id == attrs['user_id']
                                   or self.user_id is None):
        raise Forbidden("no right on feed %r" % feed.id)
    attrs['user_id'], attrs['category_id'] = feed.user_id, feed.category_id
    # precompute the vector used for matching -- presumably the tf-idf
    # clustering vector, see to_vector
    attrs['vector'] = to_vector(attrs)
    # link_hash allows cheap link-based lookups
    if not attrs.get('link_hash') and attrs.get('link'):
        attrs['link_hash'] = digest(attrs['link'], alg='sha1', out='bytes')
    return super().create(**attrs)
def test_time(self):
    """last_retrieved is stored timezone-aware: naive input is read back
    as UTC, aware input keeps its instant."""
    naive = dateutil.parser.parse('2016-11-17T16:18:02.727802')
    aware = dateutil.parser.parse('2016-11-17T16:18:02.727802+00:00')
    aware2 = dateutil.parser.parse('2016-11-17T16:18:02.727802+12:00')
    fctrl = FeedController()
    # naive datetimes are interpreted as UTC
    fctrl.update({'id': 1}, {'last_retrieved': naive})
    self.assertEqual(fctrl.read(id=1).first().last_retrieved, aware)
    fctrl.update({'id': 1}, {'last_retrieved': aware})
    self.assertEqual(fctrl.read(id=1).first().last_retrieved, aware)
    # aware non-UTC datetimes keep their instant: 16:18+12:00 is the same
    # moment as 04:18 UTC, i.e. aware - 12h
    fctrl.update({'id': 1}, {'last_retrieved': aware2})
    self.assertEqual(fctrl.read(id=1).first().last_retrieved, aware2)
    self.assertEqual(
        fctrl.read(id=1).first().last_retrieved,
        aware - timedelta(hours=12))
def test_MarkClustersAsRead_put_only_singles(self):
    """only_singles restricts mark-as-read to clusters holding exactly
    one article."""
    feed = FeedController(self.user.id).read()[0]
    update_on_all_objs(feeds=[feed], cluster_same_feed=True,
                       cluster_enabled=True)
    # creating a new article that will cluster (same link, same feed)
    ArticleController(self.user.id).create(entry_id='new entry_id',
                                           title='new title',
                                           content='new content',
                                           feed_id=feed.id,
                                           link=feed.articles[0].link)
    ClusterController(self.user.id).clusterize_pending_articles()
    self.assertClusterCount(18, {'filter': 'unread'})  # one per feed
    # only the single-article clusters get marked; the two-article
    # cluster stays unread
    self._mark_as_read(2, {'only_singles': True, 'filter': 'unread'})
    self.assertClusterCount(1, {'filter': 'unread'})
def get():
    """Export the current user's subscriptions as an OPML document."""
    user_id = current_identity.id
    user = UserController(user_id).get(id=user_id)
    categories = {cat.id: cat for cat in CategoryController(user_id).read()}
    rendered = render_template('opml.xml',
                               user=user,
                               categories=categories,
                               feeds=FeedController(user_id).read(),
                               now=utc_now())
    response = make_response(rendered)
    # decorate the response with the standard success headers
    for header, value in OK_GET_HEADERS.items():
        response.headers[header] = value
    return response
def _test_fetching_anti_herding_mech(self, now):
    """Lateness depends on both expiration and the time of the last
    retrieval, smoothing fetch bursts (anti-herding)."""
    fctrl = FeedController()
    total = fctrl.read().count()
    half = timedelta(seconds=conf.feed.min_expires / 2)
    twice = timedelta(seconds=conf.feed.min_expires * 2)
    long_ago = timedelta(seconds=conf.feed.max_expires * 2)
    # not expired yet -> not late
    self.update_all_no_ctrl(expires=now + half, last_retrieved=now)
    self.assert_late_count(0, "all have just been retrieved, none expired")
    # expired, but retrieved too recently -> still not late
    self.update_all_no_ctrl(expires=now - twice, last_retrieved=now - half)
    self.assert_late_count(0, "have been retrieved not too long ago")
    # not retrieved for longer than max_expires -> late even though the
    # expiration date lies in the future -- presumably max_expires caps
    # how long a feed may go unfetched, TODO confirm
    self.update_all_no_ctrl(expires=now + twice,
                            last_retrieved=now - long_ago)
    self.assert_late_count(total,
                           "all retrieved some time ago, not expired")
def post():
    """Import subscriptions from an uploaded OPML file.

    Returns counters (created / existing / failed + exception messages)
    with status 201 when something was created, 400 when there were only
    failures, 200 otherwise.
    """
    opml_file = request.files['opml_file']
    try:
        subscriptions = opml.from_string(opml_file.read())
    except Exception as error:
        raise UnprocessableEntity("Couldn't parse OPML file (%r)" % error)
    ccontr = CategoryController(current_identity.id)
    fcontr = FeedController(current_identity.id)
    counts = {'created': 0, 'existing': 0, 'failed': 0, 'exceptions': []}
    categories = {cat.name: cat.id for cat in ccontr.read()}
    for line in subscriptions:
        # entries without an xmlUrl are counted as failures
        try:
            link = line.xmlUrl
        except Exception as error:
            counts['failed'] += 1
            counts['exceptions'].append(str(error))
            continue
        # don't import twice
        if fcontr.read(link=link).count():
            counts['existing'] += 1
            continue
        # handling categories: unknown ones are created on the fly
        cat_id = None
        category = getattr(line, 'category', '').lstrip('/')
        if category:
            if category not in categories:
                new_category = ccontr.create(name=category)
                categories[new_category.name] = new_category.id
            cat_id = categories[category]
        fcontr.create(title=getattr(line, 'text', None),
                      category_id=cat_id,
                      description=getattr(line, 'description', None),
                      link=link,
                      site_link=getattr(line, 'htmlUrl', None))
        counts['created'] += 1
    code = 200
    if counts.get('created'):
        code = 201
    elif counts.get('failed'):
        code = 400
    return counts, code
def test_no_cluster_same_category_on_original_category(self):
    """cluster_same_category on the ORIGINAL article's category governs
    whether a new article from the same category may cluster."""
    article = ArticleController().read(category_id__ne=None).first()
    art_cat_id = article.category_id
    cat_ctrl = CategoryController(article.user_id)
    cluster = article.cluster
    # a new feed sharing the original article's category
    feed = FeedController(cluster.user_id).create(title='new feed',
                                                  category_id=art_cat_id)
    # leave cluster_same_category unset elsewhere so the category-level
    # setting is the one that decides
    update_on_all_objs(articles=cluster.articles, feeds=[feed],
                       cluster_same_category=None, cluster_enabled=True)
    cat_ctrl.update({'id': art_cat_id}, {'cluster_same_category': False})
    article = self.create_article_from(cluster, feed)
    self.assertNotInCluster(article, cluster)
    cat_ctrl.update({'id': art_cat_id}, {'cluster_same_category': True})
    article = self.create_article_from(cluster, feed)
    self.assertInCluster(article, cluster)
def clean_feed(self, response, **info):
    """Will reset the errors counters on a feed that have known errors.

    Also merges caching data parsed from the successful response and
    persists the new feed link when the server permanently redirected.
    """
    now = utc_now()
    info.update({'error_count': 0, 'last_error': None,
                 'last_retrieved': now, 'expires': None})
    # merge caching attributes (etag / last-modified / expiry) parsed
    # from the response headers and body
    info.update(extract_feed_info(response.headers, response.text))
    # follow permanent redirects (301/308) by persisting the new link
    feed_permanently_redirected = response.history \
        and self.feed.link != response.url \
        and any(r.status_code in {301, 308} for r in response.history)
    if feed_permanently_redirected:
        logger.warning('%r: feed moved from %r to %r', self.feed,
                       self.feed.link, response.url)
        info['link'] = response.url
    # NOTE(review): info is always non-empty at this point, so the guard
    # is effectively always true
    if info:
        FeedController(self.feed.user_id).update({'id': self.feed.id},
                                                 info)