Пример #1
0
 def test_cluster_same_feed(self):
     # Exercises the ``cluster_same_feed`` switch at feed, category and user
     # level: a copy of the main article, created in the main article's own
     # feed, is expected in the cluster only while no level disables it.
     seed = ArticleController().read(category_id__ne=None).first()
     cluster = seed.cluster

     def clone_in_main_feed():
         # the main article's feed is looked up on every call, not cached
         return self.create_article_from(cluster,
                                         cluster.main_article.feed)

     def set_on_cluster_feeds(value):
         FeedController().update(
             {'id__in': [art.feed_id for art in cluster.articles]},
             {'cluster_same_feed': value})

     def set_on_main_category(value):
         CategoryController().update(
             {'id': cluster.main_article.category.id},
             {'cluster_same_feed': value})

     def set_on_user(value):
         UserController().update({'id': cluster.user_id},
                                 {'cluster_same_feed': value})

     # everything enabled: the copy joins the cluster
     update_on_all_objs(articles=cluster.articles,
                        cluster_enabled=True,
                        cluster_same_feed=True)
     self.assertInCluster(clone_in_main_feed(), cluster)

     # disabled on the feeds: the copy stays out
     set_on_cluster_feeds(False)
     self.assertNotInCluster(clone_in_main_feed(), cluster)

     # feeds back to unset, disabled on the category: the copy stays out
     set_on_cluster_feeds(None)
     set_on_main_category(False)
     self.assertNotInCluster(clone_in_main_feed(), cluster)

     # category back to unset, disabled on the user: the copy stays out
     set_on_main_category(None)
     set_on_user(False)
     self.assertNotInCluster(clone_in_main_feed(), cluster)

     # re-enabled on the user: the copy joins the cluster again
     set_on_user(True)
     self.assertInCluster(clone_in_main_feed(), cluster)
Пример #2
0
    def test_delete_main_cluster_handling(self):
        # Deleting the feed that owns a cluster's main article is expected to
        # leave the cluster alive with a different main article (title, feed
        # title and article id all change).
        suffix = 'suffix'
        cluster_id = 10
        cluster = ClusterController().get(id=cluster_id)
        article_ctrl = ArticleController(cluster.user_id)
        feed_ctrl = FeedController(cluster.user_id)

        # snapshot of the "main" fields, compared against at the very end
        title_before = cluster.main_title
        feed_title_before = cluster.main_feed_title
        main_article_id_before = cluster.main_article_id

        # remove every other article sharing the main article's link
        same_link_others = article_ctrl.read(link=cluster.main_article.link,
                                             id__ne=cluster.main_article.id)
        for duplicate in same_link_others:
            article_ctrl.delete(duplicate.id)

        other_feed = feed_ctrl.read(
            id__ne=cluster.main_article.feed_id).first()
        update_on_all_objs(articles=[cluster.main_article],
                           feeds=[other_feed],
                           cluster_enabled=True)

        # copy of the main article in another feed: same link, suffixed
        # texts, dates shifted by one day
        main = cluster.main_article
        one_day = timedelta(1)
        article_ctrl.create(**{
            'feed_id': other_feed.id,
            'entry_id': main.entry_id + suffix,
            'link': main.link,
            'title': main.title + suffix,
            'content': main.content + suffix,
            'date': main.date + one_day,
            'retrieved_date': main.retrieved_date + one_day,
        })

        ClusterController(cluster.user_id).clusterize_pending_articles()
        cluster = ClusterController().get(id=cluster_id)
        self.assertEqual(2, len(cluster.articles))

        # drop the feed of the current main article and reload the cluster
        feed_ctrl.delete(cluster.main_article.feed_id)
        reloaded = ClusterController(cluster.user_id).get(id=cluster.id)
        self.assertEqual(1, len(reloaded.articles))
        self.assertNotEqual(title_before, reloaded.main_title)
        self.assertNotEqual(feed_title_before, reloaded.main_feed_title)
        self.assertNotEqual(main_article_id_before, reloaded.main_article_id)
Пример #3
0
    def test_adding_to_cluster_by_link(self):
        # A cluster marked as read whose article's feed has
        # ``cluster_wake_up`` set is expected to gain one article and to be
        # unread again once a clone from another feed has been clusterized.
        cluster_ctrl = ClusterController()

        cluster = cluster_ctrl.read().first()
        mark_read = {
            'read': True,
            'read_reason': 'marked'
        }
        cluster_ctrl.update({'id': cluster.id}, mark_read)
        cluster = cluster_ctrl.get(id=cluster.id)
        self.assertTrue(cluster.read)
        source_article = cluster.articles[0]
        size_before = len(cluster.articles)

        feed_ctrl = FeedController(cluster.user_id)
        article_ctrl = ArticleController(cluster.user_id)
        feed_ctrl.update({'id': source_article.feed_id},
                         {'cluster_wake_up': True})
        # any feed other than the one the source article belongs to
        other_feed = feed_ctrl.read(id__ne=source_article.feed_id).first()
        update_on_all_objs(articles=[source_article],
                           feeds=[other_feed],
                           cluster_enabled=True)

        self._clone_article(article_ctrl, source_article, other_feed)
        cluster_ctrl.clusterize_pending_articles()

        cluster = cluster_ctrl.get(id=cluster.id)
        self.assertEqual(size_before + 1, len(cluster.articles))
        self.assertFalse(cluster.read)
Пример #4
0
    def test_cluster_enabled(self):
        # Walks through the places where ``cluster_enabled`` can be turned
        # off (user, new article's feed, feeds of the cluster's articles,
        # category) and checks a fresh copy is kept out of the cluster each
        # time, and accepted when everything is enabled.
        cluster_ctrl = ClusterController()
        cluster = cluster_ctrl.read().first()
        feed_ctrl = FeedController(cluster.user_id)
        # a categorised feed that none of the cluster's articles belong to
        feed = feed_ctrl.read(
            category_id__ne=None,
            id__nin=[art.feed_id for art in cluster.articles]).first()
        category = feed.category

        def set_everywhere(value):
            update_on_all_objs(articles=cluster.articles,
                               feeds=[feed],
                               cluster_enabled=value)

        def clone():
            return self.create_article_from(cluster, feed)

        # clustering works when all is true
        set_everywhere(True)
        self.assertInCluster(clone(), cluster)

        # everything unset, user disabled: no clustering by default
        set_everywhere(None)
        UserController().update({'id': cluster.user_id},
                                {'cluster_enabled': False})
        self.assertNotInCluster(clone(), cluster)

        # disabled on the new article's feed: no clustering
        set_everywhere(True)
        FeedController().update({'id': feed.id}, {'cluster_enabled': False})
        self.assertNotInCluster(clone(), cluster)

        # disabled on the feeds of the cluster's articles: no clustering
        set_everywhere(True)
        FeedController().update(
            {'id__in': [art.feed_id for art in cluster.articles]},
            {'cluster_enabled': False})
        self.assertNotInCluster(clone(), cluster)

        # disabled on the new article's category: no clustering
        # NOTE(review): no reset is issued between the previous step and
        # this one, so the cluster's feeds are presumably still disabled
        # here - confirm this step really isolates the category setting.
        CategoryController(cluster.user_id).update(
            {'id': category.id}, {'cluster_enabled': False})
        self.assertNotInCluster(clone(), cluster)

        # everything enabled again: clustering is back
        set_everywhere(True)
        self.assertInCluster(clone(), cluster)
Пример #5
0
 def test_MarkClustersAsRead_put_only_singles(self):
     # Adds one article reusing the link of an existing article of the same
     # feed, clusterizes, then marks clusters as read with ``only_singles``
     # and checks the unread cluster counts before and after.
     user_id = self.user.id
     feed = FeedController(user_id).read()[0]
     update_on_all_objs(feeds=[feed],
                        cluster_same_feed=True,
                        cluster_enabled=True)

     # creating a new article that will cluster
     article_ctrl = ArticleController(user_id)
     same_link_article = {
         'entry_id': 'new entry_id',
         'title': 'new title',
         'content': 'new content',
         'feed_id': feed.id,
         'link': feed.articles[0].link,
     }
     article_ctrl.create(**same_link_article)
     ClusterController(user_id).clusterize_pending_articles()

     self.assertClusterCount(18, {'filter': 'unread'})
     # one per feed
     only_single_unread = {'only_singles': True, 'filter': 'unread'}
     self._mark_as_read(2, only_single_unread)
     self.assertClusterCount(1, {'filter': 'unread'})
Пример #6
0
    def test_cluster_tfidf_control(self):
        # Checks that the ``cluster_tfidf_enabled`` flag of the new article's
        # feed decides whether an article whose link does not match can
        # still join the cluster with the ``ClusterReason.tf_idf`` reason.
        article = ArticleController().read(category_id__ne=None).first()
        cluster = article.cluster

        # leaving one cluster with one article
        # ids are collected up front, before anything is modified or deleted
        clu_ids = [c.id for c in ClusterController().read(id__ne=cluster.id)]
        art_ids = [
            a.id
            for a in ArticleController().read(id__ne=cluster.main_article_id)
        ]
        # articles are detached from their cluster before the deletions
        # (presumably so clusters can be removed while articles still exist
        # - TODO confirm)
        ArticleController().update({'id__in': art_ids}, {'cluster_id': None})
        for clu_id in clu_ids:
            ClusterController().delete(clu_id)
        for art_id in art_ids:
            ArticleController().delete(art_id)
        # sanity check: exactly one cluster and one article remain
        self.assertEqual(1, ClusterController().read().count())
        self.assertEqual(1, ArticleController().read().count())

        # feed1: tf-idf gets enabled through update_on_all_objs below
        # NOTE(review): a min score of -1 and a sample size of 1 look meant
        # to make any tf-idf comparison pass - confirm against the
        # clustering code.
        feed1 = FeedController(cluster.user_id).create(
            title='new feed',
            cluster_conf={
                'tfidf_min_score': -1,
                'tfidf_min_sample_size': 1
            })
        update_on_all_objs(articles=cluster.articles,
                           feeds=[feed1],
                           cluster_tfidf_enabled=True,
                           cluster_enabled=True)
        # feed2: same configuration, but created with tf-idf explicitly
        # disabled
        feed2 = FeedController(cluster.user_id).create(
            cluster_enabled=True,
            cluster_tfidf_enabled=False,
            title='new feed',
            cluster_conf={
                'tfidf_min_score': -1,
                'tfidf_min_sample_size': 1
            })

        # links get a trailing text so they differ from the main article's
        # link; the first copy is expected in the cluster for the tf_idf
        # reason
        article = self.create_article_from(cluster,
                                           feed1,
                                           link=cluster.main_article.link +
                                           'do not match link')
        self.assertInCluster(article, cluster, ClusterReason.tf_idf)
        # with tf-idf disabled on feed2 the copy is expected to stay out
        article = self.create_article_from(cluster,
                                           feed2,
                                           link=cluster.main_article.link +
                                           'do not match link either')
        self.assertNotInCluster(article, cluster)
Пример #7
0
 def test_no_cluster_same_category_on_original_category(self):
     # With ``cluster_same_category`` unset on articles and feed, the value
     # set on the shared category is expected to decide whether a copy
     # coming from a new feed of that category joins the cluster.
     seed = ArticleController().read(category_id__ne=None).first()
     category_id = seed.category_id
     category_ctrl = CategoryController(seed.user_id)
     cluster = seed.cluster
     # new feed placed in the same category as the seed article
     new_feed = FeedController(cluster.user_id).create(
         title='new feed', category_id=category_id)
     update_on_all_objs(articles=cluster.articles,
                        feeds=[new_feed],
                        cluster_same_category=None,
                        cluster_enabled=True)

     # (category value, expected membership assertion)
     phases = (
         (False, self.assertNotInCluster),
         (True, self.assertInCluster),
     )
     for same_category, check in phases:
         category_ctrl.update({'id': category_id},
                              {'cluster_same_category': same_category})
         copy = self.create_article_from(cluster, new_feed)
         check(copy, cluster)
Пример #8
0
 def test_cluster_disabled_on_original_category(self):
     # With ``cluster_enabled`` unset on the user and disabled on the
     # categories, copies from a categorised feed and from a category-less
     # feed are expected to end up alone; once the seed article's category
     # is enabled again both are expected to join the cluster.
     seed = ArticleController().read(category_id__ne=None).first()
     category_id = seed.category_id
     category_ctrl = CategoryController(seed.user_id)
     cluster = seed.cluster
     feed_ctrl = FeedController(cluster.user_id)
     feed_in_category = feed_ctrl.create(title='new feed',
                                         category_id=category_id)
     feed_without_category = feed_ctrl.create(title='category-less')
     update_on_all_objs(users=[cluster.user], cluster_enabled=None)
     # empty filter: no restriction is passed to the category update
     category_ctrl.update({}, {'cluster_enabled': False})

     # disabled: each copy sits alone in its own cluster
     for source_feed in (feed_in_category, feed_without_category):
         copy = self.create_article_from(cluster, source_feed)
         self.assertEqual(1, len(copy.cluster.articles))
         self.assertNotInCluster(copy, cluster)

     category_ctrl.update({'id': category_id}, {'cluster_enabled': True})

     # enabled: the cluster grows by one article with every copy
     growth = ((feed_without_category, 2), (feed_in_category, 3))
     for source_feed, expected_size in growth:
         copy = self.create_article_from(cluster, source_feed)
         self.assertEqual(expected_size, len(copy.cluster.articles))
         self.assertInCluster(copy, cluster)