Exemplo n.º 1
0
def __generate_topic_and_post(cli, fake, nb_avg_posts_in_topic, nb_topics, nb_users, topics, tps1):
    profiles = list(Profile.objects.all())
    for topic_index in range(0, nb_topics):
        nb_posts = randint(0, nb_avg_posts_in_topic * 2) + 1
        for post_index in range(1, nb_posts):
            post = PostFactory(
                topic=topics[topic_index], author=profiles[post_index % nb_users].user, position=post_index + 1
            )
            post.text = fake.paragraph(nb_sentences=5, variable_nb_sentences=True)
            post.text_html = emarkdown(post.text)
            post.is_useful = int(nb_posts * 0.3) > 0 and post_index % int(nb_posts * 0.3) == 0
            post.save()
            sys.stdout.write(f" Topic {topic_index + 1}/{nb_topics}  \tPost {post_index + 1}/{nb_posts}  \r")
            sys.stdout.flush()
    tps2 = time.time()
    cli.stdout.write(f"\nFait en {tps2 - tps1} sec")
Exemplo n.º 2
0
    def test_boosts(self):
        """Check if boosts are doing their job"""

        if not self.manager.connected_to_es:
            return

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = "test"

        topic_1_solved_sticky = TopicFactory(forum=self.forum,
                                             author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ""
        topic_1_solved_sticky.solved_by = self.user
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky,
                             author=self.user,
                             position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky,
                                    author=self.user,
                                    position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum,
                                      author=self.user,
                                      title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ""
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked,
                                        author=self.user,
                                        position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, "Who cares ?", "Same here")
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = publish_content(tuto,
                                         tuto_draft,
                                         is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type="ARTICLE", title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type="OPINION",
                                                     title=text)
        published_opinion_not_picked = PublishedContent.objects.get(
            content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type="OPINION", title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(
            content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 10)

        # 2. Reset all boosts to 1
        for doc_type in settings.ZDS_APP["search"]["boosts"]:
            for key in settings.ZDS_APP["search"]["boosts"][doc_type]:
                settings.ZDS_APP["search"]["boosts"][doc_type][key] = 1.0

        # 3. Test posts
        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 3)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score ==
                        response[2].meta.score)

        settings.ZDS_APP["search"]["boosts"]["post"]["if_first"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(
            post_2_useful.pk))  # post 2 is the only one not first

        settings.ZDS_APP["search"]["boosts"]["post"]["if_first"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["post"]["if_useful"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(post_2_useful.pk))  # post 2 is useful

        settings.ZDS_APP["search"]["boosts"]["post"]["if_useful"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["post"]["ld_ratio_above_1"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_2_useful.pk))  # post 2 have a l/d ratio of 5/2

        settings.ZDS_APP["search"]["boosts"]["post"]["ld_ratio_above_1"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["post"][
            "ld_ratio_below_1"] = 2.0  # no one would do that in real life

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_3_ld_below_1.pk))  # post 3 have a l/d ratio of 2/5

        settings.ZDS_APP["search"]["boosts"]["post"]["ld_ratio_below_1"] = 1.0

        # 4. Test topics
        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        settings.ZDS_APP["search"]["boosts"]["topic"]["if_sticky"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        settings.ZDS_APP["search"]["boosts"]["topic"]["if_sticky"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["topic"]["if_solved"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is solved

        settings.ZDS_APP["search"]["boosts"]["topic"]["if_solved"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["topic"][
            "if_locked"] = 2.0  # no one would do that in real life

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=" + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_2_locked.pk))  # topic 2 is locked

        settings.ZDS_APP["search"]["boosts"]["topic"][
            "if_locked"] = 1.0  # no one would do that in real life

        # 5. Test published contents
        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 5)

        # score are equals without boost:
        self.assertTrue(
            response[0].meta.score == response[1].meta.score == response[2].
            meta.score == response[3].meta.score == response[4].meta.score)

        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_article"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_article.pk))  # obvious

        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_article"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_tutorial"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_tutorial"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_opinion"] = 2.0
        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_opinion_not_picked"] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(
            response[0].meta.id,
            str(published_opinion_not_picked.pk))  # unpicked opinion got first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_opinion"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_opinion_not_picked"] = 1.0
        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_medium_or_big_tutorial"] = 2.0

        result = self.client.get(reverse("search:query") + "?q=" + text +
                                 "&models=content",
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP["search"]["boosts"]["publishedcontent"][
            "if_medium_or_big_tutorial"] = 1.0

        # 6. Test global boosts
        # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists

        for model in self.indexable:

            # set a huge number to overcome the small differences:
            settings.ZDS_APP["search"]["boosts"][
                model.get_es_document_type()]["global"] = 10.0

            result = self.client.get(reverse("search:query") + "?q=" + text,
                                     follow=False)

            self.assertEqual(result.status_code, 200)
            response = result.context["object_list"].execute()
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type,
                             model.get_es_document_type())  # obvious

            settings.ZDS_APP["search"]["boosts"][
                model.get_es_document_type()]["global"] = 1.0