def load_topics(cli, size, fake, *_, **__):
    """
    Load topics

    Create ``size * 10`` topics, each with a first post, distributed
    round-robin over the existing forums and member profiles. Nothing is
    created (a message is written and the function returns) when there is
    no forum, no member profile or no tag in the database.

    :param cli: object whose ``stdout.write`` is used to report messages.
    :param size: scale factor; ten topics are created per unit.
    :param fake: object providing ``text(max_nb_chars=...)``, used to
        generate titles and subtitles (presumably a Faker instance; confirm
        against the caller).
    """
    nb_topics = size * 10
    cli.stdout.write(f"Nombres de Topics à créer : {nb_topics}")
    tps1 = time.time()

    forums = list(Forum.objects.all())
    if not forums:
        cli.stdout.write(
            "Il n'y a aucun forum actuellement. " "Vous devez rajouter les forums dans vos fixtures (forum)"
        )
        return

    # Authors are picked from the profiles, so the emptiness check and the
    # round-robin modulo must both be based on this list: indexing it by the
    # number of users raises IndexError as soon as a user has no profile.
    profiles = list(Profile.objects.all())
    if not profiles:
        cli.stdout.write(
            "Il n'y a aucun membre actuellement. " "Vous devez rajouter les membres dans vos fixtures (member)"
        )
        return

    nb_tags = Tag.objects.count()
    if nb_tags == 0:
        cli.stdout.write("Il n'y a aucun tag actuellement. " "Vous devez rajouter les tags dans vos fixtures (tag)")
        return

    nb_forums = len(forums)
    nb_profiles = len(profiles)

    for i in range(nb_topics):
        forum = forums[i % nb_forums]
        author = profiles[i % nb_profiles].user

        # An IntegrityError while building a topic is deliberately ignored:
        # that topic is skipped and the loop goes on with the next one.
        with contextlib.suppress(IntegrityError):
            topic = TopicFactory(forum=forum, author=author)
            # Every fifth topic (i multiple of 5) is left unsolved.
            topic.solved_by = author if i % 5 else None
            topic.is_locked = i % 10 == 0
            topic.is_sticky = i % 15 == 0

            nb_rand_tags = random.randint(0, 5)
            add_generated_tags_to_topic(nb_rand_tags, nb_tags, topic)

            topic.title = fake.text(max_nb_chars=80)
            topic.subtitle = fake.text(max_nb_chars=200)
            topic.save()
            PostFactory(topic=topic, author=topic.author, position=1)

        # Progress indicator rewritten in place thanks to the carriage return.
        sys.stdout.write(f" Topic {i + 1}/{nb_topics} \r")
        sys.stdout.flush()

    tps2 = time.time()
    cli.stdout.write(f"\nFait en {tps2 - tps1} sec")
def test_upercase_and_lowercase_search_give_same_results(self):
    """Pretty self-explanatory function name, isn't it ?

    Index the same set of objects (a topic with one post and a published
    tutorial) once with a lowercase text and once with the uppercase
    version of it, then check that searching for either text returns the
    same number of hits, in the same order.
    """

    if not self.manager.connected_to_es:
        # Report the test as skipped instead of letting it silently pass.
        self.skipTest("Elasticsearch is not available")

    def create_searchable_content(text, tag, subcategory):
        """Create a topic, its first post and a published tutorial, all filled with ``text``."""
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        topic.tags.add(tag)
        topic.subtitle = text
        topic.save()

        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.subcategory.add(subcategory)
        tuto.tags.add(tag)
        tuto.save()

        tuto_draft.description = text
        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

    def search(text):
        """Run a search for ``text`` through the view and return the executed response."""
        result = self.client.get(reverse("search:query") + "?q=" + text, follow=False)
        self.assertEqual(result.status_code, 200)
        return result.context["object_list"].execute()

    text_lc = "test"
    text_uc = "TEST"

    # Note: a constraint forces tags title to be unique, so the same tag
    # (and the same subcategory) is shared by both sets of objects.
    tag_lc = TagFactory(title=text_lc)
    subcategory_lc = SubCategoryFactory(title=text_lc)

    # 1. Index lowercase stuffs
    create_searchable_content(text_lc, tag_lc, subcategory_lc)

    # 2. Index uppercase stuffs
    create_searchable_content(text_uc, tag_lc, subcategory_lc)

    # 3. Index and search:
    self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 0)

    # index
    for model in self.indexable:
        if model is FakeChapter:
            continue
        self.manager.es_bulk_indexing_of_model(model)
    self.manager.refresh_index()

    response_lc = search(text_lc)
    self.assertEqual(response_lc.hits.total, 8)

    response_uc = search(text_uc)
    self.assertEqual(response_uc.hits.total, 8)

    # zip() stops at the shortest response, so make sure none is truncated.
    self.assertEqual(len(response_lc), len(response_uc))

    # we should get results in the same order!
    for hit_lc, hit_uc in zip(response_lc, response_uc):
        self.assertEqual(hit_lc.meta.id, hit_uc.meta.id)
def test_boosts(self):
    """Check if boosts are doing their job

    All boosts are first reset to 1.0, then each boost is raised in turn
    and the ranking of the search results is checked. The boosts found in
    ``settings.ZDS_APP["search"]["boosts"]`` before the test are restored
    afterwards, even if an assertion fails.
    """
    import copy

    if not self.manager.connected_to_es:
        # Report the test as skipped instead of letting it silently pass.
        self.skipTest("Elasticsearch is not available")

    # The test rewrites the shared boost configuration: snapshot it and
    # restore it on cleanup so that other tests are not affected.
    saved_boosts = copy.deepcopy(settings.ZDS_APP["search"]["boosts"])

    def restore_boosts():
        """Put back the boosts that were configured before the test."""
        current = settings.ZDS_APP["search"]["boosts"]
        current.clear()
        current.update(saved_boosts)

    self.addCleanup(restore_boosts)

    def set_boost(doc_type, key, value):
        """Set one boost value in the search configuration."""
        settings.ZDS_APP["search"]["boosts"][doc_type][key] = value

    def search(expected_total, models=None):
        """Search for ``text`` (optionally restricted to ``models``) and check the number of hits."""
        url = reverse("search:query") + "?q=" + text
        if models is not None:
            url += "&models=" + models
        result = self.client.get(url, follow=False)
        self.assertEqual(result.status_code, 200)
        response = result.context["object_list"].execute()
        self.assertEqual(response.hits.total, expected_total)
        return response

    # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
    text = "test"

    topic_1_solved_sticky = TopicFactory(forum=self.forum, author=self.user)
    topic_1_solved_sticky.title = text
    topic_1_solved_sticky.subtitle = ""
    topic_1_solved_sticky.solved_by = self.user
    topic_1_solved_sticky.is_sticky = True
    topic_1_solved_sticky.save()

    post_1 = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=1)
    post_1.text = post_1.text_html = text
    post_1.save()

    post_2_useful = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=2)
    post_2_useful.text = post_2_useful.text_html = text
    post_2_useful.is_useful = True
    post_2_useful.like = 5
    post_2_useful.dislike = 2  # l/d ratio above 1
    post_2_useful.save()

    topic_2_locked = TopicFactory(forum=self.forum, author=self.user, title=text)
    topic_2_locked.title = text
    topic_2_locked.subtitle = ""
    topic_2_locked.is_locked = True
    topic_2_locked.save()

    post_3_ld_below_1 = PostFactory(topic=topic_2_locked, author=self.user, position=1)
    post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
    post_3_ld_below_1.like = 2
    post_3_ld_below_1.dislike = 5  # l/d ratio below 1
    post_3_ld_below_1.save()

    tuto = PublishableContentFactory(type="TUTORIAL")
    tuto_draft = tuto.load_version()

    tuto.title = text
    tuto.authors.add(self.user)
    tuto.save()

    tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

    chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
    chapter1.repo_update(text, "Who cares ?", "Same here")
    ExtractFactory(container=chapter1, db_object=tuto)

    published_tuto = publish_content(tuto, tuto_draft, is_major_update=True)

    tuto.sha_public = tuto_draft.current_version
    tuto.sha_draft = tuto_draft.current_version
    tuto.public_version = published_tuto
    tuto.save()

    article = PublishedContentFactory(type="ARTICLE", title=text)
    published_article = PublishedContent.objects.get(content_pk=article.pk)

    opinion_not_picked = PublishedContentFactory(type="OPINION", title=text)
    published_opinion_not_picked = PublishedContent.objects.get(content_pk=opinion_not_picked.pk)

    opinion_picked = PublishedContentFactory(type="OPINION", title=text)
    opinion_picked.sha_picked = opinion_picked.sha_draft
    opinion_picked.date_picked = datetime.datetime.now()
    opinion_picked.save()

    published_opinion_picked = PublishedContent.objects.get(content_pk=opinion_picked.pk)

    for model in self.indexable:
        if model is FakeChapter:
            continue
        self.manager.es_bulk_indexing_of_model(model)
    self.manager.refresh_index()

    self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 10)

    # 2. Reset all boosts to 1
    for doc_type in settings.ZDS_APP["search"]["boosts"]:
        for key in settings.ZDS_APP["search"]["boosts"][doc_type]:
            settings.ZDS_APP["search"]["boosts"][doc_type][key] = 1.0

    # 3. Test posts
    post_type = Post.get_es_document_type()

    response = search(3, models=post_type)

    # score are equals without boost:
    self.assertTrue(response[0].meta.score == response[1].meta.score == response[2].meta.score)

    set_boost("post", "if_first", 2.0)

    response = search(3, models=post_type)

    self.assertTrue(response[0].meta.score == response[1].meta.score > response[2].meta.score)
    self.assertEqual(response[2].meta.id, str(post_2_useful.pk))  # post 2 is the only one not first

    set_boost("post", "if_first", 1.0)
    set_boost("post", "if_useful", 2.0)

    response = search(3, models=post_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
    self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 is useful

    set_boost("post", "if_useful", 1.0)
    set_boost("post", "ld_ratio_above_1", 2.0)

    response = search(3, models=post_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
    self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 have a l/d ratio of 5/2

    set_boost("post", "ld_ratio_above_1", 1.0)
    set_boost("post", "ld_ratio_below_1", 2.0)  # no one would do that in real life

    response = search(3, models=post_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
    self.assertEqual(response[0].meta.id, str(post_3_ld_below_1.pk))  # post 3 have a l/d ratio of 2/5

    set_boost("post", "ld_ratio_below_1", 1.0)

    # 4. Test topics
    topic_type = Topic.get_es_document_type()

    response = search(2, models=topic_type)

    # score are equals without boost:
    self.assertTrue(response[0].meta.score == response[1].meta.score)

    set_boost("topic", "if_sticky", 2.0)

    response = search(2, models=topic_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is sticky

    set_boost("topic", "if_sticky", 1.0)
    set_boost("topic", "if_solved", 2.0)

    response = search(2, models=topic_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is solved

    set_boost("topic", "if_solved", 1.0)
    set_boost("topic", "if_locked", 2.0)  # no one would do that in real life

    response = search(2, models=topic_type)

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(topic_2_locked.pk))  # topic 2 is locked

    set_boost("topic", "if_locked", 1.0)

    # 5. Test published contents
    response = search(5, models="content")

    # score are equals without boost:
    self.assertTrue(
        response[0].meta.score
        == response[1].meta.score
        == response[2].meta.score
        == response[3].meta.score
        == response[4].meta.score
    )

    set_boost("publishedcontent", "if_article", 2.0)

    response = search(5, models="content")

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(published_article.pk))  # obvious

    set_boost("publishedcontent", "if_article", 1.0)
    set_boost("publishedcontent", "if_tutorial", 2.0)

    response = search(5, models="content")

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

    set_boost("publishedcontent", "if_tutorial", 1.0)
    set_boost("publishedcontent", "if_opinion", 2.0)
    set_boost("publishedcontent", "if_opinion_not_picked", 4.0)
    # Note: in "real life", unpicked opinion would get a boost < 1.

    response = search(5, models="content")

    self.assertTrue(response[0].meta.score > response[1].meta.score > response[2].meta.score)
    self.assertEqual(response[0].meta.id, str(published_opinion_not_picked.pk))  # unpicked opinion got first
    self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

    set_boost("publishedcontent", "if_opinion", 1.0)
    set_boost("publishedcontent", "if_opinion_not_picked", 1.0)
    set_boost("publishedcontent", "if_medium_or_big_tutorial", 2.0)

    response = search(5, models="content")

    self.assertTrue(response[0].meta.score > response[1].meta.score)
    self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

    set_boost("publishedcontent", "if_medium_or_big_tutorial", 1.0)

    # 6. Test global boosts
    # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists
    for model in self.indexable:
        model_type = model.get_es_document_type()

        # set a huge number to overcome the small differences:
        set_boost(model_type, "global", 10.0)

        response = search(10)

        self.assertEqual(response[0].meta.doc_type, model_type)  # obvious

        set_boost(model_type, "global", 1.0)