def hide_comment_by_user(self, user, text_hidden):
    """Hide the post, then immediately mirror the change in Elasticsearch.

    The parent implementation is called first with ``user`` and ``text_hidden``;
    the index manager is then asked to update this post's document so that
    ``is_visible`` is ``False``.
    """
    super(Post, self).hide_comment_by_user(user, text_hidden)

    # push the visibility change to ES right away
    hidden_fields = {"is_visible": False}
    ESIndexManager(**settings.ES_SEARCH_INDEX).update_single_document(self, hidden_fields)
def hide_comment_by_user(self, user, text_hidden):
    """Hide this post and flag its Elasticsearch document as not visible.

    Delegates the hiding itself to the parent class, then sends a partial
    update (``is_visible`` set to ``False``) for this object through the
    index manager built from ``settings.ES_SEARCH_INDEX``.
    """
    super(Post, self).hide_comment_by_user(user, text_hidden)

    es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
    partial_document = {'is_visible': False}
    es_manager.update_single_document(self, partial_document)
def delete_published_content_in_elasticsearch(sender, instance, **kwargs):
    """Signal receiver (pre_delete) removing a published content from ES.

    When the index exists, the chapter documents routed to ``instance.es_id``
    are deleted by query first. The deletion of the content document itself is
    delegated to ``delete_document_in_elasticsearch``, whose result is returned.
    """
    manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    if manager.index_exists:
        # chapters are routed to their parent content: drop them all at once
        chapter_type = FakeChapter.get_es_document_type()
        routed_to_content = ES_Q('match', _routing=instance.es_id)
        manager.delete_by_query(chapter_type, routed_to_content)

    return delete_document_in_elasticsearch(instance)
def __init__(self, *args, **kwargs):
    """Prepare the list of models and the Elasticsearch index manager.

    Overridden because FakeChapter needs to be present for mapping. Its mapping
    also needs to be defined before the one of PublishedContent for parenting
    reasons, which is why it is placed first in the list.

    :raises Exception: if the index manager reports no connection to Elasticsearch.
    """
    super().__init__(*args, **kwargs)

    # Build a new list instead of calling ``insert`` on the existing one: when
    # ``models`` is shared between instances (e.g. a class attribute), an in-place
    # insert adds one more FakeChapter entry each time an instance is created.
    self.models = [FakeChapter] + list(self.models)

    self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    if not self.index_manager.connected_to_es:
        raise Exception("Unable to connect to Elasticsearch, aborting.")
def setUp(self):
    """Create the bot account, a forum, a member and a staff user, plus the index manager."""
    settings.EMAIL_BACKEND = "django.core.mail.backends.locmem.EmailBackend"

    # the first profile created is registered as the bot account
    bot_profile = ProfileFactory()
    self.mas = bot_profile.user
    settings.ZDS_APP["member"]["bot_account"] = self.mas.username

    category_and_forum = create_category_and_forum()
    self.category, self.forum = category_and_forum

    member_profile = ProfileFactory()
    self.user = member_profile.user
    staff_profile = StaffProfileFactory()
    self.staff = staff_profile.user

    es_options = settings.ES_SEARCH_INDEX
    self.index_manager = ESIndexManager(**es_options)
def setUp(self):
    """Disable PDF builds, then create the users, the forum and the index manager."""
    # PDF generation is switched off so that the tests run faster
    content_settings = settings.ZDS_APP['content']
    content_settings['build_pdf_when_published'] = False

    settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

    # the first profile created is registered as the bot account
    bot_profile = ProfileFactory()
    self.mas = bot_profile.user
    settings.ZDS_APP['member']['bot_account'] = self.mas.username

    category_and_forum = create_category()
    self.category, self.forum = category_and_forum

    member_profile = ProfileFactory()
    self.user = member_profile.user
    staff_profile = StaffProfileFactory()
    self.staff = staff_profile.user

    es_options = settings.ES_SEARCH_INDEX
    self.index_manager = ESIndexManager(**es_options)
def setUp(self):
    """Create users and a forum, then reset the ES index for the indexable models."""
    settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

    # the first profile created is registered as the bot account
    bot_profile = ProfileFactory()
    self.mas = bot_profile.user
    settings.ZDS_APP['member']['bot_account'] = self.mas.username

    category_and_forum = create_category_and_forum()
    self.category, self.forum = category_and_forum

    member_profile = ProfileFactory()
    self.user = member_profile.user
    staff_profile = StaffProfileFactory()
    self.staff = staff_profile.user

    es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
    self.manager = es_manager
    self.indexable = [FakeChapter, PublishedContent, Topic, Post]

    # start from a freshly created index, with the custom analyzer installed
    es_manager.reset_es_index(self.indexable)
    es_manager.setup_custom_analyzer()
    es_manager.refresh_index()
def test_setup_functions(self):
    """Exercise reset_es_index(), setup_custom_analyzer() and clear_es_index() on a dedicated index."""
    if not self.manager.connected_to_es:
        return

    custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
    manager = ESIndexManager(**custom_index)

    def fetch_index_settings():
        # settings are returned keyed by index name: check the key, then unwrap
        raw_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, raw_settings)
        return raw_settings[manager.index]['settings']['index']

    # the index does not exist yet, but the manager already knows its parameters
    self.assertNotIn(manager.index, self.manager.es.cat.indices())
    self.assertEqual(manager.index, custom_index['name'])
    self.assertEqual(manager.number_of_shards, custom_index['shards'])
    self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

    # creation
    models = [Topic, Post]
    manager.reset_es_index(list(models))
    self.assertIn(manager.index, manager.es.cat.indices())

    index_settings = fetch_index_settings()
    self.assertEqual(index_settings['provided_name'], manager.index)
    self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
    self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

    # every model must have its mapping
    all_mappings = manager.es.indices.get_mapping(index=manager.index)
    self.assertIn(manager.index, all_mappings)
    index_mappings = all_mappings[manager.index]['mappings']
    for model in models:
        self.assertIn(model.get_es_document_type(), index_mappings)

    # the analysis section only shows up once the custom analyzer is set up
    self.assertNotIn('analysis', index_settings)
    manager.setup_custom_analyzer()
    self.assertIn('analysis', fetch_index_settings())

    # clearing removes the index again
    manager.clear_es_index()
    self.assertNotIn(manager.index, self.manager.es.cat.indices())
def get_es_indexable(cls, force_reindexing=False):
    """Yield batches of objects to index: the published contents and their chapters.

    The parent queryset is walked by increasing primary key, in slices of at most
    ``PublishedContent.objects_per_batch`` objects. For each slice, the chapters
    (as ``FakeChapter``) are yielded first, themselves cut in batches of the same
    maximal size, followed by the slice of contents itself.
    """
    index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
    objects_source = super(PublishedContent, cls).get_es_indexable(force_reindexing)

    def fetch_batch_after(pk):
        return list(objects_source.filter(pk__gt=pk)[:PublishedContent.objects_per_batch])

    batch = fetch_batch_after(0)

    while batch:
        chapters = []

        for content in batch:
            versioned = content.load_public_version()

            # only contents having sub-containers get their chapters indexed
            if not versioned.has_sub_containers():
                continue

            # chapters of a previous indexation, if any, are removed first
            if content.es_already_indexed:
                index_manager.delete_by_query(
                    FakeChapter.get_es_document_type(), ES_Q('match', _routing=content.es_id))

            chapters.extend(
                FakeChapter(chapter, versioned, content.es_id) for chapter in versioned.get_list_of_chapters())

        # never hand out more than ``objects_per_batch`` items at once
        for start in range(0, len(chapters), PublishedContent.objects_per_batch):
            yield chapters[start:start + PublishedContent.objects_per_batch]

        yield batch

        # next slice starts right after the last object seen
        batch = fetch_batch_after(batch[-1].pk)
def setUp(self):
    """Set the in-memory mail backend, then create the accounts, the forum and the index manager."""
    settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

    # bot account: its username is stored in the member settings
    bot_user = ProfileFactory().user
    self.mas = bot_user
    settings.ZDS_APP['member']['bot_account'] = bot_user.username

    created = create_category_and_forum()
    self.category, self.forum = created

    regular_user = ProfileFactory().user
    self.user = regular_user
    staff_user = StaffProfileFactory().user
    self.staff = staff_user

    self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
def __init__(self, *args, **kwargs):
    """Prepare the list of models and the Elasticsearch index manager.

    Overridden because FakeChapter needs to be present for mapping. Its mapping
    also needs to be defined before the one of PublishedContent for parenting
    reasons, which is why it is placed first in the list.

    :raises Exception: if the index manager reports no connection to Elasticsearch.
    """
    super(Command, self).__init__(*args, **kwargs)

    # Build a new list instead of calling ``insert`` on the existing one: when
    # ``models`` is shared between instances (e.g. a class attribute), an in-place
    # insert adds one more FakeChapter entry each time an instance is created.
    self.models = [FakeChapter] + list(self.models)

    self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    if not self.index_manager.connected_to_es:
        raise Exception('Unable to connect to Elasticsearch, aborting.')
def test_setup_functions(self):
    """Create, configure and clear a separate index, checking its state after each step."""
    if not self.manager.connected_to_es:
        return

    custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
    manager = ESIndexManager(**custom_index)

    def current_index_settings():
        # the response is keyed by index name; make sure ours is in, then unwrap it
        response = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, response)
        return response[manager.index]['settings']['index']

    # nothing exists in ES so far, the manager only holds the parameters
    self.assertNotIn(manager.index, self.manager.es.cat.indices())
    self.assertEqual(manager.index, custom_index['name'])
    self.assertEqual(manager.number_of_shards, custom_index['shards'])
    self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

    # step 1: creation of the index
    models = [Topic, Post]
    manager.reset_es_index(list(models))
    self.assertIn(manager.index, manager.es.cat.indices())

    index_settings = current_index_settings()
    self.assertEqual(index_settings['provided_name'], manager.index)
    self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
    self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

    # one mapping per model
    mappings_by_index = manager.es.indices.get_mapping(index=manager.index)
    self.assertIn(manager.index, mappings_by_index)
    mappings = mappings_by_index[manager.index]['mappings']
    for model in models:
        self.assertIn(model.get_es_document_type(), mappings)

    # step 2: the analyzer adds an 'analysis' section to the settings
    self.assertNotIn('analysis', index_settings)
    manager.setup_custom_analyzer()
    index_settings = current_index_settings()
    self.assertIn('analysis', index_settings)

    # step 3: clearing wipes the index out
    manager.clear_es_index()
    self.assertNotIn(manager.index, self.manager.es.cat.indices())
class UtilsTests(TestCase):
    """Tests of the ``es_manager`` management command."""

    def setUp(self):
        """Create the users and the forum used by the test, and build the index manager."""
        # don't build PDF to speed up the tests
        settings.ZDS_APP['content']['build_pdf_when_published'] = False

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""
        if not self.index_manager.connected_to_es:
            return

        # in the beginning: the void
        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())

        text = u'Ceci est un texte de test'

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command('es_manager', 'index_all')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        # ``Search.query()`` returns a new object, so its result has to be kept
        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags (the values): iterating over the dict itself yields its
        # keys, which are non-empty strings and therefore always truthy
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index in

        call_command('es_manager', 'clear')
        self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command('es_manager', 'setup')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index back in ...

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(index=self.index_manager.index)
        settings_index = result[self.index_manager.index]['settings']['index']
        self.assertTrue('analysis' in settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command('es_manager', 'index_flagged')

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get the 4 results back

    def tearDown(self):
        """Remove the content repositories and media, re-enable PDF builds and drop the index."""
        if os.path.isdir(settings.ZDS_APP['content']['repo_private_path']):
            shutil.rmtree(settings.ZDS_APP['content']['repo_private_path'])
        if os.path.isdir(settings.ZDS_APP['content']['repo_public_path']):
            shutil.rmtree(settings.ZDS_APP['content']['repo_public_path'])
        if os.path.isdir(settings.MEDIA_ROOT):
            shutil.rmtree(settings.MEDIA_ROOT)

        # re-active PDF build
        settings.ZDS_APP['content']['build_pdf_when_published'] = True

        # delete index:
        self.index_manager.clear_es_index()
class ESIndexManagerTests(TutorialTestMixin, TestCase):
    """Tests of ``ESIndexManager``: index setup, analyzer, indexation and deletion."""

    def setUp(self):
        """Create users and a forum, then reset the ES index for the indexable models."""
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        self.manager.reset_es_index(self.indexable)
        self.manager.setup_custom_analyzer()
        self.manager.refresh_index()

    def test_setup_functions(self):
        """Test the behavior of the reset_es_index(), setup_custom_analyzer() and clear_es_index() functions"""
        if not self.manager.connected_to_es:
            return

        custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
        manager = ESIndexManager(**custom_index)

        # in the beginning: the void:
        self.assertNotIn(manager.index, self.manager.es.cat.indices())
        self.assertEqual(manager.index, custom_index['name'])
        self.assertEqual(manager.number_of_shards, custom_index['shards'])
        self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

        # 1. Creation:
        models = [Topic, Post]
        manager.reset_es_index([Topic, Post])
        self.assertIn(manager.index, manager.es.cat.indices())  # index in !

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']

        self.assertEqual(index_settings['provided_name'], manager.index)
        self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
        self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

        # test mappings
        mappings = manager.es.indices.get_mapping(index=manager.index)
        self.assertIn(manager.index, mappings)
        mappings = mappings[manager.index]['mappings']

        for model in models:
            self.assertIn(model.get_es_document_type(), mappings)

        # 2. Analyzer: the 'analysis' section only appears once it is set up
        self.assertNotIn('analysis', index_settings)
        manager.setup_custom_analyzer()

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertIn('analysis', index_settings)

        # 3. Clearing
        manager.clear_es_index()
        self.assertNotIn(manager.index, self.manager.es.cat.indices())  # back to the void

    def test_custom_analyzer(self):
        """Test our custom analyzer"""
        if not self.manager.connected_to_es:
            return

        # pairs of (sentence, tokens expected from the analyzer)
        test_sentences = [
            # stemming:
            ('programmation programmer programmateur programmes', ['program', 'program', 'program', 'program']),
            # keep "c" intact:
            ('apprendre à programmer en C', ['aprendr', 'program', 'langage_c']),
            # remove HTML and some special characters:
            ('<p>« test ! », en hurlant …</p>', ['test', 'hurlant']),
            # keep "c++" and "linux" intact:
            ('écrire un programme en C++ avec Linux', ['ecrir', 'program', 'c++', 'linux']),
            # elision:
            ("c'est de l'arnaque", ['arnaqu'])
        ]

        for sentence in test_sentences:
            tokens = self.manager.analyze_sentence(sentence[0])
            self.assertEqual(len(tokens), len(sentence[1]))
            self.assertEqual(tokens, sentence[1])

    def test_indexation(self):
        """test the indexation and deletion of the different documents"""
        if not self.manager.connected_to_es:
            return

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user)
        post = PostFactory(topic=topic, author=self.user, position=1)

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. index all
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        # ``Search.query()`` returns a new object, so its result has to be kept
        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags (the values): iterating over the dict itself yields its
        # keys, which are non-empty strings and therefore always truthy
        self.assertTrue(all(must_contain.values()))

        # 2. Test what reindexation will do:
        new_topic = TopicFactory(forum=self.forum, author=self.user)
        new_post = PostFactory(topic=new_topic, author=self.user, position=1)

        pk_of_topics_to_reindex = [item.pk for item in Topic.get_es_indexable(force_reindexing=False)]
        pk_of_posts_to_reindex = [item.pk for item in Post.get_es_indexable(force_reindexing=False)]

        self.assertNotIn(topic.pk, pk_of_topics_to_reindex)
        self.assertIn(new_topic.pk, pk_of_topics_to_reindex)
        self.assertNotIn(post.pk, pk_of_posts_to_reindex)
        self.assertIn(new_post.pk, pk_of_posts_to_reindex)

        for model in self.indexable:  # ok, so let's index that
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 6)  # good!

        # 3. Test single deletion:
        new_post = Post.objects.get(pk=new_post.pk)

        self.manager.delete_document(new_post)
        self.manager.refresh_index()

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 5)  # one is missing

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != new_post.es_id)

        # 4. Test "delete_by_query_deletion":
        topic = Topic.objects.get(pk=topic.pk)
        new_topic = Topic.objects.get(pk=new_topic.pk)

        self.manager.delete_by_query(Topic.get_es_document_type(), MatchAll())  # the two topic are deleted
        self.manager.refresh_index()

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 3)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != new_topic.es_id)
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != topic.es_id)

        # 5. Test that the deletion of an object also triggers its deletion in ES
        post = Post.objects.get(pk=post.pk)
        post.delete()
        self.manager.refresh_index()

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 2)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != post.es_id)

        # 6. Test full desindexation:
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.clear_indexing_of_model(model)

        # note "topic" is gone since "post" is gone, due to relationships at the Django level
        new_topic = Topic.objects.get(pk=new_topic.pk)
        new_post = Post.objects.get(pk=new_post.pk)

        self.assertFalse(new_topic.es_already_indexed)
        self.assertTrue(new_topic.es_flagged)
        self.assertFalse(new_post.es_already_indexed)
        self.assertTrue(new_post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

    def test_special_case_of_contents(self):
        """test that the old publishedcontent does not stay when a new one is created"""
        if not self.manager.connected_to_es:
            return

        # 1. Create a middle-tutorial, publish it, then index it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(first_publication.es_already_indexed)
        self.assertFalse(first_publication.es_flagged)

        # ``Search.query()`` returns a new object, so its result has to be kept
        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 2)  # get 2 results, one for the content and one for the chapter

        self.assertEqual(PublishedContent.objects.count(), 1)

        # 2. Change the title, which will trigger a change in the slug
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        versioned = tuto.load_version(sha=tuto.sha_draft)

        tuto.title = 'un titre complètement différent!'
        tuto.save()

        versioned.repo_update_top_container(tuto.title, tuto.slug, 'osef', 'osef')
        second_publication = publish_content(tuto, versioned, True)

        tuto.sha_public = versioned.current_version
        tuto.sha_draft = versioned.current_version
        tuto.public_version = second_publication
        tuto.save()

        self.assertEqual(PublishedContent.objects.count(), 2)  # now there is two objects ...
        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        self.assertTrue(first_publication.must_redirect)  # .. including the first one, for redirection

        self.manager.refresh_index()

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 0)  # the old one is gone (and we need to reindex to get the new one)

        # 3. Check if indexation brings the new one, and not the old one
        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        second_publication = PublishedContent.objects.get(pk=second_publication.pk)

        s = Search().query(MatchAll())
        results = self.manager.setup_search(s).execute()
        self.assertEqual(len(results), 2)  # Still 2, not 4 !

        found_old = False
        found_new = False

        for hit in results:
            if hit.meta.doc_type == PublishedContent.get_es_document_type():
                if hit.meta.id == first_publication.es_id:
                    found_old = True
                if hit.meta.id == second_publication.es_id:
                    found_new = True

        self.assertTrue(found_new)
        self.assertFalse(found_old)

    def tearDown(self):
        """Run the parent clean-up, then drop the index used by the tests."""
        super().tearDown()

        # delete index:
        self.manager.clear_es_index()
class Command(BaseCommand):
    """Management command to set up, clear and fill the Elasticsearch index."""

    help = "Index data in ES and manage them"

    # set per instance in ``__init__``
    index_manager = None
    # indexable models, computed once when the class is defined and thus shared
    models = get_django_indexable_objects()

    def __init__(self, *args, **kwargs):
        """Prepare the list of models and the Elasticsearch index manager.

        Overridden because FakeChapter needs to be present for mapping. Its mapping
        also needs to be defined before the one of PublishedContent for parenting
        reasons, which is why it is placed first in the list.

        :raises Exception: if the index manager reports no connection to Elasticsearch.
        """
        super().__init__(*args, **kwargs)

        # ``models`` is a class attribute: inserting into it in place would add one
        # more FakeChapter entry each time the command is instantiated. Build a
        # per-instance list instead and leave the class-level one untouched.
        self.models = [FakeChapter, *self.models]

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception("Unable to connect to Elasticsearch, aborting.")

    def add_arguments(self, parser):
        """Declare the single positional ``action`` argument and its allowed values."""
        parser.add_argument(
            "action", type=str, help="action to perform", choices=["setup", "clear", "index_all", "index_flagged"])

    def handle(self, *args, **options):
        """Run the method matching ``options["action"]``.

        :raises CommandError: if the action is not one of the known ones.
        """
        if options["action"] == "setup":
            self.setup_es()
        elif options["action"] == "clear":
            self.clear_es()
        elif options["action"] == "index_all":
            self.index_documents(force_reindexing=True)
        elif options["action"] == "index_flagged":
            self.index_documents(force_reindexing=False)
        else:
            raise CommandError("unknown action {}".format(options["action"]))

    def setup_es(self):
        """Reset the index for all models, install the custom analyzer and refresh."""
        self.index_manager.reset_es_index(self.models)
        self.index_manager.setup_custom_analyzer()
        self.index_manager.refresh_index()

    def clear_es(self):
        """Clear the index, then clear the indexing state of every model."""
        self.index_manager.clear_es_index()

        for model in self.models:
            self.index_manager.clear_indexing_of_model(model)

    def index_documents(self, force_reindexing=False):
        """Bulk-index every model except FakeChapter, then refresh the index.

        :param force_reindexing: when true, the index is set up again first (which
            removes all previous data) and progress is printed for each model.
        """
        if force_reindexing:
            self.setup_es()  # remove all previous data

        for model in self.models:
            # FakeChapter is skipped here; it is not bulk-indexed as a model of its own
            if model is FakeChapter:
                continue

            if force_reindexing:
                print(f"- indexing {model.get_es_document_type()}s")

            indexed_counter = self.index_manager.es_bulk_indexing_of_model(
                model, force_reindexing=force_reindexing)

            if force_reindexing:
                print(f" {indexed_counter}\titems indexed")

        self.index_manager.refresh_index()
class UtilsTests(TutorialTestMixin, TestCase):
    """Tests of the ``es_manager`` management command."""

    def setUp(self):
        """Create the users and the forum used by the test, and build the index manager."""
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""
        if not self.index_manager.connected_to_es:
            return

        # in the beginning: the void
        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())

        text = 'Ceci est un texte de test'

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command('es_manager', 'index_all')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        # ``Search.query()`` returns a new object, so its result has to be kept
        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags (the values): iterating over the dict itself yields its
        # keys, which are non-empty strings and therefore always truthy
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index in

        call_command('es_manager', 'clear')
        self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command('es_manager', 'setup')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index back in ...

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(index=self.index_manager.index)
        settings_index = result[self.index_manager.index]['settings']['index']
        self.assertTrue('analysis' in settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command('es_manager', 'index_flagged')

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get the 4 results back

    def tearDown(self):
        """Run the parent clean-up, then drop the index used by the test."""
        super().tearDown()

        # delete index:
        self.index_manager.clear_es_index()
class ViewsTests(TutorialTestMixin, TestCase):
    """Tests of the search views.

    Each test creates its own fixtures (topics, posts, published contents), indexes them
    through ``ESIndexManager`` and then queries the views with the Django test client.
    Tests are skipped when the index manager reports that it is not connected to Elasticsearch.
    """

    def setUp(self):
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        # start every test from a freshly (re)created index
        self.manager.reset_es_index(self.indexable)
        self.manager.setup_custom_analyzer()
        self.manager.refresh_index()

    # -- helpers ---------------------------------------------------------------------------------

    def _require_es(self):
        """Skip the running test (instead of silently passing) when Elasticsearch is unavailable."""
        if not self.manager.connected_to_es:
            self.skipTest('Elasticsearch is not reachable')

    def _index_models(self, *models):
        """Bulk-index ``models`` (every indexable model when none is given), then refresh the index.

        ``FakeChapter`` is never bulk-indexed on its own, so it is skipped here.
        """
        for model in models or self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

    def _count_indexed_documents(self):
        """Return the number of hits returned by a match-all query on the index."""
        return len(self.manager.setup_search(Search().query(MatchAll())).execute())

    def _run_search(self, query_string):
        """GET the search view with ``query_string`` appended, check the 200 and return the ES response."""
        result = self.client.get(reverse('search:query') + query_string, follow=False)
        self.assertEqual(result.status_code, 200)
        return result.context['object_list'].execute()

    def _find_similar_topics(self, query_string):
        """GET the "similar topics" view, check the 200 and return the decoded ``results`` entry."""
        result = self.client.get(reverse('search:similar') + query_string, follow=False)
        self.assertEqual(result.status_code, 200)
        content = json_handler.loads(result.content.decode('utf-8'))
        return content['results']

    def _publish_version(self, content, draft, is_major_update=True):
        """Publish ``draft``, store the resulting public version on ``content`` and return it."""
        published = publish_content(content, draft, is_major_update=is_major_update)
        content.sha_public = draft.current_version
        content.sha_draft = draft.current_version
        content.public_version = published
        content.save()
        return published

    # -- tests -----------------------------------------------------------------------------------

    def test_basic_search(self):
        """Basic search and filtering"""
        self._require_es()

        # 1. Index and test search:
        text = 'test'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = self._publish_version(tuto, tuto_draft)

        # nothing has been indexed yet:
        self.assertEqual(self._count_indexed_documents(), 0)

        self._index_models()

        response = self._run_search('?q=' + text)
        self.assertEqual(response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        ids = {
            'topic': [topic_1.es_id],
            'post': [post_1.es_id],
            'content': [published.es_id, published.content_public_slug + '__' + chapter1.slug],
        }

        for doc_type, group in settings.ZDS_APP['search']['search_groups'].items():
            response = self._run_search('?q=' + text + '&models=' + doc_type)

            self.assertEqual(response.hits.total, len(ids[doc_type]))  # get 1 result of each …
            for i, r in enumerate(response):
                self.assertIn(r.meta.doc_type, group[1])  # … and only of the right type …
                self.assertEqual(r.meta.id, ids[doc_type][i])  # … with the right id !

    def test_get_similar_topics(self):
        """Get similar topics lists"""
        self._require_es()

        text = 'Clem ne se mange pas'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        text = 'Clem est la meilleure mascotte'

        topic_2 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_2 = PostFactory(topic=topic_2, author=self.user, position=1)
        # the HTML version must be set on post_2 itself (it used to be assigned to post_1 by mistake)
        post_2.text = post_2.text_html = text
        post_2.save()

        # 1. Should not get any result (nothing is indexed yet)
        self.assertEqual(len(self._find_similar_topics('?q=est')), 0)

        self._index_models()

        # 2. Should get exactly one result
        self.assertEqual(len(self._find_similar_topics('?q=mange')), 1)

        # 3. Should get exactly two results
        self.assertEqual(len(self._find_similar_topics('?q=Clem')), 2)

    def test_hidden_post_are_not_result(self):
        """Hidden posts should not show up in the search results"""
        self._require_es()

        # 1. Index and test search:
        text = 'test'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self._index_models(Topic, Post)
        self.assertEqual(self._count_indexed_documents(), 2)  # indexing ok

        post_1 = Post.objects.get(pk=post_1.pk)

        post_query = '?q=' + text + '&models=' + Post.get_es_document_type()

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(response[0].meta.id, post_1.es_id)

        # 2. Hide, reindex and search again:
        post_1.hide_comment_by_user(self.staff, 'Un abus de pouvoir comme un autre ;)')
        self.manager.refresh_index()

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 0)  # nothing in the results

    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Content of a hidden forum must only be found by members of its group"""
        self._require_es()

        # 1. Create a hidden forum belonging to a hidden staff group.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic_1 = TopicFactory(forum=hidden_forum, author=self.staff, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self._index_models(Topic, Post)
        self.assertEqual(self._count_indexed_documents(), 2)  # indexing ok

        # 2. search without connection and get no result
        response = self._run_search('?q=' + text)
        self.assertEqual(response.hits.total, 0)

        # 3. Connect with user (not a member of the group), search, and get no result
        self.assertTrue(self.client.login(username=self.user.username, password='******'))

        response = self._run_search('?q=' + text)
        self.assertEqual(response.hits.total, 0)

        # 4. Connect with staff, search, and get the topic and the post
        self.client.logout()
        self.assertTrue(self.client.login(username=self.staff.username, password='******'))

        response = self._run_search('?q=' + text)
        self.assertEqual(response.hits.total, 2)  # ok !

    def test_boosts(self):
        """Check if boosts are doing their job"""
        self._require_es()

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = 'test'

        topic_1_solved_sticky = TopicFactory(forum=self.forum, author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ''
        topic_1_solved_sticky.solved_by = self.user
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum, author=self.user, title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ''
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked, author=self.user, position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = self._publish_version(tuto, tuto_draft)

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION', title=text)
        published_opinion_not_picked = PublishedContent.objects.get(content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(content_pk=opinion_picked.pk)

        self._index_models()
        self.assertEqual(self._count_indexed_documents(), 10)

        # 2. Reset all boosts to 1
        boosts = settings.ZDS_APP['search']['boosts']

        # The boosts live in the global settings: snapshot them and restore them (in place) once the
        # test is over, so that the values set below do not leak into the other tests.
        saved_boosts = {doc_type: dict(values) for doc_type, values in boosts.items()}

        def restore_boosts():
            for doc_type, values in saved_boosts.items():
                boosts[doc_type].clear()
                boosts[doc_type].update(values)

        self.addCleanup(restore_boosts)

        for doc_type in boosts:
            for key in boosts[doc_type]:
                boosts[doc_type][key] = 1.0

        # 3. Test posts
        post_query = '?q=' + text + '&models=' + Post.get_es_document_type()

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 3)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score == response[2].meta.score)

        boosts['post']['if_first'] = 2.0

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score > response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(post_2_useful.pk))  # post 2 is the only one not first

        boosts['post']['if_first'] = 1.0
        boosts['post']['if_useful'] = 2.0

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 is useful

        boosts['post']['if_useful'] = 1.0
        boosts['post']['ld_ratio_above_1'] = 2.0

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 have a l/d ratio of 5/2

        boosts['post']['ld_ratio_above_1'] = 1.0
        boosts['post']['ld_ratio_below_1'] = 2.0  # no one would do that in real life

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_3_ld_below_1.pk))  # post 3 have a l/d ratio of 2/5

        boosts['post']['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        topic_query = '?q=' + text + '&models=' + Topic.get_es_document_type()

        response = self._run_search(topic_query)
        self.assertEqual(response.hits.total, 2)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        boosts['topic']['if_sticky'] = 2.0

        response = self._run_search(topic_query)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        boosts['topic']['if_sticky'] = 1.0
        boosts['topic']['if_solved'] = 2.0

        response = self._run_search(topic_query)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is solved

        boosts['topic']['if_solved'] = 1.0
        boosts['topic']['if_locked'] = 2.0  # no one would do that in real life

        response = self._run_search(topic_query)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_2_locked.pk))  # topic 2 is locked

        boosts['topic']['if_locked'] = 1.0  # no one would do that in real life

        # 5. Test published contents
        content_query = '?q=' + text + '&models=content'

        response = self._run_search(content_query)
        self.assertEqual(response.hits.total, 5)

        # score are equals without boost:
        self.assertTrue(
            response[0].meta.score ==
            response[1].meta.score ==
            response[2].meta.score ==
            response[3].meta.score ==
            response[4].meta.score)

        boosts['publishedcontent']['if_article'] = 2.0

        response = self._run_search(content_query)
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_article.pk))  # obvious

        boosts['publishedcontent']['if_article'] = 1.0
        boosts['publishedcontent']['if_tutorial'] = 2.0

        response = self._run_search(content_query)
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

        boosts['publishedcontent']['if_tutorial'] = 1.0
        boosts['publishedcontent']['if_opinion'] = 2.0
        boosts['publishedcontent']['if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        response = self._run_search(content_query)
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score > response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(published_opinion_not_picked.pk))  # unpicked opinion got first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        boosts['publishedcontent']['if_opinion'] = 1.0
        boosts['publishedcontent']['if_opinion_not_picked'] = 1.0
        boosts['publishedcontent']['if_medium_or_big_tutorial'] = 2.0

        response = self._run_search(content_query)
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

        boosts['publishedcontent']['if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists
        for model in self.indexable:
            doc_type = model.get_es_document_type()

            # set a huge number to overcome the small differences:
            boosts[doc_type]['global'] = 10.0

            response = self._run_search('?q=' + text)
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type, doc_type)  # obvious

            boosts[doc_type]['global'] = 1.0

    def test_change_topic_impacts_posts(self):
        """Renaming or moving a topic must be reflected on its indexed posts"""
        self._require_es()

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        # 2. Create a normal topic and index it
        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self._index_models(Topic, Post)
        self.assertEqual(self._count_indexed_documents(), 2)  # indexing ok

        post_query = '?q=' + text + '&models=' + Post.get_es_document_type()

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].meta.doc_type, Post.get_es_document_type())
        self.assertEqual(response[0].forum_pk, self.forum.pk)
        self.assertEqual(response[0].topic_pk, topic_1.pk)
        self.assertEqual(response[0].topic_title, topic_1.title)

        # 3. Change topic title and reindex
        topic_1.title = 'new title'
        topic_1.save()

        self._index_models(Topic, Post)

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].topic_title, topic_1.title)  # title was changed

        # 4. connect with staff and move topic
        self.assertTrue(self.client.login(username=self.staff.username, password='******'))

        data = {
            'move': '',
            'forum': hidden_forum.pk,
            'topic': topic_1.pk
        }
        response = self.client.post(reverse('topic-edit'), data, follow=False)
        self.assertEqual(302, response.status_code)

        self._index_models(Topic, Post)

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 1)  # Note: without staff, would not get any results (see below)
        self.assertEqual(response[0].forum_pk, hidden_forum.pk)  # post was updated with new forum

        # 5. Topic is now hidden
        self.client.logout()

        response = self._run_search(post_query)
        self.assertEqual(response.hits.total, 0)  # ok

    def test_change_publishedcontents_impacts_chapter(self):
        """Re-publishing a content must replace its indexed chapters"""
        self._require_es()

        # 1. Create middle-size content and index it
        text = 'test'

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = self._publish_version(tuto, tuto_draft)

        self._index_models(PublishedContent)
        self.assertEqual(self._count_indexed_documents(), 2)  # indexing ok

        response = self._run_search('?q=' + text + '&models=content')
        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone !

        another_text = 'another thing'
        self.assertNotIn(text, another_text)  # to prevent a future modification from breaking this test

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = self._publish_version(tuto, tuto_draft, is_major_update=False)

        self._index_models(PublishedContent)
        self.assertEqual(self._count_indexed_documents(), 2)  # 2 objects, not 3 !

        response = self._run_search('?q=' + text + '&models=content')

        contents = [r for r in response if r.meta.doc_type != 'chapter']
        self.assertEqual(response.hits.total, len(contents))  # no chapter found anymore

        response = self._run_search('?q=' + another_text + '&models=content')

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter2.slug)  # got new chapter

    def test_opensearch(self):
        """The opensearch description is served and references the search URLs"""
        result = self.client.get(reverse('search:opensearch'), follow=False)

        self.assertEqual(result.status_code, 200)

        self.assertContains(result, reverse('search:query'))
        self.assertContains(result, reverse('search:opensearch'))

    def test_upercase_and_lowercase_search_give_same_results(self):
        """Searching 'test' or 'TEST' must return the same documents, in the same order"""
        self._require_es()

        # 1. Index lowercase stuffs
        text_lc = 'test'

        topic_1_lc = TopicFactory(forum=self.forum, author=self.user, title=text_lc)

        tag_lc = TagFactory(title=text_lc)
        topic_1_lc.tags.add(tag_lc)
        topic_1_lc.subtitle = text_lc
        topic_1_lc.save()

        post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1)
        post_1_lc.text = post_1_lc.text_html = text_lc
        post_1_lc.save()

        tuto_lc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_lc = tuto_lc.load_version()

        tuto_lc.title = text_lc
        tuto_lc.authors.add(self.user)
        subcategory_lc = SubCategoryFactory(title=text_lc)
        tuto_lc.subcategory.add(subcategory_lc)
        tuto_lc.tags.add(tag_lc)
        tuto_lc.save()

        tuto_draft_lc.description = text_lc
        tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc, text_lc)

        chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc)
        extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc)
        extract_lc.repo_update(text_lc, text_lc)

        self._publish_version(tuto_lc, tuto_draft_lc)

        # 2. Index uppercase stuffs
        text_uc = 'TEST'

        topic_1_uc = TopicFactory(forum=self.forum, author=self.user, title=text_uc)

        topic_1_uc.tags.add(tag_lc)  # Note: a constraint forces tags title to be unique
        topic_1_uc.subtitle = text_uc
        topic_1_uc.save()

        post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1)
        post_1_uc.text = post_1_uc.text_html = text_uc
        post_1_uc.save()

        tuto_uc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_uc = tuto_uc.load_version()

        tuto_uc.title = text_uc
        tuto_uc.authors.add(self.user)
        tuto_uc.subcategory.add(subcategory_lc)
        tuto_uc.tags.add(tag_lc)
        tuto_uc.save()

        tuto_draft_uc.description = text_uc
        tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc, text_uc)

        chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc)
        extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc)
        extract_uc.repo_update(text_uc, text_uc)

        self._publish_version(tuto_uc, tuto_draft_uc)

        # 3. Index and search:
        self.assertEqual(self._count_indexed_documents(), 0)

        self._index_models()

        response_lc = self._run_search('?q=' + text_lc)
        self.assertEqual(response_lc.hits.total, 8)

        response_uc = self._run_search('?q=' + text_uc)
        self.assertEqual(response_uc.hits.total, 8)

        for hit_lc, hit_uc in zip(response_lc, response_uc):  # we should get results in the same order!
            self.assertEqual(hit_lc.meta.id, hit_uc.meta.id)

    def test_category_and_subcategory_impact_search(self):
        """Filtering on a subcategory only returns the contents belonging to it"""
        self._require_es()

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        # 1. Create two contents with different subcategories
        category_1 = 'category 1'
        subcategory_1 = SubCategoryFactory(title=category_1)
        category_2 = 'category 2'
        subcategory_2 = SubCategoryFactory(title=category_2)

        tuto_1 = PublishableContentFactory(type='TUTORIAL')
        tuto_1_draft = tuto_1.load_version()

        tuto_1.title = text
        tuto_1.authors.add(self.user)
        tuto_1.subcategory.add(subcategory_1)
        tuto_1.save()

        tuto_1_draft.description = text
        tuto_1_draft.repo_update_top_container(text, tuto_1.slug, text, text)

        chapter_1 = ContainerFactory(parent=tuto_1_draft, db_object=tuto_1)
        extract_1 = ExtractFactory(container=chapter_1, db_object=tuto_1)
        extract_1.repo_update(text, text)

        published_1 = self._publish_version(tuto_1, tuto_1_draft)

        tuto_2 = PublishableContentFactory(type='TUTORIAL')
        tuto_2_draft = tuto_2.load_version()

        tuto_2.title = text
        tuto_2.authors.add(self.user)
        tuto_2.subcategory.add(subcategory_2)
        tuto_2.save()

        tuto_2_draft.description = text
        tuto_2_draft.repo_update_top_container(text, tuto_2.slug, text, text)

        chapter_2 = ContainerFactory(parent=tuto_2_draft, db_object=tuto_2)
        extract_2 = ExtractFactory(container=chapter_2, db_object=tuto_2)
        extract_2.repo_update(text, text)

        published_2 = self._publish_version(tuto_2, tuto_2_draft)

        # 2. Index:
        self.assertEqual(self._count_indexed_documents(), 0)

        self._index_models()

        response = self._run_search('?q=' + text)
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test
        # NOTE(review): the parameter is spelled 'model' here while every other test uses 'models';
        # left untouched on purpose — confirm which spelling the search form expects.
        response = self._run_search('?q=' + text + '&model=content&subcategory=' + subcategory_1.slug)
        self.assertEqual(response.hits.total, 2)

        self.assertEqual(
            [int(r.meta.id) for r in response if r.meta.doc_type == 'publishedcontent'][0], published_1.pk)
        self.assertEqual(
            [r.meta.id for r in response if r.meta.doc_type == 'chapter'][0], tuto_1.slug + '__' + chapter_1.slug)

        response = self._run_search('?q=' + text + '&model=content&subcategory=' + subcategory_2.slug)
        self.assertEqual(response.hits.total, 2)

        self.assertEqual(
            [int(r.meta.id) for r in response if r.meta.doc_type == 'publishedcontent'][0], published_2.pk)
        self.assertEqual(
            [r.meta.id for r in response if r.meta.doc_type == 'chapter'][0], tuto_2.slug + '__' + chapter_2.slug)

    def tearDown(self):
        super().tearDown()

        # delete index:
        self.manager.clear_es_index()
class Command(BaseCommand):
    """Management command that sets up, clears and fills the Elasticsearch index.

    The positional ``action`` argument selects the operation:

    * ``setup``: reset the index for the known models and install the custom analyzer;
    * ``clear``: clear the index and the indexing state of every model;
    * ``index_all``: reset the index, then index every model;
    * ``index_flagged``: index every model without resetting the index first.
    """

    help = 'Index data in ES and manage them'

    index_manager = None
    models = get_django_indexable_objects()

    def __init__(self, *args, **kwargs):
        """Overridden because FakeChapter needs to be present for mapping.

        Also, its mapping needs to be defined before the one of PublishedContent
        for parenting reasons (!!!).

        :raises Exception: if the index manager reports no connection to Elasticsearch.
        """
        super(Command, self).__init__(*args, **kwargs)

        # Build a per-instance list instead of inserting into the class-level one:
        # mutating the shared list added one more FakeChapter for every Command created.
        self.models = [FakeChapter] + list(type(self).models)

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception('Unable to connect to Elasticsearch, aborting.')

    def add_arguments(self, parser):
        """Declare the single positional ``action`` argument and its allowed values."""
        parser.add_argument(
            'action', type=str, help='action to perform', choices=['setup', 'clear', 'index_all', 'index_flagged'])

    def handle(self, *args, **options):
        """Dispatch to the method matching ``options['action']``.

        :raises CommandError: if the action is not one of the known ones.
        """
        if options['action'] == 'setup':
            self.setup_es()
        elif options['action'] == 'clear':
            self.clear_es()
        elif options['action'] == 'index_all':
            self.index_documents(force_reindexing=True)
        elif options['action'] == 'index_flagged':
            self.index_documents(force_reindexing=False)
        else:
            raise CommandError('unknown action {}'.format(options['action']))

    def setup_es(self):
        """Reset the index for ``self.models``, set up the custom analyzer and refresh the index."""
        self.index_manager.reset_es_index(self.models)
        self.index_manager.setup_custom_analyzer()
        self.index_manager.refresh_index()

    def clear_es(self):
        """Clear the index, then clear the indexing state of every model."""
        self.index_manager.clear_es_index()

        for model in self.models:
            self.index_manager.clear_indexing_of_model(model)

    def index_documents(self, force_reindexing=False):
        """Bulk-index every model except FakeChapter, then refresh the index.

        :param force_reindexing: when true, the index is reset first (removing all
            previous data) and progress is written to the command's stdout.
        """
        if force_reindexing:
            self.setup_es()  # remove all previous data

        for model in self.models:
            # FakeChapter is never bulk-indexed directly by this command.
            if model is FakeChapter:
                continue

            if force_reindexing:
                self.stdout.write('- indexing {}s'.format(model.get_es_document_type()))

            indexed_counter = self.index_manager.es_bulk_indexing_of_model(model, force_reindexing=force_reindexing)
            if force_reindexing:
                self.stdout.write(' {}\titems indexed'.format(indexed_counter))

        self.index_manager.refresh_index()
class SearchView(ZdSPagingListView):
    """Search view: turns the submitted search form into an Elasticsearch query."""

    template_name = 'searchv2/search.html'
    paginate_by = settings.ZDS_APP['search']['results_per_page']

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None

    authorized_forums = ''

    index_manager = None

    def __init__(self, **kwargs):
        """Create the index manager here: it must NOT be initialized elsewhere."""
        super().__init__(**kwargs)
        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def get(self, request, *args, **kwargs):
        """Capture the ``q`` parameter and bind the search form before delegating to the parent.

        Raises PermissionDenied when a query was submitted but the form does not validate.
        """
        if 'q' in request.GET:
            self.search_query = ''.join(request.GET['q'])

        self.search_form = self.search_form_class(data=self.request.GET)

        if self.search_query:
            if not self.search_form.is_valid():
                raise PermissionDenied('research form is invalid')

        return super().get(request, *args, **kwargs)

    def get_queryset(self):
        """Build the search to run, or return an empty list when there is nothing to search.

        An empty list is returned (with a warning message) when Elasticsearch is
        unreachable, and also when no query was submitted.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _('Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        cleaned = self.search_form.cleaned_data
        search_groups = settings.ZDS_APP['search']['search_groups']

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        # Restrict (sub)category if any
        if cleaned['category']:
            self.content_category = cleaned['category']
        if cleaned['subcategory']:
            self.content_subcategory = cleaned['subcategory']

        # Mark that contents must come from library if required
        self.from_library = cleaned['from_library'] == 'on'

        # Pick the document types to search (according to the selected groups, if any)
        chosen_groups = cleaned['models']
        if chosen_groups:
            grouped_models = [search_groups[name][1] for name in chosen_groups if name in search_groups]
        else:
            grouped_models = [group[1] for group in search_groups.values()]
        models = reduce(operator.concat, grouped_models)

        # One sub-query per document type, OR-ed together
        part_querysets = [getattr(self, f'get_queryset_{model}s')() for model in models]
        queryset = part_querysets[0]
        for part in part_querysets[1:]:
            queryset |= part

        # Weighting: apply the global boost of each searched document type
        weight_functions = [
            {'filter': Match(_type=doc_type), 'weight': weights['global']}
            for doc_type, weights in settings.ZDS_APP['search']['boosts'].items()
            if doc_type in models
        ]
        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)

        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text')
        search_queryset = search_queryset.highlight('text_html')

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects."""
        text_match = MultiMatch(
            query=self.search_query,
            fields=['title', 'description', 'categories', 'subcategories', 'tags', 'text'])
        query = Match(_type='publishedcontent') & text_match

        if self.from_library:
            query &= Match(content_type='TUTORIAL') | Match(content_type='ARTICLE')

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']
        boosted_filters = [
            (Match(content_type='TUTORIAL'), 'if_tutorial'),
            (Match(content_type='TUTORIAL') & Match(has_chapters=True), 'if_medium_or_big_tutorial'),
            (Match(content_type='ARTICLE'), 'if_article'),
            (Match(content_type='OPINION'), 'if_opinion'),
            (Match(content_type='OPINION') & Match(picked=False), 'if_opinion_not_picked'),
        ]
        functions_score = [{'filter': flt, 'weight': boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_queryset_chapters(self):
        """Search in content chapters."""
        text_match = MultiMatch(query=self.search_query, fields=['title', 'text'])
        query = Match(_type='chapter') & text_match

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """
        text_match = MultiMatch(query=self.search_query, fields=['title', 'subtitle', 'tags'])
        query = Match(_type='topic') & Terms(forum_pk=self.authorized_forums) & text_match

        boosts = settings.ZDS_APP['search']['boosts']['topic']
        boosted_filters = [
            (Match(is_solved=True), 'if_solved'),
            (Match(is_sticky=True), 'if_sticky'),
            (Match(is_locked=True), 'if_locked'),
        ]
        functions_score = [{'filter': flt, 'weight': boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """
        text_match = MultiMatch(query=self.search_query, fields=['text_html'])
        query = Match(_type='post') \
            & Terms(forum_pk=self.authorized_forums) \
            & Term(is_visible=True) \
            & text_match

        boosts = settings.ZDS_APP['search']['boosts']['post']
        boosted_filters = [
            (Match(position=1), 'if_first'),
            (Match(is_useful=True), 'if_useful'),
            (Range(like_dislike_ratio={'gt': 1}), 'ld_ratio_above_1'),
            (Range(like_dislike_ratio={'lt': 1}), 'ld_ratio_below_1'),
        ]
        functions_score = [{'filter': flt, 'weight': boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_context_data(self, **kwargs):
        """Add the bound form and a flag telling whether a query was submitted."""
        context = super().get_context_data(**kwargs)
        context['form'] = self.search_form
        context['query'] = self.search_query is not None

        return context
def __init__(self, **kwargs):
    """Initialize the parent view, then create this view's index manager.

    Overridden because the index manager must NOT be initialized elsewhere.
    """
    super(SimilarTopicsView, self).__init__(**kwargs)

    es_settings = settings.ES_SEARCH_INDEX
    self.index_manager = ESIndexManager(**es_settings)
def __init__(self, **kwargs):
    """Initialize the parent view, then create this view's index manager.

    Overridden because the index manager must NOT be initialized elsewhere.
    """
    super(SearchView, self).__init__(**kwargs)

    es_settings = settings.ES_SEARCH_INDEX
    self.index_manager = ESIndexManager(**es_settings)
class SearchView(ZdSPagingListView):
    """Search view: turns the submitted search form into an Elasticsearch query."""

    template_name = "searchv2/search.html"
    paginate_by = settings.ZDS_APP["search"]["results_per_page"]

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None

    authorized_forums = ""

    index_manager = None

    def __init__(self, **kwargs):
        """Create the index manager here: it must NOT be initialized elsewhere."""
        super().__init__(**kwargs)
        es_settings = settings.ES_SEARCH_INDEX
        self.index_manager = ESIndexManager(**es_settings)

    def get(self, request, *args, **kwargs):
        """Capture the ``q`` parameter and bind the search form before delegating to the parent.

        Raises PermissionDenied when a query was submitted but the form does not validate.
        """
        if "q" in request.GET:
            self.search_query = "".join(request.GET["q"])

        self.search_form = self.search_form_class(data=self.request.GET)

        if self.search_query:
            if not self.search_form.is_valid():
                raise PermissionDenied("research form is invalid")

        return super().get(request, *args, **kwargs)

    def get_queryset(self):
        """Build the search to run, or return an empty list when there is nothing to search.

        An empty list is returned (with a warning message) when Elasticsearch is
        unreachable, and also when no query was submitted.
        """
        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _("Impossible de se connecter à Elasticsearch"))
            return []

        if not self.search_query:
            return []

        form_data = self.search_form.cleaned_data
        search_groups = settings.ZDS_APP["search"]["search_groups"]

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        # Restrict (sub)category if any
        if form_data["category"]:
            self.content_category = form_data["category"]
        if form_data["subcategory"]:
            self.content_subcategory = form_data["subcategory"]

        # Mark that contents must come from library if required
        self.from_library = form_data["from_library"] == "on"

        # Pick the document types to search (according to the selected groups, if any)
        chosen_groups = form_data["models"]
        if chosen_groups:
            grouped_models = [search_groups[name][1] for name in chosen_groups if name in search_groups]
        else:
            grouped_models = [group[1] for group in search_groups.values()]
        models = reduce(operator.concat, grouped_models)

        # One sub-query per document type, OR-ed together
        part_querysets = [getattr(self, f"get_queryset_{doc_model}s")() for doc_model in models]
        queryset = part_querysets[0]
        for part in part_querysets[1:]:
            queryset |= part

        # Weighting: apply the global boost of each searched document type
        weight_functions = [
            {"filter": Match(_type=doc_type), "weight": weights["global"]}
            for doc_type, weights in settings.ZDS_APP["search"]["boosts"].items()
            if doc_type in models
        ]
        scored_queryset = FunctionScore(query=queryset, boost_mode="multiply", functions=weight_functions)

        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=["[hl]"], post_tags=["[/hl]"]
        )
        search_queryset = search_queryset.highlight("text")
        search_queryset = search_queryset.highlight("text_html")

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects."""
        text_match = MultiMatch(
            query=self.search_query,
            fields=["title", "description", "categories", "subcategories", "tags", "text"],
        )
        query = Match(_type="publishedcontent") & text_match

        if self.from_library:
            query &= Match(content_type="TUTORIAL") | Match(content_type="ARTICLE")

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        boosts = settings.ZDS_APP["search"]["boosts"]["publishedcontent"]
        boosted_filters = [
            (Match(content_type="TUTORIAL"), "if_tutorial"),
            (Match(content_type="TUTORIAL") & Match(has_chapters=True), "if_medium_or_big_tutorial"),
            (Match(content_type="ARTICLE"), "if_article"),
            (Match(content_type="OPINION"), "if_opinion"),
            (Match(content_type="OPINION") & Match(picked=False), "if_opinion_not_picked"),
        ]
        functions_score = [{"filter": flt, "weight": boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)

    def get_queryset_chapters(self):
        """Search in content chapters."""
        text_match = MultiMatch(query=self.search_query, fields=["title", "text"])
        query = Match(_type="chapter") & text_match

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """
        text_match = MultiMatch(query=self.search_query, fields=["title", "subtitle", "tags"])
        query = Match(_type="topic") & Terms(forum_pk=self.authorized_forums) & text_match

        boosts = settings.ZDS_APP["search"]["boosts"]["topic"]
        boosted_filters = [
            (Match(is_solved=True), "if_solved"),
            (Match(is_sticky=True), "if_sticky"),
            (Match(is_locked=True), "if_locked"),
        ]
        functions_score = [{"filter": flt, "weight": boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """
        text_match = MultiMatch(query=self.search_query, fields=["text_html"])
        query = (
            Match(_type="post")
            & Terms(forum_pk=self.authorized_forums)
            & Term(is_visible=True)
            & text_match
        )

        boosts = settings.ZDS_APP["search"]["boosts"]["post"]
        boosted_filters = [
            (Match(position=1), "if_first"),
            (Match(is_useful=True), "if_useful"),
            (Range(like_dislike_ratio={"gt": 1}), "ld_ratio_above_1"),
            (Range(like_dislike_ratio={"lt": 1}), "ld_ratio_below_1"),
        ]
        functions_score = [{"filter": flt, "weight": boosts[key]} for flt, key in boosted_filters]

        return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)

    def get_context_data(self, **kwargs):
        """Add the bound form and a flag telling whether a query was submitted."""
        context = super().get_context_data(**kwargs)
        context["form"] = self.search_form
        context["query"] = self.search_query is not None

        return context
class UtilsTests(TutorialTestMixin, TestCase):
    """Tests of the ``es_manager`` management command against the configured ES index."""

    def setUp(self):
        """Create the bot account, a category/forum pair, two users and the index manager."""
        settings.EMAIL_BACKEND = "django.core.mail.backends.locmem.EmailBackend"
        self.mas = ProfileFactory().user
        settings.ZDS_APP["member"]["bot_account"] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""

        # Nothing can be checked without a reachable Elasticsearch.
        if not self.index_manager.connected_to_es:
            return

        # in the beginning: the void
        self.assertNotIn(self.index_manager.index, self.index_manager.es.cat.indices())

        text = "Ceci est un texte de test"

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command("es_manager", "index_all")
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        # ``Search.query()`` returns a new Search object, so its result must be kept.
        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {"post": False, "topic": False, "publishedcontent": False, "chapter": False}
        id_must_be = {
            "post": str(post.pk),
            "topic": str(topic.pk),
            "publishedcontent": str(published.pk),
            "chapter": tuto.slug + "__" + chapter1.slug,
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # Check the flags, not the keys: iterating the dict itself yields its
        # (always truthy) keys and would make this assertion pass unconditionally.
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index in

        call_command("es_manager", "clear")
        self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertNotIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command("es_manager", "setup")
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index back in ...

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(index=self.index_manager.index)
        settings_index = result[self.index_manager.index]["settings"]["index"]
        self.assertIn("analysis", settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command("es_manager", "index_flagged")

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        s = Search().query(MatchAll())
        results = self.index_manager.setup_search(s).execute()
        self.assertEqual(len(results), 4)  # get the 4 results back

    def tearDown(self):
        """Run the parent teardown, then clear the ES index used by the test."""
        super().tearDown()

        # delete index:
        self.index_manager.clear_es_index()
class ViewsTests(TestCase): def setUp(self): # don't build PDF to speed up the tests settings.ZDS_APP['content']['build_pdf_when_published'] = False settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend' self.mas = ProfileFactory().user settings.ZDS_APP['member']['bot_account'] = self.mas.username self.category, self.forum = create_category() self.user = ProfileFactory().user self.staff = StaffProfileFactory().user self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX) self.indexable = [FakeChapter, PublishedContent, Topic, Post] self.manager.reset_es_index(self.indexable) self.manager.setup_custom_analyzer() self.manager.refresh_index() def test_basic_search(self): """Basic search and filtering""" if not self.manager.connected_to_es: return # 1. Index and test search: text = 'test' topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text) post_1 = PostFactory(topic=topic_1, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() # create a middle-size content and publish it tuto = PublishableContentFactory(type='TUTORIAL') tuto_draft = tuto.load_version() tuto.title = text tuto.authors.add(self.user) tuto.save() tuto_draft.repo_update_top_container( text, tuto.slug, text, text) # change title to be sure it will match chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto) extract = ExtractFactory(container=chapter1, db_object=tuto) extract.repo_update(text, text) published = publish_content(tuto, tuto_draft, is_major_update=True) tuto.sha_public = tuto_draft.current_version tuto.sha_draft = tuto_draft.current_version tuto.public_version = published tuto.save() # nothing has been indexed yet: self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 0) # index for model in self.indexable: if model is FakeChapter: continue self.manager.es_bulk_indexing_of_model(model) self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text, 
follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 4) # get 4 results # 2. Test filtering: topic_1 = Topic.objects.get(pk=topic_1.pk) post_1 = Post.objects.get(pk=post_1.pk) published = PublishedContent.objects.get(pk=published.pk) ids = { 'topic': [topic_1.es_id], 'post': [post_1.es_id], 'content': [ published.es_id, published.content_public_slug + '__' + chapter1.slug ], } search_groups = [ k for k, v in settings.ZDS_APP['search']['search_groups'].items() ] group_to_model = { k: v[1] for k, v in settings.ZDS_APP['search']['search_groups'].items() } for doc_type in search_groups: result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + doc_type, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, len(ids[doc_type])) # get 1 result of each … for i, r in enumerate(response): self.assertIn( r.meta.doc_type, group_to_model[doc_type]) # … and only of the right type … self.assertEqual(r.meta.id, ids[doc_type][i]) # … with the right id ! def test_get_similar_topics(self): """Get similar topics lists""" if not self.manager.connected_to_es: return text = 'Clem ne se mange pas' topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text) post_1 = PostFactory(topic=topic_1, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() text = 'Clem est la meilleure mascotte' topic_2 = TopicFactory(forum=self.forum, author=self.user, title=text) post_2 = PostFactory(topic=topic_2, author=self.user, position=1) post_2.text = post_1.text_html = text post_2.save() # 1. 
Should not get any result result = self.client.get(reverse('search:similar') + '?q=est', follow=False) self.assertEqual(result.status_code, 200) content = json.loads(result.content.decode('utf-8')) self.assertEqual(len(content['results']), 0) # index for model in self.indexable: if model is FakeChapter: continue self.manager.es_bulk_indexing_of_model(model) self.manager.refresh_index() # 2. Should get exactly one result result = self.client.get(reverse('search:similar') + '?q=mange', follow=False) self.assertEqual(result.status_code, 200) content = json.loads(result.content.decode('utf-8')) self.assertEqual(len(content['results']), 1) # 2. Should get exactly two results result = self.client.get(reverse('search:similar') + '?q=Clem', follow=False) self.assertEqual(result.status_code, 200) content = json.loads(result.content.decode('utf-8')) self.assertEqual(len(content['results']), 2) def test_hidden_post_are_not_result(self): """Hidden posts should not show up in the search results""" if not self.manager.connected_to_es: return # 1. Index and test search: text = 'test' topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text) post_1 = PostFactory(topic=topic_1, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() self.manager.es_bulk_indexing_of_model(Topic) self.manager.es_bulk_indexing_of_model(Post) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 2) # indexing ok post_1 = Post.objects.get(pk=post_1.pk) result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 1) self.assertEqual(response[0].meta.id, post_1.es_id) # 2. 
Hide, reindex and search again: post_1.hide_comment_by_user(self.staff, 'Un abus de pouvoir comme un autre ;)') self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 0) # nothing in the results def test_hidden_forums_give_no_results_if_user_not_allowed(self): """Long name, isn't ?""" if not self.manager.connected_to_es: return # 1. Create a hidden forum belonging to a hidden staff group. text = 'test' group = Group.objects.create(name='Les illuminatis anonymes de ZdS') _, hidden_forum = create_category(group) self.staff.groups.add(group) self.staff.save() topic_1 = TopicFactory(forum=hidden_forum, author=self.staff, title=text) post_1 = PostFactory(topic=topic_1, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() self.manager.es_bulk_indexing_of_model(Topic) self.manager.es_bulk_indexing_of_model(Post) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 2) # indexing ok # 2. search without connection and get not result result = self.client.get(reverse('search:query') + '?q=' + text, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 0) # 3. Connect with user (not a member of the group), search, and get no result self.assertTrue( self.client.login(username=self.user.username, password='******')) result = self.client.get(reverse('search:query') + '?q=' + text, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 0) # 4. 
Connect with staff, search, and get the topic and the post self.client.logout() self.assertTrue( self.client.login(username=self.staff.username, password='******')) result = self.client.get(reverse('search:query') + '?q=' + text, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) # ok ! def test_boosts(self): """Check if boosts are doing their job""" if not self.manager.connected_to_es: return # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto text = 'test' topic_1_solved_sticky = TopicFactory(forum=self.forum, author=self.user) topic_1_solved_sticky.title = text topic_1_solved_sticky.subtitle = '' topic_1_solved_sticky.is_solved = True topic_1_solved_sticky.is_sticky = True topic_1_solved_sticky.save() post_1 = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() post_2_useful = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=2) post_2_useful.text = post_2_useful.text_html = text post_2_useful.is_useful = True post_2_useful.like = 5 post_2_useful.dislike = 2 # l/d ratio above 1 post_2_useful.save() topic_2_locked = TopicFactory(forum=self.forum, author=self.user, title=text) topic_2_locked.title = text topic_2_locked.subtitle = '' topic_2_locked.is_locked = True topic_2_locked.save() post_3_ld_below_1 = PostFactory(topic=topic_2_locked, author=self.user, position=1) post_3_ld_below_1.text = post_3_ld_below_1.text_html = text post_3_ld_below_1.like = 2 post_3_ld_below_1.dislike = 5 # l/d ratio below 1 post_3_ld_below_1.save() tuto = PublishableContentFactory(type='TUTORIAL') tuto_draft = tuto.load_version() tuto.title = text tuto.authors.add(self.user) tuto.save() tuto_draft.repo_update_top_container(text, tuto.slug, text, text) chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto) chapter1.repo_update(text, 'Who cares ?', 'Same 
here') ExtractFactory(container=chapter1, db_object=tuto) published_tuto = publish_content(tuto, tuto_draft, is_major_update=True) tuto.sha_public = tuto_draft.current_version tuto.sha_draft = tuto_draft.current_version tuto.public_version = published_tuto tuto.save() article = PublishedContentFactory(type='ARTICLE', title=text) published_article = PublishedContent.objects.get(content_pk=article.pk) opinion_not_picked = PublishedContentFactory(type='OPINION', title=text) published_opinion_not_picked = PublishedContent.objects.get( content_pk=opinion_not_picked.pk) opinion_picked = PublishedContentFactory(type='OPINION', title=text) opinion_picked.sha_picked = opinion_picked.sha_draft opinion_picked.date_picked = datetime.datetime.now() opinion_picked.save() published_opinion_picked = PublishedContent.objects.get( content_pk=opinion_picked.pk) for model in self.indexable: if model is FakeChapter: continue self.manager.es_bulk_indexing_of_model(model) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 10) # 2. Reset all boosts to 1 for doc_type in settings.ZDS_APP['search']['boosts']: for key in settings.ZDS_APP['search']['boosts'][doc_type]: settings.ZDS_APP['search']['boosts'][doc_type][key] = 1.0 # 3. 
Test posts result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 3) # score are equals without boost: self.assertTrue(response[0].meta.score == response[1].meta.score == response[2].meta.score) settings.ZDS_APP['search']['boosts']['post']['if_first'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 3) self.assertTrue(response[0].meta.score == response[1].meta.score > response[2].meta.score) self.assertEqual(response[2].meta.id, str( post_2_useful.pk)) # post 2 is the only one not first settings.ZDS_APP['search']['boosts']['post']['if_first'] = 1.0 settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 3) self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score) self.assertEqual(response[0].meta.id, str(post_2_useful.pk)) # post 2 is useful settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 1.0 settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 3) self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score) self.assertEqual(response[0].meta.id, str( post_2_useful.pk)) # 
post 2 have a l/d ratio of 5/2 settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 1.0 settings.ZDS_APP['search']['boosts']['post'][ 'ld_ratio_below_1'] = 2.0 # no one would do that in real life result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 3) self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score) self.assertEqual(response[0].meta.id, str( post_3_ld_below_1.pk)) # post 3 have a l/d ratio of 2/5 settings.ZDS_APP['search']['boosts']['post']['ld_ratio_below_1'] = 1.0 # 4. Test topics result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Topic.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) # score are equals without boost: self.assertTrue(response[0].meta.score == response[1].meta.score) settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Topic.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk)) # topic 1 is sticky settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 1.0 settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Topic.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) 
self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk)) # topic 1 is solved settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 1.0 settings.ZDS_APP['search']['boosts']['topic'][ 'if_locked'] = 2.0 # no one would do that in real life result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Topic.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(topic_2_locked.pk)) # topic 2 is locked settings.ZDS_APP['search']['boosts']['topic'][ 'if_locked'] = 1.0 # no one would do that in real life # 5. Test published contents result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 5) # score are equals without boost: self.assertTrue( response[0].meta.score == response[1].meta.score == response[2]. 
meta.score == response[3].meta.score == response[4].meta.score) settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_article'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 5) self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(published_article.pk)) # obvious settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_article'] = 1.0 settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_tutorial'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 5) self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(published_tuto.pk)) # obvious settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_tutorial'] = 1.0 settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_opinion'] = 2.0 settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_opinion_not_picked'] = 4.0 # Note: in "real life", unpicked opinion would get a boost < 1. 
result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 5) self.assertTrue(response[0].meta.score > response[1].meta.score > response[2].meta.score) self.assertEqual( response[0].meta.id, str(published_opinion_not_picked.pk)) # unpicked opinion got first self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk)) settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_opinion'] = 1.0 settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_opinion_not_picked'] = 1.0 settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_medium_or_big_tutorial'] = 2.0 result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 5) self.assertTrue(response[0].meta.score > response[1].meta.score) self.assertEqual(response[0].meta.id, str(published_tuto.pk)) # obvious settings.ZDS_APP['search']['boosts']['publishedcontent'][ 'if_medium_or_big_tutorial'] = 1.0 # 6. Test global boosts # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists for model in self.indexable: # set a huge number to overcome the small differences: settings.ZDS_APP['search']['boosts'][ model.get_es_document_type()]['global'] = 10.0 result = self.client.get(reverse('search:query') + '?q=' + text, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 10) self.assertEqual(response[0].meta.doc_type, model.get_es_document_type()) # obvious settings.ZDS_APP['search']['boosts'][ model.get_es_document_type()]['global'] = 1.0 def test_change_topic_impacts_posts(self): if not self.manager.connected_to_es: return # 1. 
Create a hidden forum belonging to a hidden group and add staff in it. text = 'test' group = Group.objects.create(name='Les illuminatis anonymes de ZdS') _, hidden_forum = create_category(group) self.staff.groups.add(group) self.staff.save() # 2. Create a normal topic and index it topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text) post_1 = PostFactory(topic=topic_1, author=self.user, position=1) post_1.text = post_1.text_html = text post_1.save() self.manager.es_bulk_indexing_of_model(Topic) self.manager.es_bulk_indexing_of_model(Post) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 2) # indexing ok result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 1) # ok self.assertEqual(response[0].meta.doc_type, Post.get_es_document_type()) self.assertEqual(response[0].forum_pk, self.forum.pk) self.assertEqual(response[0].topic_pk, topic_1.pk) self.assertEqual(response[0].topic_title, topic_1.title) # 3. Change topic title and reindex topic_1.title = 'new title' topic_1.save() self.manager.es_bulk_indexing_of_model(Topic) self.manager.es_bulk_indexing_of_model(Post) self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 1) # ok self.assertEqual(response[0].topic_title, topic_1.title) # title was changed # 4. 
connect with staff and move topic self.assertTrue( self.client.login(username=self.staff.username, password='******')) data = {'move': '', 'forum': hidden_forum.pk, 'topic': topic_1.pk} response = self.client.post(reverse('topic-edit'), data, follow=False) self.assertEqual(302, response.status_code) self.manager.es_bulk_indexing_of_model(Topic) self.manager.es_bulk_indexing_of_model(Post) self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual( response.hits.total, 1) # Note: without staff, would not get any results (see below) self.assertEqual(response[0].forum_pk, hidden_forum.pk) # post was updated with new forum # 5. Topic is now hidden self.client.logout() result = self.client.get(reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 0) # ok def test_change_publishedcontents_impacts_chapter(self): if not self.manager.connected_to_es: return # 1. 
Create middle-size content and index it text = 'test' tuto = PublishableContentFactory(type='TUTORIAL') tuto_draft = tuto.load_version() tuto.title = text tuto.authors.add(self.user) tuto.save() tuto_draft.repo_update_top_container( text, tuto.slug, text, text) # change title to be sure it will match chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto) chapter1.repo_update(text, text, text) extract = ExtractFactory(container=chapter1, db_object=tuto) extract.repo_update(text, text) published = publish_content(tuto, tuto_draft, is_major_update=True) tuto.sha_public = tuto_draft.current_version tuto.sha_draft = tuto_draft.current_version tuto.public_version = published tuto.save() self.manager.es_bulk_indexing_of_model(PublishedContent) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 2) # indexing ok result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) chapters = [r for r in response if r.meta.doc_type == 'chapter'] self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type()) self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter1.slug) # 2. Change tuto: delete chapter and insert new one ! tuto = PublishableContent.objects.get(pk=tuto.pk) tuto_draft = tuto.load_version() tuto_draft.children[0].repo_delete() # chapter 1 is gone ! 
another_text = 'another thing' self.assertTrue( text not in another_text ) # to prevent a future modification from breaking this test chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto) chapter2.repo_update(another_text, another_text, another_text) extract2 = ExtractFactory(container=chapter2, db_object=tuto) extract2.repo_update(another_text, another_text) published = publish_content(tuto, tuto_draft, is_major_update=False) tuto.sha_public = tuto_draft.current_version tuto.sha_draft = tuto_draft.current_version tuto.public_version = published tuto.save() self.manager.es_bulk_indexing_of_model(PublishedContent) self.manager.refresh_index() self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 2) # 2 objects, not 3 ! result = self.client.get(reverse('search:query') + '?q=' + text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() contents = [r for r in response if r.meta.doc_type != 'chapter'] self.assertEqual(response.hits.total, len(contents)) # no chapter found anymore result = self.client.get(reverse('search:query') + '?q=' + another_text + '&models=content', follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() chapters = [r for r in response if r.meta.doc_type == 'chapter'] self.assertEqual(response.hits.total, 1) self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type()) self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter2.slug) # got new chapter def test_opensearch(self): result = self.client.get(reverse('search:opensearch'), follow=False) self.assertEqual(result.status_code, 200) self.assertContains(result, reverse('search:query')) self.assertContains(result, reverse('search:opensearch')) def test_upercase_and_lowercase_search_give_same_results(self): """Pretty self-explanatory function name, isn't it ?""" if not 
self.manager.connected_to_es: return # 1. Index lowercase stuffs text_lc = 'test' topic_1_lc = TopicFactory(forum=self.forum, author=self.user, title=text_lc) tag_lc = TagFactory(title=text_lc) topic_1_lc.tags.add(tag_lc) topic_1_lc.subtitle = text_lc topic_1_lc.save() post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1) post_1_lc.text = post_1_lc.text_html = text_lc post_1_lc.save() tuto_lc = PublishableContentFactory(type='TUTORIAL') tuto_draft_lc = tuto_lc.load_version() tuto_lc.title = text_lc tuto_lc.authors.add(self.user) subcategory_lc = SubCategoryFactory(title=text_lc) tuto_lc.subcategory.add(subcategory_lc) tuto_lc.tags.add(tag_lc) tuto_lc.save() tuto_draft_lc.description = text_lc tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc, text_lc) chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc) extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc) extract_lc.repo_update(text_lc, text_lc) published_lc = publish_content(tuto_lc, tuto_draft_lc, is_major_update=True) tuto_lc.sha_public = tuto_draft_lc.current_version tuto_lc.sha_draft = tuto_draft_lc.current_version tuto_lc.public_version = published_lc tuto_lc.save() # 2. 
Index uppercase stuffs text_uc = 'TEST' topic_1_uc = TopicFactory(forum=self.forum, author=self.user, title=text_uc) topic_1_uc.tags.add( tag_lc) # Note: a constraint forces tags title to be unique topic_1_uc.subtitle = text_uc topic_1_uc.save() post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1) post_1_uc.text = post_1_uc.text_html = text_uc post_1_uc.save() tuto_uc = PublishableContentFactory(type='TUTORIAL') tuto_draft_uc = tuto_uc.load_version() tuto_uc.title = text_uc tuto_uc.authors.add(self.user) tuto_uc.subcategory.add(subcategory_lc) tuto_uc.tags.add(tag_lc) tuto_uc.save() tuto_draft_uc.description = text_uc tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc, text_uc) chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc) extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc) extract_uc.repo_update(text_uc, text_uc) published_uc = publish_content(tuto_uc, tuto_draft_uc, is_major_update=True) tuto_uc.sha_public = tuto_draft_uc.current_version tuto_uc.sha_draft = tuto_draft_uc.current_version tuto_uc.public_version = published_uc tuto_uc.save() # 3. Index and search: self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 0) # index for model in self.indexable: if model is FakeChapter: continue self.manager.es_bulk_indexing_of_model(model) self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text_lc, follow=False) self.assertEqual(result.status_code, 200) response_lc = result.context['object_list'].execute() self.assertEqual(response_lc.hits.total, 8) result = self.client.get(reverse('search:query') + '?q=' + text_uc, follow=False) self.assertEqual(result.status_code, 200) response_uc = result.context['object_list'].execute() self.assertEqual(response_uc.hits.total, 8) for responses in zip( response_lc, response_uc): # we should get results in the same order! 
self.assertEqual(responses[0].meta.id, responses[1].meta.id) def test_category_and_subcategory_impact_search(self): """If two contents do not belong to the same (sub)category""" if not self.manager.connected_to_es: return text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?' # 1. Create two contents with different subcategories category_1 = 'category 1' subcategory_1 = SubCategoryFactory(title=category_1) category_2 = 'category 2' subcategory_2 = SubCategoryFactory(title=category_2) tuto_1 = PublishableContentFactory(type='TUTORIAL') tuto_1_draft = tuto_1.load_version() tuto_1.title = text tuto_1.authors.add(self.user) tuto_1.subcategory.add(subcategory_1) tuto_1.save() tuto_1_draft.description = text tuto_1_draft.repo_update_top_container(text, tuto_1.slug, text, text) chapter_1 = ContainerFactory(parent=tuto_1_draft, db_object=tuto_1) extract_1 = ExtractFactory(container=chapter_1, db_object=tuto_1) extract_1.repo_update(text, text) published_1 = publish_content(tuto_1, tuto_1_draft, is_major_update=True) tuto_1.sha_public = tuto_1_draft.current_version tuto_1.sha_draft = tuto_1_draft.current_version tuto_1.public_version = published_1 tuto_1.save() tuto_2 = PublishableContentFactory(type='TUTORIAL') tuto_2_draft = tuto_2.load_version() tuto_2.title = text tuto_2.authors.add(self.user) tuto_2.subcategory.add(subcategory_2) tuto_2.save() tuto_2_draft.description = text tuto_2_draft.repo_update_top_container(text, tuto_2.slug, text, text) chapter_2 = ContainerFactory(parent=tuto_2_draft, db_object=tuto_2) extract_2 = ExtractFactory(container=chapter_2, db_object=tuto_2) extract_2.repo_update(text, text) published_2 = publish_content(tuto_2, tuto_2_draft, is_major_update=True) tuto_2.sha_public = tuto_2_draft.current_version tuto_2.sha_draft = tuto_2_draft.current_version tuto_2.public_version = published_2 tuto_2.save() # 2. 
Index: self.assertEqual( len( self.manager.setup_search(Search().query( MatchAll())).execute()), 0) # index for model in self.indexable: if model is FakeChapter: continue self.manager.es_bulk_indexing_of_model(model) self.manager.refresh_index() result = self.client.get(reverse('search:query') + '?q=' + text, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 4) # Ok # 3. Test result = self.client.get(reverse('search:query') + '?q=' + text + '&model=content&subcategory=' + subcategory_1.slug, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) self.assertEqual([ int(r.meta.id) for r in response if r.meta.doc_type == 'publishedcontent' ][0], published_1.pk) self.assertEqual([ r.meta.id for r in response if r.meta.doc_type == 'chapter' ][0], tuto_1.slug + '__' + chapter_1.slug) result = self.client.get(reverse('search:query') + '?q=' + text + '&model=content&subcategory=' + subcategory_2.slug, follow=False) self.assertEqual(result.status_code, 200) response = result.context['object_list'].execute() self.assertEqual(response.hits.total, 2) self.assertEqual([ int(r.meta.id) for r in response if r.meta.doc_type == 'publishedcontent' ][0], published_2.pk) self.assertEqual([ r.meta.id for r in response if r.meta.doc_type == 'chapter' ][0], tuto_2.slug + '__' + chapter_2.slug) def tearDown(self): if os.path.isdir(settings.ZDS_APP['content']['repo_private_path']): shutil.rmtree(settings.ZDS_APP['content']['repo_private_path']) if os.path.isdir(settings.ZDS_APP['content']['repo_public_path']): shutil.rmtree(settings.ZDS_APP['content']['repo_public_path']) if os.path.isdir(settings.MEDIA_ROOT): shutil.rmtree(settings.MEDIA_ROOT) # re-active PDF build settings.ZDS_APP['content']['build_pdf_when_published'] = True # delete index: self.manager.clear_es_index()
class SearchView(ZdSPagingListView):
    """Paginated search view querying the Elasticsearch index.

    The ``q`` GET parameter is the text to search for. One sub-query is built
    per selected document type (through the ``get_queryset_<type>s()``
    methods), the sub-queries are OR-ed together and the result is weighted
    with the boosts found in ``settings.ZDS_APP['search']['boosts']``.
    """

    template_name = 'searchv2/search.html'
    paginate_by = settings.ZDS_APP['search']['results_per_page']

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None
    # Class-level default, like the other per-request state above, so that
    # ``get_queryset_publishedcontents()`` never hits a missing attribute when
    # it runs before ``get_queryset()`` has assigned the flag.
    from_library = False

    authorized_forums = ''

    index_manager = None

    def __init__(self, **kwargs):
        """Overridden because the index manager must NOT be initialized elsewhere."""

        super(SearchView, self).__init__(**kwargs)
        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def get(self, request, *args, **kwargs):
        """Overridden to catch the request and fill the form.

        Raises ``PermissionDenied`` when a query is given but the form built
        from the GET parameters does not validate.
        """

        if 'q' in request.GET:
            self.search_query = ''.join(request.GET['q'])

        self.search_form = self.search_form_class(data=self.request.GET)

        # ``is_valid()`` is only evaluated when there is a query; this is also
        # what populates ``cleaned_data`` for ``get_queryset()``.
        if self.search_query and not self.search_form.is_valid():
            raise PermissionDenied('research form is invalid')

        return super(SearchView, self).get(request, *args, **kwargs)

    @staticmethod
    def _get_selected_models(chosen_groups):
        """Return the flat list of document types for the chosen search groups.

        Group names missing from ``settings.ZDS_APP['search']['search_groups']``
        are ignored. When ``chosen_groups`` is empty, the document types of
        every configured group are returned. The result may be empty.
        """

        search_groups = settings.ZDS_APP['search']['search_groups']

        if chosen_groups:
            groups = [search_groups[name][1] for name in chosen_groups if name in search_groups]
        else:
            groups = [group[1] for group in search_groups.values()]

        # Flattening with a comprehension (instead of ``reduce`` without an
        # initial value) also works when ``groups`` is empty.
        return [model for group in groups for model in group]

    def get_queryset(self):
        """Build the search to run, or return ``[]`` when there is nothing to search.

        ``[]`` is returned when Elasticsearch is unreachable (a warning message
        is then added to the request), when there is no search query, or when
        no document type ends up being selected.
        """

        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _('Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        form_data = self.search_form.cleaned_data

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        # Restrict (sub)category if any
        if form_data['category']:
            self.content_category = form_data['category']
        if form_data['subcategory']:
            self.content_subcategory = form_data['subcategory']

        # Mark that contents must come from library if required
        self.from_library = form_data['from_library'] == 'on'

        # Setting the different querysets (according to the selected models, if any)
        models = self._get_selected_models(form_data['models'])
        if not models:
            # Nothing to search in: without this guard the combination below
            # would fail on an empty list.
            return []

        part_querysets = [getattr(self, 'get_queryset_{}s'.format(model))() for model in models]

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # Weighting: apply the "global" boost of each selected document type
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects.

        The query is restricted to tutorials and articles when
        ``from_library`` is set, and to the requested category and subcategory
        if any. Score is multiplied by the ``publishedcontent`` boosts
        depending on the content type, on whether a tutorial has chapters and
        on whether an opinion is not picked.
        """

        query = Match(_type='publishedcontent') \
            & MultiMatch(
                query=self.search_query,
                fields=['title', 'description', 'categories', 'subcategories', 'tags', 'text'])

        if self.from_library:
            query &= Match(content_type='TUTORIAL') | Match(content_type='ARTICLE')

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']

        functions_score = [
            {
                'filter': Match(content_type='TUTORIAL'),
                'weight': boosts['if_tutorial']
            },
            {
                'filter': Match(content_type='TUTORIAL') & Match(has_chapters=True),
                'weight': boosts['if_medium_or_big_tutorial']
            },
            {
                'filter': Match(content_type='ARTICLE'),
                'weight': boosts['if_article']
            },
            {
                'filter': Match(content_type='OPINION'),
                'weight': boosts['if_opinion']
            },
            {
                'filter': Match(content_type='OPINION') & Match(picked=False),
                'weight': boosts['if_opinion_not_picked']
            },
        ]

        scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

        return scored_query

    def get_queryset_chapters(self):
        """Search in content chapters, restricted to the requested (sub)category if any."""

        query = Match(_type='chapter') \
            & MultiMatch(query=self.search_query, fields=['title', 'text'])

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """

        query = Match(_type='topic') \
            & Terms(forum_pk=self.authorized_forums) \
            & MultiMatch(query=self.search_query, fields=['title', 'subtitle', 'tags'])

        boosts = settings.ZDS_APP['search']['boosts']['topic']

        functions_score = [
            {'filter': Match(is_solved=True), 'weight': boosts['if_solved']},
            {'filter': Match(is_sticky=True), 'weight': boosts['if_sticky']},
            {'filter': Match(is_locked=True), 'weight': boosts['if_locked']}
        ]

        scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

        return scored_query

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """

        query = Match(_type='post') \
            & Terms(forum_pk=self.authorized_forums) \
            & Term(is_visible=True) \
            & MultiMatch(query=self.search_query, fields=['text_html'])

        boosts = settings.ZDS_APP['search']['boosts']['post']

        functions_score = [
            {'filter': Match(position=1), 'weight': boosts['if_first']},
            {'filter': Match(is_useful=True), 'weight': boosts['if_useful']},
            {
                'filter': Range(like_dislike_ratio={'gt': 1}),
                'weight': boosts['ld_ratio_above_1']
            },
            {
                'filter': Range(like_dislike_ratio={'lt': 1}),
                'weight': boosts['ld_ratio_below_1']
            }
        ]

        scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

        return scored_query

    def get_context_data(self, **kwargs):
        """Add the search form and a flag telling whether a query was submitted."""

        context = super(SearchView, self).get_context_data(**kwargs)
        context['form'] = self.search_form
        # True as soon as a ``q`` parameter was sent, even an empty one.
        context['query'] = self.search_query is not None

        return context