示例#1
0
    def hide_comment_by_user(self, user, text_hidden):
        """Hide this post, then mark its Elasticsearch document as not visible.

        The hiding itself is delegated to the parent implementation; afterwards
        the ``is_visible`` field of the post's document is set to ``False``.

        :param user: forwarded unchanged to the parent ``hide_comment_by_user``.
        :param text_hidden: forwarded unchanged to the parent ``hide_comment_by_user``.
        """
        super(Post, self).hide_comment_by_user(user, text_hidden)

        # The manager is built from the project-wide search index settings.
        es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        hidden_flag = {"is_visible": False}
        es_manager.update_single_document(self, hidden_flag)
示例#2
0
    def hide_comment_by_user(self, user, text_hidden):
        """Hide the post through the parent class and flag it as hidden in ES too.

        :param user: passed as-is to the parent ``hide_comment_by_user``.
        :param text_hidden: passed as-is to the parent ``hide_comment_by_user``.
        """
        super(Post, self).hide_comment_by_user(user, text_hidden)

        # partial update of the document: only the visibility flag changes
        partial_document = {'is_visible': False}
        ESIndexManager(**settings.ES_SEARCH_INDEX).update_single_document(self, partial_document)
示例#3
0
def delete_published_content_in_elasticsearch(sender, instance, **kwargs):
    """Remove a published content and its chapters from Elasticsearch.

    Intended as a ``pre_delete`` signal receiver. When the index exists, the
    chapter documents routed to ``instance.es_id`` are deleted first; the
    document of ``instance`` itself is then deleted through
    ``delete_document_in_elasticsearch``.

    :param sender: unused, part of the signal receiver signature.
    :param instance: the object being deleted.
    :return: the result of ``delete_document_in_elasticsearch(instance)``.
    """
    es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    if es_manager.index_exists:
        chapter_doc_type = FakeChapter.get_es_document_type()
        # chapters are selected through their routing value, i.e. the ES id of the content
        routed_to_content = ES_Q('match', _routing=instance.es_id)
        es_manager.delete_by_query(chapter_doc_type, routed_to_content)

    return delete_document_in_elasticsearch(instance)
示例#4
0
    def __init__(self, *args, **kwargs):
        """Prepare the list of models to map and the Elasticsearch index manager.

        Overridden because FakeChapter needs to be present for mapping.
        Also, its mapping needs to be defined before the one of PublishedContent
        for parenting reasons, which is why it is placed first in ``self.models``.

        :raises Exception: if the index manager reports no connection to Elasticsearch.
        """

        super().__init__(*args, **kwargs)

        # Build a new list on the instance instead of calling ``insert`` on ``self.models``.
        # An in-place insert would modify a list shared between instances (e.g. a class
        # attribute), so every instantiation would prepend one more FakeChapter to it.
        self.models = [FakeChapter] + [model for model in self.models if model is not FakeChapter]

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception("Unable to connect to Elasticsearch, aborting.")
示例#5
0
    def setUp(self):
        """Create the bot account, a category with its forum, two users and an index manager."""
        settings.EMAIL_BACKEND = "django.core.mail.backends.locmem.EmailBackend"

        # the first created profile is registered as the bot account
        bot = ProfileFactory().user
        self.mas = bot
        settings.ZDS_APP["member"]["bot_account"] = bot.username

        category, forum = create_category_and_forum()
        self.category = category
        self.forum = forum

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
示例#6
0
    def setUp(self):
        """Adjust the test settings, then create the users, the forum and the index manager."""
        # PDF generation is switched off to speed up the tests
        content_settings = settings.ZDS_APP['content']
        content_settings['build_pdf_when_published'] = False

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

        # the first created profile is registered as the bot account
        bot = ProfileFactory().user
        self.mas = bot
        settings.ZDS_APP['member']['bot_account'] = bot.username

        category, forum = create_category()
        self.category = category
        self.forum = forum

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
示例#7
0
    def setUp(self):
        """Create the fixtures and reset the index so that each test starts from a clean state."""
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

        # the first created profile is registered as the bot account
        bot = ProfileFactory().user
        self.mas = bot
        settings.ZDS_APP['member']['bot_account'] = bot.username

        category, forum = create_category_and_forum()
        self.category = category
        self.forum = forum

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.manager = es_manager
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        # reset the index with the mappings of all models, install the analyzer, then refresh
        es_manager.reset_es_index(self.indexable)
        es_manager.setup_custom_analyzer()
        es_manager.refresh_index()
示例#8
0
    def test_setup_functions(self):
        """Test the behavior of the reset_es_index(), setup_custom_analyzer() and clear_es_index() functions"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of letting it pass without checking anything
            self.skipTest('Elasticsearch is not reachable')

        custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
        manager = ESIndexManager(**custom_index)

        # in the beginning: the void
        self.assertNotIn(manager.index, self.manager.es.cat.indices())

        self.assertEqual(manager.index, custom_index['name'])
        self.assertEqual(manager.number_of_shards, custom_index['shards'])
        self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

        # 1. Creation:
        models = [Topic, Post]
        manager.reset_es_index(models)
        self.assertIn(manager.index, manager.es.cat.indices())  # index in !

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']

        # shard and replica counts are compared as strings, as they are read back from the settings
        self.assertEqual(index_settings['provided_name'], manager.index)
        self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
        self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

        # every model given at creation must have its mapping
        mappings = manager.es.indices.get_mapping(index=manager.index)
        self.assertIn(manager.index, mappings)
        mappings = mappings[manager.index]['mappings']

        for model in models:
            self.assertIn(model.get_es_document_type(), mappings)

        # 2. Analyzer: absent until setup_custom_analyzer() is called
        self.assertNotIn('analysis', index_settings)
        manager.setup_custom_analyzer()

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertIn('analysis', index_settings)

        # 3. Clearing
        manager.clear_es_index()
        self.assertNotIn(manager.index, self.manager.es.cat.indices())  # back to the void
示例#9
0
    def get_es_indexable(cls, force_reindexing=False):
        """Yield batches of objects to index: the published contents and their chapters.

        Overrides the parent implementation to also include chapters. Contents are
        fetched in slices of ``PublishedContent.objects_per_batch`` objects, each slice
        starting after the pk of the last object of the previous one. For every slice,
        the chapters of the contents that have sub-containers are yielded first (in
        lists of at most ``objects_per_batch`` items), then the slice of contents.

        Side effect: for a content with sub-containers that is already indexed, the
        chapters previously indexed for it are deleted from ES.

        :param force_reindexing: passed as-is to the parent ``get_es_indexable``.
        :return: generator of lists (``FakeChapter`` objects or contents).
        """
        es_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        queryset = super(PublishedContent, cls).get_es_indexable(force_reindexing)
        batch_size = PublishedContent.objects_per_batch

        def fetch_after(pk):
            # next slice of contents whose pk is greater than the given one
            return list(queryset.filter(pk__gt=pk)[:batch_size])

        contents = fetch_after(0)

        while contents:
            pending_chapters = []

            for content in contents:
                public_version = content.load_public_version()

                # chapters are only indexed for middle and big tuto
                if not public_version.has_sub_containers():
                    continue

                # get rid of the chapters indexed by a previous run, if any
                if content.es_already_indexed:
                    es_manager.delete_by_query(
                        FakeChapter.get_es_document_type(), ES_Q('match', _routing=content.es_id))

                # (re)index the current one(s)
                for chapter in public_version.get_list_of_chapters():
                    pending_chapters.append(FakeChapter(chapter, public_version, content.es_id))

            # no yielded list may hold more than ``batch_size`` items, so chapters are split further
            while pending_chapters:
                yield pending_chapters[:batch_size]
                pending_chapters = pending_chapters[batch_size:]

            yield contents

            contents = fetch_after(contents[-1].pk)
示例#10
0
    def setUp(self):
        """Create the bot account, a category with its forum, two users and an index manager."""
        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'

        # the first created profile is registered as the bot account
        bot = ProfileFactory().user
        self.mas = bot
        settings.ZDS_APP['member']['bot_account'] = bot.username

        category, forum = create_category_and_forum()
        self.category = category
        self.forum = forum

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
示例#11
0
    def __init__(self, *args, **kwargs):
        """Prepare the list of models to map and the Elasticsearch index manager.

        Overridden because FakeChapter needs to be present for mapping.
        Also, its mapping needs to be defined before the one of PublishedContent
        for parenting reasons, which is why it is placed first in ``self.models``.

        :raises Exception: if the index manager reports no connection to Elasticsearch.
        """

        super(Command, self).__init__(*args, **kwargs)

        # Build a new list on the instance instead of calling ``insert`` on ``self.models``.
        # An in-place insert would modify a list shared between instances (e.g. a class
        # attribute), so every instantiation would prepend one more FakeChapter to it.
        self.models = [FakeChapter] + [model for model in self.models if model is not FakeChapter]

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception('Unable to connect to Elasticsearch, aborting.')
示例#12
0
    def test_setup_functions(self):
        """Test the behavior of the reset_es_index(), setup_custom_analyzer() and clear_es_index() functions"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of letting it pass without checking anything
            self.skipTest('Elasticsearch is not reachable')

        custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
        manager = ESIndexManager(**custom_index)

        # in the beginning: the void
        self.assertNotIn(manager.index, self.manager.es.cat.indices())

        self.assertEqual(manager.index, custom_index['name'])
        self.assertEqual(manager.number_of_shards, custom_index['shards'])
        self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

        # 1. Creation:
        models = [Topic, Post]
        manager.reset_es_index(models)
        self.assertIn(manager.index, manager.es.cat.indices())  # index in !

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']

        # shard and replica counts are compared as strings, as they are read back from the settings
        self.assertEqual(index_settings['provided_name'], manager.index)
        self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
        self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

        # every model given at creation must have its mapping
        mappings = manager.es.indices.get_mapping(index=manager.index)
        self.assertIn(manager.index, mappings)
        mappings = mappings[manager.index]['mappings']

        for model in models:
            self.assertIn(model.get_es_document_type(), mappings)

        # 2. Analyzer: absent until setup_custom_analyzer() is called
        self.assertNotIn('analysis', index_settings)
        manager.setup_custom_analyzer()

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertIn('analysis', index_settings)

        # 3. Clearing
        manager.clear_es_index()
        self.assertNotIn(manager.index, self.manager.es.cat.indices())  # back to the void
示例#13
0
class UtilsTests(TestCase):
    """Tests of the ``es_manager`` management command, run against Elasticsearch."""

    def setUp(self):
        """Adjust the test settings, then create the users, the forum and the index manager."""
        # don't build PDF to speed up the tests
        settings.ZDS_APP['content']['build_pdf_when_published'] = False

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def _search_everything(self):
        """Execute a match-all search through the index manager and return its response."""
        # ``Search.query()`` returns a new ``Search`` object rather than modifying the one it
        # is called on, so the returned object is the one that has to be executed.
        search = Search().query(MatchAll())
        return self.index_manager.setup_search(search).execute()

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""

        if not self.index_manager.connected_to_es:
            # report the test as skipped instead of letting it pass without checking anything
            self.skipTest('Elasticsearch is not reachable')

        # in the beginning: the void
        self.assertNotIn(self.index_manager.index, self.index_manager.es.cat.indices())

        text = u'Ceci est un texte de test'

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command('es_manager', 'index_all')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        # the index was created by the command, so this manager's own flag is set by hand
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags, not the keys: iterating the dict itself is always truthy
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index in

        call_command('es_manager', 'clear')
        self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertNotIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command('es_manager', 'setup')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertIn(self.index_manager.index, self.index_manager.es.cat.indices())  # index back in ...

        results = self._search_everything()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(index=self.index_manager.index)
        settings_index = result[self.index_manager.index]['settings']['index']
        self.assertIn('analysis', settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command('es_manager', 'index_flagged')

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get the 4 results back

    def tearDown(self):
        """Remove the directories created on disk, restore the PDF setting and delete the index."""
        if os.path.isdir(settings.ZDS_APP['content']['repo_private_path']):
            shutil.rmtree(settings.ZDS_APP['content']['repo_private_path'])
        if os.path.isdir(settings.ZDS_APP['content']['repo_public_path']):
            shutil.rmtree(settings.ZDS_APP['content']['repo_public_path'])
        if os.path.isdir(settings.MEDIA_ROOT):
            shutil.rmtree(settings.MEDIA_ROOT)

        # re-active PDF build
        settings.ZDS_APP['content']['build_pdf_when_published'] = True

        # delete index:
        self.index_manager.clear_es_index()
示例#14
0
class ESIndexManagerTests(TutorialTestMixin, TestCase):
    """Tests of ``ESIndexManager``: index setup, custom analyzer, indexation and deletion."""

    def setUp(self):
        """Create the fixtures and reset the index so that each test starts from a clean state."""

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        self.manager.reset_es_index(self.indexable)
        self.manager.setup_custom_analyzer()
        self.manager.refresh_index()

    def _search_everything(self):
        """Execute a match-all search through the index manager and return its response."""
        # ``Search.query()`` returns a new ``Search`` object rather than modifying the one it
        # is called on, so the returned object is the one that has to be executed.
        search = Search().query(MatchAll())
        return self.manager.setup_search(search).execute()

    def _skip_without_es(self):
        """Skip the running test when Elasticsearch cannot be reached."""
        if not self.manager.connected_to_es:
            # report the test as skipped instead of letting it pass without checking anything
            self.skipTest('Elasticsearch is not reachable')

    def test_setup_functions(self):
        """Test the behavior of the reset_es_index(), setup_custom_analyzer() and clear_es_index() functions"""

        self._skip_without_es()

        custom_index = {'name': 'some_random_name', 'shards': 3, 'replicas': 1}
        manager = ESIndexManager(**custom_index)

        # in the beginning: the void
        self.assertNotIn(manager.index, self.manager.es.cat.indices())

        self.assertEqual(manager.index, custom_index['name'])
        self.assertEqual(manager.number_of_shards, custom_index['shards'])
        self.assertEqual(manager.number_of_replicas, custom_index['replicas'])

        # 1. Creation:
        models = [Topic, Post]
        manager.reset_es_index(models)
        self.assertIn(manager.index, manager.es.cat.indices())  # index in !

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']

        # shard and replica counts are compared as strings, as they are read back from the settings
        self.assertEqual(index_settings['provided_name'], manager.index)
        self.assertEqual(index_settings['number_of_shards'], str(manager.number_of_shards))
        self.assertEqual(index_settings['number_of_replicas'], str(manager.number_of_replicas))

        # every model given at creation must have its mapping
        mappings = manager.es.indices.get_mapping(index=manager.index)
        self.assertIn(manager.index, mappings)
        mappings = mappings[manager.index]['mappings']

        for model in models:
            self.assertIn(model.get_es_document_type(), mappings)

        # 2. Analyzer: absent until setup_custom_analyzer() is called
        self.assertNotIn('analysis', index_settings)
        manager.setup_custom_analyzer()

        index_settings = manager.es.indices.get_settings(index=manager.index)
        self.assertIn(manager.index, index_settings)
        index_settings = index_settings[manager.index]['settings']['index']
        self.assertIn('analysis', index_settings)

        # 3. Clearing
        manager.clear_es_index()
        self.assertNotIn(manager.index, self.manager.es.cat.indices())  # back to the void

    def test_custom_analyzer(self):
        """Test our custom analyzer"""

        self._skip_without_es()

        # pairs of (sentence to analyze, tokens expected from the analyzer)
        test_sentences = [
            # stemming:
            ('programmation programmer programmateur programmes', ['program', 'program', 'program', 'program']),
            # keep "c" intact:
            ('apprendre à programmer en C', ['aprendr', 'program', 'langage_c']),
            # remove HTML and some special characters:
            ('<p>&laquo; test&#x202F;! &raquo;, en hurlant &hellip;</p>', ['test', 'hurlant']),
            # keep "c++" and "linux" intact:
            ('écrire un programme en C++ avec Linux', ['ecrir', 'program', 'c++', 'linux']),
            # elision:
            ("c'est de l'arnaque", ['arnaqu'])
        ]

        for sentence, expected_tokens in test_sentences:
            tokens = self.manager.analyze_sentence(sentence)
            self.assertEqual(len(tokens), len(expected_tokens))
            self.assertEqual(tokens, expected_tokens)

    def test_indexation(self):
        """test the indexation and deletion of the different documents"""

        self._skip_without_es()

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user)
        post = PostFactory(topic=topic, author=self.user, position=1)

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. index all
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
            self.manager.refresh_index()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the flags, not the keys: iterating the dict itself is always truthy
        self.assertTrue(all(must_contain.values()))

        # 2. Test what reindexation will do:
        new_topic = TopicFactory(forum=self.forum, author=self.user)
        new_post = PostFactory(topic=new_topic, author=self.user, position=1)

        pk_of_topics_to_reindex = [item.pk for item in Topic.get_es_indexable(force_reindexing=False)]
        pk_of_posts_to_reindex = [item.pk for item in Post.get_es_indexable(force_reindexing=False)]

        self.assertNotIn(topic.pk, pk_of_topics_to_reindex)
        self.assertIn(new_topic.pk, pk_of_topics_to_reindex)
        self.assertNotIn(post.pk, pk_of_posts_to_reindex)
        self.assertIn(new_post.pk, pk_of_posts_to_reindex)

        for model in self.indexable:  # ok, so let's index that
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        results = self._search_everything()
        self.assertEqual(len(results), 6)  # good!

        # 3. Test single deletion:
        new_post = Post.objects.get(pk=new_post.pk)

        self.manager.delete_document(new_post)
        self.manager.refresh_index()

        results = self._search_everything()
        self.assertEqual(len(results), 5)  # one is missing

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != new_post.es_id)

        # 4. Test "delete_by_query_deletion":
        topic = Topic.objects.get(pk=topic.pk)
        new_topic = Topic.objects.get(pk=new_topic.pk)

        self.manager.delete_by_query(Topic.get_es_document_type(), MatchAll())  # the two topics are deleted
        self.manager.refresh_index()

        results = self._search_everything()
        self.assertEqual(len(results), 3)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != new_topic.es_id)
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != topic.es_id)

        # 5. Test that the deletion of an object also triggers its deletion in ES
        post = Post.objects.get(pk=post.pk)
        post.delete()
        self.manager.refresh_index()

        results = self._search_everything()
        self.assertEqual(len(results), 2)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != post.es_id)

        # 6. Test full desindexation:
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.clear_indexing_of_model(model)

        # note "topic" is gone since "post" is gone, due to relationships at the Django level
        new_topic = Topic.objects.get(pk=new_topic.pk)
        new_post = Post.objects.get(pk=new_post.pk)

        self.assertFalse(new_topic.es_already_indexed)
        self.assertTrue(new_topic.es_flagged)
        self.assertFalse(new_post.es_already_indexed)
        self.assertTrue(new_post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

    def test_special_case_of_contents(self):
        """test that the old publishedcontent does not stay when a new one is created"""

        self._skip_without_es()

        # 1. Create a middle-tutorial, publish it, then index it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(first_publication.es_already_indexed)
        self.assertFalse(first_publication.es_flagged)

        results = self._search_everything()
        self.assertEqual(len(results), 2)  # get 2 results, one for the content and one for the chapter

        self.assertEqual(PublishedContent.objects.count(), 1)

        # 2. Change the title, which will trigger a change in the slug
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        versioned = tuto.load_version(sha=tuto.sha_draft)

        tuto.title = 'un titre complètement différent!'
        tuto.save()

        versioned.repo_update_top_container(tuto.title, tuto.slug, 'osef', 'osef')
        second_publication = publish_content(tuto, versioned, True)

        tuto.sha_public = versioned.current_version
        tuto.sha_draft = versioned.current_version
        tuto.public_version = second_publication
        tuto.save()

        self.assertEqual(PublishedContent.objects.count(), 2)  # now there are two objects ...
        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        self.assertTrue(first_publication.must_redirect)  # .. including the first one, for redirection

        self.manager.refresh_index()

        results = self._search_everything()
        self.assertEqual(len(results), 0)  # the old one is gone (and we need to reindex to get the new one)

        # 3. Check if indexation brings the new one, and not the old one
        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        second_publication = PublishedContent.objects.get(pk=second_publication.pk)

        results = self._search_everything()
        self.assertEqual(len(results), 2)  # Still 2, not 4 !

        found_old = False
        found_new = False

        for hit in results:
            if hit.meta.doc_type == PublishedContent.get_es_document_type():
                if hit.meta.id == first_publication.es_id:
                    found_old = True
                if hit.meta.id == second_publication.es_id:
                    found_new = True

        self.assertTrue(found_new)
        self.assertFalse(found_old)

    def tearDown(self):
        """Run the parent cleanup, then delete the index used by the tests."""
        super().tearDown()

        # delete index:
        self.manager.clear_es_index()
示例#15
0
class Command(BaseCommand):
    """Management command that manages the Elasticsearch index and the data stored in it.

    The positional ``action`` argument selects the operation: ``setup``, ``clear``, ``index_all``
    (indexing with ``force_reindexing=True``) or ``index_flagged`` (indexing with ``force_reindexing=False``).
    """

    help = "Index data in ES and manage them"

    index_manager = None
    models = get_django_indexable_objects()

    def __init__(self, *args, **kwargs):
        """Overridden because FakeChapter needs to be present for mapping.
        Also, its mapping needs to be defined before the one of PublishedContent for parenting reasons (!!!).

        :raises Exception: if the index manager reports that it is not connected to Elasticsearch.
        """

        super().__init__(*args, **kwargs)

        # ``models`` is a class attribute: inserting into it in place would add one more FakeChapter each
        # time a Command is instantiated in the same process. Build a per-instance list instead.
        self.models = [FakeChapter] + list(self.models)

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception("Unable to connect to Elasticsearch, aborting.")

    def add_arguments(self, parser):
        """Declare the mandatory ``action`` positional argument and its allowed values."""
        parser.add_argument(
            "action",
            type=str,
            help="action to perform",
            choices=["setup", "clear", "index_all", "index_flagged"])

    def handle(self, *args, **options):
        """Dispatch to the method matching ``options["action"]``.

        :raises CommandError: if the action is not one of the known ones.
        """

        action = options["action"]

        if action == "setup":
            self.setup_es()
        elif action == "clear":
            self.clear_es()
        elif action == "index_all":
            self.index_documents(force_reindexing=True)
        elif action == "index_flagged":
            self.index_documents(force_reindexing=False)
        else:
            raise CommandError("unknown action {}".format(action))

    def setup_es(self):
        """Reset the index for ``self.models``, set up the custom analyzer, then refresh the index."""

        self.index_manager.reset_es_index(self.models)
        self.index_manager.setup_custom_analyzer()

        self.index_manager.refresh_index()

    def clear_es(self):
        """Clear the index, then clear the indexing state of every model in ``self.models``."""
        self.index_manager.clear_es_index()

        for model in self.models:
            self.index_manager.clear_indexing_of_model(model)

    def index_documents(self, force_reindexing=False):
        """Bulk-index every model except FakeChapter, then refresh the index.

        :param force_reindexing: when true, the index is set up again first (removing all previous data),
            progress is written to the command's stdout and the flag is forwarded to the bulk indexing.
        """

        if force_reindexing:
            self.setup_es()  # remove all previous data

        for model in self.models:
            # FakeChapter is only present in ``self.models`` for the mapping; it is not bulk-indexed here
            if model is FakeChapter:
                continue

            if force_reindexing:
                self.stdout.write(f"- indexing {model.get_es_document_type()}s")

            indexed_counter = self.index_manager.es_bulk_indexing_of_model(
                model, force_reindexing=force_reindexing)
            if force_reindexing:
                self.stdout.write(f"  {indexed_counter}\titems indexed")

        self.index_manager.refresh_index()
示例#16
0
class UtilsTests(TutorialTestMixin, TestCase):
    """Tests of the ``es_manager`` management command against a real Elasticsearch index."""

    def setUp(self):
        """Create the bot account, a category with its forum, a user, a staff user and an index manager."""

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def _search_everything(self):
        """Execute a match-all search through the index manager and return the response.

        ``Search.query()`` returns a new object, so the call must be chained rather than discarded.
        """
        return self.index_manager.setup_search(Search().query(MatchAll())).execute()

    def _assert_indexing_state(self, indexed, topic, post, tuto=None):
        """Reload the objects from the database and check their ES bookkeeping flags.

        :param indexed: expected truthiness of ``es_already_indexed``; ``es_flagged`` must be the opposite.
        :param topic: topic to reload by primary key.
        :param post: post to reload by primary key.
        :param tuto: optional content whose ``PublishedContent`` is also reloaded and checked.
        """
        reloaded = [Topic.objects.get(pk=topic.pk), Post.objects.get(pk=post.pk)]
        if tuto is not None:
            reloaded.append(PublishedContent.objects.get(content_pk=tuto.pk))

        for instance in reloaded:
            self.assertEqual(bool(instance.es_already_indexed), indexed)
            self.assertEqual(bool(instance.es_flagged), not indexed)

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""

        if not self.index_manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        manager = self.index_manager

        # in the beginning: the void
        self.assertNotIn(manager.index, manager.es.cat.indices())

        text = 'Ceci est un texte de test'

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        self._assert_indexing_state(False, topic, post)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command('es_manager', 'index_all')
        self.assertTrue(manager.es.indices.exists(manager.index))
        # the command works with its own manager, so the flag of this one is updated by hand
        manager.index_exists = True

        self._assert_indexing_state(True, topic, post, tuto)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # check the values: iterating the dict itself yields its (always truthy) keys
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertIn(manager.index, manager.es.cat.indices())  # index in

        call_command('es_manager', 'clear')
        self.assertFalse(manager.es.indices.exists(manager.index))
        manager.index_exists = False

        # must reset every object
        self._assert_indexing_state(False, topic, post, tuto)

        self.assertNotIn(manager.index, manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command('es_manager', 'setup')
        self.assertTrue(manager.es.indices.exists(manager.index))
        manager.index_exists = True

        self.assertIn(manager.index, manager.es.cat.indices())  # index back in ...

        self.assertEqual(len(self._search_everything()), 0)  # ... but with nothing in it

        result = manager.es.indices.get_settings(index=manager.index)
        settings_index = result[manager.index]['settings']['index']
        self.assertIn('analysis', settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command('es_manager', 'index_flagged')

        self._assert_indexing_state(True, topic, post, tuto)

        self.assertEqual(len(self._search_everything()), 4)  # get the 4 results back

    def tearDown(self):
        """Run the parent teardown, then clear the Elasticsearch index."""
        super().tearDown()

        # delete index:
        self.index_manager.clear_es_index()
示例#17
0
class ViewsTests(TutorialTestMixin, TestCase):
    def setUp(self):
        """Create the users and forum shared by the tests and prepare the search index.

        The index is reset for the models listed in ``self.indexable``, the custom analyzer is set up
        and the index is refreshed.
        """

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        # the first created profile is registered as the bot account
        settings.ZDS_APP['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        # FakeChapter comes first in the list handed to reset_es_index()
        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        self.manager.reset_es_index(self.indexable)
        self.manager.setup_custom_analyzer()
        self.manager.refresh_index()

    def test_basic_search(self):
        """Basic search and filtering"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        # 1. Index and test search:
        text = 'test'

        def search(extra_params=''):
            """Query the search view for ``text`` and return the executed response."""
            result = self.client.get(reverse('search:query') + '?q=' + text + extra_params, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        # nothing has been indexed yet:
        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        response = search()
        self.assertEqual(response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        ids = {
            'topic': [topic_1.es_id],
            'post': [post_1.es_id],
            'content': [published.es_id, published.content_public_slug + '__' + chapter1.slug],
        }

        # each search group maps to a sequence whose second item holds the document types of the group
        for group_name, group in settings.ZDS_APP['search']['search_groups'].items():
            response = search('&models=' + group_name)
            expected_ids = ids[group_name]

            self.assertEqual(response.hits.total, len(expected_ids))  # get 1 result of each …
            for hit, expected_id in zip(response, expected_ids):
                self.assertIn(hit.meta.doc_type, group[1])  # … and only of the right type …
                self.assertEqual(hit.meta.id, expected_id)  # … with the right id !

    def test_get_similar_topics(self):
        """Get similar topics lists"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        def similar_topics(word):
            """Query the ``search:similar`` view for ``word`` and return the decoded ``results`` list."""
            result = self.client.get(reverse('search:similar') + '?q=' + word, follow=False)
            self.assertEqual(result.status_code, 200)
            content = json_handler.loads(result.content.decode('utf-8'))
            return content['results']

        text = 'Clem ne se mange pas'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        text = 'Clem est la meilleure mascotte'

        topic_2 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_2 = PostFactory(topic=topic_2, author=self.user, position=1)
        # set the HTML of the second post (the original assigned it to post_1 by mistake)
        post_2.text = post_2.text_html = text
        post_2.save()

        # 1. Should not get any result
        self.assertEqual(len(similar_topics('est')), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        # 2. Should get exactly one result
        self.assertEqual(len(similar_topics('mange')), 1)

        # 3. Should get exactly two results
        self.assertEqual(len(similar_topics('Clem')), 2)

    def test_hidden_post_are_not_result(self):
        """Hidden posts should not show up in the search results"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        # 1. Index and test search:
        text = 'test'

        def search_posts():
            """Search ``text`` among posts only and return the executed response."""
            result = self.client.get(
                reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type(), follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 2)  # indexing ok

        post_1 = Post.objects.get(pk=post_1.pk)

        response = search_posts()
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(response[0].meta.id, post_1.es_id)

        # 2. Hide and search again (no bulk reindexing here, only an index refresh):
        post_1.hide_comment_by_user(self.staff, 'Un abus de pouvoir comme un autre ;)')
        self.manager.refresh_index()

        response = search_posts()
        self.assertEqual(response.hits.total, 0)  # nothing in the results

    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Content of a forum restricted to a group is only found by members of that group"""

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        # 1. Create a hidden forum belonging to a hidden staff group.
        text = 'test'

        def count_results():
            """Search ``text`` with the current client session and return the total number of hits."""
            result = self.client.get(reverse('search:query') + '?q=' + text, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute().hits.total

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic_1 = TopicFactory(forum=hidden_forum, author=self.staff, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 2)  # indexing ok

        # 2. search without connection and get no result
        self.assertEqual(count_results(), 0)

        # 3. Connect with user (not a member of the group), search, and get no result
        self.assertTrue(self.client.login(username=self.user.username, password='******'))
        self.assertEqual(count_results(), 0)

        # 4. Connect with staff, search, and get the topic and the post
        self.client.logout()
        self.assertTrue(self.client.login(username=self.staff.username, password='******'))
        self.assertEqual(count_results(), 2)  # ok !

    def test_boosts(self):
        """Check if boosts are doing their job

        The boosts stored in ``settings.ZDS_APP['search']['boosts']`` are modified during the test; their
        initial values are restored by a cleanup so that other tests are not affected.
        """

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing
            self.skipTest('Elasticsearch is not available')

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = 'test'

        def search(models=None):
            """Search ``text`` (optionally restricted to ``models``) and return the executed response."""
            url = reverse('search:query') + '?q=' + text
            if models is not None:
                url += '&models=' + models
            result = self.client.get(url, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        topic_1_solved_sticky = TopicFactory(forum=self.forum, author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ''
        topic_1_solved_sticky.solved_by = self.user
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky, author=self.user, position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum, author=self.user, title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ''
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked, author=self.user, position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION', title=text)
        published_opinion_not_picked = PublishedContent.objects.get(content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 10)

        # 2. Reset all boosts to 1 (the initial values are put back once the test is over)
        boosts = settings.ZDS_APP['search']['boosts']
        saved_boosts = {doc_type: dict(values) for doc_type, values in boosts.items()}

        def restore_boosts():
            """Put back the boosts that were configured before the test started."""
            for doc_type, values in saved_boosts.items():
                boosts[doc_type].clear()
                boosts[doc_type].update(values)

        self.addCleanup(restore_boosts)

        for doc_type in boosts:
            for key in boosts[doc_type]:
                boosts[doc_type][key] = 1.0

        # 3. Test posts
        post_type = Post.get_es_document_type()

        response = search(post_type)
        self.assertEqual(response.hits.total, 3)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score == response[2].meta.score)

        boosts['post']['if_first'] = 2.0

        response = search(post_type)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score > response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(post_2_useful.pk))  # post 2 is the only one not first

        boosts['post']['if_first'] = 1.0
        boosts['post']['if_useful'] = 2.0

        response = search(post_type)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 is useful

        boosts['post']['if_useful'] = 1.0
        boosts['post']['ld_ratio_above_1'] = 2.0

        response = search(post_type)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_2_useful.pk))  # post 2 have a l/d ratio of 5/2

        boosts['post']['ld_ratio_above_1'] = 1.0
        boosts['post']['ld_ratio_below_1'] = 2.0  # no one would do that in real life

        response = search(post_type)
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score == response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(post_3_ld_below_1.pk))  # post 3 have a l/d ratio of 2/5

        boosts['post']['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        topic_type = Topic.get_es_document_type()

        response = search(topic_type)
        self.assertEqual(response.hits.total, 2)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        boosts['topic']['if_sticky'] = 2.0

        response = search(topic_type)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        boosts['topic']['if_sticky'] = 1.0
        boosts['topic']['if_solved'] = 2.0

        response = search(topic_type)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_1_solved_sticky.pk))  # topic 1 is solved

        boosts['topic']['if_solved'] = 1.0
        boosts['topic']['if_locked'] = 2.0  # no one would do that in real life

        response = search(topic_type)
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(topic_2_locked.pk))  # topic 2 is locked

        boosts['topic']['if_locked'] = 1.0  # no one would do that in real life

        # 5. Test published contents
        response = search('content')
        self.assertEqual(response.hits.total, 5)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score ==
                        response[1].meta.score ==
                        response[2].meta.score ==
                        response[3].meta.score ==
                        response[4].meta.score)

        boosts['publishedcontent']['if_article'] = 2.0

        response = search('content')
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_article.pk))  # obvious

        boosts['publishedcontent']['if_article'] = 1.0
        boosts['publishedcontent']['if_tutorial'] = 2.0

        response = search('content')
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

        boosts['publishedcontent']['if_tutorial'] = 1.0
        boosts['publishedcontent']['if_opinion'] = 2.0
        boosts['publishedcontent']['if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        response = search('content')
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score > response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(published_opinion_not_picked.pk))  # unpicked opinion got first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        boosts['publishedcontent']['if_opinion'] = 1.0
        boosts['publishedcontent']['if_opinion_not_picked'] = 1.0
        boosts['publishedcontent']['if_medium_or_big_tutorial'] = 2.0

        response = search('content')
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))  # obvious

        boosts['publishedcontent']['if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists

        for model in self.indexable:
            model_type = model.get_es_document_type()

            # set a huge number to overcome the small differences:
            boosts[model_type]['global'] = 10.0

            response = search()
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type, model_type)  # obvious

            boosts[model_type]['global'] = 1.0

    def test_change_topic_impacts_posts(self):
        """Check that the indexed posts follow the changes made to their topic.

        The post documents expose ``topic_title`` and ``forum_pk``: both must be updated once the topic is renamed or
        moved and the models are indexed again. After the topic has been moved to the hidden forum, its post is
        still found by the staff member added to the group, but not by an anonymous visitor.
        """

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing without checking anything
            self.skipTest('Elasticsearch is not available')

        text = 'test'
        search_url = reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type()

        def reindex_forum_documents():
            """Bulk-index topics, then posts, and refresh the index."""
            self.manager.es_bulk_indexing_of_model(Topic)
            self.manager.es_bulk_indexing_of_model(Post)
            self.manager.refresh_index()

        def search_posts():
            """Run the post-only search with the current client session and return the executed response."""
            result = self.client.get(search_url, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        # 2. Create a normal topic and index it
        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        reindex_forum_documents()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 2)  # indexing ok

        response = search_posts()
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].meta.doc_type, Post.get_es_document_type())
        self.assertEqual(response[0].forum_pk, self.forum.pk)
        self.assertEqual(response[0].topic_pk, topic_1.pk)
        self.assertEqual(response[0].topic_title, topic_1.title)

        # 3. Change topic title and reindex
        topic_1.title = 'new title'
        topic_1.save()

        reindex_forum_documents()

        response = search_posts()
        self.assertEqual(response.hits.total, 1)  # ok

        self.assertEqual(response[0].topic_title, topic_1.title)  # title was changed

        # 4. connect with staff and move topic
        self.assertTrue(self.client.login(username=self.staff.username, password='******'))

        move_data = {
            'move': '',
            'forum': hidden_forum.pk,
            'topic': topic_1.pk
        }
        move_result = self.client.post(reverse('topic-edit'), move_data, follow=False)

        self.assertEqual(302, move_result.status_code)

        reindex_forum_documents()

        response = search_posts()
        self.assertEqual(response.hits.total, 1)  # Note: without staff, would not get any results (see below)

        self.assertEqual(response[0].forum_pk, hidden_forum.pk)  # post was updated with new forum

        # 5. Topic is now hidden
        self.client.logout()

        response = search_posts()
        self.assertEqual(response.hits.total, 0)  # ok

    def test_change_publishedcontents_impacts_chapter(self):
        """Check that publishing a content again replaces its chapters in the index.

        A tutorial with one chapter is published and indexed. The chapter is then deleted, another one (with a
        different text) is added and the tutorial is published again: after indexing, the old chapter must not be
        found anymore and the new one must be.
        """

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing without checking anything
            self.skipTest('Elasticsearch is not available')

        def search_contents(query):
            """Search ``query`` in the ``content`` group and return the executed response."""
            result = self.client.get(
                reverse('search:query') + '?q=' + query + '&models=content', follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create middle-size content and index it
        text = 'test'

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 2)  # indexing ok

        response = search_contents(text)

        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone !

        another_text = 'another thing'
        self.assertNotIn(text, another_text)  # to prevent a future modification from breaking this test

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = publish_content(tuto, tuto_draft, is_major_update=False)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 2)  # 2 objects, not 3 !

        response = search_contents(text)

        contents = [r for r in response if r.meta.doc_type != 'chapter']
        self.assertEqual(response.hits.total, len(contents))  # no chapter found anymore

        response = search_contents(another_text)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type, FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug + '__' + chapter2.slug)  # got new chapter

    def test_opensearch(self):
        """The OpenSearch page must answer with a 200 and contain both the query and the OpenSearch URLs."""

        query_url = reverse('search:query')
        opensearch_url = reverse('search:opensearch')

        result = self.client.get(opensearch_url, follow=False)
        self.assertEqual(result.status_code, 200)

        for expected_url in (query_url, opensearch_url):
            self.assertContains(result, expected_url)

    def test_upercase_and_lowercase_search_give_same_results(self):
        """Check that the search is case-insensitive.

        The same set of objects (topic, post, published tutorial with a chapter) is created twice, once with a
        lowercase text and once with the same text in uppercase. Searching for either spelling must return the
        same number of hits, in the same order.
        """

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing without checking anything
            self.skipTest('Elasticsearch is not available')

        # 1. Index lowercase stuffs
        text_lc = 'test'

        topic_1_lc = TopicFactory(forum=self.forum, author=self.user, title=text_lc)

        tag_lc = TagFactory(title=text_lc)
        topic_1_lc.tags.add(tag_lc)
        topic_1_lc.subtitle = text_lc
        topic_1_lc.save()

        post_1_lc = PostFactory(topic=topic_1_lc, author=self.user, position=1)
        post_1_lc.text = post_1_lc.text_html = text_lc
        post_1_lc.save()

        tuto_lc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_lc = tuto_lc.load_version()

        tuto_lc.title = text_lc
        tuto_lc.authors.add(self.user)
        subcategory_lc = SubCategoryFactory(title=text_lc)
        tuto_lc.subcategory.add(subcategory_lc)
        tuto_lc.tags.add(tag_lc)
        tuto_lc.save()

        tuto_draft_lc.description = text_lc
        tuto_draft_lc.repo_update_top_container(text_lc, tuto_lc.slug, text_lc, text_lc)

        chapter1_lc = ContainerFactory(parent=tuto_draft_lc, db_object=tuto_lc)
        extract_lc = ExtractFactory(container=chapter1_lc, db_object=tuto_lc)
        extract_lc.repo_update(text_lc, text_lc)

        published_lc = publish_content(tuto_lc, tuto_draft_lc, is_major_update=True)

        tuto_lc.sha_public = tuto_draft_lc.current_version
        tuto_lc.sha_draft = tuto_draft_lc.current_version
        tuto_lc.public_version = published_lc
        tuto_lc.save()

        # 2. Index uppercase stuffs
        text_uc = 'TEST'

        topic_1_uc = TopicFactory(forum=self.forum, author=self.user, title=text_uc)

        topic_1_uc.tags.add(tag_lc)  # Note: a constraint forces tags title to be unique
        topic_1_uc.subtitle = text_uc
        topic_1_uc.save()

        post_1_uc = PostFactory(topic=topic_1_uc, author=self.user, position=1)
        post_1_uc.text = post_1_uc.text_html = text_uc
        post_1_uc.save()

        tuto_uc = PublishableContentFactory(type='TUTORIAL')
        tuto_draft_uc = tuto_uc.load_version()

        tuto_uc.title = text_uc
        tuto_uc.authors.add(self.user)
        tuto_uc.subcategory.add(subcategory_lc)
        tuto_uc.tags.add(tag_lc)
        tuto_uc.save()

        tuto_draft_uc.description = text_uc
        tuto_draft_uc.repo_update_top_container(text_uc, tuto_uc.slug, text_uc, text_uc)

        chapter1_uc = ContainerFactory(parent=tuto_draft_uc, db_object=tuto_uc)
        extract_uc = ExtractFactory(container=chapter1_uc, db_object=tuto_uc)
        extract_uc.repo_update(text_uc, text_uc)

        published_uc = publish_content(tuto_uc, tuto_draft_uc, is_major_update=True)

        tuto_uc.sha_public = tuto_draft_uc.current_version
        tuto_uc.sha_draft = tuto_draft_uc.current_version
        tuto_uc.public_version = published_uc
        tuto_uc.save()

        # 3. Index and search:
        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 0)

        # index (FakeChapter is not passed to the bulk indexing itself)
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text_lc, follow=False)
        self.assertEqual(result.status_code, 200)

        response_lc = result.context['object_list'].execute()
        self.assertEqual(response_lc.hits.total, 8)

        result = self.client.get(reverse('search:query') + '?q=' + text_uc, follow=False)
        self.assertEqual(result.status_code, 200)

        response_uc = result.context['object_list'].execute()
        self.assertEqual(response_uc.hits.total, 8)

        for hit_lc, hit_uc in zip(response_lc, response_uc):  # we should get results in the same order!
            self.assertEqual(hit_lc.meta.id, hit_uc.meta.id)

    def test_category_and_subcategory_impact_search(self):
        """If two contents do not belong to the same (sub)category, filtering on a subcategory only returns the
        content (and its chapter) that belongs to it.
        """

        if not self.manager.connected_to_es:
            # report the test as skipped instead of silently passing without checking anything
            self.skipTest('Elasticsearch is not available')

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        def publish_tutorial_in(subcategory):
            """Create and publish a tutorial with one chapter, filled with ``text``, in ``subcategory``.

            Return the ``(content, chapter, published content)`` triple.
            """
            tuto = PublishableContentFactory(type='TUTORIAL')
            tuto_draft = tuto.load_version()

            tuto.title = text
            tuto.authors.add(self.user)
            tuto.subcategory.add(subcategory)
            tuto.save()

            tuto_draft.description = text
            tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

            chapter = ContainerFactory(parent=tuto_draft, db_object=tuto)
            extract = ExtractFactory(container=chapter, db_object=tuto)
            extract.repo_update(text, text)

            published = publish_content(tuto, tuto_draft, is_major_update=True)

            tuto.sha_public = tuto_draft.current_version
            tuto.sha_draft = tuto_draft.current_version
            tuto.public_version = published
            tuto.save()

            return tuto, chapter, published

        # 1. Create two contents with different subcategories
        subcategories = [
            SubCategoryFactory(title='category 1'),
            SubCategoryFactory(title='category 2'),
        ]
        contents = [publish_tutorial_in(subcategory) for subcategory in subcategories]

        # 2. Index:
        self.assertEqual(len(self.manager.setup_search(Search().query(MatchAll())).execute()), 0)

        # index (FakeChapter is not passed to the bulk indexing itself)
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        result = self.client.get(reverse('search:query') + '?q=' + text, follow=False)
        self.assertEqual(result.status_code, 200)

        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test: each subcategory must only give its own content and chapter
        for subcategory, (tuto, chapter, published) in zip(subcategories, contents):
            # the parameter is `models` (as in the other searches of this file), `model` would be ignored
            result = self.client.get(
                reverse('search:query') + '?q=' + text + '&models=content&subcategory=' + subcategory.slug,
                follow=False)

            self.assertEqual(result.status_code, 200)

            response = result.context['object_list'].execute()
            self.assertEqual(response.hits.total, 2)

            content_ids = [int(r.meta.id) for r in response if r.meta.doc_type == 'publishedcontent']
            chapter_ids = [r.meta.id for r in response if r.meta.doc_type == 'chapter']

            self.assertEqual(content_ids[0], published.pk)
            self.assertEqual(chapter_ids[0], tuto.slug + '__' + chapter.slug)

    def tearDown(self):
        """Run the parent clean-up first, then clear the ES index through the test's index manager."""
        super().tearDown()

        # delete index:
        es_manager = self.manager
        es_manager.clear_es_index()
示例#18
0
class Command(BaseCommand):
    """Management command driving the Elasticsearch index through ``ESIndexManager``.

    The positional ``action`` argument selects what is done: ``setup``, ``clear``, ``index_all`` (setup, then index
    with ``force_reindexing=True``) or ``index_flagged`` (index with ``force_reindexing=False``).
    """

    help = 'Index data in ES and manage them'

    index_manager = None
    models = get_django_indexable_objects()

    def __init__(self, *args, **kwargs):
        """Overridden because FakeChapter needs to be present for mapping.
        Also, its mapping needs to be defined before the one of PublishedContent for parenting reasons (!!!).

        :raises Exception: if the index manager is not connected to Elasticsearch.
        """

        super(Command, self).__init__(*args, **kwargs)

        # Build a per-instance list. Inserting into the class-level list would modify it for every other instance
        # and add one more FakeChapter each time the command is instantiated.
        self.models = [FakeChapter] + [model for model in self.models if model is not FakeChapter]

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

        if not self.index_manager.connected_to_es:
            raise Exception('Unable to connect to Elasticsearch, aborting.')

    def add_arguments(self, parser):
        """Declare the mandatory ``action`` positional argument."""
        parser.add_argument(
            'action', type=str, help='action to perform', choices=['setup', 'clear', 'index_all', 'index_flagged'])

    def handle(self, *args, **options):
        """Dispatch to the method matching ``options['action']``.

        :raises CommandError: if the action is not one of the known ones.
        """

        action = options['action']

        if action == 'setup':
            self.setup_es()
        elif action == 'clear':
            self.clear_es()
        elif action == 'index_all':
            self.index_documents(force_reindexing=True)
        elif action == 'index_flagged':
            self.index_documents(force_reindexing=False)
        else:
            raise CommandError('unknown action {}'.format(action))

    def setup_es(self):
        """Reset the index for all models, set up the custom analyzer and refresh the index."""

        self.index_manager.reset_es_index(self.models)
        self.index_manager.setup_custom_analyzer()

        self.index_manager.refresh_index()

    def clear_es(self):
        """Clear the index, then call ``clear_indexing_of_model`` for every model."""
        self.index_manager.clear_es_index()

        for model in self.models:
            self.index_manager.clear_indexing_of_model(model)

    def index_documents(self, force_reindexing=False):
        """Bulk-index every model except FakeChapter, then refresh the index.

        :param force_reindexing: if true, the index is set up again first (removing previous data), the flag is
            forwarded to the bulk indexing and progress is written to the command's standard output.
        """

        if force_reindexing:
            self.setup_es()  # remove all previous data

        for model in self.models:
            if model is FakeChapter:
                continue

            if force_reindexing:
                self.stdout.write('- indexing {}s'.format(model.get_es_document_type()))

            indexed_counter = self.index_manager.es_bulk_indexing_of_model(model, force_reindexing=force_reindexing)
            if force_reindexing:
                self.stdout.write('  {}\titems indexed'.format(indexed_counter))

        self.index_manager.refresh_index()
示例#19
0
class SearchView(ZdSPagingListView):
    """Search view.

    The ``q`` GET parameter holds the searched text, the other parameters are read through ``search_form_class``.
    One query per selected document type is built by the ``get_queryset_<type>s`` methods; they are combined,
    weighted by the ``global`` boost of each type and handed to the index manager.
    """

    template_name = 'searchv2/search.html'
    paginate_by = settings.ZDS_APP['search']['results_per_page']

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None
    # set by `get_queryset()`; declared here so that `get_queryset_publishedcontents()` can always read it
    from_library = False

    authorized_forums = ''

    index_manager = None

    def __init__(self, **kwargs):
        """Overridden because the index manager must NOT be initialized elsewhere."""

        super(SearchView, self).__init__(**kwargs)
        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def get(self, request, *args, **kwargs):
        """Overridden to catch the request and fill the form.

        :raises PermissionDenied: if there is a search query and the form is not valid.
        """

        if 'q' in request.GET:
            self.search_query = request.GET['q']

        self.search_form = self.search_form_class(data=self.request.GET)

        if self.search_query and not self.search_form.is_valid():
            raise PermissionDenied('research form is invalid')

        return super(SearchView, self).get(request, *args, **kwargs)

    def get_queryset(self):
        """Build the search to perform.

        :return: the search object returned by the index manager, or an empty list if Elasticsearch is not
            reachable, if there is no search query or if no known search group was selected.
        """

        if not self.index_manager.connected_to_es:
            messages.warning(self.request,
                             _('Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        cleaned_data = self.search_form.cleaned_data

        # Restrict (sub)category if any
        if cleaned_data['category']:
            self.content_category = cleaned_data['category']
        if cleaned_data['subcategory']:
            self.content_subcategory = cleaned_data['subcategory']

        # Mark that contents must come from library if required
        self.from_library = cleaned_data['from_library'] == 'on'

        # Setting the different querysets (according to the selected models, if any)
        models = self._get_selected_models(cleaned_data['models'])
        if not models:
            # none of the chosen groups is known: there is nothing to search in
            return []

        part_querysets = [getattr(self, 'get_queryset_{}s'.format(model))() for model in models]

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # Weighting:
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(query=queryset,
                                        boost_mode='multiply',
                                        functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150,
            number_of_fragments=5,
            pre_tags=['[hl]'],
            post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight(
            'text_html')

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def _get_selected_models(self, chosen_groups):
        """Return the flat list of document types of the chosen search groups (of all groups if none is chosen).

        Chosen groups that are not declared in the ``search_groups`` setting are ignored.
        """

        search_groups = settings.ZDS_APP['search']['search_groups']

        if chosen_groups:
            groups = [search_groups[group] for group in chosen_groups if group in search_groups]
        else:
            groups = search_groups.values()

        # the second item of a group holds its document types
        return [model for group in groups for model in group[1]]

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects.

        The query is restricted to tutorials and articles if ``from_library`` is set, and to the selected category
        and subcategory, if any. Score is weighted by the ``publishedcontent`` boosts, depending on the content type.
        """

        query = Match(_type='publishedcontent') \
            & MultiMatch(
            query=self.search_query,
            fields=['title', 'description', 'categories', 'subcategories', 'tags', 'text'])

        if self.from_library:
            query &= Match(content_type='TUTORIAL') | Match(
                content_type='ARTICLE')

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']

        functions_score = [
            {
                'filter': Match(content_type='TUTORIAL'),
                'weight': boosts['if_tutorial']
            },
            {
                'filter': Match(content_type='TUTORIAL') & Match(has_chapters=True),
                'weight': boosts['if_medium_or_big_tutorial']
            },
            {
                'filter': Match(content_type='ARTICLE'),
                'weight': boosts['if_article']
            },
            {
                'filter': Match(content_type='OPINION'),
                'weight': boosts['if_opinion']
            },
            {
                'filter': Match(content_type='OPINION') & Match(picked=False),
                'weight': boosts['if_opinion_not_picked']
            },
        ]

        scored_query = FunctionScore(query=query,
                                     boost_mode='multiply',
                                     functions=functions_score)

        return scored_query

    def get_queryset_chapters(self):
        """Search in content chapters, restricted to the selected category and subcategory, if any."""

        query = Match(_type='chapter') \
            & MultiMatch(query=self.search_query, fields=['title', 'text'])

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """

        query = Match(_type='topic') \
            & Terms(forum_pk=self.authorized_forums) \
            & MultiMatch(query=self.search_query, fields=['title', 'subtitle', 'tags'])

        boosts = settings.ZDS_APP['search']['boosts']['topic']

        functions_score = [
            {'filter': Match(is_solved=True), 'weight': boosts['if_solved']},
            {'filter': Match(is_sticky=True), 'weight': boosts['if_sticky']},
            {'filter': Match(is_locked=True), 'weight': boosts['if_locked']},
        ]

        scored_query = FunctionScore(query=query,
                                     boost_mode='multiply',
                                     functions=functions_score)

        return scored_query

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """

        query = Match(_type='post') \
            & Terms(forum_pk=self.authorized_forums) \
            & Term(is_visible=True) \
            & MultiMatch(query=self.search_query, fields=['text_html'])

        boosts = settings.ZDS_APP['search']['boosts']['post']

        functions_score = [
            {'filter': Match(position=1), 'weight': boosts['if_first']},
            {'filter': Match(is_useful=True), 'weight': boosts['if_useful']},
            {'filter': Range(like_dislike_ratio={'gt': 1}), 'weight': boosts['ld_ratio_above_1']},
            {'filter': Range(like_dislike_ratio={'lt': 1}), 'weight': boosts['ld_ratio_below_1']},
        ]

        scored_query = FunctionScore(query=query,
                                     boost_mode='multiply',
                                     functions=functions_score)

        return scored_query

    def get_context_data(self, **kwargs):
        """Add the search form (``form``) and whether a query was submitted (``query``) to the context."""
        context = super(SearchView, self).get_context_data(**kwargs)
        context['form'] = self.search_form
        context['query'] = self.search_query is not None

        return context
示例#20
0
    def __init__(self, **kwargs):
        """Initialize the view, then create its own index manager from the ``ES_SEARCH_INDEX`` setting.

        The override exists because the index manager must NOT be initialized elsewhere.
        """

        super(SimilarTopicsView, self).__init__(**kwargs)

        es_index_settings = settings.ES_SEARCH_INDEX
        self.index_manager = ESIndexManager(**es_index_settings)
示例#21
0
    def __init__(self, **kwargs):
        """Set up the parent view first and attach an index manager configured by ``ES_SEARCH_INDEX``.

        This is done here because the index manager must NOT be initialized elsewhere.
        """

        super(SearchView, self).__init__(**kwargs)

        manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.index_manager = manager
示例#22
0
class SearchView(ZdSPagingListView):
    """Paginated search over published contents, chapters, topics and posts.

    The query text comes from the ``q`` GET parameter and the filters from
    ``SearchForm``. One Elasticsearch query is built per searched document type
    by the ``get_queryset_<type>s`` methods; these are OR-ed together and
    weighted with the boosts of ``settings.ZDS_APP["search"]["boosts"]``.
    """

    template_name = "searchv2/search.html"
    paginate_by = settings.ZDS_APP["search"]["results_per_page"]

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None
    # Overwritten by get_queryset(); declared here so that
    # get_queryset_publishedcontents() never hits a missing attribute.
    from_library = False

    authorized_forums = ""

    index_manager = None

    def __init__(self, **kwargs):
        """Overridden because the index manager must NOT be initialized elsewhere."""

        super().__init__(**kwargs)
        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def get(self, request, *args, **kwargs):
        """Capture the query and bind the search form before the list view runs.

        Raises:
            PermissionDenied: a non-empty query was sent with invalid form data.
        """

        if "q" in request.GET:
            # NOTE(review): joining looks like a no-op if GET["q"] is already a string — confirm.
            self.search_query = "".join(request.GET["q"])

        self.search_form = self.search_form_class(data=self.request.GET)

        if self.search_query and not self.search_form.is_valid():
            raise PermissionDenied("research form is invalid")

        return super().get(request, *args, **kwargs)

    def _get_searched_models(self, chosen_groups):
        """Return the flat list of document types to search.

        ``chosen_groups`` holds the search-group keys picked in the form; unknown
        keys are ignored. When it is empty, every configured group is searched.
        The result may be empty if nothing matches or no group is configured.
        """

        search_groups = settings.ZDS_APP["search"]["search_groups"]

        if chosen_groups:
            groups = [search_groups[group][1] for group in chosen_groups if group in search_groups]
        else:
            groups = [definition[1] for definition in search_groups.values()]

        # Flatten without reduce(): reduce() without initializer raises on an empty list.
        return [model for group_models in groups for model in group_models]

    def get_queryset(self):
        """Build the search to paginate.

        Returns an empty list when Elasticsearch is unreachable (a warning
        message is also added), when there is no query, or when no document type
        is selected. Otherwise returns the result of
        ``index_manager.setup_search()`` for the weighted, highlighted search.
        """

        if not self.index_manager.connected_to_es:
            messages.warning(self.request,
                             _("Impossible de se connecter à Elasticsearch"))
            return []

        if not self.search_query:
            return []

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        cleaned_data = self.search_form.cleaned_data

        # Restrict (sub)category if any
        if cleaned_data["category"]:
            self.content_category = cleaned_data["category"]
        if cleaned_data["subcategory"]:
            self.content_subcategory = cleaned_data["subcategory"]

        # Mark that contents must come from library if required
        self.from_library = cleaned_data["from_library"] == "on"

        # Setting the different querysets (according to the selected models, if any)
        models = self._get_searched_models(cleaned_data["models"])
        if not models:
            # Nothing to search in: behave like the other "no result" cases.
            return []

        part_querysets = [getattr(self, f"get_queryset_{model}s")() for model in models]
        queryset = reduce(operator.or_, part_querysets)

        # Weighting:
        weight_functions = [
            {"filter": Match(_type=_type), "weight": weights["global"]}
            for _type, weights in settings.ZDS_APP["search"]["boosts"].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(query=queryset,
                                        boost_mode="multiply",
                                        functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150,
            number_of_fragments=5,
            pre_tags=["[hl]"],
            post_tags=["[/hl]"])
        search_queryset = search_queryset.highlight("text").highlight("text_html")

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects.

        The match can be restricted to tutorials and articles (``from_library``)
        and to a category and/or subcategory. Score is modified according to the
        content type, to the presence of chapters in a tutorial, and to whether
        an opinion was picked.
        """

        query = Match(_type="publishedcontent") & MultiMatch(
            query=self.search_query,
            fields=["title", "description", "categories", "subcategories", "tags", "text"])

        if self.from_library:
            query &= Match(content_type="TUTORIAL") | Match(content_type="ARTICLE")

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        boosts = settings.ZDS_APP["search"]["boosts"]["publishedcontent"]

        functions_score = [
            {
                "filter": Match(content_type="TUTORIAL"),
                "weight": boosts["if_tutorial"],
            },
            {
                "filter": Match(content_type="TUTORIAL") & Match(has_chapters=True),
                "weight": boosts["if_medium_or_big_tutorial"],
            },
            {
                "filter": Match(content_type="ARTICLE"),
                "weight": boosts["if_article"],
            },
            {
                "filter": Match(content_type="OPINION"),
                "weight": boosts["if_opinion"],
            },
            {
                "filter": Match(content_type="OPINION") & Match(picked=False),
                "weight": boosts["if_opinion_not_picked"],
            },
        ]

        return FunctionScore(query=query,
                             boost_mode="multiply",
                             functions=functions_score)

    def get_queryset_chapters(self):
        """Search in content chapters, optionally restricted to a (sub)category."""

        query = Match(_type="chapter") & MultiMatch(query=self.search_query,
                                                    fields=["title", "text"])

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """

        query = (Match(_type="topic")
                 & Terms(forum_pk=self.authorized_forums)
                 & MultiMatch(query=self.search_query,
                              fields=["title", "subtitle", "tags"]))

        boosts = settings.ZDS_APP["search"]["boosts"]["topic"]

        functions_score = [
            {"filter": Match(is_solved=True), "weight": boosts["if_solved"]},
            {"filter": Match(is_sticky=True), "weight": boosts["if_sticky"]},
            {"filter": Match(is_locked=True), "weight": boosts["if_locked"]},
        ]

        return FunctionScore(query=query,
                             boost_mode="multiply",
                             functions=functions_score)

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """

        query = (Match(_type="post")
                 & Terms(forum_pk=self.authorized_forums)
                 & Term(is_visible=True)
                 & MultiMatch(query=self.search_query, fields=["text_html"]))

        boosts = settings.ZDS_APP["search"]["boosts"]["post"]

        functions_score = [
            {"filter": Match(position=1), "weight": boosts["if_first"]},
            {"filter": Match(is_useful=True), "weight": boosts["if_useful"]},
            {
                "filter": Range(like_dislike_ratio={"gt": 1}),
                "weight": boosts["ld_ratio_above_1"],
            },
            {
                "filter": Range(like_dislike_ratio={"lt": 1}),
                "weight": boosts["ld_ratio_below_1"],
            },
        ]

        return FunctionScore(query=query,
                             boost_mode="multiply",
                             functions=functions_score)

    def get_context_data(self, **kwargs):
        """Add the search form and a "query was submitted" flag to the context."""

        context = super().get_context_data(**kwargs)
        context["form"] = self.search_form
        context["query"] = self.search_query is not None

        return context
示例#23
0
class UtilsTests(TutorialTestMixin, TestCase):
    """Tests of the ``es_manager`` management command."""

    def setUp(self):
        """Create the users, a forum and the index manager used by the tests."""

        settings.EMAIL_BACKEND = "django.core.mail.backends.locmem.EmailBackend"
        self.mas = ProfileFactory().user
        settings.ZDS_APP["member"]["bot_account"] = self.mas.username

        self.category, self.forum = create_category_and_forum()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def _reload_and_check_flags(self, topic, post, tuto, indexed):
        """Reload the three objects from the database and check their ES flags.

        When ``indexed`` is true, every object must be marked as already indexed
        and not flagged; otherwise the opposite. Returns the reloaded
        ``(topic, post, published)`` triple.
        """

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)
        published = PublishedContent.objects.get(content_pk=tuto.pk)

        for instance in (topic, post, published):
            self.assertEqual(bool(instance.es_already_indexed), indexed)
            self.assertEqual(bool(instance.es_flagged), not indexed)

        return topic, post, published

    def _search_everything(self):
        """Run a match-all search on the index and return the executed response."""

        # Search.query() returns a new Search object, so its result must be kept.
        search = Search().query(MatchAll())
        return self.index_manager.setup_search(search).execute()

    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command"""

        if not self.index_manager.connected_to_es:
            return

        index_name = self.index_manager.index
        es = self.index_manager.es

        # in the beginning: the void
        self.assertNotIn(index_name, es.cat.indices())

        text = "Ceci est un texte de test"

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type="TUTORIAL")
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        # nothing is indexed yet, everything is flagged
        topic, post, published = self._reload_and_check_flags(topic, post, tuto, indexed=False)

        # 1. test "index-all"
        call_command("es_manager", "index_all")
        self.assertTrue(es.indices.exists(index_name))
        self.index_manager.index_exists = True

        topic, post, published = self._reload_and_check_flags(topic, post, tuto, indexed=True)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {"post": False, "topic": False, "publishedcontent": False, "chapter": False}
        id_must_be = {
            "post": str(post.pk),
            "topic": str(topic.pk),
            "publishedcontent": str(published.pk),
            "chapter": tuto.slug + "__" + chapter1.slug,
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # Check the values: iterating the dict itself yields its (always truthy) keys.
        self.assertTrue(all(must_contain.values()))

        # 2. test "clear"
        self.assertIn(index_name, es.cat.indices())  # index in

        call_command("es_manager", "clear")
        self.assertFalse(es.indices.exists(index_name))
        self.index_manager.index_exists = False

        # must reset every object
        topic, post, published = self._reload_and_check_flags(topic, post, tuto, indexed=False)

        self.assertNotIn(index_name, es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command("es_manager", "setup")
        self.assertTrue(es.indices.exists(index_name))
        self.index_manager.index_exists = True

        self.assertIn(index_name, es.cat.indices())  # index back in ...

        results = self._search_everything()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = es.indices.get_settings(index=index_name)
        settings_index = result[index_name]["settings"]["index"]
        self.assertIn("analysis", settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command("es_manager", "index_flagged")

        topic, post, published = self._reload_and_check_flags(topic, post, tuto, indexed=True)

        results = self._search_everything()
        self.assertEqual(len(results), 4)  # get the 4 results back

    def tearDown(self):
        """Run the parent cleanup, then remove the Elasticsearch index."""

        super().tearDown()

        # delete index:
        self.index_manager.clear_es_index()
示例#24
0
class ViewsTests(TestCase):
    def setUp(self):
        """Prepare settings, users, a forum and a freshly reset ES index."""
        zds_app = settings.ZDS_APP

        # don't build PDF to speed up the tests
        zds_app['content']['build_pdf_when_published'] = False

        settings.EMAIL_BACKEND = 'django.core.mail.backends.locmem.EmailBackend'
        self.mas = ProfileFactory().user
        zds_app['member']['bot_account'] = self.mas.username

        self.category, self.forum = create_category()

        self.user = ProfileFactory().user
        self.staff = StaffProfileFactory().user

        self.indexable = [FakeChapter, PublishedContent, Topic, Post]

        # Start every test from an empty index with the custom analyzer in place
        manager = ESIndexManager(**settings.ES_SEARCH_INDEX)
        self.manager = manager
        manager.reset_es_index(self.indexable)
        manager.setup_custom_analyzer()
        manager.refresh_index()

    def test_basic_search(self):
        """Search a common word, then filter the results by search group."""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        text = 'test'
        search_url = reverse('search:query') + '?q=' + text

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        # create a middle-size content and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        # change title to be sure it will match
        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_content(tuto, tuto_draft, is_major_update=True)

        current_version = tuto_draft.current_version
        tuto.sha_public = current_version
        tuto.sha_draft = current_version
        tuto.public_version = published
        tuto.save()

        # nothing has been indexed yet:
        everything = self.manager.setup_search(Search().query(MatchAll()))
        self.assertEqual(len(everything.execute()), 0)

        # index (FakeChapter is not indexed on its own)
        for model in self.indexable:
            if model is not FakeChapter:
                self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        page = self.client.get(search_url, follow=False)
        self.assertEqual(page.status_code, 200)

        es_response = page.context['object_list'].execute()

        self.assertEqual(es_response.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        topic_1 = Topic.objects.get(pk=topic_1.pk)
        post_1 = Post.objects.get(pk=post_1.pk)
        published = PublishedContent.objects.get(pk=published.pk)

        expected_ids = {
            'topic': [topic_1.es_id],
            'post': [post_1.es_id],
            'content': [
                published.es_id,
                published.content_public_slug + '__' + chapter1.slug,
            ],
        }

        # Maps each search group to the document types it contains
        group_to_model = {
            group: definition[1]
            for group, definition in settings.ZDS_APP['search']['search_groups'].items()
        }

        for doc_type in list(group_to_model):
            page = self.client.get(search_url + '&models=' + doc_type, follow=False)
            self.assertEqual(page.status_code, 200)

            es_response = page.context['object_list'].execute()

            # get 1 result of each …
            self.assertEqual(es_response.hits.total, len(expected_ids[doc_type]))
            for position, hit in enumerate(es_response):
                # … and only of the right type …
                self.assertIn(hit.meta.doc_type, group_to_model[doc_type])
                # … with the right id !
                self.assertEqual(hit.meta.id, expected_ids[doc_type][position])

    def test_get_similar_topics(self):
        """Get similar topics lists.

        Two topics sharing the word "Clem" are created; the "similar" endpoint
        must return nothing before indexing, then one or two results depending
        on the searched word.
        """

        if not self.manager.connected_to_es:
            return

        def count_similar(word):
            """Query the "similar topics" endpoint and return the number of results."""
            result = self.client.get(reverse('search:similar') + '?q=' + word,
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            content = json.loads(result.content.decode('utf-8'))
            return len(content['results'])

        text = 'Clem ne se mange pas'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        text = 'Clem est la meilleure mascotte'

        topic_2 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_2 = PostFactory(topic=topic_2, author=self.user, position=1)
        # Set the HTML of post_2 itself (it used to be assigned to post_1 by mistake)
        post_2.text = post_2.text_html = text
        post_2.save()

        # 1. Should not get any result
        self.assertEqual(count_similar('est'), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        # 2. Should get exactly one result
        self.assertEqual(count_similar('mange'), 1)

        # 3. Should get exactly two results
        self.assertEqual(count_similar('Clem'), 2)

    def test_hidden_post_are_not_result(self):
        """A post must disappear from the search results once it is hidden."""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        text = 'test'

        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        for model in (Topic, Post):
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        everything = self.manager.setup_search(Search().query(MatchAll()))
        self.assertEqual(len(everything.execute()), 2)  # indexing ok

        post_1 = Post.objects.get(pk=post_1.pk)

        def search_posts():
            """Search ``text`` among posts only and return the executed ES response."""
            url = reverse('search:query') + '?q=' + text + '&models=' + Post.get_es_document_type()
            page = self.client.get(url, follow=False)
            self.assertEqual(page.status_code, 200)
            return page.context['object_list'].execute()

        es_response = search_posts()

        self.assertEqual(es_response.hits.total, 1)
        self.assertEqual(es_response[0].meta.id, post_1.es_id)

        # 2. Hide, reindex and search again:
        post_1.hide_comment_by_user(self.staff,
                                    'Un abus de pouvoir comme un autre ;)')
        self.manager.refresh_index()

        es_response = search_posts()
        self.assertEqual(es_response.hits.total, 0)  # nothing in the results

    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Content of a group-restricted forum is only found by members of that group."""

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden staff group.
        text = 'test'

        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _category, hidden_forum = create_category(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic_1 = TopicFactory(forum=hidden_forum, author=self.staff, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        for model in (Topic, Post):
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        everything = self.manager.setup_search(Search().query(MatchAll()))
        self.assertEqual(len(everything.execute()), 2)  # indexing ok

        def count_hits():
            """Search ``text`` with the current session and return the total number of hits."""
            page = self.client.get(reverse('search:query') + '?q=' + text, follow=False)
            self.assertEqual(page.status_code, 200)
            return page.context['object_list'].execute().hits.total

        # 2. search without connection and get not result
        self.assertEqual(count_hits(), 0)

        # 3. Connect with user (not a member of the group), search, and get no result
        logged_in = self.client.login(username=self.user.username, password='******')
        self.assertTrue(logged_in)

        self.assertEqual(count_hits(), 0)

        # 4. Connect with staff, search, and get the topic and the post
        self.client.logout()
        logged_in = self.client.login(username=self.staff.username, password='******')
        self.assertTrue(logged_in)

        self.assertEqual(count_hits(), 2)  # ok !

    def test_boosts(self):
        """Check if boosts are doing their job"""

        if not self.manager.connected_to_es:
            return

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        text = 'test'

        topic_1_solved_sticky = TopicFactory(forum=self.forum,
                                             author=self.user)
        topic_1_solved_sticky.title = text
        topic_1_solved_sticky.subtitle = ''
        topic_1_solved_sticky.is_solved = True
        topic_1_solved_sticky.is_sticky = True
        topic_1_solved_sticky.save()

        post_1 = PostFactory(topic=topic_1_solved_sticky,
                             author=self.user,
                             position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        post_2_useful = PostFactory(topic=topic_1_solved_sticky,
                                    author=self.user,
                                    position=2)
        post_2_useful.text = post_2_useful.text_html = text
        post_2_useful.is_useful = True
        post_2_useful.like = 5
        post_2_useful.dislike = 2  # l/d ratio above 1
        post_2_useful.save()

        topic_2_locked = TopicFactory(forum=self.forum,
                                      author=self.user,
                                      title=text)
        topic_2_locked.title = text
        topic_2_locked.subtitle = ''
        topic_2_locked.is_locked = True
        topic_2_locked.save()

        post_3_ld_below_1 = PostFactory(topic=topic_2_locked,
                                        author=self.user,
                                        position=1)
        post_3_ld_below_1.text = post_3_ld_below_1.text_html = text
        post_3_ld_below_1.like = 2
        post_3_ld_below_1.dislike = 5  # l/d ratio below 1
        post_3_ld_below_1.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter1, db_object=tuto)

        published_tuto = publish_content(tuto,
                                         tuto_draft,
                                         is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION',
                                                     title=text)
        published_opinion_not_picked = PublishedContent.objects.get(
            content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(
            content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 10)

        # 2. Reset all boosts to 1
        for doc_type in settings.ZDS_APP['search']['boosts']:
            for key in settings.ZDS_APP['search']['boosts'][doc_type]:
                settings.ZDS_APP['search']['boosts'][doc_type][key] = 1.0

        # 3. Test posts
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score ==
                        response[2].meta.score)

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score == response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(response[2].meta.id, str(
            post_2_useful.pk))  # post 2 is the only one not first

        settings.ZDS_APP['search']['boosts']['post']['if_first'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(post_2_useful.pk))  # post 2 is useful

        settings.ZDS_APP['search']['boosts']['post']['if_useful'] = 1.0
        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_2_useful.pk))  # post 2 have a l/d ratio of 5/2

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_above_1'] = 1.0
        settings.ZDS_APP['search']['boosts']['post'][
            'ld_ratio_below_1'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Post.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 3)

        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        self.assertEqual(response[0].meta.id, str(
            post_3_ld_below_1.pk))  # post 3 have a l/d ratio of 2/5

        settings.ZDS_APP['search']['boosts']['post']['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        # score are equals without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is sticky

        settings.ZDS_APP['search']['boosts']['topic']['if_sticky'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_1_solved_sticky.pk))  # topic 1 is solved

        settings.ZDS_APP['search']['boosts']['topic']['if_solved'] = 1.0
        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 2.0  # no one would do that in real life

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=' + Topic.get_es_document_type(),
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 2)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(topic_2_locked.pk))  # topic 2 is locked

        settings.ZDS_APP['search']['boosts']['topic'][
            'if_locked'] = 1.0  # no one would do that in real life

        # 5. Test published contents
        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        # score are equals without boost:
        self.assertTrue(
            response[0].meta.score == response[1].meta.score == response[2].
            meta.score == response[3].meta.score == response[4].meta.score)

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_article.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_article'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_tutorial'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 2.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score >
                        response[2].meta.score)
        self.assertEqual(
            response[0].meta.id,
            str(published_opinion_not_picked.pk))  # unpicked opinion got first
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_opinion_not_picked'] = 1.0
        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 2.0

        result = self.client.get(reverse('search:query') + '?q=' + text +
                                 '&models=content',
                                 follow=False)

        self.assertEqual(result.status_code, 200)
        response = result.context['object_list'].execute()
        self.assertEqual(response.hits.total, 5)

        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id,
                         str(published_tuto.pk))  # obvious

        settings.ZDS_APP['search']['boosts']['publishedcontent'][
            'if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: score are NOT the same for all documents, no matter how hard it tries to, small differences exists

        for model in self.indexable:

            # set a huge number to overcome the small differences:
            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 10.0

            result = self.client.get(reverse('search:query') + '?q=' + text,
                                     follow=False)

            self.assertEqual(result.status_code, 200)
            response = result.context['object_list'].execute()
            self.assertEqual(response.hits.total, 10)

            self.assertEqual(response[0].meta.doc_type,
                             model.get_es_document_type())  # obvious

            settings.ZDS_APP['search']['boosts'][
                model.get_es_document_type()]['global'] = 1.0

    def test_change_topic_impacts_posts(self):
        """Check that changes made to a topic are reflected in its indexed posts.

        The topic is first renamed, then moved by a staff member to a hidden
        forum. After each change (and a re-indexing), the post returned by the
        search view must carry the new topic title / forum, and it must not be
        returned anymore once the client is logged out.
        """

        if not self.manager.connected_to_es:
            # Report the test as skipped instead of letting it pass without
            # having checked anything.
            self.skipTest('Elasticsearch is not available')

        text = 'test'

        def reindex_topics_and_posts():
            """Re-index the topics and the posts, then refresh the index."""
            self.manager.es_bulk_indexing_of_model(Topic)
            self.manager.es_bulk_indexing_of_model(Post)
            self.manager.refresh_index()

        def search_posts():
            """Search `text` among posts through the view and return the
            executed response."""
            result = self.client.get(reverse('search:query') + '?q=' + text +
                                     '&models=' + Post.get_es_document_type(),
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category(group)

        self.staff.groups.add(group)
        self.staff.save()

        # 2. Create a normal topic and index it
        topic_1 = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic_1, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        reindex_topics_and_posts()

        indexed = self.manager.setup_search(
            Search().query(MatchAll())).execute()
        self.assertEqual(len(indexed), 2)  # indexing ok

        response = search_posts()
        self.assertEqual(response.hits.total, 1)  # ok

        found_post = response[0]
        self.assertEqual(found_post.meta.doc_type,
                         Post.get_es_document_type())
        self.assertEqual(found_post.forum_pk, self.forum.pk)
        self.assertEqual(found_post.topic_pk, topic_1.pk)
        self.assertEqual(found_post.topic_title, topic_1.title)

        # 3. Change topic title and reindex
        topic_1.title = 'new title'
        topic_1.save()

        reindex_topics_and_posts()

        response = search_posts()
        self.assertEqual(response.hits.total, 1)  # ok
        self.assertEqual(response[0].topic_title,
                         topic_1.title)  # title was changed

        # 4. connect with staff and move topic
        self.assertTrue(
            self.client.login(username=self.staff.username,
                              password='******'))

        data = {'move': '', 'forum': hidden_forum.pk, 'topic': topic_1.pk}
        move_response = self.client.post(reverse('topic-edit'),
                                         data,
                                         follow=False)
        self.assertEqual(302, move_response.status_code)

        reindex_topics_and_posts()

        response = search_posts()
        self.assertEqual(
            response.hits.total,
            1)  # Note: without staff, would not get any results (see below)
        self.assertEqual(response[0].forum_pk,
                         hidden_forum.pk)  # post was updated with new forum

        # 5. Topic is now hidden
        self.client.logout()

        response = search_posts()
        self.assertEqual(response.hits.total, 0)  # ok

    def test_change_publishedcontents_impacts_chapter(self):
        """Check that publishing a content again updates its indexed chapters.

        A tutorial made of one chapter is published and indexed. The chapter
        is then deleted and replaced by another one, and the tutorial is
        published and indexed again: the old chapter must not be found
        anymore, while the new one must be.
        """

        if not self.manager.connected_to_es:
            # Report the test as skipped instead of letting it pass without
            # having checked anything.
            self.skipTest('Elasticsearch is not available')

        def count_indexed_documents():
            """Return the number of results of a "match all" search."""
            return len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute())

        def publish_and_index(content, draft, is_major_update):
            """Publish `draft`, store the published version on `content`,
            then index the published contents and refresh the index.

            Returns the object returned by `publish_content()`.
            """
            published_content = publish_content(
                content, draft, is_major_update=is_major_update)

            content.sha_public = draft.current_version
            content.sha_draft = draft.current_version
            content.public_version = published_content
            content.save()

            self.manager.es_bulk_indexing_of_model(PublishedContent)
            self.manager.refresh_index()

            return published_content

        def search_contents(query):
            """Search `query` among contents through the view and return the
            executed response."""
            result = self.client.get(reverse('search:query') + '?q=' + query +
                                     '&models=content',
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create middle-size content and index it
        text = 'test'

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(
            text, tuto.slug, text,
            text)  # change title to be sure it will match

        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract = ExtractFactory(container=chapter1, db_object=tuto)
        extract.repo_update(text, text)

        published = publish_and_index(tuto, tuto_draft, is_major_update=True)

        self.assertEqual(count_indexed_documents(), 2)  # indexing ok

        response = search_contents(text)
        self.assertEqual(response.hits.total, 2)

        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id,
                         published.content_public_slug + '__' + chapter1.slug)

        # 2. Change tuto: delete chapter and insert new one !
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        tuto_draft = tuto.load_version()

        tuto_draft.children[0].repo_delete()  # chapter 1 is gone !

        another_text = 'another thing'
        # to prevent a future modification from breaking this test
        self.assertNotIn(text, another_text)

        chapter2 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter2.repo_update(another_text, another_text, another_text)
        extract2 = ExtractFactory(container=chapter2, db_object=tuto)
        extract2.repo_update(another_text, another_text)

        published = publish_and_index(tuto, tuto_draft, is_major_update=False)

        self.assertEqual(count_indexed_documents(), 2)  # 2 objects, not 3 !

        response = search_contents(text)
        contents = [r for r in response if r.meta.doc_type != 'chapter']
        self.assertEqual(response.hits.total,
                         len(contents))  # no chapter found anymore

        response = search_contents(another_text)
        chapters = [r for r in response if r.meta.doc_type == 'chapter']
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(chapters[0].meta.doc_type,
                         FakeChapter.get_es_document_type())
        self.assertEqual(chapters[0].meta.id, published.content_public_slug +
                         '__' + chapter2.slug)  # got new chapter

    def test_opensearch(self):
        """Check that the `search:opensearch` page answers with a 200 and
        contains both the query URL and its own URL."""

        opensearch_page = self.client.get(reverse('search:opensearch'),
                                          follow=False)
        self.assertEqual(opensearch_page.status_code, 200)

        for url_name in ('search:query', 'search:opensearch'):
            self.assertContains(opensearch_page, reverse(url_name))

    def test_upercase_and_lowercase_search_give_same_results(self):
        """Check that searching a lowercase or an uppercase text gives the
        same results, in the same order.

        The same set of documents (a topic, its post and a published tutorial
        with one chapter) is created twice, once with a lowercase text and
        once with the same text in uppercase. Both searches must then return
        the same hits.
        """

        if not self.manager.connected_to_es:
            # Report the test as skipped instead of letting it pass without
            # having checked anything.
            self.skipTest('Elasticsearch is not available')

        def create_documents(text, tag, subcategory):
            """Create a topic with one post and publish a tutorial with one
            chapter, all of them using `text`, `tag` and `subcategory`."""
            topic = TopicFactory(forum=self.forum,
                                 author=self.user,
                                 title=text)
            topic.tags.add(tag)
            topic.subtitle = text
            topic.save()

            post = PostFactory(topic=topic, author=self.user, position=1)
            post.text = post.text_html = text
            post.save()

            tuto = PublishableContentFactory(type='TUTORIAL')
            tuto_draft = tuto.load_version()

            tuto.title = text
            tuto.authors.add(self.user)
            tuto.subcategory.add(subcategory)
            tuto.tags.add(tag)
            tuto.save()

            tuto_draft.description = text
            tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

            chapter = ContainerFactory(parent=tuto_draft, db_object=tuto)
            extract = ExtractFactory(container=chapter, db_object=tuto)
            extract.repo_update(text, text)

            published = publish_content(tuto,
                                        tuto_draft,
                                        is_major_update=True)

            tuto.sha_public = tuto_draft.current_version
            tuto.sha_draft = tuto_draft.current_version
            tuto.public_version = published
            tuto.save()

        def search(query):
            """Search `query` through the view and return the executed
            response."""
            result = self.client.get(reverse('search:query') + '?q=' + query,
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        text_lc = 'test'
        text_uc = 'TEST'

        # Note: a constraint forces tags title to be unique, so the same tag
        # (and the same subcategory) is shared by both sets of documents.
        tag_lc = TagFactory(title=text_lc)
        subcategory_lc = SubCategoryFactory(title=text_lc)

        # 1. Index lowercase stuffs
        create_documents(text_lc, tag_lc, subcategory_lc)

        # 2. Index uppercase stuffs
        create_documents(text_uc, tag_lc, subcategory_lc)

        # 3. Index and search:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        response_lc = search(text_lc)
        self.assertEqual(response_lc.hits.total, 8)

        response_uc = search(text_uc)
        self.assertEqual(response_uc.hits.total, 8)

        # we should get results in the same order!
        for hit_lc, hit_uc in zip(response_lc, response_uc):
            self.assertEqual(hit_lc.meta.id, hit_uc.meta.id)

    def test_category_and_subcategory_impact_search(self):
        """Check that filtering by subcategory only returns the contents
        (and their chapters) belonging to that subcategory.

        Two tutorials sharing the same text are published in two different
        subcategories; searching with the `subcategory` parameter must only
        return the matching tutorial and its chapter.

        NOTE(review): only the `subcategory` parameter is exercised here, the
        `category` one is not.
        """

        if not self.manager.connected_to_es:
            # Report the test as skipped instead of letting it pass without
            # having checked anything.
            self.skipTest('Elasticsearch is not available')

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        def publish_tutorial(subcategory):
            """Create and publish a tutorial with one chapter in `subcategory`.

            Returns the tutorial, its chapter and the published content.
            """
            tuto = PublishableContentFactory(type='TUTORIAL')
            tuto_draft = tuto.load_version()

            tuto.title = text
            tuto.authors.add(self.user)
            tuto.subcategory.add(subcategory)
            tuto.save()

            tuto_draft.description = text
            tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

            chapter = ContainerFactory(parent=tuto_draft, db_object=tuto)
            extract = ExtractFactory(container=chapter, db_object=tuto)
            extract.repo_update(text, text)

            published = publish_content(tuto,
                                        tuto_draft,
                                        is_major_update=True)

            tuto.sha_public = tuto_draft.current_version
            tuto.sha_draft = tuto_draft.current_version
            tuto.public_version = published
            tuto.save()

            return tuto, chapter, published

        def search(extra_parameters=''):
            """Search `text` through the view, with `extra_parameters`
            appended to the query string, and return the executed response."""
            result = self.client.get(reverse('search:query') + '?q=' + text +
                                     extra_parameters,
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create two contents with different subcategories
        category_1 = 'category 1'
        subcategory_1 = SubCategoryFactory(title=category_1)
        category_2 = 'category 2'
        subcategory_2 = SubCategoryFactory(title=category_2)

        tuto_1, chapter_1, published_1 = publish_tutorial(subcategory_1)
        tuto_2, chapter_2, published_2 = publish_tutorial(subcategory_2)

        # 2. Index:
        self.assertEqual(
            len(
                self.manager.setup_search(Search().query(
                    MatchAll())).execute()), 0)

        # index
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        response = search()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test
        expectations = (
            (subcategory_1, tuto_1, chapter_1, published_1),
            (subcategory_2, tuto_2, chapter_2, published_2),
        )

        for subcategory, tuto, chapter, published in expectations:
            # The parameter is named `models` (as in every other request of
            # these tests); a misspelled `model` would simply be ignored.
            response = search('&models=content&subcategory=' +
                              subcategory.slug)
            self.assertEqual(response.hits.total, 2)

            content_ids = [
                int(r.meta.id) for r in response
                if r.meta.doc_type == 'publishedcontent'
            ]
            chapter_ids = [
                r.meta.id for r in response if r.meta.doc_type == 'chapter'
            ]

            self.assertEqual(content_ids[0], published.pk)
            self.assertEqual(chapter_ids[0], tuto.slug + '__' + chapter.slug)

    def tearDown(self):
        """Remove the directories used by the test (private and public content
        repositories, media root) when they exist, set the
        `build_pdf_when_published` setting back to True and clear the index."""

        content_settings = settings.ZDS_APP['content']

        directories = (
            content_settings['repo_private_path'],
            content_settings['repo_public_path'],
            settings.MEDIA_ROOT,
        )
        for directory in directories:
            if os.path.isdir(directory):
                shutil.rmtree(directory)

        # re-activate PDF build
        content_settings['build_pdf_when_published'] = True

        # delete index:
        self.manager.clear_es_index()
示例#25
0
class SearchView(ZdSPagingListView):
    """Search view.

    Reads the ``q`` parameter and the search form from the request, builds one query per
    searchable model (published contents, chapters, topics, posts), combines and weights
    them, and hands the resulting search to the Elasticsearch index manager.
    """

    template_name = 'searchv2/search.html'
    paginate_by = settings.ZDS_APP['search']['results_per_page']

    search_form_class = SearchForm
    search_form = None
    search_query = None
    content_category = None
    content_subcategory = None

    # Updated by get_queryset() from the form; declared here so that
    # get_queryset_publishedcontents() can always read it.
    from_library = False

    authorized_forums = ''

    index_manager = None

    def __init__(self, **kwargs):
        """Overridden because the index manager must NOT be initialized elsewhere."""

        super(SearchView, self).__init__(**kwargs)
        self.index_manager = ESIndexManager(**settings.ES_SEARCH_INDEX)

    def get(self, request, *args, **kwargs):
        """Overridden to catch the request and fill the form.

        Stores the ``q`` parameter (when present) in ``self.search_query`` and binds the
        search form to the GET data.

        :raises PermissionDenied: if a non-empty query was given and the form is invalid.
        """

        if 'q' in request.GET:
            # joining a plain string yields the same string, so this only normalizes the value
            self.search_query = ''.join(request.GET['q'])

        self.search_form = self.search_form_class(data=self.request.GET)

        if self.search_query and not self.search_form.is_valid():
            raise PermissionDenied('research form is invalid')

        return super(SearchView, self).get(request, *args, **kwargs)

    def get_queryset(self):
        """Build the search matching the request.

        :return: an empty list when Elasticsearch is unreachable, when there is no query or
            when no searchable model could be resolved; otherwise the value returned by
            ``index_manager.setup_search()`` for the built search.
        """

        if not self.index_manager.connected_to_es:
            messages.warning(self.request, _('Impossible de se connecter à Elasticsearch'))
            return []

        if not self.search_query:
            return []

        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        cleaned_data = self.search_form.cleaned_data

        # Restrict (sub)category if any
        if cleaned_data['category']:
            self.content_category = cleaned_data['category']
        if cleaned_data['subcategory']:
            self.content_subcategory = cleaned_data['subcategory']

        # Mark that contents must come from library if required
        self.from_library = cleaned_data['from_library'] == 'on'

        # Setting the different querysets (according to the selected models, if any)
        search_groups = settings.ZDS_APP['search']['search_groups']
        chosen_groups = cleaned_data['models']

        if chosen_groups:
            grouped_models = [search_groups[group][1] for group in chosen_groups if group in search_groups]
        else:
            grouped_models = [group[1] for group in search_groups.values()]

        # reduce() without an initial value raises TypeError on an empty sequence
        models = reduce(operator.concat, grouped_models) if grouped_models else []

        part_querysets = [getattr(self, 'get_queryset_{}s'.format(model))() for model in models]

        if not part_querysets:
            # nothing to search in: behave like an empty result instead of crashing
            return []

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # Weighting:
        weight_functions = [
            {'filter': Match(_type=_type), 'weight': weights['global']}
            for _type, weights in settings.ZDS_APP['search']['boosts'].items()
            if _type in models
        ]

        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = Search().query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    def _restrict_to_category(self, query):
        """Return ``query`` narrowed to the selected category and subcategory, if any was chosen."""

        if self.content_category:
            query &= Match(categories=self.content_category)

        if self.content_subcategory:
            query &= Match(subcategories=self.content_subcategory)

        return query

    def get_queryset_publishedcontents(self):
        """Search in PublishedContent objects.

        The query is limited to tutorials and articles when ``from_library`` is set, and to
        the selected (sub)category if any. Score is modified according to the content type
        (tutorial, tutorial with chapters, article, opinion, opinion which is not picked).
        """

        query = Match(_type='publishedcontent') & MultiMatch(
            query=self.search_query,
            fields=['title', 'description', 'categories', 'subcategories', 'tags', 'text'])

        if self.from_library:
            query &= Match(content_type='TUTORIAL') | Match(content_type='ARTICLE')

        query = self._restrict_to_category(query)

        boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']

        functions_score = [
            {
                'filter': Match(content_type='TUTORIAL'),
                'weight': boosts['if_tutorial']
            },
            {
                'filter': Match(content_type='TUTORIAL') & Match(has_chapters=True),
                'weight': boosts['if_medium_or_big_tutorial']
            },
            {
                'filter': Match(content_type='ARTICLE'),
                'weight': boosts['if_article']
            },
            {
                'filter': Match(content_type='OPINION'),
                'weight': boosts['if_opinion']
            },
            {
                'filter': Match(content_type='OPINION') & Match(picked=False),
                'weight': boosts['if_opinion_not_picked']
            },
        ]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_queryset_chapters(self):
        """Search in content chapters, limited to the selected (sub)category if any."""

        query = Match(_type='chapter') \
            & MultiMatch(query=self.search_query, fields=['title', 'text'])

        return self._restrict_to_category(query)

    def get_queryset_topics(self):
        """Search in topics, and remove the result if the forum is not allowed for the user.

        Score is modified if:

        + topic is solved;
        + topic is sticky;
        + topic is locked.
        """

        query = Match(_type='topic') \
            & Terms(forum_pk=self.authorized_forums) \
            & MultiMatch(query=self.search_query, fields=['title', 'subtitle', 'tags'])

        boosts = settings.ZDS_APP['search']['boosts']['topic']

        functions_score = [
            {'filter': Match(is_solved=True), 'weight': boosts['if_solved']},
            {'filter': Match(is_sticky=True), 'weight': boosts['if_sticky']},
            {'filter': Match(is_locked=True), 'weight': boosts['if_locked']}
        ]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_queryset_posts(self):
        """Search in posts, and remove result if the forum is not allowed for the user or if the message is invisible.

        Score is modified if:

        + post is the first one in a topic;
        + post is marked as "useful";
        + post has a like/dislike ratio above (has more likes than dislikes) or below (the other way around) 1.0.
        """

        query = Match(_type='post') \
            & Terms(forum_pk=self.authorized_forums) \
            & Term(is_visible=True) \
            & MultiMatch(query=self.search_query, fields=['text_html'])

        boosts = settings.ZDS_APP['search']['boosts']['post']

        functions_score = [
            {'filter': Match(position=1), 'weight': boosts['if_first']},
            {'filter': Match(is_useful=True), 'weight': boosts['if_useful']},
            {
                'filter': Range(like_dislike_ratio={'gt': 1}),
                'weight': boosts['ld_ratio_above_1']
            },
            {
                'filter': Range(like_dislike_ratio={'lt': 1}),
                'weight': boosts['ld_ratio_below_1']
            }
        ]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)

    def get_context_data(self, **kwargs):
        """Add the search form and a flag telling whether a ``q`` parameter was received to the context."""

        context = super(SearchView, self).get_context_data(**kwargs)
        context['form'] = self.search_form
        # search_query stays None unless get() found a 'q' parameter in the request
        context['query'] = self.search_query is not None

        return context