Пример #1
0
    def test_file_search_show_versions(self, client, all_projects, es_index, settings):
        """
        The version list on the file search page shows every project version.

        Three versions are created on top of ``LATEST`` and the index is
        rebuilt before searching.
        """
        # Index every version, not just the latest one
        settings.INDEX_ONLY_LATEST = False

        target_project = all_projects[0]
        # Three additional versions for the project
        extra_versions = [G(Version, project=target_project) for _ in range(3)]
        self._reindex_elasticsearch(es_index=es_index)

        query = get_search_query_from_project_file(project_slug=target_project.slug)

        result, page = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': query, 'type': 'file'},
        )

        # Only the latest version's result is expected by default,
        # hence a single hit
        assert len(result) == 1

        version_nodes = page.find('.navigable .version-list')
        # 4 versions in total: latest plus the 3 created above
        assert len(version_nodes) == 4

        expected_slugs = [version.slug for version in extra_versions]
        expected_slugs.append(LATEST)
        # The first line of each element's stripped text is the version slug
        listed_slugs = [
            node.text_content().strip().split('\n')[0]
            for node in version_nodes
        ]

        assert sorted(expected_slugs) == sorted(listed_slugs)
Пример #2
0
    def test_file_search(self, client, project, data_type, page_num):
        """
        Run a file search and check the highlighting of the first result.

        ``data_type`` is either a bare field name or ``<type>.<field>``.
        It is split on ``'.'`` and the parts are passed on as the ``type``
        and ``field`` keyword arguments of
        ``get_search_query_from_project_file``; a bare field name leaves
        the type as ``None``.
        """
        # Use dedicated local names so neither the builtin ``type`` nor the
        # ``data_type`` parameter gets shadowed/rebound.
        parts = data_type.split('.')
        block_type, field = None, None
        if len(parts) < 2:
            field = parts[0]
        else:
            block_type, field = parts
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            type=block_type,
            field=field,
        )
        results, _ = self._get_search_result(url=self.url,
                                             client=client,
                                             search_params={
                                                 'q': query,
                                                 'type': 'file'
                                             })
        assert len(results) >= 1

        # checking first result
        result_0 = results[0]
        highlight = self._get_highlight(result_0, field, block_type)
        assert len(highlight) == 1

        highlighted_words = self._get_highlighted_words(highlight[0])
        assert len(highlighted_words) >= 1
        for word in highlighted_words:
            # Make it lower because our search is case insensitive
            assert word.lower() in query.lower()
Пример #3
0
    def test_file_search_show_versions(self, client, all_projects, es_index,
                                       settings):
        """
        The ``version`` facet of a file search lists every project version.

        Three versions are created on top of ``LATEST``; the facet keys
        returned by the search must match those four slugs.
        """
        # override the settings to index all versions
        settings.INDEX_ONLY_LATEST = False

        project = all_projects[0]
        # Create some versions of the project
        versions = [G(Version, project=project) for _ in range(3)]
        query = get_search_query_from_project_file(project_slug=project.slug)
        results, facets = self._get_search_result(
            url=self.url,
            client=client,
            search_params={
                'q': query,
                'type': 'file'
            },
        )

        # Results can be from other projects also
        assert len(results) >= 1

        version_facets = facets['version']
        # The first item of each facet entry is compared against the slugs
        version_facets_str = [facet[0] for facet in version_facets]
        # There should be total 4 versions
        # one is latest, and other 3 that we created above
        assert len(version_facets) == 4

        project_versions = [v.slug for v in versions] + [LATEST]
        # Compare against the list built above (the original referenced an
        # undefined name here and failed with NameError).
        assert sorted(project_versions) == sorted(version_facets_str)
Пример #4
0
    def test_search_by_file_content(self, client, project, data_type, page_num):
        """A query built by ``get_search_query_from_project_file`` yields one file result."""
        search_params = {
            'q': get_search_query_from_project_file(
                project_slug=project.slug,
                page_num=page_num,
                data_type=data_type,
            ),
            'type': 'file',
        }

        hits, _ = self._get_search_result(
            url=self.url,
            client=client,
            search_params=search_params,
        )
        assert len(hits) == 1
Пример #5
0
    def test_file_search_subprojects(self, client, all_projects, es_index):
        """
        Filtering file search by a parent project returns no subproject results.

        TODO: File search should return results from subprojects also.
        This is currently disabled because the UX around it is weird:
        you filter by one project and get results for multiple.
        """
        parent = all_projects[0]
        child = all_projects[1]
        # Register the second project as a subproject of the first
        parent.add_subproject(child)

        # Query for the subproject's content while filtering by the parent
        query = get_search_query_from_project_file(project_slug=child.slug)
        results, _ = self._get_search_result(
            url=self.url,
            client=client,
            search_params={
                'q': query,
                'type': 'file',
                'project': parent.slug,
            },
        )
        assert len(results) == 0
Пример #6
0
    def test_doc_search_filter_by_version(self, api_client, project):
        """Doc search results are filtered by the requested version."""
        query = get_search_query_from_project_file(project_slug=project.slug)
        source_version = project.versions.all()[0]
        # A second, active and public version of the same project
        cloned_version = G(
            Version,
            project=project,
            active=True,
            privacy_level=PUBLIC,
        )
        # Copy every HTMLFile of the source version into the new version
        for html_file in HTMLFile.objects.all().filter(version=source_version):
            html_file.version = cloned_version
            # With the primary key cleared, Django creates a new object on save
            html_file.pk = None
            html_file.save()
            PageDocument().update(html_file)

        resp = self.get_search(
            api_client,
            {
                'q': query,
                'project': project.slug,
                'version': cloned_version.slug,
            },
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) == 1
        assert hits[0]['project'] == project.slug
Пример #7
0
    def test_doc_search_subprojects(self, api_client, all_projects):
        """Doc search filtered by a parent project also returns subproject results."""
        parent = all_projects[0]
        child = all_projects[1]
        parent_version = parent.versions.all()[0]
        # Register the second project as a subproject of the first
        parent.add_subproject(child)

        # Query for the subproject's content while filtering by the parent
        query = get_search_query_from_project_file(project_slug=child.slug)
        resp = self.get_search(
            api_client,
            {
                'q': query,
                'project': parent.slug,
                'version': parent_version.slug,
            },
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        # Other projects may contribute results as well
        assert len(hits) >= 1

        # The top hit has to come from the subproject ...
        top_hit = hits[0]
        assert top_hit['project'] == child.slug
        # ... and link to the subproject's documentation
        expected_link = child.get_docs_url(version_slug=parent_version.slug)
        assert expected_link in top_hit['link']
Пример #8
0
    def test_file_search_show_versions(self, client, all_projects, es_index, settings):
        """The file search page lists ``LATEST`` plus the three versions created here."""
        # Index every version, not just the latest one
        settings.INDEX_ONLY_LATEST = False

        first_project = all_projects[0]
        # Three additional versions for the project
        created = [G(Version, project=first_project) for _ in range(3)]

        query = get_search_query_from_project_file(project_slug=first_project.slug)

        search_params = {'q': query, 'type': 'file'}
        result, page = self._get_search_result(
            url=self.url, client=client, search_params=search_params,
        )

        # Only the latest version's result is expected by default,
        # hence a single hit
        assert len(result) == 1

        version_elements = page.find('.navigable .version-list')
        # 4 versions in total: latest plus the 3 created above
        assert len(version_elements) == 4

        wanted = [LATEST]
        wanted.extend(version.slug for version in created)

        def leading_line(element):
            # The first line of the stripped text holds the version slug
            return element.text_content().strip().split('\n')[0]

        shown = [leading_line(element) for element in version_elements]

        assert sorted(wanted) == sorted(shown)
Пример #9
0
    def test_search_works_with_title_query(self, api_client, project,
                                           page_num):
        """A ``field='title'`` query matches the project and is highlighted in the title."""
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            field='title',
        )

        first_version = project.versions.all().first()
        resp = self.get_search(
            api_client,
            {
                'project': project.slug,
                'version': first_version.slug,
                'q': query,
            },
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) >= 1

        # The top hit must belong to the searched project
        top_hit = hits[0]
        assert top_hit['project'] == project.slug

        # Exactly one title highlight fragment, containing the query
        title_fragments = top_hit['highlight']['title']
        assert len(title_fragments) == 1
        assert query.lower() in title_fragments[0].lower()
Пример #10
0
    def test_file_search(self, client, project, data_type, page_num):
        """A file search returns a single result whose text contains the query."""
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            data_type=data_type,
        )

        search_params = {'q': query, 'type': 'file'}
        hits, _ = self._get_search_result(
            url=self.url, client=client, search_params=search_params,
        )
        assert len(hits) == 1
        assert query in hits.text()
Пример #11
0
    def test_doc_search_hidden_versions(self, api_client, all_projects):
        """
        Hidden subproject versions are left out of parent project searches.

        A search filtered by the parent project must return nothing from a
        subproject whose version is hidden, while a search made directly on
        the subproject and its hidden version still returns the result.
        """
        project = all_projects[0]
        subproject = all_projects[1]
        version = project.versions.all()[0]
        # Add another project as subproject of the project
        project.add_subproject(subproject)

        # Hide the subproject's first version
        version_subproject = subproject.versions.first()
        version_subproject.hidden = True
        version_subproject.save()

        # Now search with subproject content but explicitly filter by the parent project
        query = get_search_query_from_project_file(
            project_slug=subproject.slug)
        search_params = {
            'q': query,
            'project': project.slug,
            'version': version.slug
        }
        resp = self.get_search(api_client, search_params)
        assert resp.status_code == 200

        # The version from the subproject is hidden, so it isn't shown in the results.
        data = resp.data['results']
        assert len(data) == 0

        # Now search on the subproject with hidden version
        query = get_search_query_from_project_file(
            project_slug=subproject.slug)
        search_params = {
            'q': query,
            'project': subproject.slug,
            'version': version_subproject.slug
        }
        resp = self.get_search(api_client, search_params)
        assert resp.status_code == 200
        # We can still search inside the hidden version
        data = resp.data['results']
        assert len(data) == 1
        first_result = data[0]
        assert first_result['project'] == subproject.slug
Пример #12
0
    def test_search_works_with_sections_and_domains_query(
        self,
        api_client,
        project,
        page_num,
        data_type
    ):
        """
        A ``<type>.<field>`` query returns matching, highlighted blocks.

        ``data_type`` is split on ``'.'`` into the block type and the field;
        both are passed to ``get_search_query_from_project_file`` and then
        checked against the first block of the first result.
        """
        # Named ``block_type`` so the builtin ``type`` is not shadowed
        block_type, field = data_type.split('.')
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            type=block_type,
            field=field,
        )
        version = project.versions.all().first()
        search_params = {
            'project': project.slug,
            'version': version.slug,
            'q': query
        }
        resp = self.get_search(api_client, search_params)
        assert resp.status_code == 200

        data = resp.data['results']
        assert len(data) >= 1

        # Matching first result
        project_data = data[0]
        assert project_data['project'] == project.slug

        blocks = project_data['blocks']
        # since there was a nested query,
        # blocks should not be empty
        assert len(blocks) >= 1

        block_0 = blocks[0]

        assert block_0['type'] == block_type

        highlights = block_0['highlights'][field]
        assert (
            len(highlights) == 1
        ), 'number_of_fragments is set to 1'

        # checking highlighting of results
        highlighted_words = re.findall(  # this gets all words inside <span> tags
            r'<span>(.*?)</span>',
            highlights[0]
        )
        assert len(highlighted_words) > 0

        for word in highlighted_words:
            # Make it lower because our search is case insensitive
            assert word.lower() in query.lower()
Пример #13
0
    def has_results(self, api_client, project_slug, version_slug):
        """
        Tell whether a search on the given project and version returns anything.

        The query comes from ``get_search_query_from_project_file`` for
        ``project_slug``; the response must have status 200.
        """
        query = get_search_query_from_project_file(project_slug=project_slug)
        resp = api_client.get(
            self.url,
            {
                'project': project_slug,
                'version': version_slug,
                'q': query,
            },
        )
        assert resp.status_code == 200

        return len(resp.data['results']) > 0
Пример #14
0
    def test_search_works_with_sections_and_domains_query(
        self,
        api_client,
        project,
        page_num,
        data_type
    ):
        """A nested ``data_type`` query returns inner hits with matching highlights."""
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            data_type=data_type
        )
        first_version = project.versions.all().first()
        resp = api_client.get(
            self.url,
            {
                'project': project.slug,
                'version': first_version.slug,
                'q': query,
            },
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) >= 1

        # The top hit must belong to the searched project
        top_hit = hits[0]
        assert top_hit['project'] == project.slug

        # A nested query was made, so inner_hits must not be empty
        nested_hits = top_hit['inner_hits']
        assert len(nested_hits) >= 1

        first_nested = nested_hits[0]

        # The part before the dot can be "sections" or "domains"
        wanted_type = data_type.split('.')[0]
        assert first_nested['type'] == wanted_type

        fragments = first_nested['highlight'][data_type]
        assert (
            len(fragments) == 1
        ), 'number_of_fragments is set to 1'

        # Collect every word wrapped in a <span> tag of the fragment
        marked_words = re.findall('<span>(.*?)</span>', fragments[0])
        assert len(marked_words) > 0

        for marked in marked_words:
            # Compare lowercased because the search is case insensitive
            assert marked.lower() in query.lower()
Пример #15
0
    def test_doc_search_subprojects_default_version(self, api_client,
                                                    all_projects):
        """
        Subproject results can come from the subproject's default version.

        Results from subprojects should match the version of the main
        project, or fall back to the subproject's default version.
        """
        parent = all_projects[0]
        parent_version = parent.versions.all()[0]
        feature, _ = Feature.objects.get_or_create(
            feature_id=Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION,
        )
        parent.feature_set.add(feature)

        child = all_projects[1]
        child_version = child.versions.all()[0]

        # Rename the subproject's version and make it the default one
        child_version.slug = 'different'
        child_version.save()
        child.default_version = child_version.slug
        child.save()
        child.versions.filter(slug=parent_version.slug).delete()

        # Refresh the index for the renamed version's files
        for html_file in HTMLFile.objects.all().filter(version=child_version):
            PageDocument().update(html_file)

        # Register the second project as a subproject of the first
        parent.add_subproject(child)

        # Query for the subproject's content while filtering by the parent
        query = get_search_query_from_project_file(project_slug=child.slug)
        resp = self.get_search(
            api_client,
            {
                'q': query,
                'project': parent.slug,
                'version': parent_version.slug,
            },
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        # Other projects may contribute results as well
        assert len(hits) >= 1

        # The top hit has to be the subproject on its renamed version
        top_hit = hits[0]
        assert top_hit['project'] == child.slug
        assert top_hit['version'] == 'different'
        # The result must point at the subproject's documentation
        expected_link = child.get_docs_url(version_slug=child_version.slug)
        actual_link = top_hit['domain'] + top_hit['path']
        assert expected_link in actual_link
Пример #16
0
    def test_file_search_subprojects(self, client, all_projects, es_index):
        """File search filtered by a parent project also returns the subproject result."""
        parent = all_projects[0]
        child = all_projects[1]
        # Register the second project as a subproject of the first
        parent.add_subproject(child)

        # Query for the subproject's content while filtering by the parent
        query = get_search_query_from_project_file(project_slug=child.slug)
        hits, _page = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': query, 'type': 'file', 'project': parent.slug},
        )

        assert len(hits) == 1
Пример #17
0
    def test_page_search_not_return_removed_page(self, client, project):
        """Deleting a project's HTML files makes its search result disappear."""
        query = get_search_query_from_project_file(project_slug=project.slug)

        def run_search():
            # A fresh params dict is built for every request
            found, _ = self._get_search_result(
                url=self.url,
                client=client,
                search_params={'q': query, 'type': 'file'},
            )
            return found

        # The query returns a result while the files exist
        assert len(run_search()) == 1

        # Remove every HTML file of the project
        HTMLFile.objects.filter(project=project).delete()
        # The same query must not return anything any more
        assert len(run_search()) == 0
Пример #18
0
    def test_file_search_subprojects(self, client, all_projects, es_index):
        """File search filtered by a parent project also returns the subproject result."""
        main_project, sub = all_projects[0], all_projects[1]
        # Link the subproject and rebuild the index afterwards
        main_project.add_subproject(sub)
        self._reindex_elasticsearch(es_index=es_index)

        # Search the subproject's content, filtered by the parent project
        search_params = {
            'q': get_search_query_from_project_file(project_slug=sub.slug),
            'type': 'file',
            'project': main_project.slug,
        }
        found, _page = self._get_search_result(
            url=self.url,
            client=client,
            search_params=search_params,
        )

        assert len(found) == 1
Пример #19
0
    def test_file_search_case_insensitive(self, client, project, case):
        """
        File search ignores the case of the query.

        ``case`` names the ``str`` method used to re-case the query before
        searching.
        """
        original_text = get_search_query_from_project_file(project_slug=project.slug)

        # Apply the string method named by ``case`` to the query
        recased_query = getattr(original_text, case)()

        hits, _ = self._get_search_result(
            url=self.url,
            client=client,
            search_params={'q': recased_query, 'type': 'file'},
        )

        assert len(hits) == 1
        # The result must contain the original text, not the re-cased one
        assert original_text in hits.text()
Пример #20
0
    def test_file_search_case_insensitive(self, client, project, case):
        """
        Check that file search does not depend on the case of the query.

        The query is re-cased with the ``str`` method named by ``case``.
        """
        source_text = get_search_query_from_project_file(project_slug=project.slug)

        transform = getattr(source_text, case)
        search_params = {'q': transform(), 'type': 'file'}

        found, _ = self._get_search_result(
            url=self.url, client=client, search_params=search_params,
        )

        assert len(found) == 1
        # The result must contain the original text, not the re-cased one
        assert source_text in found.text()
Пример #21
0
    def test_search_works(self, api_client, project, data_type, page_num):
        """The API returns one result for the project with the query in each highlight."""
        query = get_search_query_from_project_file(
            project_slug=project.slug,
            page_num=page_num,
            data_type=data_type,
        )

        first_version = project.versions.all()[0]
        resp = api_client.get(
            self.url,
            {'project': project.slug, 'version': first_version.slug, 'q': query},
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) == 1
        only_hit = hits[0]
        assert only_hit['project'] == project.slug

        # Every highlight fragment for this data type must contain the query;
        # compared lowercased because the search is case insensitive
        needle = query.lower()
        for fragment in only_hit['highlight'][data_type]:
            assert needle in fragment.lower()
Пример #22
0
    def test_doc_search_subprojects(self, api_client, all_projects):
        """Doc search filtered by a parent project returns the subproject document."""
        parent = all_projects[0]
        child = all_projects[1]
        parent_version = parent.versions.all()[0]
        # Register the second project as a subproject of the first
        parent.add_subproject(child)

        # Query for the subproject's content while filtering by the parent
        query = get_search_query_from_project_file(project_slug=child.slug)
        resp = api_client.get(
            self.url,
            {'q': query, 'project': parent.slug, 'version': parent_version.slug},
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) == 1
        assert hits[0]['project'] == child.slug
        # The result must link to the subproject's documentation
        expected_link = child.get_docs_url(version_slug=parent_version.slug)
        assert expected_link in hits[0]['link']
Пример #23
0
    def test_file_search_subprojects(self, client, all_projects, es_index):
        """
        File search filtered by a parent project returns no subproject results.

        TODO: File search should return results from subprojects also.
        This is currently disabled because the UX around it is weird:
        you filter by one project and get results for multiple.
        """
        main_project, sub = all_projects[0], all_projects[1]
        # Register the second project as a subproject of the first
        main_project.add_subproject(sub)

        # Search the subproject's content, filtered by the parent project
        search_params = {
            'q': get_search_query_from_project_file(project_slug=sub.slug),
            'type': 'file',
            'project': main_project.slug,
        }
        found, _page = self._get_search_result(
            url=self.url,
            client=client,
            search_params=search_params,
        )

        assert len(found) == 0
Пример #24
0
    def test_doc_search_filter_by_version(self, api_client, project):
        """Doc search results are filtered by the requested version."""
        query = get_search_query_from_project_file(project_slug=project.slug)
        source_version = project.versions.all()[0]
        # A second, active version of the same project
        cloned_version = G(Version, project=project, active=True)
        # Copy every HTMLFile of the source version into the new version
        for html_file in HTMLFile.objects.all().filter(version=source_version):
            html_file.version = cloned_version
            # With the primary key cleared, Django creates a new object on save
            html_file.pk = None
            html_file.save()
            PageDocument().update(html_file)

        resp = api_client.get(
            self.url,
            {'q': query, 'project': project.slug, 'version': cloned_version.slug},
        )
        assert resp.status_code == 200

        hits = resp.data['results']
        assert len(hits) == 1
        assert hits[0]['project'] == project.slug
Пример #25
0
class TestPageSearch(object):
    """Tests for ``'type': 'file'`` searches against the ``reverse('search')`` URL."""

    url = reverse('search')

    def _get_search_result(self, url, client, search_params):
        """
        GET ``url`` with ``search_params`` and return ``(results, facets)``.

        Both values are read from the response context; the response must
        have status code 200.
        """
        resp = client.get(url, search_params)
        assert resp.status_code == 200

        results = resp.context['results']
        facets = resp.context['facets']

        return results, facets

    def _get_highlight(self, result, data_type):
        """
        Return the highlight fragments of ``result`` for ``data_type``.

        For ``'title'`` they are read from ``result.meta.highlight.title``.
        For any other value they are read from the first entry of
        ``result.meta.inner_hits``, whose ``type`` must equal the part of
        ``data_type`` before the first dot.
        """
        # if query is from page title,
        # highlighted title is present in 'result.meta.highlight.title'
        if data_type == 'title':
            return result.meta.highlight.title

        # if result is not from page title,
        # then results and highlighted results are present inside 'inner_hits'
        inner_hits = result.meta.inner_hits
        assert len(inner_hits) >= 1

        # checking first inner_hit
        inner_hit_0 = inner_hits[0]
        # can be either 'sections' or 'domains'
        expected_type = data_type.split('.')[0]
        assert inner_hit_0['type'] == expected_type
        return inner_hit_0['highlight'][data_type]

    def _get_highlighted_words(self, string):
        """Return the text found inside every ``<span>...</span>`` pair of ``string``."""
        return re.findall(r'<span>(.*?)</span>', string)

    @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES)
    @pytest.mark.parametrize('page_num', [0, 1])
    def test_file_search(self, client, project, data_type, page_num):
        """The first file search result highlights only words from the query."""
        query = get_search_query_from_project_file(project_slug=project.slug,
                                                   page_num=page_num,
                                                   data_type=data_type)
        results, _ = self._get_search_result(url=self.url,
                                             client=client,
                                             search_params={
                                                 'q': query,
                                                 'type': 'file'
                                             })
        assert len(results) >= 1

        # checking first result
        result_0 = results[0]
        highlight = self._get_highlight(result_0, data_type)
        assert len(highlight) == 1

        highlighted_words = self._get_highlighted_words(highlight[0])
        assert len(highlighted_words) >= 1
        for word in highlighted_words:
            # Make it lower because our search is case insensitive
            assert word.lower() in query.lower()

    def test_file_search_have_correct_role_name_facets(self, client):
        """Test that searching files should result all role_names."""

        # searching for 'celery' to test that
        # correct role_names are displayed
        results, facets = self._get_search_result(url=self.url,
                                                  client=client,
                                                  search_params={
                                                      'q': 'celery',
                                                      'type': 'file'
                                                  })
        assert len(results) >= 1
        role_name_facets = facets['role_name']
        role_name_facets_str = [facet[0] for facet in role_name_facets]
        expected_role_names = ['py:class', 'py:function', 'py:method']
        assert sorted(expected_role_names) == sorted(role_name_facets_str)
        for facet in role_name_facets:
            # no facet is applied, so none may be flagged as selected
            assert not facet[2]

    def test_file_search_filter_role_name(self, client):
        """Test that searching files filtered according to role_names."""

        search_params = {'q': 'celery', 'type': 'file'}
        # searching without the filter
        results, facets = self._get_search_result(url=self.url,
                                                  client=client,
                                                  search_params=search_params)
        assert len(results) >= 2  # there are > 1 results without the filter
        role_name_facets = facets['role_name']
        for facet in role_name_facets:
            # no facet is applied, so none may be flagged as selected
            assert not facet[2]

        role_name_filter = 'py:class'
        # checking if 'py:class' facet is present in results
        assert role_name_filter in [facet[0] for facet in role_name_facets]

        # filtering with role_name=py:class
        search_params['role_name'] = role_name_filter
        new_results, new_facets = self._get_search_result(
            url=self.url, client=client, search_params=search_params)
        new_role_names_facets = new_facets['role_name']
        # there is only one result with role_name='py:class'
        # in `signals` page
        assert len(new_results) == 1
        first_result = new_results[0]  # first result
        inner_hits = first_result.meta.inner_hits  # inner_hits of first results
        assert len(inner_hits) >= 1
        inner_hit_0 = inner_hits[0]  # first inner_hit
        assert inner_hit_0.type == 'domains'
        assert inner_hit_0.source.role_name == role_name_filter

        for facet in new_role_names_facets:
            if facet[0] == role_name_filter:
                # the 'py:class' filter is active
                assert facet[2]
            else:
                assert not facet[2]

    @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES)
    @pytest.mark.parametrize('case', ['upper', 'lower', 'title'])
    def test_file_search_case_insensitive(self, client, project, case,
                                          data_type):
        """
        Check File search is case insensitive.

        It tests with uppercase, lowercase and title case.
        """
        query_text = get_search_query_from_project_file(
            project_slug=project.slug, data_type=data_type)
        # ``case`` names the str method used to re-case the query
        query = getattr(query_text, case)()

        results, _ = self._get_search_result(url=self.url,
                                             client=client,
                                             search_params={
                                                 'q': query,
                                                 'type': 'file'
                                             })
        assert len(results) >= 1

        first_result = results[0]
        highlight = self._get_highlight(first_result, data_type)
        assert len(highlight) == 1
        highlighted_words = self._get_highlighted_words(highlight[0])
        assert len(highlighted_words) >= 1
        for word in highlighted_words:
            assert word.lower() in query.lower()