Exemplo n.º 1
0
    def page(self, number):
        """
        Return the Page for ``number`` using a single search execution.

        Unlike the stock paginator behaviour, "orphans" are not taken into
        account here, and the paginator's ``_count`` is filled in from the
        total reported by the executed search rather than by a separate
        count query.

        Raises ``EmptyPage`` when ``number`` lies beyond the last page,
        except for page 1 when ``allow_empty_first_page`` is set.
        """
        number = self.validate_number(number)
        start = (number - 1) * self.per_page
        stop = start + self.per_page

        # Executing the sliced search gives us both the hits and the total,
        # so no additional query is needed, even when nothing matched.
        response = self.object_list[start:stop].execute()
        current = Page(response.hits, number, self)

        # Remember the total number of results on the paginator.
        self._count = current.object_list.total

        # Carry the facets over to the page when the response has them.
        if hasattr(response, 'facets'):
            current.facets = response.facets

        # The page number can only be checked against the page count once
        # the total is known, i.e. after the search has run.
        beyond_last_page = number > self.num_pages
        if beyond_last_page and not (number == 1
                                     and self.allow_empty_first_page):
            raise EmptyPage('That page contains no results')
        return current
Exemplo n.º 2
0
    def page(self, number):
        """
        Return the Page for ``number`` from an already-paginated Solr query.

        Solr is asked for exactly ``per_page`` rows starting at the offset of
        the requested page, so no slicing happens on the Python side. Each
        Solr result is resolved through ``models.Page.lookup``; results whose
        lookup returns nothing are skipped. Every kept page gets ``words``
        (the highlighted words, sorted case-insensitively) and
        ``highlight_url``. The returned Page also carries a ``facets`` dict.
        """
        number = self.validate_number(number)

        # Build the Solr request for this page and run it.
        connection = SolrConnection(
            settings.SOLR)  # TODO: maybe keep connection around?
        offset = self.per_page * (number - 1)
        extra = {
            "hl.snippets": 100,  # TODO: make this unlimited
            "hl.requireFieldMatch": 'true',  # limits highlighting slop
            "hl.maxAnalyzedChars": '102400',  # increased from default 51200
        }
        extra.update(self.facet_params)
        sort_field, sort_order = _get_sort(self.query.get('sort'),
                                           in_pages=True)
        response = connection.query(
            self._q,
            fields=[
                'id', 'title', 'date', 'month', 'day',
                'sequence', 'edition_label', 'section_label',
            ],
            highlight=self._ocr_list,
            rows=self.per_page,
            sort=sort_field,
            sort_order=sort_order,
            start=offset,
            **extra)

        # Field facets are ordered by the `_sort_facets_asc` helper.
        counts = response.facet_counts
        facets = {
            name: _sort_facets_asc(counts, name)
            for name in ('city', 'county', 'frequency', 'language', 'state')
        }

        # Year buckets are ordered by their key, highest first.
        year_buckets = list(counts['facet_ranges']['year']['counts'].items())
        year_buckets.sort(key=lambda bucket: bucket[0], reverse=True)
        facets['year'] = year_buckets

        # With a gap above one, label each bucket as "first-last".
        gap = self.facet_params['f_year_facet_range_gap']
        if gap > 1:
            facets['year'] = [
                ('%s-%d' % (first_year, int(first_year) + gap - 1), hits)
                for first_year, hits in facets['year']
            ]

        pages = []
        for doc in response.results:
            page = models.Page.lookup(doc['id'])
            if not page:
                continue
            snippets_by_field = response.highlighting[doc['id']]
            # Gather the words found in every highlighted snippet of every
            # OCR field; a missing or empty field contributes nothing.
            words = {
                word
                for field in self._ocr_list
                for snippet in (snippets_by_field.get(field) or [])
                for word in find_words(snippet)
            }
            page.words = sorted(words, key=lambda v: v.lower())

            page.highlight_url = self.highlight_url(page.url, page.words)
            pages.append(page)

        result_page = Page(pages, number, self)
        result_page.facets = facets
        return result_page
Exemplo n.º 3
0
    def page(self, number):
        """
        Return the Page for ``number`` from an already-paginated Solr query.

        Overrides the Paginator's page method: Solr is asked for exactly
        ``per_page`` rows starting at the requested page's offset, so no
        slicing is done here. Each Solr result is resolved through
        ``models.Page.lookup`` and skipped when the lookup returns nothing.
        Every kept page gets ``words`` (highlighted words from the
        ``ocr_vector`` field, sorted case-insensitively) and
        ``highlight_url``. The returned Page also carries a ``facets`` dict
        keyed by facet field name.
        """
        number = self.validate_number(number)

        # figure out the solr query and execute it
        solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
        start = self.per_page * (number - 1)
        params = {
            "hl.snippets": 100,  # TODO: make this unlimited
            "hl.requireFieldMatch": 'true',  # limits highlighting slop
            "hl.maxAnalyzedChars": '102400',  # increased from default 51200
        }
        params.update(self.facet_params)
        sort_field, sort_order = _get_sort(self.query.get('sort'),
                                           in_pages=True)
        solr_response = solr.query(
            self._q,
            fields="*",
            # Only the `ocr_vector` field is highlighted (not self._ocr_list).
            highlight='ocr_vector',
            rows=self.per_page,
            sort=sort_field,
            sort_order=sort_order,
            start=start,
            **params)

        solr_facets = solr_response.facet_counts
        field_counts = solr_facets.get('facet_fields')

        # Years are ordered by their natural (value, count) tuple order.
        facets = {'year': sorted(field_counts['year'].items())}
        # Every other facet is ordered by hit count, largest first. `key` and
        # `reverse` are passed by keyword: Python 3's sorted() takes no
        # positional cmp/key/reverse, and the old `x - y` comparator was just
        # the default numeric ordering of the counts.
        for field in ('lccn', 'county', 'region', 'city', 'newspaper_type'):
            facets[field] = sorted(field_counts[field].items(),
                                   key=lambda item: item[1],
                                   reverse=True)

        pages = []
        for result in solr_response.results:
            page = models.Page.lookup(result['id'])
            if not page:
                continue
            words = set()
            coords = solr_response.highlighting[result['id']]
            # A missing or empty `ocr_vector` entry contributes no words.
            for s in coords.get('ocr_vector') or []:
                words.update(find_words(s))
            page.words = sorted(words, key=lambda v: v.lower())

            # len(pages) is this page's index among the ones kept so far.
            page.highlight_url = self.highlight_url(page.url,
                                                    page.words,
                                                    number, len(pages))
            pages.append(page)

        solr_page = Page(pages, number, self)
        solr_page.facets = facets
        return solr_page
Exemplo n.º 4
0
    def page(self, number):
        """
        Return the Page for ``number`` from an already-paginated Solr query.

        Overrides the Paginator's page method: Solr is asked for exactly
        ``per_page`` rows starting at the requested page's offset, so no
        slicing is done here. Each Solr result is resolved through
        ``models.Page.lookup`` and skipped when the lookup returns nothing.
        Every kept page gets ``words`` (highlighted words from the fields in
        ``self._ocr_list``, sorted case-insensitively) and ``highlight_url``.
        The returned Page also carries a ``facets`` dict with ``state``,
        ``year`` and ``county`` entries.
        """
        number = self.validate_number(number)

        # figure out the solr query and execute it
        solr = SolrConnection(settings.SOLR)  # TODO: maybe keep connection around?
        start = self.per_page * (number - 1)
        params = {
            "hl.snippets": 100,  # TODO: make this unlimited
            "hl.requireFieldMatch": 'true',  # limits highlighting slop
            "hl.maxAnalyzedChars": '102400',  # increased from default 51200
        }
        params.update(self.facet_params)
        sort_field, sort_order = _get_sort(self.query.get('sort'),
                                           in_pages=True)
        solr_response = solr.query(self._q,
                                   fields=['id', 'title', 'date', 'month',
                                           'day', 'sequence', 'edition_label',
                                           'section_label'],
                                   highlight=self._ocr_list,
                                   rows=self.per_page,
                                   sort=sort_field,
                                   sort_order=sort_order,
                                   start=start,
                                   **params)
        solr_facets = solr_response.facet_counts
        field_counts = solr_facets.get('facet_fields')

        # `key` and `reverse` are passed by keyword: Python 3's sorted() takes
        # no positional cmp/key/reverse. The former comparators (`x - y` on
        # counts, `int(x) - int(y)` on years) are expressed as key functions,
        # which yields the same ordering.
        facets = {
            # states and counties: by number of hits, largest first
            'state': sorted(field_counts['state'].items(),
                            key=lambda item: item[1], reverse=True),
            # year buckets: by numeric year, most recent first
            'year': sorted(
                solr_facets['facet_ranges']['year']['counts'].items(),
                key=lambda item: int(item[0]), reverse=True),
            'county': sorted(field_counts['county'].items(),
                             key=lambda item: item[1], reverse=True),
        }

        # With a gap above one, label each bucket as "first-last".
        facet_gap = self.facet_params['f_year_facet_range_gap']
        if facet_gap > 1:
            facets['year'] = [
                ('%s-%d' % (y[0], int(y[0]) + facet_gap - 1), y[1])
                for y in facets['year']
            ]

        pages = []
        for result in solr_response.results:
            page = models.Page.lookup(result['id'])
            if not page:
                continue
            words = set()
            coords = solr_response.highlighting[result['id']]
            for ocr in self._ocr_list:
                # A missing or empty field contributes no words.
                for s in coords.get(ocr) or []:
                    words.update(find_words(s))
            page.words = sorted(words, key=lambda v: v.lower())

            # len(pages) is this page's index among the ones kept so far.
            page.highlight_url = self.highlight_url(page.url,
                                                    page.words,
                                                    number, len(pages))
            pages.append(page)

        solr_page = Page(pages, number, self)
        solr_page.facets = facets
        return solr_page
Exemplo n.º 5
0
    def page(self, number):
        """
        Return the Page for ``number`` from an already-paginated Solr search.

        The search is issued with ``rows``/``start`` matching the requested
        page, so no slicing happens on the Python side. Each returned
        document is resolved through ``models.Page.lookup``; documents whose
        lookup returns nothing are skipped. Every kept page gets ``words``
        (the highlighted words, sorted case-insensitively) and
        ``highlight_url``. The returned Page also carries a ``facets`` dict.
        """
        number = self.validate_number(number)

        # Assemble the search parameters for this page.
        offset = self.per_page * (number - 1)
        search_args = {
            'fl':
            'id,title,date,month,day,sequence,edition_label,section_label',
            'hl': 'true',
            'hl.snippets': 100,  # TODO: make this unlimited
            'hl.requireFieldMatch': 'true',  # limits highlighting slop
            'hl.maxAnalyzedChars': '102400',  # increased from default 51200
            'hl.fl': ','.join(self._ocr_list),
            'rows': self.per_page,
            'start': offset,
        }
        search_args.update(self.facet_params)

        # A sort clause is only sent when both its field and order are set.
        sort_field, sort_order = _get_sort(self.query.get('sort'),
                                           in_pages=True)
        if sort_field and sort_order:
            search_args['sort'] = '%s %s' % (sort_field, sort_order)

        response = conn().search(self._q, **search_args)

        # Field facets, each ordered by the `_sorted_facet_counts` helper.
        raw_facets = response.facets
        by_field = raw_facets.get('facet_fields')
        facets = {
            name: _sorted_facet_counts(by_field, name)
            for name in ('city', 'county', 'frequency', 'language', 'state')
        }

        # Year buckets come from the range facet, through the same helper.
        facets['year'] = _sorted_facet_counts(
            raw_facets['facet_ranges']['year'], 'counts')

        # With a gap above one, label each bucket as "first-last".
        gap = self.facet_params['f.year.facet.range.gap']
        if gap > 1:
            facets['year'] = [
                ('%s-%d' % (bucket[0], int(bucket[0]) + gap - 1), bucket[1])
                for bucket in facets['year']
            ]

        pages = []
        for doc in response.docs:
            page = models.Page.lookup(doc['id'])
            if not page:
                continue
            snippets_by_field = response.highlighting[doc['id']]
            # Gather the words found in every highlighted snippet of every
            # OCR field; a missing or empty field contributes nothing.
            words = {
                word
                for field in self._ocr_list
                for snippet in (snippets_by_field.get(field) or [])
                for word in find_words(snippet)
            }
            page.words = sorted(words, key=lambda v: v.lower())

            page.highlight_url = self.highlight_url(page.url, page.words)
            pages.append(page)

        result_page = Page(pages, number, self)
        result_page.facets = facets
        return result_page