Example #1
File: test_es.py  Project: ibai/kitsune
    def test_recent_helpful_votes(self):
        """Recent helpful votes are indexed properly."""
        # Create a document and verify it doesn't show up in a
        # query for recent_helpful_votes__gt=0.
        r = revision(is_approved=True, save=True)
        self.refresh()
        eq_(
            Document.search().filter(
                document_recent_helpful_votes__gt=0).count(), 0)

        # Add an unhelpful vote; it still shouldn't show up.
        helpful_vote(revision=r, helpful=False, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(
            Document.search().filter(
                document_recent_helpful_votes__gt=0).count(), 0)

        # Add a helpful vote created 31 days ago; it still shouldn't show up.
        created = datetime.now() - timedelta(days=31)
        helpful_vote(revision=r, helpful=True, created=created, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(
            Document.search().filter(
                document_recent_helpful_votes__gt=0).count(), 0)

        # Add a helpful vote created 29 days ago; it should show up now.
        created = datetime.now() - timedelta(days=29)
        helpful_vote(revision=r, helpful=True, created=created, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(
            Document.search().filter(
                document_recent_helpful_votes__gt=0).count(), 1)
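
The 31-day vs. 29-day boundary above implies a 30-day window for "recent" votes. As a rough sketch (an assumption about the index-time logic, not code from kitsune), the indexed value could be computed like this:

# Hypothetical sketch: count helpful votes cast in the last 30 days,
# matching the boundary the test above exercises.
from datetime import datetime, timedelta

def count_recent_helpful_votes(votes):
    cutoff = datetime.now() - timedelta(days=30)
    return sum(1 for vote in votes
               if vote.helpful and vote.created >= cutoff)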
Example #2
File: test_es.py  Project: ibai/kitsune
    def test_add_and_delete(self):
        """Adding a doc should add it to the search index; deleting should
        delete it."""
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().count(), 1)

        doc.delete()
        self.refresh()
        eq_(Document.search().count(), 0)
Example #3
File: test_es.py  Project: ibai/kitsune
    def test_wiki_no_revisions(self):
        """Don't index documents without approved revisions"""
        # Create a document with no revisions and make sure the
        # document is not in the index.
        doc = document(save=True)
        self.refresh()
        eq_(Document.search().count(), 0)

        # Create a revision that's not approved and make sure the
        # document is still not in the index.
        revision(document=doc, is_approved=False, save=True)
        self.refresh()
        eq_(Document.search().count(), 0)
Example #4
File: test_es.py  Project: ibai/kitsune
    def test_wiki_keywords(self):
        """Make sure updating keywords updates the index."""
        # Create a document with a revision with no keywords. It
        # shouldn't show up with a document_keywords term query for
        # 'wool' since it has no keywords.
        doc = document(title=u'wool hats')
        doc.save()
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query(document_keywords='wool').count(), 0)

        revision(document=doc, is_approved=True, keywords='wool', save=True)
        self.refresh()

        eq_(Document.search().query(document_keywords='wool').count(), 1)
Example #5
File: test_es.py  Project: ibai/kitsune
    def test_wiki_redirects(self):
        """Make sure we don't index redirects"""
        # First create a revision that doesn't have a redirect and
        # make sure it's in the index.
        doc = document(title=u'wool hats')
        doc.save()
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query('wool').count(), 1)

        # Now create a revision that is a redirect and make sure the
        # document is removed from the index.
        revision(document=doc,
                 content=REDIRECT_CONTENT,
                 is_approved=True,
                 save=True)
        self.refresh()
        eq_(Document.search().query('wool').count(), 0)
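
Together with test_wiki_no_revisions above, this pins down the indexing rule: a document is searchable only when it has an approved current revision and is not a redirect. A hypothetical predicate capturing that rule (the helper name and attribute names are assumptions, not kitsune's actual model API):

# Hypothetical predicate implied by the tests; attribute names assumed.
def should_be_indexed(document):
    rev = document.current_revision
    return rev is not None and rev.is_approved and not document.is_redirect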
Example #6
File: facets.py  Project: ibai/kitsune
def _es_documents_for(locale, topics, products):
    """ES implementation of documents_for."""
    s = Document.search().values_dict(
        'id', 'document_title', 'url').filter(document_locale=locale)
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
Example #7
File: test_es.py  Project: ibai/kitsune
    def test_wiki_products(self):
        """Make sure that adding products to a Document causes it to
        refresh the index.

        """
        p = product(slug=u'desktop', save=True)
        eq_(Document.search().filter(document_product=p.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(document_product=p.slug).count(), 0)
        doc.products.add(p)
        self.refresh()
        eq_(Document.search().filter(document_product=p.slug).count(), 1)
        doc.products.remove(p)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(document_product=p.slug).count(), 0)
Example #8
File: test_es.py  Project: ibai/kitsune
    def test_wiki_topics(self):
        """Make sure that adding topics to a Document causes it to
        refresh the index.

        """
        t = topic(slug=u'hiphop', save=True)
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc.topics.add(t)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 1)
        doc.topics.clear()
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
Example #9
File: facets.py  Project: icaaq/kitsune
def _es_documents_for(locale, topics, products=None):
    """ES implementation of documents_for."""
    s = (Document.search()
        .values_dict('id', 'document_title', 'url', 'document_parent_id')
        .filter(document_locale=locale, document_is_archived=False,
                document_category__in=settings.IA_DEFAULT_CATEGORIES))

    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
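
For reference, values_dict() means each hit arrives as a plain dict keyed by the requested fields; one returned row looks roughly like this (field names from the call above, values invented):

# Illustrative result shape only -- the values are made up.
{'id': 42,
 'document_title': u'Recover lost bookmarks',
 'url': u'/en-US/kb/recover-lost-bookmarks',
 'document_parent_id': None}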
Example #10
    def test_wiki_products(self):
        """Make sure that adding products to a Document causes it to
        refresh the index.

        """
        p = product(slug=u'desktop', save=True)
        eq_(Document.search().filter(product=p.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(product=p.slug).count(), 0)
        doc.products.add(p)
        self.refresh()
        eq_(Document.search().filter(product=p.slug).count(), 1)
        doc.products.remove(p)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(product=p.slug).count(), 0)
Example #11
    def test_wiki_tags(self):
        """Make sure that adding tags to a Document causes it to
        refresh the index.

        """
        tag = u'hiphop'
        eq_(Document.search().filter(tag=tag).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(tag=tag).count(), 0)
        doc.tags.add(tag)
        self.refresh()
        eq_(Document.search().filter(tag=tag).count(), 1)
        doc.tags.remove(tag)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(tag=tag).count(), 0)
Example #12
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(_fix_value_dicts(Document.search().values_dict().order_by("-indexed_on")[:50]))

    last_50_reviewed = list(
        Document.uncached.filter(current_revision__is_approved=True).order_by("-current_revision__reviewed")[:50]
    )

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render_to_response(
        "search/admin/troubleshooting.html",
        {"title": "Index Troubleshooting", "diffs": diff_list},
        RequestContext(request, {}),
    )
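
A plausible sketch of the _fix_value_dicts helper used above (an assumption -- the real helper may do more): convert the epoch-seconds indexed_on value back into a datetime for display.

from datetime import datetime

def _fix_value_dicts(values_dicts):
    # Assumed behavior: indexed_on is stored as seconds since the epoch.
    for d in values_dicts:
        d['indexed_on'] = datetime.fromtimestamp(d['indexed_on'])
    return values_dicts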
Example #13
File: facets.py  Project: bituka/kitsune
def _es_documents_for(locale, topics=None, products=None):
    """ES implementation of documents_for."""
    s = (Document.search().values_dict(
        'id', 'document_title', 'url', 'document_parent_id',
        'document_summary').filter(
            document_locale=locale,
            document_is_archived=False,
            document_category__in=settings.IA_DEFAULT_CATEGORIES))

    for topic in topics or []:
        s = s.filter(topic=topic.slug)
    for product in products or []:
        s = s.filter(product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
Example #14
File: admin.py  Project: ibai/kitsune
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(
        _fix_value_dicts(
            Document.search().values_dict().order_by('-indexed_on')[:50]))

    last_50_reviewed = list(
        Document.uncached.filter(current_revision__is_approved=True).order_by(
            '-current_revision__reviewed')[:50])

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render_to_response('search/admin/troubleshooting.html', {
        'title': 'Index Troubleshooting',
        'diffs': diff_list,
    }, RequestContext(request, {}))
Example #15
File: facets.py  Project: Owen66/kitsune
def products_for(topics):
    """Returns a list of products that apply to passed in topics.

    :arg topics: a list of Topic instances
    """
    product_field = 'document_product'

    s = Document.search().values_dict('id')
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    s = s.facet(product_field, filtered=True)
    facet_counts = s.facet_counts()[product_field]

    products = Product.objects.filter(
        slug__in=[f['term'] for f in facet_counts]).filter(visible=True)

    return products
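
A hypothetical call (Topic fixtures assumed): narrow documents to every passed topic, then read the document_product facet to learn which visible products still have matching articles.

# Hypothetical usage -- assumes Topic rows with these slugs exist.
topics = list(Topic.objects.filter(slug__in=['crash', 'startup']))
for p in products_for(topics):
    print p.slug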
Example #16
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = _fix_value_dicts(Document.search()
                                               .values_dict()
                                               .order_by('-indexed_on')[:50])

    last_50_reviewed = (Document.uncached
                                .filter(current_revision__is_approved=True)
                                .order_by('-current_revision__reviewed')[:50])

    return render_to_response(
        'search/admin/troubleshooting.html',
        {'title': 'Index Troubleshooting',
         'last_50_indexed': last_50_indexed,
         'last_50_reviewed': last_50_reviewed
         },
        RequestContext(request, {}))
Example #17
File: facets.py  Project: Owen66/kitsune
def topics_for(products):
    """Returns a list of topics that apply to passed in products.

    :arg products: a list of Product instances
    """
    topic_field = 'document_topic'

    s = Document.search().values_dict('id')
    for product in products:
        s = s.filter(document_product=product.slug)
    s = s.facet(topic_field, filtered=True)
    facet_counts = s.facet_counts()[topic_field]

    topics = Topic.objects.filter(
        slug__in=[f['term'] for f in facet_counts]).filter(visible=True)

    return topics
Example #18
File: admin.py  Project: LASarkar/kitsune
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(_fix_value_dicts(Document.search()
                                            .values_dict()
                                            .order_by('-indexed_on')[:50]))

    last_50_reviewed = list(Document.uncached
                            .filter(current_revision__is_approved=True)
                            .order_by('-current_revision__reviewed')[:50])

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render(
        request,
        'admin/search_troubleshooting.html',
        {'title': 'Index Troubleshooting',
         'diffs': diff_list,
         })
Example #19
def documents_for(locale, topics, products=None):
    """Returns a list of articles that apply to passed in topics and products.

    :arg locale: the locale
    :arg topics: a list of Topic instances
    :arg products: (optional) a list of Product instances

    The articles are returned as a list of dicts with the following keys:
        id
        document_title
        url
    """
    s = Document.search().values_dict('id', 'document_title', 'url')
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s)
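
A hypothetical call (a fixture object is assumed); products is optional, and each returned item is a dict with the three documented keys:

# Hypothetical usage -- assumes a Topic row with this slug exists.
docs = documents_for('en-US', topics=[Topic.objects.get(slug='bookmarks')])
for d in docs:
    print d['id'], d['document_title'], d['url']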
Example #20
File: views.py  Project: bituka/kitsune
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.LANGUAGE_CODE)
    try:
        query = dict(('%s__text' % field, term)
                     for field in Document.get_query_fields())
        wiki_s = (Document.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__text' % field, term)
                     for field in Question.get_query_fields())
        question_s = (Question.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except (ESTimeoutError, ESMaxRetryError, ESException):
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    urlize = lambda r: u'https://%s%s' % (site, r['url'])
    titleize = lambda r: (r['document_title'] if 'document_title' in r
                          else r['question_title'])
    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)
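
The data list is the OpenSearch JSON suggestions format: the query term, a parallel list of completions, an (unused) descriptions list, and a parallel list of URLs. Roughly (values invented):

# Illustrative payload only -- titles and URLs are made up.
["crash",
 ["Firefox crashes", "Firefox crashes at startup"],
 [],
 ["https://support.mozilla.org/kb/firefox-crashes",
  "https://support.mozilla.org/questions/12345"]]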
Example #21
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.locale)
    try:
        query = dict(('%s__text' % field, term)
                     for field in Document.get_query_fields())
        wiki_s = (Document.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__text' % field, term)
                     for field in Question.get_query_fields())
        question_s = (Question.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except (ESTimeoutError, ESMaxRetryError, ESException):
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    urlize = lambda r: u'https://%s%s' % (site, r['url'])
    titleize = lambda r: (r['document_title'] if 'document_title' in r
                          else r['question_title'])
    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)
Example #22
def _search_suggestions(request, text, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    :arg text: full text to search on
    :arg locale: locale to limit to
    :arg tags: list of tags to filter questions on
    :arg product_slugs: list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of::

        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    :returns: up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    results = []
    try:
        query = dict(('%s__text' % field, text)
                      for field in Document.get_query_fields())
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(or_=query)
                  .values_dict('id')[:WIKI_RESULTS])
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        query = dict(('%s__text' % field, text)
                      for field in Question.get_query_fields())
        raw_results = (question_s.query(or_=query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])
        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')
        log.debug(exc)

    return results
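
A hypothetical call (argument values are illustrative): up to three KB articles for desktop Firefox plus up to three matching questions, each as a dict in the shape the docstring describes.

# Hypothetical usage -- values are illustrative only.
items = _search_suggestions(request, text=u'firefox crashes',
                            locale='en-US', tags=['crash'],
                            product_slugs=['desktop'])
for item in items:
    print item['type'], item['title'], item['url']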
Example #23
File: views.py  Project: ibai/kitsune
def _search_suggestions(request, query, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to
    tags -- list of tags to filter questions on
    product_slugs -- list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision').get(
                    pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                        doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (
            question_s.query(query).values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')

        return []

    return results
Example #24
def _search_suggestions(request, query, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to
    tags -- list of tags to filter questions on
    product_slugs -- list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale, document_category__in=default_categories)
            .query(query)
            .values_dict("id")[:WIKI_RESULTS]
        )

        results = []
        for r in raw_results:
            try:
                doc = Document.objects.select_related("current_revision").get(pk=r["id"])
                results.append(
                    {
                        "search_summary": clean_excerpt(doc.current_revision.summary),
                        "url": doc.get_absolute_url(),
                        "title": doc.title,
                        "type": "document",
                        "object": doc,
                    }
                )
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = question_s.query(query).values_dict("id")[:QUESTIONS_RESULTS]

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r["id"])
                results.append(
                    {
                        "search_summary": clean_excerpt(q.content[0:500]),
                        "url": q.get_absolute_url(),
                        "title": q.title,
                        "type": "question",
                        "object": q,
                        "is_solved": q.is_solved,
                        "num_answers": q.num_answers,
                        "num_votes": q.num_votes,
                        "num_votes_past_week": q.num_votes_past_week,
                    }
                )
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr("questions.suggestions.timeouterror")
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr("questions.suggestions.maxretryerror")
        elif isinstance(exc, ESException):
            statsd.incr("questions.suggestions.elasticsearchexception")

        return []

    return results
Example #25
def _search_suggestions_es(request, query, locale, category_tags):
    """See _search_suggestions

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    engine = 'elastic'
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(question_tag__in=category_tags)
        wiki_s = wiki_s.filter(document_tag__in=category_tags)

    try:
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (question_s.query(query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (SearchError, ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, SearchError):
            statsd.incr('questions.suggestions.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.%s.elasticsearchexception' %
                        engine)

        return []

    return results
Example #26
def _search_suggestions(request, query, locale, category_tags):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    if waffle.flag_is_active(request, 'elasticsearch'):
        engine = 'elastic'
        question_s = Question.search()
        wiki_s = Document.search()
    else:
        engine = 'sphinx'
        question_s = question_searcher(request)
        wiki_s = wiki_searcher(request)

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3

    # Apply category filters
    if category_tags:
        question_s = question_s.filter(tag__in=category_tags)
        wiki_s = wiki_s.filter(tag__in=category_tags)

    try:
        raw_results = (
            wiki_s.filter(locale=locale,
                          category__in=settings.SEARCH_DEFAULT_CATEGORIES)
                  .query(query)
                  .values_dict('id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (question_s.query(query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q
                })
            except Question.DoesNotExist:
                pass

    except (SearchError, ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, SearchError):
            statsd.incr('questions.suggestions.%s.searcherror' % engine)
        elif isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.%s.timeouterror' % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.%s.maxretryerror' % engine)
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.%s.elasticsearchexception' %
                        engine)

        return []

    return results
Example #27
File: views.py  Project: klrmn/kitsune
def search(request, template=None):
    """ES-specific search view"""

    if (waffle.flag_is_active(request, 'esunified') or
        request.GET.get('esunified')):
        return search_with_es_unified(request, template)

    start = time.time()

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = (map(int, r.getlist('category')) or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = Document.search()
    question_s = Question.search()
    discussion_s = Thread.search()

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(document_category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(document_locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(document_tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(document_tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(document_is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # has_helpful is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_s = question_s.filter(question_tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(
                post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(post_is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(
                post_forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}

            discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_s = question_s.filter(
            question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_s = question_s.filter(
            question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtery stuff--time to generate results

    documents = ComposedList()
    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            if cleaned_q:
                wiki_s = wiki_s.query(cleaned_q)

            # For a front-page non-advanced search, we want to cap the kb
            # at 10 results.
            if a == '0':
                wiki_max_results = 10
            else:
                wiki_max_results = max_results
            documents.set_count(('wiki', wiki_s),
                                min(wiki_s.count(), wiki_max_results))

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            question_s = question_s.highlight(
                'question_title', 'question_content',
                'question_answer_content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                question_s = question_s.query(cleaned_q)
            documents.set_count(('question', question_s),
                                min(question_s.count(), max_results))

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            discussion_s = discussion_s.highlight(
                'discussion_content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                discussion_s = discussion_s.query(cleaned_q)
            documents.set_count(('forum', discussion_s),
                                min(discussion_s.count(), max_results))

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)
        num_results = len(documents)

        # Get the documents we want to show and add them to
        # docs_for_page.
        documents = documents[offset:offset + results_per_page]
        docs_for_page = []
        for (kind, search_s), bounds in documents:
            search_s = search_s.values_dict()[bounds[0]:bounds[1]]
            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            # Type here is something like 'wiki', ... while doc here
            # is an ES result document.
            type_, doc = docinfo

            if type_ == 'wiki':
                summary = doc['document_summary']
                result = {
                    'url': doc['url'],
                    'title': doc['document_title'],
                    'type': 'document',
                    'object': ObjectDict(doc)}
            elif type_ == 'question':
                summary = _build_es_excerpt(doc)
                result = {
                    'url': doc['url'],
                    'title': doc['question_title'],
                    'type': 'question',
                    'object': ObjectDict(doc),
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}
            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'url': doc['url'],
                    'title': doc['post_title'],
                    'type': 'thread',
                    'object': ObjectDict(doc)}
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, ESTimeoutError):
            statsd.incr('search.es.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.es.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('search.es.elasticsearchexception')

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)
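
The view above leans on ComposedList to interleave wiki, question, and forum hits without fetching everything: each ('wiki', wiki_s)-style key gets a count, and slicing yields (key, (start, stop)) bounds per group. A minimal sketch of that idea, inferred from the usage above rather than taken from kitsune's implementation:

# Minimal sketch (inferred, not kitsune's actual class): a virtual list
# of counted groups whose slices come back as (key, (start, stop)).
class ComposedList(object):
    def __init__(self):
        self.groups = []  # [(key, count), ...]

    def set_count(self, key, count):
        self.groups.append((key, count))

    def __len__(self):
        return sum(count for _key, count in self.groups)

    def __getitem__(self, slc):
        start = slc.start or 0
        stop = len(self) if slc.stop is None else slc.stop
        result, offset = [], 0
        for key, count in self.groups:
            lo = max(start - offset, 0)
            hi = min(stop - offset, count)
            if lo < hi:
                result.append((key, (lo, hi)))
            offset += count
        return result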
Example #28
def search_with_es(request, template=None):
    """ES-specific search view"""

    engine = "elastic"

    # Time ES and Sphinx separately. See bug 723930.
    # TODO: Remove this once Sphinx is gone.
    start = time.time()

    # JSON-specific variables
    is_json = request.GET.get("format") == "json"
    callback = request.GET.get("callback", "").strip()
    mimetype = "application/x-javascript" if callback else "application/json"

    # Search "Expires" header format
    expires_fmt = "%A, %d %B %Y %H:%M:%S GMT"

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(json.dumps({"error": _("Invalid callback function.")}), mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get("language", request.locale))
    r = request.GET.copy()
    a = request.GET.get("a", "0")

    # Search default values
    try:
        category = map(int, r.getlist("category")) or settings.SEARCH_DEFAULT_CATEGORIES
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist("category", category)

    # Basic form
    if a == "0":
        r["w"] = r.get("w", constants.WHERE_BASIC)
    # Advanced form
    if a == "2":
        r["language"] = language
        r["a"] = "1"

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if "include_archived" not in r:
        r["include_archived"] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == "2":
        if is_json:
            return HttpResponse(json.dumps({"error": _("Invalid search data.")}), mimetype=mimetype, status=400)

        t = template if request.MOBILE else "search/form.html"
        search_ = jingo.render(request, t, {"advanced": a, "request": request, "search_form": search_form})
        search_["Cache-Control"] = "max-age=%s" % (settings.SEARCH_CACHE_PERIOD * 60)
        search_["Expires"] = (datetime.utcnow() + timedelta(minutes=settings.SEARCH_CACHE_PERIOD)).strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get("page")), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # TODO: This is fishy--why does it have to be coded this way?
    # get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ""

    wiki_s = Document.search()
    question_s = Question.search()
    discussion_s = Thread.search()

    # wiki filters
    # Category filter
    if cleaned["category"]:
        wiki_s = wiki_s.filter(document_category__in=cleaned["category"])

    # Locale filter
    wiki_s = wiki_s.filter(document_locale=language)

    # Product filter
    products = cleaned["product"]
    for p in products:
        wiki_s = wiki_s.filter(document_tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned["tags"].split()]
    for t in tags:
        wiki_s = wiki_s.filter(document_tag=t)

    # Archived bit
    if a == "0" and not cleaned["include_archived"]:
        # Default to NO for basic search:
        cleaned["include_archived"] = False
    if not cleaned["include_archived"]:
        wiki_s = wiki_s.filter(document_is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned["w"] & constants.WHERE_SUPPORT:

        # has_helpful is set by default if using basic search
        if a == "0" and not cleaned["has_helpful"]:
            cleaned["has_helpful"] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ("is_locked", "is_solved", "has_answers", "has_helpful")
        d = dict(
            ("question_%s" % filter_name, _ternary_filter(cleaned[filter_name]))
            for filter_name in ternary_filters
            if cleaned[filter_name]
        )
        if d:
            question_s = question_s.filter(**d)

        if cleaned["asked_by"]:
            question_s = question_s.filter(question_creator=cleaned["asked_by"])

        if cleaned["answered_by"]:
            question_s = question_s.filter(question_answer_creator=cleaned["answered_by"])

        q_tags = [t.strip() for t in cleaned["q_tags"].split()]
        for t in q_tags:
            question_s = question_s.filter(question_tag=t)

    # Discussion forum specific filters
    if cleaned["w"] & constants.WHERE_DISCUSSION:
        if cleaned["author"]:
            discussion_s = discussion_s.filter(post_author_ord=cleaned["author"])

        if cleaned["thread_type"]:
            if constants.DISCUSSION_STICKY in cleaned["thread_type"]:
                discussion_s = discussion_s.filter(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned["thread_type"]:
                discussion_s = discussion_s.filter(post_is_locked=1)

        if cleaned["forum"]:
            discussion_s = discussion_s.filter(post_forum_id__in=cleaned["forum"])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ("created", cleaned["created"], cleaned["created_date"]),
        ("updated", cleaned["updated"], cleaned["updated_date"]),
    )
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + "__gte": 0, filter_name + "__lte": max(filter_date, 0)}

            discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + "__gte": min(filter_date, unix_now), filter_name + "__lte": unix_now}

            discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned["num_voted"] == constants.INTERVAL_BEFORE:
        question_s.filter(question_num_votes__lte=max(cleaned["num_votes"], 0))
    elif cleaned["num_voted"] == constants.INTERVAL_AFTER:
        question_s.filter(question_num_votes__gte=cleaned["num_votes"])

    # Done with all the filtery stuff--time to generate results

    documents = ComposedList()
    sortby = smart_int(request.GET.get("sortby"))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned["q"]

        if cleaned["w"] & constants.WHERE_WIKI:
            if cleaned_q:
                wiki_s = wiki_s.query(cleaned_q)

            # For a front-page non-advanced search, we want to cap the kb
            # at 10 results.
            if a == "0":
                wiki_max_results = 10
            else:
                wiki_max_results = max_results
            documents.set_count(("wiki", wiki_s), min(wiki_s.count(), wiki_max_results))

        if cleaned["w"] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(*constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            question_s = question_s.highlight(
                "question_title",
                "question_content",
                "question_answer_content",
                before_match="<b>",
                after_match="</b>",
                limit=settings.SEARCH_SUMMARY_LENGTH,
            )

            if cleaned_q:
                question_s = question_s.query(cleaned_q)
            documents.set_count(("question", question_s), min(question_s.count(), max_results))

        if cleaned["w"] & constants.WHERE_DISCUSSION:
            discussion_s = discussion_s.highlight(
                "discussion_content", before_match="<b>", after_match="</b>", limit=settings.SEARCH_SUMMARY_LENGTH
            )

            if cleaned_q:
                discussion_s = discussion_s.query(cleaned_q)
            documents.set_count(("forum", discussion_s), min(discussion_s.count(), max_results))

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)
        num_results = len(documents)

        # Get the documents we want to show and add them to
        # docs_for_page.
        documents = documents[offset : offset + results_per_page]
        docs_for_page = []
        for (kind, search_s), bounds in documents:
            search_s = search_s.values_dict()[bounds[0] : bounds[1]]
            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            # Type here is something like 'wiki', ... while doc here
            # is an ES result document.
            type_, doc = docinfo

            if type_ == "wiki":
                summary = doc["document_summary"]
                result = {
                    "url": doc["url"],
                    "title": doc["document_title"],
                    "type": "document",
                    "object": ObjectDict(doc),
                }
            elif type_ == "question":
                summary = _build_es_excerpt(doc)
                result = {
                    "url": doc["url"],
                    "title": doc["question_title"],
                    "type": "question",
                    "object": ObjectDict(doc),
                    "is_solved": doc["question_is_solved"],
                    "num_answers": doc["question_num_answers"],
                    "num_votes": doc["question_num_votes"],
                    "num_votes_past_week": doc["question_num_votes_past_week"],
                }
            else:
                summary = _build_es_excerpt(doc)
                result = {"url": doc["url"], "title": doc["post_title"], "type": "thread", "object": ObjectDict(doc)}
            result["search_summary"] = summary
            result["rank"] = rank
            result["score"] = doc._score
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({"error": _("Search Unavailable")}), mimetype=mimetype, status=503)

        if isinstance(exc, ESTimeoutError):
            statsd.incr("search.%s.timeouterror" % engine)
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr("search.%s.maxretryerror" % engine)
        elif isinstance(exc, ESException):
            statsd.incr("search.%s.elasticsearchexception" % engine)

        t = "search/mobile/down.html" if request.MOBILE else "search/down.html"
        return jingo.render(request, t, {"q": cleaned["q"]}, status=503)
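The pagination above relies on ComposedList: each enabled search registers a capped hit count, and a single page slice is translated back into per-search (start, stop) bounds so only the rows on the visible page are fetched from Elasticsearch. A minimal sketch of that idea (a hypothetical stand-in, not kitsune's actual implementation) could look like:

# Hypothetical sketch of the ComposedList idea used above: it records a
# hit count per key in display order and translates one global page slice
# into per-key (start, stop) bounds.
class ComposedListSketch(object):
    def __init__(self):
        self._counts = []  # list of (key, count) in display order

    def set_count(self, key, count):
        self._counts.append((key, count))

    def __len__(self):
        return sum(count for _key, count in self._counts)

    def __getitem__(self, slc):
        start = slc.start or 0
        stop = slc.stop if slc.stop is not None else len(self)
        out, offset = [], 0
        for key, count in self._counts:
            lo = max(start - offset, 0)
            hi = min(stop - offset, count)
            if lo < hi:
                out.append((key, (lo, hi)))
            offset += count
        return out

# With 10 wiki hits and 20 question hits, a page slice [8:12] yields the
# last two wiki rows plus the first two question rows.
composed = ComposedListSketch()
composed.set_count('wiki', 10)
composed.set_count('question', 20)
assert composed[8:12] == [('wiki', (8, 10)), ('question', (0, 2))]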
Example #35
0
File: views.py Project: yengyin/kitsune
def _search_suggestions(request, text, locale, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    :arg text: full text to search on
    :arg locale: locale to limit to
    :arg product_slugs: list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of::

        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    :returns: up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(product__in=product_slugs)
        question_s = question_s.filter(product__in=product_slugs)

    results = []
    try:
        # Search for relevant KB documents.
        query = dict(('%s__text' % field, text)
                      for field in Document.get_query_fields())
        query.update(dict(('%s__text_phrase' % field, text)
                      for field in Document.get_query_fields()))
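        # If get_query_fields() returned ('document_title',
        # 'document_content'), for example, this would build
        # {'document_title__text': text, 'document_content__text': text,
        #  'document_title__text_phrase': text,
        #  'document_content__text_phrase': text}, and query(or_=...)
        # ORs all of those clauses together.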
        filter = F()
        filter |= F(document_locale=locale)
        filter |= F(document_locale=settings.WIKI_DEFAULT_LANGUAGE)
        filter &= F(document_category__in=default_categories)
        filter &= F(document_is_archived=False)

        raw_results = (
            wiki_s.filter(filter)
                  .query(or_=query)
                  .values_dict('id')[:WIKI_RESULTS])
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Search for relevant questions.
        query = dict(('%s__text' % field, text)
                      for field in Question.get_query_fields())
        query.update(dict(('%s__text_phrase' % field, text)
                      for field in Question.get_query_fields()))

        # Note: despite its name, max_age is a cutoff timestamp
        # (now minus the allowed age), not an age in seconds.
        max_age = int(time.time()) - settings.SEARCH_DEFAULT_MAX_QUESTION_AGE
        # Filter questions by language. Questions should be either in
        # English or in the locale's language, because some questions
        # marked as English are really in other languages. The assumption
        # is that if a native speaker submits a query in a given language,
        # items written in that language will naturally match better, so
        # questions incorrectly marked as English can still be found.
        question_filter = F(question_locale=locale)
        question_filter |= F(question_locale=settings.WIKI_DEFAULT_LANGUAGE)
        question_filter &= F(updated__gte=max_age)

        raw_results = (question_s
            .query(or_=query)
            .filter(question_filter)
            .values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')
        log.debug(exc)

    return results
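
A hypothetical call site for _search_suggestions, shown only to illustrate the returned dicts; the view function name, product slug, and logging are assumptions, not kitsune code:

def suggest_before_asking(request, draft_title):
    # Suggest up to three KB articles and three questions before the
    # user files a new question.
    suggestions = _search_suggestions(
        request, text=draft_title, locale=request.LANGUAGE_CODE,
        product_slugs=['desktop'])
    for item in suggestions:
        # Each dict carries 'type', 'search_summary', 'title', 'url' and
        # 'object', per the docstring above.
        log.debug('%s: %s -> %s', item['type'], item['title'], item['url'])
    return suggestions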