Example #1
    def test_recent_helpful_votes(self):
        """Recent helpful votes are indexed properly."""
        # Create a document and verify it doesn't show up in a
        # query for recent_helpful_votes__gt=0.
        r = revision(is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(
            document_recent_helpful_votes__gt=0).count(), 0)

        # Add an unhelpful vote; it still shouldn't show up.
        helpful_vote(revision=r, helpful=False, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(Document.search().filter(
            document_recent_helpful_votes__gt=0).count(), 0)

        # Add a helpful vote created 31 days ago; it still shouldn't show up.
        created = datetime.now() - timedelta(days=31)
        helpful_vote(revision=r, helpful=True, created=created, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(Document.search().filter(
            document_recent_helpful_votes__gt=0).count(), 0)

        # Add a helpful vote created 29 days ago; it should show up now.
        created = datetime.now() - timedelta(days=29)
        helpful_vote(revision=r, helpful=True, created=created, save=True)
        r.document.save()  # Votes don't trigger a reindex.
        self.refresh()
        eq_(Document.search().filter(
            document_recent_helpful_votes__gt=0).count(), 1)
Example #2
def document(save=False, **kwargs):
    """Return an empty document with enough stuff filled out that it can be
    saved."""
    defaults = {'category': CATEGORIES[0][0], 'title': str(datetime.now())}
    defaults.update(kwargs)
    if 'slug' not in kwargs:
        defaults['slug'] = slugify(defaults['title'])
    d = Document(**defaults)
    if save:
        d.save()
    return d
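A minimal usage sketch (an assumption based on the test examples on this page, which pair this factory with a revision() factory from the same test helpers):

# Sketch only: document() and revision() are the test factories shown in these examples.
doc = document(title=u'Delete cookies', save=True)
revision(document=doc, is_approved=True, save=True)
# The slug defaults to slugify(title), e.g. u'delete-cookies'.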
Example #3
    def test_add_and_delete(self):
        """Adding a doc should add it to the search index; deleting should
        delete it."""
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().count(), 1)

        doc.delete()
        self.refresh()
        eq_(Document.search().count(), 0)
Example #4
def document(save=False, **kwargs):
    """Return an empty document with enough stuff filled out that it can be
    saved."""
    defaults = {"category": CATEGORIES[0][0], "title": str(datetime.now())}
    defaults.update(kwargs)
    if "slug" not in kwargs:
        defaults["slug"] = slugify(defaults["title"])
    d = Document(**defaults)
    if save:
        d.save()
    return d
Example #5
    def test_wiki_no_revisions(self):
        """Don't index documents without approved revisions"""
        # Create a document with no revisions and make sure the
        # document is not in the index.
        doc = document(save=True)
        self.refresh()
        eq_(Document.search().count(), 0)

        # Create a revision that's not approved and make sure the
        # document is still not in the index.
        revision(document=doc, is_approved=False, save=True)
        self.refresh()
        eq_(Document.search().count(), 0)
Example #6
File: test_es.py Project: ibai/kitsune
    def test_wiki_keywords(self):
        """Make sure updating keywords updates the index."""
        # Create a document with a revision with no keywords. It
        # shouldn't show up with a document_keywords term query for
        # 'wool' since it has no keywords.
        doc = document(title=u'wool hats')
        doc.save()
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query(document_keywords='wool').count(), 0)

        revision(document=doc, is_approved=True, keywords='wool', save=True)
        self.refresh()

        eq_(Document.search().query(document_keywords='wool').count(), 1)
Example #7
    def test_wiki_redirects(self):
        """Make sure we don't index redirects"""
        # First create a revision that doesn't have a redirect and
        # make sure it's in the index.
        doc = document(title=u"wool hats")
        doc.save()
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query("wool").count(), 1)

        # Now create a revision that is a redirect and make sure the
        # document is removed from the index.
        revision(document=doc, content=REDIRECT_CONTENT, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query("wool").count(), 0)
Example #8
File: __init__.py Project: zzdjk6/kuma
def document(save=False, **kwargs):
    """Return an empty document with enough stuff filled out that it can be
    saved."""
    defaults = {
        'category': CATEGORIES[0][0],
        'title': str(datetime.now()),
        'is_redirect': 0
    }
    defaults.update(kwargs)
    if 'slug' not in kwargs:
        defaults['slug'] = slugify(defaults['title'])
    d = Document(**defaults)
    if save:
        d.save()
    return d
Example #10
    def test_document_translate_fallback(self):
        d_en = document(locale='en-US',
                        title=u'How to delete Google Chrome?',
                        save=True)
        invalid_translate = reverse('wiki.document', locale='tr',
                                    args=[d_en.slug])
        self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #11
    def test_document_translate_fallback(self):
        d_en = document(locale='en-US',
                        title=u'How to delete Google Chrome?',
                        save=True)
        invalid_translate = reverse('wiki.document',
                                    locale='tr',
                                    args=[d_en.slug])
        self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #12
def document(**kwargs):
    """Return an empty document with enough stuff filled out that it can be
    saved."""
    defaults = {'category': CATEGORIES[0][0], 'title': str(datetime.now())}
    defaults.update(kwargs)
    if 'slug' not in kwargs:
        defaults['slug'] = slugify(defaults['title'])
    return Document(**defaults)
Example #13
def _postdoc(request, is_image):
    docpath = u'{0}/{1}'.format(settings.MEDIA_ROOT, is_image and u'images'
                                or u'documents')

    if not os.path.exists(docpath):
        os.mkdir(docpath)

    f = request.FILES[u'file']

    fd = open(u'{0}/{1}'.format(docpath, f.name), u'wb+')
    for chunk in f.chunks():
        fd.write(chunk)
    fd.close()

    url = u'{0}/{1}/{2}'.format(settings.MEDIA_URL, is_image and u'images'
                                or u'documents', f.name)

    try:
        doc = Document.objects.get(path=url)

    except Document.DoesNotExist:
        doc = Document()

        doc.is_image = is_image
        doc.path = url

        doc.wikipath = request.POST[u'page']
        doc.save()

    return HttpResponse(doc.path)
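The `is_image and u'images' or u'documents'` expression above is the old and/or ternary idiom; a hedged sketch of the equivalent conditional expression (a readability note, not part of the original code):

# Equivalent, more readable spelling of the and/or idiom used above:
subdir = u'images' if is_image else u'documents'
docpath = u'{0}/{1}'.format(settings.MEDIA_ROOT, subdir)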
Example #14
File: test_es.py Project: ibai/kitsune
    def test_wiki_redirects(self):
        """Make sure we don't index redirects"""
        # First create a revision that doesn't have a redirect and
        # make sure it's in the index.
        doc = document(title=u'wool hats')
        doc.save()
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().query('wool').count(), 1)

        # Now create a revision that is a redirect and make sure the
        # document is removed from the index.
        revision(document=doc,
                 content=REDIRECT_CONTENT,
                 is_approved=True,
                 save=True)
        self.refresh()
        eq_(Document.search().query('wool').count(), 0)
Example #15
File: facets.py Project: ibai/kitsune
def _es_documents_for(locale, topics, products):
    """ES implementation of documents_for."""
    s = Document.search().values_dict(
        'id', 'document_title', 'url').filter(document_locale=locale)
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
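A hedged usage sketch, assuming the topic() and product() test factories that appear in the other examples on this page; values_dict() makes each hit a plain dict with the requested keys:

# Sketch only: topic() and product() are the test factories used elsewhere here.
t = topic(slug=u'cookies', save=True)
p = product(slug=u'desktop', save=True)
docs = _es_documents_for(locale=u'en-US', topics=[t], products=[p])
for d in docs:
    print d['id'], d['document_title'], d['url']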
Example #16
    def test_translations_get_parent_tags(self):
        doc1 = document(title=u'Audio too loud')
        doc1.save()
        revision(document=doc1, is_approved=True, save=True)
        doc1.tags.add(u'desktop')
        doc1.tags.add(u'windows')

        doc2 = document(title=u'Audio too loud bork bork',
                        parent=doc1)
        doc2.save()
        revision(document=doc2, is_approved=True, save=True)
        doc2.tags.add(u'badtag')

        # Verify the parent has the right tags.
        doc_dict = Document.extract_document(doc1.id)
        eq_(doc_dict['tag'], [u'desktop', u'windows'])

        # Verify the translation has the parent's tags.
        doc_dict = Document.extract_document(doc2.id)
        eq_(doc_dict['tag'], [u'desktop', u'windows'])
Example #17
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.locale)
    try:
        query = dict(('%s__text' % field, term)
                     for field in Document.get_query_fields())
        wiki_s = (Document.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__text' % field, term)
                     for field in Question.get_query_fields())
        question_s = (Question.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except (ESTimeoutError, ESMaxRetryError, ESException):
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    urlize = lambda r: u'https://%s%s' % (site, r['url'])
    titleize = lambda r: (r['document_title'] if 'document_title' in r
                          else r['question_title'])
    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)
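The response body follows the OpenSearch suggestions format assembled in `data` above; an illustrative shape (titles and paths are invented):

# Illustrative response for ?q=cookies:
# ["cookies",
#  ["Delete cookies", "Block cookies"],
#  [],
#  ["https://<site>/en-US/kb/delete-cookies",
#   "https://<site>/en-US/kb/block-cookies"]]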
Example #18
File: views.py Project: bituka/kitsune
def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    site = Site.objects.get_current()
    locale = locale_or_default(request.LANGUAGE_CODE)
    try:
        query = dict(('%s__text' % field, term)
                     for field in Document.get_query_fields())
        wiki_s = (Document.search()
                  .filter(document_is_archived=False)
                  .filter(document_locale=locale)
                  .values_dict('document_title', 'url')
                  .query(or_=query)[:5])

        query = dict(('%s__text' % field, term)
                     for field in Question.get_query_fields())
        question_s = (Question.search()
                      .filter(question_has_helpful=True)
                      .values_dict('question_title', 'url')
                      .query(or_=query)[:5])

        results = list(chain(question_s, wiki_s))
    except (ESTimeoutError, ESMaxRetryError, ESException):
        # If we have ES problems, we just send back an empty result
        # set.
        results = []

    urlize = lambda r: u'https://%s%s' % (site, r['url'])
    titleize = lambda r: (r['document_title'] if 'document_title' in r
                          else r['question_title'])
    data = [term,
            [titleize(r) for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)
Example #19
def _postdoc(request, is_image):
    docpath = u'{0}/{1}'.format(
            settings.MEDIA_ROOT,
            is_image and u'images' or u'documents'
    )

    if not os.path.exists(docpath):
        os.mkdir(docpath)

    f = request.FILES[u'file']

    fd = open(u'{0}/{1}'.format(docpath, f.name), u'wb+')
    for chunk in f.chunks():
        fd.write(chunk)
    fd.close()

    url = u'{0}/{1}/{2}'.format(
            settings.MEDIA_URL,
            is_image and u'images' or u'documents',
            f.name)

    try:
        doc = Document.objects.get(path=url)

    except Document.DoesNotExist:
        doc = Document()

        doc.is_image = is_image
        doc.path = url

        doc.wikipath = request.POST[u'page']
        doc.save()

    return HttpResponse(doc.path)
Example #20
    def test_translations_get_parent_tags(self):
        doc1 = document(title=u"Audio too loud")
        doc1.save()
        revision(document=doc1, is_approved=True, save=True)
        doc1.topics.add(topic(slug="cookies", save=True))
        doc1.topics.add(topic(slug="general", save=True))
        doc1.products.add(product(slug="desktop", save=True))

        doc2 = document(title=u"Audio too loud bork bork", parent=doc1)
        doc2.save()
        revision(document=doc2, is_approved=True, save=True)
        doc2.tags.add(u"badtag")

        # Verify the parent has the right tags.
        doc_dict = Document.extract_document(doc1.id)
        eq_(doc_dict["document_topic"], [u"cookies", u"general"])
        eq_(doc_dict["document_product"], [u"desktop"])

        # Verify the translation has the parent's tags.
        doc_dict = Document.extract_document(doc2.id)
        eq_(doc_dict["document_topic"], [u"cookies", u"general"])
        eq_(doc_dict["document_product"], [u"desktop"])
Example #21
    def test_wiki_tags(self):
        """Make sure that adding tags to a Document causes it to
        refresh the index.

        """
        tag = u'hiphop'
        eq_(Document.search().filter(tag=tag).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(tag=tag).count(), 0)
        doc.tags.add(tag)
        self.refresh()
        eq_(Document.search().filter(tag=tag).count(), 1)
        doc.tags.remove(tag)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(tag=tag).count(), 0)
Example #22
File: facets.py Project: icaaq/kitsune
def _es_documents_for(locale, topics, products=None):
    """ES implementation of documents_for."""
    s = (Document.search()
        .values_dict('id', 'document_title', 'url', 'document_parent_id')
        .filter(document_locale=locale, document_is_archived=False,
                document_category__in=settings.IA_DEFAULT_CATEGORIES))

    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
Example #23
    def test_wiki_products(self):
        """Make sure that adding products to a Document causes it to
        refresh the index.

        """
        p = product(slug=u'desktop', save=True)
        eq_(Document.search().filter(product=p.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(product=p.slug).count(), 0)
        doc.products.add(p)
        self.refresh()
        eq_(Document.search().filter(product=p.slug).count(), 1)
        doc.products.remove(p)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(product=p.slug).count(), 0)
Example #24
File: test_es.py Project: ibai/kitsune
    def test_wiki_products(self):
        """Make sure that adding products to a Document causes it to
        refresh the index.

        """
        p = product(slug=u'desktop', save=True)
        eq_(Document.search().filter(document_product=p.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(document_product=p.slug).count(), 0)
        doc.products.add(p)
        self.refresh()
        eq_(Document.search().filter(document_product=p.slug).count(), 1)
        doc.products.remove(p)
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(document_product=p.slug).count(), 0)
Example #25
File: test_es.py Project: ibai/kitsune
    def test_wiki_topics(self):
        """Make sure that adding topics to a Document causes it to
        refresh the index.

        """
        t = topic(slug=u'hiphop', save=True)
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc.topics.add(t)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 1)
        doc.topics.clear()
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
Example #26
File: test_es.py Project: ibai/kitsune
    def test_translations_get_parent_tags(self):
        doc1 = document(title=u'Audio too loud')
        doc1.save()
        revision(document=doc1, is_approved=True, save=True)
        doc1.topics.add(topic(slug='cookies', save=True))
        doc1.topics.add(topic(slug='general', save=True))
        doc1.products.add(product(slug='desktop', save=True))

        doc2 = document(title=u'Audio too loud bork bork', parent=doc1)
        doc2.save()
        revision(document=doc2, is_approved=True, save=True)
        doc2.tags.add(u'badtag')

        # Verify the parent has the right tags.
        doc_dict = Document.extract_document(doc1.id)
        eq_(doc_dict['document_topic'], [u'cookies', u'general'])
        eq_(doc_dict['document_product'], [u'desktop'])

        # Verify the translation has the parent's tags.
        doc_dict = Document.extract_document(doc2.id)
        eq_(doc_dict['document_topic'], [u'cookies', u'general'])
        eq_(doc_dict['document_product'], [u'desktop'])
Example #27
    def test_wiki_topics(self):
        """Make sure that adding topics to a Document causes it to
        refresh the index.

        """
        t = topic(slug=u"hiphop", save=True)
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc = document(save=True)
        revision(document=doc, is_approved=True, save=True)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
        doc.topics.add(t)
        self.refresh()
        eq_(Document.search().filter(document_topic=t.slug).count(), 1)
        doc.topics.clear()
        self.refresh()

        # Make sure the document itself is still there and that we didn't
        # accidentally delete it through screwed up signal handling:
        eq_(Document.search().filter().count(), 1)

        eq_(Document.search().filter(document_topic=t.slug).count(), 0)
Example #28
    def test_translations_get_parent_tags(self):
        doc1 = document(title=u'Audio too loud')
        doc1.save()
        revision(document=doc1, is_approved=True, save=True)
        doc1.topics.add(topic(slug='cookies', save=True))
        doc1.topics.add(topic(slug='general', save=True))
        doc1.products.add(product(slug='desktop', save=True))

        doc2 = document(title=u'Audio too loud bork bork',
                        parent=doc1)
        doc2.save()
        revision(document=doc2, is_approved=True, save=True)
        doc2.tags.add(u'badtag')

        # Verify the parent has the right tags.
        doc_dict = Document.extract_document(doc1.id)
        eq_(doc_dict['topic'], [u'cookies', u'general'])
        eq_(doc_dict['product'], [u'desktop'])

        # Verify the translation has the parent's tags.
        doc_dict = Document.extract_document(doc2.id)
        eq_(doc_dict['topic'], [u'cookies', u'general'])
        eq_(doc_dict['product'], [u'desktop'])
Example #29
class UntranslatedReadout(Readout):
    title = _lazy(u'Untranslated')
    short_title = _lazy(u'Untranslated')
    details_link_text = _lazy(u'All untranslated articles...')
    slug = 'untranslated'
    column4_label = _lazy(u'Updated')

    def _query_and_params(self, max):
        # Incidentally, we tried this both as a left join and as a search
        # against an inner query returning translated docs, and the left join
        # yielded a faster-looking plan (on a production corpus).
        #
        # Find non-archived, localizable documents in categories 10,
        # 20 and 60 having at least one ready-for-localization
        # revision. Of those, show the ones that have no translation.
        return ('SELECT parent.slug, parent.title, '
                'wiki_revision.reviewed, dashboards_wikidocumentvisits.visits '
                'FROM wiki_document parent '
                'INNER JOIN wiki_revision ON '
                'parent.latest_localizable_revision_id=wiki_revision.id '
                'LEFT JOIN wiki_document translated ON '
                'parent.id=translated.parent_id AND translated.locale=%s '
                'LEFT JOIN dashboards_wikidocumentvisits ON '
                'parent.id=dashboards_wikidocumentvisits.document_id AND '
                'dashboards_wikidocumentvisits.period=%s '
                'WHERE '
                'translated.id IS NULL AND parent.is_localizable AND '
                'parent.category in (10, 20, 60) AND '
                'parent.locale=%s AND NOT parent.is_archived '
                'AND wiki_revision.content NOT LIKE "REDIRECT%%" ' +
                self._order_clause() + self._limit_clause(max),
                (self.locale, LAST_30_DAYS, settings.WIKI_DEFAULT_LANGUAGE))

    def _order_clause(self):
        return ('ORDER BY wiki_revision.reviewed DESC, parent.title ASC'
                if self.mode == MOST_RECENT else
                'ORDER BY dashboards_wikidocumentvisits.visits DESC, '
                'parent.title ASC')

    def _format_row(self, (slug, title, reviewed, visits)):
        # Run the data through the model to (potentially) format it and
        # take advantage of SPOTs (like for get_absolute_url()):
        d = Document(slug=slug,
                     title=title,
                     locale=settings.WIKI_DEFAULT_LANGUAGE)
        return dict(title=d.title,
                    url=d.get_absolute_url(),
                    visits=visits,
                    updated=reviewed)
Example #30
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(_fix_value_dicts(Document.search().values_dict().order_by("-indexed_on")[:50]))

    last_50_reviewed = list(
        Document.uncached.filter(current_revision__is_approved=True).order_by("-current_revision__reviewed")[:50]
    )

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render_to_response(
        "search/admin/troubleshooting.html",
        {"title": "Index Troubleshooting", "diffs": diff_list},
        RequestContext(request, {}),
    )
Example #31
    def reload_period_from_analytics(cls, period):
        """Replace the stats for the given period from Google Analytics."""
        counts = googleanalytics.pageviews_by_document(*period_dates(period))
        if counts:
            # Delete and remake the rows:
            # Horribly inefficient until
            # http://code.djangoproject.com/ticket/9519 is fixed.
            cls.objects.filter(period=period).delete()
            for doc_id, visits in counts.iteritems():
                cls.objects.create(document=Document(pk=doc_id), visits=visits,
                                   period=period)
        else:
            # Don't erase interesting data if there's nothing to replace it:
            log.warning('Google Analytics returned no interesting data,'
                        ' so I kept what I had.')
Example #32
    def reload_period_from_json(cls, period, json_data):
        """Replace the stats for the given period with the given JSON."""
        counts = cls._visit_counts(json_data)
        if counts:
            # Delete and remake the rows:
            # Horribly inefficient until
            # http://code.djangoproject.com/ticket/9519 is fixed.
            cls.objects.filter(period=period).delete()
            for doc_id, visits in counts.iteritems():
                cls.objects.create(document=Document(pk=doc_id), visits=visits,
                                   period=period)
        else:
            # Don't erase interesting data if there's nothing to replace it:
            log.warning('WebTrends returned no interesting data, so I kept '
                        'what I had.')
Example #33
File: facets.py Project: bituka/kitsune
def _es_documents_for(locale, topics=None, products=None):
    """ES implementation of documents_for."""
    s = (Document.search().values_dict(
        'id', 'document_title', 'url', 'document_parent_id',
        'document_summary').filter(
            document_locale=locale,
            document_is_archived=False,
            document_category__in=settings.IA_DEFAULT_CATEGORIES))

    for topic in topics or []:
        s = s.filter(topic=topic.slug)
    for product in products or []:
        s = s.filter(product=product.slug)

    return list(s.order_by('-document_recent_helpful_votes')[:100])
Example #34
def pageviews_by_document(start_date, end_date):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
    webtrends.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    start_index = 1
    max_results = 10000

    while True:  # To deal with pagination

        @retry_503
        def _make_request():
            return request.get(
                ids='ga:' + profile_id,
                start_date=str(start_date),
                end_date=str(end_date),
                metrics='ga:pageviews',
                dimensions='ga:pagePath',
                filters='ga:pagePathLevel2==/kb/;ga:pagePathLevel1==/en-US/',
                max_results=max_results,
                start_index=start_index).execute()

        results = _make_request()

        for result in results['rows']:
            path = result[0]
            pageviews = int(result[1])
            doc = Document.from_url(path, id_only=True, check_host=False)
            if not doc:
                continue

            # The same document can appear multiple times due to url params.
            counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

        # Move to next page of results.
        start_index += max_results
        if start_index > results['totalResults']:
            break

    return counts
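A hedged usage sketch; the helper takes a start and end date and returns the {document_id: pageviews} mapping described in the docstring:

# Sketch only: the date range is illustrative.
from datetime import date

counts = pageviews_by_document(date(2012, 1, 1), date(2012, 1, 31))
for doc_id, views in counts.iteritems():
    print doc_id, views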
Example #36
File: admin.py Project: ibai/kitsune
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(
        _fix_value_dicts(
            Document.search().values_dict().order_by('-indexed_on')[:50]))

    last_50_reviewed = list(
        Document.uncached.filter(current_revision__is_approved=True).order_by(
            '-current_revision__reviewed')[:50])

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render_to_response('search/admin/troubleshooting.html', {
        'title': 'Index Troubleshooting',
        'diffs': diff_list,
    }, RequestContext(request, {}))
Example #37
def _create_document(title='Test Document'):
    d = Document(title=title, html='<div>Lorem Ipsum</div>',
                 category=1, locale='en-US')
    d.save()
    r = Revision(document=d, keywords='key1, key2', summary='lipsum',
                 content='<div>Lorem Ipsum</div>', creator_id=118577,
                 significance=SIGNIFICANCES[0][0])
    r.save()
    d.current_revision = r
    d.save()
    return d
Example #38
File: facets.py Project: Owen66/kitsune
def products_for(topics):
    """Returns a list of products that apply to passed in topics.

    :arg topics: a list of Topic instances
    """
    product_field = 'document_product'

    s = Document.search().values_dict('id')
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    s = s.facet(product_field, filtered=True)
    facet_counts = s.facet_counts()[product_field]

    products = Product.objects.filter(
        slug__in=[f['term'] for f in facet_counts]).filter(visible=True)

    return products
Example #39
File: facets.py Project: Owen66/kitsune
def topics_for(products):
    """Returns a list of topics that apply to passed in products.

    :arg products: a list of Product instances
    """
    topic_field = 'document_topic'

    s = Document.search().values_dict('id')
    for product in products:
        s = s.filter(document_product=product.slug)
    s = s.facet(topic_field, filtered=True)
    facet_counts = s.facet_counts()[topic_field]

    topics = Topic.objects.filter(
        slug__in=[f['term'] for f in facet_counts]).filter(visible=True)

    return topics
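products_for() and topics_for() are mirror-image facet helpers; a hedged sketch of combining them (assuming existing Topic and Product rows with these slugs):

# Sketch only: the slugs are illustrative.
cookies = Topic.objects.get(slug=u'cookies')
print [p.slug for p in products_for(topics=[cookies])]

desktop = Product.objects.get(slug=u'desktop')
print [t.slug for t in topics_for(products=[desktop])]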
Example #40
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = _fix_value_dicts(Document.search()
                                               .values_dict()
                                               .order_by('-indexed_on')[:50])

    last_50_reviewed = (Document.uncached
                                .filter(current_revision__is_approved=True)
                                .order_by('-current_revision__reviewed')[:50])

    return render_to_response(
        'search/admin/troubleshooting.html',
        {'title': 'Index Troubleshooting',
         'last_50_indexed': last_50_indexed,
         'last_50_reviewed': last_50_reviewed
         },
        RequestContext(request, {}))
Example #41
    def _visit_counts(cls, json_data):
        """Given WebTrends JSON data, return a dict of doc IDs and visits:

            {document ID: number of visits, ...}

        If there is no interesting data in the given JSON, return {}.

        """
        # We're very defensive here, as WebTrends has been known to return
        # invalid garbage of various sorts.
        try:
            data = json.loads(json_data)['data']
        except (ValueError, KeyError, TypeError):
            raise StatsException('Error extracting data from WebTrends JSON')

        try:
            pages = (data[data.keys()[0]]['SubRows'] if data.keys()
                     else {}).iteritems()
        except (AttributeError, IndexError, KeyError, TypeError):
            raise StatsException('Error extracting pages from WebTrends data')

        counts = {}
        for url, page_info in pages:
            doc = Document.from_url(
                url,
                required_locale=settings.LANGUAGE_CODE,
                id_only=True,
                check_host=False)
            if not doc:
                continue

            # Get visit count:
            try:
                visits = int(page_info['measures']['Visits'])
            except (ValueError, KeyError, TypeError):
                continue

            # Sometimes WebTrends repeats a URL modulo a space, etc. These can
            # resolve to the same document. An arbitrary one wins.
            # TODO: Should we be summing these?
            if doc.pk in counts:
                log.info('WebTrends has the following duplicate URL for this '
                         'document: %s' % url)
            counts[doc.pk] = visits
        return counts
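Reading back from the parsing code above, the WebTrends JSON it expects looks roughly like the following (a hedged reconstruction limited to the keys the code actually touches; paths and numbers are illustrative):

# {"data": {"<report id>": {"SubRows": {
#     "/en-US/kb/delete-cookies": {"measures": {"Visits": "42"}},
#     "/en-US/kb/block-cookies":  {"measures": {"Visits": "7"}}
# }}}}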
Example #42
File: admin.py Project: LASarkar/kitsune
def troubleshooting_view(request):
    # Build a list of the most recently indexed 50 wiki documents.
    last_50_indexed = list(_fix_value_dicts(Document.search()
                                            .values_dict()
                                            .order_by('-indexed_on')[:50]))

    last_50_reviewed = list(Document.uncached
                            .filter(current_revision__is_approved=True)
                            .order_by('-current_revision__reviewed')[:50])

    diff_list = diff_it_for_realz(last_50_indexed, last_50_reviewed)

    return render(
        request,
        'admin/search_troubleshooting.html',
        {'title': 'Index Troubleshooting',
         'diffs': diff_list,
         })
Example #43
def documents_for(locale, topics, products=None):
    """Returns a list of articles that apply to passed in topics and products.

    :arg locale: the locale
    :arg topics: a list of Topic instances
    :arg products: (optional) a list of Product instances

    The articles are returned as a list of dicts with the following keys:
        id
        document_title
        url
    """
    s = Document.search().values_dict('id', 'document_title', 'url')
    for topic in topics:
        s = s.filter(document_topic=topic.slug)
    for product in products or []:
        s = s.filter(document_product=product.slug)

    return list(s)
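A hedged usage sketch, assuming Topic and Product instances like those used in the facet helpers above:

# Sketch only: returns dicts with 'id', 'document_title' and 'url', as documented.
articles = documents_for(
    locale=u'en-US',
    topics=[Topic.objects.get(slug=u'cookies')],
    products=[Product.objects.get(slug=u'desktop')])
for article in articles:
    print article['document_title'], article['url']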
Example #44
def _search_suggestions(request, text, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    :arg text: full text to search on
    :arg locale: locale to limit to
    :arg tags: list of tags to filter questions on
    :arg product_slugs: list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of::

        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    :returns: up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    results = []
    try:
        query = dict(('%s__text' % field, text)
                      for field in Document.get_query_fields())
        raw_results = (
            wiki_s.filter(document_locale=locale,
                          document_category__in=default_categories)
                  .query(or_=query)
                  .values_dict('id')[:WIKI_RESULTS])
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision')
                                       .get(pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                            doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        query = dict(('%s__text' % field, text)
                      for field in Question.get_query_fields())
        raw_results = (question_s.query(or_=query)
                                 .values_dict('id')[:QUESTIONS_RESULTS])
        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException) as exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')
        log.debug(exc)

    return results
Example #45
File: views.py Project: ibai/kitsune
def _search_suggestions(request, query, locale, tags, product_slugs):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to
    tags -- list of tags to filter questions on
    product_slugs -- list of product slugs to filter articles on
        (["desktop", "mobile", ...])

    Items are dicts of:
        {
            'type':
            'search_summary':
            'title':
            'url':
            'object':
        }

    Returns up to 3 wiki pages, then up to 3 questions.

    """
    # TODO: this can be reworked to pull data from ES rather than
    # hit the db.
    question_s = Question.search()
    wiki_s = Document.search()

    # Max number of search results per type.
    WIKI_RESULTS = QUESTIONS_RESULTS = 3
    default_categories = settings.SEARCH_DEFAULT_CATEGORIES

    # Apply product filters
    if product_slugs:
        wiki_s = wiki_s.filter(document_product__in=product_slugs)
    if tags:
        question_s = question_s.filter(question_tag__in=tags)

    try:
        raw_results = (wiki_s.filter(
            document_locale=locale,
            document_category__in=default_categories).query(query).values_dict(
                'id')[:WIKI_RESULTS])

        results = []
        for r in raw_results:
            try:
                doc = (Document.objects.select_related('current_revision').get(
                    pk=r['id']))
                results.append({
                    'search_summary': clean_excerpt(
                        doc.current_revision.summary),
                    'url': doc.get_absolute_url(),
                    'title': doc.title,
                    'type': 'document',
                    'object': doc,
                })
            except Document.DoesNotExist:
                pass

        # Note: Questions app is en-US only.
        raw_results = (
            question_s.query(query).values_dict('id')[:QUESTIONS_RESULTS])

        for r in raw_results:
            try:
                q = Question.objects.get(pk=r['id'])
                results.append({
                    'search_summary': clean_excerpt(q.content[0:500]),
                    'url': q.get_absolute_url(),
                    'title': q.title,
                    'type': 'question',
                    'object': q,
                    'is_solved': q.is_solved,
                    'num_answers': q.num_answers,
                    'num_votes': q.num_votes,
                    'num_votes_past_week': q.num_votes_past_week
                })
            except Question.DoesNotExist:
                pass

    except (ESTimeoutError, ESMaxRetryError, ESException), exc:
        if isinstance(exc, ESTimeoutError):
            statsd.incr('questions.suggestions.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('questions.suggestions.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('questions.suggestions.elasticsearchexception')

        return []

    return results
Example #46
    def __iter__(self):
        from wiki.models import Document

        input = html5lib_Filter.__iter__(self)

        # Pass #1: Gather all the link URLs and prepare annotations
        links = dict()
        buffer = []
        for token in input:
            buffer.append(token)
            if ('StartTag' == token['type'] and 'a' == token['name']):
                attrs = dict(token['data'])
                if not 'href' in attrs:
                    continue

                href = attrs['href']
                if href.startswith(self.base_url):
                    # Squash site-absolute URLs to site-relative paths.
                    href = '/%s' % href[len(self.base_url):]

                # Prepare annotations record for this path.
                links[href] = dict(classes=[])

        # Run through all the links and check for annotatable conditions.
        for href in links.keys():

            # Is this an external URL?
            is_external = False
            for prefix in self.EXTERNAL_PREFIXES:
                if href.startswith(prefix):
                    is_external = True
                    break
            if is_external:
                links[href]['classes'].append('external')
                continue

            # TODO: Should this also check for old-school mindtouch URLs? Or
            # should we encourage editors to convert to new-style URLs to take
            # advantage of link annotation? (I'd say the latter)

            # Is this a kuma doc URL?
            if '/docs/' in href:

                # Check if this is a special docs path that's exempt from "new"
                skip = False
                for path in DOC_SPECIAL_PATHS:
                    if '/docs/%s' % path in href:
                        skip = True
                if skip:
                    continue

                href_locale, href_path = href.split(u'/docs/', 1)
                if href_locale.startswith(u'/'):
                    href_locale = href_locale[1:]

                if '#' in href_path:
                    # If present, discard the hash anchor
                    href_path, _, _ = href_path.partition('#')

                # Handle any URL-encoded UTF-8 characters in the path
                href_path = href_path.encode('utf-8', 'ignore')
                href_path = urllib.unquote(href_path)
                href_path = href_path.decode('utf-8', 'ignore')

                # Try to sort out the locale and slug through some of our
                # redirection logic.
                locale, slug, needs_redirect = (
                    Document.locale_and_slug_from_path(
                        href_path, path_locale=href_locale))

                # Does this locale and slug correspond to an existing document?
                # If not, mark it as a "new" link.
                #
                # TODO: Should these DB queries be batched up into one big
                # query? A page with hundreds of links will fire off hundreds
                # of queries
                ct = Document.objects.filter(locale=locale, slug=slug).count()
                if ct == 0:
                    links[href]['classes'].append('new')

        # Pass #2: Filter the content, annotating links
        for token in buffer:
            if ('StartTag' == token['type'] and 'a' == token['name']):
                attrs = dict(token['data'])

                if 'href' in attrs:

                    href = attrs['href']
                    if href.startswith(self.base_url):
                        # Squash site-absolute URLs to site-relative paths.
                        href = '/%s' % href[len(self.base_url):]

                    if href in links:
                        # Update class names on this link element.
                        if 'class' in attrs:
                            classes = set(attrs['class'].split(u' '))
                        else:
                            classes = set()
                        classes.update(links[href]['classes'])
                        if classes:
                            attrs['class'] = u' '.join(classes)

                token['data'] = attrs.items()

            yield token
Example #47
File: content.py Project: arkanDEV/kuma
    def __iter__(self):
        from wiki.models import Document

        input = html5lib_Filter.__iter__(self)

        # Pass #1: Gather all the link URLs and prepare annotations
        links = dict()
        buffer = []
        for token in input:
            buffer.append(token)
            if ('StartTag' == token['type'] and 'a' == token['name']):
                attrs = dict(token['data'])
                if not 'href' in attrs:
                    continue

                href = attrs['href']
                if href.startswith(self.base_url):
                    # Squash site-absolute URLs to site-relative paths.
                    href = '/%s' % href[len(self.base_url):]

                # Prepare annotations record for this path.
                links[href] = dict(classes=[])

        needs_existence_check = defaultdict(lambda: defaultdict(set))

        # Run through all the links and check for annotatable conditions.
        for href in links.keys():

            # Is this an external URL?
            is_external = False
            for prefix in self.EXTERNAL_PREFIXES:
                if href.startswith(prefix):
                    is_external = True
                    break
            if is_external:
                links[href]['classes'].append('external')
                continue

            # TODO: Should this also check for old-school mindtouch URLs? Or
            # should we encourage editors to convert to new-style URLs to take
            # advantage of link annotation? (I'd say the latter)

            # Is this a kuma doc URL?
            if '/docs/' in href:

                # Check if this is a special docs path that's exempt from "new"
                skip = False
                for path in DOC_SPECIAL_PATHS:
                    if '/docs/%s' % path in href:
                        skip = True
                if skip:
                    continue

                href_locale, href_path = href.split(u'/docs/', 1)
                if href_locale.startswith(u'/'):
                    href_locale = href_locale[1:]

                if '#' in href_path:
                    # If present, discard the hash anchor
                    href_path, _, _ = href_path.partition('#')

                # Handle any URL-encoded UTF-8 characters in the path
                href_path = href_path.encode('utf-8', 'ignore')
                href_path = urllib.unquote(href_path)
                href_path = href_path.decode('utf-8', 'ignore')

                # Try to sort out the locale and slug through some of our
                # redirection logic.
                locale, slug, needs_redirect = (
                    Document.locale_and_slug_from_path(
                        href_path, path_locale=href_locale))

                # Gather up this link for existence check
                needs_existence_check[locale.lower()][slug.lower()].add(href)

        # Perform existence checks for all the links, using one DB query per
        # locale for all the candidate slugs.
        for locale, slug_hrefs in needs_existence_check.items():

            existing_slugs = (Document.objects.filter(
                locale=locale,
                slug__in=slug_hrefs.keys()).values_list('slug', flat=True))

            # Remove the slugs that pass existence check.
            for slug in existing_slugs:
                lslug = slug.lower()
                if lslug in slug_hrefs:
                    del slug_hrefs[lslug]

            # Mark all the links whose slugs did not come back from the DB
            # query as "new"
            for slug, hrefs in slug_hrefs.items():
                for href in hrefs:
                    links[href]['classes'].append('new')

        # Pass #2: Filter the content, annotating links
        for token in buffer:
            if ('StartTag' == token['type'] and 'a' == token['name']):
                attrs = dict(token['data'])

                if 'href' in attrs:

                    href = attrs['href']
                    if href.startswith(self.base_url):
                        # Squash site-absolute URLs to site-relative paths.
                        href = '/%s' % href[len(self.base_url):]

                    if href in links:
                        # Update class names on this link element.
                        if 'class' in attrs:
                            classes = set(attrs['class'].split(u' '))
                        else:
                            classes = set()
                        classes.update(links[href]['classes'])
                        if classes:
                            attrs['class'] = u' '.join(classes)

                token['data'] = attrs.items()

            yield token
Example #48
def reindex_kb():
    """Reindex wiki_document."""
    index_task.delay(Document, Document.get_indexable())
Example #49
class UntranslatedReadout(Readout):
    title = _lazy(u'Untranslated')
    description = _lazy(
        u'This indicates there are no approved translations of these articles. '
        'Some of the articles may have proposed translations waiting to be '
        'reviewed and will appear in the Unreviewed Changes section as well.')
    short_title = _lazy(u'Untranslated')
    details_link_text = _lazy(u'All untranslated articles...')
    slug = 'untranslated'
    column4_label = _lazy(u'Updated')

    def _query_and_params(self, max):
        # Filter by product if specified.
        if self.product:
            extra_joins = PRODUCT_FILTER
            params = (self.locale, LAST_30_DAYS, self.product.id,
                      settings.WIKI_DEFAULT_LANGUAGE)
        else:
            extra_joins = ''
            params = (self.locale, LAST_30_DAYS,
                      settings.WIKI_DEFAULT_LANGUAGE)

        # Incidentally, we tried this both as a left join and as a search
        # against an inner query returning translated docs, and the left join
        # yielded a faster-looking plan (on a production corpus).
        #
        # Find non-archived, localizable documents in categories 10,
        # 20 and 60 having at least one ready-for-localization
        # revision. Of those, show the ones that have no translation.
        query = (
            'SELECT engdoc.slug, engdoc.title, '
            'wiki_revision.reviewed, dashboards_wikidocumentvisits.visits '
            'FROM wiki_document engdoc '
            'INNER JOIN wiki_revision ON '
            'engdoc.latest_localizable_revision_id=wiki_revision.id '
            'LEFT JOIN wiki_document translated ON '
            'engdoc.id=translated.parent_id AND translated.locale=%s '
            'LEFT JOIN dashboards_wikidocumentvisits ON '
            'engdoc.id=dashboards_wikidocumentvisits.document_id AND '
            'dashboards_wikidocumentvisits.period=%s ' + extra_joins + 'WHERE '
            '(translated.id IS NULL OR translated.current_revision_id IS NULL) '
            'AND engdoc.is_localizable AND '
            'engdoc.category in (10, 20, 60) AND '
            'engdoc.locale=%s AND NOT engdoc.is_archived '
            'AND wiki_revision.content NOT LIKE "REDIRECT%%" ' +
            self._order_clause() + self._limit_clause(max))

        return query, params

    def _order_clause(self):
        return ('ORDER BY wiki_revision.reviewed DESC, engdoc.title ASC'
                if self.mode == MOST_RECENT else
                'ORDER BY dashboards_wikidocumentvisits.visits DESC, '
                'engdoc.title ASC')

    def _format_row(self, (slug, title, reviewed, visits)):
        # Run the data through the model to (potentially) format it and
        # take advantage of SPOTs (like for get_absolute_url()):
        d = Document(slug=slug,
                     title=title,
                     locale=settings.WIKI_DEFAULT_LANGUAGE)
        return dict(title=d.title,
                    url=d.get_absolute_url(),
                    visits=visits,
                    updated=reviewed)
Example #50
File: views.py Project: klrmn/kitsune
def search(request, template=None):
    """ES-specific search view"""

    if (waffle.flag_is_active(request, 'esunified') or
        request.GET.get('esunified')):
        return search_with_es_unified(request, template)

    start = time.time()

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = (map(int, r.getlist('category')) or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', category)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    # TODO: Rewrite so SearchForm is unbound initially and we can use
    # `initial` on the form fields.
    if 'include_archived' not in r:
        r['include_archived'] = False

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype,
                status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
                                   (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (datetime.utcnow() +
                              timedelta(
                                minutes=settings.SEARCH_CACHE_PERIOD)) \
                              .strftime(expires_fmt)
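        # For example, with SEARCH_CACHE_PERIOD = 15 this yields
        # "Cache-Control: max-age=900" and an Expires stamp 15 minutes out,
        # e.g. "Monday, 02 July 2012 14:15:00 GMT" (the date is illustrative).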
        return search_

    cleaned = search_form.cleaned_data

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE
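    # For example, with SEARCH_RESULTS_PER_PAGE = 20, page 3 starts at
    # offset 40 (results 41-60).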

    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    wiki_s = Document.search()
    question_s = Question.search()
    discussion_s = Thread.search()

    # wiki filters
    # Category filter
    if cleaned['category']:
        wiki_s = wiki_s.filter(document_category__in=cleaned['category'])

    # Locale filter
    wiki_s = wiki_s.filter(document_locale=language)

    # Product filter
    products = cleaned['product']
    for p in products:
        wiki_s = wiki_s.filter(document_tag=p)

    # Tags filter
    tags = [t.strip() for t in cleaned['tags'].split()]
    for t in tags:
        wiki_s = wiki_s.filter(document_tag=t)

    # Archived bit
    if a == '0' and not cleaned['include_archived']:
        # Default to NO for basic search:
        cleaned['include_archived'] = False
    if not cleaned['include_archived']:
        wiki_s = wiki_s.filter(document_is_archived=False)
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:

        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary, they can be either YES, NO, or OFF
        ternary_filters = ('is_locked', 'is_solved', 'has_answers',
                           'has_helpful')
        d = dict(('question_%s' % filter_name,
                  _ternary_filter(cleaned[filter_name]))
                 for filter_name in ternary_filters if cleaned[filter_name])
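        # e.g. for is_solved=YES and has_helpful=YES this builds something
        # like {'question_is_solved': True, 'question_has_helpful': True},
        # assuming _ternary_filter() maps TERNARY_YES to True.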
        if d:
            question_s = question_s.filter(**d)

        if cleaned['asked_by']:
            question_s = question_s.filter(
                question_creator=cleaned['asked_by'])

        if cleaned['answered_by']:
            question_s = question_s.filter(
                question_answer_creator=cleaned['answered_by'])

        q_tags = [t.strip() for t in cleaned['q_tags'].split(',')]
        for t in q_tags:
            if t:
                question_s = question_s.filter(question_tag=t)

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            discussion_s = discussion_s.filter(
                post_author_ord=cleaned['author'])

        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                discussion_s = discussion_s.filter(post_is_sticky=1)

            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                discussion_s = discussion_s.filter(post_is_locked=1)

        if cleaned['forum']:
            discussion_s = discussion_s.filter(
                post_forum_id__in=cleaned['forum'])

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {filter_name + '__gte': 0,
                      filter_name + '__lte': max(filter_date, 0)}
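            # e.g. created=BEFORE with created_date=1341000000 yields
            # {'created__gte': 0, 'created__lte': 1341000000}.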

            discussion_s = discussion_s.filter(**before)
            question_s = question_s.filter(**before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {filter_name + '__gte': min(filter_date, unix_now),
                     filter_name + '__lte': unix_now}

            discussion_s = discussion_s.filter(**after)
            question_s = question_s.filter(**after)

    # Note: num_voted (with a d) is a different field than num_votes
    # (with an s). The former is a dropdown and the latter is an
    # integer value.
    if cleaned['num_voted'] == constants.INTERVAL_BEFORE:
        question_s = question_s.filter(
            question_num_votes__lte=max(cleaned['num_votes'], 0))
    elif cleaned['num_voted'] == constants.INTERVAL_AFTER:
        question_s = question_s.filter(
            question_num_votes__gte=cleaned['num_votes'])

    # Done with all the filtering; time to generate results

    documents = ComposedList()
    sortby = smart_int(request.GET.get('sortby'))
    try:
        max_results = settings.SEARCH_MAX_RESULTS
        cleaned_q = cleaned['q']

        if cleaned['w'] & constants.WHERE_WIKI:
            if cleaned_q:
                wiki_s = wiki_s.query(cleaned_q)

            # For a front-page non-advanced search, we want to cap the kb
            # at 10 results.
            if a == '0':
                wiki_max_results = 10
            else:
                wiki_max_results = max_results
            documents.set_count(('wiki', wiki_s),
                                min(wiki_s.count(), wiki_max_results))

        if cleaned['w'] & constants.WHERE_SUPPORT:
            # Sort results by
            try:
                question_s = question_s.order_by(
                    *constants.SORT_QUESTIONS[sortby])
            except IndexError:
                pass

            question_s = question_s.highlight(
                'question_title', 'question_content',
                'question_answer_content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                question_s = question_s.query(cleaned_q)
            documents.set_count(('question', question_s),
                                min(question_s.count(), max_results))

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            discussion_s = discussion_s.highlight(
                'discussion_content',
                before_match='<b>',
                after_match='</b>',
                limit=settings.SEARCH_SUMMARY_LENGTH)

            if cleaned_q:
                discussion_s = discussion_s.query(cleaned_q)
            documents.set_count(('forum', discussion_s),
                                min(discussion_s.count(), max_results))

        results_per_page = settings.SEARCH_RESULTS_PER_PAGE
        pages = paginate(request, documents, results_per_page)
        num_results = len(documents)

        # Get the documents we want to show and add them to
        # docs_for_page.
        documents = documents[offset:offset + results_per_page]
        docs_for_page = []
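        # Each entry of the sliced ComposedList is presumably
        # ((kind, search_s), (start, stop)): the key registered with
        # set_count() above plus the slice bounds that fall on this page.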
        for (kind, search_s), bounds in documents:
            search_s = search_s.values_dict()[bounds[0]:bounds[1]]
            docs_for_page += [(kind, doc) for doc in search_s]

        results = []
        for i, docinfo in enumerate(docs_for_page):
            rank = i + offset
            # Type here is something like 'wiki', ... while doc here
            # is an ES result document.
            type_, doc = docinfo

            if type_ == 'wiki':
                summary = doc['document_summary']
                result = {
                    'url': doc['url'],
                    'title': doc['document_title'],
                    'type': 'document',
                    'object': ObjectDict(doc)}
            elif type_ == 'question':
                summary = _build_es_excerpt(doc)
                result = {
                    'url': doc['url'],
                    'title': doc['question_title'],
                    'type': 'question',
                    'object': ObjectDict(doc),
                    'is_solved': doc['question_is_solved'],
                    'num_answers': doc['question_num_answers'],
                    'num_votes': doc['question_num_votes'],
                    'num_votes_past_week': doc['question_num_votes_past_week']}
            else:
                summary = _build_es_excerpt(doc)
                result = {
                    'url': doc['url'],
                    'title': doc['post_title'],
                    'type': 'thread',
                    'object': ObjectDict(doc)}
            result['search_summary'] = summary
            result['rank'] = rank
            result['score'] = doc._score
            results.append(result)

    except (ESTimeoutError, ESMaxRetryError, ESException), exc:
        # Handle timeout and all those other transient errors with a
        # "Search Unavailable" rather than a Django error page.
        if is_json:
            return HttpResponse(json.dumps({'error':
                                             _('Search Unavailable')}),
                                mimetype=mimetype, status=503)

        if isinstance(exc, ESTimeoutError):
            statsd.incr('search.es.timeouterror')
        elif isinstance(exc, ESMaxRetryError):
            statsd.incr('search.es.maxretryerror')
        elif isinstance(exc, ESException):
            statsd.incr('search.es.elasticsearchexception')

        t = 'search/mobile/down.html' if request.MOBILE else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)
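The _ternary_filter helper referenced above isn't part of this snippet; a minimal sketch of what such a helper presumably does (an assumption, not necessarily kitsune's exact code):

    def _ternary_filter(filter_value):
        # Map the advanced-search ternary dropdown (YES/NO/OFF) to a boolean
        # usable in the ES filters above; OFF never reaches this point
        # because the caller skips falsy values.
        return filter_value == constants.TERNARY_YES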
Example #51
File: content.py Project: KKDeep/kuma
    def __iter__(self):
        from wiki.models import Document

        input = html5lib_Filter.__iter__(self)

        # Pass #1: Gather all the link URLs and prepare annotations
        links = dict()
        buffer = []
        for token in input:
            buffer.append(token)
            if "StartTag" == token["type"] and "a" == token["name"]:
                attrs = dict(token["data"])
                if not "href" in attrs:
                    continue

                href = attrs["href"]
                if href.startswith(self.base_url):
                    # Squash site-absolute URLs to site-relative paths.
                    href = "/%s" % href[len(self.base_url) :]

                # Prepare annotations record for this path.
                links[href] = dict(classes=[])

        # Run through all the links and check for annotatable conditions.
        for href in links.keys():

            # Is this an external URL?
            is_external = False
            for prefix in self.EXTERNAL_PREFIXES:
                if href.startswith(prefix):
                    is_external = True
                    break
            if is_external:
                links[href]["classes"].append("external")
                continue

            # TODO: Should this also check for old-school mindtouch URLs? Or
            # should we encourage editors to convert to new-style URLs to take
            # advantage of link annotation? (I'd say the latter)

            # Is this a kuma doc URL?
            if "/docs/" in href:

                # Check if this is a special docs path that's exempt from "new"
                skip = False
                for path in DOC_SPECIAL_PATHS:
                    if "/docs/%s" % path in href:
                        skip = True
                if skip:
                    continue

                href_locale, href_path = href.split(u"/docs/", 1)
                if href_locale.startswith(u"/"):
                    href_locale = href_locale[1:]

                if "#" in href_path:
                    # If present, discard the hash anchor
                    href_path, _, _ = href_path.partition("#")

                # Handle any URL-encoded UTF-8 characters in the path
                href_path = href_path.encode("utf-8", "ignore")
                href_path = urllib.unquote(href_path)
                href_path = href_path.decode("utf-8", "ignore")

                # Try to sort out the locale and slug through some of our
                # redirection logic.
                locale, slug, needs_redirect = Document.locale_and_slug_from_path(
                    href_path, path_locale=href_locale)

                # Does this locale and slug correspond to an existing document?
                # If not, mark it as a "new" link.
                #
                # TODO: Should these DB queries be batched up into one big
                # query? A page with hundreds of links will fire off hundreds
                # of queries
                ct = Document.objects.filter(locale=locale, slug=slug).count()
                if ct == 0:
                    links[href]["classes"].append("new")

        # Pass #2: Filter the content, annotating links
        for token in buffer:
            if "StartTag" == token["type"] and "a" == token["name"]:
                attrs = dict(token["data"])

                if "href" in attrs:

                    href = attrs["href"]
                    if href.startswith(self.base_url):
                        # Squash site-absolute URLs to site-relative paths.
                        href = "/%s" % href[len(self.base_url) :]

                    if href in links:
                        # Update class names on this link element.
                        if "class" in attrs:
                            classes = set(attrs["class"].split(u" "))
                        else:
                            classes = set()
                        classes.update(links[href]["classes"])
                        if classes:
                            attrs["class"] = u" ".join(classes)

                token["data"] = attrs.items()

            yield token
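This __iter__ follows the html5lib filter protocol: the filter wraps a treewalker token stream and yields (possibly modified) tokens. A hedged usage sketch follows; the filter class name is a stand-in for whatever class this method belongs to, its constructor arguments are assumed, and import paths can differ between html5lib versions:

    import html5lib
    from html5lib import treebuilders, treewalkers
    from html5lib.serializer import HTMLSerializer

    def annotate_links(html_src):
        # Parse the fragment, walk it, pipe the tokens through the filter,
        # then serialize the annotated stream back to HTML.
        parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
        dom = parser.parseFragment(html_src)
        walker = treewalkers.getTreeWalker("dom")
        stream = LinkAnnotationFilter(walker(dom))  # hypothetical name/signature
        serializer = HTMLSerializer(omit_optional_tags=False)
        return u"".join(serializer.serialize(stream))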