def test_utf8_excerpt(self):
    """Characters should stay in UTF-8."""
    wc = WikiClient()
    page = Document.objects.get(pk=4)
    q = u'fa\xe7on'
    excerpt = wc.excerpt(page.html, q)
    assert q in excerpt, u'%s not in %s' % (q, excerpt)

def suggestions(request):
    """A simple search view that returns OpenSearch suggestions."""
    mimetype = 'application/x-suggestions+json'

    term = request.GET.get('q')
    if not term:
        return HttpResponseBadRequest(mimetype=mimetype)

    wc = WikiClient()
    qc = QuestionsClient()
    site = Site.objects.get_current()
    locale = sphinx_locale(locale_or_default(request.locale))

    results = []
    filters_w = [{'filter': 'locale', 'value': (locale,)}]
    filters_q = [{'filter': 'has_helpful', 'value': (True,)}]

    for client, filter, cls in [(wc, filters_w, Document),
                                (qc, filters_q, Question)]:
        for result in client.query(term, filter, limit=5):
            try:
                result = cls.objects.get(pk=result['id'])
            except cls.DoesNotExist:
                continue
            results.append(result)

    urlize = lambda obj: u'https://%s%s' % (site, obj.get_absolute_url())
    data = [term,
            [r.title for r in results],
            [],
            [urlize(r) for r in results]]
    return HttpResponse(json.dumps(data), mimetype=mimetype)

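# A minimal sketch (not from the original source) of the OpenSearch
# suggestions payload that suggestions() above assembles: a four-element
# JSON array of [query, completions, descriptions, urls], with the
# descriptions element left empty. The route, titles and URLs below are
# hypothetical placeholders; only the shape comes from the `data` list
# built in the view.
#
#   GET /search/suggestions?q=crash
#
#   ["crash",
#    ["Firefox crashes", "Firefox crashes when loading certain pages"],
#    [],
#    ["https://support.mozilla.com/en-US/kb/Firefox+crashes",
#     "https://support.mozilla.com/en-US/kb/Firefox+crashes+when+loading"]]
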
def test_sphinx_down():
    """Tests that the client times out when Sphinx is down."""
    wc = WikiClient()
    wc.sphinx.SetServer('localhost', 65535)
    assert_raises(SearchError, wc.query, 'test')

def test_no_syntax_error(self):
    """Test that special chars cannot cause a syntax error."""
    wc = WikiClient()
    results = wc.query('video^$')
    eq_(1, len(results))

    results = wc.query('video^^^$$$^')
    eq_(1, len(results))

def test_category_exclude_nothing(self):
    """Excluding no categories should return results."""
    clients = ((WikiClient(), 'category'),
               (QuestionsClient(), 'replies'),
               (DiscussionClient(), 'author_ord'))
    for client, filter in clients:
        results = client.query('', ({'filter': filter,
                                     'exclude': True,
                                     'value': []},))
        self.assertNotEquals(0, len(results))

def test_unicode_excerpt(self):
    """Unicode characters in the excerpt should not be a problem."""
    wc = WikiClient()
    page = Document.objects.get(pk=2)
    try:
        excerpt = wc.excerpt(page.html, u'\u3068')
        render('{{ c }}', {'c': excerpt})
    except UnicodeDecodeError:
        self.fail('Raised UnicodeDecodeError.')

def test_range_filter(self):
    """Test filtering on a range."""
    wc = WikiClient()
    filter_ = ({'filter': 'updated',
                'max': 1285765791,
                'min': 1284664176,
                'range': True},)
    results = wc.query('', filter_)
    eq_(2, len(results))

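# A summary sketch of the filter-dict shape the Sphinx clients accept,
# inferred from the tests in this file (not an authoritative spec):
# 'filter' names a Sphinx attribute; plain filters pass a sequence in
# 'value', optionally with 'exclude': True; range filters set
# 'range': True plus integer 'min'/'max' bounds.
example_filters = (
    {'filter': 'category', 'value': [10]},                   # match values
    {'filter': 'category', 'value': [30], 'exclude': True},  # exclude values
    {'filter': 'updated', 'range': True,                     # bounded range
     'min': 1284664176, 'max': 1285765791},
)
# e.g. results = WikiClient().query('', example_filters)
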
def test_clean_excerpt(self):
    """SearchClient.excerpt() should not allow disallowed HTML through."""
    wc = WikiClient()  # Index strips HTML
    qc = QuestionsClient()  # Index does not strip HTML
    input = 'test <div>the start of something</div>'
    output_strip = '<b>test</b> the start of something'
    output_nostrip = ('<b>test</b> &lt;div&gt;the start of '
                      'something&lt;/div&gt;')
    eq_(output_strip, wc.excerpt(input, 'test'))
    eq_(output_nostrip, qc.excerpt(input, 'test'))

def test_translations_inherit_os_values(self):
    """Translations inherit the OS values of their parent document."""
    wc = WikiClient()
    filters = [{'filter': 'locale', 'value': (crc32('fr'),)},
               {'filter': 'os', 'value': (1,)}]
    results = wc.query('', filters)
    eq_(1, len(results))
    eq_(4, results[0]['id'])

    filters[1]['value'] = (4,)
    results = wc.query('', filters)
    eq_(0, len(results))

def test_ngram_chars(self):
    """Ideographs are handled correctly."""
    wc = WikiClient()
    results = wc.query(u'\u30c1')
    eq_(1, len(results))
    eq_(2, results[0]['id'])

def test_wiki_index_strip_html(self):
    """HTML should be stripped, not indexed."""
    wc = WikiClient()
    results = wc.query('strong')
    eq_(0, len(results))

def test_wiki_index_content(self):
    """Obviously the content should be indexed."""
    wc = WikiClient()
    results = wc.query('video')
    eq_(1, len(results))
    eq_(1, results[0]['id'])

def test_wiki_index_summary(self):
    """The summary field of a revision is indexed."""
    wc = WikiClient()
    results = wc.query('whatever')
    eq_(1, len(results))
    eq_(3, results[0]['id'])

    assert not response.content

def test_archived(self):
    """Ensure archived articles show only when requested."""
    qs = {'q': 'impalas', 'a': 1, 'w': 1, 'format': 'json',
          'include_archived': 'on'}
    response = self.client.get(reverse('search'), qs)
    results = json.loads(response.content)['results']
    eq_(1, len(results))
    assert results[0]['url'].endswith('archived-article')

    qs = {'q': 'impalas', 'a': 0, 'w': 1, 'format': 'json'}
    response = self.client.get(reverse('search'), qs)
    results = json.loads(response.content)['results']
    eq_([], results)

query = lambda *args, **kwargs: WikiClient().query(*args, **kwargs)

@mock.patch('search.clients.WikiClient')
def test_excerpt_timeout(sphinx_mock):
    def sphinx_error(cls):
        raise cls

    sphinx_mock.query.side_effect = lambda *a: sphinx_error(socket.timeout)
    assert_raises(SearchError, query, 'xxx')

    sphinx_mock.query.side_effect = lambda *a: sphinx_error(Exception)
    assert_raises(SearchError, query, 'xxx')

def test_category(self):
    """Filtering by category limits results to that category."""
    wc = WikiClient()
    results = wc.query('', ({'filter': 'category', 'value': [10]},))
    eq_(5, len(results))

    results = wc.query('', ({'filter': 'category', 'value': [30]},))
    eq_(1, len(results))

def test_indexer(self):
    wc = WikiClient()
    results = wc.query('audio')
    eq_(2, len(results))

def test_no_redirects(self):
    """Redirect articles should never appear in search results."""
    wc = WikiClient()
    results = wc.query('ghosts')
    eq_(1, len(results))

def test_no_filter(self):
    """Test searching with no filters."""
    wc = WikiClient()
    results = wc.query('')
    eq_(6, len(results))

def test_empty_content_excerpt(self):
    """SearchClient.excerpt() returns empty string for empty content."""
    wc = WikiClient()
    eq_('', wc.excerpt('', 'test'))

def test_none_content_excerpt(self):
    """SearchClient.excerpt() returns empty string for None type."""
    wc = WikiClient()
    eq_('', wc.excerpt(None, 'test'))

def search(request, template=None):
    """Performs search or displays the search form."""

    # JSON-specific variables
    is_json = (request.GET.get('format') == 'json')
    callback = request.GET.get('callback', '').strip()
    mimetype = 'application/x-javascript' if callback else 'application/json'

    # Search "Expires" header format
    expires_fmt = '%A, %d %B %Y %H:%M:%S GMT'

    # Check callback is valid
    if is_json and callback and not jsonp_is_valid(callback):
        return HttpResponse(
            json.dumps({'error': _('Invalid callback function.')}),
            mimetype=mimetype, status=400)

    language = locale_or_default(request.GET.get('language', request.locale))
    r = request.GET.copy()
    a = request.GET.get('a', '0')

    # Search default values
    try:
        category = (map(int, r.getlist('category')) or
                    settings.SEARCH_DEFAULT_CATEGORIES)
    except ValueError:
        category = settings.SEARCH_DEFAULT_CATEGORIES
    r.setlist('category', [x for x in category if x > 0])
    exclude_category = [abs(x) for x in category if x < 0]

    try:
        fx = map(int, r.getlist('fx')) or [v.id for v in FIREFOX_VERSIONS]
    except ValueError:
        fx = [v.id for v in FIREFOX_VERSIONS]
    r.setlist('fx', fx)

    try:
        os = map(int, r.getlist('os')) or [o.id for o in OPERATING_SYSTEMS]
    except ValueError:
        os = [o.id for o in OPERATING_SYSTEMS]
    r.setlist('os', os)

    # Basic form
    if a == '0':
        r['w'] = r.get('w', constants.WHERE_BASIC)
    # Advanced form
    if a == '2':
        r['language'] = language
        r['a'] = '1'

    search_form = SearchForm(r)

    if not search_form.is_valid() or a == '2':
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Invalid search data.')}),
                mimetype=mimetype, status=400)

        t = template if request.MOBILE else 'search/form.html'
        search_ = jingo.render(request, t,
                               {'advanced': a, 'request': request,
                                'search_form': search_form})
        search_['Cache-Control'] = 'max-age=%s' % \
            (settings.SEARCH_CACHE_PERIOD * 60)
        search_['Expires'] = (
            datetime.utcnow() +
            timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
            .strftime(expires_fmt)
        return search_

    cleaned = search_form.cleaned_data
    search_locale = (sphinx_locale(language),)

    page = max(smart_int(request.GET.get('page')), 1)
    offset = (page - 1) * settings.SEARCH_RESULTS_PER_PAGE

    # Get language name for display in template
    lang = language.lower()
    if settings.LANGUAGES.get(lang):
        lang_name = settings.LANGUAGES[lang]
    else:
        lang_name = ''

    documents = []
    filters_w = []
    filters_q = []
    filters_f = []

    # Wiki filters
    # Version and OS filters
    if cleaned['fx']:
        filters_w.append({'filter': 'fx', 'value': cleaned['fx']})
    if cleaned['os']:
        filters_w.append({'filter': 'os', 'value': cleaned['os']})

    # Category filter
    if cleaned['category']:
        filters_w.append({'filter': 'category',
                          'value': cleaned['category']})
    if exclude_category:
        filters_w.append({'filter': 'category',
                          'value': exclude_category,
                          'exclude': True})

    # Locale filter
    filters_w.append({'filter': 'locale', 'value': search_locale})

    # Tags filter
    tags = [crc32(t.strip()) for t in cleaned['tags'].split()]
    if tags:
        for t in tags:
            filters_w.append({'filter': 'tag', 'value': (t,)})
    # End of wiki filters

    # Support questions specific filters
    if cleaned['w'] & constants.WHERE_SUPPORT:
        # Solved is set by default if using basic search
        if a == '0' and not cleaned['has_helpful']:
            cleaned['has_helpful'] = constants.TERNARY_YES

        # These filters are ternary; they can be either YES, NO, or OFF
        toggle_filters = ('is_locked', 'is_solved', 'has_answers',
                          'has_helpful')
        for filter_name in toggle_filters:
            if cleaned[filter_name] == constants.TERNARY_YES:
                filters_q.append({'filter': filter_name,
                                  'value': (True,)})
            if cleaned[filter_name] == constants.TERNARY_NO:
                filters_q.append({'filter': filter_name,
                                  'value': (False,)})

        if cleaned['asked_by']:
            filters_q.append({'filter': 'question_creator',
                              'value': (crc32(cleaned['asked_by']),)})
        if cleaned['answered_by']:
            filters_q.append({'filter': 'answer_creator',
                              'value': (crc32(cleaned['answered_by']),)})

        q_tags = [crc32(t.strip()) for t in cleaned['q_tags'].split()]
        if q_tags:
            for t in q_tags:
                filters_q.append({'filter': 'tag', 'value': (t,)})

    # Discussion forum specific filters
    if cleaned['w'] & constants.WHERE_DISCUSSION:
        if cleaned['author']:
            filters_f.append({'filter': 'author_ord',
                              'value': (crc32(cleaned['author']),)})
        if cleaned['thread_type']:
            if constants.DISCUSSION_STICKY in cleaned['thread_type']:
                filters_f.append({'filter': 'is_sticky', 'value': (1,)})
            if constants.DISCUSSION_LOCKED in cleaned['thread_type']:
                filters_f.append({'filter': 'is_locked', 'value': (1,)})
        if cleaned['forum']:
            filters_f.append({'filter': 'forum_id',
                              'value': cleaned['forum']})

    # Filters common to support and discussion forums
    # Created filter
    unix_now = int(time.time())
    interval_filters = (
        ('created', cleaned['created'], cleaned['created_date']),
        ('updated', cleaned['updated'], cleaned['updated_date']),
        ('question_votes', cleaned['num_voted'], cleaned['num_votes']))
    for filter_name, filter_option, filter_date in interval_filters:
        if filter_option == constants.INTERVAL_BEFORE:
            before = {'range': True, 'filter': filter_name,
                      'min': 0, 'max': max(filter_date, 0)}
            if filter_name != 'question_votes':
                filters_f.append(before)
            filters_q.append(before)
        elif filter_option == constants.INTERVAL_AFTER:
            after = {'range': True, 'filter': filter_name,
                     'min': min(filter_date, unix_now), 'max': unix_now}
            if filter_name != 'question_votes':
                filters_f.append(after)
            filters_q.append(after)

    sortby = smart_int(request.GET.get('sortby'))
    try:
        if cleaned['w'] & constants.WHERE_WIKI:
            wc = WikiClient()  # Wiki SearchClient instance
            # Execute the query and append to documents
            documents += wc.query(cleaned['q'], filters_w)

        if cleaned['w'] & constants.WHERE_SUPPORT:
            qc = QuestionsClient()  # Support question SearchClient instance
            # Sort results by
            try:
                qc.set_sort_mode(constants.SORT_QUESTIONS[sortby][0],
                                 constants.SORT_QUESTIONS[sortby][1])
            except IndexError:
                pass
            documents += qc.query(cleaned['q'], filters_q)

        if cleaned['w'] & constants.WHERE_DISCUSSION:
            dc = DiscussionClient()  # Discussion forums SearchClient instance
            # Sort results by
            try:
                dc.groupsort = constants.GROUPSORT[sortby]
            except IndexError:
                pass
            documents += dc.query(cleaned['q'], filters_f)
    except SearchError:
        if is_json:
            return HttpResponse(
                json.dumps({'error': _('Search Unavailable')}),
                mimetype=mimetype, status=503)

        t = 'search/mobile/down.html' if request.MOBILE \
            else 'search/down.html'
        return jingo.render(request, t, {'q': cleaned['q']}, status=503)

    pages = paginate(request, documents, settings.SEARCH_RESULTS_PER_PAGE)

    results = []
    for i in range(offset, offset + settings.SEARCH_RESULTS_PER_PAGE):
        try:
            if documents[i]['attrs'].get('category', False) != False:
                wiki_page = Document.objects.get(pk=documents[i]['id'])
                summary = wiki_page.current_revision.summary

                result = {'search_summary': summary,
                          'url': wiki_page.get_absolute_url(),
                          'title': wiki_page.title,
                          'type': 'document'}
                results.append(result)
            elif documents[i]['attrs'].get('question_creator',
                                           False) != False:
                question = Question.objects.get(
                    pk=documents[i]['attrs']['question_id'])

                excerpt = qc.excerpt(question.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {'search_summary': summary,
                          'url': question.get_absolute_url(),
                          'title': question.title,
                          'type': 'question'}
                results.append(result)
            else:
                thread = Thread.objects.get(
                    pk=documents[i]['attrs']['thread_id'])
                post = Post.objects.get(pk=documents[i]['id'])

                excerpt = dc.excerpt(post.content, cleaned['q'])
                summary = jinja2.Markup(excerpt)

                result = {'search_summary': summary,
                          'url': thread.get_absolute_url(),
                          'title': thread.title,
                          'type': 'thread'}
                results.append(result)
        except IndexError:
            break
        except ObjectDoesNotExist:
            continue

    items = [(k, v) for k in search_form.fields for
             v in r.getlist(k) if v and k != 'a']
    items.append(('a', '2'))
    refine_query = u'?%s' % urlencode(items)

    if is_json:
        data = {}
        data['results'] = results
        data['total'] = len(results)
        data['query'] = cleaned['q']
        if not results:
            data['message'] = _('No pages matched the search criteria')
        json_data = json.dumps(data)
        if callback:
            json_data = callback + '(' + json_data + ');'

        return HttpResponse(json_data, mimetype=mimetype)

    results_ = jingo.render(request, template,
                            {'num_results': len(documents),
                             'results': results,
                             'q': cleaned['q'],
                             'pages': pages,
                             'w': cleaned['w'],
                             'refine_query': refine_query,
                             'search_form': search_form,
                             'lang_name': lang_name})
    results_['Cache-Control'] = 'max-age=%s' % \
        (settings.SEARCH_CACHE_PERIOD * 60)
    results_['Expires'] = (
        datetime.utcnow() +
        timedelta(minutes=settings.SEARCH_CACHE_PERIOD)) \
        .strftime(expires_fmt)
    return results_

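# Illustrative request/response sketch for the JSON mode of search()
# above. The querystring parameters (q, w, a, format, callback) and the
# response keys (query, total, results, message) come from the view; the
# example values are hypothetical.
#
#   GET /search?q=video&w=1&a=0&format=json
#
#   {"query": "video",
#    "total": 1,
#    "results": [{"type": "document",
#                 "title": "Some wiki page",
#                 "url": "/en-US/kb/some-wiki-page",
#                 "search_summary": "..."}]}
#
# With a valid callback=fn parameter the same payload is wrapped as
# fn({...}); and served as application/x-javascript.
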
def test_wiki_index_keywords(self):
    """The keywords field of a revision is indexed."""
    wc = WikiClient()
    results = wc.query('foobar')
    eq_(1, len(results))
    eq_(3, results[0]['id'])

def _search_suggestions(query, locale):
    """Return an iterable of the most relevant wiki pages and questions.

    query -- full text to search on
    locale -- locale to limit to

    Items returned are dicts:
        {'url': URL where the article can be viewed,
         'title': Title of the article,
         'excerpt_html': Excerpt of the article with search terms
                         highlighted, formatted in HTML}

    Weights wiki pages infinitely higher than questions at the moment.

    TODO: ZOMFG this needs to be refactored and the search app should
    provide an internal API. Seriously.
    """
    def prepare(result, model, attr, searcher, result_to_id):
        """Turn a search result from a Sphinx client into a dict for
        templates.

        Return {} if an object corresponding to the result cannot be
        found.
        """
        try:
            obj = model.objects.get(pk=result_to_id(result))
        except ObjectDoesNotExist:
            return {}
        return {'url': obj.get_absolute_url(),
                'title': obj.title,
                'excerpt_html': searcher.excerpt(getattr(obj, attr), query)}

    max_suggestions = settings.QUESTIONS_MAX_SUGGESTIONS
    query_limit = max_suggestions + settings.QUESTIONS_SUGGESTION_SLOP

    # Search wiki pages:
    wiki_searcher = WikiClient()
    filters = [{'filter': 'locale',
                'value': (sphinx_locale(locale),)},
               {'filter': 'category',
                'value': [x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x >= 0]},
               {'filter': 'category',
                'exclude': True,
                'value': [-x for x in settings.SEARCH_DEFAULT_CATEGORIES
                          if x < 0]}]
    raw_results = wiki_searcher.query(query, filters=filters,
                                      limit=query_limit)
    # Lazily build excerpts from results. Stop when we have enough:
    results = islice((p for p in
                      (prepare(r, Document, 'html', wiki_searcher,
                               lambda x: x['id'])
                       for r in raw_results) if p),
                     max_suggestions)
    results = list(results)

    # If we didn't find enough wiki pages to fill the page, pad it out
    # with questions:
    if len(results) < max_suggestions:
        question_searcher = QuestionsClient()
        # The questions app is en-US only.
        raw_results = question_searcher.query(
            query, limit=query_limit - len(results))
        results.extend(islice(
            (p for p in
             (prepare(r, Question, 'content', question_searcher,
                      lambda x: x['attrs']['question_id'])
              for r in raw_results) if p),
            max_suggestions - len(results)))
    return results

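# Hypothetical caller sketch: each item _search_suggestions() returns is
# a dict with 'url', 'title' and 'excerpt_html' keys, per the docstring
# above (Python 2 era code, hence the print statement).
#
#   for hit in _search_suggestions(u'firefox crashes slowly', 'en-US'):
#       print '%s -> %s' % (hit['title'], hit['url'])
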