Пример #1
0
def _do_search(self, request, model):
    """
    Run a Haystack search restricted to ``model`` and return one page of
    dehydrated results as an API response.

    Reads ``q`` (query text, default empty) and ``page`` (default 1) from
    ``request.GET``; pages hold 20 results. Every returned bundle gets an
    extra ``text`` entry containing the highlighted ``result.text``.

    Raises ``Http404`` when ``page`` is not an integer or the paginator
    rejects it.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).load_all().auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (ValueError, InvalidPage):
        # ValueError comes from int() on a non-numeric ``page`` (for example
        # ``?page=abc``); report it as a missing page rather than letting it
        # surface as a server error.
        raise Http404("Sorry, no results on that page.")

    # The highlighter depends only on the query, so build it once.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        # Skip falsy entries in the page.
        if not result:
            continue
        bundle = self.build_bundle(obj=result.object, request=request)
        bundle = self.full_dehydrate(bundle)
        bundle.data['text'] = highlighter.highlight(result.text)
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
Пример #2
0
 def get_results(self):
     """
     Override get_results to record, on each result, the stored field in
     which the query was found, with the query highlighted.

     Every stored string field other than 'text' whose value contains the
     lower-cased query sets ``r.context`` to ``{'field': ..., 'value': ...}``:
     ``field`` is the model field's verbose name (or the index field name
     when that lookup fails) and ``value`` is the highlighted field value.
     When several fields match, the last one visited wins.
     """
     results = super(FindView, self).get_results()
     query = self.query.lower()
     highlight = Highlighter(query)
     for r in results:
         for field in r.get_stored_fields():
             value = getattr(r, field)
             # assume search index field 'text' is document field
             if isinstance(value, string_types) and\
                     query in value.lower() and\
                     field != 'text':
                 # assume search index field name == model field name
                 try:
                     name = r.object._meta.get_field(field).verbose_name
                 except Exception:
                     # Best effort: the lookup failed, so fall back to the
                     # index field name. Unlike a bare ``except``, this lets
                     # KeyboardInterrupt and SystemExit propagate.
                     name = field
                 r.context = {
                     'field': name,
                     'value': highlight.highlight(value)
                 }
     return results
Пример #3
0
def homeroom(request):
    """
    Render the homeroom page for the current user.

    On POST, runs an autocomplete search over ``Course`` for the submitted
    ``course-search`` term (first 10 hits) and stores each hit together with
    its highlighted text in the context as ``courses``; ``suggestion`` is
    always ``None``. The user's sections, ordered by
    ``-section__start_date``, and an ``AddCourseForm`` are always added.
    """
    user = request.user
    context = RequestContext(request)

    if request.method == 'POST':
        term = request.POST['course-search']
        hits = SearchQuerySet().autocomplete(text=term).models(Course)[:10]
        marker = Highlighter(term, html_tag='span', css_class='keyword')
        context['courses'] = [
            {'object': hit.object, 'highlight': marker.highlight(hit.text)}
            for hit in hits
        ]
        # Spelling suggestions are currently switched off.
        context['suggestion'] = None

    assignments = SectionAssign.objects.filter(user=user).order_by(
        '-section__start_date')
    context['sections'] = [assignment.section for assignment in assignments]
    context['form'] = AddCourseForm(request=request)
    return render_to_response('homeroom/index.html', context)
Пример #4
0
def _do_search(self, request, model):
    """
    Run a Haystack search restricted to ``model`` and return one page of
    dehydrated results as an API response.

    Reads ``q`` (query text, default empty) and ``page`` (default 1) from
    ``request.GET``; pages hold 20 results. Every returned bundle gets an
    extra ``text`` entry containing the highlighted ``result.text``.

    Raises ``Http404`` when ``page`` is not an integer or the paginator
    rejects it.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)

    # Do the query.
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().models(model).auto_query(query)
    paginator = Paginator(sqs, 20)

    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except (ValueError, InvalidPage):
        # ValueError comes from int() on a non-numeric ``page`` (for example
        # ``?page=abc``); report it as a missing page rather than letting it
        # surface as a server error.
        raise Http404("Sorry, no results on that page.")

    # The highlighter depends only on the query, so build it once.
    highlighter = Highlighter(query)
    objects = []

    for result in page.object_list:
        bundle = self.full_dehydrate(result.object)
        bundle.data['text'] = highlighter.highlight(result.text)
        objects.append(bundle)

    object_list = {
        'objects': objects,
    }

    self.log_throttled_access(request)
    return self.create_response(request, object_list)
Пример #5
0
 def get_results(self):
     """
     Override get_results to record, on each result, the stored field in
     which the query was found, with the query highlighted.

     Every stored string field other than 'text' whose value contains the
     lower-cased query sets ``r.context`` to ``{'field': ..., 'value': ...}``:
     ``field`` is the model field's verbose name (or the index field name
     when that lookup fails) and ``value`` is the highlighted field value.
     When several fields match, the last one visited wins.
     """
     results = super(FindView, self).get_results()
     query = self.query.lower()
     highlight = Highlighter(query)
     for r in results:
         for field in r.get_stored_fields():
             value = getattr(r, field)
             # assume search index field 'text' is document field
             if isinstance(value, string_types) and\
                     query in value.lower() and\
                     field != 'text':
                 # assume search index field name == model field name
                 try:
                     name = r.object._meta.get_field(field).verbose_name
                 except Exception:
                     # Best effort: the lookup failed, so fall back to the
                     # index field name. Unlike a bare ``except``, this lets
                     # KeyboardInterrupt and SystemExit propagate.
                     name = field
                 r.context = {
                     'field': name,
                     'value': highlight.highlight(value)
                 }
     return results
Пример #6
0
    def _search(self, request, model, facets=None, page_size=20,
                highlight=True):
        """
        Run a faceted search limited to ``model`` and return one page of
        results.

        `request`
            Request whose GET data feeds the search form and supplies
            ``q`` and ``page``
        `model`
            Model the search is limited to
        `facets`
            A list of facets to include with the results
        `page_size`
            Number of results per page
        `highlight`
            When true, each result's ``text`` is highlighted with the query

        Returns a dict with ``page`` (pagination data) and ``objects``
        (dehydrated bundles), plus ``facets`` when facets were requested.
        When the form is invalid, the value of ``self.error_response`` is
        returned instead. Raises ``Http404`` when ``page`` is not an integer
        or the paginator rejects it.
        """
        form = FacetedSearchForm(request.GET, facets=facets or [],
                                 models=(model,), load_all=True)
        if not form.is_valid():
            # NOTE(review): Tastypie's Resource.error_response is declared as
            # (request, errors, ...); confirm this argument order matches the
            # error_response actually in use here.
            return self.error_response({'errors': form.errors}, request)
        results = form.search()

        paginator = Paginator(results, page_size)
        try:
            page = paginator.page(int(request.GET.get('page', 1)))
        except (ValueError, InvalidPage):
            # ValueError comes from int() on a non-numeric ``page``; treat it
            # like any other missing page instead of a server error.
            raise Http404(ugettext("Sorry, no results on that page."))

        objects = []
        query = request.GET.get('q', '')
        highlighter = Highlighter(query)
        for result in page.object_list:
            # Skip falsy entries in the page.
            if not result:
                continue
            text = result.text
            if highlight:
                text = highlighter.highlight(text)
            bundle = self.build_bundle(obj=result.object, request=request)
            bundle = self.full_dehydrate(bundle)
            bundle.data['text'] = text
            objects.append(bundle)

        # The template is filled in with a page number via str.format below.
        url_template = self._url_template(query,
                                          form['selected_facets'].value())
        page_data = {
            'number': page.number,
            'per_page': paginator.per_page,
            'num_pages': paginator.num_pages,
            'page_range': paginator.page_range,
            'object_count': paginator.count,
            'url_template': url_template,
        }
        if page.has_next():
            page_data['url_next'] = url_template.format(
                page.next_page_number())
        if page.has_previous():
            page_data['url_prev'] = url_template.format(
                page.previous_page_number())

        object_list = {
            'page': page_data,
            'objects': objects,
        }
        if facets:
            object_list.update({'facets': results.facet_counts()})
        return object_list
Пример #7
0
def execute_highlighter(query, text_key, results):
    """
    Annotate each result with the character spans of the query words.

    For every result, the value stored under ``text_key`` in its additional
    fields (empty string when missing) is scanned for the highlightable words
    of ``query``. The matches are stored on the result as
    ``highlight_locations``, a list of ``[start, end]`` pairs where ``end``
    is ``start + len(word)``. Results are modified in place; nothing is
    returned.
    """
    highlight = Highlighter(query)
    for result in results:
        highlight.text_block = result.get_additional_fields().get(text_key, "")
        word_locations = highlight.find_highlightable_words()
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists
        # only on Python 2.
        result.highlight_locations = [
            [location, location + len(word)]
            for word, locations in word_locations.items()
            for location in locations
        ]
Пример #8
0
 def no_query_found(self):
     """
     Print the highlighted text of the first result of a highlighted search.

     The search filters ``content`` on the result of
     ``SearchQuerySet(self).all()``.
     """
     all_results = SearchQuerySet(self).all()
     # Earlier variant filtered on a literal:
     # SearchQuerySet().filter(content='foo').highlight()
     sqs = SearchQuerySet().filter(content=all_results).highlight()
     # NOTE(review): ``search_query`` is not defined in this method; it must
     # come from an enclosing or module scope - confirm, otherwise this line
     # raises NameError.
     highlighter = Highlighter(search_query)
     result = sqs[0]
     # The value is discarded; this only fails loudly when the backend
     # returned no highlighted 'text' for the first result.
     result.highlighted['text'][0]
     # Call form of print: valid on Python 3 and prints the same single
     # value on Python 2.
     print(highlighter.highlight(sqs[0].text))
Пример #9
0
 def no_query_found(self):
     """
     Print the highlighted text of the first result of a highlighted search.

     The search filters ``content`` on the result of
     ``SearchQuerySet(self).all()``.
     """
     all_results = SearchQuerySet(self).all()
     # Earlier variant filtered on a literal:
     # SearchQuerySet().filter(content='foo').highlight()
     sqs = SearchQuerySet().filter(content=all_results).highlight()
     # NOTE(review): ``search_query`` is not defined in this method; it must
     # come from an enclosing or module scope - confirm, otherwise this line
     # raises NameError.
     highlighter = Highlighter(search_query)
     result = sqs[0]
     # The value is discarded; this only fails loudly when the backend
     # returned no highlighted 'text' for the first result.
     result.highlighted['text'][0]
     # Call form of print: valid on Python 3 and prints the same single
     # value on Python 2.
     print(highlighter.highlight(sqs[0].text))
Пример #10
0
	def build_results_for_page(games, query):
		"""
		Return one dict per game holding its url plus the title, intro,
		city and state with the query terms highlighted.
		"""
		mark = Highlighter(query).highlight
		rows = []
		for game in games:
			rows.append({
				'url': game.url,
				'title': mark(game.title),
				'intro': mark(game.intro),
				'city': mark(game.location.city),
				'state': mark(game.location.state),
			})
		return rows
Пример #11
0
    def _search(self, request, model, facets=None, page_size=20,
                highlight=True):
        '''
        Run a faceted search limited to ``model`` and return one page of
        results.

        `request`
            Request whose GET data feeds the search form and supplies
            ``q`` and ``page``
        `model`
            Model the search is limited to
        `facets`
            A list of facets to include with the results
        `page_size`
            Number of results per page
        `highlight`
            When true, each result's ``text`` is highlighted with the query

        Returns a dict with ``page`` (pagination data) and ``objects``
        (dehydrated bundles), plus ``facets`` when facets were requested.
        When the form is invalid, the value of ``self.error_response`` is
        returned instead. Raises ``Http404`` when ``page`` is not an integer
        or the paginator rejects it.
        '''
        form = FacetedSearchForm(request.GET, facets=facets or [],
                                 models=(model,), load_all=True)
        if not form.is_valid():
            # NOTE(review): Tastypie's Resource.error_response is declared as
            # (request, errors, ...); confirm this argument order matches the
            # error_response actually in use here.
            return self.error_response({'errors': form.errors}, request)
        results = form.search()

        paginator = Paginator(results, page_size)
        try:
            page = paginator.page(int(request.GET.get('page', 1)))
        except (ValueError, InvalidPage):
            # ValueError comes from int() on a non-numeric ``page``; treat it
            # like any other missing page instead of a server error.
            raise Http404(ugettext("Sorry, no results on that page."))

        objects = []
        query = request.GET.get('q', '')
        highlighter = Highlighter(query)
        for result in page.object_list:
            # Skip falsy entries in the page.
            if not result:
                continue
            text = result.text
            if highlight:
                text = highlighter.highlight(text)
            bundle = self.build_bundle(obj=result.object, request=request)
            bundle = self.full_dehydrate(bundle)
            bundle.data['text'] = text
            objects.append(bundle)

        # The template is filled in with a page number via str.format below.
        url_template = self._url_template(query,
                                          form['selected_facets'].value())
        page_data = {
            'number': page.number,
            'per_page': paginator.per_page,
            'num_pages': paginator.num_pages,
            'page_range': paginator.page_range,
            'object_count': paginator.count,
            'url_template': url_template,
        }
        if page.has_next():
            page_data['url_next'] = url_template.format(
                page.next_page_number())
        if page.has_previous():
            page_data['url_prev'] = url_template.format(
                page.previous_page_number())

        object_list = {
            'page': page_data,
            'objects': objects,
        }
        if facets:
            object_list.update({'facets': results.facet_counts()})
        return object_list
Пример #12
0
def autocomplete(request):
    """
    Return up to five autocomplete suggestions for the ``q`` GET parameter
    as a JSON object of the form ``{"results": [...]}``.

    Suggestions are the ``search`` values of ``Search`` hits, highlighted
    with the CSS class ``no-highlight``.
    """
    term = request.GET.get('q', '')
    matches = SearchQuerySet().models(Search).autocomplete(content_auto=term)[:5]
    marker = Highlighter(term, css_class='no-highlight')

    suggestions = []
    for match in matches:
        suggestions.append(marker.highlight(match.search))

    payload = json.dumps({'results': suggestions})
    return HttpResponse(payload, content_type='application/json')
Пример #13
0
def autocomplete(request):
    """
    Return up to five highlighted autocomplete suggestions as JSON.

    Searches the ``model_auto`` field for the ``q`` GET parameter (empty
    string when absent) and highlights each hit's ``text`` with a
    ``max_length`` of 35. The response body is ``{"results": [...]}``.
    """
    query = request.GET.get('q', '')
    sqs = SearchQuerySet().autocomplete(model_auto=query)[:5]
    highlight = Highlighter(query, max_length=35)
    suggestions = [highlight.highlight(result.text) for result in sqs]
    # Make sure you return a JSON object, not a bare list.
    # Otherwise, you could be vulnerable to an XSS attack.
    the_data = json.dumps({'results': suggestions})
    return HttpResponse(the_data, content_type='application/json')
Пример #14
0
def search_view(request):
    """
    Search ``content`` for the required ``q`` GET parameter and return every
    hit as JSON.

    The response object has ``success`` (always true), ``by`` (always
    ``'search'``) and ``list``, whose entries carry the hit's title, its
    highlighted content and its ``abs_uri``.
    """
    keyword = request.GET['q']
    hits = SearchQuerySet().filter(content=keyword)
    marker = Highlighter(keyword)

    entries = []
    for hit in hits:
        entries.append({
            'title': hit.object.title,
            'content': marker.highlight(hit.object.content),
            'uri': hit.object.abs_uri,
        })

    payload = {'success': True, 'by': 'search', 'list': entries}
    return HttpResponse(json.dumps(payload), content_type="application/json")
Пример #15
0
def index(request):
	"""
	Search names containing the required ``key`` GET parameter and render
	the hits, with highlighted names, together with the total hit count.
	"""
	# Search term.
	words = request.GET['key']
	matches = SearchQuerySet().filter(name__contains=(words))
	# Signature reminder:
	# Highlighter(my_query, html_tag='', css_class='', max_length=100)
	marker = Highlighter(words, max_length=100)
	# Total number of hits.
	total = matches.count()
	for match in matches:
		# Apply the highlighting.
		match.name = marker.highlight(match.name)
	# Encoding reminders: unicode -> str via unicode_string.encode('utf-8');
	# str -> unicode via unicode(utf8_string, 'utf-8').
	context = {'data': matches, 'counts': total}
	return render(request, 'search/search.html', context)
Пример #16
0
 def __init__(self, scholarship_key=None, search_result=None, to_highlight=''):
     """
     Build a display record from a scholarship search result.

     ``scholarship_key`` is stored as-is and used in ``vs_href``.
     ``search_result.object`` supplies the scholarship model whose fields
     are copied onto this instance. ``to_highlight`` is the query
     highlighted in ``snippet``, which is built from the model's description
     with ``max_length=300``.

     NOTE: despite its ``None`` default, ``search_result`` must be a result
     whose ``object`` is set; otherwise the attribute reads below raise
     AttributeError.
     """
     scholarship_model = search_result.object
     self.scholarship_key = scholarship_key
     highlight = Highlighter(to_highlight, max_length=300)
     self.snippet = highlight.highlight(scholarship_model.description)
     # No None check here: reading ``description`` above already fails when
     # the model is None, so the model is always set at this point.
     self.deadline = scholarship_model.deadline
     self.source = scholarship_model.organization
     self.href = scholarship_model.third_party_url
     self.title = scholarship_model.title
     self.essay_required = scholarship_model.essay_required
     self.gender_restriction = scholarship_model.gender_restriction
     # First 100 characters of the title with non-ASCII characters dropped.
     # NOTE(review): written for Python 2; on Python 3 this is ``bytes`` and
     # would be formatted as b'...' in the URL below - confirm target version.
     safe_title = scholarship_model.title[:100].encode('ascii', 'ignore')
     self.vs_href = u'/scholarship/{}?title={}'.format(self.scholarship_key, safe_title)
Пример #17
0
    def test_find_highlightable_words(self):
        # Each case is (query, expected word -> offsets within document_1).
        cases = (
            ("this test", {"this": [0, 53, 79], "test": [10, 68]}),
            # We don't stem for now.
            ("highlight tests", {"highlight": [22], "tests": []}),
            # Ignore negated bits.
            ("highlight -test", {"highlight": [22]}),
        )
        for query, expected in cases:
            highlighter = Highlighter(query)
            highlighter.text_block = self.document_1
            self.assertEqual(highlighter.find_highlightable_words(), expected)
Пример #18
0
    def test_find_highlightable_words(self):
        document = self.document_1

        def locate(query):
            # Word -> offsets found for ``query`` within document_1.
            finder = Highlighter(query)
            finder.text_block = document
            return finder.find_highlightable_words()

        self.assertEqual(locate('this test'),
                         {'this': [0, 53, 79], 'test': [10, 68]})

        # We don't stem for now.
        self.assertEqual(locate('highlight tests'),
                         {'highlight': [22], 'tests': []})

        # Ignore negated bits.
        self.assertEqual(locate('highlight -test'), {'highlight': [22]})
Пример #19
0
def index(request):
    """
    Search names containing the required ``key`` GET parameter and render
    ``search/search.html`` with the hits (names highlighted) as ``data`` and
    the total hit count as ``counts``.
    """
    # Search term.
    term = request.GET['key']
    hits = SearchQuerySet().filter(name__contains=(term))
    # Signature reminder:
    # Highlighter(my_query, html_tag='', css_class='', max_length=100)
    marker = Highlighter(term, max_length=100)
    # Total number of hits.
    hit_count = hits.count()
    # Apply the highlighting to every hit's name.
    for hit in hits:
        hit.name = marker.highlight(hit.name)
    # Encoding reminders: unicode -> str via unicode_string.encode('utf-8');
    # str -> unicode via unicode(utf8_string, 'utf-8').
    return render(request, 'search/search.html',
                  {'data': hits, 'counts': hit_count})
Пример #20
0
def autocomplete(request):
	"""
	Return up to seven autocomplete suggestions for the ``q`` GET parameter.

	The query is escaped and stripped; queries shorter than two characters
	yield an empty list. Each suggestion has ``name`` (the highlighted
	``auto`` value with tags removed, surrounding dots stripped and
	whitespace collapsed) and ``url`` (the object's absolute URL, or None
	when the object has no ``get_absolute_url``).
	"""
	query = escape(request.GET.get('q', '')).strip()
	suggestions = []
	if len(query) >= 2:
		lighter = Highlighter(query, max_length=64)
		for result in SearchQuerySet().autocomplete(auto=query)[:7]:
			plain = striptags(lighter.highlight(result.auto))
			match = ' '.join(plain.strip('.').split())
			if hasattr(result.object, 'get_absolute_url'):
				url = result.object.get_absolute_url()
			else:
				url = None
			suggestions.append({'name': match, 'url': url})
	return JSONResponse(request, suggestions)
Пример #21
0
def homeroom(request):
    """
    Render ``homeroom/index.html`` for the current user.

    A POST additionally searches ``Course`` (autocomplete on the submitted
    ``course-search`` value, first 10 hits) and puts the hits, each paired
    with its highlighted text, into the context as ``courses``, with
    ``suggestion`` set to ``None``. The context always receives the user's
    sections ordered by ``-section__start_date`` and an ``AddCourseForm``.
    """
    user = request.user
    context = RequestContext(request)

    is_search = request.method == 'POST'
    if is_search:
        query = request.POST['course-search']
        results = SearchQuerySet().autocomplete(text=query).models(Course)[:10]
        highlighter = Highlighter(query, html_tag='span', css_class='keyword')

        courses = []
        for result in results:
            courses.append(dict(
                object=result.object,
                highlight=highlighter.highlight(result.text),
            ))

        context['courses'] = courses
        # Spelling suggestions are currently switched off.
        context['suggestion'] = None

    ordered_assignments = SectionAssign.objects.filter(user=user).order_by(
        '-section__start_date')
    sections = []
    for assign in ordered_assignments:
        sections.append(assign.section)

    context['sections'] = sections
    context['form'] = AddCourseForm(request=request)
    return render_to_response('homeroom/index.html', context)
Пример #22
0
def get_response(project_uri, query_string, include_n3=True):
    """
    Search the texts of a project and return the matches as a dict.

    The dict holds ``results`` (one entry per kept hit, built by
    ``search_result_to_dict``), ``spelling_suggestion`` and, when
    ``include_n3`` is true, ``n3``: the N3 serialization of the metadata
    triples gathered for the kept hits. A hit is kept when its text has an
    annotation link in the project graph or is a top-level project resource.
    """
    response = {'results': []}

    project_graph = projects.get_project_graph(project_uri)
    graph = Graph()

    query_set = SearchQuerySet().models(Text).filter(
        content=AutoQuery(query_string), project__exact=project_uri)

    # Both highlighters wrap matches in the same span markup.
    markup = {'html_tag': 'span', 'css_class': CSS_RESULT_MATCH_CLASS}
    highlighter = Highlighter(query_string, **markup)
    title_highlighter = TitleHighlighter(query_string, **markup)

    response['spelling_suggestion'] = query_set.spelling_suggestion()

    for result in query_set:
        text_uri = URIRef(result.get_stored_fields()['identifier'])

        # The top-level check only runs when there is no annotation link.
        if not annotations.has_annotation_link(project_graph, text_uri):
            if not projects.is_top_level_project_resource(project_uri,
                                                          text_uri):
                continue

        response['results'].append(
            search_result_to_dict(result, project_uri, highlighter,
                                  title_highlighter))
        if include_n3:
            graph += utils.metadata_triples(project_graph, text_uri)

    if include_n3:
        response['n3'] = graph.serialize(format='n3')

    return response
Пример #23
0
def highlight(text_block, query, **kwargs):
	"""
	Return ``text_block`` with ``query`` highlighted, marked safe for
	template output. Extra keyword arguments go to ``Highlighter``.
	"""
	return mark_safe(Highlighter(query, **kwargs).highlight(text_block))
Пример #24
0
    def test_find_window(self):
        # The query doesn't matter for this method, so ignore it.
        highlighter = Highlighter('')
        highlighter.text_block = self.document_1

        # (word locations, expected (start, end) window), checked in order.
        cases = [
            # No query.
            ({}, (0, 200)),

            # Nothing found.
            ({'highlight': [], 'tests': []}, (0, 200)),

            # Simple cases.
            ({'highlight': [0], 'tests': [100]}, (0, 200)),
            ({'highlight': [99], 'tests': [199]}, (99, 299)),
            ({'highlight': [0], 'tests': [201]}, (0, 200)),
            ({'highlight': [203], 'tests': [120]}, (120, 320)),
            ({'highlight': [], 'tests': [100]}, (100, 300)),
            ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),

            # Simple cases, with an outlier far outside the window.
            ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
            ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
            ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
            ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),

            # Density checks.
            ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]},
             (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220],
              'moof': [294, 299, 450]}, (100, 300)),
        ]

        for locations, window in cases:
            self.assertEqual(highlighter.find_window(locations), window)
Пример #25
0
    def test_find_highlightable_words(self):
        # Queries and, in the same order, the word -> offsets mapping each
        # one is expected to produce for document_1.
        queries = (
            'this test',
            'highlight tests',   # We don't stem for now.
            'highlight -test',   # Ignore negated bits.
        )
        expectations = (
            {'this': [0, 53, 79], 'test': [10, 68]},
            {'highlight': [22], 'tests': []},
            {'highlight': [22]},
        )
        for query, expected in zip(queries, expectations):
            highlighter = Highlighter(query)
            highlighter.text_block = self.document_1
            found = highlighter.find_highlightable_words()
            self.assertEqual(found, expected)
Пример #26
0
    def test_highlight(self):
        # Each scenario is (Highlighter args, Highlighter kwargs, expected
        # output for document_1, document_2 and document_3, in that order).
        scenarios = [
            (
                ('this test',), {},
                (
                    u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.',
                    u'The content of words in no particular order causes nothing to occur.',
                    u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
                ),
            ),
            (
                ('this test',), {'html_tag': 'div', 'css_class': None},
                (
                    u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.',
                    u'The content of words in no particular order causes nothing to occur.',
                    u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...',
                ),
            ),
            (
                ('content detection',), {},
                (
                    u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.',
                    u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                    u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                ),
            ),
            (
                ('content detection',), {'max_length': 100},
                (
                    u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...',
                    u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.',
                    u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...',
                ),
            ),
        ]

        for args, kwargs, expected_outputs in scenarios:
            highlighter = Highlighter(*args, **kwargs)
            documents = (self.document_1, self.document_2, self.document_3)
            for document, expected in zip(documents, expected_outputs):
                self.assertEqual(highlighter.highlight(document), expected)
Пример #27
0
    def test_render_html(self):
        # Each group is (query, cases); one Highlighter is built per group
        # and shared by its cases. A case is
        # (text block, word locations, start offset, end offset, expected HTML).
        groups = [
            ('this test', [
                (self.document_1,
                 {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200,
                 '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'),
                (self.document_2,
                 {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200,
                 'The content of words in no particular order causes nothing to occur.'),
                (self.document_3,
                 {'this': [0, 53, 79], 'test': [10, 68]}, 0, 200,
                 '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'),
            ]),
            ('content detection', [
                (self.document_3,
                 {'content': [151], 'detection': [42]}, 42, 242,
                 '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.'),
                (self.document_3,
                 {'content': [151], 'detection': [42]}, 42, 200,
                 '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...'),
            ]),
            # One term found within another term.
            ('this is', [
                (self.document_1,
                 {'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200,
                 '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'),
            ]),
            # Regression for repetition in the regular expression.
            ('i++', [
                ('Foo is i++ in most cases.', {'i++': [7]}, 0, 200,
                 'Foo is <span class="highlighted">i++</span> in most cases.'),
            ]),
            ('i**', [
                ('Foo is i** in most cases.', {'i**': [7]}, 0, 200,
                 'Foo is <span class="highlighted">i**</span> in most cases.'),
            ]),
            ('i..', [
                ('Foo is i.. in most cases.', {'i..': [7]}, 0, 200,
                 'Foo is <span class="highlighted">i..</span> in most cases.'),
            ]),
            ('i??', [
                ('Foo is i?? in most cases.', {'i??': [7]}, 0, 200,
                 'Foo is <span class="highlighted">i??</span> in most cases.'),
            ]),
            # Regression for highlighting already highlighted HTML terms.
            ('span', [
                ('A span in spam makes html in a can.', {'span': [2]}, 0, 200,
                 'A <span class="highlighted">span</span> in spam makes html in a can.'),
            ]),
            ('highlight', [
                ('A span in spam makes highlighted html in a can.',
                 {'highlight': [21]}, 0, 200,
                 'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.'),
            ]),
        ]

        for query, cases in groups:
            highlighter = Highlighter(query)
            for text, locations, start, end, expected in cases:
                highlighter.text_block = text
                rendered = highlighter.render_html(locations, start, end)
                self.assertEqual(rendered, expected)
Пример #28
0
    def test_render_html(self):
        """Check the markup ``render_html`` emits for hand-supplied term offsets.

        ``text_block`` is assigned directly, and the term locations plus the
        window bounds are passed in explicitly for every check.
        """
        highlighter = Highlighter('this test')

        highlighter.text_block = self.document_1
        actual = highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
        expected = '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        self.assertEqual(actual, expected)

        highlighter.text_block = self.document_2
        actual = highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
        expected = 'The content of words in no particular order causes nothing to occur.'
        self.assertEqual(actual, expected)

        highlighter.text_block = self.document_3
        actual = highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200)
        expected = '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'
        self.assertEqual(actual, expected)

        # Same document, different query, window starting at offset 42.
        highlighter = Highlighter('content detection')
        highlighter.text_block = self.document_3

        actual = highlighter.render_html({'content': [151], 'detection': [42]}, 42, 242)
        expected = '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.'
        self.assertEqual(actual, expected)

        actual = highlighter.render_html({'content': [151], 'detection': [42]}, 42, 200)
        expected = '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...'
        self.assertEqual(actual, expected)

        # One term found within another term.
        highlighter = Highlighter('this is')
        highlighter.text_block = self.document_1
        actual = highlighter.render_html({'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200)
        expected = '<span class="highlighted">This</span> <span class="highlighted">is</span> a test of the highlightable words detection. <span class="highlighted">This</span> <span class="highlighted">is</span> only a test. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        self.assertEqual(actual, expected)

        # Each row: (query term, text block, offset of the term, expected markup).
        regression_cases = (
            # Regression for repetition in the regular expression.
            ('i++', 'Foo is i++ in most cases.', 7,
             'Foo is <span class="highlighted">i++</span> in most cases.'),
            ('i**', 'Foo is i** in most cases.', 7,
             'Foo is <span class="highlighted">i**</span> in most cases.'),
            ('i..', 'Foo is i.. in most cases.', 7,
             'Foo is <span class="highlighted">i..</span> in most cases.'),
            ('i??', 'Foo is i?? in most cases.', 7,
             'Foo is <span class="highlighted">i??</span> in most cases.'),
            # Regression for highlighting already highlighted HTML terms.
            ('span', 'A span in spam makes html in a can.', 2,
             'A <span class="highlighted">span</span> in spam makes html in a can.'),
            ('highlight', 'A span in spam makes highlighted html in a can.', 21,
             'A span in spam makes <span class="highlighted">highlight</span>ed html in a can.'),
        )
        for term, text_block, offset, expected in regression_cases:
            highlighter = Highlighter(term)
            highlighter.text_block = text_block
            self.assertEqual(highlighter.render_html({term: [offset]}, 0, 200), expected)
Пример #29
0
def highlight_result(text, query, length=500):
    """Return ``text`` highlighted for ``query`` using ``<strong>`` tags.

    ``length`` is forwarded to ``Highlighter`` as ``max_length``.
    """
    highlighter = Highlighter(query, max_length=length, html_tag='strong')
    return highlighter.highlight(text)
Пример #30
0
def return_search_results_ajax(request):
    """
    Validate the haystack search form and return highlighted results as JSON.

    If the form built from ``request.GET`` is valid, the index is queried for
    ``search_query``, hits are optionally restricted to the model selected by
    ``search_source``, and at most ``max_results`` URLs and highlighted
    snippets are returned. If the form is invalid, an empty JSON object is
    returned.

    Response keys (valid form only):
        results_total: number of hits left after the model filter.
        results_shown: ``max_results`` capped at ``results_total``.
        webpage_urls: stored ``url`` field of each shown hit.
        highlighted_snippets: ``result.text`` of each shown hit, highlighted.

    NOTE: every hit is still fetched (in chunks) to compute ``results_total``;
    only the shown hits are highlighted.
    """
    haystack_search_form = HaystackSearchForm(request.GET)
    response = {}

    if haystack_search_form.is_valid():
        search_query = haystack_search_form.cleaned_data['search_query']
        search_source = haystack_search_form.cleaned_data['search_source']
        max_results = haystack_search_form.cleaned_data['max_results']

        search_source_to_model_name = {
            'venyoo_events': 'event',
            'crawled_webpages': 'crawledwebpage'}
        # None means "any model": the source is not one of the known keys.
        wanted_model_name = search_source_to_model_name.get(search_source)

        highlighter = Highlighter(
            search_query,
            html_tag='strong',
            css_class='highlighted',
            max_length=250)

        search_results = SearchQuerySet().filter(content=AutoQuery(search_query))
        total_hits = search_results.count()
        chunk_size = 1000

        # Walk the hits chunk by chunk. Stepping over range() also covers the
        # final partial chunk, which ceil(count / 1000) drops whenever ``/``
        # is integer division.
        results = []
        for start in range(0, total_hits, chunk_size):
            for result in search_results[start:start + chunk_size]:
                if not isinstance(result, SearchResult):
                    continue
                if (wanted_model_name is not None
                        and result.model_name != wanted_model_name):
                    continue
                results.append(result)

        results_total = len(results)

        # Slice first, then build URLs and snippets from the same list, so
        # both stay aligned and only the shown hits are highlighted.
        shown_results = results[:max_results]

        response['results_total'] = results_total
        response['results_shown'] = max_results if max_results <= results_total else results_total
        response['webpage_urls'] = [
            result.get_stored_fields()['url'] for result in shown_results]
        response['highlighted_snippets'] = [
            highlighter.highlight(result.text) for result in shown_results]

    # ``content_type`` replaces the ``mimetype`` keyword, which newer Django
    # releases no longer accept.
    return HttpResponse(json.dumps(response), content_type='application/json')
Пример #31
0
 def test_highlight(self):
     """Check ``highlight`` output for several queries and constructor options.

     Each group builds one highlighter and runs it over the three fixture
     documents in order.
     """
     # Default tag and CSS class.
     highlighter = Highlighter('this test')
     for document, expected in (
         (self.document_1, u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'),
         (self.document_2, u'The content of words in no particular order causes nothing to occur.'),
         (self.document_3, u'<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'),
     ):
         self.assertEqual(highlighter.highlight(document), expected)

     # Custom tag, no CSS class.
     highlighter = Highlighter('this test', html_tag='div', css_class=None)
     for document, expected in (
         (self.document_1, u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air.'),
         (self.document_2, u'The content of words in no particular order causes nothing to occur.'),
         (self.document_3, u'<div>This</div> is a <div>test</div> of the highlightable words detection. <div>This</div> is only a <div>test</div>. Were <div>this</div> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'),
     ):
         self.assertEqual(highlighter.highlight(document), expected)

     # Different query, default options.
     highlighter = Highlighter('content detection')
     for document, expected in (
         (self.document_1, u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.'),
         (self.document_2, u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.'),
         (self.document_3, u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.'),
     ):
         self.assertEqual(highlighter.highlight(document), expected)

     # Same query with max_length=100.
     highlighter = Highlighter('content detection', max_length=100)
     for document, expected in (
         (self.document_1, u'...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-...'),
         (self.document_2, u'...<span class="highlighted">content</span> of words in no particular order causes nothing to occur.'),
         (self.document_3, u'This is a test of the highlightable words <span class="highlighted">detection</span>. This is only a test. Were this an actual emerge...'),
     ):
         self.assertEqual(highlighter.highlight(document), expected)
Пример #32
0
def highlight_result(text, query, length=500):
    """Highlight ``query`` in ``text`` with ``<strong>`` tags.

    ``length`` is passed to ``Highlighter`` as ``max_length``.
    """
    return Highlighter(query, html_tag='strong', max_length=length).highlight(text)
Пример #33
0
def slow_highlight(query, text):
    """Highlight ``query`` in ``text`` with a default ``Highlighter``.

    Invoked only if the search backend does not support highlighting.
    """
    return Highlighter(query).highlight(text)
Пример #34
0
def highlighted_persref(text, query, **kwargs):
    """Highlight ``query`` in ``text``, add persref links, and mark it safe.

    The text is highlighted with ``<strong class="found">`` tags
    (``max_length=120``), passed through ``add_persref_links``, and returned
    as a safe string.

    ``**kwargs`` is accepted but not used by this function.

    NOTE(review): the markup is marked safe without escaping here; confirm
    that ``text`` is trusted or escaped upstream.
    """
    # Function-scope import so the snippet does not depend on the file's
    # import block.
    from django.utils.safestring import mark_safe

    highlighter = Highlighter(
        query, html_tag='strong', css_class='found', max_length=120)
    highlighted_text = highlighter.highlight(text)
    linked_text = add_persref_links(highlighted_text)
    # Assuming format_html is Django's: it treats its first argument as a
    # str.format() template, so a "{" or "}" in the text would raise or be
    # rewritten. mark_safe() returns the markup unchanged.
    return mark_safe(linked_text)
Пример #35
0
    def test_render_html(self):
        """Check the markup ``render_html`` emits for hand-supplied term offsets.

        ``text_block`` is assigned directly, and the term locations plus the
        window bounds are passed in explicitly for every check.
        """
        highlighter = Highlighter("this test")

        highlighter.text_block = self.document_1
        actual = highlighter.render_html({"this": [0, 53, 79], "test": [10, 68]}, 0, 200)
        expected = '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air.'
        self.assertEqual(actual, expected)

        highlighter.text_block = self.document_2
        actual = highlighter.render_html({"this": [0, 53, 79], "test": [10, 68]}, 0, 200)
        expected = "The content of words in no particular order causes nothing to occur."
        self.assertEqual(actual, expected)

        highlighter.text_block = self.document_3
        actual = highlighter.render_html({"this": [0, 53, 79], "test": [10, 68]}, 0, 200)
        expected = '<span class="highlighted">This</span> is a <span class="highlighted">test</span> of the highlightable words detection. <span class="highlighted">This</span> is only a <span class="highlighted">test</span>. Were <span class="highlighted">this</span> an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...'
        self.assertEqual(actual, expected)

        # Same document, different query, window starting at offset 42.
        highlighter = Highlighter("content detection")
        highlighter.text_block = self.document_3

        actual = highlighter.render_html({"content": [151], "detection": [42]}, 42, 242)
        expected = '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes nothing to occur.'
        self.assertEqual(actual, expected)

        actual = highlighter.render_html({"content": [151], "detection": [42]}, 42, 200)
        expected = '...<span class="highlighted">detection</span>. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The <span class="highlighted">content</span> of words in no particular order causes no...'
        self.assertEqual(actual, expected)
Пример #36
0
    def test_find_window(self):
        """Check the ``(start, end)`` pair ``find_window`` returns for offset maps.

        The cases run in order against one highlighter whose ``text_block`` is
        ``self.document_1``.
        """
        # The query doesn't matter for this method, so ignore it.
        highlighter = Highlighter('')
        highlighter.text_block = self.document_1

        # Each row: (term -> offsets mapping, expected window).
        cases = (
            # No query.
            ({}, (0, 200)),
            # Nothing found.
            ({'highlight': [], 'tests': []}, (0, 200)),
            # Simple cases.
            ({'highlight': [0], 'tests': [100]}, (0, 200)),
            ({'highlight': [99], 'tests': [199]}, (99, 299)),
            ({'highlight': [0], 'tests': [201]}, (0, 200)),
            ({'highlight': [203], 'tests': [120]}, (120, 320)),
            ({'highlight': [], 'tests': [100]}, (100, 300)),
            ({'highlight': [0], 'tests': [80], 'moof': [120]}, (0, 200)),
            # Simple cases, with an outlier far outside the window.
            ({'highlight': [0], 'tests': [100, 450]}, (0, 200)),
            ({'highlight': [100], 'tests': [220, 450]}, (100, 300)),
            ({'highlight': [100], 'tests': [350, 450]}, (350, 550)),
            ({'highlight': [100], 'tests': [220], 'moof': [450]}, (100, 300)),
            # Density checks.
            ({'highlight': [0], 'tests': [100, 180, 450]}, (0, 200)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}, (40, 240)),
            ({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}, (100, 300)),
        )
        for locations, expected_window in cases:
            self.assertEqual(highlighter.find_window(locations), expected_window)
Пример #37
0
def slow_highlight(query, text):
    """Return ``text`` highlighted for ``query`` by a default ``Highlighter``.

    Invoked only if the search backend does not support highlighting.
    """
    highlighter = Highlighter(query)
    return highlighter.highlight(text)
Пример #38
0
 def custom_resumen(self, xfield):
     """Return ``xfield.resumen`` highlighted for the request's ``q`` parameter.

     The request is read from ``self.context['request']``; the highlighter is
     built with ``max_length=150``.
     """
     search_term = self.context['request'].query_params.get('q')
     # Highlighter also accepts html_tag and css_class keyword arguments.
     return Highlighter(search_term, max_length=150).highlight(xfield.resumen)