Пример #1
0
 def get(self, request, page):  # the `page` argument is ignored; ?page= is used
     """Render the search form, or run a search and render one result page.

     With no GET parameters the search form (``search.html``) is rendered.
     Otherwise the subcorpus is selected with ``get_subcorpus`` and the
     query is dispatched to ``exact_search`` (non-empty ``exact_word``)
     or to ``lex_search``; the outcome is rendered with ``result.html``.

     GET parameters read here:
       * ``per_page`` -- required, must be numeric.
       * ``expand``   -- required; only its last character is converted
         to an integer.
       * ``page``     -- optional; falls back to 1 when missing, not an
         integer, or below 1.
       * ``exact_word`` -- optional; empty or missing selects
         ``lex_search``.

     The ``page`` argument of this method is overwritten and never used.
     """
     query = request.GET
     if not query:
         return render_to_response('search.html',
                                   context_instance=RequestContext(request))

     subcorpus, subcorpus_sents, subcorpus_words, flag = get_subcorpus(
         query)
     count_data = {
         'total_docs': Document.objects.count(),
         'total_sents': Sentence.objects.count(),
         'total_tokens': Token.objects.count(),
         'subcorpus_docs': len(subcorpus),
         'subcorpus_sents': subcorpus_sents,
         'subcorpus_words': subcorpus_words,
     }
     per_page = int(query.get(u'per_page'))
     expand = int(query.get(u'expand')[-1])

     # Parse the page number defensively: a junk ?page= value must not
     # turn into an unhandled ValueError.
     raw_page = query.get('page')
     try:
         page = int(raw_page) if raw_page else 1
     except ValueError:
         page = 1
     page = max(page, 1)

     exact_word = query.get("exact_word", '')
     if exact_word != '':
         jq, sent_list, word, res_docs, res_num = exact_search(
             exact_word.lower().encode('utf-8'),
             subcorpus, flag, expand, page, per_page)
     else:
         # todo rewrite this part of search
         jq, sent_list, word, res_docs, res_num = lex_search(
             query, subcorpus, flag, expand, page, per_page)

     # The paginator is built over placeholders: only the item count
     # (res_num) matters for the page links, while the rows shown in the
     # template come from sent_list.
     paginator = Paginator([''] * res_num, per_page)
     # Window of page links: up to 10 pages on each side of the current one.
     start = page - 10 if page > 10 else 1
     end = min(page + 10, paginator.num_pages)
     paginator.page_range2 = range(start, end + 1)
     try:
         sents = paginator.page(page)
     except EmptyPage:
         # If page is out of range (e.g. 9999), deliver last page of results.
         sents = paginator.page(paginator.num_pages)

     # Path with the page part stripped by rePage, and its download twin.
     full_path = rePage.sub('', request.get_full_path())
     d_path = full_path.replace('19thcentury/search/',
                                '19thcentury/search/download/')
     return render_to_response('result.html', {
         'query': word,
         'result': sent_list,
         'pages': sents,
         'numbers': count_data,
         'total': res_num,
         'total_docs': res_docs,
         'path': full_path,
         'd_path': d_path,
         'j': jq,
         # number of the first result on this page (for the ordered list)
         'olstart': (page - 1) * per_page + 1,
     },
                               context_instance=RequestContext(request))
Пример #2
0
    def get(self, request, page):  # the `page` argument is ignored; ?page= is used
        """Render the search form, or run a search and paginate its results.

        With no GET parameters the search form (``search.html``) is rendered
        with a ``QueryForm`` formset class.  Otherwise the subcorpus is
        selected with ``get_subcorpus``, the query goes to ``exact_search``
        (non-empty ``exact_word``) or ``lex_search``, and both the sentence
        list and ``jq`` are paginated here before ``result.html`` is
        rendered.

        GET parameters read here:
          * ``per_page`` -- required, must be numeric.
          * ``expand``   -- required; only its last character is converted
            to an integer.
          * ``page``     -- optional; falls back to 1 when missing, not an
            integer, or below 1.
          * ``exact_word`` -- optional; empty or missing selects
            ``lex_search``.

        The ``page`` argument of this method is overwritten and never used.
        """
        query = request.GET
        if not query:
            QueryFormset = formset_factory(QueryForm, extra=2)
            return render_to_response('search.html', {'form': QueryFormset},
                                      context_instance=RequestContext(request))

        subcorpus, subcorpus_sents, subcorpus_words, flag = get_subcorpus(query)
        count_data = {'total_docs': Document.objects.count(),
                      'total_sents': Sentence.objects.count(),
                      'total_tokens': Token.objects.count(),
                      'subcorpus_docs': len(subcorpus),
                      'subcorpus_sents': subcorpus_sents,
                      'subcorpus_words': subcorpus_words}
        per_page = int(query.get(u'per_page'))
        expand = int(query.get(u'expand')[-1])

        exact_word = query.get("exact_word", '')
        if exact_word != '':
            jq, sent_list, word, res_docs = exact_search(
                exact_word.lower().encode('utf-8'), subcorpus, flag, expand)
        else:
            # todo rewrite this part of search
            jq, sent_list, word, res_docs = lex_search(query, subcorpus, flag, expand)

        # Parse the page number defensively: a junk ?page= value must not
        # turn into an unhandled ValueError.
        raw_page = query.get('page')
        try:
            page = int(raw_page) if raw_page else 1
        except ValueError:
            page = 1
        page = max(page, 1)

        paginator = Paginator(sent_list, per_page)
        jq_paginator = Paginator(jq, per_page)
        # Window of page links: up to 10 pages on each side of the current
        # one.  `end` is inclusive, hence the + 1 for range(); without it
        # the last page would never be linked.
        start = page - 10 if page > 10 else 1
        end = min(page + 10, paginator.num_pages)
        paginator.page_range2 = range(start, end + 1)
        try:
            sents = paginator.page(page)
            jq = jq_paginator.page(page)
        except EmptyPage:
            # If page is out of range (e.g. 9999), deliver last page of results.
            sents = paginator.page(paginator.num_pages)
            jq = jq_paginator.page(jq_paginator.num_pages)

        # Request path with the page part stripped by rePage.
        full_path = rePage.sub('', request.get_full_path())
        return render_to_response('result.html',
                                  {'query': word, 'result': sents,
                                   'numbers': count_data,
                                   'total': len(sent_list), 'total_docs': res_docs,
                                   'path': full_path, 'j': jq},
                                  context_instance=RequestContext(request))
Пример #3
0
 def get(self, request, page):  # the `page` argument is ignored; ?page= is used
     """Show the search form, or execute a search and render a result page.

     An empty query string renders ``search.html``.  Any other request
     selects a subcorpus via ``get_subcorpus``, runs ``exact_search`` when
     ``exact_word`` is non-empty (``lex_search`` otherwise) and renders
     ``result.html``.

     GET parameters read here:
       * ``per_page`` -- required, must be numeric.
       * ``expand``   -- required; only its last character is converted
         to an integer.
       * ``page``     -- optional; falls back to 1 when missing, not an
         integer, or below 1.
       * ``exact_word`` -- optional; empty or missing selects
         ``lex_search``.

     The ``page`` argument of this method is overwritten and never used.
     """
     query = request.GET
     if not query:
         return render_to_response('search.html',
                                   context_instance=RequestContext(request))

     subcorpus, subcorpus_sents, subcorpus_words, flag = get_subcorpus(query)
     count_data = {'total_docs': Document.objects.count(),
                   'total_sents': Sentence.objects.count(),
                   'total_tokens': Token.objects.count(),
                   'subcorpus_docs': len(subcorpus),
                   'subcorpus_sents': subcorpus_sents,
                   'subcorpus_words': subcorpus_words}
     per_page = int(query.get(u'per_page'))
     expand = int(query.get(u'expand')[-1])

     # A non-numeric ?page= must fall back to the first page instead of
     # raising ValueError; values below 1 are treated the same way.
     raw_page = query.get('page')
     try:
         page = int(raw_page) if raw_page else 1
     except ValueError:
         page = 1
     page = max(page, 1)

     exact_word = query.get("exact_word", '')
     if exact_word != '':
         jq, sent_list, word, res_docs, res_num = exact_search(
             exact_word.lower().encode('utf-8'), subcorpus, flag, expand,
             page, per_page)
     else:
         # todo rewrite this part of search
         jq, sent_list, word, res_docs, res_num = lex_search(
             query, subcorpus, flag, expand, page, per_page)

     # Placeholder items: the paginator only needs the result count to
     # produce page links; the displayed rows are taken from sent_list.
     paginator = Paginator([''] * res_num, per_page)
     # Link window of at most 10 pages before and after the current page.
     start = page - 10 if page > 10 else 1
     end = min(page + 10, paginator.num_pages)
     paginator.page_range2 = range(start, end + 1)
     try:
         sents = paginator.page(page)
     except EmptyPage:
         # If page is out of range (e.g. 9999), deliver last page of results.
         sents = paginator.page(paginator.num_pages)

     # Path with the page part stripped by rePage, and its download twin.
     full_path = rePage.sub('', request.get_full_path())
     d_path = full_path.replace('19thcentury/search/', '19thcentury/search/download/')
     return render_to_response('result.html',
                               {'query': word, 'result': sent_list, 'pages': sents,
                                'numbers': count_data,
                                'total': res_num, 'total_docs': res_docs,
                                'path': full_path, 'd_path': d_path, 'j': jq,
                                # number of the first result on this page
                                'olstart': (page - 1) * per_page + 1},
                               context_instance=RequestContext(request))
Пример #4
0
    def get(self, request):
        """Render the search form, or run one of three searches and show a page.

        With no GET parameters ``search/search.html`` is rendered.  Otherwise
        the search is chosen by which key is present in the query string:

          * ``exact_search`` -> ``exact_full_search`` on ``exact_word``
          * ``orig_search``  -> ``orig_exact_search`` on ``orig_word``
          * neither          -> ``lex_search`` on the whole query

        Results are rendered with ``search/result_orig.html`` whenever
        ``orig_search`` is present in the query, and with
        ``search/result.html`` (after passing the sentences through
        ``sent_dict``) otherwise.

        GET parameters ``per_page`` and ``expand`` are required (only the
        last character of ``expand`` is converted to an integer).  ``page``
        is optional and falls back to 1 when missing, not an integer, or
        below 1.
        """
        query = request.GET
        if not query:
            return render(request, 'search/search.html')

        subcorpus, subcorpus_sents, subcorpus_words, flag = get_subcorpus(query)
        count_data = {'total_docs': Document.objects.count(),
                      'total_sents': Sentence.objects.count(),
                      'total_tokens': Token.objects.count(),
                      'subcorpus_docs': len(subcorpus),
                      'subcorpus_sents': subcorpus_sents,
                      'subcorpus_words': subcorpus_words}
        per_page = int(query.get(u'per_page'))
        expand = int(query.get(u'expand')[-1])

        # Parse the page number defensively: a junk ?page= value must not
        # turn into an unhandled ValueError.
        raw_page = query.get('page')
        try:
            page = int(raw_page) if raw_page else 1
        except ValueError:
            page = 1
        page = max(page, 1)

        is_orig_search = "orig_search" in query
        if "exact_search" in query:
            jq, sent_list, word, res_docs, res_num = exact_full_search(
                query["exact_word"].lower().encode('utf-8'),
                subcorpus, flag, expand, page, per_page)
        elif is_orig_search:
            jq, sent_list, word, res_docs, res_num = orig_exact_search(
                query["orig_word"].lower().encode('utf-8'),
                subcorpus, flag, expand, page, per_page)
        else:
            # todo rewrite this part of search
            jq, sent_list, word, res_docs, res_num = lex_search(
                query, subcorpus, flag, expand, page, per_page)

        # Placeholder items: the paginator only needs the result count to
        # produce page links; the displayed rows are taken from sent_list.
        paginator = Paginator([''] * res_num, per_page)
        # Link window of at most 10 pages before and after the current page.
        start = page - 10 if page > 10 else 1
        end = min(page + 10, paginator.num_pages)
        paginator.page_range2 = range(start, end + 1)
        try:
            sents = paginator.page(page)
        except EmptyPage:
            # If page is out of range (e.g. 9999), deliver last page of results.
            sents = paginator.page(paginator.num_pages)

        # Path with the page part stripped by rePage, and its download twin.
        full_path = rePage.sub('', request.get_full_path())
        prefix = 'translator_corpus' if PROD else ''
        d_path = full_path.replace(prefix + '/search/', prefix + '/search/download/')

        # Both templates receive the same context; only 'result' differs.
        context = {'query': word, 'result': sent_list, 'pages': sents,
                   'numbers': count_data,
                   'total': res_num, 'total_docs': res_docs,
                   'path': full_path, 'd_path': d_path, 'j': jq,
                   # number of the first result on this page
                   'olstart': (page - 1) * per_page + 1}
        if is_orig_search:
            return render(request, 'search/result_orig.html', context)
        context['result'] = sent_dict(sent_list)
        return render(request, 'search/result.html', context)
Пример #5
0
    def get(self, request):
        """Render the download form, or export one page of results as .xlsx.

        With no GET parameters ``download_search.html`` is rendered.
        Otherwise the subcorpus is selected with ``get_subcorpus``, the
        search is run (``exact_full_search`` when ``exact_search`` is in the
        query string, ``lex_search`` otherwise) and the returned sentences
        are written to an in-memory workbook that is sent back as the
        attachment ``rlc_search_results.xlsx``.

        Each sentence produces one row per annotation returned by
        ``Sentence.get_annotations``; a sentence without annotations still
        produces a single row with the five annotation columns left empty.

        GET parameters ``per_page`` and ``expand`` are required (only the
        last character of ``expand`` is converted to an integer).  ``page``
        is optional and falls back to 1 when missing, not an integer, or
        below 1.
        """
        query = request.GET
        if not query:
            return render_to_response('download_search.html',
                                      context_instance=RequestContext(request))

        # Only the subcorpus and its flag are needed for an export; the
        # sentence/word totals are not shown anywhere in the spreadsheet.
        subcorpus, _sents_total, _words_total, flag = get_subcorpus(query)
        per_page = int(query.get(u'per_page'))
        expand = int(query.get(u'expand')[-1])

        # Parse the page number defensively: a junk ?page= value must not
        # turn into an unhandled ValueError.
        raw_page = query.get('page')
        try:
            page = int(raw_page) if raw_page else 1
        except ValueError:
            page = 1
        page = max(page, 1)

        if "exact_search" in query:
            jq, sent_list, word, res_docs, res_num = exact_full_search(
                query["exact_word"].lower().encode('utf-8'),
                subcorpus, flag, expand, page, per_page)
        else:
            # todo rewrite this part of search
            jq, sent_list, word, res_docs, res_num = lex_search(
                query, subcorpus, flag, expand, page, per_page)

        # Header row (Russian): example number, text title, language,
        # Russian language, level, year, original sentence, corrected
        # sentence, tag, error, correction, annotator, comment.
        rows = [[
            u'Номер примера', u'Название текста', u'Язык', u'Русский язык',
            u'Уровень', u'Год', u'Оригинальное предложение',
            u'Исправленное предложение', u'Тег', u'Ошибка', u'Исправление',
            u'Разметчик', u'Комментарий'
        ]]
        first_number = (page - 1) * per_page + 1
        for ind, sent in enumerate(sent_list):
            doc = sent.doc_id
            # Markup is flattened for the spreadsheet: <b>..</b> becomes
            # {{..}} and correction spans are wrapped in asterisks.
            original = reSpan.sub('\\2', sent.tagged).replace(
                '<b>', '{{').replace('</b>', '}}')
            corrected = reSpan.sub('\\2', sent.correct).replace(
                '<span class="correction">', '*').replace('</span>', '*')
            # Columns shared by every row generated for this sentence.
            common = [first_number + ind, doc.title, doc.native,
                      doc.language_background, doc.level,
                      doc.date_displayed, original, corrected]

            anns = Sentence.get_annotations(sent.id)
            if not anns:
                rows.append(common + [''] * 5)
                continue
            for an in anns:
                rows.append(common + [an['tag'], an['quote'], an['corr'],
                                      an['owner'].username, an['comment']])

        # Build the workbook in memory and dump the rows cell by cell.
        output = StringIO.StringIO()
        book = xlsxwriter.Workbook(output)
        sheet = book.add_worksheet()
        for row_num, row in enumerate(rows):
            for col_num, item in enumerate(row):
                sheet.write(row_num, col_num, item)
        book.close()

        # construct response
        output.seek(0)
        response = HttpResponse(
            output.read(),
            content_type=
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
        response[
            'Content-Disposition'] = 'attachment; filename="rlc_search_results.xlsx"'
        return response
Пример #6
0
    def get(self, request, page):  # the `page` argument is ignored; ?page= is used
        """Render the search form, or run a search and render one result page.

        With no GET parameters ``search.html`` is rendered with a
        ``QueryForm`` formset class and an empty ``d`` dict.  Otherwise the
        subcorpus is selected with ``get_subcorpus``, the query goes to
        ``exact_search`` (non-empty ``exact_word``) or ``lex_search``, and
        ``result.html`` is rendered.

        GET parameters read here:
          * ``per_page`` -- required, must be numeric.
          * ``expand``   -- required; only its last character is converted
            to an integer.
          * ``page``     -- optional; falls back to 1 when missing, not an
            integer, or below 1.
          * ``exact_word`` -- optional; empty or missing selects
            ``lex_search``.

        The ``page`` argument of this method is overwritten and never used.
        """
        query = request.GET
        if not query:
            QueryFormset = formset_factory(QueryForm, extra=2)
            # No values are currently supplied under 'd'; the template
            # receives an empty dict.
            d = {}
            return render_to_response('search.html', {
                'form': QueryFormset,
                'd': d
            },
                                      context_instance=RequestContext(request))

        subcorpus, subcorpus_sents, subcorpus_words, flag = get_subcorpus(
            query)
        count_data = {
            'total_docs': Document.objects.count(),
            'total_sents': Sentence.objects.count(),
            'total_tokens': Token.objects.count(),
            'subcorpus_docs': len(subcorpus),
            'subcorpus_sents': subcorpus_sents,
            'subcorpus_words': subcorpus_words,
        }
        per_page = int(query.get(u'per_page'))
        expand = int(query.get(u'expand')[-1])

        # Parse the page number defensively: a junk ?page= value must not
        # turn into an unhandled ValueError.
        raw_page = query.get('page')
        try:
            page = int(raw_page) if raw_page else 1
        except ValueError:
            page = 1
        page = max(page, 1)

        exact_word = query.get("exact_word", '')
        if exact_word != '':
            jq, sent_list, word, res_docs, res_num = exact_search(
                exact_word.lower().encode('utf-8'),
                subcorpus, flag, expand, page, per_page)
        else:
            # todo rewrite this part of search
            jq, sent_list, word, res_docs, res_num = lex_search(
                query, subcorpus, flag, expand, page, per_page)

        # Placeholder items: the paginator only needs the result count to
        # produce page links; the displayed rows are taken from sent_list.
        paginator = Paginator([''] * res_num, per_page)
        # Link window of at most 10 pages before and after the current page.
        start = page - 10 if page > 10 else 1
        end = min(page + 10, paginator.num_pages)
        paginator.page_range2 = range(start, end + 1)
        try:
            sents = paginator.page(page)
        except EmptyPage:
            # If page is out of range (e.g. 9999), deliver last page of results.
            sents = paginator.page(paginator.num_pages)

        # Request path with the page part stripped by rePage.
        full_path = rePage.sub('', request.get_full_path())
        return render_to_response('result.html', {
            'query': word,
            'result': sent_list,
            'pages': sents,
            'numbers': count_data,
            'total': res_num,
            'total_docs': res_docs,
            'path': full_path,
            'j': jq,
            # number of the first result on this page (for the ordered list)
            'olstart': (page - 1) * per_page + 1,
        },
                                  context_instance=RequestContext(request))