示例#1
0
def check(request, document_id, grade):

    document = get_object_or_404(Document, pk=document_id)

    if request.method == 'POST':
        WordFormSet = modelformset_factory(
            LocalWord, 
            form=RestrictedWordForm,
            exclude=('document', 'isConfirmed', 'isDeferred', 'grade'), 
            can_delete=True)

        formset = WordFormSet(request.POST)
        if formset.is_valid():
            instances = formset.save(commit=False)
            for instance in instances:
                instance.grade = grade
                instance.document = document
                instance.save()
            writeLocalTables([document])
            redirect = 'dictionary_check_g1' if grade == 1 else 'dictionary_check_g2'
            return HttpResponseRedirect(reverse(redirect, args=[document_id]))
        else:
            return render(request, 'dictionary/words.html', locals())

    # filter some words from the xml
    content = document.latest_version().content
    content.open()
    # strip='none': if this parameter is not set, whitespace is removed automatically for documents with a DOCTYPE declaration
    tree = etree.parse(saxon9he(content.file, os.path.join(settings.PROJECT_DIR, 'dictionary', 'xslt', 'filter.xsl'), '-strip:none', contraction=grade).stdout, parser=HUGE_TREE_PARSER)
    content.close()

    # grab the homographs
    homographs = set(("|".join(homograph.xpath('text()')).lower() 
                      for homograph in tree.xpath('//brl:homograph', namespaces=BRL_NAMESPACE)))
    duplicate_homographs = set((smart_unicode(word) for 
                                word in 
                                chain(GlobalWord.objects.filter(grade=grade).filter(type=5).filter(homograph_disambiguation__in=homographs).values_list('homograph_disambiguation', flat=True),
                                      LocalWord.objects.filter(grade=grade).filter(type=5).filter(document=document).filter(homograph_disambiguation__in=homographs).values_list('homograph_disambiguation', flat=True))))
    unknown_homographs = [{'untranslated': homograph.replace('|', ''), 
                           'braille': translate(getTables(grade), homograph.replace('|', unichr(0x250A))),
                           'type': 5,
                           'homograph_disambiguation': homograph}
                          for homograph in homographs - duplicate_homographs]
    # grab names and places
    names = set((name for names in 
                 (name.text.lower().split() for name in tree.xpath('//brl:name', namespaces=BRL_NAMESPACE) if name.text != None) for name in names))
    duplicate_names = set((smart_unicode(word) for 
                           word in 
                           chain(GlobalWord.objects.filter(grade=grade).filter(type__in=(1,2)).filter(untranslated__in=names).values_list('untranslated', flat=True),
                                 LocalWord.objects.filter(grade=grade).filter(type__in=(1,2)).filter(document=document).filter(untranslated__in=names).values_list('untranslated', flat=True))))
    unknown_names = [{'untranslated': name, 
                      'braille': translate(getTables(grade, name=True), name), 
                      'type': 2,
                      'homograph_disambiguation': ''}
                     for name in names - duplicate_names]
    places = set((place for places in 
                 (place.text.lower().split() for place in tree.xpath('//brl:place', namespaces=BRL_NAMESPACE) if place.text != None) for place in places))
    duplicate_places = set((smart_unicode(word) for 
                            word in 
                            chain(GlobalWord.objects.filter(grade=grade).filter(type__in=(3,4)).filter(untranslated__in=places).values_list('untranslated', flat=True),
                                  LocalWord.objects.filter(grade=grade).filter(type__in=(3,4)).filter(document=document).filter(untranslated__in=places).values_list('untranslated', flat=True))))
    unknown_places = [{'untranslated': place,
                       'braille': translate(getTables(grade, place=True), place),
                       'type': 4,
                       'homograph_disambiguation': ''}
                      for place in places - duplicate_places]

    # filter homographs, names and places from the xml
    xsl = etree.parse(os.path.join(settings.PROJECT_DIR, 'dictionary', 'xslt', 'filter_names.xsl'),
                      parser=HUGE_TREE_PARSER)
    transform = etree.XSLT(xsl)
    filtered_tree = transform(tree)
    # grab the rest of the content
    content = etree.tostring(filtered_tree, method="text", encoding=unicode)
    # filter all punctuation and replace dashes by space, so we can split by space below
    content = ''.join(
        # replace Punctuation Dash and Punctuation other (except for "'") with space
        c if c == u"\u0027" or unicodedata.category(c) not in ['Pd', 'Po'] else ' '
        for c in content 
        # drop all chars which are not letters, separators or select
        # punctuation which we replace with space later on
        if unicodedata.category(c) in ['Lu', 'Ll', 'Zs', 'Zl', 'Zp', 'Pd', 'Po']
        or c in ['\n', '\r'])

    new_words = set((w.lower() for w in content.split() if len(w) > 1))
    # FIXME: We basically do a set difference manually here. This
    # would probably be better if done inside the db. However for that
    # we would have to be able to insert the new_words into the db in
    # an efficient manner, i.e. bulk insert. For a possibility on how
    # to do this in the context of Django ORM look at
    # http://ole-laursen.blogspot.com/2010/11/bulk-inserting-django-objects.html.
    # After that we could for example do a query along the lines of
    # cursor.execute("SELECT untranslated from new_words EXCEPT SELECT
    # untranslated FROM dict_words;). However MySQL doesn't seem to
    # support EXCEPT so it would be SELECT untranslated FROM new_words
    # w1 LEFT JOIN dict_words w2 ON w1.untranslated=w2.untranslated
    # WHERE w2.untranslated IS NULL;
    duplicate_words = set((smart_unicode(word) for 
                           word in
    # exclude type 2,4 and 5 as these probably have a different
    # translations, so we do need to show these words if they are not
    # tagged even if they have an entry in the dictionary as a name or
    # a place.
                           chain(GlobalWord.objects.filter(grade=grade).exclude(type__in=(2,4,5)).filter(untranslated__in=new_words).values_list('untranslated', flat=True),
                                 LocalWord.objects.filter(grade=grade).exclude(type__in=(2,4,5)).filter(document=document).filter(untranslated__in=new_words).values_list('untranslated', flat=True))))
    unknown_words = [{'untranslated': word, 
                      'braille': translate(getTables(grade), word),
                      'type' : 0,
                      'homograph_disambiguation': ''}
                     for word in new_words - duplicate_words]

    unknown_words = unknown_words + unknown_homographs + unknown_names + unknown_places
    unknown_words.sort(cmp=lambda x,y: cmp(x['untranslated'].lower(), y['untranslated'].lower()))

    # remove words from the local words which are no longer in the document (they might have
    # been typos that slipped in to the local words and were corrected subsequently)
    all_duplicates = duplicate_homographs | duplicate_names | duplicate_places | duplicate_words
    LocalWord.objects.filter(grade=grade, document=document).exclude(untranslated__in=all_duplicates).delete()

    paginator = Paginator(unknown_words, MAX_WORDS_PER_PAGE)
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1
    
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    WordFormSet = modelformset_factory(
        LocalWord, 
        form=RestrictedWordForm,
        exclude=('document', 'isConfirmed', 'isDeferred', 'grade'), 
        extra=len(words.object_list), can_delete=True)

    have_type = any((word['type']!=0 for word in words.object_list))
    have_homograph_disambiguation = any((word['homograph_disambiguation']!='' for word in words.object_list))
    formset = WordFormSet(queryset=LocalWord.objects.none(), initial=words.object_list)

    # Document statistic
    stats = DocumentStatistic(document=document, grade=grade, total=len(new_words), unknown=len(unknown_words))
    percentage = 100.0*stats.unknown/stats.total
    stats.save()

    return render(request, 'dictionary/words.html', locals())
示例#2
0
def edit_global_words_with_missing_braille(request):

    WordFormSet = formset_factory(GlobalWordBothGradesForm, extra=0)

    if request.method == 'POST':
        formset = WordFormSet(request.POST)
        if formset.is_valid():
            for form in formset.forms:
                GlobalWord.objects.create(
                    untranslated=form.cleaned_data['untranslated'], 
                    braille=form.cleaned_data['grade2'] if form.cleaned_data['original_grade'] == 1 else form.cleaned_data['grade1'],
                    grade=2 if form.cleaned_data['original_grade'] == 1 else 1,
                    type=form.cleaned_data['type'],
                    homograph_disambiguation=form.cleaned_data['homograph_disambiguation'])
            return HttpResponseRedirect(reverse('dictionary_edit_global_words_with_missing_braille'))
        else:
            return render(request, 'dictionary/edit_missing_globals.html', locals())

    WORDS_WITH_MISSING_BRAILLE = """
SELECT l.* 
FROM dictionary_globalword AS l
WHERE NOT EXISTS
      (
      SELECT NULL
      FROM dictionary_globalword AS r
      WHERE
	l.untranslated = r.untranslated AND 
      	l.type = r.type AND
      	l.homograph_disambiguation = r.homograph_disambiguation AND
      	l.grade != r.grade
      )
ORDER BY l.untranslated
"""
    single_grade_words = GlobalWord.objects.raw(WORDS_WITH_MISSING_BRAILLE)
    missing_words = [{'untranslated': smart_unicode(word.untranslated),
                      'original_grade': word.grade,
                      'grade1': smart_unicode(word.braille) if word.grade == 1 else translate(getTables(1), smart_unicode(word.untranslated)),
                      'grade2': smart_unicode(word.braille) if word.grade == 2 else translate(getTables(2), smart_unicode(word.untranslated)),
                      'type' : word.type,
                      'homograph_disambiguation': smart_unicode(word.homograph_disambiguation)}
                     for word in single_grade_words]

    paginator = Paginator(missing_words, MAX_WORDS_PER_PAGE)
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1
    
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    formset = WordFormSet(initial=words.object_list)
    return render(request, 'dictionary/edit_missing_globals.html', locals())
示例#3
0
def confirm(request, grade, deferred=False):
    if [word for word in get_conflicting_words(grade)]:
        redirect = ('dictionary_confirm_deferred_conflicting_duplicates_g' if deferred
                        else 'dictionary_confirm_conflicting_duplicates_g') + str(grade)
        return HttpResponseRedirect(reverse(redirect))

    WordFormSet = formset_factory(ConfirmDeferredWordForm if deferred else ConfirmWordForm, extra=0) 
    if request.method == 'POST':

        formset = WordFormSet(request.POST)
        if formset.is_valid():
            # FIXME: in Djano 1.3+ formset formmsets are iterable, so you can just say 
            # for form in formset:
            for form in formset.forms:
                update_word_tables(form, grade, deferred)
            # FIXME: in principle we need to regenerate the liblouis tables,
            # i.e. the white lists now. However we do this asynchronously
            # (using a cron job) for now. There are several reasons for this:
            # 1) It is slow as hell if done inside a transaction. To do this
            # outside the transaction we need transaction context managers
            # (https://docs.djangoproject.com/en/1.3/topics/db/transactions/#controlling-transaction-management-in-views)
            # which are only available in Django 1.3.
            # 2) We need to serialize the table writing so they do not write
            # on top of each other. This is easy if it is done periodically.
            # 3) Of course it would be nice to use some kind of message queue
            # for this (e.g. rabbitmq and celery), but for now this poor mans
            # solution seems good enough
            # redirect to self as there might be more words
            redirect = ('dictionary_confirm_deferred_g' if deferred else 'dictionary_confirm_g') + str(grade)
            return HttpResponseRedirect(reverse(redirect))
        else:
            return render(request, 'dictionary/confirm.html', locals())

    # create a default for all unconfirmed homographs which have no default, i.e. no restriction word entry
    unconfirmed_homographs = set((smart_unicode(word) for 
                                  word in 
                                  LocalWord.objects.filter(grade=grade, type=5, isConfirmed=False, isDeferred=deferred, 
                                                           document__state__sort_order=final_sort_order).values_list('untranslated', flat=True)))
    if unconfirmed_homographs:
        covered_entries = set((smart_unicode(word) for 
                               word in 
                               chain(
                    LocalWord.objects.filter(grade=grade, type=0, untranslated__in=unconfirmed_homographs).values_list('untranslated', flat=True),
                    GlobalWord.objects.filter(grade=grade, type=0, untranslated__in=unconfirmed_homographs).values_list('untranslated', flat=True))))
                                 
        for word in unconfirmed_homographs - covered_entries:
            document = Document.objects.filter(localword__grade=grade, localword__type=5, localword__isConfirmed=False, localword__untranslated=word)[0]
            w = LocalWord(untranslated=word, 
                          braille=translate(getTables(grade), word),
                          grade=grade, type=0, document=document)
            w.save()
    
    filterform = FilterForm(request.GET)
    if filterform.is_valid():
        currentFilter = filterform.cleaned_data['filter']
    
    words_to_confirm = LocalWord.objects.filter(grade=grade, isConfirmed=False, isDeferred=deferred, 
                                                untranslated__contains=currentFilter,
                                                document__state__sort_order=final_sort_order).order_by('untranslated', 'type').values('untranslated', 'braille', 'type', 'homograph_disambiguation', 'isLocal').distinct()
    paginator = Paginator(words_to_confirm, MAX_WORDS_PER_PAGE)
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1
    
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    have_type = any((word['type']!=0 for word in words.object_list))
    have_homograph_disambiguation = any((word['homograph_disambiguation']!='' for word in words.object_list))
    formset = WordFormSet(initial=words.object_list)
    return render(request, 'dictionary/confirm.html', locals())