Пример #1
0
 def get_word_list(self):
     """Return the distinct base-form words of ``self.text`` in sort-key order.

     ``self.close()`` is called first. The text is then split into runs of
     word characters; purely alphabetic tokens are mapped through
     ``base_form`` and tokens for which it returns ``None`` are dropped.

     Returns:
         A list of unique base forms ordered ascending by ``self.sort_key``.
     """
     self.close()
     tokens = RegexpTokenizer(r'\w+').tokenize(self.text)
     unique_bases = {
         base
         for base in (base_form(w) for w in tokens if w.isalpha())
         if base is not None
     }
     # Sort by frequency (per the original comment; the actual ordering is
     # whatever self.sort_key defines).  sorted() evaluates the key once per
     # element, and avoids the previous local named ``list`` that shadowed
     # the builtin.
     return sorted(unique_bases, key=self.sort_key)
Пример #2
0
def find_glossary_words(book_dir, all_words):
    """Return the sorted glossary headwords of a book that occur in ``all_words``.

    Reads ``glossary.json`` from ``book_dir``, converts each entry's
    ``'headword'`` with ``base_form`` and keeps those also present in
    ``all_words``.

    Args:
        book_dir: Directory expected to contain ``glossary.json``.
        all_words: Iterable of words to intersect the glossary with.

    Returns:
        A sorted list of matching base-form headwords; an empty list when
        the glossary file does not exist.
    """
    glossary_path = os.path.join(book_dir, 'glossary.json')
    # Guard clause: a book without a glossary simply has no glossary words.
    if not os.path.exists(glossary_path):
        return []

    with open(glossary_path, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    headwords = {base_form(entry['headword']) for entry in entries}
    return sorted(headwords.intersection(all_words))
Пример #3
0
def get_word_rating(request, word):
    """Return, as JSON, the requesting user's stored rating for ``word``.

    Args:
        request: The incoming request; ``request.user`` identifies the user.
        word: The word to look up; it is reduced with ``base_form`` first.

    Returns:
        ``JsonResponse({'rating': <rating>})`` when a ``WordModel`` exists for
        this user and base form; otherwise
        ``JsonResponse({'word': <base form>, 'rating': False})`` (also used
        when there is no ``ClusiveUser`` for the request user).
    """
    # Computed before the try block: both error responses below use ``base``.
    # Previously it was assigned after the ClusiveUser lookup, so a missing
    # ClusiveUser made the handler itself fail with UnboundLocalError.
    base = base_form(word)
    try:
        user = ClusiveUser.objects.get(user=request.user)
        wm = WordModel.objects.get(user=user, word=base)
        return JsonResponse({'rating': wm.rating})
    except WordModel.DoesNotExist:
        return JsonResponse({'word': base, 'rating': False})
    except ClusiveUser.DoesNotExist:
        logger.warning("No clusive user, can't fetch ratings")
        return JsonResponse({'word': base, 'rating': False})
Пример #4
0
def word_bank_remove(request, word):
    """Remove ``word`` from the requesting user's word bank.

    Args:
        request: The incoming request; ``request.user`` identifies the user.
        word: The word to remove; it is reduced with ``base_form`` first.

    Returns:
        ``JsonResponse({'success': 1})`` after ``register_wordbank_remove()``
        has been called on the matching ``WordModel``;
        ``JsonResponse({'success': 0})`` when there is no ``ClusiveUser`` for
        the request user or no ``WordModel`` for this user and word.
    """
    try:
        user = ClusiveUser.objects.get(user=request.user)
        base = base_form(word)
        wm = WordModel.objects.get(user=user, word=base)
    except ClusiveUser.DoesNotExist:
        logger.warning("No clusive user, can't remove word")
        return JsonResponse({'success': 0})
    except WordModel.DoesNotExist:
        # objects.get() raises instead of returning a falsy value, so the
        # "nothing to remove" case has to be handled here; the former
        # ``if wm: ... else:`` branch could never report it.
        return JsonResponse({'success': 0})
    else:
        wm.register_wordbank_remove()
        return JsonResponse({'success': 1})
Пример #5
0
def set_word_rating(request, word, rating):
    """Store the requesting user's rating for ``word`` and emit ``word_rated``.

    Args:
        request: The incoming request; ``request.user`` identifies the user.
        word: The word being rated; stored under its ``base_form``.
        rating: The rating value; must satisfy ``WordModel.is_valid_rating``.

    Returns:
        ``JsonResponse({'success': 1})`` when the rating was registered;
        ``JsonResponse({'success': 0})`` when the rating is invalid or there
        is no ``ClusiveUser`` for the request user.
    """
    try:
        user = ClusiveUser.objects.get(user=request.user)
        # Validate before touching the database so that an invalid rating
        # does not leave behind a freshly created, unrated WordModel row.
        if not WordModel.is_valid_rating(rating):
            return JsonResponse({'success': 0})
        base = base_form(word)
        wm, _ = WordModel.objects.get_or_create(user=user, word=base)
        wm.register_rating(rating)
        # NOTE(review): ``GlossaryConfig.__class__`` is the metaclass of
        # GlossaryConfig, not GlossaryConfig itself; kept unchanged because
        # receivers may be connected against this sender - confirm intent.
        word_rated.send(sender=GlossaryConfig.__class__,
                        request=request,
                        word=word,
                        rating=rating)
        return JsonResponse({'success': 1})
    except ClusiveUser.DoesNotExist:
        logger.warning("No clusive user, can't set ratings")
        return JsonResponse({'success': 0})
Пример #6
0
 def init_data(self):
     """Load the glossary of the book ``self.book_id`` into ``self.data``.

     ``self.data`` is reset to an empty dict and then filled so that each
     glossary entry is reachable both by the base form of its
     ``'headword'`` and by each of its lower-cased ``'alternateForms'``.

     A missing glossary file is logged as a warning and any other OS-level
     read failure as an error; in both cases ``self.data`` is left with
     whatever was loaded so far (normally empty). Exceptions that are not
     ``OSError`` (e.g. from the ``Book`` lookup or JSON parsing) propagate.
     """
     self.data = {}
     try:
         book = Book.objects.get(id=self.book_id)
         with open(book.glossary_storage, 'r', encoding='utf-8') as file:
             logger.debug("Reading glossary %s", file.name)
             rawdata = json.load(file)
         # The file is closed at this point; indexing needs only rawdata.
         for worddata in rawdata:
             base = glossaryutil.base_form(worddata['headword'])
             self.data[base] = worddata
             # Tolerate entries whose 'alternateForms' is absent or null
             # rather than aborting the whole glossary load.
             for altform in worddata.get('alternateForms') or []:
                 self.data[altform.lower()] = worddata
     except FileNotFoundError:
         logger.warning('Book %s has no glossary', book)
     except OSError:
         # OSError is the canonical name of the legacy EnvironmentError
         # alias; exc_info keeps the underlying cause in the log.
         logger.error('Failed to read glossary data', exc_info=True)
Пример #7
0
 def form_valid(self, form):
     """Compute readability statistics and per-word info for the submitted text.

     Populates ``self.stats`` with a list of ``{'name', 'value'[, 'desc']}``
     dicts produced by ``textstat``, and ``self.words`` with one dict per
     base form (``'hw'``, ``'alts'``, ``'count'``, ``'freq'``) sorted by
     ascending ``'freq'``. The form page is then re-rendered instead of
     redirecting to ``success_url``.

     Args:
         form: The validated form; ``form.cleaned_data['text']`` is analysed.

     Returns:
         The response from ``self.render_to_response`` for this form.
     """
     text = form.cleaned_data['text']
     word_list = wf.tokenize(text, self.lang)

     # Computed once and reused below instead of repeating the calls.
     word_count = textstat.lexicon_count(text)
     difficult_count = textstat.difficult_words(text)
     # Text with no countable words (e.g. only punctuation) used to raise
     # ZeroDivisionError here; report 0% in that case.
     if word_count:
         difficult_percent = 100 * difficult_count / word_count
     else:
         difficult_percent = 0

     self.stats = [
         { 'name': 'Flesch-Kincaid grade level',
           'value':  textstat.flesch_kincaid_grade(text),
           'desc': 'Based on avg sentence length and syllables per word.'},
         { 'name': 'Dale-Chall grade level',
           'value': textstat.dale_chall_readability_score_v2(text),
           'desc': 'Based on avg sentence length and percent difficult words.'},
         { 'name': 'Number of words',
           'value': word_count },
         { 'name': 'Number of sentences',
           'value': textstat.sentence_count(text) },
         { 'name': 'Average sentence length',
           'value': textstat.avg_sentence_length(text) },
         { 'name': 'Average syllables per word',
           'value': textstat.avg_syllables_per_word(text) },
         { 'name': 'Difficult words',
           'value': "%d (%d%%): %s" % (difficult_count,
                                       difficult_percent,
                                       ', '.join(textstat.difficult_words_list(text))) },
     ]

     # Group tokens by base form, counting occurrences and collecting the
     # distinct surface forms that differ from the base form.
     word_info = {}
     for word in word_list:
         base = base_form(word)
         w = word_info.get(base)
         if w is None:
             w = {
                 'hw' : base,
                 'alts' : [],
                 'count' : 1,
                 'freq' : wf.zipf_frequency(base, self.lang)
             }
             word_info[base] = w
         else:
             w['count'] += 1
         if word != base and word not in w['alts']:
             w['alts'].append(word)

     self.words = sorted(word_info.values(), key=lambda x: x.get('freq'))
     logger.debug('words: %s', self.words)
     # Don't do normal process of redirecting to success_url.  Just stay on this form page forever.
     return self.render_to_response(self.get_context_data(form=form))
Пример #8
0
def glossdef(request, book_id, cued, word):
    """Render the definition(s) of a word as HTML and record the lookup.

    The word is reduced with ``base_form`` and looked up via ``lookup`` for
    the book with primary key ``book_id`` (or ``None`` if no such book
    exists). A ``vocab_lookup`` signal is sent whether or not a definition
    was found.

    Returns:
        The rendered ``glossary/glossdef.html`` template when definitions
        were found, otherwise an ``HttpResponseNotFound``.
    """
    headword = base_form(word)

    try:
        book = Book.objects.get(pk=book_id)
    except Book.DoesNotExist:
        book = None

    definitions = lookup(book, headword)
    source = definitions['source'] if definitions else None

    vocab_lookup.send(sender=GlossaryConfig.__class__,
                      request=request,
                      word=headword,
                      cued=cued,
                      source=source)
    # TODO might want to record how many meanings were found (especially if it's 0): len(defs['meanings'])

    if not definitions:
        return HttpResponseNotFound("<p>No definition found</p>")

    context = {'defs': definitions}
    if book:
        context['book_path'] = book.path
    return render(request, 'glossary/glossdef.html', context=context)
Пример #9
0
 def test_base_forms(self):
     """Check ``base_form`` against a table of (expected, input) pairs."""
     cases = (
         ('noun', 'noun'),
         ('noun', 'nouns'),
         ('act', 'acting'),
         ('act', 'acted'),
         ('go', 'went'),
         ('go', 'goes'),
         ('large', 'largest'),
         ('text', 'texts'),
         # Not British 'instal'
         ('install', 'installing'),
         # alphabetically before the other possibility, "much"
         ('more', 'more'),
         # unknown word is passed through as is
         ('ooblecks', 'ooblecks'),
     )
     for expected, word in cases:
         self.assertEqual(expected, base_form(word))