Пример #1
0
 def save(self, *args, **kwargs):
     """Recompute the derived content fields, then save the document.

     Before delegating to the parent ``save`` this refreshes:

     * ``content_length`` -- length of the raw ``content``;
     * ``content_word_count`` -- word count of ``content`` after the
       characters in the ranges ``!``-``@`` and ``[``-``` ` ``` (ASCII
       punctuation and digits) are replaced by spaces;
     * ``content_ascii`` / ``subject_title_ascii`` -- ASCII renderings of
       ``content`` and ``subject_title``.

     All positional and keyword arguments are passed through unchanged.
     """
     self.content_length = len(self.content)
     # Blank out punctuation/digits so they do not glue words together.
     content = re.sub('[!-@[-`]', ' ', self.content)
     # Collapse the runs of spaces left behind by the substitution above.
     # This must operate on the stripped text, not on self.content again,
     # otherwise the previous step is silently discarded.
     content = re.sub(' +', ' ', content)
     self.content_word_count = wordcount(content)
     self.content_ascii = unicode_to_ascii(self.content)
     self.subject_title_ascii = unicode_to_ascii(self.subject_title)
     super(ScrappedDocument, self).save(*args, **kwargs)
Пример #2
0
 def save(self, *args, **kwargs):
     """Recompute the derived content fields, then save the document.

     Before delegating to the parent ``save`` this refreshes:

     * ``content_length`` -- length of the raw ``content``;
     * ``content_word_count`` -- word count of ``content`` after the
       characters in the ranges ``!``-``@`` and ``[``-``` ` ``` (ASCII
       punctuation and digits) are replaced by spaces;
     * ``content_ascii`` / ``subject_title_ascii`` -- ASCII renderings of
       ``content`` and ``subject_title``.

     All positional and keyword arguments are passed through unchanged.
     """
     self.content_length = len(self.content)
     # Blank out punctuation/digits so they do not glue words together.
     content = re.sub('[!-@[-`]', ' ', self.content)
     # Collapse the runs of spaces left behind by the substitution above.
     # This must operate on the stripped text, not on self.content again,
     # otherwise the previous step is silently discarded.
     content = re.sub(' +', ' ', content)
     self.content_word_count = wordcount(content)
     self.content_ascii = unicode_to_ascii(self.content)
     self.subject_title_ascii = unicode_to_ascii(self.subject_title)
     super(ScrappedDocument, self).save(*args, **kwargs)
Пример #3
0
 def update_word_stats(sender, instance, created, using, **kwargs):
     """Add the words of a newly created instance to the word statistics.

     Does nothing unless ``created`` is true. Otherwise the instance's
     ``content_ascii`` and ``subject_title_ascii`` are combined, lower-cased
     and every ``(word, count)`` pair yielded by ``count_words`` is passed
     to ``Word.objects.increase_count``.

     NOTE(review): the signature looks like a Django ``post_save`` receiver;
     ``sender`` and ``using`` are accepted but not used here -- confirm
     against where this function is connected.
     """
     if not created:
         return
     # Join with a space so the last word of the content and the first word
     # of the title are not fused into a single bogus word.
     text = ' '.join((instance.content_ascii, instance.subject_title_ascii))
     text = unicode_to_ascii(text).lower()
     for word, count in count_words(text):
         Word.objects.increase_count(word, count)
Пример #4
0
 def cognate_words(self, words):
     """Return the entries sharing a ``base`` with any of the given words.

     ``words`` may be an iterable of words or a single string. A ``unicode``
     string (Python 2 text type) is first converted to an ASCII ``str``; a
     ``str`` is then split into individual words on runs of non-word
     characters.

     The lookup is done in two steps: collect the ``base`` values of the
     entries whose ``word`` is in ``words``, then return every entry whose
     ``base`` is one of those values.
     """
     if isinstance(words, unicode):
         words = str(unicode_to_ascii(words))
     if isinstance(words, str):
         # Split on non-word runs (\W+, not \w+) and drop the empty strings
         # produced by leading or trailing separators.
         words = [word for word in re.split(r'\W+', words) if word]
     base_words = self.filter(word__in=words).values_list('base', flat=True)
     # Field lookups need a double underscore: base__in, not base_in.
     return self.filter(base__in=base_words)
Пример #5
0
 def update_word_stats(sender, instance, created, using, **kwargs):
     """Add the words of a newly created instance to the word statistics.

     Does nothing unless ``created`` is true. Otherwise the instance's
     ``content_ascii`` and ``subject_title_ascii`` are combined, lower-cased
     and every ``(word, count)`` pair yielded by ``count_words`` is passed
     to ``Word.objects.increase_count``.

     NOTE(review): the signature looks like a Django ``post_save`` receiver;
     ``sender`` and ``using`` are accepted but not used here -- confirm
     against where this function is connected.
     """
     if not created:
         return
     # Join with a space so the last word of the content and the first word
     # of the title are not fused into a single bogus word.
     text = ' '.join((instance.content_ascii, instance.subject_title_ascii))
     text = unicode_to_ascii(text).lower()
     for word, count in count_words(text):
         Word.objects.increase_count(word, count)
Пример #6
0
 def cognate_words(self, words):
     """Return the entries sharing a ``base`` with any of the given words.

     ``words`` may be an iterable of words or a single string. A ``unicode``
     string (Python 2 text type) is first converted to an ASCII ``str``; a
     ``str`` is then split into individual words on runs of non-word
     characters.

     The lookup is done in two steps: collect the ``base`` values of the
     entries whose ``word`` is in ``words``, then return every entry whose
     ``base`` is one of those values.
     """
     if isinstance(words, unicode):
         words = str(unicode_to_ascii(words))
     if isinstance(words, str):
         # Split on non-word runs (\W+, not \w+) and drop the empty strings
         # produced by leading or trailing separators.
         words = [word for word in re.split(r'\W+', words) if word]
     base_words = self.filter(word__in=words).values_list('base', flat=True)
     # Field lookups need a double underscore: base__in, not base_in.
     return self.filter(base__in=base_words)
Пример #7
0
 def _normalize(self, query):
     """Return ``query`` converted to ASCII with separators normalized.

     The query is passed through ``unicode_to_ascii`` and every run of
     non-word characters is then replaced by a single space. Leading and
     trailing separators therefore become a single space; they are not
     stripped.
     """
     query = unicode_to_ascii(query)
     # Raw string: '\W' in a plain literal is an invalid escape sequence
     # that newer Python versions warn about.
     query = re.sub(r'\W+', ' ', query)
     return query
Пример #8
0
 def _normalize(self, query):
     """Return ``query`` converted to ASCII with separators normalized.

     The query is passed through ``unicode_to_ascii`` and every run of
     non-word characters is then replaced by a single space. Leading and
     trailing separators therefore become a single space; they are not
     stripped.
     """
     query = unicode_to_ascii(query)
     # Raw string: '\W' in a plain literal is an invalid escape sequence
     # that newer Python versions warn about.
     query = re.sub(r'\W+', ' ', query)
     return query