def process(self, item):
    """Re-key every word entry of ``item`` under its stem, then push ``item``.

    The stemmer is looked up as ``STEMMERS[STEMMER]``. For each word
    returned by ``get_word(item)`` the entry is popped from ``item`` and
    stored again under ``stemmer.stem(word)``.

    Note: when two words share a stem (or the stem is already a key), the
    entry written last replaces the earlier one.
    """
    # presumably a snapshot of the word keys, since ``item`` is mutated
    # while iterating -- verify against get_word
    tokens = get_word(item)
    active_stemmer = STEMMERS[STEMMER]
    for token in tokens:
        stem = active_stemmer.stem(token)
        # Pop first, then store: also covers the case stem == token.
        entry = item.pop(token)
        item[stem] = entry
    self.push(item)
Exemplo n.º 2
0
 def process(self, item):
     """Re-key the word entries of ``item`` under their lemma, then push it.

     Each word returned by ``get_word(item)`` is POS-tagged on its own with
     TextBlob, the tag is mapped through ``to_wordnet`` and the word is
     lemmatized with the resulting part of speech.

     * Lemma equal to the word: the entry is left untouched.
     * Lemma not yet a key of ``item``: the entry is moved to the lemma.
     * Lemma already a key of ``item``: a merged entry is written whose
       ``priority`` is the max of both priorities and whose ``amount`` is
       the word's amount plus one; the word's own entry is deleted.

     Words for which TextBlob yields no tag are skipped, so their entry
     stays as it is instead of aborting the whole item with an IndexError.
     """
     words = get_word(item)
     to_add = {}
     to_delete = []
     for word in words:
         string = ''.join(word)
         tags = TextBlob(string).tags
         if not tags:
             # ``tags`` can be empty (TextBlob omits punctuation tokens);
             # without a tag there is nothing to lemmatize with.
             continue
         tag = tags[0][1]
         pos = to_wordnet(tag)
         new_word = str(word.lemmatize(pos))
         if new_word == word:
             continue
         if new_word in item:
             prio1 = item.get(word).get('priority')
             prio2 = item.get(new_word).get('priority')
             # NOTE(review): amount is derived from the word's entry only;
             # the existing lemma entry's amount is not used -- confirm.
             to_add[new_word] = {
                 'priority': max(prio1, prio2),
                 'amount': item.get(word).get('amount') + 1
             }
             # Deleted after the loop, once all merges are computed.
             to_delete.append(word)
         else:
             # NOTE(review): if several words share a lemma that is not a
             # key of ``item``, the last one replaces the earlier pending
             # entry -- confirm this is intended.
             to_add[new_word] = item.pop(word)
     for element in to_add:
         item[element] = to_add[element]
     for element in to_delete:
         del item[element]
     self.push(item)
Exemplo n.º 3
0
  def process(self, item):
    """Add synonyms of the words in ``item`` as entries, then push ``item``.

    Phase 1 gathers, for every word from ``get_word(item)``, the lemma
    names of its outer synonyms (``word.synsets`` ->
    ``_get_outer_synonyms`` -> ``_get_lemma_names``), removes duplicates
    while keeping order, drops names rejected by ``_is_invalid`` and keeps
    what ``get_best_synonyms`` returns.

    Phase 2 walks the gathered results, reading index 0 as the synonym and
    index 1 as its similarity: a synonym already present in ``item`` gets
    its ``amount`` incremented, a new one is inserted with ``amount`` 1 and
    a priority of ``SYNONYMS_PRIORITY_FUNCTION(similarity)``.
    """
    collected = []

    # Phase 1: collect candidates without touching ``item``.
    for source_word in get_word(item):
      related = self._get_outer_synonyms(source_word.synsets)
      lemma_names = self._get_lemma_names(related)
      # dict.fromkeys de-duplicates and preserves first-seen order.
      unique_names = list(dict.fromkeys(lemma_names))
      usable_names = [
        name for name in unique_names if not self._is_invalid(name)
      ]
      collected.extend(self.get_best_synonyms(source_word, usable_names))

    # Phase 2: fold the candidates into ``item``.
    for candidate in collected:
      synonym, score = candidate[0], candidate[1]
      if synonym not in item:
        item[synonym] = {
          'priority': SYNONYMS_PRIORITY_FUNCTION(score),
          'amount': 1
        }
        continue
      item[synonym]['amount'] += 1

    self.push(item)
Exemplo n.º 4
0
 def process(self, item):
     """Add spell-check suggestions for the words in ``item``, then push it.

     For every word from ``get_word(item)``, ``word.spellcheck()`` is
     iterated; index 0 of each result is read as the suggested word and
     index 1 as its confidence. Suggestions whose confidence is not above
     ``SPELLCHECK_MIN_CONFIDENCE`` are ignored. For the rest, the priority
     comes from ``self._calculate_priority(confidence)``:

     * suggestion already a key of ``item``: its ``priority`` is
       overwritten and its ``amount`` is incremented by one;
     * otherwise a new entry with that priority and ``amount`` 1 is added.
     """
     for original in get_word(item):
         for suggestion in original.spellcheck():
             candidate, confidence = suggestion[0], suggestion[1]
             if not confidence > SPELLCHECK_MIN_CONFIDENCE:
                 continue
             prio = self._calculate_priority(confidence)
             if candidate not in item:
                 item[candidate] = {'priority': prio, 'amount': 1}
                 continue
             # Existing entry: the new priority replaces the old one.
             entry = item[candidate]
             entry['priority'] = prio
             entry['amount'] = entry.get('amount') + 1
     self.push(item)