def __search_combinations(self, comment):
    """Tag the known combinations found in ``comment`` and strip them out.

    Every key of ``self.__combinations`` that occurs as a substring of the
    lowercased comment is appended to ``self.__term_list`` as a ``Term`` of
    type ``settings.TERM_TYPE_COMBINATION`` whose original weight is the
    integer value stored for that key. All occurrences of the key are then
    removed from the comment, ignoring case.

    :param comment: text to scan.
    :return: the comment with every matched combination removed.
    """
    # Detection runs against the lowercased *original* text, which is not
    # refreshed after a removal; a combination overlapping one that was
    # already stripped is therefore still reported.
    # NOTE(review): a disabled earlier variant also searched the merged
    # new/modified combination vocabularies; only ``self.__combinations``
    # is consulted here.
    comment_lower = comment.lower()

    for combination in self.__combinations:
        if combination in comment_lower:
            obj_term = Term(combination)
            obj_term.set_term_type(settings.TERM_TYPE_COMBINATION)
            obj_term.set_original_weight(int(self.__combinations[combination]))
            self.__term_list.append(obj_term)
            # The key was matched as literal text above, so it has to be
            # removed as literal text too: escaping keeps characters such
            # as "?", "+" or "(" from being interpreted as regex syntax
            # (which could raise re.error or delete unrelated text).
            comment = re.sub(
                re.escape(combination), "", comment, flags=re.IGNORECASE
            )

    return comment
def __tagging(self, term):
    """Classify a single term and append it to ``self.__term_list``.

    The lexicons are consulted in a fixed order and the first hit wins:
    emoticons, punctuation, booster words, negating words, words/slang.
    A term that matches none of them is stored as neutral. Emoticons are
    looked up with the raw ``term``; every other lexicon is queried with
    the value returned by ``Term.get_new_term()``.

    :param term: raw token to classify.
    :return: None; the tagged ``Term`` is appended to ``self.__term_list``.
    """
    tagged = Term(term)
    normalized = tagged.get_new_term()

    def tag_from(lexicon, key, term_type):
        """Tag and store the term if ``key`` is found in ``lexicon``.

        Returns True when the term was stored, False otherwise. The weight
        is the integer value the lexicon holds for the entry returned by
        ``self.__search``.
        """
        hit = self.__search(lexicon, key)
        if hit == settings.TERM_NOT_FOUND:
            return False
        tagged.set_term_type(term_type)
        tagged.set_original_weight(int(lexicon[hit]))
        self.__term_list.append(tagged)
        return True

    # Weighted lexicons checked before negation; ``or`` short-circuits, so
    # later lexicons are only searched when the earlier ones miss.
    if (
        tag_from(self.__emoticons, term, settings.TERM_TYPE_EMOTICON)
        or tag_from(self.__punctuation, normalized,
                    settings.TERM_TYPE_PUNCTUATION)
        or tag_from(self.__booster_words, normalized,
                    settings.TERM_TYPE_BOOSTER)
    ):
        return

    # NOTE(review): spelling correction of the term and lookup in the
    # merged new/modified vocabularies were disabled in the original code;
    # only ``self.__words_and_slangs`` is searched for words and slang.
    if normalized in self.__negating_words:
        # Negating words carry no weight of their own.
        term_type = settings.TERM_TYPE_NEGATING
    elif tag_from(self.__words_and_slangs, normalized,
                  settings.TERM_TYPE_WORD_SLANG):
        return
    else:
        # Nothing matched: keep the term, marked as neutral.
        term_type = settings.TERM_TYPE_NEUTRO

    tagged.set_term_type(term_type)
    self.__term_list.append(tagged)