def get_info(self, content):
    """Return the texts of all ``NNP``/``NNI`` entries derived from *content*.

    The words of *content* (``util.getWords``) are tagged by
    ``bigram_tag.tag`` and post-processed by ``self.re_tag``.  The resulting
    ``(text, tag)`` entries are then collapsed pass by pass: for every pair of
    neighbours the key ``"<tag1>+<tag2>"`` is looked up in ``cfg``; a truthy
    result replaces the pair with a single entry whose text is both texts
    joined by a space and whose tag is the looked-up value.  Passes are
    repeated until one completes without any merge.

    :param content: text handed to ``util.getWords``
    :returns: list with the text of every remaining entry tagged
        ``'NNP'`` or ``'NNI'``, in order
    """
    tags = self.re_tag(bigram_tag.tag(util.getWords(content)))

    merged_any = True
    while merged_any:
        merged_any = False
        idx = 0
        # The list shrinks on every merge, so its length is re-read each step.
        while idx + 1 < len(tags):
            left = tags[idx]
            right = tags[idx + 1]
            pos = cfg.get(left[1] + '+' + right[1])
            if pos:
                # Swap the two neighbours for one combined entry in place.
                del tags[idx:idx + 2]
                tags.insert(idx, (left[0] + ' ' + right[0], pos))
                merged_any = True
            # Always advance, even after a merge; pairs skipped this way are
            # picked up by the next pass.
            idx += 1

    return [entry[0] for entry in tags if entry[1] in ('NNP', 'NNI')]
def get_info(self, content):
    """Collect the text of every entry tagged ``NNP`` or ``NNI`` in *content*.

    Steps:

    1. Split *content* with ``util.getWords`` and tag the words with
       ``bigram_tag.tag``; ``self.re_tag`` then adjusts those tags.
    2. Sweep over neighbouring ``(text, tag)`` entries.  When ``cfg`` holds a
       truthy value under ``"<tag1>+<tag2>"``, the two entries are replaced
       by one: the texts joined with a space, tagged with that value.
    3. Repeat the sweep until a full sweep merges nothing.

    :param content: text handed to ``util.getWords``
    :returns: list of entry texts whose final tag is ``'NNP'`` or ``'NNI'``
    """
    tokens = util.getWords(content)
    tags = self.re_tag(bigram_tag.tag(tokens))

    while True:
        changed = False
        position = 0
        # len(tags) is evaluated on every step because merges shorten it.
        while position < len(tags) - 1:
            first, second = tags[position], tags[position + 1]
            combined_tag = cfg.get(first[1] + '+' + second[1])
            if combined_tag:
                combined_text = first[0] + ' ' + second[0]
                tags[position:position + 2] = [(combined_text, combined_tag)]
                changed = True
            position += 1
        if not changed:
            break

    final_context = []
    for text_and_tag in tags:
        if text_and_tag[1] in ('NNP', 'NNI'):
            final_context.append(text_and_tag[0])
    return final_context
def remove_stopwords(sentences):
    """
    Strip English stopwords from every sentence.

    Each sentence is split with ``util.getWords``; words found in
    ``stopwords.words('english')`` are dropped and the rest are re-joined
    with single spaces.

    :param sentences: (list) sentences
    :returns: (list) one cleaned sentence per input sentence, in order
    """
    english_stopwords = set(stopwords.words('english'))

    def strip_sentence(sentence):
        kept = (word for word in util.getWords(sentence)
                if word not in english_stopwords)
        return ' '.join(kept)

    return [strip_sentence(sentence) for sentence in sentences]
def get_text(): """ Driver function; Execution starts here """ text = raw_input('TEXT: ') limit = input('LIMIT: ') words_dict, words = util.getWords(text) words_sorted = sorted(words, key=len) words_dict = reduce_general_slang(words_dict, words_sorted, limit) words_dict = reduce_suffix(words_dict, words_sorted, limit) words_dict = reduce_prefix(words_dict, words_sorted, limit) words_dict = reduce_vowels(words_dict, words_sorted, limit) for word in words: print words_dict[word][REDUCED],
def testGetWords(w):
    """Pass *w* to ``util.getWords`` and return its result unchanged."""
    words = util.getWords(w)
    return words


# print(util.getRelations('force'))