예제 #1
0
    def get_info(self, content):
        """
        Collect the texts of all items tagged 'NNP' or 'NNI' in *content*.

        The content is split with ``util.getWords``, tagged with
        ``bigram_tag.tag`` and post-processed by ``self.re_tag``. Adjacent
        items are then merged whenever ``cfg`` has an entry for the key
        ``"<tag1>+<tag2>"``; the merged item carries both texts joined by a
        space and the tag found in ``cfg``. Passes over the list repeat until
        one completes without merging anything.

        :param content: raw input handed to ``util.getWords``
        :returns: (list) texts of the items whose final tag is 'NNP' or 'NNI'
        """
        tokens = util.getWords(content)
        raw_tags = bigram_tag.tag(tokens)
        # presumably a list of (text, tag) pairs -- only [0] and [1] are used
        tags = self.re_tag(raw_tags)

        merged_any = True
        while merged_any:
            merged_any = False
            cursor = 0
            # The list shrinks on every merge, so the bound is re-read each
            # step instead of being fixed up front.
            while cursor + 1 < len(tags):
                left = tags[cursor]
                right = tags[cursor + 1]
                merged_pos = cfg.get(left[1] + '+' + right[1])
                if merged_pos:
                    # Replace the pair in place with the combined item.
                    tags[cursor:cursor + 2] = [(left[0] + ' ' + right[0], merged_pos)]
                    merged_any = True
                # Always advance: a freshly merged item is only reconsidered
                # as a left-hand partner on the next pass.
                cursor += 1

        return [item[0] for item in tags
                if item[1] == 'NNP' or item[1] == 'NNI']
예제 #2
0
    def get_info(self, content):
        """
        Return the texts of every item in *content* tagged 'NNP' or 'NNI'.

        Words come from ``util.getWords``, are tagged by ``bigram_tag.tag``
        and adjusted by ``self.re_tag``. Neighbouring items whose tags form a
        key ``"<tag1>+<tag2>"`` present in ``cfg`` are then fused into one
        item (texts joined by a space, tag taken from ``cfg``). The list is
        re-scanned until a scan fuses nothing.

        :param content: raw input handed to ``util.getWords``
        :returns: (list) texts of the items whose final tag is 'NNP' or 'NNI'
        """
        word_list = util.getWords(content)
        initial_tags = bigram_tag.tag(word_list)
        # presumably a list of (text, tag) pairs -- only [0] and [1] are used
        tags = self.re_tag(initial_tags)

        wanted = ('NNP', 'NNI')

        while True:
            fused = False
            # The bound is taken once per scan; the guard below covers the
            # list getting shorter while the scan is running.
            for idx in range(len(tags) - 1):
                nxt = idx + 1
                if nxt >= len(tags):
                    break
                first, second = tags[idx], tags[nxt]
                new_pos = cfg.get(first[1] + '+' + second[1])
                if not new_pos:
                    continue
                # Swap the two neighbours for a single fused item.
                del tags[idx:nxt + 1]
                tags.insert(idx, (first[0] + ' ' + second[0], new_pos))
                fused = True
            if not fused:
                break

        selected = []
        for entry in tags:
            if entry[1] in wanted:
                selected.append(entry[0])
        return selected
예제 #3
0
def remove_stopwords(sentences):
    """
    Strip English stopwords from every sentence.

    Each sentence is split with ``util.getWords``; words found in
    ``stopwords.words('english')`` are dropped and the rest are joined back
    together with single spaces.

    :param sentences: (list) sentences
    :returns: (list) the sentences with stopwords removed, in input order
    """
    stop_set = set(stopwords.words('english'))
    return [
        ' '.join(token for token in util.getWords(text) if token not in stop_set)
        for text in sentences
    ]
예제 #4
0
def remove_stopwords(sentences):
    """
    Return a copy of *sentences* with English stopwords filtered out.

    :param sentences: (list) sentences; each one is split into words by
        ``util.getWords``
    :returns: (list) one space-joined string per input sentence, holding
        only the words absent from ``stopwords.words('english')``
    """
    # Built once as a set so each membership test is a hash lookup.
    english_stopwords = set(stopwords.words('english'))

    result = []
    for original in sentences:
        kept = [w for w in util.getWords(original) if w not in english_stopwords]
        result.append(' '.join(kept))
    return result
예제 #5
0
def get_text():
    """
    Driver function; Execution starts here
    """
    text = raw_input('TEXT: ')
    limit = input('LIMIT: ')
    words_dict, words = util.getWords(text)
    words_sorted = sorted(words, key=len)
    words_dict = reduce_general_slang(words_dict, words_sorted, limit)
    words_dict = reduce_suffix(words_dict, words_sorted, limit)
    words_dict = reduce_prefix(words_dict, words_sorted, limit)
    words_dict = reduce_vowels(words_dict, words_sorted, limit)

    for word in words:
        print words_dict[word][REDUCED],
예제 #6
0
def testGetWords(w):
	"""Pass *w* to ``util.getWords`` and return its result unchanged."""
	words = util.getWords(w)
	return words

# print(util.getRelations('force'))