Пример #1
0
# Smoke check: call lmtzr.lemmatize on the word "using" with wordnet.VERB as
# the second argument and print whatever it returns.
# NOTE(review): lmtzr is defined outside this snippet; presumably an NLTK
# WordNetLemmatizer instance -- confirm.
print(lmtzr.lemmatize("using", wordnet.VERB))


def convertTag(tag):
    """Map a part-of-speech tag onto one of the wordnet POS constants.

    The tag is matched by its leading letter: "J" -> wordnet.ADJ,
    "N" -> wordnet.NOUN, "V" -> wordnet.VERB, "R" -> wordnet.ADV.
    Any other tag (including an empty string) falls back to wordnet.NOUN.

    NOTE(review): the tags are presumably Penn Treebank tags as produced by
    nltk.pos_tag -- confirm against the caller.
    """
    prefix_table = (
        ("J", wordnet.ADJ),
        ("N", wordnet.NOUN),
        ("V", wordnet.VERB),
        ("R", wordnet.ADV),
    )
    # Prefixes are checked in the listed order; the first match wins.
    for prefix, pos in prefix_table:
        if tag.startswith(prefix):
            return pos
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN


# Demo: split a sample review into tokens, drop stop words, POS-tag what is
# left and lemmatize each token with the POS obtained from convertTag.
original = "The hose attachment has GOING to be placed on when you want to use it and my bare floor tool was missing. Looks nice and the floor options seems to work ok."
# Raw string literal: "\w" inside a plain string is an invalid escape sequence
# and triggers a warning on modern Python. The character class is negated, so
# every character that is NOT a word character, "&", "^" or "'" is replaced
# by a space before splitting on whitespace.
originalNoPunc = re.sub(r"[^\w&^']", " ", original).split()
# Lower-case each token once, then keep those that are not in stopWords.
finalStr = [
    token
    for token in (raw.lower() for raw in originalNoPunc)
    if token not in stopWords
]
taggedWords = nltk.pos_tag(finalStr)
final = []
# Each element of taggedWords is a (word, tag) pair.
for word, tag in taggedWords:
    print("{} has a tag {}".format(word, tag))
    final.append(lmtzr.lemmatize(word, convertTag(tag)))

# Show reduceReview's output for the same text next to the lemmas built above.
print("BEFORE: {} \n\n AFTER: {}".format(reduceReview(original), final))
Пример #2
0
 def test_number_removal(self):
     # Only "dog" is expected back from an input that also contains "The",
     # "is", "a", a number and punctuation.
     expected = ["dog"]
     self.assertEqual(expected, reduceReview("The (dog is a 10!!!"))
Пример #3
0
 def test_number_removal2(self):
     # A parenthesised number on its own is expected to yield an empty list.
     actual = reduceReview("(123)")
     self.assertEqual([], actual)
Пример #4
0
 def test_punctuation(self):
     # "THE" and "is" followed by punctuation and a trailing space are
     # expected to yield an empty list.
     self.assertEqual([], reduceReview("THE is!!! "))
Пример #5
0
 def test_many_stopwords2(self):
     # Only "dog", "cat" and "horse" are expected back, in that order, from
     # an input with mixed-case words, punctuation, a newline and extra spaces.
     text = "THE is! so the THEM (dog cat horse\n)!.. ourselves    TO thEM thE?"
     actual = reduceReview(text)
     self.assertEqual(["dog", "cat", "horse"], actual)
Пример #6
0
 def test_same_word_stemming(self):
     # "Dogs" and "dogs" must produce equal results.
     capitalised = reduceReview("Dogs")
     self.assertEqual(capitalised, reduceReview("dogs"))
Пример #7
0
 def test_same_word_stemming2(self):
     # Upper-case with trailing "!!!" and lower-case with a leading space and
     # trailing "..." must produce equal results.
     shouted, padded = reduceReview("DOGS!!!"), reduceReview(" dogs...")
     self.assertEqual(shouted, padded)
Пример #8
0
 def test_cap_stemming3(self):
     # Mixed-case "dOgs!" is expected to come back as the single token "dog".
     result = reduceReview("dOgs!")
     self.assertEqual(["dog"], result)
Пример #9
0
 def test_cap_stemming2(self):
     # Upper-case "DOGS" is expected to come back as the single token "dog".
     self.assertEqual(["dog"], reduceReview("DOGS"))
Пример #10
0
 def test_many_stopwords(self):
     # Several differently-cased spellings of "the" with punctuation are
     # expected to yield an empty list.
     nothing_left = []
     self.assertEqual(nothing_left, reduceReview("The, tHe the THE.."))
Пример #11
0
 def test_cap_stopwords4(self):
     # A lone capitalised "The" is expected to yield an empty list.
     outcome = reduceReview("The")
     self.assertEqual([], outcome)
Пример #12
0
 def test_stopwords(self):
     # From "The dog is CUTE." only "dog" and "cute" are expected back,
     # lower-cased and in that order.
     expected = ["dog", "cute"]
     self.assertEqual(expected, reduceReview("The dog is CUTE."))