示例#1
0
# Quick sanity check: lemmatize the word "using" with the verb part-of-speech
# constant and print whatever lmtzr returns.
# NOTE(review): lmtzr is presumably an NLTK WordNetLemmatizer created earlier
# in the file -- confirm.
print(lmtzr.lemmatize("using", wordnet.VERB))


def convertTag(tag):
    """Translate a part-of-speech tag into a ``wordnet`` POS constant.

    The decision is made on the tag's leading letter only:

    * ``J...`` -> ``wordnet.ADJ``
    * ``N...`` -> ``wordnet.NOUN``
    * ``V...`` -> ``wordnet.VERB``
    * ``R...`` -> ``wordnet.ADV``

    Any other tag (including an empty string) falls back to
    ``wordnet.NOUN``.

    NOTE(review): the tags presumably come from ``nltk.pos_tag`` (Penn
    Treebank style) -- confirm against the caller.
    """
    # (tag prefix, name of the matching attribute on ``wordnet``), checked in
    # order; the attribute is only looked up once a prefix matches.
    prefix_to_pos_name = (
        ("J", "ADJ"),
        ("N", "NOUN"),
        ("V", "VERB"),
        ("R", "ADV"),
    )
    for prefix, pos_name in prefix_to_pos_name:
        if tag.startswith(prefix):
            return getattr(wordnet, pos_name)
    # Unrecognised tags are treated as nouns.
    return wordnet.NOUN


# Demo: walk one sample review through the clean-up steps by hand (strip
# punctuation, drop stop words, POS-tag, lemmatize) and print the outcome next
# to what reduceReview() returns for the same text.
original = "The hose attachment has GOING to be placed on when you want to use it and my bare floor tool was missing. Looks nice and the floor options seems to work ok."
# Replace every character that is not a word character, "&", "^" or "'" with a
# space, then split on whitespace.  The pattern is a raw string so that "\w"
# reaches the regex engine without relying on Python's deprecated handling of
# unknown string escapes.
originalNoPunc = re.sub(r"[^\w&^']", " ", original).split()
# Lower-case each token and discard the ones listed in stopWords.
finalStr = [
    word.lower() for word in originalNoPunc if word.lower() not in stopWords
]
# pos_tag yields (word, tag) pairs; the tag selects the lemmatizer's POS.
taggedWords = nltk.pos_tag(finalStr)
final = []
for word, tag in taggedWords:
    print("{} has a tag {}".format(word, tag))
    final.append(lmtzr.lemmatize(word, convertTag(tag)))

print("BEFORE: {} \n\n AFTER: {}".format(reduceReview(original), final))
 def test_number_removal(self):
     """Only "dog" is expected back from "The (dog is a 10!!!"."""
     expected = ["dog"]
     actual = reduceReview("The (dog is a 10!!!")
     self.assertEqual(expected, actual)
 def test_number_removal2(self):
     """A parenthesised number on its own is expected to reduce to nothing."""
     self.assertEqual([], reduceReview("(123)"))
 def test_punctuation(self):
     """"THE is!!! " is expected to reduce to an empty list."""
     reduced = reduceReview("THE is!!! ")
     self.assertEqual([], reduced)
 def test_many_stopwords2(self):
     """Only "dog", "cat" and "horse" are expected back, in that order."""
     review = "THE is! so the THEM (dog cat horse\n)!.. ourselves    TO thEM thE?"
     expected = ["dog", "cat", "horse"]
     self.assertEqual(expected, reduceReview(review))
 def test_same_word_stemming(self):
     """"Dogs" and "dogs" are expected to reduce to equal results."""
     from_capitalised = reduceReview("Dogs")
     from_lowercase = reduceReview("dogs")
     self.assertEqual(from_capitalised, from_lowercase)
 def test_same_word_stemming2(self):
     """"DOGS!!!" and " dogs..." are expected to reduce to equal results."""
     shouted = reduceReview("DOGS!!!")
     trailing_dots = reduceReview(" dogs...")
     self.assertEqual(shouted, trailing_dots)
 def test_cap_stemming3(self):
     """Mixed-case "dOgs!" is expected to reduce to ["dog"]."""
     expected = ["dog"]
     self.assertEqual(expected, reduceReview("dOgs!"))
 def test_cap_stemming2(self):
     """Upper-case "DOGS" is expected to reduce to ["dog"]."""
     reduced = reduceReview("DOGS")
     self.assertEqual(["dog"], reduced)
示例#10
0
 def test_many_stopwords(self):
     """Repeated "the" in varying case and punctuation should leave nothing."""
     reduced = reduceReview("The, tHe the THE..")
     self.assertEqual([], reduced)
示例#11
0
 def test_cap_stopwords4(self):
     """A lone capitalised "The" is expected to reduce to an empty list."""
     self.assertEqual([], reduceReview("The"))
示例#12
0
 def test_stopwords(self):
     """"The dog is CUTE." is expected to reduce to ["dog", "cute"]."""
     expected = ["dog", "cute"]
     actual = reduceReview("The dog is CUTE.")
     self.assertEqual(expected, actual)