# Quick sanity check: lemmatize a single word as a verb.
print(lmtzr.lemmatize("using", wordnet.VERB))


def convertTag(tag):
    """Map a POS tag string to a WordNet part-of-speech constant.

    Only the tag's prefix is inspected, so any tag beginning with the
    given letter selects that category.

    Args:
        tag: POS tag string, e.g. the second element of a pair returned
            by ``nltk.pos_tag``.

    Returns:
        ``wordnet.ADJ`` for tags starting with "J", ``wordnet.VERB`` for
        "V", ``wordnet.ADV`` for "R", and ``wordnet.NOUN`` for everything
        else (including tags starting with "N").
    """
    if tag.startswith("J"):
        return wordnet.ADJ
    if tag.startswith("V"):
        return wordnet.VERB
    if tag.startswith("R"):
        return wordnet.ADV
    # Noun tags and every unrecognized tag share the same result, so no
    # separate "N" branch is needed.
    return wordnet.NOUN


original = (
    "The hose attachment has GOING to be placed on when you want to use "
    "it and my bare floor tool was missing. Looks nice and the floor "
    "options seems to work ok."
)

# Replace every character that is not a word character, "&", "^" or an
# apostrophe with a space, then split on whitespace. The pattern is a raw
# string because "\w" in a plain literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+); the compiled pattern is unchanged.
originalNoPunc = re.sub(r"[^\w&^']", " ", original).split()

# Lower-case every token and drop those found in the stop-word collection.
finalStr = [
    word.lower() for word in originalNoPunc if word.lower() not in stopWords
]

taggedWords = nltk.pos_tag(finalStr)

# Lemmatize each token using the WordNet category derived from its tag.
final = []
for word, tag in taggedWords:
    print("{} has a tag {}".format(word, tag))
    final.append(lmtzr.lemmatize(word, convertTag(tag)))

# Show the output of reduceReview next to the result of the steps above.
print("BEFORE: {} \n\n AFTER: {}".format(reduceReview(original), final))
def test_number_removal(self):
    """Expect only "dog" to remain from a review that contains a number."""
    self.assertEqual(["dog"], reduceReview("The (dog is a 10!!!"))
def test_number_removal2(self):
    """Expect an empty result for a parenthesized number on its own."""
    reduced = reduceReview("(123)")
    self.assertEqual([], reduced)
def test_punctuation(self):
    """Expect nothing to remain from "THE is" followed by punctuation."""
    self.assertEqual([], reduceReview("THE is!!! "))
def test_many_stopwords2(self):
    """Expect only "dog", "cat" and "horse" to survive, in that order."""
    review = "THE is! so the THEM (dog cat horse\n)!.. ourselves TO thEM thE?"
    expected = ["dog", "cat", "horse"]
    self.assertEqual(expected, reduceReview(review))
def test_same_word_stemming(self):
    """Expect "Dogs" and "dogs" to reduce to the same result."""
    # Arguments are evaluated left to right, capitalized form first.
    self.assertEqual(reduceReview("Dogs"), reduceReview("dogs"))
def test_same_word_stemming2(self):
    """Expect "DOGS!!!" and " dogs..." to reduce to the same result."""
    from_shouted = reduceReview("DOGS!!!")
    from_plain = reduceReview(" dogs...")
    self.assertEqual(from_shouted, from_plain)
def test_cap_stemming3(self):
    """Expect mixed-case "dOgs!" to reduce to the single token "dog"."""
    self.assertEqual(['dog'], reduceReview("dOgs!"))
def test_cap_stemming2(self):
    """Expect upper-case "DOGS" to reduce to the single token "dog"."""
    reduced = reduceReview("DOGS")
    self.assertEqual(['dog'], reduced)
def test_many_stopwords(self):
    """Expect nothing to remain from "the" repeated in varied casing."""
    self.assertEqual([], reduceReview("The, tHe the THE.."))
def test_cap_stopwords4(self):
    """Expect a lone capitalized "The" to reduce to an empty list."""
    reduced = reduceReview("The")
    self.assertEqual([], reduced)
def test_stopwords(self):
    """Expect "The dog is CUTE." to reduce to ["dog", "cute"]."""
    self.assertEqual(['dog', 'cute'], reduceReview("The dog is CUTE."))