def noun_stem(s):
    """Extract the stem from a plural noun, or return the empty string.

    Rules, applied in order:

    1. A word listed in ``unchanging_plurals`` is its own stem.
    2. A word ending in "men" has that suffix rewritten to "man".
    3. Otherwise the word is stemmed with ``verb_stem``; if the result is
       itself listed in ``unchanging_plurals`` the word is rejected and ''
       is returned, else the ``verb_stem`` result is returned unchanged
       (which may itself be '').
    """
    if s in unchanging_plurals:
        return s
    if s.endswith("men"):
        # Rewrite only the trailing "men"; a global substitution would also
        # alter any earlier "men" inside the word.
        return s[:-3] + "man"
    stem = verb_stem(s)
    if stem in unchanging_plurals:
        return ''
    return stem
def restore_words_aux(tr, wds):
    """Rebuild ``tr`` with each leaf tag replaced by a ``(word, tag)`` pair.

    ``tr`` is either a tag string (a leaf) or a node exposing ``.node`` that
    can be iterated to obtain its children. ``wds`` is a list of words and is
    consumed destructively: every leaf takes one word with ``wds.pop()``,
    i.e. from the END of the list.

    Leaf results:
      * 'Is' / 'Ts' -> ('I_' / 'T_' + verb_stem(word), tag)
      * 'Np'        -> ('N_' + noun_stem(word), tag)
      * 'Ip', 'Tp', 'Ns', 'A' -> (first letter of tag + '_' + word, tag)
      * any other tag -> (word, tag)

    For a non-leaf, a new ``Tree`` with the same ``.node`` and recursively
    rebuilt children is returned.
    NOTE(review): ``.node`` looks like the pre-3.0 NLTK ``Tree`` attribute;
    confirm which ``Tree`` class is in scope.
    """
    if not isinstance(tr, str):
        return Tree(tr.node, [restore_words_aux(child, wds) for child in tr])

    word = wds.pop()
    if tr == 'Is' or tr == 'Ts':
        # Stemmed verb forms: stem the word and prefix with the category.
        return (tr[0] + '_' + verb_stem(word), tr)
    if tr == 'Np':
        return ('N_' + noun_stem(word), tr)
    if tr in ('Ip', 'Tp', 'Ns', 'A'):
        # Forms used as-is: only the category prefix is added.
        return (tr[0] + '_' + word, tr)
    return (word, tr)
def test_stemming_with_tagging():
    """Check ``verb_stem`` against every entry of ``word2stem``.

    Collects the words whose computed stem differs from the expected one and
    requires that list to be exactly the three known misses below.
    """
    mismatched = [
        word
        for word, stem in word2stem.items()
        if verb_stem(word) != stem
    ]
    # some words are just not in brown :(
    assert mismatched == ['fizzes', 'dazes', 'analyses']
def tag_word(lx, wd):
    """Return a list of all possible tags for ``wd`` relative to ``lx``.

    Function words take priority: every pair in ``function_words_tags`` whose
    first element equals ``wd`` contributes its second element, and if any
    matched, only those tags are returned.

    Otherwise the lexicon ``lx`` is consulted through ``lx.getAll(cat)``:

    * 'P' / 'A' when ``wd`` is listed under that category.
    * Nouns ('N'): applies when ``wd`` or ``noun_stem(wd)`` is listed. A word
      in ``unchanging_plurals`` gets both 'Ns' and 'Np'; additionally 'Ns' is
      added when ``noun_stem(wd)`` is empty, else 'Np'.
    * Verbs ('I', then 'T'): applies when ``wd`` or ``verb_stem(wd)`` is
      listed; the tag is '<cat>p' when ``verb_stem(wd)`` is empty, else
      '<cat>s'.

    Tags are accumulated with the external ``add`` helper (presumably it
    skips tags already present — confirm against its definition).
    """
    tags = []
    for entry in function_words_tags:
        if entry[0] == wd:
            add(tags, entry[1])
    if tags:
        return tags

    if wd in lx.getAll('P'):
        add(tags, 'P')
    if wd in lx.getAll('A'):
        add(tags, 'A')

    # Stems are compared by value (==); identity tests against a string
    # literal are implementation-dependent.
    noun_root = noun_stem(wd)
    nouns = lx.getAll('N')
    if wd in nouns or noun_root in nouns:
        if wd in unchanging_plurals:
            add(tags, 'Ns')
            add(tags, 'Np')
        add(tags, 'Ns' if noun_root == '' else 'Np')

    verb_root = verb_stem(wd)
    for category in ('I', 'T'):
        verbs = lx.getAll(category)
        if wd in verbs or verb_root in verbs:
            add(tags, category + ('p' if verb_root == '' else 's'))

    return tags
def test_tagger():
    """``verb_stem`` must give a falsy result for the noun 'cats'."""
    stem = verb_stem('cats')
    assert not stem
def test_verb_stem(self):
    """Check ``s.verb_stem`` on a table of (expected stem, inflected form)."""
    # Pairs are checked in this order; "flies" appears twice, exactly as in
    # the original sequence of assertions.
    cases = [
        ("fly", "flies"),
        ("eat", "eats"),
        ("tell", "tells"),
        ("show", "shows"),
        ("pay", "pays"),
        ("buy", "buys"),
        ("fly", "flies"),
        ("try", "tries"),
        ("unify", "unifies"),
        ("die", "dies"),
        ("lie", "lies"),
        ("tie", "ties"),
        ("go", "goes"),
        ("box", "boxes"),
        ("attach", "attaches"),
        ("wash", "washes"),
        ("dress", "dresses"),
        # "fizz" / "fizzes" is deliberately not checked.
        ("lose", "loses"),
        # "daze" / "dazes" is deliberately not checked.
        ("lapse", "lapses"),
        # "analyse" / "analyses" is deliberately not checked.
        ("have", "has"),
        ("like", "likes"),
        ("hate", "hates"),
        ("bathe", "bathes"),
    ]
    for expected, inflected in cases:
        self.assertEqual(expected, s.verb_stem(inflected))
def test_verb_stem():
    """Check ``st.verb_stem`` on a table of (word, expected stem).

    An expected stem of '' means the word must be rejected.
    """
    cases = [
        ('tells', 'tell'),
        ('buys', 'buy'),
        ('buysa', ''),
        ('tries', 'try'),
        ('flies', 'fly'),
        ('dies', 'die'),
        ('fixes', ''),  # not in Brown Corpus
        ('goes', 'go'),
        ('boxes', ''),  # not in Brown Corpus
        ('attaches', 'attach'),
        ('washes', ''),  # not in Brown Corpus
        ('fizzes', ''),  # not in Brown Corpus
        ('dresses', ''),  # not in Brown Corpus
        ('loses', 'lose'),
        ('dazes', ''),  # not in Brown Corpus
        # should be ignored according to
        # https://piazza.com/class/jkuzor9eypxov?cid=240
        ('has', ''),
        ('likes', 'like'),
        ('hates', 'hate'),
        ('bathes', ''),  # not in Brown Corpus
        # should be ignored according to
        # https://piazza.com/class/jkuzor9eypxov?cid=240
        ('is', ''),
        ('unties', ''),  # not in Brown Corpus
        ('cats', ''),
        ('analyses', ''),  # not in Brown Corpus
    ]
    for word, expected in cases:
        assert st.verb_stem(word) == expected
def test_verb_stem(self):
    """Check ``s.verb_stem`` on a table of (expected stem, inflected form)."""
    # Pairs are checked in this order; "flies" appears twice, exactly as in
    # the original sequence of assertions.
    cases = [
        ("fly", "flies"),
        ("eat", "eats"),
        ("tell", "tells"),
        ("show", "shows"),
        ("pay", "pays"),
        ("buy", "buys"),
        ("fly", "flies"),
        ("try", "tries"),
        ("unify", "unifies"),
        ("die", "dies"),
        ("lie", "lies"),
        ("tie", "ties"),
        ("go", "goes"),
        ("box", "boxes"),
        ("attach", "attaches"),
        ("wash", "washes"),
        ("dress", "dresses"),
        # "fizz" / "fizzes" is deliberately not checked.
        ("lose", "loses"),
        # "daze" / "dazes" is deliberately not checked.
        ("lapse", "lapses"),
        # "analyse" / "analyses" is deliberately not checked.
        ("have", "has"),
        ("like", "likes"),
        ("hate", "hates"),
        ("bathe", "bathes"),
    ]
    for expected, inflected in cases:
        self.assertEqual(expected, s.verb_stem(inflected))


class TestPOS(unittest.TestCase):
    """Tests for the noun stemmer and the word tagger in module ``p``."""

    def test_noun_stem(self):
        """``p.noun_stem`` maps each plural to its stem, or '' if rejected."""
        cases = [
            ("sheep", "sheep"),
            ("sheeps", ""),
            ("buffalo", "buffalo"),
            ("buffalos", ""),
            ("women", "woman"),
            ("men", "man"),
            ("ashes", "ash"),
            ("countries", "country"),
            ("dogs", "dog"),
        ]
        for plural, expected in cases:
            self.assertEqual(p.noun_stem(plural), expected)

    def test_tag_words(self):
        """``p.tag_word`` returns the expected tag list for a small lexicon."""
        lx = s.Lexicon()
        entries = [
            ("John", "P"),
            ("orange", "A"),
            ("orange", "N"),
            ("fish", "N"),
            ("fish", "I"),
            ("fish", "T"),
        ]
        for word, category in entries:
            lx.add(word, category)

        expectations = [
            (["P"], "John"),
            (["A", "Ns"], "orange"),
            (["Ns", "Np", "Ip", "Tp"], "fish"),
            (["AR"], "a"),
            ([], "zxghqw"),
        ]
        for tags, word in expectations:
            self.assertEqual(tags, p.tag_word(lx, word))


class TestAgreement(unittest.TestCase):
    """Tests that agreement-checked parsing accepts well-formed questions."""

    def test_can_parse(self):
        """Each sample question must yield at least one valid parse."""
        lx = s.Lexicon()
        # Added in this exact order; ("like", "T") is added twice, as in the
        # original sequence of calls.
        entries = [
            ('John', 'P'),
            ('like', 'T'),
            ("fly", "I"),
            ("Mary", "P"),
            ("duck", "N"),
            ("swim", "I"),
            ("like", "T"),
            ("frog", "N"),
            ("orange", "A"),
            ("orange", "N"),
            ("purple", "A"),
            ("fish", "N"),
            ("fish", "I"),
            ("fish", "T"),
            ("student", "N"),
            ("old", "A"),
        ]
        for word, category in entries:
            lx.add(word, category)

        questions = [
            "Who likes John ?",
            "Who is a duck ?",
            "Which orange duck likes a frog ?",
            "Who does John like ?",
            "Who is an orange duck ?",
            "Which ducks are orange ?",
            "Which ducks like a frog ?",
            "Which ducks like frogs ?",
            "Who likes a duck who flies ?",
            "Which purple ducks fly ?",
        ]
        for question in questions:
            parses = a.all_valid_parses(lx, question.split(" "))
            self.assertGreaterEqual(len(parses), 1)


if __name__ == '__main__':
    unittest.main()
def test_verb_stem(self):
    """Check ``statements.verb_stem`` rule by rule.

    Each table row is (inflected form, expected stem); an expected stem of
    "" means the word must be rejected.
    """
    rules_1_to_4 = [
        # Rule 1
        ("eats", "eat"),
        ("tells", "tell"),
        ("shows", "show"),
        # Rule 2
        ("pays", "pay"),
        ("buys", "buy"),
        # Rule 3
        ("flies", "fly"),
        ("tries", "try"),
        ("unifies", "unify"),
        # Rule 4
        ("dies", "die"),
        ("lies", "lie"),
        ("ties", "tie"),
    ]
    for word, expected in rules_1_to_4:
        self.assertEqual(statements.verb_stem(word), expected)

    # Still part of Rule 4: a negative check, kept at its original position.
    # NOTE(review): "unities" may be a typo for another form — confirm intent.
    self.assertNotEqual(statements.verb_stem("unties"), "unities")

    remaining = [
        # Rule 5
        ("goes", "go"),
        ("boxes", "box"),
        ("attaches", "attach"),
        ("washes", "wash"),
        ("dresses", "dress"),
        # "fizzes" -> "fizz" is deliberately not checked.
        # Rule 6
        ("loses", "lose"),
        ("dazes", "daze"),
        ("lapses", "lapse"),
        ("analyses", "analyse"),
        # Rule 7
        ("has", "have"),
        # Rule 8
        ("likes", "like"),
        ("hates", "hate"),
        ("bathes", "bathe"),
        # Base Case
        ("flys", ""),
        ("inchs", ""),
        # Check whether point 4 of Part 1 was implemented
        ("cats", ""),
        ("Johns", ""),
        ("Marys", ""),
        ("dogs", ""),
        ("doggies", ""),
    ]
    for word, expected in remaining:
        self.assertEqual(statements.verb_stem(word), expected)