class ZDELangTestCase(unittest.TestCase):
    """Checks word counts, word lists and links for the 'de' dictionary."""

    def setUp(self):
        # Builds a Parser for lang='de' and stores it on self.p.
        self.p = Parser(lang='de')

    def test_a_getting_num_of_words(self):
        # The expected counts include punctuation plus the 'LEFT-WALL' and
        # 'RIGHT-WALL' entries.
        short_linkage = self.p.parse_sent('Dies ist den Traum.')[0]
        self.assertEqual(short_linkage.num_of_words, 7)

        long_linkage = self.p.parse_sent('Der Hund jagte ihn durch den Park.')[0]
        self.assertEqual(long_linkage.num_of_words, 10)

    def test_b_getting_words(self):
        # Word list of the first linkage, subscripts included.
        expected_words = [
            'LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn', 'durch',
            'den.d', 'Park.n', '.', 'RIGHT-WALL',
        ]
        first_linkage = self.p.parse_sent('Der Hund jagte ihn durch den Park.')[0]
        self.assertEqual(first_linkage.words, expected_words)

    def test_c_getting_links(self):
        # Links of the first linkage are compared one position at a time.
        sent = 'Dies ist den Traum.'
        linkage = self.p.parse_sent(sent)[0]
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'W', 'W', 'ist.v'),
            Link('dies', 'Ss', 'Ss', 'ist.v'),
            Link('ist.v', 'O', 'O', 'Traum.n'),
            Link('den.d', 'Dam', 'Dam', 'Traum.n'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, wanted in enumerate(expected_links):
            self.assertEqual(linkage.links[position], wanted)
def test_e_spell_guessing_off(self):
    # The parser is rebuilt with spell_guess=False; the misspelled 'shoop'
    # is then expected to come back as 'shoop[?].v'.
    self.p = Parser(spell_guess=False)
    expected_words = [
        'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v', '.',
        'RIGHT-WALL',
    ]
    linkages = self.p.parse_sent("I love going to shoop.")
    self.assertEqual(linkages[0].words, expected_words)
class ZDELangTestCase(unittest.TestCase):
    """German-dictionary ('de') tests: word counts, words and links."""

    def setUp(self):
        # A Parser created with lang='de' is kept on self.p for the tests.
        german_parser = Parser(lang='de')
        self.p = german_parser

    def test_a_getting_num_of_words(self):
        # Counts cover punctuation and both wall entries ('LEFT-WALL',
        # 'RIGHT-WALL').
        cases = [
            ('Dies ist den Traum.', 7),
            ('Der Hund jagte ihn durch den Park.', 10),
        ]
        for sentence, word_count in cases:
            self.assertEqual(
                self.p.parse_sent(sentence)[0].num_of_words, word_count)

    def test_b_getting_words(self):
        sentence = 'Der Hund jagte ihn durch den Park.'
        self.assertEqual(
            self.p.parse_sent(sentence)[0].words,
            ['LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn', 'durch',
             'den.d', 'Park.n', '.', 'RIGHT-WALL'])

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('Dies ist den Traum.')[0]
        # (left word, left label, right label, right word) per link, in the
        # order the links are expected to be reported.
        expected = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'W', 'ist.v'),
            ('dies', 'Ss', 'Ss', 'ist.v'),
            ('ist.v', 'O', 'O', 'Traum.n'),
            ('den.d', 'Dam', 'Dam', 'Traum.n'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, link_args in enumerate(expected):
            self.assertEqual(linkage.links[index], Link(*link_args))
def test_d_morphology(self):
    # The parser is replaced by a Russian one with display_morphology=True;
    # the expected word list then contains split pieces such as 'плы.=' and
    # '=ли.vnndpp' instead of whole words.
    self.p = Parser(lang='ru', display_morphology=True)
    expected_pieces = [
        'LEFT-WALL', 'вверху.e', 'плы.=', '=ли.vnndpp', 'ре.=',
        '=дкие.api', 'облачк.=', '=а.ndnpi', '.', 'RIGHT-WALL',
    ]
    first_linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
    self.assertEqual(first_linkage.words, expected_pieces)
def test_that_parser_can_be_destroyed_when_linkages_still_exist(self):
    """
    Deleting a parser while its linkages are still alive must be safe.

    If the parser were freed before the associated swig objects, those
    objects would be left pointing into freed memory and dereferencing
    them would be invalid. This test makes sure parsers can be created
    and deleted without regard for PYTHON Linkage objects that still
    exist. There is no assertion; the test passes when nothing blows up.
    """
    parser = Parser()
    # Intentionally unused: it only has to outlive the parser.
    surviving_linkages = parser.parse_sent('This is a sentence.')
    del parser
def test_d_spell_guessing_on(self):
    # With spell_guess=True, look through the linkages for one whose sixth
    # word is the corrected 'shop[~].v'. If none matches, the last linkage
    # is compared; if there are no linkages at all, an empty list is.
    self.p = Parser(spell_guess=True)
    result = self.p.parse_sent("I love going to shoop.")

    chosen = []
    for candidate in result:
        chosen = candidate
        if candidate.words[5] == 'shop[~].v':
            break

    expected_words = [
        'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v', '.',
        'RIGHT-WALL',
    ]
    self.assertEqual(chosen.words if chosen else [], expected_words)
class ZLTLangTestCase(unittest.TestCase):
    """Compares 'lt' parses against diagrams stored in parses-lt.txt."""

    def setUp(self):
        self.p = Parser(lang='lt')

    # Reads linkages from a test-file.
    def test_getting_links(self):
        # File format, as consumed below:
        #   I<sentence>       starts a new case and resets the diagram
        #   O<diagram line>   appends one line to the expected diagram
        #   O (nothing else)  marks the end of the diagram, which is then
        #                     compared against the parser's output
        # NOTE(review): the nesting of the "complete diagram" check under
        # the 'O' branch is reconstructed from the comment order - confirm.
        diagram = None
        sent = None
        # 'with' closes the file even when an assertion fails or the parser
        # raises; the earlier explicit close() only ran on success.
        with open("parses-lt.txt") as parses:
            for line in parses:
                # Lines starting with I are the input sentences
                if 'I' == line[0]:
                    sent = line[1:]
                    diagram = ""

                # Lines starting with O are the parse diagrams
                if 'O' == line[0]:
                    diagram += line[1:]

                    # We have a complete diagram if it ends with an
                    # empty line.
                    if '\n' == line[1] and 1 < len(diagram):
                        linkage = self.p.parse_sent(sent)[0]
                        self.assertEqual(linkage.diagram, diagram)
class ZLTLangTestCase(unittest.TestCase):
    """Checks 'lt' parse diagrams against the cases in parses-lt.txt."""

    def setUp(self):
        self.p = Parser(lang='lt')

    # Reads linkages from a test-file.
    def test_getting_links(self):
        # Each case in the file is an 'I' line (the input sentence) followed
        # by 'O' lines (the expected diagram); an 'O' line with nothing
        # after the marker ends the diagram and triggers the comparison.
        # NOTE(review): the completion check is assumed to sit inside the
        # 'O' branch, as the comment order suggests - confirm.
        diagram = None
        sent = None
        # Context manager instead of open()/close(): the handle is released
        # on every exit path, including a failing assertEqual.
        with open("parses-lt.txt") as parses:
            for line in parses:
                # Lines starting with I are the input sentences
                if 'I' == line[0]:
                    sent = line[1:]
                    diagram = ""

                # Lines starting with O are the parse diagrams
                if 'O' == line[0]:
                    diagram += line[1:]

                    # We have a complete diagram if it ends with an
                    # empty line.
                    if '\n' == line[1] and 1 < len(diagram):
                        linkage = self.p.parse_sent(sent)[0]
                        self.assertEqual(linkage.diagram, diagram)
def test_d_spell_guessing_on(self):
    # Spell guessing is switched on; among the returned linkages we want
    # the one where 'shoop' was replaced by 'shop[~].v'.
    self.p = Parser(spell_guess=True)
    result = self.p.parse_sent("I love going to shoop.")

    # Ends up as: the first linkage whose sixth word matches, else the last
    # linkage, else [] when nothing was returned.
    picked = []
    for linkage in result:
        picked = linkage
        if linkage.words[5] == 'shop[~].v':
            break

    if picked:
        actual_words = picked.words
    else:
        actual_words = []
    self.assertEqual(
        actual_words,
        ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v', '.',
         'RIGHT-WALL'])
def test_d_morphology(self):
    # Uses its own Russian parser with display_morphology=True, so the
    # words are expected as split pieces ('плы.=' followed by '=ли.vnndpp').
    self.p = Parser(lang='ru', display_morphology=True)
    sentence = 'вверху плыли редкие облачка.'
    actual_words = self.p.parse_sent(sentence)[0].words
    self.assertEqual(
        actual_words,
        ['LEFT-WALL', 'вверху.e', 'плы.=', '=ли.vnndpp', 'ре.=',
         '=дкие.api', 'облачк.=', '=а.ndnpi', '.', 'RIGHT-WALL'])
class DBasicParsingTestCase(unittest.TestCase):
    """Basic checks on what Parser.parse_sent() returns for English input."""

    def setUp(self):
        self.p = Parser()

    def _check_linkage_list(self, result, need_several=True):
        # Shared assertions: `result` is a list whose first two entries are
        # Linkage objects. With need_several, the list must also hold more
        # than one entry (checked before the entries are indexed).
        self.assertTrue(isinstance(result, list))
        if need_several:
            self.assertTrue(1 < len(result))
        self.assertTrue(isinstance(result[0], Linkage))
        self.assertTrue(isinstance(result[1], Linkage))

    def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(
            self):
        # The length is not asserted explicitly for this sentence.
        result = self.p.parse_sent("This is a relatively simple sentence.")
        self._check_linkage_list(result, need_several=False)

    def test_utf8_encoded_string(self):
        # Sentence containing a non-ASCII character in a plain literal.
        self._check_linkage_list(
            self.p.parse_sent("I love going to the café."))

    def test_unicode_encoded_string(self):
        # Same sentence, built as a unicode literal and encoded to UTF-8
        # before it is handed to the parser.
        sentence = (
            u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.".
            encode('utf8'))
        self._check_linkage_list(self.p.parse_sent(sentence))

    def test_unknown_word(self):
        self._check_linkage_list(
            self.p.parse_sent("I love going to the qertfdwedadt."))

    def test_unknown_euro_utf8_word(self):
        self._check_linkage_list(
            self.p.parse_sent("I love going to the qéáéğíóşúüñ."))

    def test_unknown_cyrillic_utf8_word(self):
        self._check_linkage_list(
            self.p.parse_sent("I love going to the доктором."))

    def test_getting_link_distances(self):
        # link_distances of the first linkage, one expected list per sentence.
        cases = [
            ("This is a sentence.", [5, 2, 1, 1, 2, 1, 1]),
            ("This is a silly sentence.", [6, 2, 1, 1, 3, 2, 1, 1, 1]),
        ]
        for sentence, distances in cases:
            result = self.p.parse_sent(sentence)[0]
            self.assertEqual(result.link_distances, distances)
class ZRULangTestCase(unittest.TestCase):
    """Checks word counts, words, links and morphology for the 'ru' dictionary."""

    def setUp(self):
        # Builds a Parser for lang='ru' and stores it on self.p.
        self.p = Parser(lang='ru')

    def test_a_getting_num_of_words(self):
        # Counts include punctuation plus 'LEFT-WALL' and 'RIGHT-WALL'.
        short_linkage = self.p.parse_sent('это тести.')[0]
        self.assertEqual(short_linkage.num_of_words, 5)

        long_linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(long_linkage.num_of_words, 7)

    def test_b_getting_words(self):
        # Without display_morphology the words are expected unsplit.
        expected_words = [
            'LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
            'облачка.ndnpi', '.', 'RIGHT-WALL',
        ]
        first_linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(first_linkage.words, expected_words)

    def test_c_getting_links(self):
        sent = 'вверху плыли редкие облачка.'
        linkage = self.p.parse_sent(sent)[0]
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'W', 'Wd', 'плыли.vnndpp'),
            Link('вверху.e', 'EI', 'EI', 'плыли.vnndpp'),
            Link('плыли.vnndpp', 'SIp', 'SIp', 'облачка.ndnpi'),
            Link('редкие.api', 'Api', 'Api', 'облачка.ndnpi'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, wanted in enumerate(expected_links):
            self.assertEqual(linkage.links[position], wanted)

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        # This test swaps in a parser created with display_morphology=True.
        self.p = Parser(lang='ru', display_morphology=True)
        expected_pieces = [
            'LEFT-WALL', 'вверху.e', 'плы.=', '=ли.vnndpp', 'ре.=',
            '=дкие.api', 'облачк.=', '=а.ndnpi', '.', 'RIGHT-WALL',
        ]
        first_linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(first_linkage.words, expected_pieces)
class DBasicParsingTestCase(unittest.TestCase):
    """Sanity checks on the list of linkages returned by parse_sent()."""

    def setUp(self):
        self.p = Parser()

    def _expect_two_linkages(self, result):
        # The first two entries of `result` must both be Linkage objects.
        for index in (0, 1):
            self.assertTrue(isinstance(result[index], Linkage))

    def _expect_several_linkages(self, sentence):
        # Parses `sentence` and requires a list with more than one entry
        # whose first two entries are Linkage objects.
        result = self.p.parse_sent(sentence)
        self.assertTrue(isinstance(result, list))
        self.assertTrue(1 < len(result))
        self._expect_two_linkages(result)

    def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(self):
        # Here the list length is not asserted before indexing.
        result = self.p.parse_sent("This is a relatively simple sentence.")
        self.assertTrue(isinstance(result, list))
        self._expect_two_linkages(result)

    def test_utf8_encoded_string(self):
        self._expect_several_linkages("I love going to the café.")

    def test_unicode_encoded_string(self):
        # A unicode literal is encoded to UTF-8 before parsing.
        encoded = u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.".encode('utf8')
        self._expect_several_linkages(encoded)

    def test_unknown_word(self):
        self._expect_several_linkages("I love going to the qertfdwedadt.")

    def test_unknown_euro_utf8_word(self):
        self._expect_several_linkages("I love going to the qéáéğíóşúüñ.")

    def test_unknown_cyrillic_utf8_word(self):
        self._expect_several_linkages("I love going to the доктором.")

    def test_getting_link_distances(self):
        # Expected link_distances of the first linkage for two sentences.
        plain = self.p.parse_sent("This is a sentence.")[0]
        self.assertEqual(plain.link_distances, [5, 2, 1, 1, 2, 1, 1])

        silly = self.p.parse_sent("This is a silly sentence.")[0]
        self.assertEqual(silly.link_distances, [6, 2, 1, 1, 3, 2, 1, 1, 1])
class ZRULangTestCase(unittest.TestCase):
    """Russian-dictionary ('ru') tests: counts, words, links, morphology."""

    def setUp(self):
        # A Parser created with lang='ru' is kept on self.p for the tests.
        russian_parser = Parser(lang='ru')
        self.p = russian_parser

    def test_a_getting_num_of_words(self):
        # Punctuation and both wall entries ('LEFT-WALL', 'RIGHT-WALL') are
        # part of the expected counts.
        cases = [
            ('это тести.', 5),
            ('вверху плыли редкие облачка.', 7),
        ]
        for sentence, word_count in cases:
            self.assertEqual(
                self.p.parse_sent(sentence)[0].num_of_words, word_count)

    def test_b_getting_words(self):
        sentence = 'вверху плыли редкие облачка.'
        self.assertEqual(
            self.p.parse_sent(sentence)[0].words,
            ['LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
             'облачка.ndnpi', '.', 'RIGHT-WALL'])

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        # (left word, left label, right label, right word) per link, in the
        # order the links are expected to be reported.
        expected = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'Wd', 'плыли.vnndpp'),
            ('вверху.e', 'EI', 'EI', 'плыли.vnndpp'),
            ('плыли.vnndpp', 'SIp', 'SIp', 'облачка.ndnpi'),
            ('редкие.api', 'Api', 'Api', 'облачка.ndnpi'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, link_args in enumerate(expected):
            self.assertEqual(linkage.links[index], Link(*link_args))

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        # Replaces self.p with a parser built with display_morphology=True.
        self.p = Parser(lang='ru', display_morphology=True)
        sentence = 'вверху плыли редкие облачка.'
        actual_words = self.p.parse_sent(sentence)[0].words
        self.assertEqual(
            actual_words,
            ['LEFT-WALL', 'вверху.e', 'плы.=', '=ли.vnndpp', 'ре.=',
             '=дкие.api', 'облачк.=', '=а.ndnpi', '.', 'RIGHT-WALL'])
def setUp(self):
    # Builds a Parser for lang='lt' and stores it on self.p.
    language = 'lt'
    self.p = Parser(lang=language)
class EEnglishLinkageTestCase(unittest.TestCase):
    """Words, links, spell guessing, capitalization and diagrams for English."""

    def setUp(self):
        self.p = Parser()

    def test_a_getting_words(self):
        expected_words = [
            'LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.',
            'RIGHT-WALL',
        ]
        first_linkage = self.p.parse_sent('This is a sentence.')[0]
        self.assertEqual(first_linkage.words, expected_words)

    def test_b_getting_num_of_words(self):
        # The count includes punctuation plus 'LEFT-WALL' and 'RIGHT-WALL'.
        first_linkage = self.p.parse_sent('This is a sentence.')[0]
        self.assertEqual(first_linkage.num_of_words, 7)

    def test_c_getting_links(self):
        sent = 'This is a sentence.'
        linkage = self.p.parse_sent(sent)[0]
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'hWV', 'dWV', 'is.v'),
            Link('LEFT-WALL', 'Wd', 'Wd', 'this.p'),
            Link('this.p', 'Ss*b', 'Ss', 'is.v'),
            Link('is.v', 'O*m', 'Os', 'sentence.n'),
            Link('a', 'Ds**c', 'Ds**c', 'sentence.n'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, wanted in enumerate(expected_links):
            self.assertEqual(linkage.links[position], wanted)

    def test_d_spell_guessing_on(self):
        # Look for the linkage whose sixth word is the corrected
        # 'shop[~].v'. Without a match the last linkage is compared; with
        # no linkages at all, an empty list is.
        self.p = Parser(spell_guess=True)
        result = self.p.parse_sent("I love going to shoop.")

        chosen = []
        for candidate in result:
            chosen = candidate
            if candidate.words[5] == 'shop[~].v':
                break

        expected_words = [
            'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v',
            '.', 'RIGHT-WALL',
        ]
        self.assertEqual(chosen.words if chosen else [], expected_words)

    def test_e_spell_guessing_off(self):
        # With spell_guess=False, 'shoop' is expected back as 'shoop[?].v'.
        self.p = Parser(spell_guess=False)
        expected_words = [
            'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v',
            '.', 'RIGHT-WALL',
        ]
        linkages = self.p.parse_sent("I love going to shoop.")
        self.assertEqual(linkages[0].words, expected_words)

    # Stress-test a capitalized first word in several different ways.
    # Roughly, the test matrix is:
    # -- word is/isn't in the dict as a lower-case word
    # -- word is/isn't in the dict as an upper-case word
    # -- word is/isn't matched by the CAPITALIZED_WORDS regex
    # -- word is/isn't split by the suffix splitter
    # -- the one that is in the dict is not the grammatically appropriate
    #    word.
    def test_f_captilization(self):
        # Each entry is (sentence, expected words of the first linkage);
        # entries are checked in order.
        cases = [
            # "Let's" is NOT split into two! It is in the dict as one word,
            # lower-case only.
            ('Let\'s eat.',
             ['LEFT-WALL', 'let\'s', 'eat.v', '.', 'RIGHT-WALL']),

            # "He's" is split into two words; "he" is in the dict,
            # lower-case only.
            ('He\'s going.',
             ['LEFT-WALL', 'he', '\'s.v', 'going.v', '.', 'RIGHT-WALL']),
            ('You\'re going?',
             ['LEFT-WALL', 'you', '\'re', 'going.v', '?', 'RIGHT-WALL']),

            # "Jumbo" is only in the dict as a lower-case adjective, not as
            # a noun.
            ('Jumbo\'s going?',
             ['LEFT-WALL', 'Jumbo[!]', '\'s.v', 'going.v', '?',
              'RIGHT-WALL']),
            ('Jumbo\'s shoe fell off.',
             ['LEFT-WALL', 'Jumbo[!]', '\'s.p', 'shoe.n', 'fell.v-d', 'off',
              '.', 'RIGHT-WALL']),
            ('Jumbo sat down.',
             ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),

            # "Red" is deliberately not asserted: it is also in the dict as
            # a lower-case noun, and there is no way to know syntactically
            # that it should be taken as a proper noun (given name). The
            # sentences left out are "Red's going?", "Red's shoe fell off."
            # and "Red sat down.".

            # "May" is in the dict as a capitalized noun and as a
            # lower-case verb.
            ('May\'s going?',
             ['LEFT-WALL', 'May.f', '\'s.v', 'going.v', '?', 'RIGHT-WALL']),
            ('May sat down.',
             ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']),

            # "McGyver" is not in the dict, but is regex-matched.
            ('McGyver\'s going?',
             ['LEFT-WALL', 'McGyver[!]', '\'s.v', 'going.v', '?',
              'RIGHT-WALL']),
            ('McGyver\'s shoe fell off.',
             ['LEFT-WALL', 'McGyver[!]', '\'s.p', 'shoe.n', 'fell.v-d',
              'off', '.', 'RIGHT-WALL']),
            ('McGyver sat down.',
             ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),
            ('McGyver Industries stock declined.',
             ['LEFT-WALL', 'McGyver[!]', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # "King" is in the dict as both upper and lower case.
            ('King Industries stock declined.',
             ['LEFT-WALL', 'King.b', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # "Jumbo" is in the dict only lower-case, as an adjective.
            ('Jumbo Industries stock declined.',
             ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # "Thomas" is in the dict only as upper case.
            ('Thomas Industries stock declined.',
             ['LEFT-WALL', 'Thomas.b', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),
        ]
        for sentence, expected_words in cases:
            self.assertEqual(
                self.p.parse_sent(sentence)[0].words, expected_words)

    # Some parses are fractionally preferred over others...
    def test_g_fractions(self):
        cases = [
            ('A player who is injured has to leave the field',
             ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.a',
              'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL']),
            ('They ate a special curry which was recommended by the restaurant\'s owner',
             ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s',
              'which', 'was.v-d', 'recommended.v-d', 'by', 'the',
              'restaurant.n', '\'s.p', 'owner.n', 'RIGHT-WALL']),
        ]
        for sentence, expected_words in cases:
            self.assertEqual(
                self.p.parse_sent(sentence)[0].words, expected_words)

    # Verify that we are getting the linkages that we want.
    # The remaining parses are kept in text files.
    def test_h_getting_links(self):
        # NOTE(review): the diagram literals are reproduced exactly as they
        # appear in this file; diagram comparison is whitespace-sensitive,
        # so confirm the column spacing against real parser output.
        cases = [
            ('Scientists sometimes may repeat experiments or use groups.',
             "\n +---------------------------------------Xp--------------------------------------+"
             "\n +---------------------------->WV---------------------------->+ |"
             "\n | +-----------------------Sp-----------------------+ |"
             "\n | | +------------VJlpi------------+ |"
             "\n +-----Wd----+ +---E---+---I---+----Op----+ +VJrpi+---Op--+ |"
             "\n | | | | | | | | | |"
             "\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v use.v groups.n . "
             "\n\n"),
            ('I enjoy eating bass.',
             "\n +-----------------Xp----------------+"
             "\n +---->WV---->+ |"
             "\n +--Wd--+-Sp*i+---Pg---+---Ou---+ |"
             "\n | | | | | |"
             "\nLEFT-WALL I.p enjoy.v eating.v bass.n-u . "
             "\n\n"),
            ('We are from the planet Gorpon',
             "\n +--->WV--->+ +---------Js--------+"
             "\n +--Wd--+Spx+--Pp-+ +--DD--+---GN---+"
             "\n | | | | | | |"
             "\nLEFT-WALL we are.v from the planet.n Gorpon[!] "
             "\n\n"),
        ]
        for sent, expected_diagram in cases:
            linkage = self.p.parse_sent(sent)[0]
            self.assertEqual(linkage.diagram, expected_diagram)
def setUp(self):
    # Builds a Parser for lang='de' and stores it on self.p.
    language = 'de'
    self.p = Parser(lang=language)
def setUp(self):
    # Parser() is called without arguments, so whatever defaults the
    # library applies are used; the instance is stored on self.p.
    default_parser = Parser()
    self.p = default_parser
def setUp(self):
    # Builds a Parser for lang='en' and stores it on self.p.
    language = 'en'
    self.p = Parser(lang=language)
def test_e_spell_guessing_off(self):
    # Spell guessing is turned off for this test, so the expected sixth
    # word is the original spelling, reported as 'shoop[?].v'.
    self.p = Parser(spell_guess=False)
    result = self.p.parse_sent("I love going to shoop.")
    first_linkage = result[0]
    self.assertEqual(
        first_linkage.words,
        ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v', '.',
         'RIGHT-WALL'])
def setUp(self):
    # Stores a Parser created with lang='en' on self.p.
    english_parser = Parser(lang='en')
    self.p = english_parser
class EEnglishLinkageTestCase(unittest.TestCase):
    """English tests: words, links, spell guessing, capitalization, diagrams."""

    def setUp(self):
        self.p = Parser()

    def _first_words(self, sentence):
        # Word list of the first linkage returned for `sentence`.
        return self.p.parse_sent(sentence)[0].words

    def _first_diagram(self, sentence):
        # Diagram of the first linkage returned for `sentence`.
        return self.p.parse_sent(sentence)[0].diagram

    def test_a_getting_words(self):
        self.assertEqual(
            self._first_words('This is a sentence.'),
            ['LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.',
             'RIGHT-WALL'])

    def test_b_getting_num_of_words(self):
        # Punctuation and both wall entries ('LEFT-WALL', 'RIGHT-WALL') are
        # counted.
        word_count = self.p.parse_sent('This is a sentence.')[0].num_of_words
        self.assertEqual(word_count, 7)

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('This is a sentence.')[0]
        # (left word, left label, right label, right word) per link, in the
        # order the links are expected to be reported.
        expected = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'hWV', 'dWV', 'is.v'),
            ('LEFT-WALL', 'Wd', 'Wd', 'this.p'),
            ('this.p', 'Ss*b', 'Ss', 'is.v'),
            ('is.v', 'O*m', 'Os', 'sentence.n'),
            ('a', 'Ds**c', 'Ds**c', 'sentence.n'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, link_args in enumerate(expected):
            self.assertEqual(linkage.links[index], Link(*link_args))

    def test_d_spell_guessing_on(self):
        # Spell guessing is on; we want the linkage in which 'shoop' was
        # replaced by 'shop[~].v'.
        self.p = Parser(spell_guess=True)
        result = self.p.parse_sent("I love going to shoop.")

        # Ends up as: the first linkage whose sixth word matches, else the
        # last linkage, else [] when nothing was returned.
        picked = []
        for linkage in result:
            picked = linkage
            if linkage.words[5] == 'shop[~].v':
                break

        if picked:
            actual_words = picked.words
        else:
            actual_words = []
        self.assertEqual(
            actual_words,
            ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v',
             '.', 'RIGHT-WALL'])

    def test_e_spell_guessing_off(self):
        # Spell guessing is off, so the original spelling is expected back
        # as 'shoop[?].v'.
        self.p = Parser(spell_guess=False)
        result = self.p.parse_sent("I love going to shoop.")
        first_linkage = result[0]
        self.assertEqual(
            first_linkage.words,
            ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v',
             '.', 'RIGHT-WALL'])

    # Stress-test a capitalized first word in several different ways.
    # Roughly, the test matrix is:
    # -- word is/isn't in the dict as a lower-case word
    # -- word is/isn't in the dict as an upper-case word
    # -- word is/isn't matched by the CAPITALIZED_WORDS regex
    # -- word is/isn't split by the suffix splitter
    # -- the one that is in the dict is not the grammatically appropriate
    #    word.
    def test_f_captilization(self):
        check = self.assertEqual
        words = self._first_words

        # "Let's" is NOT split into two! It is in the dict as one word,
        # lower-case only.
        check(words('Let\'s eat.'),
              ['LEFT-WALL', 'let\'s', 'eat.v', '.', 'RIGHT-WALL'])

        # "He's" is split into two words; "he" is in the dict, lower-case
        # only.
        check(words('He\'s going.'),
              ['LEFT-WALL', 'he', '\'s.v', 'going.v', '.', 'RIGHT-WALL'])
        check(words('You\'re going?'),
              ['LEFT-WALL', 'you', '\'re', 'going.v', '?', 'RIGHT-WALL'])

        # "Jumbo" is only in the dict as a lower-case adjective, not as a
        # noun.
        check(words('Jumbo\'s going?'),
              ['LEFT-WALL', 'Jumbo[!]', '\'s.v', 'going.v', '?',
               'RIGHT-WALL'])
        check(words('Jumbo\'s shoe fell off.'),
              ['LEFT-WALL', 'Jumbo[!]', '\'s.p', 'shoe.n', 'fell.v-d',
               'off', '.', 'RIGHT-WALL'])
        check(words('Jumbo sat down.'),
              ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.',
               'RIGHT-WALL'])

        # "Red" is intentionally left unchecked: it is also in the dict as
        # a lower-case noun, and nothing in the syntax says it should be
        # read as a proper noun (given name). The skipped sentences are
        # "Red's going?", "Red's shoe fell off." and "Red sat down.".

        # "May" is in the dict as a capitalized noun and as a lower-case
        # verb.
        check(words('May\'s going?'),
              ['LEFT-WALL', 'May.f', '\'s.v', 'going.v', '?', 'RIGHT-WALL'])
        check(words('May sat down.'),
              ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.',
               'RIGHT-WALL'])

        # "McGyver" is not in the dict, but is regex-matched.
        check(words('McGyver\'s going?'),
              ['LEFT-WALL', 'McGyver[!]', '\'s.v', 'going.v', '?',
               'RIGHT-WALL'])
        check(words('McGyver\'s shoe fell off.'),
              ['LEFT-WALL', 'McGyver[!]', '\'s.p', 'shoe.n', 'fell.v-d',
               'off', '.', 'RIGHT-WALL'])
        check(words('McGyver sat down.'),
              ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.',
               'RIGHT-WALL'])
        check(words('McGyver Industries stock declined.'),
              ['LEFT-WALL', 'McGyver[!]', 'Industries[!]', 'stock.n-u',
               'declined.v-d', '.', 'RIGHT-WALL'])

        # "King" is in the dict as both upper and lower case.
        check(words('King Industries stock declined.'),
              ['LEFT-WALL', 'King.b', 'Industries[!]', 'stock.n-u',
               'declined.v-d', '.', 'RIGHT-WALL'])

        # "Jumbo" is in the dict only lower-case, as an adjective.
        check(words('Jumbo Industries stock declined.'),
              ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]', 'stock.n-u',
               'declined.v-d', '.', 'RIGHT-WALL'])

        # "Thomas" is in the dict only as upper case.
        check(words('Thomas Industries stock declined.'),
              ['LEFT-WALL', 'Thomas.b', 'Industries[!]', 'stock.n-u',
               'declined.v-d', '.', 'RIGHT-WALL'])

    # Some parses are fractionally preferred over others...
    def test_g_fractions(self):
        self.assertEqual(
            self._first_words('A player who is injured has to leave the field'),
            ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.a',
             'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL'])

        self.assertEqual(
            self._first_words(
                'They ate a special curry which was recommended by the restaurant\'s owner'),
            ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s',
             'which', 'was.v-d', 'recommended.v-d', 'by', 'the',
             'restaurant.n', '\'s.p', 'owner.n', 'RIGHT-WALL'])

    # Verify that we are getting the linkages that we want.
    # The remaining parses are kept in text files.
    def test_h_getting_links(self):
        # NOTE(review): the diagram literals are reproduced exactly as they
        # appear in this file; diagram comparison is whitespace-sensitive,
        # so confirm the column spacing against real parser output.
        scientists_diagram = (
            "\n +---------------------------------------Xp--------------------------------------+"
            "\n +---------------------------->WV---------------------------->+ |"
            "\n | +-----------------------Sp-----------------------+ |"
            "\n | | +------------VJlpi------------+ |"
            "\n +-----Wd----+ +---E---+---I---+----Op----+ +VJrpi+---Op--+ |"
            "\n | | | | | | | | | |"
            "\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v use.v groups.n . "
            "\n\n")
        self.assertEqual(
            self._first_diagram(
                'Scientists sometimes may repeat experiments or use groups.'),
            scientists_diagram)

        bass_diagram = (
            "\n +-----------------Xp----------------+"
            "\n +---->WV---->+ |"
            "\n +--Wd--+-Sp*i+---Pg---+---Ou---+ |"
            "\n | | | | | |"
            "\nLEFT-WALL I.p enjoy.v eating.v bass.n-u . "
            "\n\n")
        self.assertEqual(
            self._first_diagram('I enjoy eating bass.'), bass_diagram)

        gorpon_diagram = (
            "\n +--->WV--->+ +---------Js--------+"
            "\n +--Wd--+Spx+--Pp-+ +--DD--+---GN---+"
            "\n | | | | | | |"
            "\nLEFT-WALL we are.v from the planet.n Gorpon[!] "
            "\n\n")
        self.assertEqual(
            self._first_diagram('We are from the planet Gorpon'),
            gorpon_diagram)
def setUp(self):
    # Stores a Parser created with lang='de' on self.p.
    german_parser = Parser(lang='de')
    self.p = german_parser
def setUp(self):
    # Stores a Parser created with lang='lt' on self.p.
    lithuanian_parser = Parser(lang='lt')
    self.p = lithuanian_parser
def setUp(self):
    # Stores a Parser created with lang='ru' on self.p.
    russian_parser = Parser(lang='ru')
    self.p = russian_parser
def setUp(self):
    # Builds a Parser for lang='ru' and stores it on self.p.
    language = 'ru'
    self.p = Parser(lang=language)
#! /usr/bin/env python
# -*- coding: utf8 -*-
#
# Link Grammar example usage
#
# Parses one sentence each in English, Russian and Turkish and reports how
# many linkages were found. For English and Russian every linkage diagram is
# printed as well.
#
# May need to set the PYTHONPATH to get this to work:
# PYTHONPATH=$PYTHONPATH:/usr/local/lib/python2.7/dist-packages/link-grammar
# or something similar ...
#
# print() is used as a function so the example runs unchanged on Python 2
# and Python 3; the output is the same as the former print statements.
from __future__ import print_function

from linkgrammar import Parser, Linkage, ParseOptions, Link

# Created but not passed to any Parser below.
po = ParseOptions()

# English is the default language
p = Parser()
linkages = p.parse_sent("This is a test.")
print("English: found ", len(linkages), "linkages")
for linkage in linkages:
    print(linkage.diagram)

# Russian
p = Parser(lang='ru')
linkages = p.parse_sent("это большой тест.")
print("Russian: found ", len(linkages), "linkages")
for linkage in linkages:
    print(linkage.diagram)

# Turkish (only the linkage count is reported, no diagrams)
p = Parser(lang='tr')
linkages = p.parse_sent("çok şişman adam geldi")
print("Turkish: found ", len(linkages), "linkages")
#! /usr/bin/env python # -*- coding: utf8 -*- # # Link Grammar example usage # # May need to set the PYTHONPATH to get this to work: # PYTHONPATH=$PYTHONPATH:/usr/local/lib/python2.7/dist-packages/link_grammar # or something similar ... # from linkgrammar import Parser, Linkage, ParseOptions, Link # English is the default language p = Parser(lang='en', islands_ok=True) linkages = p.parse_sent("This is a test.") print ("English: found ", len(linkages), "linkages") for linkage in linkages : print (linkage.diagram) print (linkage.constituent_phrases_nested) # Russian try: p = Parser(lang = 'ru') linkages = p.parse_sent("это большой тест.") print ("Russian: found ", len(linkages), "linkages") for linkage in linkages : print (linkage.diagram) except: pass # Turkish try:
def test_specifying_options_when_instantiating_parser(self):
    # A linkage_limit passed to Parser() should be readable back from the
    # parser's underlying parse-options object through the clg getter.
    wanted_limit = 10
    parser = Parser(linkage_limit=wanted_limit)
    stored_limit = clg.parse_options_get_linkage_limit(
        parser.parse_options._po)
    self.assertEqual(stored_limit, wanted_limit)