Exemplo n.º 1
0
class ZDELangTestCase(unittest.TestCase):
    """Checks parses produced by a parser created with lang='de'."""

    def setUp(self):
        # Every test starts from its own parser instance.
        self.p = Parser(lang='de')

    def test_a_getting_num_of_words(self):
        # The word count covers punctuation as well as the 'LEFT-WALL'
        # and 'RIGHT-WALL' markers.
        cases = (
            ('Dies ist den Traum.', 7),
            ('Der Hund jagte ihn durch den Park.', 10),
        )
        for sentence, word_count in cases:
            first_linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(first_linkage.num_of_words, word_count)

    def test_b_getting_words(self):
        first_linkage = self.p.parse_sent(
            'Der Hund jagte ihn durch den Park.')[0]
        expected_words = [
            'LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn', 'durch',
            'den.d', 'Park.n', '.', 'RIGHT-WALL',
        ]
        self.assertEqual(first_linkage.words, expected_words)

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('Dies ist den Traum.')[0]
        # Expected links, listed in the order they appear in linkage.links.
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'W', 'W', 'ist.v'),
            Link('dies', 'Ss', 'Ss', 'ist.v'),
            Link('ist.v', 'O', 'O', 'Traum.n'),
            Link('den.d', 'Dam', 'Dam', 'Traum.n'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, expected in enumerate(expected_links):
            self.assertEqual(linkage.links[position], expected)
Exemplo n.º 2
0
class ZDELangTestCase(unittest.TestCase):
    """Word-count, word-list and link checks for a lang='de' parser."""

    def setUp(self):
        self.p = Parser(lang='de')

    def test_a_getting_num_of_words(self):
        # Punctuation, 'LEFT-WALL' and 'RIGHT-WALL' are all counted as words.
        short_linkage = self.p.parse_sent('Dies ist den Traum.')[0]
        self.assertEqual(short_linkage.num_of_words, 7)

        long_linkage = self.p.parse_sent(
            'Der Hund jagte ihn durch den Park.')[0]
        self.assertEqual(long_linkage.num_of_words, 10)

    def test_b_getting_words(self):
        sentence = 'Der Hund jagte ihn durch den Park.'
        actual_words = self.p.parse_sent(sentence)[0].words
        self.assertEqual(actual_words, [
            'LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn', 'durch',
            'den.d', 'Park.n', '.', 'RIGHT-WALL',
        ])

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('Dies ist den Traum.')[0]
        # One tuple of Link arguments per expected link, in index order.
        link_fields = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'W', 'ist.v'),
            ('dies', 'Ss', 'Ss', 'ist.v'),
            ('ist.v', 'O', 'O', 'Traum.n'),
            ('den.d', 'Dam', 'Dam', 'Traum.n'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, fields in enumerate(link_fields):
            self.assertEqual(linkage.links[index], Link(*fields))
Exemplo n.º 3
0
class ZLTLangTestCase(unittest.TestCase):
    """Compares diagrams from a lang='lt' parser with a reference file."""

    def setUp(self):
        self.p = Parser(lang='lt')

    # Reads linkages from a test-file.
    #
    # File format, as consumed below: a line starting with 'I' carries an
    # input sentence; the following lines starting with 'O' carry the
    # expected diagram, one diagram row per line.
    def test_getting_links(self):
        diagram = None
        sent = None
        # The context manager closes the file even when an assertion fails
        # or parsing raises part-way through the file.
        with open("parses-lt.txt") as parses:
            for line in parses:
                # Lines starting with I are the input sentences
                if line.startswith('I'):
                    sent = line[1:]
                    diagram = ""

                # Lines starting with O are the parse diagrams
                if line.startswith('O'):
                    diagram += line[1:]

                    # We have a complete diagram if it ends with an
                    # empty line.  The length check skips the blank row
                    # that opens a diagram.  The slice comparison (rather
                    # than line[1]) tolerates a final 'O' line that has
                    # no trailing newline.
                    if line[1:] == '\n' and 1 < len(diagram):
                        linkage = self.p.parse_sent(sent)[0]
                        self.assertEqual(linkage.diagram, diagram)
Exemplo n.º 4
0
class ZLTLangTestCase(unittest.TestCase):
    """Checks lang='lt' parser diagrams against those in parses-lt.txt."""

    def setUp(self):
        self.p = Parser(lang='lt')

    # Reads linkages from a test-file.
    #
    # An 'I' line starts a new case and holds the sentence to parse; the
    # 'O' lines after it are accumulated into the expected diagram.
    def test_getting_links(self):
        diagram = None
        sent = None
        # Use a context manager so the file is closed on every exit path,
        # including a failing assertion or an exception from the parser.
        with open("parses-lt.txt") as parses:
            for line in parses:
                # Lines starting with I are the input sentences
                if line.startswith('I'):
                    sent = line[1:]
                    diagram = ""

                # Lines starting with O are the parse diagrams
                if line.startswith('O'):
                    diagram += line[1:]

                    # We have a complete diagram if it ends with an
                    # empty line.  A diagram's first row is itself blank,
                    # hence the length check.  Comparing the slice avoids
                    # an IndexError on a last 'O' line without a newline.
                    if line[1:] == '\n' and 1 < len(diagram):
                        linkage = self.p.parse_sent(sent)[0]
                        self.assertEqual(linkage.diagram, diagram)
Exemplo n.º 5
0
class DBasicParsingTestCase(unittest.TestCase):
    """Basic checks on what Parser.parse_sent returns."""

    def setUp(self):
        self.p = Parser()

    def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(
            self):
        linkages = self.p.parse_sent("This is a relatively simple sentence.")
        self.assertTrue(isinstance(linkages, list))
        for position in (0, 1):
            self.assertTrue(isinstance(linkages[position], Linkage))

    def test_utf8_encoded_string(self):
        # Each input must yield a list holding more than one linkage, whose
        # first two entries are Linkage objects.
        inputs = [
            # Plain literal containing a non-ASCII character.
            "I love going to the café.",
            # Unicode literal explicitly encoded as UTF-8.
            u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.".
            encode('utf8'),
            # Unknown word.
            "I love going to the qertfdwedadt.",
            # Unknown word made of accented Latin characters.
            "I love going to the qéáéğíóşúüñ.",
            # Unknown Cyrillic word.
            "I love going to the доктором.",
        ]
        for text in inputs:
            linkages = self.p.parse_sent(text)
            self.assertTrue(isinstance(linkages, list))
            self.assertTrue(1 < len(linkages))
            self.assertTrue(isinstance(linkages[0], Linkage))
            self.assertTrue(isinstance(linkages[1], Linkage))

    def test_getting_link_distances(self):
        expected_distances = (
            ("This is a sentence.", [5, 2, 1, 1, 2, 1, 1]),
            ("This is a silly sentence.", [6, 2, 1, 1, 3, 2, 1, 1, 1]),
        )
        for sentence, distances in expected_distances:
            first_linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(first_linkage.link_distances, distances)
Exemplo n.º 6
0
 def test_that_parser_can_be_destroyed_when_linkages_still_exist(self):
     """
     If the parser is deleted before the associated swig objects
     are, there will be bad pointer dereferences (as the swig
     objects will be pointing into freed memory).  This test ensures
     that parsers can be created and deleted without regard for
     the existence of PYTHON Linkage objects
     """
     # There are no assertions here: the statements below only have to
     # run to completion.
     p = Parser()
     # `linkages` is never read; it is bound only so that the parse
     # results are still referenced when `p` is deleted on the next line.
     linkages = p.parse_sent('This is a sentence.')
     # Drop this test's reference to the parser while `linkages` is live.
     del p
Exemplo n.º 7
0
 def test_that_parser_can_be_destroyed_when_linkages_still_exist(self):
     """
     If the parser is deleted before the associated swig objects
     are, there will be bad pointer dereferences (as the swig
     objects will be pointing into freed memory).  This test ensures
     that parsers can be created and deleted without regard for
     the existence of PYTHON Linkage objects
     """
     # No assertion is made; the test consists of these statements
     # executing without error.
     p = Parser()
     # The result is kept in a local (otherwise unused) so the returned
     # objects remain referenced at the moment `p` is deleted.
     linkages = p.parse_sent('This is a sentence.')
     # Remove the local parser reference before `linkages` goes away.
     del p
Exemplo n.º 8
0
class ZRULangTestCase(unittest.TestCase):
    """Checks parses produced by a parser created with lang='ru'."""

    def setUp(self):
        self.p = Parser(lang='ru')

    def test_a_getting_num_of_words(self):
        # Punctuation, 'LEFT-WALL' and 'RIGHT-WALL' all count as words.
        cases = (
            ('это тести.', 5),
            ('вверху плыли редкие облачка.', 7),
        )
        for sentence, word_count in cases:
            linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(linkage.num_of_words, word_count)

    def test_b_getting_words(self):
        linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        expected_words = [
            'LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
            'облачка.ndnpi', '.', 'RIGHT-WALL',
        ]
        self.assertEqual(linkage.words, expected_words)

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        # Expected links, in the order they appear in linkage.links.
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'W', 'Wd', 'плыли.vnndpp'),
            Link('вверху.e', 'EI', 'EI', 'плыли.vnndpp'),
            Link('плыли.vnndpp', 'SIp', 'SIp', 'облачка.ndnpi'),
            Link('редкие.api', 'Api', 'Api', 'облачка.ndnpi'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, expected in enumerate(expected_links):
            self.assertEqual(linkage.links[position], expected)

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        # Replace the setUp parser with one that displays morphology.
        self.p = Parser(lang='ru', display_morphology=True)
        linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        split_words = [
            'LEFT-WALL',
            'вверху.e',
            'плы.=', '=ли.vnndpp',
            'ре.=', '=дкие.api',
            'облачк.=', '=а.ndnpi',
            '.', 'RIGHT-WALL',
        ]
        self.assertEqual(linkage.words, split_words)
Exemplo n.º 9
0
class ZRULangTestCase(unittest.TestCase):
    """Word-count, word-list, link and morphology checks for lang='ru'."""

    def setUp(self):
        self.p = Parser(lang='ru')

    def test_a_getting_num_of_words(self):
        # The count includes punctuation, 'LEFT-WALL' and 'RIGHT-WALL'.
        two_word = self.p.parse_sent('это тести.')[0]
        self.assertEqual(two_word.num_of_words, 5)

        four_word = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(four_word.num_of_words, 7)

    def test_b_getting_words(self):
        sentence = 'вверху плыли редкие облачка.'
        actual_words = self.p.parse_sent(sentence)[0].words
        self.assertEqual(actual_words, [
            'LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
            'облачка.ndnpi', '.', 'RIGHT-WALL',
        ])

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('вверху плыли редкие облачка.')[0]
        # One tuple of Link arguments per expected link, in index order.
        link_fields = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'Wd', 'плыли.vnndpp'),
            ('вверху.e', 'EI', 'EI', 'плыли.vnndpp'),
            ('плыли.vnndpp', 'SIp', 'SIp', 'облачка.ndnpi'),
            ('редкие.api', 'Api', 'Api', 'облачка.ndnpi'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, fields in enumerate(link_fields):
            self.assertEqual(linkage.links[index], Link(*fields))

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        # This test swaps in a parser created with display_morphology=True.
        self.p = Parser(lang='ru', display_morphology=True)
        sentence = 'вверху плыли редкие облачка.'
        actual_words = self.p.parse_sent(sentence)[0].words
        self.assertEqual(actual_words, [
            'LEFT-WALL', 'вверху.e', 'плы.=', '=ли.vnndpp', 'ре.=',
            '=дкие.api', 'облачк.=', '=а.ndnpi', '.', 'RIGHT-WALL',
        ])
Exemplo n.º 10
0
class DBasicParsingTestCase(unittest.TestCase):
    """Basic checks on the value returned by Parser.parse_sent."""

    def setUp(self):
        self.p = Parser()

    def _check_has_two_linkages(self, text):
        # Parse `text` and require a list with more than one entry whose
        # first two entries are Linkage objects.
        parsed = self.p.parse_sent(text)
        self.assertTrue(isinstance(parsed, list))
        self.assertTrue(1 < len(parsed))
        self.assertTrue(isinstance(parsed[0], Linkage))
        self.assertTrue(isinstance(parsed[1], Linkage))

    def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(self):
        parsed = self.p.parse_sent("This is a relatively simple sentence.")
        self.assertTrue(isinstance(parsed, list))
        first, second = parsed[0], parsed[1]
        self.assertTrue(isinstance(first, Linkage))
        self.assertTrue(isinstance(second, Linkage))

    def test_utf8_encoded_string(self):
        # Plain literal containing a non-ASCII character.
        self._check_has_two_linkages("I love going to the café.")

        # Unicode literal explicitly encoded as UTF-8.
        self._check_has_two_linkages(
            u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.".encode('utf8'))

        # Unknown word.
        self._check_has_two_linkages("I love going to the qertfdwedadt.")

        # Unknown word made of accented Latin characters.
        self._check_has_two_linkages("I love going to the qéáéğíóşúüñ.")

        # Unknown Cyrillic word.
        self._check_has_two_linkages("I love going to the доктором.")

    def test_getting_link_distances(self):
        plain = self.p.parse_sent("This is a sentence.")[0]
        self.assertEqual(plain.link_distances, [5, 2, 1, 1, 2, 1, 1])

        with_adjective = self.p.parse_sent("This is a silly sentence.")[0]
        self.assertEqual(with_adjective.link_distances,
                         [6, 2, 1, 1, 3, 2, 1, 1, 1])
Exemplo n.º 11
0
#! /usr/bin/env python
# -*- coding: utf8 -*-
#
# Link Grammar example usage 
#
# May need to set the PYTHONPATH to get this to work:
# PYTHONPATH=$PYTHONPATH:/usr/local/lib/python2.7/dist-packages/link_grammar
# or something similar ...
#
from linkgrammar import Parser, Linkage, ParseOptions, Link


# English is the default language
p = Parser(lang='en', islands_ok=True)
linkages = p.parse_sent("This is a test.")
print ("English: found ", len(linkages), "linkages")
for linkage in linkages:
    print (linkage.diagram)
    print (linkage.constituent_phrases_nested)

# Russian
# Best effort: a failure in this section must not stop the script.
# NOTE(review): presumably this guards against a missing Russian
# dictionary -- confirm which exception Parser raises in that case.
try:
    p = Parser(lang='ru')
    linkages = p.parse_sent("это большой тест.")
    print ("Russian: found ", len(linkages), "linkages")
    for linkage in linkages:
        print (linkage.diagram)
except Exception as err:
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt / SystemExit still propagate, and report the
    # reason instead of silently discarding it.
    print ("Russian: skipped (%s)" % err)
# Turkish
try:
Exemplo n.º 12
0
class EEnglishLinkageTestCase(unittest.TestCase):
    """Checks words, links and diagrams of parses from a default Parser."""

    def setUp(self):
        self.p = Parser()

    def test_a_getting_words(self):
        linkage = self.p.parse_sent('This is a sentence.')[0]
        expected_words = [
            'LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.',
            'RIGHT-WALL',
        ]
        self.assertEqual(linkage.words, expected_words)

    def test_b_getting_num_of_words(self):
        # The count covers punctuation plus 'LEFT-WALL' and 'RIGHT-WALL'.
        linkage = self.p.parse_sent('This is a sentence.')[0]
        self.assertEqual(linkage.num_of_words, 7)

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('This is a sentence.')[0]
        # Expected links, in the order they appear in linkage.links.
        expected_links = [
            Link('LEFT-WALL', 'Xp', 'Xp', '.'),
            Link('LEFT-WALL', 'hWV', 'dWV', 'is.v'),
            Link('LEFT-WALL', 'Wd', 'Wd', 'this.p'),
            Link('this.p', 'Ss*b', 'Ss', 'is.v'),
            Link('is.v', 'O*m', 'Os', 'sentence.n'),
            Link('a', 'Ds**c', 'Ds**c', 'sentence.n'),
            Link('.', 'RW', 'RW', 'RIGHT-WALL'),
        ]
        for position, expected in enumerate(expected_links):
            self.assertEqual(linkage.links[position], expected)

    def test_d_spell_guessing_on(self):
        self.p = Parser(spell_guess=True)
        linkages = self.p.parse_sent("I love going to shoop.")
        expected_words = [
            'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v', '.',
            'RIGHT-WALL',
        ]
        # Start from the first linkage ([] when there are none), then stop
        # at the first linkage whose word at index 5 is 'shop[~].v'.  When
        # no linkage matches, the last one is what gets compared.
        chosen = linkages[0] if linkages else []
        for chosen in linkages:
            if chosen.words[5] == 'shop[~].v':
                break
        self.assertEqual(chosen.words if chosen else [], expected_words)

    def test_e_spell_guessing_off(self):
        self.p = Parser(spell_guess=False)
        linkages = self.p.parse_sent("I love going to shoop.")
        expected_words = [
            'LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v', '.',
            'RIGHT-WALL',
        ]
        self.assertEqual(linkages[0].words, expected_words)

    # Stress-test first-word-capitalized in various different ways.
    # Roughly, the test matrix is this:
    # -- word is/isn't in dict as lower-case word
    # -- word is/isn't in dict as upper-case word
    # -- word is/isn't matched with CAPITALIZED_WORDS regex
    # -- word is/isn't split by suffix splitter
    # -- the one that is in the dict is not the grammatically appropriate word.
    def test_f_captilization(self):
        # (sentence, expected words of the first linkage), checked in order.
        cases = [
            # Let's is NOT split into two! It's in the dict as one word,
            # lower-case only.
            ("Let's eat.",
             ['LEFT-WALL', "let's", 'eat.v', '.', 'RIGHT-WALL']),

            # He's is split into two words, he is in dict, lower-case only.
            ("He's going.",
             ['LEFT-WALL', 'he', "'s.v", 'going.v', '.', 'RIGHT-WALL']),
            ("You're going?",
             ['LEFT-WALL', 'you', "'re", 'going.v', '?', 'RIGHT-WALL']),

            # Jumbo only in dict as adjective, lower-case, but not noun.
            ("Jumbo's going?",
             ['LEFT-WALL', 'Jumbo[!]', "'s.v", 'going.v', '?',
              'RIGHT-WALL']),
            ("Jumbo's shoe fell off.",
             ['LEFT-WALL', 'Jumbo[!]', "'s.p", 'shoe.n', 'fell.v-d', 'off',
              '.', 'RIGHT-WALL']),
            ('Jumbo sat down.',
             ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),

            # Red is in dict, lower-case, as noun, too.
            # There's no way to really know, syntactically, that Red
            # should be taken as a proper noun (given name), so the
            # following cases stay disabled:
            # ("Red's going?",
            #  ['LEFT-WALL', 'Red[!]', "'s.v", 'going.v', '?', 'RIGHT-WALL']),
            # ("Red's shoe fell off.",
            #  ['LEFT-WALL', 'Red[!]',
            #   "'s.p", 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']),
            # 'Red sat down.' (compared against linkage [1], not [0]):
            #  ['LEFT-WALL', 'Red[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']

            # May in dict as noun, capitalized, and as lower-case verb.
            ("May's going?",
             ['LEFT-WALL', 'May.f', "'s.v", 'going.v', '?', 'RIGHT-WALL']),
            ('May sat down.',
             ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']),

            # McGyver is not in the dict, but is regex-matched.
            ("McGyver's going?",
             ['LEFT-WALL', 'McGyver[!]', "'s.v", 'going.v', '?',
              'RIGHT-WALL']),
            ("McGyver's shoe fell off.",
             ['LEFT-WALL', 'McGyver[!]', "'s.p", 'shoe.n', 'fell.v-d',
              'off', '.', 'RIGHT-WALL']),
            ('McGyver sat down.',
             ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),
            ('McGyver Industries stock declined.',
             ['LEFT-WALL', 'McGyver[!]', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # King in dict as both upper and lower case.
            ('King Industries stock declined.',
             ['LEFT-WALL', 'King.b', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # Jumbo in dict only lower-case, as adjective
            ('Jumbo Industries stock declined.',
             ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),

            # Thomas in dict only as upper case.
            ('Thomas Industries stock declined.',
             ['LEFT-WALL', 'Thomas.b', 'Industries[!]', 'stock.n-u',
              'declined.v-d', '.', 'RIGHT-WALL']),
        ]
        for sentence, expected_words in cases:
            linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(linkage.words, expected_words)

    # Some parses are fractionally preferred over others...
    def test_g_fractions(self):
        cases = [
            ('A player who is injured has to leave the field',
             ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.a',
              'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL']),
            ("They ate a special curry which was recommended by the restaurant's owner",
             ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s',
              'which', 'was.v-d', 'recommended.v-d', 'by', 'the',
              'restaurant.n', "'s.p", 'owner.n', 'RIGHT-WALL']),
        ]
        for sentence, expected_words in cases:
            linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(linkage.words, expected_words)

    # Verify that we are getting the linkages that we want
    # See below, remainder of parses are in text files
    def test_h_getting_links(self):
        # (sentence, expected diagram text of the first linkage).
        cases = [
            ('Scientists sometimes may repeat experiments or use groups.',
             "\n    +---------------------------------------Xp--------------------------------------+"
             "\n    +---------------------------->WV---------------------------->+                  |"
             "\n    |           +-----------------------Sp-----------------------+                  |"
             "\n    |           |                  +------------VJlpi------------+                  |"
             "\n    +-----Wd----+          +---E---+---I---+----Op----+          +VJrpi+---Op--+    |"
             "\n    |           |          |       |       |          |          |     |       |    |"
             "\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v use.v groups.n . "
             "\n\n"),
            ('I enjoy eating bass.',
             "\n    +-----------------Xp----------------+"
             "\n    +---->WV---->+                      |"
             "\n    +--Wd--+-Sp*i+---Pg---+---Ou---+    |"
             "\n    |      |     |        |        |    |"
             "\nLEFT-WALL I.p enjoy.v eating.v bass.n-u . "
             "\n\n"),
            ('We are from the planet Gorpon',
             "\n    +--->WV--->+     +---------Js--------+"
             "\n    +--Wd--+Spx+--Pp-+   +--DD--+---GN---+"
             "\n    |      |   |     |   |      |        |"
             "\nLEFT-WALL we are.v from the planet.n Gorpon[!] "
             "\n\n"),
        ]
        for sentence, expected_diagram in cases:
            linkage = self.p.parse_sent(sentence)[0]
            self.assertEqual(linkage.diagram, expected_diagram)
Exemplo n.º 13
0
#! /usr/bin/env python
# -*- coding: utf8 -*-
#
# Link Grammar example usage
#
# May need to set the PYTHONPATH to get this to work:
# PYTHONPATH=$PYTHONPATH:/usr/local/lib/python2.7/dist-packages/link-grammar
# or something similar ...
#
from linkgrammar import Parser, Linkage, ParseOptions, Link

po = ParseOptions()

# Each report line is built as a single string and passed to print() as
# one argument, so the script produces the same output whether print is
# the Python 2 statement or the Python 3 function.  The two spaces after
# "found" reproduce the separator the old comma-separated print emitted.

# English is the default language
p = Parser()
linkages = p.parse_sent("This is a test.")
print("English: found  %d linkages" % len(linkages))
for linkage in linkages:
    print(linkage.diagram)

# Russian
p = Parser(lang='ru')
linkages = p.parse_sent("это большой тест.")
print("Russian: found  %d linkages" % len(linkages))
for linkage in linkages:
    print(linkage.diagram)

# Turkish
p = Parser(lang='tr')
linkages = p.parse_sent("çok şişman adam geldi")
print("Turkish: found  %d linkages" % len(linkages))
Exemplo n.º 14
0
class EEnglishLinkageTestCase(unittest.TestCase):
    """Word, link and diagram checks for parses from a default Parser."""

    def setUp(self):
        self.p = Parser()

    def _first_words(self, sentence):
        # Words of the first linkage the current parser returns.
        return self.p.parse_sent(sentence)[0].words

    def test_a_getting_words(self):
        self.assertEqual(
            self._first_words('This is a sentence.'),
            ['LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.',
             'RIGHT-WALL'])

    def test_b_getting_num_of_words(self):
        # Punctuation, 'LEFT-WALL' and 'RIGHT-WALL' are counted as words.
        first = self.p.parse_sent('This is a sentence.')[0]
        self.assertEqual(first.num_of_words, 7)

    def test_c_getting_links(self):
        linkage = self.p.parse_sent('This is a sentence.')[0]
        # One tuple of Link arguments per expected link, in index order.
        link_fields = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'hWV', 'dWV', 'is.v'),
            ('LEFT-WALL', 'Wd', 'Wd', 'this.p'),
            ('this.p', 'Ss*b', 'Ss', 'is.v'),
            ('is.v', 'O*m', 'Os', 'sentence.n'),
            ('a', 'Ds**c', 'Ds**c', 'sentence.n'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        for index, fields in enumerate(link_fields):
            self.assertEqual(linkage.links[index], Link(*fields))

    def test_d_spell_guessing_on(self):
        self.p = Parser(spell_guess=True)
        candidates = self.p.parse_sent("I love going to shoop.")
        # Default to the first linkage ([] when nothing was returned); the
        # loop then stops on the first linkage whose word at index 5 is
        # 'shop[~].v', or ends on the last linkage when none matches.
        picked = candidates[0] if candidates else []
        for picked in candidates:
            if picked.words[5] == 'shop[~].v':
                break
        wanted = ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r',
                  'shop[~].v', '.', 'RIGHT-WALL']
        self.assertEqual(picked.words if picked else [], wanted)

    def test_e_spell_guessing_off(self):
        self.p = Parser(spell_guess=False)
        candidates = self.p.parse_sent("I love going to shoop.")
        wanted = ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r',
                  'shoop[?].v', '.', 'RIGHT-WALL']
        self.assertEqual(candidates[0].words, wanted)

    # Stress-test first-word-capitalized in various different ways.
    # Roughly, the test matrix is this:
    # -- word is/isn't in dict as lower-case word
    # -- word is/isn't in dict as upper-case word
    # -- word is/isn't matched with CAPITALIZED_WORDS regex
    # -- word is/isn't split by suffix splitter
    # -- the one that is in the dict is not the grammatically appropriate word.
    def test_f_captilization(self):
        # Each entry pairs a sentence with the words expected for its first
        # linkage; entries are checked in the order listed.
        expectations = (
            # Let's is NOT split into two! It's in the dict as one word,
            # lower-case only.
            ("Let's eat.",
             ['LEFT-WALL', "let's", 'eat.v', '.', 'RIGHT-WALL']),

            # He's is split into two words, he is in dict, lower-case only.
            ("He's going.",
             ['LEFT-WALL', 'he', "'s.v", 'going.v', '.', 'RIGHT-WALL']),
            ("You're going?",
             ['LEFT-WALL', 'you', "'re", 'going.v', '?', 'RIGHT-WALL']),

            # Jumbo only in dict as adjective, lower-case, but not noun.
            ("Jumbo's going?",
             ['LEFT-WALL', 'Jumbo[!]', "'s.v", 'going.v', '?',
              'RIGHT-WALL']),
            ("Jumbo's shoe fell off.",
             ['LEFT-WALL', 'Jumbo[!]',
              "'s.p", 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']),
            ('Jumbo sat down.',
             ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),

            # Red is in dict, lower-case, as noun, too.
            # There's no way to really know, syntactically, that Red
            # should be taken as a proper noun (given name), so these
            # cases remain disabled:
            # ("Red's going?",
            #  ['LEFT-WALL', 'Red[!]', "'s.v", 'going.v', '?', 'RIGHT-WALL']),
            # ("Red's shoe fell off.",
            #  ['LEFT-WALL', 'Red[!]',
            #   "'s.p", 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']),
            # 'Red sat down.' (compared against linkage [1], not [0]):
            #  ['LEFT-WALL', 'Red[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']

            # May in dict as noun, capitalized, and as lower-case verb.
            ("May's going?",
             ['LEFT-WALL', 'May.f', "'s.v", 'going.v', '?', 'RIGHT-WALL']),
            ('May sat down.',
             ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']),

            # McGyver is not in the dict, but is regex-matched.
            ("McGyver's going?",
             ['LEFT-WALL', 'McGyver[!]', "'s.v", 'going.v', '?',
              'RIGHT-WALL']),
            ("McGyver's shoe fell off.",
             ['LEFT-WALL', 'McGyver[!]',
              "'s.p", 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']),
            ('McGyver sat down.',
             ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.',
              'RIGHT-WALL']),
            ('McGyver Industries stock declined.',
             ['LEFT-WALL', 'McGyver[!]', 'Industries[!]',
              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']),

            # King in dict as both upper and lower case.
            ('King Industries stock declined.',
             ['LEFT-WALL', 'King.b', 'Industries[!]',
              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']),

            # Jumbo in dict only lower-case, as adjective
            ('Jumbo Industries stock declined.',
             ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]',
              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']),

            # Thomas in dict only as upper case.
            ('Thomas Industries stock declined.',
             ['LEFT-WALL', 'Thomas.b', 'Industries[!]',
              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']),
        )
        for sentence, wanted in expectations:
            self.assertEqual(self._first_words(sentence), wanted)

    # Some parses are fractionally preferred over others...
    def test_g_fractions(self):
        expectations = (
            ('A player who is injured has to leave the field',
             ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.a',
              'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL']),
            ("They ate a special curry which was recommended by the restaurant's owner",
             ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s',
              'which', 'was.v-d', 'recommended.v-d', 'by', 'the',
              'restaurant.n', "'s.p", 'owner.n', 'RIGHT-WALL']),
        )
        for sentence, wanted in expectations:
            self.assertEqual(self._first_words(sentence), wanted)

    # Verify that we are getting the linkages that we want
    # See below, remainder of parses are in text files
    def test_h_getting_links(self):
        # Each entry pairs a sentence with the exact diagram text expected
        # for its first linkage.
        expectations = (
            ('Scientists sometimes may repeat experiments or use groups.',
             "\n    +---------------------------------------Xp--------------------------------------+"
             "\n    +---------------------------->WV---------------------------->+                  |"
             "\n    |           +-----------------------Sp-----------------------+                  |"
             "\n    |           |                  +------------VJlpi------------+                  |"
             "\n    +-----Wd----+          +---E---+---I---+----Op----+          +VJrpi+---Op--+    |"
             "\n    |           |          |       |       |          |          |     |       |    |"
             "\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v use.v groups.n . "
             "\n\n"),
            ('I enjoy eating bass.',
             "\n    +-----------------Xp----------------+"
             "\n    +---->WV---->+                      |"
             "\n    +--Wd--+-Sp*i+---Pg---+---Ou---+    |"
             "\n    |      |     |        |        |    |"
             "\nLEFT-WALL I.p enjoy.v eating.v bass.n-u . "
             "\n\n"),
            ('We are from the planet Gorpon',
             "\n    +--->WV--->+     +---------Js--------+"
             "\n    +--Wd--+Spx+--Pp-+   +--DD--+---GN---+"
             "\n    |      |   |     |   |      |        |"
             "\nLEFT-WALL we are.v from the planet.n Gorpon[!] "
             "\n\n"),
        )
        for sentence, wanted_diagram in expectations:
            first = self.p.parse_sent(sentence)[0]
            self.assertEqual(first.diagram, wanted_diagram)