Exemplo n.º 1
0
 def test_init(self):
     """Exercise constructor validation for regular and root nodes.

     Pins which argument combinations are rejected (and with which
     exception type) and which are accepted.
     """
     # fewer than two positional arguments is rejected
     for too_few in ((), (1,)):
         with pytest.raises(TypeError):
             D(*too_few)
     # id and entity alone are accepted, as is the full positional form
     D(1, 'some-thing')
     D(1, 'some-thing', 0.5, 0, 3, [T('some-token')])

     # roots are special: id is None, entity is root, there must be
     # exactly 1 daughter node, and the remaining fields are None
     rejected_roots = (
         lambda: D(None),
         lambda: D(None, 'some-root', 0.5),
         lambda: D(None, 'some-root', start=1),
         lambda: D(None, 'some-root', end=1),
     )
     # each constructor call is deferred so it runs inside the context
     for build_root in rejected_roots:
         with pytest.raises(TypeError):
             build_root()
     # a root with two daughters is rejected
     with pytest.raises(ValueError):
         pair = [N(1, 'some-thing'), N(2, 'some-thing')]
         D(None, 'some-root', daughters=pair)
     # a root whose only daughter is a T rather than an N is rejected
     with pytest.raises(ValueError):
         D(None, 'some-root', daughters=[T('some-token')])
     # accepted roots: remaining fields omitted, or given as None
     D(None, 'some-root', daughters=[N(1, 'some-thing')])
     D(
         None,
         'some-root',
         None,
         None,
         None,
         daughters=[N(1, 'some-thing')],
     )

     # root not as top
     with pytest.raises(ValueError):
         lexical = N(2, 'a-lex', daughters=[T('some-token')])
         nested_root = N(None, 'some-root', daughters=[lexical])
         D(1, 'some-thing', daughters=[nested_root])
Exemplo n.º 2
0
 def test_attributes(self):
     """Check the attribute values exposed by constructed nodes."""
     # only id and entity supplied
     minimal = D(1, 'some-thing')
     minimal_expected = (
         ('id', 1),
         ('entity', 'some-thing'),
         ('score', -1),
         ('start', -1),
         ('end', -1),
         ('daughters', []),
     )
     for field, expected in minimal_expected:
         assert getattr(minimal, field) == expected

     # every field supplied positionally
     full = D(1, 'some-thing', 0.5, 1, 6, [T('some token')])
     full_expected = (
         ('id', 1),
         ('entity', 'some-thing'),
         ('score', 0.5),
         ('start', 1),
         ('end', 6),
         ('daughters', [T('some token')]),
     )
     for field, expected in full_expected:
         assert getattr(full, field) == expected

     # root node: id, score, start and end are all expected to be None
     root = D(None, 'some-root', daughters=[D(1, 'some-thing')])
     assert root.id is None
     assert root.entity == 'some-root'
     for field in ('score', 'start', 'end'):
         assert getattr(root, field) is None
     assert len(root.daughters) == 1
Exemplo n.º 3
0
 def test_fromstring(self):
     """Check from_string() on malformed and well-formed UDF strings."""

     def check_fields(node, id_, entity, score, start, end):
         # the same five scalar fields are compared for every parsed node
         assert node.id == id_
         assert node.entity == entity
         assert node.score == score
         assert node.start == start
         assert node.end == end

     with pytest.raises(DerivationSyntaxError):
         from_string('')
     # root with no children
     # TODO: this should be a DerivationSyntaxError but the current
     # UDF parser doesn't make that straightforward. Revisit this
     # when/if the UDF parsing changes
     with pytest.raises(ValueError):
         from_string('(some-root)')
     malformed = (
         # does not start with `(` or end with `)`
         ' (1 some-thing -1 -1 -1 ("token"))',
         ' (1 some-thing -1 -1 -1 ("token")) ',
         # uneven parens
         '(1 some-thing -1 -1 -1 ("token")',
     )
     for text in malformed:
         with pytest.raises(DerivationSyntaxError):
             from_string(text)

     # ok
     plain = from_string('(1 some-thing -1 -1 -1 ("token"))')
     check_fields(plain, 1, 'some-thing', -1.0, -1, -1)
     assert plain.daughters == [T('token')]

     # newlines in tree
     multiline = from_string('''(1 some-thing -1 -1 -1
                           ("token"))''')
     check_fields(multiline, 1, 'some-thing', -1.0, -1, -1)
     assert multiline.daughters == [T('token')]

     # LKB-style terminals
     lkb = from_string('''(1 some-thing -1 -1 -1
                           ("to ken" 1 2))''')
     check_fields(lkb, 1, 'some-thing', -1.0, -1, -1)
     assert lkb.daughters == [T('to ken')]  # start/end ignored

     # TFS-style terminals
     tfs = from_string(r'''(1 some-thing -1 -1 -1
                           ("to ken" 2 "token [ +FORM \"to\" ]"
                                     3 "token [ +FORM \"ken\" ]"))''')
     check_fields(tfs, 1, 'some-thing', -1.0, -1, -1)
     tfs_tokens = [
         Tk(2, r'token [ +FORM \"to\" ]'),
         Tk(3, r'token [ +FORM \"ken\" ]'),
     ]
     assert tfs.daughters == [T('to ken', tfs_tokens)]

     # longer example
     tree = from_string(r'''(root
         (1 some-thing 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1
     top = tree.daughters[0]
     check_fields(top, 1, 'some-thing', 0.4, 0, 5)
     assert len(top.daughters) == 2
     first = top.daughters[0]
     check_fields(first, 2, 'a-lex', 0.8, 0, 1)
     assert first.daughters == [
         T('a', [Tk(1, r'token [ +FORM \"a\" ]')])
     ]
     second = top.daughters[1]
     check_fields(second, 3, 'bcd-lex', 0.5, 2, 5)
     assert second.daughters == [
         T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])
     ]
Exemplo n.º 4
0
 def test_fromstring(self):
     """Check D.from_string() on malformed and well-formed input strings."""

     def check_fields(node, id_, entity, score, start, end):
         # the same five scalar fields are compared for every parsed node
         assert node.id == id_
         assert node.entity == entity
         assert node.score == score
         assert node.start == start
         assert node.end == end

     rejected = (
         '',
         # root with no children
         '(some-root)',
         # does not start with `(` or end with `)`
         ' (1 some-thing -1 -1 -1 ("token"))',
         ' (1 some-thing -1 -1 -1 ("token")) ',
         # uneven parens
         '(1 some-thing -1 -1 -1 ("token")',
     )
     for text in rejected:
         with pytest.raises(ValueError):
             D.from_string(text)

     # ok
     plain = D.from_string('(1 some-thing -1 -1 -1 ("token"))')
     check_fields(plain, 1, 'some-thing', -1.0, -1, -1)
     assert plain.daughters == [T('token')]

     # newlines in tree
     multiline = D.from_string('''(1 some-thing -1 -1 -1
                             ("token"))''')
     check_fields(multiline, 1, 'some-thing', -1.0, -1, -1)
     assert multiline.daughters == [T('token')]

     # LKB-style terminals
     lkb = D.from_string('''(1 some-thing -1 -1 -1
                             ("to ken" 1 2))''')
     check_fields(lkb, 1, 'some-thing', -1.0, -1, -1)
     assert lkb.daughters == [T('to ken')]  # start/end ignored

     # TFS-style terminals
     tfs = D.from_string(r'''(1 some-thing -1 -1 -1
                             ("to ken" 2 "token [ +FORM \"to\" ]"
                                       3 "token [ +FORM \"ken\" ]"))''')
     check_fields(tfs, 1, 'some-thing', -1.0, -1, -1)
     tfs_tokens = [
         Tk(2, r'token [ +FORM \"to\" ]'),
         Tk(3, r'token [ +FORM \"ken\" ]'),
     ]
     assert tfs.daughters == [T('to ken', tfs_tokens)]

     # longer example
     tree = D.from_string(r'''(root
         (1 some-thing 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1
     top = tree.daughters[0]
     check_fields(top, 1, 'some-thing', 0.4, 0, 5)
     assert len(top.daughters) == 2
     first = top.daughters[0]
     check_fields(first, 2, 'a-lex', 0.8, 0, 1)
     assert first.daughters == [
         T('a', [Tk(1, r'token [ +FORM \"a\" ]')])
     ]
     second = top.daughters[1]
     check_fields(second, 3, 'bcd-lex', 0.5, 2, 5)
     assert second.daughters == [
         T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])
     ]