Exemplo n.º 1
0
 def test_from_dict(self):
     """Check that from_dict() builds the same tree as from_string()."""
     # minimal terminal node: just an id, an entity, and a form
     simple_udf = '(root (1 some-thing -1 -1 -1 ("a")))'
     simple_dict = {
         'entity': 'root',
         'daughters': [{'id': 1, 'entity': 'some-thing', 'form': 'a'}],
     }
     assert D.from_dict(simple_dict) == D.from_string(simple_udf)

     # node with a type, a head flag, and two tokens on its terminal
     rich_udf = ''.join([
         r'(root (1 ^some-thing@some-type -1 -1 -1 ("a b"',
         r' 2 "token [ +FORM \"a\" ]"',
         r' 3 "token [ +FORM \"b\" ]")))',
     ])
     token_dicts = [
         {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'},
         {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'},
     ]
     rich_node = {
         'id': 1,
         'entity': 'some-thing',
         'type': 'some-type',
         'head': True,
         'form': 'a b',
         'tokens': token_dicts,
     }
     rich_dict = {'entity': 'root', 'daughters': [rich_node]}
     assert D.from_dict(rich_dict) == D.from_string(rich_udf)
Exemplo n.º 2
0
 def test_str(self):
     """str() of a parsed derivation must reproduce the parsed text."""
     flat = '(1 some-thing -1 -1 -1 ("token"))'
     nested = ''.join([
         r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 ',
         r'("a" 1 "token [ +FORM \"a\" ]")) ',
         r'(3 bcd-lex 0.5 2 5 ("bcd" 2 "token [ +FORM \"bcd\" ]"))))',
     ])
     for udf in (flat, nested):
         assert str(D.from_string(udf)) == udf
Exemplo n.º 3
0
 def test_eq(self):
     """Check what Derivation equality ignores and what it compares."""
     base_udf = '(1 some-type -1 -1 -1 ("token"))'
     base = D.from_string(base_udf)
     # identity: a second parse of the same text is equal
     assert base == D.from_string(base_udf)
     # ids and scores don't matter
     assert base == D.from_string('(100 some-type 0.114 -1 -1 ("token"))')
     # tokens matter
     assert base != D.from_string('(1 some-type -1 -1 -1 ("nekot"))')
     # and type of rhs
     assert base != base_udf
     # and tokenization
     assert base != D.from_string('(1 some-type -1 2 7 ("token"))')
     # and of course entities
     assert base != D.from_string('(1 epyt-emos -1 -1 -1 ("token"))')

     one_child = D.from_string('(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")))')
     y_then_z = D.from_string(
         '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')
     z_then_y = D.from_string(
         '(1 x -1 -1 -1 (3 z -1 -1 -1 ("z")) (2 y -1 -1 -1 ("y")))')
     # and number of children
     assert one_child != y_then_z
     # and order of children
     assert y_then_z != z_then_y
Exemplo n.º 4
0
 def test_str(self):
     """Round-trip UDF strings through from_string() and str()."""
     samples = (
         '(1 some-type -1 -1 -1 ("token"))',
         (r'(root (1 some-type 0.4 0 5 (2 a-lex 0.8 0 1 '
          r'("a" 1 "token [ +FORM \"a\" ]")) '
          r'(3 bcd-lex 0.5 2 5 ("bcd" 2 "token [ +FORM \"bcd\" ]"))))'),
     )
     for text in samples:
         parsed = D.from_string(text)
         assert str(parsed) == text
Exemplo n.º 5
0
 def test_to_udf(self):
     """Check to_udf() output with indent=None and with indent=1."""
     flat = '(1 some-type -1 -1 -1 ("token"))'
     assert D.from_string(flat).to_udf(indent=None) == flat
     assert D.from_string(flat).to_udf(indent=1) == '\n'.join([
         '(1 some-type -1 -1 -1',
         ' ("token"))',
     ])

     # two lexical daughters, one token each
     two_lex = ''.join([
         r'(root (1 some-type 0.4 0 5 (2 a-lex 0.8 0 1 ',
         r'("a" 3 "token [ +FORM \"a\" ]")) ',
         r'(4 bcd-lex 0.5 2 5 ("bcd" 5 "token [ +FORM \"bcd\" ]"))))',
     ])
     assert D.from_string(two_lex).to_udf(indent=1) == '\n'.join([
         '(root',
         ' (1 some-type 0.4 0 5',
         '  (2 a-lex 0.8 0 1',
         '   ("a"',
         '    3 "token [ +FORM \\"a\\" ]"))',
         '  (4 bcd-lex 0.5 2 5',
         '   ("bcd"',
         '    5 "token [ +FORM \\"bcd\\" ]"))))',
     ])

     # one lexical daughter with two tokens: each token gets its own line
     two_tokens = ''.join([
         r'(root (1 some-type 0.4 0 5 (2 a-lex 0.8 0 1 ',
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]"))))',
     ])
     assert D.from_string(two_tokens).to_udf(indent=1) == '\n'.join([
         '(root',
         ' (1 some-type 0.4 0 5',
         '  (2 a-lex 0.8 0 1',
         '   ("a b"',
         '    3 "token [ +FORM \\"a\\" ]"',
         '    4 "token [ +FORM \\"b\\" ]"))))',
     ])
Exemplo n.º 6
0
 def test_from_dict(self):
     """Compare from_dict() results with the matching from_string() parse."""
     def rooted(node):
         # wrap a single node dict under a 'root' entity
         return {'entity': 'root', 'daughters': [node]}

     # terminal with only a form
     bare = rooted({'id': 1, 'entity': 'some-type', 'form': 'a'})
     assert D.from_dict(bare) == D.from_string(
         '(root (1 some-type -1 -1 -1 ("a")))')

     # terminal whose form comes with two tokens
     tokenized = rooted({
         'id': 1,
         'entity': 'some-type',
         'form': 'a b',
         'tokens': [
             {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'},
             {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'},
         ],
     })
     tokenized_udf = (r'(root (1 some-type -1 -1 -1 ("a b"'
                      r' 2 "token [ +FORM \"a\" ]"'
                      r' 3 "token [ +FORM \"b\" ]")))')
     assert D.from_dict(tokenized) == D.from_string(tokenized_udf)
Exemplo n.º 7
0
 def test_eq(self):
     """Check which differences make two derivations compare unequal."""
     reference_udf = '(1 some-type -1 -1 -1 ("token"))'
     reference = D.from_string(reference_udf)

     equal_to_reference = [
         # identity
         '(1 some-type -1 -1 -1 ("token"))',
         # ids and scores don't matter
         '(100 some-type 0.114 -1 -1 ("token"))',
     ]
     for udf in equal_to_reference:
         assert reference == D.from_string(udf)

     unequal_to_reference = [
         # tokens matter
         '(1 some-type -1 -1 -1 ("nekot"))',
         # and tokenization
         '(1 some-type -1 2 7 ("token"))',
         # and of course entities
         '(1 epyt-emos -1 -1 -1 ("token"))',
     ]
     for udf in unequal_to_reference:
         assert reference != D.from_string(udf)
     # and type of rhs
     assert reference != reference_udf

     y_only = '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")))'
     y_z = '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))'
     z_y = '(1 x -1 -1 -1 (3 z -1 -1 -1 ("z")) (2 y -1 -1 -1 ("y")))'
     # and number of children
     assert D.from_string(y_only) != D.from_string(y_z)
     # and order of children
     assert D.from_string(y_z) != D.from_string(z_y)
Exemplo n.º 8
0
 def test_lexical_type(self):
     """
     Check lexical_type() on plain and '@'-annotated entities.

     NOTE: this returns None for standard UDF or non-preterminals
     """
     plain = D.from_string('(root (1 a-type -1 -1 -1 ("a"))'
                           '      (2 b-type -1 -1 -1 ("b")))')
     # None is a singleton, so compare by identity rather than equality
     assert plain.lexical_type() is None
     assert plain.daughters[0].lexical_type() is None
     assert plain.daughters[1].lexical_type() is None
     annotated = D.from_string('(root (1 a-type@a-type_le -1 -1 -1 ("a"))'
                               '      (2 b-type@b-type_le -1 -1 -1 ("b")))')
     assert annotated.lexical_type() is None
     # the part after '@' is reported as the lexical type
     assert annotated.daughters[0].lexical_type() == 'a-type_le'
     assert annotated.daughters[1].lexical_type() == 'b-type_le'
Exemplo n.º 9
0
 def test_lexical_type(self):
     """
     Check lexical_type() with and without an '@' suffix on entities.

     NOTE: this returns None for standard UDF or non-preterminals
     """
     tree = D.from_string('(root (1 a-type -1 -1 -1 ("a"))'
                          '      (2 b-type -1 -1 -1 ("b")))')
     # identity comparison is the idiomatic (PEP 8) check against None
     assert tree.lexical_type() is None
     for index in (0, 1):
         assert tree.daughters[index].lexical_type() is None

     tree = D.from_string('(root (1 a-type@a-type_le -1 -1 -1 ("a"))'
                          '      (2 b-type@b-type_le -1 -1 -1 ("b")))')
     assert tree.lexical_type() is None
     for index, expected in enumerate(('a-type_le', 'b-type_le')):
         assert tree.daughters[index].lexical_type() == expected
Exemplo n.º 10
0
 def derivation(self):
     """
     Return the 'derivation' value as a Derivation object when possible.

     A dict value is deserialized with Derivation.from_dict() and a
     string value with Derivation.from_string(); a missing (None)
     value, or a value of any other kind, is returned as it is.
     """
     value = self.get('derivation')
     if value is None:
         return None
     if isinstance(value, dict):
         return Derivation.from_dict(value)
     if isinstance(value, stringtypes):
         return Derivation.from_string(value)
     return value
Exemplo n.º 11
0
 def derivation(self):
     """
     Look up 'derivation' and deserialize it if it is a dict or string.

     Dicts go through Derivation.from_dict() and strings through
     Derivation.from_string(). Anything else, including None when the
     key has no value, is handed back unchanged.
     """
     found = self.get('derivation')
     if found is not None:
         if isinstance(found, dict):
             return Derivation.from_dict(found)
         if isinstance(found, stringtypes):
             return Derivation.from_string(found)
     return found
Exemplo n.º 12
0
 def test_basic_entity(self):
     """basic_entity() gives the entity without any '@...' suffix."""
     # this works for both UDX and standard UDF
     plain = D.from_string('(root (1 a-type -1 -1 -1 ("a"))'
                           '      (2 b-type -1 -1 -1 ("b")))')
     assert plain.basic_entity() == 'root'
     for index, basic in enumerate(('a-type', 'b-type')):
         assert plain.daughters[index].basic_entity() == basic

     suffixed = D.from_string('(root (1 a-type@a-type_le -1 -1 -1 ("a"))'
                              '      (2 b-type@b-type_le -1 -1 -1 ("b")))')
     assert suffixed.basic_entity() == 'root'
     expectations = (
         ('a-type@a-type_le', 'a-type'),
         ('b-type@b-type_le', 'b-type'),
     )
     for index, (entity, basic) in enumerate(expectations):
         daughter = suffixed.daughters[index]
         # here .entity still carries the full '@' form
         assert daughter.entity == entity
         assert daughter.basic_entity() == basic
Exemplo n.º 13
0
 def test_basic_entity(self):
     """Check basic_entity() on entities with and without '@' suffixes."""
     # this works for both UDX and standard UDF
     tree = D.from_string('(root (1 a-type -1 -1 -1 ("a"))'
                          '      (2 b-type -1 -1 -1 ("b")))')
     left = tree.daughters[0]
     right = tree.daughters[1]
     assert tree.basic_entity() == 'root'
     assert left.basic_entity() == 'a-type'
     assert right.basic_entity() == 'b-type'

     tree = D.from_string('(root (1 a-type@a-type_le -1 -1 -1 ("a"))'
                          '      (2 b-type@b-type_le -1 -1 -1 ("b")))')
     left = tree.daughters[0]
     right = tree.daughters[1]
     assert tree.basic_entity() == 'root'
     # the full entity keeps the suffix; basic_entity() drops it
     assert left.entity == 'a-type@a-type_le'
     assert left.basic_entity() == 'a-type'
     assert right.entity == 'b-type@b-type_le'
     assert right.basic_entity() == 'b-type'
Exemplo n.º 14
0
 def test_terminals(self):
     """Check the forms of the nodes given by terminals()."""
     plain = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing -1 -1 -1 ("a"))'
                           '  (3 b-thing -1 -1 -1 ("b"))))')
     plain_forms = [terminal.form for terminal in plain.terminals()]
     assert plain_forms == ['a', 'b']

     # terminals that carry tokens; the first form spans two tokens
     tokenized_text = ''.join([
         '(root',
         ' (1 some-thing@some-type 0.4 0 5',
         '  (2 a-lex@a-type 0.8 0 1',
         '   ("a b"',
         '    3 "token [ +FORM \\"a\\" ]"',
         '    4 "token [ +FORM \\"b\\" ]"))',
         '  (5 b-lex@b-type 0.9 1 2',
         '   ("b"',
         '    6 "token [ +FORM \\"b\\" ]"))))',
     ])
     tokenized = D.from_string(tokenized_text)
     tokenized_forms = [terminal.form for terminal in tokenized.terminals()]
     assert tokenized_forms == ['a b', 'b']
Exemplo n.º 15
0
 def test_is_head(self):
     """Check is_head() on an unmarked and a '^'-marked daughter."""
     # NOTE: is_head() is undefined for standard UDF without the
     # head marker ^
     tree = D.from_string('(root (1 some-type -1 -1 -1 ("a"))'
                          '      (2 ^some-type -1 -1 -1 ("b")))')
     unmarked = tree.daughters[0]
     marked = tree.daughters[1]
     assert unmarked.is_head() == False
     assert marked.is_head() == True
Exemplo n.º 16
0
 def test_is_head(self):
     """Only the daughter marked with '^' should report is_head()."""
     # NOTE: is_head() is undefined for standard UDF without the
     # head marker ^
     tree = D.from_string('(root (1 some-type -1 -1 -1 ("a"))'
                          '      (2 ^some-type -1 -1 -1 ("b")))')
     for index, expected in enumerate((False, True)):
         assert tree.daughters[index].is_head() == expected
Exemplo n.º 17
0
 def test_entity(self):
     """The entity attribute excludes any '@type' part of the node name."""
     plain_text = ('(root (1 some-thing -1 -1 -1'
                   '  (2 a-thing -1 -1 -1 ("a"))'
                   '  (3 b-thing -1 -1 -1 ("b"))))')
     typed_text = ('(root (1 some-thing@some-type -1 -1 -1'
                   '  (2 a-thing@a-type -1 -1 -1 ("a"))'
                   '  (3 b-thing@b-type -1 -1 -1 ("b"))))')
     # both inputs must yield exactly the same entities
     for text in (plain_text, typed_text):
         tree = D.from_string(text)
         assert tree.entity == 'root'
         top = tree.daughters[0]
         assert top.entity == 'some-thing'
         assert top.daughters[0].entity == 'a-thing'
         assert top.daughters[1].entity == 'b-thing'
Exemplo n.º 18
0
 def test_type(self):
     """The type attribute is the '@...' part of a node name, else None."""
     plain = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing -1 -1 -1 ("a"))'
                           '  (3 b-thing -1 -1 -1 ("b"))))')
     # None is a singleton, so compare by identity rather than equality
     assert plain.type is None
     top = plain.daughters[0]
     assert top.type is None
     assert top.daughters[0].type is None
     assert top.daughters[1].type is None

     typed = D.from_string('(root (1 some-thing@some-type -1 -1 -1'
                           '  (2 a-thing@a-type -1 -1 -1 ("a"))'
                           '  (3 b-thing@b-type -1 -1 -1 ("b"))))')
     # the root carries no '@' part, so its type stays None
     assert typed.type is None
     top = typed.daughters[0]
     assert top.type == 'some-type'
     assert top.daughters[0].type == 'a-type'
     assert top.daughters[1].type == 'b-type'
Exemplo n.º 19
0
 def test_lexical_type(self):
     """
     Check lexical_type() values and that calling it warns.

     Every call is made inside pytest.warns(DeprecationWarning), so
     the test also expects a DeprecationWarning from each block.

     NOTE: this returns None for standard UDF or non-preterminals
     """
     plain = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing -1 -1 -1 ("a"))'
                           '  (3 b-thing -1 -1 -1 ("b"))))')
     with pytest.warns(DeprecationWarning):
         # None is a singleton, so compare by identity
         assert plain.lexical_type() is None
         top = plain.daughters[0]
         assert top.daughters[0].lexical_type() is None
         assert top.daughters[1].lexical_type() is None

     typed = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing@a-type_le -1 -1 -1 ("a"))'
                           '  (3 b-thing@b-type_le -1 -1 -1 ("b"))))')
     with pytest.warns(DeprecationWarning):
         assert typed.lexical_type() is None
         top = typed.daughters[0]
         assert top.daughters[0].lexical_type() == 'a-type_le'
         assert top.daughters[1].lexical_type() == 'b-type_le'
Exemplo n.º 20
0
def preprocess(inp, derivation):
    """
    Rebuild a space-separated surface string from a derivation.

    The derivation string is parsed with Derivation.from_string() and
    get_tokens() supplies (token, trace) pairs. The lemma of each pair
    is the third-from-last '/'-separated field of its trace.

    * If the lemma does not start with 'generic', the form is the
      slice of *inp* between the +FROM and +TO values found in the
      token's TFS.
    * Otherwise the form is the lemma itself, decorated with the
      punctuation whose marker names occur in the trace.

    Finally, a form that contains '-' but does not end in '-' is
    reduced to its second hyphen-separated part.

    Args:
        inp: the input string that native tokens are sliced from
        derivation: a derivation string for Derivation.from_string()
    Returns:
        The forms of all tokens joined by single spaces.
    Raises:
        IndexError: if a trace has fewer than three '/'-separated
            fields
        AttributeError: if a native token's TFS has no +TO or +FROM
            value in the expected shape (re.search() returns None)
    """
    # (marker, template) pairs. Every marker found in the trace is
    # applied, in this order, so decorations can stack.
    # NOTE(review): 'dqright'/'dqleft' add a single quote, and
    # 'comma-rp'/'qmark-bang' stack on top of 'comma'/'qmark' because
    # matching is by substring -- kept as-is; confirm this is intended.
    punctuation = (
        ('comma', '%s,'),
        ('asterisk_', '%s*'),
        ('asterisk-pre', '*%s'),
        ('threedot', '%s...'),
        ('hyphen', '%s-'),
        ('sqright', '%s\''),
        ('sqleft', '\'%s'),
        ('dqright', '%s\''),
        ('dqleft', '\'%s'),
        ('rparen', '%s)'),
        ('lparen', '(%s'),
        ('comma-rp', '%s,)'),
        ('bang', '%s!'),
        ('qmark', '%s?'),
        ('qmark-bang', '%s?!'),
        ('period', '%s.'),
    )
    # compiled once instead of being looked up for every token
    to_re = re.compile(r'\+TO .*?\\"(\d+)\\"')
    from_re = re.compile(r'\+FROM .*?\\"(\d+)\\"')

    sent = []
    for token, trace in get_tokens(Derivation.from_string(derivation)):
        lemma = trace.split('/')[-3]

        if lemma.startswith('generic'):
            form = lemma
            # add punctuation
            for marker, template in punctuation:
                if marker in trace:
                    form = template % form
        else:
            # native entry: recover the form from the input string
            to = int(to_re.search(token.tfs).group(1))
            fro = int(from_re.search(token.tfs).group(1))
            form = inp[fro:to]

        # fix compounds
        if '-' in form and form[-1] != '-':
            form = form.split('-')[1]

        sent.append(form)
    return ' '.join(sent)
Exemplo n.º 21
0
 def test_basic_entity(self):
     """
     Check basic_entity() values and that calling it warns.

     All calls happen inside pytest.warns(DeprecationWarning) blocks.
     """
     # this works for both UDX and standard UDF
     plain = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing -1 -1 -1 ("a"))'
                           '  (3 b-thing -1 -1 -1 ("b"))))')
     with pytest.warns(DeprecationWarning):
         assert plain.basic_entity() == 'root'
         top = plain.daughters[0]
         for index, expected in enumerate(('a-thing', 'b-thing')):
             assert top.daughters[index].basic_entity() == expected

     typed = D.from_string('(root (1 some-thing -1 -1 -1'
                           '  (2 a-thing@a-type_le -1 -1 -1 ("a"))'
                           '  (3 b-thing@b-type_le -1 -1 -1 ("b"))))')
     with pytest.warns(DeprecationWarning):
         assert typed.basic_entity() == 'root'
         top = typed.daughters[0]
         assert top.basic_entity() == 'some-thing'
         # the '@...' suffix is not part of the basic entity
         for index, expected in enumerate(('a-thing', 'b-thing')):
             assert top.daughters[index].basic_entity() == expected
Exemplo n.º 22
0
 def test_to_udx(self):
     """Check to_udx() output with indent=None and with indent=1."""
     flat = '(1 some-thing -1 -1 -1 ("token"))'
     assert D.from_string(flat).to_udx(indent=None) == flat

     # '@type' parts must survive in the indented output
     typed = ''.join([
         r'(root (1 some-thing@some-type 0.4 0 5 ',
         r'(2 a-lex@a-type 0.8 0 1 ',
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]")) ',
         r'(5 b-lex@b-type 0.9 1 2 ',
         r'("b" 6 "token [ +FORM \"b\" ]"))))',
     ])
     expected_lines = [
         '(root',
         ' (1 some-thing@some-type 0.4 0 5',
         '  (2 a-lex@a-type 0.8 0 1',
         '   ("a b"',
         '    3 "token [ +FORM \\"a\\" ]"',
         '    4 "token [ +FORM \\"b\\" ]"))',
         '  (5 b-lex@b-type 0.9 1 2',
         '   ("b"',
         '    6 "token [ +FORM \\"b\\" ]"))))',
     ]
     assert D.from_string(typed).to_udx(indent=1) == '\n'.join(expected_lines)
Exemplo n.º 23
0
 def test_is_head(self):
     """
     Check is_head() for roots, only children, and sibling nodes.

     NOTE: is_head() is undefined for nodes with multiple
     siblings, none of which are marked head (e.g. in plain UDF)
     """
     unmarked = D.from_string('(root (1 some-thing -1 -1 -1'
                              '  (2 some-thing -1 -1 -1 ("a"))'
                              '  (3 some-thing -1 -1 -1 ("b"))))')
     assert unmarked.is_head() == True
     top = unmarked.daughters[0]
     assert top.is_head() == True
     # undecidable case: None is a singleton, so compare by identity
     assert top.daughters[0].is_head() is None
     assert top.daughters[1].is_head() is None

     # if one sibling is marked, all become decidable
     marked = D.from_string('(root (1 some-thing -1 -1 -1'
                            '  (2 some-thing -1 -1 -1 ("a"))'
                            '  (3 ^some-thing -1 -1 -1 ("b"))))')
     assert marked.is_head() == True
     top = marked.daughters[0]
     assert top.is_head() == True
     assert top.daughters[0].is_head() == False
     assert top.daughters[1].is_head() == True
Exemplo n.º 24
0
 def test_to_udf(self):
     """Check to_udf() output, including that '@type' parts are dropped."""
     flat = '(1 some-thing -1 -1 -1 ("token"))'
     assert D.from_string(flat).to_udf(indent=None) == flat
     assert D.from_string(flat).to_udf(indent=1) == '\n'.join([
         '(1 some-thing -1 -1 -1',
         ' ("token"))',
     ])

     # two lexical daughters, one token each
     two_lex = ''.join([
         r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 ',
         r'("a" 3 "token [ +FORM \"a\" ]")) ',
         r'(4 bcd-lex 0.5 2 5 ("bcd" 5 "token [ +FORM \"bcd\" ]"))))',
     ])
     assert D.from_string(two_lex).to_udf(indent=1) == '\n'.join([
         '(root',
         ' (1 some-thing 0.4 0 5',
         '  (2 a-lex 0.8 0 1',
         '   ("a"',
         '    3 "token [ +FORM \\"a\\" ]"))',
         '  (4 bcd-lex 0.5 2 5',
         '   ("bcd"',
         '    5 "token [ +FORM \\"bcd\\" ]"))))',
     ])

     # one lexical daughter with two tokens, first without and then
     # with '@type' parts; both must serialize to the same text
     two_tokens_indented = '\n'.join([
         '(root',
         ' (1 some-thing 0.4 0 5',
         '  (2 a-lex 0.8 0 1',
         '   ("a b"',
         '    3 "token [ +FORM \\"a\\" ]"',
         '    4 "token [ +FORM \\"b\\" ]"))))',
     ])
     untyped = ''.join([
         r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 ',
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]"))))',
     ])
     typed = ''.join([
         r'(root (1 some-thing@some-type 0.4 0 5 (2 a-lex@a-type 0.8 0 1 ',
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]"))))',
     ])
     for text in (untyped, typed):
         assert D.from_string(text).to_udf(indent=1) == two_tokens_indented
Exemplo n.º 25
0
 def test_to_dict(self):
     """Check to_dict() with all fields and with a `fields` subset."""
     fields = ('id', 'entity', 'score')

     leaf_udf = '(1 some-type -1 -1 -1 ("token"))'
     assert D.from_string(leaf_udf).to_dict() == dict(
         id=1, entity='some-type', score=-1.0, start=-1, end=-1,
         form='token')
     # daughters and form are always shown
     assert D.from_string(leaf_udf).to_dict(fields=fields) == dict(
         id=1, entity='some-type', score=-1.0, form='token')

     tokenized_udf = ''.join([
         r'(1 a-lex -1 -1 -1 ("a b" 2 "token [ +FORM \"a\" ]"',
         r' 3 "token [ +FORM \"b\" ]"))',
     ])
     token_dicts = [
         {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'},
         {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'},
     ]
     assert D.from_string(tokenized_udf).to_dict() == dict(
         id=1, entity='a-lex', score=-1.0, start=-1, end=-1,
         form='a b', tokens=token_dicts)
     # with this subset, tokens are left out but form is kept
     assert D.from_string(tokenized_udf).to_dict(fields=fields) == dict(
         id=1, entity='a-lex', score=-1.0, form='a b')
Exemplo n.º 26
0
def parse_spans(span_lines, derivation_str):
    """
    Map every terminal of a derivation to a (start, end) span.

    Each line of *span_lines* is searched for the pattern
    ``(<key>, <n>, <n>, <start:end>``; lines without a match are
    skipped, and a later line with the same key replaces an earlier
    one. For each terminal of the parsed *derivation_str*, the start
    is taken from its first token and the end from its last token,
    both looked up by token id.

    Raises:
        KeyError: if a needed token id has no entry from *span_lines*
    """
    pattern = re.compile(r"\((\d+), \d+, \d+, <(\d+):(\d+)>")
    found = (pattern.search(row) for row in span_lines.split("\n"))
    span_by_id = {
        int(hit.group(1)): (int(hit.group(2)), int(hit.group(3)))
        for hit in found if hit
    }

    tree = Derivation.from_string(derivation_str)  # type: Derivation
    spans = []
    for terminal in tree.terminals():
        first_start = span_by_id[terminal.tokens[0].id][0]
        last_end = span_by_id[terminal.tokens[-1].id][1]
        spans.append((first_start, last_end))
    return spans
Exemplo n.º 27
0
def prof_entries(prof,
                 typemap,
                 lexmap,
                 table='result',
                 cols=('derivation', 'mrs')):
    """
    Generate lexical-entry tuples from the derivations of a profile.

    Rows are selected from *table* of the ItsdbProfile at *prof*, and
    each derivation is parsed and passed to _derivation_les(), which
    gives (entity, type, form) triples. A missing type is looked up
    in *lexmap* by entity. An entry is produced only when its type is
    a key of *typemap* and its (type, orthography) pair has not been
    produced before.

    Yields:
        (lename, supertype, orth, None, None) tuples; each one is also
        printed before it is yielded.
    """
    profile = itsdb.ItsdbProfile(prof)
    emitted = set()
    # the second column is selected but not used
    for derivation, _ in profile.select(table, cols):
        tree = Derivation.from_string(derivation)
        for entity, typ, form in _derivation_les(tree):
            if typ is None:
                typ = lexmap.get(entity)
            orth = ', '.join('"{}"'.format(part) for part in form)
            key = (typ, orth)
            if key in emitted or typ not in typemap:
                continue
            supertype = typemap[typ][0]  # more than 1?
            lename = '+'.join(form) + '-' + supertype
            entry = (lename, supertype, orth, None, None)
            print(*entry)
            yield entry
            emitted.add(key)
Exemplo n.º 28
0
 def test_fromstring(self):
     """Exercise from_string() on malformed and well-formed input."""
     malformed = (
         '',
         # root with no children
         '(some-root)',
         # does not start with `(` or end with `)`
         ' (1 some-type -1 -1 -1 ("token"))',
         ' (1 some-type -1 -1 -1 ("token")) ',
         # uneven parens
         '(1 some-type -1 -1 -1 ("token")',
     )
     for bad in malformed:
         with pytest.raises(ValueError):
             D.from_string(bad)

     def fields(node):
         # the five leading fields of a node, in serialization order
         return (node.id, node.entity, node.score, node.start, node.end)

     # ok
     node = D.from_string('(1 some-type -1 -1 -1 ("token"))')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     assert node.daughters == [T('token')]
     # newlines in tree
     node = D.from_string('''(1 some-type -1 -1 -1
                             ("token"))''')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     assert node.daughters == [T('token')]
     # LKB-style terminals
     node = D.from_string('''(1 some-type -1 -1 -1
                             ("to ken" 1 2))''')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     assert node.daughters == [T('to ken')]  # start/end ignored
     # TFS-style terminals
     node = D.from_string(r'''(1 some-type -1 -1 -1
                             ("to ken" 2 "token [ +FORM \"to\" ]"
                                       3 "token [ +FORM \"ken\" ]"))''')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     assert node.daughters == [
         T('to ken', [Tk(2, r'token [ +FORM \"to\" ]'),
                      Tk(3, r'token [ +FORM \"ken\" ]')])
     ]
     # longer example
     tree = D.from_string(r'''(root
         (1 some-type 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1
     top = tree.daughters[0]
     assert fields(top) == (1, 'some-type', 0.4, 0, 5)
     assert len(top.daughters) == 2
     a_lex = top.daughters[0]
     assert fields(a_lex) == (2, 'a-lex', 0.8, 0, 1)
     assert a_lex.daughters == [T('a', [Tk(1, r'token [ +FORM \"a\" ]')])]
     bcd_lex = top.daughters[1]
     assert fields(bcd_lex) == (3, 'bcd-lex', 0.5, 2, 5)
     assert bcd_lex.daughters == [
         T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])
     ]
Exemplo n.º 29
0
    def test_to_dict(self):
        """Check to_dict() on a leaf and on a tree, with and without fields."""
        fields = ('id', 'entity', 'score')

        leaf_udf = '(1 some-thing -1 -1 -1 ("token"))'
        assert D.from_string(leaf_udf).to_dict() == dict(
            id=1, entity='some-thing', score=-1.0, start=-1, end=-1,
            form='token')
        # daughters and form are always shown
        assert D.from_string(leaf_udf).to_dict(fields=fields) == dict(
            id=1, entity='some-thing', score=-1.0, form='token')

        tree_udf = ''.join([
            r'(root (0 top@top-rule -1 -1 -1',
            r' (1 a-lex@a-type -1 -1 -1 ("a b" 2 "token [ +FORM \"a\" ]"',
            r'  3 "token [ +FORM \"b\" ]"))',
            r' (4 ^c-lex@c-type -1 -1 -1 ("c" 5 "token [ +FORM \"c\" ]"))))',
        ])

        # expected output with every field present
        a_lex_full = {
            'id': 1,
            'entity': 'a-lex',
            'type': 'a-type',
            'score': -1.0,
            'start': -1,
            'end': -1,
            'form': 'a b',
            'tokens': [
                {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'},
                {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'},
            ],
        }
        # only the '^'-marked node has a 'head' entry
        c_lex_full = {
            'id': 4,
            'entity': 'c-lex',
            'type': 'c-type',
            'head': True,
            'score': -1.0,
            'start': -1,
            'end': -1,
            'form': 'c',
            'tokens': [
                {'id': 5, 'tfs': r'token [ +FORM \"c\" ]'},
            ],
        }
        top_full = {
            'id': 0,
            'entity': 'top',
            'type': 'top-rule',
            'score': -1.0,
            'start': -1,
            'end': -1,
            'daughters': [a_lex_full, c_lex_full],
        }
        assert D.from_string(tree_udf).to_dict() == {
            'entity': 'root',
            'daughters': [top_full],
        }

        # expected output restricted to `fields`
        a_lex_brief = dict(id=1, entity='a-lex', score=-1.0, form='a b')
        c_lex_brief = dict(id=4, entity='c-lex', score=-1.0, form='c')
        top_brief = {
            'id': 0,
            'entity': 'top',
            'score': -1.0,
            'daughters': [a_lex_brief, c_lex_brief],
        }
        assert D.from_string(tree_udf).to_dict(fields=fields) == {
            'entity': 'root',
            'daughters': [top_brief],
        }
Exemplo n.º 30
0
 def test_is_root(self):
     """is_root() holds only for the outer 'root' node of a derivation."""
     bare_node = D.from_string('(1 some-thing -1 -1 -1 ("token"))')
     assert bare_node.is_root() == False
     rooted = D.from_string('(root (1 some-thing -1 -1 -1 ("token")))')
     assert rooted.is_root() == True
     inner = rooted.daughters[0]
     assert inner.is_root() == False
Exemplo n.º 31
0
 def test_fromstring(self):
     """Exercise from_string() on malformed and well-formed input."""
     malformed = (
         '',
         # root with no children
         '(some-root)',
         # does not start with `(` or end with `)`
         ' (1 some-type -1 -1 -1 ("token"))',
         ' (1 some-type -1 -1 -1 ("token")) ',
         # uneven parens
         '(1 some-type -1 -1 -1 ("token")',
     )
     for bad in malformed:
         with pytest.raises(ValueError):
             D.from_string(bad)

     def fields(node):
         # the five leading fields of a node, in serialization order
         return (node.id, node.entity, node.score, node.start, node.end)

     # ok
     node = D.from_string('(1 some-type -1 -1 -1 ("token"))')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     # terminals are compared as tuples of the raw, still-quoted fields
     assert node.daughters == [('"token"',)]
     # newlines in tree
     node = D.from_string('''(1 some-type -1 -1 -1
                             ("token"))''')
     assert fields(node) == (1, 'some-type', -1.0, -1, -1)
     assert node.daughters == [('"token"',)]
     # longer example
     tree = D.from_string(r'''(root
         (1 some-type 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1
     top = tree.daughters[0]
     assert fields(top) == (1, 'some-type', 0.4, 0, 5)
     assert len(top.daughters) == 2
     a_lex = top.daughters[0]
     assert fields(a_lex) == (2, 'a-lex', 0.8, 0, 1)
     assert a_lex.daughters == [('"a"', '1', r'"token [ +FORM \"a\" ]"')]
     bcd_lex = top.daughters[1]
     assert fields(bcd_lex) == (3, 'bcd-lex', 0.5, 2, 5)
     assert bcd_lex.daughters == [
         ('"bcd"', '2', r'"token [ +FORM \"bcd\" ]"')
     ]
Exemplo n.º 32
0
 def test_fromstring(self):
     """Check from_string() error handling and the parsed node fields."""
     # empty input
     with pytest.raises(ValueError):
         D.from_string('')
     # root with no children
     with pytest.raises(ValueError):
         D.from_string('(some-root)')
     # does not start with `(` or end with `)`
     for padded in (' (1 some-type -1 -1 -1 ("token"))',
                    ' (1 some-type -1 -1 -1 ("token")) '):
         with pytest.raises(ValueError):
             D.from_string(padded)
     # uneven parens
     with pytest.raises(ValueError):
         D.from_string('(1 some-type -1 -1 -1 ("token")')

     # ok
     one_line = D.from_string('(1 some-type -1 -1 -1 ("token"))')
     # newlines in tree
     two_lines = D.from_string('''(1 some-type -1 -1 -1
                             ("token"))''')
     # both spellings must parse to the same field values
     for parsed in (one_line, two_lines):
         assert (parsed.id, parsed.entity) == (1, 'some-type')
         assert parsed.score == -1.0
         assert (parsed.start, parsed.end) == (-1, -1)
         assert parsed.daughters == [('"token"', )]

     # longer example
     tree = D.from_string(r'''(root
         (1 some-type 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1
     top = tree.daughters[0]
     assert (top.id, top.entity) == (1, 'some-type')
     assert top.score == 0.4
     assert (top.start, top.end) == (0, 5)
     assert len(top.daughters) == 2

     first = top.daughters[0]
     assert (first.id, first.entity) == (2, 'a-lex')
     assert first.score == 0.8
     assert (first.start, first.end) == (0, 1)
     assert first.daughters == [('"a"', '1', r'"token [ +FORM \"a\" ]"')]

     second = top.daughters[1]
     assert (second.id, second.entity) == (3, 'bcd-lex')
     assert second.score == 0.5
     assert (second.start, second.end) == (2, 5)
     assert second.daughters == [
         ('"bcd"', '2', r'"token [ +FORM \"bcd\" ]"')
     ]
Exemplo n.º 33
0
 def test_is_root(self):
     """Check is_root() on a bare node, a root node, and the root's child."""
     node_udf = '(1 some-type -1 -1 -1 ("token"))'
     assert D.from_string(node_udf).is_root() == False
     # the same node wrapped in a root
     tree = D.from_string('(root (1 some-type -1 -1 -1 ("token")))')
     assert tree.is_root() == True
     assert tree.daughters[0].is_root() == False
Exemplo n.º 34
0
def test_ParseResult():
    """Check what ParseResult stores and what its accessor methods return.

    An empty ParseResult has length 0 and mrs(), dmrs(), eds() and
    derivation() all return None. For each of those keys, a ParseResult
    built from a single raw value (string or dict) must have length 1,
    return the raw value unchanged via item access, and return from the
    accessor of the same name an object equal to a reference built with
    simplemrs.loads_one, Dmrs.from_dict, Eds.from_dict or
    Derivation.from_dict.
    """

    def check(key, raw, expected):
        # Shared assertions for one (key, raw value) pair: exactly one
        # entry, stored as given, and the accessor named `key` returns
        # an object equal to `expected`.
        r = ParseResult(**{key: raw})
        assert len(r) == 1
        assert r[key] == raw
        assert getattr(r, key)() == expected

    # --- empty result ---
    r = ParseResult()
    assert len(r) == 0
    assert r.mrs() is None
    assert r.dmrs() is None
    assert r.eds() is None
    assert r.derivation() is None

    # --- mrs: string and dict forms ---
    mrs_s = (
        '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] >'
        ' HCONS: < h0 qeq h1 > ]'
    )
    mrs_d = {
        'top': 'h0',
        'relations': [
            {
                'predicate': '_rain_v_1',
                'label': 'h1',
                'arguments': {'ARG0': 'e2'},
            },
        ],
        'constraints': [
            {'relation': 'qeq', 'high': 'h0', 'low': 'h1'},
        ],
    }
    mrs = simplemrs.loads_one(mrs_s)
    check('mrs', mrs_s, mrs)
    check('mrs', mrs_d, mrs)

    # --- dmrs: dict form only ---
    dmrs_d = {
        'nodes': [
            {
                'nodeid': 10000,
                'predicate': '_rain_v_1',
                'sortinfo': {'cvarsort': 'e'},
            },
        ],
        'links': [
            {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'},
        ],
    }
    dmrs = Dmrs.from_dict(dmrs_d)
    check('dmrs', dmrs_d, dmrs)

    # --- eds: string and dict forms ---
    eds_d = {
        'top': 'e2',
        'nodes': {
            'e2': {
                'label': '_rain_v_1',
                'lnk': {'from': 3, 'to': 9},
                'edges': {},
            },
        },
    }
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)
    check('eds', eds_s, eds)
    check('eds', eds_d, eds)

    # TODO: this test used to carry commented-out checks for mrs, dmrs and
    # eds: ParseResult(<key>='nonsense') was expected to keep the raw
    # string and the accessor was expected to raise
    # XmrsDeserializationError. They are still disabled; re-enable them
    # once that behaviour is confirmed.

    # --- derivation: string and dict forms ---
    # several changes were made to the data below for compatibility:
    #  - removed head annotation (on W_PERIOD_PLR)
    #  - removed type info
    #  - removed from/to info
    #  - added start/end
    #  - escaped quotes
    #  - capitalized entity names
    # The "dropped:" notes record the key/value pairs that were taken out.
    deriv_s = (
        '(189 SB-HD_MC_C 0.228699 0 2 '
        '(37 it 0.401245 0 1 '
        '("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) '
        '(188 W_PERIOD_PLR -0.113641 1 2 '
        '(187 V_PST_OLR 0 1 2 '
        '(56 rain_v1 0 1 2 '
        '("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" '
        '+TO \\"10\\" ]")))))'
    )
    it_tfs = "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]"
    rained_tfs = (
        "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]"
    )
    deriv_d = {
        # dropped: "type": "subjh_mc_rule"
        "id": 189,
        "entity": "SB-HD_MC_C",
        "label": "S",
        "score": 0.228699,
        "start": 0,
        "end": 2,
        "daughters": [
            {
                # dropped: "type": "n_-_pr-it-x_le", "from": 0, "to": 2
                "id": 37,
                "entity": "it",
                "score": 0.401245,
                "start": 0,
                "end": 1,
                "form": "it",
                # dropped from the token: "from": 0, "to": 2
                "tokens": [{"id": 34, "tfs": it_tfs}],
            },
            {
                # dropped: "type": "punctuation_period_rule"
                "id": 188,
                "entity": "W_PERIOD_PLR",
                "score": -0.113641,
                "start": 1,
                "end": 2,
                "daughters": [
                    {
                        # dropped: "type": "v_pst_inflrule"
                        "id": 187,
                        "entity": "V_PST_OLR",
                        "score": 0,
                        "start": 1,
                        "end": 2,
                        "daughters": [
                            {
                                # dropped: "type": "v_-_it_le",
                                #          "from": 3, "to": 10
                                "id": 56,
                                "entity": "rain_v1",
                                "score": 0,
                                "start": 1,
                                "end": 2,
                                "form": "rained.",
                                # dropped from the token:
                                #   "from": 3, "to": 10
                                "tokens": [{"id": 32, "tfs": rained_tfs}],
                            },
                        ],
                    },
                ],
            },
        ],
    }
    deriv = Derivation.from_dict(deriv_d)
    check('derivation', deriv_s, deriv)
    check('derivation', deriv_d, deriv)
Exemplo n.º 35
0
def test_ParseResult():
    """Check what ParseResult stores and what its accessor methods return.

    An empty ParseResult has length 0 and mrs(), dmrs(), eds() and
    derivation() all return None. For each of those keys, a ParseResult
    built from a single raw value (string or dict) must have length 1,
    return the raw value unchanged via item access, and return from the
    accessor of the same name an object equal to a reference built with
    simplemrs.loads_one, Dmrs.from_dict, Eds.from_dict or
    Derivation.from_dict.
    """

    def check(key, raw, expected):
        # Shared assertions for one (key, raw value) pair: exactly one
        # entry, stored as given, and the accessor named `key` returns
        # an object equal to `expected`.
        r = ParseResult(**{key: raw})
        assert len(r) == 1
        assert r[key] == raw
        assert getattr(r, key)() == expected

    # --- empty result ---
    r = ParseResult()
    assert len(r) == 0
    assert r.mrs() is None
    assert r.dmrs() is None
    assert r.eds() is None
    assert r.derivation() is None

    # --- mrs: string and dict forms ---
    mrs_s = (
        '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] >'
        ' HCONS: < h0 qeq h1 > ]'
    )
    mrs_d = {
        'top': 'h0',
        'relations': [
            {
                'predicate': '_rain_v_1',
                'label': 'h1',
                'arguments': {'ARG0': 'e2'},
            },
        ],
        'constraints': [
            {'relation': 'qeq', 'high': 'h0', 'low': 'h1'},
        ],
    }
    mrs = simplemrs.loads_one(mrs_s)
    check('mrs', mrs_s, mrs)
    check('mrs', mrs_d, mrs)

    # --- dmrs: dict form only ---
    dmrs_d = {
        'nodes': [
            {
                'nodeid': 10000,
                'predicate': '_rain_v_1',
                'sortinfo': {'cvarsort': 'e'},
            },
        ],
        'links': [
            {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'},
        ],
    }
    dmrs = Dmrs.from_dict(dmrs_d)
    check('dmrs', dmrs_d, dmrs)

    # --- eds: string and dict forms ---
    eds_d = {
        'top': 'e2',
        'nodes': {
            'e2': {
                'label': '_rain_v_1',
                'lnk': {'from': 3, 'to': 9},
                'edges': {},
            },
        },
    }
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)
    check('eds', eds_s, eds)
    check('eds', eds_d, eds)

    # TODO: this test used to carry commented-out checks for mrs, dmrs and
    # eds: ParseResult(<key>='nonsense') was expected to keep the raw
    # string and the accessor was expected to raise
    # XmrsDeserializationError. They are still disabled; re-enable them
    # once that behaviour is confirmed.

    # --- derivation: string and dict forms ---
    # several changes were made to the data below for compatibility:
    #  - removed head annotation (on W_PERIOD_PLR)
    #  - removed type info
    #  - removed from/to info
    #  - added start/end
    #  - escaped quotes
    #  - capitalized entity names
    # The "dropped:" notes record the key/value pairs that were taken out.
    deriv_s = (
        '(189 SB-HD_MC_C 0.228699 0 2 '
        '(37 it 0.401245 0 1 '
        '("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) '
        '(188 W_PERIOD_PLR -0.113641 1 2 '
        '(187 V_PST_OLR 0 1 2 '
        '(56 rain_v1 0 1 2 '
        '("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" '
        '+TO \\"10\\" ]")))))'
    )
    it_tfs = "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]"
    rained_tfs = (
        "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]"
    )
    deriv_d = {
        # dropped: "type": "subjh_mc_rule"
        "id": 189,
        "entity": "SB-HD_MC_C",
        "label": "S",
        "score": 0.228699,
        "start": 0,
        "end": 2,
        "daughters": [
            {
                # dropped: "type": "n_-_pr-it-x_le", "from": 0, "to": 2
                "id": 37,
                "entity": "it",
                "score": 0.401245,
                "start": 0,
                "end": 1,
                "form": "it",
                # dropped from the token: "from": 0, "to": 2
                "tokens": [{"id": 34, "tfs": it_tfs}],
            },
            {
                # dropped: "type": "punctuation_period_rule"
                "id": 188,
                "entity": "W_PERIOD_PLR",
                "score": -0.113641,
                "start": 1,
                "end": 2,
                "daughters": [
                    {
                        # dropped: "type": "v_pst_inflrule"
                        "id": 187,
                        "entity": "V_PST_OLR",
                        "score": 0,
                        "start": 1,
                        "end": 2,
                        "daughters": [
                            {
                                # dropped: "type": "v_-_it_le",
                                #          "from": 3, "to": 10
                                "id": 56,
                                "entity": "rain_v1",
                                "score": 0,
                                "start": 1,
                                "end": 2,
                                "form": "rained.",
                                # dropped from the token:
                                #   "from": 3, "to": 10
                                "tokens": [{"id": 32, "tfs": rained_tfs}],
                            },
                        ],
                    },
                ],
            },
        ],
    }
    deriv = Derivation.from_dict(deriv_d)
    check('derivation', deriv_s, deriv)
    check('derivation', deriv_d, deriv)
Exemplo n.º 36
0
 def test_fromstring(self):
     """Exercise D.from_string on rejected and accepted inputs.

     Malformed strings must raise ValueError; well-formed ones must yield
     nodes with the expected id, entity, score, start, end and daughters.
     """

     def expect_fields(node, id_, entity, score, start, end):
         # The same five scalar fields are compared after every parse.
         assert node.id == id_
         assert node.entity == entity
         assert node.score == score
         assert node.start == start
         assert node.end == end

     malformed = (
         # empty string
         '',
         # root with no children
         '(some-root)',
         # does not start with `(` or end with `)`
         ' (1 some-thing -1 -1 -1 ("token"))',
         ' (1 some-thing -1 -1 -1 ("token")) ',
         # uneven parens
         '(1 some-thing -1 -1 -1 ("token")',
     )
     for text in malformed:
         with pytest.raises(ValueError):
             D.from_string(text)

     # ok
     parsed = D.from_string('(1 some-thing -1 -1 -1 ("token"))')
     expect_fields(parsed, 1, 'some-thing', -1.0, -1, -1)
     assert parsed.daughters == [T('token')]

     # newlines in tree
     parsed = D.from_string('''(1 some-thing -1 -1 -1
                             ("token"))''')
     expect_fields(parsed, 1, 'some-thing', -1.0, -1, -1)
     assert parsed.daughters == [T('token')]

     # LKB-style terminals
     parsed = D.from_string('''(1 some-thing -1 -1 -1
                             ("to ken" 1 2))''')
     expect_fields(parsed, 1, 'some-thing', -1.0, -1, -1)
     assert parsed.daughters == [T('to ken')]  # start/end ignored

     # TFS-style terminals
     parsed = D.from_string(r'''(1 some-thing -1 -1 -1
                             ("to ken" 2 "token [ +FORM \"to\" ]"
                                       3 "token [ +FORM \"ken\" ]"))''')
     expect_fields(parsed, 1, 'some-thing', -1.0, -1, -1)
     expected_tokens = [
         Tk(2, r'token [ +FORM \"to\" ]'),
         Tk(3, r'token [ +FORM \"ken\" ]'),
     ]
     assert parsed.daughters == [T('to ken', expected_tokens)]

     # longer example
     tree = D.from_string(r'''(root
         (1 some-thing 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert tree.entity == 'root'
     assert len(tree.daughters) == 1

     # the single node under the root wrapper
     top = tree.daughters[0]
     expect_fields(top, 1, 'some-thing', 0.4, 0, 5)
     assert len(top.daughters) == 2

     # first lexical daughter
     a_lex = top.daughters[0]
     expect_fields(a_lex, 2, 'a-lex', 0.8, 0, 1)
     assert a_lex.daughters == [T('a', [Tk(1, r'token [ +FORM \"a\" ]')])]

     # second lexical daughter
     bcd_lex = top.daughters[1]
     expect_fields(bcd_lex, 3, 'bcd-lex', 0.5, 2, 5)
     bcd_tokens = [Tk(2, r'token [ +FORM \"bcd\" ]')]
     assert bcd_lex.daughters == [T('bcd', bcd_tokens)]