def test_to_udf(self):
     """Check UDF serialization, both single-line and indented.

     The last case parses UDX input (``entity@type``) and expects the
     type annotations to be missing from the UDF output.
     """
     # minimal derivation: indent=None reproduces the input verbatim
     single = '(1 some-thing -1 -1 -1 ("token"))'
     assert from_string(single).to_udf(indent=None) == single
     expected = ('(1 some-thing -1 -1 -1\n'
                 ' ("token"))')
     assert from_string(single).to_udf(indent=1) == expected

     # rooted tree with two lexical daughters, one token each
     two_lex = (r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 '
                r'("a" 3 "token [ +FORM \"a\" ]")) '
                r'(4 bcd-lex 0.5 2 5 ("bcd" 5 "token [ +FORM \"bcd\" ]"))))')
     expected = ('(root\n'
                 ' (1 some-thing 0.4 0 5\n'
                 '  (2 a-lex 0.8 0 1\n'
                 '   ("a"\n'
                 '    3 "token [ +FORM \\"a\\" ]"))\n'
                 '  (4 bcd-lex 0.5 2 5\n'
                 '   ("bcd"\n'
                 '    5 "token [ +FORM \\"bcd\\" ]"))))')
     assert from_string(two_lex).to_udf(indent=1) == expected

     # a terminal carrying two tokens puts each token on its own line
     two_tokens = (
         r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 '
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]"))))')
     expected = ('(root\n'
                 ' (1 some-thing 0.4 0 5\n'
                 '  (2 a-lex 0.8 0 1\n'
                 '   ("a b"\n'
                 '    3 "token [ +FORM \\"a\\" ]"\n'
                 '    4 "token [ +FORM \\"b\\" ]"))))')
     assert from_string(two_tokens).to_udf(indent=1) == expected

     # UDX input: the @type suffixes do not appear in the UDF output
     udx = (
         r'(root (1 some-thing@some-type 0.4 0 5 (2 a-lex@a-type 0.8 0 1 '
         r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]"))))')
     expected = ('(root\n'
                 ' (1 some-thing 0.4 0 5\n'
                 '  (2 a-lex 0.8 0 1\n'
                 '   ("a b"\n'
                 '    3 "token [ +FORM \\"a\\" ]"\n'
                 '    4 "token [ +FORM \\"b\\" ]"))))')
     assert from_string(udx).to_udf(indent=1) == expected
 def test_str(self):
     """str() of a parsed derivation reproduces the single-line input."""
     round_trip_cases = (
         '(1 some-thing -1 -1 -1 ("token"))',
         (r'(root (1 some-thing 0.4 0 5 (2 a-lex 0.8 0 1 '
          r'("a" 1 "token [ +FORM \"a\" ]")) '
          r'(3 bcd-lex 0.5 2 5 ("bcd" 2 "token [ +FORM \"bcd\" ]"))))'),
     )
     for udf in round_trip_cases:
         assert str(from_string(udf)) == udf
 def test_internals(self):
     """internals() yields only the root (id None) and node 1 in both trees."""
     # plain UDF input
     plain = from_string('(root (1 some-thing -1 -1 -1'
                         '  (2 a-thing -1 -1 -1 ("a"))'
                         '  (3 b-thing -1 -1 -1 ("b"))))')
     plain_ids = [node.id for node in plain.internals()]
     assert plain_ids == [None, 1]

     # UDX input with types and token lists
     typed = from_string('(root'
                         ' (1 some-thing@some-type 0.4 0 5'
                         '  (2 a-lex@a-type 0.8 0 1'
                         '   ("a b"'
                         '    3 "token [ +FORM \\"a\\" ]"'
                         '    4 "token [ +FORM \\"b\\" ]"))'
                         '  (5 b-lex@b-type 0.9 1 2'
                         '   ("b"'
                         '    6 "token [ +FORM \\"b\\" ]"))))')
     typed_ids = [node.id for node in typed.internals()]
     assert typed_ids == [None, 1]
 def test_terminals(self):
     """terminals() yields the surface forms in left-to-right order."""
     # plain UDF input
     plain = from_string('(root (1 some-thing -1 -1 -1'
                         '  (2 a-thing -1 -1 -1 ("a"))'
                         '  (3 b-thing -1 -1 -1 ("b"))))')
     plain_forms = [term.form for term in plain.terminals()]
     assert plain_forms == ['a', 'b']

     # UDX input: a multi-token terminal still counts as one form
     typed = from_string('(root'
                         ' (1 some-thing@some-type 0.4 0 5'
                         '  (2 a-lex@a-type 0.8 0 1'
                         '   ("a b"'
                         '    3 "token [ +FORM \\"a\\" ]"'
                         '    4 "token [ +FORM \\"b\\" ]"))'
                         '  (5 b-lex@b-type 0.9 1 2'
                         '   ("b"'
                         '    6 "token [ +FORM \\"b\\" ]"))))')
     typed_forms = [term.form for term in typed.terminals()]
     assert typed_forms == ['a b', 'b']
示例#5
0
    def derivation(self):
        """
        Interpret and return a Derivation object.

        If :mod:`delphin.derivation` is available and the value of the
        `derivation` key in the result dictionary is a valid UDF
        string or a dictionary, return the interpreted
        Derivation object. If there is no 'derivation' key in the
        result, return `None`.

        Raises:
            InterfaceError: when the value is an unsupported type or
                :mod:`delphin.derivation` is unavailable
        """
        raw = self.get('derivation')
        try:
            # Imported inside the try so that a missing module is
            # reported as InterfaceError, even when there is no value.
            from delphin import derivation as drv_module
            if raw is None:
                return None
            if isinstance(raw, dict):
                return drv_module.from_dict(raw)
            if isinstance(raw, str):
                return drv_module.from_string(raw)
            # anything else is unsupported; report its class name
            raise TypeError(raw.__class__.__name__)
        except (ImportError, TypeError) as exc:
            raise InterfaceError('can not get Derivation object') from exc
 def test_type(self):
     """``type`` is None for plain UDF nodes and read from ``entity@type``."""
     # plain UDF: no node has a type
     plain = from_string('(root (1 some-thing -1 -1 -1'
                         '  (2 a-thing -1 -1 -1 ("a"))'
                         '  (3 b-thing -1 -1 -1 ("b"))))')
     assert plain.type is None
     top = plain.daughters[0]
     assert top.type is None
     for idx in (0, 1):
         assert top.daughters[idx].type is None

     # UDX: every non-root node carries the type given after '@'
     typed = from_string('(root (1 some-thing@some-type -1 -1 -1'
                         '  (2 a-thing@a-type -1 -1 -1 ("a"))'
                         '  (3 b-thing@b-type -1 -1 -1 ("b"))))')
     assert typed.type is None
     top = typed.daughters[0]
     assert top.type == 'some-type'
     for idx, expected in enumerate(('a-type', 'b-type')):
         assert top.daughters[idx].type == expected
 def test_entity(self):
     """``entity`` is the bare name with or without an ``@type`` suffix."""
     plain_udf = ('(root (1 some-thing -1 -1 -1'
                  '  (2 a-thing -1 -1 -1 ("a"))'
                  '  (3 b-thing -1 -1 -1 ("b"))))')
     typed_udx = ('(root (1 some-thing@some-type -1 -1 -1'
                  '  (2 a-thing@a-type -1 -1 -1 ("a"))'
                  '  (3 b-thing@b-type -1 -1 -1 ("b"))))')
     # both inputs must expose exactly the same entity names
     for text in (plain_udf, typed_udx):
         tree = from_string(text)
         assert tree.entity == 'root'
         top = tree.daughters[0]
         assert top.entity == 'some-thing'
         assert top.daughters[0].entity == 'a-thing'
         assert top.daughters[1].entity == 'b-thing'
 def test_to_udx(self):
     """to_udx() keeps ``entity@type`` annotations when serializing."""
     # plain UDF input round-trips unchanged on a single line
     single = '(1 some-thing -1 -1 -1 ("token"))'
     assert from_string(single).to_udx(indent=None) == single

     # typed tree, indented one space per nesting level
     udx = (r'(root (1 some-thing@some-type 0.4 0 5 '
            r'(2 a-lex@a-type 0.8 0 1 '
            r'("a b" 3 "token [ +FORM \"a\" ]" 4 "token [ +FORM \"b\" ]")) '
            r'(5 b-lex@b-type 0.9 1 2 '
            r'("b" 6 "token [ +FORM \"b\" ]"))))')
     expected = ('(root\n'
                 ' (1 some-thing@some-type 0.4 0 5\n'
                 '  (2 a-lex@a-type 0.8 0 1\n'
                 '   ("a b"\n'
                 '    3 "token [ +FORM \\"a\\" ]"\n'
                 '    4 "token [ +FORM \\"b\\" ]"))\n'
                 '  (5 b-lex@b-type 0.9 1 2\n'
                 '   ("b"\n'
                 '    6 "token [ +FORM \\"b\\" ]"))))')
     assert from_string(udx).to_udx(indent=1) == expected
 def test_is_head(self):
     """is_head() is True/False when decidable and None when it is not."""
     # NOTE: is_head() is undefined for nodes with multiple
     # siblings, none of which are marked head (e.g. in plain UDF)
     unmarked = from_string('(root (1 some-thing -1 -1 -1'
                            '  (2 some-thing -1 -1 -1 ("a"))'
                            '  (3 some-thing -1 -1 -1 ("b"))))')
     assert unmarked.is_head()
     top = unmarked.daughters[0]
     assert top.is_head()
     for idx in (0, 1):
         assert top.daughters[idx].is_head() is None

     # if one sibling is marked, all become decidable
     marked = from_string('(root (1 some-thing -1 -1 -1'
                          '  (2 some-thing -1 -1 -1 ("a"))'
                          '  (3 ^some-thing -1 -1 -1 ("b"))))')
     assert marked.is_head()
     top = marked.daughters[0]
     assert top.is_head()
     left = top.daughters[0]
     right = top.daughters[1]
     assert not left.is_head()
     assert right.is_head()
 def test_from_dict(self):
     """from_dict() builds the same derivation as from_string()."""
     # minimal case: a root over one lexical node with a bare form
     simple_udf = '(root (1 some-thing -1 -1 -1 ("a")))'
     simple_dict = {
         'entity': 'root',
         'daughters': [
             {'id': 1, 'entity': 'some-thing', 'form': 'a'},
         ],
     }
     assert from_dict(simple_dict) == from_string(simple_udf)

     # UDX case: head marker, type and two tokens on the terminal
     udx = (r'(root (1 ^some-thing@some-type -1 -1 -1 ("a b"'
            r' 2 "token [ +FORM \"a\" ]"'
            r' 3 "token [ +FORM \"b\" ]")))')
     token_a = {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'}
     token_b = {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'}
     udx_dict = {
         'entity': 'root',
         'daughters': [
             {
                 'id': 1,
                 'entity': 'some-thing',
                 'type': 'some-type',
                 'head': True,
                 'form': 'a b',
                 'tokens': [token_a, token_b],
             },
         ],
     }
     assert from_dict(udx_dict) == from_string(udx)
示例#11
0
def _transform_derivation(s):
    """Parse the derivation string *s* and return its ``to_dict()`` form."""
    parsed = derivation.from_string(s)
    return parsed.to_dict()
 def test_fromstring(self):
     """Exercise from_string() on malformed and well-formed input.

     Covers the error cases first (empty string, childless root,
     surrounding whitespace, unbalanced parentheses) and then checks
     the parsed attributes for plain, LKB-style and TFS-style
     terminals, ending with a two-level rooted tree.
     """
     with pytest.raises(DerivationSyntaxError):
         from_string('')
     # root with no children
     # TODO: this should be a DerivationSyntaxError but the current
     # UDF parser doesn't make that straightforward. Revisit this
     # when/if the UDF parsing changes
     with pytest.raises(ValueError):
         from_string('(some-root)')
     # does not start with `(` or end with `)`
     with pytest.raises(DerivationSyntaxError):
         from_string(' (1 some-thing -1 -1 -1 ("token"))')
     with pytest.raises(DerivationSyntaxError):
         from_string(' (1 some-thing -1 -1 -1 ("token")) ')
     # uneven parens
     with pytest.raises(DerivationSyntaxError):
         from_string('(1 some-thing -1 -1 -1 ("token")')
     # ok
     t = from_string('(1 some-thing -1 -1 -1 ("token"))')
     assert t.id == 1
     assert t.entity == 'some-thing'
     assert t.score == -1.0
     assert t.start == -1
     assert t.end == -1
     assert t.daughters == [T('token')]
     # newlines in tree
     t = from_string('''(1 some-thing -1 -1 -1
                           ("token"))''')
     assert t.id == 1
     assert t.entity == 'some-thing'
     assert t.score == -1.0
     assert t.start == -1
     assert t.end == -1
     assert t.daughters == [T('token')]
     # LKB-style terminals
     t = from_string('''(1 some-thing -1 -1 -1
                           ("to ken" 1 2))''')
     assert t.id == 1
     assert t.entity == 'some-thing'
     assert t.score == -1.0
     assert t.start == -1
     assert t.end == -1
     assert t.daughters == [T('to ken')]  # start/end ignored
     # TFS-style terminals
     t = from_string(r'''(1 some-thing -1 -1 -1
                           ("to ken" 2 "token [ +FORM \"to\" ]"
                                     3 "token [ +FORM \"ken\" ]"))''')
     assert t.id == 1
     assert t.entity == 'some-thing'
     assert t.score == -1.0
     assert t.start == -1
     assert t.end == -1
     assert t.daughters == [
         T('to ken', [
             Tk(2, r'token [ +FORM \"to\" ]'),
             Tk(3, r'token [ +FORM \"ken\" ]')
         ])
     ]
     # longer example
     t = from_string(r'''(root
         (1 some-thing 0.4 0 5
             (2 a-lex 0.8 0 1
                 ("a" 1 "token [ +FORM \"a\" ]"))
             (3 bcd-lex 0.5 2 5
                 ("bcd" 2 "token [ +FORM \"bcd\" ]")))
     )''')
     assert t.entity == 'root'
     assert len(t.daughters) == 1
     top = t.daughters[0]
     assert top.id == 1
     assert top.entity == 'some-thing'
     assert top.score == 0.4
     assert top.start == 0
     assert top.end == 5
     assert len(top.daughters) == 2
     # first lexical daughter
     lex = top.daughters[0]
     assert lex.id == 2
     assert lex.entity == 'a-lex'
     assert lex.score == 0.8
     assert lex.start == 0
     assert lex.end == 1
     assert lex.daughters == [T('a', [Tk(1, r'token [ +FORM \"a\" ]')])]
     # second lexical daughter
     lex = top.daughters[1]
     assert lex.id == 3
     assert lex.entity == 'bcd-lex'
     assert lex.score == 0.5
     assert lex.start == 2
     assert lex.end == 5
     assert lex.daughters == [T('bcd', [Tk(2, r'token [ +FORM \"bcd\" ]')])]
 def test_to_dict(self):
     """to_dict() emits every field by default and honours ``fields``."""
     subset = ('id', 'entity', 'score')

     # single lexical node
     leaf = '(1 some-thing -1 -1 -1 ("token"))'
     assert from_string(leaf).to_dict() == {
         'id': 1, 'entity': 'some-thing', 'score': -1.0,
         'start': -1, 'end': -1, 'form': 'token',
     }
     # daughters and form are always shown
     assert from_string(leaf).to_dict(fields=subset) == {
         'id': 1, 'entity': 'some-thing', 'score': -1.0, 'form': 'token',
     }

     # rooted UDX tree with types, a head marker and token lists
     udx = (r'(root (0 top@top-rule -1 -1 -1'
            r' (1 a-lex@a-type -1 -1 -1 ("a b" 2 "token [ +FORM \"a\" ]"'
            r'  3 "token [ +FORM \"b\" ]"))'
            r' (4 ^c-lex@c-type -1 -1 -1 ("c" 5 "token [ +FORM \"c\" ]"))))')
     token_a = {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'}
     token_b = {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'}
     token_c = {'id': 5, 'tfs': r'token [ +FORM \"c\" ]'}
     a_lex_full = {
         'id': 1, 'entity': 'a-lex', 'type': 'a-type',
         'score': -1.0, 'start': -1, 'end': -1,
         'form': 'a b', 'tokens': [token_a, token_b],
     }
     c_lex_full = {
         'id': 4, 'entity': 'c-lex', 'type': 'c-type', 'head': True,
         'score': -1.0, 'start': -1, 'end': -1,
         'form': 'c', 'tokens': [token_c],
     }
     top_full = {
         'id': 0, 'entity': 'top', 'type': 'top-rule',
         'score': -1.0, 'start': -1, 'end': -1,
         'daughters': [a_lex_full, c_lex_full],
     }
     assert from_string(udx).to_dict() == {
         'entity': 'root',
         'daughters': [top_full],
     }

     # restricted to `subset`: type, head, start, end and tokens drop out
     a_lex_subset = {
         'id': 1, 'entity': 'a-lex', 'score': -1.0, 'form': 'a b',
     }
     c_lex_subset = {
         'id': 4, 'entity': 'c-lex', 'score': -1.0, 'form': 'c',
     }
     top_subset = {
         'id': 0, 'entity': 'top', 'score': -1.0,
         'daughters': [a_lex_subset, c_lex_subset],
     }
     assert from_string(udx).to_dict(fields=subset) == {
         'entity': 'root',
         'daughters': [top_subset],
     }
 def test_is_root(self):
     """is_root() is true only for the outer root wrapper node."""
     # a bare node without a root wrapper is not a root
     bare = from_string('(1 some-thing -1 -1 -1 ("token"))')
     assert not bare.is_root()

     # with a wrapper, the wrapper is the root and its daughter is not
     rooted = from_string('(root (1 some-thing -1 -1 -1 ("token")))')
     assert rooted.is_root()
     child = rooted.daughters[0]
     assert not child.is_root()
 def test_eq(self):
     """Equality ignores ids and scores but not structure or UDX properties."""
     token = from_string('(1 some-thing -1 -1 -1 ("token"))')
     # identity
     assert token == from_string('(1 some-thing -1 -1 -1 ("token"))')
     # ids and scores don't matter
     assert token == from_string('(100 some-thing 0.114 -1 -1 ("token"))')
     # tokens matter
     assert token != from_string('(1 some-thing -1 -1 -1 ("nekot"))')
     # and type of rhs
     assert token != '(1 some-thing -1 -1 -1 ("token"))'
     # and tokenization
     assert token != from_string('(1 some-thing -1 2 7 ("token"))')
     # and of course entities
     assert token != from_string('(1 epyt-emos -1 -1 -1 ("token"))')

     # and number of children
     one_child = from_string('(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")))')
     two_children = from_string(
         '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')
     assert one_child != two_children

     # and order of children
     y_then_z = from_string(
         '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')
     z_then_y = from_string(
         '(1 x -1 -1 -1 (3 z -1 -1 -1 ("z")) (2 y -1 -1 -1 ("y")))')
     assert y_then_z != z_then_y

     # and UDX properties when specified
     head_y = from_string(
         '(1 x -1 -1 -1 (2 ^y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')
     assert head_y == from_string(
         '(1 x -1 -1 -1 (2 ^y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')
     assert head_y != from_string(
         '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 ^z -1 -1 -1 ("z")))')
     assert head_y != from_string(
         '(1 x -1 -1 -1 (2 y -1 -1 -1 ("y")) (3 z -1 -1 -1 ("z")))')

     typed = from_string('(1 some-thing@some-type -1 -1 -1 ("token"))')
     assert typed == from_string(
         '(1 some-thing@some-type -1 -1 -1 ("token"))')
     assert typed != from_string(
         '(1 some-thing@another-type -1 -1 -1 ("token"))')
     assert typed != from_string('(1 some-thing -1 -1 -1 ("token"))')
示例#16
0
def read_profile(input_dir, output_dir, profile_name, mrp_eds, lexicon, args):
    """Read a test-suite profile and write the requested extractions.

    Each selected row is turned into a
    ``semantics.SemanticRepresentation`` built from its token lattice
    and the single daughter of its derivation root. Rows whose
    semantics cannot be obtained are skipped.

    Args:
        input_dir: location passed to ``d_itsdb.TestSuite``.
        output_dir: path prefix for the output files; ``.tree``,
            ``.tags`` and ``.dmrs`` are appended to it.
        profile_name: prefix of each representation's identifier
            (``profile_name + ":" + i-id``).
        mrp_eds: if truthy, a container indexed by i-id holding EDS
            input; rows whose id is absent are skipped. If falsy, the
            ``mrs`` column of the profile is decoded instead.
        lexicon: passed through to ``SemanticRepresentation``.
        args: options object; ``convert_semantics``, ``extract_syntax``
            and ``extract_semantics`` are read.
    """

    def write_lines(suffix, lines):
        # one entry per line, each terminated by a newline
        with open(output_dir + suffix, 'w') as out:
            out.writelines(line + "\n" for line in lines)

    ts = d_itsdb.TestSuite(input_dir)

    derivation_strs = []
    supertag_strs = []
    dmrs_json_strs = []

    rows = d_tsql.select('i-id i-input p-tokens derivation mrs', ts)
    for iid, sentence, parse_tokens, result_derivation, result_mrs in rows:
        lattice = d_tokens.YYTokenLattice.from_string(parse_tokens)
        token_dict = {tok.id : tok for tok in lattice.tokens}
        derivation_rep = d_derivation.from_string(result_derivation)
        # only the single daughter below the root is used
        assert len(derivation_rep.daughters) == 1
        derivation_rep = derivation_rep.daughters[0]

        if mrp_eds:
            if iid not in mrp_eds:
                # no EDS supplied for this item
                continue
            try:
                eds_rep = dc_eds.decode(mrp_eds[iid])
                dmrs_rep = eds_to_dmrs(eds_rep)
            except d_eds._exceptions.EDSSyntaxError:
                # unparsable EDS: skip the item
                continue
        else:
            try:
                mrs_rep = dc_simplemrs.decode(result_mrs)
            except d_mrs._exceptions.MRSSyntaxError:
                # unparsable MRS: skip the item
                continue
            dmrs_rep = d_dmrs.from_mrs(mrs_rep)

        # read derivation tree
        mr = semantics.SemanticRepresentation(
            profile_name + ":" + iid, sentence, token_dict,
            derivation_rep, lexicon)

        if args.convert_semantics:
            mr.map_dmrs(dmrs_rep)
            mr.process_semantic_tree(mr.root_node_id, dmrs_rep)

        mr.print_mrs()

        if args.extract_syntax:
            tree_str = mr.derivation_tree_str(mr.root_node_id, newline=False)
            derivation_strs.append(tree_str.lstrip())
            supertag_strs.append(mr.supertag_str(mr.root_node_id).strip())

        if args.extract_semantics:
            dmrs_json_strs.append(mr.dmrs_json_str(dmrs_rep))

    if args.extract_syntax:
        write_lines(".tree", derivation_strs)
        write_lines(".tags", supertag_strs)

    if args.extract_semantics:
        # empty DMRS strings are left out of the output
        write_lines(".dmrs", (s for s in dmrs_json_strs if s != ""))