Exemplo n.º 1
0
 def check(self, derivs_file, gs_file):
     with open(derivs_file) as f:
         with open(gs_file) as gs:
             file = f.readlines()
             gsdeps_file = gs.readlines()
         
             while file and gsdeps_file:
                 _, deriv = file.pop(0), file.pop(0)
                 gsdeps_line = gsdeps_file.pop(0)
                 
                 if deriv.startswith('#'): continue
                 
                 t = naive_label_derivation(parse_tree(deriv))
                 # only take the first two elements (filler lex, arg lex)
                 deps = set(imap(lambda v: tuple(e.split(IndexSeparator)[0] for e in v[0:2]), mkdeps(t)))
                 gsdeps = parse_gsdeps(gsdeps_line)
                 
                 try:
                     self.assertEqual(deps, gsdeps)
                 except AssertionError:
                     print "EXPECTED\n-------"
                     for depl, depr in sorted(gsdeps):
                         print depl, depr
                     print "GOT\n---"
                     for depl, depr in sorted(deps):
                         print depl, depr
                     print "DIFF\n----"
                     print "false negatives: %s" % ' '.join('|'.join((u, v)) for u, v in list(set(gsdeps) - set(deps)))
                     print "false positives: %s" % ' '.join('|'.join((u, v)) for u, v in list(set(deps) - set(gsdeps)))
                         
                     raise
Exemplo n.º 2
0
    def check(self, derivs_file, gs_file):
        with open(derivs_file) as f:
            with open(gs_file) as gs:
                file = f.readlines()
                gsdeps_file = gs.readlines()

                while file and gsdeps_file:
                    _, deriv = file.pop(0), file.pop(0)
                    gsdeps_line = gsdeps_file.pop(0)

                    if deriv.startswith('#'): continue

                    t = naive_label_derivation(parse_tree(deriv))
                    # only take the first two elements (filler lex, arg lex)
                    deps = set(
                        imap(
                            lambda v: tuple(
                                e.split(IndexSeparator)[0] for e in v[0:2]),
                            mkdeps(t)))
                    gsdeps = parse_gsdeps(gsdeps_line)

                    try:
                        self.assertEqual(deps, gsdeps)
                    except AssertionError:
                        print "EXPECTED\n-------"
                        for depl, depr in sorted(gsdeps):
                            print depl, depr
                        print "GOT\n---"
                        for depl, depr in sorted(deps):
                            print depl, depr
                        print "DIFF\n----"
                        print "false negatives: %s" % ' '.join(
                            '|'.join((u, v))
                            for u, v in list(set(gsdeps) - set(deps)))
                        print "false positives: %s" % ' '.join(
                            '|'.join((u, v))
                            for u, v in list(set(deps) - set(gsdeps)))

                        raise
Exemplo n.º 3
0
def get_deps(root):
    """Return ``mkdeps`` applied to ``naive_label_derivation(root)``."""
    labelled = naive_label_derivation(root)
    return mkdeps(labelled)
Exemplo n.º 4
0
    arg_index = int(arg_index)

    sec, doc, deriv_no = deriv_id_to_components(deriv_id)

    # read in the derivation
    if using_data_dir:
        reader = GuessReader(os.path.join(made_docs, deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True)))
    else:
        reader = GuessReader(os.path.join('final', deriv_id_to_filespec(deriv_id, with_deriv=True, with_section_dir=True)))
    try:
        bundle = iter(reader).next()
        total_sents += 1

        root = bundle.derivation
        # run mkmarked on the derivation
        root = naive_label_derivation(root)
        def extract_index(s):
            """Return the second backtick-separated field of ``s`` as an int."""
            fields = s.split('`')
            return int(fields[1])
        # run mkdeps on the derivation
        try:
            deps = mkdeps(root, postprocessor=extract_index)
        except UnificationException:
            print "mkdeps failed on %s" % deriv_id
            traceback.print_exc(file=sys.stdout)
            deps = None

        def deps_match(head_index, arg_index, dep):
            """Tell whether ``dep`` starts with ``head_index`` then ``arg_index``.

            ``dep[1]`` is only looked at when ``dep[0]`` equals ``head_index``.
            """
            head_agrees = head_index == dep[0]
            if not head_agrees:
                return head_agrees
            return arg_index == dep[1]

        if deps:
            sys.stdout.write('%d %d -> ' % (head_index, arg_index))
Exemplo n.º 5
0
    opt = '9'
    long_opt = 'mkdeps'

    arg_names = 'OUTDIR'

if __name__ == '__main__':
    try:
        import psyco
        psyco.full()
    except ImportError: pass
    
    from munge.ccg.parse import *

    file = "final/%s" % sys.argv[1]
    t=naive_label_derivation(parse_tree(open(file).readlines()[2*int(sys.argv[2])+1]))
    print t
    print "sent:"
    print "-----"
    print ' '.join(t.text())
    deps = mkdeps(t)
    
    print "deps:"
    print "-----"
    for l, r in deps: print "%s|%s" % (l, r)
    
    print "leaves:"
    print "-------"
    for leaf in leaves(t):
        print leaf.lex, leaf.cat