Exemplo n.º 1
0
    # NOTE(review): this is the interior of a function whose header is not
    # visible here; `grammarStem`, `workDir` and `grammar` come from outside
    # this fragment. `grammarStem` and `workDir` are presumably path objects
    # (they expose .basename(), .files() and the `/` operator) -- TODO confirm.
    basename = grammarStem.basename()

    # Find the highest grammar level by scanning the working directory for
    # files named "<basename>-txt-lvl<N>..." and pulling out the number N.
    fileLst = workDir.files(basename+"-txt-lvl*")
    # NOTE(review): the pattern is not a raw string; "\d" still reaches the
    # regex engine unchanged, but r"..." is the conventional spelling.
    fileNums = [re.search("-txt-lvl(\d+)", fileName) for fileName in fileLst]
    # Drop names where the pattern did not match, keep the level as an int.
    fileNums = [int(match.group(1)) for match in fileNums if match]
    # NOTE(review): max() raises ValueError if no file matched.
    maxLevel = max(fileNums)

    print "Max grammar level:", maxLevel

    # Read the hierarchy file. Each line is split on whitespace and must have
    # exactly four fields: level, parent nonterminal, the literal "->", and
    # child nonterminal (the tuple unpacking raises ValueError otherwise).
    # NOTE(review): the handle returned by file() is never closed explicitly.
    hierFile = workDir/("%s-txt.hier" % (basename,))
    for line in file(hierFile):
        fields = line.strip().split()
        (level, parNT, arrow, childNT) = fields
        level = int(level)
        # NOTE(review): assert is stripped under -O, so this format check
        # disappears in optimized runs.
        assert(arrow == "->")
        grammar.addAncestry(level, parNT, childNT)

    # Presumably persists the ancestry collected above -- the semantics of
    # writeback() are defined elsewhere; TODO confirm.
    grammar.writeback("hierarchy")

    # Process one grammar file per level, from level 0 up to and including
    # maxLevel.
    for level in range(maxLevel+1):
        print >>sys.stderr, "Level", level

        grammarFile = workDir/("%s-txt-lvl%d.grammar" % (basename, level))

        print >>sys.stderr, "Nonterms from", grammarFile

        # ct counts the lines read so far; a progress marker goes to stderr
        # every 1000 lines (including before the first line, when ct == 0).
        # The rest of this loop body is not visible in this fragment.
        ct = 0
        for line in file(grammarFile):
            if ct % 1000 == 0:
                print >>sys.stderr, ct, "..."
            ct += 1
Exemplo n.º 2
0
# Script entry point: builds a hierarchical grammar from the `ec05` level
# mappings and then makes one pass over the training trees per level.
# NOTE(review): `ec05`, HierGrammar, GrammarStats and ValidationEvents are
# defined outside this fragment; the end of the block is also not visible.
if __name__ == "__main__":
    # Exactly four command-line arguments are required; any other count makes
    # this tuple unpacking raise ValueError.
    (grammarStem, out, trees, validation) = sys.argv[1:]

    print "Grammar stem:", grammarStem, "Output:", out, "Training:", trees,\
          "Validation:", validation

    grammar = HierGrammar(out, mode='w')

    # Index of the last entry in ec05.
    topLevel = len(ec05) - 1

    # ec05 is used here as a sequence of dict-like mappings from an ancestor
    # symbol to an iterable of child symbols. Entries are visited from the
    # last index down to 0, and each pair is registered at level + 1.
    for level,mapping in reversed(list(enumerate(ec05))):
#        print level, mapping
        for anc,children in mapping.items():
            for child in children:
                grammar.addAncestry(level + 1, anc, child)
                # The same relation is also recorded for the "@"-prefixed
                # symbols (presumably intermediate/binarized nonterminals --
                # TODO confirm).
                grammar.addAncestry(level + 1, "@%s" % anc, "@%s" % child)

#    print grammar.hierarchy

    grammar.makeMapping(topLevel)

#    print grammar.pennToLevel

    # One pass per level, each with fresh statistics objects.
    for level in range(len(ec05)):
        gstats = GrammarStats()
        vstats = ValidationEvents()

        # Read the training file line by line, printing a progress message
        # every 100 lines. The rest of the loop body is not visible here.
        # NOTE(review): the handle returned by file() is never closed
        # explicitly.
        for ct,line in enumerate(file(trees)):
            if ct % 100 == 0:
                print "read trees", ct