# Script fragment: find the highest numbered grammar level on disk, load the
# nonterminal hierarchy file into `grammar`, then start reading each level's
# grammar file.
#
# NOTE(review): this fragment reached review with its line breaks and
# indentation stripped; the nesting below is reconstructed from the statement
# logic -- confirm against the original file.
# `grammarStem`, `workDir`, `grammar`, `re` and `sys` come from outside this
# fragment.

# Stem shared by every file name built below.
basename = grammarStem.basename()

# Candidate per-level files, selected with the pattern "<basename>-txt-lvl*".
# presumably `workDir.files` is a path.py-style glob -- TODO confirm
fileLst = workDir.files(basename+"-txt-lvl*")

# Pull the numeric level out of each file name.  re.search returns None for
# names without a "-txt-lvl<digits>" part; those are dropped on the next line.
fileNums = [re.search("-txt-lvl(\d+)", fileName) for fileName in fileLst]
fileNums = [int(match.group(1)) for match in fileNums if match]

# Highest level number found.  max() raises ValueError if nothing matched.
maxLevel = max(fileNums)
print "Max grammar level:", maxLevel

# Hierarchy file: every line must split into exactly four whitespace-separated
# fields, "<level> <parent> -> <child>".  Any other field count (including a
# blank line) makes the tuple unpacking raise ValueError.
hierFile = workDir/("%s-txt.hier" % (basename,))
# `file` is the Python 2 built-in; the handle is never closed explicitly.
for line in file(hierFile):
    fields = line.strip().split()
    (level, parNT, arrow, childNT) = fields
    level = int(level)
    # Format sanity check only; note that asserts are skipped under `python -O`.
    assert(arrow == "->")
    # presumably records childNT as a descendant of parNT at this level --
    # verify against HierGrammar.addAncestry
    grammar.addAncestry(level, parNT, childNT)

# NOTE(review): placed after the loop (one call once every line is loaded);
# confirm against the original indentation.
grammar.writeback("hierarchy")

# Visit every level from 0 through maxLevel inclusive; progress goes to stderr.
for level in range(maxLevel+1):
    print >>sys.stderr, "Level", level

    grammarFile = workDir/("%s-txt-lvl%d.grammar" % (basename, level))
    print >>sys.stderr, "Nonterms from", grammarFile

    # `ct` counts the lines read so far; a progress marker is printed before
    # line 0 and then once every 1000 lines.
    ct = 0
    for line in file(grammarFile):
        if ct % 1000 == 0:
            print >>sys.stderr, ct, "..."
        ct += 1
        # NOTE(review): only the line counting is visible here; any per-line
        # processing lies outside this view.
# Script entry point: read four command-line arguments, build a HierGrammar
# whose ancestry comes from the `ec05` table, then start one pass over the
# training trees per `ec05` entry.
#
# NOTE(review): this block reached review with its line breaks and indentation
# stripped; the nesting below is reconstructed from the statement logic --
# confirm against the original file.  `ec05`, `HierGrammar`, `GrammarStats`
# and `ValidationEvents` are defined outside this view.
if __name__ == "__main__":
    # Exactly four arguments are required; any other count makes this
    # unpacking raise ValueError.
    (grammarStem, out, trees, validation) = sys.argv[1:]

    print "Grammar stem:", grammarStem, "Output:", out, "Training:", trees,\
        "Validation:", validation

    # presumably mode='w' creates/overwrites the grammar stored at `out` --
    # TODO confirm against HierGrammar
    grammar = HierGrammar(out, mode='w')

    # Index of the last entry in ec05.
    topLevel = len(ec05) - 1

    # Walk ec05 from its last entry to its first.  Each entry is used as a
    # mapping from an ancestor label to an iterable of child labels, and the
    # entry at index `level` is registered under level + 1.
    for level,mapping in reversed(list(enumerate(ec05))):
        # Debug aid (disabled): print level, mapping
        for anc,children in mapping.items():
            for child in children:
                grammar.addAncestry(level + 1, anc, child)
                # The same pair is registered again with "@"-prefixed names.
                # presumably these are the intermediate (binarized) symbols --
                # TODO confirm
                grammar.addAncestry(level + 1, "@%s" % anc, "@%s" % child)

    # Debug aid (disabled): print grammar.hierarchy

    grammar.makeMapping(topLevel)

    # Debug aid (disabled): print grammar.pennToLevel

    # One pass per ec05 entry, each with fresh statistics objects.
    for level in range(len(ec05)):
        gstats = GrammarStats()
        vstats = ValidationEvents()

        # The training file is reopened and reread from the start on every
        # pass; `file` is the Python 2 built-in and the handle is never closed
        # explicitly.  Progress is printed every 100 lines.
        for ct,line in enumerate(file(trees)):
            if ct % 100 == 0:
                print "read trees", ct
            # NOTE(review): only the progress report is visible here; the
            # per-tree processing lies outside this view.