Exemplo n.º 1
0
#                    print "WARNING, zero-prob event", lhs, word, pos, ct
                    continue
                wordRight[lhs] += (pGivenWord / total) * ct
                norm[lhs] += total * ct

                ll += log(total) * ct

        return ll, wordRight, norm

# Script entry point: opens a HierGrammar on `out` in write mode and registers
# every ancestor/child link found in `ec05` with it.
if __name__ == "__main__":
    # Exactly four command-line arguments are required; any other count makes
    # this tuple unpacking raise ValueError.
    (grammarStem, out, trees, validation) = sys.argv[1:]

    print "Grammar stem:", grammarStem, "Output:", out, "Training:", trees,\
          "Validation:", validation

    grammar = HierGrammar(out, mode='w')

    # ec05 is not defined in the visible code. It is used here as a sequence
    # whose elements expose .items() yielding (ancestor, children) pairs --
    # presumably one mapping per hierarchy level; TODO confirm.
    topLevel = len(ec05) - 1

    # Visit the levels from the last index down to 0.
    for level,mapping in reversed(list(enumerate(ec05))):
#        print level, mapping
        for anc,children in mapping.items():
            for child in children:
                # Each link is registered at level + 1, once for the plain
                # labels and once for their "@"-prefixed counterparts
                # (presumably intermediate binarization symbols -- TODO
                # confirm).
                grammar.addAncestry(level + 1, anc, child)
                grammar.addAncestry(level + 1, "@%s" % anc, "@%s" % child)

#    print grammar.hierarchy

    grammar.makeMapping(topLevel)

#    print grammar.pennToLevel
Exemplo n.º 2
0
        (prob, nt, arrow, word) = fields
        assert(arrow == "->")
        ntToWord[nt][word] = float(prob)
    return ntToWord

def listEval(lst):
    """Split a bracketed, comma-separated string into its raw items.

    Every leading "[" and every trailing "]" is removed, then the remainder
    is split on ",".  Items are returned exactly as they appear: surrounding
    whitespace is not trimmed, and an empty list such as "[]" yields [""].
    """
    return lst.lstrip("[").rstrip("]").split(",")

# Script entry point: finds the per-level grammar files that belong to a
# grammar stem, reports the highest level, and begins reading the matching
# hierarchy file.
if __name__ == "__main__":
    # Exactly two command-line arguments are required; any other count makes
    # this tuple unpacking raise ValueError.
    (grammarStem, out) = sys.argv[1:]

    print "Grammar stem:", grammarStem, "Output:", out

    grammar = HierGrammar(out, mode='w')

    # Split the absolute stem into its directory part and its file-name part.
    grammarStem = path(grammarStem).abspath()
    workDir = grammarStem.dirname()
    basename = grammarStem.basename()

    # Ask workDir for files named "<basename>-txt-lvl*" and pull the numeric
    # level out of each name.
    fileLst = workDir.files(basename+"-txt-lvl*")
    fileNums = [re.search("-txt-lvl(\d+)", fileName) for fileName in fileLst]
    # Names in which the regex found no level number are dropped here.
    fileNums = [int(match.group(1)) for match in fileNums if match]
    # NOTE(review): max() raises ValueError when no level file was matched.
    maxLevel = max(fileNums)

    print "Max grammar level:", maxLevel

    # The hierarchy file is looked up in workDir as "<basename>-txt.hier".
    hierFile = workDir/("%s-txt.hier" % (basename,))
    for line in file(hierFile):
        # Break each line into whitespace-separated fields.
        fields = line.strip().split()
Exemplo n.º 3
0
from AIMA import DefaultDict

from HierGrammar import HierGrammar, HierRule
from convertLCGrammarToHier import readLambdas, readProductionTable, listEval

from path import path

from treeUtils import markovBinarizeTree, treeToTuple, treeToStr, zeroSplit
from ctfScheme import leftCorner, GrammarStats, ValidationEvents

if __name__ == "__main__":
    (out, trees, validation) = sys.argv[1:]

    print "Output:", out, "Training:", trees, "Validation:", validation

    grammar = HierGrammar(out, mode='w')
    binarizeTo = 0

    gstats = GrammarStats()
    vstats = ValidationEvents()

    for ct,line in enumerate(file(trees)):
        if ct % 100 == 0:
            print "read trees", ct
        tree = markovBinarizeTree(treeToTuple(line.strip()), to=0)

#        print treeToStr(tree)
        gstats.record(tree)

    for ct,line in enumerate(file(validation)):
        if ct % 100 == 0: