Пример #1
0
def production_coverage_local(path, oldtable, tables, conf):
    """Print one coverage ratio per tree in ``conf['trees']``.

    For every tree a fresh counter table is created with one zeroed slot per
    ``(nonterminal, production index)`` pair of the grammar.  The tree is then
    walked with ``callback`` (defined elsewhere in this module) bound to the
    grammar and that table, and the sum of all slots divided by the number of
    slots is printed.

    NOTE(review): what ``callback`` writes into the table is not visible
    here; presumably it marks/counts the production used at each node --
    confirm before interpreting the printed number as a fraction.

    Args:
        path: unused by this function.
        oldtable: unused by this function.
        tables: unused by this function.
        conf: mapping with
            ``'grammar'`` -- nonterminal -> iterable of productions, each
            production being an iterable of strings, and
            ``'trees'`` -- iterable of trees accepted by ``walktree``.

    Returns:
        None.  Results are written to standard output only.
    """
    # Each nonterminal maps to the sorted tuple of its productions, every
    # production flattened to a colon-joined string.
    grammar = dict(
        (nonterm, tuple(sorted(':'.join(p) for p in prods)))
        for nonterm, prods in conf['grammar'].items()
    )

    # One slot per (nonterminal, 1-based production index).  The key set does
    # not depend on the tree, so it is built once rather than per tree.
    slots = [
        (nonterm, index)
        for nonterm, prods in grammar.items()
        for index in range(1, len(prods) + 1)
    ]
    total = len(slots)

    for tree in conf['trees']:
        local_cov = dict.fromkeys(slots, 0)
        walktree(tree, functools.partial(callback, grammar, local_cov))
        covered = float(sum(local_cov.values()))
        # An empty grammar has no slots; report 0.0 instead of dividing by 0.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(covered / total if total else 0.0)
Пример #2
0
def conditional_counts(path, oldtable, tables, conf):
    """Count how often each grammar rule is chosen after a given context.

    Every tree in ``conf['trees']`` is walked with ``walktree``.  For each
    node that has children, the production matching the children's labels is
    looked up in the grammar and two counters are updated, both keyed by the
    current "previous" tuple (``lookBack`` labels taken from the top of an
    explicit stack maintained during the walk).

    Args:
        path: forwarded to ``save`` together with the first result table.
        oldtable: unused by this function.
        tables: mapping with an ``'infer_grammar'`` entry whose rows are
            ``(nonterminal, production, production, ...)``; productions are
            compared against colon-joined child labels.
        conf: mapping with a ``'trees'`` entry, an iterable of trees accepted
            by ``walktree``.

    Returns:
        dict with two tables, each a tuple of row tuples:
        ``[0]`` -- ``(lookBack, rule, prev_1, ..., prev_lookBack, count)``
        ``[1]`` -- ``(lookBack, prev_1, ..., prev_lookBack, nonterminal, count)``
        Only table 0 is passed to ``save``; table 1 is returned so that later
        processing can use it.

    Raises:
        KeyError: a node label is not a key of the grammar.
        ValueError: a node's children do not match any production listed for
            its label.
    """
    grammar = dict((row[0], tuple(row[1:])) for row in tables['infer_grammar'])

    # counts[rule][prevTuple] == times `rule` was chosen with that context,
    # e.g. counts["NT => A:B:C"][(NT1, NT2)] == 5
    counts = dict()
    # nonterminalCounts[prevTuple][nonterminal] == times that nonterminal was
    # expanded with that context, e.g. nonterminalCounts[(NT1, NT2)][NT] == 78
    nonterminalCounts = dict()
    # Entries are (prevTuple, requirePop) pairs.
    stack = list()
    lookBack = 2  # how many labels make up a prevTuple

    def bump(table, outerKey, innerKey):
        """Increment table[outerKey][innerKey], creating levels as needed."""
        inner = table.setdefault(outerKey, {})
        inner[innerKey] = inner.get(innerKey, 0) + 1

    def callback(grammar, node, depth):
        # `depth` is not used; it is kept because walktree supplies the
        # callback arguments (presumably node and depth -- confirm there).

        # An empty stack means a new tree is starting: seed it with a
        # context made of `lookBack` Nones that is never popped.
        if not stack:
            stack.append(((None,) * lookBack, False))

        prev, requirePop = stack[-1]
        prevAsTuple = tuple(prev[:lookBack])

        # A context pushed by a "short" rule is consumed by the very next
        # node visited, whether or not that node has children.
        if requirePop:
            stack.pop()
        if not node.children:
            return

        productions = grammar[node.label]
        childLabels = ':'.join(kid.label for kid in node.children)
        production = productions[productions.index(childLabels)]
        chosenRule = node.label + " => " + production

        bump(counts, chosenRule, prevAsTuple)
        bump(nonterminalCounts, prevAsTuple, node.label)

        # Push this node as the new most-recent context.  Contexts pushed by
        # long rules stay on the stack so that later siblings still see the
        # context that was current when the rule was entered; contexts
        # pushed by short rules are popped after one use.
        # NOTE(review): a production with N symbols contains N-1 colons, so
        # this test is true only for productions of three or more symbols;
        # two-symbol productions take the pop-after-use path.  The original
        # comment described the test as "more than 1 nonterminal" -- confirm
        # whether `> 0` was intended.  Behaviour is left unchanged here.
        staysOnStack = production.count(":") > 1
        stack.append((prev[1:] + (node.label,), not staysOnStack))

    for tree in conf['trees']:
        # Clear in place so the closure above keeps seeing the same list.
        del stack[:]
        walktree(tree, functools.partial(callback, grammar))

    retTables = dict()

    retTables[0] = tuple(
        (lookBack, rule) + tuple(prev) + (count,)
        for rule, perContext in counts.items()
        for prev, count in perContext.items()
    )

    # Not saved as csv, but returned so that conditional_probabilities()
    # can work from it.
    retTables[1] = tuple(
        (lookBack,) + tuple(prevAsTuple) + (nonterm, count)
        for prevAsTuple, perNonterm in nonterminalCounts.items()
        for nonterm, count in perNonterm.items()
    )

    save(path, retTables[0])
    return retTables