Example #1
def pattern_parse(sentence, trace=0):
    # Tokenize only the first sentence, stripping trailing punctuation.
    tokens = tokenize(sentence.split(". ")[0].strip("?!.")).split()
    # Build a chart of grammar/unigram matches over the token sequence.
    chart = pattern_chart(tokens, thegrammar, theunigrams, trace)

    bestprob = 1e-60
    bestframe = None
    bestrel = None
    bestmatches = None
    # Try every known pattern and keep the highest-probability match
    # that spans the entire token sequence.
    for pprob, rel, pattern in patterns:
        ptok = pattern.split()
        for end, prob, frame, matchdict in match_pattern(ptok, 0, chart, tokens):
            prob *= pprob
            if end == len(tokens):
                if trace > 0:
                    print(prob, pattern)
                if prob > bestprob:
                    bestprob = prob
                    bestframe = untokenize(' '.join(frame))
                    bestrel = rel
                    bestmatches = matchdict
        #if bestframe is not None: break
    return bestprob, bestframe, bestrel, bestmatches
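
A minimal usage sketch, assuming the module-level helpers referenced above (tokenize, untokenize, pattern_chart, match_pattern) and the globals thegrammar, theunigrams, and patterns are already loaded; the input sentence is purely illustrative and the concrete scores depend on the module's pattern tables:

# Hypothetical call into the module's parser.
prob, frame, rel, matches = pattern_parse("A dog is a kind of animal.", trace=1)
print(rel)      # relation of the best full-span pattern, or None if nothing matched
print(frame)    # frame text containing {N} placeholders for the captured groups
print(matches)  # dict mapping group numbers/names to the matched token spans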
Example #2
import string

def match_pattern(rhs, start, chart, tokens):
    # Base case: an empty pattern matches zero tokens with probability 1.
    if len(rhs) == 0:
        yield start, 1.0, [], {}
        return
    symb = str(rhs[0])
    group = None
    # A pattern symbol written as {name} or {name:group} captures the tokens it spans.
    if symb[0] == "{":
        parts = symb[1:-1].split(':')
        symb = parts[0]
        if len(parts) > 1: group = parts[1]
    # For every position where this symbol can end, recursively match the rest
    # of the pattern and combine the probabilities.
    for next, prob in chart[symb][start].items():
        for end, prob2, frame, matchdict in match_pattern(rhs[1:], next, chart, tokens):
            if group is not None:
                if group in string.digits:
                    # Numeric groups become {N} slots in the frame; group 0 is dropped entirely.
                    chunk = ["{%s}" % group]
                    groupn = int(group)
                    if groupn == 0: chunk = []
                else:
                    # Named groups keep the matched tokens in the frame.
                    chunk = tokens[start:next]
                    groupn = group
                matchdict[groupn] = untokenize(' '.join(tokens[start:next]))
            else: chunk = tokens[start:next]
            yield end, prob*prob2, chunk + frame, matchdict
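
A small illustrative call, assuming match_pattern and untokenize are importable from the module above; the chart here is hand-built in the shape the function reads (chart[symbol][start] maps each possible end position to a probability), and the symbols, tokens, and probabilities are hypothetical:

# Toy chart: "NP" can span tokens 0-2 and 3-4, "is" spans 2-3.
tokens = ["the", "dog", "is", "brown"]
chart = {
    "NP": {0: {2: 0.9}, 3: {4: 0.8}},
    "is": {2: {3: 1.0}},
}
for end, prob, frame, matches in match_pattern("{NP:1} is {NP:2}".split(), 0, chart, tokens):
    print(end, prob, frame, matches)
# Expected: end=4, prob of roughly 0.72 (0.9 * 1.0 * 0.8), frame=['{1}', 'is', '{2}'],
# matches={1: 'the dog', 2: 'brown'} (exact strings depend on untokenize)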