def bracketpaths(paths):
    """Add brackets to disambiguate paths (and remove the Eq wrapper).

    Each path is a sequence of ``Eq``-wrapped nodes. Every path is
    unwrapped with ``Eq.get`` and, where ``edge`` reports a position,
    a single bracket token is spliced in:

    * ``"["`` is inserted *after* the index found via ``firsts``;
    * otherwise ``"]"`` is inserted *before* the index found via ``lasts``;
    * otherwise the path is returned unwrapped with no bracket.

    ``paths`` must be a re-iterable sequence: it is traversed several
    times and passed to ``reversed``.

    Returns a list with one unwrapped (and possibly bracketed) list per
    input path, in the same order.
    """
    # Occurrence table for the nodes of all paths taken together
    # (presumably node -> count; confirm against dct.count).
    spans = dct.count(concat(paths))
    # Nodes seen exactly once; handed to `edge` below.
    hapax = set(node for (node, n) in spans.items() if n == 1)
    # For every node, the last element of the match found scanning the
    # paths forwards (firsts) and backwards (lasts).
    # NOTE(review): presumably findif returns the first path containing
    # the node -- confirm against findif/elem.
    firsts = dict((node, findif(elem(node), paths)[-1]) for node in spans)
    lasts = dict((node, findif(elem(node), reversed(paths))[-1])
                 for node in spans)

    def unwrap(nodes):
        # Build a real list rather than relying on map(): map() is lazy
        # on Python 3, where `map(...) + [...]` raises TypeError.
        return [Eq.get(node) for node in nodes]

    @typecheck([Eq], [str])
    def bracket(path):
        first = edge(path, firsts, hapax)
        last = edge(path, lasts, hapax)
        # -1 is edge's "no position" marker.
        if first != -1:
            # NOTE: when both edges exist only "[" is emitted; this
            # precedence is inherited from the original if/elif ladder.
            return unwrap(path[:first + 1]) + ["["] + unwrap(path[first + 1:])
        if last != -1:
            return unwrap(path[:last]) + ["]"] + unwrap(path[last:])
        return unwrap(path)

    return [bracket(path) for path in paths]
def sentences(lines):
    """Group `lines` into sentences and collapse them into a mapping.

    The input is split with ``splitby(elem('<sent>'), lines, first=True)``
    and falsy chunks are dropped. ``dct.collapse`` then receives, for each
    chunk, a key function (``speaker_code`` applied to the chunk's ``car``)
    and a value function (the chunk's ``cdr`` filtered by ``useful``,
    parsed by ``parseloop``, then reduced with ``car``).

    NOTE(review): presumably ``car``/``cdr`` are head/tail accessors and
    ``pipe`` composes left to right -- confirm against their definitions.
    """
    # Intended signature per the disabled decorator:
    # @typecheck([str], [(str, [object])], n=int)
    def parseloop(lines, n=0):
        """Recursively turn indented lines into (text, children) pairs.

        `lines` is split at the lines whose ``indent`` equals `n`; the
        first line of each group is cleaned and the remainder is parsed
        one level deeper (``n + 1``).
        """
        def at_level(line):
            return n == indent(line)

        tree = []
        for group in splitby(at_level, lines, True):
            # Clean the head before recursing, as the original did.
            head = clean(group[0])
            rest = group[1:]
            children = parseloop(rest, n=n+1) if rest else []
            tree.append((head, children))
        return tree

    chunks = filter(None, splitby(elem('<sent>'), lines, first=True))
    key_of = pipe(car, speaker_code)
    value_of = pipe(cdr, cur(filter, useful), parseloop, car)
    return dct.collapse(chunks, key_of, value_of)