def corpus(speakers, datadir='/Volumes/Data/Corpora/en/ice-gb/ice-gb-2/data/'):
    """Load the corpus entries for every (file name, speaker) pair in `speakers`.

    Warning! The default `datadir` is a hard-coded path specific to jones;
    pass `datadir` explicitly to read the corpus from anywhere else.

    speakers -- collection handed to dct.map (presumably a dict -- confirm
                against dct); each of its values is a sequence of
                (fname, speaker) pairs.
    datadir  -- directory containing the '<fname>.cor' files; file names are
                lower-cased before lookup.

    Returns whatever dct.map builds from applying mapn(per_speaker, ...) to
    each value of `speakers`.
    """
    import os  # function-scope import so this block carries its own dependency

    # Type annotation left disabled, as in the original:
    #@typecheck((str,str), [(str, [object])])
    def per_speaker(pair):
        # Unpack in the body: tuple parameters in a signature are
        # Python-2-only syntax.
        fname, speaker = pair
        path = os.path.join(datadir, fname.lower() + '.cor')
        # Index the parsed result while the file is still open, then close it.
        # NOTE(review): assumes sentences() reads the file eagerly -- confirm.
        with open(path) as f:
            return sentences(f)[speaker]

    return dct.map(lambda files: mapn(per_speaker, files), speakers)
def paths(tree):
    """Return the root-to-leaf paths of `tree`.

    tree -- a (head, children) pair, where children is a sequence of further
            (head, children) pairs and is empty (falsy) for a leaf.

    Each path is a list holding Eq(head) for every node from the root down
    to one leaf. The per-child results are combined with mapn (presumably a
    map-then-concatenate helper -- confirm), which the [[Eq]] return type in
    the typecheck annotation suggests yields a flat list of paths.
    """
    @typecheck((str,[object]), [Eq], [[Eq]])
    def makepaths(node, path):
        # Unpack in the body: tuple parameters in a signature are
        # Python-2-only syntax.
        head, children = node
        # Build a new list so sibling subtrees never share a mutated prefix.
        path = path + [Eq(head)]
        if children:
            return mapn(lambda child: makepaths(child, path), children)
        else:
            return [path]

    # The helper was previously defined but never invoked, so paths() always
    # returned None; start the walk at the root with an empty prefix.
    return makepaths(tree, [])
def leaves(tree):
    """Return the heads of the leaf nodes of `tree`.

    tree -- a (head, children) pair; children is a sequence of further
            (head, children) pairs and is empty (falsy) for a leaf.

    A leaf yields the one-element list [head]. For an inner node the
    per-child results are combined with mapn (presumably a
    map-then-concatenate helper -- confirm).
    """
    # Unpack in the body: tuple parameters in a signature are Python-2-only
    # syntax, while this form parses on both Python 2 and Python 3.
    head, children = tree
    if not children:
        return [head]
    return mapn(leaves, children)
def tinify(regions):
    """Re-encode every item in `regions` with a code produced by encode().

    regions -- collection handed to dct.map and queried with .values()
               (presumably a dict whose values are lists of lists of
               items -- confirm against callers).

    The items of all regions are flattened and tallied with dct.count, the
    tallies are sorted by their second component (snd), and the first
    components (fst), in that order, are handed to encode(). Each item is
    then replaced by code[item], keeping the two-level nesting of every
    value.
    """
    # Flatten each region's nested lists, then pool all regions together.
    pooled = mapn(concat, regions.values())
    tallies = dct.count(pooled)
    ranked = sorted(tallies.items(), key=snd)
    code = encode(map(fst, ranked))

    # Two nested maps: the outer walks a region's lists, the inner walks the
    # items of one list and looks each up in the code table.
    recode_items = cur(map, code.__getitem__)
    recode_lists = cur(map, recode_items)
    return dct.map(recode_lists, regions)