def group_words(csv):
    """Group CSV rows into a nested word -> segment -> feature table.

    Stated contract, carried over from the original signature note:
    ``[[str]] -> {str: {str: {str: [float]}}}``, i.e.
    ``{Word: {Segment: {Feature: [Value]}}}``.

    Each row is keyed by its first cell (its title), which is split in three:
    the leading lowercase run is the word, the text after it up to and
    including the first decimal digit is the segment, and everything after
    the first digit of the whole title is the feature.

    NOTE(review): ``car``, ``cdr``, ``carcdr``, ``curried``, ``takewhile``,
    ``dropwhile``, ``makesegment``, ``fnc`` and ``dct`` are defined elsewhere.
    The description above assumes Lisp-style semantics (``car`` = first
    element, ``cdr`` = the rest, ``carcdr`` = call with head and tail) and
    that ``takewhile``/``dropwhile`` are curried and return strings --
    confirm against their definitions.

    Args:
        csv: Rows of strings; the first row is dropped via ``cdr``
            (presumably a header row -- confirm against the caller).

    Returns:
        The result of mapping every word group through the per-segment
        grouping and then through ``makesegment`` (via ``dct.map_items``).

    Raises:
        AttributeError: If a title contains no decimal digit, because
            ``re.search`` then returns ``None``.
        ValueError: If a data cell cannot be converted with ``float``.
    """

    def digit_end(text):
        # Index just past the first decimal digit in ``text``.
        return re.search('[0-9]', text).end()

    def segment_name(text):
        # Keep everything up to and including the first digit.
        return text[:digit_end(text)]

    def feature(text):
        # Keep everything after the first digit.
        return text[digit_end(text):]

    def feature_entry(title, data):
        # Build a real list: on Python 3 ``map`` would yield a one-shot
        # iterator, which does not match the documented ``[float]`` values.
        return feature(title), [float(value) for value in data]

    def phones(rows):
        # Group one word's rows by segment, then turn each group of
        # (feature, values) pairs into a dict.
        return dct.map(dict, dct.collapse(rows, segment, features))

    segment = fnc.pipe(car, dropwhile(str.islower), segment_name)
    fillsegments = curried(dct.map_items)(makesegment)
    features = carcdr(feature_entry)
    words = dct.collapse(
        cdr(csv),
        fnc.pipe(car, takewhile(str.islower)),
        fnc.ident,
    )
    return dct.map(fnc.pipe(phones, fillsegments), words)
def variance(freqs):
    """Return the summed squared deviation from the average, over the average.

    Only ``cdr(freqs)`` is used, both for the average and for the deviations
    (presumably everything after the first element -- confirm against the
    definition of ``cdr``).

    NOTE(review): the sum of squared deviations is divided by the average
    itself, not by the number of values, so this is not the textbook
    variance. Confirm that this dispersion measure is the intended one.

    Raises:
        ZeroDivisionError: If the average is zero.
    """
    mean = avg(cdr(freqs))
    squared_deviations = [(mean - value) ** 2 for value in cdr(freqs)]
    return sum(squared_deviations) / mean