示例#1
0
文件: align.py 项目: sandersn/dialect
def to_html_differences(f, name, combined):
    """file*str*{char:[int]}

    Write `name` as a heading followed by an HTML table to the file-like
    object `f`: one row per character in `combined`, one cell per count, and
    a last cell holding avg() of every count except the first.
    """
    heading = "<h1>%s</h1>" % name
    print >> f, heading
    print >> f, "<table border=1 cellspacing=0 bordercolor='black'><tr><th>Char</th><th>All</th>",
    # `fs` is not defined in this function; it is looked up in the enclosing
    # scope. Each entry after the first contributes one header cell labelled
    # with characters 21-23 of that entry.
    labels = ["<th>%s</th>" % path[21:24] for path in fs[1:]]
    print >> f, "".join(labels), "<th>Avg</th></tr>"
    for char, counts in combined.items():
        print >> f, "<tr><td>%s</td>" % char,
        cells = ["<td>%s</td>" % count for count in counts]
        print >> f, "".join(cells),
        # The first count is left out of the average.
        print >> f, "<td>%.2f</td></tr>" % avg(counts[1:])
    print >> f, "</table>"
示例#2
0
def groupavg((c1,c2)):
    "group average"
    return avg(map(compose(edges.__getitem__, frozenset),
                   cross(flatten(c1), flatten(c2))))
示例#3
0
文件: sed.py 项目: sandersn/dialect
def sed_avg_total((region1, region2)):
    "([[{str:[float]}]],[[{str:[float]}]]) -> float"
    return lst.avg(map(sed_avg, region1, region2)) / 2
示例#4
0
文件: sed.py 项目: sandersn/dialect
def sed_avg(ws1, ws2):
    """[{str:[float]}]*[{str:[float]}] -> float

    Return the lst.avg of feature_sub over every pairing (lst.cross) of an
    element derived from ws1 with an element derived from ws2.
    """
    # concat(transpose_word(ws)) is presumably the flat list of segments of
    # ws -- confirm against concat's definition.
    segs1 = concat(transpose_word(ws1))
    # The second list has to come from ws2; it used to be built from ws1 as
    # well, which left ws2 unused and compared ws1 with itself.
    segs2 = concat(transpose_word(ws2))
    return lst.avg(map(fnc.uncurry(feature_sub), lst.cross(segs1, segs2)))
示例#5
0
文件: sed.py 项目: sandersn/dialect
def analyse(regions, avgs=None):
    """Build a dict of sed_distance results keyed by region-key combinations.

    Keys come from lst.all_pairs over the sorted keys of `regions`; values
    come from applying sed_distance (seeded with the overall sed_avg_total
    average) to lst.all_pairs over flatten(regions). `avgs` is accepted but
    not used in this function.
    """
    key_pairs = lst.all_pairs(sorted(regions.keys()))
    region_pairs = lst.all_pairs(flatten(regions))
    overall_avg = lst.avg(map(sed_avg_total, region_pairs))
    distance = sed_distance(overall_avg)
    return dict(zip(key_pairs, map(distance, region_pairs)))
示例#6
0
文件: sed.py 项目: sandersn/dialect
    avgregions = lst.avg(map(sed_avg_total, regions))
    return dict(zip(keys, map(sed_distance(avgregions), regions)))
def feature_sub(seg1, seg2):
    """({str:float}*{str:float}) -> float

    Cost of substituting seg1 for seg2: the number of keys that occur in
    only one of the two, plus the summed absolute difference of each value
    pair found in dct.zip(seg1, seg2).
    """
    unshared = set(seg1).symmetric_difference(set(seg2))
    value_pairs = dct.zip(seg1, seg2).values()
    return len(unshared) + sum(abs(a - b) for a, b in value_pairs)
@curried
def sed_distance(avg, (region1, region2)):
    "float*([[{str:[float]}]],[[{str:[float]}]])->float"
    return sum(map(sed_levenshtein(avg), zip(region1, region2)))
def transpose_word(word):
    """[{str:[float]}] -> [[{str:float}]]

    For each segment of `word`, build one {key: value} dict per row of
    lst.transpose(seg.values()), then return lst.transpose of the
    per-segment lists.
    """
    def split_segment(seg):
        names = seg.keys()
        return [dict(zip(names, row))
                for row in lst.transpose(seg.values())]
    per_segment = map(split_segment, word)
    return lst.transpose(per_segment)
@curried
def sed_levenshtein(avg,(ws1,ws2)):
    "float*([{str:[float]}],[{str:[float]}])->float"
    def levenshtein((w1, w2)):
        return lev._levenshtein(w1, w2, avg,
                                (lambda _:avg,lambda _:avg,feature_sub))[-1][-1]
    return lst.avg(map(levenshtein,
                       lst.cross(transpose_word(ws1), transpose_word(ws2))))
def sed_avg(ws1, ws2):
    """[{str:[float]}]*[{str:[float]}] -> float

    Return the lst.avg of feature_sub over every pairing (lst.cross) of an
    element derived from ws1 with an element derived from ws2.
    """
    # concat(transpose_word(ws)) is presumably the flat list of segments of
    # ws -- confirm against concat's definition.
    segs1 = concat(transpose_word(ws1))
    # The second list has to come from ws2; it used to be built from ws1 as
    # well, which left ws2 unused and compared ws1 with itself.
    segs2 = concat(transpose_word(ws2))
    return lst.avg(map(fnc.uncurry(feature_sub), lst.cross(segs1, segs2)))
def sed_avg_total((region1, region2)):
    "([[{str:[float]}]],[[{str:[float]}]]) -> float"
    return lst.avg(map(sed_avg, region1, region2)) / 2
示例#7
0
文件: align.py 项目: sandersn/dialect
def variance(freqs):
    """Sum of squared deviations of cdr(freqs) from their avg, over that avg.

    Note that the divisor is the average itself, not the number of values.
    """
    mean = avg(cdr(freqs))
    squared = [(mean - c) ** 2 for c in cdr(freqs)]
    return sum(squared) / mean
示例#8
0
文件: align.py 项目: sandersn/dialect
def lst_except(l, *ns):
    """Return a list of the items of l whose position is not listed in ns.

    l  -- any iterable; the surviving items keep their original order.
    ns -- zero-based positions to leave out. Positions that never occur
          (too large, or negative) are simply ignored.
    """
    # A frozenset makes each membership test constant time instead of a scan
    # of the whole ns tuple for every element of l.
    skipped = frozenset(ns)
    return [x for i, x in enumerate(l) if i not in skipped]


def find_collapsed(f, collapsed):
    """{char:[int]} -> [(char,int)] (sorted)

    Run dct.map(f, collapsed) and return the resulting items ordered by
    snd, largest first.
    """
    scored = dct.map(f, collapsed)
    return sorted(scored.items(), key=snd, reverse=True)


def diff(freqs):
    "avg of freqs[2] and freqs[8] minus the avg of every entry other than positions 0, 2 and 8"
    picked = avg([freqs[2], freqs[8]])
    others = avg(lst_except(freqs, 0, 2, 8))
    return picked - others


def variance(freqs):
    """Sum of squared deviations of cdr(freqs) from their avg, over that avg.

    Note that the divisor is the average itself, not the number of values.
    """
    mean = avg(cdr(freqs))
    squared = [(mean - c) ** 2 for c in cdr(freqs)]
    return sum(squared) / mean


# Two variants of find_collapsed built with cur(), one per scoring function.
# NOTE(review): cur is defined elsewhere; presumably it fixes the first
# argument (f) so that the result only needs `collapsed` -- confirm.
find_difference = cur(find_collapsed, diff)
find_variance = cur(find_collapsed, variance)


def to_html_group_differences(f, name, differences):
    print >> f, "<h1>%s</h1>" % name
    print >> f, "<table border=1 cellspacing=0 bordercolor='black'><tr><td></td><th>Char</th><th>Variance</th>",
    for i, (sub, variance) in enumerate(differences):