示例#1
0
文件: cat.py 项目: sandersn/dialect
def generate(division):
    """Apply ``cat`` (through ``each``) to the ``cross`` of two region groups
    with the strings 'path' and 'trigram'.

    When ``division`` equals ``'ns'`` the module-level ``northern`` and
    ``southern`` groups are used unchanged.  For any other value, element
    ``[1]`` of both groups is concatenated and shuffled in a fresh list, then
    split into two new groups: 'NorthernRandom' receives the first
    ``len(northern[1])`` items and 'SouthernRandom' receives the rest.
    """
    measures = 'path trigram'.split()
    if division == 'ns':
        groups = [northern, southern]
    else:
        pooled = northern[1] + southern[1]
        random.shuffle(pooled)
        # Split point keeps the first random group the same size as northern.
        cut = len(northern[1])
        groups = [('NorthernRandom', pooled[:cut]),
                  ('SouthernRandom', pooled[cut:])]
    each(cat, cross(groups, measures))
示例#2
0
def groupavg(clusters):
    """Group average link between a pair of clusters.

    ``clusters`` is a 2-item sequence ``(c1, c2)``.  Both clusters are
    flattened, and every item of their ``cross`` is passed through
    ``compose(edges.__getitem__, frozenset)`` (presumably: build a frozenset
    from the pair, then look it up in the module-level ``edges`` -- confirm
    against ``compose``).  Returns ``avg`` of the resulting values.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  Callers still pass one pair.
    c1, c2 = clusters
    edge_of = compose(edges.__getitem__, frozenset)
    return avg(map(edge_of, cross(flatten(c1), flatten(c2))))
示例#3
0
def complete(clusters):
    """Complete link between a pair of clusters.

    ``clusters`` is a 2-item sequence ``(c1, c2)``.  Both clusters are
    flattened, and every item of their ``cross`` is passed through
    ``compose(edges.__getitem__, frozenset)`` (presumably: build a frozenset
    from the pair, then look it up in the module-level ``edges`` -- confirm
    against ``compose``).  Returns the largest of the resulting values.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  Callers still pass one pair.
    c1, c2 = clusters
    edge_of = compose(edges.__getitem__, frozenset)
    return max(map(edge_of, cross(flatten(c1), flatten(c2))))
示例#4
0
def single(clusters):
    """Single link between a pair of clusters.

    ``clusters`` is a 2-item sequence ``(c1, c2)``.  Both clusters are
    flattened, and every item of their ``cross`` is passed through
    ``compose(edges.__getitem__, frozenset)`` (presumably: build a frozenset
    from the pair, then look it up in the module-level ``edges`` -- confirm
    against ``compose``).  Returns the smallest of the resulting values.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  Callers still pass one pair.
    c1, c2 = clusters
    edge_of = compose(edges.__getitem__, frozenset)
    return min(map(edge_of, cross(flatten(c1), flatten(c2))))
示例#5
0
文件: sed.py 项目: sandersn/dialect
def sed_avg(ws1, ws2):
    """[{str:[float]}]*[{str:[float]}] -> float

    Average ``feature_sub`` value over ``lst.cross`` of the segments of
    ``ws1`` with the segments of ``ws2``.  Each word is run through
    ``transpose_word`` and ``concat`` to obtain its segments.
    """
    segs1 = concat(transpose_word(ws1))
    # The second side must come from ws2; deriving both sides from ws1
    # would ignore ws2 and only ever compare the first word with itself.
    segs2 = concat(transpose_word(ws2))
    return lst.avg(map(fnc.uncurry(feature_sub), lst.cross(segs1, segs2)))
示例#6
0
文件: sed.py 项目: sandersn/dialect
    avgregions = lst.avg(map(sed_avg_total, regions))
    return dict(zip(keys, map(sed_distance(avgregions), regions)))
def feature_sub(seg1, seg2):
    """({str:float}*{str:float}) -> float

    Cost of replacing one segment with the other: the number of keys found
    in exactly one of the two segments, plus the summed absolute difference
    of each value pair in ``dct.zip(seg1, seg2)`` (presumably the features
    both segments have -- confirm against ``dct.zip``).
    """
    unshared = set(seg1).symmetric_difference(seg2)
    value_pairs = dct.zip(seg1, seg2).values()
    difference = sum(abs(left - right) for left, right in value_pairs)
    return len(unshared) + difference
@curried
def sed_distance(avg, regions):
    """float*([[{str:[float]}]],[[{str:[float]}]])->float

    Sum of ``sed_levenshtein(avg)`` applied to each pair produced by
    ``zip(region1, region2)``, where ``regions`` is the 2-item sequence
    ``(region1, region2)``.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  The function still takes two
    # positional arguments, the second being one pair.
    region1, region2 = regions
    return sum(map(sed_levenshtein(avg), zip(region1, region2)))
def transpose_word(word):
    """[{str:[float]}] -> [[{str:float}]]

    Each segment dict is expanded into a list of dicts, one per item of
    ``lst.transpose`` of its values, re-keyed with the segment's own keys.
    The per-segment lists are then passed through ``lst.transpose`` again.
    """
    def transpose_segment(seg):
        # keys() and values() of an unmodified dict come back in matching
        # order, so zipping them re-attaches each value to its key.
        names = seg.keys()
        return [dict(zip(names, column))
                for column in lst.transpose(seg.values())]

    per_segment = [transpose_segment(seg) for seg in word]
    return lst.transpose(per_segment)
@curried
def sed_levenshtein(avg, words):
    """float*([{str:[float]}],[{str:[float]}])->float

    Average Levenshtein result over ``lst.cross`` of the transposed forms of
    ``ws1`` and ``ws2``, where ``words`` is the 2-item sequence
    ``(ws1, ws2)``.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  The function still takes two
    # positional arguments, the second being one pair.
    ws1, ws2 = words

    def levenshtein(pair):
        w1, w2 = pair
        # Two constant callbacks returning ``avg`` plus ``feature_sub``:
        # presumably the insert/delete/substitute costs -- confirm against
        # lev._levenshtein.
        costs = (lambda _: avg, lambda _: avg, feature_sub)
        table = lev._levenshtein(w1, w2, avg, costs)
        # The value of interest is the last cell of the last row.
        return table[-1][-1]

    return lst.avg(map(levenshtein,
                       lst.cross(transpose_word(ws1), transpose_word(ws2))))
def sed_avg(ws1, ws2):
    """[{str:[float]}]*[{str:[float]}] -> float

    Average ``feature_sub`` value over ``lst.cross`` of the segments of
    ``ws1`` with the segments of ``ws2``.  Each word is run through
    ``transpose_word`` and ``concat`` to obtain its segments.
    """
    segs1 = concat(transpose_word(ws1))
    # The second side must come from ws2; deriving both sides from ws1
    # would ignore ws2 and only ever compare the first word with itself.
    segs2 = concat(transpose_word(ws2))
    return lst.avg(map(fnc.uncurry(feature_sub), lst.cross(segs1, segs2)))
def sed_avg_total(regions):
    """([[{str:[float]}]],[[{str:[float]}]]) -> float

    Half of ``lst.avg`` of ``sed_avg`` mapped over ``region1`` and
    ``region2`` in parallel, where ``regions`` is the 2-item sequence
    ``(region1, region2)``.
    """
    # Unpack in the body rather than in the signature: tuple parameters are
    # Python-2-only syntax (PEP 3113).  Callers still pass one pair.
    region1, region2 = regions
    return lst.avg(map(sed_avg, region1, region2)) / 2