def letterFreq(query, title, letter):
    """Return the offset-adjusted query/title frequency ratio of ``letter``.

    Occurrences of ``letter`` are totalled over every item of ``query`` and
    over every item of ``title`` using each item's ``count`` method. A fixed
    offset is added to each total (10 for the query, 0.5 for the title), the
    query total is divided by the title total, and that ratio is divided by
    ``len(query)``.

    Both divisions are delegated to ``try_divide``, which is defined outside
    this block (presumably it guards against a zero divisor -- confirm).
    """
    # A single pass is enough: nothing here depends on a loop index, so
    # repeating the computation would only recompute the same value.
    cnt_query = sum(q.count(letter) for q in query) + 10
    cnt_title = sum(t.count(letter) for t in title) + 0.5
    return try_divide(try_divide(cnt_query, cnt_title), len(query))
def jaccardCoef(A, B):
    """Return the Jaccard coefficient of the two collections.

    Both arguments are converted to sets; the result is the size of their
    intersection divided by the size of their union. The division is
    delegated to ``try_divide``, which is defined outside this block.
    """
    set_a = set(A)
    set_b = set(B)
    shared = len(set_a & set_b)
    combined = len(set_a | set_b)
    return try_divide(shared, combined)
def edist_norm(query, title):
    """Return the count of closely matching (query, title) pairs per query item.

    Every item of ``query`` is compared with every item of ``title`` through
    ``seq_matcher(None, q, t).ratio()``; a pair is counted when that ratio is
    strictly greater than 0.9. The count is then divided by ``len(query)``
    via ``try_divide``.

    ``seq_matcher`` and ``try_divide`` are defined outside this block
    (``seq_matcher`` looks like an alias of ``difflib.SequenceMatcher`` --
    confirm against the file's imports).
    """
    close_pairs = sum(
        1
        for q_item in query
        for t_item in title
        # ratio() is a similarity score, so higher means more alike.
        if seq_matcher(None, q_item, t_item).ratio() > 0.9
    )
    return try_divide(close_pairs, len(query))
def DiceDist(A, B):
    """Return the Dice coefficient of the two collections.

    Both arguments are converted to sets; the result is twice the size of
    their intersection divided by the sum of the two set sizes. The division
    is delegated to ``try_divide``, which is defined outside this block.
    """
    set_a = set(A)
    set_b = set(B)
    overlap = len(set_a & set_b)
    # Denominator is |A| + |B| (sizes after de-duplication), not |A union B|.
    total_size = len(set_a) + len(set_b)
    return try_divide(2 * overlap, total_size)
def count(A, B):
    """Return the share of distinct ``B`` items that also occur in ``A``.

    Both arguments are converted to sets; the result is the size of their
    intersection divided by the number of distinct items in ``B``. The
    division is delegated to ``try_divide``, which is defined outside this
    block.
    """
    set_a = set(A)
    set_b = set(B)
    common = len(set_a & set_b)
    return try_divide(common, len(set_b))