def f4(self, pos_tuples, neg_tuples, msg_attr_index):
    """Pick the message-attribute value with the best net weighted score.

    Every tuple is unpacked as ``(u, m, r, d)``; only ``m`` (indexed with
    ``msg_attr_index``) and ``d`` are read.  For a candidate value, the score
    over one tuple list is ``match_count * sum(1 / (d + 1))`` taken over the
    tuples whose attribute equals that value.  The net score is the score
    over ``pos_tuples`` minus the score over ``neg_tuples``.

    Candidates are the distinct attribute values found in ``pos_tuples``.
    The result is the value part of the first pair returned by
    ``ut.top_n(pairs, 1, key)`` (presumably the highest-scoring pair --
    confirm against ``ut.top_n``).
    """
    def score(value, tuples):
        # Collect the matching distances first, then weight them, mirroring
        # the original filter-then-map evaluation order.
        distances = [d for (_u, m, _r, d) in tuples
                     if m[msg_attr_index] == value]
        weights = [1.0 / float(d + 1) for d in distances]
        return len(distances) * sum(weights)

    candidates = set([m[msg_attr_index] for (_u, m, _r, _d) in pos_tuples])
    scored = [(value, score(value, pos_tuples) - score(value, neg_tuples))
              for value in candidates]
    return ut.top_n(scored, 1, lambda pair: pair[1])[0][0]
def build_weighted_max_pos_proportion_selector(weight_f=lambda x: 1):
    """Build a selector that favours the key with the largest positive share.

    The returned callable takes ``(pos, neg)``, aggregates both with
    ``weighted_agg(..., weight_f)`` (assumed to return a mapping from key to
    aggregated weight -- confirm against ``weighted_agg``) and scores every
    key of the positive aggregate as::

        pos_weight / (pos_weight + neg_weight)

    Keys that do not occur in the negative aggregate score ``1.0``.

    The previous implementation was missing the parentheses around the
    denominator, so it actually computed ``1 + neg_weight``; that rewarded
    keys with *more* negative weight and disagreed with the ``1.0`` used for
    keys without any negative weight.

    Returns:
        A function ``(pos, neg) -> (key, proportion)``: the first pair that
        ``ut.top_n`` yields when asked for one item keyed on the proportion.
        Note that the whole pair is returned, not only the key.
    """
    def proportions(pos, neg):
        pos_weights = weighted_agg(pos, weight_f)
        neg_weights = weighted_agg(neg, weight_f)
        scored = []
        for key in pos_weights.keys():
            pos_weight = float(pos_weights[key])
            if key in neg_weights:
                share = pos_weight / (pos_weight + float(neg_weights[key]))
            else:
                share = 1.0
            scored.append((key, share))
        return scored

    def select(p, n):
        return ut.top_n(proportions(p, n), 1, lambda pair: pair[1])[0]

    return select
def build_top_n_optimizer(users, inters, resps, top_n, model_builder,
                          relative_percent_compare=False):
    """Build a chooser restricted to the ``top_n`` best-ranked interactions.

    ``users``, ``inters`` and ``resps`` are zipped into rows.  Interactions
    are ranked through ``ut.top_n`` either by the sum of their responses or,
    when ``relative_percent_compare`` is true, by that sum divided by the
    number of rows carrying the interaction.

    Side effects: prints the first five rows whose interaction made the
    shortlist and calls ``model_builder.set_data_rows`` with *all* rows.

    Returns:
        A function of one argument ``x`` that returns the first item of
        ``ut.top_n(shortlist, 1, key)`` where the key is
        ``model_builder.prob_f()(x, interaction)``.
    """
    rows = list(zip(users, inters, resps))

    def positive_total(inter):
        return sum([row[2] for row in rows if row[1] == inter])

    def positive_rate(inter):
        numerator = float(positive_total(inter))
        occurrences = len([row for row in rows if row[1] == inter])
        return numerator / float(occurrences)

    if relative_percent_compare:
        shortlist = ut.top_n(inters, top_n, positive_rate)
    else:
        shortlist = ut.top_n(inters, top_n, positive_total)

    shortlisted_rows = [row for row in rows if row[1] in shortlist]
    print(shortlisted_rows[:5])
    model_builder.set_data_rows(rows)

    def choose(x):
        return ut.top_n(shortlist, 1,
                        lambda inter: model_builder.prob_f()(x, inter))[0]

    return choose
def n_best_inters(users, inters, responses, n):
    """Rank interactions by the sum of their responses.

    ``users``, ``inters`` and ``responses`` are zipped into rows; for each
    entry of ``inters`` the responses of all rows carrying an equal
    interaction are summed.

    An unused helper lambda that referenced the undefined name ``tups`` was
    removed; it was never called, so results are unchanged.

    Returns:
        Whatever ``ut.top_n(pairs, n, key)`` returns for the
        ``(interaction, response_sum)`` pairs keyed on the sum (presumably
        the ``n`` highest-scoring pairs -- confirm against ``ut.top_n``).
        One pair is produced per element of ``inters``, so repeated
        interactions yield repeated pairs.
    """
    rows = list(zip(users, inters, responses))

    def response_sum(inter):
        return sum([row[2] for row in rows if row[1] == inter])

    results = [(inter, response_sum(inter)) for inter in inters]
    return ut.top_n(results, n, lambda pair: pair[1])
def n_best_messages(users, data_gen, msgs, n):
    """Return the messages ranked best by their summed responses.

    Args:
        users: passed through to ``data_gen.gen_crossprod_rows``.
        data_gen: object providing ``gen_random_inters(count)`` and
            ``gen_crossprod_rows(users, msgs)``.  The latter is transposed
            with ``zip(*...)``, so it is assumed to return parallel columns
            whose third column holds the responses -- confirm against the
            generator.
        msgs: either the candidate messages, or an integer, in which case
            that many messages are drawn with ``gen_random_inters``.
        n: number of items requested from ``ut.top_n``.

    An unused helper lambda that referenced the undefined name ``tups`` was
    removed, and the ``type(msgs) == type(0)`` comparison was replaced with
    ``isinstance``.

    Returns:
        A list with the message part of each ``(message, response_sum)``
        pair returned by ``ut.top_n``.
    """
    if isinstance(msgs, int):
        msgs = data_gen.gen_random_inters(msgs)

    # Materialise the rows: they are scanned once per message below.
    rows = list(zip(*data_gen.gen_crossprod_rows(users, msgs)))

    def response_sum(msg):
        return sum([row[2] for row in rows if row[1] == msg])

    results = [(msg, response_sum(msg)) for msg in msgs]
    best = ut.top_n(results, n, lambda pair: pair[1])
    return [msg for (msg, _score) in best]
def knn(self, user, k):
    """Return normalised positive/negative data of the ``k`` nearest rows.

    Each row of ``self.data`` is unpacked as ``(u, p, n)`` and extended with
    ``self.similarity_f(user, u)``.  ``ut.top_n`` selects ``k`` rows keyed on
    that similarity; their ``(p, similarity)`` and ``(n, similarity)`` pairs
    are each passed through ``self.normalize``.

    Results are memoised in ``self.cache``.  The cache key is now
    ``(user, k)``: previously it was ``user`` alone, so a second call for
    the same user with a different ``k`` silently returned the neighbours
    computed for the first ``k``.
    NOTE(review): any other code that reads ``self.cache[user]`` directly
    must be updated to the ``(user, k)`` key.

    Returns:
        A ``(pos, neg)`` tuple of the two normalised results.
    """
    cache_key = (user, k)
    if cache_key not in self.cache:
        scored = [(u, p, n, self.similarity_f(user, u))
                  for (u, p, n) in self.data]
        nearest = ut.top_n(scored, k, lambda row: row[3])
        pos = self.normalize([(p, s) for (_u, p, _n, s) in nearest])
        neg = self.normalize([(n, s) for (_u, _p, n, s) in nearest])
        self.cache[cache_key] = (pos, neg)
    return self.cache[cache_key]
def knn(self, user, k, resp=None):
    """Return the ``k`` rows nearest to ``user``, optionally by response.

    Args:
        user: the user the distances are measured from.
        k: number of rows requested from ``ut.top_n``.
        resp: ``1`` searches ``self.positives``, ``0`` searches
            ``self.negatives``; any other value searches ``self.data_rows``.

    Each row is unpacked as ``(u, m, r)`` and extended with
    ``self.distance_f(user, u)``.  Rows are ranked by ``1 / (distance + 1)``
    so that smaller distances score higher.

    ``k`` was previously not forwarded to ``ut.top_n`` at all (the key
    function was passed in the count position); it is now passed as the
    second argument, matching the ``top_n(items, n, key)`` call shape used
    by the other callers.

    Returns:
        Whatever ``ut.top_n`` returns for the ``(u, m, r, distance)`` tuples.
    """
    rows = self.data_rows
    if resp == 1:
        rows = self.positives
    elif resp == 0:
        rows = self.negatives

    dists = [(u, m, r, self.distance_f(user, u)) for (u, m, r) in rows]
    return ut.top_n(dists, k, lambda row: 1.0 / float(row[3] + 1))
def f4(self, pos_tuples, neg_tuples, msg_attr_index):
    """Choose the attribute value whose weighted positive evidence wins.

    Tuples are unpacked as ``(u, m, r, d)``.  For a value ``v`` and a list
    of tuples, the weighted score is the number of tuples with
    ``m[msg_attr_index] == v`` multiplied by the sum of ``1 / (d + 1)`` over
    those same tuples.  Each distinct value seen in ``pos_tuples`` is scored
    as ``score(pos_tuples) - score(neg_tuples)``.

    Returns the value of the first ``(value, net_score)`` pair produced by
    ``ut.top_n(pairs, 1, key)`` keyed on the net score.
    """
    attr = msg_attr_index

    def weighted_value(value, tuples):
        # Matching is completed before any weight is computed, as in the
        # original filter-then-map formulation.
        matched_d = [d for (_u, m, _r, d) in tuples if m[attr] == value]
        inverse_sum = sum([1.0 / float(d + 1) for d in matched_d])
        return len(matched_d) * inverse_sum

    def net_value(value):
        return weighted_value(value, pos_tuples) - weighted_value(value, neg_tuples)

    distinct_values = set([m[attr] for (_u, m, _r, _d) in pos_tuples])
    ranked = ut.top_n([(value, net_value(value)) for value in distinct_values],
                      1, lambda pair: pair[1])
    return ranked[0][0]
def n_best_messages(users, data_gen, msgs, n):
    """Return the messages with the highest summed responses.

    Args:
        users: forwarded to ``data_gen.gen_crossprod_rows``.
        data_gen: object providing ``gen_random_inters(count)`` and
            ``gen_crossprod_rows(users, msgs)``; the result of the latter is
            transposed with ``zip(*...)`` and the third field of each row is
            summed, so it is assumed to be a sequence of parallel columns
            ending in the responses -- confirm against the generator.
        msgs: the candidate messages, or an integer count of random
            messages to generate with ``gen_random_inters``.
        n: number of items requested from ``ut.top_n``.

    Compared with the earlier version, the never-called ``pos_count`` lambda
    (which referenced the undefined name ``tups``) is gone and the integer
    check uses ``isinstance`` instead of comparing ``type`` objects.

    Returns:
        A list of messages, taken from the ``(message, response_sum)``
        pairs that ``ut.top_n`` returns.
    """
    if isinstance(msgs, int):
        msgs = data_gen.gen_random_inters(msgs)

    # Keep the rows in a list because every message re-scans them.
    rows = list(zip(*data_gen.gen_crossprod_rows(users, msgs)))

    results = []
    for msg in msgs:
        total = sum([row[2] for row in rows if row[1] == msg])
        results.append((msg, total))

    return [msg for (msg, _total) in ut.top_n(results, n, lambda pair: pair[1])]
def build_top_n_optimizer(users, inters, resps, top_n, model_builder,
                          relative_percent_compare=False):
    """Create a per-input chooser over the best-ranked interactions.

    Rows are formed by zipping ``users``, ``inters`` and ``resps``.  The
    shortlist is ``ut.top_n(inters, top_n, key)`` where the key is the sum
    of responses for the interaction, or that sum divided by the number of
    rows with the interaction when ``relative_percent_compare`` is set.

    Side effects: the first five rows belonging to shortlisted interactions
    are printed, and ``model_builder.set_data_rows`` receives every row (not
    only the shortlisted ones).

    Returns:
        A one-argument function; for input ``x`` it returns the first item
        of ``ut.top_n(shortlist, 1, key)`` keyed on
        ``model_builder.prob_f()(x, interaction)``.
    """
    all_rows = list(zip(users, inters, resps))

    def rows_with(inter):
        return [row for row in all_rows if row[1] == inter]

    def count_positive(inter):
        return sum([row[2] for row in rows_with(inter)])

    def share_positive(inter):
        positives = float(count_positive(inter))
        return positives / float(len(rows_with(inter)))

    ranking = share_positive if relative_percent_compare else count_positive
    best_inters = ut.top_n(inters, top_n, ranking)

    best_rows = [row for row in all_rows if row[1] in best_inters]
    print(best_rows[:5])
    model_builder.set_data_rows(all_rows)

    def pick_for(x):
        def probability(inter):
            return model_builder.prob_f()(x, inter)
        return ut.top_n(best_inters, 1, probability)[0]

    return pick_for
def build_weighted_mode_selector(weight_f=lambda x: 1):
    """Build a selector returning the key with the top aggregated weight.

    The returned callable takes ``(pos, neg)`` but only uses ``pos``: it
    aggregates it with ``weighted_agg(pos, weight_f)``, hands the resulting
    ``(key, weight)`` items to ``ut.top_n`` asking for one item keyed on the
    weight, and returns the key of the first item.
    """
    def select(pos, neg):
        weighted_items = weighted_agg(pos, weight_f).items()
        best = ut.top_n(weighted_items, 1, lambda entry: entry[1])
        return best[0][0]

    return select
def max_ratio_occurrences_in(in_list, out_list):
    """Return the element of ``in_list`` with the best in/out count ratio.

    Every element ``x`` of ``in_list`` is scored as
    ``(1 + count_in(x, in_list)) / (1 + count_in(x, out_list))``; the added
    ones keep the denominator non-zero.  The element of the first pair
    returned by ``ut.top_n(pairs, 1, key)`` keyed on the ratio is returned.
    """
    ratios = []
    for item in in_list:
        inside = float(1 + count_in(item, in_list))
        outside = float(1 + count_in(item, out_list))
        ratios.append((item, inside / outside))

    winner = ut.top_n(ratios, 1, lambda pair: pair[1])[0]
    return winner[0]
def max_net_occurrences_in(in_list, out_list):
    """Return the element of ``in_list`` with the best net occurrence count.

    Every element ``x`` of ``in_list`` is scored as
    ``count_in(x, in_list) - count_in(x, out_list)``.  The element of the
    first pair returned by ``ut.top_n(pairs, 1, key)`` keyed on that
    difference is returned.
    """
    net_counts = [
        (item, count_in(item, in_list) - count_in(item, out_list))
        for item in in_list
    ]
    winner = ut.top_n(net_counts, 1, lambda pair: pair[1])[0]
    return winner[0]
def mode(vals):
    """Return the value of ``vals`` that ``ut.top_n`` ranks first by count.

    Each element is paired with ``count_in(element, vals)``; the pairs are
    passed to ``ut.top_n`` asking for one item keyed on the count, and the
    value of the first returned pair is the result.
    """
    counted = [(value, count_in(value, vals)) for value in vals]
    winner = ut.top_n(counted, 1, lambda pair: pair[1])[0]
    return winner[0]
def n_best_inters(users, inters, responses, n):
    """Rank interactions by their summed responses.

    The three inputs are zipped into ``(user, interaction, response)`` rows.
    For every entry of ``inters`` the responses of the rows whose
    interaction equals it are added up.

    The never-called ``pos_count`` lambda, which referenced the undefined
    name ``tups``, has been dropped; the returned value is unaffected.

    Returns:
        The result of ``ut.top_n(pairs, n, key)`` over
        ``(interaction, response_sum)`` pairs keyed on the sum.  There is
        one pair per element of ``inters``, so duplicates in ``inters``
        produce duplicate pairs.
    """
    rows = list(zip(users, inters, responses))

    results = []
    for inter in inters:
        total = sum([row[2] for row in rows if row[1] == inter])
        results.append((inter, total))

    return ut.top_n(results, n, lambda pair: pair[1])
def max_ratio_occurrences_in(in_list, out_list):
    """Pick the ``in_list`` element with the highest smoothed count ratio.

    The score of an element ``x`` is
    ``(1 + count_in(x, in_list)) / (1 + count_in(x, out_list))``, computed
    in floating point; adding one to both counts prevents a zero
    denominator.  Returns the element of the first pair that
    ``ut.top_n(pairs, 1, key)`` yields when keyed on the score.
    """
    def ratio(item):
        numerator = float(1 + count_in(item, in_list))
        denominator = float(1 + count_in(item, out_list))
        return numerator / denominator

    scored = [(item, ratio(item)) for item in in_list]
    return ut.top_n(scored, 1, lambda pair: pair[1])[0][0]
def n_best_messages(datagen, users, messages, n):
    """Rank messages by the summed third column of their generated rows.

    For each message, ``datagen.gen_crossprod_rows(users, [message])`` is
    called once and element ``[2]`` of its result is summed (presumably the
    response column -- confirm against the generator).

    Returns:
        Whatever ``ut.top_n(pairs, n, key)`` returns for the
        ``(message, total)`` pairs keyed on the total.  Unlike a plain list
        of messages, the pairs themselves are returned.
    """
    results = []
    for message in messages:
        generated = datagen.gen_crossprod_rows(users, [message])
        results.append((message, sum(generated[2])))

    return ut.top_n(results, n, lambda pair: pair[1])