def fit(self, ratings):
    """Compute per-item average ratings and per-user offsets from ``ratings``.

    ``ratings`` is iterated several times, so it must be a re-iterable
    collection (e.g. a list), not a one-shot generator. Each entry is
    unpacked as a 3-tuple ``(_, item, rating)``.

    NOTE(review): ``key_i``, ``key_u`` and ``key_r`` are presumably the
    accessors for the item id, user id and rating of an entry, and
    ``sgroupby`` presumably yields ``(key, group)`` pairs -- confirm against
    their definitions.

    Sets the following attributes:
        avg_item: dict mapping each item key to the mean rating of its group.
        global_avg_item: mean rating over all entries.
        offset_user: dict mapping each user key to the mean of
            ``rating - avg_item[item]`` over that user's entries.
        global_offset_user: mean of ``rating - avg_item[item]`` over all
            entries.
    """
    # Build real lists instead of passing ``map(...)`` to NumPy: on Python 3
    # ``map`` returns a lazy iterator, which ``np.mean`` cannot average.
    self.avg_item = {
        iid: np.mean([key_r(row) for row in data])
        for iid, data in sgroupby(ratings, key_i)
    }
    self.global_avg_item = np.mean([key_r(row) for row in ratings])

    # A user's offset is how far, on average, their ratings sit from the
    # average rating of the items they rated.
    self.offset_user = {
        uid: np.mean([r - self.avg_item[i] for _, i, r in data])
        for uid, data in sgroupby(ratings, key_u)
    }
    self.global_offset_user = np.mean(
        [r - self.avg_item[i] for _, i, r in ratings]
    )
def better_mean(data, key):
    """Return per-group means pulled towards the global mean, plus that mean.

    For every group produced by ``sgroupby(data, key)`` the result is

        (global_avg * K + sum(group)) / (K + len(group))

    with ``K = var(group) / var(all ratings)``; i.e. the plain group mean
    after adding ``K`` pseudo-ratings equal to the global average.

    ``data`` is iterated twice, so it must be a re-iterable collection.

    NOTE(review): ``key_r`` presumably extracts the rating from an entry and
    ``sgroupby`` presumably yields ``(key, group)`` pairs -- confirm against
    their definitions.

    Args:
        data: collection of rating entries.
        key: function passed to ``sgroupby`` to choose the grouping key.

    Returns:
        A ``(averages, global_avg)`` tuple where ``averages`` maps each group
        key to its adjusted mean and ``global_avg`` is the mean over all
        entries.
    """
    # Materialise as a list: the values are consumed twice (var and mean),
    # which a lazy Python 3 ``map`` object would not survive.
    all_ratings = [key_r(row) for row in data]
    global_var = np.var(all_ratings)
    global_avg = np.mean(all_ratings)

    # Read each group's ratings immediately; they are needed for var, sum and
    # len below, and group iterators may not be valid once grouping advances.
    grouped = [
        (group_id, [key_r(row) for row in rows])
        for group_id, rows in sgroupby(data, key)
    ]

    if global_var > 0:
        weight = {
            group_id: np.var(values) / global_var
            for group_id, values in grouped
        }
    else:
        # All ratings are identical (or there are none): the variance ratio
        # would be 0/0 and turn every average into NaN. Zero weight falls
        # back to the plain group mean.
        weight = {group_id: 0.0 for group_id, _ in grouped}

    avg_item = {
        group_id: (global_avg * weight[group_id] + np.sum(values))
        / (weight[group_id] + len(values))
        for group_id, values in grouped
    }
    return avg_item, global_avg