def mycmp(user1, user2):
    """Three-way comparison of two users by the size of their support.

    Args:
        user1: First user record, passed unchanged to ``supp_user``.
        user2: Second user record, passed unchanged to ``supp_user``.

    Returns:
        -1 when ``supp_user(user1)`` has fewer elements than
        ``supp_user(user2)``, 1 when it has more, and otherwise whatever
        ``set_cmp(user1, user2)`` returns as the tie-break.
    """
    # Evaluate each support size once instead of once per branch; a
    # comparator is called many times during a sort.
    size_1 = len(supp_user(user1))
    size_2 = len(supp_user(user2))
    if size_1 < size_2:
        return -1
    if size_1 > size_2:
        return 1
    return set_cmp(user1, user2)
def set_cmp(user1, user2):
    """Three-way comparison of two users' supports, smallest element first.

    The elements of ``supp_user(user1)`` and ``supp_user(user2)`` are each
    sorted ascending and compared position by position; the first position
    at which they differ decides the result. If one sequence runs out while
    the other still has elements, the shorter one orders first.

    The values returned by ``supp_user`` are never modified, so it is safe
    for ``supp_user`` to hand back a shared or cached collection.

    Args:
        user1: First user record, passed unchanged to ``supp_user``.
        user2: Second user record, passed unchanged to ``supp_user``.

    Returns:
        -1, 0 or 1.
    """
    # Sorting once replaces the repeated min()/remove() scans and works on
    # private copies rather than on the caller-visible collections.
    items_1 = sorted(supp_user(user1))
    items_2 = sorted(supp_user(user2))
    for left, right in zip(items_1, items_2):
        if left < right:
            return -1
        if left > right:
            return 1
    # Every shared position matched; only a length difference can still
    # separate the two supports.
    if len(items_1) < len(items_2):
        return -1
    if len(items_1) > len(items_2):
        return 1
    return 0
def k_corating_slice(sorted_ratings, myslice, items_to_keep=None):
    """Fill in or blank out ratings for the rows selected by ``myslice``.

    ``myslice`` selects rows ``[start, stop)`` of ``sorted_ratings``. The
    items processed are the union of ``supp_user(row)`` over those rows,
    collected before any cell is changed.

    For each such item:

    * if ``items_to_keep`` is None or contains the item, every selected row
      whose cell equals ``unknown_rating`` is assigned
      ``adapter(pd_rating(original_ratings, int(row[-1] - 1), item, web)[0])``;
    * otherwise the cell is set to ``unknown_rating`` in every selected row.

    Nothing is returned.

    NOTE(review): the writes go to ``sorted_ratings[myslice, :]``; they reach
    ``sorted_ratings`` itself only if that indexing yields a view (as a NumPy
    basic slice does) -- confirm against the caller.
    NOTE(review): ``row[-1] - 1`` presumably turns a 1-based id stored in the
    last column into a 0-based index -- confirm.
    """
    logging.info('k corating slice(%s,%s)', myslice.start, myslice.stop)
    rows = sorted_ratings[myslice, :]

    def predict_if_unknown(row, item_id):
        # Only cells still marked unknown are replaced by a prediction.
        if row[item_id] == unknown_rating:
            row[item_id] = adapter(
                pd_rating(original_ratings, int(row[-1] - 1), item_id, web)[0])

    wanted_items = set()
    for row in rows:
        wanted_items = wanted_items.union(supp_user(row))

    for item_id in wanted_items:
        if items_to_keep is not None and item_id not in items_to_keep:
            # Dropped item: erase it from every selected row.
            for row in rows:
                row[item_id] = unknown_rating
            continue
        for row in rows:
            predict_if_unknown(row, item_id)
def generate_aux(user_id):
    """Build a partly-incorrect auxiliary rating vector for one user.

    Reads row ``user_id`` of the module-level ``original_ratings`` and takes
    the items in ``supp_user(row)``. At most ``total`` of those items are
    sampled; of the sampled items at most ``correct`` keep the user's own
    value, and the rest receive a random integer in
    ``[min_rating, max_rating]`` that differs from the user's value.

    Returns:
        A list of length ``item_size``; positions of items that were not
        sampled hold 0.
    """
    def random_wrong(start, end, right_value, tp):
        # Draw until the value differs from right_value. Returns None when
        # tp is neither int nor float. Never returns if every possible draw
        # equals right_value (e.g. start == end == right_value).
        if tp == int:
            draw = random.randint
        elif tp == float:
            draw = random.uniform
        else:
            return None
        while True:
            candidate = draw(start, end)
            if candidate != right_value:
                return candidate

    ratings = original_ratings[user_id, :]
    rated_items = list(supp_user(ratings))

    # Sample only when there are more candidates than requested; otherwise
    # use the whole list as-is.
    chosen = (random.sample(rated_items, total)
              if total < len(rated_items) else rated_items)
    exact = (random.sample(chosen, correct)
             if correct < len(chosen) else chosen)

    aux = [0] * item_size
    for item in chosen:
        aux[item] = (
            ratings[item] if item in exact
            else random_wrong(min_rating, max_rating, ratings[item], int)
        )
    return aux
def score(aux, record):
    """Return the weighted similarity of ``record`` to ``aux``.

    For every item id in ``supp_user(aux)``, multiplies
    ``item_weight(item_id)`` by ``sim_rate(aux[item_id], record[item_id])``
    and adds the products up, starting from 0.
    """
    # Accumulate explicitly (rather than via the builtin sum) so the order
    # and manner of the additions stay exactly as written.
    weighted_total = 0
    for item_id in supp_user(aux):
        item_w = item_weight(item_id)
        weighted_total += item_w * sim_rate(aux[item_id], record[item_id])
    return weighted_total
def analysis(original_ratings):
    """Plot and log the distribution of support sizes over all users.

    For every row of ``original_ratings`` the size ``len(supp_user(row))`` is
    computed. The sorted sizes are plotted against their rank and saved to
    ``size.jpg``; a histogram with buckets of width 100 and the mean size are
    written to the log.

    The histogram always has at least 10 buckets and grows when a size of
    1000 or more occurs, instead of failing with an IndexError. With no
    users at all the mean is skipped (and a warning logged) instead of
    dividing by zero.

    Args:
        original_ratings: Iterable of user records accepted by ``supp_user``.
    """
    size_list = sorted(len(supp_user(user)) for user in original_ratings)

    figure = plt.figure()
    try:
        plt.plot(list(range(len(size_list))), size_list)
        plt.savefig('size.jpg')
    finally:
        # Release the figure; pyplot otherwise keeps it alive until closed.
        plt.close(figure)

    portion = [0] * 10
    for size in size_list:
        index = size // 100
        if index >= len(portion):
            # A support this large has no bucket yet; extend the histogram.
            portion.extend([0] * (index - len(portion) + 1))
        portion[index] += 1
    logging.info(portion)

    if size_list:
        logging.info(sum(size_list) / len(size_list))
    else:
        logging.warning('analysis: no users, mean support size is undefined')