def mae_of_average(ratio, seed):
    """Score the plain row-average predictor, as a baseline for the LSH method.

    For every sampled row of the prepared matrix, each entry equal to 0 is
    predicted with the mean of that row's strictly positive entries and
    compared with the same entry of the original matrix. Entries whose
    original value is -1 are skipped.

    NOTE: despite the name, the returned figure is the square root of the
    mean of the squared absolute errors, not a plain mean absolute error.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: forwarded to ``wh.prepare_data``; ``seed + 1`` is used to
        draw the test samples
    :return: ``(rmae, reference_columns)``; ``reference_columns`` holds, for
        every scored entry, the number of positive entries in its row
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    # Shift the seed so that every random draw within one run is different.
    test_samples = wh.prepare_test_data(num_of_users, 50, seed + 1)

    errors = []
    reference_columns = []
    for row_index in test_samples:
        row = data[row_index]
        truth = org_data[row_index]
        known = row[row > 0]
        row_mean = np.average(known)
        known_count = len(known)
        for col in np.argwhere(row == 0):
            # -1 in the original matrix means there is nothing to compare to.
            if truth[col] == -1:
                continue
            errors.append(np.abs(truth[col] - row_mean))
            reference_columns.append(known_count)

    errors = np.array(errors)
    rmae = np.sqrt(np.dot(errors.T, errors) / errors.shape[0])
    return rmae, reference_columns
def evaluate_vary_with_lsh_parameters(ratio, seed, hash_function_options,
                                      hash_table_options):
    """Collect the error and failure figures over a grid of LSH settings.

    The same prepared data and test samples are used for the average-method
    reference value and for every (hash tables, hash functions) combination.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: base seed; it is advanced before each randomised step so
        that no two steps share a seed
    :param hash_function_options: hash-function counts to try (result columns)
    :param hash_table_options: hash-table counts to try (result rows)
    :return: ``(rmae_of_average, rmaes, fails)`` where both matrices have
        shape ``(len(hash_table_options), len(hash_function_options))``
    """
    org_data, data = wh.prepare_data(ratio, seed)
    seed += 1
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 50, seed)
    seed += 1
    rmae_of_average = compute_mae_with_average_method(org_data, data,
                                                      test_samples)

    grid_shape = (len(hash_table_options), len(hash_function_options))
    rmaes = np.zeros(grid_shape)
    fails = np.zeros(grid_shape)
    for row, table_count in enumerate(hash_table_options):
        for col, function_count in enumerate(hash_function_options):
            seed += 10
            rmaes[row][col], fails[row][col] = compute_mae_with_lsh(
                org_data, data, test_samples, seed,
                function_count, table_count)
    return rmae_of_average, rmaes, fails
def compare_similar_users_with_old_lsh(ratio, seed):
    """Print the similar-service lookups of the current and previous LSH.

    Both recommenders are built on the same prepared data with 4 and 4 as
    the two LSH parameters and the same seed. For services 0 and 1 the
    result of ``find_similar_services`` is printed for each implementation,
    so the two can be compared by eye.

    NOTE: the two count lists printed at the end are never filled, because
    the code that appended to them is disabled; they always print empty.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: forwarded to ``wh.prepare_data`` and to both recommenders
    :return: None
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    # The samples are not used below; the call is kept as in the original.
    test_samples = wh.prepare_test_data(num_of_users, 1, seed + 1)

    recommender_old = ItemBasedLSHRecommenderOld(data, 4, 4, seed)
    recommender_old.classify()
    recommender = ItemBasedLSHRecommender(data, 4, 4, seed)
    recommender.classify()

    num_of_similar_with_lsh = []
    num_of_similar_with_lsh_old = []
    for service in (0, 1):
        # The current implementation takes a service index, the old one the
        # service's column of the data matrix.
        current_result = recommender.find_similar_services(service)
        print(current_result)
        old_result = recommender_old.find_similar_services(data[:, service])
        print(old_result)

    print('lsh:', num_of_similar_with_lsh)
    print('lsh_old:', num_of_similar_with_lsh_old)
def tune_lsh_parameters(ratio, seed):
    """Print the LSH error for a 5x5 grid of settings next to the baseline.

    The average-method error serves as the reference; the matrix of LSH
    errors (rows: hash-table counts, columns: hash-function counts) is
    printed so that the best combination can be picked.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: base seed; incremented by one before every grid cell
    :return: None
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 50, seed + 1)
    rmae_of_average = compute_mae_with_average_method(org_data, data,
                                                      test_samples)

    hash_function_options = [2, 4, 6, 8, 10]
    hash_table_options = [2, 4, 6, 8, 10]
    rmaes = np.zeros((len(hash_table_options), len(hash_function_options)))
    for row, table_count in enumerate(hash_table_options):
        for col, function_count in enumerate(hash_function_options):
            seed += 1
            # NOTE(review): evaluate_vary_with_lsh_parameters unpacks two
            # values from compute_mae_with_lsh, whereas this call stores the
            # result in a single cell - confirm which one matches the
            # helper's current return value.
            rmaes[row][col] = compute_mae_with_lsh(
                org_data, data, test_samples, seed,
                function_count, table_count)

    print('========================ratio:', ratio, '===================')
    print('rmae of average:', rmae_of_average)
    print('rmae matrix:')
    print(rmaes)
def test_mae(ratio, seed):
    """Print the evaluation results of the LSH recommender for a few settings.

    Only the first hash-table option is exercised, combined with each of the
    four hash-function options. A ``>`` is printed after every combination
    as a progress marker, followed by the two result matrices.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: forwarded to ``wh.prepare_data`` and to every recommender;
        ``seed + 1`` is used to draw the test samples
    :return: None
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 50, seed + 1)

    hash_table_options = [4, 6, 8, 10, 12]
    hash_function_options = [4, 6, 8, 10]
    num_table_options = 1
    num_function_options = 4
    table_counts = hash_table_options[:num_table_options]
    function_counts = hash_function_options[:num_function_options]

    result_shape = (num_table_options, num_function_options)
    maes = np.zeros(result_shape)
    failed = np.zeros(result_shape)
    for row, table_count in enumerate(table_counts):
        for col, function_count in enumerate(function_counts):
            recommender = ItemBasedLSHRecommender(data, function_count,
                                                  table_count, seed)
            recommender.classify()
            # NOTE(review): two values are unpacked here while
            # test_mae_of_lsh unpacks three from the same method - confirm
            # against the current signature of ``evaluate``.
            maes[row][col], failed[row][col] = recommender.evaluate(
                data[test_samples], org_data[test_samples])
            print('>', end='')
    print(maes)
    print(failed)


# Presumably the output directory for the irecommender results.
# NOTE(review): this name shadows the builtin ``dir`` and is not used by
# test_mae itself - confirm whether anything else still relies on it.
dir = '../../outputs/irecommender/'
def test_predict_with_matrix():
    """Print the regular and the matrix-based evaluation of one recommender.

    Uses fixed settings: ratio 0.9 and seed 2 for the data, 20 and seed 3
    for the test samples, and a recommender with 8 hash functions and seed 5.

    :return: None
    """
    org_data, data = wh.prepare_data(0.9, 2)
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 20, 3)

    recommender = ItemBasedLSHRecommender(data, num_of_functions=8, seed=5)
    recommender.classify()

    # Evaluate and print one variant before running the next, so any output
    # produced by the evaluation itself keeps its original position.
    normal_result = recommender.evaluate(data[test_samples],
                                         org_data[test_samples])
    print('normal:', normal_result)
    matrix_result = recommender.evaluate_with_matrix(data[test_samples],
                                                     org_data[test_samples])
    print('matrix:', matrix_result)
def test_mae_of_lsh(ratio, seed, num_of_hash_functions=4, num_of_hash_tables=8):
    """Evaluate the LSH recommender once for the given LSH settings.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: forwarded to ``wh.prepare_data``; ``seed + 1`` is used to
        draw the test samples and ``seed + 2`` for the recommender, so the
        three random steps do not share a seed
    :param num_of_hash_functions: number of hash functions for the recommender
    :param num_of_hash_tables: number of hash tables for the recommender
    :return: ``(rmae, failed, reference_columns)`` as produced by
        ``ItemBasedLSHRecommender.evaluate``
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 50, seed + 1)

    # The third positional argument is the hash-table count (the other call
    # sites in this file pass functions first, tables second). Previously
    # num_of_hash_functions was passed twice and num_of_hash_tables ignored.
    recommender = ItemBasedLSHRecommender(data, num_of_hash_functions,
                                          num_of_hash_tables, seed + 2)
    recommender.classify()

    rmae, failed, reference_columns = recommender.evaluate(
        data[test_samples], org_data[test_samples])
    return rmae, failed, reference_columns
def test_num_of_simliar_users(ratio, seed):
    """Print how many similar services LSH returns for different settings.

    Only the first hash-table option is exercised, combined with each of the
    five hash-function options. For every combination the function records
    the average number of similar services found per test sample and the
    number of samples for which none was found, then prints both matrices.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: forwarded to ``wh.prepare_data`` and ``wh.prepare_test_data``
    :return: None
    """
    # prepare_data yields (org_data, data), as at every other call site in
    # this file; the two names used to be swapped here, so the recommender
    # was built on the wrong matrix.
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    # Samples are drawn from the services because services are looked up.
    test_samples = wh.prepare_test_data(num_of_services, 50, seed)
    sample_count = len(test_samples)

    hash_table_options = [4, 6, 8, 10, 12]
    hash_function_options = [4, 6, 8, 10, 12]
    num_table_options = 1
    num_function_options = 5
    table_counts = hash_table_options[:num_table_options]
    function_counts = hash_function_options[:num_function_options]

    result_shape = (num_table_options, num_function_options)
    num_of_similar_services = np.zeros(result_shape)
    num_of_isolated_services = np.zeros(result_shape)
    for i, num_of_hash_table in enumerate(table_counts):
        for j, num_of_hash_function in enumerate(function_counts):
            recommender = ItemBasedLSHRecommender(data, num_of_hash_function,
                                                  num_of_hash_table)
            recommender.classify()

            sizes = [len(recommender.find_similar_services(t))
                     for t in test_samples]
            # Average over the samples actually used (was a fixed 500.0);
            # guard against an empty sample set.
            num_of_similar_services[i][j] = (
                sum(sizes) / float(sample_count) if sample_count else 0.0)
            num_of_isolated_services[i][j] = sizes.count(0)

    print(num_of_similar_services)
    print(num_of_isolated_services)
def compare_maes_with_old_lsh(ratio, seed):
    """Print the error of the current and the previous LSH implementation.

    Both implementations are run on the same data, test samples and seed so
    that the current one can be checked against the previous one; the
    average-method error is printed as a reference. Only the first
    hash-function and hash-table option is exercised.

    :param ratio: forwarded to ``wh.prepare_data``
    :param seed: base seed; incremented by one before every grid cell
    :return: None
    """
    org_data, data = wh.prepare_data(ratio, seed)
    num_of_users, num_of_services = data.shape
    test_samples = wh.prepare_test_data(num_of_users, 50, seed + 1)
    rmae_of_average = compute_mae_with_average_method(org_data, data,
                                                      test_samples)

    hash_function_options = [4, 6, 8]
    hash_table_options = [4, 6, 8]
    num_of_function_options = 1
    num_of_table_options = 1
    table_counts = hash_table_options[:num_of_table_options]
    function_counts = hash_function_options[:num_of_function_options]

    result_shape = (num_of_table_options, num_of_function_options)
    rmaes = np.zeros(result_shape)
    rmaes_old = np.zeros(result_shape)
    for row, table_count in enumerate(table_counts):
        for col, function_count in enumerate(function_counts):
            seed += 1
            # NOTE(review): evaluate_vary_with_lsh_parameters unpacks two
            # values from compute_mae_with_lsh, whereas this call stores the
            # result in a single cell - confirm which one matches the
            # helper's current return value.
            rmaes[row][col] = compute_mae_with_lsh(
                org_data, data, test_samples, seed,
                function_count, table_count)
            rmaes_old[row][col] = compute_mae_with_old_lsh(
                org_data, data, test_samples, seed,
                function_count, table_count)

    print('========================ratio:', ratio, '===================')
    print('rmae of average:', rmae_of_average)
    print('rmae matrix:')
    print(rmaes)
    print('rmae old matrix:')
    print(rmaes_old)