Example #1
def getresult(l, table_name, lambdas, ab, simlambda, lr, step, sim_k, ratio):
    MAE = 0
    RMSE = 0
    alpha = ab[0]
    beta = ab[1]
    start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                               time.localtime(time.time()))
    log.start_log(
        start_time, "../matrix/" + table_name.decode('utf-8') + "/" +
        "matrix_factorization_result.txt")
    f = log.write_log()

    lc_table_name = 'lc_' + table_name
    tp_table_name = 'tp_' + table_name
    # start: prediction section
    # # the returned matrix is the matrix with held-out (removed) entries
    # C, original_matrix, changed_zero = dm.getMatrix(tp_table_name, ratio)
    C, original_matrix, changed_zero = dm.get_Matrix_from_lc_tp(
        lc_table_name, tp_table_name, ratio, 1)
    # C = np.array(C)
    d = C.shape
    U = np.random.rand(d[0], l)
    V = np.random.rand(d[1], l)
    print "开始矩阵分解"
    matrix, X, Y, loss = de.matrix_factorization(C, U, V, lambdas, step, alpha,
                                                 beta, simlambda, lr, sim_k,
                                                 tp_table_name)
    # start validation
    print "Starting validation"
    matrix0, pre_or_mat, num = de.norma_matrix(matrix, original_matrix)
    MAE, RMSE = vali.validate(matrix, original_matrix, changed_zero)
    # #end
    # end: prediction section

    file_path = "../matrix/" + table_name.decode('utf-8')
    t = str(ratio) + "_" + str(num) + "_" + start_time + ".txt"
    # start: save the factorized matrix
    np.savetxt(file_path + "/matrix_factorization/matrix_factorization_" +
               str(ratio) + ".txt",
               matrix,
               fmt='%.8f')
    # end: save the factorized matrix
    # start: save the original matrix filled in with predicted values
    # filematrix0 = open(file_path + "/result/pre_" + str(ratio) + "." + "txt", 'w');
    # filematrix0.close()
    np.savetxt(file_path + "/result/pre_" + str(ratio) + ".txt",
               pre_or_mat,
               fmt='%.8f')
    # end: save the original matrix filled in with predicted values
    # start: save the post-processed factorized matrix (negative values set to 0)
    # filematrix1 = open(file_path + "/out/" + t.decode('utf-8'), 'w');
    # filematrix1.close()
    np.savetxt(file_path + "/out/" + t.decode('utf-8'), matrix0, fmt='%.8f')
    # end: save the post-processed factorized matrix (negative values set to 0)
    # end

    # k, disease, mitrax, table_name
    # top_k = dv.getTopK(k, disease, matrix, table_name)
    # filter hospitals: keep those located in the same city as the target user
    # pre_top = dv.pre_data(top_k)
    # filter hospitals
    # matrix = dealtxt.load_file_to_array("../matrix/" + table_name.decode('utf-8') + "/result/pre_" + str(ratio) + ".txt");
    # filter_hos = dv.filter_hos_By_sorted100(table_name,matrix,disease,people_locat)
    # dv.getResult(pre_top,disease)
    # dv.getResult1(disease,matrix,table_name,city,people_locat)
    end_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    print >> f, "开始时间:", start_time
    print >> f, "结束时间:", end_time
    # 显示梯度下降情况
    title = 'lr:{0} alpha:{1} beta:{2} step:{3} lambdas:{4} sim:{5} sim_k:{6}'
    title = title.format(lr, ab[0], ab[1], step, lambdas, simlambda, sim_k)
    print >> f, "参数:", title
    figure_path = "../matrix/" + table_name.decode('utf-8') + "/figure/" + str(
        ratio) + "_" + end_time + ".jpg"
    figure.paint1(X, Y, title, figure_path)
    log.close_log()
    return MAE, RMSE, loss
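
For reference, a hypothetical call of getresult; every argument value below is an illustrative placeholder (none comes from the source), and table_name must name an existing lc_/tp_ table pair.

# All parameter values here are hypothetical placeholders.
MAE, RMSE, loss = getresult(l=20,
                            table_name='example_table',
                            lambdas=0.02,
                            ab=(0.0002, 0.02),
                            simlambda=0.1,
                            lr=0.001,
                            step=5000,
                            sim_k=10,
                            ratio=0.2)
print "MAE:", MAE, "RMSE:", RMSE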
Example #2
batch_size = 25
niter = 300

quantile = 0.01

Phi = lambda x: x[Omega // d2, Omega % d2]

error_batch_dr = np.zeros((batch_size, niter + 1))
error_batch_mf = np.zeros((batch_size, niter + 1))

for j in tqdm(range(batch_size)):

    M = gaussian_model(d1, d2, r)
    y = Phi(M)
    x0 = np.zeros((d1, d2))
    _, x1 = matrix_factorization(y, Omega, niter, np.ones((d1, r)),
                                 np.ones((d2, r)), d1, d2, r, la=10**-2, eps=-1)
    x2 = dr_nuclear_norm_minimization(x0, y, niter, Omega, mu=1, gamma=1, eps=-1)
    error_batch_mf[j, :] = norm(x1 - M, axis=(1, 2)) / norm(M)
    error_batch_dr[j, :] = norm(x2 - M, axis=(1, 2)) / norm(M)
#%%

mean_error_dr = np.mean(error_batch_dr, axis=0)
mean_error_mf = np.mean(error_batch_mf, axis=0)

q_quantile_dr = np.quantile(error_batch_dr, quantile, axis=0)
Q_quantile_dr = np.quantile(error_batch_dr, 1 - quantile, axis=0)

q_quantile_mf = np.quantile(error_batch_mf, quantile, axis=0)
Q_quantile_mf = np.quantile(error_batch_mf, 1 - quantile, axis=0)

#%%
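
The means and quantile envelopes computed above are not plotted in this snippet. A minimal visualization sketch, assuming matplotlib is available and that the second axis of the error arrays runs over iterations 0..niter (the curve labels are assumptions read off the solver names):

import matplotlib.pyplot as plt

iters = np.arange(niter + 1)

plt.figure()
# mean relative error per iteration, with a (quantile, 1 - quantile) envelope
plt.plot(iters, mean_error_mf, label='matrix factorization (mean)')
plt.fill_between(iters, q_quantile_mf, Q_quantile_mf, alpha=0.2)
plt.plot(iters, mean_error_dr, label='nuclear-norm minimization (mean)')
plt.fill_between(iters, q_quantile_dr, Q_quantile_dr, alpha=0.2)
plt.yscale('log')
plt.xlabel('iteration')
plt.ylabel('relative error ||x - M|| / ||M||')
plt.legend()
plt.show()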
Example #3
    for j in tqdm(range(len(P))):

        omega = Omega[j]
        Phi = lambda x: x[omega // d2, omega % d2]
        error_batch = []

        for _ in range(batch_size):

            M = gaussian_model(d1, d2, r[i])
            y = Phi(M)
            x0 = np.zeros((d1, d2))
            x, _ = matrix_factorization(y,
                                        omega,
                                        niter,
                                        np.ones((d1, r[i])),
                                        np.ones((d2, r[i])),
                                        d1,
                                        d2,
                                        r[i],
                                        la=10**-2)
            error_batch.append(norm(x - M) / norm(M))

        mean_error[i, j] = np.mean(np.array(error_batch))

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
tsav = np.linspace(0, d - 2, 30, dtype='int')
tsav1, tsav2 = np.meshgrid(tsav, tsav)
r1, P1 = r[tsav], P[tsav]
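
The 3D figure above is created but nothing is drawn on it in this snippet. A possible completion, assuming r and P are 1-D NumPy arrays and mean_error has shape (len(r), len(P)), with rows indexed by rank and columns by the sampling parameter:

X = P[tsav1]                    # sampling parameter along one axis
Y = r[tsav2]                    # rank along the other axis
Z = mean_error[tsav2, tsav1]    # sub-sampled mean-error surface
ax.plot_surface(X, Y, Z, cmap='viridis')
ax.set_xlabel('P')
ax.set_ylabel('r')
ax.set_zlabel('mean relative error')
plt.show()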
Example #4
P_dict = {}
Q_dict = {}
b_i_dict = {}
b_u_dict = {}
b_dict = {}
user_count = 0
RMSE = []
for k, _ in user_id_plenty.items():
    user_count += 1
    if user_count % 5 == 0:
        print('User Number : ' + str(user_count))
    P = np.random.rand(len(user_id_biz_list[k]), 2) / 2
    Q = np.random.rand(len(user_id_food_list[k]), 2) / 2
    b_i = np.random.rand(len(user_id_food_list[k]))
    b_u = np.random.rand(len(user_id_biz_list[k]))
    # initial global bias b: mean rating over this user's training records
    b = sum(record[2] for record in user_id_train_data[k]) / len(user_id_train_data[k])
    b, b_u, b_i, P, Q, e = matrix_factorization(b, b_u, b_i, P, Q, 2, 30, 0.01,
                                                0.02, user_id_train_data[k],
                                                user_id_test_data[k])
    P_dict[k] = P
    Q_dict[k] = Q
    b_i_dict[k] = b_i
    b_u_dict[k] = b_u
    b_dict[k] = b
    RMSE.append(e)
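
The per-user factors and biases are stored in the dictionaries but not used further here. A minimal sketch of how a single rating could be predicted from them, assuming the usual biased matrix-factorization model (b + b_u + b_i + p·q, inferred from the call signature rather than confirmed by the source):

def predict_rating(k, biz_idx, food_idx):
    # Assumed biased-MF prediction: global bias + business bias + food-item bias
    # + dot product of the latent factors.
    return (b_dict[k]
            + b_u_dict[k][biz_idx]
            + b_i_dict[k][food_idx]
            + np.dot(P_dict[k][biz_idx], Q_dict[k][food_idx]))

print('Mean per-user RMSE:', sum(RMSE) / len(RMSE))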
Example #5
def create_ratings_matrix(ratings_df, user_index, book_index):
    num_users, num_items = (len(user_index), len(book_index))
    ratings = torch.zeros((num_users, num_items))
    for _, rating in ratings_df.iterrows():
        user_i = user_index[rating["UserId"]]
        item_i = book_index[rating["Name"]]
        ratings[user_i][item_i] = rating["Rating"]
    return ratings


ratings = create_ratings_matrix(ratings_df, user_index, book_index)

lr = 0.005

P, Qt = matrix_factorization(R=ratings, K=2, steps=10, lr=lr)

# Predict and view
predictions = torch.matmul(P, Qt)
bruce_predictions = predictions[user_index[0]]

bruce_predictions = sorted([(i, reverse_index[i], float(score))
                            for i, score in enumerate(bruce_predictions)],
                           key=lambda x: x[2],
                           reverse=True)

self_prediction_df = pd.DataFrame([{
    "rank": i + 1,
    "title": title,
    "score": score,
} for i, (_, title, score) in enumerate(bruce_predictions)])
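
The ranking above may include books the user has already rated. A follow-up sketch that filters those out before display, under the assumptions that row user_index[0] of ratings belongs to the same user as above and that a zero entry means "not rated":

user_row = ratings[user_index[0]]
already_rated = {reverse_index[i] for i in range(len(user_row)) if float(user_row[i]) > 0}

unseen_df = self_prediction_df[~self_prediction_df["title"].isin(already_rated)]
print(unseen_df.head(10))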