def encoder_run(spa):
    """Run one auto-encoder + CF experiment for sparseness ``spa`` and print MAE/RMSE.

    Steps, in order: load the training triples into a dense matrix, run
    ``preprocess``, load the location table, train and/or load a
    ``BPAutoEncoder`` and call ``calFill`` to obtain the filled matrix ``PR``,
    build the pairwise similarity matrix ``W`` (or read it from ``W_path``),
    keep the ``k`` most similar rows per row in ``S``, then score ``predict``
    on the test triples.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``isICF``, ``isUserAutoEncoder``, ``axis0``,
    ``axis1``, ``k`` and the training switches) and writes the globals
    ``loc_tab``, ``W`` and ``S``.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache paths.

    Returns:
        None. All results are printed.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global loc_tab, W, S

    train_data = base_path + '/Dataset/ws/train/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values2/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    R = preprocess(R)
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- auto-encoder: optionally restore weights, optionally train, then fill ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAutoEncoder(tx, hidden_node,
                            actfunc1, deactfunc1,
                            actfunc1, deactfunc1, check_none)
    if isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_rate, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    # Vectorised form of the element-wise copy loop; same result.
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- similarity matrix over rows of the filled matrix ----
    print('计算相似度矩阵开始')
    tnow = time.time()
    oR = R
    R = PR
    if isICF:
        R = R.T
    if readWcache and os.path.exists(W_path):
        # np.longdouble is the portable spelling of the extended-precision
        # type (np.float128 is not defined on every platform).
        W = np.loadtxt(W_path, np.longdouble)
    else:
        for i in range(axis0 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis0):
                ws = np.sum((R[i, :] - R[j, :]) ** 2)
                # similarity = exp(-RMS difference between the two rows)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))
                # Earlier variants that were tried:
                # origin W[i,j]=W[j,i]=1.0/(ws ** (1.0/p)+1.0);
                # W[i,j]=W[j,i]=1.0/( ((ws/cot) ** (1.0/p))+1.0);
                # W[i,j]=W[j,i]= 1.0/math.exp(((ws) ** (1.0/p))/cot);
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, indices of the k largest similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: MAE / RMSE over test rows with a positive value ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    print('oR', oR)
    print('R', R)
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
    print(W)
def encoder_run(spa):
    """Run one denoising-auto-encoder + CF experiment for ``spa`` and print MAE/RMSE.

    Steps, in order: load the training triples into a dense matrix, keep a
    copy ``oriR`` of the cleaned matrix, apply ``Preprocess.preprocessMF_rat``
    with a preloaded ``MF_bl`` model, divide both matrices by 20, train and/or
    load a ``BPAE.DenoiseAutoEncoder`` and call ``calFill`` to obtain ``PR``,
    build the similarity matrix ``W`` (or read it from ``W_path``), keep the
    ``k`` most similar rows per row in ``S``, then score ``predict`` on the
    test triples.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``f``, ``cmp_rat``, ``isICF``,
    ``isUserAutoEncoder``, ``axis0``, ``axis1``, ``k`` and the training
    switches) and writes the globals ``loc_tab``, ``W`` and ``S``.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache paths.

    Returns:
        None. All results are printed.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global loc_tab, W, S

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation fill-in preprocessing: the baseline MF model is
    # initialised with the mean of the non-zero entries and its saved values.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    # Scale both matrices by the same constant before feeding the encoder.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- denoising auto-encoder ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
        # Keep the second training matrix in the same orientation as R;
        # previously only R was transposed, so the two matrices passed to
        # train() had different shapes in this branch.
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- similarity matrix over rows of the filled matrix ----
    print('计算相似度矩阵开始')
    tnow = time.time()
    R = PR
    if isICF:
        R = R.T
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(axis0 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis0):
                ws = np.sum((R[i, :] - R[j, :]) ** 2)
                # similarity = exp(-RMS difference between the two rows)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))
                # Earlier variants that were tried:
                # origin W[i,j]=W[j,i]=1.0/(ws ** (1.0/p)+1.0);
                # W[i,j]=W[j,i]=1.0/( ((ws/cot) ** (1.0/p))+1.0);
                # W[i,j]=W[j,i]= 1.0/math.exp(((ws) ** (1.0/p))/cot);
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, indices of the k largest similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: MAE / RMSE over test rows with a positive value ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
    print(W)
def encoder_run(spa):
    """Evaluate the auto-encoder's filled matrix directly and dump the worst errors.

    Steps, in order: load the training triples into a dense matrix, run
    ``Preprocess.preprocess``, divide by 20, train and/or load a
    ``BPAE.BPAutoEncoder`` and call ``calFill`` to obtain ``PR``, then compare
    ``PR[u, s]`` with every positive test value. The 1000 largest absolute
    errors, their (row, column) positions and the matching true values are
    written to three text files under ``values_path``.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``f``, ``isICF``, ``isUserAutoEncoder`` and
    the training switches), writes the global ``loc_tab`` and reads the
    globals ``W`` and ``S`` for the final prints.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            value paths.

    Returns:
        None. Results are printed and saved to disk.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation baseline model.
    # NOTE(review): the model is loaded but the active preprocessing call
    # below does not receive it (the MF-based call is disabled) — confirm
    # whether the load is still wanted.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    # Preprocess.preprocessMF_random_replace(R,mf,rat=cmp_rat);
    Preprocess.preprocess(R)
    print(np.sum(R - oriR))
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- auto-encoder ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.BPAutoEncoder(tx, hidden_node,
                                 actfunc1, deactfunc1,
                                 actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_param, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: PR is used as the prediction, no CF step ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    ana = np.zeros(us_shape)    # absolute error per evaluated cell
    R_ana = np.zeros(us_shape)  # true test value per evaluated cell
    for tc in trdata:
        if tc[2] <= 0:
            continue
        u = int(tc[0])
        s = int(tc[1])
        rt = PR[u, s]
        t = abs(rt - tc[2])
        mae += t
        ana[u, s] = t
        R_ana[u, s] = tc[2]
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)

    # ---- dump the 1000 largest absolute errors ----
    list_ana = ana.reshape((-1, ))
    ind = np.argsort(-list_ana)[:1000]
    ana_sorted = list_ana[ind]
    # Flat index -> [row, column]; divmod keeps this in exact integer arithmetic.
    arg_list = [list(divmod(int(i), us_shape[1])) for i in ind]
    ori_list = [R_ana[row, col] for row, col in arg_list]
    np.savetxt(values_path + '/test_ana_value.txt', np.array(ana_sorted), '%.6f')
    np.savetxt(values_path + '/test_ana_ind.txt', np.array(arg_list), '%d')
    np.savetxt(values_path + '/test_ana_ori_value.txt', np.array(ori_list), '%.6f')

    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
    # NOTE(review): W and S are not computed in this function; these prints
    # show whatever the module-level globals currently hold.
    print(W)
    print(S)
def encoder_run(spa):
    """Run the DAE + hybrid (row-based and column-based) CF experiment for ``spa``.

    Steps, in order: load the training triples into a dense matrix, apply
    ``Preprocess.preprocessMF_rat`` with a preloaded ``MF_bl`` model, divide
    by 20, train and/or load a ``BPAE.DenoiseAutoEncoder`` and call
    ``calFill`` to obtain ``PR``, blank part of ``PR`` with
    ``Preprocess.random_empty``, build two weighted similarity matrices
    (``W`` over rows of ``PR``, ``SW`` over rows of ``PR.T``), keep the ``k``
    most similar entries per row, and score the blend
    ``cf_w * urt + (1 - cf_w) * srt`` of the two ``predict`` results on the
    test triples.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``f``, ``cmp_rat``, ``cut_rate``, ``w_d``,
    ``sw_d``, ``cf_w``, ``k``, ``isICF``, ``isUserAutoEncoder`` and the
    training/cache switches) and writes the global ``loc_tab``.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache paths.

    Returns:
        None. All results are printed.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation fill-in preprocessing.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    # Fill-in step.
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0  # normalise
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- denoising auto-encoder ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # oriR: original matrix
    # R:    matrix after MF processing
    # PR:   predicted matrix after random deletion
    print('生成原矩阵分析开始')
    tnow = time.time()
    # Row-pair analysis; indexed below as us_ana[i][j - i - 1] for j > i,
    # each item holding three masks followed by three weights.
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    print('生成原矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per-feature weights: exp((median count - count) / scale) ----
    print('生成特征权重向量开始')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    # feat_w_us=np.exp(np.log2(med-feat_cout));
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('生成特征权重向量结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()

    # ---- U-CF: similarity between rows of PR ----
    R = PR
    bat_size, feat_size = R.shape
    W = np.zeros((bat_size, bat_size))
    # NOTE(review): "and False" disables the cache for W, so it is always
    # recomputed; kept as found — confirm whether that is still intended.
    if readWcache and os.path.exists(W_path) and False:
        del W
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % 30 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Items 0..2 are masks, items 3..5 the matching weights.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += (np.subtract(a, b, out=np.zeros_like(a), where=tmp)
                           * ana_chp[indexk + 3])
                ws = ws * feat_w_us
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.12f')

    # ---- S-CF: similarity between rows of PR.T ----
    R = PR.T
    bat_size, feat_size = R.shape
    SW = np.zeros((bat_size, bat_size))
    # At least 1 so the progress check cannot take a modulo by zero when
    # there are fewer than 100 rows.
    show_step = max(1, int(bat_size / 100))
    if readWcache and os.path.exists(SW_path):
        del SW
        SW = np.loadtxt(SW_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = get_ana_item(R.shape, a, b)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += (np.subtract(a, b, out=np.zeros_like(a), where=tmp)
                           * ana_chp[indexk + 3])
                ws = ws * feat_w_su
                ws = np.sum(ws ** 2)
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(SW_path, SW, '%.12f')
    R = PR
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, indices of the k largest similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: blend of the two CF predictions ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    SR = R.T
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
def encoder_run(spa):
    """Run the DAE + weighted row-based CF experiment for ``spa`` and print MAE/RMSE.

    Steps, in order: load the training triples into a dense matrix, apply
    ``Preprocess.preprocessMF_rat`` with a preloaded ``MF_bl`` model, divide
    by 20, train and/or load a ``BPAE.DenoiseAutoEncoder`` and call
    ``calFill`` to obtain ``PR``, blank part of ``PR`` with
    ``Preprocess.random_empty``, classify every column for every row pair of
    ``oriR`` (both present / both missing / exactly one present), build the
    weighted similarity matrix ``W`` (or read it from ``W_path``), keep the
    ``k`` most similar rows per row, and score ``predict`` on the test
    triples.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``f``, ``cmp_rat``, ``cut_rate``, ``w_d``,
    ``k``, ``isICF``, ``isUserAutoEncoder`` and the training/cache switches)
    and writes the global ``loc_tab``.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache paths.

    Returns:
        None. All results are printed.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation fill-in preprocessing.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    # Fill-in step.
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0  # normalise
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- denoising auto-encoder ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # oriR: original matrix
    # R:    matrix after MF processing
    # PR:   predicted matrix after random deletion
    print('生成原矩阵分析开始')
    tnow = time.time()
    b_s, f_s = us_shape

    def inverse_share(mask):
        """Return 1 / (fraction of True entries), or 0.0 for an empty mask.

        An empty mask selects no columns, so its weight never contributes to
        the distance; returning 0.0 avoids the ZeroDivisionError the plain
        formula raised and keeps the later product finite.
        """
        hits = np.count_nonzero(mask)
        return 1 / (hits / f_s) if hits else 0.0

    # us_ana[i][j - i - 1] describes the row pair (i, j), j > i:
    # [both-present mask, both-missing mask, exactly-one mask, three weights].
    us_ana = [[] for _ in range(b_s)]
    for i in range(b_s - 1):
        a = oriR[i, :]
        a_not_none = a != NoneValue
        a_is_none = a == NoneValue
        for j in range(i + 1, b_s):
            b = oriR[j, :]
            all_have = (b != NoneValue) & a_not_none
            none_have = (b == NoneValue) & a_is_none
            any_have = np.logical_not(all_have | none_have)
            # Alternative weighting that was tried:
            # all_p = np.exp(-1.0*np.count_nonzero(all_have)/f_s);
            # non_p = np.exp(-1.0*np.count_nonzero(none_have)/f_s);
            # any_p = np.exp(-1.0*np.count_nonzero(any_have)/f_s);
            all_p = inverse_share(all_have)
            non_p = inverse_share(none_have)
            any_p = inverse_share(any_have)
            us_ana[i].append([all_have, none_have, any_have, all_p, non_p, any_p])
            # us_ana[i].append([all_have,none_have,any_have,150.0,30.0,0.001]);
    print('生成原矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per-feature weights: exp((median count - count) / w_d) ----
    print('生成特征权重向量开始')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w = np.exp((med - feat_cout) / w_d)
    print('生成特征权重向量结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- U-CF: weighted similarity between rows of PR ----
    print('计算相似度矩阵开始')
    tnow = time.time()
    R = PR
    bat_size, feat_size = R.shape
    W = np.zeros((bat_size, bat_size))
    # At least 1 so the progress check cannot take a modulo by zero when
    # there are fewer than 100 rows.
    show_step = max(1, int(bat_size / 100))
    if readWcache and os.path.exists(W_path):
        del W
        W = np.loadtxt(W_path, np.float64)
    else:
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Items 0..2 are masks, items 3..5 the matching weights.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += (np.subtract(a, b, out=np.zeros_like(a), where=tmp)
                           * ana_chp[indexk + 3])
                ws = ws * feat_w
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, indices of the k largest similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: MAE / RMSE over test rows with a positive value ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
def encoder_run(spa):
    """Run the DAE + hybrid CF experiment for ``spa`` and return ``(mae, rmse)``.

    Steps, in order: load the training triples into a dense matrix, apply
    ``Preprocess.preprocessMF_rat`` with a preloaded ``MF_bl`` model and the
    ratio returned by ``out_cmp_rat(spa)``, divide by 20, train and/or load a
    ``BPAE.DenoiseAutoEncoder`` and call ``calFill`` to obtain ``PR``, blank
    part of ``PR`` with ``Preprocess.random_empty``, build two weighted
    similarity matrices (``W`` over rows of ``PR``, ``SW`` over rows of
    ``PR.T``), keep the ``k`` / ``sk`` most similar entries per row, and
    score the blend ``cf_w * urt + (1 - cf_w) * srt`` of the two ``predict``
    results on the test triples.

    The similarity matrices are cached on disk (``W_path`` / ``SW_path``) and
    in the module globals ``tmp_W`` / ``tmp_SW``; ``last_w_path`` remembers
    which cache file the in-memory copies belong to.

    Depends on further module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``f``, ``cut_rate``, ``w_d``, ``sw_d``,
    ``cf_w``, ``k``, ``sk``, ``isICF``, ``isUserAutoEncoder`` and the
    training/cache switches) and writes the global ``loc_tab``.

    Args:
        spa: Sparseness level; formatted with ``%.1f`` into the dataset and
            cache paths.

    Returns:
        Tuple ``(mae, rmse)`` over the test rows whose value is positive.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global last_w_path, tmp_W, tmp_SW
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%.1f_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%.1f_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%.1f_case%d' % (spa, case)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%.1f_case%d' % (spa, case)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation fill-in preprocessing.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    print(mf_values_path)
    mf.preloadValues(mf_values_path)
    # Fill-in step; the ratio is chosen per sparseness level.
    cmp_rat = out_cmp_rat(spa)
    print(cmp_rat)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0  # normalise
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- denoising auto-encoder ----
    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        # NOTE(review): saving is assumed to happen only after a training run.
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)

    # ---- restore PR: undo the /20 scaling and keep the known entries of R ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # oriR: original matrix
    # R:    matrix after MF processing
    # PR:   predicted matrix after random deletion
    print('生成原矩阵分析开始')
    tnow = time.time()
    # Row-pair analysis; indexed below as us_ana[i][j - i - 1] for j > i,
    # each item holding three masks followed by three weights.
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    print('生成原矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per-feature weights: exp((median count - count) / scale) ----
    print('生成特征权重向量开始')
    tnow = time.time()
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    # feat_w_us=np.exp(np.log2(med-feat_cout));
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('生成特征权重向量结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    # Refresh the in-memory cache when the target cache file changes. Each
    # file is loaded only if it exists, so a missing cache falls through to
    # the computation below instead of raising inside np.loadtxt.
    if readWcache and (last_w_path != W_path):
        last_w_path = W_path
        tmp_W = np.loadtxt(W_path, np.float64) if os.path.exists(W_path) else None
        tmp_SW = np.loadtxt(SW_path, np.float64) if os.path.exists(SW_path) else None

    # ---- U-CF: similarity between rows of PR ----
    R = PR
    bat_size, feat_size = R.shape
    if readWcache and tmp_W is not None:
        W = tmp_W
    else:
        W = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % 60 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Items 0..2 are masks, items 3..5 the matching weights.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += (np.subtract(a, b, out=np.zeros_like(a), where=tmp)
                           * ana_chp[indexk + 3])
                ws = ws * feat_w_us
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
                # W[i,j]=W[j,i]= 1.0/(1+np.sqrt(ws/feat_size));
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.12f')
        if readWcache:
            # Keep the in-memory copy in step with the file just written.
            tmp_W = W

    # ---- S-CF: similarity between rows of PR.T ----
    R = PR.T
    bat_size, feat_size = R.shape
    show_step = 500
    if readWcache and tmp_SW is not None:
        SW = tmp_SW
    else:
        SW = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            oria = oriR[:, i]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                orib = oriR[:, j]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                # Pair analysis is taken from the original matrix columns.
                ana_chp = get_ana_item(R.shape, oria, orib)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += (np.subtract(a, b, out=np.zeros_like(a), where=tmp)
                           * ana_chp[indexk + 3])
                ws = ws * feat_w_su
                ws = np.sum(ws ** 2)
                # Cube root here, unlike the square root used for W.
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.power(ws / feat_size, 1.0 / 3))
        np.savetxt(SW_path, SW, '%.12f')
        if readWcache:
            tmp_SW = SW
    R = PR
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, indices of the k (rows) / sk (columns) largest similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    # k = get_cf_k(spa);
    # sk = get_cf_sk(spa);
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:sk]
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: blend of the two CF predictions ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    SR = R.T
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
    return mae, rmse
def run_cf(spa):
    """Run the plain collaborative-filtering baseline for ``spa`` and print MAE/RMSE.

    Steps, in order: load the training triples, load the location table,
    scatter the triples into the global matrix ``R`` (transposed when
    ``isICF`` is set), build the row-to-row similarity matrix ``W`` from the
    entries both rows have (or read it from ``W_path``), keep the ``k`` most
    similar rows per row in ``S`` together with their similarity sums in
    ``sumS``, then score ``predict`` on the test triples.

    Depends on module-level configuration (``base_path``, ``case``,
    ``us_shape``, ``NoneValue``, ``isICF``, ``axis0``, ``k``,
    ``readWcache``) and writes the globals ``R``, ``W``, ``S``, ``sumS`` and
    ``loc_tab``.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache paths.

    Returns:
        None. All results are printed.

    NOTE(review): the statement nesting was reconstructed from a source whose
    line breaks had been lost; the spots marked NOTE(review) below should be
    confirmed against the intended control flow.
    """
    global R, W, S, sumS, loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'

    print('开始实验,isICF=%s,稀疏度=%d,case=%d' % (isICF, spa, case))

    # ---- load training triples (row index, column index, value) ----
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # np.alen was removed from NumPy; len() gives the same row count.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    # ---- location table: service info for item-based CF, user info otherwise ----
    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    # Report the size of the location table; this message previously
    # repeated the training row count.
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), len(loc_tab)))

    # ---- scatter the triples into a dense matrix pre-filled with NoneValue ----
    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    if isICF:
        R = R.T
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- similarity matrix over rows of R ----
    print('计算相似度矩阵开始')
    tnow = time.time()
    if readWcache and os.path.exists(W_path):
        # np.longdouble is the portable spelling of the extended-precision
        # type (np.float128 is not defined on every platform).
        W = np.loadtxt(W_path, np.longdouble)
    else:
        for i in range(axis0):
            if i % 50 == 0:
                print('----->step%d' % (i))
            # Row i and its mask do not change inside the inner loop.
            a = R[i, :]
            alog = a != NoneValue
            # The measure is symmetric, so each pair is computed once and
            # mirrored; the diagonal is cleared afterwards.
            for j in range(i + 1, axis0):
                b = R[j, :]
                both = alog & (b != NoneValue)
                # Difference only where both rows have a value, 0 elsewhere.
                delta = np.subtract(a, b, out=np.zeros_like(a), where=both)
                ws = np.sum(delta ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws))
        # A row must not count as its own neighbour.
        diag = np.arange(axis0)
        W[diag, diag] = 0
        # NOTE(review): the cache is assumed to be written only when recomputed.
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # ---- per row, the k most similar rows and the sum of their similarities ----
    print('生成相似列表开始')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    for i in range(axis0):
        sumS[i] = np.sum(W[i, S[i]])
    print('生成相似列表开始结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    # ---- evaluation: MAE / RMSE over test rows with a positive value ----
    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]))
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    # np.sqrt, as in the sibling experiment functions, instead of a bare
    # sqrt name that this block cannot see being imported.
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,isICF=%s,稀疏度=%d,MAE=%.3f,RMSE=%.3f\n' % ((time.time() - now), isICF, spa, mae, rmse))
    print('----------------------------------------------------------\n')
    print(W)
    print(S)