def run():
    """Cluster the 5825 services on location features plus per-user-class mean QoS.

    For every user class the per-service mean of the observed QoS values is
    computed (services a class never invoked fall back to that class's overall
    mean).  Each service's feature vector is its location features followed by
    one mean per user class; k-means (k=6) clusters the services and the
    resulting clusters are written via ``write2file``.

    Relies on module-level paths and helpers (``ser_info_path``,
    ``origin_path``, ``simple_km2``, ``write2file``, ...).
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here.
    R = np.loadtxt(origin_path, float)
    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)
    # Negative entries mark missing observations; zero them so that
    # count_nonzero below counts only real measurements.
    R[np.where(R < 0)] = 0
    user_mean = []
    for uc in user_class:
        UR = R[uc]
        ser_sum = np.sum(UR, axis=0)
        ser_cot = np.count_nonzero(UR, axis=0)
        # Mean per service over this class; 0 where the class never called it.
        uc_ser_mean = np.divide(ser_sum, ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        # Services never invoked by this class get the class-wide mean instead.
        uc_ser_mean[np.where(ser_cot == 0)] = all_mean
        user_mean.append(uc_ser_mean)

    data = []
    names = []
    area = []
    k = 6
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        lc = []
        lc.extend(ser_loc_m[sn][1])
        for um in user_mean:
            lc.append(um[sid])
        data.append(lc)
    data = np.array(data)

    cent, res = simple_km2(data, k, 1)
    print(cent)
    print(res)
    for i in range(k):
        tmp = []
        tmp2 = []
        for rid in res[i]:  # renamed from `id` (shadowed the builtin)
            if names[rid] not in tmp2:
                tmp2.append(names[rid])
                tmp.append(area[rid])
        print(tmp)
        print(tmp2)
        print()
    write2file(res)
def run():
    """Cluster individual (user, service) invocation records.

    Each record with a positive QoS value becomes a feature vector made of the
    user's location features, the service's location features and the observed
    value ``R[u, s]``; the records are clustered with k-means (k=8) and the
    clusters are reported and written via ``write2file``.

    Relies on module-level paths and helpers (``ser_info_path``,
    ``user_info_path``, ``origin_path``, ``simple_km``, ``write2file``, ...).
    """
    ser_loc = localload.load(ser_info_path)
    user_loc = localload.load_userinfo(user_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    # Loaded but not used below; kept because load_classif reads a file and
    # removing it would change the function's observable I/O.
    user_class = localtools.load_classif(loc_class_for_user)
    # np.float / np.int were removed in NumPy 1.24; use the builtins.
    R = np.loadtxt(origin_path, float)
    # Only strictly positive entries are real observations.
    idx = np.where(R > 0)
    u = idx[0].astype(int)
    s = idx[1].astype(int)
    data_size = len(u)  # renamed from `dataize` (typo)

    data = []
    names = []
    area = []
    k = 8
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
    for did in range(data_size):
        sn = ser_loc[s[did]][1]
        cl = []
        cl.extend(user_loc[u[did]][2])
        cl.extend(ser_loc_m[sn][1])
        cl.append(R[u[did], s[did]])
        data.append(cl)
    data = np.array(data)

    cent, res = simple_km(data, k, 6)
    print(cent)
    for i in range(k):
        tmp = []
        tmp2 = []
        # NOTE(review): res[i] holds record indices (0..data_size-1) while
        # names/area hold only 5825 per-service entries — indices past 5824
        # would raise IndexError.  Preserved as-is; confirm intent.
        for rid in res[i]:
            if names[rid] not in tmp2:
                tmp2.append(names[rid])
                tmp.append(area[rid])
        print(tmp)
        print(tmp2)
        print()
    write2file(res)
def mf_base_run(spa, case):
    """Train one location-aware MF baseline per service class and report MAE/RMSE.

    Loads the training/test splits for sparseness ``spa`` and split ``case``,
    partitions both by service class, trains one ``MFS.MF_bl_loc`` model per
    class (optionally resuming from saved values), evaluates after every epoch
    and once at the end.

    Relies on module-level configuration: ``base_path``, ``us_shape``, ``f``,
    ``loadvalues``, ``continue_train``, ``epoch``, ``repeat``, ``learn_rate``,
    ``lamda``.
    """
    train_data = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (spa, case)
    values_path = base_path + '/Dataset/local_mf_baseline_values/spa%.1f_case%d' % (spa, case)
    loc_classes = base_path + '/Dataset/ws/ws_classif_out.txt'
    print('开始实验,稀疏度=%.1f,case=%d' % (spa, case))

    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    ser_class = localtools.load_classif(loc_classes)
    classiy_size = len(ser_class)
    # np.alen was removed in NumPy 2.0; len() gives the same first-axis length.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_data, dtype=float)
    n = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), n))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class(ser_class, trdata)
    test_sets = localtools.data_split_class(ser_class, ttrdata)
    del trdata, ttrdata  # free the raw arrays; only the per-class splits are used
    print('分类数据集结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    means = preprocess(train_sets)
    print('预处理数据结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('训练模型开始')
    tnow = time.time()
    ttn = tnow
    svdes = [MFS.MF_bl_loc(us_shape, f, means[i]) for i in range(classiy_size)]

    def _evaluate():
        """Return (MAE, RMSE) of the per-class models over all positive test records."""
        mae = 0.0
        rmse = 0.0
        cot = 0
        for i in range(classiy_size):
            for tc in test_sets[i]:
                if tc[2] <= 0:  # non-positive values are missing observations
                    continue
                u = int(tc[0])
                s = int(tc[1])
                rt = svdes[i].predict(u, s)
                mae += abs(rt - tc[2])
                rmse += (rt - tc[2]) ** 2
                cot += 1
        return mae / cot, np.sqrt(rmse / cot)

    if loadvalues:
        for i in range(classiy_size):
            vpp = values_path + '/class%d' % (i)
            svdes[i].preloadValues(vpp)
    if continue_train:
        for ep in range(epoch):
            for i in range(classiy_size):
                print('类%d训练开始' % (i))
                svdes[i].train_mat(train_sets[i], repeat, learn_rate, lamda, values_path)
                vpp = values_path + '/class%d' % (i)
                svdes[i].saveValues(vpp)
                print('类%d训练结束,耗时 %.2f秒 \n' % (i, (time.time() - ttn)))
                ttn = time.time()
            mae, rmse = _evaluate()
            print('-------->>>>ep=%d训练结束,mae=%f耗时 %.2f秒 \n' % (ep, mae, (time.time() - ttn)))
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    print('评测开始')
    tnow = time.time()
    mae, rmse = _evaluate()
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f,MAE=%.6f,RMSE=%.6f\n'
          % ((time.time() - now), spa, mae, rmse))
def mf_base_run(spa, case):
    """Run the hybrid NCF local experiment for sparseness ``spa`` and split ``case``.

    Loads the train/test splits, partitions both by user class, configures
    ``NcfCreParam``/``NcfTraParm`` and trains ``hyb_ncf_local``.

    Relies on module-level configuration (``base_path``, ``localtools``,
    ``hyb_ncf_local``, ``NcfCreParam``, ``NcfTraParm``).
    """
    train_path = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (spa, case)
    test_path = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (spa, case)
    # NOTE(review): '%d' with a float spa truncates (0.1 -> 0), so every
    # sparseness shares one cache file per case — confirm this is intended.
    cache_path = 'value_cache/spa%d_case%d.ckpt' % (spa, case)
    result_file = 'result/ws_spa%.1f_case%d.txt' % (spa, case)
    loc_classes = base_path + '/Dataset/ws/localinfo/ws_classif_out_by_user.txt'
    print('开始实验,稀疏度=%.1f,case=%d' % (spa, case))

    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_path, dtype=float)
    user_class = localtools.load_classif(loc_classes)
    classiy_size = len(user_class)
    # np.alen was removed in NumPy 2.0; len() gives the same first-axis length.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_path, dtype=float)
    tn = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), tn))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class_byuser(user_class, trdata)
    test_sets = localtools.data_split_class_byuser(user_class, ttrdata)
    del trdata, ttrdata  # free the raw arrays; only the per-class splits are used
    print('分类数据集结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # Model creation parameters.
    cp = NcfCreParam()
    cp.us_shape = (339, 5825)
    cp.hid_feat = 32
    cp.hid_units = [64, 32, 16]
    cp.drop_p = 0.00001
    cp.reg_p = 0.0001

    # Training parameters.
    tp = NcfTraParm()
    tp.train_data = train_sets
    tp.test_data = test_sets
    tp.epoch = 20
    tp.batch_size = 5
    tp.learn_rate = 0.007
    tp.lr_decy_rate = 1.0
    tp.lr_decy_step = int(n / tp.batch_size)  # decay once per training epoch
    tp.cache_rec_path = cache_path
    tp.result_file_path = result_file
    tp.load_cache_rec = False
    tp.classif_size = len(train_sets)

    print('训练模型开始')
    tnow = time.time()
    model = hyb_ncf_local(cp)
    model.train(tp)
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f\n' % ((time.time() - now), spa))
def run():
    """Cluster services on location features plus per-user-class mean QoS,
    and additionally compute per-class distance-based membership rates.

    Same feature construction as the k=6 variant, but ``simple_km`` also
    returns a distance matrix which is turned into a softmax-style rate
    (larger for closer clusters) and saved to ``loc_class_dis_rate_out``.
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here.
    R = np.loadtxt(origin_path, float)
    # Negative entries mark missing observations; zero them out.
    R[np.where(R < 0)] = 0
    user_mean = []
    for uc in user_class:
        UR = R[uc]
        ser_sum = np.sum(UR, axis=0)
        ser_cot = np.count_nonzero(UR, axis=0)
        # Mean per service over this class; 0 where the class never called it.
        uc_ser_mean = np.divide(ser_sum, ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        # Services never invoked by this class get the class-wide mean instead.
        uc_ser_mean[np.where(ser_cot == 0)] = all_mean
        user_mean.append(uc_ser_mean)

    data = []
    names = []
    area = []
    k = 6
    di = 3
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        lc = []
        lc.extend(ser_loc_m[sn][1])
        for um in user_mean:
            lc.append(um[sid])
        data.append(lc)
    data = np.array(data)

    cent, res, dis_rate = simple_km(data, k, di)
    print(cent)
    print(res)
    for i in range(k):
        tmp = []
        tmp2 = []
        for rid in res[i]:  # renamed from `id` (shadowed the builtin)
            if names[rid] not in tmp2:
                tmp2.append(names[rid])
                tmp.append(area[rid])
        print(tmp)
        print(tmp2)
        print()

    # Turn cluster distances into rates: invert (closer => larger), then a
    # softmax-style normalization over axis 0.
    dis_rate = 1 / dis_rate
    print(dis_rate)
    print(np.sort(dis_rate, axis=1))
    dis_rate = np.exp(dis_rate)
    dis_sum = np.sum(dis_rate, axis=0)
    dis_rate /= dis_sum
    print(dis_rate)
    print(np.sort(dis_rate, axis=1))
    np.savetxt(loc_class_dis_rate_out, dis_rate, '%.8f')
    # Guarded, consistent with the other run() variants; the unguarded
    # os.remove raised FileNotFoundError on the first ever run.
    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)
    write2file(res)
def mf_base_run(spa, case):
    """Run the NCF++ local experiment for sparseness ``spa`` and split ``case``.

    Loads the train/test splits, partitions both by service class, builds the
    per-class normalized user-invocation indicator matrices (``us_invked``)
    and trains ``ncf_pp_local``.

    Relies on module-level configuration (``base_path``, ``localtools``,
    ``ncf_pp_local``, ``NcfCreParam``, ``NcfTraParm``).
    """
    train_path = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (spa, case)
    test_path = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (spa, case)
    # NOTE(review): '%d' with a float spa truncates (0.1 -> 0), so every
    # sparseness shares one cache file per case — confirm this is intended.
    cache_path = 'value_cache/spa%d_case%d.ckpt' % (spa, case)
    result_file = 'result/ws_spa%.1f_case%d.txt' % (spa, case)
    loc_classes = base_path + '/Dataset/ws/ws_classif_out.txt'
    print('开始实验,稀疏度=%.1f,case=%d' % (spa, case))

    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_path, dtype=float)
    ser_class = localtools.load_classif(loc_classes)
    classiy_size = len(ser_class)
    # np.alen was removed in NumPy 2.0; len() gives the same first-axis length.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - now), n))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_path, dtype=float)
    tn = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d \n' % ((time.time() - tnow), tn))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class(ser_class, trdata)
    test_sets = localtools.data_split_class(ser_class, ttrdata)
    print('分类数据集结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))

    # Model creation parameters.
    cp = NcfCreParam()
    cp.us_shape = (339, 5825)
    cp.hid_feat = 32
    cp.hid_units = [64, 32, 16]
    cp.drop_p = 0.00001
    cp.reg_p = 0.0001

    # Build the user-invocation record: R[u, s] = 1 where the training set
    # contains an observation for user u on service s.
    R = np.zeros(cp.us_shape)
    u = trdata[:, 0].astype(np.int32)
    s = trdata[:, 1].astype(np.int32)
    R[u, s] = 1.0
    # For each service class, mask R to that class's services and normalize
    # each user row by 1/sqrt(#invocations) (rows with none stay zero).
    us_invked = []
    for cla in ser_class:
        hot = np.zeros([cp.us_shape[1]], np.float32)
        hot[cla] = 1.0
        usi = R * hot
        nonzeroes = np.sqrt(np.count_nonzero(usi, axis=1))
        noz = np.divide(1.0, nonzeroes,
                        out=np.zeros_like(nonzeroes),
                        where=nonzeroes != 0)
        noz = np.reshape(noz, [-1, 1])
        us_invked.append((usi * noz).astype(np.float32))

    # Training parameters.
    tp = NcfTraParm()
    tp.train_data = train_sets
    tp.test_data = test_sets
    tp.epoch = 40
    tp.batch_size = 5
    tp.learn_rate = 0.007
    tp.lr_decy_rate = 1.0
    tp.lr_decy_step = int(n / tp.batch_size)  # decay once per training epoch
    tp.cache_rec_path = cache_path
    tp.result_file_path = result_file
    tp.load_cache_rec = False
    tp.classif_size = len(train_sets)
    tp.us_invked = us_invked

    print('训练模型开始')
    tnow = time.time()
    model = ncf_pp_local(cp)
    model.train(tp)
    print('训练模型结束,耗时 %.2f秒 \n' % ((time.time() - tnow)))
    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f\n' % ((time.time() - now), spa))
def run():
    """Cluster the 5825 services on location features, leading IP octets and
    per-user-class mean QoS, then write the clusters via ``write2file``.

    Same as the basic k=6 variant but each service vector also includes the
    first two entries of ``ser_loc[sid][2]`` (IP-derived features).
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here.
    R = np.loadtxt(origin_path, float)
    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)
    # Negative entries mark missing observations; zero them out.
    R[np.where(R < 0)] = 0
    user_mean = []
    for uc in user_class:
        UR = R[uc]
        ser_sum = np.sum(UR, axis=0)
        ser_cot = np.count_nonzero(UR, axis=0)
        # Mean per service over this class; 0 where the class never called it.
        uc_ser_mean = np.divide(ser_sum, ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        # Services never invoked by this class get the class-wide mean instead.
        uc_ser_mean[np.where(ser_cot == 0)] = all_mean
        user_mean.append(uc_ser_mean)

    data = []
    names = []
    area = []
    k = 6
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        lc = []
        lc.extend(ser_loc_m[sn][1])
        # Add the first two IP-derived features of the service.
        lc.extend(ser_loc[sid][2][:2])
        for um in user_mean:
            lc.append(um[sid])
        data.append(lc)
    data = np.array(data)

    cent, res = simple_km2(data, k, 1)
    print(cent)
    print(res)
    for i in range(k):
        tmp = []
        tmp2 = []
        for rid in res[i]:  # renamed from `id` (shadowed the builtin)
            if names[rid] not in tmp2:
                tmp2.append(names[rid])
                tmp.append(area[rid])
        print(tmp)
        print(tmp2)
        print()
    write2file(res)