def encoder_run(spa):
    """Run one BP auto-encoder + neighbourhood-CF experiment and print MAE/RMSE.

    The training triples for sparseness ``spa`` are loaded into a dense matrix,
    pre-processed, completed with a ``BPAutoEncoder`` and then used to build a
    pairwise similarity matrix ``W`` and a top-``k`` neighbour table ``S``.
    Every test triple with a positive value is predicted through ``predict``
    and the resulting MAE / RMSE are printed.

    All hyper-parameters (``case``, ``us_shape``, ``hidden_node``, ``k`` ...)
    are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the data paths.

    Side effects:
        Rebinds the module globals ``loc_tab`` and ``S``; ``W`` is either
        rebound (cache hit) or filled in place. May write the encoder values
        under ``values_path`` and the similarity matrix to ``W_path``.
    """
    global loc_tab, W, S

    train_data = base_path + '/Dataset/ws/train/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values2/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    R = preprocess(R)
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAutoEncoder(tx, hidden_node,
                            actfunc1, deactfunc1,
                            actfunc1, deactfunc1, check_none)
    # NOTE(review): the sibling variants of this experiment transpose when
    # ``not isUserAutoEncoder``; this one transposes when it is set. Kept as
    # is -- confirm against BPAutoEncoder's expected input orientation.
    if isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_rate, repeat, None)
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output (vectorised form of the element-wise loop).
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    oR = R
    R = PR
    if isICF:
        R = R.T
    if readWcache and os.path.exists(W_path):
        # NOTE(review): np.float128 is not provided by every NumPy build
        # (platform dependent); the other variants load as np.float64.
        W = np.loadtxt(W_path, np.float128)
    else:
        # Similarity = exp(-sqrt(sum of squared row differences / axis1)),
        # written symmetrically into the global W.
        for i in range(axis0 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis0):
                ws = np.sum((R[i, :] - R[j, :]) ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k largest similarities in each row of W.
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    print('oR', oR)
    print('R', R)
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))

    print(W)
Пример #2
0
def encoder_run(spa):
    """Run one denoising auto-encoder + neighbourhood-CF experiment.

    The training matrix is filled through ``Preprocess.preprocessMF_rat``
    using a pre-loaded ``MF_bl`` model, scaled by 1/20 and fed to a
    ``BPAE.DenoiseAutoEncoder`` (filled matrix as input, ``oriR`` as second
    training argument). The completed matrix is used to build the similarity
    matrix ``W`` and neighbour table ``S``; MAE / RMSE over the positive test
    triples are printed.

    Hyper-parameters are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the data paths.

    Side effects:
        Rebinds the module globals ``loc_tab`` and ``S``; ``W`` is either
        rebound (cache hit) or filled in place. May write encoder values and
        the similarity matrix to disk.
    """
    global loc_tab, W, S

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation based filling: the MF model is restored from disk
    # and handed to the pre-processing step, which modifies R in place.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)
    print(np.sum(R - oriR))
    # Scale both matrices by 1/20 before training; undone after calFill.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' %
          ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
        # oriR is passed to train() together with R, so it must be in the
        # same orientation (previously only R was transposed).
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output.
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    R = PR
    if isICF:
        R = R.T
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float64)
    else:
        # Similarity = exp(-sqrt(sum of squared row differences / axis1)),
        # written symmetrically into the global W.
        for i in range(axis0 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis0):
                ws = np.sum((R[i, :] - R[j, :]) ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k largest similarities in each row of W.
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    print(W)
Пример #3
0
def encoder_run(spa):
    """Evaluate the plain BP auto-encoder directly and dump its worst errors.

    The training matrix is pre-processed, scaled by 1/20 and completed by a
    ``BPAE.BPAutoEncoder``. Each positive test triple is compared against the
    corresponding cell of the completed matrix ``PR`` (no neighbourhood step),
    MAE / RMSE are printed, and the 1000 largest absolute errors together
    with their indices and true values are written to three text files under
    ``values_path``.

    Hyper-parameters are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the data paths.

    Side effects:
        Rebinds the module global ``loc_tab``; may write encoder values; writes
        ``test_ana_value.txt``, ``test_ana_ind.txt`` and
        ``test_ana_ori_value.txt`` under ``values_path``.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # The MF model is still constructed and restored from disk although this
    # variant pre-processes with Preprocess.preprocess instead of the MF fill.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocess(R)
    print(np.sum(R - oriR))
    # Scale by 1/20 before training; undone after calFill.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' %
          ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.BPAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                 actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_param, repeat, None)
        encoder.saveValues(values_path)
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output.
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
    print('训练模型开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    ana = np.zeros(us_shape)    # absolute error per evaluated cell
    R_ana = np.zeros(us_shape)  # true test value per evaluated cell
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        u = int(tc[0])
        s = int(tc[1])
        rt = PR[u, s]
        t = abs(rt - tc[2])
        mae += t
        ana[u, s] = t
        R_ana[u, s] = tc[2]
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)

    # Rank all cells by absolute error and keep the 1000 largest.
    list_ana = ana.reshape((-1, ))
    ind = np.argsort(-list_ana)[:1000]
    ana_sorted = list_ana[ind]
    # Flat index -> (row, col); integer division avoids the float round trip.
    n_cols = us_shape[1]
    arg_list = [[int(i // n_cols), int(i % n_cols)] for i in ind]
    ori_list = [R_ana[row, col] for row, col in arg_list]
    np.savetxt(values_path + '/test_ana_value.txt', np.array(ana_sorted),
               '%.6f')
    np.savetxt(values_path + '/test_ana_ind.txt', np.array(arg_list), '%d')
    np.savetxt(values_path + '/test_ana_ori_value.txt', np.array(ori_list),
               '%.6f')

    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    # NOTE(review): W and S are never assigned in this function, so these
    # print whatever the module-level names currently hold (NameError if they
    # do not exist) -- confirm whether this output is still wanted.
    print(W)
    print(S)
Пример #4
0
def encoder_run(spa):
    """Run the hybrid (user-based + service-based) CF experiment.

    Pipeline: load the training triples, fill via ``Preprocess.preprocessMF_rat``
    with a pre-loaded ``MF_bl`` model, complete the matrix with a
    ``BPAE.DenoiseAutoEncoder``, randomly empty part of the prediction matrix,
    build a weighted similarity matrix for rows (``W``) and for columns
    (``SW``), and blend both neighbourhood predictions with weight ``cf_w``.
    MAE / RMSE over the positive test triples are printed.

    Hyper-parameters are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the data paths.

    Side effects:
        Rebinds the module global ``loc_tab``; may write encoder values;
        writes ``W_path`` and (unless read from cache) ``SW_path``.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation based filling: restore the MF model, then let the
    # pre-processing step fill R in place.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)

    print(np.sum(R - oriR))
    # Normalisation: scale by 1/20 before training; undone after calFill.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' %
          ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    PR = encoder.calFill(R)

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output.
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    # From here on:
    #   oriR - the original matrix
    #   R    - the matrix after MF processing
    #   PR   - the prediction matrix after random deletion

    print('生成原矩阵分析开始')
    tnow = time.time()
    # Row-wise (U-S) analysis; indexed below as us_ana[i][j - i - 1], each
    # item holding three masks followed by three weights.
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    print('生成原矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成特征权重向量开始')
    tnow = time.time()
    # Per-feature weight exp((median - count) / divisor), where count is the
    # number of non-zero entries of oriR along the given axis.
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('生成特征权重向量结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()

    # ---- U-CF: similarity between rows of PR ----
    R = PR
    bat_size, feat_size = R.shape
    # NOTE(review): the trailing ``and False`` disables the W cache, so the
    # row similarity is always recomputed. Kept as in the original.
    if readWcache and os.path.exists(W_path) and False:
        W = np.loadtxt(W_path, np.float64)
    else:
        W = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % 30 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                # Only features that are non-zero in both rows contribute.
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Masks 0..2 select feature groups, entries 3..5 weight them.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_us
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(W_path, W, '%.12f')

    # ---- S-CF: similarity between columns of PR ----
    R = PR.T
    bat_size, feat_size = R.shape
    # At least 1 so the progress modulo below cannot divide by zero when
    # there are fewer than 100 rows.
    show_step = max(1, int(bat_size / 100))

    if readWcache and os.path.exists(SW_path):
        SW = np.loadtxt(SW_path, np.float64)
    else:
        SW = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = get_ana_item(R.shape, a, b)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_su
                ws = np.sum(ws ** 2)
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(SW_path, SW, '%.12f')

    R = PR
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k largest similarities in each row.
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    SR = R.T
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        # Blend the row-based and column-based predictions.
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))
Пример #5
0
def encoder_run(spa):
    """Run the row-based CF experiment with inline pairwise mask analysis.

    Pipeline: load the training triples, fill via ``Preprocess.preprocessMF_rat``
    with a pre-loaded ``MF_bl`` model, complete the matrix with a
    ``BPAE.DenoiseAutoEncoder``, randomly empty part of the prediction matrix,
    derive for every row pair three masks (both observed / neither observed /
    exactly one observed) with inverse-frequency weights, build the weighted
    similarity matrix ``W`` and evaluate MAE / RMSE over the positive test
    triples.

    Hyper-parameters are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the data paths.

    Side effects:
        Rebinds the module global ``loc_tab``; may write encoder values and
        the similarity matrix to disk.
    """
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%d' % (spa)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation based filling: restore the MF model, then let the
    # pre-processing step fill R in place.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)

    print(np.sum(R - oriR))
    # Normalisation: scale by 1/20 before training; undone after calFill.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    PR = encoder.calFill(R)

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output.
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    # From here on:
    #   oriR - the original matrix
    #   R    - the matrix after MF processing
    #   PR   - the prediction matrix after random deletion

    print('生成原矩阵分析开始')
    tnow = time.time()
    b_s, f_s = us_shape

    def inverse_share(mask):
        # 1 / (fraction of the f_s features selected by ``mask``). An empty
        # mask used to divide by zero; it now gets weight 0.0, which is never
        # applied to anything because the mask selects no feature.
        hits = np.count_nonzero(mask)
        if hits == 0:
            return 0.0
        return 1 / (hits / f_s)

    # us_ana[i][j - i - 1] describes the row pair (i, j), j > i:
    # [all_have, none_have, any_have, all_p, non_p, any_p].
    us_ana = [[] for _ in range(b_s)]
    for i in range(b_s - 1):
        a = oriR[i, :]
        a_not_none = a != NoneValue
        a_is_none = a == NoneValue
        for j in range(i + 1, b_s):
            b = oriR[j, :]
            all_have = (b != NoneValue) & a_not_none   # set in both rows
            none_have = (b == NoneValue) & a_is_none   # set in neither row
            any_have = np.logical_not(all_have | none_have)  # set in exactly one
            us_ana[i].append([all_have, none_have, any_have,
                              inverse_share(all_have),
                              inverse_share(none_have),
                              inverse_share(any_have)])
    print('生成原矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成特征权重向量开始')
    tnow = time.time()
    # Per-feature weight exp((median - count) / w_d), where count is the
    # number of non-zero entries in each column of oriR.
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w = np.exp((med - feat_cout) / w_d)
    print('生成特征权重向量结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    R = PR

    # ---- U-CF: similarity between rows of PR ----
    bat_size, feat_size = R.shape
    # At least 1 so the progress modulo below cannot divide by zero when
    # there are fewer than 100 rows.
    show_step = max(1, int(bat_size / 100))

    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float64)
    else:
        W = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                # Only features that are non-zero in both rows contribute.
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Masks 0..2 select feature groups, entries 3..5 weight them.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k largest similarities in each row of W.
    S = np.argsort(-W)[:, 0:k]
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]), R, W, S)
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
Пример #6
0
def encoder_run(spa):
    """Run the hybrid CF experiment with an in-memory similarity cache.

    Pipeline: load the training triples, fill via ``Preprocess.preprocessMF_rat``
    (ratio from ``out_cmp_rat(spa)``) with a pre-loaded ``MF_bl`` model,
    complete the matrix with a ``BPAE.DenoiseAutoEncoder``, randomly empty
    part of the prediction matrix, obtain the row similarity ``W`` and column
    similarity ``SW`` (from the cache or by recomputation) and blend both
    neighbourhood predictions with weight ``cf_w``.

    Hyper-parameters are read from module-level globals.

    Args:
        spa: Sparseness level; formatted with ``%.1f`` into the data paths.

    Returns:
        Tuple ``(mae, rmse)`` over the test triples with a positive value.

    Side effects:
        Rebinds the module globals ``loc_tab``, ``last_w_path``, ``tmp_W`` and
        ``tmp_SW`` (the latter three only when ``readWcache`` is set); may
        write encoder values and the similarity matrices to disk.
    """
    global last_w_path, tmp_W, tmp_SW
    global loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%.1f_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%.1f_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/dae_values/spa%.1f_case%d' % (spa, case)
    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%.1f_case%d' % (spa, case)

    print('开始实验,稀疏度=%d,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    # len() instead of np.alen: np.alen is deprecated and absent from recent
    # NumPy releases.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('转换数据到矩阵开始')
    tnow = time.time()
    # Columns 0 and 1 are used as integer row / column indices, column 2 as
    # the stored value; cells without a triple keep NoneValue.
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    # Matrix-factorisation based filling: restore the MF model, then let the
    # pre-processing step fill R in place.
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    print(mf_values_path)
    mf.preloadValues(mf_values_path)
    cmp_rat = out_cmp_rat(spa)
    print(cmp_rat)
    Preprocess.preprocessMF_rat(R, mf, isUAE=False, rat=cmp_rat)

    print(np.sum(R - oriR))
    # Normalisation: scale by 1/20 before training; undone after calFill.
    R /= 20.0
    oriR /= 20.0
    print('预处理数据结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), len(loc_tab)))

    print('训练模型开始')
    tnow = time.time()
    tx = us_shape[1] if isUserAutoEncoder else us_shape[0]
    encoder = BPAE.DenoiseAutoEncoder(tx, hidden_node,
                                      actfunc1, deactfunc1,
                                      actfunc1, deactfunc1)
    if not isUserAutoEncoder:
        R = R.T
        oriR = oriR.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, oriR, learn_param, repeat, None)
        encoder.saveValues(values_path)

    PR = encoder.calFill(R)

    # ---- restore PR to the original value scale ----
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    # Wherever R holds something other than NoneValue, that value overrides
    # the auto-encoder output.
    PR = np.where(R != NoneValue, R, PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T
        oriR = oriR.T
    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('随机删除开始')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('随机删除开始,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    # From here on:
    #   oriR - the original matrix
    #   R    - the matrix after MF processing
    #   PR   - the prediction matrix after random deletion

    print('生成原矩阵分析开始')
    tnow = time.time()
    # Row-wise (U-S) analysis; indexed below as us_ana[i][j - i - 1], each
    # item holding three masks followed by three weights.
    us_ana = get_oriR_ana(oriR)
    print('us - ana ')
    print('生成原矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成特征权重向量开始')
    tnow = time.time()
    # Per-feature weight exp((median - count) / divisor), where count is the
    # number of non-zero entries of oriR along the given axis.
    feat_cout = np.count_nonzero(oriR, axis=0)
    med = np.median(feat_cout)
    feat_w_us = np.exp((med - feat_cout) / w_d)
    feat_cout = np.count_nonzero(oriR, axis=1)
    med = np.median(feat_cout)
    feat_w_su = np.exp((med - feat_cout) / sw_d)
    print('生成特征权重向量结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()

    # Refresh the in-memory cache when the target path changed. Each file is
    # read only if it exists; a missing one is marked None so the matching
    # recomputation branch below runs instead of np.loadtxt raising.
    if readWcache and (last_w_path != W_path):
        last_w_path = W_path
        tmp_W = np.loadtxt(W_path, np.float64) if os.path.exists(W_path) else None
        tmp_SW = np.loadtxt(SW_path, np.float64) if os.path.exists(SW_path) else None

    # ---- U-CF: similarity between rows of PR ----
    R = PR
    bat_size, feat_size = R.shape
    if readWcache and tmp_W is not None:
        W = tmp_W
    else:
        W = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % 60 == 0:
                print('----->u-cf step%d' % (i))
            a = R[i, :]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                # Only features that are non-zero in both rows contribute.
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                ana_chp = us_ana[i][j - i - 1]
                # Masks 0..2 select feature groups, entries 3..5 weight them.
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_us
                ws = np.sum(ws ** 2)
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / feat_size))
        np.savetxt(W_path, W, '%.12f')
        if readWcache:
            # Keep the in-memory cache in step with the file just written so
            # the next call for this path does not pick up a stale matrix.
            tmp_W = W

    # ---- S-CF: similarity between columns of PR ----
    R = PR.T
    bat_size, feat_size = R.shape
    show_step = 500

    if readWcache and tmp_SW is not None:
        SW = tmp_SW
    else:
        SW = np.zeros((bat_size, bat_size))
        for i in range(bat_size - 1):
            if i % show_step == 0:
                print('----->s-cf step%d' % (i))
            a = R[i, :]
            oria = oriR[:, i]
            for j in range(i + 1, bat_size):
                b = R[j, :]
                orib = oriR[:, j]
                log_and = (a != 0) & (b != 0)
                ws = np.zeros_like(a)
                # The pair analysis is derived from the original columns,
                # not from the completed ones.
                ana_chp = get_ana_item(R.shape, oria, orib)
                for indexk in range(3):
                    tmp = log_and & ana_chp[indexk]
                    ws += np.subtract(a, b, out=np.zeros_like(a), where=tmp) \
                        * ana_chp[indexk + 3]
                ws = ws * feat_w_su
                ws = np.sum(ws ** 2)
                # Cube root here, unlike the square root used for W.
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.power(ws / feat_size, 1.0 / 3))
        np.savetxt(SW_path, SW, '%.12f')
        if readWcache:
            tmp_SW = SW

    R = PR
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Indices of the k (rows) / sk (columns) largest similarities per row.
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:sk]
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    SR = R.T
    for tc in trdata:
        # Test rows whose value is not positive are skipped.
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict(int(tc[1]), int(tc[0]), SR, SW, SS)
        # Blend the row-based and column-based predictions.
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2]) ** 2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%d,MAE=%.6f,RMSE=%.6f\n' % ((time.time() - now), spa, mae, rmse))
    return mae, rmse
Пример #7
0
def run_cf(spa):
    """Run one neighbourhood-based CF experiment and print its MAE / RMSE.

    Steps, in order: load the training triples for the given sparseness
    level, load the location table, build the rating matrix ``R``, compute
    (or read from the on-disk cache) the pairwise similarity matrix ``W``,
    derive the top-``k`` neighbour lists ``S`` with their similarity sums
    ``sumS``, and finally score ``predict`` on the test triples.

    Results are published through the module globals ``R``, ``W``, ``S``,
    ``sumS`` and ``loc_tab``; the function itself returns nothing.
    ``predict`` is called with only the two indices, so it presumably reads
    those globals -- verify against its definition.

    Args:
        spa: Sparseness level; formatted with ``%d`` into the dataset and
            cache file paths.
    """
    global R, W, S, sumS, loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'

    print('开始实验,isICF=%s,稀疏度=%d,case=%d' % (isICF, spa, case))
    print('加载训练数据开始')
    now = time.time()
    # ndmin=2 keeps a single-record file two-dimensional so the column
    # slicing below still works.
    trdata = np.loadtxt(train_data, dtype=float, ndmin=2)
    # len() replaces np.alen, which was deprecated and later removed from NumPy.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('加载地理位置信息开始')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    # Report the number of location entries (not the training-row count).
    print('加载地理位置信息完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), len(loc_tab)))

    print('转换数据到矩阵开始')
    tnow = time.time()
    u = np.array(trdata[:, 0], int)
    s = np.array(trdata[:, 1], int)
    # Cells without a training record keep the NoneValue sentinel.
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    if isICF:
        # In ICF mode the matrix is transposed so rows are the second index
        # of the training triples.
        R = R.T
    del trdata, u, s
    print('转换数据到矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('计算相似度矩阵开始')
    tnow = time.time()
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float128)
    else:
        # True where a cell differs from the sentinel, i.e. holds a value.
        known = R != NoneValue
        for i in range(axis0):
            if i % 50 == 0:
                print('----->step%d' % (i))
            a = R[i, :]
            a_known = known[i, :]
            # A row is never its own neighbour.
            W[i, i] = 0
            # The measure is symmetric, so each unordered pair is computed
            # once and mirrored.
            for j in range(i + 1, axis0):
                both = a_known & known[j, :]
                # Difference only over cells known in both rows; all other
                # positions stay 0 and do not contribute to the distance.
                delta = np.subtract(a, R[j, :], out=np.zeros_like(a), where=both)
                ws = np.sum(delta ** 2)
                # Similarity = exp(-euclidean distance over co-known cells).
                W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws))

        np.savetxt(W_path, W, '%.30f')
    print('计算相似度矩阵结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('生成相似列表开始')
    tnow = time.time()
    # Column indices of the k largest similarities in every row.
    S = np.argsort(-W)[:, 0:k]
    for i in range(axis0):
        sumS[i] = np.sum(W[i, S[i]])
    print('生成相似列表开始结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('加载测试数据开始')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float, ndmin=2)
    n = len(trdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), n))

    print('评测开始')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        # Test records with a non-positive value are not scored.
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]))
        err = rt - tc[2]
        mae += abs(err)
        rmse += err ** 2
        cot += 1
    if cot:
        mae = mae / cot
        rmse = np.sqrt(rmse / cot)
    else:
        # No scorable test record: report NaN instead of dividing by zero.
        mae = rmse = float('nan')
    print('评测完成,耗时 %.2f秒\n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,isICF=%s,稀疏度=%d,MAE=%.3f,RMSE=%.3f\n' % ((time.time() - now), isICF, spa, mae, rmse))
    print('----------------------------------------------------------\n')

    print(W)
    print(S)