Пример #1
0
def run():
    """Cluster services on location features plus per-user-class mean values.

    Steps, in order:

    1. Load the two service location tables, the user classification and
       the matrix stored at ``origin_path``.
    2. Remove a stale ``loc_class_out`` file if one exists.
    3. Zero the negative matrix entries and, for every user class, compute
       the per-column mean over the non-zero entries of that class.
    4. Build one feature vector per service id (location features followed
       by one mean per user class) and cluster them with ``simple_km2``.
    5. Print the centres, the clusters and, per cluster, the distinct
       service names with their areas, then pass the clusters to
       ``write2file``.

    Presumably the matrix rows are users and its columns are services
    (rows are selected by user class, columns are indexed by service id)
    -- TODO confirm against the loaders.
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy releases, so the builtin is passed instead.
    R = np.loadtxt(origin_path, dtype=float)

    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)

    # Negative entries are zeroed so that they drop out of both the sums
    # and the non-zero counts below.
    R[R < 0] = 0

    user_mean = []
    for uc in user_class:
        class_rows = R[uc]
        ser_sum = np.sum(class_rows, axis=0)
        ser_cot = np.count_nonzero(class_rows, axis=0)
        uc_ser_mean = np.divide(ser_sum,
                                ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        # Columns without a single non-zero entry in this class fall back
        # to the mean over all non-zero entries of the class.
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        uc_ser_mean[ser_cot == 0] = all_mean
        user_mean.append(uc_ser_mean)

    k = 6
    names = []
    area = []
    data = []
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        features = list(ser_loc_m[sn][1])
        features.extend(um[sid] for um in user_mean)
        data.append(features)
    data = np.array(data)

    cent, res = simple_km2(data, k, 1)

    print(cent)
    print(res)

    for i in range(k):
        cluster_areas = []
        cluster_names = []
        seen = set()  # constant-time membership test for the de-duplication
        for member in res[i]:
            name = names[member]
            if name not in seen:
                seen.add(name)
                cluster_names.append(name)
                cluster_areas.append(area[member])
        print(cluster_areas)
        print(cluster_names)
        print()

    write2file(res)
Пример #2
0
def run():
    """Cluster the positive matrix entries on user and service location.

    Loads the service and user location tables and the matrix stored at
    ``origin_path``. For every strictly positive entry a feature vector is
    built from ``user_loc[user][2]``, ``ser_loc_m[name][1]`` and the entry
    value itself. The vectors are clustered with ``simple_km`` into
    ``k = 8`` groups; the centres and, per cluster, the distinct names with
    their areas are printed, and the clusters are handed to ``write2file``.
    """
    ser_loc = localload.load(ser_info_path)
    user_loc = localload.load_userinfo(user_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    # NOTE(review): loaded but never used in this variant; the call is kept
    # in case the loader is relied on for validation -- confirm and drop.
    user_class = localtools.load_classif(loc_class_for_user)
    # ``np.float`` / ``np.int`` were aliases of the builtins and no longer
    # exist in current NumPy releases; the builtins are used instead.
    R = np.loadtxt(origin_path, dtype=float)

    positive = np.where(R > 0)
    u = positive[0].astype(int)
    s = positive[1].astype(int)
    dataize = len(u)

    k = 8
    names = []
    area = []
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])

    data = []
    for did in range(dataize):
        sn = ser_loc[s[did]][1]
        features = list(user_loc[u[did]][2])
        features.extend(ser_loc_m[sn][1])
        features.append(R[u[did], s[did]])
        data.append(features)
    data = np.array(data)

    cent, res = simple_km(data, k, 6)

    print(cent)

    # NOTE(review): ``data`` holds one row per positive matrix entry while
    # ``names`` / ``area`` are indexed by service id. If ``simple_km``
    # returns row indices of ``data``, these lookups should probably go
    # through ``s[...]`` -- confirm before relying on the printed names.
    for i in range(k):
        cluster_areas = []
        cluster_names = []
        seen = set()  # constant-time membership test for the de-duplication
        for member in res[i]:
            name = names[member]
            if name not in seen:
                seen.add(name)
                cluster_names.append(name)
                cluster_areas.append(area[member])
        print(cluster_areas)
        print(cluster_names)
        print()

    write2file(res)
Пример #3
0
def _evaluate_class_models(models, test_sets):
    """Return ``(mae, rmse)`` of the per-class models on their test records.

    For class ``i`` every record of ``test_sets[i]`` is scored with
    ``models[i].predict(int(rec[0]), int(rec[1]))`` against ``rec[2]``.
    Records whose ``rec[2]`` is not positive are skipped. When no record is
    left to score, ``(nan, nan)`` is returned instead of dividing by zero.
    """
    abs_err = 0.0
    sq_err = 0.0
    count = 0
    for i, model in enumerate(models):
        for rec in test_sets[i]:
            if rec[2] <= 0:
                continue
            diff = model.predict(int(rec[0]), int(rec[1])) - rec[2]
            abs_err += abs(diff)
            sq_err += diff ** 2
            count += 1
    if count == 0:
        return float('nan'), float('nan')
    return abs_err / count, np.sqrt(sq_err / count)


def mf_base_run(spa, case):
    """Train and evaluate one ``MFS.MF_bl_loc`` model per service class.

    The training and test files for the given sparseness / case are loaded,
    split per class with ``localtools.data_split_class`` and preprocessed
    into per-class means. One model is created per class; stored values are
    preloaded when ``loadvalues`` is set, and when ``continue_train`` is set
    every class is trained and saved for ``epoch`` rounds with an
    evaluation after each round. A final evaluation prints MAE and RMSE.

    Args:
        spa: Sparseness value, formatted with ``%.1f`` into the data paths.
        case: Case number, formatted with ``%d`` into the data paths.
    """
    train_data = base_path+'/Dataset/ws/train_n/sparseness%.1f/training%d.txt'%(spa,case)
    test_data = base_path+'/Dataset/ws/test_n/sparseness%.1f/test%d.txt'%(spa,case)

    values_path = base_path+'/Dataset/local_mf_baseline_values/spa%.1f_case%d'%(spa,case)
    loc_classes = base_path+'/Dataset/ws/ws_classif_out.txt'

    print('开始实验,稀疏度=%.1f,case=%d'%(spa,case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    ser_class = localtools.load_classif(loc_classes)
    classiy_size = len(ser_class)
    # ``np.alen`` no longer exists in current NumPy; ``len`` is the
    # documented replacement.
    n_train = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n'%((time.time() - now),n_train))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_data, dtype=float)
    n_test = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n'%((time.time() - tnow),n_test))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class(ser_class, trdata)
    test_sets = localtools.data_split_class(ser_class, ttrdata)
    # The raw arrays are not needed once the per-class splits exist.
    del trdata, ttrdata
    print('分类数据集结束,耗时 %.2f秒  \n'%((time.time() - tnow)))

    print('预处理数据开始')
    tnow = time.time()
    means = preprocess(train_sets)
    print('预处理数据结束,耗时 %.2f秒  \n'%((time.time() - tnow)))

    print('训练模型开始')
    tnow = time.time()
    ttn = tnow
    svdes = [MFS.MF_bl_loc(us_shape, f, means[i]) for i in range(classiy_size)]

    if loadvalues:
        for i in range(classiy_size):
            svdes[i].preloadValues(values_path+'/class%d'%(i))

    if continue_train:
        for ep in range(epoch):
            for i in range(classiy_size):
                print('类%d训练开始'%(i))
                svdes[i].train_mat(train_sets[i], repeat, learn_rate, lamda, values_path)
                svdes[i].saveValues(values_path+'/class%d'%(i))
                print('类%d训练结束,耗时 %.2f秒  \n'%(i,(time.time() - ttn)))
                ttn = time.time()
            # ``ttn`` was reset after the last class, so the time printed
            # here covers this evaluation only.
            mae, _ = _evaluate_class_models(svdes, test_sets)
            print('-------->>>>ep=%d训练结束,mae=%f耗时 %.2f秒  \n'%(ep,mae,(time.time() - ttn)))

    print('训练模型结束,耗时 %.2f秒  \n'%((time.time() - tnow)))

    print('评测开始')
    tnow = time.time()
    mae, rmse = _evaluate_class_models(svdes, test_sets)
    print('评测完成,耗时 %.2f秒\n'%((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f,MAE=%.6f,RMSE=%.6f\n'%((time.time()-now),spa,mae,rmse))
Пример #4
0
def mf_base_run(spa, case):
    """Train a ``hyb_ncf_local`` model on data split by user class.

    The training and test files for the given sparseness / case are loaded
    and split with ``localtools.data_split_class_byuser``. The model and
    training parameter objects are then filled in and ``model.train`` is
    called. Timing information is printed along the way.

    Args:
        spa: Sparseness value used in the data, cache and result paths.
        case: Case number used in the data, cache and result paths.
    """
    train_path = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (
        spa, case)
    test_path = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (
        spa, case)
    # NOTE(review): ``spa`` is formatted with ``%d`` here but with ``%.1f``
    # everywhere else, so fractional sparseness values would share a cache
    # file -- confirm whether that is intended.
    cache_path = 'value_cache/spa%d_case%d.ckpt' % (spa, case)
    result_file = 'result/ws_spa%.1f_case%d.txt' % (spa, case)

    loc_classes = base_path + '/Dataset/ws/localinfo/ws_classif_out_by_user.txt'

    print('开始实验,稀疏度=%.1f,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_path, dtype=float)
    user_class = localtools.load_classif(loc_classes)
    # ``np.alen`` no longer exists in current NumPy; ``len`` is the
    # documented replacement.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_path, dtype=float)
    tn = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), tn))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class_byuser(user_class, trdata)
    test_sets = localtools.data_split_class_byuser(user_class, ttrdata)
    # The raw arrays are not needed once the per-class splits exist.
    del trdata, ttrdata
    print('分类数据集结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    # Model construction parameters.
    cp = NcfCreParam()
    cp.us_shape = (339, 5825)
    cp.hid_feat = 32
    cp.hid_units = [64, 32, 16]
    cp.drop_p = 0.00001
    cp.reg_p = 0.0001

    # Training parameters.
    tp = NcfTraParm()
    tp.train_data = train_sets
    tp.test_data = test_sets
    tp.epoch = 20
    tp.batch_size = 5
    tp.learn_rate = 0.007
    tp.lr_decy_rate = 1.0
    # One decay step per pass over the ``n`` training records.
    tp.lr_decy_step = int(n / tp.batch_size)
    tp.cache_rec_path = cache_path
    tp.result_file_path = result_file
    tp.load_cache_rec = False
    tp.classif_size = len(train_sets)

    print('训练模型开始')
    tnow = time.time()
    model = hyb_ncf_local(cp)

    model.train(tp)

    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f\n' % ((time.time() - now), spa))
Пример #5
0
def run():
    """Cluster services and export the normalised class distance weights.

    Per-user-class column means are computed from the matrix stored at
    ``origin_path`` (negative entries zeroed). One feature vector per
    service id is built from ``ser_loc_m[name][1]`` plus those means, and
    the vectors are clustered with ``simple_km`` into ``k = 6`` groups.
    The centres, clusters and per-cluster distinct names / areas are
    printed. The third value returned by ``simple_km`` is inverted,
    exponentiated, normalised by its column sums and saved to
    ``loc_class_dis_rate_out``. Finally a stale ``loc_class_out`` file is
    removed and the clusters are handed to ``write2file``.
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy releases, so the builtin is passed instead.
    R = np.loadtxt(origin_path, dtype=float)

    # Negative entries are zeroed so that they drop out of both the sums
    # and the non-zero counts below.
    R[R < 0] = 0

    user_mean = []
    for uc in user_class:
        class_rows = R[uc]
        ser_sum = np.sum(class_rows, axis=0)
        ser_cot = np.count_nonzero(class_rows, axis=0)
        uc_ser_mean = np.divide(ser_sum,
                                ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        # Columns without a single non-zero entry in this class fall back
        # to the mean over all non-zero entries of the class.
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        uc_ser_mean[ser_cot == 0] = all_mean
        user_mean.append(uc_ser_mean)

    k = 6
    di = 3
    names = []
    area = []
    data = []
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        features = list(ser_loc_m[sn][1])
        features.extend(um[sid] for um in user_mean)
        data.append(features)
    data = np.array(data)

    cent, res, dis_rate = simple_km(data, k, di)

    print(cent)
    print(res)

    for i in range(k):
        cluster_areas = []
        cluster_names = []
        seen = set()  # constant-time membership test for the de-duplication
        for member in res[i]:
            name = names[member]
            if name not in seen:
                seen.add(name)
                cluster_names.append(name)
                cluster_areas.append(area[member])
        print(cluster_areas)
        print(cluster_names)
        print()

    # Compute the class distances: take the reciprocal first ...
    dis_rate = 1 / dis_rate
    print(dis_rate)
    print(np.sort(dis_rate, axis=1))

    # ... then exponentiate and normalise by the sums along axis 0.
    dis_rate = np.exp(dis_rate)
    dis_sum = np.sum(dis_rate, axis=0)
    dis_rate /= dis_sum

    print(dis_rate)
    print(np.sort(dis_rate, axis=1))

    np.savetxt(loc_class_dis_rate_out, dis_rate, '%.8f')

    # Guard the removal so that a missing output file (e.g. on the first
    # run) does not abort after all the work above.
    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)
    write2file(res)
Пример #6
0
def mf_base_run(spa, case):
    """Train an ``ncf_pp_local`` model on data split by service class.

    The training and test files for the given sparseness / case are loaded
    and split with ``localtools.data_split_class``. In addition, one
    row-normalised invocation matrix per service class is built from the
    training records and passed to the trainer as ``tp.us_invked``.
    Timing information is printed along the way.

    Args:
        spa: Sparseness value used in the data, cache and result paths.
        case: Case number used in the data, cache and result paths.
    """
    train_path = base_path + '/Dataset/ws/train_n/sparseness%.1f/training%d.txt' % (
        spa, case)
    test_path = base_path + '/Dataset/ws/test_n/sparseness%.1f/test%d.txt' % (
        spa, case)
    cache_path = 'value_cache/spa%d_case%d.ckpt' % (spa, case)
    result_file = 'result/ws_spa%.1f_case%d.txt' % (spa, case)

    loc_classes = base_path + '/Dataset/ws/ws_classif_out.txt'

    print('开始实验,稀疏度=%.1f,case=%d' % (spa, case))
    print('加载训练数据开始')
    now = time.time()
    trdata = np.loadtxt(train_path, dtype=float)
    ser_class = localtools.load_classif(loc_classes)
    # ``np.alen`` no longer exists in current NumPy; ``len`` is the
    # documented replacement.
    n = len(trdata)
    print('加载训练数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - now), n))

    print('加载测试数据开始')
    tnow = time.time()
    ttrdata = np.loadtxt(test_path, dtype=float)
    tn = len(ttrdata)
    print('加载测试数据完成,耗时 %.2f秒,数据总条数%d  \n' % ((time.time() - tnow), tn))

    print('分类数据集开始')
    tnow = time.time()
    train_sets = localtools.data_split_class(ser_class, trdata)
    test_sets = localtools.data_split_class(ser_class, ttrdata)
    print('分类数据集结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    # Model construction parameters.
    cp = NcfCreParam()
    cp.us_shape = (339, 5825)
    cp.hid_feat = 32
    cp.hid_units = [64, 32, 16]
    cp.drop_p = 0.00001
    cp.reg_p = 0.0001

    # Process the user -> service invocation records: mark every
    # (user, service) pair that occurs in the training data.
    R = np.zeros(cp.us_shape)
    u = trdata[:, 0].astype(np.int32)
    s = trdata[:, 1].astype(np.int32)
    R[u, s] = 1.0

    us_invked = []
    for cla in ser_class:
        # Mask that keeps only the columns belonging to this class.
        hot = np.zeros([cp.us_shape[1]], np.float32)
        hot[cla] = 1.0
        usi = R * hot
        # Scale each row by 1 / sqrt(number of marked entries in the row);
        # rows without any marked entry stay zero.
        nonzeroes = np.sqrt(np.count_nonzero(usi, axis=1))
        noz = np.divide(1.0,
                        nonzeroes,
                        out=np.zeros_like(nonzeroes),
                        where=nonzeroes != 0)
        noz = np.reshape(noz, [-1, 1])
        us_invked.append((usi * noz).astype(np.float32))

    # Training parameters.
    tp = NcfTraParm()
    tp.train_data = train_sets
    tp.test_data = test_sets
    tp.epoch = 40
    tp.batch_size = 5
    tp.learn_rate = 0.007
    tp.lr_decy_rate = 1.0
    # One decay step per pass over the ``n`` training records.
    tp.lr_decy_step = int(n / tp.batch_size)
    tp.cache_rec_path = cache_path
    tp.result_file_path = result_file
    tp.load_cache_rec = False
    tp.classif_size = len(train_sets)
    tp.us_invked = us_invked

    print('训练模型开始')
    tnow = time.time()
    model = ncf_pp_local(cp)

    model.train(tp)

    print('训练模型结束,耗时 %.2f秒  \n' % ((time.time() - tnow)))

    print('实验结束,总耗时 %.2f秒,稀疏度=%.1f\n' % ((time.time() - now), spa))
Пример #7
0
def run():
    """Cluster services on location, IP and per-user-class mean features.

    Per-user-class column means are computed from the matrix stored at
    ``origin_path`` (negative entries zeroed). One feature vector per
    service id is built from ``ser_loc_m[name][1]``, the first two
    components of ``ser_loc[sid][2]`` and one mean per user class. The
    vectors are clustered with ``simple_km2`` into ``k = 6`` groups; the
    centres, the clusters and the per-cluster distinct names / areas are
    printed, and the clusters are handed to ``write2file``.
    """
    ser_loc = localload.load(ser_info_path)
    ser_loc_m = localload.load_locmore(ser_info_more_path)
    user_class = localtools.load_classif(loc_class_for_user)
    # ``np.float`` was only an alias of the builtin ``float`` and no longer
    # exists in current NumPy releases, so the builtin is passed instead.
    R = np.loadtxt(origin_path, dtype=float)

    if os.path.isfile(loc_class_out):
        os.remove(loc_class_out)

    # Negative entries are zeroed so that they drop out of both the sums
    # and the non-zero counts below.
    R[R < 0] = 0

    user_mean = []
    for uc in user_class:
        class_rows = R[uc]
        ser_sum = np.sum(class_rows, axis=0)
        ser_cot = np.count_nonzero(class_rows, axis=0)
        uc_ser_mean = np.divide(ser_sum,
                                ser_cot,
                                out=np.zeros_like(ser_sum),
                                where=ser_cot != 0)
        # Columns without a single non-zero entry in this class fall back
        # to the mean over all non-zero entries of the class.
        all_mean = np.sum(ser_sum) / np.sum(ser_cot)
        uc_ser_mean[ser_cot == 0] = all_mean
        user_mean.append(uc_ser_mean)

    k = 6
    names = []
    area = []
    data = []
    for sid in range(5825):
        sn = ser_loc[sid][1]
        names.append(sn)
        area.append(ser_loc_m[sn][0])
        features = list(ser_loc_m[sn][1])
        # Add the IP: the original comment labels these two components of
        # ``ser_loc[sid][2]`` as the IP.
        features.extend(ser_loc[sid][2][:2])
        features.extend(um[sid] for um in user_mean)
        data.append(features)
    data = np.array(data)

    cent, res = simple_km2(data, k, 1)

    print(cent)
    print(res)

    for i in range(k):
        cluster_areas = []
        cluster_names = []
        seen = set()  # constant-time membership test for the de-duplication
        for member in res[i]:
            name = names[member]
            if name not in seen:
                seen.add(name)
                cluster_names.append(name)
                cluster_areas.append(area[member])
        print(cluster_areas)
        print(cluster_names)
        print()

    write2file(res)