# e_BMF_list experiment script.
#
# Reads a ratings file, drops duplicate entries, re-maps user/item ids, logs
# the dataset size and writes the train/test split produced by
# `preprocess.split_data(ratings_r, 0.8)` to `outpath`.
import numpy as np
import pandas as pd

from model import bmf
# NOTE(review): `preprocess` and `get_logger` are used below but were not
# imported in this script; these import paths mirror the sibling e_MF script.
# Confirm they match the project layout.
from preprocess import preprocess
from utils.logger import get_logger

logger = get_logger('e_BMF_list')

# NOTE(review): this file name looks like an e-mail-obfuscation artifact;
# confirm the real ratings file name before running.
filename = 'j:/amazon/output/[email protected]'
# Bare file name (last path component), reused in log and output file names.
dataname = filename.split('/')[-1]
# Output directory for the generated train/test files.
outpath = 'j:/amazon/result/result4/'

# 0. Read the data (comma-separated).
ratings = preprocess.readdata(filename, ',')

# 1. Check for duplicate entries and drop them if present.
ratings_d = preprocess.drop_duplicate(ratings)

# 2. Replace user_id and item_id.
# The helper also returns the user and item collections used for the counts.
ratings_r, users, items = preprocess.replace_user_and_item(ratings_d)

# Basic dataset description: number of ratings, users and items.
m = len(users)  # number of users
n = len(items)  # number of items
logger.info(
    'dataset:' + dataname
    + ',ratings:' + str(len(ratings_r))
    + ',user:' + str(m)
    + ',item:' + str(n)
)

# 3. Split the data.
# Presumably 0.8 is the training fraction -- verify against
# `preprocess.split_data`.
trainset, testset = preprocess.split_data(ratings_r, 0.8)

# Persist both splits without index column or header row.
# Assumes trainset/testset are pandas DataFrames -- TODO confirm.
trainset.to_csv(outpath + 'trainset' + '_' + dataname, index=False, header=False)
testset.to_csv(outpath + 'testset' + '_' + dataname, index=False, header=False)

# 4. Build the training input.
# train_matrix = preprocess.create_matrix_by_trainset(trainset, m, n)
from preprocess import preprocess from model import lmf import pandas as pd from utils.logger import get_logger logger = get_logger('e_MF') filename = 'j:/amazon/output/[email protected]' dataname = filename.split('/')[-1] outpath = 'j:/amazon/result/result2/' #0.读取数据 ratings = preprocess.readdata(filename, ',') #1.判断是否有重复元素,如果有,去除重复元素 ratings_d = preprocess.drop_duplicate(ratings) #2. 替换user_id 与 item_id ratings_r = preprocess.replace_user_and_item(ratings_d) #基本数据描述(包含数据总数目,用户数,物品数) #用户数 m = len(set(ratings_r['user_id'])) n = len(set(ratings_r['item_id'])) logger.info('dataset:' + dataname + ',ratings:' + str(len(ratings_r)) + ',user:'******',item:' + str(n)) #3. 切分数据 trainset, testset = preprocess.split_data(ratings_r, 0.8) trainset.to_csv(outpath + '_MF_' + 'trainset_' + dataname, index=None, header=None) testset.to_csv(outpath + '_MF_' + 'testset_' + dataname, index=None,