# Fragment of a per-fold train/test evaluation. Line breaks and indentation
# were lost in this copy, and `i` / `k` are not defined here -- presumably they
# are the indices of enclosing loops (TODO confirm against the full script).
#
# Steps, in the order they appear:
#   1. When i == k, KshuffleUserId[i] becomes testUser; otherwise its entries
#      are appended to trainUser.
#   2. A fresh uniform random column `rnd` is written into `data`. Rows whose
#      userId is in trainUser, OR whose `rnd` is below 0.7, form dataTrain;
#      the complement forms dataTest.
#   3. graphALS(num_factors=5, num_iterations=20) is fitted on the
#      (userId, movieId, rating) columns of dataTrain together with the user
#      and item context graphs; Rhat is user_vectors . item_vectors^T.
#   4. R_test is the (numUser, numItem) sparse matrix of test ratings and W its
#      non-zero positions; RMSE(R_test, Rhat, W) is printed.
#   5. ALS(5) is fitted on the sparse training matrix R and its RMSE against
#      the same R_test / W is printed.
#
# NOTE(review): `x in trainUser` scans a Python list once per row; a vectorised
# membership test (e.g. Series.isin) would avoid the quadratic cost -- assumes
# `data` is a pandas DataFrame, confirm.
if i == k: testUser = KshuffleUserId[i] else: trainUser.extend(KshuffleUserId[i]) data['rnd'] = np.random.random(data.shape[0]) trainInd = (list(map(lambda x: x in trainUser, data['userId'])) | (data['rnd'] < 0.7)) dataTrain = data[trainInd] dataTest = data[np.invert(trainInd)] bob = graphALS(num_factors=5, num_iterations=20, verbose=True) bob.fit(dataTrain[['userId', 'movieId', 'rating']], userContextSparseGraph, itemContextSparseGraph) Rhat = bob.user_vectors.dot(bob.item_vectors.T) R_test = csr_matrix( (dataTest['rating'], (dataTest['userId'], dataTest['movieId'])), shape=(numUser, numItem)) W = R_test.nonzero() rmse = RMSE(R_test, Rhat, W) print(rmse) R = csr_matrix( (dataTrain['rating'], (dataTrain['userId'], dataTrain['movieId'])), shape=(numUser, numItem)) bobALS = ALS(5) bobALS.fit(R) Rhat_ALS = bobALS.U.dot(bobALS.V.T) rmseALS = RMSE(R_test, Rhat_ALS, W) print(rmseALS)
# Cross-validation over user folds: for every fold k, fit graphALS and plain
# ALS on the training split and print each model's RMSE on the test split.
# Assumes `data` is a pandas DataFrame with userId / movieId / rating columns
# (it is indexed by column name and by boolean mask below).
for k in range(K):
    print("cross-val", k)

    # Fold k supplies the held-out users; every other fold feeds the
    # training pool.
    trainUser = []
    testUser = []
    for i in range(K):
        if i == k:
            testUser = KshuffleUserId[i]
        else:
            trainUser.extend(KshuffleUserId[i])

    # A row goes to training when its user is in the training pool OR its
    # random draw is below 0.7; all remaining rows form the test set.
    data["rnd"] = np.random.random(data.shape[0])
    # Series.isin replaces a per-row `x in trainUser` scan of a Python list
    # (quadratic in practice) and yields a boolean Series directly, so the
    # `|` below combines two Series instead of a plain list and a Series.
    trainInd = data["userId"].isin(trainUser) | (data["rnd"] < 0.7)
    dataTrain = data[trainInd]
    dataTest = data[~trainInd]

    # Graph-regularised model, fitted on the training triples plus the user
    # and item context graphs.
    bob = graphALS(num_factors=5, num_iterations=20, verbose=True)
    bob.fit(
        dataTrain[["userId", "movieId", "rating"]],
        userContextSparseGraph,
        itemContextSparseGraph,
    )
    Rhat = bob.user_vectors.dot(bob.item_vectors.T)

    # Sparse test matrix; W holds the positions of its non-zero entries and
    # is passed to RMSE for both models.
    R_test = csr_matrix(
        (dataTest["rating"], (dataTest["userId"], dataTest["movieId"])),
        shape=(numUser, numItem),
    )
    W = R_test.nonzero()
    rmse = RMSE(R_test, Rhat, W)
    print(rmse)

    # Baseline ALS fitted on the sparse training matrix, scored on the same
    # test entries.
    R = csr_matrix(
        (dataTrain["rating"], (dataTrain["userId"], dataTrain["movieId"])),
        shape=(numUser, numItem),
    )
    bobALS = ALS(5)
    bobALS.fit(R)
    Rhat_ALS = bobALS.U.dot(bobALS.V.T)
    rmseALS = RMSE(R_test, Rhat_ALS, W)
    print(rmseALS)
# Config-driven grids for alpha / eps, currently disabled in favour of the
# fixed single-value grids below.
#alphas = params['ALS-WR']['params']['alpha']
#epss = params['ALS-WR']['params']['eps']
regs = ["default"]
lambdas = [0.8]
rank = [20]
alphas = [10]
epss = [10]

# Key under which each run is looked up in `perf`.
current_perf = dict.fromkeys(
    ['model', 'reg', 'lambda', 'rank', 'alpha', 'eps', 'crossval'])

# Sweep every (reg, lambda, rank, alpha, eps) combination; only fold 0 is used.
for elt in itertools.product(regs, lambdas, rank, alphas, epss, [0]):
    reg, l, r, alpha, eps, k = elt
    current_perf.update({'model': 'ALS-WR', 'reg': reg, 'lambda': l,
                         'rank': r, 'alpha': alpha, 'eps': eps,
                         'crossval': k})
    print(current_perf)

    bob = ALS(d=r, num_users=numUser, num_items=numItem, lbda=l, seed=0,
              reg=reg, verbose=True)

    # Fit on every row whose `cv` fold differs from k, timing the fit only.
    t0 = time()
    bob.fitImplicit(data[data['cv'] != k], alpha=alpha, c="log", eps=eps)
    T = time() - t0

    Rhat = bob.U.dot(bob.V.T)
    # presumably the held-out matrix for fold k -- confirm against sparseMatrix
    R_test = sparseMatrix(data, k, include=True,
                          names=list(data.columns)[:3])

    # The score gets its own name: assigning it to `rank` would replace the
    # list of ranks defined above with a scalar and break any later sweep
    # that iterates over `rank`.
    rank_score = rankMeasure(R_test, Rhat)
    print(rank_score)

    ind = getLine_fromdict(perf, current_perf)
    perf.loc[ind] = ['ALS-WR', reg, l, r, alpha, eps, k, rank_score, T]
print('-'*50)

#==============================================================================
# graphALS
#==============================================================================
# Sweep every (reg, lambda, rank, alpha, eps) combination on fold 0, fit an
# implicit-feedback ALS model for each, and record its rank measure and fit
# time in `perf`.
for elt in itertools.product(regs, lambdas, rank, alphas, epss, [0]):
    reg, l, r, alpha, eps, k = elt
    current_perf.update({
        'model': 'ALS-WR',
        'reg': reg,
        'lambda': l,
        'rank': r,
        'alpha': alpha,
        'eps': eps,
        'crossval': k,
    })
    print(current_perf)

    bob = ALS(d=r, num_users=numUser, num_items=numItem, lbda=l, seed=0,
              reg=reg, verbose=True)

    # Fit on every row whose `cv` fold differs from k, timing the fit only.
    t0 = time()
    bob.fitImplicit(data[data['cv'] != k], alpha=alpha, c="log", eps=eps)
    T = time() - t0

    Rhat = bob.U.dot(bob.V.T)
    # presumably the held-out matrix for fold k -- confirm against sparseMatrix
    R_test = sparseMatrix(data, k, include=True,
                          names=list(data.columns)[:3])

    # The score gets its own name: assigning it to `rank` would replace the
    # iterable of ranks this loop was built from with a scalar and break any
    # later sweep that iterates over `rank`.
    rank_score = rankMeasure(R_test, Rhat)
    print(rank_score)

    ind = getLine_fromdict(perf, current_perf)
    perf.loc[ind] = ['ALS-WR', reg, l, r, alpha, eps, k, rank_score, T]
# ALS-WR
#==============================================================================
# Grid search over (lambda, rank) for explicit-feedback ALS, evaluated on each
# of the K folds; runs only when the config flag is the string "True".
if params['ALS-WR']['learn'] == "True":
    print("Learn ALS-WR")
    lambdas = params['ALS-WR']['params']['lambda']
    rank = params['ALS-WR']['params']['rank']

    # Key under which each run is looked up in `perf`.
    current_perf = dict.fromkeys(['model', 'lambda', 'rank', 'crossval'])

    for elt in itertools.product(lambdas, rank, range(K)):
        l, r, k = elt
        current_perf.update(
            {'model': 'ALS-WR', 'lambda': l, 'rank': r, 'crossval': k})
        print(current_perf)

        bob = ALS(d=r, num_users=numUser, num_items=numItem, lbda=l,
                  parallel=True)

        # Fold k is held out; all other folds are used for fitting. Both
        # splits are handed over as column -> list dictionaries.
        train = data.loc[data['cv'] != k,
                         ['row', 'col', 'val']].to_dict(orient='list')
        test = data.loc[data['cv'] == k,
                        ['row', 'col', 'val']].to_dict(orient='list')

        # Only the fit itself is timed.
        t0 = time()
        bob.fit(train)
        T = time() - t0

        Rhat = bob.U.dot(bob.V.T)
        R_test = sparse_matrix(test, numUser, numItem)
        rmse = RMSE(R_test, Rhat)
        print(rmse)

        # Store the outcome in the `perf` row matching this configuration.
        ind = getLine_fromdict(perf, current_perf)
        perf.loc[
            ind,
            ['model', 'rank', 'lambda', 'crossval', 'rmse', 'runningTime'],
        ] = ['ALS-WR', r, l, k, rmse, T]
    print('-'*50)