# K-fold cross-validation comparing graphALS against plain ALS.
#
# For each fold k, the users in KshuffleUserId[k] are the held-out users and
# the users of every other fold are the training users. The RMSE of both
# models is printed for every fold.
# Assumes `data` is a pandas DataFrame with "userId", "movieId" and "rating"
# columns (it is indexed with a list of column names below).
for k in range(K):
    print("cross-val", k)

    # Split the user folds: fold k is held out, the rest are training users.
    trainUser = []
    testUser = []
    for i in range(K):
        if i == k:
            testUser = KshuffleUserId[i]
        else:
            trainUser.extend(KshuffleUserId[i])

    # One uniform draw per row, redrawn on every fold. A row goes to the
    # training set when its user is a training user OR its draw is below 0.7,
    # so held-out users still contribute part of their ratings to training.
    data["rnd"] = np.random.random(data.shape[0])
    # Series.isin is vectorised; this avoids scanning the trainUser list once
    # per row and avoids OR-ing a plain Python list with a Series.
    trainInd = data["userId"].isin(trainUser) | (data["rnd"] < 0.7)
    dataTrain = data[trainInd]
    dataTest = data[~trainInd]

    # Graph-regularised model.
    bob = graphALS(num_factors=5, num_iterations=20, verbose=True)
    bob.fit(
        dataTrain[["userId", "movieId", "rating"]],
        userContextSparseGraph,
        itemContextSparseGraph,
    )
    Rhat = bob.user_vectors.dot(bob.item_vectors.T)

    # Held-out ratings as a sparse (numUser x numItem) matrix; W holds the
    # coordinates of its non-zero entries and is passed to RMSE for both models.
    R_test = csr_matrix(
        (dataTest["rating"], (dataTest["userId"], dataTest["movieId"])),
        shape=(numUser, numItem),
    )
    W = R_test.nonzero()
    rmse = RMSE(R_test, Rhat, W)
    print(rmse)

    # Baseline: plain ALS fitted on the same training rows, scored on the same W.
    R = csr_matrix(
        (dataTrain["rating"], (dataTrain["userId"], dataTrain["movieId"])),
        shape=(numUser, numItem),
    )
    bobALS = ALS(5)
    bobALS.fit(R)
    Rhat_ALS = bobALS.U.dot(bobALS.V.T)
    rmseALS = RMSE(R_test, Rhat_ALS, W)
    print(rmseALS)
# NOTE(review): this line looks like a repeat of the body of a cross-validation
# loop. It reads `i`, `k` and `trainUser`, none of which are bound on this line,
# so it cannot run on its own -- confirm whether it is a leftover duplicate.
# What the statements do: fold i is assigned to testUser when i == k, otherwise
# it is appended to trainUser. A uniform random number per row is stored in
# data['rnd'], and a row is selected for training when its userId is in
# trainUser or its draw is below 0.7; the remaining rows form dataTest.
# graphALS and ALS(5) are then fitted on the training rows, and both predictions
# are scored with RMSE at the non-zero coordinates (W) of the held-out matrix.
if i == k: testUser = KshuffleUserId[i] else: trainUser.extend(KshuffleUserId[i]) data['rnd'] = np.random.random(data.shape[0]) trainInd = (list(map(lambda x: x in trainUser, data['userId'])) | (data['rnd'] < 0.7)) dataTrain = data[trainInd] dataTest = data[np.invert(trainInd)] bob = graphALS(num_factors=5, num_iterations=20, verbose=True) bob.fit(dataTrain[['userId', 'movieId', 'rating']], userContextSparseGraph, itemContextSparseGraph) Rhat = bob.user_vectors.dot(bob.item_vectors.T) R_test = csr_matrix( (dataTest['rating'], (dataTest['userId'], dataTest['movieId'])), shape=(numUser, numItem)) W = R_test.nonzero() rmse = RMSE(R_test, Rhat, W) print(rmse) R = csr_matrix( (dataTrain['rating'], (dataTrain['userId'], dataTrain['movieId'])), shape=(numUser, numItem)) bobALS = ALS(5) bobALS.fit(R) Rhat_ALS = bobALS.U.dot(bobALS.V.T) rmseALS = RMSE(R_test, Rhat_ALS, W) print(rmseALS)
# Grid search for the 'ALS-WR' model. The candidate values come from
# params['ALS-WR']['params'] ('lambda' and 'rank'); the loop visits every
# (lambda, rank, fold) combination via itertools.product with range(K).
# For each combination: ALS is fitted on the rows where data['cv'] != k, the fit
# is timed, the prediction U.V^T is scored with RMSE against the rows where
# data['cv'] == k, and the result is written into `perf` at the row located by
# getLine_fromdict(perf, current_perf).
# NOTE(review): `time` is called as a bare function, presumably imported with
# `from time import time` -- confirm against the file's imports.
# NOTE(review): the "bagging ALS-WR" section starts at the end of this line; its
# `if` header compares the 'learn' setting to the string "True", and its body is
# not part of this line.
lambdas = params['ALS-WR']['params']['lambda'] rank = params['ALS-WR']['params']['rank'] current_perf = dict.fromkeys(['model','lambda','rank','crossval']) for elt in itertools.product(*[lambdas,rank,range(K)]): l, r, k = elt current_perf['model'] = 'ALS-WR' current_perf['lambda'] = l current_perf['rank'] = r current_perf['crossval'] = k print(current_perf) bob = ALS(d=r,num_users=numUser,num_items=numItem,lbda=l,parallel=True) train = data[data['cv']!=k][['row','col','val']].to_dict(orient='list') test = data[data['cv']==k][['row','col','val']].to_dict(orient='list') t0 = time() bob.fit(train) T = time()-t0 Rhat = bob.U.dot(bob.V.T) R_test = sparse_matrix(test,numUser,numItem) rmse = RMSE(R_test,Rhat) print(rmse) ind = getLine_fromdict(perf,current_perf) perf.loc[ind,['model','rank','lambda','crossval','rmse','runningTime']] = ['ALS-WR',r,l,k,rmse,T] print('-'*50) #============================================================================== # bagging ALS-WR #============================================================================== if params['bagging ALS-WR']['learn']=="True":