def model(
    sparse_item_user_file_path=(
        "/Users/maxmaiberger/Documents/board-game-recommender/import/Data/"
        "test_data_saved/sparse_item_user.npz"
    ),
    model_file_path=(
        "/Users/maxmaiberger/Documents/board-game-recommender/import/Data/"
        "test_data_saved/model.sav"
    ),
):
    """Train an ALS model, pickle it, and report p@k and MAP@k with k=10.

    The sparse matrix is loaded from disk, split 80/20 into train and test
    parts, and an ``AlternatingLeastSquares`` model (100 factors,
    regularization 0.1, 20 iterations) is fitted on the training part. The
    fitted model is pickled before the metrics are computed.

    Args:
        sparse_item_user_file_path (str): location of the ``.npz`` file read
            with ``load_npz``. Defaults to the path this function previously
            had hard-coded.
        model_file_path (str): location the fitted model is pickled to.
            Defaults to the path this function previously had hard-coded.

    Returns:
        tuple: ``(p_at_k, m_at_k)`` -- the results of ``precision_at_k`` and
        ``mean_average_precision_at_k`` for K=10. Both are also printed.
    """
    sparse_item_user = load_npz(sparse_item_user_file_path)
    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    # NOTE(review): the matrix is named item*user, yet the same split is
    # handed to the metrics as ``train_user_items`` -- confirm the orientation
    # expected by the installed implicit version.
    # Local name differs from the function name so the function is not shadowed.
    als_model = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=20,
        calculate_training_loss=False,
    )
    als_model.fit(train)

    with open(model_file_path, 'wb') as pickle_out:
        pickle.dump(als_model, pickle_out)

    p_at_k = precision_at_k(als_model, train_user_items=train,
                            test_user_items=test, K=10)
    m_at_k = mean_average_precision_at_k(als_model, train, test, K=10)
    print('precision at k:', p_at_k)
    print('mean average precision at k:', m_at_k)
    return p_at_k, m_at_k
def model(sparse_user_item_file_path='files/sparse_user_item.npz',
          model_file_path='files/model.sav'):
    """Computes p@k and map@k evaluation metrics and saves model.

    The matrix is split 80/20 into train and test parts, an
    ``AlternatingLeastSquares`` model (100 factors, regularization 0.1,
    100 iterations) is fitted on the training part, and the fitted model is
    pickled before the metrics are computed.

    Args:
        sparse_user_item_file_path (str): file location for a
            scipy.sparse.csr_matrix sparse user * item matrix
        model_file_path (str): file location the fitted model is pickled to;
            defaults to the previously hard-coded 'files/model.sav'

    Returns:
        p_at_k (float): precision @ k recommendations, with k=10
        map_at_k (float): mean average precision @ k recommendations,
            with k=10
    """
    sparse_user_item = load_npz(sparse_user_item_file_path)
    train, test = train_test_split(sparse_user_item, train_percentage=0.8)

    # Local name differs from the function name so the function is not shadowed.
    als_model = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=100,
        calculate_training_loss=False,
    )
    als_model.fit(train)

    with open(model_file_path, 'wb') as pickle_out:
        pickle.dump(als_model, pickle_out)

    p_at_k = precision_at_k(als_model, train_user_items=train,
                            test_user_items=test, K=10)
    map_at_k = mean_average_precision_at_k(als_model, train, test, K=10)
    return p_at_k, map_at_k
def train_als(trn, tst):
    """Fit the enclosing-scope ``model`` on ``trn`` and report MAP@10 / NDCG@10.

    ``model`` is not a parameter; it is resolved from the surrounding scope.

    Args:
        trn: training matrix passed to ``model.fit`` and to both metrics.
        tst: held-out matrix passed to both metrics.

    Returns:
        The fitted ``model`` object.
    """
    model.fit(trn, show_progress=True)

    # Both metrics are evaluated with identical settings.
    eval_options = {"K": 10, "show_progress": True, "num_threads": 1}
    map_score = mean_average_precision_at_k(model, trn, tst, **eval_options)
    ndcg_score = ndcg_at_k(model, trn, tst, **eval_options)

    print("MAP is %.4f and NDCG is %.4f: " % (map_score, ndcg_score))
    return model
def evaloutput(self, K=10):
    """Evaluate the pickled model in 'model_NR.sav' on a fresh 80/20 split.

    Args:
        K: cut-off passed to both ranking metrics.

    Returns:
        tuple: ``(p_at_k, m_at_k)`` -- the results of ``precision_at_k`` and
        ``mean_average_precision_at_k``.
    """
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # use this with model files from a trusted source.
    with open('model_NR.sav', 'rb') as model_file:
        fitted = pickle.load(model_file)

    interactions = load_npz("sparse_user_item_NR.npz")
    train, test = train_test_split(interactions, train_percentage=0.8)

    # Report the shape of each part, test first.
    for label, part in (("test", test), ("train", train)):
        print(label, part.shape)

    return (
        precision_at_k(fitted, train, test, K),
        mean_average_precision_at_k(fitted, train, test, K),
    )
def model(sparse_item_user_file_path="sparse_item_user.npz",
          model_file_path='model.sav'):
    """Computes p@k and map@k evaluation metrics and saves model.

    The sparse matrix is loaded from disk, split 80/20 into train and test
    parts, and an ``AlternatingLeastSquares`` model (100 factors,
    regularization 0.1, 20 iterations) is fitted on the training part. The
    fitted model is pickled before the metrics are computed.

    Args:
        sparse_item_user_file_path (str): location of the ``.npz`` file read
            with ``load_npz``; defaults to the previously hard-coded
            "sparse_item_user.npz".
        model_file_path (str): location the fitted model is pickled to;
            defaults to the previously hard-coded 'model.sav'.

    Returns:
        tuple: ``(p_at_k, m_at_k)`` -- the results of ``precision_at_k`` and
        ``mean_average_precision_at_k`` for K=10.
    """
    sparse_item_user = load_npz(sparse_item_user_file_path)
    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    # NOTE(review): the matrix is named item*user, yet the same split is
    # handed to the metrics as ``train_user_items`` -- confirm the orientation
    # expected by the installed implicit version.
    # Local name differs from the function name so the function is not shadowed.
    als_model = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=20,
        calculate_training_loss=False,
    )
    als_model.fit(train)

    with open(model_file_path, 'wb') as pickle_out:
        pickle.dump(als_model, pickle_out)

    p_at_k = precision_at_k(als_model, train_user_items=train,
                            test_user_items=test, K=10)
    m_at_k = mean_average_precision_at_k(als_model, train, test, K=10)
    return p_at_k, m_at_k
def learningCurve(model, train, test, epochs, outFile=None, k=5,
                  showProgress=True, numThreads=12):
    """Train ``model`` in stages and record ranking metrics after each stage.

    For every entry in ``epochs`` the model is trained for the difference
    between that entry and the previous one (starting from 0), then p@k,
    MAP@k, NDCG@k and AUC@k are computed and written out via ``printLog``.

    Args:
        model: object exposing ``iterations``, ``fit`` and ``fit_partial``.
            ``fit`` is used while the model has no ``user_vectors`` attribute,
            ``fit_partial`` afterwards.
        train: training matrix; passed as-is to the fit calls and transposed
            (``.T.tocsr()``) for the metrics.
        test: held-out matrix; transposed (``.T.tocsr()``) for the metrics.
        epochs: iterable of cumulative epoch counts at which to evaluate.
        outFile: forwarded to ``printLog``.
        k: cut-off used for all four metrics.
        showProgress: forwarded as ``show_progress`` to fitting and evaluation.
        numThreads: forwarded as ``num_threads`` to the evaluation helpers.

    Returns:
        tuple: ``(model, pAtK, MAPatK, NDCGatK, AUCatK)`` where the last four
        are lists holding one value per entry of ``epochs``.
    """
    # The transposed matrices do not change between epochs, so they are built
    # once here rather than four times per epoch.
    trainT = train.T.tocsr()
    testT = test.T.tocsr()
    evalOptions = {"K": k, "show_progress": showProgress,
                   "num_threads": numThreads}

    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    prevEpoch = 0
    for epoch in epochs:
        # Train only the additional iterations needed to reach this epoch count.
        model.iterations = epoch - prevEpoch
        if not hasattr(model, "user_vectors"):
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)

        pAtK.append(precision_at_k(model, trainT, testT, **evalOptions))
        MAPatK.append(
            mean_average_precision_at_k(model, trainT, testT, **evalOptions))
        NDCGatK.append(ndcg_at_k(model, trainT, testT, **evalOptions))
        AUCatK.append(AUC_at_k(model, trainT, testT, **evalOptions))

        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
# Evaluate the fitted model on the held-out split, timing each ranking metric.
# The metrics are given the transposed matrices in CSR form.
trainTscr = train.T.tocsr()
testTscr = test.T.tocsr()
k = args.k

print(f"Computing p@{k} ...", flush=True)
t0 = time()
pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=args.progressBar,
                      num_threads=args.numThreads)
print(f"Δt: {time() - t0:5.1f}s")

print(f"Computing MAP@{k} ...", flush=True)
t0 = time()
MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                     show_progress=args.progressBar,
                                     num_threads=args.numThreads)
print(f"Δt: {time() - t0:5.1f}s")

print(f"Computing NDCG@{k} ...", flush=True)
t0 = time()
NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                    show_progress=args.progressBar,
                    num_threads=args.numThreads)
print(f"Δt: {time() - t0:5.1f}s")

# AUC is announced and timed on its own; previously its run time was folded
# into the NDCG figure.
print(f"Computing AUC@{k} ...", flush=True)
t0 = time()
AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                  show_progress=args.progressBar,
                  num_threads=args.numThreads)
print(f"Δt: {time() - t0:5.1f}s")

# The two literals are concatenated, so the first one must end with the
# separator; without it the output read "...MAP@k: 0.1234NDCG@k: ...".
print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
      f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
def run(modelName, datasetName, factorCt, k, λ, α, maxIters, showProgress,
        useGPU, threadCt):
    """Train one recommender on a dataset and log p@k, MAP@k, NDCG@k and AUC@k.

    The model is built by ``getModel`` and the data fetched by
    ``fetchDataset``. The play matrix is split 80/20 with
    ``train_test_split``; the model is fitted on the training part and
    evaluated against the held-out part. Each metric is printed and also
    recorded through ``ex.log_scalar``.

    Args:
        modelName: model key passed to ``getModel``. ``'als'`` is the only
            value for which no ``alpha`` parameter is supplied.
        datasetName: dataset key passed to ``fetchDataset``.
        factorCt: value for the ``factors`` model parameter.
        k: cut-off used for all four metrics.
        λ: value for the ``regularization`` model parameter.
        α: value for the ``alpha`` model parameter (not used for ``'als'``).
        maxIters: value for the ``iterations`` model parameter.
        showProgress: forwarded as ``show_progress`` to fitting and evaluation.
        useGPU: value for the ``use_gpu`` model parameter.
        threadCt: forwarded as ``num_threads`` to the evaluation helpers.

    Returns:
        None.
    """
    # Build the parameter dict once; key order matches the original literals.
    params = {'factors': factorCt, 'regularization': λ}
    if modelName != 'als':
        params['alpha'] = α
    params['iterations'] = maxIters
    params['use_gpu'] = useGPU
    model = getModel(modelName, volubility=2, params=params)

    artists, users, plays = fetchDataset(datasetName, volubility=2)
    print(artists.shape, users.shape, plays.shape, flush=True)

    if isinstance(model, AlternatingLeastSquares):
        # ALS-based models are trained on BM25-weighted play counts.
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    plays = plays.tocsr()
    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()
    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    # The metrics are given the transposed matrices in CSR form.
    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"p@{k}", pAtK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=showProgress,
                                         num_threads=threadCt)
    ex.log_scalar(f"MAP@{k}", MAPatK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"NDCG@{k}", NDCGatK)
    print(f"Δt: {time() - t0:5.1f}s")

    # AUC is announced and timed on its own; previously its run time was
    # folded into the NDCG figure.
    print(f"Computing AUC@{k} ...", flush=True)
    t0 = time()
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"AUC@{k}", AUCatK)
    print(f"Δt: {time() - t0:5.1f}s")

    # The two literals are concatenated, so the first one must end with the
    # separator; without it the output read "...MAP@k: 0.1234NDCG@k: ...".
    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
model = get_model(model_name)

# ALS-based models are fitted on re-weighted play counts; any other model is
# fitted on the plain split.
if isinstance(model, AlternatingLeastSquares):
    if not USE_BUILTIN_DATA:
        # Log-scale the existing train/test splits as a confidence
        # approximation. The paper gives no alpha value, so 1 is a guess.
        # NOTE(review): .data is replaced on the existing split objects; if
        # this fragment runs once per model the transform would be applied
        # repeatedly -- confirm against the surrounding code.
        alpha = 1
        train_plays.data = 1 + np.log(alpha * train_plays.data)
        test_plays.data = 1 + np.log(alpha * test_plays.data)
    else:
        # Built-in data: weight the play counts with BM25, then split.
        bm25_play = bm25_weight(plays, K1=100, B=0.8)
        train_plays, test_plays = train_test_split(bm25_play)
        logging.debug("weighting matrix by bm25_weight")
elif USE_BUILTIN_DATA:
    train_plays, test_plays = train_test_split(plays)

model.fit(train_plays)

# The metric is given the transposed train/test matrices.
MAPk = mean_average_precision_at_k(
    model,
    train_plays.T,
    test_plays.T,
    K=5,
    num_threads=0,
)
MAPk_scores.append(MAPk)
print(f"MAP for {model_name!s} is: {MAPk!s}")