# --- ALS evaluation pass -------------------------------------------------
# Build the user-by-item training matrix, fit an implicit-feedback ALS
# model, and compare its ROC curve against a popularity baseline.

# CSR matrix of purchase quantities: rows = customers, cols = products.
train_sparse = sparse.csr_matrix(
    (train[2], (train[0], train[1])),
    shape=(len(customers), len(products)))
print("IO done in %f" % io_time.interval)

# Confidence-scaling factor for implicit feedback; the raw counts are
# multiplied by alpha before factorization.
alpha = 15
with Timer() as cython_als_t:
    user_vecs, item_vecs = implicit.alternating_least_squares(
        (train_sparse * alpha).astype('double'),
        factors=64,
        regularization=0.1,
        iterations=10,
        use_gpu=False)
print(f"Time spent in implicit: {cython_als_t.interval}")

# NOTE(review): threshold=3.0 here differs from the other evaluation pass
# in this file, which constructs Evaluator without a threshold — confirm
# which configuration is intended.
evaluator = Evaluator(test[0], test[1], test[2], threshold=3.0)

# Popularity baseline: score depends on the item only, not the user.
baseline_model = BaselinePredictor(train[1], train[2])
baseline_fpr, baseline_tpr, baseline_roc = evaluator.roc(
    lambda user, item: baseline_model.pred(item))

# ALS prediction: dot product of the learned user and item factor vectors.
fpr, tpr, roc = evaluator.roc(
    lambda user, item: np.sum(user_vecs[user, :] * item_vecs[item, :]))
print("AUC: %f" % roc)

# Overlay both ROC curves on one figure.
plt.clf()
plt.plot(baseline_fpr, baseline_tpr, label='baseline')
plt.plot(fpr, tpr, label='als')
plt.xlabel('False positive')
plt.ylabel('True positive')
plt.legend()
plt.show()
# NOTE(review): extraction collapsed this chunk onto a single physical line
# AND truncated its first statement — it begins mid-expression with
# "grouped_purchased.CustomerID.unique()))", presumably the tail of a
# `customers = list(...)` assignment (the double closing paren suggests an
# outer call, e.g. a sort, around .unique()); recover the missing head from
# the full file before reformatting. Left byte-identical here because a
# rewrite cannot be verified against the truncated start.
#
# What the visible statements do, in order:
#   1. Collect the unique customers and unique purchased products.
#   2. Integer-code CustomerID / StockCode via ordered pd.CategoricalDtype
#      so they can serve as sparse-matrix row/column indices.
#   3. Split (rows, cols, quantity) into train/test via train_test_split.
#   4. Compute a popularity-baseline ROC with Evaluator/BaselinePredictor
#      (note: this Evaluator is built WITHOUT a threshold, unlike the other
#      evaluation pass in this file — confirm which is intended).
#   5. Build the CSR customers-by-products training matrix and fit implicit
#      ALS (factors=32, 50 iterations, confidence scaling alpha=15) inside
#      a Timer; report the elapsed time.
#   6. Wrap the learned factors in `svd_predictor`, a (user, item) ->
#      dot-product score function.
grouped_purchased.CustomerID.unique())) # Get our unique customers products = list(grouped_purchased.StockCode.unique() ) # Get our unique products that were purchased quantity = list(grouped_purchased.Quantity) # All of our purchases rows = grouped_purchased.CustomerID.astype( pd.CategoricalDtype(categories=customers, ordered=True)).cat.codes # Get the associated row indices cols = grouped_purchased.StockCode.astype( pd.CategoricalDtype(categories=products, ordered=True)).cat.codes train, test = train_test_split(rows.values, cols.values, quantity) evaluator = Evaluator(test[0], test[1], test[2]) baseline_model = BaselinePredictor(train[1], train[2]) baseline_fpr, baseline_tpr, baseline_roc = evaluator.roc( lambda user, item: baseline_model.pred(item)) train_sparse = sparse.csr_matrix((train[2], (train[0], train[1])), shape=(len(customers), len(products))) alpha = 15 with Timer() as cython_als_t: user_vecs, item_vecs = implicit.alternating_least_squares( (train_sparse * alpha).astype('double'), factors=32, regularization=0.1, iterations=50) print(f"Time spent in implicit: {cython_als_t.interval}") svd_predictor = lambda user, item: np.sum(user_vecs[user, :] * item_vecs[ item, :])