def dump_factors():
    """Train an ALS model on the purchase data and dump its factors to CSV.

    Reads the number of latent factors from the ``numfactors`` request
    argument, builds a BM25-weighted matrix (rows indexed by product,
    columns by user) from the module-level ``purchases`` mapping, fits an
    ``AlternatingLeastSquares`` model on it, and writes four files to the
    current working directory:

    * ``item_factors.csv`` / ``user_factors.csv`` -- the model's
      ``item_factors`` / ``user_factors`` arrays, comma-delimited.
    * ``item_ids.csv`` / ``user_ids.csv`` -- one ``id,index,name`` line per
      product / user, where ``index`` is the position of the id among the
      pandas categories (i.e. the code used to index the purchase matrix).

    Returns:
        The string ``'OK'`` followed by a newline.

    Raises:
        ValueError: If the ``numfactors`` argument is not a valid integer.
    """
    numfactors = int(request.args['numfactors'].strip())

    model = AlternatingLeastSquares(factors=numfactors, dtype=np.float32,
                                    use_gpu=False, iterations=30)
    # Switch off the model's approximate recommend / similar-items modes.
    model.approximate_recommend = False
    model.approximate_similar_items = False

    # Flatten purchases[userid][productid] -> count into one
    # (userid, productid, count) row per user/product pair.
    rows = [(userid, productid, purchases[userid][productid])
            for userid in purchases
            for productid in purchases[userid]]
    df = pd.DataFrame(rows, columns=['userid', 'productid', 'purchase_count'])

    # Categorical columns give every id a dense integer code; the position
    # of an id in `.cat.categories` is that code.
    df['userid'] = df['userid'].astype("category")
    df['productid'] = df['productid'].astype("category")
    userids = list(df['userid'].cat.categories)
    productids = list(df['productid'].cat.categories)

    # Product codes are the row indices, user codes the column indices.
    purchases_matrix = coo_matrix((df['purchase_count'].astype(np.float32),
                                   (df['productid'].cat.codes.copy(),
                                    df['userid'].cat.codes.copy())))
    print("Matrix shape: %s, max value: %.2f" % (np.shape(purchases_matrix), np.max(purchases_matrix)))

    purchases_matrix = bm25_weight(purchases_matrix, K1=2.0, B=0.25)
    # CSR format, to support indexing in recommend/similar_items functions.
    purchases_matrix = purchases_matrix.tocsr()
    model.fit(purchases_matrix)

    np.savetxt('item_factors.csv', model.item_factors, delimiter=',')
    np.savetxt('user_factors.csv', model.user_factors, delimiter=',')
    _write_id_index('item_ids.csv', productids, productnames)
    _write_id_index('user_ids.csv', userids, usernames)
    return 'OK\n'


def _write_id_index(path, ids, names):
    """Write one ``id,index,name`` line to *path* for each entry of *ids*.

    ``index`` is the entry's position in *ids*; ``name`` is looked up in the
    *names* mapping by id.
    """
    with open(path, 'w') as f:
        for index, key in enumerate(ids):
            # NOTE(review): `recommendation.sub` is called with the same
            # argument order as `re.sub(pattern, repl, string)`; presumably
            # it swaps commas in the name for spaces so each line keeps
            # exactly three comma-separated fields -- confirm that
            # `recommendation` is the intended name here.
            f.write("%s,%d,%s\n" % (key, index, recommendation.sub(r',', ' ', names[key])))
def build_imf(prod_user_matrix, alpha, factors=75, regularization=0.01, iterations=15):
    """Build and fit an ALS model from a utility matrix.

    The utility matrix is first converted with
    ``confidence_matrix(prod_user_matrix, alpha)`` and the result is used to
    fit an ``AlternatingLeastSquares`` model (conjugate-gradient solver
    enabled, approximate similar-items lookup disabled).

    Args:
        prod_user_matrix: Utility matrix handed to ``confidence_matrix``
            (presumably products x users, judging by the name -- confirm
            against the caller).
        alpha: Second argument forwarded to ``confidence_matrix``; also
            echoed in the progress message.
        factors: Forwarded as the model's ``factors`` argument. Defaults to
            75, the previously hard-coded value.
        regularization: Forwarded as the model's ``regularization``
            argument. Defaults to 0.01, the previously hard-coded value.
        iterations: Forwarded as the model's ``iterations`` argument.
            Defaults to 15, the previously hard-coded value.

    Returns:
        The fitted ``AlternatingLeastSquares`` model.
    """
    print("Building IMF model with alpha: {} ...".format(alpha))
    model = AlternatingLeastSquares(
        factors=factors,
        regularization=regularization,
        iterations=iterations,
        use_cg=True,
    )
    model.approximate_similar_items = False
    model.fit(confidence_matrix(prod_user_matrix, alpha))
    return model