def mylightfm(trainmx, testmx, f):
    """Train a WARP-loss LightFM model and return a dense user-item score matrix.

    Parameters
    ----------
    trainmx : array-like or sparse matrix, shape (n_users, n_items)
        Training interaction matrix.
    testmx : unused
        Kept for interface compatibility with sibling model functions.
    f : int
        Number of latent components for the factorization.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_items)
        Predicted score for every (user, item) pair.
    """
    print('begin light mf model')
    model = lightfm.LightFM(no_components=f, loss='warp')
    # LightFM expects a COO sparse matrix for fitting.
    model.fit(coo_matrix(trainmx))

    n_users, n_items = trainmx.shape
    pred = np.zeros((n_users, n_items))
    item_ids = np.arange(n_items)
    # One predict() call per user scores that user against every item;
    # the previously commented-out per-item inner loop was dead code.
    for user_id in range(n_users):
        pred[user_id, :] = model.predict(user_id, item_ids)

    print('finish light fm ')
    return pred
def fit_lightfm_model(interactions, post_features=None, user_features=None, epochs=30):
    """Construct and train a WARP-loss LightFM recommender.

    Parameters
    ----------
    interactions : sparse matrix
        User-item interaction matrix to fit on.
    post_features : sparse matrix, optional
        Item-side (post) feature matrix.
    user_features : sparse matrix, optional
        User-side feature matrix.
    epochs : int
        Number of training epochs.

    Returns
    -------
    lightfm.LightFM
        The fitted model.
    """
    recommender = lightfm.LightFM(
        loss='warp',
        learning_rate=0.01,
        learning_schedule='adagrad',
        user_alpha=0.0001,
        item_alpha=0.0001,
        no_components=30,
    )
    recommender.fit(
        interactions,
        item_features=post_features,
        user_features=user_features,
        num_threads=4,
        epochs=epochs,
    )
    return recommender
def get_model(args_dict, train):
    """Return a LightFM model for `args_dict`, training it only on cache miss.

    The model is cached on disk under MODELS_FOLDER, keyed by a hash of the
    hyper-parameters, so repeated calls with the same `args_dict` reuse the
    pickled model instead of retraining.

    Parameters
    ----------
    args_dict : dict
        Must contain 'loss' and 'epochs' keys.
    train : sparse matrix
        Training interaction matrix.

    Returns
    -------
    lightfm.LightFM
        The cached or freshly trained model.
    """
    os.makedirs(_script_relative(MODELS_FOLDER), exist_ok=True)
    path = _script_relative(os.path.join(MODELS_FOLDER, hash_params(args_dict)))
    print("Model path {}".format(path))

    if os.path.exists(path):
        print("FOUND cached model ")
        # NOTE(review): unpickling is only safe because the cache dir is
        # written by this same script — never load untrusted pickles.
        with open(path, "rb") as f:
            return Unpickler(f).load()

    tbef = time.time()
    print("Train start {}".format(tbef))
    model = lightfm.LightFM(loss=args_dict['loss'])
    model.fit(train, epochs=args_dict['epochs'])
    taft = time.time()
    # BUG FIX: the original format string had a single placeholder but two
    # arguments, so the elapsed time was silently dropped from the log line.
    print("Train end {} (elapsed {} s)".format(taft, taft - tbef))

    with open(path, "wb") as f:
        print("Saving model")
        Pickler(f).dump(model)
    return model
""" DOCSTRING """ import lightfm import lightfm.datasets as datasets import numpy data = datasets.fetch_movielens(min_rating=4.0) print (repr(data['train'])) print (repr(data['test'])) model = lightfm.LightFM(loss='warp') model.fit(data['train'], epochs=30, num_threads=2) def sample_recommendation(model, data, user_ids): n_users, n_items = data['train'].shape for user_id in user_ids: known_positives = data['item_labels'][data['train'].tocsr()[user_id].indices] scores = model.predict(user_id, numpy.arange(n_items)) top_items = data['item_labels'][numpy.argsort(-scores)] print ("User %s" % user_id) print (" Known positives:") for x in known_positives[:3]: print (" %s" % x) print (" Recommended:") for x in top_items[:3]: print (" %s" % x) sample_recommendation(model, data, [3, 25, 450])
import sys import pandas as pd import numpy as np import lightfm import scipy.sparse as sps import scipy.sparse.linalg as splinalg threads = 10 for i in range(1, 14): print("running batch %d" % i) batch = pd.read_csv("batches/batch_%d_train.dat" % i) test_users = pd.read_csv("batches/batch_%d_test.dat" % i) model = lightfm.LightFM(loss='warp', no_components=10, learning_rate=0.05, learning_schedule="adadelta") maxover = batch.groupby('user').item.count().max() topk = 100 def get_ranklists(model, users, items, test): import concurrent.futures executor = concurrent.futures.ThreadPoolExecutor(threads) def predu(i): scores = model.predict(i, items, num_threads=1) return items[np.argsort(scores)[-(topk + maxover):][::-1]] preds = list(executor.map(predu, users)) lists = pd.DataFrame({ 'user':
print(50*'-')

# Distinct users and items appearing in the interaction tuples T.
U = {u for (u, _) in T}
I = {i for (_, i) in T}

dataset = lfm.data.Dataset()
dataset.fit(users=U, items=I)
# Register every feature token that occurs in the item-feature pairs F.
dataset.fit_partial(item_features={tok for (_, feats) in F for tok in feats})

interactions, weights = dataset.build_interactions(T)
item_features = dataset.build_item_features(F, normalize=False)
user_id_mapping, user_feature_mapping, item_id_mapping, item_feature_mapping = dataset.mapping()

model = lfm.LightFM(no_components=3, loss='warp', learning_schedule='adagrad')
model.fit(
    interactions=interactions,
    sample_weight=weights,
    item_features=item_features,
    epochs=10,
    verbose=True,
)

# NOTE(review): the first assignment is immediately overwritten below;
# kept as-is to preserve the original script's behavior.
avisos_a_predecir = np.array(['i1', 'i2', 'i3', 'i4', 'i5'])
avisos_a_predecir = np.array(['i4', 'i1', 'i3'])

# Shuffle the candidate items repeatedly and check that the predicted
# ranking for user 'u5' is stable regardless of input order.
for _ in range(10):
    print(50*'-')
    np.random.shuffle(avisos_a_predecir)
    print(avisos_a_predecir)
    scores = model.predict(
        user_id_mapping['u5'],
        [item_id_mapping[a] for a in avisos_a_predecir],
    )
    print(-scores)
    print(np.argsort(-scores))
    print(avisos_a_predecir[np.argsort(-scores)])
def predict_hard_users(
    train: pd.DataFrame,
    test: pd.DataFrame,
    genre: pd.DataFrame,
    education: pd.DataFrame,
    notices: pd.DataFrame,
    available_notices: set,
    applicant_notice: dict,
    header=None,
):
    """Train a LightFM model on applicant/notice interactions and write top-10
    notice predictions for each test user.

    Parameters
    ----------
    train, test : pd.DataFrame
        Interaction frames; both carry an `idpostulante` column.
    genre, education : pd.DataFrame
        User attribute frames merged on `idpostulante` to build user features.
    notices : pd.DataFrame
        Notice (item) catalogue with an `idaviso` column and feature columns.
    available_notices : set
        Notices that may still be recommended.
    applicant_notice : dict
        Maps applicant id -> set of notices already applied to.
    header : optional
        Passed through to `write_dict`.

    Returns
    -------
    list[str]
        The single model tag ["lightfm"].
    """
    # --- Build user-side features -------------------------------------------
    user_feature = genre.merge(education, on="idpostulante", how="left")
    user_feature.drop(columns=["fechanacimiento"], inplace=True)
    user_feature_hard_user = user_feature[user_feature.idpostulante.isin(
        train.idpostulante)]

    uf = generate_features(user_feature[["sexo", "nombre", "estado"]])
    itf = generate_features(notices[[
        "nombre_zona", "tipo_de_trabajo", "nivel_laboral", "nombre_area"
    ]])

    dataset1 = Dataset()
    dataset1.fit(
        train.idpostulante.unique(),  # all the users
        notices.idaviso.unique(),
        user_features=uf,  # additional user features
        item_features=itf,  # additional item features
    )

    # Plug in the interactions and their weights.
    (interactions, weights) = dataset1.build_interactions([
        (x[1], x[0], x[3]) for x in train.values
    ])

    user_feature_list = generate_in_use_features(
        user_feature_hard_user[["sexo", "nombre", "estado"]].values,
        ["sexo", "nombre", "estado"],
    )
    user_tuple = list(
        zip(user_feature_hard_user.idpostulante, user_feature_list))
    user_features = dataset1.build_user_features(user_tuple, normalize=False)

    (
        user_id_map,
        user_feature_map,
        item_id_map,
        item_feature_map,
    ) = dataset1.mapping()
    inv_item_id_map = {v: k for k, v in item_id_map.items()}

    # Best component count found by an earlier sweep over
    # [10, 35, 50, 80, 100, 200].
    component = 35
    model = lfm.LightFM(no_components=component, loss="warp", random_state=42)
    model.fit(
        interactions,
        # user_features=user_features,  # experiment toggle, kept disabled
        # sample_weight=weights,
        epochs=150,
        num_threads=8,
        verbose=True,
    )
    test_precision = precision_at_k(
        model,
        interactions,
        # user_features=user_features,
        k=10,
        num_threads=8,
    ).mean()
    logger.info(
        f"Evaluation for LightFM is: {test_precision} with {component} number of component"
    )

    # --- Predict top-10 unseen, available notices per test user -------------
    final_predictions = {}
    # Hoisted: shape is loop-invariant (was recomputed per user).
    n_users, n_items = interactions.shape
    all_items = np.arange(n_items)
    for a_user in tqdm(test.idpostulante.unique()):
        # BUG FIX: the original bare `except:` clauses swallowed every
        # exception (including KeyboardInterrupt); dict.get expresses the
        # intended "default on missing key" directly.
        notices_by_user = applicant_notice.get(a_user, set())
        # Unknown users fall back to user index 0.
        user_x = user_id_map.get(a_user, 0)

        prediction = np.argsort(
            model.predict(
                user_x,
                all_items,
                # user_features=user_features,
            ))[::-1]

        prediction_for_user = []
        for pred in prediction:
            notice = inv_item_id_map[pred]
            if notice in available_notices and notice not in notices_by_user:
                prediction_for_user.append(notice)
                if len(prediction_for_user) == 10:
                    break
        final_predictions[a_user] = prediction_for_user

    write_dict(final_predictions, "lightfm", header)
    return ["lightfm"]