Exemplo n.º 1
0
def mylightfm(trainmx, testmx, f):
    """Fit a WARP LightFM model and score every user against every item.

    Args:
        trainmx: Interaction matrix; converted with ``coo_matrix`` before
            fitting. Its shape also determines the shape of the result.
        testmx: Accepted for interface compatibility; not used here.
        f: Passed to LightFM as ``no_components``.

    Returns:
        A dense array of ``trainmx.shape`` whose row ``u`` holds
        ``model.predict(u, <all column indices>)``.
    """
    print('begin light mf model')
    model = lightfm.LightFM(loss='warp', no_components=f)
    model.fit(coo_matrix(trainmx))

    scores = np.zeros(trainmx.shape)
    all_items = np.arange(trainmx.shape[1])
    # Each ``row`` is a view into ``scores``, so the slice assignment
    # fills the result matrix in place, one user at a time.
    for user, row in enumerate(scores):
        row[:] = model.predict(user, all_items)
    print('finish light fm ')
    return scores
Exemplo n.º 2
0
def fit_lightfm_model(interactions, post_features=None, user_features=None, epochs=30):
    """Build a LightFM model with fixed hyperparameters and fit it.

    Args:
        interactions: Interaction matrix handed to ``LightFM.fit``.
        post_features: Optional matrix passed to ``fit`` as ``item_features``.
        user_features: Optional matrix passed to ``fit`` as ``user_features``.
        epochs: Number of training epochs.

    Returns:
        The fitted ``lightfm.LightFM`` instance.
    """
    hyperparams = {
        'no_components': 30,
        'loss': 'warp',
        'learning_schedule': 'adagrad',
        'learning_rate': 0.01,
        'item_alpha': 0.0001,
        'user_alpha': 0.0001,
    }
    model = lightfm.LightFM(**hyperparams)

    # Training always runs on four threads.
    model.fit(
        interactions,
        user_features=user_features,
        item_features=post_features,
        epochs=epochs,
        num_threads=4,
    )
    return model
Exemplo n.º 3
0
def get_model(args_dict, train):
    """Return a LightFM model for ``args_dict``, reusing a pickled copy if present.

    The cache file lives under ``MODELS_FOLDER`` (resolved with
    ``_script_relative``) and is named by ``hash_params(args_dict)``. When
    the file exists it is unpickled and returned; otherwise a model is
    trained on ``train``, pickled to that path, and returned.

    Args:
        args_dict: Mapping that must contain ``'loss'`` and ``'epochs'``.
        train: Interaction matrix passed to ``LightFM.fit``.

    Returns:
        The cached or freshly trained model.

    Raises:
        KeyError: If ``args_dict`` lacks ``'loss'`` or ``'epochs'`` and no
            cached model exists.
    """
    os.makedirs(_script_relative(MODELS_FOLDER), exist_ok=True)
    path = os.path.join(MODELS_FOLDER, hash_params(args_dict))
    path = _script_relative(path)
    print("Model path {}".format(path))

    if os.path.exists(path):
        print("FOUND cached model ")
        # NOTE(security): unpickling can execute arbitrary code. This is
        # only safe while the cache directory contains trusted files.
        with open(path, "rb") as f:
            return Unpickler(f).load()

    tbef = time.time()
    print("Train start {}".format(tbef))
    model = lightfm.LightFM(loss=args_dict['loss'])
    model.fit(train, epochs=args_dict['epochs'])
    taft = time.time()
    # The original format string had a single placeholder, so the elapsed
    # time was passed to format() but silently dropped from the output.
    print("Train end {} (elapsed {:.2f}s)".format(taft, taft - tbef))

    with open(path, "wb") as f:
        print("Saving model")
        Pickler(f).dump(model)

    return model
Exemplo n.º 4
0
"""
Fit a WARP LightFM model on the MovieLens data and print sample recommendations.
"""
import lightfm
import lightfm.datasets as datasets
import numpy

# Fetch the MovieLens dataset with min_rating=4.0.
data = datasets.fetch_movielens(min_rating=4.0)

# Show the repr of the train and test splits, one per line.
print (repr(data['train']), repr(data['test']), sep='\n')

# Fit a WARP-loss model on the training split.
model = lightfm.LightFM(loss='warp')
model.fit(data['train'], num_threads=2, epochs=30)

def sample_recommendation(model, data, user_ids):
    """Print up to three known positives and three top-scored items per user.

    Args:
        model: Object exposing ``predict(user_id, item_ids)`` that returns
            one score per entry of ``item_ids``.
        data: Mapping with a sparse ``'train'`` matrix (rows are users,
            columns are items) and an ``'item_labels'`` array indexable by
            an array of item indices.
        user_ids: Iterable of row indices into ``data['train']``.

    Returns:
        None. Results are written to stdout.
    """
    # Loop-invariant work: convert to CSR once and build the item index
    # range once, instead of redoing both for every user.
    train_csr = data['train'].tocsr()
    item_labels = data['item_labels']
    _, n_items = train_csr.shape
    all_items = numpy.arange(n_items)

    for user_id in user_ids:
        # Column indices of the stored entries in this user's row.
        known_positives = item_labels[train_csr[user_id].indices]
        scores = model.predict(user_id, all_items)
        # Negate so that argsort orders items from highest to lowest score.
        top_items = item_labels[numpy.argsort(-scores)]
        print ("User %s" % user_id)
        print ("     Known positives:")
        for x in known_positives[:3]:
            print ("        %s" % x)
        print ("     Recommended:")
        for x in top_items[:3]:
            print ("        %s" % x)

# Print sample recommendations for users 3, 25 and 450.
sample_recommendation(model, data, [3, 25, 450])
Exemplo n.º 5
0
import sys
import pandas as pd
import numpy as np
import lightfm
import scipy.sparse as sps
import scipy.sparse.linalg as splinalg

# Worker count handed to the ThreadPoolExecutor created in get_ranklists.
threads = 10

for i in range(1, 14):
    print("running batch %d" % i)
    batch = pd.read_csv("batches/batch_%d_train.dat" % i)
    test_users = pd.read_csv("batches/batch_%d_test.dat" % i)

    model = lightfm.LightFM(loss='warp',
                            no_components=10,
                            learning_rate=0.05,
                            learning_schedule="adadelta")
    maxover = batch.groupby('user').item.count().max()
    topk = 100

    def get_ranklists(model, users, items, test):
        import concurrent.futures
        executor = concurrent.futures.ThreadPoolExecutor(threads)

        def predu(i):
            scores = model.predict(i, items, num_threads=1)
            return items[np.argsort(scores)[-(topk + maxover):][::-1]]

        preds = list(executor.map(predu, users))
        lists = pd.DataFrame({
            'user':
Exemplo n.º 6
0
print(50*'-')


# Distinct user ids and item ids taken from the (user, item) pairs in T.
U = {u for (u, _) in T}
I = {i for (_, i) in T}

dataset = lfm.data.Dataset()

dataset.fit(users=U, items=I)
# Register every feature name that appears in F's (item, features) pairs.
dataset.fit_partial(item_features={b for (_, f) in F for b in f})

interactions, weights = dataset.build_interactions(T)
item_features = dataset.build_item_features(F, normalize=False)
user_id_mapping, user_feature_mapping, item_id_mapping, item_feature_mapping = dataset.mapping()

model = lfm.LightFM(no_components=3, loss='warp', learning_schedule='adagrad')
model.fit(interactions=interactions, sample_weight=weights, item_features=item_features, epochs=10, verbose=True)

# Items to score. An earlier five-item assignment was immediately
# overwritten by this one, so only this list was ever used.
avisos_a_predecir = np.array(['i4', 'i1', 'i3'])

# Internal index of the user being scored; it is the same on every iteration.
user_index = user_id_mapping['u5']

# Score the same items in ten random orders and print the ranking each time.
for _ in range(10):
    print(50*'-')
    np.random.shuffle(avisos_a_predecir)
    print(avisos_a_predecir)
    p = model.predict(user_index, [item_id_mapping[a] for a in avisos_a_predecir])
    # Negated scores sort from highest to lowest; compute the order once.
    ranking = np.argsort(-p)
    print(-p)
    print(ranking)
    print(avisos_a_predecir[ranking])
Exemplo n.º 7
0
def predict_hard_users(
    train: pd.DataFrame,
    test: pd.DataFrame,
    genre: pd.DataFrame,
    education: pd.DataFrame,
    notices: pd.DataFrame,
    available_notices: set,
    applicant_notice: dict,
    header=None,
):
    """Fit a WARP LightFM model on ``train`` and write 10 notices per test user.

    Args:
        train: Interaction rows. Columns 1, 0 and 3 (by position) are used
            as user, item and weight; the ``idpostulante`` column lists the
            users. NOTE(review): positional column order is assumed, confirm
            against the caller.
        test: Frame whose unique ``idpostulante`` values are the users to
            predict for.
        genre: User frame merged with ``education`` on ``idpostulante``;
            must contain ``fechanacimiento`` and ``sexo``.
        education: User frame supplying ``nombre`` and ``estado``.
        notices: Item frame with ``idaviso`` and the four item feature
            columns used below.
        available_notices: Notices that may be recommended.
        applicant_notice: Maps a user id to the notices to exclude for that
            user; users missing from it have nothing excluded.
        header: Forwarded unchanged to ``write_dict``.

    Returns:
        ``["lightfm"]``, the same label that is passed to ``write_dict``.
    """
    user_feature = genre.merge(education, on="idpostulante", how="left")
    user_feature.drop(columns=["fechanacimiento"], inplace=True)
    user_feature_hard_user = user_feature[user_feature.idpostulante.isin(
        train.idpostulante)]

    uf = generate_features(user_feature[["sexo", "nombre", "estado"]])
    itf = generate_features(notices[[
        "nombre_zona", "tipo_de_trabajo", "nivel_laboral", "nombre_area"
    ]])

    dataset1 = Dataset()
    dataset1.fit(
        train.idpostulante.unique(),  # all the users
        notices.idaviso.unique(),
        user_features=uf,  # additional user features
        item_features=itf,  # additional item features
    )
    # plugging in the interactions and their weights
    (interactions, weights) = dataset1.build_interactions([
        (x[1], x[0], x[3]) for x in train.values
    ])

    user_feature_list = generate_in_use_features(
        user_feature_hard_user[["sexo", "nombre", "estado"]].values,
        ["sexo", "nombre", "estado"],
    )
    user_tuple = list(
        zip(user_feature_hard_user.idpostulante, user_feature_list))

    # Built but not passed to fit/precision_at_k/predict below; the same
    # holds for ``weights``. NOTE(review): presumably an experiment toggle.
    user_features = dataset1.build_user_features(user_tuple, normalize=False)

    (
        user_id_map,
        user_feature_map,
        item_id_map,
        item_feature_map,
    ) = dataset1.mapping()

    # Internal item index -> original notice id.
    inv_item_id_map = {v: k for k, v in item_id_map.items()}

    component = 35
    model = lfm.LightFM(no_components=component, loss="warp", random_state=42)
    model.fit(
        interactions,
        epochs=150,
        num_threads=8,
        verbose=True,
    )

    # NOTE: despite its name, this precision is measured on the same
    # interactions the model was trained on, not on held-out data.
    test_precision = precision_at_k(
        model,
        interactions,
        k=10,
        num_threads=8,
    ).mean()
    logger.info(
        f"Evaluation for LightFM is: {test_precision} with {component} number of component"
    )

    # Loop-invariant: every user is scored against the same item indices.
    n_users, n_items = interactions.shape
    all_items = np.arange(n_items)

    final_predictions = {}
    for a_user in tqdm(test.idpostulante.unique()):
        # Only a missing key is expected here; a bare ``except`` would also
        # hide unrelated errors and swallow KeyboardInterrupt.
        try:
            notices_by_user = applicant_notice[a_user]
        except KeyError:
            notices_by_user = set()
        try:
            user_x = user_id_map[a_user]
        except KeyError:
            # Users absent from the training mapping are scored as
            # internal user 0 (original fallback, kept as-is).
            user_x = 0

        # Item indices ordered from highest to lowest predicted score.
        prediction = np.argsort(model.predict(user_x, all_items))[::-1]

        prediction_for_user = []
        for pred in prediction:
            notice = inv_item_id_map[pred]
            should_add = (notice in available_notices
                          and notice not in notices_by_user)
            if should_add:
                prediction_for_user.append(notice)
            if len(prediction_for_user) == 10:
                break
        final_predictions[a_user] = prediction_for_user

    write_dict(final_predictions, "lightfm", header)
    return ["lightfm"]