def hybrid_rec(user_ratings, user, icm_m, sim_skr=20, w_cbf=0.82, w_cf=0.18):
    """
    Compute hybrid (content-based + collaborative) recommendations for a user.

    Every movie of ``icm_m`` that the user has not rated gets a content-based
    score: the sum of ``user_ratings[movie] * item_sim(...)`` over the rated
    movies. Every movie found in the rows of the other users of the
    module-level ``urm`` (and missing or rated 0 in the row of ``user``) gets
    a collaborative score: the sum of ``rating * adj_cosine_sim(...)`` over
    the users whose similarity with ``user`` is strictly positive. The two
    scores are merged as ``w_cbf * cbf + w_cf * cf``.

    Besides its arguments, this function reads the module-level names
    ``urm``, ``user_min_pop``, ``index_pop`` and ``sort_popularity`` and calls
    ``item_sim`` and ``adj_cosine_sim``, all defined elsewhere.

    Parameters
    ----------
    user_ratings: mapping {movie: rating} with the ratings of the user
    user: the user to build the recommendations for (a key of ``urm``)
    icm_m: the ICM (item content matrix); iterating it yields the movies
    sim_skr: the shrink term forwarded to ``item_sim`` for the CBF part
    w_cbf: the weight of the CBF score
    w_cf: the weight of the CF score

    Returns
    -------
    A pre-formatted string with the 5 recommended movies, each one preceded
    by a single space (e.g. ``" 12 7 33173 33475 1076"``). When fewer than
    five movies can be ranked, the list is completed with a fixed fallback
    list, skipping movies that are already recommended.
    """
    n_recs = 5
    # Fixed fallback recommendations used to complete short rankings.
    fallback_movies = (33173, 33475, 1076, 35300, 15743)

    # --- CBF ranking: {movie: sum(rating * similarity)} ---
    totals_cbf = {}
    for other_movie in icm_m:
        # Only movies the user has not rated are candidates.
        if other_movie in user_ratings:
            continue
        # A candidate is never one of the rated movies, so no extra
        # "movie != other_movie" guard is needed here.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals_cbf[other_movie] = (
                    totals_cbf.get(other_movie, 0.0)
                    + user_ratings[movie] * similarity
                )

    # --- CF ranking: {movie: sum(rating * similarity)} ---
    totals_cf = {}
    for other in urm:
        # Don't compare the user to themselves.
        if other == user:
            continue
        similarity_urm = adj_cosine_sim(urm, user, other, 6)
        # Ignore similarities of zero or lower.
        if similarity_urm <= 0:
            continue
        for item in urm[other]:
            # Only score movies the user has not seen yet.
            if item not in urm[user] or urm[user][item] == 0:
                totals_cf[item] = (
                    totals_cf.get(item, 0) + urm[other][item] * similarity_urm
                )

    # --- Merge the CBF and CF rankings, weighting each contribution ---
    rankings = {movie: total * w_cbf for movie, total in totals_cbf.items()}
    for movie, total in totals_cf.items():
        if movie in rankings:
            rankings[movie] += total * w_cf
        else:
            rankings[movie] = total * w_cf

    # --- Drop the movies that are too popular ---
    # Removes the first index_pop[user_min_pop[user]] entries of
    # sort_popularity from the candidates.
    # NOTE(review): presumably sort_popularity is ordered by decreasing
    # popularity and the cut-off is the position of the least popular movie
    # rated by the user -- confirm against where these globals are built.
    for i in range(index_pop[user_min_pop[user]]):
        rankings.pop(sort_popularity[i][0], None)

    # Because of the shrink term the scores are not rating predictions; they
    # are only used to order the movies. reverse=True keeps the sort stable,
    # so ties stay in insertion order.
    best = sorted(rankings.items(), key=lambda pair: pair[1], reverse=True)
    recommended = [movie for movie, _ in best[:n_recs]]

    # Fewer than five ranked movies: complete with the fallback list without
    # recommending the same movie twice.
    for movie in fallback_movies:
        if len(recommended) >= n_recs:
            break
        if movie not in recommended:
            recommended.append(movie)

    return "".join(" " + str(movie) for movie in recommended)
# Example #2
# 0
# File: cbf.py  Project: P00L/clear-project
def cbf_recommendations(user_ratings, user, icm_m, sim_skr=20, shrink=10):
    """
    Compute recommendations with a CBF (Content Based Filtering) technique.

    * WARNING:
        # This function is very resource and time consuming if it is called
        in a large batch (e.g. in a for loop over all the users)

    Every movie of ``icm_m`` that the user has not rated is scored as
    ``sum(rating * similarity) / (sum(similarity) + shrink)``, where the sums
    run over the movies rated by the user and the similarity is computed by
    ``item_sim`` (defined elsewhere; doc in similarity.py). The first 1000
    entries of the module-level ``sort_popularity`` are removed from the
    candidates before ranking.

    * NOTE:
        # this does not contain any optimization, it is just a simple and raw
        computation

    * TODO:
        # support for cosine
        # some optimizations are needed

    Parameters
    ----------
    user_ratings: mapping {movie: rating} with all the movies rated by a user
    user: not used by this function; kept for interface compatibility
    icm_m: item content matrix; iterating it yields the movies
    sim_skr: shrink term of the similarity, forwarded to ``item_sim``
    shrink: shrink term added to the denominator of the score

    Returns
    -------
    A pre-formatted string containing the 5 best recommendations, each one
    preceded by a single space (NOTE: this has been done to be less time
    consuming). When fewer than five movies can be ranked, the list is
    completed with a fixed fallback list, skipping movies that are already
    recommended.
    """
    n_recs = 5
    # Number of leading sort_popularity entries that are never recommended.
    n_too_popular = 1000
    # Fixed fallback recommendations used to complete short rankings.
    fallback_movies = (33173, 33475, 1076, 35300, 15743)

    totals = {}  # {item: sum(rating * similarity)}
    sim_sums = {}  # {item: sum(similarity)}

    # Compare every movie not rated by the user with the rated ones.
    for other_movie in icm_m:
        if other_movie in user_ratings:
            continue
        # A candidate is never one of the rated movies, so no extra
        # "movie != other_movie" guard is needed here.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals[other_movie] = (
                    totals.get(other_movie, 0) + user_ratings[movie] * similarity
                )
                sim_sums[other_movie] = sim_sums.get(other_movie, 0) + similarity

    # Drop the movies that are too popular. The bound is clamped so that a
    # popularity list shorter than the cut-off does not raise IndexError.
    for i in range(min(n_too_popular, len(sort_popularity))):
        totals.pop(sort_popularity[i][0], None)

    # Because of the shrink term the scores are not rating predictions; they
    # are only used to order the movies. reverse=True keeps the sort stable,
    # so ties stay in insertion order.
    scored = [
        (total / (sim_sums[item] + shrink), item)
        for item, total in totals.items()
    ]
    best = sorted(scored, key=lambda pair: pair[0], reverse=True)
    recommended = [item for _, item in best[:n_recs]]

    # Fewer than five ranked movies: complete with the fallback list without
    # recommending the same movie twice.
    for movie in fallback_movies:
        if len(recommended) >= n_recs:
            break
        if movie not in recommended:
            recommended.append(movie)

    return "".join(" " + str(movie) for movie in recommended)
# Example #3
# 0
def cbf_recommendations(user_ratings, user, icm_m, knn, urm, sim_skr=20, shrink=10, w_cbf=0.9, w_knn=0.1):
    """
    Compute recommendations mixing CBF (Content Based Filtering) and user KNN.

    * WARNING:
        # This function is very resource and time consuming if it is called
        in a large batch (e.g. in a for loop over all the users)

    CBF part: every movie of ``icm_m`` that the user has not rated gets the
    score ``sum(rating * similarity)`` over the movies rated by the user,
    with the similarity computed by ``item_sim`` (defined elsewhere; doc in
    similarity.py).

    KNN part: when ``user`` is a key of ``knn``, every movie rated by one of
    the users in ``knn[user]`` and absent from ``urm[user]`` gets the score
    ``sum(urm[other_user][movie] * knn[user][other_user])``.

    The final score is ``w_cbf * cbf + w_knn * knn`` (non-normalized).

    * NOTE:
        # this does not contain any optimization, it is just a simple and raw
        computation

    * TODO:
        # support for cosine
        # some optimizations are needed

    Parameters
    ----------
    user_ratings: mapping {movie: rating} with all the movies rated by a user
    user: the user to build the recommendations for
    icm_m: item content matrix; iterating it yields the movies
    knn: mapping {user: {other_user: weight}}; the weight multiplies the
        ratings of ``other_user`` (presumably a user-user similarity --
        confirm against the code that builds it)
    urm: mapping {user: {movie: rating}}
    sim_skr: shrink term of the similarity, forwarded to ``item_sim``
    shrink: not used by this function; kept for interface compatibility
    w_cbf: the weight of the CBF score
    w_knn: the weight of the KNN score

    Returns
    -------
    A pre-formatted string containing the 5 best recommendations, each one
    preceded by a single space (NOTE: this has been done to be less time
    consuming). When fewer than five movies can be ranked, the list is
    completed with a fixed fallback list, skipping movies that are already
    recommended.
    """
    n_recs = 5
    # Fixed fallback recommendations used to complete short rankings.
    fallback_movies = (33173, 33475, 1076, 35300, 15743)

    # --- CBF part: {item: sum(rating * similarity)} ---
    totals_cbf = {}
    # Compare every movie not rated by the user with the rated ones.
    for other_movie in icm_m:
        if other_movie in user_ratings:
            continue
        # A candidate is never one of the rated movies, so no extra
        # "movie != other_movie" guard is needed here.
        for movie in user_ratings:
            similarity = item_sim(icm_m, movie, other_movie, skr=sim_skr)
            if similarity != 0:
                totals_cbf[other_movie] = (
                    totals_cbf.get(other_movie, 0.0)
                    + user_ratings[movie] * similarity
                )

    # --- KNN-on-users part: {item: sum(rating * neighbour weight)} ---
    totals_knn = {}
    if user in knn:
        for other_user, weight in knn[user].items():
            for movie in urm[other_user]:
                if movie not in urm[user]:
                    totals_knn[movie] = (
                        totals_knn.get(movie, 0.0)
                        + urm[other_user][movie] * weight
                    )

    # --- Build the total (non-normalized) rankings ---
    rankings = {movie: total * w_cbf for movie, total in totals_cbf.items()}
    for movie, total in totals_knn.items():
        if movie in rankings:
            rankings[movie] += total * w_knn
        else:
            rankings[movie] = total * w_knn

    # The scores are not rating predictions; they are only used to order the
    # movies. reverse=True keeps the sort stable, so ties stay in insertion
    # order.
    best = sorted(rankings.items(), key=lambda pair: pair[1], reverse=True)
    recommended = [movie for movie, _ in best[:n_recs]]

    # Fewer than five ranked movies: complete with the fallback list without
    # recommending the same movie twice.
    for movie in fallback_movies:
        if len(recommended) >= n_recs:
            break
        if movie not in recommended:
            recommended.append(movie)

    return "".join(" " + str(movie) for movie in recommended)