示例#1
0
def main(train_file, user_item_side_information_file, hierarchy_file,
         test_file):
    """Compare plain-SLIM rankings against hierarchy-aware (HSLIM) rankings.

    Trains one SLIM model on the train matrix and one on the user/item
    side information, propagates the latter through the item hierarchy,
    and plots two histograms: the per-user top-N set difference between
    the two recommenders, and their Kendall-tau distances.

    NOTE(review): ``test_file`` is currently unused — only the
    commented-out LDA precision experiment consumed it.
    """
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    # Only the commented-out LDA experiment below reads this value; it is
    # re-assigned by hierarchy_factory() before the HSLIM step runs.
    with open(hierarchy_file) as fh:
        hierarchy = json.load(fh)

    W = slim_train(A)

    ## LDA
    #lda = LDAHierarquical(B, hierarchy, topics=20)
    #recommendations_lda = slim_lda_recommender(A, W, lda)
    #compute_precision(recommendations_lda, test_file)
    ###

    recommendations_slim = slim_recommender(A, W)

    ## HSLIM
    from hslim import handle_user_bias, hierarchy_factory, normalize_wline, generate_subitem_hierarchy
    hierarchy = hierarchy_factory('data/hierarchy.json')
    # User bias is handled only for B: it contains explicit evaluations.
    K = slim_train(handle_user_bias(B))
    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)
    recommendations_other = slim_recommender(A, WlineNorm)
    ###

    kendall_tau_values = []
    differences_values = []

    # Plain dict iteration works on both Python 2 and 3; the original used
    # the Python-2-only dict.iterkeys().
    for u in recommendations_slim:
        ranking_slim = recommendations_slim[u][:RANKING_UNTIL]
        ranking_other = recommendations_other[u][:RANKING_UNTIL]

        kendall_tau_values.append(kendalltau(ranking_slim, ranking_other))
        # How many of the SLIM top-N items HSLIM did NOT also recommend.
        differences_values.append(RANKING_UNTIL -
                                  len(set(ranking_slim) & set(ranking_other)))

    # Differences
    plt.hist(differences_values)
    plt.xlabel('Size of difference')
    plt.ylabel('Amount of rankings')
    plt.title('Differences (novelty) between rankings')

    # Ranking comparison
    show_matplot_fig()
    plt.figure()
    # kendalltau returns (statistic, pvalue); plot only the statistic.
    plt.hist([i[0] for i in kendall_tau_values])
    plt.xlabel('KendallTau Distance SLIM/SLIM LDA')
    plt.ylabel('Number of occurrences')
    plt.title('Comparison between rankings')
    show_matplot_fig()
示例#2
0
def main(train_file, user_item_side_information_file, hierarchy_file,
         test_file):
    """Train SLIM+LDA on hierarchical side information and report precision.

    Fits an LDA model over the side-information matrix and the item
    hierarchy, trains SLIM on the train matrix, combines them through
    slim_lda_recommender, and prints precision against ``test_file``.
    A throwaway diagnostic (marked REMOVE_IT) also dumps per-user
    topic-coincidence stats to /tmp/coincidencias.json.
    """
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    with open(hierarchy_file) as fh:
        hierarchy = json.load(fh)

    lda = LDAHierarquical(B, hierarchy, topics=15)

    #####REMOVE_IT — throwaway diagnostic block
    def important_topics(x, topics):
        # Indices of the `topics` entries with the LOWEST weight
        # (ascending sort, ties kept stable). NOTE(review): if
        # "important" is meant to be highest-weight, this should sort
        # with reverse=True — confirm intent. The key= form replaces the
        # Python-2-only sorted(cmp=...) of the original.
        if not x:
            return x
        transf = sorted(enumerate(x), key=lambda pair: pair[1])
        return [pair[0] for pair in transf[:topics]]

    topics = 3

    # Hoisted out of the loop: the original re-read the test file on
    # every one of the 100 iterations.
    T = tsv_to_matrix(test_file)

    coincidencias = []
    for user in range(1, 101):
        # Topics of this user
        user_topics = important_topics(lda.model['users'][user], topics)

        # Topics of this user's test cities
        cities = T[user].nonzero()[0]
        cities_topics = [
            important_topics(lda.model['cities'].get(city, []), topics)
            for city in cities
        ]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        # -1 marks users with no comparable city topics.
        perc = (coinc / float(topics_compared)) if total else -1

        coincidencias.append([coinc, topics_compared, perc])

    # `with` guarantees the handle is closed (the original leaked it on
    # json.dumps failure).
    with open('/tmp/coincidencias.json', 'w') as out:
        out.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)

    recommendations = slim_lda_recommender(A, W, lda)

    compute_precision(recommendations, test_file)
示例#3
0
文件: ranking.py 项目: ruhan/toyslim
def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    """Compare plain-SLIM rankings against hierarchy-aware (HSLIM) rankings.

    Trains one SLIM model on the train matrix and one on the user/item
    side information, propagates the latter through the item hierarchy,
    and plots two histograms: the per-user top-N set difference between
    the two recommenders, and their Kendall-tau distances.

    NOTE(review): ``test_file`` is currently unused — only the
    commented-out LDA precision experiment consumed it.
    """
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    # Only the commented-out LDA experiment below reads this value; it is
    # re-assigned by hierarchy_factory() before the HSLIM step runs.
    with open(hierarchy_file) as fh:
        hierarchy = json.load(fh)

    W = slim_train(A)

    ## LDA
    #lda = LDAHierarquical(B, hierarchy, topics=20)
    #recommendations_lda = slim_lda_recommender(A, W, lda)
    #compute_precision(recommendations_lda, test_file)
    ###

    recommendations_slim = slim_recommender(A, W)

    ## HSLIM
    from hslim import handle_user_bias, hierarchy_factory, normalize_wline, generate_subitem_hierarchy
    hierarchy = hierarchy_factory('data/hierarchy.json')
    # User bias is handled only for B: it contains explicit evaluations.
    K = slim_train(handle_user_bias(B))
    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)
    recommendations_other = slim_recommender(A, WlineNorm)
    ###

    kendall_tau_values = []
    differences_values = []

    # Plain dict iteration works on both Python 2 and 3; the original used
    # the Python-2-only dict.iterkeys().
    for u in recommendations_slim:
        ranking_slim = recommendations_slim[u][:RANKING_UNTIL]
        ranking_other = recommendations_other[u][:RANKING_UNTIL]

        kendall_tau_values.append(kendalltau(ranking_slim, ranking_other))
        # How many of the SLIM top-N items HSLIM did NOT also recommend.
        differences_values.append(RANKING_UNTIL - len(set(ranking_slim) & set(ranking_other)))

    # Differences
    plt.hist(differences_values)
    plt.xlabel('Size of difference')
    plt.ylabel('Amount of rankings')
    plt.title('Differences (novelty) between rankings')

    # Ranking comparison
    show_matplot_fig()
    plt.figure()
    # kendalltau returns (statistic, pvalue); plot only the statistic.
    plt.hist([i[0] for i in kendall_tau_values])
    plt.xlabel('KendallTau Distance SLIM/SLIM LDA')
    plt.ylabel('Number of occurrences')
    plt.title('Comparison between rankings')
    show_matplot_fig()
示例#4
0
def main(train_file, test_file):
    """Train a SLIM model and score its recommendations as an oracle.

    Loads the training interactions, learns the SLIM weight matrix,
    produces recommendations, and returns the oracle precision measured
    against ``test_file``.
    """
    interactions = tsv_to_matrix(train_file)
    weights = slim_train(interactions)
    ranked = slim_recommender(interactions, weights)
    return compute_precision_as_an_oracle(ranked, test_file)
示例#5
0
def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    """Train SLIM+LDA on hierarchical side information and report precision.

    Fits an LDA model over the side-information matrix and the item
    hierarchy, trains SLIM on the train matrix, combines them through
    slim_lda_recommender, and prints precision against ``test_file``.
    A throwaway diagnostic (marked REMOVE_IT) also dumps per-user
    topic-coincidence stats to /tmp/coincidencias.json.
    """
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    with open(hierarchy_file) as fh:
        hierarchy = json.load(fh)

    lda = LDAHierarquical(B, hierarchy, topics=15)

    #####REMOVE_IT — throwaway diagnostic block
    def important_topics(x, topics):
        # Indices of the `topics` entries with the LOWEST weight
        # (ascending sort, ties kept stable). NOTE(review): if
        # "important" is meant to be highest-weight, this should sort
        # with reverse=True — confirm intent. The key= form replaces the
        # Python-2-only sorted(cmp=...) of the original.
        if not x:
            return x
        transf = sorted(enumerate(x), key=lambda pair: pair[1])
        return [pair[0] for pair in transf[:topics]]

    topics = 3

    # Hoisted out of the loop: the original re-read the test file on
    # every one of the 100 iterations.
    T = tsv_to_matrix(test_file)

    coincidencias = []
    for user in range(1, 101):
        # Topics of this user
        user_topics = important_topics(lda.model['users'][user], topics)

        # Topics of this user's test cities
        cities = T[user].nonzero()[0]
        cities_topics = [
            important_topics(lda.model['cities'].get(city, []), topics)
            for city in cities
        ]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        # -1 marks users with no comparable city topics.
        perc = (coinc / float(topics_compared)) if total else -1

        coincidencias.append([coinc, topics_compared, perc])

    # `with` guarantees the handle is closed (the original leaked it on
    # json.dumps failure).
    with open('/tmp/coincidencias.json', 'w') as out:
        out.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)

    recommendations = slim_lda_recommender(A, W, lda)

    compute_precision(recommendations, test_file)
示例#6
0
def main(train_file, user_sideinformation_file, hierarchy_file, test_file):
    """Train HSLIM and report recommendation precision.

    Trains SLIM on the train matrix and (bias-corrected) on the side
    information, propagates the side-information weights through the item
    hierarchy, recommends from the normalized result, and prints precision
    against ``test_file``.
    """
    A = tsv_to_matrix(train_file)
    # Force B to A's dimensions so the learned weight matrices conform.
    B = tsv_to_matrix(user_sideinformation_file, A.shape[0], A.shape[1])
    hierarchy = hierarchy_factory(hierarchy_file)

    # Learning using SLIM.
    # We handle user bias only in B because in B we have explicit evaluations.
    K = slim_train(handle_user_bias(B))
    W = slim_train(A)

    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)

    # FIX(review): removed the leftover `import pdb;pdb.set_trace()`
    # breakpoint that halted every run at this point.
    #recommendations = slim_recommender(A, W + 0.2 * WlineNorm)
    recommendations = slim_recommender(A, WlineNorm)

    # See if the predictor is just of not
    #user_cities = np.array([ map(hierarchy, B[i].nonzero()[0].tolist()) for i in range(B.shape[0]) ])
    #G = tsv_to_matrix(test_file)
    #print 'TEM QUE DAR VAZIO: ', set(G[1].nonzero()[0]) & set(user_cities[1])
    ### ---- FIM REMOVAME

    compute_precision(recommendations, test_file)