Пример #1
0
    * {userId: {movieId: rating}}
    * {movieId: {userId: rating}}
"""
# Load the training ratings into two nested dicts, one per orientation:
#   urm:           {userId: {movieId: rating}}
#   movie_ratings: {movieId: {userId: rating}}
urm = {}
movie_ratings = {}
# newline='' hands newline handling to the csv module, as its docs require.
with open('resources/train.csv', 'r', newline='') as urm_raw:
    reader = csv.reader(urm_raw)
    for row in reader:
        # csv yields [] for blank lines; skip those together with the header.
        if not row or row[0] == 'userId':
            continue
        # Parse each field once instead of re-converting on every lookup.
        user_id, movie_id, rating = int(row[0]), int(row[1]), float(row[2])
        # setdefault creates the inner dict on first sight; the rating is
        # stored directly, with no intermediate None placeholder.
        urm.setdefault(user_id, {})[movie_id] = rating
        movie_ratings.setdefault(movie_id, {})[user_id] = rating

# Smoke test: print whatever top_n_match returns for key 1 of the
# movie-major matrix, using cosine similarity.
print(top_n_match(movie_ratings, 1, skr=10, similarity=cosine_sim))
# Timestamp taken after the smoke test; nothing in this fragment reads it.
time = datetime.now()

# Containers for per-movie and per-user neighbour results.
movie_knn = {}
user_knn = {}
# The triple-quoted string below is disabled code kept as a bare string
# expression: it is never executed, so user_knn stays empty in this fragment.
"""
with open('resources/test.csv') as test_raw:
    reader = csv.reader(test_raw)
    # create a nested dict for test user
    for row in reader:
        if row[0] != 'userId':
            user_knn[int(row[0])] = top_n_match(urm, int(row[0]), skr=10, n=20, similarity=cosine_sim)
            print('fatto user: ' + row[0])
"""
# Make sure every movie key of movie_ratings has an entry in movie_knn,
# inserting an empty dict where none exists yet.
# NOTE(review): the fragment ends here; the loop body may continue in the
# original file.
for movie in movie_ratings:
    movie_knn.setdefault(movie, {})
Пример #2
0
# Item-content matrix as a nested dict: {itemId: {column-1 id: 1}}.
# Every (row[0], row[1]) pair found in the file is stored as the flag 1.
# NOTE(review): column 1 is presumably a feature/attribute id -- confirm
# against the schema of icm.csv.
icm = {}
# newline='' hands newline handling to the csv module, as its docs require.
with open('resources/icm.csv', 'r', newline='') as urm_raw:
    reader = csv.reader(urm_raw)
    for row in reader:
        # csv yields [] for blank lines; skip those together with the header.
        if not row or row[0] == 'itemId':
            continue
        # setdefault creates the inner dict on first sight; the flag is
        # stored directly, with no intermediate None placeholder.
        icm.setdefault(int(row[0]), {})[int(row[1])] = 1
print('computing right now')
# NOTE(review): this first timing pair brackets no work, so it always prints
# a near-zero duration. Kept only so the script's output is unchanged.
time = datetime.datetime.now()
print(datetime.datetime.now() - time)

# For every item in icm (ascending id order) compute its neighbours with
# top_n_match and stream them to a CSV with the columns
# itemId, neighborId, similarity. knn keeps the results in memory as well.
knn = {}
time = datetime.datetime.now()
# newline='' plus an explicit lineterminator gives the header and the data
# rows the same '\n' ending on every platform. Previously the header went
# through DictWriter ('\r\n') while rows were written by hand ('\n').
with open('resources/knn_item_movie_25.csv', 'w', newline='') as knn_raw:
    fieldnames = ['itemId', 'neighborId', 'similarity']
    w = csv.DictWriter(knn_raw, fieldnames=fieldnames, lineterminator='\n')
    w.writeheader()
    for movie in sorted(icm):
        knn[movie] = top_n_match(icm, movie, skr=5, n=25, similarity=item_sim)
        print('fatto movie: ' + str(movie))
        neighbours = knn[movie]
        for other in neighbours:
            # Route rows through the writer so quoting stays CSV-correct;
            # str() keeps the exact text the manual concatenation produced.
            w.writerow({
                'itemId': str(movie),
                'neighborId': str(other),
                'similarity': str(neighbours[other]),
            })
print(datetime.datetime.now() - time)
Пример #3
0
# Movie-major rating matrix: {movieId: {userId: adjusted rating}}, where the
# adjusted rating is the raw rating plus the movie's item_bias entry and the
# user's user_bias entry.
# NOTE(review): the biases are added rather than subtracted -- confirm this
# matches the sign convention used when item_bias/user_bias were computed.
urm = {}
# newline='' hands newline handling to the csv module, as its docs require.
with open('resources/train.csv', 'r', newline='') as urm_raw:
    reader = csv.reader(urm_raw)
    for row in reader:
        # csv yields [] for blank lines; skip those together with the header.
        if not row or row[0] == 'userId':
            continue
        # Parse the ids once; they index both the matrix and the bias tables.
        user_id, movie_id = int(row[0]), int(row[1])
        adjusted = float(row[2]) + item_bias[movie_id] + user_bias[user_id]
        # setdefault creates the inner dict on first sight; the value is
        # stored directly, with no intermediate None placeholder.
        urm.setdefault(movie_id, {})[user_id] = adjusted
print('computing right now')
# NOTE(review): this first timing pair brackets no work, so it always prints
# a near-zero duration. Kept only so the script's output is unchanged.
time = datetime.datetime.now()
print(datetime.datetime.now() - time)

# For every movie in the movie-major urm (ascending id order) compute its
# neighbours with top_n_match under cosine similarity and stream them to a
# CSV with the columns itemId, neighborId, similarity. knn keeps the results
# in memory as well.
knn = {}
time = datetime.datetime.now()
# newline='' plus an explicit lineterminator gives the header and the data
# rows the same '\n' ending on every platform. Previously the header went
# through DictWriter ('\r\n') while rows were written by hand ('\n').
with open('resources/knn_movie_urm_25.csv', 'w', newline='') as knn_raw:
    fieldnames = ['itemId', 'neighborId', 'similarity']
    w = csv.DictWriter(knn_raw, fieldnames=fieldnames, lineterminator='\n')
    w.writeheader()
    for movie in sorted(urm):
        knn[movie] = top_n_match(urm, movie, skr=5, n=25, similarity=cosine_sim)
        print('fatto movie: ' + str(movie))
        neighbours = knn[movie]
        for other in neighbours:
            # Route rows through the writer so quoting stays CSV-correct;
            # str() keeps the exact text the manual concatenation produced.
            w.writerow({
                'itemId': str(movie),
                'neighborId': str(other),
                'similarity': str(neighbours[other]),
            })
print(datetime.datetime.now() - time)
Пример #4
0
        rec = sorted(rankings, key=lambda x: -x[0])[0:5]

    return rec


# User-major rating matrix read from the training CSV:
#   urm: {userId: {movieId: rating}}
urm = {}
# newline='' hands newline handling to the csv module, as its docs require.
with open('resources/train.csv', 'r', newline='') as urm_raw:
    reader = csv.reader(urm_raw)
    for row in reader:
        # csv yields [] for blank lines; skip those together with the header.
        if not row or row[0] == 'userId':
            continue
        # setdefault creates the user's inner dict on first sight; the rating
        # is stored directly, with no intermediate None placeholder.
        user_ratings = urm.setdefault(int(row[0]), {})
        user_ratings[int(row[1])] = float(row[2])
print('computing right now')
# Time a single sample query (key 4, Pearson similarity) and show its result.
time = datetime.datetime.now()
print(top_n_match(urm, 4, 7, similarity=similarity_pearson))
print(datetime.datetime.now() - time)

# Compute the neighbours of every user listed in the test file, keyed by
# user id, and report how long the whole pass took.
test = {}
time = datetime.datetime.now()
with open('resources/test.csv') as test_raw:
    reader = csv.reader(test_raw)
    for row in reader:
        raw_user = row[0]
        # The header row carries the column name instead of an id.
        if raw_user == 'userId':
            continue
        user_id = int(raw_user)
        test[user_id] = top_n_match(urm, user_id, skr=10, similarity=cosine_sim)
        # Progress marker ("fatto" is Italian for "done").
        print('fatto user: ' + raw_user)
print(datetime.datetime.now() - time)
print(len(test))

"""