Python load_pickle примеры, data.load_pickle Python примеры использования

Пример #1

0

Показать файл

def main():
    # Load data and parse it:
    # mymovies = data.load_dat("movies.dat")
    # myratings = data.load_dat("ratings.dat")
    # Dump the parsed data to pickles into the file system:
    # data.dump_pickle(mymovies, generate_file_name("movies", "pkl"))
    # data.dump_pickle(myratings, generate_file_name("ratings", "pkl"))
    # Load the pickles (much faster than loading and parsing again the raw data):
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)

    # Question 1:
    print "Question 1: simple product association 1 and 1064 = ", calculate_simple_association(
        1, 1064)
    # Question 2:
    print "Question 2: advanced product association 1 and 1064 = ", calculate_advanced_association(
        1, 1064)
    # Question 4:
    print "Question 4: simple product association 1 and 2858 =", calculate_simple_association(
        1, 2858)
    # Question 5:
    print "Question 5: title and genre (1, 1064, 2858)."
    for id in [1, 1064, 2858]:
        title, genre = utils.get_title_genre(id, movies_pkl)
        print "\t-", id, "=", title, "::", genre
    # Question 7:
    print "Question 7: advanced product association 1 and 2858 = ", calculate_advanced_association(
        1, 2858)
    # Question 9:
    print "Question 9: top 10 most rated movies; provide ID, number of users who rated and title."
    top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10)
    for i in range(len(top10_movies)):
        print "\tID:", top10_movieids[i], ":: Num. users:", top10_ratings[
            i], ":: Title:", top10_movies[i]
    # Question 10:
    print "Question 10: top 5 movies with highest simple association with movie 3941; provide ID, value and title."
    topn = topN_movies_simple_association(3941, 5)
    for item in topn:
        print "\t", item
    # Question 11:
    print "Question 11: top 5 movies with highest advanced association with movie 3941; provide ID, value and title."
    topn = topN_movies_advanced_association(3941, 5)
    for item in topn:
        print "\t", item
    # Question 14:
    print "Question 14: top 10 most rated movies with at least 4 stars; provide ID, number of users who rated and title."
    top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10, 4)
    for i in range(len(top10_movies)):
        print "\tID:", top10_movieids[i], ":: Num. users:", top10_ratings[
            i], ":: Title:", top10_movies[i]

Пример #2

0

Показать файл

def calculate_advanced_association(movieX, movieY, ratings=None):
    """
    Calculates the advanced association value for movieX with respect movieY.

    The value is (XY / X) / (notXY / notX), where X and Y are the counts returned by utils.how_many_Z for
    each movie, XY is the count returned by utils.how_many_X_and_Y, notXY = Y - XY and notX = len(ratings) - X.
    :param movieX: ID of movie X
    :param movieY: ID of movie Y
    :param ratings: collection of all ratings. If available at calling time, then it can be used, otherwise it will be
    locally retrieved.
    :return: the value computed by the advanced association, or 0.0 when the formula would divide by zero
    """
    # First get the data (preferably by parameter otherwise from pickles):
    if ratings is None:
        # Movies data-set not needed, just ratings.
        ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)
    # A pre-computed count for movie X is used when one is stored in the globals module.
    # NOTE(review): the cached value is not keyed by movie ID, so it is only valid while it was computed for
    # this same movieX -- confirm callers reset it between different query movies.
    X = globals.AMOUNT_RATED_X
    if X is None:
        X = utils.how_many_Z(movieX, ratings)
    Y = utils.how_many_Z(movieY, ratings)
    XY = utils.how_many_X_and_Y(movieX, movieY, ratings)
    # NOTE(review): len(ratings) is the number of rating records; presumably the intended population is the
    # number of users -- verify against the formula's definition.
    notX = len(ratings) - X
    notXY = Y - XY
    try:
        return (float(XY) / X) / (float(notXY) / notX)
    except ZeroDivisionError:
        # Happens when X, notX or notXY is zero: the association is undefined and is reported as 0.0.
        return 0.0

Пример #3

0

Показать файл

def topN_movies_advanced_association(movieX_ID, N=10):
    """
    Retrieves a list of movie IDs with the highest advanced association value with respect movieX.
    In case of a tie, the movie with the higher ID is ranked before the movie with lower ID.
    The query movie itself is never part of the result.
    :param movieX_ID: ID of movie X
    :param N: number of movies to put in the returned list (topN)
    :return: a list of tuples with movie ID, association value and title
    """
    # First get the data (preferably from pickles); both data-sets are needed here:
    movies = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)
    # To SPEED UP the execution and avoid repeating the same work on every iteration, two values that are common
    # to all iterations are computed once and published as global variables (other modules can consult them):
    # - How many times movie X was rated
    # - A collection of all ratings indexed by user
    # NOTE(review): both globals stay set after this function returns -- confirm later callers expect that.
    globals.AMOUNT_RATED_X = utils.how_many_Z(movieX_ID, ratings)
    globals.RATINGS_BY_USER = utils.extract_ratings_by_user(ratings)

    # Compute the advanced association value of every other movie with respect to movie X.
    # The query movie is excluded explicitly by ID: pairing X with itself makes notXY zero, so its value falls
    # back to 0.0 and it cannot be relied upon to be the first element after sorting.
    query_id = int(movieX_ID)
    aa_values = []  # A list of tuples with the following form: "(movieID, association value)"
    for movieY_ID in movies:
        if int(movieY_ID) == query_id:
            continue
        aa_values.append(
            (int(movieY_ID),
             calculate_advanced_association(movieX_ID, movieY_ID, ratings)))
    # Tuples sorted from BIG to SMALL association value; ties are broken by the higher movie ID first.
    mysorted = sorted(
        aa_values, key=operator.itemgetter(1, 0), reverse=True)
    # Keep the top N tuples and attach the title (the movies collection is keyed by the ID as a string).
    return [(movie_id, value, movies[str(movie_id)]['title'])
            for movie_id, value in mysorted[:N]]

Пример #4

0

Показать файл

Файл: main.py Проект: dbgithub/recommender-systems

def test():
    """
    This function is used as a test-bed.
    Just for TESTING purposes. It shouldn't be used for production code.
    :return:
    """
    print "HelloWorldTEST!"

    # -------------------------------
    # Testing library against Sugestio API:

    # client = sugestio.Client(ACCOUNT, SECRET)
    # status, content = client.get_recommendations(1, 5)
    # if status == 200:
    #     print("Title\tScore")
    #     for recommendation in content:
    #         print(recommendation.item.title + "\t" + str(recommendation.score))
    # else:
    #     print("server response code:", status)

    # -------------------------------
    # Testing own implemented methods:

    # Data:
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    # Testing methods:
    submit_metadata_single_movie(83829, movies_pkl['83829'])
    submit_rating_single_movie(8, ratings_pkl[3500])
    update_rating_single_movie(8, ratings_pkl[200])
    s, recommendations = topN_recommendations_user(1, N=5)
    for i, rec in enumerate(recommendations):
        print "\t\t Recommendation#", i, ": ", rec
    s, consumptions = rating_history_user(1)
    for i, con in enumerate(consumptions):
        print "\t\t Consumptions#", i, ": ", con
    s, mov = get_metadata_movie(2)
    print "Movie details: ", mov
    submit_movies_metadata_bulk(movies_pkl.values())
    submit_movies_ratings_bulk(ratings_pkl)

Пример #5

0

Показать файл

def test():
    """
    This function is used as a test-bed.
    Just for TESTING purposes. It shouldn't be used for production code.
    :return:
    """
    # Load data and parse it:
    # mymovies = data.load_dat("movies.dat")
    # myratings = data.load_dat("ratings.dat")
    # Dump the parsed data to pickles into the file system:
    # data.dump_pickle(mymovies, generate_file_name("movies", "pkl"))
    # data.dump_pickle(myratings, generate_file_name("ratings", "pkl"))
    # Load the pickles (much faster than loading and parsing again the raw data):
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    print "len(movies_pkl): ", len(movies_pkl)
    print "len(ratings_pkl): ", len(ratings_pkl)
    # print movies_pkl['3196']
    # print ratings_pkl[0]

    # Plot top10 rated movies with/without stars:
    top10_movies, top10_ratings, top10_movieids = topN_most_rated_movies(10)
    utils.plot_top10_rated_distribution(top10_movies, top10_ratings)

Пример #6

0

Показать файл

def topN_most_rated_movies(N=10, stars=None):
    """
    Retrieves the N movies that received the largest number of ratings.
    :param N: number of movies to put in the returned lists (topN)
    :param stars: minimum number of stars (integer). When given, only ratings with at least this many stars are
    counted; when None, every rating is counted.
    :return: three index-aligned lists ordered from BIG to SMALL number of ratings: the movie titles, the number of
    ratings of each movie, and the movie IDs.
    """
    # First, let's load the data:
    movies = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)
    # Count the ratings of every movie, ignoring the ratings that are below the requested number of stars:
    aggregated_ratings = {}
    for elem in ratings:
        if stars is not None and int(elem['rating']) < stars:
            continue
        movie_id = elem['movieid']
        aggregated_ratings[movie_id] = aggregated_ratings.get(movie_id, 0) + 1
    # Sort the (movie ID, count) pairs from BIG to SMALL count and keep the first N of them (Top N):
    mysorted = sorted(aggregated_ratings.items(),
                      key=operator.itemgetter(1),
                      reverse=True)[:N]
    # Split the pairs into the three result lists; titles are looked up by movie ID:
    topN_movieIDs = [movie_id for movie_id, _ in mysorted]
    topN_ratings = [count for _, count in mysorted]
    topN_movies = [movies[movie_id]['title'] for movie_id in topN_movieIDs]
    return topN_movies, topN_ratings, topN_movieIDs

Пример #7

0

Показать файл

def test():
    """
    This function is used as a test-bed.
    Just for TESTING purposes. It shouldn't be used for production code.
    :return:
    """
    print "HelloWorldTEST!"

    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    print "len(movies_pkl): ", len(movies_pkl)
    print "len(ratings_pkl): ", len(ratings_pkl)
    globals.RATINGS_BY_USER = utils.extract_ratings_by_users(ratings_pkl)
    globals.RATINGS_BY_USER_MAP = utils.extract_ratings_by_users_map(ratings_pkl)
    # globals.RATINGS_X_BY_USERS = utils.extract_ratings_x_by_users(movies_pkl, ratings_pkl)
    # globals.MEAN_RATINGS_ITEM = utils.extract_mean_ratings(movies_pkl)
    globals.RATINGS_X_BY_USERS = data.load_pickle(RATINGS_X_BY_USERS_PATH)  # load pickle
    globals.MEAN_RATINGS_ITEM = data.load_pickle(MEAN_RATINGS_ITEM_PATH)  # load pickle
    globals.SIMILARITY_TYPE = globals.SimilarityType()
    print "len(RATINGS_BY_USER): ", len(globals.RATINGS_BY_USER)
    print "len(RATINGS_BY_USER_MAP): ", len(globals.RATINGS_BY_USER_MAP)
    print "len(RATINGS_X_BY_USERS): ", len(globals.RATINGS_X_BY_USERS)
    print "len(MEAN_RATINGS_ITEM): ", len(globals.MEAN_RATINGS_ITEM)
    print "SIMILARITY_TYPE.type(): ", globals.SIMILARITY_TYPE.type()

Пример #8

0

Показать файл

def calculate_simple_association(movieX, movieY, ratings=None):
    """
    Calculates the simple association value for movieX with respect movieY.

    The value is XY / X, where XY is the count returned by utils.how_many_X_and_Y and X is the count for movie X
    (taken from globals.AMOUNT_RATED_X when set, otherwise from utils.how_many_Z).
    :param movieX: ID of movie X
    :param movieY: ID of movie Y
    :param ratings: collection of all ratings. If available at calling time, then it can be used, otherwise it will be
    locally retrieved.
    :return: the value computed by the simple association, or 0.0 when the count for movie X is zero
    """
    # First get the data (preferably by parameter otherwise from pickles):
    if ratings is None:
        # Movies data-set not needed, just ratings.
        ratings = data.load_pickle(RATINGS_PICKLE_LOCATION)
    XY = float(utils.how_many_X_and_Y(movieX, movieY, ratings))
    # A pre-computed count for movie X is used when one is stored in the globals module.
    # NOTE(review): the cached value is not keyed by movie ID -- confirm it was computed for this movieX.
    X = globals.AMOUNT_RATED_X
    if X is None:
        X = utils.how_many_Z(movieX, ratings)
    try:
        return XY / X
    except ZeroDivisionError:
        # A zero count for movie X leaves the association undefined; report 0.0, as
        # calculate_advanced_association does for its own zero divisions.
        return 0.0

Пример #9

0

Показать файл

Файл: webpage.py Проект: kroger/uirapuru

            print_attribute("ambitus", collection, out)

            out.write(rst_header("Pickup", 3))
            out.write("1 if has pickup\n\n")
            print_attribute("has_pickup", collection, out)


def make_intervals_webpage(alist):
    """Write the reST page ../docs/intervals.rst containing only the "Intervals" header.

    The alist argument is accepted but not used by the visible code.
    """
    target_path = "../docs/intervals.rst"
    page_title = rst_header("Intervals", 1)
    with codecs.open(target_path, 'w', encoding="utf-8") as rst_file:
        rst_file.write(page_title)


        

if __name__ == '__main__':
    brasil = data.load_pickle("brasil")
    europa = data.load_pickle("europa")

    binary = ('2/2', '2/4', '2/8', '4/2', '4/4', '4/8', '6/8', '6/16', '12/8')
    ternary = ('3/4', '3/8', '9/8')

    # alist = intervals.list_songs_with_dissonant_intervals(brasil)
    # make_dissonant_intervals_webpage(alist)

    #make_basic_attributes_webpage((("Brasil", brasil), ("Europa", europa)))

    make_intervals_webpage(None)

    make_contours_webpage({
        "Brasil": brasil,
        "Europa": europa,

Пример #10

0

Показать файл

Файл: main.py Проект: dbgithub/recommender-systems

def main():
    """
    Print the headings of the numbered assignment questions.

    Loads the movie and rating pickles, prints their sizes and then prints one heading per question.
    The code that produces each answer is kept commented out under its heading so that it can be
    enabled one question at a time.

    NOTE(review): several disabled snippets test `genres is ""`; that compares identity with a string
    literal. Use `genres == ""` (or `", ".join(...)`) when re-enabling them.
    """
    # Load data and parse it:
    # mymovies = data.load_dat("movies.csv")
    # myratings = data.load_dat("ratings.csv")
    # Dump the parsed data to pickles into the file system:
    # data.dump_pickle(mymovies, data.generate_file_name("movies", "pkl"))
    # data.dump_pickle(myratings, data.generate_file_name("ratings", "pkl"))
    # Load the pickles (much faster than loading and parsing again the raw data):
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    print "len(movies_pkl): ", len(movies_pkl)
    print "len(ratings_pkl): ", len(ratings_pkl)
    # Bulk upload of the whole data-set (disabled):
    # submit_movies_metadata_bulk(movies_pkl.values())
    # submit_movies_ratings_bulk(ratings_pkl)

    # # Question 1:
    print "Question 1: rating of movie 1125 by user 289."
    # s, rating = utils.get_rating(289, 1125)
    # print "\t\t| rating =", utils.decode_stars(rating[0].detail)
    # print "\t\t| date & time =", rating[0].date
    # s, metadata = get_metadata_movie(1125)
    # print "\t\t| title =", metadata.title
    # print "\t\t| genre(s) =",
    # for genre in metadata.category:
    #     print genre,
    # print ""  # this print is just for readability
    # # Question 3:
    print "\nQuestion 3: rating history user 249."
    # s, history = rating_history_user(249)
    # rating_stars = []
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)
    #     rating_stars.append(utils.decode_stars(rating.detail))
    # print "\t\t| Mean =", mean(rating_stars)
    # print "\t\t| Median =", median(rating_stars)
    # print "\t\t| Uniqueness =", dict(zip(*unique(rating_stars, return_counts=True)))
    # # Question 4:
    print "\nQuestion 4: top five collaborative filtering recommendations user 249."
    # s, top5 = topN_recommendations_user(249, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # # Question 5:
    print "\nQuestion 5: rating history user 35."
    # s, history = rating_history_user(35)
    # rating_stars = []
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)
    #     rating_stars.append(utils.decode_stars(rating.detail))
    # print "\t\t| Mean =", mean(rating_stars)
    # print "\t\t| Median =", median(rating_stars)
    # print "\t\t| Uniqueness =", dict(zip(*unique(rating_stars, return_counts=True)))
    # # Question 6:
    print "\nQuestion 6: top five collaborative filtering recommendations user 35."
    # s, top5 = topN_recommendations_user(35, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # # Question 8:
    print "\nQuestion 8: top five content based recommendations user 249."
    # s, top5 = topN_recommendations_user(249, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # # Question 9:
    print "\nQuestion 9: top five content based recommendations user 35."
    # s, top5 = topN_recommendations_user(35, 5)
    # for rec in top5:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # Question 10a:
    print "\nQuestion 10a: additional raitings for new user 1000."
    # additional_ratings = [
    #     {'userid': 1000, 'movieid': 1590, 'rating': 4.0, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 1196, 'rating': 4.5, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 4878, 'rating': 4.0, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 589, 'rating': 4.5, 'timestamp': 1476640644},
    #     {'userid': 1000, 'movieid': 480, 'rating': 4.5, 'timestamp': 1476640644}
    # ]
    # submit_movies_ratings_bulk(additional_ratings) # Make sure to execute this once and at the correct moment
    # Question 10b:
    print "\nQuestion 10b: rating history user 1000."
    # s, history = rating_history_user(1000)
    # for rating in history:
    #     s, metadata = get_metadata_movie(rating.itemid)
    #     genres = ""
    #     for genre in metadata.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rating.itemid, ":: Title =", metadata.title, ":: Genre(s) =", genres, ":: Rating =", utils.decode_stars(rating.detail)
    # Question 12:
    print "\nQuestion 12: top 10 collaborative filtering recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # Question 14:
    print "\nQuestion 14: top 10 content based recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # Question 15:
    print "\nQuestion 15: additional rating by user 1000."
    # s = submit_rating_single_movie(6587, {'userid': 1000, 'movieid': 6587, 'rating': 1.0, 'timestamp': 1476640644})
    # s, metadata = get_metadata_movie(6587)
    # genres = ""
    # for genre in metadata.category:
    #     if genres is "":
    #         genres = "{0}".format(genre)
    #     else:
    #         genres = "{0}, {1}".format(genres, genre)
    # print "\t\t| Movie title =", metadata.title, ":: Genre(s) =", genres
    # Question 17:
    print "\nQuestion 17: top 10 content based recommendations user 1000."
    # s, top10 = topN_recommendations_user(1000, 10)
    # for rec in top10:
    #     genres = ""
    #     for genre in rec.item.category:
    #         if genres is "":
    #             genres = "{0}".format(genre)
    #         else:
    #             genres = "{0}, {1}".format(genres, genre)
    #     print "\t\t| Movie id =", rec.item.id, ":: Title =", rec.item.title, ":: Genre(s) =", genres, ":: Score =", rec.score
    # Question 18:
    print "\nQuestion 18: delete all consumptions user 1000."

Пример #11

0

Показать файл

def main():
    """
    Print the answers to the numbered collaborative-filtering questions.

    Loads the movie and rating pickles, fills the shared variables of the globals module, and then
    prints, question by question, Pearson correlations, neighbor lists, rating predictions, item
    similarities read from the pickled IICF model and several top-N recommendation lists.

    NOTE(review): the "Question 21" statement below is corrupted in this copy of the file (a run of
    asterisks replaces the code between two string literals), so the function does not parse as-is.
    """
    # Load data and parse it:
    # mymovies = data.load_dat("movies.csv")
    # myratings = data.load_dat("ratings.csv")
    # Dump the parsed data to pickles into the file system:
    # data.dump_pickle(mymovies, data.generate_file_name("movies", "pkl"))
    # data.dump_pickle(myratings, data.generate_file_name("ratings", "pkl"))
    # Load the pickles (much faster than loading and parsing again the raw data):
    movies_pkl = data.load_pickle(MOVIE_PICKLE_LOCATION)
    ratings_pkl = data.load_pickle(RATINGS_PICKLE_LOCATION)
    print "len(movies_pkl): ", len(movies_pkl)
    print "len(ratings_pkl): ", len(ratings_pkl)

    # Setting some global variables and general information:
    globals.RATINGS_BY_USER = utils.extract_ratings_by_users(ratings_pkl)
    globals.RATINGS_BY_USER_MAP = utils.extract_ratings_by_users_map(ratings_pkl)
    globals.RATINGS_X_BY_USERS = utils.extract_ratings_x_by_users(movies_pkl, ratings_pkl)
    globals.MEAN_RATINGS_ITEM = utils.extract_mean_ratings(movies_pkl)
    # globals.RATINGS_X_BY_USERS = data.load_pickle(RATINGS_X_BY_USERS_PATH)  # Loading the pickle is faster than calculating it in run-time
    # globals.MEAN_RATINGS_ITEM = data.load_pickle(MEAN_RATINGS_ITEM_PATH)  # Loading the pickle is faster than calculating it in run-time
    globals.SIMILARITY_TYPE = globals.SimilarityType()
    print "len(RATINGS_BY_USER): ", len(globals.RATINGS_BY_USER)
    print "len(RATINGS_BY_USER_MAP): ", len(globals.RATINGS_BY_USER_MAP)
    print "len(RATINGS_X_BY_USERS): ", len(globals.RATINGS_X_BY_USERS)
    print "len(MEAN_RATINGS_ITEM): ", len(globals.MEAN_RATINGS_ITEM)
    print "SIMILARITY_TYPE.type(): ", globals.SIMILARITY_TYPE.type()
    # Dump some globals (just for the first time):
    # data.dump_pickle(globals.RATINGS_X_BY_USERS, data.generate_file_name("RATINGS_X_BY_USERS", "pkl"))
    # data.dump_pickle(globals.MEAN_RATINGS_ITEM, data.generate_file_name("MEAN_RATINGS_ITEM", "pkl"))
    # Build IICF model:
    # build_model_iicf(movies_pkl, ratings_pkl)
    # Dump the IICF model to pickle into file system (just for the first time):
    # data.dump_pickle(globals.IICF_MODEL, data.generate_file_name("IICF-model", "pkl"))

    # Question 1:
    print "Question 1: Pearson correlation (without significance weighting) user 1 and 4."
    common_ratings, amount = utils.find_common_ratings(1, 4, None)
    p = calculate_pearson_correlation(common_ratings, 1, 4)
    print "\t| Result =", p
    # Question 2:
    # Same correlation as in Question 1, multiplied by the significance weighing factor.
    print "Question 2: Pearson correlation (with significance weighting) user 1 and 4."
    common_ratings, amount = utils.find_common_ratings(1, 4, None)
    p = calculate_pearson_correlation(common_ratings, 1, 4)
    result = p * calculate_significance_weighing_factor(1,4,ratings_pkl,amount)
    print "\t| Result =", result
    # Question 5:
    print "Question 5: Amount of neighbors (strict positive similarity) user 1 and item 10."
    neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1,10,1000)
    print "\t| # of neighbors = ", len(neighborsIDs)
    # Question 6:
    # The neighbors are retrieved again, this time without passing the third argument.
    print "Question 6: List those neighbors."
    neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1, 10)
    for item in neighborsIDs:
        print "\t| ID: ", item, " | Pearson: ", neighbors_data[item]['pearson']
    # Question 7:
    # NOTE(review): Questions 7 and 8 print the result of the same call -- confirm that is intended.
    print "Question 7: Weighted average of the deviation from mean rating. Neighbors of user 1 item 10."
    pre = rating_prediction_user(1,10,neighborsIDs,neighbors_data)
    print "\t| Result = ", pre
    # Question 8:
    print "Question 8: Rating prediction user 1 item 10."
    pre = rating_prediction_user(1,10,neighborsIDs,neighbors_data)
    print "\t| Result = ", pre
    # Question 9:
    print "Question 9: Title item 10."
    print "\t Title = ", movies_pkl['10']['title']
    # Question 10:
    print "Question 10: Amount of neighbors (strict positive similarity) user 1 and item 260."
    neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1,260,1000)
    print "\t| # of neighbors = ", len(neighborsIDs)
    # Question 11:
    print "Question 11: List those neighbors."
    neighborsIDs, neighbors_data = top_k_most_similar_neighbors(1, 260)
    for item in neighborsIDs:
        print "\t| ID: ", item, " | Pearson: ", neighbors_data[item]['pearson']
    # Question 12:
    # NOTE(review): Questions 12 and 13 print the result of the same call -- confirm that is intended.
    print "Question 12: Weighted average of the deviation from mean rating. Neighbors of user 1 item 260."
    pre = rating_prediction_user(1, 260, neighborsIDs, neighbors_data)
    print "\t| Result = ", pre
    # Question 13:
    print "Question 13: Rating prediction user 1 item 260."
    pre = rating_prediction_user(1, 260, neighborsIDs, neighbors_data)
    print "\t| Result = ", pre
    # Question 14:
    print "Question 14: Title item 260."
    print "\t Title = ", movies_pkl['260']['title']
    # Question 16:
    print "Question 16: Top-N recommendations user 1."
    topn = topN_recommendations_uucf(1,movies_pkl)
    for item in topn:
        print "\t| ({0},{1},{2})".format(item[0], item[1], item[2])
    # Question 17:
    print "Question 17: Top-N recommendations user 522."
    topn = topN_recommendations_uucf(522,movies_pkl)
    for item in topn:
        print "\t| ({0},{1},{2})".format(item[0], item[1], item[2])
    # Question 19:
    print "Question 19: IICF model. Strict positive similarities."
    globals.IICF_MODEL = data.load_pickle(IICF_MODEL_NAME)  # load the IICF model
    print "\t| Result = ", len(globals.IICF_MODEL[1])
    # Question 20:
    # The similarity is read from the loaded model at index 1, then by the two item IDs.
    print "Question 20: Cosine similarity between items 594 and 596."
    sim = globals.IICF_MODEL[1][594][596]
    print "\t| Similarity = ", sim
    print "\t| Movie 594 =", movies_pkl['594']['title']
    print "\t| Movie 596 =", movies_pkl['596']['title']
    # Question 21:
    # NOTE(review): the next line is damaged -- the asterisks stand where one or more statements were lost
    # (presumably the end of this print, the code assigning `result`, and the start of the next print).
    # Restore it from the original project before running; it is not valid Python as it stands.
    print "Question 21: Rating prediction for user 522 and item 25. Similar neighbors rated by user:"******"\t| Result = ", len(result)
    # Question 22:
    print "Question 22: Top-k similar items for user 522 and item 25."
    globals.SIMILARITY_TYPE.setPositive()
    topk = top_k_most_similar_items(522, 25)
    for item in topk:
        print "\t| ({0} , {1} , {2})".format(item[0], movies_pkl[str(item[0])]['title'], item[1])
    # Question 24:
    print "Question 24: Top-N recommendations for user 522."
    globals.SIMILARITY_TYPE.setPositive()
    topn = topN_recommendations_iicf(522, movies_pkl)
    for item in topn:
        print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])
    # Question 25:
    print "Question 25: Top-N recommendations basket items: [1]."
    globals.SIMILARITY_TYPE.setPositive()
    topn = topN_recommendations_basket([1], movies_pkl)
    for item in topn:
        print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])
    # Question 26:
    print "Question 26: Top-N recommendations basket items: [1, 48, 239]."
    globals.SIMILARITY_TYPE.setPositive()
    topn = topN_recommendations_basket([1, 48, 239], movies_pkl)
    for item in topn:
        print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])
    # Question 27:
    # Same basket as in Question 26, but the similarity type is switched with setBoth() first.
    print "Question 27: Top-N recommendations basket items: [1, 48, 239] plus negative similarities."
    globals.SIMILARITY_TYPE.setBoth()
    topn = topN_recommendations_basket([1, 48, 239], movies_pkl)
    for item in topn:
        print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])
    # Question 29:
    print "Question 29: Top-N recommendations hybrid for user 522."
    globals.SIMILARITY_TYPE.setPositive()
    topn = topN_recommendations_hybrid(522,movies_pkl)
    for item in topn:
        print "\t| ( {0} , {1}, {2} )".format(item[0], item[1], item[2])

Python load_pickle примеры использования