示例#1
0
def analyse(movie_list, rating_list):
    """
    Prints movie recommendations for the given movies and their ratings

    The movie label table is read from FILE_MOVIES_LABEL, every movie in it
    is scored with the stored keras model for the user returned by
    find_closest_user(), and the best scored titles that are not part of
    ``movie_list`` are printed together with their score.

    :param movie_list: movie titles used as input
    :param rating_list: ratings handed to find_closest_user() together with
        the labels of ``movie_list`` (presumably one rating per title)
    """
    catalogue = pd.read_csv(FILE_MOVIES_LABEL)

    liked_labels = convert_movie_names(catalogue, movie_list)
    catalogue[USER_LABEL] = find_closest_user(liked_labels, rating_list)

    model = keras.models.load_model(f"{MODEL_NAME}.h5")  # type:keras.Model
    catalogue[RATING] = model.predict(
        [catalogue[MOVIE_LABEL], catalogue[USER_LABEL]])

    # Take a few extra rows so that ten suggestions are left even when all
    # input movies are among the best scored ones
    shortlist = catalogue.sort_values(by=RATING, ascending=False)
    shortlist = shortlist.head(10 + len(movie_list))
    score_by_id = dict(zip(shortlist[MOVIE_ID], shortlist[RATING]))
    shortlisted_ids = shortlist[MOVIE_ID].values

    def score_of(movie_id):
        return score_by_id[movie_id]

    candidates = get_movies()
    is_shortlisted = candidates[MOVIE_ID].isin(shortlisted_ids)
    is_input_movie = candidates[MOVIE_TITLE].isin(movie_list)
    candidates = candidates[is_shortlisted & ~is_input_movie]
    candidates[RATING] = candidates[MOVIE_ID].map(score_of)
    candidates = candidates.sort_values(by=RATING, ascending=False)

    scores = candidates[RATING].values
    titles = candidates[MOVIE_TITLE].values
    print("Recommended Movies [score]")
    for rank, (title, score) in enumerate(zip(titles, scores), start=1):
        print(f"({rank}) {title} [{round(score, 3)}]")
示例#2
0
def convert_movie_names(all_movies, names):
    """
    Converts movie titles into the movie labels stored in ``all_movies``

    :param all_movies: table with the MOVIE_ID and MOVIE_LABEL columns
    :param names: movie titles, spelled exactly as in the MOVIE_TITLE
        column returned by get_movies()
    :return: list with one movie label per entry of ``names``, in the
        same order
    :raises KeyError: if a title is unknown, or if its movie has no label
        in ``all_movies``
    """
    movies = get_movies()
    id_to_title = dict(zip(movies[MOVIE_ID], movies[MOVIE_TITLE]))
    # Built from id_to_title so that a title shared by several IDs resolves
    # to the last of those IDs
    title_to_id = {title: movie_id for movie_id, title in id_to_title.items()}
    id_to_label = dict(zip(all_movies[MOVIE_ID], all_movies[MOVIE_LABEL]))

    # Convert movie names to ids
    input_movies = []
    for name in names:
        try:
            input_movies.append(title_to_id[name])
        except KeyError:
            # "from None" keeps the traceback free of the internal lookup
            raise KeyError(f"'{name}' was not found in the movie list. "
                           f"Please use another movie") from None

    # Convert movie IDs to movie labels
    labels = []
    for movie_id in input_movies:
        try:
            labels.append(id_to_label[movie_id])
        except KeyError:
            raise KeyError(f"'{id_to_title[movie_id]}' is not available in "
                           f"current analysis. "
                           f"Please use another movie") from None
    return labels
示例#3
0
def get_stats():
    """
    Prints basic statistics of the MovieLens database

    Reports the number of movies that have a year, the number of distinct
    genres among those movies, the smallest and largest year value, and
    the number of ratings and distinct users in FILE_RATINGS.
    """
    df = get_movies()
    df = df.fillna("")
    # Keep only the numeric characters of every year entry
    df[YEAR] = df[YEAR].map(
        lambda x: "".join([y for y in x if str(y).isnumeric()]))
    # Entries without any numeric character are empty now and get dropped
    df = df[df[YEAR] != ""]
    print(f"Total Movies : {len(df)}")
    # Collect the genres straight into a set. Summing the per-movie lists
    # copies the growing list once per row and is not guaranteed to yield
    # a list when no row is left.
    cats = set()
    for genres in df[MOVIE_GENRES]:
        cats.update(genres.split("|"))
    print(f"Total Categories : {len(cats)}")
    # NOTE(review): years are still strings here, so min/max compare them
    # lexicographically - confirm all entries have the same length
    print(f"From year {df[YEAR].min()} to {df[YEAR].max()}")
    df = pd.read_csv(FILE_RATINGS)
    print(f"Total ratings : {len(df)}")
    print(f"Total users : {df[USER_ID].nunique()}")
def generate_data(desired_movie="Jumanji"):
    """
    Runs the analysis for one movie and stores the results under "data/"

    The movies are enriched through add_tags() and add_ratings(), distance
    matrices are built for the TAG and MOVIE_GENRES columns, and
    generate_score() is called with the matrix rows of ``desired_movie``.
    Both matrices are saved with numpy and the titles with their rounded
    ratings are written to "data/names.csv".

    :param desired_movie: title of the reference movie, spelled exactly as
        in the MOVIE_TITLE column. Defaults to "Jumanji", the value that
        used to be hard-coded.
    :raises ValueError: if ``desired_movie`` is not among the titles
    """
    print(f"Analysis started for '{desired_movie}'")
    movies = get_movies()
    tags = add_tags(movies)
    ratings = add_ratings(tags)
    tag_dm = convert_to_distance(ratings, TAG)
    cat_dm = convert_to_distance(ratings, MOVIE_GENRES)

    # Position of the reference movie, used to pick its row from both
    # matrices
    titles = list(ratings[MOVIE_TITLE].values)
    try:
        ind = titles.index(desired_movie)
    except ValueError:
        raise ValueError(f"'{desired_movie}' was not found in the movie "
                         f"titles") from None

    generate_score(ratings, tag_dm[ind], cat_dm[ind])
    np.savez_compressed("data/tag_matrix", tag_dm)
    np.savez_compressed("data/genre_matrix", cat_dm)
    ratings[RATING] = ratings[RATING].round(4)
    ratings[[MOVIE_TITLE, RATING]].to_csv("data/names.csv", index=False)
    print("Analysis finished")
示例#5
0
def plot_movies():
    """
    Plots the number of movies per year in the MovieLens Database

    The chart is saved as "plot.png" and shown afterwards.
    """
    palette = Palette()
    plt.rcParams['axes.facecolor'] = palette.gray(shade=10)

    movies = get_movies().fillna("")
    movies = movies[movies[YEAR] != ""]
    # Strip everything that is not numeric before converting the years
    movies[YEAR] = movies[YEAR].map(
        lambda text: "".join(ch for ch in text if str(ch).isnumeric()))
    movies[YEAR] = pd.to_numeric(movies[YEAR])
    movies = movies[movies[YEAR] < 3020]
    total_movies = len(movies)

    per_year = movies.groupby(by=YEAR).count().sort_index().reset_index()
    counts = dict(zip(per_year[YEAR], per_year[MOVIE_ID]))

    # Bars are placed by position, one per year that has movies
    plt.bar(range(len(counts)),
            counts.values(),
            width=1,
            color=palette.aqua(shade=40),
            zorder=3)

    # Label every fifth bar with its year
    tick_labels = [int(year) for year in list(counts)[::5]]
    tick_positions = range(0, len(tick_labels) * 5, 5)
    plt.xticks(tick_positions, tick_labels, rotation=90)
    plt.ylabel("Frequency")
    plt.xlabel("Year", labelpad=10)
    plt.grid(ls="--", color=palette.gray(shade=30), zorder=-1)

    rc('text', usetex=True)
    caption = ("MovieLens Dataset\n"
               r"\small{(Total movies: " + str(total_movies) + r")}")
    caption_box = {"fc": palette.white(), "lw": 0.1, "pad": 10}
    plt.annotate(caption,
                 (0.1, 0.8),
                 xycoords="axes fraction",
                 fontsize=13,
                 bbox=caption_box)

    plt.tight_layout()
    plt.savefig("plot.png", dpi=150)
    plt.show()
示例#6
0
def prepare_data():
    """
    Loads the movies with get_movies() and prints them
    """
    movies = get_movies()
    print(movies)