示例#1
0
def get_meta(title, m_id):
    """Collect display metadata, top cast, directors and an IMDb link.

    Args:
        title: Movie title. Matched exactly against ``original_title`` in the
            filtered movies table and against ``title`` in the credits CSV.
        m_id: TMDB id used to look up the IMDb id in the MovieLens-to-TMDB
            link CSV. May be ``None`` or otherwise unusable; an IMDb title
            search URL is returned in that case.

    Returns:
        A tuple ``(df_title, cast, crew, imdb_link)`` where ``df_title`` is a
        pandas Series holding the selected attributes of the first matching
        movie, ``cast`` is a list of up to five cast names, ``crew`` is the
        list of names whose job is ``"Director"``, and ``imdb_link`` is a URL
        string.

    Raises:
        IndexError: If ``title`` has no match in the movies or credits data.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import quote_plus

    rec = Recommendation()
    rec.filter_genres()
    rec.filter_productions()
    df_movies = rec.md
    df_credits = pd.read_csv(PATH_CREDITS)
    df_imdb_link = pd.read_csv(PATH_MOVIELENS_TO_TMDB)
    attributes = [
        "id", "original_title", "genres", "homepage", "overview",
        "release_date", "production_companies", "runtime", "tagline",
        "vote_average", "vote_count",
    ]

    # Filter with a boolean mask and take the first hit positionally. Feeding
    # an index *label* to ``iloc`` is only correct for a default RangeIndex.
    df_title = df_movies[df_movies["original_title"] ==
                         title].iloc[0][attributes]
    df_crew = df_credits[df_credits["title"] == title].iloc[0][[
        "cast", "crew"
    ]]

    # The cast/crew cells are parsed with literal_eval into lists of dicts.
    cast = [member["name"] for member in literal_eval(df_crew["cast"])[:5]]
    crew = [
        member["name"] for member in literal_eval(df_crew["crew"])
        if member["job"] == "Director"
    ]

    try:
        imdb_ids = df_imdb_link.loc[df_imdb_link["tmdbId"] == int(m_id),
                                    "imdbId"]
        # int() normalises both 123 and 123.0 to "123"; slicing off a
        # presumed ".0" suffix would corrupt ids that are already integers.
        imdb_id = str(int(imdb_ids.iloc[0]))
        imdb_link = ("https://www.imdb.com/title/tt" +
                     imdb_id.zfill(IMDB_ID_LEN))
    except (IndexError, KeyError, TypeError, ValueError):
        # No usable TMDB id, no matching row, or a missing/NaN IMDb id:
        # fall back to an IMDb search for the (URL-encoded) title.
        imdb_link = ("https://www.imdb.com/search/title/?title=" +
                     quote_plus(title))

    return df_title, cast, crew, imdb_link
示例#2
0
def home():
    """Render the homepage, or a recommendations page driven by query args.

    Query parameters read from ``request.args``:
        recommend: Movie title to base content-based recommendations on.
        genres: Genre name for a top-movies listing; ``"All"`` disables the
            genre filter.
        offset: Paging offset for the genre listing. Missing, malformed or
            negative values are treated as 0.

    Returns:
        The rendered ``recommendations.html`` or ``homepage.html`` template.

    Any failure while building title-based recommendations is logged and
    answered with a 404 via ``abort``.
    """
    # Local import: the file's top-level import block is not visible here.
    import logging

    if "recommend" in request.args:
        title = request.args["recommend"]
        try:
            rec = ContentBased()
            df = rec.recommend(title,
                               DEFAULT_LIMIT,
                               full_search=True,
                               keywords_and_desc=False,
                               critics=False)
            poster_paths = get_poster_paths(df["id"].tolist(),
                                            df["original_title"].tolist())

            # A non-empty changed_title that differs from the query is
            # surfaced to the template as a "did you mean" suggestion.
            did_you_mean = (rec.changed_title != title
                            and rec.changed_title != "")
            if not did_you_mean:
                rec.changed_title = title

            rec_title_meta = get_meta(rec.changed_title, None)
            rec_id = rec_title_meta[0]["id"]

            return render_template('recommendations.html',
                                   titles=df["original_title"].tolist(),
                                   images=poster_paths,
                                   votes=df["vote_average"].tolist(),
                                   m_id=df["id"].tolist(),
                                   rec_title=rec.changed_title,
                                   rec_id=rec_id,
                                   did_you_mean=did_you_mean)
        except Exception:
            # Request boundary: keep answering 404, but record the cause and
            # let KeyboardInterrupt/SystemExit propagate.
            logging.getLogger(__name__).exception(
                "Could not build recommendations for %r", title)
            abort(404)
    elif "genres" in request.args:
        requested_genre = request.args["genres"]
        genre = None if requested_genre == "All" else requested_genre
        # get(..., type=int) returns the default instead of raising when the
        # value is missing or not an integer; negative offsets are clamped.
        offset = max(request.args.get("offset", default=0, type=int), 0)

        gen_rec = Recommendation()
        gen_rec.filter_genres()
        df = gen_rec.top_movies(gen_rec.md,
                                percentile=0.85,
                                limit=DEFAULT_LIMIT,
                                offset=offset,
                                genre=genre)
        poster_paths = get_poster_paths(df["id"].tolist(),
                                        df["original_title"].tolist())

        return render_template('recommendations.html',
                               titles=df["original_title"].tolist(),
                               images=poster_paths,
                               votes=df["vote_average"].tolist(),
                               m_id=df["id"].tolist(),
                               rec_title=requested_genre,
                               offset=offset,
                               next_offset=offset + DEFAULT_LIMIT,
                               prev_offset=offset - DEFAULT_LIMIT,
                               rec_id=None,
                               did_you_mean=None)
    else:
        return render_template('homepage.html')
    def recommend(self,
                  title,
                  limit,
                  critics=False,
                  full_search=False,
                  use_pickle=True,
                  keywords_and_desc=False):
        """Return movies similar to ``title`` using content-based similarity.

        Args:
            title: Movie title (as in the TMDB dataset); resolved to a row
                index via ``self.verify_title``.
            limit: Number of movies to return.
            critics: If True, pass the candidates through
                ``Recommendation.top_movies`` (percentile 0.50) instead of
                returning them in similarity order. Default False.
            full_search: If True, use the keywords metadata table with
                ``self.countvectorize``; if False, use the description table
                with ``self.tfidf``. Ignored when ``keywords_and_desc`` is
                True. Default False.
            use_pickle: If True, reuse pickled metadata tables when they
                exist; otherwise rebuild them and overwrite the pickles.
                Default True.
            keywords_and_desc: If True, add the keyword and description
                similarity matrices together. Default False.

        Returns:
            pandas DataFrame. When ``critics`` is False it has the columns
            original_title, id, vote_average, vote_count, popularity and
            release_date; otherwise it is whatever ``top_movies`` returns.
        """
        rec = Recommendation()
        rec.filter_genres()
        title_index = self.verify_title(rec.md, title)

        if keywords_and_desc:
            # Load or rebuild each table on its own, so a missing pickle or
            # use_pickle=False no longer leaves rec_matrix/df unassigned.
            if isfile(PATH_PICKLE_KEYWORDS) and use_pickle:
                df_keywords = pd.read_pickle(PATH_PICKLE_KEYWORDS)
            else:
                df_keywords = self.make_keywords(rec.md)
                df_keywords.to_pickle(PATH_PICKLE_KEYWORDS)
            if isfile(PATH_PICKLE_DESC) and use_pickle:
                df_desc = pd.read_pickle(PATH_PICKLE_DESC)
            else:
                df_desc = self.make_desc(rec.md)
                df_desc.to_pickle(PATH_PICKLE_DESC)
            rec_matrix = (self.countvectorize(df_keywords) +
                          self.tfidf(df_desc))
            df = df_keywords
        elif full_search:
            if isfile(PATH_PICKLE_KEYWORDS) and use_pickle:
                df = pd.read_pickle(PATH_PICKLE_KEYWORDS)
            else:
                df = self.make_keywords(rec.md)
                df.to_pickle(PATH_PICKLE_KEYWORDS)
            rec_matrix = self.countvectorize(df)
        else:
            if isfile(PATH_PICKLE_DESC) and use_pickle:
                df = pd.read_pickle(PATH_PICKLE_DESC)
            else:
                df = self.make_desc(rec.md)
                df.to_pickle(PATH_PICKLE_DESC)
            rec_matrix = self.tfidf(df)

        rec_movie = rec_matrix[title_index]
        # Highest scores first; the top entry is skipped (presumably the
        # queried movie itself) and at most SCAN_SIZE candidates are kept.
        ids = rec_movie.argsort()[::-1][1:SCAN_SIZE + 1]

        if critics:
            return rec.top_movies(df.iloc[ids],
                                  percentile=0.50,
                                  limit=limit,
                                  offset=0)

        return df.iloc[ids[:limit]][[
            "original_title",
            "id",
            "vote_average",
            "vote_count",
            "popularity",
            "release_date",
        ]]