def get_recommendations(self, request_data, top_k=None, min_count=10):
    """Recommend movies for the ad-hoc ratings carried by a request.

    The ratings are submitted to ``self.model`` as new observations of a
    placeholder user, the resulting candidates are joined with
    ``self.dataset.movies``, thinned out by rating count and (optionally)
    by genre, and finally ordered by the model's ``rank`` column.

    Args:
        request_data: Mapping with a ``'ratings'`` entry and an optional
            ``'filter'`` entry. Each rating is passed through
            ``util.map_dict_value_as_array`` and the results are merged
            with ``util.merge_two_dicts``; the merged dict must contain a
            ``'movieId'`` key. ``'filter'`` is handed to ``filter_by`` on
            the ``genres`` column (presumably a list of genre names --
            confirm against the caller).
        top_k: Maximum number of entries passed on to
            ``util.sframe_to_list``; any falsy value means 20.
        min_count: Only movies whose ``ratingCount`` is strictly greater
            than this value are kept.

    Returns:
        Whatever ``util.sframe_to_list`` produces for the ranked
        recommendations and ``top_k``.

    Raises:
        TypeError: If ``request_data['ratings']`` is empty, because
            ``reduce`` is called without an initial value.
    """
    # ``reduce`` is a builtin on Python 2 only; functools provides it on
    # both Python 2.6+ and Python 3.
    from functools import reduce

    # Placeholder user id under which the request's ratings are submitted.
    temp_user_id = 999999
    # Number of candidates requested from the model before filtering.
    candidate_pool_size = 5000

    if not top_k:
        top_k = 20

    mapped = map(util.map_dict_value_as_array, request_data['ratings'])
    observations = reduce(util.merge_two_dicts, mapped)
    nb_ratings = len(observations['movieId'])
    observations['userId'] = [temp_user_id] * nb_ratings
    observations = gl.SFrame(observations)

    recommendations = self.model.recommend(
        users=[temp_user_id],
        k=candidate_pool_size,
        new_observation_data=observations)
    recommendations = self.dataset.movies.join(recommendations, on='movieId')
    recommendations = recommendations[recommendations['ratingCount'] > min_count]

    if 'filter' in request_data:
        # Expand each movie into one (genre, movieId) row per genre. One row
        # per pair is enough: the rows are only used to collect the ids of
        # movies that carry at least one requested genre.
        genre_rows = recommendations.flat_map(
            ['genres', 'movieId'],
            lambda row: [[genre, row['movieId']] for genre in row['genres']])
        matching = genre_rows.filter_by(request_data['filter'], 'genres')
        recommendations = recommendations.filter_by(matching['movieId'], 'movieId')

    # The placeholder user id carries no information for the caller.
    recommendations = recommendations.remove_column('userId')
    recommendations = recommendations.sort('rank')
    return util.sframe_to_list(recommendations, top_k)
def get_random(self, top_k):
    """Return a random selection of movies in list form.

    ``self.movies`` is sampled with a fraction of 0.01 and the sample is
    handed to ``util.sframe_to_list`` together with ``top_k``.

    Args:
        top_k: Limit passed on to ``util.sframe_to_list``; any falsy value
            (``None``, ``0``) is replaced by 3.

    Returns:
        The result of ``util.sframe_to_list`` for the sampled movies.
    """
    limit = top_k or 3
    # util.sframe_to_list turns the sampled SFrame into a plain list.
    return util.sframe_to_list(self.movies.sample(0.01), limit)
def find_top_rated(self, min_count=50, top_k=20):
    """Return the best-rated movies that have enough ratings, in list form.

    Movies whose ``ratingCount`` is strictly greater than ``min_count`` are
    sorted on ``ratingAvg`` (second ``sort`` argument ``False``, i.e. not
    ascending) and handed to ``util.sframe_to_list`` together with
    ``top_k``.

    Args:
        min_count: Exclusive lower bound on ``ratingCount``.
        top_k: Limit passed on to ``util.sframe_to_list``; any falsy value
            (``None``, ``0``) is replaced by 3.

    Returns:
        The result of ``util.sframe_to_list`` for the sorted movies.
    """
    # NOTE(review): the falsy fallback (3) differs from the signature
    # default (20) -- confirm whether that is intended.
    limit = top_k or 3
    has_enough_ratings = self.movies['ratingCount'] > min_count
    ranked = self.movies[has_enough_ratings].sort('ratingAvg', False)
    # util.sframe_to_list turns the sorted SFrame into a plain list.
    return util.sframe_to_list(ranked, limit)
def search(self, query, top_k):
    """Return movies whose title matches ``query``, ignoring case.

    ``query`` is embedded in the pattern ``.*<query>.*`` and compiled as a
    regular expression, so regex syntax in the query is honoured. A query
    that is not a valid regular expression is searched for literally
    instead of raising.

    Args:
        query: Text (or regular expression) to look for in the ``title``
            column of ``self.movies``. An empty or ``None`` query yields
            no matches rather than every movie.
        top_k: Limit passed on to ``util.sframe_to_list``; any falsy value
            (``None``, ``0``) is replaced by 3.

    Returns:
        The result of ``util.sframe_to_list`` for the matching movies.
    """
    if not top_k:
        top_k = 3

    if not query:
        # An empty pattern would match every title. Substitute text that is
        # not expected to occur in any title so that nothing is returned.
        # (Truthiness is used instead of ``is ''``: identity comparison with
        # a literal is unreliable and never holds for ``u''``.)
        query = 'somestringthatmatchesnothing'

    try:
        pattern = re.compile('.*{}.*'.format(query), re.IGNORECASE)
    except re.error:
        # The query is not valid regex syntax (e.g. an unbalanced
        # parenthesis typed by a user): search for it as literal text.
        pattern = re.compile('.*{}.*'.format(re.escape(query)), re.IGNORECASE)

    # Build a 1/0 mask over the titles and use it to select the rows.
    title_matches = self.movies['title'].apply(
        lambda title: 1 if pattern.match(title) else 0)
    found_movies = self.movies[title_matches]
    # util.sframe_to_list turns the selected SFrame rows into a plain list.
    return util.sframe_to_list(found_movies, top_k)
not_yet_enhanced_movies, dropped_movies = not_yet_enhanced_movies.dropna_split(columns="tmdbId") # create client & view rc = Client() dv = rc[:] # use all engines v = rc.load_balanced_view() with dv.sync_imports(): import tmdbsimple tmdbsimple.API_KEY = tmdbsimple._get_env_key("TMDB_API_KEY") import unicodedata for chunk in chunks(not_yet_enhanced_movies, 16): print "chunk starting with movieId %d" % chunk[0]["movieId"] chunk = util.sframe_to_list(chunk) list_tmdb_info = dv.map_sync(tmdb_util.get_tmdb_info, chunk) mapped_tmdb_info = map(util.map_dict_list, list_tmdb_info) dict_tmdb_info = reduce(util.merge_two_dicts, mapped_tmdb_info) if len(dict_tmdb_info) == 0: break sf_tmdb_info = gl.SFrame(dict_tmdb_info) movie_enhanced = original.movies.join(sf_tmdb_info, on="movieId") enhanced = enhanced.append(movie_enhanced) enhanced.export_csv("datasets\\ml-latest-enhanced\\movies.csv") time.sleep(3) movies_have_year, movies_no_year = enhanced.dropna_split(columns="year") for movie in movies_no_year: info = tmdbsimple.Movies(movie["tmdbId"]).info() year = dateutil.parser.parse(info["release_date"]).year