예제 #1
0
    def get_recommendations(self, request_data, top_k=None, min_count=10):
        """Recommend movies for the ratings carried in a request.

        The ratings are merged into a single column-oriented dict, tagged
        with a placeholder user id, and handed to ``self.model.recommend``
        as ``new_observation_data``. The recommendations are then joined
        with ``self.dataset.movies``, filtered and sorted by ``rank``.

        Args:
            request_data: Mapping with a ``'ratings'`` entry and an optional
                ``'filter'`` entry. Each rating is converted by
                ``util.map_dict_value_as_array`` and the merged result must
                contain a ``'movieId'`` key (exact record schema is defined
                by the caller -- TODO confirm). ``'filter'``, when present,
                holds the genre values to keep.
            top_k: Maximum number of results; a falsy value falls back to 20.
            min_count: Only movies whose ``ratingCount`` is strictly greater
                than this value are kept.

        Returns:
            The result of ``util.sframe_to_list`` applied to the filtered,
            rank-sorted recommendations and ``top_k``.
        """
        if not top_k:
            top_k = 20

        # Placeholder id under which the request's ratings are submitted.
        temp_user_id = 999999

        per_rating = map(util.map_dict_value_as_array, request_data['ratings'])
        observations = reduce(util.merge_two_dicts, per_rating)

        # Every observation row needs a user id; reuse the placeholder.
        observations['userId'] = [temp_user_id] * len(observations['movieId'])
        observations = gl.SFrame(observations)

        # k=5000: presumably over-fetched so that enough rows survive the
        # ratingCount / genre filters below -- TODO confirm.
        recommendations = self.model.recommend(
            users=[temp_user_id], k=5000, new_observation_data=observations)

        recommendations = self.dataset.movies.join(recommendations, on='movieId')
        recommendations = recommendations[recommendations['ratingCount'] > min_count]

        if 'filter' in request_data:
            # Expand to one (genre, movieId) row per genre of each movie so
            # the genre filter can be applied as a plain column match. Each
            # pair is emitted once; the ids are only used for the membership
            # filter below, so repeating pairs would add work without
            # changing the outcome.
            genre_rows = recommendations.flat_map(
                ['genres', 'movieId'],
                lambda row: [[genre, row['movieId']] for genre in row['genres']])
            matching = genre_rows.filter_by(request_data['filter'], 'genres')

            recommendations = recommendations.filter_by(matching['movieId'], 'movieId')

        recommendations = recommendations.remove_column('userId')
        recommendations = recommendations.sort('rank')

        return util.sframe_to_list(recommendations, top_k)
예제 #2
0
파일: dataset.py 프로젝트: marcuniq/yamr
    def get_random(self, top_k):
        """Return up to ``top_k`` movies taken from a random sample.

        Args:
            top_k: Maximum number of results; a falsy value falls back to 3.

        Returns:
            The result of ``util.sframe_to_list`` applied to the sample and
            the effective limit.
        """
        # Falsy top_k (None, 0, ...) means "use the default of three".
        limit = top_k or 3

        # sample(0.01): presumably a 1% random row sample of the movies
        # SFrame; the helper then converts it to a list of dicts.
        return util.sframe_to_list(self.movies.sample(0.01), limit)
예제 #3
0
파일: dataset.py 프로젝트: marcuniq/yamr
    def find_top_rated(self, min_count=50, top_k=20):
        """Return the best-rated movies that have enough ratings.

        Args:
            min_count: Only movies whose ``ratingCount`` is strictly greater
                than this value are considered.
            top_k: Maximum number of results. A falsy value falls back to 3.
                NOTE(review): the signature default is 20 while the fallback
                is 3 -- confirm which one is intended.

        Returns:
            The result of ``util.sframe_to_list`` applied to the sorted
            movies and the effective limit.
        """
        limit = top_k or 3

        movies = self.movies
        rated_enough = movies[movies['ratingCount'] > min_count]

        # Second positional argument False: presumably ascending=False,
        # i.e. highest average rating first.
        ranked = rated_enough.sort('ratingAvg', False)

        # Convert the SFrame to a list of dicts via the shared helper.
        return util.sframe_to_list(ranked, limit)
예제 #4
0
파일: dataset.py 프로젝트: marcuniq/yamr
    def search(self, query, top_k):
        """Return up to ``top_k`` movies whose title matches ``query``.

        ``query`` is interpolated into the case-insensitive regular
        expression ``.*<query>.*`` and matched against every title.

        Args:
            query: Text (interpreted as a regular expression fragment) to
                look for. An empty or otherwise falsy query matches nothing.
            top_k: Maximum number of results; a falsy value falls back to 3.

        Returns:
            The result of ``util.sframe_to_list`` applied to the matching
            movies and the effective limit.
        """
        if not top_k:
            top_k = 3

        # An empty query would compile to '.*.*' and match every title, so
        # substitute a placeholder that is not expected to occur in any
        # title (the original author was unsure this is a good idea).
        # Truthiness is used rather than `query is ''`: identity comparison
        # with a literal depends on string interning and is false for, e.g.,
        # a Python 2 unicode empty string.
        if not query:
            query = 'somestringthatmatchesnothing'

        # NOTE(review): the query is inserted verbatim, so regex
        # metacharacters are interpreted and a malformed pattern raises
        # re.error -- confirm whether callers rely on regex syntax before
        # escaping it.
        query_re = '.*{}.*'.format(query)
        p = re.compile(query_re, re.IGNORECASE)

        # apply() yields 1/0 per title; the resulting column is used as a
        # row mask on the movies SFrame.
        found_movies = self.movies[self.movies['title'].apply(lambda t: 1 if p.match(t) else 0)]

        # Convert the SFrame to a list of dicts via the shared helper.
        return util.sframe_to_list(found_movies, top_k)
예제 #5
0
# Script fragment: enrich movies with TMDb data in parallel, chunk by chunk,
# then look up release years for movies that still lack one. Names such as
# `chunks`, `original`, `enhanced`, `Client`, `util`, `tmdb_util`, `gl`,
# `time` and `dateutil` are defined/imported outside the visible region.

# Split on the "tmdbId" column: the first frame is kept for processing, the
# second holds the rows set aside (presumably those without a tmdbId).
not_yet_enhanced_movies, dropped_movies = not_yet_enhanced_movies.dropna_split(columns="tmdbId")

# create client & view
rc = Client()
dv = rc[:]  # use all engines
# NOTE(review): `v` is not used anywhere in the visible portion of the script.
v = rc.load_balanced_view()

# Imports performed under sync_imports() -- presumably mirrored on the
# engines of `dv` as well as locally; TODO confirm.
with dv.sync_imports():
    import tmdbsimple

    # API key is looked up via tmdbsimple's helper under the name TMDB_API_KEY.
    tmdbsimple.API_KEY = tmdbsimple._get_env_key("TMDB_API_KEY")
    import unicodedata

# Process the movies in pieces produced by chunks(..., 16) (presumably 16
# rows each -- helper not visible here).
for chunk in chunks(not_yet_enhanced_movies, 16):
    print "chunk starting with movieId %d" % chunk[0]["movieId"]
    chunk = util.sframe_to_list(chunk)
    # Fetch TMDb info for every movie of the chunk through the direct view.
    list_tmdb_info = dv.map_sync(tmdb_util.get_tmdb_info, chunk)
    # Convert each result and merge them all into one dict.
    mapped_tmdb_info = map(util.map_dict_list, list_tmdb_info)
    dict_tmdb_info = reduce(util.merge_two_dicts, mapped_tmdb_info)
    # An empty merged result ends the whole loop, not just this chunk.
    if len(dict_tmdb_info) == 0:
        break
    sf_tmdb_info = gl.SFrame(dict_tmdb_info)
    # Attach the fetched columns to the original movie rows by movieId.
    movie_enhanced = original.movies.join(sf_tmdb_info, on="movieId")
    enhanced = enhanced.append(movie_enhanced)
    # Exported inside the loop, so the CSV is rewritten after every chunk.
    enhanced.export_csv("datasets\\ml-latest-enhanced\\movies.csv")
    # Pause between chunks (presumably to stay within API rate limits).
    time.sleep(3)

# Split on the "year" column; only the second frame is used below.
movies_have_year, movies_no_year = enhanced.dropna_split(columns="year")
for movie in movies_no_year:
    # Look the movie up on TMDb and derive the year from its release date.
    info = tmdbsimple.Movies(movie["tmdbId"]).info()
    # NOTE(review): `year` is not consumed within the visible lines; the
    # script may continue beyond this point.
    year = dateutil.parser.parse(info["release_date"]).year