def prepare():
    """Run every database preparation stage against "movieRecommend".

    Each stage is called in order with the same Mongo handle, and the total
    wall-clock time is printed once all of them have finished. The runtimes
    in the stage comments are the rough figures noted by the original author.
    """
    print("[prepareDB] Starting pepare database...")
    began_at = time.time()
    database = Mongo("movieRecommend")

    stages = (
        movieLensParser.parse,  # store MovieLens data (runtime: 1~2 hours)
        anewParser.parse,       # add the full ANEW list (runtime: 0.05s)
        prepare_genres,         # pre-computation (runtime: few seconds)
        prepare_actors,         # pre-computation (runtime: few seconds)
        prepare_rankings,       # all kinds of ranking (runtime: few seconds)
        prepare_recommend,      # recommendations for all movies (1~2 hours)
    )
    for stage in stages:
        stage(database)

    elapsed = time.time() - began_at
    print("[prepareDB] Done (%0.2fs)." % elapsed)
def main():
    """Index imdb.movies by title, run retrieve(), then index title_full.

    The second index is created on the "movie" collection of the
    "movieRecommend" database after retrieve() has returned.
    """
    connection = Mongo("movieRecommend")

    title_key = [("title", pymongo.ASCENDING)]
    connection.client["imdb"]["movies"].create_index(title_key)
    print("[movieLensFullTitle] Created index for title in movies")

    retrieve(connection)

    full_title_key = [("title_full", pymongo.ASCENDING)]
    connection.client["movieRecommend"]["movie"].create_index(full_title_key)
    print("[movieLensFullTitle] Created index for title_full in movie")
def __init__(self, user_name, exist=False):
    """Set up a user backed by the "movieRecommend" database.

    Args:
        user_name: Stored on the instance as ``self.name``.
        exist: When true, the remaining state is populated by
            ``self.load_user()``; otherwise the user starts with empty
            tag-id and movie-id sets.
    """
    self.name = user_name
    self.mongo = Mongo("movieRecommend")
    # Hand over the connection created on the line above. The bare name
    # ``mongo`` is neither a parameter nor a local of this method, so the
    # previous ``MovieRecommend(mongo)`` depended on an unrelated global.
    self.recommend = MovieRecommend(self.mongo)
    if exist:
        self.load_user()
    else:
        self.tags = set()    # tag ids
        self.movies = set()  # movie ids
def main():
    """Manually exercise TextAnalytics on one sentence, one hashtag, one tweet.

    Results are printed to stdout. The commented assignments are alternative
    sample inputs that can be swapped in by hand.
    """
    analyzer = TextAnalytics(Mongo("movieRecommend"), anew=True, aylien=True)

    # --- sentence sentiment ---
    # sample_sentence = "Congrats to @HCP_Nevada on their health care headliner win"
    # sample_sentence = "b'I love you @iHeartRadio! I love you hooligans! love you Sriracha. I love you @LeoDiCaprio. Thinking of u @holyfield https://t.co/iPoHf03G4R'"
    sample_sentence = "The secret life of Walter Mitty is a fantastic movie"
    print("[TweetAnalytics] Evaluating sentence: " + sample_sentence)
    sentiment = analyzer.gain_sentiment(sample_sentence)
    print("[TweetAnalytics] Sentiment score: " + str(sentiment))

    # --- hashtag splitting ---
    # sample_hashtag = "NothingTo1990sDoA-story"
    # sample_hashtag = "9/11"
    # sample_hashtag = "1980sWhereAreYou"
    sample_hashtag = "Askthedragon"
    hashtag_words = analyzer.get_words_from_hashtag(sample_hashtag)
    print(hashtag_words)

    # --- tweet word extraction ---
    # sample_tweet = "@Ellichter if you make a left on Boo Boo lane youll end up @ the Ca Ca Mart. Thats where they sell a wide variety of Doo Doo Spread.. #ill"
    # sample_tweet = "This year for me is all about Touring and playing shows and i believe im going Everywhere! I cant wait to see you guys live & in concert!"
    # sample_tweet = "@TheEllenShow Thank you!! cant wait to see you in January.."
    # sample_tweet = "Performing in 5....4.....3.......2........."
    sample_tweet = "DAMN! Glo-Zell To da No! Why I gotta be 4-2? HAHAHA! http://www.youtube.com/watch?v=aQoDEZI4ces Watch Glozell Snap on me AGAIN! #Damn"
    tweet_words = analyzer.get_words_from_tweet(sample_tweet)
    print(tweet_words)
def main():
    """Run the keyword/tag integration steps and create their indexes.

    Steps run strictly in the order written; several of them are followed by
    an index creation on a collection of the "integration" database. The
    timings in the comments are the original author's rough measurements.
    """
    connection = Mongo()
    client = connection.client

    client["imdb"]["movies"].create_index([("keywords", pymongo.ASCENDING)])
    print("[keywordsCombine] Created index for keywords in movies")

    collect_from_keywords(client)  # 34 seconds
    collect_from_tags(client)      # 5 minutes
    combine(client)                # 8 seconds

    client["integration"]["integrated_tag"].create_index(
        [("tag", pymongo.ASCENDING)]
    )
    print("[keywordsCombine] Created index for tag in integrated_tag")

    # Not used because the result is inaccurate:
    # fix_popularity(client)  # 3 seconds

    imdbPeopleIndex.build(connection)  # 3 minutes
    store_people_name_only(client)     # 36 seconds
    reconstruct_tags(client)           # 6 seconds

    client["integration"]["normalized_tags"].create_index(
        [("tag", pymongo.ASCENDING)]
    )
    print("[keywordsCombine] Created index for tag in normalized_tags")

    count_movies_with_tags(client)  # 3 seconds
    copy_movies(client)             # 15 seconds

    client["integration"]["copy_movies"].create_index(
        [("imdbtitle", pymongo.ASCENDING)]
    )
    print("[keywordsCombine] Created index for imdbtitle in copy_movies")
def main():
    """Manually exercise MovieRecommend.recommend_movies_based_on_history().

    A fixed viewing history is passed to the recommender, the returned ids
    are converted with get_titles_by_mids(), and each title is printed
    UTF-8 encoded.
    """
    engine = MovieRecommend(Mongo("movieRecommend"))

    # Disabled manual checks, kept as a reference of sample inputs:
    #   recommend_movies_for_user(4)                  -> print_recommend(...)
    #   recommend_movies_based_on_tags([28, 387, 599, 704, 794])
    #   recommend_movies_based_on_tags(
    #       ["adventure", "feel-good", "life", "new york city", "police"],
    #       tagid=False)                              -> print_recommend(...)
    #   recommend_movies_for_movie(1)                 # "Toy Story (1995)"
    #   recommend_movies_for_twitter(screen_name)     -> get_titles_by_mids(...)
    #   recommend_movies_for_twitter_integrated(screen_name)
    #       screen names tried: "BrunoMars", "LeoDiCaprio", "BarackObama",
    #       "sundarpichai", "BillGates", "jhsdfjak"
    #   recommend_movies_based_on_tags_integrated(
    #       ["adventure", "feel good", "life", "new york city", "police"])

    print(
        "[MovieRecommend] ***** Unit test for recommend_movies_based_on_history() *****"
    )
    watched_titles = [
        "Toy Story (1995)",
        "Big Hero 6 (2014)",
        "X-Men: Days of Future Past (2014)",
        "The Lego Movie (2014)",
        "The Secret Life of Walter Mitty (2013)",
        "Death Note: Desu nto (2006)",
        "Zombieland (2009)",
        "Fifty Shades of Grey (2015)",
        "The Maze Runner (2014)",
    ]
    movie_ids = engine.recommend_movies_based_on_history(watched_titles)
    titles = engine.get_titles_by_mids(movie_ids)
    for title in titles:
        print(title.encode("utf8"))
def main():
    """Run parse() with a Mongo handle opened on "movieRecommend"."""
    parse(Mongo("movieRecommend"))
def main():
    """Add the full ANEW list to the database via parse().

    Runtime noted by the original author: about 0.05s.
    """
    parse(Mongo("movieRecommend"))
def main():
    """Run retrieve() with a Mongo handle opened on "movieRecommend"."""
    retrieve(Mongo("movieRecommend"))
def main():
    """Run build() with a Mongo handle opened on "imdb"."""
    build(Mongo("imdb"))
def main():
    """Manually exercise Tweepy.extract_profile() for one screen name."""
    connection = Mongo("movieRecommend")
    api = Tweepy(connection)

    # Other calls that can be enabled by hand:
    # api.get_rate_limit()
    # api.extract_profile("LeoDiCaprio")
    # api.extract_profile("BrunoMars")
    api.extract_profile("jhsdfjak")