def get_correlation_coefficient_from_titles(self, title_a, title_b):
    """Return the correlation coefficient of two movies given by title.

    Each title is resolved with ``loadmovielens.give_me_movie_id`` against
    ``self.items_dictionary``.  When both titles resolve to exactly one
    match, the first element of each match is passed on to
    ``self.get_correlation_coefficient_from_ids``.

    Args:
        title_a: Search title of the first movie.
        title_b: Search title of the second movie.

    Returns:
        The value returned by ``self.get_correlation_coefficient_from_ids``
        for the two resolved ids.

    Raises:
        SystemExit: If either title has no match ("Movie titles not found")
            or either title has more than one match
            ("Ambiguous search titles").
    """
    movie_a_ids = loadmovielens.give_me_movie_id(title_a, self.items_dictionary)
    movie_b_ids = loadmovielens.give_me_movie_id(title_b, self.items_dictionary)

    # SystemExit is raised directly instead of calling the ``exit`` helper:
    # that helper is installed by the ``site`` module for interactive use and
    # does not exist under ``python -S`` or in frozen/embedded interpreters.
    # Callers still see the same exception type and message.
    if len(movie_a_ids) == 0 or len(movie_b_ids) == 0:
        raise SystemExit("Movie titles not found")
    if len(movie_a_ids) > 1 or len(movie_b_ids) > 1:
        raise SystemExit("Ambiguous search titles")

    # Element 0 of the single match is used as the movie id
    # (presumably matches are (id, title) pairs -- confirm in loadmovielens).
    return self.get_correlation_coefficient_from_ids(
        movie_a_ids[0][0], movie_b_ids[0][0])
""" test_cases = ast.literal_eval(sys.argv[1]) results = str(my_info()) + '\t\t' for test_case in test_cases: mode = test_case[0] id_1 = int(test_case[1]) id_2 = int(test_case[2]) if mode == 'jc': results += str(Jaccard_Coefficient(id_1, id_2)) + '\t\t' elif mode == 'cc': results += str(Correlation_Coefficient(id_1, id_2)) + '\t\t' else: exit('bad command') print results + '\n' if __name__ == '__main__': main() """print my_info()""" """print reader.read_movie_lens_data()""" print reader.give_me_movie_id('story', movie_dictionary) print reader.give_me_movie_id('GoldenEye', movie_dictionary) ratings, movie_dictionary, user_ids, item_ids, movie_names = reader.read_movie_lens_data() print len(ratings) i = 0 for x in range(0, len(ratings)):"search full db, when find, sum ratings i += 1 """print ratings[i], movie_names[i]""" print "i %d" % (i) print ratings[1], movie_names[1]
for movie_id in map_from_movie_to_user_id_list: ret.append((movie_id, fast_jaccard_coefficient(map_from_movie_to_user_id_list[movie_id], map_from_movie_to_user_id_list[target_movie_id]))) ret = sorted(ret, key=lambda x: -x[1]) while len(ret) > k + 1: ret.pop() return ret[1:k + 1] print "--- Jaccard coefficient between 'Toy Story' and 'GoldenEye' ---" toy_story_id = reader.give_me_movie_id('Toy Story', movie_dictionary)[0][0] golden_eye_id = reader.give_me_movie_id('GoldenEye', movie_dictionary)[0][0] print jaccard_coefficient(toy_story_id, golden_eye_id) print "--- Jaccard coefficient between 'Three Colors: Red' and 'Three Colors: Blue' ---" red_id = reader.give_me_movie_id('Three Colors: Red', movie_dictionary)[0][0] blue_id = reader.give_me_movie_id('Three Colors: Blue', movie_dictionary)[0][0] print jaccard_coefficient(red_id, blue_id) print "--- Closest 5 movies to 'Taxi Driver'---" taxi_driver_id = reader.give_me_movie_id('Taxi Driver', movie_dictionary)[0][0] taxi_passengers = find_k_closest(taxi_driver_id, 5) for t in taxi_passengers:
users1 = set(seen[id].keys()) for movie in item_ids: users2 = set(seen[movie].keys()) seenBoth = users1 & users2 if len(seenBoth) < 3: continue scores = [] users = [] for user in seenBoth: scores = np.append(scores, seen[id][user]) scores = np.append(scores, seen[movie][user]) users = np.append(users, user) users = np.append(users, user) coef = np.corrcoef(scores, users)[0][1] corcoefs[coef] = movie order = sorted(corcoefs, reverse=True) print movie_dictionary[id], id print "-----" for i in range(1, 6): print movie_dictionary[corcoefs[ order[i]]], "Correlation coefficient:", round(order[i], 3) id1 = reader.give_me_movie_id('toy', movie_dictionary)[0][0] id2 = reader.give_me_movie_id('little big', movie_dictionary)[0][0] #corCoef(id1, id2) corCoefOne(id1)
for movie_id in map_from_movie_to_user_id_list: ret.append((movie_id, fast_jaccard_coefficient( map_from_movie_to_user_id_list[movie_id], map_from_movie_to_user_id_list[target_movie_id]))) ret = sorted(ret, key=lambda x: -x[1]) while len(ret) > k + 1: ret.pop() return ret[1:k + 1] print "--- Jaccard coefficient between 'Toy Story' and 'GoldenEye' ---" toy_story_id = reader.give_me_movie_id('Toy Story', movie_dictionary)[0][0] golden_eye_id = reader.give_me_movie_id('GoldenEye', movie_dictionary)[0][0] print jaccard_coefficient(toy_story_id, golden_eye_id) print "--- Jaccard coefficient between 'Three Colors: Red' and 'Three Colors: Blue' ---" red_id = reader.give_me_movie_id('Three Colors: Red', movie_dictionary)[0][0] blue_id = reader.give_me_movie_id('Three Colors: Blue', movie_dictionary)[0][0] print jaccard_coefficient(red_id, blue_id) print "--- Closest 5 movies to 'Taxi Driver'---" taxi_driver_id = reader.give_me_movie_id('Taxi Driver', movie_dictionary)[0][0] taxi_passengers = find_k_closest(taxi_driver_id, 5) for t in taxi_passengers:
seenBoth = users1 & users2 if len(seenBoth) < len(users1) // 5: continue scores1 = [] scores2 = [] for user in seenBoth: scores1 = np.append(scores1, seen[id][user]) scores2 = np.append(scores2, seen[movie][user]) if np.std(scores1) == 0 or np.std(scores2) == 0: continue coef = np.corrcoef(scores1, scores2)[0][1] corcoefs[coef] = movie n[coef] = len(seenBoth) order = sorted(corcoefs, reverse=True) print movie_dictionary[id], id print "-----" for i in range(1, 6): print movie_dictionary[corcoefs[order[i]]], "Correlation:", round( order[i], 3), len(seen[corcoefs[order[i]]]) id1 = reader.give_me_movie_id('Three colors', movie_dictionary)[0][0] jaccardForOne(id1) corCoefOne(id1)