def RMSerror(trainingFileName, testFileName):
    """Returns the RMS error of predictions made from the training file
    compared to the ratings in the test file.

    The training file is loaded with part1.getUserData and the similarity
    coefficients are computed with part1.setPearsons. Every rating found in
    the test file is then compared against part2.getPrediction for the same
    user and movie.

    Prints a message and exits through sys.exit() if either file cannot be
    opened. Returns 0.0 when the test file contains no ratings.

    Keyword arguments:
    trainingFileName -- partial set of user data
    testFileName -- remainder of user data

    """
    try:
        trainingFile = open(trainingFileName, 'r')
        try:
            testFile = open(testFileName, 'r')
        except IOError:
            # Do not leak the first handle when the second open fails.
            trainingFile.close()
            raise
    except IOError:
        print('Cannot open training/test files.')
        sys.exit()

    # Both handles are closed on exit, even if parsing raises.
    with trainingFile, testFile:
        users, numUsers, numMovies = part1.getUserData(trainingFile)
        testLines = testFile.readlines()

    part1.setPearsons(numUsers, users)

    # Build a real list (not a lazy map object) so len() works on any
    # Python version. Lines on which the pattern finds nothing (e.g. blank
    # lines) carry no rating and are skipped instead of raising IndexError.
    testRatings = []
    for line in testLines:
        fields = part1.dataPattern.findall(line)
        if fields:
            testRatings.append(fields)

    n = len(testRatings)
    if n == 0:
        # Nothing to compare against; avoid dividing by zero.
        return 0.0

    squaredErrorSum = 0.0
    for fields in testRatings:
        userID = int(fields[0])
        movieID = int(fields[1])
        actual = float(fields[2])
        predicted = part2.getPrediction(users, userID, movieID)
        squaredErrorSum += (actual - predicted) ** 2
    return math.sqrt(squaredErrorSum / n)
def getTopRecommendations(users, numMovies):
    """Stores a prediction on every user for each movie they have not rated
    and returns, per user, the first three entries of their predictions.

    Users must already have their similarity coefficients calculated!

    Keyword arguments:
    users -- list of User objects
    numMovies -- highest movie ID; IDs 1 through numMovies are considered

    """
    recommendations = []
    for person in users:
        alreadyRated = person.getRatings()
        for movieID in range(1, numMovies + 1):
            if movieID in alreadyRated:
                continue
            predicted = part2.getPrediction(users, person.getNumber(), movieID)
            person.addPrediction(movieID, predicted)
        # NOTE(review): presumably getPredictions() is ordered best-first, so
        # the slice yields the top 3 -- confirm against the User class.
        recommendations.append(person.getPredictions()[:3])
    return recommendations