def RMSerror(trainingFileName, testFileName):
    """Returns the RMS error of predictions made from the training file
    compared to the ratings in the test file.

    Keyword arguments:
    trainingFileName -- partial set of user data
    testFileName -- remainder of user data

    Returns the root-mean-square difference between each test rating and
    part2.getPrediction() for the same user/movie, or 0.0 when the test
    file contains no usable rating lines.

    Prints a message and exits via sys.exit() if either file cannot be
    opened.
    """
    trainingFile = None
    testFile = None
    try:
        trainingFile = open(trainingFileName, 'r')
        testFile = open(testFileName, 'r')
    except IOError:
        # The second open may fail after the first succeeded; do not leak it.
        if trainingFile is not None:
            trainingFile.close()
        print('Cannot open training/test files.')
        sys.exit()

    try:
        users, numUsers, numMovies = part1.getUserData(trainingFile)
        # One token list per line of the test file.
        testRatings = [part1.dataPattern.findall(line) for line in testFile]
    finally:
        # Everything needed has been read; release the handles on all paths.
        trainingFile.close()
        testFile.close()

    part1.setPearsons(numUsers, users)

    squaredErrorSum = 0.0
    numScored = 0
    for fields in testRatings:
        # Blank or truncated lines do not yield the three tokens indexed
        # below (user, movie, rating); skip them instead of crashing.
        if len(fields) < 3:
            continue
        userID = int(fields[0])
        movieID = int(fields[1])
        actual = float(fields[2])
        predicted = part2.getPrediction(users, userID, movieID)
        squaredErrorSum += (actual - predicted) ** 2
        numScored += 1

    if numScored == 0:
        # Nothing to compare against; avoid dividing by zero.
        return 0.0
    return math.sqrt(squaredErrorSum / numScored)
def part4Wrapper(dataFileName, recFileName):
    """Combines functionality from part4 module. Takes a data file such as
    u.data and writes the top recommendations to recFileName.

    Keyword arguments:
    dataFileName -- file name, format is u.data
    recFileName -- writes here

    Prints a message and exits via sys.exit() if either file cannot be
    opened. Both files are closed before returning, including when one of
    the processing steps raises.
    """
    userData = None
    recFile = None
    try:
        userData = open(dataFileName, "r")
        recFile = open(recFileName, "w")
    except IOError:
        # The output open may fail after the input open succeeded.
        if userData is not None:
            userData.close()
        print("Cannot use given filenames: " + str(dataFileName) + " " + str(recFileName))
        sys.exit()

    try:
        users, numUsers, numMovies = part1.getUserData(userData)
        part1.setPearsons(numUsers, users)
        # NOTE(review): the return value was bound to an unused local, and
        # writeRecommendations() is handed `users`, so presumably the
        # recommendations are recorded on `users` as a side effect --
        # confirm against getTopRecommendations().
        getTopRecommendations(users, numMovies)
        writeRecommendations(users, recFile)
    finally:
        # Close (and thereby flush) the files even if a step above fails.
        userData.close()
        recFile.close()