def RMSerror(trainingFileName, testFileName):
    """Returns the RMS error of predictions made from the training file
    compared to the test file.

    The training file is loaded with part1.getUserData and the Pearson
    similarities are filled in by part1.setPearsons.  Every line of the
    test file is then parsed with part1.dataPattern; the first three
    matches on a line are read as user ID, movie ID and actual rating, and
    the rating is compared with part2.getPrediction for that user/movie.

    If either file cannot be opened, a message is printed and the process
    exits via sys.exit().

    Keyword arguments:
    trainingFileName -- partial set of user data
    testFileName -- remainder of user data

    Raises:
    ZeroDivisionError -- the test file contains no lines
    IndexError -- a test line yields fewer than three matches

    """
    try:
        trainingFile = open(trainingFileName, 'r')
        try:
            testFile = open(testFileName, 'r')
        except IOError:
            # Do not leak the handle that did open successfully.
            trainingFile.close()
            raise
    except IOError:
        print('Cannot open training/test files.')
        sys.exit()

    # Read everything needed from disk up front so both handles are released
    # even when parsing or prediction fails later on.
    try:
        users, numUsers, _ = part1.getUserData(trainingFile)
        testRatings = [part1.dataPattern.findall(line) for line in testFile]
    finally:
        trainingFile.close()
        testFile.close()

    part1.setPearsons(numUsers, users)

    squaredError = 0.0
    for fields in testRatings:
        userID = int(fields[0])
        movieID = int(fields[1])
        actual = float(fields[2])
        predicted = part2.getPrediction(users, userID, movieID)
        squaredError += (actual - predicted) ** 2

    return math.sqrt(squaredError / len(testRatings))
def part2Wrapper(dataFileName, simFileName):
    """Reads a u.data like file and a similarities file in the format used
    by module part1.  Returns a list of User objects which reflect the two
    input files.

    The similarities file is scanned line by line with idPat and
    pearsonPat; the resulting per-line match lists are handed, together
    with the users loaded by part1.getUserData, to setPearsonsMan.

    If either file cannot be opened, a message naming that file is printed
    and the process exits via sys.exit().

    Keyword arguments:
    dataFileName -- input file, like u.data
    simFileName -- similarities file, part1 module format

    """
    try:
        simFile = open(simFileName, 'r')
    except IOError:
        print('Cannot open similarities file: ' + simFileName + '\n')
        sys.exit()

    # The outer finally also runs when sys.exit() raises SystemExit below,
    # so the similarities handle is released on every path.
    try:
        try:
            uData = open(dataFileName, 'r')
        except IOError:
            print('Cannot open user file: ' + dataFileName + '\n')
            sys.exit()

        try:
            simLines = simFile.readlines()
            IDs = [idPat.findall(line) for line in simLines]
            PCCs = [pearsonPat.findall(line) for line in simLines]
            users, numUsers, _ = part1.getUserData(uData)
            setPearsonsMan(numUsers, users, IDs, PCCs)
        finally:
            uData.close()
    finally:
        simFile.close()

    return users
def part4Wrapper(dataFileName, recFileName):
    """Combines functionality from part4 module.  Takes a data file such as
    u.data and outputs the top three recommendations to recFileName.

    Users are loaded with part1.getUserData, similarities are filled in by
    part1.setPearsons, then getTopRecommendations and writeRecommendations
    are run in that order.  recFileName is opened in write mode, so any
    existing content is replaced.

    If either file cannot be opened, a message is printed and the process
    exits via sys.exit().

    Keyword arguments:
    dataFileName -- file name, format is u.data
    recFileName -- writes here

    """
    try:
        userData = open(dataFileName, "r")
        try:
            recFile = open(recFileName, "w")
        except IOError:
            # Do not leak the handle that did open successfully.
            userData.close()
            raise
    except IOError:
        print("Cannot use given filenames: " + str(dataFileName) + " "
              + str(recFileName))
        sys.exit()

    try:
        users, numUsers, numMovies = part1.getUserData(userData)
        part1.setPearsons(numUsers, users)
        # The return value was never used here.  The call is kept because
        # writeRecommendations only receives `users`, so presumably the
        # recommendations are stored on the user objects -- TODO confirm.
        getTopRecommendations(users, numMovies)
        writeRecommendations(users, recFile)
    finally:
        userData.close()
        recFile.close()