import numpy as np
import pandas as pd

# pprint, MAX_SEED and test_train_split are defined elsewhere in this module.


def test_train_inPlaceSplit_Frame(ratingList, testSize=0.2, relativeSplit=True,
                                  shuffle=False, random_state=None, axis=0):
    """Split each user's ratings into test/train sets, zeroing the test ratings in place."""
    test = []
    train = []
    if shuffle is True:
        random_state = int(np.random.rand() * MAX_SEED) if random_state is None else random_state
        pprint("-> Random State %d" % random_state)
    group = ratingList.groupby('userId')
    for key in group.groups.keys():
        # Split this user's rating indices into held-out and kept entries.
        iTest, iTrain = test_train_split(pd.Series(group.groups[key]), testSize,
                                         relativeSplit, shuffle, random_state, axis)
        test.extend(iTest)
        train.extend(iTrain)
    testSet = ratingList.loc[test]
    # Zero out the held-out ratings instead of dropping the rows.
    for key in test:
        ratingList.loc[key, 'rating'] = 0
    return (testSet, ratingList)
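# Minimal usage sketch for the split above (illustration only; the toy frame and
# values are hypothetical, but the column names match what the function expects).
# Note the in-place behaviour: the second returned frame is the same object as the
# input, with the held-out ratings set to 0 rather than dropped. With testSize=1
# and relativeSplit=False this should hold out one rating per user, matching the
# call made in run().
toyRatings = pd.DataFrame({
    'userId': [1, 1, 1, 2, 2, 2],
    'itemId': [10, 11, 12, 10, 11, 12],
    'rating': [4, 3, 5, 2, 4, 1],
})
testSet, trainSet = test_train_inPlaceSplit_Frame(
    toyRatings, testSize=1, relativeSplit=False, shuffle=False)
# testSet holds the held-out rows; trainSet is toyRatings itself with those rows'
# 'rating' column zeroed.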
def calculate(self, ratingTable, avgRating, **params):
    """Store the hybrid's two component algorithms and the blending weight alpha."""
    self.name = "%s+%s" % (params["algo1"].name, params["algo2"].name)
    pprint('Calculating %s Scores' % self.name)
    self.alpha = params["alpha"]
    self.algo1 = params["algo1"]
    self.algo2 = params["algo2"]
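# calculate above only stores alpha and the two component algorithms; the blending
# step itself is not part of this excerpt. For illustration, a conventional convex
# combination of the two similarity frames would look like the hypothetical helper
# below -- an assumption about a standard hybrid scheme, not necessarily what
# Hybrid.predict actually does.
def _blend_scores(score1, score2, alpha):
    """Weighted sum of two user-user similarity frames: alpha*score1 + (1-alpha)*score2."""
    return alpha * score1 + (1 - alpha) * score2

# e.g. hybridScore = _blend_scores(self.algo1.score, self.algo2.score, self.alpha)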
def calculate(self, ratingTable, avgRating, **params):
    """Build the user-user similarity frame from the personality scores."""
    pprint('Calculating %s Scores' % self.name)
    personalityScoresFrame = pd.DataFrame(index=ratingTable.columns)
    for i in ratingTable.columns:
        personalityScoresFrame[i] = self.__personalityScoreUsers(
            i, ratingTable, params["persScores"])
    self.score = personalityScoresFrame
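# __personalityScoreUsers is not shown in this excerpt. A common choice for scoring
# user-user similarity from a personality dataset (e.g. Big Five trait vectors keyed
# by userId) is cosine similarity between the trait vectors; the hypothetical helper
# below sketches that idea and is not the project's actual implementation.
import numpy as np

def _personality_cosine(userA, userB, persScores):
    """Cosine similarity between two users' personality trait vectors."""
    a = persScores.loc[userA].to_numpy(dtype=float)
    b = persScores.loc[userB].to_numpy(dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float(a.dot(b) / denom) if denom else 0.0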
def calculate(self, ratingTable, avgRating, **params):
    """Build the user-user Pearson similarity frame."""
    pprint('Calculating %s Scores' % self.name)
    gamma = params["k"]
    pearsonScoresFrame = pd.DataFrame(index=ratingTable.columns)
    for i in ratingTable.columns:
        pearsonScoresFrame[i] = self.__pearsonScoreUsers(i, ratingTable, avgRating, gamma)
    self.score = pearsonScoresFrame
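# __pearsonScoreUsers is private and not shown here. For reference, a standard
# mean-centred user-user Pearson correlation over co-rated items looks like the
# hypothetical sketch below; the gamma (params["k"]) argument above is commonly
# used for significance weighting, which this sketch omits. This is an assumption,
# not the project's exact implementation.
import numpy as np

def _pearson_user_similarity(userA, userB, ratingTable, avgRating):
    """Pearson correlation between two users, computed on items both have rated."""
    ra, rb = ratingTable[userA], ratingTable[userB]
    coRated = (ra > 0) & (rb > 0)  # assumes unrated items are stored as 0
    if not coRated.any():
        return 0.0
    da = ra[coRated] - avgRating[userA]
    db = rb[coRated] - avgRating[userB]
    denom = np.sqrt(float((da ** 2).sum()) * float((db ** 2).sum()))
    return float((da * db).sum() / denom) if denom else 0.0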
def calculate(self, ratingTable, avgRating, **params):
    """Build the user-user PIP similarity frame, normalising each column."""
    pprint('Calculating %s Scores' % self.name)
    pipScoresFrame = pd.DataFrame(index=ratingTable.columns)
    for i in ratingTable.columns:
        pipScoresFrame[i] = self.__pipScoreUsers(i, ratingTable, params["itemsAvgRating"])
        pipScoresFrame[i] = normalizeScore(pipScoresFrame[i])
    self.score = pipScoresFrame
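# __pipScoreUsers is not shown in this excerpt, but the itemsAvgRating parameter
# matches the Popularity factor of the PIP (Proximity-Impact-Popularity) measure
# of Ahn (2008). The hypothetical sketch below follows that published formulation
# for a single co-rated item on an assumed 1-5 scale; the project's code may differ
# in detail. Raw PIP sums are unbounded, which is why the frame is normalised above
# with normalizeScore.
def _pip_pair(r1, r2, itemAvg, rMin=1, rMax=5):
    """PIP contribution of one co-rated item, given two users' ratings r1 and r2."""
    rMed = (rMin + rMax) / 2
    agreement = not ((r1 > rMed and r2 < rMed) or (r1 < rMed and r2 > rMed))
    distance = abs(r1 - r2) if agreement else 2 * abs(r1 - r2)
    proximity = (2 * (rMax - rMin) + 1 - distance) ** 2
    impact = (abs(r1 - rMed) + 1) * (abs(r2 - rMed) + 1)
    if not agreement:
        impact = 1 / impact
    if (r1 > itemAvg and r2 > itemAvg) or (r1 < itemAvg and r2 < itemAvg):
        popularity = 1 + ((r1 + r2) / 2 - itemAvg) ** 2
    else:
        popularity = 1
    # The user-user PIP score is the sum of these contributions over co-rated items.
    return proximity * impact * popularity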
def predict_evaluate(self, ratingTable, avgRating, testRatingList, k, **params):
    """Predict the held-out ratings and collect the evaluation metrics."""
    pprint("Evaluating %s Method" % self.name)
    if self.score is None:
        self.calculate(ratingTable, avgRating, **params)
    self.predict(ratingTable, avgRating, testRatingList, k)
    testScores = metrics.specificity_precision_accuracy(testRatingList['rating'], self.prediction)
    testScores.extend([metrics.mae(testRatingList['rating'], self.prediction),
                       metrics.rmse(testRatingList['rating'], self.prediction)])
    self.metrics = testScores
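# For reference, the two error metrics appended above are conventionally defined
# as MAE = mean(|y - y_hat|) and RMSE = sqrt(mean((y - y_hat)^2)). The hypothetical
# helpers below illustrate those definitions; the project's metrics module may
# handle missing or filtered predictions differently.
import numpy as np

def _mae(actual, predicted):
    """Mean absolute error between actual and predicted ratings."""
    actual, predicted = np.asarray(actual, dtype=float), np.asarray(predicted, dtype=float)
    return float(np.mean(np.abs(actual - predicted)))

def _rmse(actual, predicted):
    """Root mean squared error between actual and predicted ratings."""
    actual, predicted = np.asarray(actual, dtype=float), np.asarray(predicted, dtype=float)
    return float(np.sqrt(np.mean((actual - predicted) ** 2)))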
def run():
    keys = list(dataset.DATASETS.keys())
    if MULTI_TYPE_TEST is False:
        keys = keys[:1]
    for key in keys:
        pprint("Testing for %s" % key, symbolCount=16, sepCount=1)

        # Load the datasets
        ratingList = dataset.getRatingsList(key)
        persScores = dataset.getPersonalityDataset()

        # Minimise dataset for optimization
        if SHOULD_MINIMIZE_SET is True:
            ratingList, persScores = dataset.minimizeSet(
                ratingList, persScores, MINIMUM_ITEM_RATED_COUNT, MINIMUM_USER_RATE_COUNT)

        # Hold out one rating per user and zero it in the training frame
        testRatingList, trainRatingList = splitting.test_train_inPlaceSplit_Frame(
            ratingList, 1, relativeSplit=False, shuffle=SHOULD_SHUFFLE,
            random_state=RANDOM_STATE)
        testRatingList.index = testRatingList['itemId']

        ratingTable = dataset.getRatingTable(trainRatingList)
        sparsity = 1 - len(trainRatingList) / np.prod(ratingTable.shape)
        pprint("-> Sparsity: %f%%" % float(sparsity * 100))

        # Time the high-computation scoring tasks
        with Timing() as startTime:
            # Get average ratings
            avgRating = rating.getUsersAverageRating(ratingTable)
            itemsAvgRating = rating.getItemsAverageRating(ratingTable)

            # Calculate similarity scores
            methods = {
                algo.Pearson.TASK: algo.Pearson(ratingTable, avgRating, k=NEIGHBOURS_COUNT),
                algo.Pip.TASK: algo.Pip(ratingTable, avgRating,
                                        itemsAvgRating=itemsAvgRating, k=NEIGHBOURS_COUNT),
                algo.Personality.TASK: algo.Personality(ratingTable, avgRating,
                                                        persScores=persScores, k=NEIGHBOURS_COUNT),
            }
            hybrids = {
                "pipPer": algo.Hybrid(ratingTable, avgRating,
                                      algo1=methods[algo.Pip.TASK],
                                      algo2=methods[algo.Personality.TASK],
                                      alpha=HYBRID_ALPHA),
                "persPer": algo.Hybrid(ratingTable, avgRating,
                                       algo1=methods[algo.Personality.TASK],
                                       algo2=methods[algo.Personality.TASK],
                                       alpha=HYBRID_ALPHA),
            }
        pprint("-> Scores Calculated in %.4f seconds" % startTime.getElapsedTime())

        # Time the prediction and evaluation tasks
        with Timing() as startTime:
            # Calculate predicted ratings and metrics
            for method in methods.values():
                method.predict_evaluate(ratingTable, avgRating, testRatingList,
                                        k=NEIGHBOURS_COUNT, itemsAvgRating=itemsAvgRating)
            for method in hybrids.values():
                method.predict_evaluate(ratingTable, avgRating, testRatingList,
                                        k=NEIGHBOURS_COUNT, itemsAvgRating=itemsAvgRating)
            methods = {**methods, **hybrids}
        pprint("-> Ratings Calculated in %.4f seconds" % startTime.getElapsedTime())

        # Print the results table
        testLabels = ['Method', 'Specificity', 'Precision', 'Accuracy', 'MAE', 'RMSE']
        resultLabel = " Test Scores "
        pprint(resultLabel, symbolCount=int(
            (COLUMN_LENGTH * len(testLabels) - len(resultLabel)) / 2))
        rowFormat = getRowFormat(len(testLabels))
        print(rowFormat.format(*testLabels))
        for method in methods.values():
            print(rowFormat.format(method.name,
                                   *["%.4f" % val for val in method.metrics]))
        print("*" * (COLUMN_LENGTH * len(testLabels)), end="\n\n\n")
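# Worked example of the sparsity figure printed in run(): with a hypothetical
# training split of 1,000 ratings spread over a 100-user x 50-item rating table,
# sparsity = 1 - 1000 / (100 * 50) = 0.8, which the pprint call reports as 80%.
trainCount, userCount, itemCount = 1000, 100, 50             # hypothetical toy numbers
exampleSparsity = 1 - trainCount / (userCount * itemCount)   # = 0.8 -> printed as 80%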